aboutsummaryrefslogtreecommitdiff
path: root/final/runtime/tools/message-converter.pl
diff options
context:
space:
mode:
Diffstat (limited to 'final/runtime/tools/message-converter.pl')
-rwxr-xr-xfinal/runtime/tools/message-converter.pl775
1 files changed, 775 insertions, 0 deletions
diff --git a/final/runtime/tools/message-converter.pl b/final/runtime/tools/message-converter.pl
new file mode 100755
index 0000000..e22c928
--- /dev/null
+++ b/final/runtime/tools/message-converter.pl
@@ -0,0 +1,775 @@
+#!/usr/bin/perl
+
+#
+#//===----------------------------------------------------------------------===//
+#//
+#// The LLVM Compiler Infrastructure
+#//
+#// This file is dual licensed under the MIT and the University of Illinois Open
+#// Source Licenses. See LICENSE.txt for details.
+#//
+#//===----------------------------------------------------------------------===//
+#
+
+use strict;
+use warnings;
+
+use File::Glob ":glob";
+use Encode qw{ encode };
+
+use FindBin;
+use lib "$FindBin::Bin/lib";
+
+use tools;
+
+our $VERSION = "0.04";
+
+# A placeholder is introduced by a percent sign...
+my $escape = qr{%};
+# ...followed by a one-digit argument number, a dollar sign, and a type: "s", or "d"/"u" with an
+# optional "l" prefix. Capture group 1 is the argument number, group 2 is the type.
+my $placeholder = qr{(\d)\$(s|l?[du])};
+# Name of the OS the message formats are generated for.
+my $target_os;
+
+# Known sections, in the order they are numbered and written to output files.
+my @sections = qw{ meta strings formats messages hints };
+# Per-section properties, keyed by long section name. Only short names are spelled out here, the
+# rest is derived from the position of the section in @sections.
+my $sections =
+    {
+        meta     => { short => "prp" }, # "prp" stands for "property".
+        strings  => { short => "str" },
+        formats  => { short => "fmt" },
+        messages => { short => "msg" },
+        hints    => { short => "hnt" },
+    };
+# Assign section properties: long name, set number (1-based), base number (set number << 16).
+for my $index ( 0 .. $#sections ) {
+    my $long  = $sections[ $index ];
+    my $props = $sections->{ $long };
+    $props->{ long } = $long;
+    $props->{ set }  = ( $index + 1 );
+    $props->{ base } = ( ( $index + 1 ) << 16 );
+}; # for $index
+
+# Properties of Meta section.
+my @properties = qw{ Language Country LangId Version Revision };
+
+
+sub _generate_comment($$$) {
+
+    # Build the two-line "generated file" banner put at the top of every output file.
+    #     $data  -- parsed catalog; only $data->{ "%meta" }->{ source } is read here.
+    #     $open  -- text opening a comment in the target format (e. g. "//").
+    #     $close -- text closing a comment in the target format.
+    # Returns the banner as a single string, each line terminated with "\n".
+    # get_file() and $tool are not defined in this file (presumably exported by the tools module).
+
+    my ( $data, $open, $close ) = @_;
+    my $source = get_file( $data->{ "%meta" }->{ source } );
+    my $stamp  = localtime(); # Scalar context: human-readable date string.
+    my @lines =
+        (
+            " Do not edit this file! ",
+            " The file was generated from " . $source . " by " . $tool . " on " . $stamp . ". ",
+        );
+    return join( "", map( $open . $_ . $close . "\n", @lines ) );
+
+}; # sub _generate_comment
+
+
+sub msg2sgn($) {
+
+    # Build signature of a message: space-separated list of its placeholders ordered by argument
+    # number, with all the other text dropped. Argument numbers which are not used in the message
+    # but are lower than the highest used one are shown with "-" in place of the type.
+    # For example, signature of "%1$s value \"%2$s\" is invalid." is "%1$s %2$s",
+    # and signature of "Hint: %2$s" is "%1$- %2$s".
+
+    my ( $msg ) = @_;
+    my @slots; # $slots[ n - 1 ] is the placeholder for argument number n.
+    pos( $msg ) = 0;
+    while ( $msg =~ m{\G.*?$escape$placeholder}g ) {
+        my ( $number, $type ) = ( $1, $2 );
+        $slots[ $number - 1 ] = "%" . $number . "\$" . $type;
+    }; # while
+    # Fill the gaps left by unused argument numbers.
+    foreach my $idx ( 0 .. $#slots ) {
+        if ( not defined( $slots[ $idx ] ) ) {
+            $slots[ $idx ] = "%" . ( $idx + 1 ) . "\$-";
+        }; # if
+    }; # foreach $idx
+    return join( " ", @slots );
+
+}; # sub msg2sgn
+
+
+sub msg2src($) {
+
+    # Convert message string to the form put into generated sources for the target OS.
+    # If target OS is "win", every placeholder like "%1$s" is rewritten as "%1!s!";
+    # for any other target OS the message is returned as is.
+
+    my ( $msg ) = @_;
+    unless ( $target_os eq "win" ) {
+        return $msg;
+    }; # unless
+    $msg =~ s{$escape$placeholder}{\%$1!$2!}g;
+    return $msg;
+
+}; # sub msg2src
+
+
+# Characters which stand for a control character when they follow a backslash.
+my $special =
+    {
+        "n" => "\n",
+        "t" => "\t",
+    };
+
+sub msg2mc($) {
+
+    # Convert message string to the text written into Windows* OS message file:
+    # placeholders are converted by msg2src(), then backslash sequences are resolved --
+    # "\n" and "\t" become newline and tab, any other "\x" becomes just "x".
+
+    my ( $msg ) = @_;
+    my $text = msg2src( $msg ); # Get windows style placeholders.
+    $text =~ s{\\(.)}{ my $char = $1; exists( $special->{ $char } ) ? $special->{ $char } : $char }ge;
+    return $text;
+
+}; # sub msg2mc
+
+
+
+sub parse_message($) {
+
+    # Check %-sequences of a message. Every percent sign must start a valid placeholder.
+    # Returns undef if the message is ok, or error text quoting up to 7 characters following the
+    # offending percent sign.
+
+    my ( $msg ) = @_;
+    pos( $msg ) = 0;
+    # Hop from one percent sign to the next one. The /c modifier keeps the position when a match
+    # fails, so pos() still points right after the percent sign when the error text is built.
+    while ( $msg =~ m{\G.*?$escape}gc ) {
+        next if $msg =~ m{\G$placeholder}gc;
+        return "Bad %-sequence near \"%" . substr( $msg, pos( $msg ), 7 ) . "\"";
+    }; # while
+    return undef;
+
+}; # sub parse_message
+
+
+sub parse_source($) {
+
+    # Parse message catalog source file $name and return reference to a hash with parsed data:
+    #     "%meta"   -- hash of meta properties (see @properties) plus "source", the file name;
+    #     <section> -- for every name in @sections, array of [ identifier, message ] pairs in the
+    #                  order of appearance ("meta" array is built from properties, in the order
+    #                  of @properties).
+    # All the errors are reported through runtime_error() (not defined in this file; the code
+    # below relies on it not returning).
+
+    my ( $name ) = @_;
+
+    my @bulk = read_file( $name, -layer => ":utf8" );
+    my $data = {};
+
+    my $n = 0; # Line number.
+    my $obsolete = 0; # Counter of obsolete entries.
+    my $last_idx; # Index of the message a continuation line may be appended to.
+    my %idents; # Message identifiers defined in the current section.
+    my %seen_sections; # Sections which headers have already been met.
+    my $section; # Name of the current section (lower case).
+
+    my $error =
+        sub {
+            my ( $n, $line, $msg ) = @_;
+            runtime_error( "Error parsing $name line $n: " . "$msg:\n" . " $line" );
+        }; # sub
+
+    foreach my $line ( @bulk ) {
+        ++ $n;
+        # Skip empty lines and comments. Such a line also ends the current message, so it can not
+        # be followed by a continuation line.
+        if ( $line =~ m{\A\s*(\n|#)} ) {
+            $last_idx = undef;
+            next;
+        }; # if
+        # Parse section header.
+        if ( $line =~ m{\A-\*-\s*([A-Z_]*)\s*-\*-\s*\n\z}i ) {
+            $section = ( lc( $1 ) );
+            if ( not grep( $section eq $_, @sections ) ) {
+                $error->( $n, $line, "Unknown section \"$section\" specified" );
+            }; # if
+            # Headers are tracked explicitly: $data->{ $section } does not exist until the first
+            # message is saved (and is never filled for meta while parsing), so it can not be
+            # used to detect a repeated header.
+            if ( $seen_sections{ $section } ) {
+                $error->( $n, $line, "Multiple sections of the same type specified" );
+            }; # if
+            $seen_sections{ $section } = 1;
+            %idents = (); # Clean list of known message identifiers.
+            # A continuation line must not reach back into the previous section.
+            $last_idx = undef;
+            next;
+        }; # if
+        if ( not defined( $section ) ) {
+            $error->( $n, $line, "Section heading expected" );
+        }; # if
+        # Parse section body.
+        if ( $section eq "meta" ) {
+            if ( $line =~ m{\A([A-Z_][A-Z_0-9]*)\s+"(.*)"\s*?\n?\z}i ) {
+                # Parse meta properties (such as Language, Country, and LangId).
+                my ( $property, $value ) = ( $1, $2 );
+                if ( not grep( $_ eq $property , @properties ) ) {
+                    $error->( $n, $line, "Unknown property \"$property\" specified" );
+                }; # if
+                if ( exists( $data->{ "%meta" }->{ $property } ) ) {
+                    $error->( $n, $line, "Property \"$property\" has already been specified" );
+                }; # if
+                $data->{ "%meta" }->{ $property } = $value;
+                $last_idx = undef;
+                next;
+            }; # if
+            $error->( $n, $line, "Property line expected" );
+        }; # if
+        # Parse message.
+        if ( $line =~ m{\A([A-Z_][A-Z_0-9]*)\s+"(.*)"\s*?\n?\z}i ) {
+            my ( $ident, $message ) = ( $1, $2 );
+            if ( $ident eq "OBSOLETE" ) {
+                # If id is "OBSOLETE", add a unique suffix. It provides convenient way to mark
+                # obsolete messages.
+                ++ $obsolete;
+                $ident .= $obsolete;
+            }; # if
+            if ( exists( $idents{ $ident } ) ) {
+                $error->( $n, $line, "Identifier \"$ident\" is redefined" );
+            }; # if
+            # Check %-sequences.
+            my $err = parse_message( $message );
+            if ( $err ) {
+                $error->( $n, $line, $err );
+            }; # if
+            # Save message.
+            push( @{ $data->{ $section } }, [ $ident, $message ] );
+            $idents{ $ident } = 1;
+            $last_idx = @{ $data->{ $section } } - 1;
+            next;
+        }; # if
+        # Parse continuation line.
+        if ( $line =~ m{\A\s*"(.*)"\s*\z} ) {
+            my $message = $1;
+            if ( not defined( $last_idx ) ) {
+                $error->( $n, $line, "Unexpected continuation line" );
+            }; # if
+            # Check %-sequences.
+            my $err = parse_message( $message );
+            if ( $err ) {
+                $error->( $n, $line, $err );
+            }; # if
+            # Save continuation.
+            $data->{ $section }->[ $last_idx ]->[ 1 ] .= $message;
+            next;
+        }; # if
+        $error->( $n, $line, "Message definition expected" );
+    }; # foreach $line
+    $data->{ "%meta" }->{ source } = $name;
+    # Make sure every known section is present in the result, even if it is empty or missed in
+    # the source file.
+    foreach my $section ( @sections ) {
+        if ( not exists( $data->{ $section } ) ) {
+            $data->{ $section } = [];
+        }; # if
+    }; # foreach $section
+
+    # All the properties are required. They make up the content of meta section.
+    foreach my $property ( @properties ) {
+        if ( not defined( $data->{ "%meta" }->{ $property } ) ) {
+            runtime_error(
+                "Error parsing $name: " .
+                    "Required \"$property\" property is not specified"
+            );
+        }; # if
+        push( @{ $data->{ meta } }, [ $property, $data->{ "%meta" }->{ $property } ] );
+    }; # foreach $property
+
+    return $data;
+
+}; # sub parse_source
+
+
+sub generate_enum($$$) {
+
+    # Write C enum of message identifiers to $file.
+    #     $data   -- parsed catalog, as returned by parse_source().
+    #     $file   -- name of the file to write.
+    #     $prefix -- prefix of all the generated C identifiers.
+    # Every section gets "<prefix>_<short>_first" member with explicit value (base number of the
+    # section), one member per message, and closing "<prefix>_<short>_last" member.
+
+    my ( $data, $file, $prefix ) = @_;
+    my @chunks; # Pieces of output text, in order.
+
+    push(
+        @chunks,
+        _generate_comment( $data, "//", "//" ),
+        "\n",
+        "enum ${prefix}_id {\n\n",
+        " // A special id for absence of message.\n",
+        " ${prefix}_null = 0,\n\n"
+    );
+
+    foreach my $section ( @sections ) {
+        my $props = $sections->{ $section }; # Section properties.
+        my $stem = $prefix . "_" . $props->{ short }; # Common start of all the section members.
+        push(
+            @chunks,
+            " // Set #" . $props->{ set } . ", " . $props->{ long } . ".\n",
+            " ${stem}_first = " . $props->{ base } . ",\n"
+        );
+        foreach my $item ( @{ $data->{ $section } } ) {
+            my $ident = $item->[ 0 ];
+            push( @chunks, " ${stem}_${ident},\n" );
+        }; # foreach $item
+        push( @chunks, " ${stem}_last,\n\n" );
+    }; # foreach $section
+
+    push(
+        @chunks,
+        " ${prefix}_xxx_lastest\n\n",
+        "}; // enum ${prefix}_id\n",
+        "\n",
+        "typedef enum ${prefix}_id ${prefix}_id_t;\n",
+        "\n",
+        "\n",
+        "// end of file //\n"
+    );
+
+    my $text = join( "", @chunks );
+    write_file( $file, \$text );
+
+}; # sub generate_enum
+
+
+sub generate_signature($$) {
+
+    # Write signature file to $file: for every section, a header followed by one line per
+    # message with the message identifier and the message signature (see msg2sgn()).
+    #     $data -- parsed catalog, as returned by parse_source().
+    #     $file -- name of the file to write.
+
+    my ( $data, $file ) = @_;
+    my @lines = ( "// message catalog signature file //\n\n" );
+
+    foreach my $section ( @sections ) {
+        my $title = uc( $sections->{ $section }->{ long } );
+        push( @lines, "-*- " . $title . "-*-\n\n" );
+        foreach my $item ( @{ $data->{ $section } } ) {
+            my ( $ident, $msg ) = @$item;
+            push( @lines, sprintf( "%-40s %s\n", $ident, msg2sgn( $msg ) ) );
+        }; # foreach $item
+        push( @lines, "\n" );
+    }; # foreach $section
+
+    push( @lines, "// end of file //\n" );
+
+    my $text = join( "", @lines );
+    write_file( $file, \$text );
+
+}; # sub generate_signature
+
+
+sub generate_default($$$) {
+
+    # Write C include file with default messages to $file.
+    #     $data   -- parsed catalog, as returned by parse_source().
+    #     $file   -- name of the file to write.
+    #     $prefix -- prefix of the generated C variables.
+    # The file defines:
+    #     * one array of strings per section, "__<prefix>_default_<section>", with NULL in the
+    #       first and in the last element, and messages in between;
+    #     * array of sections, "__<prefix>_sections", with the number of messages and the array
+    #       of strings for every section;
+    #     * the table, "__kmp_i18n_default_table", referring to the array of sections.
+    # NOTE(review): names of types and of the table are fixed and do not depend on $prefix --
+    # presumably because consumers refer to them by these names; confirm before changing.
+
+    my ( $data, $file, $prefix ) = @_;
+    my $bulk = "";
+
+    $bulk .=
+        _generate_comment( $data, "//", "//" ) .
+        "\n";
+
+    foreach my $section ( @sections ) {
+        $bulk .=
+            "static char const *\n" .
+            "__${prefix}_default_${section}" . "[] =\n" .
+            " {\n" .
+            " NULL,\n";
+        foreach my $item ( @{ $data->{ $section } } ) {
+            my ( undef, $msg ) = @$item;
+            $bulk .= " \"" . msg2src( $msg ) . "\",\n";
+        }; # foreach $item
+        $bulk .=
+            " NULL\n" .
+            " };\n" .
+            "\n";
+    }; # foreach $section
+
+    $bulk .=
+        "struct kmp_i18n_section {\n" .
+        " int size;\n" .
+        " char const ** str;\n" .
+        "}; // struct kmp_i18n_section\n" .
+        "typedef struct kmp_i18n_section kmp_i18n_section_t;\n" .
+        "\n" .
+        "static kmp_i18n_section_t\n" .
+        "__${prefix}_sections[] =\n" .
+        " {\n" .
+        " { 0, NULL },\n";
+    foreach my $section ( @sections ) {
+        # Array in string concatenation gives the number of messages in the section.
+        $bulk .=
+            " { " . @{ $data->{ $section } } . ", __${prefix}_default_${section} },\n";
+    }; # foreach $section
+    $bulk .=
+        " { 0, NULL }\n" .
+        " };\n" .
+        "\n";
+
+    $bulk .=
+        "struct kmp_i18n_table {\n" .
+        " int size;\n" .
+        " kmp_i18n_section_t * sect;\n" .
+        "}; // struct kmp_i18n_table\n" .
+        "typedef struct kmp_i18n_table kmp_i18n_table_t;\n" .
+        "\n" .
+        "static kmp_i18n_table_t __kmp_i18n_default_table =\n" .
+        " {\n" .
+        " " . @sections . ",\n" .
+        # Must be the same name the array of sections is defined with above, otherwise the
+        # generated file refers to an undeclared variable for any prefix but "kmp_i18n".
+        " __${prefix}_sections\n" .
+        " };\n" .
+        "\n" .
+        "// end of file //\n";
+
+    write_file( $file, \$bulk );
+
+}; # sub generate_default
+
+
+sub generate_message_unix($$) {
+
+    # Write message file for "lin" and "mac" target OSes to $file, in UTF-8.
+    #     $data -- parsed catalog, as returned by parse_source().
+    #     $file -- name of the file to write.
+    # Every section makes a "$set" with the set number of the section; messages within a set are
+    # numbered starting from 1.
+
+    my ( $data, $file ) = @_;
+    my $rule = "\$ " . ( "-" x 78 ) . "\n"; # Comment line separating sections visually.
+
+    my $text =
+        _generate_comment( $data, "\$", "\$" ) .
+        "\n" .
+        "\$quote \"\n\n";
+
+    foreach my $section ( @sections ) {
+        $text .= $rule . "\$ $section\n" . $rule . "\n";
+        $text .= "\$set " . $sections->{ $section }->{ set } . "\n" . "\n";
+        my $number = 1; # Number of the next message within the set.
+        foreach my $item ( @{ $data->{ $section } } ) {
+            my $msg = $item->[ 1 ];
+            $text .= $number . " \"" . msg2src( $msg ) . "\"\n";
+            ++ $number;
+        }; # foreach $item
+        $text .= "\n";
+    }; # foreach $section
+
+    $text .= "\n" . "\$ end of file \$\n";
+
+    write_file( $file, \$text, -layer => ":utf8" );
+
+}; # sub generate_message_unix
+
+
+sub generate_message_windows($$) {
+
+    # Write message file for "win" target OS to $file, encoded in UTF-16LE.
+    #     $data -- parsed catalog, as returned by parse_source().
+    #     $file -- name of the file to write.
+    # Every section makes a facility named with the short name of the section; messages within
+    # a facility are numbered starting from 1.
+
+    my ( $data, $file ) = @_;
+    my $meta = $data->{ "%meta" };
+    my $language = $meta->{ Language };
+    my $langid = $meta->{ LangId };
+    my @chunks; # Pieces of output text, in order.
+
+    push(
+        @chunks,
+        _generate_comment( $data, ";", ";" ),
+        "\n",
+        "LanguageNames = (" . $language . "=" . $langid . ":msg_" . $langid . ")\n",
+        "\n"
+    );
+
+    # Declare facilities.
+    push( @chunks, "FacilityNames=(\n" );
+    foreach my $section ( @sections ) {
+        my $props = $sections->{ $section }; # Section properties.
+        push( @chunks, " " . $props->{ short } . "=" . $props->{ set } . "\n" );
+    }; # foreach $section
+    push( @chunks, ")\n\n" );
+
+    # Messages.
+    foreach my $section ( @sections ) {
+        my $facility = $sections->{ $section }->{ short };
+        my $number = 1; # Number of the next message within the facility.
+        foreach my $item ( @{ $data->{ $section } } ) {
+            my $msg = $item->[ 1 ];
+            push(
+                @chunks,
+                "MessageId=" . $number . "\n",
+                "Facility=" . $facility . "\n",
+                "Language=" . $language . "\n",
+                msg2mc( $msg ) . "\n.\n\n"
+            );
+            ++ $number;
+        }; # foreach $item
+    }; # foreach $section
+
+    push( @chunks, "\n", "; end of file ;\n" );
+
+    # Convert text to UTF-16LE used in Windows* OS.
+    my $encoded = encode( "UTF-16LE", join( "", @chunks ) );
+    write_file( $file, \$encoded, -binary => 1 );
+
+}; # sub generate_message_windows
+
+
+#
+# Parse command line.
+#
+
+my $input_file;
+my $enum_file;
+my $signature_file;
+my $default_file;
+my $message_file;
+my $id;
+my $prefix = "";
+get_options(
+    "os=s" => \$target_os,
+    "enum-file=s" => \$enum_file,
+    "signature-file=s" => \$signature_file,
+    "default-file=s" => \$default_file,
+    "message-file=s" => \$message_file,
+    "id|lang-id" => \$id,
+    "prefix=s" => \$prefix,
+);
+if ( @ARGV == 0 ) {
+    cmdline_error( "No source file specified -- nothing to do" );
+}; # if
+if ( @ARGV > 1 ) {
+    cmdline_error( "Too many source files specified" );
+}; # if
+$input_file = $ARGV[ 0 ];
+
+
+# If target OS is not specified explicitly, use value of LIBOMP_OS environment variable. If it is
+# not defined either, detect host OS. Without this, $target_os stays undefined and the checks
+# below produce "uninitialized value" warnings and a misleading 'OS "" is not supported' error.
+if ( not defined( $target_os ) ) {
+    $target_os = $ENV{ LIBOMP_OS };
+}; # if
+if ( not defined( $target_os ) ) {
+    # Perl names of host OSes ($^O) mapped to OS names used by this script.
+    my %host_os =
+        (
+            "linux"   => "lin",
+            "darwin"  => "mac",
+            "MSWin32" => "win",
+            "cygwin"  => "win",
+            "msys"    => "win",
+        );
+    $target_os = $host_os{ $^O };
+}; # if
+if ( not defined( $target_os ) ) {
+    cmdline_error( "Host OS \"$^O\" is not recognized -- specify target OS with --os option" );
+}; # if
+
+# Select message file generator: format of message file depends on target OS.
+my $generate_message;
+if ( $target_os =~ m{\A(?:lin|mac)\z} ) {
+    $generate_message = \&generate_message_unix;
+} elsif ( $target_os eq "win" ) {
+    $generate_message = \&generate_message_windows;
+} else {
+    runtime_error( "OS \"$target_os\" is not supported" );
+}; # if
+
+
+#
+# Do the work.
+#
+
+# Source file is parsed once; every requested output is generated from the same parsed data.
+my $data = parse_source( $input_file );
+if ( defined( $id ) ) {
+    print( $data->{ "%meta" }->{ LangId }, "\n" );
+}; # if
+if ( defined( $enum_file ) ) {
+    generate_enum( $data, $enum_file, $prefix );
+}; # if
+if ( defined( $signature_file ) ) {
+    generate_signature( $data, $signature_file );
+}; # if
+if ( defined( $default_file ) ) {
+    generate_default( $data, $default_file, $prefix );
+}; # if
+if ( defined( $message_file ) ) {
+    $generate_message->( $data, $message_file );
+}; # if
+
+exit( 0 );
+
+__END__
+
+=pod
+
+=head1 NAME
+
+B<message-converter.pl> -- Convert message catalog source file into other text forms.
+
+=head1 SYNOPSIS
+
+B<message-converter.pl> I<option>... <file>
+
+=head1 OPTIONS
+
+=over
+
+=item B<--enum-file=>I<file>
+
+Generate enum file named I<file>.
+
+=item B<--default-file=>I<file>
+
+Generate default messages file named I<file>.
+
+=item B<--lang-id>
+
+Print language identifier of the message catalog source file.
+
+=item B<--message-file=>I<file>
+
+Generate message file.
+
+=item B<--signature-file=>I<file>
+
+Generate signature file.
+
+Signatures are used for checking compatibility. For example, to check a primary
+catalog and its translation to another language, signatures of both catalogs should be generated
+and compared. If signatures are identical, catalogs are compatible.
+
+=item B<--prefix=>I<prefix>
+
+Prefix to be used for all C identifiers (type and variable names) in enum and default messages
+files.
+
+=item B<--os=>I<str>
+
+Specify OS name the message formats to be converted for. If not specified explicitly, value of
+LIBOMP_OS environment variable is used. If LIBOMP_OS is not defined, host OS is detected.
+
+Depending on OS, B<message-converter.pl> converts message formats to GNU style or MS style.
+
+=item Standard Options
+
+=over
+
+=item B<--doc>
+
+=item B<--manual>
+
+Print full documentation and exit.
+
+=item B<--help>
+
+Print short help message and exit.
+
+=item B<--version>
+
+Print version string and exit.
+
+=back
+
+=back
+
+=head1 ARGUMENTS
+
+=over
+
+=item I<file>
+
+A name of input file.
+
+=back
+
+=head1 DESCRIPTION
+
+=head2 Message Catalog File Format
+
+It is plain text file in UTF-8 encoding. Empty lines and lines beginning with sharp sign (C<#>) are
+ignored. EBNF syntax of content:
+
+ catalog = { section };
+ section = header body;
+ header = "-*- " section-id " -*-" "\n";
+ body = { message };
+ message = message-id string "\n" { string "\n" };
+ section-id = identifier;
+ message-id = "OBSOLETE" | identifier;
+ identifier = letter { letter | digit | "_" };
+ string = """ { character } """;
+
+Identifier starts with letter, with following letters, digits, and underscores. Identifiers are
+case-sensitive. Section identifiers are fixed: C<META>, C<STRINGS>, C<FORMATS>, C<MESSAGES> and
+C<HINTS>. Message identifiers must be unique within section. Special C<OBSOLETE> pseudo-identifier
+may be used many times.
+
+String is a C string literal which must not cross line boundaries.
+Long messages may occupy multiple lines, a string per line.
+
+Message may include printf-like GNU-style placeholders for arguments: C<%I<n>$I<t>>,
+where I<n> is argument number (C<1>, C<2>, ...),
+I<t> -- argument type, C<s> (string) or C<d> (32-bit integer).
+
+See also comments in F<i18n/en_US.txt>.
+
+=head2 Output Files
+
+This script can generate 4 different text files from a single source:
+
+=over
+
+=item Enum file.
+
+Enum file is a C include file, containing definitions of message identifiers, e. g.:
+
+ enum kmp_i18n_id {
+
+ // Set #1, meta.
+ kmp_i18n_prp_first = 65536,
+ kmp_i18n_prp_Language,
+ kmp_i18n_prp_Country,
+ kmp_i18n_prp_LangId,
+ kmp_i18n_prp_Version,
+ kmp_i18n_prp_Revision,
+ kmp_i18n_prp_last,
+
+ // Set #2, strings.
+ kmp_i18n_str_first = 131072,
+ kmp_i18n_str_Error,
+ kmp_i18n_str_UnknownFile,
+ kmp_i18n_str_NotANumber,
+ ...
+
+ // Set #3, formats.
+ ...
+
+ kmp_i18n_xxx_lastest
+
+ }; // enum kmp_i18n_id
+
+ typedef enum kmp_i18n_id kmp_i18n_id_t;
+
+=item Default messages file.
+
+Default messages file is a C include file containing default messages to be embedded into
+application (and used if external message catalog does not exist or could not be open):
+
+ static char const *
+ __kmp_i18n_default_meta[] =
+ {
+ NULL,
+ "English",
+ "USA",
+ "1033",
+ "2",
+ "20090806",
+ NULL
+ };
+
+ static char const *
+ __kmp_i18n_default_strings[] =
+ {
+ NULL,
+ "Error",
+ "(unknown file)",
+ "not a number",
+ ...
+ NULL
+ };
+
+ ...
+
+=item Message file.
+
+Message file is an input for message compiler, F<gencat> on Linux* OS and OS X*, or F<mc.exe> on
+Windows* OS.
+
+Here is the example of Linux* OS message file:
+
+ $quote "
+ 1 "Japanese"
+ 2 "Japan"
+ 3 "1041"
+ 4 "2"
+ 5 "Based on English message catalog revision 20090806"
+ ...
+
+Example of Windows* OS message file:
+
+ LanguageNames = (Japanese=1041:msg_1041)
+
+ FacilityNames = (
+ prp=1
+ str=2
+ fmt=3
+ ...
+ )
+
+ MessageId=1
+ Facility=prp
+ Language=Japanese
+ Japanese
+ .
+
+ ...
+
+=item Signature.
+
+Signature is a processed source file: comments stripped, strings deleted, but placeholders kept and
+sorted.
+
+ -*- FORMATS-*-
+
+ Info %1$d %2$s
+ Warning %1$d %2$s
+ Fatal %1$d %2$s
+ SysErr %1$d %2$s
+ Hint %1$- %2$s
+ Pragma %1$s %2$s %3$s %4$s
+
+The purpose of signatures -- compare two message source files for compatibility. If signatures of
+two message sources are the same, binary message catalogs will be compatible.
+
+=back
+
+=head1 EXAMPLES
+
+Generate include file containing message identifiers:
+
+ $ message-converter.pl --enum-file=kmp_i18n_id.inc en_US.txt
+
+Generate include file containing default messages:
+
+ $ message-converter.pl --default-file=kmp_i18n_default.inc en_US.txt
+
+Generate input file for message compiler, Linux* OS example:
+
+ $ message-converter.pl --message-file=ru_RU.UTF-8.msg ru_RU.txt
+
+Generate input file for message compiler, Windows* OS example:
+
+ > message-converter.pl --message-file=ru_RU.UTF-8.mc ru_RU.txt
+
+=cut
+
+# end of file #
+