package Locale::Maketext::Lexicon::Gettext; $Locale::Maketext::Lexicon::Gettext::VERSION = '0.13'; use strict; =head1 NAME Locale::Maketext::Lexicon::Gettext - PO and MO file parser for Maketext =head1 SYNOPSIS Called via B: package Hello::L10N; use base 'Locale::Maketext'; use Locale::Maketext::Lexicon { de => [Gettext => 'hello/de.mo'], }; Directly calling C: use Locale::Maketext::Lexicon::Gettext; my %Lexicon = %{ Locale::Maketext::Lexicon::Gettext->parse() }; __DATA__ #: Hello.pm:10 msgid "Hello, World!" msgstr "Hallo, Welt!" #: Hello.pm:11 msgid "You have %quant(%1,piece) of mail." msgstr "Sie haben %quant(%1,Poststueck,Poststuecken)." =head1 DESCRIPTION This module implements a perl-based C parser for B. It transforms all C<%1>, C<%2>, <%*>... sequences to C<[_1]>, C<[_2]>, C<[_*]>, and so on. It accepts either plain PO file, or a MO file which will be handled with a pure-perl parser adapted from Imacat's C. Since version 0.03, this module also looks for C<%I(I)> in the lexicon strings, and transform it to C<[I,I]>. Any C<%1>, C<%2>... sequences inside the I will have their percent signs (C<%>) replaced by underscores (C<_>). The name of I above should begin with a letter or underscore, followed by any number of alphanumeric characters and/or underscores. As an exception, the function name may also consist of a single asterisk (C<*>) or pound sign (C<#>), which are C's shorthands for C and C, respectively. As an additional feature, this module also parses MIME-header style metadata specified in the null msgstr (C<"">), and add them to the C<%Lexicon> with a C<__> prefix. For example, the example above will set C<__Content-Type> to C, without the newline or the colon. Any normal entry that duplicates a metadata entry takes precedence. Hence, a C line occurs anywhere should override the above value. =head1 NOTES When parsing PO files, fuzzy entries (entries marked with C<#, fuzzy>) are silently ignored. If you wish to use fuzzy entries, specify a true value to the C<_use_fuzzy> option: use Locale::Maketext::Lexicon { de => [Gettext => 'hello/de.mo'], _use_fuzzy => 1, }; =cut my ($InputEncoding, $OutputEncoding, $DoEncoding); sub input_encoding { $InputEncoding }; sub output_encoding { $OutputEncoding }; sub parse { my $self = shift; my (%var, $key, @ret); my @metadata; $InputEncoding = $OutputEncoding = $DoEncoding = undef; use Carp; Carp::cluck "Undefined source called\n" unless defined $_[0]; # Check for magic string of MO files return parse_mo(join('', @_)) if ($_[0] =~ /^\x95\x04\x12\xde/ or $_[0] =~ /^\xde\x12\x04\x95/); local $^W; # no 'uninitialized' warnings, please. require Locale::Maketext::Lexicon; my $UseFuzzy = Locale::Maketext::Lexicon::option('use_fuzzy'); # Parse PO files foreach (@_) { s/[\015\012]*\z//; # fix CRLF issues /^(msgid|msgstr) +"(.*)" *$/ ? do { # leading strings $var{$1} = $2; $key = $1; } : /^"(.*)" *$/ ? do { # continued strings $var{$key} .= $1; } : /^#, +(.*) *$/ ? do { # control variables $var{$_} = 1 for split(/,\s+/, $1); } : /^ *$/ && %var ? do { # interpolate string escapes push @ret, (map transform($_), @var{'msgid', 'msgstr'}) if length $var{msgstr} and !$var{fuzzy} or $UseFuzzy; push @metadata, parse_metadata($var{msgstr}) if $var{msgid} eq ''; %var = (); } : (); } push @ret, map { transform($_) } @var{'msgid', 'msgstr'} if length $var{msgstr}; push @metadata, parse_metadata($var{msgstr}) if $var{msgid} eq ''; return {@metadata, @ret}; } sub parse_metadata { return map { (/^([^\x00-\x1f\x80-\xff :=]+):\s*(.*)$/) ? ($1 eq 'Content-Type') ? do { my $enc = $2; if ($enc =~ /\bcharset=\s*([-\w]+)/i) { $InputEncoding = $1 || ''; $OutputEncoding = Locale::Maketext::Lexicon::encoding() || ''; $InputEncoding = 'utf8' if $InputEncoding =~ /^utf-?8$/i; $OutputEncoding = 'utf8' if $OutputEncoding =~ /^utf-?8$/i; if ( Locale::Maketext::Lexicon::option('decode') and (!$OutputEncoding or $InputEncoding ne $OutputEncoding)) { require Encode::compat if $] < 5.007001; require Encode; $DoEncoding = 1; } } ("__Content-Type", $enc); } : ("__$1", $2) : (); } split(/\r*\n+\r*/, transform(pop)); } sub transform { my $str = shift; if ($DoEncoding and $InputEncoding) { $str = ($InputEncoding eq 'utf8') ? Encode::decode_utf8($str) : Encode::decode($InputEncoding, $str) } $str =~ s/\\([0x]..|c?.)/qq{"\\$1"}/eeg; if ($DoEncoding and $OutputEncoding) { $str = ($OutputEncoding eq 'utf8') ? Encode::encode_utf8($str) : Encode::encode($OutputEncoding, $str) } $str =~ s/([~\[\]])/~$1/g; $str =~ s/(? 0; my ($num, $offo, $offt); # Number of strings $num = unpack $tmpl, substr($content, 8, 4); # Offset to the beginning of the original strings $offo = unpack $tmpl, substr($content, 12, 4); # Offset to the beginning of the translated strings $offt = unpack $tmpl, substr($content, 16, 4); my (@metadata, @ret); for (0 .. $num - 1) { my ($len, $off, $stro, $strt); # The first word is the length of the string $len = unpack $tmpl, substr($content, $offo+$_*8, 4); # The second word is the offset of the string $off = unpack $tmpl, substr($content, $offo+$_*8+4, 4); # Original string $stro = substr($content, $off, $len); # The first word is the length of the string $len = unpack $tmpl, substr($content, $offt+$_*8, 4); # The second word is the offset of the string $off = unpack $tmpl, substr($content, $offt+$_*8+4, 4); # Translated string $strt = substr($content, $off, $len); # Hash it push @metadata, parse_metadata($strt) if $stro eq ''; push @ret, (map transform($_), $stro, $strt) if length $strt; } return {@metadata, @ret}; } 1; =head1 SEE ALSO L, L =head1 AUTHORS Autrijus Tang Eautrijus@autrijus.orgE =head1 COPYRIGHT Copyright 2002, 2003, 2004 by Autrijus Tang Eautrijus@autrijus.orgE. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See L =cut