diff options
author | Andreas Mair <amair.sob@googlemail.com> | 2006-10-27 12:00:31 +0200 |
---|---|---|
committer | Andreas Mair <amair.sob@googlemail.com> | 2006-10-27 12:00:31 +0200 |
commit | 24b39843b935f8a37b2d9dc909a530b771a61c1f (patch) | |
tree | 73869bd46144e37f32bdc6bff4f93453425f4951 /lib | |
parent | 86c56591df441fad78ac69d2f77196df05305758 (diff) | |
download | vdradmin-am-3.5.0beta.tar.gz vdradmin-am-3.5.0beta.tar.bz2 |
2006-10-27: 3.5.0betav3.5.0beta
- Added: script to convert existing AutoTimers to epgsearch (autotimer2searchtimer.pl).
- Added: Hack for MSIE to always show vertical scroller to prevent horizontal scroller (Submitted by Udo Richter).
- Added: Support for epgsearch plugin; AutoTimer now considered deprecated and unsupported.
- Minor bugfixes
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Parse/RecDescent.pm | 3045 | ||||
-rw-r--r-- | lib/Template/Plugin/HTML.pm | 197 | ||||
-rw-r--r-- | lib/Template/Plugin/JavaScript.pm | 73 | ||||
-rw-r--r-- | lib/Text/Balanced.pm | 2301 |
4 files changed, 270 insertions, 5346 deletions
diff --git a/lib/Parse/RecDescent.pm b/lib/Parse/RecDescent.pm deleted file mode 100644 index 35b9e9d..0000000 --- a/lib/Parse/RecDescent.pm +++ /dev/null @@ -1,3045 +0,0 @@ -# GENERATE RECURSIVE DESCENT PARSER OBJECTS FROM A GRAMMARC -# SEE RecDescent.pod FOR FULL DETAILS - -use 5.005; -use strict; - -package Parse::RecDescent; - -use Text::Balanced qw ( extract_codeblock extract_bracketed extract_quotelike extract_delimited ); - -use vars qw ( $skip ); - - *defskip = \ '\s*'; # DEFAULT SEPARATOR IS OPTIONAL WHITESPACE - $skip = '\s*'; # UNIVERSAL SEPARATOR IS OPTIONAL WHITESPACE -my $MAXREP = 100_000_000; # REPETITIONS MATCH AT MOST 100,000,000 TIMES - - -sub import # IMPLEMENT PRECOMPILER BEHAVIOUR UNDER: - # perl -MParse::RecDescent - <grammarfile> <classname> -{ - local *_die = sub { print @_, "\n"; exit }; - - my ($package, $file, $line) = caller; - if (substr($file,0,1) eq '-' && $line == 0) - { - _die("Usage: perl -MLocalTest - <grammarfile> <classname>") - unless @ARGV == 2; - - my ($sourcefile, $class) = @ARGV; - - local *IN; - open IN, $sourcefile - or _die("Can't open grammar file '$sourcefile'"); - - my $grammar = join '', <IN>; - - Parse::RecDescent->Precompile($grammar, $class, $sourcefile); - exit; - } -} - -sub Save -{ - my ($self, $class) = @_; - $self->{saving} = 1; - $self->Precompile(undef,$class); - $self->{saving} = 0; -} - -sub Precompile -{ - my ($self, $grammar, $class, $sourcefile) = @_; - - $class =~ /^(\w+::)*\w+$/ or croak("Bad class name: $class"); - - my $modulefile = $class; - $modulefile =~ s/.*:://; - $modulefile .= ".pm"; - - open OUT, ">$modulefile" - or croak("Can't write to new module file '$modulefile'"); - - print STDERR "precompiling grammar from file '$sourcefile'\n", - "to class $class in module file '$modulefile'\n" - if $grammar && $sourcefile; - - # local $::RD_HINT = 1; - $self = Parse::RecDescent->new($grammar,1,$class) - || croak("Can't compile bad grammar") - if $grammar; - - foreach ( keys %{$self->{rules}} ) - { $self->{rules}{$_}{changed} = 1 } - - print OUT "package $class;\nuse Parse::RecDescent;\n\n"; - - print OUT "{ my \$ERRORS;\n\n"; - - print OUT $self->_code(); - - print OUT "}\npackage $class; sub new { "; - print OUT "my "; - - require Data::Dumper; - print OUT Data::Dumper->Dump([$self], [qw(self)]); - - print OUT "}"; - - close OUT - or croak("Can't write to new module file '$modulefile'"); -} - - -package Parse::RecDescent::LineCounter; - - -sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag) -{ - bless { - text => $_[1], - parser => $_[2], - prev => $_[3]?1:0, - }, $_[0]; -} - -my %counter_cache; - -sub FETCH -{ - my $parser = $_[0]->{parser}; - my $from = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev} -; - - unless (exists $counter_cache{$from}) { - $parser->{lastlinenum} = $parser->{offsetlinenum} - - Parse::RecDescent::_linecount(substr($parser->{fulltext},$from)) - + 1; - $counter_cache{$from} = $parser->{lastlinenum}; - } - return $counter_cache{$from}; -} - -sub STORE -{ - my $parser = $_[0]->{parser}; - $parser->{offsetlinenum} -= $parser->{lastlinenum} - $_[1]; - return undef; -} - -sub resync # ($linecounter) -{ - my $self = tied($_[0]); - die "Tried to alter something other than a LineCounter\n" - unless $self =~ /Parse::RecDescent::LineCounter/; - - my $parser = $self->{parser}; - my $apparently = $parser->{offsetlinenum} - - Parse::RecDescent::_linecount(${$self->{text}}) - + 1; - - $parser->{offsetlinenum} += $parser->{lastlinenum} - $apparently; - return 1; -} - -package Parse::RecDescent::ColCounter; - -sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag) -{ - bless { - text => $_[1], - parser => $_[2], - prev => $_[3]?1:0, - }, $_[0]; -} - -sub FETCH -{ - my $parser = $_[0]->{parser}; - my $missing = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev}+1; - substr($parser->{fulltext},0,$missing) =~ m/^(.*)\Z/m; - return length($1); -} - -sub STORE -{ - die "Can't set column number via \$thiscolumn\n"; -} - - -package Parse::RecDescent::OffsetCounter; - -sub TIESCALAR # ($classname, \$text, $thisparser, $prev) -{ - bless { - text => $_[1], - parser => $_[2], - prev => $_[3]?-1:0, - }, $_[0]; -} - -sub FETCH -{ - my $parser = $_[0]->{parser}; - return $parser->{fulltextlen}-length(${$_[0]->{text}})+$_[0]->{prev}; -} - -sub STORE -{ - die "Can't set current offset via \$thisoffset or \$prevoffset\n"; -} - - - -package Parse::RecDescent::Rule; - -sub new ($$$$$) -{ - my $class = ref($_[0]) || $_[0]; - my $name = $_[1]; - my $owner = $_[2]; - my $line = $_[3]; - my $replace = $_[4]; - - if (defined $owner->{"rules"}{$name}) - { - my $self = $owner->{"rules"}{$name}; - if ($replace && !$self->{"changed"}) - { - $self->reset; - } - return $self; - } - else - { - return $owner->{"rules"}{$name} = - bless - { - "name" => $name, - "prods" => [], - "calls" => [], - "changed" => 0, - "line" => $line, - "impcount" => 0, - "opcount" => 0, - "vars" => "", - }, $class; - } -} - -sub reset($) -{ - @{$_[0]->{"prods"}} = (); - @{$_[0]->{"calls"}} = (); - $_[0]->{"changed"} = 0; - $_[0]->{"impcount"} = 0; - $_[0]->{"opcount"} = 0; - $_[0]->{"vars"} = ""; -} - -sub DESTROY {} - -sub hasleftmost($$) -{ - my ($self, $ref) = @_; - - my $prod; - foreach $prod ( @{$self->{"prods"}} ) - { - return 1 if $prod->hasleftmost($ref); - } - - return 0; -} - -sub leftmostsubrules($) -{ - my $self = shift; - my @subrules = (); - - my $prod; - foreach $prod ( @{$self->{"prods"}} ) - { - push @subrules, $prod->leftmostsubrule(); - } - - return @subrules; -} - -sub expected($) -{ - my $self = shift; - my @expected = (); - - my $prod; - foreach $prod ( @{$self->{"prods"}} ) - { - my $next = $prod->expected(); - unless (! $next or _contains($next,@expected) ) - { - push @expected, $next; - } - } - - return join ', or ', @expected; -} - -sub _contains($@) -{ - my $target = shift; - my $item; - foreach $item ( @_ ) { return 1 if $target eq $item; } - return 0; -} - -sub addcall($$) -{ - my ( $self, $subrule ) = @_; - unless ( _contains($subrule, @{$self->{"calls"}}) ) - { - push @{$self->{"calls"}}, $subrule; - } -} - -sub addprod($$) -{ - my ( $self, $prod ) = @_; - push @{$self->{"prods"}}, $prod; - $self->{"changed"} = 1; - $self->{"impcount"} = 0; - $self->{"opcount"} = 0; - $prod->{"number"} = $#{$self->{"prods"}}; - return $prod; -} - -sub addvar -{ - my ( $self, $var, $parser ) = @_; - if ($var =~ /\A\s*local\s+([%@\$]\w+)/) - { - $parser->{localvars} .= " $1"; - $self->{"vars"} .= "$var;\n" } - else - { $self->{"vars"} .= "my $var;\n" } - $self->{"changed"} = 1; - return 1; -} - -sub addautoscore -{ - my ( $self, $code ) = @_; - $self->{"autoscore"} = $code; - $self->{"changed"} = 1; - return 1; -} - -sub nextoperator($) -{ - my $self = shift; - my $prodcount = scalar @{$self->{"prods"}}; - my $opcount = ++$self->{"opcount"}; - return "_operator_${opcount}_of_production_${prodcount}_of_rule_$self->{name}"; -} - -sub nextimplicit($) -{ - my $self = shift; - my $prodcount = scalar @{$self->{"prods"}}; - my $impcount = ++$self->{"impcount"}; - return "_alternation_${impcount}_of_production_${prodcount}_of_rule_$self->{name}"; -} - - -sub code -{ - my ($self, $namespace, $parser) = @_; - -eval 'undef &' . $namespace . '::' . $self->{"name"} unless $parser->{saving}; - - my $code = -' -# ARGS ARE: ($parser, $text; $repeating, $_noactions, \@args) -sub ' . $namespace . '::' . $self->{"name"} . ' -{ - my $thisparser = $_[0]; - use vars q{$tracelevel}; - local $tracelevel = ($tracelevel||0)+1; - $ERRORS = 0; - my $thisrule = $thisparser->{"rules"}{"' . $self->{"name"} . '"}; - - Parse::RecDescent::_trace(q{Trying rule: [' . $self->{"name"} . ']}, - Parse::RecDescent::_tracefirst($_[1]), - q{' . $self->{"name"} . '}, - $tracelevel) - if defined $::RD_TRACE; - - ' . ($parser->{deferrable} - ? 'my $def_at = @{$thisparser->{deferred}};' - : '') . - ' - my $err_at = @{$thisparser->{errors}}; - - my $score; - my $score_return; - my $_tok; - my $return = undef; - my $_matched=0; - my $commit=0; - my @item = (); - my %item = (); - my $repeating = defined($_[2]) && $_[2]; - my $_noactions = defined($_[3]) && $_[3]; - my @arg = defined $_[4] ? @{ &{$_[4]} } : (); - my %arg = ($#arg & 01) ? @arg : (@arg, undef); - my $text; - my $lastsep=""; - my $expectation = new Parse::RecDescent::Expectation($thisrule->expected()); - $expectation->at($_[1]); - '. ($parser->{_check}{thisoffset}?' - my $thisoffset; - tie $thisoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser; - ':'') . ($parser->{_check}{prevoffset}?' - my $prevoffset; - tie $prevoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser, 1; - ':'') . ($parser->{_check}{thiscolumn}?' - my $thiscolumn; - tie $thiscolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser; - ':'') . ($parser->{_check}{prevcolumn}?' - my $prevcolumn; - tie $prevcolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser, 1; - ':'') . ($parser->{_check}{prevline}?' - my $prevline; - tie $prevline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser, 1; - ':'') . ' - my $thisline; - tie $thisline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser; - - '. $self->{vars} .' -'; - - my $prod; - foreach $prod ( @{$self->{"prods"}} ) - { - $prod->addscore($self->{autoscore},0,0) if $self->{autoscore}; - next unless $prod->checkleftmost(); - $code .= $prod->code($namespace,$self,$parser); - - $code .= $parser->{deferrable} - ? ' splice - @{$thisparser->{deferred}}, $def_at unless $_matched; - ' - : ''; - } - - $code .= -' - unless ( $_matched || defined($return) || defined($score) ) - { - ' .($parser->{deferrable} - ? ' splice @{$thisparser->{deferred}}, $def_at; - ' - : '') . ' - - $_[1] = $text; # NOT SURE THIS IS NEEDED - Parse::RecDescent::_trace(q{<<Didn\'t match rule>>}, - Parse::RecDescent::_tracefirst($_[1]), - q{' . $self->{"name"} .'}, - $tracelevel) - if defined $::RD_TRACE; - return undef; - } - if (!defined($return) && defined($score)) - { - Parse::RecDescent::_trace(q{>>Accepted scored production<<}, "", - q{' . $self->{"name"} .'}, - $tracelevel) - if defined $::RD_TRACE; - $return = $score_return; - } - splice @{$thisparser->{errors}}, $err_at; - $return = $item[$#item] unless defined $return; - if (defined $::RD_TRACE) - { - Parse::RecDescent::_trace(q{>>Matched rule<< (return value: [} . - $return . q{])}, "", - q{' . $self->{"name"} .'}, - $tracelevel); - Parse::RecDescent::_trace(q{(consumed: [} . - Parse::RecDescent::_tracemax(substr($_[1],0,-length($text))) . q{])}, - Parse::RecDescent::_tracefirst($text), - , q{' . $self->{"name"} .'}, - $tracelevel) - } - $_[1] = $text; - return $return; -} -'; - - return $code; -} - -my @left; -sub isleftrec($$) -{ - my ($self, $rules) = @_; - my $root = $self->{"name"}; - @left = $self->leftmostsubrules(); - my $next; - foreach $next ( @left ) - { - next unless defined $rules->{$next}; # SKIP NON-EXISTENT RULES - return 1 if $next eq $root; - my $child; - foreach $child ( $rules->{$next}->leftmostsubrules() ) - { - push(@left, $child) - if ! _contains($child, @left) ; - } - } - return 0; -} - -package Parse::RecDescent::Production; - -sub describe ($;$) -{ - return join ' ', map { $_->describe($_[1]) or () } @{$_[0]->{items}}; -} - -sub new ($$;$$) -{ - my ($self, $line, $uncommit, $error) = @_; - my $class = ref($self) || $self; - - bless - { - "items" => [], - "uncommit" => $uncommit, - "error" => $error, - "line" => $line, - strcount => 0, - patcount => 0, - dircount => 0, - actcount => 0, - }, $class; -} - -sub expected ($) -{ - my $itemcount = scalar @{$_[0]->{"items"}}; - return ($itemcount) ? $_[0]->{"items"}[0]->describe(1) : ''; -} - -sub hasleftmost ($$) -{ - my ($self, $ref) = @_; - return ${$self->{"items"}}[0] eq $ref if scalar @{$self->{"items"}}; - return 0; -} - -sub leftmostsubrule($) -{ - my $self = shift; - - if ( $#{$self->{"items"}} >= 0 ) - { - my $subrule = $self->{"items"}[0]->issubrule(); - return $subrule if defined $subrule; - } - - return (); -} - -sub checkleftmost($) -{ - my @items = @{$_[0]->{"items"}}; - if (@items==1 && ref($items[0]) =~ /\AParse::RecDescent::Error/ - && $items[0]->{commitonly} ) - { - Parse::RecDescent::_warn(2,"Lone <error?> in production treated - as <error?> <reject>"); - Parse::RecDescent::_hint("A production consisting of a single - conditional <error?> directive would - normally succeed (with the value zero) if the - rule is not 'commited' when it is - tried. Since you almost certainly wanted - '<error?> <reject>' Parse::RecDescent - supplied it for you."); - push @{$_[0]->{items}}, - Parse::RecDescent::UncondReject->new(0,0,'<reject>'); - } - elsif (@items==1 && ($items[0]->describe||"") =~ /<rulevar|<autoscore/) - { - # Do nothing - } - elsif (@items && - ( ref($items[0]) =~ /\AParse::RecDescent::UncondReject/ - || ($items[0]->describe||"") =~ /<autoscore/ - )) - { - Parse::RecDescent::_warn(1,"Optimizing away production: [". $_[0]->describe ."]"); - my $what = $items[0]->describe =~ /<rulevar/ - ? "a <rulevar> (which acts like an unconditional <reject> during parsing)" - : $items[0]->describe =~ /<autoscore/ - ? "an <autoscore> (which acts like an unconditional <reject> during parsing)" - : "an unconditional <reject>"; - my $caveat = $items[0]->describe =~ /<rulevar/ - ? " after the specified variable was set up" - : ""; - my $advice = @items > 1 - ? "However, there were also other (useless) items after the leading " - . $items[0]->describe - . ", so you may have been expecting some other behaviour." - : "You can safely ignore this message."; - Parse::RecDescent::_hint("The production starts with $what. That means that the - production can never successfully match, so it was - optimized out of the final parser$caveat. $advice"); - return 0; - } - return 1; -} - -sub changesskip($) -{ - my $item; - foreach $item (@{$_[0]->{"items"}}) - { - if (ref($item) =~ /Parse::RecDescent::(Action|Directive)/) - { - return 1 if $item->{code} =~ /\$skip/; - } - } - return 0; -} - -sub adddirective -{ - my ( $self, $whichop, $line, $name ) = @_; - push @{$self->{op}}, - { type=>$whichop, line=>$line, name=>$name, - offset=> scalar(@{$self->{items}}) }; -} - -sub addscore -{ - my ( $self, $code, $lookahead, $line ) = @_; - $self->additem(Parse::RecDescent::Directive->new( - "local \$^W; - my \$thisscore = do { $code } + 0; - if (!defined(\$score) || \$thisscore>\$score) - { \$score=\$thisscore; \$score_return=\$item[-1]; } - undef;", $lookahead, $line,"<score: $code>") ) - unless $self->{items}[-1]->describe =~ /<score/; - return 1; -} - -sub check_pending -{ - my ( $self, $line ) = @_; - if ($self->{op}) - { - while (my $next = pop @{$self->{op}}) - { - Parse::RecDescent::_error("Incomplete <$next->{type}op:...>.", $line); - Parse::RecDescent::_hint( - "The current production ended without completing the - <$next->{type}op:...> directive that started near line - $next->{line}. Did you forget the closing '>'?"); - } - } - return 1; -} - -sub enddirective -{ - my ( $self, $line, $minrep, $maxrep ) = @_; - unless ($self->{op}) - { - Parse::RecDescent::_error("Unmatched > found.", $line); - Parse::RecDescent::_hint( - "A '>' angle bracket was encountered, which typically - indicates the end of a directive. However no suitable - preceding directive was encountered. Typically this - indicates either a extra '>' in the grammar, or a - problem inside the previous directive."); - return; - } - my $op = pop @{$self->{op}}; - my $span = @{$self->{items}} - $op->{offset}; - if ($op->{type} =~ /left|right/) - { - if ($span != 3) - { - Parse::RecDescent::_error( - "Incorrect <$op->{type}op:...> specification: - expected 3 args, but found $span instead", $line); - Parse::RecDescent::_hint( - "The <$op->{type}op:...> directive requires a - sequence of exactly three elements. For example: - <$op->{type}op:leftarg /op/ rightarg>"); - } - else - { - push @{$self->{items}}, - Parse::RecDescent::Operator->new( - $op->{type}, $minrep, $maxrep, splice(@{$self->{"items"}}, -3)); - $self->{items}[-1]->sethashname($self); - $self->{items}[-1]{name} = $op->{name}; - } - } -} - -sub prevwasreturn -{ - my ( $self, $line ) = @_; - unless (@{$self->{items}}) - { - Parse::RecDescent::_error( - "Incorrect <return:...> specification: - expected item missing", $line); - Parse::RecDescent::_hint( - "The <return:...> directive requires a - sequence of at least one item. For example: - <return: list>"); - return; - } - push @{$self->{items}}, - Parse::RecDescent::Result->new(); -} - -sub additem -{ - my ( $self, $item ) = @_; - $item->sethashname($self); - push @{$self->{"items"}}, $item; - return $item; -} - - -sub preitempos -{ - return q - { - push @itempos, {'offset' => {'from'=>$thisoffset, 'to'=>undef}, - 'line' => {'from'=>$thisline, 'to'=>undef}, - 'column' => {'from'=>$thiscolumn, 'to'=>undef} }; - } -} - -sub incitempos -{ - return q - { - $itempos[$#itempos]{'offset'}{'from'} += length($1); - $itempos[$#itempos]{'line'}{'from'} = $thisline; - $itempos[$#itempos]{'column'}{'from'} = $thiscolumn; - } -} - -sub postitempos -{ - return q - { - $itempos[$#itempos]{'offset'}{'to'} = $prevoffset; - $itempos[$#itempos]{'line'}{'to'} = $prevline; - $itempos[$#itempos]{'column'}{'to'} = $prevcolumn; - } -} - -sub code($$$$) -{ - my ($self,$namespace,$rule,$parser) = @_; - my $code = -' - while (!$_matched' - . (defined $self->{"uncommit"} ? '' : ' && !$commit') - . ') - { - ' . - ($self->changesskip() - ? 'local $skip = defined($skip) ? $skip : $Parse::RecDescent::skip;' - : '') .' - Parse::RecDescent::_trace(q{Trying production: [' - . $self->describe . ']}, - Parse::RecDescent::_tracefirst($_[1]), - q{' . $rule ->{name}. '}, - $tracelevel) - if defined $::RD_TRACE; - my $thisprod = $thisrule->{"prods"}[' . $self->{"number"} . ']; - ' . (defined $self->{"error"} ? '' : '$text = $_[1];' ) . ' - my $_savetext; - @item = (q{' . $rule->{"name"} . '}); - %item = (__RULE__ => q{' . $rule->{"name"} . '}); - my $repcount = 0; - -'; - $code .= -' my @itempos = ({}); -' if $parser->{_check}{itempos}; - - my $item; - my $i; - - for ($i = 0; $i < @{$self->{"items"}}; $i++) - { - $item = ${$self->{items}}[$i]; - - $code .= preitempos() if $parser->{_check}{itempos}; - - $code .= $item->code($namespace,$rule,$parser->{_check}); - - $code .= postitempos() if $parser->{_check}{itempos}; - - } - - if ($parser->{_AUTOACTION} && defined($item) && !$item->isa("Parse::RecDescent::Action")) - { - $code .= $parser->{_AUTOACTION}->code($namespace,$rule); - Parse::RecDescent::_warn(1,"Autogenerating action in rule - \"$rule->{name}\": - $parser->{_AUTOACTION}{code}") - and - Parse::RecDescent::_hint("The \$::RD_AUTOACTION was defined, - so any production not ending in an - explicit action has the specified - \"auto-action\" automatically - appended."); - } - elsif ($parser->{_AUTOTREE} && defined($item) && !$item->isa("Parse::RecDescent::Action")) - { - if ($i==1 && $item->isterminal) - { - $code .= $parser->{_AUTOTREE}{TERMINAL}->code($namespace,$rule); - } - else - { - $code .= $parser->{_AUTOTREE}{NODE}->code($namespace,$rule); - } - Parse::RecDescent::_warn(1,"Autogenerating tree-building action in rule - \"$rule->{name}\"") - and - Parse::RecDescent::_hint("The directive <autotree> was specified, - so any production not ending - in an explicit action has - some parse-tree building code - automatically appended."); - } - - $code .= -' - - Parse::RecDescent::_trace(q{>>Matched production: [' - . $self->describe . ']<<}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{name} . '}, - $tracelevel) - if defined $::RD_TRACE; - $_matched = 1; - last; - } - -'; - return $code; -} - -1; - -package Parse::RecDescent::Action; - -sub describe { undef } - -sub sethashname { $_[0]->{hashname} = '__ACTION' . ++$_[1]->{actcount} .'__'; } - -sub new -{ - my $class = ref($_[0]) || $_[0]; - bless - { - "code" => $_[1], - "lookahead" => $_[2], - "line" => $_[3], - }, $class; -} - -sub issubrule { undef } -sub isterminal { 0 } - -sub code($$$$) -{ - my ($self, $namespace, $rule) = @_; - -' - Parse::RecDescent::_trace(q{Trying action}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{name} . '}, - $tracelevel) - if defined $::RD_TRACE; - ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' - - $_tok = ($_noactions) ? 0 : do ' . $self->{"code"} . '; - ' . ($self->{"lookahead"}<0?'if':'unless') . ' (defined $_tok) - { - Parse::RecDescent::_trace(q{<<Didn\'t match action>> (return value: [undef])}) - if defined $::RD_TRACE; - last; - } - Parse::RecDescent::_trace(q{>>Matched action<< (return value: [} - . $_tok . q{])}, - Parse::RecDescent::_tracefirst($text)) - if defined $::RD_TRACE; - push @item, $_tok; - ' . ($self->{line}>=0 ? '$item{'. $self->{hashname} .'}=$_tok;' : '' ) .' - ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' -' -} - - -1; - -package Parse::RecDescent::Directive; - -sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } - -sub issubrule { undef } -sub isterminal { 0 } -sub describe { $_[1] ? '' : $_[0]->{name} } - -sub new ($$$$$) -{ - my $class = ref($_[0]) || $_[0]; - bless - { - "code" => $_[1], - "lookahead" => $_[2], - "line" => $_[3], - "name" => $_[4], - }, $class; -} - -sub code($$$$) -{ - my ($self, $namespace, $rule) = @_; - -' - ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' - - Parse::RecDescent::_trace(q{Trying directive: [' - . $self->describe . ']}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{name} . '}, - $tracelevel) - if defined $::RD_TRACE; ' .' - $_tok = do { ' . $self->{"code"} . ' }; - if (defined($_tok)) - { - Parse::RecDescent::_trace(q{>>Matched directive<< (return value: [} - . $_tok . q{])}, - Parse::RecDescent::_tracefirst($text)) - if defined $::RD_TRACE; - } - else - { - Parse::RecDescent::_trace(q{<<Didn\'t match directive>>}, - Parse::RecDescent::_tracefirst($text)) - if defined $::RD_TRACE; - } - ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .' - last ' - . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok; - push @item, $item{'.$self->{hashname}.'}=$_tok; - ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' -' -} - -1; - -package Parse::RecDescent::UncondReject; - -sub issubrule { undef } -sub isterminal { 0 } -sub describe { $_[1] ? '' : $_[0]->{name} } -sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } - -sub new ($$$;$) -{ - my $class = ref($_[0]) || $_[0]; - bless - { - "lookahead" => $_[1], - "line" => $_[2], - "name" => $_[3], - }, $class; -} - -# MARK, YOU MAY WANT TO OPTIMIZE THIS. - - -sub code($$$$) -{ - my ($self, $namespace, $rule) = @_; - -' - Parse::RecDescent::_trace(q{>>Rejecting production<< (found ' - . $self->describe . ')}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{name} . '}, - $tracelevel) - if defined $::RD_TRACE; - undef $return; - ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' - - $_tok = undef; - ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .' - last ' - . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok; -' -} - -1; - -package Parse::RecDescent::Error; - -sub issubrule { undef } -sub isterminal { 0 } -sub describe { $_[1] ? '' : $_[0]->{commitonly} ? '<error?:...>' : '<error...>' } -sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } - -sub new ($$$$$) -{ - my $class = ref($_[0]) || $_[0]; - bless - { - "msg" => $_[1], - "lookahead" => $_[2], - "commitonly" => $_[3], - "line" => $_[4], - }, $class; -} - -sub code($$$$) -{ - my ($self, $namespace, $rule) = @_; - - my $action = ''; - - if ($self->{"msg"}) # ERROR MESSAGE SUPPLIED - { - #WAS: $action .= "Parse::RecDescent::_error(qq{$self->{msg}}" . ',$thisline);'; - $action .= 'push @{$thisparser->{errors}}, [qq{'.$self->{msg}.'},$thisline];'; - - } - else # GENERATE ERROR MESSAGE DURING PARSE - { - $action .= ' - my $rule = $item[0]; - $rule =~ s/_/ /g; - #WAS: Parse::RecDescent::_error("Invalid $rule: " . $expectation->message() ,$thisline); - push @{$thisparser->{errors}}, ["Invalid $rule: " . $expectation->message() ,$thisline]; - '; - } - - my $dir = - new Parse::RecDescent::Directive('if (' . - ($self->{"commitonly"} ? '$commit' : '1') . - ") { do {$action} unless ".' $_noactions; undef } else {0}', - $self->{"lookahead"},0,$self->describe); - $dir->{hashname} = $self->{hashname}; - return $dir->code($namespace, $rule, 0); -} - -1; - -package Parse::RecDescent::Token; - -sub sethashname { $_[0]->{hashname} = '__PATTERN' . ++$_[1]->{patcount} . '__'; } - -sub issubrule { undef } -sub isterminal { 1 } -sub describe ($) { shift->{'description'}} - - -# ARGS ARE: $self, $pattern, $left_delim, $modifiers, $lookahead, $linenum -sub new ($$$$$$) -{ - my $class = ref($_[0]) || $_[0]; - my $pattern = $_[1]; - my $pat = $_[1]; - my $ldel = $_[2]; - my $rdel = $ldel; - $rdel =~ tr/{[(</}])>/; - - my $mod = $_[3]; - - my $desc; - - if ($ldel eq '/') { $desc = "$ldel$pattern$rdel$mod" } - else { $desc = "m$ldel$pattern$rdel$mod" } - $desc =~ s/\\/\\\\/g; - $desc =~ s/\$$/\\\$/g; - $desc =~ s/}/\\}/g; - $desc =~ s/{/\\{/g; - - if (!eval "no strict; - local \$SIG{__WARN__} = sub {0}; - '' =~ m$ldel$pattern$rdel" and $@) - { - Parse::RecDescent::_warn(3, "Token pattern \"m$ldel$pattern$rdel\" - may not be a valid regular expression", - $_[5]); - $@ =~ s/ at \(eval.*/./; - Parse::RecDescent::_hint($@); - } - - # QUIETLY PREVENT (WELL-INTENTIONED) CALAMITY - $mod =~ s/[gc]//g; - $pattern =~ s/(\A|[^\\])\\G/$1/g; - - bless - { - "pattern" => $pattern, - "ldelim" => $ldel, - "rdelim" => $rdel, - "mod" => $mod, - "lookahead" => $_[4], - "line" => $_[5], - "description" => $desc, - }, $class; -} - - -sub code($$$$) -{ - my ($self, $namespace, $rule, $check) = @_; - my $ldel = $self->{"ldelim"}; - my $rdel = $self->{"rdelim"}; - my $sdel = $ldel; - my $mod = $self->{"mod"}; - - $sdel =~ s/[[{(<]/{}/; - -my $code = ' - Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe - . ']}, Parse::RecDescent::_tracefirst($text), - q{' . $rule->{name} . '}, - $tracelevel) - if defined $::RD_TRACE; - $lastsep = ""; - $expectation->is(q{' . ($rule->hasleftmost($self) ? '' - : $self->describe ) . '})->at($text); - ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' - - ' . ($self->{"lookahead"}<0?'if':'unless') - . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' - . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') - . ' $text =~ s' . $ldel . '\A(?:' . $self->{"pattern"} . ')' - . $rdel . $sdel . $mod . ') - { - '.($self->{"lookahead"} ? '$text = $_savetext;' : '').' - $expectation->failed(); - Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>}, - Parse::RecDescent::_tracefirst($text)) - if defined $::RD_TRACE; - - last; - } - Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} - . $& . q{])}, - Parse::RecDescent::_tracefirst($text)) - if defined $::RD_TRACE; - push @item, $item{'.$self->{hashname}.'}=$&; - ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' -'; - - return $code; -} - -1; - -package Parse::RecDescent::Literal; - -sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; } - -sub issubrule { undef } -sub isterminal { 1 } -sub describe ($) { shift->{'description'} } - -sub new ($$$$) -{ - my $class = ref($_[0]) || $_[0]; - - my $pattern = $_[1]; - - my $desc = $pattern; - $desc=~s/\\/\\\\/g; - $desc=~s/}/\\}/g; - $desc=~s/{/\\{/g; - - bless - { - "pattern" => $pattern, - "lookahead" => $_[2], - "line" => $_[3], - "description" => "'$desc'", - }, $class; -} - - -sub code($$$$) -{ - my ($self, $namespace, $rule, $check) = @_; - -my $code = ' - Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe - . ']}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{name} . '}, - $tracelevel) - if defined $::RD_TRACE; - $lastsep = ""; - $expectation->is(q{' . ($rule->hasleftmost($self) ? '' - : $self->describe ) . '})->at($text); - ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' - - ' . ($self->{"lookahead"}<0?'if':'unless') - . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' - . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') - . ' $text =~ s/\A' . quotemeta($self->{"pattern"}) . '//) - { - '.($self->{"lookahead"} ? '$text = $_savetext;' : '').' - $expectation->failed(); - Parse::RecDescent::_trace(qq{<<Didn\'t match terminal>>}, - Parse::RecDescent::_tracefirst($text)) - if defined $::RD_TRACE; - last; - } - Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} - . $& . q{])}, - Parse::RecDescent::_tracefirst($text)) - if defined $::RD_TRACE; - push @item, $item{'.$self->{hashname}.'}=$&; - ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' -'; - - return $code; -} - -1; - -package Parse::RecDescent::InterpLit; - -sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; } - -sub issubrule { undef } -sub isterminal { 1 } -sub describe ($) { shift->{'description'} } - -sub new ($$$$) -{ - my $class = ref($_[0]) || $_[0]; - - my $pattern = $_[1]; - $pattern =~ s#/#\\/#g; - - my $desc = $pattern; - $desc=~s/\\/\\\\/g; - $desc=~s/}/\\}/g; - $desc=~s/{/\\{/g; - - bless - { - "pattern" => $pattern, - "lookahead" => $_[2], - "line" => $_[3], - "description" => "'$desc'", - }, $class; -} - -sub code($$$$) -{ - my ($self, $namespace, $rule, $check) = @_; - -my $code = ' - Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe - . ']}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{name} . '}, - $tracelevel) - if defined $::RD_TRACE; - $lastsep = ""; - $expectation->is(q{' . ($rule->hasleftmost($self) ? '' - : $self->describe ) . '})->at($text); - ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' - - ' . ($self->{"lookahead"}<0?'if':'unless') - . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' - . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') - . ' do { $_tok = "' . $self->{"pattern"} . '"; 1 } and - substr($text,0,length($_tok)) eq $_tok and - do { substr($text,0,length($_tok)) = ""; 1; } - ) - { - '.($self->{"lookahead"} ? '$text = $_savetext;' : '').' - $expectation->failed(); - Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>}, - Parse::RecDescent::_tracefirst($text)) - if defined $::RD_TRACE; - last; - } - Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} - . $_tok . q{])}, - Parse::RecDescent::_tracefirst($text)) - if defined $::RD_TRACE; - push @item, $item{'.$self->{hashname}.'}=$_tok; - ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' -'; - - return $code; -} - -1; - -package Parse::RecDescent::Subrule; - -sub issubrule ($) { return $_[0]->{"subrule"} } -sub isterminal { 0 } -sub sethashname {} - -sub describe ($) -{ - my $desc = $_[0]->{"implicit"} || $_[0]->{"subrule"}; - $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"}; - return $desc; -} - -sub callsyntax($$) -{ - if ($_[0]->{"matchrule"}) - { - return "&{'$_[1]'.qq{$_[0]->{subrule}}}"; - } - else - { - return $_[1].$_[0]->{"subrule"}; - } -} - -sub new ($$$$;$$$) -{ - my $class = ref($_[0]) || $_[0]; - bless - { - "subrule" => $_[1], - "lookahead" => $_[2], - "line" => $_[3], - "implicit" => $_[4] || undef, - "matchrule" => $_[5], - "argcode" => $_[6] || undef, - }, $class; -} - - -sub code($$$$) -{ - my ($self, $namespace, $rule) = @_; - -' - Parse::RecDescent::_trace(q{Trying subrule: [' . $self->{"subrule"} . ']}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{"name"} . '}, - $tracelevel) - if defined $::RD_TRACE; - if (1) { no strict qw{refs}; - $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' - # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); - : 'q{'.$self->describe.'}' ) . ')->at($text); - ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) - . ($self->{"lookahead"}<0?'if':'unless') - . ' (defined ($_tok = ' - . $self->callsyntax($namespace.'::') - . '($thisparser,$text,$repeating,' - . ($self->{"lookahead"}?'1':'$_noactions') - . ($self->{argcode} ? ",sub { return $self->{argcode} }" - : ',sub { \\@arg }') - . '))) - { - '.($self->{"lookahead"} ? '$text = $_savetext;' : '').' - Parse::RecDescent::_trace(q{<<Didn\'t match subrule: [' - . $self->{subrule} . ']>>}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{"name"} .'}, - $tracelevel) - if defined $::RD_TRACE; - $expectation->failed(); - last; - } - Parse::RecDescent::_trace(q{>>Matched subrule: [' - . $self->{subrule} . ']<< (return value: [} - . $_tok . q{]}, - - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{"name"} .'}, - $tracelevel) - if defined $::RD_TRACE; - $item{q{' . $self->{subrule} . '}} = $_tok; - push @item, $_tok; - ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' - } -' -} - -package Parse::RecDescent::Repetition; - -sub issubrule ($) { return $_[0]->{"subrule"} } -sub isterminal { 0 } -sub sethashname { } - -sub describe ($) -{ - my $desc = $_[0]->{"expected"} || $_[0]->{"subrule"}; - $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"}; - return $desc; -} - -sub callsyntax($$) -{ - if ($_[0]->{matchrule}) - { return "sub { goto &{''.qq{$_[1]$_[0]->{subrule}}} }"; } - else - { return "\\&$_[1]$_[0]->{subrule}"; } -} - -sub new ($$$$$$$$$$) -{ - my ($self, $subrule, $repspec, $min, $max, $lookahead, $line, $parser, $matchrule, $argcode) = @_; - my $class = ref($self) || $self; - ($max, $min) = ( $min, $max) if ($max<$min); - - my $desc; - if ($subrule=~/\A_alternation_\d+_of_production_\d+_of_rule/) - { $desc = $parser->{"rules"}{$subrule}->expected } - - if ($lookahead) - { - if ($min>0) - { - return new Parse::RecDescent::Subrule($subrule,$lookahead,$line,$desc,$matchrule,$argcode); - } - else - { - Parse::RecDescent::_error("Not symbol (\"!\") before - \"$subrule\" doesn't make - sense.",$line); - Parse::RecDescent::_hint("Lookahead for negated optional - repetitions (such as - \"!$subrule($repspec)\" can never - succeed, since optional items always - match (zero times at worst). - Did you mean a single \"!$subrule\", - instead?"); - } - } - bless - { - "subrule" => $subrule, - "repspec" => $repspec, - "min" => $min, - "max" => $max, - "lookahead" => $lookahead, - "line" => $line, - "expected" => $desc, - "argcode" => $argcode || undef, - "matchrule" => $matchrule, - }, $class; -} - -sub code($$$$) -{ - my ($self, $namespace, $rule) = @_; - - my ($subrule, $repspec, $min, $max, $lookahead) = - @{$self}{ qw{subrule repspec min max lookahead} }; - -' - Parse::RecDescent::_trace(q{Trying repeated subrule: [' . $self->describe . ']}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{"name"} . '}, - $tracelevel) - if defined $::RD_TRACE; - $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' - # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); - : 'q{'.$self->describe.'}' ) . ')->at($text); - ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' - unless (defined ($_tok = $thisparser->_parserepeat($text, ' - . $self->callsyntax($namespace.'::') - . ', ' . $min . ', ' . $max . ', ' - . ($self->{"lookahead"}?'1':'$_noactions') - . ',$expectation,' - . ($self->{argcode} ? "sub { return $self->{argcode} }" - : 'undef') - . '))) - { - Parse::RecDescent::_trace(q{<<Didn\'t match repeated subrule: [' - . $self->describe . ']>>}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{"name"} .'}, - $tracelevel) - if defined $::RD_TRACE; - last; - } - Parse::RecDescent::_trace(q{>>Matched repeated subrule: [' - . $self->{subrule} . ']<< (} - . @$_tok . q{ times)}, - - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{"name"} .'}, - $tracelevel) - if defined $::RD_TRACE; - $item{q{' . "$self->{subrule}($self->{repspec})" . '}} = $_tok; - push @item, $_tok; - ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' - -' -} - -package Parse::RecDescent::Result; - -sub issubrule { 0 } -sub isterminal { 0 } -sub describe { '' } - -sub new -{ - my ($class, $pos) = @_; - - bless {}, $class; -} - -sub code($$$$) -{ - my ($self, $namespace, $rule) = @_; - - ' - $return = $item[-1]; - '; -} - -package Parse::RecDescent::Operator; - -my @opertype = ( " non-optional", "n optional" ); - -sub issubrule { 0 } -sub isterminal { 0 } - -sub describe { $_[0]->{"expected"} } -sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } - - -sub new -{ - my ($class, $type, $minrep, $maxrep, $leftarg, $op, $rightarg) = @_; - - bless - { - "type" => "${type}op", - "leftarg" => $leftarg, - "op" => $op, - "min" => $minrep, - "max" => $maxrep, - "rightarg" => $rightarg, - "expected" => "<${type}op: ".$leftarg->describe." ".$op->describe." ".$rightarg->describe.">", - }, $class; -} - -sub code($$$$) -{ - my ($self, $namespace, $rule) = @_; - - my ($leftarg, $op, $rightarg) = - @{$self}{ qw{leftarg op rightarg} }; - - my $code = ' - Parse::RecDescent::_trace(q{Trying operator: [' . $self->describe . ']}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{"name"} . '}, - $tracelevel) - if defined $::RD_TRACE; - $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' - # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); - : 'q{'.$self->describe.'}' ) . ')->at($text); - - $_tok = undef; - OPLOOP: while (1) - { - $repcount = 0; - my @item; - '; - - if ($self->{type} eq "leftop" ) - { - $code .= ' - # MATCH LEFTARG - ' . $leftarg->code(@_[1..2]) . ' - - $repcount++; - - my $savetext = $text; - my $backtrack; - - # MATCH (OP RIGHTARG)(s) - while ($repcount < ' . $self->{max} . ') - { - $backtrack = 0; - ' . $op->code(@_[1..2]) . ' - ' . ($op->isterminal() ? 'pop @item;' : '$backtrack=1;' ) . ' - ' . (ref($op) eq 'Parse::RecDescent::Token' - ? 'if (defined $1) {push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; $backtrack=1;}' - : "" ) . ' - ' . $rightarg->code(@_[1..2]) . ' - $savetext = $text; - $repcount++; - } - $text = $savetext; - pop @item if $backtrack; - - '; - } - else - { - $code .= ' - my $savetext = $text; - my $backtrack; - # MATCH (LEFTARG OP)(s) - while ($repcount < ' . $self->{max} . ') - { - $backtrack = 0; - ' . $leftarg->code(@_[1..2]) . ' - $repcount++; - $backtrack = 1; - ' . $op->code(@_[1..2]) . ' - $savetext = $text; - ' . ($op->isterminal() ? 'pop @item;' : "" ) . ' - ' . (ref($op) eq 'Parse::RecDescent::Token' ? 'do { push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; } if defined $1;' : "" ) . ' - } - $text = $savetext; - pop @item if $backtrack; - - # MATCH RIGHTARG - ' . $rightarg->code(@_[1..2]) . ' - $repcount++; - '; - } - - $code .= 'unless (@item) { undef $_tok; last }' unless $self->{min}==0; - - $code .= ' - $_tok = [ @item ]; - last; - } - - unless ($repcount>='.$self->{min}.') - { - Parse::RecDescent::_trace(q{<<Didn\'t match operator: [' - . $self->describe - . ']>>}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{"name"} .'}, - $tracelevel) - if defined $::RD_TRACE; - $expectation->failed(); - last; - } - Parse::RecDescent::_trace(q{>>Matched operator: [' - . $self->describe - . ']<< (return value: [} - . qq{@{$_tok||[]}} . q{]}, - Parse::RecDescent::_tracefirst($text), - q{' . $rule->{"name"} .'}, - $tracelevel) - if defined $::RD_TRACE; - - push @item, $item{'.($self->{name}||$self->{hashname}).'}=$_tok||[]; - -'; - return $code; -} - - -package Parse::RecDescent::Expectation; - -sub new ($) -{ - bless { - "failed" => 0, - "expected" => "", - "unexpected" => "", - "lastexpected" => "", - "lastunexpected" => "", - "defexpected" => $_[1], - }; -} - -sub is ($$) -{ - $_[0]->{lastexpected} = $_[1]; return $_[0]; -} - -sub at ($$) -{ - $_[0]->{lastunexpected} = $_[1]; return $_[0]; -} - -sub failed ($) -{ - return unless $_[0]->{lastexpected}; - $_[0]->{expected} = $_[0]->{lastexpected} unless $_[0]->{failed}; - $_[0]->{unexpected} = $_[0]->{lastunexpected} unless $_[0]->{failed}; - $_[0]->{failed} = 1; -} - -sub message ($) -{ - my ($self) = @_; - $self->{expected} = $self->{defexpected} unless $self->{expected}; - $self->{expected} =~ s/_/ /g; - if (!$self->{unexpected} || $self->{unexpected} =~ /\A\s*\Z/s) - { - return "Was expecting $self->{expected}"; - } - else - { - $self->{unexpected} =~ /\s*(.*)/; - return "Was expecting $self->{expected} but found \"$1\" instead"; - } -} - -1; - -package Parse::RecDescent; - -use Carp; -use vars qw ( $AUTOLOAD $VERSION ); - -my $ERRORS = 0; - -$VERSION = '1.94'; - -# BUILDING A PARSER - -my $nextnamespace = "namespace000001"; - -sub _nextnamespace() -{ - return "Parse::RecDescent::" . $nextnamespace++; -} - -sub new ($$$) -{ - my $class = ref($_[0]) || $_[0]; - local $Parse::RecDescent::compiling = $_[2]; - my $name_space_name = defined $_[3] - ? "Parse::RecDescent::".$_[3] - : _nextnamespace(); - my $self = - { - "rules" => {}, - "namespace" => $name_space_name, - "startcode" => '', - "localvars" => '', - "_AUTOACTION" => undef, - "_AUTOTREE" => undef, - }; - if ($::RD_AUTOACTION) - { - my $sourcecode = $::RD_AUTOACTION; - $sourcecode = "{ $sourcecode }" - unless $sourcecode =~ /\A\s*\{.*\}\s*\Z/; - $self->{_check}{itempos} = - $sourcecode =~ /\@itempos\b|\$itempos\s*\[/; - $self->{_AUTOACTION} - = new Parse::RecDescent::Action($sourcecode,0,-1) - } - - bless $self, $class; - shift; - return $self->Replace(@_) -} - -sub Compile($$$$) { - - die "Compilation of Parse::RecDescent grammars not yet implemented\n"; -} - -sub DESTROY {} # SO AUTOLOADER IGNORES IT - -# BUILDING A GRAMMAR.... - -sub Replace ($$) -{ - splice(@_, 2, 0, 1); - return _generate(@_); -} - -sub Extend ($$) -{ - splice(@_, 2, 0, 0); - return _generate(@_); -} - -sub _no_rule ($$;$) -{ - _error("Ruleless $_[0] at start of grammar.",$_[1]); - my $desc = $_[2] ? "\"$_[2]\"" : ""; - _hint("You need to define a rule for the $_[0] $desc - to be part of."); -} - -my $NEGLOOKAHEAD = '\G(\s*\.\.\.\!)'; -my $POSLOOKAHEAD = '\G(\s*\.\.\.)'; -my $RULE = '\G\s*(\w+)[ \t]*:'; -my $PROD = '\G\s*([|])'; -my $TOKEN = q{\G\s*/((\\\\/|[^/])*)/([cgimsox]*)}; -my $MTOKEN = q{\G\s*(m\s*[^\w\s])}; -my $LITERAL = q{\G\s*'((\\\\['\\\\]|[^'])*)'}; -my $INTERPLIT = q{\G\s*"((\\\\["\\\\]|[^"])*)"}; -my $SUBRULE = '\G\s*(\w+)'; -my $MATCHRULE = '\G(\s*<matchrule:)'; -my $SIMPLEPAT = '((\\s+/[^/\\\\]*(?:\\\\.[^/\\\\]*)*/)?)'; -my $OPTIONAL = '\G\((\?)'.$SIMPLEPAT.'\)'; -my $ANY = '\G\((s\?)'.$SIMPLEPAT.'\)'; -my $MANY = '\G\((s|\.\.)'.$SIMPLEPAT.'\)'; -my $EXACTLY = '\G\(([1-9]\d*)'.$SIMPLEPAT.'\)'; -my $BETWEEN = '\G\((\d+)\.\.([1-9]\d*)'.$SIMPLEPAT.'\)'; -my $ATLEAST = '\G\((\d+)\.\.'.$SIMPLEPAT.'\)'; -my $ATMOST = '\G\(\.\.([1-9]\d*)'.$SIMPLEPAT.'\)'; -my $BADREP = '\G\((-?\d+)?\.\.(-?\d+)?'.$SIMPLEPAT.'\)'; -my $ACTION = '\G\s*\{'; -my $IMPLICITSUBRULE = '\G\s*\('; -my $COMMENT = '\G\s*(#.*)'; -my $COMMITMK = '\G\s*<commit>'; -my $UNCOMMITMK = '\G\s*<uncommit>'; -my $QUOTELIKEMK = '\G\s*<perl_quotelike>'; -my $CODEBLOCKMK = '\G\s*<perl_codeblock(?:\s+([][()<>{}]+))?>'; -my $VARIABLEMK = '\G\s*<perl_variable>'; -my $NOCHECKMK = '\G\s*<nocheck>'; -my $AUTOTREEMK = '\G\s*<autotree>'; -my $AUTOSTUBMK = '\G\s*<autostub>'; -my $AUTORULEMK = '\G\s*<autorule:(.*?)>'; -my $REJECTMK = '\G\s*<reject>'; -my $CONDREJECTMK = '\G\s*<reject:'; -my $SCOREMK = '\G\s*<score:'; -my $AUTOSCOREMK = '\G\s*<autoscore:'; -my $SKIPMK = '\G\s*<skip:'; -my $OPMK = '\G\s*<(left|right)op(?:=(\'.*?\'))?:'; -my $ENDDIRECTIVEMK = '\G\s*>'; -my $RESYNCMK = '\G\s*<resync>'; -my $RESYNCPATMK = '\G\s*<resync:'; -my $RULEVARPATMK = '\G\s*<rulevar:'; -my $DEFERPATMK = '\G\s*<defer:'; -my $TOKENPATMK = '\G\s*<token:'; -my $AUTOERRORMK = '\G\s*<error(\??)>'; -my $MSGERRORMK = '\G\s*<error(\??):'; -my $UNCOMMITPROD = $PROD.'\s*<uncommit'; -my $ERRORPROD = $PROD.'\s*<error'; -my $LONECOLON = '\G\s*:'; -my $OTHER = '\G\s*([^\s]+)'; - -my $lines = 0; - -sub _generate($$$;$$) -{ - my ($self, $grammar, $replace, $isimplicit, $isleftop) = (@_, 0); - - my $aftererror = 0; - my $lookahead = 0; - my $lookaheadspec = ""; - $lines = _linecount($grammar) unless $lines; - $self->{_check}{itempos} = ($grammar =~ /\@itempos\b|\$itempos\s*\[/) - unless $self->{_check}{itempos}; - for (qw(thisoffset thiscolumn prevline prevoffset prevcolumn)) - { - $self->{_check}{$_} = - ($grammar =~ /\$$_/) || $self->{_check}{itempos} - unless $self->{_check}{$_}; - } - my $line; - - my $rule = undef; - my $prod = undef; - my $item = undef; - my $lastgreedy = ''; - pos $grammar = 0; - study $grammar; - - while (pos $grammar < length $grammar) - { - $line = $lines - _linecount($grammar) + 1; - my $commitonly; - my $code = ""; - my @components = (); - if ($grammar =~ m/$COMMENT/gco) - { - _parse("a comment",0,$line); - next; - } - elsif ($grammar =~ m/$NEGLOOKAHEAD/gco) - { - _parse("a negative lookahead",$aftererror,$line); - $lookahead = $lookahead ? -$lookahead : -1; - $lookaheadspec .= $1; - next; # SKIP LOOKAHEAD RESET AT END OF while LOOP - } - elsif ($grammar =~ m/$POSLOOKAHEAD/gco) - { - _parse("a positive lookahead",$aftererror,$line); - $lookahead = $lookahead ? $lookahead : 1; - $lookaheadspec .= $1; - next; # SKIP LOOKAHEAD RESET AT END OF while LOOP - } - elsif ($grammar =~ m/(?=$ACTION)/gco - and do { ($code) = extract_codeblock($grammar); $code }) - { - _parse("an action", $aftererror, $line, $code); - $item = new Parse::RecDescent::Action($code,$lookahead,$line); - $prod and $prod->additem($item) - or $self->_addstartcode($code); - } - elsif ($grammar =~ m/(?=$IMPLICITSUBRULE)/gco - and do { ($code) = extract_codeblock($grammar,'{([',undef,'(',1); - $code }) - { - $code =~ s/\A\s*\(|\)\Z//g; - _parse("an implicit subrule", $aftererror, $line, - "( $code )"); - my $implicit = $rule->nextimplicit; - $self->_generate("$implicit : $code",$replace,1); - my $pos = pos $grammar; - substr($grammar,$pos,0,$implicit); - pos $grammar = $pos;; - } - elsif ($grammar =~ m/$ENDDIRECTIVEMK/gco) - { - - # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY) - - my ($minrep,$maxrep) = (1,$MAXREP); - if ($grammar =~ m/\G[(]/gc) - { - pos($grammar)--; - - if ($grammar =~ m/$OPTIONAL/gco) - { ($minrep, $maxrep) = (0,1) } - elsif ($grammar =~ m/$ANY/gco) - { $minrep = 0 } - elsif ($grammar =~ m/$EXACTLY/gco) - { ($minrep, $maxrep) = ($1,$1) } - elsif ($grammar =~ m/$BETWEEN/gco) - { ($minrep, $maxrep) = ($1,$2) } - elsif ($grammar =~ m/$ATLEAST/gco) - { $minrep = $1 } - elsif ($grammar =~ m/$ATMOST/gco) - { $maxrep = $1 } - elsif ($grammar =~ m/$MANY/gco) - { } - elsif ($grammar =~ m/$BADREP/gco) - { - _parse("an invalid repetition specifier", 0,$line); - _error("Incorrect specification of a repeated directive", - $line); - _hint("Repeated directives cannot have - a maximum repetition of zero, nor can they have - negative components in their ranges."); - } - } - - $prod && $prod->enddirective($line,$minrep,$maxrep); - } - elsif ($grammar =~ m/\G\s*<[^m]/gc) - { - pos($grammar)-=2; - - if ($grammar =~ m/$OPMK/gco) - { - # $DB::single=1; - _parse("a $1-associative operator directive", $aftererror, $line, "<$1op:...>"); - $prod->adddirective($1, $line,$2||''); - } - elsif ($grammar =~ m/$UNCOMMITMK/gco) - { - _parse("an uncommit marker", $aftererror,$line); - $item = new Parse::RecDescent::Directive('$commit=0;1', - $lookahead,$line,"<uncommit>"); - $prod and $prod->additem($item) - or _no_rule("<uncommit>",$line); - } - elsif ($grammar =~ m/$QUOTELIKEMK/gco) - { - _parse("an perl quotelike marker", $aftererror,$line); - $item = new Parse::RecDescent::Directive( - 'my ($match,@res); - ($match,$text,undef,@res) = - Text::Balanced::extract_quotelike($text,$skip); - $match ? \@res : undef; - ', $lookahead,$line,"<perl_quotelike>"); - $prod and $prod->additem($item) - or _no_rule("<perl_quotelike>",$line); - } - elsif ($grammar =~ m/$CODEBLOCKMK/gco) - { - my $outer = $1||"{}"; - _parse("an perl codeblock marker", $aftererror,$line); - $item = new Parse::RecDescent::Directive( - 'Text::Balanced::extract_codeblock($text,undef,$skip,\''.$outer.'\'); - ', $lookahead,$line,"<perl_codeblock>"); - $prod and $prod->additem($item) - or _no_rule("<perl_codeblock>",$line); - } - elsif ($grammar =~ m/$VARIABLEMK/gco) - { - _parse("an perl variable marker", $aftererror,$line); - $item = new Parse::RecDescent::Directive( - 'Text::Balanced::extract_variable($text,$skip); - ', $lookahead,$line,"<perl_variable>"); - $prod and $prod->additem($item) - or _no_rule("<perl_variable>",$line); - } - elsif ($grammar =~ m/$NOCHECKMK/gco) - { - _parse("a disable checking marker", $aftererror,$line); - if ($rule) - { - _error("<nocheck> directive not at start of grammar", $line); - _hint("The <nocheck> directive can only - be specified at the start of a - grammar (before the first rule - is defined."); - } - else - { - local $::RD_CHECK = 1; - } - } - elsif ($grammar =~ m/$AUTOSTUBMK/gco) - { - _parse("an autostub marker", $aftererror,$line); - $::RD_AUTOSTUB = ""; - } - elsif ($grammar =~ m/$AUTORULEMK/gco) - { - _parse("an autorule marker", $aftererror,$line); - $::RD_AUTOSTUB = $1; - } - elsif ($grammar =~ m/$AUTOTREEMK/gco) - { - _parse("an autotree marker", $aftererror,$line); - if ($rule) - { - _error("<autotree> directive not at start of grammar", $line); - _hint("The <autotree> directive can only - be specified at the start of a - grammar (before the first rule - is defined."); - } - else - { - undef $self->{_AUTOACTION}; - $self->{_AUTOTREE}{NODE} - = new Parse::RecDescent::Action(q{{bless \%item, $item[0]}},0,-1); - $self->{_AUTOTREE}{TERMINAL} - = new Parse::RecDescent::Action(q{{bless {__VALUE__=>$item[1]}, $item[0]}},0,-1); - } - } - - elsif ($grammar =~ m/$REJECTMK/gco) - { - _parse("an reject marker", $aftererror,$line); - $item = new Parse::RecDescent::UncondReject($lookahead,$line,"<reject>"); - $prod and $prod->additem($item) - or _no_rule("<reject>",$line); - } - elsif ($grammar =~ m/(?=$CONDREJECTMK)/gco - and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); - $code }) - { - _parse("a (conditional) reject marker", $aftererror,$line); - $code =~ /\A\s*<reject:(.*)>\Z/s; - $item = new Parse::RecDescent::Directive( - "($1) ? undef : 1", $lookahead,$line,"<reject:$code>"); - $prod and $prod->additem($item) - or _no_rule("<reject:$code>",$line); - } - elsif ($grammar =~ m/(?=$SCOREMK)/gco - and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); - $code }) - { - _parse("a score marker", $aftererror,$line); - $code =~ /\A\s*<score:(.*)>\Z/s; - $prod and $prod->addscore($1, $lookahead, $line) - or _no_rule($code,$line); - } - elsif ($grammar =~ m/(?=$AUTOSCOREMK)/gco - and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); - $code; - } ) - { - _parse("an autoscore specifier", $aftererror,$line,$code); - $code =~ /\A\s*<autoscore:(.*)>\Z/s; - - $rule and $rule->addautoscore($1,$self) - or _no_rule($code,$line); - - $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code); - $prod and $prod->additem($item) - or _no_rule($code,$line); - } - elsif ($grammar =~ m/$RESYNCMK/gco) - { - _parse("a resync to newline marker", $aftererror,$line); - $item = new Parse::RecDescent::Directive( - 'if ($text =~ s/\A[^\n]*\n//) { $return = 0; $& } else { undef }', - $lookahead,$line,"<resync>"); - $prod and $prod->additem($item) - or _no_rule("<resync>",$line); - } - elsif ($grammar =~ m/(?=$RESYNCPATMK)/gco - and do { ($code) = extract_bracketed($grammar,'<'); - $code }) - { - _parse("a resync with pattern marker", $aftererror,$line); - $code =~ /\A\s*<resync:(.*)>\Z/s; - $item = new Parse::RecDescent::Directive( - 'if ($text =~ s/\A'.$1.'//) { $return = 0; $& } else { undef }', - $lookahead,$line,$code); - $prod and $prod->additem($item) - or _no_rule($code,$line); - } - elsif ($grammar =~ m/(?=$SKIPMK)/gco - and do { ($code) = extract_codeblock($grammar,'<'); - $code }) - { - _parse("a skip marker", $aftererror,$line); - $code =~ /\A\s*<skip:(.*)>\Z/s; - $item = new Parse::RecDescent::Directive( - 'my $oldskip = $skip; $skip='.$1.'; $oldskip', - $lookahead,$line,$code); - $prod and $prod->additem($item) - or _no_rule($code,$line); - } - elsif ($grammar =~ m/(?=$RULEVARPATMK)/gco - and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); - $code; - } ) - { - _parse("a rule variable specifier", $aftererror,$line,$code); - $code =~ /\A\s*<rulevar:(.*)>\Z/s; - - $rule and $rule->addvar($1,$self) - or _no_rule($code,$line); - - $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code); - $prod and $prod->additem($item) - or _no_rule($code,$line); - } - elsif ($grammar =~ m/(?=$DEFERPATMK)/gco - and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); - $code; - } ) - { - _parse("a deferred action specifier", $aftererror,$line,$code); - $code =~ s/\A\s*<defer:(.*)>\Z/$1/s; - if ($code =~ /\A\s*[^{]|[^}]\s*\Z/) - { - $code = "{ $code }" - } - - $item = new Parse::RecDescent::Directive( - "push \@{\$thisparser->{deferred}}, sub $code;", - $lookahead,$line,"<defer:$code>"); - $prod and $prod->additem($item) - or _no_rule("<defer:$code>",$line); - - $self->{deferrable} = 1; - } - elsif ($grammar =~ m/(?=$TOKENPATMK)/gco - and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); - $code; - } ) - { - _parse("a token constructor", $aftererror,$line,$code); - $code =~ s/\A\s*<token:(.*)>\Z/$1/s; - - my $types = eval 'no strict; local $SIG{__WARN__} = sub {0}; my @arr=('.$code.'); @arr' || (); - if (!$types) - { - _error("Incorrect token specification: \"$@\"", $line); - _hint("The <token:...> directive requires a list - of one or more strings representing possible - types of the specified token. For example: - <token:NOUN,VERB>"); - } - else - { - $item = new Parse::RecDescent::Directive( - 'no strict; - $return = { text => $item[-1] }; - @{$return->{type}}{'.$code.'} = (1..'.$types.');', - $lookahead,$line,"<token:$code>"); - $prod and $prod->additem($item) - or _no_rule("<token:$code>",$line); - } - } - elsif ($grammar =~ m/$COMMITMK/gco) - { - _parse("an commit marker", $aftererror,$line); - $item = new Parse::RecDescent::Directive('$commit = 1', - $lookahead,$line,"<commit>"); - $prod and $prod->additem($item) - or _no_rule("<commit>",$line); - } - elsif ($grammar =~ m/$AUTOERRORMK/gco) - { - $commitonly = $1; - _parse("an error marker", $aftererror,$line); - $item = new Parse::RecDescent::Error('',$lookahead,$1,$line); - $prod and $prod->additem($item) - or _no_rule("<error>",$line); - $aftererror = !$commitonly; - } - elsif ($grammar =~ m/(?=$MSGERRORMK)/gco - and do { $commitonly = $1; - ($code) = extract_bracketed($grammar,'<'); - $code }) - { - _parse("an error marker", $aftererror,$line,$code); - $code =~ /\A\s*<error\??:(.*)>\Z/s; - $item = new Parse::RecDescent::Error($1,$lookahead,$commitonly,$line); - $prod and $prod->additem($item) - or _no_rule("$code",$line); - $aftererror = !$commitonly; - } - elsif (do { $commitonly = $1; - ($code) = extract_bracketed($grammar,'<'); - $code }) - { - if ($code =~ /^<[A-Z_]+>$/) - { - _error("Token items are not yet - supported: \"$code\"", - $line); - _hint("Items like $code that consist of angle - brackets enclosing a sequence of - uppercase characters will eventually - be used to specify pre-lexed tokens - in a grammar. That functionality is not - yet implemented. Or did you misspell - \"$code\"?"); - } - else - { - _error("Untranslatable item encountered: \"$code\"", - $line); - _hint("Did you misspell \"$code\" - or forget to comment it out?"); - } - } - } - elsif ($grammar =~ m/$RULE/gco) - { - _parseunneg("a rule declaration", 0, - $lookahead,$line) or next; - my $rulename = $1; - if ($rulename =~ /Replace|Extend|Precompile|Save/ ) - { - _warn(2,"Rule \"$rulename\" hidden by method - Parse::RecDescent::$rulename",$line) - and - _hint("The rule named \"$rulename\" cannot be directly - called through the Parse::RecDescent object - for this grammar (although it may still - be used as a subrule of other rules). - It can't be directly called because - Parse::RecDescent::$rulename is already defined (it - is the standard method of all - parsers)."); - } - $rule = new Parse::RecDescent::Rule($rulename,$self,$line,$replace); - $prod->check_pending($line) if $prod; - $prod = $rule->addprod( new Parse::RecDescent::Production ); - $aftererror = 0; - } - elsif ($grammar =~ m/$UNCOMMITPROD/gco) - { - pos($grammar)-=9; - _parseunneg("a new (uncommitted) production", - 0, $lookahead, $line) or next; - - $prod->check_pending($line) if $prod; - $prod = new Parse::RecDescent::Production($line,1); - $rule and $rule->addprod($prod) - or _no_rule("<uncommit>",$line); - $aftererror = 0; - } - elsif ($grammar =~ m/$ERRORPROD/gco) - { - pos($grammar)-=6; - _parseunneg("a new (error) production", $aftererror, - $lookahead,$line) or next; - $prod->check_pending($line) if $prod; - $prod = new Parse::RecDescent::Production($line,0,1); - $rule and $rule->addprod($prod) - or _no_rule("<error>",$line); - $aftererror = 0; - } - elsif ($grammar =~ m/$PROD/gco) - { - _parseunneg("a new production", 0, - $lookahead,$line) or next; - $rule - and (!$prod || $prod->check_pending($line)) - and $prod = $rule->addprod(new Parse::RecDescent::Production($line)) - or _no_rule("production",$line); - $aftererror = 0; - } - elsif ($grammar =~ m/$LITERAL/gco) - { - ($code = $1) =~ s/\\\\/\\/g; - _parse("a literal terminal", $aftererror,$line,$1); - $item = new Parse::RecDescent::Literal($code,$lookahead,$line); - $prod and $prod->additem($item) - or _no_rule("literal terminal",$line,"'$1'"); - } - elsif ($grammar =~ m/$INTERPLIT/gco) - { - _parse("an interpolated literal terminal", $aftererror,$line); - $item = new Parse::RecDescent::InterpLit($1,$lookahead,$line); - $prod and $prod->additem($item) - or _no_rule("interpolated literal terminal",$line,"'$1'"); - } - elsif ($grammar =~ m/$TOKEN/gco) - { - _parse("a /../ pattern terminal", $aftererror,$line); - $item = new Parse::RecDescent::Token($1,'/',$3?$3:'',$lookahead,$line); - $prod and $prod->additem($item) - or _no_rule("pattern terminal",$line,"/$1/"); - } - elsif ($grammar =~ m/(?=$MTOKEN)/gco - and do { ($code, undef, @components) - = extract_quotelike($grammar); - $code } - ) - - { - _parse("an m/../ pattern terminal", $aftererror,$line,$code); - $item = new Parse::RecDescent::Token(@components[3,2,8], - $lookahead,$line); - $prod and $prod->additem($item) - or _no_rule("pattern terminal",$line,$code); - } - elsif ($grammar =~ m/(?=$MATCHRULE)/gco - and do { ($code) = extract_bracketed($grammar,'<'); - $code - } - or $grammar =~ m/$SUBRULE/gco - and $code = $1) - { - my $name = $code; - my $matchrule = 0; - if (substr($name,0,1) eq '<') - { - $name =~ s/$MATCHRULE\s*//; - $name =~ s/\s*>\Z//; - $matchrule = 1; - } - - # EXTRACT TRAILING ARG LIST (IF ANY) - - my ($argcode) = extract_codeblock($grammar, "[]",'') || ''; - - # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY) - - if ($grammar =~ m/\G[(]/gc) - { - pos($grammar)--; - - if ($grammar =~ m/$OPTIONAL/gco) - { - _parse("an zero-or-one subrule match", $aftererror,$line,"$code$argcode($1)"); - $item = new Parse::RecDescent::Repetition($name,$1,0,1, - $lookahead,$line, - $self, - $matchrule, - $argcode); - $prod and $prod->additem($item) - or _no_rule("repetition",$line,"$code$argcode($1)"); - - !$matchrule and $rule and $rule->addcall($name); - } - elsif ($grammar =~ m/$ANY/gco) - { - _parse("a zero-or-more subrule match", $aftererror,$line,"$code$argcode($1)"); - if ($2) - { - my $pos = pos $grammar; - substr($grammar,$pos,0, - "<leftop='$name(s?)': $name $2 $name>(s?) "); - - pos $grammar = $pos; - } - else - { - $item = new Parse::RecDescent::Repetition($name,$1,0,$MAXREP, - $lookahead,$line, - $self, - $matchrule, - $argcode); - $prod and $prod->additem($item) - or _no_rule("repetition",$line,"$code$argcode($1)"); - - !$matchrule and $rule and $rule->addcall($name); - - _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK; - } - } - elsif ($grammar =~ m/$MANY/gco) - { - _parse("a one-or-more subrule match", $aftererror,$line,"$code$argcode($1)"); - if ($2) - { - # $DB::single=1; - my $pos = pos $grammar; - substr($grammar,$pos,0, - "<leftop='$name(s)': $name $2 $name> "); - - pos $grammar = $pos; - } - else - { - $item = new Parse::RecDescent::Repetition($name,$1,1,$MAXREP, - $lookahead,$line, - $self, - $matchrule, - $argcode); - - $prod and $prod->additem($item) - or _no_rule("repetition",$line,"$code$argcode($1)"); - - !$matchrule and $rule and $rule->addcall($name); - - _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK; - } - } - elsif ($grammar =~ m/$EXACTLY/gco) - { - _parse("an exactly-$1-times subrule match", $aftererror,$line,"$code$argcode($1)"); - if ($2) - { - my $pos = pos $grammar; - substr($grammar,$pos,0, - "<leftop='$name($1)': $name $2 $name>($1) "); - - pos $grammar = $pos; - } - else - { - $item = new Parse::RecDescent::Repetition($name,$1,$1,$1, - $lookahead,$line, - $self, - $matchrule, - $argcode); - $prod and $prod->additem($item) - or _no_rule("repetition",$line,"$code$argcode($1)"); - - !$matchrule and $rule and $rule->addcall($name); - } - } - elsif ($grammar =~ m/$BETWEEN/gco) - { - _parse("a $1-to-$2 subrule match", $aftererror,$line,"$code$argcode($1..$2)"); - if ($3) - { - my $pos = pos $grammar; - substr($grammar,$pos,0, - "<leftop='$name($1..$2)': $name $3 $name>($1..$2) "); - - pos $grammar = $pos; - } - else - { - $item = new Parse::RecDescent::Repetition($name,"$1..$2",$1,$2, - $lookahead,$line, - $self, - $matchrule, - $argcode); - $prod and $prod->additem($item) - or _no_rule("repetition",$line,"$code$argcode($1..$2)"); - - !$matchrule and $rule and $rule->addcall($name); - } - } - elsif ($grammar =~ m/$ATLEAST/gco) - { - _parse("a $1-or-more subrule match", $aftererror,$line,"$code$argcode($1..)"); - if ($2) - { - my $pos = pos $grammar; - substr($grammar,$pos,0, - "<leftop='$name($1..)': $name $2 $name>($1..) "); - - pos $grammar = $pos; - } - else - { - $item = new Parse::RecDescent::Repetition($name,"$1..",$1,$MAXREP, - $lookahead,$line, - $self, - $matchrule, - $argcode); - $prod and $prod->additem($item) - or _no_rule("repetition",$line,"$code$argcode($1..)"); - - !$matchrule and $rule and $rule->addcall($name); - _check_insatiable($name,"$1..",$grammar,$line) if $::RD_CHECK; - } - } - elsif ($grammar =~ m/$ATMOST/gco) - { - _parse("a one-to-$1 subrule match", $aftererror,$line,"$code$argcode(..$1)"); - if ($2) - { - my $pos = pos $grammar; - substr($grammar,$pos,0, - "<leftop='$name(..$1)': $name $2 $name>(..$1) "); - - pos $grammar = $pos; - } - else - { - $item = new Parse::RecDescent::Repetition($name,"..$1",1,$1, - $lookahead,$line, - $self, - $matchrule, - $argcode); - $prod and $prod->additem($item) - or _no_rule("repetition",$line,"$code$argcode(..$1)"); - - !$matchrule and $rule and $rule->addcall($name); - } - } - elsif ($grammar =~ m/$BADREP/gco) - { - _parse("an subrule match with invalid repetition specifier", 0,$line); - _error("Incorrect specification of a repeated subrule", - $line); - _hint("Repeated subrules like \"$code$argcode$&\" cannot have - a maximum repetition of zero, nor can they have - negative components in their ranges."); - } - } - else - { - _parse("a subrule match", $aftererror,$line,$code); - my $desc; - if ($name=~/\A_alternation_\d+_of_production_\d+_of_rule/) - { $desc = $self->{"rules"}{$name}->expected } - $item = new Parse::RecDescent::Subrule($name, - $lookahead, - $line, - $desc, - $matchrule, - $argcode); - - $prod and $prod->additem($item) - or _no_rule("(sub)rule",$line,$name); - - !$matchrule and $rule and $rule->addcall($name); - } - } - elsif ($grammar =~ m/$LONECOLON/gco ) - { - _error("Unexpected colon encountered", $line); - _hint("Did you mean \"|\" (to start a new production)? - Or perhaps you forgot that the colon - in a rule definition must be - on the same line as the rule name?"); - } - elsif ($grammar =~ m/$ACTION/gco ) # BAD ACTION, ALREADY FAILED - { - _error("Malformed action encountered", - $line); - _hint("Did you forget the closing curly bracket - or is there a syntax error in the action?"); - } - elsif ($grammar =~ m/$OTHER/gco ) - { - _error("Untranslatable item encountered: \"$1\"", - $line); - _hint("Did you misspell \"$1\" - or forget to comment it out?"); - } - - if ($lookaheadspec =~ tr /././ > 3) - { - $lookaheadspec =~ s/\A\s+//; - $lookahead = $lookahead<0 - ? 'a negative lookahead ("...!")' - : 'a positive lookahead ("...")' ; - _warn(1,"Found two or more lookahead specifiers in a - row.",$line) - and - _hint("Multiple positive and/or negative lookaheads - are simply multiplied together to produce a - single positive or negative lookahead - specification. In this case the sequence - \"$lookaheadspec\" was reduced to $lookahead. - Was this your intention?"); - } - $lookahead = 0; - $lookaheadspec = ""; - - $grammar =~ m/\G\s+/gc; - } - - unless ($ERRORS or $isimplicit or !$::RD_CHECK) - { - $self->_check_grammar(); - } - - unless ($ERRORS or $isimplicit or $Parse::RecDescent::compiling) - { - my $code = $self->_code(); - if (defined $::RD_TRACE) - { - print STDERR "printing code (", length($code),") to RD_TRACE\n"; - local *TRACE_FILE; - open TRACE_FILE, ">RD_TRACE" - and print TRACE_FILE "my \$ERRORS;\n$code" - and close TRACE_FILE; - } - - unless ( eval "$code 1" ) - { - _error("Internal error in generated parser code!"); - $@ =~ s/at grammar/in grammar at/; - _hint($@); - } - } - - if ($ERRORS and !_verbosity("HINT")) - { - local $::RD_HINT = 1; - _hint('Set $::RD_HINT (or -RD_HINT if you\'re using "perl -s") - for hints on fixing these problems.'); - } - if ($ERRORS) { $ERRORS=0; return } - return $self; -} - - -sub _addstartcode($$) -{ - my ($self, $code) = @_; - $code =~ s/\A\s*\{(.*)\}\Z/$1/s; - - $self->{"startcode"} .= "$code;\n"; -} - -# CHECK FOR GRAMMAR PROBLEMS.... - -sub _check_insatiable($$$$) -{ - my ($subrule,$repspec,$grammar,$line) = @_; - pos($grammar)=pos($_[2]); - return if $grammar =~ m/$OPTIONAL/gco || $grammar =~ m/$ANY/gco; - my $min = 1; - if ( $grammar =~ m/$MANY/gco - || $grammar =~ m/$EXACTLY/gco - || $grammar =~ m/$ATMOST/gco - || $grammar =~ m/$BETWEEN/gco && do { $min=$2; 1 } - || $grammar =~ m/$ATLEAST/gco && do { $min=$2; 1 } - || $grammar =~ m/$SUBRULE(?!\s*:)/gco - ) - { - return unless $1 eq $subrule && $min > 0; - _warn(3,"Subrule sequence \"$subrule($repspec) $&\" will - (almost certainly) fail.",$line) - and - _hint("Unless subrule \"$subrule\" performs some cunning - lookahead, the repetition \"$subrule($repspec)\" will - insatiably consume as many matches of \"$subrule\" as it - can, leaving none to match the \"$&\" that follows."); - } -} - -sub _check_grammar ($) -{ - my $self = shift; - my $rules = $self->{"rules"}; - my $rule; - foreach $rule ( values %$rules ) - { - next if ! $rule->{"changed"}; - - # CHECK FOR UNDEFINED RULES - - my $call; - foreach $call ( @{$rule->{"calls"}} ) - { - if (!defined ${$rules}{$call} - &&!defined &{"Parse::RecDescent::$call"}) - { - if (!defined $::RD_AUTOSTUB) - { - _warn(3,"Undefined (sub)rule \"$call\" - used in a production.") - and - _hint("Will you be providing this rule - later, or did you perhaps - misspell \"$call\"? Otherwise - it will be treated as an - immediate <reject>."); - eval "sub $self->{namespace}::$call {undef}"; - } - else # EXPERIMENTAL - { - my $rule = $::RD_AUTOSTUB || qq{'$call'}; - _warn(1,"Autogenerating rule: $call") - and - _hint("A call was made to a subrule - named \"$call\", but no such - rule was specified. However, - since \$::RD_AUTOSTUB - was defined, a rule stub - ($call : $rule) was - automatically created."); - - $self->_generate("$call : $rule",0,1); - } - } - } - - # CHECK FOR LEFT RECURSION - - if ($rule->isleftrec($rules)) - { - _error("Rule \"$rule->{name}\" is left-recursive."); - _hint("Redesign the grammar so it's not left-recursive. - That will probably mean you need to re-implement - repetitions using the '(s)' notation. - For example: \"$rule->{name}(s)\"."); - next; - } - } -} - -# GENERATE ACTUAL PARSER CODE - -sub _code($) -{ - my $self = shift; - my $code = qq{ -package $self->{namespace}; -use strict; -use vars qw(\$skip \$AUTOLOAD $self->{localvars} ); -\$skip = '$skip'; -$self->{startcode} - -{ -local \$SIG{__WARN__} = sub {0}; -# PRETEND TO BE IN Parse::RecDescent NAMESPACE -*$self->{namespace}::AUTOLOAD = sub -{ - no strict 'refs'; - \$AUTOLOAD =~ s/^$self->{namespace}/Parse::RecDescent/; - goto &{\$AUTOLOAD}; -} -} - -}; - $code .= "push \@$self->{namespace}\::ISA, 'Parse::RecDescent';"; - $self->{"startcode"} = ''; - - my $rule; - foreach $rule ( values %{$self->{"rules"}} ) - { - if ($rule->{"changed"}) - { - $code .= $rule->code($self->{"namespace"},$self); - $rule->{"changed"} = 0; - } - } - - return $code; -} - - -# EXECUTING A PARSE.... - -sub AUTOLOAD # ($parser, $text; $linenum, @args) -{ - croak "Could not find method: $AUTOLOAD\n" unless ref $_[0]; - my $class = ref($_[0]) || $_[0]; - my $text = ref($_[1]) ? ${$_[1]} : $_[1]; - $_[0]->{lastlinenum} = $_[2]||_linecount($_[1]); - $_[0]->{lastlinenum} = _linecount($_[1]); - $_[0]->{lastlinenum} += $_[2] if @_ > 2; - $_[0]->{offsetlinenum} = $_[0]->{lastlinenum}; - $_[0]->{fulltext} = $text; - $_[0]->{fulltextlen} = length $text; - $_[0]->{deferred} = []; - $_[0]->{errors} = []; - my @args = @_[3..$#_]; - my $args = sub { [ @args ] }; - - $AUTOLOAD =~ s/$class/$_[0]->{namespace}/; - no strict "refs"; - - croak "Unknown starting rule ($AUTOLOAD) called\n" - unless defined &$AUTOLOAD; - my $retval = &{$AUTOLOAD}($_[0],$text,undef,undef,$args); - - if (defined $retval) - { - foreach ( @{$_[0]->{deferred}} ) { &$_; } - } - else - { - foreach ( @{$_[0]->{errors}} ) { _error(@$_); } - } - - if (ref $_[1]) { ${$_[1]} = $text } - - $ERRORS = 0; - return $retval; -} - -sub _parserepeat($$$$$$$$$$) # RETURNS A REF TO AN ARRAY OF MATCHES -{ - my ($parser, $text, $prod, $min, $max, $_noactions, $expectation, $argcode) = @_; - my @tokens = (); - - my $reps; - for ($reps=0; $reps<$max;) - { - $_[6]->at($text); # $_[6] IS $expectation FROM CALLER - my $_savetext = $text; - my $prevtextlen = length $text; - my $_tok; - if (! defined ($_tok = &$prod($parser,$text,1,$_noactions,$argcode))) - { - $text = $_savetext; - last; - } - push @tokens, $_tok if defined $_tok; - last if ++$reps >= $min and $prevtextlen == length $text; - } - - do { $_[6]->failed(); return undef} if $reps<$min; - - $_[1] = $text; - return [@tokens]; -} - - -# ERROR REPORTING.... - -my $errortext; -my $errorprefix; - -open (ERROR, ">&STDERR"); -format ERROR = -@>>>>>>>>>>>>>>>>>>>>: ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -$errorprefix, $errortext -~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - $errortext -. - -select ERROR; -$| = 1; - -# TRACING - -my $tracemsg; -my $tracecontext; -my $tracerulename; -use vars '$tracelevel'; - -open (TRACE, ">&STDERR"); -format TRACE = -@>|@|||||||||@^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<| -$tracelevel, $tracerulename, '|', $tracemsg - | ~~ |^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<| - $tracemsg -. - -select TRACE; -$| = 1; - -open (TRACECONTEXT, ">&STDERR"); -format TRACECONTEXT = -@>|@|||||||||@ |^<<<<<<<<<<<<<<<<<<<<<<<<<<< -$tracelevel, $tracerulename, '|', $tracecontext - | ~~ | |^<<<<<<<<<<<<<<<<<<<<<<<<<<< - $tracecontext -. - - -select TRACECONTEXT; -$| = 1; - -select STDOUT; - -sub _verbosity($) -{ - defined $::RD_TRACE - or defined $::RD_HINT and $_[0] =~ /ERRORS|WARN|HINT/ - or defined $::RD_WARN and $_[0] =~ /ERRORS|WARN/ - or defined $::RD_ERRORS and $_[0] =~ /ERRORS/ -} - -sub _error($;$) -{ - $ERRORS++; - return 0 if ! _verbosity("ERRORS"); - $errortext = $_[0]; - $errorprefix = "ERROR" . ($_[1] ? " (line $_[1])" : ""); - $errortext =~ s/\s+/ /g; - print ERROR "\n" if _verbosity("WARN"); - write ERROR; - return 1; -} - -sub _warn($$;$) -{ - return 0 unless _verbosity("WARN") && ($::RD_HINT || $_[0] >= ($::RD_WARN||1)); - $errortext = $_[1]; - $errorprefix = "Warning" . ($_[2] ? " (line $_[2])" : ""); - print ERROR "\n"; - $errortext =~ s/\s+/ /g; - write ERROR; - return 1; -} - -sub _hint($) -{ - return 0 unless defined $::RD_HINT; - $errortext = "$_[0])"; - $errorprefix = "(Hint"; - $errortext =~ s/\s+/ /g; - write ERROR; - return 1; -} - -sub _tracemax($) -{ - if (defined $::RD_TRACE - && $::RD_TRACE =~ /\d+/ - && $::RD_TRACE>1 - && $::RD_TRACE+10<length($_[0])) - { - my $count = length($_[0]) - $::RD_TRACE; - return substr($_[0],0,$::RD_TRACE/2) - . "...<$count>..." - . substr($_[0],-$::RD_TRACE/2); - } - else - { - return $_[0]; - } -} - -sub _tracefirst($) -{ - if (defined $::RD_TRACE - && $::RD_TRACE =~ /\d+/ - && $::RD_TRACE>1 - && $::RD_TRACE+10<length($_[0])) - { - my $count = length($_[0]) - $::RD_TRACE; - return substr($_[0],0,$::RD_TRACE) . "...<+$count>"; - } - else - { - return $_[0]; - } -} - -my $lastcontext = ''; -my $lastrulename = ''; -my $lastlevel = ''; - -sub _trace($;$$$) -{ - $tracemsg = $_[0]; - $tracecontext = $_[1]||$lastcontext; - $tracerulename = $_[2]||$lastrulename; - $tracelevel = $_[3]||$lastlevel; - if ($tracerulename) { $lastrulename = $tracerulename } - if ($tracelevel) { $lastlevel = $tracelevel } - - $tracecontext =~ s/\n/\\n/g; - $tracecontext =~ s/\s+/ /g; - $tracerulename = qq{$tracerulename}; - write TRACE; - if ($tracecontext ne $lastcontext) - { - if ($tracecontext) - { - $lastcontext = _tracefirst($tracecontext); - $tracecontext = qq{"$tracecontext"}; - } - else - { - $tracecontext = qq{<NO TEXT LEFT>}; - } - write TRACECONTEXT; - } -} - -sub _parseunneg($$$$) -{ - _parse($_[0],$_[1],$_[3]); - if ($_[2]<0) - { - _error("Can't negate \"$&\".",$_[3]); - _hint("You can't negate $_[0]. Remove the \"...!\" before - \"$&\"."); - return 0; - } - return 1; -} - -sub _parse($$$;$) -{ - my $what = $_[3] || $&; - $what =~ s/^\s+//; - if ($_[1]) - { - _warn(3,"Found $_[0] ($what) after an unconditional <error>",$_[2]) - and - _hint("An unconditional <error> always causes the - production containing it to immediately fail. - \u$_[0] that follows an <error> - will never be reached. Did you mean to use - <error?> instead?"); - } - - return if ! _verbosity("TRACE"); - $errortext = "Treating \"$what\" as $_[0]"; - $errorprefix = "Parse::RecDescent"; - $errortext =~ s/\s+/ /g; - write ERROR; -} - -sub _linecount($) { - scalar substr($_[0], pos $_[0]||0) =~ tr/\n// -} - - -package main; - -use vars qw ( $RD_ERRORS $RD_WARN $RD_HINT $RD_TRACE $RD_CHECK ); -$::RD_CHECK = 1; -$::RD_ERRORS = 1; -$::RD_WARN = 3; - -1; - diff --git a/lib/Template/Plugin/HTML.pm b/lib/Template/Plugin/HTML.pm new file mode 100644 index 0000000..4c4d7f0 --- /dev/null +++ b/lib/Template/Plugin/HTML.pm @@ -0,0 +1,197 @@ +#============================================================= -*-Perl-*- +# +# Template::Plugin::HTML +# +# DESCRIPTION +# +# Template Toolkit plugin providing useful functionality for generating +# HTML. +# +# AUTHOR +# Andy Wardley <abw@kfs.org> +# +# COPYRIGHT +# Copyright (C) 1996-2001 Andy Wardley. All Rights Reserved. +# Copyright (C) 1998-2001 Canon Research Centre Europe Ltd. +# +# This module is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. +# +#---------------------------------------------------------------------------- +# +# $Id: HTML.pm,v 1.1 2006/10/27 10:00:31 amair Exp $ +# +#============================================================================ + +package Template::Plugin::HTML; + +require 5.004; + +use strict; +use vars qw( $VERSION ); +use base qw( Template::Plugin ); +use Template::Plugin; + +$VERSION = sprintf("%d.%02d", q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/); + +sub new { + my ($class, $context, @args) = @_; + my $hash = ref $args[-1] eq 'HASH' ? pop @args : { }; + bless { + _SORTED => $hash->{ sorted } || 0, + }, $class; +} + +sub element { + my ($self, $name, $attr) = @_; + ($name, $attr) = %$name if ref $name eq 'HASH'; + return '' unless defined $name and length $name; + $attr = $self->attributes($attr); + $attr = " $attr" if $attr; + return "<$name$attr>"; +} + +sub attributes { + my ($self, $hash) = @_; + return '' unless UNIVERSAL::isa($hash, 'HASH'); + + my @keys = keys %$hash; + @keys = sort @keys if $self->{ _SORTED }; + + join(' ', map { + "$_=\"" . $self->escape( $hash->{ $_ } ) . '"'; + } @keys); +} + +sub escape { + my ($self, $text) = @_; + for ($text) { + s/&/&/g; + s/</</g; + s/>/>/g; + s/"/"/g; + } + $text; +} + +sub url { + my ($self, $text) = @_; + return undef unless defined $text; + $text =~ s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg; + return $text; +} + + +1; + +__END__ + + +#------------------------------------------------------------------------ +# IMPORTANT NOTE +# This documentation is generated automatically from source +# templates. Any changes you make here may be lost. +# +# The 'docsrc' documentation source bundle is available for download +# from http://www.template-toolkit.org/docs.html and contains all +# the source templates, XML files, scripts, etc., from which the +# documentation for the Template Toolkit is built. +#------------------------------------------------------------------------ + +=head1 NAME + +Template::Plugin::HTML - Plugin to create HTML elements + +=head1 SYNOPSIS + + [% USE HTML %] + + [% HTML.escape("if (a < b && c > d) ..." %] + + [% HTML.element(table => { border => 1, cellpadding => 2 }) %] + + [% HTML.attributes(border => 1, cellpadding => 2) %] + +=head1 DESCRIPTION + +The HTML plugin is very new and very basic, implementing a few useful +methods for generating HTML. It is likely to be extended in the future +or integrated with a larger project to generate HTML elements in a generic +way (as discussed recently on the mod_perl mailing list). + +=head1 METHODS + +=head2 escape(text) + +Returns the source text with any HTML reserved characters such as +E<lt>, E<gt>, etc., correctly esacped to their entity equivalents. + +=head2 attributes(hash) + +Returns the elements of the hash array passed by reference correctly +formatted (e.g. values quoted and correctly escaped) as attributes for +an HTML element. + +=head2 element(type, attributes) + +Generates an HTML element of the specified type and with the attributes +provided as an optional hash array reference as the second argument or +as named arguments. + + [% HTML.element(table => { border => 1, cellpadding => 2 }) %] + [% HTML.element('table', border=1, cellpadding=2) %] + [% HTML.element(table => attribs) %] + +=head1 DEBUGGING + +The HTML plugin accepts a 'sorted' option as a constructor argument +which, when set to any true value, causes the attributes generated by +the attributes() method (either directly or via element()) to be +returned in sorted order. Order of attributes isn't important in +HTML, but this is provided mainly for the purposes of debugging where +it is useful to have attributes generated in a deterministic order +rather than whatever order the hash happened to feel like returning +the keys in. + + [% USE HTML(sorted=1) %] + [% HTML.element( foo => { charlie => 1, bravo => 2, alpha => 3 } ) %] + +generates: + + <foo alpha="3" bravo="2" charlie="1"> + +=head1 AUTHOR + +Andy Wardley E<lt>abw@andywardley.comE<gt> + +L<http://www.andywardley.com/|http://www.andywardley.com/> + + + + +=head1 VERSION + +2.57, distributed as part of the +Template Toolkit version 2.14, released on 04 October 2004. + +=head1 COPYRIGHT + + Copyright (C) 1996-2004 Andy Wardley. All Rights Reserved. + Copyright (C) 1998-2002 Canon Research Centre Europe Ltd. + +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=head1 SEE ALSO + +L<Template::Plugin|Template::Plugin> + +=cut + +# Local Variables: +# mode: perl +# perl-indent-level: 4 +# indent-tabs-mode: nil +# End: +# +# vim: expandtab shiftwidth=4: diff --git a/lib/Template/Plugin/JavaScript.pm b/lib/Template/Plugin/JavaScript.pm new file mode 100644 index 0000000..4a82b60 --- /dev/null +++ b/lib/Template/Plugin/JavaScript.pm @@ -0,0 +1,73 @@ +package Template::Plugin::JavaScript; + +use strict; +use vars qw($VERSION); +$VERSION = '0.01'; + +require Template::Plugin; +use base qw(Template::Plugin); + +use vars qw($FILTER_NAME); +$FILTER_NAME = 'js'; + +sub new { + my($self, $context, @args) = @_; + my $name = $args[0] || $FILTER_NAME; + $context->define_filter($name, \&encode_js, 0); + return $self; +} + +sub encode_js { + local $_ = shift; + return '' unless defined $_; + + s!(['"])!\\$1!g; + s!\n!\\n!g; + s!\f!\\f!g; + s!\r!\\r!g; + s!\t!\\t!g; + $_; +} + +1; +__END__ + +=head1 NAME + +Template::Plugin::JavaScript - Encodes text to be safe in JavaScript + +=head1 SYNOPSIS + + [% USE JavaScript %] + <script type="text/javascript"> + document.write("[% sometext | js %]"); + </script> + +=head1 DESCRIPTION + +Template::Plugin::JavaScript is a TT filter that filters text so it +can be safely used in JavaScript quotes. + + [% USE JavaScript %] + document.write("[% FILTER js %] + Here's some text going on. + [% END %]"); + +will become: + + document.write("\nHere\'s some text going on.\n"); + +=head1 AUTHOR + +The original idea comes from Movable Type's C<encode_js> global filter. + +Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt> + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 SEE ALSO + +L<Apache::JavaScript::DocumentWrite> + +=cut diff --git a/lib/Text/Balanced.pm b/lib/Text/Balanced.pm deleted file mode 100644 index aa09c3e..0000000 --- a/lib/Text/Balanced.pm +++ /dev/null @@ -1,2301 +0,0 @@ -# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS. -# FOR FULL DOCUMENTATION SEE Balanced.pod - -use 5.005; -use strict; - -package Text::Balanced; - -use Exporter; -use SelfLoader; -use vars qw { $VERSION @ISA %EXPORT_TAGS }; - -$VERSION = '1.90'; -@ISA = qw ( Exporter ); - -%EXPORT_TAGS = ( ALL => [ qw( - &extract_delimited - &extract_bracketed - &extract_quotelike - &extract_codeblock - &extract_variable - &extract_tagged - &extract_multiple - - &gen_delimited_pat - &gen_extract_tagged - - &delimited_pat - ) ] ); - -Exporter::export_ok_tags('ALL'); - -# PROTOTYPES - -sub _match_bracketed($$$$$$); -sub _match_variable($$); -sub _match_codeblock($$$$$$$); -sub _match_quotelike($$$$); - -# HANDLE RETURN VALUES IN VARIOUS CONTEXTS - -sub _failmsg { - my ($message, $pos) = @_; - $@ = bless { error=>$message, pos=>$pos }, "Text::Balanced::ErrorMsg"; -} - -sub _fail -{ - my ($wantarray, $textref, $message, $pos) = @_; - _failmsg $message, $pos if $message; - return ("",$$textref,"") if $wantarray; - return undef; -} - -sub _succeed -{ - $@ = undef; - my ($wantarray,$textref) = splice @_, 0, 2; - my ($extrapos, $extralen) = @_>18 ? splice(@_, -2, 2) : (0,0); - my ($startlen) = $_[5]; - my $remainderpos = $_[2]; - if ($wantarray) - { - my @res; - while (my ($from, $len) = splice @_, 0, 2) - { - push @res, substr($$textref,$from,$len); - } - if ($extralen) { # CORRECT FILLET - my $extra = substr($res[0], $extrapos-$startlen, $extralen, "\n"); - $res[1] = "$extra$res[1]"; - eval { substr($$textref,$remainderpos,0) = $extra; - substr($$textref,$extrapos,$extralen,"\n")} ; - #REARRANGE HERE DOC AND FILLET IF POSSIBLE - pos($$textref) = $remainderpos-$extralen+1; # RESET \G - } - else { - pos($$textref) = $remainderpos; # RESET \G - } - return @res; - } - else - { - my $match = substr($$textref,$_[0],$_[1]); - substr($match,$extrapos-$_[0]-$startlen,$extralen,"") if $extralen; - my $extra = $extralen - ? substr($$textref, $extrapos, $extralen)."\n" : ""; - eval {substr($$textref,$_[4],$_[1]+$_[5])=$extra} ; #CHOP OUT PREFIX & MATCH, IF POSSIBLE - pos($$textref) = $_[4]; # RESET \G - return $match; - } -} - -# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING - -sub gen_delimited_pat($;$) # ($delimiters;$escapes) -{ - my ($dels, $escs) = @_; - return "" unless $dels =~ /\S/; - $escs = '\\' unless $escs; - $escs .= substr($escs,-1) x (length($dels)-length($escs)); - my @pat = (); - my $i; - for ($i=0; $i<length $dels; $i++) - { - my $del = quotemeta substr($dels,$i,1); - my $esc = quotemeta substr($escs,$i,1); - if ($del eq $esc) - { - push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del"; - } - else - { - push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del"; - } - } - my $pat = join '|', @pat; - return "(?:$pat)"; -} - -*delimited_pat = \&gen_delimited_pat; - - -# THE EXTRACTION FUNCTIONS - -sub extract_delimited (;$$$$) -{ - my $textref = defined $_[0] ? \$_[0] : \$_; - my $wantarray = wantarray; - my $del = defined $_[1] ? $_[1] : qq{\'\"\`}; - my $pre = defined $_[2] ? $_[2] : '\s*'; - my $esc = defined $_[3] ? $_[3] : qq{\\}; - my $pat = gen_delimited_pat($del, $esc); - my $startpos = pos $$textref || 0; - return _fail($wantarray, $textref, "Not a delimited pattern", 0) - unless $$textref =~ m/\G($pre)($pat)/gc; - my $prelen = length($1); - my $matchpos = $startpos+$prelen; - my $endpos = pos $$textref; - return _succeed $wantarray, $textref, - $matchpos, $endpos-$matchpos, # MATCH - $endpos, length($$textref)-$endpos, # REMAINDER - $startpos, $prelen; # PREFIX -} - -sub extract_bracketed (;$$$) -{ - my $textref = defined $_[0] ? \$_[0] : \$_; - my $ldel = defined $_[1] ? $_[1] : '{([<'; - my $pre = defined $_[2] ? $_[2] : '\s*'; - my $wantarray = wantarray; - my $qdel = ""; - my $quotelike; - $ldel =~ s/'//g and $qdel .= q{'}; - $ldel =~ s/"//g and $qdel .= q{"}; - $ldel =~ s/`//g and $qdel .= q{`}; - $ldel =~ s/q//g and $quotelike = 1; - $ldel =~ tr/[](){}<>\0-\377/[[(({{<</ds; - my $rdel = $ldel; - unless ($rdel =~ tr/[({</])}>/) - { - return _fail $wantarray, $textref, - "Did not find a suitable bracket in delimiter: \"$_[1]\"", - 0; - } - my $posbug = pos; - $ldel = join('|', map { quotemeta $_ } split('', $ldel)); - $rdel = join('|', map { quotemeta $_ } split('', $rdel)); - pos = $posbug; - - my $startpos = pos $$textref || 0; - my @match = _match_bracketed($textref,$pre, $ldel, $qdel, $quotelike, $rdel); - - return _fail ($wantarray, $textref) unless @match; - - return _succeed ( $wantarray, $textref, - $match[2], $match[5]+2, # MATCH - @match[8,9], # REMAINDER - @match[0,1], # PREFIX - ); -} - -sub _match_bracketed($$$$$$) # $textref, $pre, $ldel, $qdel, $quotelike, $rdel -{ - my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_; - my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0); - unless ($$textref =~ m/\G$pre/gc) - { - _failmsg "Did not find prefix: /$pre/", $startpos; - return; - } - - $ldelpos = pos $$textref; - - unless ($$textref =~ m/\G($ldel)/gc) - { - _failmsg "Did not find opening bracket after prefix: \"$pre\"", - pos $$textref; - pos $$textref = $startpos; - return; - } - - my @nesting = ( $1 ); - my $textlen = length $$textref; - while (pos $$textref < $textlen) - { - next if $$textref =~ m/\G\\./gcs; - - if ($$textref =~ m/\G($ldel)/gc) - { - push @nesting, $1; - } - elsif ($$textref =~ m/\G($rdel)/gc) - { - my ($found, $brackettype) = ($1, $1); - if ($#nesting < 0) - { - _failmsg "Unmatched closing bracket: \"$found\"", - pos $$textref; - pos $$textref = $startpos; - return; - } - my $expected = pop(@nesting); - $expected =~ tr/({[</)}]>/; - if ($expected ne $brackettype) - { - _failmsg qq{Mismatched closing bracket: expected "$expected" but found "$found"}, - pos $$textref; - pos $$textref = $startpos; - return; - } - last if $#nesting < 0; - } - elsif ($qdel && $$textref =~ m/\G([$qdel])/gc) - { - $$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gsc and next; - _failmsg "Unmatched embedded quote ($1)", - pos $$textref; - pos $$textref = $startpos; - return; - } - elsif ($quotelike && _match_quotelike($textref,"",1,0)) - { - next; - } - - else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs } - } - if ($#nesting>=0) - { - _failmsg "Unmatched opening bracket(s): " - . join("..",@nesting)."..", - pos $$textref; - pos $$textref = $startpos; - return; - } - - $endpos = pos $$textref; - - return ( - $startpos, $ldelpos-$startpos, # PREFIX - $ldelpos, 1, # OPENING BRACKET - $ldelpos+1, $endpos-$ldelpos-2, # CONTENTS - $endpos-1, 1, # CLOSING BRACKET - $endpos, length($$textref)-$endpos, # REMAINDER - ); -} - -sub revbracket($) -{ - my $brack = reverse $_[0]; - $brack =~ tr/[({</])}>/; - return $brack; -} - -my $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*}; - -sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options) -{ - my $textref = defined $_[0] ? \$_[0] : \$_; - my $ldel = $_[1]; - my $rdel = $_[2]; - my $pre = defined $_[3] ? $_[3] : '\s*'; - my %options = defined $_[4] ? %{$_[4]} : (); - my $omode = defined $options{fail} ? $options{fail} : ''; - my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}}) - : defined($options{reject}) ? $options{reject} - : '' - ; - my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}}) - : defined($options{ignore}) ? $options{ignore} - : '' - ; - - if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; } - $@ = undef; - - my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore); - - return _fail(wantarray, $textref) unless @match; - return _succeed wantarray, $textref, - $match[2], $match[3]+$match[5]+$match[7], # MATCH - @match[8..9,0..1,2..7]; # REM, PRE, BITS -} - -sub _match_tagged # ($$$$$$$) -{ - my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_; - my $rdelspec; - - my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) = ( pos($$textref) = pos($$textref)||0 ); - - unless ($$textref =~ m/\G($pre)/gc) - { - _failmsg "Did not find prefix: /$pre/", pos $$textref; - goto failed; - } - - $opentagpos = pos($$textref); - - unless ($$textref =~ m/\G$ldel/gc) - { - _failmsg "Did not find opening tag: /$ldel/", pos $$textref; - goto failed; - } - - $textpos = pos($$textref); - - if (!defined $rdel) - { - $rdelspec = $&; - unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". revbracket($1) /oes) - { - _failmsg "Unable to construct closing tag to match: $rdel", - pos $$textref; - goto failed; - } - } - else - { - $rdelspec = eval "qq{$rdel}" || do { - my $del; - for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',) - { next if $rdel =~ /\Q$_/; $del = $_; last } - unless ($del) { - use Carp; - croak "Can't interpolate right delimiter $rdel" - } - eval "qq$del$rdel$del"; - }; - } - - while (pos($$textref) < length($$textref)) - { - next if $$textref =~ m/\G\\./gc; - - if ($$textref =~ m/\G(\n[ \t]*\n)/gc ) - { - $parapos = pos($$textref) - length($1) - unless defined $parapos; - } - elsif ($$textref =~ m/\G($rdelspec)/gc ) - { - $closetagpos = pos($$textref)-length($1); - goto matched; - } - elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc) - { - next; - } - elsif ($bad && $$textref =~ m/\G($bad)/gcs) - { - pos($$textref) -= length($1); # CUT OFF WHATEVER CAUSED THE SHORTNESS - goto short if ($omode eq 'PARA' || $omode eq 'MAX'); - _failmsg "Found invalid nested tag: $1", pos $$textref; - goto failed; - } - elsif ($$textref =~ m/\G($ldel)/gc) - { - my $tag = $1; - pos($$textref) -= length($tag); # REWIND TO NESTED TAG - unless (_match_tagged(@_)) # MATCH NESTED TAG - { - goto short if $omode eq 'PARA' || $omode eq 'MAX'; - _failmsg "Found unbalanced nested tag: $tag", - pos $$textref; - goto failed; - } - } - else { $$textref =~ m/./gcs } - } - -short: - $closetagpos = pos($$textref); - goto matched if $omode eq 'MAX'; - goto failed unless $omode eq 'PARA'; - - if (defined $parapos) { pos($$textref) = $parapos } - else { $parapos = pos($$textref) } - - return ( - $startpos, $opentagpos-$startpos, # PREFIX - $opentagpos, $textpos-$opentagpos, # OPENING TAG - $textpos, $parapos-$textpos, # TEXT - $parapos, 0, # NO CLOSING TAG - $parapos, length($$textref)-$parapos, # REMAINDER - ); - -matched: - $endpos = pos($$textref); - return ( - $startpos, $opentagpos-$startpos, # PREFIX - $opentagpos, $textpos-$opentagpos, # OPENING TAG - $textpos, $closetagpos-$textpos, # TEXT - $closetagpos, $endpos-$closetagpos, # CLOSING TAG - $endpos, length($$textref)-$endpos, # REMAINDER - ); - -failed: - _failmsg "Did not find closing tag", pos $$textref unless $@; - pos($$textref) = $startpos; - return; -} - -sub extract_variable (;$$) -{ - my $textref = defined $_[0] ? \$_[0] : \$_; - return ("","","") unless defined $$textref; - my $pre = defined $_[1] ? $_[1] : '\s*'; - - my @match = _match_variable($textref,$pre); - - return _fail wantarray, $textref unless @match; - - return _succeed wantarray, $textref, - @match[2..3,4..5,0..1]; # MATCH, REMAINDER, PREFIX -} - -sub _match_variable($$) -{ -# $# -# $^ -# $$ - my ($textref, $pre) = @_; - my $startpos = pos($$textref) = pos($$textref)||0; - unless ($$textref =~ m/\G($pre)/gc) - { - _failmsg "Did not find prefix: /$pre/", pos $$textref; - return; - } - my $varpos = pos($$textref); - unless ($$textref =~ m{\G\$\s*(?!::)(\d+|[][&`'+*./|,";%=~:?!\@<>()-]|\^[a-z]?)}gci) - { - unless ($$textref =~ m/\G((\$#?|[*\@\%]|\\&)+)/gc) - { - _failmsg "Did not find leading dereferencer", pos $$textref; - pos $$textref = $startpos; - return; - } - my $deref = $1; - - unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci - or _match_codeblock($textref, "", '\{', '\}', '\{', '\}', 0) - or $deref eq '$#' or $deref eq '$$' ) - { - _failmsg "Bad identifier after dereferencer", pos $$textref; - pos $$textref = $startpos; - return; - } - } - - while (1) - { - next if $$textref =~ m/\G\s*(?:->)?\s*[{]\w+[}]/gc; - next if _match_codeblock($textref, - qr/\s*->\s*(?:[_a-zA-Z]\w+\s*)?/, - qr/[({[]/, qr/[)}\]]/, - qr/[({[]/, qr/[)}\]]/, 0); - next if _match_codeblock($textref, - qr/\s*/, qr/[{[]/, qr/[}\]]/, - qr/[{[]/, qr/[}\]]/, 0); - next if _match_variable($textref,'\s*->\s*'); - next if $$textref =~ m/\G\s*->\s*\w+(?![{([])/gc; - last; - } - - my $endpos = pos($$textref); - return ($startpos, $varpos-$startpos, - $varpos, $endpos-$varpos, - $endpos, length($$textref)-$endpos - ); -} - -sub extract_codeblock (;$$$$$) -{ - my $textref = defined $_[0] ? \$_[0] : \$_; - my $wantarray = wantarray; - my $ldel_inner = defined $_[1] ? $_[1] : '{'; - my $pre = defined $_[2] ? $_[2] : '\s*'; - my $ldel_outer = defined $_[3] ? $_[3] : $ldel_inner; - my $rd = $_[4]; - my $rdel_inner = $ldel_inner; - my $rdel_outer = $ldel_outer; - my $posbug = pos; - for ($ldel_inner, $ldel_outer) { tr/[]()<>{}\0-\377/[[((<<{{/ds } - for ($rdel_inner, $rdel_outer) { tr/[]()<>{}\0-\377/]]))>>}}/ds } - for ($ldel_inner, $ldel_outer, $rdel_inner, $rdel_outer) - { - $_ = '('.join('|',map { quotemeta $_ } split('',$_)).')' - } - pos = $posbug; - - my @match = _match_codeblock($textref, $pre, - $ldel_outer, $rdel_outer, - $ldel_inner, $rdel_inner, - $rd); - return _fail($wantarray, $textref) unless @match; - return _succeed($wantarray, $textref, - @match[2..3,4..5,0..1] # MATCH, REMAINDER, PREFIX - ); - -} - -sub _match_codeblock($$$$$$$) -{ - my ($textref, $pre, $ldel_outer, $rdel_outer, $ldel_inner, $rdel_inner, $rd) = @_; - my $startpos = pos($$textref) = pos($$textref) || 0; - unless ($$textref =~ m/\G($pre)/gc) - { - _failmsg qq{Did not match prefix /$pre/ at"} . - substr($$textref,pos($$textref),20) . - q{..."}, - pos $$textref; - return; - } - my $codepos = pos($$textref); - unless ($$textref =~ m/\G($ldel_outer)/gc) # OUTERMOST DELIMITER - { - _failmsg qq{Did not find expected opening bracket at "} . - substr($$textref,pos($$textref),20) . - q{..."}, - pos $$textref; - pos $$textref = $startpos; - return; - } - my $closing = $1; - $closing =~ tr/([<{/)]>}/; - my $matched; - my $patvalid = 1; - while (pos($$textref) < length($$textref)) - { - $matched = ''; - if ($rd && $$textref =~ m#\G(\Q(?)\E|\Q(s?)\E|\Q(s)\E)#gc) - { - $patvalid = 0; - next; - } - - if ($$textref =~ m/\G\s*#.*/gc) - { - next; - } - - if ($$textref =~ m/\G\s*($rdel_outer)/gc) - { - unless ($matched = ($closing && $1 eq $closing) ) - { - next if $1 eq '>'; # MIGHT BE A "LESS THAN" - _failmsg q{Mismatched closing bracket at "} . - substr($$textref,pos($$textref),20) . - qq{...". Expected '$closing'}, - pos $$textref; - } - last; - } - - if (_match_variable($textref,'\s*') || - _match_quotelike($textref,'\s*',$patvalid,$patvalid) ) - { - $patvalid = 0; - next; - } - - - # NEED TO COVER MANY MORE CASES HERE!!! - if ($$textref =~ m#\G\s*( [-+*x/%^&|.]=? - | [!=]~ - | =(?!>) - | (\*\*|&&|\|\||<<|>>)=? - | split|grep|map|return - | [([] - )#gcx) - { - $patvalid = 1; - next; - } - - if ( _match_codeblock($textref, '\s*', $ldel_inner, $rdel_inner, $ldel_inner, $rdel_inner, $rd) ) - { - $patvalid = 1; - next; - } - - if ($$textref =~ m/\G\s*$ldel_outer/gc) - { - _failmsg q{Improperly nested codeblock at "} . - substr($$textref,pos($$textref),20) . - q{..."}, - pos $$textref; - last; - } - - $patvalid = 0; - $$textref =~ m/\G\s*(\w+|[-=>]>|.|\Z)/gc; - } - continue { $@ = undef } - - unless ($matched) - { - _failmsg 'No match found for opening bracket', pos $$textref - unless $@; - return; - } - - my $endpos = pos($$textref); - return ( $startpos, $codepos-$startpos, - $codepos, $endpos-$codepos, - $endpos, length($$textref)-$endpos, - ); -} - - -my %mods = ( - 'none' => '[cgimsox]*', - 'm' => '[cgimsox]*', - 's' => '[cegimsox]*', - 'tr' => '[cds]*', - 'y' => '[cds]*', - 'qq' => '', - 'qx' => '', - 'qw' => '', - 'qr' => '[imsx]*', - 'q' => '', - ); - -sub extract_quotelike (;$$) -{ - my $textref = $_[0] ? \$_[0] : \$_; - my $wantarray = wantarray; - my $pre = defined $_[1] ? $_[1] : '\s*'; - - my @match = _match_quotelike($textref,$pre,1,0); - return _fail($wantarray, $textref) unless @match; - return _succeed($wantarray, $textref, - $match[2], $match[18]-$match[2], # MATCH - @match[18,19], # REMAINDER - @match[0,1], # PREFIX - @match[2..17], # THE BITS - @match[20,21], # ANY FILLET? - ); -}; - -sub _match_quotelike($$$$) # ($textref, $prepat, $allow_raw_match) -{ - my ($textref, $pre, $rawmatch, $qmark) = @_; - - my ($textlen,$startpos, - $oppos, - $preld1pos,$ld1pos,$str1pos,$rd1pos, - $preld2pos,$ld2pos,$str2pos,$rd2pos, - $modpos) = ( length($$textref), pos($$textref) = pos($$textref) || 0 ); - - unless ($$textref =~ m/\G($pre)/gc) - { - _failmsg qq{Did not find prefix /$pre/ at "} . - substr($$textref, pos($$textref), 20) . - q{..."}, - pos $$textref; - return; - } - $oppos = pos($$textref); - - my $initial = substr($$textref,$oppos,1); - - if ($initial && $initial =~ m|^[\"\'\`]| - || $rawmatch && $initial =~ m|^/| - || $qmark && $initial =~ m|^\?|) - { - unless ($$textref =~ m/ \Q$initial\E [^\\$initial]* (\\.[^\\$initial]*)* \Q$initial\E /gcsx) - { - _failmsg qq{Did not find closing delimiter to match '$initial' at "} . - substr($$textref, $oppos, 20) . - q{..."}, - pos $$textref; - pos $$textref = $startpos; - return; - } - $modpos= pos($$textref); - $rd1pos = $modpos-1; - - if ($initial eq '/' || $initial eq '?') - { - $$textref =~ m/\G$mods{none}/gc - } - - my $endpos = pos($$textref); - return ( - $startpos, $oppos-$startpos, # PREFIX - $oppos, 0, # NO OPERATOR - $oppos, 1, # LEFT DEL - $oppos+1, $rd1pos-$oppos-1, # STR/PAT - $rd1pos, 1, # RIGHT DEL - $modpos, 0, # NO 2ND LDEL - $modpos, 0, # NO 2ND STR - $modpos, 0, # NO 2ND RDEL - $modpos, $endpos-$modpos, # MODIFIERS - $endpos, $textlen-$endpos, # REMAINDER - ); - } - - unless ($$textref =~ m{\G((?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<)}gc) - { - _failmsg q{No quotelike operator found after prefix at "} . - substr($$textref, pos($$textref), 20) . - q{..."}, - pos $$textref; - pos $$textref = $startpos; - return; - } - - my $op = $1; - $preld1pos = pos($$textref); - if ($op eq '<<') { - $ld1pos = pos($$textref); - my $label; - if ($$textref =~ m{\G([A-Za-z_]\w*)}gc) { - $label = $1; - } - elsif ($$textref =~ m{ \G ' ([^'\\]* (?:\\.[^'\\]*)*) ' - | \G " ([^"\\]* (?:\\.[^"\\]*)*) " - | \G ` ([^`\\]* (?:\\.[^`\\]*)*) ` - }gcsx) { - $label = $+; - } - else { - $label = ""; - } - my $extrapos = pos($$textref); - $$textref =~ m{.*\n}gc; - $str1pos = pos($$textref); - unless ($$textref =~ m{.*?\n(?=$label\n)}gc) { - _failmsg qq{Missing here doc terminator ('$label') after "} . - substr($$textref, $startpos, 20) . - q{..."}, - pos $$textref; - pos $$textref = $startpos; - return; - } - $rd1pos = pos($$textref); - $$textref =~ m{$label\n}gc; - $ld2pos = pos($$textref); - return ( - $startpos, $oppos-$startpos, # PREFIX - $oppos, length($op), # OPERATOR - $ld1pos, $extrapos-$ld1pos, # LEFT DEL - $str1pos, $rd1pos-$str1pos, # STR/PAT - $rd1pos, $ld2pos-$rd1pos, # RIGHT DEL - $ld2pos, 0, # NO 2ND LDEL - $ld2pos, 0, # NO 2ND STR - $ld2pos, 0, # NO 2ND RDEL - $ld2pos, 0, # NO MODIFIERS - $ld2pos, $textlen-$ld2pos, # REMAINDER - $extrapos, $str1pos-$extrapos, # FILLETED BIT - ); - } - - $$textref =~ m/\G\s*/gc; - $ld1pos = pos($$textref); - $str1pos = $ld1pos+1; - - unless ($$textref =~ m/\G(\S)/gc) # SHOULD USE LOOKAHEAD - { - _failmsg "No block delimiter found after quotelike $op", - pos $$textref; - pos $$textref = $startpos; - return; - } - pos($$textref) = $ld1pos; # HAVE TO DO THIS BECAUSE LOOKAHEAD BROKEN - my ($ldel1, $rdel1) = ("\Q$1","\Q$1"); - if ($ldel1 =~ /[[(<{]/) - { - $rdel1 =~ tr/[({</])}>/; - _match_bracketed($textref,"",$ldel1,"","",$rdel1) - || do { pos $$textref = $startpos; return }; - } - else - { - $$textref =~ /$ldel1[^\\$ldel1]*(\\.[^\\$ldel1]*)*$ldel1/gcs - || do { pos $$textref = $startpos; return }; - } - $ld2pos = $rd1pos = pos($$textref)-1; - - my $second_arg = $op =~ /s|tr|y/ ? 1 : 0; - if ($second_arg) - { - my ($ldel2, $rdel2); - if ($ldel1 =~ /[[(<{]/) - { - unless ($$textref =~ /\G\s*(\S)/gc) # SHOULD USE LOOKAHEAD - { - _failmsg "Missing second block for quotelike $op", - pos $$textref; - pos $$textref = $startpos; - return; - } - $ldel2 = $rdel2 = "\Q$1"; - $rdel2 =~ tr/[({</])}>/; - } - else - { - $ldel2 = $rdel2 = $ldel1; - } - $str2pos = $ld2pos+1; - - if ($ldel2 =~ /[[(<{]/) - { - pos($$textref)--; # OVERCOME BROKEN LOOKAHEAD - _match_bracketed($textref,"",$ldel2,"","",$rdel2) - || do { pos $$textref = $startpos; return }; - } - else - { - $$textref =~ /[^\\$ldel2]*(\\.[^\\$ldel2]*)*$ldel2/gcs - || do { pos $$textref = $startpos; return }; - } - $rd2pos = pos($$textref)-1; - } - else - { - $ld2pos = $str2pos = $rd2pos = $rd1pos; - } - - $modpos = pos $$textref; - - $$textref =~ m/\G($mods{$op})/gc; - my $endpos = pos $$textref; - - return ( - $startpos, $oppos-$startpos, # PREFIX - $oppos, length($op), # OPERATOR - $ld1pos, 1, # LEFT DEL - $str1pos, $rd1pos-$str1pos, # STR/PAT - $rd1pos, 1, # RIGHT DEL - $ld2pos, $second_arg, # 2ND LDEL (MAYBE) - $str2pos, $rd2pos-$str2pos, # 2ND STR (MAYBE) - $rd2pos, $second_arg, # 2ND RDEL (MAYBE) - $modpos, $endpos-$modpos, # MODIFIERS - $endpos, $textlen-$endpos, # REMAINDER - ); -} - -my $def_func = -[ - sub { extract_variable($_[0], '') }, - sub { extract_quotelike($_[0],'') }, - sub { extract_codeblock($_[0],'{}','') }, -]; - -sub extract_multiple (;$$$$) # ($text, $functions_ref, $max_fields, $ignoreunknown) -{ - my $textref = defined($_[0]) ? \$_[0] : \$_; - my $posbug = pos; - my ($lastpos, $firstpos); - my @fields = (); - - #for ($$textref) - { - my @func = defined $_[1] ? @{$_[1]} : @{$def_func}; - my $max = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000; - my $igunk = $_[3]; - - pos $$textref ||= 0; - - unless (wantarray) - { - use Carp; - carp "extract_multiple reset maximal count to 1 in scalar context" - if $^W && defined($_[2]) && $max > 1; - $max = 1 - } - - my $unkpos; - my $func; - my $class; - - my @class; - foreach $func ( @func ) - { - if (ref($func) eq 'HASH') - { - push @class, (keys %$func)[0]; - $func = (values %$func)[0]; - } - else - { - push @class, undef; - } - } - - FIELD: while (pos($$textref) < length($$textref)) - { - my ($field, $rem); - my @bits; - foreach my $i ( 0..$#func ) - { - my $pref; - $func = $func[$i]; - $class = $class[$i]; - $lastpos = pos $$textref; - if (ref($func) eq 'CODE') - { ($field,$rem,$pref) = @bits = $func->($$textref); - # print "[$field|$rem]" if $field; - } - elsif (ref($func) eq 'Text::Balanced::Extractor') - { @bits = $field = $func->extract($$textref) } - elsif( $$textref =~ m/\G$func/gc ) - { @bits = $field = defined($1) ? $1 : $& } - $pref ||= ""; - if (defined($field) && length($field)) - { - if (!$igunk) { - $unkpos = pos $$textref - if length($pref) && !defined($unkpos); - if (defined $unkpos) - { - push @fields, substr($$textref, $unkpos, $lastpos-$unkpos).$pref; - $firstpos = $unkpos unless defined $firstpos; - undef $unkpos; - last FIELD if @fields == $max; - } - } - push @fields, $class - ? bless (\$field, $class) - : $field; - $firstpos = $lastpos unless defined $firstpos; - $lastpos = pos $$textref; - last FIELD if @fields == $max; - next FIELD; - } - } - if ($$textref =~ /\G(.)/gcs) - { - $unkpos = pos($$textref)-1 - unless $igunk || defined $unkpos; - } - } - - if (defined $unkpos) - { - push @fields, substr($$textref, $unkpos); - $firstpos = $unkpos unless defined $firstpos; - $lastpos = length $$textref; - } - last; - } - - pos $$textref = $lastpos; - return @fields if wantarray; - - $firstpos ||= 0; - eval { substr($$textref,$firstpos,$lastpos-$firstpos)=""; - pos $$textref = $firstpos }; - return $fields[0]; -} - - -sub gen_extract_tagged # ($opentag, $closetag, $pre, \%options) -{ - my $ldel = $_[0]; - my $rdel = $_[1]; - my $pre = defined $_[2] ? $_[2] : '\s*'; - my %options = defined $_[3] ? %{$_[3]} : (); - my $omode = defined $options{fail} ? $options{fail} : ''; - my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}}) - : defined($options{reject}) ? $options{reject} - : '' - ; - my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}}) - : defined($options{ignore}) ? $options{ignore} - : '' - ; - - if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; } - - my $posbug = pos; - for ($ldel, $pre, $bad, $ignore) { $_ = qr/$_/ if $_ } - pos = $posbug; - - my $closure = sub - { - my $textref = defined $_[0] ? \$_[0] : \$_; - my @match = Text::Balanced::_match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore); - - return _fail(wantarray, $textref) unless @match; - return _succeed wantarray, $textref, - $match[2], $match[3]+$match[5]+$match[7], # MATCH - @match[8..9,0..1,2..7]; # REM, PRE, BITS - }; - - bless $closure, 'Text::Balanced::Extractor'; -} - -package Text::Balanced::Extractor; - -sub extract($$) # ($self, $text) -{ - &{$_[0]}($_[1]); -} - -package Text::Balanced::ErrorMsg; - -use overload '""' => sub { "$_[0]->{error}, detected at offset $_[0]->{pos}" }; - -1; - -__END__ - -=head1 NAME - -Text::Balanced - Extract delimited text sequences from strings. - - -=head1 SYNOPSIS - - use Text::Balanced qw ( - extract_delimited - extract_bracketed - extract_quotelike - extract_codeblock - extract_variable - extract_tagged - extract_multiple - - gen_delimited_pat - gen_extract_tagged - ); - - # Extract the initial substring of $text that is delimited by - # two (unescaped) instances of the first character in $delim. - - ($extracted, $remainder) = extract_delimited($text,$delim); - - - # Extract the initial substring of $text that is bracketed - # with a delimiter(s) specified by $delim (where the string - # in $delim contains one or more of '(){}[]<>'). - - ($extracted, $remainder) = extract_bracketed($text,$delim); - - - # Extract the initial substring of $text that is bounded by - # an XML tag. - - ($extracted, $remainder) = extract_tagged($text); - - - # Extract the initial substring of $text that is bounded by - # a C<BEGIN>...C<END> pair. Don't allow nested C<BEGIN> tags - - ($extracted, $remainder) = - extract_tagged($text,"BEGIN","END",undef,{bad=>["BEGIN"]}); - - - # Extract the initial substring of $text that represents a - # Perl "quote or quote-like operation" - - ($extracted, $remainder) = extract_quotelike($text); - - - # Extract the initial substring of $text that represents a block - # of Perl code, bracketed by any of character(s) specified by $delim - # (where the string $delim contains one or more of '(){}[]<>'). - - ($extracted, $remainder) = extract_codeblock($text,$delim); - - - # Extract the initial substrings of $text that would be extracted by - # one or more sequential applications of the specified functions - # or regular expressions - - @extracted = extract_multiple($text, - [ \&extract_bracketed, - \&extract_quotelike, - \&some_other_extractor_sub, - qr/[xyz]*/, - 'literal', - ]); - -# Create a string representing an optimized pattern (a la Friedl) -# that matches a substring delimited by any of the specified characters -# (in this case: any type of quote or a slash) - - $patstring = gen_delimited_pat(q{'"`/}); - - -# Generate a reference to an anonymous sub that is just like extract_tagged -# but pre-compiled and optimized for a specific pair of tags, and consequently -# much faster (i.e. 3 times faster). It uses qr// for better performance on -# repeated calls, so it only works under Perl 5.005 or later. - - $extract_head = gen_extract_tagged('<HEAD>','</HEAD>'); - - ($extracted, $remainder) = $extract_head->($text); - - -=head1 DESCRIPTION - -The various C<extract_...> subroutines may be used to -extract a delimited substring, possibly after skipping a -specified prefix string. By default, that prefix is -optional whitespace (C</\s*/>), but you can change it to whatever -you wish (see below). - -The substring to be extracted must appear at the -current C<pos> location of the string's variable -(or at index zero, if no C<pos> position is defined). -In other words, the C<extract_...> subroutines I<don't> -extract the first occurance of a substring anywhere -in a string (like an unanchored regex would). Rather, -they extract an occurance of the substring appearing -immediately at the current matching position in the -string (like a C<\G>-anchored regex would). - - - -=head2 General behaviour in list contexts - -In a list context, all the subroutines return a list, the first three -elements of which are always: - -=over 4 - -=item [0] - -The extracted string, including the specified delimiters. -If the extraction fails an empty string is returned. - -=item [1] - -The remainder of the input string (i.e. the characters after the -extracted string). On failure, the entire string is returned. - -=item [2] - -The skipped prefix (i.e. the characters before the extracted string). -On failure, the empty string is returned. - -=back - -Note that in a list context, the contents of the original input text (the first -argument) are not modified in any way. - -However, if the input text was passed in a variable, that variable's -C<pos> value is updated to point at the first character after the -extracted text. That means that in a list context the various -subroutines can be used much like regular expressions. For example: - - while ( $next = (extract_quotelike($text))[0] ) - { - # process next quote-like (in $next) - } - - -=head2 General behaviour in scalar and void contexts - -In a scalar context, the extracted string is returned, having first been -removed from the input text. Thus, the following code also processes -each quote-like operation, but actually removes them from $text: - - while ( $next = extract_quotelike($text) ) - { - # process next quote-like (in $next) - } - -Note that if the input text is a read-only string (i.e. a literal), -no attempt is made to remove the extracted text. - -In a void context the behaviour of the extraction subroutines is -exactly the same as in a scalar context, except (of course) that the -extracted substring is not returned. - -=head2 A note about prefixes - -Prefix patterns are matched without any trailing modifiers (C</gimsox> etc.) -This can bite you if you're expecting a prefix specification like -'.*?(?=<H1>)' to skip everything up to the first <H1> tag. Such a prefix -pattern will only succeed if the <H1> tag is on the current line, since -. normally doesn't match newlines. - -To overcome this limitation, you need to turn on /s matching within -the prefix pattern, using the C<(?s)> directive: '(?s).*?(?=<H1>)' - - -=head2 C<extract_delimited> - -The C<extract_delimited> function formalizes the common idiom -of extracting a single-character-delimited substring from the start of -a string. For example, to extract a single-quote delimited string, the -following code is typically used: - - ($remainder = $text) =~ s/\A('(\\.|[^'])*')//s; - $extracted = $1; - -but with C<extract_delimited> it can be simplified to: - - ($extracted,$remainder) = extract_delimited($text, "'"); - -C<extract_delimited> takes up to four scalars (the input text, the -delimiters, a prefix pattern to be skipped, and any escape characters) -and extracts the initial substring of the text that -is appropriately delimited. If the delimiter string has multiple -characters, the first one encountered in the text is taken to delimit -the substring. -The third argument specifies a prefix pattern that is to be skipped -(but must be present!) before the substring is extracted. -The final argument specifies the escape character to be used for each -delimiter. - -All arguments are optional. If the escape characters are not specified, -every delimiter is escaped with a backslash (C<\>). -If the prefix is not specified, the -pattern C<'\s*'> - optional whitespace - is used. If the delimiter set -is also not specified, the set C</["'`]/> is used. If the text to be processed -is not specified either, C<$_> is used. - -In list context, C<extract_delimited> returns a array of three -elements, the extracted substring (I<including the surrounding -delimiters>), the remainder of the text, and the skipped prefix (if -any). If a suitable delimited substring is not found, the first -element of the array is the empty string, the second is the complete -original text, and the prefix returned in the third element is an -empty string. - -In a scalar context, just the extracted substring is returned. In -a void context, the extracted substring (and any prefix) are simply -removed from the beginning of the first argument. - -Examples: - - # Remove a single-quoted substring from the very beginning of $text: - - $substring = extract_delimited($text, "'", ''); - - # Remove a single-quoted Pascalish substring (i.e. one in which - # doubling the quote character escapes it) from the very - # beginning of $text: - - $substring = extract_delimited($text, "'", '', "'"); - - # Extract a single- or double- quoted substring from the - # beginning of $text, optionally after some whitespace - # (note the list context to protect $text from modification): - - ($substring) = extract_delimited $text, q{"'}; - - - # Delete the substring delimited by the first '/' in $text: - - $text = join '', (extract_delimited($text,'/','[^/]*')[2,1]; - -Note that this last example is I<not> the same as deleting the first -quote-like pattern. For instance, if C<$text> contained the string: - - "if ('./cmd' =~ m/$UNIXCMD/s) { $cmd = $1; }" - -then after the deletion it would contain: - - "if ('.$UNIXCMD/s) { $cmd = $1; }" - -not: - - "if ('./cmd' =~ ms) { $cmd = $1; }" - - -See L<"extract_quotelike"> for a (partial) solution to this problem. - - -=head2 C<extract_bracketed> - -Like C<"extract_delimited">, the C<extract_bracketed> function takes -up to three optional scalar arguments: a string to extract from, a delimiter -specifier, and a prefix pattern. As before, a missing prefix defaults to -optional whitespace and a missing text defaults to C<$_>. However, a missing -delimiter specifier defaults to C<'{}()[]E<lt>E<gt>'> (see below). - -C<extract_bracketed> extracts a balanced-bracket-delimited -substring (using any one (or more) of the user-specified delimiter -brackets: '(..)', '{..}', '[..]', or '<..>'). Optionally it will also -respect quoted unbalanced brackets (see below). - -A "delimiter bracket" is a bracket in list of delimiters passed as -C<extract_bracketed>'s second argument. Delimiter brackets are -specified by giving either the left or right (or both!) versions -of the required bracket(s). Note that the order in which -two or more delimiter brackets are specified is not significant. - -A "balanced-bracket-delimited substring" is a substring bounded by -matched brackets, such that any other (left or right) delimiter -bracket I<within> the substring is also matched by an opposite -(right or left) delimiter bracket I<at the same level of nesting>. Any -type of bracket not in the delimiter list is treated as an ordinary -character. - -In other words, each type of bracket specified as a delimiter must be -balanced and correctly nested within the substring, and any other kind of -("non-delimiter") bracket in the substring is ignored. - -For example, given the string: - - $text = "{ an '[irregularly :-(] {} parenthesized >:-)' string }"; - -then a call to C<extract_bracketed> in a list context: - - @result = extract_bracketed( $text, '{}' ); - -would return: - - ( "{ an '[irregularly :-(] {} parenthesized >:-)' string }" , "" , "" ) - -since both sets of C<'{..}'> brackets are properly nested and evenly balanced. -(In a scalar context just the first element of the array would be returned. In -a void context, C<$text> would be replaced by an empty string.) - -Likewise the call in: - - @result = extract_bracketed( $text, '{[' ); - -would return the same result, since all sets of both types of specified -delimiter brackets are correctly nested and balanced. - -However, the call in: - - @result = extract_bracketed( $text, '{([<' ); - -would fail, returning: - - ( undef , "{ an '[irregularly :-(] {} parenthesized >:-)' string }" ); - -because the embedded pairs of C<'(..)'>s and C<'[..]'>s are "cross-nested" and -the embedded C<'E<gt>'> is unbalanced. (In a scalar context, this call would -return an empty string. In a void context, C<$text> would be unchanged.) - -Note that the embedded single-quotes in the string don't help in this -case, since they have not been specified as acceptable delimiters and are -therefore treated as non-delimiter characters (and ignored). - -However, if a particular species of quote character is included in the -delimiter specification, then that type of quote will be correctly handled. -for example, if C<$text> is: - - $text = '<A HREF=">>>>">link</A>'; - -then - - @result = extract_bracketed( $text, '<">' ); - -returns: - - ( '<A HREF=">>>>">', 'link</A>', "" ) - -as expected. Without the specification of C<"> as an embedded quoter: - - @result = extract_bracketed( $text, '<>' ); - -the result would be: - - ( '<A HREF=">', '>>>">link</A>', "" ) - -In addition to the quote delimiters C<'>, C<">, and C<`>, full Perl quote-like -quoting (i.e. q{string}, qq{string}, etc) can be specified by including the -letter 'q' as a delimiter. Hence: - - @result = extract_bracketed( $text, '<q>' ); - -would correctly match something like this: - - $text = '<leftop: conj /and/ conj>'; - -See also: C<"extract_quotelike"> and C<"extract_codeblock">. - - -=head2 C<extract_variable> - -C<extract_variable> extracts any valid Perl variable or -variable-involved expression, including scalars, arrays, hashes, array -accesses, hash look-ups, method calls through objects, subroutine calles -through subroutine references, etc. - -The subroutine takes up to two optional arguments: - -=over 4 - -=item 1. - -A string to be processed (C<$_> if the string is omitted or C<undef>) - -=item 2. - -A string specifying a pattern to be matched as a prefix (which is to be -skipped). If omitted, optional whitespace is skipped. - -=back - -On success in a list context, an array of 3 elements is returned. The -elements are: - -=over 4 - -=item [0] - -the extracted variable, or variablish expression - -=item [1] - -the remainder of the input text, - -=item [2] - -the prefix substring (if any), - -=back - -On failure, all of these values (except the remaining text) are C<undef>. - -In a scalar context, C<extract_variable> returns just the complete -substring that matched a variablish expression. C<undef> is returned on -failure. In addition, the original input text has the returned substring -(and any prefix) removed from it. - -In a void context, the input text just has the matched substring (and -any specified prefix) removed. - - -=head2 C<extract_tagged> - -C<extract_tagged> extracts and segments text between (balanced) -specified tags. - -The subroutine takes up to five optional arguments: - -=over 4 - -=item 1. - -A string to be processed (C<$_> if the string is omitted or C<undef>) - -=item 2. - -A string specifying a pattern to be matched as the opening tag. -If the pattern string is omitted (or C<undef>) then a pattern -that matches any standard XML tag is used. - -=item 3. - -A string specifying a pattern to be matched at the closing tag. -If the pattern string is omitted (or C<undef>) then the closing -tag is constructed by inserting a C</> after any leading bracket -characters in the actual opening tag that was matched (I<not> the pattern -that matched the tag). For example, if the opening tag pattern -is specified as C<'{{\w+}}'> and actually matched the opening tag -C<"{{DATA}}">, then the constructed closing tag would be C<"{{/DATA}}">. - -=item 4. - -A string specifying a pattern to be matched as a prefix (which is to be -skipped). If omitted, optional whitespace is skipped. - -=item 5. - -A hash reference containing various parsing options (see below) - -=back - -The various options that can be specified are: - -=over 4 - -=item C<reject =E<gt> $listref> - -The list reference contains one or more strings specifying patterns -that must I<not> appear within the tagged text. - -For example, to extract -an HTML link (which should not contain nested links) use: - - extract_tagged($text, '<A>', '</A>', undef, {reject => ['<A>']} ); - -=item C<ignore =E<gt> $listref> - -The list reference contains one or more strings specifying patterns -that are I<not> be be treated as nested tags within the tagged text -(even if they would match the start tag pattern). - -For example, to extract an arbitrary XML tag, but ignore "empty" elements: - - extract_tagged($text, undef, undef, undef, {ignore => ['<[^>]*/>']} ); - -(also see L<"gen_delimited_pat"> below). - - -=item C<fail =E<gt> $str> - -The C<fail> option indicates the action to be taken if a matching end -tag is not encountered (i.e. before the end of the string or some -C<reject> pattern matches). By default, a failure to match a closing -tag causes C<extract_tagged> to immediately fail. - -However, if the string value associated with <reject> is "MAX", then -C<extract_tagged> returns the complete text up to the point of failure. -If the string is "PARA", C<extract_tagged> returns only the first paragraph -after the tag (up to the first line that is either empty or contains -only whitespace characters). -If the string is "", the the default behaviour (i.e. failure) is reinstated. - -For example, suppose the start tag "/para" introduces a paragraph, which then -continues until the next "/endpara" tag or until another "/para" tag is -encountered: - - $text = "/para line 1\n\nline 3\n/para line 4"; - - extract_tagged($text, '/para', '/endpara', undef, - {reject => '/para', fail => MAX ); - - # EXTRACTED: "/para line 1\n\nline 3\n" - -Suppose instead, that if no matching "/endpara" tag is found, the "/para" -tag refers only to the immediately following paragraph: - - $text = "/para line 1\n\nline 3\n/para line 4"; - - extract_tagged($text, '/para', '/endpara', undef, - {reject => '/para', fail => MAX ); - - # EXTRACTED: "/para line 1\n" - -Note that the specified C<fail> behaviour applies to nested tags as well. - -=back - -On success in a list context, an array of 6 elements is returned. The elements are: - -=over 4 - -=item [0] - -the extracted tagged substring (including the outermost tags), - -=item [1] - -the remainder of the input text, - -=item [2] - -the prefix substring (if any), - -=item [3] - -the opening tag - -=item [4] - -the text between the opening and closing tags - -=item [5] - -the closing tag (or "" if no closing tag was found) - -=back - -On failure, all of these values (except the remaining text) are C<undef>. - -In a scalar context, C<extract_tagged> returns just the complete -substring that matched a tagged text (including the start and end -tags). C<undef> is returned on failure. In addition, the original input -text has the returned substring (and any prefix) removed from it. - -In a void context, the input text just has the matched substring (and -any specified prefix) removed. - - -=head2 C<gen_extract_tagged> - -(Note: This subroutine is only available under Perl5.005) - -C<gen_extract_tagged> generates a new anonymous subroutine which -extracts text between (balanced) specified tags. In other words, -it generates a function identical in function to C<extract_tagged>. - -The difference between C<extract_tagged> and the anonymous -subroutines generated by -C<gen_extract_tagged>, is that those generated subroutines: - -=over 4 - -=item * - -do not have to reparse tag specification or parsing options every time -they are called (whereas C<extract_tagged> has to effectively rebuild -its tag parser on every call); - -=item * - -make use of the new qr// construct to pre-compile the regexes they use -(whereas C<extract_tagged> uses standard string variable interpolation -to create tag-matching patterns). - -=back - -The subroutine takes up to four optional arguments (the same set as -C<extract_tagged> except for the string to be processed). It returns -a reference to a subroutine which in turn takes a single argument (the text to -be extracted from). - -In other words, the implementation of C<extract_tagged> is exactly -equivalent to: - - sub extract_tagged - { - my $text = shift; - $extractor = gen_extract_tagged(@_); - return $extractor->($text); - } - -(although C<extract_tagged> is not currently implemented that way, in order -to preserve pre-5.005 compatibility). - -Using C<gen_extract_tagged> to create extraction functions for specific tags -is a good idea if those functions are going to be called more than once, since -their performance is typically twice as good as the more general-purpose -C<extract_tagged>. - - -=head2 C<extract_quotelike> - -C<extract_quotelike> attempts to recognize, extract, and segment any -one of the various Perl quotes and quotelike operators (see -L<perlop(3)>) Nested backslashed delimiters, embedded balanced bracket -delimiters (for the quotelike operators), and trailing modifiers are -all caught. For example, in: - - extract_quotelike 'q # an octothorpe: \# (not the end of the q!) #' - - extract_quotelike ' "You said, \"Use sed\"." ' - - extract_quotelike ' s{([A-Z]{1,8}\.[A-Z]{3})} /\L$1\E/; ' - - extract_quotelike ' tr/\\\/\\\\/\\\//ds; ' - -the full Perl quotelike operations are all extracted correctly. - -Note too that, when using the /x modifier on a regex, any comment -containing the current pattern delimiter will cause the regex to be -immediately terminated. In other words: - - 'm / - (?i) # CASE INSENSITIVE - [a-z_] # LEADING ALPHABETIC/UNDERSCORE - [a-z0-9]* # FOLLOWED BY ANY NUMBER OF ALPHANUMERICS - /x' - -will be extracted as if it were: - - 'm / - (?i) # CASE INSENSITIVE - [a-z_] # LEADING ALPHABETIC/' - -This behaviour is identical to that of the actual compiler. - -C<extract_quotelike> takes two arguments: the text to be processed and -a prefix to be matched at the very beginning of the text. If no prefix -is specified, optional whitespace is the default. If no text is given, -C<$_> is used. - -In a list context, an array of 11 elements is returned. The elements are: - -=over 4 - -=item [0] - -the extracted quotelike substring (including trailing modifiers), - -=item [1] - -the remainder of the input text, - -=item [2] - -the prefix substring (if any), - -=item [3] - -the name of the quotelike operator (if any), - -=item [4] - -the left delimiter of the first block of the operation, - -=item [5] - -the text of the first block of the operation -(that is, the contents of -a quote, the regex of a match or substitution or the target list of a -translation), - -=item [6] - -the right delimiter of the first block of the operation, - -=item [7] - -the left delimiter of the second block of the operation -(that is, if it is a C<s>, C<tr>, or C<y>), - -=item [8] - -the text of the second block of the operation -(that is, the replacement of a substitution or the translation list -of a translation), - -=item [9] - -the right delimiter of the second block of the operation (if any), - -=item [10] - -the trailing modifiers on the operation (if any). - -=back - -For each of the fields marked "(if any)" the default value on success is -an empty string. -On failure, all of these values (except the remaining text) are C<undef>. - - -In a scalar context, C<extract_quotelike> returns just the complete substring -that matched a quotelike operation (or C<undef> on failure). In a scalar or -void context, the input text has the same substring (and any specified -prefix) removed. - -Examples: - - # Remove the first quotelike literal that appears in text - - $quotelike = extract_quotelike($text,'.*?'); - - # Replace one or more leading whitespace-separated quotelike - # literals in $_ with "<QLL>" - - do { $_ = join '<QLL>', (extract_quotelike)[2,1] } until $@; - - - # Isolate the search pattern in a quotelike operation from $text - - ($op,$pat) = (extract_quotelike $text)[3,5]; - if ($op =~ /[ms]/) - { - print "search pattern: $pat\n"; - } - else - { - print "$op is not a pattern matching operation\n"; - } - - -=head2 C<extract_quotelike> and "here documents" - -C<extract_quotelike> can successfully extract "here documents" from an input -string, but with an important caveat in list contexts. - -Unlike other types of quote-like literals, a here document is rarely -a contiguous substring. For example, a typical piece of code using -here document might look like this: - - <<'EOMSG' || die; - This is the message. - EOMSG - exit; - -Given this as an input string in a scalar context, C<extract_quotelike> -would correctly return the string "<<'EOMSG'\nThis is the message.\nEOMSG", -leaving the string " || die;\nexit;" in the original variable. In other words, -the two separate pieces of the here document are successfully extracted and -concatenated. - -In a list context, C<extract_quotelike> would return the list - -=over 4 - -=item [0] - -"<<'EOMSG'\nThis is the message.\nEOMSG\n" (i.e. the full extracted here document, -including fore and aft delimiters), - -=item [1] - -" || die;\nexit;" (i.e. the remainder of the input text, concatenated), - -=item [2] - -"" (i.e. the prefix substring -- trivial in this case), - -=item [3] - -"<<" (i.e. the "name" of the quotelike operator) - -=item [4] - -"'EOMSG'" (i.e. the left delimiter of the here document, including any quotes), - -=item [5] - -"This is the message.\n" (i.e. the text of the here document), - -=item [6] - -"EOMSG" (i.e. the right delimiter of the here document), - -=item [7..10] - -"" (a here document has no second left delimiter, second text, second right -delimiter, or trailing modifiers). - -=back - -However, the matching position of the input variable would be set to -"exit;" (i.e. I<after> the closing delimiter of the here document), -which would cause the earlier " || die;\nexit;" to be skipped in any -sequence of code fragment extractions. - -To avoid this problem, when it encounters a here document whilst -extracting from a modifiable string, C<extract_quotelike> silently -rearranges the string to an equivalent piece of Perl: - - <<'EOMSG' - This is the message. - EOMSG - || die; - exit; - -in which the here document I<is> contiguous. It still leaves the -matching position after the here document, but now the rest of the line -on which the here document starts is not skipped. - -To prevent <extract_quotelike> from mucking about with the input in this way -(this is the only case where a list-context C<extract_quotelike> does so), -you can pass the input variable as an interpolated literal: - - $quotelike = extract_quotelike("$var"); - - -=head2 C<extract_codeblock> - -C<extract_codeblock> attempts to recognize and extract a balanced -bracket delimited substring that may contain unbalanced brackets -inside Perl quotes or quotelike operations. That is, C<extract_codeblock> -is like a combination of C<"extract_bracketed"> and -C<"extract_quotelike">. - -C<extract_codeblock> takes the same initial three parameters as C<extract_bracketed>: -a text to process, a set of delimiter brackets to look for, and a prefix to -match first. It also takes an optional fourth parameter, which allows the -outermost delimiter brackets to be specified separately (see below). - -Omitting the first argument (input text) means process C<$_> instead. -Omitting the second argument (delimiter brackets) indicates that only C<'{'> is to be used. -Omitting the third argument (prefix argument) implies optional whitespace at the start. -Omitting the fourth argument (outermost delimiter brackets) indicates that the -value of the second argument is to be used for the outermost delimiters. - -Once the prefix an dthe outermost opening delimiter bracket have been -recognized, code blocks are extracted by stepping through the input text and -trying the following alternatives in sequence: - -=over 4 - -=item 1. - -Try and match a closing delimiter bracket. If the bracket was the same -species as the last opening bracket, return the substring to that -point. If the bracket was mismatched, return an error. - -=item 2. - -Try to match a quote or quotelike operator. If found, call -C<extract_quotelike> to eat it. If C<extract_quotelike> fails, return -the error it returned. Otherwise go back to step 1. - -=item 3. - -Try to match an opening delimiter bracket. If found, call -C<extract_codeblock> recursively to eat the embedded block. If the -recursive call fails, return an error. Otherwise, go back to step 1. - -=item 4. - -Unconditionally match a bareword or any other single character, and -then go back to step 1. - -=back - - -Examples: - - # Find a while loop in the text - - if ($text =~ s/.*?while\s*\{/{/) - { - $loop = "while " . extract_codeblock($text); - } - - # Remove the first round-bracketed list (which may include - # round- or curly-bracketed code blocks or quotelike operators) - - extract_codeblock $text, "(){}", '[^(]*'; - - -The ability to specify a different outermost delimiter bracket is useful -in some circumstances. For example, in the Parse::RecDescent module, -parser actions which are to be performed only on a successful parse -are specified using a C<E<lt>defer:...E<gt>> directive. For example: - - sentence: subject verb object - <defer: {$::theVerb = $item{verb}} > - -Parse::RecDescent uses C<extract_codeblock($text, '{}E<lt>E<gt>')> to extract the code -within the C<E<lt>defer:...E<gt>> directive, but there's a problem. - -A deferred action like this: - - <defer: {if ($count>10) {$count--}} > - -will be incorrectly parsed as: - - <defer: {if ($count> - -because the "less than" operator is interpreted as a closing delimiter. - -But, by extracting the directive using -S<C<extract_codeblock($text, '{}', undef, 'E<lt>E<gt>')>> -the '>' character is only treated as a delimited at the outermost -level of the code block, so the directive is parsed correctly. - -=head2 C<extract_multiple> - -The C<extract_multiple> subroutine takes a string to be processed and a -list of extractors (subroutines or regular expressions) to apply to that string. - -In an array context C<extract_multiple> returns an array of substrings -of the original string, as extracted by the specified extractors. -In a scalar context, C<extract_multiple> returns the first -substring successfully extracted from the original string. In both -scalar and void contexts the original string has the first successfully -extracted substring removed from it. In all contexts -C<extract_multiple> starts at the current C<pos> of the string, and -sets that C<pos> appropriately after it matches. - -Hence, the aim of of a call to C<extract_multiple> in a list context -is to split the processed string into as many non-overlapping fields as -possible, by repeatedly applying each of the specified extractors -to the remainder of the string. Thus C<extract_multiple> is -a generalized form of Perl's C<split> subroutine. - -The subroutine takes up to four optional arguments: - -=over 4 - -=item 1. - -A string to be processed (C<$_> if the string is omitted or C<undef>) - -=item 2. - -A reference to a list of subroutine references and/or qr// objects and/or -literal strings and/or hash references, specifying the extractors -to be used to split the string. If this argument is omitted (or -C<undef>) the list: - - [ - sub { extract_variable($_[0], '') }, - sub { extract_quotelike($_[0],'') }, - sub { extract_codeblock($_[0],'{}','') }, - ] - -is used. - - -=item 3. - -An number specifying the maximum number of fields to return. If this -argument is omitted (or C<undef>), split continues as long as possible. - -If the third argument is I<N>, then extraction continues until I<N> fields -have been successfully extracted, or until the string has been completely -processed. - -Note that in scalar and void contexts the value of this argument is -automatically reset to 1 (under C<-w>, a warning is issued if the argument -has to be reset). - -=item 4. - -A value indicating whether unmatched substrings (see below) within the -text should be skipped or returned as fields. If the value is true, -such substrings are skipped. Otherwise, they are returned. - -=back - -The extraction process works by applying each extractor in -sequence to the text string. - -If the extractor is a subroutine it is called in a list context and is -expected to return a list of a single element, namely the extracted -text. It may optionally also return two further arguments: a string -representing the text left after extraction (like $' for a pattern -match), and a string representing any prefix skipped before the -extraction (like $` in a pattern match). Note that this is designed -to facilitate the use of other Text::Balanced subroutines with -C<extract_multiple>. Note too that the value returned by an extractor -subroutine need not bear any relationship to the corresponding substring -of the original text (see examples below). - -If the extractor is a precompiled regular expression or a string, -it is matched against the text in a scalar context with a leading -'\G' and the gc modifiers enabled. The extracted value is either -$1 if that variable is defined after the match, or else the -complete match (i.e. $&). - -If the extractor is a hash reference, it must contain exactly one element. -The value of that element is one of the -above extractor types (subroutine reference, regular expression, or string). -The key of that element is the name of a class into which the successful -return value of the extractor will be blessed. - -If an extractor returns a defined value, that value is immediately -treated as the next extracted field and pushed onto the list of fields. -If the extractor was specified in a hash reference, the field is also -blessed into the appropriate class, - -If the extractor fails to match (in the case of a regex extractor), or returns an empty list or an undefined value (in the case of a subroutine extractor), it is -assumed to have failed to extract. -If none of the extractor subroutines succeeds, then one -character is extracted from the start of the text and the extraction -subroutines reapplied. Characters which are thus removed are accumulated and -eventually become the next field (unless the fourth argument is true, in which -case they are disgarded). - -For example, the following extracts substrings that are valid Perl variables: - - @fields = extract_multiple($text, - [ sub { extract_variable($_[0]) } ], - undef, 1); - -This example separates a text into fields which are quote delimited, -curly bracketed, and anything else. The delimited and bracketed -parts are also blessed to identify them (the "anything else" is unblessed): - - @fields = extract_multiple($text, - [ - { Delim => sub { extract_delimited($_[0],q{'"}) } }, - { Brack => sub { extract_bracketed($_[0],'{}') } }, - ]); - -This call extracts the next single substring that is a valid Perl quotelike -operator (and removes it from $text): - - $quotelike = extract_multiple($text, - [ - sub { extract_quotelike($_[0]) }, - ], undef, 1); - -Finally, here is yet another way to do comma-separated value parsing: - - @fields = extract_multiple($csv_text, - [ - sub { extract_delimited($_[0],q{'"}) }, - qr/([^,]+)(.*)/, - ], - undef,1); - -The list in the second argument means: -I<"Try and extract a ' or " delimited string, otherwise extract anything up to a comma...">. -The undef third argument means: -I<"...as many times as possible...">, -and the true value in the fourth argument means -I<"...discarding anything else that appears (i.e. the commas)">. - -If you wanted the commas preserved as separate fields (i.e. like split -does if your split pattern has capturing parentheses), you would -just make the last parameter undefined (or remove it). - - -=head2 C<gen_delimited_pat> - -The C<gen_delimited_pat> subroutine takes a single (string) argument and - > builds a Friedl-style optimized regex that matches a string delimited -by any one of the characters in the single argument. For example: - - gen_delimited_pat(q{'"}) - -returns the regex: - - (?:\"(?:\\\"|(?!\").)*\"|\'(?:\\\'|(?!\').)*\') - -Note that the specified delimiters are automatically quotemeta'd. - -A typical use of C<gen_delimited_pat> would be to build special purpose tags -for C<extract_tagged>. For example, to properly ignore "empty" XML elements -(which might contain quoted strings): - - my $empty_tag = '<(' . gen_delimited_pat(q{'"}) . '|.)+/>'; - - extract_tagged($text, undef, undef, undef, {ignore => [$empty_tag]} ); - - -C<gen_delimited_pat> may also be called with an optional second argument, -which specifies the "escape" character(s) to be used for each delimiter. -For example to match a Pascal-style string (where ' is the delimiter -and '' is a literal ' within the string): - - gen_delimited_pat(q{'},q{'}); - -Different escape characters can be specified for different delimiters. -For example, to specify that '/' is the escape for single quotes -and '%' is the escape for double quotes: - - gen_delimited_pat(q{'"},q{/%}); - -If more delimiters than escape chars are specified, the last escape char -is used for the remaining delimiters. -If no escape char is specified for a given specified delimiter, '\' is used. - -Note that -C<gen_delimited_pat> was previously called -C<delimited_pat>. That name may still be used, but is now deprecated. - - -=head1 DIAGNOSTICS - -In a list context, all the functions return C<(undef,$original_text)> -on failure. In a scalar context, failure is indicated by returning C<undef> -(in this case the input text is not modified in any way). - -In addition, on failure in I<any> context, the C<$@> variable is set. -Accessing C<$@-E<gt>{error}> returns one of the error diagnostics listed -below. -Accessing C<$@-E<gt>{pos}> returns the offset into the original string at -which the error was detected (although not necessarily where it occurred!) -Printing C<$@> directly produces the error message, with the offset appended. -On success, the C<$@> variable is guaranteed to be C<undef>. - -The available diagnostics are: - -=over 4 - -=item C<Did not find a suitable bracket: "%s"> - -The delimiter provided to C<extract_bracketed> was not one of -C<'()[]E<lt>E<gt>{}'>. - -=item C<Did not find prefix: /%s/> - -A non-optional prefix was specified but wasn't found at the start of the text. - -=item C<Did not find opening bracket after prefix: "%s"> - -C<extract_bracketed> or C<extract_codeblock> was expecting a -particular kind of bracket at the start of the text, and didn't find it. - -=item C<No quotelike operator found after prefix: "%s"> - -C<extract_quotelike> didn't find one of the quotelike operators C<q>, -C<qq>, C<qw>, C<qx>, C<s>, C<tr> or C<y> at the start of the substring -it was extracting. - -=item C<Unmatched closing bracket: "%c"> - -C<extract_bracketed>, C<extract_quotelike> or C<extract_codeblock> encountered -a closing bracket where none was expected. - -=item C<Unmatched opening bracket(s): "%s"> - -C<extract_bracketed>, C<extract_quotelike> or C<extract_codeblock> ran -out of characters in the text before closing one or more levels of nested -brackets. - -=item C<Unmatched embedded quote (%s)> - -C<extract_bracketed> attempted to match an embedded quoted substring, but -failed to find a closing quote to match it. - -=item C<Did not find closing delimiter to match '%s'> - -C<extract_quotelike> was unable to find a closing delimiter to match the -one that opened the quote-like operation. - -=item C<Mismatched closing bracket: expected "%c" but found "%s"> - -C<extract_bracketed>, C<extract_quotelike> or C<extract_codeblock> found -a valid bracket delimiter, but it was the wrong species. This usually -indicates a nesting error, but may indicate incorrect quoting or escaping. - -=item C<No block delimiter found after quotelike "%s"> - -C<extract_quotelike> or C<extract_codeblock> found one of the -quotelike operators C<q>, C<qq>, C<qw>, C<qx>, C<s>, C<tr> or C<y> -without a suitable block after it. - -=item C<Did not find leading dereferencer> - -C<extract_variable> was expecting one of '$', '@', or '%' at the start of -a variable, but didn't find any of them. - -=item C<Bad identifier after dereferencer> - -C<extract_variable> found a '$', '@', or '%' indicating a variable, but that -character was not followed by a legal Perl identifier. - -=item C<Did not find expected opening bracket at %s> - -C<extract_codeblock> failed to find any of the outermost opening brackets -that were specified. - -=item C<Improperly nested codeblock at %s> - -A nested code block was found that started with a delimiter that was specified -as being only to be used as an outermost bracket. - -=item C<Missing second block for quotelike "%s"> - -C<extract_codeblock> or C<extract_quotelike> found one of the -quotelike operators C<s>, C<tr> or C<y> followed by only one block. - -=item C<No match found for opening bracket> - -C<extract_codeblock> failed to find a closing bracket to match the outermost -opening bracket. - -=item C<Did not find opening tag: /%s/> - -C<extract_tagged> did not find a suitable opening tag (after any specified -prefix was removed). - -=item C<Unable to construct closing tag to match: /%s/> - -C<extract_tagged> matched the specified opening tag and tried to -modify the matched text to produce a matching closing tag (because -none was specified). It failed to generate the closing tag, almost -certainly because the opening tag did not start with a -bracket of some kind. - -=item C<Found invalid nested tag: %s> - -C<extract_tagged> found a nested tag that appeared in the "reject" list -(and the failure mode was not "MAX" or "PARA"). - -=item C<Found unbalanced nested tag: %s> - -C<extract_tagged> found a nested opening tag that was not matched by a -corresponding nested closing tag (and the failure mode was not "MAX" or "PARA"). - -=item C<Did not find closing tag> - -C<extract_tagged> reached the end of the text without finding a closing tag -to match the original opening tag (and the failure mode was not -"MAX" or "PARA"). - - - - -=back - - -=head1 AUTHOR - -Damian Conway (damian@conway.org) - - -=head1 BUGS AND IRRITATIONS - -There are undoubtedly serious bugs lurking somewhere in this code, if -only because parts of it give the impression of understanding a great deal -more about Perl than they really do. - -Bug reports and other feedback are most welcome. - - -=head1 COPYRIGHT - - Copyright (c) 1997-2001, Damian Conway. All Rights Reserved. - This module is free software. It may be used, redistributed - and/or modified under the same terms as Perl itself. |