summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Parse/RecDescent.pm3045
-rw-r--r--lib/Template/Plugin/HTML.pm197
-rw-r--r--lib/Template/Plugin/JavaScript.pm73
-rw-r--r--lib/Text/Balanced.pm2301
4 files changed, 270 insertions, 5346 deletions
diff --git a/lib/Parse/RecDescent.pm b/lib/Parse/RecDescent.pm
deleted file mode 100644
index 35b9e9d..0000000
--- a/lib/Parse/RecDescent.pm
+++ /dev/null
@@ -1,3045 +0,0 @@
-# GENERATE RECURSIVE DESCENT PARSER OBJECTS FROM A GRAMMARC
-# SEE RecDescent.pod FOR FULL DETAILS
-
-use 5.005;
-use strict;
-
-package Parse::RecDescent;
-
-use Text::Balanced qw ( extract_codeblock extract_bracketed extract_quotelike extract_delimited );
-
-use vars qw ( $skip );
-
- *defskip = \ '\s*'; # DEFAULT SEPARATOR IS OPTIONAL WHITESPACE
- $skip = '\s*'; # UNIVERSAL SEPARATOR IS OPTIONAL WHITESPACE
-my $MAXREP = 100_000_000; # REPETITIONS MATCH AT MOST 100,000,000 TIMES
-
-
-sub import # IMPLEMENT PRECOMPILER BEHAVIOUR UNDER:
- # perl -MParse::RecDescent - <grammarfile> <classname>
-{
- local *_die = sub { print @_, "\n"; exit };
-
- my ($package, $file, $line) = caller;
- if (substr($file,0,1) eq '-' && $line == 0)
- {
- _die("Usage: perl -MLocalTest - <grammarfile> <classname>")
- unless @ARGV == 2;
-
- my ($sourcefile, $class) = @ARGV;
-
- local *IN;
- open IN, $sourcefile
- or _die("Can't open grammar file '$sourcefile'");
-
- my $grammar = join '', <IN>;
-
- Parse::RecDescent->Precompile($grammar, $class, $sourcefile);
- exit;
- }
-}
-
-sub Save
-{
- my ($self, $class) = @_;
- $self->{saving} = 1;
- $self->Precompile(undef,$class);
- $self->{saving} = 0;
-}
-
-sub Precompile
-{
- my ($self, $grammar, $class, $sourcefile) = @_;
-
- $class =~ /^(\w+::)*\w+$/ or croak("Bad class name: $class");
-
- my $modulefile = $class;
- $modulefile =~ s/.*:://;
- $modulefile .= ".pm";
-
- open OUT, ">$modulefile"
- or croak("Can't write to new module file '$modulefile'");
-
- print STDERR "precompiling grammar from file '$sourcefile'\n",
- "to class $class in module file '$modulefile'\n"
- if $grammar && $sourcefile;
-
- # local $::RD_HINT = 1;
- $self = Parse::RecDescent->new($grammar,1,$class)
- || croak("Can't compile bad grammar")
- if $grammar;
-
- foreach ( keys %{$self->{rules}} )
- { $self->{rules}{$_}{changed} = 1 }
-
- print OUT "package $class;\nuse Parse::RecDescent;\n\n";
-
- print OUT "{ my \$ERRORS;\n\n";
-
- print OUT $self->_code();
-
- print OUT "}\npackage $class; sub new { ";
- print OUT "my ";
-
- require Data::Dumper;
- print OUT Data::Dumper->Dump([$self], [qw(self)]);
-
- print OUT "}";
-
- close OUT
- or croak("Can't write to new module file '$modulefile'");
-}
-
-
-package Parse::RecDescent::LineCounter;
-
-
-sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag)
-{
- bless {
- text => $_[1],
- parser => $_[2],
- prev => $_[3]?1:0,
- }, $_[0];
-}
-
-my %counter_cache;
-
-sub FETCH
-{
- my $parser = $_[0]->{parser};
- my $from = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev}
-;
-
- unless (exists $counter_cache{$from}) {
- $parser->{lastlinenum} = $parser->{offsetlinenum}
- - Parse::RecDescent::_linecount(substr($parser->{fulltext},$from))
- + 1;
- $counter_cache{$from} = $parser->{lastlinenum};
- }
- return $counter_cache{$from};
-}
-
-sub STORE
-{
- my $parser = $_[0]->{parser};
- $parser->{offsetlinenum} -= $parser->{lastlinenum} - $_[1];
- return undef;
-}
-
-sub resync # ($linecounter)
-{
- my $self = tied($_[0]);
- die "Tried to alter something other than a LineCounter\n"
- unless $self =~ /Parse::RecDescent::LineCounter/;
-
- my $parser = $self->{parser};
- my $apparently = $parser->{offsetlinenum}
- - Parse::RecDescent::_linecount(${$self->{text}})
- + 1;
-
- $parser->{offsetlinenum} += $parser->{lastlinenum} - $apparently;
- return 1;
-}
-
-package Parse::RecDescent::ColCounter;
-
-sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag)
-{
- bless {
- text => $_[1],
- parser => $_[2],
- prev => $_[3]?1:0,
- }, $_[0];
-}
-
-sub FETCH
-{
- my $parser = $_[0]->{parser};
- my $missing = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev}+1;
- substr($parser->{fulltext},0,$missing) =~ m/^(.*)\Z/m;
- return length($1);
-}
-
-sub STORE
-{
- die "Can't set column number via \$thiscolumn\n";
-}
-
-
-package Parse::RecDescent::OffsetCounter;
-
-sub TIESCALAR # ($classname, \$text, $thisparser, $prev)
-{
- bless {
- text => $_[1],
- parser => $_[2],
- prev => $_[3]?-1:0,
- }, $_[0];
-}
-
-sub FETCH
-{
- my $parser = $_[0]->{parser};
- return $parser->{fulltextlen}-length(${$_[0]->{text}})+$_[0]->{prev};
-}
-
-sub STORE
-{
- die "Can't set current offset via \$thisoffset or \$prevoffset\n";
-}
-
-
-
-package Parse::RecDescent::Rule;
-
-sub new ($$$$$)
-{
- my $class = ref($_[0]) || $_[0];
- my $name = $_[1];
- my $owner = $_[2];
- my $line = $_[3];
- my $replace = $_[4];
-
- if (defined $owner->{"rules"}{$name})
- {
- my $self = $owner->{"rules"}{$name};
- if ($replace && !$self->{"changed"})
- {
- $self->reset;
- }
- return $self;
- }
- else
- {
- return $owner->{"rules"}{$name} =
- bless
- {
- "name" => $name,
- "prods" => [],
- "calls" => [],
- "changed" => 0,
- "line" => $line,
- "impcount" => 0,
- "opcount" => 0,
- "vars" => "",
- }, $class;
- }
-}
-
-sub reset($)
-{
- @{$_[0]->{"prods"}} = ();
- @{$_[0]->{"calls"}} = ();
- $_[0]->{"changed"} = 0;
- $_[0]->{"impcount"} = 0;
- $_[0]->{"opcount"} = 0;
- $_[0]->{"vars"} = "";
-}
-
-sub DESTROY {}
-
-sub hasleftmost($$)
-{
- my ($self, $ref) = @_;
-
- my $prod;
- foreach $prod ( @{$self->{"prods"}} )
- {
- return 1 if $prod->hasleftmost($ref);
- }
-
- return 0;
-}
-
-sub leftmostsubrules($)
-{
- my $self = shift;
- my @subrules = ();
-
- my $prod;
- foreach $prod ( @{$self->{"prods"}} )
- {
- push @subrules, $prod->leftmostsubrule();
- }
-
- return @subrules;
-}
-
-sub expected($)
-{
- my $self = shift;
- my @expected = ();
-
- my $prod;
- foreach $prod ( @{$self->{"prods"}} )
- {
- my $next = $prod->expected();
- unless (! $next or _contains($next,@expected) )
- {
- push @expected, $next;
- }
- }
-
- return join ', or ', @expected;
-}
-
-sub _contains($@)
-{
- my $target = shift;
- my $item;
- foreach $item ( @_ ) { return 1 if $target eq $item; }
- return 0;
-}
-
-sub addcall($$)
-{
- my ( $self, $subrule ) = @_;
- unless ( _contains($subrule, @{$self->{"calls"}}) )
- {
- push @{$self->{"calls"}}, $subrule;
- }
-}
-
-sub addprod($$)
-{
- my ( $self, $prod ) = @_;
- push @{$self->{"prods"}}, $prod;
- $self->{"changed"} = 1;
- $self->{"impcount"} = 0;
- $self->{"opcount"} = 0;
- $prod->{"number"} = $#{$self->{"prods"}};
- return $prod;
-}
-
-sub addvar
-{
- my ( $self, $var, $parser ) = @_;
- if ($var =~ /\A\s*local\s+([%@\$]\w+)/)
- {
- $parser->{localvars} .= " $1";
- $self->{"vars"} .= "$var;\n" }
- else
- { $self->{"vars"} .= "my $var;\n" }
- $self->{"changed"} = 1;
- return 1;
-}
-
-sub addautoscore
-{
- my ( $self, $code ) = @_;
- $self->{"autoscore"} = $code;
- $self->{"changed"} = 1;
- return 1;
-}
-
-sub nextoperator($)
-{
- my $self = shift;
- my $prodcount = scalar @{$self->{"prods"}};
- my $opcount = ++$self->{"opcount"};
- return "_operator_${opcount}_of_production_${prodcount}_of_rule_$self->{name}";
-}
-
-sub nextimplicit($)
-{
- my $self = shift;
- my $prodcount = scalar @{$self->{"prods"}};
- my $impcount = ++$self->{"impcount"};
- return "_alternation_${impcount}_of_production_${prodcount}_of_rule_$self->{name}";
-}
-
-
-sub code
-{
- my ($self, $namespace, $parser) = @_;
-
-eval 'undef &' . $namespace . '::' . $self->{"name"} unless $parser->{saving};
-
- my $code =
-'
-# ARGS ARE: ($parser, $text; $repeating, $_noactions, \@args)
-sub ' . $namespace . '::' . $self->{"name"} . '
-{
- my $thisparser = $_[0];
- use vars q{$tracelevel};
- local $tracelevel = ($tracelevel||0)+1;
- $ERRORS = 0;
- my $thisrule = $thisparser->{"rules"}{"' . $self->{"name"} . '"};
-
- Parse::RecDescent::_trace(q{Trying rule: [' . $self->{"name"} . ']},
- Parse::RecDescent::_tracefirst($_[1]),
- q{' . $self->{"name"} . '},
- $tracelevel)
- if defined $::RD_TRACE;
-
- ' . ($parser->{deferrable}
- ? 'my $def_at = @{$thisparser->{deferred}};'
- : '') .
- '
- my $err_at = @{$thisparser->{errors}};
-
- my $score;
- my $score_return;
- my $_tok;
- my $return = undef;
- my $_matched=0;
- my $commit=0;
- my @item = ();
- my %item = ();
- my $repeating = defined($_[2]) && $_[2];
- my $_noactions = defined($_[3]) && $_[3];
- my @arg = defined $_[4] ? @{ &{$_[4]} } : ();
- my %arg = ($#arg & 01) ? @arg : (@arg, undef);
- my $text;
- my $lastsep="";
- my $expectation = new Parse::RecDescent::Expectation($thisrule->expected());
- $expectation->at($_[1]);
- '. ($parser->{_check}{thisoffset}?'
- my $thisoffset;
- tie $thisoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser;
- ':'') . ($parser->{_check}{prevoffset}?'
- my $prevoffset;
- tie $prevoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser, 1;
- ':'') . ($parser->{_check}{thiscolumn}?'
- my $thiscolumn;
- tie $thiscolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser;
- ':'') . ($parser->{_check}{prevcolumn}?'
- my $prevcolumn;
- tie $prevcolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser, 1;
- ':'') . ($parser->{_check}{prevline}?'
- my $prevline;
- tie $prevline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser, 1;
- ':'') . '
- my $thisline;
- tie $thisline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser;
-
- '. $self->{vars} .'
-';
-
- my $prod;
- foreach $prod ( @{$self->{"prods"}} )
- {
- $prod->addscore($self->{autoscore},0,0) if $self->{autoscore};
- next unless $prod->checkleftmost();
- $code .= $prod->code($namespace,$self,$parser);
-
- $code .= $parser->{deferrable}
- ? ' splice
- @{$thisparser->{deferred}}, $def_at unless $_matched;
- '
- : '';
- }
-
- $code .=
-'
- unless ( $_matched || defined($return) || defined($score) )
- {
- ' .($parser->{deferrable}
- ? ' splice @{$thisparser->{deferred}}, $def_at;
- '
- : '') . '
-
- $_[1] = $text; # NOT SURE THIS IS NEEDED
- Parse::RecDescent::_trace(q{<<Didn\'t match rule>>},
- Parse::RecDescent::_tracefirst($_[1]),
- q{' . $self->{"name"} .'},
- $tracelevel)
- if defined $::RD_TRACE;
- return undef;
- }
- if (!defined($return) && defined($score))
- {
- Parse::RecDescent::_trace(q{>>Accepted scored production<<}, "",
- q{' . $self->{"name"} .'},
- $tracelevel)
- if defined $::RD_TRACE;
- $return = $score_return;
- }
- splice @{$thisparser->{errors}}, $err_at;
- $return = $item[$#item] unless defined $return;
- if (defined $::RD_TRACE)
- {
- Parse::RecDescent::_trace(q{>>Matched rule<< (return value: [} .
- $return . q{])}, "",
- q{' . $self->{"name"} .'},
- $tracelevel);
- Parse::RecDescent::_trace(q{(consumed: [} .
- Parse::RecDescent::_tracemax(substr($_[1],0,-length($text))) . q{])},
- Parse::RecDescent::_tracefirst($text),
- , q{' . $self->{"name"} .'},
- $tracelevel)
- }
- $_[1] = $text;
- return $return;
-}
-';
-
- return $code;
-}
-
-my @left;
-sub isleftrec($$)
-{
- my ($self, $rules) = @_;
- my $root = $self->{"name"};
- @left = $self->leftmostsubrules();
- my $next;
- foreach $next ( @left )
- {
- next unless defined $rules->{$next}; # SKIP NON-EXISTENT RULES
- return 1 if $next eq $root;
- my $child;
- foreach $child ( $rules->{$next}->leftmostsubrules() )
- {
- push(@left, $child)
- if ! _contains($child, @left) ;
- }
- }
- return 0;
-}
-
-package Parse::RecDescent::Production;
-
-sub describe ($;$)
-{
- return join ' ', map { $_->describe($_[1]) or () } @{$_[0]->{items}};
-}
-
-sub new ($$;$$)
-{
- my ($self, $line, $uncommit, $error) = @_;
- my $class = ref($self) || $self;
-
- bless
- {
- "items" => [],
- "uncommit" => $uncommit,
- "error" => $error,
- "line" => $line,
- strcount => 0,
- patcount => 0,
- dircount => 0,
- actcount => 0,
- }, $class;
-}
-
-sub expected ($)
-{
- my $itemcount = scalar @{$_[0]->{"items"}};
- return ($itemcount) ? $_[0]->{"items"}[0]->describe(1) : '';
-}
-
-sub hasleftmost ($$)
-{
- my ($self, $ref) = @_;
- return ${$self->{"items"}}[0] eq $ref if scalar @{$self->{"items"}};
- return 0;
-}
-
-sub leftmostsubrule($)
-{
- my $self = shift;
-
- if ( $#{$self->{"items"}} >= 0 )
- {
- my $subrule = $self->{"items"}[0]->issubrule();
- return $subrule if defined $subrule;
- }
-
- return ();
-}
-
-sub checkleftmost($)
-{
- my @items = @{$_[0]->{"items"}};
- if (@items==1 && ref($items[0]) =~ /\AParse::RecDescent::Error/
- && $items[0]->{commitonly} )
- {
- Parse::RecDescent::_warn(2,"Lone <error?> in production treated
- as <error?> <reject>");
- Parse::RecDescent::_hint("A production consisting of a single
- conditional <error?> directive would
- normally succeed (with the value zero) if the
- rule is not 'commited' when it is
- tried. Since you almost certainly wanted
- '<error?> <reject>' Parse::RecDescent
- supplied it for you.");
- push @{$_[0]->{items}},
- Parse::RecDescent::UncondReject->new(0,0,'<reject>');
- }
- elsif (@items==1 && ($items[0]->describe||"") =~ /<rulevar|<autoscore/)
- {
- # Do nothing
- }
- elsif (@items &&
- ( ref($items[0]) =~ /\AParse::RecDescent::UncondReject/
- || ($items[0]->describe||"") =~ /<autoscore/
- ))
- {
- Parse::RecDescent::_warn(1,"Optimizing away production: [". $_[0]->describe ."]");
- my $what = $items[0]->describe =~ /<rulevar/
- ? "a <rulevar> (which acts like an unconditional <reject> during parsing)"
- : $items[0]->describe =~ /<autoscore/
- ? "an <autoscore> (which acts like an unconditional <reject> during parsing)"
- : "an unconditional <reject>";
- my $caveat = $items[0]->describe =~ /<rulevar/
- ? " after the specified variable was set up"
- : "";
- my $advice = @items > 1
- ? "However, there were also other (useless) items after the leading "
- . $items[0]->describe
- . ", so you may have been expecting some other behaviour."
- : "You can safely ignore this message.";
- Parse::RecDescent::_hint("The production starts with $what. That means that the
- production can never successfully match, so it was
- optimized out of the final parser$caveat. $advice");
- return 0;
- }
- return 1;
-}
-
-sub changesskip($)
-{
- my $item;
- foreach $item (@{$_[0]->{"items"}})
- {
- if (ref($item) =~ /Parse::RecDescent::(Action|Directive)/)
- {
- return 1 if $item->{code} =~ /\$skip/;
- }
- }
- return 0;
-}
-
-sub adddirective
-{
- my ( $self, $whichop, $line, $name ) = @_;
- push @{$self->{op}},
- { type=>$whichop, line=>$line, name=>$name,
- offset=> scalar(@{$self->{items}}) };
-}
-
-sub addscore
-{
- my ( $self, $code, $lookahead, $line ) = @_;
- $self->additem(Parse::RecDescent::Directive->new(
- "local \$^W;
- my \$thisscore = do { $code } + 0;
- if (!defined(\$score) || \$thisscore>\$score)
- { \$score=\$thisscore; \$score_return=\$item[-1]; }
- undef;", $lookahead, $line,"<score: $code>") )
- unless $self->{items}[-1]->describe =~ /<score/;
- return 1;
-}
-
-sub check_pending
-{
- my ( $self, $line ) = @_;
- if ($self->{op})
- {
- while (my $next = pop @{$self->{op}})
- {
- Parse::RecDescent::_error("Incomplete <$next->{type}op:...>.", $line);
- Parse::RecDescent::_hint(
- "The current production ended without completing the
- <$next->{type}op:...> directive that started near line
- $next->{line}. Did you forget the closing '>'?");
- }
- }
- return 1;
-}
-
-sub enddirective
-{
- my ( $self, $line, $minrep, $maxrep ) = @_;
- unless ($self->{op})
- {
- Parse::RecDescent::_error("Unmatched > found.", $line);
- Parse::RecDescent::_hint(
- "A '>' angle bracket was encountered, which typically
- indicates the end of a directive. However no suitable
- preceding directive was encountered. Typically this
- indicates either a extra '>' in the grammar, or a
- problem inside the previous directive.");
- return;
- }
- my $op = pop @{$self->{op}};
- my $span = @{$self->{items}} - $op->{offset};
- if ($op->{type} =~ /left|right/)
- {
- if ($span != 3)
- {
- Parse::RecDescent::_error(
- "Incorrect <$op->{type}op:...> specification:
- expected 3 args, but found $span instead", $line);
- Parse::RecDescent::_hint(
- "The <$op->{type}op:...> directive requires a
- sequence of exactly three elements. For example:
- <$op->{type}op:leftarg /op/ rightarg>");
- }
- else
- {
- push @{$self->{items}},
- Parse::RecDescent::Operator->new(
- $op->{type}, $minrep, $maxrep, splice(@{$self->{"items"}}, -3));
- $self->{items}[-1]->sethashname($self);
- $self->{items}[-1]{name} = $op->{name};
- }
- }
-}
-
-sub prevwasreturn
-{
- my ( $self, $line ) = @_;
- unless (@{$self->{items}})
- {
- Parse::RecDescent::_error(
- "Incorrect <return:...> specification:
- expected item missing", $line);
- Parse::RecDescent::_hint(
- "The <return:...> directive requires a
- sequence of at least one item. For example:
- <return: list>");
- return;
- }
- push @{$self->{items}},
- Parse::RecDescent::Result->new();
-}
-
-sub additem
-{
- my ( $self, $item ) = @_;
- $item->sethashname($self);
- push @{$self->{"items"}}, $item;
- return $item;
-}
-
-
-sub preitempos
-{
- return q
- {
- push @itempos, {'offset' => {'from'=>$thisoffset, 'to'=>undef},
- 'line' => {'from'=>$thisline, 'to'=>undef},
- 'column' => {'from'=>$thiscolumn, 'to'=>undef} };
- }
-}
-
-sub incitempos
-{
- return q
- {
- $itempos[$#itempos]{'offset'}{'from'} += length($1);
- $itempos[$#itempos]{'line'}{'from'} = $thisline;
- $itempos[$#itempos]{'column'}{'from'} = $thiscolumn;
- }
-}
-
-sub postitempos
-{
- return q
- {
- $itempos[$#itempos]{'offset'}{'to'} = $prevoffset;
- $itempos[$#itempos]{'line'}{'to'} = $prevline;
- $itempos[$#itempos]{'column'}{'to'} = $prevcolumn;
- }
-}
-
-sub code($$$$)
-{
- my ($self,$namespace,$rule,$parser) = @_;
- my $code =
-'
- while (!$_matched'
- . (defined $self->{"uncommit"} ? '' : ' && !$commit')
- . ')
- {
- ' .
- ($self->changesskip()
- ? 'local $skip = defined($skip) ? $skip : $Parse::RecDescent::skip;'
- : '') .'
- Parse::RecDescent::_trace(q{Trying production: ['
- . $self->describe . ']},
- Parse::RecDescent::_tracefirst($_[1]),
- q{' . $rule ->{name}. '},
- $tracelevel)
- if defined $::RD_TRACE;
- my $thisprod = $thisrule->{"prods"}[' . $self->{"number"} . '];
- ' . (defined $self->{"error"} ? '' : '$text = $_[1];' ) . '
- my $_savetext;
- @item = (q{' . $rule->{"name"} . '});
- %item = (__RULE__ => q{' . $rule->{"name"} . '});
- my $repcount = 0;
-
-';
- $code .=
-' my @itempos = ({});
-' if $parser->{_check}{itempos};
-
- my $item;
- my $i;
-
- for ($i = 0; $i < @{$self->{"items"}}; $i++)
- {
- $item = ${$self->{items}}[$i];
-
- $code .= preitempos() if $parser->{_check}{itempos};
-
- $code .= $item->code($namespace,$rule,$parser->{_check});
-
- $code .= postitempos() if $parser->{_check}{itempos};
-
- }
-
- if ($parser->{_AUTOACTION} && defined($item) && !$item->isa("Parse::RecDescent::Action"))
- {
- $code .= $parser->{_AUTOACTION}->code($namespace,$rule);
- Parse::RecDescent::_warn(1,"Autogenerating action in rule
- \"$rule->{name}\":
- $parser->{_AUTOACTION}{code}")
- and
- Parse::RecDescent::_hint("The \$::RD_AUTOACTION was defined,
- so any production not ending in an
- explicit action has the specified
- \"auto-action\" automatically
- appended.");
- }
- elsif ($parser->{_AUTOTREE} && defined($item) && !$item->isa("Parse::RecDescent::Action"))
- {
- if ($i==1 && $item->isterminal)
- {
- $code .= $parser->{_AUTOTREE}{TERMINAL}->code($namespace,$rule);
- }
- else
- {
- $code .= $parser->{_AUTOTREE}{NODE}->code($namespace,$rule);
- }
- Parse::RecDescent::_warn(1,"Autogenerating tree-building action in rule
- \"$rule->{name}\"")
- and
- Parse::RecDescent::_hint("The directive <autotree> was specified,
- so any production not ending
- in an explicit action has
- some parse-tree building code
- automatically appended.");
- }
-
- $code .=
-'
-
- Parse::RecDescent::_trace(q{>>Matched production: ['
- . $self->describe . ']<<},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{name} . '},
- $tracelevel)
- if defined $::RD_TRACE;
- $_matched = 1;
- last;
- }
-
-';
- return $code;
-}
-
-1;
-
-package Parse::RecDescent::Action;
-
-sub describe { undef }
-
-sub sethashname { $_[0]->{hashname} = '__ACTION' . ++$_[1]->{actcount} .'__'; }
-
-sub new
-{
- my $class = ref($_[0]) || $_[0];
- bless
- {
- "code" => $_[1],
- "lookahead" => $_[2],
- "line" => $_[3],
- }, $class;
-}
-
-sub issubrule { undef }
-sub isterminal { 0 }
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule) = @_;
-
-'
- Parse::RecDescent::_trace(q{Trying action},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{name} . '},
- $tracelevel)
- if defined $::RD_TRACE;
- ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .'
-
- $_tok = ($_noactions) ? 0 : do ' . $self->{"code"} . ';
- ' . ($self->{"lookahead"}<0?'if':'unless') . ' (defined $_tok)
- {
- Parse::RecDescent::_trace(q{<<Didn\'t match action>> (return value: [undef])})
- if defined $::RD_TRACE;
- last;
- }
- Parse::RecDescent::_trace(q{>>Matched action<< (return value: [}
- . $_tok . q{])},
- Parse::RecDescent::_tracefirst($text))
- if defined $::RD_TRACE;
- push @item, $_tok;
- ' . ($self->{line}>=0 ? '$item{'. $self->{hashname} .'}=$_tok;' : '' ) .'
- ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
-'
-}
-
-
-1;
-
-package Parse::RecDescent::Directive;
-
-sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; }
-
-sub issubrule { undef }
-sub isterminal { 0 }
-sub describe { $_[1] ? '' : $_[0]->{name} }
-
-sub new ($$$$$)
-{
- my $class = ref($_[0]) || $_[0];
- bless
- {
- "code" => $_[1],
- "lookahead" => $_[2],
- "line" => $_[3],
- "name" => $_[4],
- }, $class;
-}
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule) = @_;
-
-'
- ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .'
-
- Parse::RecDescent::_trace(q{Trying directive: ['
- . $self->describe . ']},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{name} . '},
- $tracelevel)
- if defined $::RD_TRACE; ' .'
- $_tok = do { ' . $self->{"code"} . ' };
- if (defined($_tok))
- {
- Parse::RecDescent::_trace(q{>>Matched directive<< (return value: [}
- . $_tok . q{])},
- Parse::RecDescent::_tracefirst($text))
- if defined $::RD_TRACE;
- }
- else
- {
- Parse::RecDescent::_trace(q{<<Didn\'t match directive>>},
- Parse::RecDescent::_tracefirst($text))
- if defined $::RD_TRACE;
- }
- ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .'
- last '
- . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok;
- push @item, $item{'.$self->{hashname}.'}=$_tok;
- ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
-'
-}
-
-1;
-
-package Parse::RecDescent::UncondReject;
-
-sub issubrule { undef }
-sub isterminal { 0 }
-sub describe { $_[1] ? '' : $_[0]->{name} }
-sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; }
-
-sub new ($$$;$)
-{
- my $class = ref($_[0]) || $_[0];
- bless
- {
- "lookahead" => $_[1],
- "line" => $_[2],
- "name" => $_[3],
- }, $class;
-}
-
-# MARK, YOU MAY WANT TO OPTIMIZE THIS.
-
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule) = @_;
-
-'
- Parse::RecDescent::_trace(q{>>Rejecting production<< (found '
- . $self->describe . ')},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{name} . '},
- $tracelevel)
- if defined $::RD_TRACE;
- undef $return;
- ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .'
-
- $_tok = undef;
- ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .'
- last '
- . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok;
-'
-}
-
-1;
-
-package Parse::RecDescent::Error;
-
-sub issubrule { undef }
-sub isterminal { 0 }
-sub describe { $_[1] ? '' : $_[0]->{commitonly} ? '<error?:...>' : '<error...>' }
-sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; }
-
-sub new ($$$$$)
-{
- my $class = ref($_[0]) || $_[0];
- bless
- {
- "msg" => $_[1],
- "lookahead" => $_[2],
- "commitonly" => $_[3],
- "line" => $_[4],
- }, $class;
-}
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule) = @_;
-
- my $action = '';
-
- if ($self->{"msg"}) # ERROR MESSAGE SUPPLIED
- {
- #WAS: $action .= "Parse::RecDescent::_error(qq{$self->{msg}}" . ',$thisline);';
- $action .= 'push @{$thisparser->{errors}}, [qq{'.$self->{msg}.'},$thisline];';
-
- }
- else # GENERATE ERROR MESSAGE DURING PARSE
- {
- $action .= '
- my $rule = $item[0];
- $rule =~ s/_/ /g;
- #WAS: Parse::RecDescent::_error("Invalid $rule: " . $expectation->message() ,$thisline);
- push @{$thisparser->{errors}}, ["Invalid $rule: " . $expectation->message() ,$thisline];
- ';
- }
-
- my $dir =
- new Parse::RecDescent::Directive('if (' .
- ($self->{"commitonly"} ? '$commit' : '1') .
- ") { do {$action} unless ".' $_noactions; undef } else {0}',
- $self->{"lookahead"},0,$self->describe);
- $dir->{hashname} = $self->{hashname};
- return $dir->code($namespace, $rule, 0);
-}
-
-1;
-
-package Parse::RecDescent::Token;
-
-sub sethashname { $_[0]->{hashname} = '__PATTERN' . ++$_[1]->{patcount} . '__'; }
-
-sub issubrule { undef }
-sub isterminal { 1 }
-sub describe ($) { shift->{'description'}}
-
-
-# ARGS ARE: $self, $pattern, $left_delim, $modifiers, $lookahead, $linenum
-sub new ($$$$$$)
-{
- my $class = ref($_[0]) || $_[0];
- my $pattern = $_[1];
- my $pat = $_[1];
- my $ldel = $_[2];
- my $rdel = $ldel;
- $rdel =~ tr/{[(</}])>/;
-
- my $mod = $_[3];
-
- my $desc;
-
- if ($ldel eq '/') { $desc = "$ldel$pattern$rdel$mod" }
- else { $desc = "m$ldel$pattern$rdel$mod" }
- $desc =~ s/\\/\\\\/g;
- $desc =~ s/\$$/\\\$/g;
- $desc =~ s/}/\\}/g;
- $desc =~ s/{/\\{/g;
-
- if (!eval "no strict;
- local \$SIG{__WARN__} = sub {0};
- '' =~ m$ldel$pattern$rdel" and $@)
- {
- Parse::RecDescent::_warn(3, "Token pattern \"m$ldel$pattern$rdel\"
- may not be a valid regular expression",
- $_[5]);
- $@ =~ s/ at \(eval.*/./;
- Parse::RecDescent::_hint($@);
- }
-
- # QUIETLY PREVENT (WELL-INTENTIONED) CALAMITY
- $mod =~ s/[gc]//g;
- $pattern =~ s/(\A|[^\\])\\G/$1/g;
-
- bless
- {
- "pattern" => $pattern,
- "ldelim" => $ldel,
- "rdelim" => $rdel,
- "mod" => $mod,
- "lookahead" => $_[4],
- "line" => $_[5],
- "description" => $desc,
- }, $class;
-}
-
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule, $check) = @_;
- my $ldel = $self->{"ldelim"};
- my $rdel = $self->{"rdelim"};
- my $sdel = $ldel;
- my $mod = $self->{"mod"};
-
- $sdel =~ s/[[{(<]/{}/;
-
-my $code = '
- Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe
- . ']}, Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{name} . '},
- $tracelevel)
- if defined $::RD_TRACE;
- $lastsep = "";
- $expectation->is(q{' . ($rule->hasleftmost($self) ? ''
- : $self->describe ) . '})->at($text);
- ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . '
-
- ' . ($self->{"lookahead"}<0?'if':'unless')
- . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and '
- . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '')
- . ' $text =~ s' . $ldel . '\A(?:' . $self->{"pattern"} . ')'
- . $rdel . $sdel . $mod . ')
- {
- '.($self->{"lookahead"} ? '$text = $_savetext;' : '').'
- $expectation->failed();
- Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>},
- Parse::RecDescent::_tracefirst($text))
- if defined $::RD_TRACE;
-
- last;
- }
- Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [}
- . $& . q{])},
- Parse::RecDescent::_tracefirst($text))
- if defined $::RD_TRACE;
- push @item, $item{'.$self->{hashname}.'}=$&;
- ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
-';
-
- return $code;
-}
-
-1;
-
-package Parse::RecDescent::Literal;
-
-sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; }
-
-sub issubrule { undef }
-sub isterminal { 1 }
-sub describe ($) { shift->{'description'} }
-
-sub new ($$$$)
-{
- my $class = ref($_[0]) || $_[0];
-
- my $pattern = $_[1];
-
- my $desc = $pattern;
- $desc=~s/\\/\\\\/g;
- $desc=~s/}/\\}/g;
- $desc=~s/{/\\{/g;
-
- bless
- {
- "pattern" => $pattern,
- "lookahead" => $_[2],
- "line" => $_[3],
- "description" => "'$desc'",
- }, $class;
-}
-
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule, $check) = @_;
-
-my $code = '
- Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe
- . ']},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{name} . '},
- $tracelevel)
- if defined $::RD_TRACE;
- $lastsep = "";
- $expectation->is(q{' . ($rule->hasleftmost($self) ? ''
- : $self->describe ) . '})->at($text);
- ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . '
-
- ' . ($self->{"lookahead"}<0?'if':'unless')
- . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and '
- . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '')
- . ' $text =~ s/\A' . quotemeta($self->{"pattern"}) . '//)
- {
- '.($self->{"lookahead"} ? '$text = $_savetext;' : '').'
- $expectation->failed();
- Parse::RecDescent::_trace(qq{<<Didn\'t match terminal>>},
- Parse::RecDescent::_tracefirst($text))
- if defined $::RD_TRACE;
- last;
- }
- Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [}
- . $& . q{])},
- Parse::RecDescent::_tracefirst($text))
- if defined $::RD_TRACE;
- push @item, $item{'.$self->{hashname}.'}=$&;
- ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
-';
-
- return $code;
-}
-
-1;
-
-package Parse::RecDescent::InterpLit;
-
-sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; }
-
-sub issubrule { undef }
-sub isterminal { 1 }
-sub describe ($) { shift->{'description'} }
-
-sub new ($$$$)
-{
- my $class = ref($_[0]) || $_[0];
-
- my $pattern = $_[1];
- $pattern =~ s#/#\\/#g;
-
- my $desc = $pattern;
- $desc=~s/\\/\\\\/g;
- $desc=~s/}/\\}/g;
- $desc=~s/{/\\{/g;
-
- bless
- {
- "pattern" => $pattern,
- "lookahead" => $_[2],
- "line" => $_[3],
- "description" => "'$desc'",
- }, $class;
-}
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule, $check) = @_;
-
-my $code = '
- Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe
- . ']},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{name} . '},
- $tracelevel)
- if defined $::RD_TRACE;
- $lastsep = "";
- $expectation->is(q{' . ($rule->hasleftmost($self) ? ''
- : $self->describe ) . '})->at($text);
- ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . '
-
- ' . ($self->{"lookahead"}<0?'if':'unless')
- . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and '
- . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '')
- . ' do { $_tok = "' . $self->{"pattern"} . '"; 1 } and
- substr($text,0,length($_tok)) eq $_tok and
- do { substr($text,0,length($_tok)) = ""; 1; }
- )
- {
- '.($self->{"lookahead"} ? '$text = $_savetext;' : '').'
- $expectation->failed();
- Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>},
- Parse::RecDescent::_tracefirst($text))
- if defined $::RD_TRACE;
- last;
- }
- Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [}
- . $_tok . q{])},
- Parse::RecDescent::_tracefirst($text))
- if defined $::RD_TRACE;
- push @item, $item{'.$self->{hashname}.'}=$_tok;
- ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
-';
-
- return $code;
-}
-
-1;
-
-package Parse::RecDescent::Subrule;
-
-sub issubrule ($) { return $_[0]->{"subrule"} }
-sub isterminal { 0 }
-sub sethashname {}
-
-sub describe ($)
-{
- my $desc = $_[0]->{"implicit"} || $_[0]->{"subrule"};
- $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"};
- return $desc;
-}
-
-sub callsyntax($$)
-{
- if ($_[0]->{"matchrule"})
- {
- return "&{'$_[1]'.qq{$_[0]->{subrule}}}";
- }
- else
- {
- return $_[1].$_[0]->{"subrule"};
- }
-}
-
-sub new ($$$$;$$$)
-{
- my $class = ref($_[0]) || $_[0];
- bless
- {
- "subrule" => $_[1],
- "lookahead" => $_[2],
- "line" => $_[3],
- "implicit" => $_[4] || undef,
- "matchrule" => $_[5],
- "argcode" => $_[6] || undef,
- }, $class;
-}
-
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule) = @_;
-
-'
- Parse::RecDescent::_trace(q{Trying subrule: [' . $self->{"subrule"} . ']},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{"name"} . '},
- $tracelevel)
- if defined $::RD_TRACE;
- if (1) { no strict qw{refs};
- $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}'
- # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text);
- : 'q{'.$self->describe.'}' ) . ')->at($text);
- ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' )
- . ($self->{"lookahead"}<0?'if':'unless')
- . ' (defined ($_tok = '
- . $self->callsyntax($namespace.'::')
- . '($thisparser,$text,$repeating,'
- . ($self->{"lookahead"}?'1':'$_noactions')
- . ($self->{argcode} ? ",sub { return $self->{argcode} }"
- : ',sub { \\@arg }')
- . ')))
- {
- '.($self->{"lookahead"} ? '$text = $_savetext;' : '').'
- Parse::RecDescent::_trace(q{<<Didn\'t match subrule: ['
- . $self->{subrule} . ']>>},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{"name"} .'},
- $tracelevel)
- if defined $::RD_TRACE;
- $expectation->failed();
- last;
- }
- Parse::RecDescent::_trace(q{>>Matched subrule: ['
- . $self->{subrule} . ']<< (return value: [}
- . $_tok . q{]},
-
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{"name"} .'},
- $tracelevel)
- if defined $::RD_TRACE;
- $item{q{' . $self->{subrule} . '}} = $_tok;
- push @item, $_tok;
- ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
- }
-'
-}
-
-package Parse::RecDescent::Repetition;
-
-sub issubrule ($) { return $_[0]->{"subrule"} }
-sub isterminal { 0 }
-sub sethashname { }
-
-sub describe ($)
-{
- my $desc = $_[0]->{"expected"} || $_[0]->{"subrule"};
- $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"};
- return $desc;
-}
-
-sub callsyntax($$)
-{
- if ($_[0]->{matchrule})
- { return "sub { goto &{''.qq{$_[1]$_[0]->{subrule}}} }"; }
- else
- { return "\\&$_[1]$_[0]->{subrule}"; }
-}
-
-sub new ($$$$$$$$$$)
-{
- my ($self, $subrule, $repspec, $min, $max, $lookahead, $line, $parser, $matchrule, $argcode) = @_;
- my $class = ref($self) || $self;
- ($max, $min) = ( $min, $max) if ($max<$min);
-
- my $desc;
- if ($subrule=~/\A_alternation_\d+_of_production_\d+_of_rule/)
- { $desc = $parser->{"rules"}{$subrule}->expected }
-
- if ($lookahead)
- {
- if ($min>0)
- {
- return new Parse::RecDescent::Subrule($subrule,$lookahead,$line,$desc,$matchrule,$argcode);
- }
- else
- {
- Parse::RecDescent::_error("Not symbol (\"!\") before
- \"$subrule\" doesn't make
- sense.",$line);
- Parse::RecDescent::_hint("Lookahead for negated optional
- repetitions (such as
- \"!$subrule($repspec)\" can never
- succeed, since optional items always
- match (zero times at worst).
- Did you mean a single \"!$subrule\",
- instead?");
- }
- }
- bless
- {
- "subrule" => $subrule,
- "repspec" => $repspec,
- "min" => $min,
- "max" => $max,
- "lookahead" => $lookahead,
- "line" => $line,
- "expected" => $desc,
- "argcode" => $argcode || undef,
- "matchrule" => $matchrule,
- }, $class;
-}
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule) = @_;
-
- my ($subrule, $repspec, $min, $max, $lookahead) =
- @{$self}{ qw{subrule repspec min max lookahead} };
-
-'
- Parse::RecDescent::_trace(q{Trying repeated subrule: [' . $self->describe . ']},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{"name"} . '},
- $tracelevel)
- if defined $::RD_TRACE;
- $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}'
- # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text);
- : 'q{'.$self->describe.'}' ) . ')->at($text);
- ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .'
- unless (defined ($_tok = $thisparser->_parserepeat($text, '
- . $self->callsyntax($namespace.'::')
- . ', ' . $min . ', ' . $max . ', '
- . ($self->{"lookahead"}?'1':'$_noactions')
- . ',$expectation,'
- . ($self->{argcode} ? "sub { return $self->{argcode} }"
- : 'undef')
- . ')))
- {
- Parse::RecDescent::_trace(q{<<Didn\'t match repeated subrule: ['
- . $self->describe . ']>>},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{"name"} .'},
- $tracelevel)
- if defined $::RD_TRACE;
- last;
- }
- Parse::RecDescent::_trace(q{>>Matched repeated subrule: ['
- . $self->{subrule} . ']<< (}
- . @$_tok . q{ times)},
-
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{"name"} .'},
- $tracelevel)
- if defined $::RD_TRACE;
- $item{q{' . "$self->{subrule}($self->{repspec})" . '}} = $_tok;
- push @item, $_tok;
- ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
-
-'
-}
-
-package Parse::RecDescent::Result;
-
-sub issubrule { 0 }
-sub isterminal { 0 }
-sub describe { '' }
-
-sub new
-{
- my ($class, $pos) = @_;
-
- bless {}, $class;
-}
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule) = @_;
-
- '
- $return = $item[-1];
- ';
-}
-
-package Parse::RecDescent::Operator;
-
-my @opertype = ( " non-optional", "n optional" );
-
-sub issubrule { 0 }
-sub isterminal { 0 }
-
-sub describe { $_[0]->{"expected"} }
-sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; }
-
-
-sub new
-{
- my ($class, $type, $minrep, $maxrep, $leftarg, $op, $rightarg) = @_;
-
- bless
- {
- "type" => "${type}op",
- "leftarg" => $leftarg,
- "op" => $op,
- "min" => $minrep,
- "max" => $maxrep,
- "rightarg" => $rightarg,
- "expected" => "<${type}op: ".$leftarg->describe." ".$op->describe." ".$rightarg->describe.">",
- }, $class;
-}
-
-sub code($$$$)
-{
- my ($self, $namespace, $rule) = @_;
-
- my ($leftarg, $op, $rightarg) =
- @{$self}{ qw{leftarg op rightarg} };
-
- my $code = '
- Parse::RecDescent::_trace(q{Trying operator: [' . $self->describe . ']},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{"name"} . '},
- $tracelevel)
- if defined $::RD_TRACE;
- $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}'
- # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text);
- : 'q{'.$self->describe.'}' ) . ')->at($text);
-
- $_tok = undef;
- OPLOOP: while (1)
- {
- $repcount = 0;
- my @item;
- ';
-
- if ($self->{type} eq "leftop" )
- {
- $code .= '
- # MATCH LEFTARG
- ' . $leftarg->code(@_[1..2]) . '
-
- $repcount++;
-
- my $savetext = $text;
- my $backtrack;
-
- # MATCH (OP RIGHTARG)(s)
- while ($repcount < ' . $self->{max} . ')
- {
- $backtrack = 0;
- ' . $op->code(@_[1..2]) . '
- ' . ($op->isterminal() ? 'pop @item;' : '$backtrack=1;' ) . '
- ' . (ref($op) eq 'Parse::RecDescent::Token'
- ? 'if (defined $1) {push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; $backtrack=1;}'
- : "" ) . '
- ' . $rightarg->code(@_[1..2]) . '
- $savetext = $text;
- $repcount++;
- }
- $text = $savetext;
- pop @item if $backtrack;
-
- ';
- }
- else
- {
- $code .= '
- my $savetext = $text;
- my $backtrack;
- # MATCH (LEFTARG OP)(s)
- while ($repcount < ' . $self->{max} . ')
- {
- $backtrack = 0;
- ' . $leftarg->code(@_[1..2]) . '
- $repcount++;
- $backtrack = 1;
- ' . $op->code(@_[1..2]) . '
- $savetext = $text;
- ' . ($op->isterminal() ? 'pop @item;' : "" ) . '
- ' . (ref($op) eq 'Parse::RecDescent::Token' ? 'do { push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; } if defined $1;' : "" ) . '
- }
- $text = $savetext;
- pop @item if $backtrack;
-
- # MATCH RIGHTARG
- ' . $rightarg->code(@_[1..2]) . '
- $repcount++;
- ';
- }
-
- $code .= 'unless (@item) { undef $_tok; last }' unless $self->{min}==0;
-
- $code .= '
- $_tok = [ @item ];
- last;
- }
-
- unless ($repcount>='.$self->{min}.')
- {
- Parse::RecDescent::_trace(q{<<Didn\'t match operator: ['
- . $self->describe
- . ']>>},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{"name"} .'},
- $tracelevel)
- if defined $::RD_TRACE;
- $expectation->failed();
- last;
- }
- Parse::RecDescent::_trace(q{>>Matched operator: ['
- . $self->describe
- . ']<< (return value: [}
- . qq{@{$_tok||[]}} . q{]},
- Parse::RecDescent::_tracefirst($text),
- q{' . $rule->{"name"} .'},
- $tracelevel)
- if defined $::RD_TRACE;
-
- push @item, $item{'.($self->{name}||$self->{hashname}).'}=$_tok||[];
-
-';
- return $code;
-}
-
-
-package Parse::RecDescent::Expectation;
-
-sub new ($)
-{
- bless {
- "failed" => 0,
- "expected" => "",
- "unexpected" => "",
- "lastexpected" => "",
- "lastunexpected" => "",
- "defexpected" => $_[1],
- };
-}
-
-sub is ($$)
-{
- $_[0]->{lastexpected} = $_[1]; return $_[0];
-}
-
-sub at ($$)
-{
- $_[0]->{lastunexpected} = $_[1]; return $_[0];
-}
-
-sub failed ($)
-{
- return unless $_[0]->{lastexpected};
- $_[0]->{expected} = $_[0]->{lastexpected} unless $_[0]->{failed};
- $_[0]->{unexpected} = $_[0]->{lastunexpected} unless $_[0]->{failed};
- $_[0]->{failed} = 1;
-}
-
-sub message ($)
-{
- my ($self) = @_;
- $self->{expected} = $self->{defexpected} unless $self->{expected};
- $self->{expected} =~ s/_/ /g;
- if (!$self->{unexpected} || $self->{unexpected} =~ /\A\s*\Z/s)
- {
- return "Was expecting $self->{expected}";
- }
- else
- {
- $self->{unexpected} =~ /\s*(.*)/;
- return "Was expecting $self->{expected} but found \"$1\" instead";
- }
-}
-
-1;
-
-package Parse::RecDescent;
-
-use Carp;
-use vars qw ( $AUTOLOAD $VERSION );
-
-my $ERRORS = 0;
-
-$VERSION = '1.94';
-
-# BUILDING A PARSER
-
-my $nextnamespace = "namespace000001";
-
-sub _nextnamespace()
-{
- return "Parse::RecDescent::" . $nextnamespace++;
-}
-
-sub new ($$$)
-{
- my $class = ref($_[0]) || $_[0];
- local $Parse::RecDescent::compiling = $_[2];
- my $name_space_name = defined $_[3]
- ? "Parse::RecDescent::".$_[3]
- : _nextnamespace();
- my $self =
- {
- "rules" => {},
- "namespace" => $name_space_name,
- "startcode" => '',
- "localvars" => '',
- "_AUTOACTION" => undef,
- "_AUTOTREE" => undef,
- };
- if ($::RD_AUTOACTION)
- {
- my $sourcecode = $::RD_AUTOACTION;
- $sourcecode = "{ $sourcecode }"
- unless $sourcecode =~ /\A\s*\{.*\}\s*\Z/;
- $self->{_check}{itempos} =
- $sourcecode =~ /\@itempos\b|\$itempos\s*\[/;
- $self->{_AUTOACTION}
- = new Parse::RecDescent::Action($sourcecode,0,-1)
- }
-
- bless $self, $class;
- shift;
- return $self->Replace(@_)
-}
-
-sub Compile($$$$) {
-
- die "Compilation of Parse::RecDescent grammars not yet implemented\n";
-}
-
-sub DESTROY {} # SO AUTOLOADER IGNORES IT
-
-# BUILDING A GRAMMAR....
-
-sub Replace ($$)
-{
- splice(@_, 2, 0, 1);
- return _generate(@_);
-}
-
-sub Extend ($$)
-{
- splice(@_, 2, 0, 0);
- return _generate(@_);
-}
-
-sub _no_rule ($$;$)
-{
- _error("Ruleless $_[0] at start of grammar.",$_[1]);
- my $desc = $_[2] ? "\"$_[2]\"" : "";
- _hint("You need to define a rule for the $_[0] $desc
- to be part of.");
-}
-
-my $NEGLOOKAHEAD = '\G(\s*\.\.\.\!)';
-my $POSLOOKAHEAD = '\G(\s*\.\.\.)';
-my $RULE = '\G\s*(\w+)[ \t]*:';
-my $PROD = '\G\s*([|])';
-my $TOKEN = q{\G\s*/((\\\\/|[^/])*)/([cgimsox]*)};
-my $MTOKEN = q{\G\s*(m\s*[^\w\s])};
-my $LITERAL = q{\G\s*'((\\\\['\\\\]|[^'])*)'};
-my $INTERPLIT = q{\G\s*"((\\\\["\\\\]|[^"])*)"};
-my $SUBRULE = '\G\s*(\w+)';
-my $MATCHRULE = '\G(\s*<matchrule:)';
-my $SIMPLEPAT = '((\\s+/[^/\\\\]*(?:\\\\.[^/\\\\]*)*/)?)';
-my $OPTIONAL = '\G\((\?)'.$SIMPLEPAT.'\)';
-my $ANY = '\G\((s\?)'.$SIMPLEPAT.'\)';
-my $MANY = '\G\((s|\.\.)'.$SIMPLEPAT.'\)';
-my $EXACTLY = '\G\(([1-9]\d*)'.$SIMPLEPAT.'\)';
-my $BETWEEN = '\G\((\d+)\.\.([1-9]\d*)'.$SIMPLEPAT.'\)';
-my $ATLEAST = '\G\((\d+)\.\.'.$SIMPLEPAT.'\)';
-my $ATMOST = '\G\(\.\.([1-9]\d*)'.$SIMPLEPAT.'\)';
-my $BADREP = '\G\((-?\d+)?\.\.(-?\d+)?'.$SIMPLEPAT.'\)';
-my $ACTION = '\G\s*\{';
-my $IMPLICITSUBRULE = '\G\s*\(';
-my $COMMENT = '\G\s*(#.*)';
-my $COMMITMK = '\G\s*<commit>';
-my $UNCOMMITMK = '\G\s*<uncommit>';
-my $QUOTELIKEMK = '\G\s*<perl_quotelike>';
-my $CODEBLOCKMK = '\G\s*<perl_codeblock(?:\s+([][()<>{}]+))?>';
-my $VARIABLEMK = '\G\s*<perl_variable>';
-my $NOCHECKMK = '\G\s*<nocheck>';
-my $AUTOTREEMK = '\G\s*<autotree>';
-my $AUTOSTUBMK = '\G\s*<autostub>';
-my $AUTORULEMK = '\G\s*<autorule:(.*?)>';
-my $REJECTMK = '\G\s*<reject>';
-my $CONDREJECTMK = '\G\s*<reject:';
-my $SCOREMK = '\G\s*<score:';
-my $AUTOSCOREMK = '\G\s*<autoscore:';
-my $SKIPMK = '\G\s*<skip:';
-my $OPMK = '\G\s*<(left|right)op(?:=(\'.*?\'))?:';
-my $ENDDIRECTIVEMK = '\G\s*>';
-my $RESYNCMK = '\G\s*<resync>';
-my $RESYNCPATMK = '\G\s*<resync:';
-my $RULEVARPATMK = '\G\s*<rulevar:';
-my $DEFERPATMK = '\G\s*<defer:';
-my $TOKENPATMK = '\G\s*<token:';
-my $AUTOERRORMK = '\G\s*<error(\??)>';
-my $MSGERRORMK = '\G\s*<error(\??):';
-my $UNCOMMITPROD = $PROD.'\s*<uncommit';
-my $ERRORPROD = $PROD.'\s*<error';
-my $LONECOLON = '\G\s*:';
-my $OTHER = '\G\s*([^\s]+)';
-
-my $lines = 0;
-
-sub _generate($$$;$$)
-{
- my ($self, $grammar, $replace, $isimplicit, $isleftop) = (@_, 0);
-
- my $aftererror = 0;
- my $lookahead = 0;
- my $lookaheadspec = "";
- $lines = _linecount($grammar) unless $lines;
- $self->{_check}{itempos} = ($grammar =~ /\@itempos\b|\$itempos\s*\[/)
- unless $self->{_check}{itempos};
- for (qw(thisoffset thiscolumn prevline prevoffset prevcolumn))
- {
- $self->{_check}{$_} =
- ($grammar =~ /\$$_/) || $self->{_check}{itempos}
- unless $self->{_check}{$_};
- }
- my $line;
-
- my $rule = undef;
- my $prod = undef;
- my $item = undef;
- my $lastgreedy = '';
- pos $grammar = 0;
- study $grammar;
-
- while (pos $grammar < length $grammar)
- {
- $line = $lines - _linecount($grammar) + 1;
- my $commitonly;
- my $code = "";
- my @components = ();
- if ($grammar =~ m/$COMMENT/gco)
- {
- _parse("a comment",0,$line);
- next;
- }
- elsif ($grammar =~ m/$NEGLOOKAHEAD/gco)
- {
- _parse("a negative lookahead",$aftererror,$line);
- $lookahead = $lookahead ? -$lookahead : -1;
- $lookaheadspec .= $1;
- next; # SKIP LOOKAHEAD RESET AT END OF while LOOP
- }
- elsif ($grammar =~ m/$POSLOOKAHEAD/gco)
- {
- _parse("a positive lookahead",$aftererror,$line);
- $lookahead = $lookahead ? $lookahead : 1;
- $lookaheadspec .= $1;
- next; # SKIP LOOKAHEAD RESET AT END OF while LOOP
- }
- elsif ($grammar =~ m/(?=$ACTION)/gco
- and do { ($code) = extract_codeblock($grammar); $code })
- {
- _parse("an action", $aftererror, $line, $code);
- $item = new Parse::RecDescent::Action($code,$lookahead,$line);
- $prod and $prod->additem($item)
- or $self->_addstartcode($code);
- }
- elsif ($grammar =~ m/(?=$IMPLICITSUBRULE)/gco
- and do { ($code) = extract_codeblock($grammar,'{([',undef,'(',1);
- $code })
- {
- $code =~ s/\A\s*\(|\)\Z//g;
- _parse("an implicit subrule", $aftererror, $line,
- "( $code )");
- my $implicit = $rule->nextimplicit;
- $self->_generate("$implicit : $code",$replace,1);
- my $pos = pos $grammar;
- substr($grammar,$pos,0,$implicit);
- pos $grammar = $pos;;
- }
- elsif ($grammar =~ m/$ENDDIRECTIVEMK/gco)
- {
-
- # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY)
-
- my ($minrep,$maxrep) = (1,$MAXREP);
- if ($grammar =~ m/\G[(]/gc)
- {
- pos($grammar)--;
-
- if ($grammar =~ m/$OPTIONAL/gco)
- { ($minrep, $maxrep) = (0,1) }
- elsif ($grammar =~ m/$ANY/gco)
- { $minrep = 0 }
- elsif ($grammar =~ m/$EXACTLY/gco)
- { ($minrep, $maxrep) = ($1,$1) }
- elsif ($grammar =~ m/$BETWEEN/gco)
- { ($minrep, $maxrep) = ($1,$2) }
- elsif ($grammar =~ m/$ATLEAST/gco)
- { $minrep = $1 }
- elsif ($grammar =~ m/$ATMOST/gco)
- { $maxrep = $1 }
- elsif ($grammar =~ m/$MANY/gco)
- { }
- elsif ($grammar =~ m/$BADREP/gco)
- {
- _parse("an invalid repetition specifier", 0,$line);
- _error("Incorrect specification of a repeated directive",
- $line);
- _hint("Repeated directives cannot have
- a maximum repetition of zero, nor can they have
- negative components in their ranges.");
- }
- }
-
- $prod && $prod->enddirective($line,$minrep,$maxrep);
- }
- elsif ($grammar =~ m/\G\s*<[^m]/gc)
- {
- pos($grammar)-=2;
-
- if ($grammar =~ m/$OPMK/gco)
- {
- # $DB::single=1;
- _parse("a $1-associative operator directive", $aftererror, $line, "<$1op:...>");
- $prod->adddirective($1, $line,$2||'');
- }
- elsif ($grammar =~ m/$UNCOMMITMK/gco)
- {
- _parse("an uncommit marker", $aftererror,$line);
- $item = new Parse::RecDescent::Directive('$commit=0;1',
- $lookahead,$line,"<uncommit>");
- $prod and $prod->additem($item)
- or _no_rule("<uncommit>",$line);
- }
- elsif ($grammar =~ m/$QUOTELIKEMK/gco)
- {
- _parse("an perl quotelike marker", $aftererror,$line);
- $item = new Parse::RecDescent::Directive(
- 'my ($match,@res);
- ($match,$text,undef,@res) =
- Text::Balanced::extract_quotelike($text,$skip);
- $match ? \@res : undef;
- ', $lookahead,$line,"<perl_quotelike>");
- $prod and $prod->additem($item)
- or _no_rule("<perl_quotelike>",$line);
- }
- elsif ($grammar =~ m/$CODEBLOCKMK/gco)
- {
- my $outer = $1||"{}";
- _parse("an perl codeblock marker", $aftererror,$line);
- $item = new Parse::RecDescent::Directive(
- 'Text::Balanced::extract_codeblock($text,undef,$skip,\''.$outer.'\');
- ', $lookahead,$line,"<perl_codeblock>");
- $prod and $prod->additem($item)
- or _no_rule("<perl_codeblock>",$line);
- }
- elsif ($grammar =~ m/$VARIABLEMK/gco)
- {
- _parse("an perl variable marker", $aftererror,$line);
- $item = new Parse::RecDescent::Directive(
- 'Text::Balanced::extract_variable($text,$skip);
- ', $lookahead,$line,"<perl_variable>");
- $prod and $prod->additem($item)
- or _no_rule("<perl_variable>",$line);
- }
- elsif ($grammar =~ m/$NOCHECKMK/gco)
- {
- _parse("a disable checking marker", $aftererror,$line);
- if ($rule)
- {
- _error("<nocheck> directive not at start of grammar", $line);
- _hint("The <nocheck> directive can only
- be specified at the start of a
- grammar (before the first rule
- is defined.");
- }
- else
- {
- local $::RD_CHECK = 1;
- }
- }
- elsif ($grammar =~ m/$AUTOSTUBMK/gco)
- {
- _parse("an autostub marker", $aftererror,$line);
- $::RD_AUTOSTUB = "";
- }
- elsif ($grammar =~ m/$AUTORULEMK/gco)
- {
- _parse("an autorule marker", $aftererror,$line);
- $::RD_AUTOSTUB = $1;
- }
- elsif ($grammar =~ m/$AUTOTREEMK/gco)
- {
- _parse("an autotree marker", $aftererror,$line);
- if ($rule)
- {
- _error("<autotree> directive not at start of grammar", $line);
- _hint("The <autotree> directive can only
- be specified at the start of a
- grammar (before the first rule
- is defined.");
- }
- else
- {
- undef $self->{_AUTOACTION};
- $self->{_AUTOTREE}{NODE}
- = new Parse::RecDescent::Action(q{{bless \%item, $item[0]}},0,-1);
- $self->{_AUTOTREE}{TERMINAL}
- = new Parse::RecDescent::Action(q{{bless {__VALUE__=>$item[1]}, $item[0]}},0,-1);
- }
- }
-
- elsif ($grammar =~ m/$REJECTMK/gco)
- {
- _parse("an reject marker", $aftererror,$line);
- $item = new Parse::RecDescent::UncondReject($lookahead,$line,"<reject>");
- $prod and $prod->additem($item)
- or _no_rule("<reject>",$line);
- }
- elsif ($grammar =~ m/(?=$CONDREJECTMK)/gco
- and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
- $code })
- {
- _parse("a (conditional) reject marker", $aftererror,$line);
- $code =~ /\A\s*<reject:(.*)>\Z/s;
- $item = new Parse::RecDescent::Directive(
- "($1) ? undef : 1", $lookahead,$line,"<reject:$code>");
- $prod and $prod->additem($item)
- or _no_rule("<reject:$code>",$line);
- }
- elsif ($grammar =~ m/(?=$SCOREMK)/gco
- and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
- $code })
- {
- _parse("a score marker", $aftererror,$line);
- $code =~ /\A\s*<score:(.*)>\Z/s;
- $prod and $prod->addscore($1, $lookahead, $line)
- or _no_rule($code,$line);
- }
- elsif ($grammar =~ m/(?=$AUTOSCOREMK)/gco
- and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
- $code;
- } )
- {
- _parse("an autoscore specifier", $aftererror,$line,$code);
- $code =~ /\A\s*<autoscore:(.*)>\Z/s;
-
- $rule and $rule->addautoscore($1,$self)
- or _no_rule($code,$line);
-
- $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code);
- $prod and $prod->additem($item)
- or _no_rule($code,$line);
- }
- elsif ($grammar =~ m/$RESYNCMK/gco)
- {
- _parse("a resync to newline marker", $aftererror,$line);
- $item = new Parse::RecDescent::Directive(
- 'if ($text =~ s/\A[^\n]*\n//) { $return = 0; $& } else { undef }',
- $lookahead,$line,"<resync>");
- $prod and $prod->additem($item)
- or _no_rule("<resync>",$line);
- }
- elsif ($grammar =~ m/(?=$RESYNCPATMK)/gco
- and do { ($code) = extract_bracketed($grammar,'<');
- $code })
- {
- _parse("a resync with pattern marker", $aftererror,$line);
- $code =~ /\A\s*<resync:(.*)>\Z/s;
- $item = new Parse::RecDescent::Directive(
- 'if ($text =~ s/\A'.$1.'//) { $return = 0; $& } else { undef }',
- $lookahead,$line,$code);
- $prod and $prod->additem($item)
- or _no_rule($code,$line);
- }
- elsif ($grammar =~ m/(?=$SKIPMK)/gco
- and do { ($code) = extract_codeblock($grammar,'<');
- $code })
- {
- _parse("a skip marker", $aftererror,$line);
- $code =~ /\A\s*<skip:(.*)>\Z/s;
- $item = new Parse::RecDescent::Directive(
- 'my $oldskip = $skip; $skip='.$1.'; $oldskip',
- $lookahead,$line,$code);
- $prod and $prod->additem($item)
- or _no_rule($code,$line);
- }
- elsif ($grammar =~ m/(?=$RULEVARPATMK)/gco
- and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
- $code;
- } )
- {
- _parse("a rule variable specifier", $aftererror,$line,$code);
- $code =~ /\A\s*<rulevar:(.*)>\Z/s;
-
- $rule and $rule->addvar($1,$self)
- or _no_rule($code,$line);
-
- $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code);
- $prod and $prod->additem($item)
- or _no_rule($code,$line);
- }
- elsif ($grammar =~ m/(?=$DEFERPATMK)/gco
- and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
- $code;
- } )
- {
- _parse("a deferred action specifier", $aftererror,$line,$code);
- $code =~ s/\A\s*<defer:(.*)>\Z/$1/s;
- if ($code =~ /\A\s*[^{]|[^}]\s*\Z/)
- {
- $code = "{ $code }"
- }
-
- $item = new Parse::RecDescent::Directive(
- "push \@{\$thisparser->{deferred}}, sub $code;",
- $lookahead,$line,"<defer:$code>");
- $prod and $prod->additem($item)
- or _no_rule("<defer:$code>",$line);
-
- $self->{deferrable} = 1;
- }
- elsif ($grammar =~ m/(?=$TOKENPATMK)/gco
- and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
- $code;
- } )
- {
- _parse("a token constructor", $aftererror,$line,$code);
- $code =~ s/\A\s*<token:(.*)>\Z/$1/s;
-
- my $types = eval 'no strict; local $SIG{__WARN__} = sub {0}; my @arr=('.$code.'); @arr' || ();
- if (!$types)
- {
- _error("Incorrect token specification: \"$@\"", $line);
- _hint("The <token:...> directive requires a list
- of one or more strings representing possible
- types of the specified token. For example:
- <token:NOUN,VERB>");
- }
- else
- {
- $item = new Parse::RecDescent::Directive(
- 'no strict;
- $return = { text => $item[-1] };
- @{$return->{type}}{'.$code.'} = (1..'.$types.');',
- $lookahead,$line,"<token:$code>");
- $prod and $prod->additem($item)
- or _no_rule("<token:$code>",$line);
- }
- }
- elsif ($grammar =~ m/$COMMITMK/gco)
- {
- _parse("an commit marker", $aftererror,$line);
- $item = new Parse::RecDescent::Directive('$commit = 1',
- $lookahead,$line,"<commit>");
- $prod and $prod->additem($item)
- or _no_rule("<commit>",$line);
- }
- elsif ($grammar =~ m/$AUTOERRORMK/gco)
- {
- $commitonly = $1;
- _parse("an error marker", $aftererror,$line);
- $item = new Parse::RecDescent::Error('',$lookahead,$1,$line);
- $prod and $prod->additem($item)
- or _no_rule("<error>",$line);
- $aftererror = !$commitonly;
- }
- elsif ($grammar =~ m/(?=$MSGERRORMK)/gco
- and do { $commitonly = $1;
- ($code) = extract_bracketed($grammar,'<');
- $code })
- {
- _parse("an error marker", $aftererror,$line,$code);
- $code =~ /\A\s*<error\??:(.*)>\Z/s;
- $item = new Parse::RecDescent::Error($1,$lookahead,$commitonly,$line);
- $prod and $prod->additem($item)
- or _no_rule("$code",$line);
- $aftererror = !$commitonly;
- }
- elsif (do { $commitonly = $1;
- ($code) = extract_bracketed($grammar,'<');
- $code })
- {
- if ($code =~ /^<[A-Z_]+>$/)
- {
- _error("Token items are not yet
- supported: \"$code\"",
- $line);
- _hint("Items like $code that consist of angle
- brackets enclosing a sequence of
- uppercase characters will eventually
- be used to specify pre-lexed tokens
- in a grammar. That functionality is not
- yet implemented. Or did you misspell
- \"$code\"?");
- }
- else
- {
- _error("Untranslatable item encountered: \"$code\"",
- $line);
- _hint("Did you misspell \"$code\"
- or forget to comment it out?");
- }
- }
- }
- elsif ($grammar =~ m/$RULE/gco)
- {
- _parseunneg("a rule declaration", 0,
- $lookahead,$line) or next;
- my $rulename = $1;
- if ($rulename =~ /Replace|Extend|Precompile|Save/ )
- {
- _warn(2,"Rule \"$rulename\" hidden by method
- Parse::RecDescent::$rulename",$line)
- and
- _hint("The rule named \"$rulename\" cannot be directly
- called through the Parse::RecDescent object
- for this grammar (although it may still
- be used as a subrule of other rules).
- It can't be directly called because
- Parse::RecDescent::$rulename is already defined (it
- is the standard method of all
- parsers).");
- }
- $rule = new Parse::RecDescent::Rule($rulename,$self,$line,$replace);
- $prod->check_pending($line) if $prod;
- $prod = $rule->addprod( new Parse::RecDescent::Production );
- $aftererror = 0;
- }
- elsif ($grammar =~ m/$UNCOMMITPROD/gco)
- {
- pos($grammar)-=9;
- _parseunneg("a new (uncommitted) production",
- 0, $lookahead, $line) or next;
-
- $prod->check_pending($line) if $prod;
- $prod = new Parse::RecDescent::Production($line,1);
- $rule and $rule->addprod($prod)
- or _no_rule("<uncommit>",$line);
- $aftererror = 0;
- }
- elsif ($grammar =~ m/$ERRORPROD/gco)
- {
- pos($grammar)-=6;
- _parseunneg("a new (error) production", $aftererror,
- $lookahead,$line) or next;
- $prod->check_pending($line) if $prod;
- $prod = new Parse::RecDescent::Production($line,0,1);
- $rule and $rule->addprod($prod)
- or _no_rule("<error>",$line);
- $aftererror = 0;
- }
- elsif ($grammar =~ m/$PROD/gco)
- {
- _parseunneg("a new production", 0,
- $lookahead,$line) or next;
- $rule
- and (!$prod || $prod->check_pending($line))
- and $prod = $rule->addprod(new Parse::RecDescent::Production($line))
- or _no_rule("production",$line);
- $aftererror = 0;
- }
- elsif ($grammar =~ m/$LITERAL/gco)
- {
- ($code = $1) =~ s/\\\\/\\/g;
- _parse("a literal terminal", $aftererror,$line,$1);
- $item = new Parse::RecDescent::Literal($code,$lookahead,$line);
- $prod and $prod->additem($item)
- or _no_rule("literal terminal",$line,"'$1'");
- }
- elsif ($grammar =~ m/$INTERPLIT/gco)
- {
- _parse("an interpolated literal terminal", $aftererror,$line);
- $item = new Parse::RecDescent::InterpLit($1,$lookahead,$line);
- $prod and $prod->additem($item)
- or _no_rule("interpolated literal terminal",$line,"'$1'");
- }
- elsif ($grammar =~ m/$TOKEN/gco)
- {
- _parse("a /../ pattern terminal", $aftererror,$line);
- $item = new Parse::RecDescent::Token($1,'/',$3?$3:'',$lookahead,$line);
- $prod and $prod->additem($item)
- or _no_rule("pattern terminal",$line,"/$1/");
- }
- elsif ($grammar =~ m/(?=$MTOKEN)/gco
- and do { ($code, undef, @components)
- = extract_quotelike($grammar);
- $code }
- )
-
- {
- _parse("an m/../ pattern terminal", $aftererror,$line,$code);
- $item = new Parse::RecDescent::Token(@components[3,2,8],
- $lookahead,$line);
- $prod and $prod->additem($item)
- or _no_rule("pattern terminal",$line,$code);
- }
- elsif ($grammar =~ m/(?=$MATCHRULE)/gco
- and do { ($code) = extract_bracketed($grammar,'<');
- $code
- }
- or $grammar =~ m/$SUBRULE/gco
- and $code = $1)
- {
- my $name = $code;
- my $matchrule = 0;
- if (substr($name,0,1) eq '<')
- {
- $name =~ s/$MATCHRULE\s*//;
- $name =~ s/\s*>\Z//;
- $matchrule = 1;
- }
-
- # EXTRACT TRAILING ARG LIST (IF ANY)
-
- my ($argcode) = extract_codeblock($grammar, "[]",'') || '';
-
- # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY)
-
- if ($grammar =~ m/\G[(]/gc)
- {
- pos($grammar)--;
-
- if ($grammar =~ m/$OPTIONAL/gco)
- {
- _parse("an zero-or-one subrule match", $aftererror,$line,"$code$argcode($1)");
- $item = new Parse::RecDescent::Repetition($name,$1,0,1,
- $lookahead,$line,
- $self,
- $matchrule,
- $argcode);
- $prod and $prod->additem($item)
- or _no_rule("repetition",$line,"$code$argcode($1)");
-
- !$matchrule and $rule and $rule->addcall($name);
- }
- elsif ($grammar =~ m/$ANY/gco)
- {
- _parse("a zero-or-more subrule match", $aftererror,$line,"$code$argcode($1)");
- if ($2)
- {
- my $pos = pos $grammar;
- substr($grammar,$pos,0,
- "<leftop='$name(s?)': $name $2 $name>(s?) ");
-
- pos $grammar = $pos;
- }
- else
- {
- $item = new Parse::RecDescent::Repetition($name,$1,0,$MAXREP,
- $lookahead,$line,
- $self,
- $matchrule,
- $argcode);
- $prod and $prod->additem($item)
- or _no_rule("repetition",$line,"$code$argcode($1)");
-
- !$matchrule and $rule and $rule->addcall($name);
-
- _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK;
- }
- }
- elsif ($grammar =~ m/$MANY/gco)
- {
- _parse("a one-or-more subrule match", $aftererror,$line,"$code$argcode($1)");
- if ($2)
- {
- # $DB::single=1;
- my $pos = pos $grammar;
- substr($grammar,$pos,0,
- "<leftop='$name(s)': $name $2 $name> ");
-
- pos $grammar = $pos;
- }
- else
- {
- $item = new Parse::RecDescent::Repetition($name,$1,1,$MAXREP,
- $lookahead,$line,
- $self,
- $matchrule,
- $argcode);
-
- $prod and $prod->additem($item)
- or _no_rule("repetition",$line,"$code$argcode($1)");
-
- !$matchrule and $rule and $rule->addcall($name);
-
- _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK;
- }
- }
- elsif ($grammar =~ m/$EXACTLY/gco)
- {
- _parse("an exactly-$1-times subrule match", $aftererror,$line,"$code$argcode($1)");
- if ($2)
- {
- my $pos = pos $grammar;
- substr($grammar,$pos,0,
- "<leftop='$name($1)': $name $2 $name>($1) ");
-
- pos $grammar = $pos;
- }
- else
- {
- $item = new Parse::RecDescent::Repetition($name,$1,$1,$1,
- $lookahead,$line,
- $self,
- $matchrule,
- $argcode);
- $prod and $prod->additem($item)
- or _no_rule("repetition",$line,"$code$argcode($1)");
-
- !$matchrule and $rule and $rule->addcall($name);
- }
- }
- elsif ($grammar =~ m/$BETWEEN/gco)
- {
- _parse("a $1-to-$2 subrule match", $aftererror,$line,"$code$argcode($1..$2)");
- if ($3)
- {
- my $pos = pos $grammar;
- substr($grammar,$pos,0,
- "<leftop='$name($1..$2)': $name $3 $name>($1..$2) ");
-
- pos $grammar = $pos;
- }
- else
- {
- $item = new Parse::RecDescent::Repetition($name,"$1..$2",$1,$2,
- $lookahead,$line,
- $self,
- $matchrule,
- $argcode);
- $prod and $prod->additem($item)
- or _no_rule("repetition",$line,"$code$argcode($1..$2)");
-
- !$matchrule and $rule and $rule->addcall($name);
- }
- }
- elsif ($grammar =~ m/$ATLEAST/gco)
- {
- _parse("a $1-or-more subrule match", $aftererror,$line,"$code$argcode($1..)");
- if ($2)
- {
- my $pos = pos $grammar;
- substr($grammar,$pos,0,
- "<leftop='$name($1..)': $name $2 $name>($1..) ");
-
- pos $grammar = $pos;
- }
- else
- {
- $item = new Parse::RecDescent::Repetition($name,"$1..",$1,$MAXREP,
- $lookahead,$line,
- $self,
- $matchrule,
- $argcode);
- $prod and $prod->additem($item)
- or _no_rule("repetition",$line,"$code$argcode($1..)");
-
- !$matchrule and $rule and $rule->addcall($name);
- _check_insatiable($name,"$1..",$grammar,$line) if $::RD_CHECK;
- }
- }
- elsif ($grammar =~ m/$ATMOST/gco)
- {
- _parse("a one-to-$1 subrule match", $aftererror,$line,"$code$argcode(..$1)");
- if ($2)
- {
- my $pos = pos $grammar;
- substr($grammar,$pos,0,
- "<leftop='$name(..$1)': $name $2 $name>(..$1) ");
-
- pos $grammar = $pos;
- }
- else
- {
- $item = new Parse::RecDescent::Repetition($name,"..$1",1,$1,
- $lookahead,$line,
- $self,
- $matchrule,
- $argcode);
- $prod and $prod->additem($item)
- or _no_rule("repetition",$line,"$code$argcode(..$1)");
-
- !$matchrule and $rule and $rule->addcall($name);
- }
- }
- elsif ($grammar =~ m/$BADREP/gco)
- {
- _parse("an subrule match with invalid repetition specifier", 0,$line);
- _error("Incorrect specification of a repeated subrule",
- $line);
- _hint("Repeated subrules like \"$code$argcode$&\" cannot have
- a maximum repetition of zero, nor can they have
- negative components in their ranges.");
- }
- }
- else
- {
- _parse("a subrule match", $aftererror,$line,$code);
- my $desc;
- if ($name=~/\A_alternation_\d+_of_production_\d+_of_rule/)
- { $desc = $self->{"rules"}{$name}->expected }
- $item = new Parse::RecDescent::Subrule($name,
- $lookahead,
- $line,
- $desc,
- $matchrule,
- $argcode);
-
- $prod and $prod->additem($item)
- or _no_rule("(sub)rule",$line,$name);
-
- !$matchrule and $rule and $rule->addcall($name);
- }
- }
- elsif ($grammar =~ m/$LONECOLON/gco )
- {
- _error("Unexpected colon encountered", $line);
- _hint("Did you mean \"|\" (to start a new production)?
- Or perhaps you forgot that the colon
- in a rule definition must be
- on the same line as the rule name?");
- }
- elsif ($grammar =~ m/$ACTION/gco ) # BAD ACTION, ALREADY FAILED
- {
- _error("Malformed action encountered",
- $line);
- _hint("Did you forget the closing curly bracket
- or is there a syntax error in the action?");
- }
- elsif ($grammar =~ m/$OTHER/gco )
- {
- _error("Untranslatable item encountered: \"$1\"",
- $line);
- _hint("Did you misspell \"$1\"
- or forget to comment it out?");
- }
-
- if ($lookaheadspec =~ tr /././ > 3)
- {
- $lookaheadspec =~ s/\A\s+//;
- $lookahead = $lookahead<0
- ? 'a negative lookahead ("...!")'
- : 'a positive lookahead ("...")' ;
- _warn(1,"Found two or more lookahead specifiers in a
- row.",$line)
- and
- _hint("Multiple positive and/or negative lookaheads
- are simply multiplied together to produce a
- single positive or negative lookahead
- specification. In this case the sequence
- \"$lookaheadspec\" was reduced to $lookahead.
- Was this your intention?");
- }
- $lookahead = 0;
- $lookaheadspec = "";
-
- $grammar =~ m/\G\s+/gc;
- }
-
- unless ($ERRORS or $isimplicit or !$::RD_CHECK)
- {
- $self->_check_grammar();
- }
-
- unless ($ERRORS or $isimplicit or $Parse::RecDescent::compiling)
- {
- my $code = $self->_code();
- if (defined $::RD_TRACE)
- {
- print STDERR "printing code (", length($code),") to RD_TRACE\n";
- local *TRACE_FILE;
- open TRACE_FILE, ">RD_TRACE"
- and print TRACE_FILE "my \$ERRORS;\n$code"
- and close TRACE_FILE;
- }
-
- unless ( eval "$code 1" )
- {
- _error("Internal error in generated parser code!");
- $@ =~ s/at grammar/in grammar at/;
- _hint($@);
- }
- }
-
- if ($ERRORS and !_verbosity("HINT"))
- {
- local $::RD_HINT = 1;
- _hint('Set $::RD_HINT (or -RD_HINT if you\'re using "perl -s")
- for hints on fixing these problems.');
- }
- if ($ERRORS) { $ERRORS=0; return }
- return $self;
-}
-
-
-sub _addstartcode($$)
-{
- my ($self, $code) = @_;
- $code =~ s/\A\s*\{(.*)\}\Z/$1/s;
-
- $self->{"startcode"} .= "$code;\n";
-}
-
-# CHECK FOR GRAMMAR PROBLEMS....
-
-sub _check_insatiable($$$$)
-{
- my ($subrule,$repspec,$grammar,$line) = @_;
- pos($grammar)=pos($_[2]);
- return if $grammar =~ m/$OPTIONAL/gco || $grammar =~ m/$ANY/gco;
- my $min = 1;
- if ( $grammar =~ m/$MANY/gco
- || $grammar =~ m/$EXACTLY/gco
- || $grammar =~ m/$ATMOST/gco
- || $grammar =~ m/$BETWEEN/gco && do { $min=$2; 1 }
- || $grammar =~ m/$ATLEAST/gco && do { $min=$2; 1 }
- || $grammar =~ m/$SUBRULE(?!\s*:)/gco
- )
- {
- return unless $1 eq $subrule && $min > 0;
- _warn(3,"Subrule sequence \"$subrule($repspec) $&\" will
- (almost certainly) fail.",$line)
- and
- _hint("Unless subrule \"$subrule\" performs some cunning
- lookahead, the repetition \"$subrule($repspec)\" will
- insatiably consume as many matches of \"$subrule\" as it
- can, leaving none to match the \"$&\" that follows.");
- }
-}
-
-sub _check_grammar ($)
-{
- my $self = shift;
- my $rules = $self->{"rules"};
- my $rule;
- foreach $rule ( values %$rules )
- {
- next if ! $rule->{"changed"};
-
- # CHECK FOR UNDEFINED RULES
-
- my $call;
- foreach $call ( @{$rule->{"calls"}} )
- {
- if (!defined ${$rules}{$call}
- &&!defined &{"Parse::RecDescent::$call"})
- {
- if (!defined $::RD_AUTOSTUB)
- {
- _warn(3,"Undefined (sub)rule \"$call\"
- used in a production.")
- and
- _hint("Will you be providing this rule
- later, or did you perhaps
- misspell \"$call\"? Otherwise
- it will be treated as an
- immediate <reject>.");
- eval "sub $self->{namespace}::$call {undef}";
- }
- else # EXPERIMENTAL
- {
- my $rule = $::RD_AUTOSTUB || qq{'$call'};
- _warn(1,"Autogenerating rule: $call")
- and
- _hint("A call was made to a subrule
- named \"$call\", but no such
- rule was specified. However,
- since \$::RD_AUTOSTUB
- was defined, a rule stub
- ($call : $rule) was
- automatically created.");
-
- $self->_generate("$call : $rule",0,1);
- }
- }
- }
-
- # CHECK FOR LEFT RECURSION
-
- if ($rule->isleftrec($rules))
- {
- _error("Rule \"$rule->{name}\" is left-recursive.");
- _hint("Redesign the grammar so it's not left-recursive.
- That will probably mean you need to re-implement
- repetitions using the '(s)' notation.
- For example: \"$rule->{name}(s)\".");
- next;
- }
- }
-}
-
-# GENERATE ACTUAL PARSER CODE
-
-sub _code($)
-{
- my $self = shift;
- my $code = qq{
-package $self->{namespace};
-use strict;
-use vars qw(\$skip \$AUTOLOAD $self->{localvars} );
-\$skip = '$skip';
-$self->{startcode}
-
-{
-local \$SIG{__WARN__} = sub {0};
-# PRETEND TO BE IN Parse::RecDescent NAMESPACE
-*$self->{namespace}::AUTOLOAD = sub
-{
- no strict 'refs';
- \$AUTOLOAD =~ s/^$self->{namespace}/Parse::RecDescent/;
- goto &{\$AUTOLOAD};
-}
-}
-
-};
- $code .= "push \@$self->{namespace}\::ISA, 'Parse::RecDescent';";
- $self->{"startcode"} = '';
-
- my $rule;
- foreach $rule ( values %{$self->{"rules"}} )
- {
- if ($rule->{"changed"})
- {
- $code .= $rule->code($self->{"namespace"},$self);
- $rule->{"changed"} = 0;
- }
- }
-
- return $code;
-}
-
-
-# EXECUTING A PARSE....
-
-sub AUTOLOAD # ($parser, $text; $linenum, @args)
-{
- croak "Could not find method: $AUTOLOAD\n" unless ref $_[0];
- my $class = ref($_[0]) || $_[0];
- my $text = ref($_[1]) ? ${$_[1]} : $_[1];
- $_[0]->{lastlinenum} = $_[2]||_linecount($_[1]);
- $_[0]->{lastlinenum} = _linecount($_[1]);
- $_[0]->{lastlinenum} += $_[2] if @_ > 2;
- $_[0]->{offsetlinenum} = $_[0]->{lastlinenum};
- $_[0]->{fulltext} = $text;
- $_[0]->{fulltextlen} = length $text;
- $_[0]->{deferred} = [];
- $_[0]->{errors} = [];
- my @args = @_[3..$#_];
- my $args = sub { [ @args ] };
-
- $AUTOLOAD =~ s/$class/$_[0]->{namespace}/;
- no strict "refs";
-
- croak "Unknown starting rule ($AUTOLOAD) called\n"
- unless defined &$AUTOLOAD;
- my $retval = &{$AUTOLOAD}($_[0],$text,undef,undef,$args);
-
- if (defined $retval)
- {
- foreach ( @{$_[0]->{deferred}} ) { &$_; }
- }
- else
- {
- foreach ( @{$_[0]->{errors}} ) { _error(@$_); }
- }
-
- if (ref $_[1]) { ${$_[1]} = $text }
-
- $ERRORS = 0;
- return $retval;
-}
-
-sub _parserepeat($$$$$$$$$$) # RETURNS A REF TO AN ARRAY OF MATCHES
-{
- my ($parser, $text, $prod, $min, $max, $_noactions, $expectation, $argcode) = @_;
- my @tokens = ();
-
- my $reps;
- for ($reps=0; $reps<$max;)
- {
- $_[6]->at($text); # $_[6] IS $expectation FROM CALLER
- my $_savetext = $text;
- my $prevtextlen = length $text;
- my $_tok;
- if (! defined ($_tok = &$prod($parser,$text,1,$_noactions,$argcode)))
- {
- $text = $_savetext;
- last;
- }
- push @tokens, $_tok if defined $_tok;
- last if ++$reps >= $min and $prevtextlen == length $text;
- }
-
- do { $_[6]->failed(); return undef} if $reps<$min;
-
- $_[1] = $text;
- return [@tokens];
-}
-
-
-# ERROR REPORTING....
-
-my $errortext;
-my $errorprefix;
-
-open (ERROR, ">&STDERR");
-format ERROR =
-@>>>>>>>>>>>>>>>>>>>>: ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-$errorprefix, $errortext
-~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
- $errortext
-.
-
-select ERROR;
-$| = 1;
-
-# TRACING
-
-my $tracemsg;
-my $tracecontext;
-my $tracerulename;
-use vars '$tracelevel';
-
-open (TRACE, ">&STDERR");
-format TRACE =
-@>|@|||||||||@^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<|
-$tracelevel, $tracerulename, '|', $tracemsg
- | ~~ |^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<|
- $tracemsg
-.
-
-select TRACE;
-$| = 1;
-
-open (TRACECONTEXT, ">&STDERR");
-format TRACECONTEXT =
-@>|@|||||||||@ |^<<<<<<<<<<<<<<<<<<<<<<<<<<<
-$tracelevel, $tracerulename, '|', $tracecontext
- | ~~ | |^<<<<<<<<<<<<<<<<<<<<<<<<<<<
- $tracecontext
-.
-
-
-select TRACECONTEXT;
-$| = 1;
-
-select STDOUT;
-
-sub _verbosity($)
-{
- defined $::RD_TRACE
- or defined $::RD_HINT and $_[0] =~ /ERRORS|WARN|HINT/
- or defined $::RD_WARN and $_[0] =~ /ERRORS|WARN/
- or defined $::RD_ERRORS and $_[0] =~ /ERRORS/
-}
-
-sub _error($;$)
-{
- $ERRORS++;
- return 0 if ! _verbosity("ERRORS");
- $errortext = $_[0];
- $errorprefix = "ERROR" . ($_[1] ? " (line $_[1])" : "");
- $errortext =~ s/\s+/ /g;
- print ERROR "\n" if _verbosity("WARN");
- write ERROR;
- return 1;
-}
-
-sub _warn($$;$)
-{
- return 0 unless _verbosity("WARN") && ($::RD_HINT || $_[0] >= ($::RD_WARN||1));
- $errortext = $_[1];
- $errorprefix = "Warning" . ($_[2] ? " (line $_[2])" : "");
- print ERROR "\n";
- $errortext =~ s/\s+/ /g;
- write ERROR;
- return 1;
-}
-
-sub _hint($)
-{
- return 0 unless defined $::RD_HINT;
- $errortext = "$_[0])";
- $errorprefix = "(Hint";
- $errortext =~ s/\s+/ /g;
- write ERROR;
- return 1;
-}
-
-sub _tracemax($)
-{
- if (defined $::RD_TRACE
- && $::RD_TRACE =~ /\d+/
- && $::RD_TRACE>1
- && $::RD_TRACE+10<length($_[0]))
- {
- my $count = length($_[0]) - $::RD_TRACE;
- return substr($_[0],0,$::RD_TRACE/2)
- . "...<$count>..."
- . substr($_[0],-$::RD_TRACE/2);
- }
- else
- {
- return $_[0];
- }
-}
-
-sub _tracefirst($)
-{
- if (defined $::RD_TRACE
- && $::RD_TRACE =~ /\d+/
- && $::RD_TRACE>1
- && $::RD_TRACE+10<length($_[0]))
- {
- my $count = length($_[0]) - $::RD_TRACE;
- return substr($_[0],0,$::RD_TRACE) . "...<+$count>";
- }
- else
- {
- return $_[0];
- }
-}
-
-my $lastcontext = '';
-my $lastrulename = '';
-my $lastlevel = '';
-
-sub _trace($;$$$)
-{
- $tracemsg = $_[0];
- $tracecontext = $_[1]||$lastcontext;
- $tracerulename = $_[2]||$lastrulename;
- $tracelevel = $_[3]||$lastlevel;
- if ($tracerulename) { $lastrulename = $tracerulename }
- if ($tracelevel) { $lastlevel = $tracelevel }
-
- $tracecontext =~ s/\n/\\n/g;
- $tracecontext =~ s/\s+/ /g;
- $tracerulename = qq{$tracerulename};
- write TRACE;
- if ($tracecontext ne $lastcontext)
- {
- if ($tracecontext)
- {
- $lastcontext = _tracefirst($tracecontext);
- $tracecontext = qq{"$tracecontext"};
- }
- else
- {
- $tracecontext = qq{<NO TEXT LEFT>};
- }
- write TRACECONTEXT;
- }
-}
-
-sub _parseunneg($$$$)
-{
- _parse($_[0],$_[1],$_[3]);
- if ($_[2]<0)
- {
- _error("Can't negate \"$&\".",$_[3]);
- _hint("You can't negate $_[0]. Remove the \"...!\" before
- \"$&\".");
- return 0;
- }
- return 1;
-}
-
-sub _parse($$$;$)
-{
- my $what = $_[3] || $&;
- $what =~ s/^\s+//;
- if ($_[1])
- {
- _warn(3,"Found $_[0] ($what) after an unconditional <error>",$_[2])
- and
- _hint("An unconditional <error> always causes the
- production containing it to immediately fail.
- \u$_[0] that follows an <error>
- will never be reached. Did you mean to use
- <error?> instead?");
- }
-
- return if ! _verbosity("TRACE");
- $errortext = "Treating \"$what\" as $_[0]";
- $errorprefix = "Parse::RecDescent";
- $errortext =~ s/\s+/ /g;
- write ERROR;
-}
-
-sub _linecount($) {
- scalar substr($_[0], pos $_[0]||0) =~ tr/\n//
-}
-
-
-package main;
-
-use vars qw ( $RD_ERRORS $RD_WARN $RD_HINT $RD_TRACE $RD_CHECK );
-$::RD_CHECK = 1;
-$::RD_ERRORS = 1;
-$::RD_WARN = 3;
-
-1;
-
diff --git a/lib/Template/Plugin/HTML.pm b/lib/Template/Plugin/HTML.pm
new file mode 100644
index 0000000..4c4d7f0
--- /dev/null
+++ b/lib/Template/Plugin/HTML.pm
@@ -0,0 +1,197 @@
+#============================================================= -*-Perl-*-
+#
+# Template::Plugin::HTML
+#
+# DESCRIPTION
+#
+# Template Toolkit plugin providing useful functionality for generating
+# HTML.
+#
+# AUTHOR
+# Andy Wardley <abw@kfs.org>
+#
+# COPYRIGHT
+# Copyright (C) 1996-2001 Andy Wardley. All Rights Reserved.
+# Copyright (C) 1998-2001 Canon Research Centre Europe Ltd.
+#
+# This module is free software; you can redistribute it and/or
+# modify it under the same terms as Perl itself.
+#
+#----------------------------------------------------------------------------
+#
+# $Id: HTML.pm,v 1.1 2006/10/27 10:00:31 amair Exp $
+#
+#============================================================================
+
+package Template::Plugin::HTML;
+
+require 5.004;
+
+use strict;
+use vars qw( $VERSION );
+use base qw( Template::Plugin );
+use Template::Plugin;
+
+$VERSION = sprintf("%d.%02d", q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/);
+
+sub new {
+ my ($class, $context, @args) = @_;
+ my $hash = ref $args[-1] eq 'HASH' ? pop @args : { };
+ bless {
+ _SORTED => $hash->{ sorted } || 0,
+ }, $class;
+}
+
+sub element {
+ my ($self, $name, $attr) = @_;
+ ($name, $attr) = %$name if ref $name eq 'HASH';
+ return '' unless defined $name and length $name;
+ $attr = $self->attributes($attr);
+ $attr = " $attr" if $attr;
+ return "<$name$attr>";
+}
+
+sub attributes {
+ my ($self, $hash) = @_;
+ return '' unless UNIVERSAL::isa($hash, 'HASH');
+
+ my @keys = keys %$hash;
+ @keys = sort @keys if $self->{ _SORTED };
+
+ join(' ', map {
+ "$_=\"" . $self->escape( $hash->{ $_ } ) . '"';
+ } @keys);
+}
+
+sub escape {
+ my ($self, $text) = @_;
+ for ($text) {
+ s/&/&amp;/g;
+ s/</&lt;/g;
+ s/>/&gt;/g;
+ s/"/&quot;/g;
+ }
+ $text;
+}
+
+sub url {
+ my ($self, $text) = @_;
+ return undef unless defined $text;
+ $text =~ s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg;
+ return $text;
+}
+
+
+1;
+
+__END__
+
+
+#------------------------------------------------------------------------
+# IMPORTANT NOTE
+# This documentation is generated automatically from source
+# templates. Any changes you make here may be lost.
+#
+# The 'docsrc' documentation source bundle is available for download
+# from http://www.template-toolkit.org/docs.html and contains all
+# the source templates, XML files, scripts, etc., from which the
+# documentation for the Template Toolkit is built.
+#------------------------------------------------------------------------
+
+=head1 NAME
+
+Template::Plugin::HTML - Plugin to create HTML elements
+
+=head1 SYNOPSIS
+
+ [% USE HTML %]
+
+ [% HTML.escape("if (a < b && c > d) ..." %]
+
+ [% HTML.element(table => { border => 1, cellpadding => 2 }) %]
+
+ [% HTML.attributes(border => 1, cellpadding => 2) %]
+
+=head1 DESCRIPTION
+
+The HTML plugin is very new and very basic, implementing a few useful
+methods for generating HTML. It is likely to be extended in the future
+or integrated with a larger project to generate HTML elements in a generic
+way (as discussed recently on the mod_perl mailing list).
+
+=head1 METHODS
+
+=head2 escape(text)
+
+Returns the source text with any HTML reserved characters such as
+E<lt>, E<gt>, etc., correctly esacped to their entity equivalents.
+
+=head2 attributes(hash)
+
+Returns the elements of the hash array passed by reference correctly
+formatted (e.g. values quoted and correctly escaped) as attributes for
+an HTML element.
+
+=head2 element(type, attributes)
+
+Generates an HTML element of the specified type and with the attributes
+provided as an optional hash array reference as the second argument or
+as named arguments.
+
+ [% HTML.element(table => { border => 1, cellpadding => 2 }) %]
+ [% HTML.element('table', border=1, cellpadding=2) %]
+ [% HTML.element(table => attribs) %]
+
+=head1 DEBUGGING
+
+The HTML plugin accepts a 'sorted' option as a constructor argument
+which, when set to any true value, causes the attributes generated by
+the attributes() method (either directly or via element()) to be
+returned in sorted order. Order of attributes isn't important in
+HTML, but this is provided mainly for the purposes of debugging where
+it is useful to have attributes generated in a deterministic order
+rather than whatever order the hash happened to feel like returning
+the keys in.
+
+ [% USE HTML(sorted=1) %]
+ [% HTML.element( foo => { charlie => 1, bravo => 2, alpha => 3 } ) %]
+
+generates:
+
+ <foo alpha="3" bravo="2" charlie="1">
+
+=head1 AUTHOR
+
+Andy Wardley E<lt>abw@andywardley.comE<gt>
+
+L<http://www.andywardley.com/|http://www.andywardley.com/>
+
+
+
+
+=head1 VERSION
+
+2.57, distributed as part of the
+Template Toolkit version 2.14, released on 04 October 2004.
+
+=head1 COPYRIGHT
+
+ Copyright (C) 1996-2004 Andy Wardley. All Rights Reserved.
+ Copyright (C) 1998-2002 Canon Research Centre Europe Ltd.
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<Template::Plugin|Template::Plugin>
+
+=cut
+
+# Local Variables:
+# mode: perl
+# perl-indent-level: 4
+# indent-tabs-mode: nil
+# End:
+#
+# vim: expandtab shiftwidth=4:
diff --git a/lib/Template/Plugin/JavaScript.pm b/lib/Template/Plugin/JavaScript.pm
new file mode 100644
index 0000000..4a82b60
--- /dev/null
+++ b/lib/Template/Plugin/JavaScript.pm
@@ -0,0 +1,73 @@
+package Template::Plugin::JavaScript;
+
+use strict;
+use vars qw($VERSION);
+$VERSION = '0.01';
+
+require Template::Plugin;
+use base qw(Template::Plugin);
+
+use vars qw($FILTER_NAME);
+$FILTER_NAME = 'js';
+
+sub new {
+ my($self, $context, @args) = @_;
+ my $name = $args[0] || $FILTER_NAME;
+ $context->define_filter($name, \&encode_js, 0);
+ return $self;
+}
+
+sub encode_js {
+ local $_ = shift;
+ return '' unless defined $_;
+
+ s!(['"])!\\$1!g;
+ s!\n!\\n!g;
+ s!\f!\\f!g;
+ s!\r!\\r!g;
+ s!\t!\\t!g;
+ $_;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Template::Plugin::JavaScript - Encodes text to be safe in JavaScript
+
+=head1 SYNOPSIS
+
+ [% USE JavaScript %]
+ <script type="text/javascript">
+ document.write("[% sometext | js %]");
+ </script>
+
+=head1 DESCRIPTION
+
+Template::Plugin::JavaScript is a TT filter that filters text so it
+can be safely used in JavaScript quotes.
+
+ [% USE JavaScript %]
+ document.write("[% FILTER js %]
+ Here's some text going on.
+ [% END %]");
+
+will become:
+
+ document.write("\nHere\'s some text going on.\n");
+
+=head1 AUTHOR
+
+The original idea comes from Movable Type's C<encode_js> global filter.
+
+Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<Apache::JavaScript::DocumentWrite>
+
+=cut
diff --git a/lib/Text/Balanced.pm b/lib/Text/Balanced.pm
deleted file mode 100644
index aa09c3e..0000000
--- a/lib/Text/Balanced.pm
+++ /dev/null
@@ -1,2301 +0,0 @@
-# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS.
-# FOR FULL DOCUMENTATION SEE Balanced.pod
-
-use 5.005;
-use strict;
-
-package Text::Balanced;
-
-use Exporter;
-use SelfLoader;
-use vars qw { $VERSION @ISA %EXPORT_TAGS };
-
-$VERSION = '1.90';
-@ISA = qw ( Exporter );
-
-%EXPORT_TAGS = ( ALL => [ qw(
- &extract_delimited
- &extract_bracketed
- &extract_quotelike
- &extract_codeblock
- &extract_variable
- &extract_tagged
- &extract_multiple
-
- &gen_delimited_pat
- &gen_extract_tagged
-
- &delimited_pat
- ) ] );
-
-Exporter::export_ok_tags('ALL');
-
-# PROTOTYPES
-
-sub _match_bracketed($$$$$$);
-sub _match_variable($$);
-sub _match_codeblock($$$$$$$);
-sub _match_quotelike($$$$);
-
-# HANDLE RETURN VALUES IN VARIOUS CONTEXTS
-
-sub _failmsg {
- my ($message, $pos) = @_;
- $@ = bless { error=>$message, pos=>$pos }, "Text::Balanced::ErrorMsg";
-}
-
-sub _fail
-{
- my ($wantarray, $textref, $message, $pos) = @_;
- _failmsg $message, $pos if $message;
- return ("",$$textref,"") if $wantarray;
- return undef;
-}
-
-sub _succeed
-{
- $@ = undef;
- my ($wantarray,$textref) = splice @_, 0, 2;
- my ($extrapos, $extralen) = @_>18 ? splice(@_, -2, 2) : (0,0);
- my ($startlen) = $_[5];
- my $remainderpos = $_[2];
- if ($wantarray)
- {
- my @res;
- while (my ($from, $len) = splice @_, 0, 2)
- {
- push @res, substr($$textref,$from,$len);
- }
- if ($extralen) { # CORRECT FILLET
- my $extra = substr($res[0], $extrapos-$startlen, $extralen, "\n");
- $res[1] = "$extra$res[1]";
- eval { substr($$textref,$remainderpos,0) = $extra;
- substr($$textref,$extrapos,$extralen,"\n")} ;
- #REARRANGE HERE DOC AND FILLET IF POSSIBLE
- pos($$textref) = $remainderpos-$extralen+1; # RESET \G
- }
- else {
- pos($$textref) = $remainderpos; # RESET \G
- }
- return @res;
- }
- else
- {
- my $match = substr($$textref,$_[0],$_[1]);
- substr($match,$extrapos-$_[0]-$startlen,$extralen,"") if $extralen;
- my $extra = $extralen
- ? substr($$textref, $extrapos, $extralen)."\n" : "";
- eval {substr($$textref,$_[4],$_[1]+$_[5])=$extra} ; #CHOP OUT PREFIX & MATCH, IF POSSIBLE
- pos($$textref) = $_[4]; # RESET \G
- return $match;
- }
-}
-
-# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING
-
-sub gen_delimited_pat($;$) # ($delimiters;$escapes)
-{
- my ($dels, $escs) = @_;
- return "" unless $dels =~ /\S/;
- $escs = '\\' unless $escs;
- $escs .= substr($escs,-1) x (length($dels)-length($escs));
- my @pat = ();
- my $i;
- for ($i=0; $i<length $dels; $i++)
- {
- my $del = quotemeta substr($dels,$i,1);
- my $esc = quotemeta substr($escs,$i,1);
- if ($del eq $esc)
- {
- push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del";
- }
- else
- {
- push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del";
- }
- }
- my $pat = join '|', @pat;
- return "(?:$pat)";
-}
-
-*delimited_pat = \&gen_delimited_pat;
-
-
-# THE EXTRACTION FUNCTIONS
-
-sub extract_delimited (;$$$$)
-{
- my $textref = defined $_[0] ? \$_[0] : \$_;
- my $wantarray = wantarray;
- my $del = defined $_[1] ? $_[1] : qq{\'\"\`};
- my $pre = defined $_[2] ? $_[2] : '\s*';
- my $esc = defined $_[3] ? $_[3] : qq{\\};
- my $pat = gen_delimited_pat($del, $esc);
- my $startpos = pos $$textref || 0;
- return _fail($wantarray, $textref, "Not a delimited pattern", 0)
- unless $$textref =~ m/\G($pre)($pat)/gc;
- my $prelen = length($1);
- my $matchpos = $startpos+$prelen;
- my $endpos = pos $$textref;
- return _succeed $wantarray, $textref,
- $matchpos, $endpos-$matchpos, # MATCH
- $endpos, length($$textref)-$endpos, # REMAINDER
- $startpos, $prelen; # PREFIX
-}
-
-sub extract_bracketed (;$$$)
-{
- my $textref = defined $_[0] ? \$_[0] : \$_;
- my $ldel = defined $_[1] ? $_[1] : '{([<';
- my $pre = defined $_[2] ? $_[2] : '\s*';
- my $wantarray = wantarray;
- my $qdel = "";
- my $quotelike;
- $ldel =~ s/'//g and $qdel .= q{'};
- $ldel =~ s/"//g and $qdel .= q{"};
- $ldel =~ s/`//g and $qdel .= q{`};
- $ldel =~ s/q//g and $quotelike = 1;
- $ldel =~ tr/[](){}<>\0-\377/[[(({{<</ds;
- my $rdel = $ldel;
- unless ($rdel =~ tr/[({</])}>/)
- {
- return _fail $wantarray, $textref,
- "Did not find a suitable bracket in delimiter: \"$_[1]\"",
- 0;
- }
- my $posbug = pos;
- $ldel = join('|', map { quotemeta $_ } split('', $ldel));
- $rdel = join('|', map { quotemeta $_ } split('', $rdel));
- pos = $posbug;
-
- my $startpos = pos $$textref || 0;
- my @match = _match_bracketed($textref,$pre, $ldel, $qdel, $quotelike, $rdel);
-
- return _fail ($wantarray, $textref) unless @match;
-
- return _succeed ( $wantarray, $textref,
- $match[2], $match[5]+2, # MATCH
- @match[8,9], # REMAINDER
- @match[0,1], # PREFIX
- );
-}
-
-sub _match_bracketed($$$$$$) # $textref, $pre, $ldel, $qdel, $quotelike, $rdel
-{
- my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_;
- my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0);
- unless ($$textref =~ m/\G$pre/gc)
- {
- _failmsg "Did not find prefix: /$pre/", $startpos;
- return;
- }
-
- $ldelpos = pos $$textref;
-
- unless ($$textref =~ m/\G($ldel)/gc)
- {
- _failmsg "Did not find opening bracket after prefix: \"$pre\"",
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
-
- my @nesting = ( $1 );
- my $textlen = length $$textref;
- while (pos $$textref < $textlen)
- {
- next if $$textref =~ m/\G\\./gcs;
-
- if ($$textref =~ m/\G($ldel)/gc)
- {
- push @nesting, $1;
- }
- elsif ($$textref =~ m/\G($rdel)/gc)
- {
- my ($found, $brackettype) = ($1, $1);
- if ($#nesting < 0)
- {
- _failmsg "Unmatched closing bracket: \"$found\"",
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- my $expected = pop(@nesting);
- $expected =~ tr/({[</)}]>/;
- if ($expected ne $brackettype)
- {
- _failmsg qq{Mismatched closing bracket: expected "$expected" but found "$found"},
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- last if $#nesting < 0;
- }
- elsif ($qdel && $$textref =~ m/\G([$qdel])/gc)
- {
- $$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gsc and next;
- _failmsg "Unmatched embedded quote ($1)",
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- elsif ($quotelike && _match_quotelike($textref,"",1,0))
- {
- next;
- }
-
- else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs }
- }
- if ($#nesting>=0)
- {
- _failmsg "Unmatched opening bracket(s): "
- . join("..",@nesting)."..",
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
-
- $endpos = pos $$textref;
-
- return (
- $startpos, $ldelpos-$startpos, # PREFIX
- $ldelpos, 1, # OPENING BRACKET
- $ldelpos+1, $endpos-$ldelpos-2, # CONTENTS
- $endpos-1, 1, # CLOSING BRACKET
- $endpos, length($$textref)-$endpos, # REMAINDER
- );
-}
-
-sub revbracket($)
-{
- my $brack = reverse $_[0];
- $brack =~ tr/[({</])}>/;
- return $brack;
-}
-
-my $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*};
-
-sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options)
-{
- my $textref = defined $_[0] ? \$_[0] : \$_;
- my $ldel = $_[1];
- my $rdel = $_[2];
- my $pre = defined $_[3] ? $_[3] : '\s*';
- my %options = defined $_[4] ? %{$_[4]} : ();
- my $omode = defined $options{fail} ? $options{fail} : '';
- my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
- : defined($options{reject}) ? $options{reject}
- : ''
- ;
- my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
- : defined($options{ignore}) ? $options{ignore}
- : ''
- ;
-
- if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }
- $@ = undef;
-
- my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore);
-
- return _fail(wantarray, $textref) unless @match;
- return _succeed wantarray, $textref,
- $match[2], $match[3]+$match[5]+$match[7], # MATCH
- @match[8..9,0..1,2..7]; # REM, PRE, BITS
-}
-
-sub _match_tagged # ($$$$$$$)
-{
- my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_;
- my $rdelspec;
-
- my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) = ( pos($$textref) = pos($$textref)||0 );
-
- unless ($$textref =~ m/\G($pre)/gc)
- {
- _failmsg "Did not find prefix: /$pre/", pos $$textref;
- goto failed;
- }
-
- $opentagpos = pos($$textref);
-
- unless ($$textref =~ m/\G$ldel/gc)
- {
- _failmsg "Did not find opening tag: /$ldel/", pos $$textref;
- goto failed;
- }
-
- $textpos = pos($$textref);
-
- if (!defined $rdel)
- {
- $rdelspec = $&;
- unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". revbracket($1) /oes)
- {
- _failmsg "Unable to construct closing tag to match: $rdel",
- pos $$textref;
- goto failed;
- }
- }
- else
- {
- $rdelspec = eval "qq{$rdel}" || do {
- my $del;
- for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',)
- { next if $rdel =~ /\Q$_/; $del = $_; last }
- unless ($del) {
- use Carp;
- croak "Can't interpolate right delimiter $rdel"
- }
- eval "qq$del$rdel$del";
- };
- }
-
- while (pos($$textref) < length($$textref))
- {
- next if $$textref =~ m/\G\\./gc;
-
- if ($$textref =~ m/\G(\n[ \t]*\n)/gc )
- {
- $parapos = pos($$textref) - length($1)
- unless defined $parapos;
- }
- elsif ($$textref =~ m/\G($rdelspec)/gc )
- {
- $closetagpos = pos($$textref)-length($1);
- goto matched;
- }
- elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc)
- {
- next;
- }
- elsif ($bad && $$textref =~ m/\G($bad)/gcs)
- {
- pos($$textref) -= length($1); # CUT OFF WHATEVER CAUSED THE SHORTNESS
- goto short if ($omode eq 'PARA' || $omode eq 'MAX');
- _failmsg "Found invalid nested tag: $1", pos $$textref;
- goto failed;
- }
- elsif ($$textref =~ m/\G($ldel)/gc)
- {
- my $tag = $1;
- pos($$textref) -= length($tag); # REWIND TO NESTED TAG
- unless (_match_tagged(@_)) # MATCH NESTED TAG
- {
- goto short if $omode eq 'PARA' || $omode eq 'MAX';
- _failmsg "Found unbalanced nested tag: $tag",
- pos $$textref;
- goto failed;
- }
- }
- else { $$textref =~ m/./gcs }
- }
-
-short:
- $closetagpos = pos($$textref);
- goto matched if $omode eq 'MAX';
- goto failed unless $omode eq 'PARA';
-
- if (defined $parapos) { pos($$textref) = $parapos }
- else { $parapos = pos($$textref) }
-
- return (
- $startpos, $opentagpos-$startpos, # PREFIX
- $opentagpos, $textpos-$opentagpos, # OPENING TAG
- $textpos, $parapos-$textpos, # TEXT
- $parapos, 0, # NO CLOSING TAG
- $parapos, length($$textref)-$parapos, # REMAINDER
- );
-
-matched:
- $endpos = pos($$textref);
- return (
- $startpos, $opentagpos-$startpos, # PREFIX
- $opentagpos, $textpos-$opentagpos, # OPENING TAG
- $textpos, $closetagpos-$textpos, # TEXT
- $closetagpos, $endpos-$closetagpos, # CLOSING TAG
- $endpos, length($$textref)-$endpos, # REMAINDER
- );
-
-failed:
- _failmsg "Did not find closing tag", pos $$textref unless $@;
- pos($$textref) = $startpos;
- return;
-}
-
-sub extract_variable (;$$)
-{
- my $textref = defined $_[0] ? \$_[0] : \$_;
- return ("","","") unless defined $$textref;
- my $pre = defined $_[1] ? $_[1] : '\s*';
-
- my @match = _match_variable($textref,$pre);
-
- return _fail wantarray, $textref unless @match;
-
- return _succeed wantarray, $textref,
- @match[2..3,4..5,0..1]; # MATCH, REMAINDER, PREFIX
-}
-
-sub _match_variable($$)
-{
-# $#
-# $^
-# $$
- my ($textref, $pre) = @_;
- my $startpos = pos($$textref) = pos($$textref)||0;
- unless ($$textref =~ m/\G($pre)/gc)
- {
- _failmsg "Did not find prefix: /$pre/", pos $$textref;
- return;
- }
- my $varpos = pos($$textref);
- unless ($$textref =~ m{\G\$\s*(?!::)(\d+|[][&`'+*./|,";%=~:?!\@<>()-]|\^[a-z]?)}gci)
- {
- unless ($$textref =~ m/\G((\$#?|[*\@\%]|\\&)+)/gc)
- {
- _failmsg "Did not find leading dereferencer", pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- my $deref = $1;
-
- unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci
- or _match_codeblock($textref, "", '\{', '\}', '\{', '\}', 0)
- or $deref eq '$#' or $deref eq '$$' )
- {
- _failmsg "Bad identifier after dereferencer", pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- }
-
- while (1)
- {
- next if $$textref =~ m/\G\s*(?:->)?\s*[{]\w+[}]/gc;
- next if _match_codeblock($textref,
- qr/\s*->\s*(?:[_a-zA-Z]\w+\s*)?/,
- qr/[({[]/, qr/[)}\]]/,
- qr/[({[]/, qr/[)}\]]/, 0);
- next if _match_codeblock($textref,
- qr/\s*/, qr/[{[]/, qr/[}\]]/,
- qr/[{[]/, qr/[}\]]/, 0);
- next if _match_variable($textref,'\s*->\s*');
- next if $$textref =~ m/\G\s*->\s*\w+(?![{([])/gc;
- last;
- }
-
- my $endpos = pos($$textref);
- return ($startpos, $varpos-$startpos,
- $varpos, $endpos-$varpos,
- $endpos, length($$textref)-$endpos
- );
-}
-
-sub extract_codeblock (;$$$$$)
-{
- my $textref = defined $_[0] ? \$_[0] : \$_;
- my $wantarray = wantarray;
- my $ldel_inner = defined $_[1] ? $_[1] : '{';
- my $pre = defined $_[2] ? $_[2] : '\s*';
- my $ldel_outer = defined $_[3] ? $_[3] : $ldel_inner;
- my $rd = $_[4];
- my $rdel_inner = $ldel_inner;
- my $rdel_outer = $ldel_outer;
- my $posbug = pos;
- for ($ldel_inner, $ldel_outer) { tr/[]()<>{}\0-\377/[[((<<{{/ds }
- for ($rdel_inner, $rdel_outer) { tr/[]()<>{}\0-\377/]]))>>}}/ds }
- for ($ldel_inner, $ldel_outer, $rdel_inner, $rdel_outer)
- {
- $_ = '('.join('|',map { quotemeta $_ } split('',$_)).')'
- }
- pos = $posbug;
-
- my @match = _match_codeblock($textref, $pre,
- $ldel_outer, $rdel_outer,
- $ldel_inner, $rdel_inner,
- $rd);
- return _fail($wantarray, $textref) unless @match;
- return _succeed($wantarray, $textref,
- @match[2..3,4..5,0..1] # MATCH, REMAINDER, PREFIX
- );
-
-}
-
-sub _match_codeblock($$$$$$$)
-{
- my ($textref, $pre, $ldel_outer, $rdel_outer, $ldel_inner, $rdel_inner, $rd) = @_;
- my $startpos = pos($$textref) = pos($$textref) || 0;
- unless ($$textref =~ m/\G($pre)/gc)
- {
- _failmsg qq{Did not match prefix /$pre/ at"} .
- substr($$textref,pos($$textref),20) .
- q{..."},
- pos $$textref;
- return;
- }
- my $codepos = pos($$textref);
- unless ($$textref =~ m/\G($ldel_outer)/gc) # OUTERMOST DELIMITER
- {
- _failmsg qq{Did not find expected opening bracket at "} .
- substr($$textref,pos($$textref),20) .
- q{..."},
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- my $closing = $1;
- $closing =~ tr/([<{/)]>}/;
- my $matched;
- my $patvalid = 1;
- while (pos($$textref) < length($$textref))
- {
- $matched = '';
- if ($rd && $$textref =~ m#\G(\Q(?)\E|\Q(s?)\E|\Q(s)\E)#gc)
- {
- $patvalid = 0;
- next;
- }
-
- if ($$textref =~ m/\G\s*#.*/gc)
- {
- next;
- }
-
- if ($$textref =~ m/\G\s*($rdel_outer)/gc)
- {
- unless ($matched = ($closing && $1 eq $closing) )
- {
- next if $1 eq '>'; # MIGHT BE A "LESS THAN"
- _failmsg q{Mismatched closing bracket at "} .
- substr($$textref,pos($$textref),20) .
- qq{...". Expected '$closing'},
- pos $$textref;
- }
- last;
- }
-
- if (_match_variable($textref,'\s*') ||
- _match_quotelike($textref,'\s*',$patvalid,$patvalid) )
- {
- $patvalid = 0;
- next;
- }
-
-
- # NEED TO COVER MANY MORE CASES HERE!!!
- if ($$textref =~ m#\G\s*( [-+*x/%^&|.]=?
- | [!=]~
- | =(?!>)
- | (\*\*|&&|\|\||<<|>>)=?
- | split|grep|map|return
- | [([]
- )#gcx)
- {
- $patvalid = 1;
- next;
- }
-
- if ( _match_codeblock($textref, '\s*', $ldel_inner, $rdel_inner, $ldel_inner, $rdel_inner, $rd) )
- {
- $patvalid = 1;
- next;
- }
-
- if ($$textref =~ m/\G\s*$ldel_outer/gc)
- {
- _failmsg q{Improperly nested codeblock at "} .
- substr($$textref,pos($$textref),20) .
- q{..."},
- pos $$textref;
- last;
- }
-
- $patvalid = 0;
- $$textref =~ m/\G\s*(\w+|[-=>]>|.|\Z)/gc;
- }
- continue { $@ = undef }
-
- unless ($matched)
- {
- _failmsg 'No match found for opening bracket', pos $$textref
- unless $@;
- return;
- }
-
- my $endpos = pos($$textref);
- return ( $startpos, $codepos-$startpos,
- $codepos, $endpos-$codepos,
- $endpos, length($$textref)-$endpos,
- );
-}
-
-
-my %mods = (
- 'none' => '[cgimsox]*',
- 'm' => '[cgimsox]*',
- 's' => '[cegimsox]*',
- 'tr' => '[cds]*',
- 'y' => '[cds]*',
- 'qq' => '',
- 'qx' => '',
- 'qw' => '',
- 'qr' => '[imsx]*',
- 'q' => '',
- );
-
-sub extract_quotelike (;$$)
-{
- my $textref = $_[0] ? \$_[0] : \$_;
- my $wantarray = wantarray;
- my $pre = defined $_[1] ? $_[1] : '\s*';
-
- my @match = _match_quotelike($textref,$pre,1,0);
- return _fail($wantarray, $textref) unless @match;
- return _succeed($wantarray, $textref,
- $match[2], $match[18]-$match[2], # MATCH
- @match[18,19], # REMAINDER
- @match[0,1], # PREFIX
- @match[2..17], # THE BITS
- @match[20,21], # ANY FILLET?
- );
-};
-
-sub _match_quotelike($$$$) # ($textref, $prepat, $allow_raw_match)
-{
- my ($textref, $pre, $rawmatch, $qmark) = @_;
-
- my ($textlen,$startpos,
- $oppos,
- $preld1pos,$ld1pos,$str1pos,$rd1pos,
- $preld2pos,$ld2pos,$str2pos,$rd2pos,
- $modpos) = ( length($$textref), pos($$textref) = pos($$textref) || 0 );
-
- unless ($$textref =~ m/\G($pre)/gc)
- {
- _failmsg qq{Did not find prefix /$pre/ at "} .
- substr($$textref, pos($$textref), 20) .
- q{..."},
- pos $$textref;
- return;
- }
- $oppos = pos($$textref);
-
- my $initial = substr($$textref,$oppos,1);
-
- if ($initial && $initial =~ m|^[\"\'\`]|
- || $rawmatch && $initial =~ m|^/|
- || $qmark && $initial =~ m|^\?|)
- {
- unless ($$textref =~ m/ \Q$initial\E [^\\$initial]* (\\.[^\\$initial]*)* \Q$initial\E /gcsx)
- {
- _failmsg qq{Did not find closing delimiter to match '$initial' at "} .
- substr($$textref, $oppos, 20) .
- q{..."},
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- $modpos= pos($$textref);
- $rd1pos = $modpos-1;
-
- if ($initial eq '/' || $initial eq '?')
- {
- $$textref =~ m/\G$mods{none}/gc
- }
-
- my $endpos = pos($$textref);
- return (
- $startpos, $oppos-$startpos, # PREFIX
- $oppos, 0, # NO OPERATOR
- $oppos, 1, # LEFT DEL
- $oppos+1, $rd1pos-$oppos-1, # STR/PAT
- $rd1pos, 1, # RIGHT DEL
- $modpos, 0, # NO 2ND LDEL
- $modpos, 0, # NO 2ND STR
- $modpos, 0, # NO 2ND RDEL
- $modpos, $endpos-$modpos, # MODIFIERS
- $endpos, $textlen-$endpos, # REMAINDER
- );
- }
-
- unless ($$textref =~ m{\G((?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<)}gc)
- {
- _failmsg q{No quotelike operator found after prefix at "} .
- substr($$textref, pos($$textref), 20) .
- q{..."},
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
-
- my $op = $1;
- $preld1pos = pos($$textref);
- if ($op eq '<<') {
- $ld1pos = pos($$textref);
- my $label;
- if ($$textref =~ m{\G([A-Za-z_]\w*)}gc) {
- $label = $1;
- }
- elsif ($$textref =~ m{ \G ' ([^'\\]* (?:\\.[^'\\]*)*) '
- | \G " ([^"\\]* (?:\\.[^"\\]*)*) "
- | \G ` ([^`\\]* (?:\\.[^`\\]*)*) `
- }gcsx) {
- $label = $+;
- }
- else {
- $label = "";
- }
- my $extrapos = pos($$textref);
- $$textref =~ m{.*\n}gc;
- $str1pos = pos($$textref);
- unless ($$textref =~ m{.*?\n(?=$label\n)}gc) {
- _failmsg qq{Missing here doc terminator ('$label') after "} .
- substr($$textref, $startpos, 20) .
- q{..."},
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- $rd1pos = pos($$textref);
- $$textref =~ m{$label\n}gc;
- $ld2pos = pos($$textref);
- return (
- $startpos, $oppos-$startpos, # PREFIX
- $oppos, length($op), # OPERATOR
- $ld1pos, $extrapos-$ld1pos, # LEFT DEL
- $str1pos, $rd1pos-$str1pos, # STR/PAT
- $rd1pos, $ld2pos-$rd1pos, # RIGHT DEL
- $ld2pos, 0, # NO 2ND LDEL
- $ld2pos, 0, # NO 2ND STR
- $ld2pos, 0, # NO 2ND RDEL
- $ld2pos, 0, # NO MODIFIERS
- $ld2pos, $textlen-$ld2pos, # REMAINDER
- $extrapos, $str1pos-$extrapos, # FILLETED BIT
- );
- }
-
- $$textref =~ m/\G\s*/gc;
- $ld1pos = pos($$textref);
- $str1pos = $ld1pos+1;
-
- unless ($$textref =~ m/\G(\S)/gc) # SHOULD USE LOOKAHEAD
- {
- _failmsg "No block delimiter found after quotelike $op",
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- pos($$textref) = $ld1pos; # HAVE TO DO THIS BECAUSE LOOKAHEAD BROKEN
- my ($ldel1, $rdel1) = ("\Q$1","\Q$1");
- if ($ldel1 =~ /[[(<{]/)
- {
- $rdel1 =~ tr/[({</])}>/;
- _match_bracketed($textref,"",$ldel1,"","",$rdel1)
- || do { pos $$textref = $startpos; return };
- }
- else
- {
- $$textref =~ /$ldel1[^\\$ldel1]*(\\.[^\\$ldel1]*)*$ldel1/gcs
- || do { pos $$textref = $startpos; return };
- }
- $ld2pos = $rd1pos = pos($$textref)-1;
-
- my $second_arg = $op =~ /s|tr|y/ ? 1 : 0;
- if ($second_arg)
- {
- my ($ldel2, $rdel2);
- if ($ldel1 =~ /[[(<{]/)
- {
- unless ($$textref =~ /\G\s*(\S)/gc) # SHOULD USE LOOKAHEAD
- {
- _failmsg "Missing second block for quotelike $op",
- pos $$textref;
- pos $$textref = $startpos;
- return;
- }
- $ldel2 = $rdel2 = "\Q$1";
- $rdel2 =~ tr/[({</])}>/;
- }
- else
- {
- $ldel2 = $rdel2 = $ldel1;
- }
- $str2pos = $ld2pos+1;
-
- if ($ldel2 =~ /[[(<{]/)
- {
- pos($$textref)--; # OVERCOME BROKEN LOOKAHEAD
- _match_bracketed($textref,"",$ldel2,"","",$rdel2)
- || do { pos $$textref = $startpos; return };
- }
- else
- {
- $$textref =~ /[^\\$ldel2]*(\\.[^\\$ldel2]*)*$ldel2/gcs
- || do { pos $$textref = $startpos; return };
- }
- $rd2pos = pos($$textref)-1;
- }
- else
- {
- $ld2pos = $str2pos = $rd2pos = $rd1pos;
- }
-
- $modpos = pos $$textref;
-
- $$textref =~ m/\G($mods{$op})/gc;
- my $endpos = pos $$textref;
-
- return (
- $startpos, $oppos-$startpos, # PREFIX
- $oppos, length($op), # OPERATOR
- $ld1pos, 1, # LEFT DEL
- $str1pos, $rd1pos-$str1pos, # STR/PAT
- $rd1pos, 1, # RIGHT DEL
- $ld2pos, $second_arg, # 2ND LDEL (MAYBE)
- $str2pos, $rd2pos-$str2pos, # 2ND STR (MAYBE)
- $rd2pos, $second_arg, # 2ND RDEL (MAYBE)
- $modpos, $endpos-$modpos, # MODIFIERS
- $endpos, $textlen-$endpos, # REMAINDER
- );
-}
-
-my $def_func =
-[
- sub { extract_variable($_[0], '') },
- sub { extract_quotelike($_[0],'') },
- sub { extract_codeblock($_[0],'{}','') },
-];
-
-sub extract_multiple (;$$$$) # ($text, $functions_ref, $max_fields, $ignoreunknown)
-{
- my $textref = defined($_[0]) ? \$_[0] : \$_;
- my $posbug = pos;
- my ($lastpos, $firstpos);
- my @fields = ();
-
- #for ($$textref)
- {
- my @func = defined $_[1] ? @{$_[1]} : @{$def_func};
- my $max = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000;
- my $igunk = $_[3];
-
- pos $$textref ||= 0;
-
- unless (wantarray)
- {
- use Carp;
- carp "extract_multiple reset maximal count to 1 in scalar context"
- if $^W && defined($_[2]) && $max > 1;
- $max = 1
- }
-
- my $unkpos;
- my $func;
- my $class;
-
- my @class;
- foreach $func ( @func )
- {
- if (ref($func) eq 'HASH')
- {
- push @class, (keys %$func)[0];
- $func = (values %$func)[0];
- }
- else
- {
- push @class, undef;
- }
- }
-
- FIELD: while (pos($$textref) < length($$textref))
- {
- my ($field, $rem);
- my @bits;
- foreach my $i ( 0..$#func )
- {
- my $pref;
- $func = $func[$i];
- $class = $class[$i];
- $lastpos = pos $$textref;
- if (ref($func) eq 'CODE')
- { ($field,$rem,$pref) = @bits = $func->($$textref);
- # print "[$field|$rem]" if $field;
- }
- elsif (ref($func) eq 'Text::Balanced::Extractor')
- { @bits = $field = $func->extract($$textref) }
- elsif( $$textref =~ m/\G$func/gc )
- { @bits = $field = defined($1) ? $1 : $& }
- $pref ||= "";
- if (defined($field) && length($field))
- {
- if (!$igunk) {
- $unkpos = pos $$textref
- if length($pref) && !defined($unkpos);
- if (defined $unkpos)
- {
- push @fields, substr($$textref, $unkpos, $lastpos-$unkpos).$pref;
- $firstpos = $unkpos unless defined $firstpos;
- undef $unkpos;
- last FIELD if @fields == $max;
- }
- }
- push @fields, $class
- ? bless (\$field, $class)
- : $field;
- $firstpos = $lastpos unless defined $firstpos;
- $lastpos = pos $$textref;
- last FIELD if @fields == $max;
- next FIELD;
- }
- }
- if ($$textref =~ /\G(.)/gcs)
- {
- $unkpos = pos($$textref)-1
- unless $igunk || defined $unkpos;
- }
- }
-
- if (defined $unkpos)
- {
- push @fields, substr($$textref, $unkpos);
- $firstpos = $unkpos unless defined $firstpos;
- $lastpos = length $$textref;
- }
- last;
- }
-
- pos $$textref = $lastpos;
- return @fields if wantarray;
-
- $firstpos ||= 0;
- eval { substr($$textref,$firstpos,$lastpos-$firstpos)="";
- pos $$textref = $firstpos };
- return $fields[0];
-}
-
-
-sub gen_extract_tagged # ($opentag, $closetag, $pre, \%options)
-{
- my $ldel = $_[0];
- my $rdel = $_[1];
- my $pre = defined $_[2] ? $_[2] : '\s*';
- my %options = defined $_[3] ? %{$_[3]} : ();
- my $omode = defined $options{fail} ? $options{fail} : '';
- my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
- : defined($options{reject}) ? $options{reject}
- : ''
- ;
- my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
- : defined($options{ignore}) ? $options{ignore}
- : ''
- ;
-
- if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }
-
- my $posbug = pos;
- for ($ldel, $pre, $bad, $ignore) { $_ = qr/$_/ if $_ }
- pos = $posbug;
-
- my $closure = sub
- {
- my $textref = defined $_[0] ? \$_[0] : \$_;
- my @match = Text::Balanced::_match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore);
-
- return _fail(wantarray, $textref) unless @match;
- return _succeed wantarray, $textref,
- $match[2], $match[3]+$match[5]+$match[7], # MATCH
- @match[8..9,0..1,2..7]; # REM, PRE, BITS
- };
-
- bless $closure, 'Text::Balanced::Extractor';
-}
-
-package Text::Balanced::Extractor;
-
-sub extract($$) # ($self, $text)
-{
- &{$_[0]}($_[1]);
-}
-
-package Text::Balanced::ErrorMsg;
-
-use overload '""' => sub { "$_[0]->{error}, detected at offset $_[0]->{pos}" };
-
-1;
-
-__END__
-
-=head1 NAME
-
-Text::Balanced - Extract delimited text sequences from strings.
-
-
-=head1 SYNOPSIS
-
- use Text::Balanced qw (
- extract_delimited
- extract_bracketed
- extract_quotelike
- extract_codeblock
- extract_variable
- extract_tagged
- extract_multiple
-
- gen_delimited_pat
- gen_extract_tagged
- );
-
- # Extract the initial substring of $text that is delimited by
- # two (unescaped) instances of the first character in $delim.
-
- ($extracted, $remainder) = extract_delimited($text,$delim);
-
-
- # Extract the initial substring of $text that is bracketed
- # with a delimiter(s) specified by $delim (where the string
- # in $delim contains one or more of '(){}[]<>').
-
- ($extracted, $remainder) = extract_bracketed($text,$delim);
-
-
- # Extract the initial substring of $text that is bounded by
- # an XML tag.
-
- ($extracted, $remainder) = extract_tagged($text);
-
-
- # Extract the initial substring of $text that is bounded by
- # a C<BEGIN>...C<END> pair. Don't allow nested C<BEGIN> tags
-
- ($extracted, $remainder) =
- extract_tagged($text,"BEGIN","END",undef,{bad=>["BEGIN"]});
-
-
- # Extract the initial substring of $text that represents a
- # Perl "quote or quote-like operation"
-
- ($extracted, $remainder) = extract_quotelike($text);
-
-
- # Extract the initial substring of $text that represents a block
- # of Perl code, bracketed by any of character(s) specified by $delim
- # (where the string $delim contains one or more of '(){}[]<>').
-
- ($extracted, $remainder) = extract_codeblock($text,$delim);
-
-
- # Extract the initial substrings of $text that would be extracted by
- # one or more sequential applications of the specified functions
- # or regular expressions
-
- @extracted = extract_multiple($text,
- [ \&extract_bracketed,
- \&extract_quotelike,
- \&some_other_extractor_sub,
- qr/[xyz]*/,
- 'literal',
- ]);
-
-# Create a string representing an optimized pattern (a la Friedl)
-# that matches a substring delimited by any of the specified characters
-# (in this case: any type of quote or a slash)
-
- $patstring = gen_delimited_pat(q{'"`/});
-
-
-# Generate a reference to an anonymous sub that is just like extract_tagged
-# but pre-compiled and optimized for a specific pair of tags, and consequently
-# much faster (i.e. 3 times faster). It uses qr// for better performance on
-# repeated calls, so it only works under Perl 5.005 or later.
-
- $extract_head = gen_extract_tagged('<HEAD>','</HEAD>');
-
- ($extracted, $remainder) = $extract_head->($text);
-
-
-=head1 DESCRIPTION
-
-The various C<extract_...> subroutines may be used to
-extract a delimited substring, possibly after skipping a
-specified prefix string. By default, that prefix is
-optional whitespace (C</\s*/>), but you can change it to whatever
-you wish (see below).
-
-The substring to be extracted must appear at the
-current C<pos> location of the string's variable
-(or at index zero, if no C<pos> position is defined).
-In other words, the C<extract_...> subroutines I<don't>
-extract the first occurance of a substring anywhere
-in a string (like an unanchored regex would). Rather,
-they extract an occurance of the substring appearing
-immediately at the current matching position in the
-string (like a C<\G>-anchored regex would).
-
-
-
-=head2 General behaviour in list contexts
-
-In a list context, all the subroutines return a list, the first three
-elements of which are always:
-
-=over 4
-
-=item [0]
-
-The extracted string, including the specified delimiters.
-If the extraction fails an empty string is returned.
-
-=item [1]
-
-The remainder of the input string (i.e. the characters after the
-extracted string). On failure, the entire string is returned.
-
-=item [2]
-
-The skipped prefix (i.e. the characters before the extracted string).
-On failure, the empty string is returned.
-
-=back
-
-Note that in a list context, the contents of the original input text (the first
-argument) are not modified in any way.
-
-However, if the input text was passed in a variable, that variable's
-C<pos> value is updated to point at the first character after the
-extracted text. That means that in a list context the various
-subroutines can be used much like regular expressions. For example:
-
- while ( $next = (extract_quotelike($text))[0] )
- {
- # process next quote-like (in $next)
- }
-
-
-=head2 General behaviour in scalar and void contexts
-
-In a scalar context, the extracted string is returned, having first been
-removed from the input text. Thus, the following code also processes
-each quote-like operation, but actually removes them from $text:
-
- while ( $next = extract_quotelike($text) )
- {
- # process next quote-like (in $next)
- }
-
-Note that if the input text is a read-only string (i.e. a literal),
-no attempt is made to remove the extracted text.
-
-In a void context the behaviour of the extraction subroutines is
-exactly the same as in a scalar context, except (of course) that the
-extracted substring is not returned.
-
-=head2 A note about prefixes
-
-Prefix patterns are matched without any trailing modifiers (C</gimsox> etc.)
-This can bite you if you're expecting a prefix specification like
-'.*?(?=<H1>)' to skip everything up to the first <H1> tag. Such a prefix
-pattern will only succeed if the <H1> tag is on the current line, since
-. normally doesn't match newlines.
-
-To overcome this limitation, you need to turn on /s matching within
-the prefix pattern, using the C<(?s)> directive: '(?s).*?(?=<H1>)'
-
-
-=head2 C<extract_delimited>
-
-The C<extract_delimited> function formalizes the common idiom
-of extracting a single-character-delimited substring from the start of
-a string. For example, to extract a single-quote delimited string, the
-following code is typically used:
-
- ($remainder = $text) =~ s/\A('(\\.|[^'])*')//s;
- $extracted = $1;
-
-but with C<extract_delimited> it can be simplified to:
-
- ($extracted,$remainder) = extract_delimited($text, "'");
-
-C<extract_delimited> takes up to four scalars (the input text, the
-delimiters, a prefix pattern to be skipped, and any escape characters)
-and extracts the initial substring of the text that
-is appropriately delimited. If the delimiter string has multiple
-characters, the first one encountered in the text is taken to delimit
-the substring.
-The third argument specifies a prefix pattern that is to be skipped
-(but must be present!) before the substring is extracted.
-The final argument specifies the escape character to be used for each
-delimiter.
-
-All arguments are optional. If the escape characters are not specified,
-every delimiter is escaped with a backslash (C<\>).
-If the prefix is not specified, the
-pattern C<'\s*'> - optional whitespace - is used. If the delimiter set
-is also not specified, the set C</["'`]/> is used. If the text to be processed
-is not specified either, C<$_> is used.
-
-In list context, C<extract_delimited> returns a array of three
-elements, the extracted substring (I<including the surrounding
-delimiters>), the remainder of the text, and the skipped prefix (if
-any). If a suitable delimited substring is not found, the first
-element of the array is the empty string, the second is the complete
-original text, and the prefix returned in the third element is an
-empty string.
-
-In a scalar context, just the extracted substring is returned. In
-a void context, the extracted substring (and any prefix) are simply
-removed from the beginning of the first argument.
-
-Examples:
-
- # Remove a single-quoted substring from the very beginning of $text:
-
- $substring = extract_delimited($text, "'", '');
-
- # Remove a single-quoted Pascalish substring (i.e. one in which
- # doubling the quote character escapes it) from the very
- # beginning of $text:
-
- $substring = extract_delimited($text, "'", '', "'");
-
- # Extract a single- or double- quoted substring from the
- # beginning of $text, optionally after some whitespace
- # (note the list context to protect $text from modification):
-
- ($substring) = extract_delimited $text, q{"'};
-
-
- # Delete the substring delimited by the first '/' in $text:
-
- $text = join '', (extract_delimited($text,'/','[^/]*')[2,1];
-
-Note that this last example is I<not> the same as deleting the first
-quote-like pattern. For instance, if C<$text> contained the string:
-
- "if ('./cmd' =~ m/$UNIXCMD/s) { $cmd = $1; }"
-
-then after the deletion it would contain:
-
- "if ('.$UNIXCMD/s) { $cmd = $1; }"
-
-not:
-
- "if ('./cmd' =~ ms) { $cmd = $1; }"
-
-
-See L<"extract_quotelike"> for a (partial) solution to this problem.
-
-
-=head2 C<extract_bracketed>
-
-Like C<"extract_delimited">, the C<extract_bracketed> function takes
-up to three optional scalar arguments: a string to extract from, a delimiter
-specifier, and a prefix pattern. As before, a missing prefix defaults to
-optional whitespace and a missing text defaults to C<$_>. However, a missing
-delimiter specifier defaults to C<'{}()[]E<lt>E<gt>'> (see below).
-
-C<extract_bracketed> extracts a balanced-bracket-delimited
-substring (using any one (or more) of the user-specified delimiter
-brackets: '(..)', '{..}', '[..]', or '<..>'). Optionally it will also
-respect quoted unbalanced brackets (see below).
-
-A "delimiter bracket" is a bracket in list of delimiters passed as
-C<extract_bracketed>'s second argument. Delimiter brackets are
-specified by giving either the left or right (or both!) versions
-of the required bracket(s). Note that the order in which
-two or more delimiter brackets are specified is not significant.
-
-A "balanced-bracket-delimited substring" is a substring bounded by
-matched brackets, such that any other (left or right) delimiter
-bracket I<within> the substring is also matched by an opposite
-(right or left) delimiter bracket I<at the same level of nesting>. Any
-type of bracket not in the delimiter list is treated as an ordinary
-character.
-
-In other words, each type of bracket specified as a delimiter must be
-balanced and correctly nested within the substring, and any other kind of
-("non-delimiter") bracket in the substring is ignored.
-
-For example, given the string:
-
- $text = "{ an '[irregularly :-(] {} parenthesized >:-)' string }";
-
-then a call to C<extract_bracketed> in a list context:
-
- @result = extract_bracketed( $text, '{}' );
-
-would return:
-
- ( "{ an '[irregularly :-(] {} parenthesized >:-)' string }" , "" , "" )
-
-since both sets of C<'{..}'> brackets are properly nested and evenly balanced.
-(In a scalar context just the first element of the array would be returned. In
-a void context, C<$text> would be replaced by an empty string.)
-
-Likewise the call in:
-
- @result = extract_bracketed( $text, '{[' );
-
-would return the same result, since all sets of both types of specified
-delimiter brackets are correctly nested and balanced.
-
-However, the call in:
-
- @result = extract_bracketed( $text, '{([<' );
-
-would fail, returning:
-
- ( undef , "{ an '[irregularly :-(] {} parenthesized >:-)' string }" );
-
-because the embedded pairs of C<'(..)'>s and C<'[..]'>s are "cross-nested" and
-the embedded C<'E<gt>'> is unbalanced. (In a scalar context, this call would
-return an empty string. In a void context, C<$text> would be unchanged.)
-
-Note that the embedded single-quotes in the string don't help in this
-case, since they have not been specified as acceptable delimiters and are
-therefore treated as non-delimiter characters (and ignored).
-
-However, if a particular species of quote character is included in the
-delimiter specification, then that type of quote will be correctly handled.
-for example, if C<$text> is:
-
- $text = '<A HREF=">>>>">link</A>';
-
-then
-
- @result = extract_bracketed( $text, '<">' );
-
-returns:
-
- ( '<A HREF=">>>>">', 'link</A>', "" )
-
-as expected. Without the specification of C<"> as an embedded quoter:
-
- @result = extract_bracketed( $text, '<>' );
-
-the result would be:
-
- ( '<A HREF=">', '>>>">link</A>', "" )
-
-In addition to the quote delimiters C<'>, C<">, and C<`>, full Perl quote-like
-quoting (i.e. q{string}, qq{string}, etc) can be specified by including the
-letter 'q' as a delimiter. Hence:
-
- @result = extract_bracketed( $text, '<q>' );
-
-would correctly match something like this:
-
- $text = '<leftop: conj /and/ conj>';
-
-See also: C<"extract_quotelike"> and C<"extract_codeblock">.
-
-
-=head2 C<extract_variable>
-
-C<extract_variable> extracts any valid Perl variable or
-variable-involved expression, including scalars, arrays, hashes, array
-accesses, hash look-ups, method calls through objects, subroutine calles
-through subroutine references, etc.
-
-The subroutine takes up to two optional arguments:
-
-=over 4
-
-=item 1.
-
-A string to be processed (C<$_> if the string is omitted or C<undef>)
-
-=item 2.
-
-A string specifying a pattern to be matched as a prefix (which is to be
-skipped). If omitted, optional whitespace is skipped.
-
-=back
-
-On success in a list context, an array of 3 elements is returned. The
-elements are:
-
-=over 4
-
-=item [0]
-
-the extracted variable, or variablish expression
-
-=item [1]
-
-the remainder of the input text,
-
-=item [2]
-
-the prefix substring (if any),
-
-=back
-
-On failure, all of these values (except the remaining text) are C<undef>.
-
-In a scalar context, C<extract_variable> returns just the complete
-substring that matched a variablish expression. C<undef> is returned on
-failure. In addition, the original input text has the returned substring
-(and any prefix) removed from it.
-
-In a void context, the input text just has the matched substring (and
-any specified prefix) removed.
-
-
-=head2 C<extract_tagged>
-
-C<extract_tagged> extracts and segments text between (balanced)
-specified tags.
-
-The subroutine takes up to five optional arguments:
-
-=over 4
-
-=item 1.
-
-A string to be processed (C<$_> if the string is omitted or C<undef>)
-
-=item 2.
-
-A string specifying a pattern to be matched as the opening tag.
-If the pattern string is omitted (or C<undef>) then a pattern
-that matches any standard XML tag is used.
-
-=item 3.
-
-A string specifying a pattern to be matched at the closing tag.
-If the pattern string is omitted (or C<undef>) then the closing
-tag is constructed by inserting a C</> after any leading bracket
-characters in the actual opening tag that was matched (I<not> the pattern
-that matched the tag). For example, if the opening tag pattern
-is specified as C<'{{\w+}}'> and actually matched the opening tag
-C<"{{DATA}}">, then the constructed closing tag would be C<"{{/DATA}}">.
-
-=item 4.
-
-A string specifying a pattern to be matched as a prefix (which is to be
-skipped). If omitted, optional whitespace is skipped.
-
-=item 5.
-
-A hash reference containing various parsing options (see below)
-
-=back
-
-The various options that can be specified are:
-
-=over 4
-
-=item C<reject =E<gt> $listref>
-
-The list reference contains one or more strings specifying patterns
-that must I<not> appear within the tagged text.
-
-For example, to extract
-an HTML link (which should not contain nested links) use:
-
- extract_tagged($text, '<A>', '</A>', undef, {reject => ['<A>']} );
-
-=item C<ignore =E<gt> $listref>
-
-The list reference contains one or more strings specifying patterns
-that are I<not> be be treated as nested tags within the tagged text
-(even if they would match the start tag pattern).
-
-For example, to extract an arbitrary XML tag, but ignore "empty" elements:
-
- extract_tagged($text, undef, undef, undef, {ignore => ['<[^>]*/>']} );
-
-(also see L<"gen_delimited_pat"> below).
-
-
-=item C<fail =E<gt> $str>
-
-The C<fail> option indicates the action to be taken if a matching end
-tag is not encountered (i.e. before the end of the string or some
-C<reject> pattern matches). By default, a failure to match a closing
-tag causes C<extract_tagged> to immediately fail.
-
-However, if the string value associated with <reject> is "MAX", then
-C<extract_tagged> returns the complete text up to the point of failure.
-If the string is "PARA", C<extract_tagged> returns only the first paragraph
-after the tag (up to the first line that is either empty or contains
-only whitespace characters).
-If the string is "", the the default behaviour (i.e. failure) is reinstated.
-
-For example, suppose the start tag "/para" introduces a paragraph, which then
-continues until the next "/endpara" tag or until another "/para" tag is
-encountered:
-
- $text = "/para line 1\n\nline 3\n/para line 4";
-
- extract_tagged($text, '/para', '/endpara', undef,
- {reject => '/para', fail => MAX );
-
- # EXTRACTED: "/para line 1\n\nline 3\n"
-
-Suppose instead, that if no matching "/endpara" tag is found, the "/para"
-tag refers only to the immediately following paragraph:
-
- $text = "/para line 1\n\nline 3\n/para line 4";
-
- extract_tagged($text, '/para', '/endpara', undef,
- {reject => '/para', fail => MAX );
-
- # EXTRACTED: "/para line 1\n"
-
-Note that the specified C<fail> behaviour applies to nested tags as well.
-
-=back
-
-On success in a list context, an array of 6 elements is returned. The elements are:
-
-=over 4
-
-=item [0]
-
-the extracted tagged substring (including the outermost tags),
-
-=item [1]
-
-the remainder of the input text,
-
-=item [2]
-
-the prefix substring (if any),
-
-=item [3]
-
-the opening tag
-
-=item [4]
-
-the text between the opening and closing tags
-
-=item [5]
-
-the closing tag (or "" if no closing tag was found)
-
-=back
-
-On failure, all of these values (except the remaining text) are C<undef>.
-
-In a scalar context, C<extract_tagged> returns just the complete
-substring that matched a tagged text (including the start and end
-tags). C<undef> is returned on failure. In addition, the original input
-text has the returned substring (and any prefix) removed from it.
-
-In a void context, the input text just has the matched substring (and
-any specified prefix) removed.
-
-
-=head2 C<gen_extract_tagged>
-
-(Note: This subroutine is only available under Perl5.005)
-
-C<gen_extract_tagged> generates a new anonymous subroutine which
-extracts text between (balanced) specified tags. In other words,
-it generates a function identical in function to C<extract_tagged>.
-
-The difference between C<extract_tagged> and the anonymous
-subroutines generated by
-C<gen_extract_tagged>, is that those generated subroutines:
-
-=over 4
-
-=item *
-
-do not have to reparse tag specification or parsing options every time
-they are called (whereas C<extract_tagged> has to effectively rebuild
-its tag parser on every call);
-
-=item *
-
-make use of the new qr// construct to pre-compile the regexes they use
-(whereas C<extract_tagged> uses standard string variable interpolation
-to create tag-matching patterns).
-
-=back
-
-The subroutine takes up to four optional arguments (the same set as
-C<extract_tagged> except for the string to be processed). It returns
-a reference to a subroutine which in turn takes a single argument (the text to
-be extracted from).
-
-In other words, the implementation of C<extract_tagged> is exactly
-equivalent to:
-
- sub extract_tagged
- {
- my $text = shift;
- $extractor = gen_extract_tagged(@_);
- return $extractor->($text);
- }
-
-(although C<extract_tagged> is not currently implemented that way, in order
-to preserve pre-5.005 compatibility).
-
-Using C<gen_extract_tagged> to create extraction functions for specific tags
-is a good idea if those functions are going to be called more than once, since
-their performance is typically twice as good as the more general-purpose
-C<extract_tagged>.
-
-
-=head2 C<extract_quotelike>
-
-C<extract_quotelike> attempts to recognize, extract, and segment any
-one of the various Perl quotes and quotelike operators (see
-L<perlop(3)>) Nested backslashed delimiters, embedded balanced bracket
-delimiters (for the quotelike operators), and trailing modifiers are
-all caught. For example, in:
-
- extract_quotelike 'q # an octothorpe: \# (not the end of the q!) #'
-
- extract_quotelike ' "You said, \"Use sed\"." '
-
- extract_quotelike ' s{([A-Z]{1,8}\.[A-Z]{3})} /\L$1\E/; '
-
- extract_quotelike ' tr/\\\/\\\\/\\\//ds; '
-
-the full Perl quotelike operations are all extracted correctly.
-
-Note too that, when using the /x modifier on a regex, any comment
-containing the current pattern delimiter will cause the regex to be
-immediately terminated. In other words:
-
- 'm /
- (?i) # CASE INSENSITIVE
- [a-z_] # LEADING ALPHABETIC/UNDERSCORE
- [a-z0-9]* # FOLLOWED BY ANY NUMBER OF ALPHANUMERICS
- /x'
-
-will be extracted as if it were:
-
- 'm /
- (?i) # CASE INSENSITIVE
- [a-z_] # LEADING ALPHABETIC/'
-
-This behaviour is identical to that of the actual compiler.
-
-C<extract_quotelike> takes two arguments: the text to be processed and
-a prefix to be matched at the very beginning of the text. If no prefix
-is specified, optional whitespace is the default. If no text is given,
-C<$_> is used.
-
-In a list context, an array of 11 elements is returned. The elements are:
-
-=over 4
-
-=item [0]
-
-the extracted quotelike substring (including trailing modifiers),
-
-=item [1]
-
-the remainder of the input text,
-
-=item [2]
-
-the prefix substring (if any),
-
-=item [3]
-
-the name of the quotelike operator (if any),
-
-=item [4]
-
-the left delimiter of the first block of the operation,
-
-=item [5]
-
-the text of the first block of the operation
-(that is, the contents of
-a quote, the regex of a match or substitution or the target list of a
-translation),
-
-=item [6]
-
-the right delimiter of the first block of the operation,
-
-=item [7]
-
-the left delimiter of the second block of the operation
-(that is, if it is a C<s>, C<tr>, or C<y>),
-
-=item [8]
-
-the text of the second block of the operation
-(that is, the replacement of a substitution or the translation list
-of a translation),
-
-=item [9]
-
-the right delimiter of the second block of the operation (if any),
-
-=item [10]
-
-the trailing modifiers on the operation (if any).
-
-=back
-
-For each of the fields marked "(if any)" the default value on success is
-an empty string.
-On failure, all of these values (except the remaining text) are C<undef>.
-
-
-In a scalar context, C<extract_quotelike> returns just the complete substring
-that matched a quotelike operation (or C<undef> on failure). In a scalar or
-void context, the input text has the same substring (and any specified
-prefix) removed.
-
-Examples:
-
- # Remove the first quotelike literal that appears in text
-
- $quotelike = extract_quotelike($text,'.*?');
-
- # Replace one or more leading whitespace-separated quotelike
- # literals in $_ with "<QLL>"
-
- do { $_ = join '<QLL>', (extract_quotelike)[2,1] } until $@;
-
-
- # Isolate the search pattern in a quotelike operation from $text
-
- ($op,$pat) = (extract_quotelike $text)[3,5];
- if ($op =~ /[ms]/)
- {
- print "search pattern: $pat\n";
- }
- else
- {
- print "$op is not a pattern matching operation\n";
- }
-
-
-=head2 C<extract_quotelike> and "here documents"
-
-C<extract_quotelike> can successfully extract "here documents" from an input
-string, but with an important caveat in list contexts.
-
-Unlike other types of quote-like literals, a here document is rarely
-a contiguous substring. For example, a typical piece of code using
-here document might look like this:
-
- <<'EOMSG' || die;
- This is the message.
- EOMSG
- exit;
-
-Given this as an input string in a scalar context, C<extract_quotelike>
-would correctly return the string "<<'EOMSG'\nThis is the message.\nEOMSG",
-leaving the string " || die;\nexit;" in the original variable. In other words,
-the two separate pieces of the here document are successfully extracted and
-concatenated.
-
-In a list context, C<extract_quotelike> would return the list
-
-=over 4
-
-=item [0]
-
-"<<'EOMSG'\nThis is the message.\nEOMSG\n" (i.e. the full extracted here document,
-including fore and aft delimiters),
-
-=item [1]
-
-" || die;\nexit;" (i.e. the remainder of the input text, concatenated),
-
-=item [2]
-
-"" (i.e. the prefix substring -- trivial in this case),
-
-=item [3]
-
-"<<" (i.e. the "name" of the quotelike operator)
-
-=item [4]
-
-"'EOMSG'" (i.e. the left delimiter of the here document, including any quotes),
-
-=item [5]
-
-"This is the message.\n" (i.e. the text of the here document),
-
-=item [6]
-
-"EOMSG" (i.e. the right delimiter of the here document),
-
-=item [7..10]
-
-"" (a here document has no second left delimiter, second text, second right
-delimiter, or trailing modifiers).
-
-=back
-
-However, the matching position of the input variable would be set to
-"exit;" (i.e. I<after> the closing delimiter of the here document),
-which would cause the earlier " || die;\nexit;" to be skipped in any
-sequence of code fragment extractions.
-
-To avoid this problem, when it encounters a here document whilst
-extracting from a modifiable string, C<extract_quotelike> silently
-rearranges the string to an equivalent piece of Perl:
-
- <<'EOMSG'
- This is the message.
- EOMSG
- || die;
- exit;
-
-in which the here document I<is> contiguous. It still leaves the
-matching position after the here document, but now the rest of the line
-on which the here document starts is not skipped.
-
-To prevent <extract_quotelike> from mucking about with the input in this way
-(this is the only case where a list-context C<extract_quotelike> does so),
-you can pass the input variable as an interpolated literal:
-
- $quotelike = extract_quotelike("$var");
-
-
-=head2 C<extract_codeblock>
-
-C<extract_codeblock> attempts to recognize and extract a balanced
-bracket delimited substring that may contain unbalanced brackets
-inside Perl quotes or quotelike operations. That is, C<extract_codeblock>
-is like a combination of C<"extract_bracketed"> and
-C<"extract_quotelike">.
-
-C<extract_codeblock> takes the same initial three parameters as C<extract_bracketed>:
-a text to process, a set of delimiter brackets to look for, and a prefix to
-match first. It also takes an optional fourth parameter, which allows the
-outermost delimiter brackets to be specified separately (see below).
-
-Omitting the first argument (input text) means process C<$_> instead.
-Omitting the second argument (delimiter brackets) indicates that only C<'{'> is to be used.
-Omitting the third argument (prefix argument) implies optional whitespace at the start.
-Omitting the fourth argument (outermost delimiter brackets) indicates that the
-value of the second argument is to be used for the outermost delimiters.
-
-Once the prefix an dthe outermost opening delimiter bracket have been
-recognized, code blocks are extracted by stepping through the input text and
-trying the following alternatives in sequence:
-
-=over 4
-
-=item 1.
-
-Try and match a closing delimiter bracket. If the bracket was the same
-species as the last opening bracket, return the substring to that
-point. If the bracket was mismatched, return an error.
-
-=item 2.
-
-Try to match a quote or quotelike operator. If found, call
-C<extract_quotelike> to eat it. If C<extract_quotelike> fails, return
-the error it returned. Otherwise go back to step 1.
-
-=item 3.
-
-Try to match an opening delimiter bracket. If found, call
-C<extract_codeblock> recursively to eat the embedded block. If the
-recursive call fails, return an error. Otherwise, go back to step 1.
-
-=item 4.
-
-Unconditionally match a bareword or any other single character, and
-then go back to step 1.
-
-=back
-
-
-Examples:
-
- # Find a while loop in the text
-
- if ($text =~ s/.*?while\s*\{/{/)
- {
- $loop = "while " . extract_codeblock($text);
- }
-
- # Remove the first round-bracketed list (which may include
- # round- or curly-bracketed code blocks or quotelike operators)
-
- extract_codeblock $text, "(){}", '[^(]*';
-
-
-The ability to specify a different outermost delimiter bracket is useful
-in some circumstances. For example, in the Parse::RecDescent module,
-parser actions which are to be performed only on a successful parse
-are specified using a C<E<lt>defer:...E<gt>> directive. For example:
-
- sentence: subject verb object
- <defer: {$::theVerb = $item{verb}} >
-
-Parse::RecDescent uses C<extract_codeblock($text, '{}E<lt>E<gt>')> to extract the code
-within the C<E<lt>defer:...E<gt>> directive, but there's a problem.
-
-A deferred action like this:
-
- <defer: {if ($count>10) {$count--}} >
-
-will be incorrectly parsed as:
-
- <defer: {if ($count>
-
-because the "less than" operator is interpreted as a closing delimiter.
-
-But, by extracting the directive using
-S<C<extract_codeblock($text, '{}', undef, 'E<lt>E<gt>')>>
-the '>' character is only treated as a delimited at the outermost
-level of the code block, so the directive is parsed correctly.
-
-=head2 C<extract_multiple>
-
-The C<extract_multiple> subroutine takes a string to be processed and a
-list of extractors (subroutines or regular expressions) to apply to that string.
-
-In an array context C<extract_multiple> returns an array of substrings
-of the original string, as extracted by the specified extractors.
-In a scalar context, C<extract_multiple> returns the first
-substring successfully extracted from the original string. In both
-scalar and void contexts the original string has the first successfully
-extracted substring removed from it. In all contexts
-C<extract_multiple> starts at the current C<pos> of the string, and
-sets that C<pos> appropriately after it matches.
-
-Hence, the aim of of a call to C<extract_multiple> in a list context
-is to split the processed string into as many non-overlapping fields as
-possible, by repeatedly applying each of the specified extractors
-to the remainder of the string. Thus C<extract_multiple> is
-a generalized form of Perl's C<split> subroutine.
-
-The subroutine takes up to four optional arguments:
-
-=over 4
-
-=item 1.
-
-A string to be processed (C<$_> if the string is omitted or C<undef>)
-
-=item 2.
-
-A reference to a list of subroutine references and/or qr// objects and/or
-literal strings and/or hash references, specifying the extractors
-to be used to split the string. If this argument is omitted (or
-C<undef>) the list:
-
- [
- sub { extract_variable($_[0], '') },
- sub { extract_quotelike($_[0],'') },
- sub { extract_codeblock($_[0],'{}','') },
- ]
-
-is used.
-
-
-=item 3.
-
-An number specifying the maximum number of fields to return. If this
-argument is omitted (or C<undef>), split continues as long as possible.
-
-If the third argument is I<N>, then extraction continues until I<N> fields
-have been successfully extracted, or until the string has been completely
-processed.
-
-Note that in scalar and void contexts the value of this argument is
-automatically reset to 1 (under C<-w>, a warning is issued if the argument
-has to be reset).
-
-=item 4.
-
-A value indicating whether unmatched substrings (see below) within the
-text should be skipped or returned as fields. If the value is true,
-such substrings are skipped. Otherwise, they are returned.
-
-=back
-
-The extraction process works by applying each extractor in
-sequence to the text string.
-
-If the extractor is a subroutine it is called in a list context and is
-expected to return a list of a single element, namely the extracted
-text. It may optionally also return two further arguments: a string
-representing the text left after extraction (like $' for a pattern
-match), and a string representing any prefix skipped before the
-extraction (like $` in a pattern match). Note that this is designed
-to facilitate the use of other Text::Balanced subroutines with
-C<extract_multiple>. Note too that the value returned by an extractor
-subroutine need not bear any relationship to the corresponding substring
-of the original text (see examples below).
-
-If the extractor is a precompiled regular expression or a string,
-it is matched against the text in a scalar context with a leading
-'\G' and the gc modifiers enabled. The extracted value is either
-$1 if that variable is defined after the match, or else the
-complete match (i.e. $&).
-
-If the extractor is a hash reference, it must contain exactly one element.
-The value of that element is one of the
-above extractor types (subroutine reference, regular expression, or string).
-The key of that element is the name of a class into which the successful
-return value of the extractor will be blessed.
-
-If an extractor returns a defined value, that value is immediately
-treated as the next extracted field and pushed onto the list of fields.
-If the extractor was specified in a hash reference, the field is also
-blessed into the appropriate class,
-
-If the extractor fails to match (in the case of a regex extractor), or returns an empty list or an undefined value (in the case of a subroutine extractor), it is
-assumed to have failed to extract.
-If none of the extractor subroutines succeeds, then one
-character is extracted from the start of the text and the extraction
-subroutines reapplied. Characters which are thus removed are accumulated and
-eventually become the next field (unless the fourth argument is true, in which
-case they are disgarded).
-
-For example, the following extracts substrings that are valid Perl variables:
-
- @fields = extract_multiple($text,
- [ sub { extract_variable($_[0]) } ],
- undef, 1);
-
-This example separates a text into fields which are quote delimited,
-curly bracketed, and anything else. The delimited and bracketed
-parts are also blessed to identify them (the "anything else" is unblessed):
-
- @fields = extract_multiple($text,
- [
- { Delim => sub { extract_delimited($_[0],q{'"}) } },
- { Brack => sub { extract_bracketed($_[0],'{}') } },
- ]);
-
-This call extracts the next single substring that is a valid Perl quotelike
-operator (and removes it from $text):
-
- $quotelike = extract_multiple($text,
- [
- sub { extract_quotelike($_[0]) },
- ], undef, 1);
-
-Finally, here is yet another way to do comma-separated value parsing:
-
- @fields = extract_multiple($csv_text,
- [
- sub { extract_delimited($_[0],q{'"}) },
- qr/([^,]+)(.*)/,
- ],
- undef,1);
-
-The list in the second argument means:
-I<"Try and extract a ' or " delimited string, otherwise extract anything up to a comma...">.
-The undef third argument means:
-I<"...as many times as possible...">,
-and the true value in the fourth argument means
-I<"...discarding anything else that appears (i.e. the commas)">.
-
-If you wanted the commas preserved as separate fields (i.e. like split
-does if your split pattern has capturing parentheses), you would
-just make the last parameter undefined (or remove it).
-
-
-=head2 C<gen_delimited_pat>
-
-The C<gen_delimited_pat> subroutine takes a single (string) argument and
- > builds a Friedl-style optimized regex that matches a string delimited
-by any one of the characters in the single argument. For example:
-
- gen_delimited_pat(q{'"})
-
-returns the regex:
-
- (?:\"(?:\\\"|(?!\").)*\"|\'(?:\\\'|(?!\').)*\')
-
-Note that the specified delimiters are automatically quotemeta'd.
-
-A typical use of C<gen_delimited_pat> would be to build special purpose tags
-for C<extract_tagged>. For example, to properly ignore "empty" XML elements
-(which might contain quoted strings):
-
- my $empty_tag = '<(' . gen_delimited_pat(q{'"}) . '|.)+/>';
-
- extract_tagged($text, undef, undef, undef, {ignore => [$empty_tag]} );
-
-
-C<gen_delimited_pat> may also be called with an optional second argument,
-which specifies the "escape" character(s) to be used for each delimiter.
-For example to match a Pascal-style string (where ' is the delimiter
-and '' is a literal ' within the string):
-
- gen_delimited_pat(q{'},q{'});
-
-Different escape characters can be specified for different delimiters.
-For example, to specify that '/' is the escape for single quotes
-and '%' is the escape for double quotes:
-
- gen_delimited_pat(q{'"},q{/%});
-
-If more delimiters than escape chars are specified, the last escape char
-is used for the remaining delimiters.
-If no escape char is specified for a given specified delimiter, '\' is used.
-
-Note that
-C<gen_delimited_pat> was previously called
-C<delimited_pat>. That name may still be used, but is now deprecated.
-
-
-=head1 DIAGNOSTICS
-
-In a list context, all the functions return C<(undef,$original_text)>
-on failure. In a scalar context, failure is indicated by returning C<undef>
-(in this case the input text is not modified in any way).
-
-In addition, on failure in I<any> context, the C<$@> variable is set.
-Accessing C<$@-E<gt>{error}> returns one of the error diagnostics listed
-below.
-Accessing C<$@-E<gt>{pos}> returns the offset into the original string at
-which the error was detected (although not necessarily where it occurred!)
-Printing C<$@> directly produces the error message, with the offset appended.
-On success, the C<$@> variable is guaranteed to be C<undef>.
-
-The available diagnostics are:
-
-=over 4
-
-=item C<Did not find a suitable bracket: "%s">
-
-The delimiter provided to C<extract_bracketed> was not one of
-C<'()[]E<lt>E<gt>{}'>.
-
-=item C<Did not find prefix: /%s/>
-
-A non-optional prefix was specified but wasn't found at the start of the text.
-
-=item C<Did not find opening bracket after prefix: "%s">
-
-C<extract_bracketed> or C<extract_codeblock> was expecting a
-particular kind of bracket at the start of the text, and didn't find it.
-
-=item C<No quotelike operator found after prefix: "%s">
-
-C<extract_quotelike> didn't find one of the quotelike operators C<q>,
-C<qq>, C<qw>, C<qx>, C<s>, C<tr> or C<y> at the start of the substring
-it was extracting.
-
-=item C<Unmatched closing bracket: "%c">
-
-C<extract_bracketed>, C<extract_quotelike> or C<extract_codeblock> encountered
-a closing bracket where none was expected.
-
-=item C<Unmatched opening bracket(s): "%s">
-
-C<extract_bracketed>, C<extract_quotelike> or C<extract_codeblock> ran
-out of characters in the text before closing one or more levels of nested
-brackets.
-
-=item C<Unmatched embedded quote (%s)>
-
-C<extract_bracketed> attempted to match an embedded quoted substring, but
-failed to find a closing quote to match it.
-
-=item C<Did not find closing delimiter to match '%s'>
-
-C<extract_quotelike> was unable to find a closing delimiter to match the
-one that opened the quote-like operation.
-
-=item C<Mismatched closing bracket: expected "%c" but found "%s">
-
-C<extract_bracketed>, C<extract_quotelike> or C<extract_codeblock> found
-a valid bracket delimiter, but it was the wrong species. This usually
-indicates a nesting error, but may indicate incorrect quoting or escaping.
-
-=item C<No block delimiter found after quotelike "%s">
-
-C<extract_quotelike> or C<extract_codeblock> found one of the
-quotelike operators C<q>, C<qq>, C<qw>, C<qx>, C<s>, C<tr> or C<y>
-without a suitable block after it.
-
-=item C<Did not find leading dereferencer>
-
-C<extract_variable> was expecting one of '$', '@', or '%' at the start of
-a variable, but didn't find any of them.
-
-=item C<Bad identifier after dereferencer>
-
-C<extract_variable> found a '$', '@', or '%' indicating a variable, but that
-character was not followed by a legal Perl identifier.
-
-=item C<Did not find expected opening bracket at %s>
-
-C<extract_codeblock> failed to find any of the outermost opening brackets
-that were specified.
-
-=item C<Improperly nested codeblock at %s>
-
-A nested code block was found that started with a delimiter that was specified
-as being only to be used as an outermost bracket.
-
-=item C<Missing second block for quotelike "%s">
-
-C<extract_codeblock> or C<extract_quotelike> found one of the
-quotelike operators C<s>, C<tr> or C<y> followed by only one block.
-
-=item C<No match found for opening bracket>
-
-C<extract_codeblock> failed to find a closing bracket to match the outermost
-opening bracket.
-
-=item C<Did not find opening tag: /%s/>
-
-C<extract_tagged> did not find a suitable opening tag (after any specified
-prefix was removed).
-
-=item C<Unable to construct closing tag to match: /%s/>
-
-C<extract_tagged> matched the specified opening tag and tried to
-modify the matched text to produce a matching closing tag (because
-none was specified). It failed to generate the closing tag, almost
-certainly because the opening tag did not start with a
-bracket of some kind.
-
-=item C<Found invalid nested tag: %s>
-
-C<extract_tagged> found a nested tag that appeared in the "reject" list
-(and the failure mode was not "MAX" or "PARA").
-
-=item C<Found unbalanced nested tag: %s>
-
-C<extract_tagged> found a nested opening tag that was not matched by a
-corresponding nested closing tag (and the failure mode was not "MAX" or "PARA").
-
-=item C<Did not find closing tag>
-
-C<extract_tagged> reached the end of the text without finding a closing tag
-to match the original opening tag (and the failure mode was not
-"MAX" or "PARA").
-
-
-
-
-=back
-
-
-=head1 AUTHOR
-
-Damian Conway (damian@conway.org)
-
-
-=head1 BUGS AND IRRITATIONS
-
-There are undoubtedly serious bugs lurking somewhere in this code, if
-only because parts of it give the impression of understanding a great deal
-more about Perl than they really do.
-
-Bug reports and other feedback are most welcome.
-
-
-=head1 COPYRIGHT
-
- Copyright (c) 1997-2001, Damian Conway. All Rights Reserved.
- This module is free software. It may be used, redistributed
- and/or modified under the same terms as Perl itself.