diff options
Diffstat (limited to 'lib/XML/Stream/Parser.pm')
| -rw-r--r-- | lib/XML/Stream/Parser.pm | 567 |
1 files changed, 0 insertions, 567 deletions
diff --git a/lib/XML/Stream/Parser.pm b/lib/XML/Stream/Parser.pm deleted file mode 100644 index 9ca7832..0000000 --- a/lib/XML/Stream/Parser.pm +++ /dev/null @@ -1,567 +0,0 @@ -############################################################################## -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Library General Public -# License as published by the Free Software Foundation; either -# version 2 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Library General Public License for more details. -# -# You should have received a copy of the GNU Library General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# Jabber -# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ -# -############################################################################## - -package XML::Stream::Parser; - -=head1 NAME - - XML::Stream::Parser - SAX XML Parser for XML Streams - -=head1 SYNOPSIS - - Light weight XML parser that builds XML::Parser::Tree objects from the - incoming stream and passes them to a function to tell whoever is using - it that there are new packets. - -=head1 DESCRIPTION - - This module provides a very light weight parser - -=head1 METHODS - -=head1 EXAMPLES - -=head1 AUTHOR - -By Ryan Eatmon in January of 2001 for http://jabber.org/ - -=head1 COPYRIGHT - -This module is free software; you can redistribute it and/or modify -it under the same terms as Perl itself. - -=cut - -use strict; -use vars qw( $VERSION ); - -$VERSION = "1.22"; - -sub new -{ - my $self = { }; - - bless($self); - - my %args; - while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); } - - $self->{PARSING} = 0; - $self->{DOC} = 0; - $self->{XML} = ""; - $self->{CNAME} = (); - $self->{CURR} = 0; - - $args{nonblocking} = 0 unless exists($args{nonblocking}); - - $self->{NONBLOCKING} = delete($args{nonblocking}); - - $self->{DEBUGTIME} = 0; - $self->{DEBUGTIME} = $args{debugtime} if exists($args{debugtime}); - - $self->{DEBUGLEVEL} = 0; - $self->{DEBUGLEVEL} = $args{debuglevel} if exists($args{debuglevel}); - - $self->{DEBUGFILE} = ""; - - if (exists($args{debugfh}) && ($args{debugfh} ne "")) - { - $self->{DEBUGFILE} = $args{debugfh}; - $self->{DEBUG} = 1; - } - - if ((exists($args{debugfh}) && ($args{debugfh} eq "")) || - (exists($args{debug}) && ($args{debug} ne ""))) - { - $self->{DEBUG} = 1; - if (lc($args{debug}) eq "stdout") - { - $self->{DEBUGFILE} = new FileHandle(">&STDERR"); - $self->{DEBUGFILE}->autoflush(1); - } - else - { - if (-e $args{debug}) - { - if (-w $args{debug}) - { - $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); - $self->{DEBUGFILE}->autoflush(1); - } - else - { - print "WARNING: debug file ($args{debug}) is not writable by you\n"; - print " No debug information being saved.\n"; - $self->{DEBUG} = 0; - } - } - else - { - $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); - if (defined($self->{DEBUGFILE})) - { - $self->{DEBUGFILE}->autoflush(1); - } - else - { - print "WARNING: debug file ($args{debug}) does not exist \n"; - print " and is not writable by you.\n"; - print " No debug information being saved.\n"; - $self->{DEBUG} = 0; - } - } - } - } - - $self->{SID} = exists($args{sid}) ? $args{sid} : "__xmlstream__:sid"; - - $self->{STYLE} = (exists($args{style}) ? lc($args{style}) : "tree"); - $self->{DTD} = (exists($args{dtd}) ? lc($args{dtd}) : 0); - - if ($self->{STYLE} eq "tree") - { - $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; - $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; - $self->{HANDLER}->{startElement} = sub{ &XML::Stream::Tree::_handle_element(@_); }; - $self->{HANDLER}->{endElement} = sub{ &XML::Stream::Tree::_handle_close(@_); }; - $self->{HANDLER}->{characters} = sub{ &XML::Stream::Tree::_handle_cdata(@_); }; - } - elsif ($self->{STYLE} eq "node") - { - $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; - $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; - $self->{HANDLER}->{startElement} = sub{ &XML::Stream::Node::_handle_element(@_); }; - $self->{HANDLER}->{endElement} = sub{ &XML::Stream::Node::_handle_close(@_); }; - $self->{HANDLER}->{characters} = sub{ &XML::Stream::Node::_handle_cdata(@_); }; - } - $self->setHandlers(%{$args{handlers}}); - - $self->{XMLONHOLD} = ""; - - return $self; -} - - -########################################################################### -# -# debug - prints the arguments to the debug log if debug is turned on. -# -########################################################################### -sub debug -{ - return if ($_[1] > $_[0]->{DEBUGLEVEL}); - my $self = shift; - my ($limit,@args) = @_; - return if ($self->{DEBUGFILE} eq ""); - my $fh = $self->{DEBUGFILE}; - if ($self->{DEBUGTIME} == 1) - { - my ($sec,$min,$hour) = localtime(time); - print $fh sprintf("[%02d:%02d:%02d] ",$hour,$min,$sec); - } - print $fh "XML::Stream::Parser: $self->{STYLE}: @args\n"; -} - - -sub setSID -{ - my $self = shift; - my $sid = shift; - $self->{SID} = $sid; -} - - -sub getSID -{ - my $self = shift; - return $self->{SID}; -} - - -sub setHandlers -{ - my $self = shift; - my (%handlers) = @_; - - foreach my $handler (keys(%handlers)) - { - $self->{HANDLER}->{$handler} = $handlers{$handler}; - } -} - - -sub parse -{ - my $self = shift; - my $xml = shift; - - return unless defined($xml); - return if ($xml eq ""); - - if ($self->{XMLONHOLD} ne "") - { - $self->{XML} = $self->{XMLONHOLD}; - $self->{XMLONHOLD} = ""; - } - - # XXX change this to not use regex? - while($xml =~ s/<\!--.*?-->//gs) {} - - $self->{XML} .= $xml; - - return if ($self->{PARSING} == 1); - - $self->{PARSING} = 1; - - if(!$self->{DOC} == 1) - { - my $start = index($self->{XML},"<"); - - if ((substr($self->{XML},$start,3) eq "<?x") || - (substr($self->{XML},$start,3) eq "<?X")) - { - my $close = index($self->{XML},"?>"); - if ($close == -1) - { - $self->{PARSING} = 0; - return; - } - $self->{XML} = substr($self->{XML},$close+2,length($self->{XML})-$close-2); - } - - &{$self->{HANDLER}->{startDocument}}($self); - $self->{DOC} = 1; - } - - while(1) - { - if (length($self->{XML}) == 0) - { - $self->{PARSING} = 0; - return $self->returnData(0); - } - my $eclose = -1; - $eclose = index($self->{XML},"</".$self->{CNAME}->[$self->{CURR}].">") - if ($#{$self->{CNAME}} > -1); - - if ($eclose == 0) - { - $self->{XML} = substr($self->{XML},length($self->{CNAME}->[$self->{CURR}])+3,length($self->{XML})-length($self->{CNAME}->[$self->{CURR}])-3); - - $self->{PARSING} = 0 if ($self->{NONBLOCKING} == 1); - &{$self->{HANDLER}->{endElement}}($self,$self->{CNAME}->[$self->{CURR}]); - $self->{PARSING} = 1 if ($self->{NONBLOCKING} == 1); - - $self->{CURR}--; - if ($self->{CURR} == 0) - { - $self->{DOC} = 0; - $self->{PARSING} = 0; - &{$self->{HANDLER}->{endDocument}}($self); - return $self->returnData(0); - } - next; - } - - my $estart = index($self->{XML},"<"); - my $cdatastart = index($self->{XML},"<![CDATA["); - if (($estart == 0) && ($cdatastart != 0)) - { - my $close = index($self->{XML},">"); - if ($close == -1) - { - $self->{PARSING} = 0; - return $self->returnData(0); - } - my $empty = (substr($self->{XML},$close-1,1) eq "/"); - my $starttag = substr($self->{XML},1,$close-($empty ? 2 : 1)); - my $nextspace = index($starttag," "); - my $attribs; - my $name; - if ($nextspace != -1) - { - $name = substr($starttag,0,$nextspace); - $attribs = substr($starttag,$nextspace+1,length($starttag)-$nextspace-1); - } - else - { - $name = $starttag; - } - - my %attribs = $self->attribution($attribs); - if (($self->{DTD} == 1) && (exists($attribs{xmlns}))) - { - } - - &{$self->{HANDLER}->{startElement}}($self,$name,%attribs); - - if($empty == 1) - { - &{$self->{HANDLER}->{endElement}}($self,$name); - } - else - { - $self->{CURR}++; - $self->{CNAME}->[$self->{CURR}] = $name; - } - - $self->{XML} = substr($self->{XML},$close+1,length($self->{XML})-$close-1); - next; - } - - if ($cdatastart == 0) - { - my $cdataclose = index($self->{XML},"]]>"); - if ($cdataclose == -1) - { - $self->{PARSING} = 0; - return $self->returnData(0); - } - - &{$self->{HANDLER}->{characters}}($self,substr($self->{XML},9,$cdataclose-9)); - - $self->{XML} = substr($self->{XML},$cdataclose+3,length($self->{XML})-$cdataclose-3); - next; - } - - if ($estart == -1) - { - $self->{XMLONHOLD} = $self->{XML}; - $self->{XML} = ""; - } - elsif (($cdatastart == -1) || ($cdatastart > $estart)) - { - &{$self->{HANDLER}->{characters}}($self,$self->entityCheck(substr($self->{XML},0,$estart))); - $self->{XML} = substr($self->{XML},$estart,length($self->{XML})-$estart); - } - } -} - - -sub attribution -{ - my $self = shift; - my $str = shift; - - $str = "" unless defined($str); - - my %attribs; - - while(1) - { - my $eq = index($str,"="); - if((length($str) == 0) || ($eq == -1)) - { - return %attribs; - } - - my $ids; - my $id; - my $id1 = index($str,"\'"); - my $id2 = index($str,"\""); - if((($id1 < $id2) && ($id1 != -1)) || ($id2 == -1)) - { - $ids = $id1; - $id = "\'"; - } - if((($id2 < $id1) && ($id1 == -1)) || ($id2 != -1)) - { - $ids = $id2; - $id = "\""; - } - - my $nextid = index($str,$id,$ids+1); - my $val = substr($str,$ids+1,$nextid-$ids-1); - my $key = substr($str,0,$eq); - - while($key =~ s/\s//) {} - - $attribs{$key} = $self->entityCheck($val); - $str = substr($str,$nextid+1,length($str)-$nextid-1); - } - - return %attribs; -} - - -sub entityCheck -{ - my $self = shift; - my $str = shift; - - while($str =~ s/\<\;/\</) {} - while($str =~ s/\>\;/\>/) {} - while($str =~ s/\"\;/\"/) {} - while($str =~ s/\&apos\;/\'/) {} - while($str =~ s/\&\;/\&/) {} - - return $str; -} - - -sub parsefile -{ - my $self = shift; - my $fileName = shift; - - open(FILE,"<",$fileName); - my $file; - while(<FILE>) { $file .= $_; } - $self->parse($file); - close(FILE); - - return $self->returnData(); -} - - -sub returnData -{ - my $self = shift; - my $clearData = shift; - $clearData = 1 unless defined($clearData); - - my $sid = $self->{SID}; - - if ($self->{STYLE} eq "tree") - { - return unless exists($self->{SIDS}->{$sid}->{tree}); - my @tree = @{$self->{SIDS}->{$sid}->{tree}}; - delete($self->{SIDS}->{$sid}->{tree}) if ($clearData == 1); - return ( \@tree ); - } - if ($self->{STYLE} eq "node") - { - return unless exists($self->{SIDS}->{$sid}->{node}); - my $node = $self->{SIDS}->{$sid}->{node}->[0]; - delete($self->{SIDS}->{$sid}->{node}) if ($clearData == 1); - return $node; - } -} - - -sub startDocument -{ - my $self = shift; -} - - -sub endDocument -{ - my $self = shift; -} - - -sub startElement -{ - my $self = shift; - my ($sax, $tag, %att) = @_; - - return unless ($self->{DOC} == 1); - - if ($self->{STYLE} eq "debug") - { - print "$self->{DEBUGHEADER} \\\\ (",join(" ",%att),")\n"; - $self->{DEBUGHEADER} .= $tag." "; - } - else - { - my @NEW; - if($#{$self->{TREE}} < 0) - { - push @{$self->{TREE}}, $tag; - } - else - { - push @{ $self->{TREE}[ $#{$self->{TREE}}]}, $tag; - } - push @NEW, \%att; - push @{$self->{TREE}}, \@NEW; - } -} - - -sub characters -{ - my $self = shift; - my ($sax, $cdata) = @_; - - return unless ($self->{DOC} == 1); - - if ($self->{STYLE} eq "debug") - { - my $str = $cdata; - $str =~ s/\n/\#10\;/g; - print "$self->{DEBUGHEADER} || $str\n"; - } - else - { - return if ($#{$self->{TREE}} == -1); - - my $pos = $#{$self->{TREE}}; - - if ($pos > 0 && $self->{TREE}[$pos - 1] eq "0") - { - $self->{TREE}[$pos - 1] .= $cdata; - } - else - { - push @{$self->{TREE}[$#{$self->{TREE}}]}, 0; - push @{$self->{TREE}[$#{$self->{TREE}}]}, $cdata; - } - } -} - - -sub endElement -{ - my $self = shift; - my ($sax, $tag) = @_; - - return unless ($self->{DOC} == 1); - - if ($self->{STYLE} eq "debug") - { - $self->{DEBUGHEADER} =~ s/\S+\ $//; - print "$self->{DEBUGHEADER} //\n"; - } - else - { - my $CLOSED = pop @{$self->{TREE}}; - - if($#{$self->{TREE}} < 1) - { - push @{$self->{TREE}}, $CLOSED; - - if($self->{TREE}->[0] eq "stream:error") - { - $self->{STREAMERROR} = $self->{TREE}[1]->[2]; - } - } - else - { - push @{$self->{TREE}[$#{$self->{TREE}}]}, $CLOSED; - } - } -} - - -1; |
