diff options
Diffstat (limited to 'lib/XML/Stream/Parser/DTD.pm')
| -rw-r--r-- | lib/XML/Stream/Parser/DTD.pm | 769 |
1 files changed, 769 insertions, 0 deletions
diff --git a/lib/XML/Stream/Parser/DTD.pm b/lib/XML/Stream/Parser/DTD.pm new file mode 100644 index 0000000..25dc888 --- /dev/null +++ b/lib/XML/Stream/Parser/DTD.pm @@ -0,0 +1,769 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream::Parser::DTD; + +=head1 NAME + + XML::Stream::Parser::DTD - XML DTD Parser and Verifier + +=head1 SYNOPSIS + + This is a work in progress. I had need for a DTD parser and verifier + and so am working on it here. If you are reading this then you are + snooping. =) + +=head1 DESCRIPTION + + This module provides the initial code for a DTD parser and verifier. + +=head1 METHODS + +=head1 EXAMPLES + +=head1 AUTHOR + +By Ryan Eatmon in February of 2001 for http://jabber.org/ + +=head1 COPYRIGHT + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +use strict; +use vars qw( $VERSION ); + +$VERSION = "1.22"; + +sub new +{ + my $self = { }; + + bless($self); + + my %args; + while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); } + + $self->{URI} = $args{uri}; + + $self->{PARSING} = 0; + $self->{DOC} = 0; + $self->{XML} = ""; + $self->{CNAME} = (); + $self->{CURR} = 0; + + $self->{ENTITY}->{"<"} = "<"; + $self->{ENTITY}->{">"} = ">"; + $self->{ENTITY}->{"""} = "\""; + $self->{ENTITY}->{"'"} = "'"; + $self->{ENTITY}->{"&"} = "&"; + + $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; + $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; + $self->{HANDLER}->{startElement} = sub{ $self->startElement(@_); }; + $self->{HANDLER}->{endElement} = sub{ $self->endElement(@_); }; + + $self->{STYLE} = "debug"; + + open(DTD,$args{uri}); + my $dtd = join("",<DTD>); + close(DTD); + + $self->parse($dtd); + + return $self; +} + + +sub parse +{ + my $self = shift; + my $xml = shift; + + while($xml =~ s/<\!--.*?-->//gs) {} + while($xml =~ s/\n//g) {} + + $self->{XML} .= $xml; + + return if ($self->{PARSING} == 1); + + $self->{PARSING} = 1; + + if(!$self->{DOC} == 1) + { + my $start = index($self->{XML},"<"); + + if (substr($self->{XML},$start,3) =~ /^<\?x$/i) + { + my $close = index($self->{XML},"?>"); + if ($close == -1) + { + $self->{PARSING} = 0; + return; + } + $self->{XML} = substr($self->{XML},$close+2,length($self->{XML})-$close-2); + } + + &{$self->{HANDLER}->{startDocument}}($self); + $self->{DOC} = 1; + } + + while(1) + { + + if (length($self->{XML}) == 0) + { + $self->{PARSING} = 0; + return; + } + + my $estart = index($self->{XML},"<"); + if ($estart == -1) + { + $self->{PARSING} = 0; + return; + } + + my $close = index($self->{XML},">"); + my $dtddata = substr($self->{XML},$estart+1,$close-1); + my $nextspace = index($dtddata," "); + my $attribs; + + my $type = substr($dtddata,0,$nextspace); + $dtddata = substr($dtddata,$nextspace+1,length($dtddata)-$nextspace-1); + $nextspace = index($dtddata," "); + + if ($type eq "!ENTITY") + { + $self->entity($type,$dtddata); + } + else + { + my $tag = substr($dtddata,0,$nextspace); + $dtddata = substr($dtddata,$nextspace+1,length($dtddata)-$nextspace-1); + $nextspace = index($dtddata," "); + + $self->element($type,$tag,$dtddata) if ($type eq "!ELEMENT"); + $self->attlist($type,$tag,$dtddata) if ($type eq "!ATTLIST"); + } + + $self->{XML} = substr($self->{XML},$close+1,length($self->{XML})-$close-1); + next; + } +} + + +sub startDocument +{ + my $self = shift; +} + + +sub endDocument +{ + my $self = shift; +} + + +sub entity +{ + my $self = shift; + my ($type, $data) = @_; + + foreach my $entity (keys(%{$self->{ENTITY}})) + { + $data =~ s/$entity/$self->{ENTITY}->{$entity}/g; + } + + my ($symbol,$tag,undef,$string) = ($data =~ /^\s*(\S+)\s+(\S+)\s+(\"|\')([^\3]*)\3\s*$/); + $self->{ENTITY}->{"${symbol}${tag}\;"} = $string; +} + +sub element +{ + my $self = shift; + my ($type, $tag, $data) = @_; + + foreach my $entity (keys(%{$self->{ENTITY}})) + { + $data =~ s/$entity/$self->{ENTITY}->{$entity}/g; + } + + $self->{COUNTER}->{$tag} = 0 unless exists($self->{COUNTER}->{$tag}); + + $self->parsegrouping($tag,\$self->{ELEMENT}->{$tag},$data); + $self->flattendata(\$self->{ELEMENT}->{$tag}); + +} + + +sub flattendata +{ + my $self = shift; + my $dstr = shift; + + if ($$dstr->{type} eq "list") + { + foreach my $index (0..$#{$$dstr->{list}}) + { + $self->flattendata(\$$dstr->{list}->[$index]); + } + + if (!exists($$dstr->{repeat}) && ($#{$$dstr->{list}} == 0)) + { + $$dstr = $$dstr->{list}->[0]; + } + } +} + +sub parsegrouping +{ + my $self = shift; + my ($tag,$dstr,$data) = @_; + + $data =~ s/^\s*//; + $data =~ s/\s*$//; + + if ($data =~ /[\*\+\?]$/) + { + ($$dstr->{repeat}) = ($data =~ /(.)$/); + $data =~ s/.$//; + } + + if ($data =~ /^\(.*\)$/) + { + my ($seperator) = ($data =~ /^\(\s*\S+\s*(\,|\|)/); + $$dstr->{ordered} = "yes" if ($seperator eq ","); + $$dstr->{ordered} = "no" if ($seperator eq "|"); + + my $count = 0; + $$dstr->{type} = "list"; + foreach my $grouping ($self->groupinglist($data,$seperator)) + { + $self->parsegrouping($tag,\$$dstr->{list}->[$count],$grouping); + $count++; + } + } + else + { + $$dstr->{type} = "element"; + $$dstr->{element} = $data; + $self->{COUNTER}->{$data} = 0 unless exists($self->{COUNTER}->{$data}); + $self->{COUNTER}->{$data}++; + $self->{CHILDREN}->{$tag}->{$data} = 1; + } +} + + +sub attlist +{ + my $self = shift; + my ($type, $tag, $data) = @_; + + foreach my $entity (keys(%{$self->{ENTITY}})) + { + $data =~ s/$entity/$self->{ENTITY}->{$entity}/g; + } + + while($data ne "") + { + my ($att) = ($data =~ /^\s*(\S+)/); + $data =~ s/^\s*\S+\s*//; + + my $value; + if ($data =~ /^\(/) + { + $value = $self->getgrouping($data); + $data = substr($data,length($value)+1,length($data)); + $data =~ s/^\s*//; + $self->{ATTLIST}->{$tag}->{$att}->{type} = "list"; + foreach my $val (split(/\s*\|\s*/,substr($value,1,length($value)-2))) { +$self->{ATTLIST}->{$tag}->{$att}->{value}->{$val} = 1; + } + } + else + { + ($value) = ($data =~ /^(\S+)/); + $data =~ s/^\S+\s*//; + $self->{ATTLIST}->{$tag}->{$att}->{type} = $value; + } + + my $default; + if ($data =~ /^\"|^\'/) + { + my($sq,$val) = ($data =~ /^(\"|\')([^\"\']*)\1/); + $default = $val; + $data =~ s/^$sq$val$sq\s*//; + } + else + { + ($default) = ($data =~ /^(\S+)/); + $data =~ s/^\S+\s*//; + } + + $self->{ATTLIST}->{$tag}->{$att}->{default} = $default; + } +} + + + +sub getgrouping +{ + my $self = shift; + my ($data) = @_; + + my $count = 0; + my $parens = 0; + foreach my $char (split("",$data)) + { + $parens++ if ($char eq "("); + $parens-- if ($char eq ")"); + $count++; + last if ($parens == 0); + } + return substr($data,0,$count); +} + + +sub groupinglist +{ + my $self = shift; + my ($grouping,$seperator) = @_; + + my @list; + my $item = ""; + my $parens = 0; + my $word = ""; + $grouping = substr($grouping,1,length($grouping)-2) if ($grouping =~ /^\(/); + foreach my $char (split("",$grouping)) + { + $parens++ if ($char eq "("); + $parens-- if ($char eq ")"); + if (($parens == 0) && ($char eq $seperator)) + { + push(@list,$word); + $word = ""; + } + else + { + $word .= $char; + } + } + push(@list,$word) unless ($word eq ""); + return @list; +} + + +sub root +{ + my $self = shift; + my $tag = shift; + my @root; + foreach my $tag (keys(%{$self->{COUNTER}})) + { + push(@root,$tag) if ($self->{COUNTER}->{$tag} == 0); + } + + print "ERROR: Too many root tags... Check the DTD...\n" + if ($#root > 0); + return $root[0]; +} + + +sub children +{ + my $self = shift; + my ($tag,$tree) = @_; + + return unless exists ($self->{CHILDREN}->{$tag}); + return if (exists($self->{CHILDREN}->{$tag}->{EMPTY})); + if (defined($tree)) + { + my @current; + foreach my $current (&XML::Stream::GetXMLData("tree array",$tree,"*","","")) + { + push(@current,$$current[0]); + } + return $self->allowedchildren($self->{ELEMENT}->{$tag},\@current); + } + return $self->allowedchildren($self->{ELEMENT}->{$tag}); +} + + +sub allowedchildren +{ + my $self = shift; + my ($dstr,$current) = @_; + + my @allowed; + + if ($dstr->{type} eq "element") + { + my $test = (defined($current) && $#{@{$current}} > -1) ? $$current[0] : ""; + shift(@{$current}) if ($dstr->{element} eq $test); + if ($self->repeatcheck($dstr,$test) == 1) + { + return $dstr->{element}; + } + } + else + { + foreach my $index (0..$#{$dstr->{list}}) + { + push(@allowed,$self->allowedchildren($dstr->{list}->[$index],$current)); + } + } + + return @allowed; +} + + +sub repeatcheck +{ + my $self = shift; + my ($dstr,$tag) = @_; + + $dstr = $self->{ELEMENT}->{$dstr} if exists($self->{ELEMENT}->{$dstr}); + +# print "repeatcheck: tag($tag)\n"; +# print "repeatcheck: repeat($dstr->{repeat})\n" +# if exists($dstr->{repeat}); + + my $return = 0; + $return = ((!defined($tag) || + ($tag eq $dstr->{element})) ? + 0 : + 1) + if (!exists($dstr->{repeat}) || + ($dstr->{repeat} eq "?")); + $return = ((defined($tag) || + (exists($dstr->{ordered}) && + ($dstr->{ordered} eq "yes"))) ? + 1 : + 0) + if (exists($dstr->{repeat}) && + (($dstr->{repeat} eq "+") || + ($dstr->{repeat} eq "*"))); + +# print "repeatcheck: return($return)\n"; + return $return; +} + + +sub required +{ + my $self = shift; + my ($dstr,$tag,$count) = @_; + + $dstr = $self->{ELEMENT}->{$dstr} if exists($self->{ELEMENT}->{$dstr}); + + if ($dstr->{type} eq "element") + { + return 0 if ($dstr->{element} ne $tag); + return 1 if !exists($dstr->{repeat}); + return 1 if (($dstr->{repeat} eq "+") && ($count == 1)) ; + } + else + { + return 0 if (($dstr->{repeat} eq "*") || ($dstr->{repeat} eq "?")); + my $test = 0; + foreach my $index (0..$#{$dstr->{list}}) + { + $test = $test | $self->required($dstr->{list}->[$index],$tag,$count); + } + return $test; + } + return 0; +} + + +sub addchild +{ + my $self = shift; + my ($tag,$child,$tree) = @_; + +# print "addchild: tag($tag) child($child)\n"; + + my @current; + if (defined($tree)) + { +# &Net::Jabber::printData("\$tree",$tree); + + @current = &XML::Stream::GetXMLData("index array",$tree,"*","",""); + +# &Net::Jabber::printData("\$current",\@current); + } + + my @newBranch = $self->addchildrecurse($self->{ELEMENT}->{$tag},$child,\@current); + + return $tree unless ("@newBranch" ne ""); + +# &Net::Jabber::printData("\$newBranch",\@newBranch); + + my $location = shift(@newBranch); + + if ($location eq "end") + { + splice(@{$$tree[1]},@{$$tree[1]},0,@newBranch); + } + else + { + splice(@{$$tree[1]},$location,0,@newBranch); + } + return $tree; +} + + +sub addcdata +{ + my $self = shift; + my ($tag,$child,$tree) = @_; + +# print "addchild: tag($tag) child($child)\n"; + + my @current; + if (defined($tree)) + { +# &Net::Jabber::printData("\$tree",$tree); + + @current = &XML::Stream::GetXMLData("index array",$tree,"*","",""); + +# &Net::Jabber::printData("\$current",\@current); + } + + my @newBranch = $self->addchildrecurse($self->{ELEMENT}->{$tag},$child,\@current); + + return $tree unless ("@newBranch" ne ""); + +# &Net::Jabber::printData("\$newBranch",\@newBranch); + + my $location = shift(@newBranch); + + if ($location eq "end") + { + splice(@{$$tree[1]},@{$$tree[1]},0,@newBranch); + } + else + { + splice(@{$$tree[1]},$location,0,@newBranch); + } + return $tree; +} + + +sub addchildrecurse +{ + my $self = shift; + my ($dstr,$child,$current) = @_; + +# print "addchildrecurse: child($child) type($dstr->{type})\n"; + + if ($dstr->{type} eq "element") + { +# print "addchildrecurse: tag($dstr->{element})\n"; + my $count = 0; + while(($#{@{$current}} > -1) && ($dstr->{element} eq $$current[0])) + { + shift(@{$current}); + shift(@{$current}); + $count++; + } + if (($dstr->{element} eq $child) && + ($self->repeatcheck($dstr,(($count > 0) ? $child : "")) == 1)) + { + my @return = ( "end" , $self->newbranch($child)); + @return = ($$current[1], $self->newbranch($child)) + if ($#{@{$current}} > -1); +# print "addchildrecurse: Found the spot! (",join(",",@return),")\n"; + + return @return; + } + } + else + { + foreach my $index (0..$#{$dstr->{list}}) + { + my @newBranch = $self->addchildrecurse($dstr->{list}->[$index],$child,$current); + return @newBranch if ("@newBranch" ne ""); + } + } +# print "Let's blow....\n"; + return; +} + + +sub deletechild +{ + my $self = shift; + my ($tag,$parent,$parenttree,$tree) = @_; + + return $tree unless exists($self->{ELEMENT}->{$tag}); + return $tree if $self->required($parent,$tag,&XML::Stream::GetXMLData("count",$parenttree,$tag)); + + return []; +} + + + +sub newbranch +{ + my $self = shift; + my $tag = shift; + + $tag = $self->root() unless defined($tag); + + my @tree = (); + + return ("0","") if ($tag eq "#PCDATA"); + + push(@tree,$tag); + push(@tree,[ {} ]); + + foreach my $att ($self->attribs($tag)) + { + $tree[1]->[0]->{$att} = "" + if (($self->{ATTLIST}->{$tag}->{$att}->{default} eq "#REQUIRED") && + ($self->{ATTLIST}->{$tag}->{$att}->{type} eq "CDATA")); + } + + push(@{$tree[1]},$self->recursebranch($self->{ELEMENT}->{$tag})); + return @tree; +} + + +sub recursebranch +{ + my $self = shift; + my $dstr = shift; + + my @tree; + if (($dstr->{type} eq "element") && + ($dstr->{element} ne "EMPTY")) + { + @tree = $self->newbranch($dstr->{element}) + if (!exists($dstr->{repeat}) || + ($dstr->{repeat} eq "+")); + } + else + { + foreach my $index (0..$#{$dstr->{list}}) + { + push(@tree,$self->recursebranch($dstr->{list}->[$index])) +if (!exists($dstr->{repeat}) || + ($dstr->{repeat} eq "+")); + } + } + return @tree; +} + + +sub attribs +{ + my $self = shift; + my ($tag,$tree) = @_; + + return unless exists ($self->{ATTLIST}->{$tag}); + + if (defined($tree)) + { + my %current = &XML::Stream::GetXMLData("attribs",$tree,"","",""); + return $self->allowedattribs($tag,\%current); + } + return $self->allowedattribs($tag); +} + + +sub allowedattribs +{ + my $self = shift; + my ($tag,$current) = @_; + + my %allowed; + foreach my $att (keys(%{$self->{ATTLIST}->{$tag}})) + { + $allowed{$att} = 1 unless (defined($current) && + exists($current->{$att})); + } + return sort {$a cmp $b} keys(%allowed); +} + + +sub attribvalue +{ + my $self = shift; + my $tag = shift; + my $att = shift; + + return $self->{ATTLIST}->{$tag}->{$att}->{type} + if ($self->{ATTLIST}->{$tag}->{$att}->{type} ne "list"); + return sort {$a cmp $b} keys(%{$self->{ATTLIST}->{$tag}->{$att}->{value}}); +} + + +sub addattrib +{ + my $self = shift; + my ($tag,$att,$tree) = @_; + + return $tree unless exists($self->{ATTLIST}->{$tag}); + return $tree unless exists($self->{ATTLIST}->{$tag}->{$att}); + + my $default = $self->{ATTLIST}->{$tag}->{$att}->{default}; + $default = "" if ($default eq "#REQUIRED"); + $default = "" if ($default eq "#IMPLIED"); + + $$tree[1]->[0]->{$att} = $default; + + return $tree; +} + + +sub attribrequired +{ + my $self = shift; + my ($tag,$att) = @_; + + return 0 unless exists($self->{ATTLIST}->{$tag}); + return 0 unless exists($self->{ATTLIST}->{$tag}->{$att}); + + return 1 if ($self->{ATTLIST}->{$tag}->{$att}->{default} eq "#REQUIRED"); + return 0; +} + + +sub deleteattrib +{ + my $self = shift; + my ($tag,$att,$tree) = @_; + + return $tree unless exists($self->{ATTLIST}->{$tag}); + return $tree unless exists($self->{ATTLIST}->{$tag}->{$att}); + + return if $self->attribrequired($tag,$att); + + delete($$tree[1]->[0]->{$att}); + + return $tree; +} + |
