diff options
| author | Andreas Brachold <vdr07@deltab.de> | 2007-08-13 18:41:27 +0000 |
|---|---|---|
| committer | Andreas Brachold <vdr07@deltab.de> | 2007-08-13 18:41:27 +0000 |
| commit | bcbf441e09fb502cf64924ff2530fa144bdf52c5 (patch) | |
| tree | f377707a2dac078db8cd0c7d7abfe69ac1006d71 /lib/XML | |
| download | xxv-bcbf441e09fb502cf64924ff2530fa144bdf52c5.tar.gz xxv-bcbf441e09fb502cf64924ff2530fa144bdf52c5.tar.bz2 | |
* Move files to trunk
Diffstat (limited to 'lib/XML')
| -rw-r--r-- | lib/XML/Dumper.pm | 897 | ||||
| -rw-r--r-- | lib/XML/Simple.pm | 3041 | ||||
| -rw-r--r-- | lib/XML/Stream.pm | 3268 | ||||
| -rw-r--r-- | lib/XML/Stream/Namespace.pm | 190 | ||||
| -rw-r--r-- | lib/XML/Stream/Node.pm | 944 | ||||
| -rw-r--r-- | lib/XML/Stream/Parser.pm | 567 | ||||
| -rw-r--r-- | lib/XML/Stream/Parser/DTD.pm | 769 | ||||
| -rw-r--r-- | lib/XML/Stream/Tree.pm | 682 | ||||
| -rw-r--r-- | lib/XML/Stream/XPath.pm | 50 | ||||
| -rw-r--r-- | lib/XML/Stream/XPath/Op.pm | 919 | ||||
| -rw-r--r-- | lib/XML/Stream/XPath/Query.pm | 374 | ||||
| -rw-r--r-- | lib/XML/Stream/XPath/Value.pm | 153 |
12 files changed, 11854 insertions, 0 deletions
diff --git a/lib/XML/Dumper.pm b/lib/XML/Dumper.pm new file mode 100644 index 0000000..2d9f740 --- /dev/null +++ b/lib/XML/Dumper.pm @@ -0,0 +1,897 @@ +# ============================================================ +# XML:: +# ____ +# | _ \ _ _ _ __ ___ _ __ ___ _ __ +# | | | | | | | '_ ` _ \| '_ \ / _ \ '__| +# | |_| | |_| | | | | | | |_) | __/ | +# |____/ \__,_|_| |_| |_| .__/ \___|_| +# |_| +# Perl module for dumping Perl objects from/to XML +# ============================================================ + +=head1 NAME + +XML::Dumper - Perl module for dumping Perl objects from/to XML + +=head1 SYNOPSIS + + # ===== OO-way + use XML::Dumper; + $dump = new XML::Dumper; + + $xml = $dump->pl2xml( $perl ); + $perl = $dump->xml2pl( $xml ); + $dump->pl2xml( $perl, "my_perl_data.xml.gz" ); + + # ===== Functional way + use XML::Dumper; + + $xml = pl2xml( $perl ); + $perl = xml2pl( $xml ); + +=head1 EXTENDED SYNOPSIS + + use XML::Dumper; + my $dump = new XML::Dumper; + + my $perl = ''; + my $xml = ''; + + # ===== Convert Perl code to XML + $perl = [ + { + fname => 'Fred', + lname => 'Flintstone', + residence => 'Bedrock' + }, + { + fname => 'Barney', + lname => 'Rubble', + residence => 'Bedrock' + } + ]; + $xml = $dump->pl2xml( $perl ); + + # ===== Dump to a file + my $file = "dump.xml"; + $dump->pl2xml( $perl, $file ); + + # ===== Convert XML to Perl code + $xml = q| + <perldata> + <arrayref> + <item key="0"> + <hashref> + <item key="fname">Fred</item> + <item key="lname">Flintstone</item> + <item key="residence">Bedrock</item> + </hashref> + </item> + <item key="1"> + <hashref> + <item key="fname">Barney</item> + <item key="lname">Rubble</item> + <item key="residence">Bedrock</item> + </hashref> + </item> + </arrayref> + </perldata> + |; + + my $perl = $dump->xml2pl( $xml ); + + # ===== Convert an XML file to Perl code + my $perl = $dump->xml2pl( $file ); + + # ===== And serialize Perl code to an XML file + $dump->pl2xml( $perl, $file ); + + # ===== USE COMPRESSION + $dump->pl2xml( $perl, $file.".gz" ); + + # ===== INCLUDE AN IN-DOCUMENT DTD + $dump->dtd; + my $xml_with_dtd = $dump->pl2xml( $perl ); + + # ===== USE EXTERNAL DTD + $dump->dtd( $file, $url ); + my $xml_with_link_to_dtd = $dump->pl2xml( $perl ); + +=head1 DESCRIPTION + +XML::Dumper dumps Perl data to XML format. XML::Dumper can also read XML data +that was previously dumped by the module and convert it back to Perl. You can +use the module read the XML from a file and write the XML to a file. Perl +objects are blessed back to their original packaging; if the modules are +installed on the system where the perl objects are reconstituted from xml, they +will behave as expected. Intuitively, if the perl objects are converted and +reconstituted in the same environment, all should be well. And it is. + +Additionally, because XML benefits so nicely from compression, XML::Dumper +understands gzipped XML files. It does so with an optional dependency on +Compress::Zlib. So, if you dump a Perl variable with a file that has an +extension of '.xml.gz', it will store and compress the file in gzipped format. +Likewise, if you read a file with the extension '.xml.gz', it will uncompress +the file in memory before parsing the XML back into a Perl variable. + +Another fine challenge that this module rises to meet is that it understands +circular definitions and multiple references to a single object. This includes +doubly-linked lists, circular references, and the so-called 'Flyweight' pattern of +Object Oriented programming. So it can take the gnarliest of your perl data, and +should do just fine. + +=head2 FUNCTIONS AND METHODS + +=over 4 + +=cut + +package XML::Dumper; + +require 5.005_62; +use strict; +use warnings; + +require Exporter; +use XML::Parser; +use overload; + +our @ISA = qw( Exporter ); +our %EXPORT_TAGS = ( ); +our @EXPORT_OK = ( ); +our @EXPORT = qw( xml2pl pl2xml xml_compare xml_identity ); +our $VERSION = '0.79'; + +our $COMPRESSION_AVAILABLE; + +BEGIN { + eval { require Compress::Zlib; }; + if( $@ ) { + $COMPRESSION_AVAILABLE = 0; + } else { + $COMPRESSION_AVAILABLE = 1; + } +} + +our $dump = new XML::Dumper; + +# ============================================================ +sub new { +# ============================================================ + +=item * new() - XML::Dumper constructor. + +Creates a lean, mean, XML dumping machine. It's also completely +at your disposal. + +=cut + +# ------------------------------------------------------------ + my ($class) = map { ref || $_ } shift; + my $self = bless {}, $class; + + $self->init; + + return $self; +} + +# ============================================================ +sub init { +# ============================================================ + my $self = shift; + $self->{ perldata } = {}; + $self->{ xml } = {}; + 1; +} + +# ============================================================ +sub dtd { +# ============================================================ + +=item * dtd - + +Generates a Document Type Dictionary for the 'perldata' data +type. The default behaviour is to embed the DTD in the XML, +thereby creating valid XML. Given a filename, the DTD will be +written out to that file and the XML document for your Perl data +will link to the file. Given a filename and an URL, the DTD will +be written out the file and the XML document will link to the URL. +XML::Dumper doesn't try really hard to determine where your DTD's +ought to go or relative paths or anything, so be careful with +what arguments you supply this method, or just go with the default +with the embedded DTD. Between DTD's and Schemas, the potential +for more free-form data to be imported and exported becomes +feasible. + +Usage: + + dtd(); # Causes XML to include embedded DTD + dtd( $file ); # DTD saved to $file; XML will link to $file + dtd( $file, $url ); # DTD saved to $file; XML will link to $url + dtd( 0 ); # Prevents XML from including embedded DTD + +=cut + +# ------------------------------------------------------------ + my $self = ( ref $_[0] && (ref $_[0]) =~ /XML::Dumper/ ) ? shift : $dump; + my $file = shift; + my $url = shift; + + my $dtd = qq{<!ELEMENT scalar (#PCDATA)> +<!ELEMENT scalarref (#PCDATA)> +<!ATTLIST scalarref + blessed_package CDATA #IMPLIED + memory_address CDATA #IMPLIED> +<!ELEMENT arrayref (item*)> +<!ATTLIST arrayref + blessed_package CDATA #IMPLIED + memory_address CDATA #IMPLIED> +<!ELEMENT hashref (item*)> +<!ATTLIST hashref + blessed_package CDATA #IMPLIED + memory_address CDATA #IMPLIED> +<!ELEMENT item (#PCDATA|scalar|scalarref|arrayref|hashref)*> +<!ATTLIST item + key CDATA #REQUIRED + defined CDATA #IMPLIED> +<!ELEMENT perldata (scalar|scalarref|arrayref|hashref)*> +}; + + if( defined $file && $file ) { + open DTD, ">$file" or die $!; + print DTD $dtd; + close DTD; + $url = defined $url ? $url : $file; + $self->{ dtd } = qq{ +<!DOCTYPE perldata SYSTEM "$url"> +}; + } elsif( not defined $file ) { + $self->{ dtd } = join( "\n", + "<?xml version=\"1.0\"?>", + "<!DOCTYPE perldata [", + ( map { /^\t/ ? $_ : " $_" } split /\n/, $dtd ), + ']>', + ''); + } else { + delete $self->{ dtd }; + return; + } + + $self->{ dtd }; +} + +# ============================================================ +sub dump { +# ============================================================ + my $self = shift; + my $ref = shift; + my $indent = shift; + + my $string = ''; + + # ===== HANDLE REFERENCE DUMPING + if( ref $ref ) { + no warnings; + local $_ = ref( $ref ); + my $class = ''; + my $address = ''; + my $reused = ''; + + # ===== HANDLE THE VARIETY OF THINGS A PERL REFERENCE CAN REFER TO + REFERENCE: { + # ---------------------------------------- + OBJECT: { + # ---------------------------------------- + last OBJECT if /^(?:SCALAR|HASH|ARRAY)$/; + $class = $_; + $class = xml_escape( $class ); + ($_,$address) = overload::StrVal( $ref ) =~ /$class=([^(]+)\(([x0-9A-Fa-f]+)\)/; + } + + # ---------------------------------------- + HAS_MEMORY_ADDRESS: { + # ---------------------------------------- + # References which refer to the same memory space point to the + # same thing + last HAS_MEMORY_ADDRESS if( $class ); + ($_,$address) = overload::StrVal( $ref ) =~ /([^(]+)\(([x0-9A-Fa-f]+)\)/; + } + + $reused = exists( $self->{ xml }{ $address } ); + + # ---------------------------------------- + if( /^SCALAR$/ ) { + # ---------------------------------------- + my $type = + "<scalarref". + ($class ? " blessed_package=\"$class\"" : '' ) . + ($address ? " memory_address=\"$address\"" : '' ) . + ( defined $$ref ? '' : " defined=\"false\"" ) . + ">"; + $self->{ xml }{ $address }++ if( $address ); + $string = "\n" . " " x $indent . $type . ($reused ? '' : xml_escape($$ref)) . "</scalarref>"; + last REFERENCE; + } + + # ---------------------------------------- + if( /^HASH$/ ) { + # ---------------------------------------- + $self->{ xml }{ $address }++ if( $address ); + my $type = + "<hashref". + ($class ? " blessed_package=\"$class\"" : '' ). + ($address && $self->{ xml }{ $address } ? " memory_address=\"$address\"" : '' ). + ">"; + $string = "\n" . " " x $indent . $type; + if( not $reused ) { + $indent++; + foreach my $key (sort keys(%$ref)) { + my $type = + "<item " . + "key=\"" . xml_escape( $key ) . "\"" . + ( defined $ref->{ $key } ? '' : " defined=\"false\"" ) . + ">"; + $string .= "\n" . " " x $indent . $type; + if (ref($ref->{$key})) { + $string .= $self->dump( $ref->{$key}, $indent+1); + $string .= "\n" . " " x $indent . "</item>"; + } else { + $string .= xml_escape($ref->{$key}) . "</item>"; + } + } + $indent--; + } + $string .= "\n" . " " x $indent . "</hashref>"; + last REFERENCE; + } + + # ---------------------------------------- + if( /^ARRAY$/ ) { + # ---------------------------------------- + my $type = + "<arrayref". + ($class ? " blessed_package=\"$class\"" : '' ). + ($address ? " memory_address=\"$address\"" : '' ). + ">"; + $string .= "\n" . " " x $indent . $type; + $self->{ xml }{ $address }++ if( $address ); + if( not $reused ) { + $indent++; + for (my $i=0; $i < @$ref; $i++) { + my $defined; + my $type = + "<item " . + "key=\"" . xml_escape( $i ) . "\"" . + ( defined $ref->[ $i ] ? '' : " defined=\"false\"" ) . + ">"; + + $string .= "\n" . " " x $indent . $type; + if (ref($ref->[$i])) { + $string .= $self->dump($ref->[$i], $indent+1); + $string .= "\n" . " " x $indent . "</item>"; + } else { + $string .= xml_escape($ref->[$i]) . "</item>"; + } + } + $indent--; + } + $string .= "\n" . " " x $indent . "</arrayref>"; + last REFERENCE; + } + + } + + # ===== HANDLE SCALAR DUMPING + } else { + my $type = + "<scalar". + ( defined $ref ? '' : " defined=\"false\"" ) . + ">"; + + $string .= "\n" . " " x $indent . $type . xml_escape( $ref ) . "</scalar>"; + } + + return( $string ); +} + +# ============================================================ +sub perl2xml { +# ============================================================ + pl2xml( @_ ); +} + +# ============================================================ +sub pl2xml { +# ============================================================ + +=item * pl2xml( $xml, [ $file ] ) - + +(Also perl2xml(), for those who enjoy readability over brevity). + +Converts Perl data to XML. If a second argument is given, then the Perl data +will be stored to disk as XML, using the second argument as a filename. + +Usage: See Synopsis + +=cut + +# ------------------------------------------------------------ + my $self = ( ref $_[0] && (ref $_[0]) =~ /XML::Dumper/ ) ? shift : $dump; + my $ref = shift; + my $file = shift; + + $self->init; + + my $xml = + ( defined $self->{ dtd } ? $self->{ dtd } : '' ) . + "<perldata>" . $self->dump( $ref, 1 ) . "\n</perldata>\n"; + + if( defined $file ) { + if( $file =~ /\.xml\.gz$/i ) { + if( $COMPRESSION_AVAILABLE ) { + my $compressed_xml = Compress::Zlib::memGzip( $xml ) or die "Failed to compress xml $!"; + open FILE, ">:utf8", $file or die "Can't open '$file' for writing $!"; + binmode FILE; + print FILE $compressed_xml; + close FILE; + + } else { + my $uncompressed_file = $file; + $uncompressed_file =~ s/\.gz$//i; + warn "Compress::Zlib not installed. Saving '$file' as '$uncompressed_file'\n"; + + open FILE, ">:utf8", $uncompressed_file or die "Can't open '$uncompressed_file' for writing $!"; + print FILE $xml; + close FILE; + } + } else { + no warnings; # to shut Perl up about Wide characters for UTF8 output + open FILE, ">$file" or die "Can't open '$file' for writing $!"; + print FILE $xml; + close FILE; + } + } + return $xml; +} + +# ============================================================ +sub undump { +# ============================================================ +# undump +# Takes the XML generated by pl2xml, and recursively undumps it to +# create a data structure in memory. The top-level object is a scalar, +# a reference to a scalar, a hash, or an array. Hashes and arrays may +# themselves contain scalars, or references to scalars, or references to +# hashes or arrays, with the exception that scalar values are never +# "undef" because there's currently no way to represent undef in the +# dumped data. +# +# The key to understanding undump is to understand XML::Parser's +# Tree parsing format: +# +# <tag name>, [ { <attributes }, '0', <[text]>, <[children tag-array pair value(s)]...> ] +# ------------------------------------------------------------ + my $self = shift; + my $tree = shift; + my $callback = shift; + + my $ref = undef; + my $item; + + # make Perl stop whining about deep recursion and soft references + no warnings; + + TREE: for (my $i = 1; $i < $#$tree; $i+=2) { + local $_ = lc( $tree->[ $i ] ); + my $class = ''; + my $address = ''; + + PERL_TYPES: { + # ---------------------------------------- + if( /^scalar$/ ) { + # ---------------------------------------- + $ref = defined $tree->[ $i+1 ][ 2 ] ? $tree->[ $i +1 ][ 2 ] : ''; + if( exists $tree->[ $i+1 ][ 0 ]{ 'defined' } ) { + if( $tree->[ $i +1 ][ 0 ]{ 'defined' } =~ /false/i ) { + $ref = undef; + } + } + last TREE; + } + + # ===== FIND PACKAGE + if( $tree->[ $i+1 ] && ref( $tree->[ $i +1 ] ) eq 'ARRAY' ) { + if( exists $tree->[ $i+1 ][0]{ blessed_package } ) { + $class = $tree->[ $i+1 ][ 0 ]{ blessed_package }; + } + } + + # ===== FIND MEMORY ADDRESS + if( $tree->[ $i+1 ] && ref( $tree->[ $i +1 ] ) eq 'ARRAY' ) { + if( exists $tree->[ $i+1 ][0]{ memory_address } ) { + $address = $tree->[ $i+1 ][ 0 ]{ memory_address }; + } + } + + ALREADY_EXISTS_IN_MEMORY: { + if( exists $self->{ perldata }{ $address } ) { + $ref = $self->{ perldata }{ $address }; + last TREE; + } + } + + # ---------------------------------------- + if( /^scalarref/ ) { + # ---------------------------------------- + $ref = defined $tree->[ $i+1 ][ 2 ] ? \ $tree->[ $i +1 ][ 2 ] : \''; + if( exists $tree->[ $i+1 ][ 0 ]{ 'defined' } ) { + if( $tree->[ $i +1 ][ 0 ]{ 'defined' } =~ /false/i ) { + $ref = \ undef; + } + } + + $self->{ perldata }{ $address } = $ref if( $address ); + if( $class ) { + # Check class name for nasty stuff... + $class =~ m/^[\w-]+(?:::[\w-]+)*$/ + or die "Refusing to load unsafe class name '$class'\n"; + + unless( int( eval( "\%$class"."::")) ) { + eval "require $class;"; + if( $@ ) { + warn $@; + } + } + + bless $ref, $class; + if( defined $callback && $ref->can( $callback ) ) { + $ref->$callback(); + } + } + last TREE; + } + + # ---------------------------------------- + if( /^hash(?:ref)?/ ) { + # ---------------------------------------- + $ref = {}; + $self->{ perldata }{ $address } = $ref if( $address ); + for (my $j = 1; $j < $#{$tree->[$i+1]}; $j+=2) { + next unless $tree->[$i+1][$j] eq 'item'; + my $item_tree = $tree->[$i+1][$j+1]; + if( exists $item_tree->[0]{ key } ) { + my $key = $item_tree->[ 0 ]{ key }; + if( exists $item_tree->[ 0 ]{ 'defined' } ) { + if( $item_tree->[ 0 ]{ 'defined' } =~ /false/ ) { + $ref->{ $key } = undef; + next; + } + } + # ===== XML::PARSER IGNORES ZERO-LENGTH STRINGS + # It indicates the presence of a zero-length string by + # not having the array portion of the tag-name/array pair + # values be of length 1. (Which is to say it captures only + # the attributes of the tag and acknowledges that the tag + # is an empty one. + if( int( @{ $item_tree } ) == 1 ) { + $ref->{ $key } = ''; + next; + } + $ref->{ $key } = $self->undump( $item_tree, $callback ); + } + } + if( $class ) { + # Check class name for nasty stuff... + $class =~ m/^[\w-]+(?:::[\w-]+)*$/ + or die "Refusing to load unsafe class name '$class'\n"; + + unless( int( eval( "\%$class"."::")) ) { + eval "require $class;"; + if( $@ ) { + warn $@; + } + } + + bless $ref, $class; + if( defined $callback && $ref->can( $callback ) ) { + $ref->$callback(); + } + } + last TREE; + } + + # ---------------------------------------- + if( /^arrayref/ ) { + # ---------------------------------------- + $ref = []; + $self->{ perldata }{ $address } = $ref if( $address ); + for (my $j = 1; $j < $#{$tree->[$i+1]}; $j+=2) { + next unless $tree->[$i+1][$j] eq 'item'; + my $item_tree = $tree->[$i+1][$j+1]; + if( exists $item_tree->[0]{ key } ) { + my $key = $item_tree->[0]{ key }; + if( exists $item_tree->[ 0 ]{ 'defined' } ) { + if( $item_tree->[ 0 ]{ 'defined' } =~ /false/ ) { + $ref->[ $key ] = undef; + next; + } + } + # ===== XML::PARSER IGNORES ZERO-LENGTH STRINGS + # See note above. + if( int( @{ $item_tree } ) == 1 ) { + $ref->[ $key ] = ''; + next; + } + $ref->[ $key ] = $self->undump( $item_tree, $callback ); + } + } + if( $class ) { + # Check class name for nasty stuff... + $class =~ m/^[\w-]+(?:::[\w-]+)*$/ + or die "Refusing to load unsafe class name '$class'\n"; + + unless( int( eval( "\%$class"."::")) ) { + eval "require $class;"; + if( $@ ) { + warn $@; + } + } + + bless $ref, $class; + if( defined $callback && $ref->can( $callback ) ) { + $ref->$callback(); + } + } + last TREE; + } + + # ---------------------------------------- + if( /^0$/ ) { # SIMPLE SCALAR + # ---------------------------------------- + $item = $tree->[$i + 1]; + } + } + } + + ## If $ref is not set at this point, it means we've just + ## encountered a scalar value directly inside the item tag. + + $ref = $item unless defined( $ref ); + + return ($ref); +} + +# ============================================================ +sub xml_escape { +# ============================================================ +# Transforms and filters input characters to acceptable XML characters +# (or filters them out completely). There's probably a better +# implementation of this in another module, by now. +# ------------------------------------------------------------ + local $_ = shift; + return $_ if not defined $_; + s/&/&/g; + s/</</g; + s/>/>/g; + s/[\0\ca\cb\cc\cd\ce\cf\cg\ch\ck\cl\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz\c[\c\\c]\c^\c_]//g; + s/'/'/g; + s/"/"/g; + return $_; +} + +# ============================================================ +sub xml2perl { +# ============================================================ + xml2pl( @_ ); +} + +# ============================================================ +sub xml2pl { +# ============================================================ + +=item * xml2pl( $xml_or_filename, [ $callback ] ) - + +(Also xml2perl(), for those who enjoy readability over brevity.) + +Converts XML to a Perl datatype. If this method is given a second argument, +XML::Dumper will use the second argument as a callback (if possible). If +the first argument isn't XML and exists as a file, that file will be read +and its contents will be used as the input XML. + +Currently, the only supported invocation of callbacks is through soft +references. That is to say, the callback argument ought to be a string +that matches the name of a callable method for your classes. If you have +a congruent interface, this should work like a peach. If your class +interface doesn't have such a named method, it won't be called. + +=cut + +# ------------------------------------------------------------ + my $self = ( ref $_[0] && (ref $_[0]) =~ /XML::Dumper/) ? shift : $dump; + my $xml = shift; + my $callback = shift; + + $self->init; + + if( $xml !~ /\</ ) { + my $file = $xml; + if( -e $file ) { + my $gzip_header_signature = pack "H4", "1f8b"; + my $first_two_bytes; + + open FILE, "<". $file or die "Can't open '$file' for reading $!"; + defined read FILE, $first_two_bytes, 2 or die "Can't read first two bytes of '$file' $!"; + close FILE; + + if( $first_two_bytes eq $gzip_header_signature ) { + if( $COMPRESSION_AVAILABLE ) { + my $gz = Compress::Zlib::gzopen( $file, "rb" ); + my @xml; + my $buffer; + while( $gz->gzread( $buffer ) > 0 ) { + push @xml, $buffer; + } + $gz->gzclose(); + $xml = join "", @xml; + + } else { + die "Compress::Zlib is not installed. Cannot read gzipped file '$file'"; + } + } else { + + open FILE, $file or die "Can't open file '$file' for reading $!"; + my @xml = <FILE>; + close FILE; + $xml = join "", @xml; + } + + } else { + die "'$file' does not exist as a file and is not XML.\n"; + } + } + + my $parser = new XML::Parser(Style => 'Tree'); + my $tree = $parser->parse($xml); + + # Skip enclosing "perldata" level + my $topItem = $tree->[1]; + my $ref = $self->undump($topItem, $callback); + + return($ref); +} + +# ============================================================ +sub xml_compare { +# ============================================================ + +=item * xml_compare( $xml1, $xml2 ) - Compares xml for content + +Compares two dumped Perl data structures (that is, compares the xml) for +identity in content. Use this function rather than perl's built-in string +comparison. This function will return true for any two perl data that are +either deep clones of each other, or identical. This method is exported +by default. + +=cut + +# ------------------------------------------------------------ + my $self = shift; + my $xml1 = shift; + my $xml2 = shift; + + my $class = ref $self; + if( $class ne 'XML::Dumper' ) { + $xml2 = $xml1; + $xml1 = $self; + } + + $xml1 =~ s/(<[^>]*)\smemory_address="\dx[A-Za-z0-9]+"([^<]*>)/$1$2/g; + $xml2 =~ s/(<[^>]*)\smemory_address="\dx[A-Za-z0-9]+"([^<]*>)/$1$2/g; + $xml1 =~ s/(<[^>]*)\sdefined=\"false\"([^<]>)/$1$2/g; # For backwards + $xml2 =~ s/(<[^>]*)\sdefined=\"false\"([^<]>)/$1$2/g; # compatibility + $xml1 =~ s/<\?xml .*>//; # Ignore XML declaration + $xml2 =~ s/<\?xml .*>//; + $xml1 =~ s/<\!DOCTYPE perldata \[.*\]>//s; # Remove DTD + $xml2 =~ s/<\!DOCTYPE perldata \[.*\]>//s; + $xml1 =~ s/^\s*</</; # Remove empty space + $xml2 =~ s/^\s*</</; + $xml1 =~ s/>\s*</></g; + $xml2 =~ s/>\s*</></g; + $xml1 =~ s/>\s*$/>/; + $xml2 =~ s/>\s*$/>/; + + return $xml1 eq $xml2; +} + +# ============================================================ +sub xml_identity { +# ============================================================ + +=item * xml_identity( $xml1, $xml2 ) - Compares xml for identity + +Compares two dumped Perl data structures (that is, compares the xml) for +identity in instantiation. This function will return true for any two +perl data that are identical, but not for deep clones of each other. This +method is also exported by default. + +=cut + +# ------------------------------------------------------------ + my $self = shift; + my $xml1 = shift; + my $xml2 = shift; + + my $class = ref $self; + if( $class ne 'XML::Dumper' ) { + $xml2 = $xml1; + $xml1 = $self; + } + + return ( $xml1 eq $xml2 ); +} + +1; +__END__ + +=back + +=head1 EXPORTS + +By default, the following methods are exported: + + xml2pl, pl2xml, xml_compare, xml_identity + +=head1 BUGS AND DEPENDENCIES + +XML::Dumper has changed API since 0.4, as a response to a bug report +from PerlMonks. I felt it was necessary, as the functions simply didn't +work as advertised. That is, xml2pl really didnt accept xml as an +argument; what it wanted was an XML Parse tree. To correct for the +API change, simply don't parse the XML before feeding it to XML::Dumper. + +XML::Dumper also has no understanding of typeglobs (references or not), +references to regular expressions, or references to Perl subroutines. +Turns out that Data::Dumper doesn't do references to Perl subroutines, +either, so at least I'm in somewhat good company. + +XML::Dumper requires one perl module, available from CPAN + + XML::Parser + +XML::Parser itself relies on Clark Cooper's Expat implementation in Perl, +which in turn requires James Clark's expat package itself. See the +documentation for XML::Parser for more information. + +=head1 REVISIONS AND CREDITS + +The list of credits got so long that I had to move it to the Changes +file. Thanks to all those who've contributed with bug reports and +suggested features! Keep 'em coming! + +I've had ownership of the module since June of 2002, and very much +appreciate requests on how to make the module better. It has served me +well, both as a learning tool on how I can repay my debt to the Perl +Community, and as a practical module that is useful. I'm thrilled to +be able to offer this bit of code. So, if you have suggestions, bug +reports, or feature requests, please let me know and I'll do my best +to make this a better module. + +=head1 CURRENT MAINTAINER + +Mike Wong E<lt>mike_w3@pacbell.netE<gt> + +XML::Dumper is free software. You can redistribute it and/or +modify it under the same terms as Perl itself. + +=head1 ORIGINAL AUTHOR + +Jonathan Eisenzopf E<lt>eisen@pobox.comE<gt> + +=head1 SEE ALSO + +perl(1) +Compress::Zlib(3) +XML::Parser(3) +Data::DumpXML(3) + +=cut diff --git a/lib/XML/Simple.pm b/lib/XML/Simple.pm new file mode 100644 index 0000000..e2216be --- /dev/null +++ b/lib/XML/Simple.pm @@ -0,0 +1,3041 @@ +# $Id: Simple.pm,v 1.23 2005/01/29 04:16:10 grantm Exp $ + +package XML::Simple; + +=head1 NAME + +XML::Simple - Easy API to maintain XML (esp config files) + +=head1 SYNOPSIS + + use XML::Simple; + + my $ref = XMLin([<xml file or string>] [, <options>]); + + my $xml = XMLout($hashref [, <options>]); + +Or the object oriented way: + + require XML::Simple; + + my $xs = new XML::Simple(options); + + my $ref = $xs->XMLin([<xml file or string>] [, <options>]); + + my $xml = $xs->XMLout($hashref [, <options>]); + +(or see L<"SAX SUPPORT"> for 'the SAX way'). + +To catch common errors: + + use XML::Simple qw(:strict); + +(see L<"STRICT MODE"> for more details). + +=cut + +# See after __END__ for more POD documentation + + +# Load essentials here, other modules loaded on demand later + +use strict; +use Carp; +require Exporter; + + +############################################################################## +# Define some constants +# + +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER); + +@ISA = qw(Exporter); +@EXPORT = qw(XMLin XMLout); +@EXPORT_OK = qw(xml_in xml_out); +$VERSION = '2.14'; +$PREFERRED_PARSER = undef; + +my $StrictMode = 0; +my %CacheScheme = ( + storable => [ \&StorableSave, \&StorableRestore ], + memshare => [ \&MemShareSave, \&MemShareRestore ], + memcopy => [ \&MemCopySave, \&MemCopyRestore ] + ); + +my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr + searchpath forcearray cache suppressempty parseropts + grouptags nsexpand datahandler varattr variables + normalisespace normalizespace valueattr); + +my @KnownOptOut = qw(keyattr keeproot contentkey noattr + rootname xmldecl outputfile noescape suppressempty + grouptags nsexpand handler noindent attrindent nosort + valueattr numericescape); + +my @DefKeyAttr = qw(name key id); +my $DefRootName = qq(opt); +my $DefContentKey = qq(content); +my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>); + +my $xmlns_ns = 'http://www.w3.org/2000/xmlns/'; +my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround + + +############################################################################## +# Globals for use by caching routines +# + +my %MemShareCache = (); +my %MemCopyCache = (); + + +############################################################################## +# Wrapper for Exporter - handles ':strict' +# + +sub import { + + # Handle the :strict tag + + $StrictMode = 1 if grep(/^:strict$/, @_); + + # Pass everything else to Exporter.pm + + __PACKAGE__->export_to_level(1, grep(!/^:strict$/, @_)); +} + + +############################################################################## +# Constructor for optional object interface. +# + +sub new { + my $class = shift; + + if(@_ % 2) { + croak "Default options must be name=>value pairs (odd number supplied)"; + } + + my %known_opt; + @known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100; + + my %raw_opt = @_; + my %def_opt; + while(my($key, $val) = each %raw_opt) { + my $lkey = lc($key); + $lkey =~ s/_//g; + croak "Unrecognised option: $key" unless(exists($known_opt{$lkey})); + $def_opt{$lkey} = $val; + } + my $self = { def_opt => \%def_opt }; + + return(bless($self, $class)); +} + + +############################################################################## +# Sub/Method: XMLin() +# +# Exported routine for slurping XML into a hashref - see pod for info. +# +# May be called as object method or as a plain function. +# +# Expects one arg for the source XML, optionally followed by a number of +# name => value option pairs. +# + +sub XMLin { + + # If this is not a method call, create an object + + my $self; + if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) { + $self = shift; + } + else { + $self = new XML::Simple(); + } + + + my $string = shift; + + $self->handle_options('in', @_); + + + # If no XML or filename supplied, look for scriptname.xml in script directory + + unless(defined($string)) { + + # Translate scriptname[.suffix] to scriptname.xml + + require File::Basename; + + my($ScriptName, $ScriptDir, $Extension) = + File::Basename::fileparse($0, '\.[^\.]+'); + + $string = $ScriptName . '.xml'; + + + # Add script directory to searchpath + + if($ScriptDir) { + unshift(@{$self->{opt}->{searchpath}}, $ScriptDir); + } + } + + + # Are we parsing from a file? If so, is there a valid cache available? + + my($filename, $scheme); + unless($string =~ m{<.*?>}s or ref($string) or $string eq '-') { + + require File::Basename; + require File::Spec; + + $filename = $self->find_xml_file($string, @{$self->{opt}->{searchpath}}); + + if($self->{opt}->{cache}) { + foreach $scheme (@{$self->{opt}->{cache}}) { + croak "Unsupported caching scheme: $scheme" + unless($CacheScheme{$scheme}); + + my $opt = $CacheScheme{$scheme}->[1]->($filename); + return($opt) if($opt); + } + } + } + else { + delete($self->{opt}->{cache}); + if($string eq '-') { + # Read from standard input + + local($/) = undef; + $string = <STDIN>; + } + } + + + # Parsing is required, so let's get on with it + + my $tree = $self->build_tree($filename, $string); + + + # Now work some magic on the resulting parse tree + + my($ref); + if($self->{opt}->{keeproot}) { + $ref = $self->collapse({}, @$tree); + } + else { + $ref = $self->collapse(@{$tree->[1]}); + } + + if($self->{opt}->{cache}) { + $CacheScheme{$self->{opt}->{cache}->[0]}->[0]->($ref, $filename); + } + + return($ref); +} + + +############################################################################## +# Method: build_tree() +# +# This routine will be called if there is no suitable pre-parsed tree in a +# cache. It parses the XML and returns an XML::Parser 'Tree' style data +# structure (summarised in the comments for the collapse() routine below). +# +# XML::Simple requires the services of another module that knows how to +# parse XML. If XML::SAX is installed, the default SAX parser will be used, +# otherwise XML::Parser will be used. +# +# This routine expects to be passed a 'string' as argument 1 or a filename as +# argument 2. The 'string' might be a string of XML or it might be a +# reference to an IO::Handle. (This non-intuitive mess results in part from +# the way XML::Parser works but that's really no excuse). +# + +sub build_tree { + my $self = shift; + my $filename = shift; + my $string = shift; + + + my $preferred_parser = $PREFERRED_PARSER; + unless(defined($preferred_parser)) { + $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || ''; + } + if($preferred_parser eq 'XML::Parser') { + return($self->build_tree_xml_parser($filename, $string)); + } + + eval { require XML::SAX; }; # We didn't need it until now + if($@) { # No XML::SAX - fall back to XML::Parser + if($preferred_parser) { # unless a SAX parser was expressly requested + croak "XMLin() could not load XML::SAX"; + } + return($self->build_tree_xml_parser($filename, $string)); + } + + $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser); + + my $sp = XML::SAX::ParserFactory->parser(Handler => $self); + + $self->{nocollapse} = 1; + my($tree); + if($filename) { + $tree = $sp->parse_uri($filename); + } + else { + if(ref($string)) { + $tree = $sp->parse_file($string); + } + else { + $tree = $sp->parse_string($string); + } + } + + return($tree); +} + + +############################################################################## +# Method: build_tree_xml_parser() +# +# This routine will be called if XML::SAX is not installed, or if XML::Parser +# was specifically requested. It takes the same arguments as build_tree() and +# returns the same data structure (XML::Parser 'Tree' style). +# + +sub build_tree_xml_parser { + my $self = shift; + my $filename = shift; + my $string = shift; + + + eval { + local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load() + require XML::Parser; # We didn't need it until now + }; + if($@) { + croak "XMLin() requires either XML::SAX or XML::Parser"; + } + + if($self->{opt}->{nsexpand}) { + carp "'nsexpand' option requires XML::SAX"; + } + + my $xp = new XML::Parser(Style => 'Tree', @{$self->{opt}->{parseropts}}); + my($tree); + if($filename) { + # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl + local(*XML_FILE); + open(XML_FILE, '<', $filename) || croak qq($filename - $!); + $tree = $xp->parse(*XML_FILE); + close(XML_FILE); + } + else { + $tree = $xp->parse($string); + } + + return($tree); +} + + +############################################################################## +# Sub: StorableSave() +# +# Wrapper routine for invoking Storable::nstore() to cache a parsed data +# structure. +# + +sub StorableSave { + my($data, $filename) = @_; + + my $cachefile = $filename; + $cachefile =~ s{(\.xml)?$}{.stor}; + + require Storable; # We didn't need it until now + + if ('VMS' eq $^O) { + Storable::nstore($data, $cachefile); + } + else { + # If the following line fails for you, your Storable.pm is old - upgrade + Storable::lock_nstore($data, $cachefile); + } + +} + + +############################################################################## +# Sub: StorableRestore() +# +# Wrapper routine for invoking Storable::retrieve() to read a cached parsed +# data structure. Only returns cached data if the cache file exists and is +# newer than the source XML file. +# + +sub StorableRestore { + my($filename) = @_; + + my $cachefile = $filename; + $cachefile =~ s{(\.xml)?$}{.stor}; + + return unless(-r $cachefile); + return unless((stat($cachefile))[9] > (stat($filename))[9]); + + require Storable; # We didn't need it until now + + if ('VMS' eq $^O) { + return(Storable::retrieve($cachefile)); + } + else { + return(Storable::lock_retrieve($cachefile)); + } + +} + + +############################################################################## +# Sub: MemShareSave() +# +# Takes the supplied data structure reference and stores it away in a global +# hash structure. +# + +sub MemShareSave { + my($data, $filename) = @_; + + $MemShareCache{$filename} = [time(), $data]; +} + + +############################################################################## +# Sub: MemShareRestore() +# +# Takes a filename and looks in a global hash for a cached parsed version. +# + +sub MemShareRestore { + my($filename) = @_; + + return unless($MemShareCache{$filename}); + return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]); + + return($MemShareCache{$filename}->[1]); + +} + + +############################################################################## +# Sub: MemCopySave() +# +# Takes the supplied data structure and stores a copy of it in a global hash +# structure. +# + +sub MemCopySave { + my($data, $filename) = @_; + + require Storable; # We didn't need it until now + + $MemCopyCache{$filename} = [time(), Storable::dclone($data)]; +} + + +############################################################################## +# Sub: MemCopyRestore() +# +# Takes a filename and looks in a global hash for a cached parsed version. +# Returns a reference to a copy of that data structure. +# + +sub MemCopyRestore { + my($filename) = @_; + + return unless($MemCopyCache{$filename}); + return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]); + + return(Storable::dclone($MemCopyCache{$filename}->[1])); + +} + + +############################################################################## +# Sub/Method: XMLout() +# +# Exported routine for 'unslurping' a data structure out to XML. +# +# Expects a reference to a data structure and an optional list of option +# name => value pairs. +# + +sub XMLout { + + # If this is not a method call, create an object + + my $self; + if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) { + $self = shift; + } + else { + $self = new XML::Simple(); + } + + croak "XMLout() requires at least one argument" unless(@_); + my $ref = shift; + + $self->handle_options('out', @_); + + + # If namespace expansion is set, XML::NamespaceSupport is required + + if($self->{opt}->{nsexpand}) { + require XML::NamespaceSupport; + $self->{nsup} = XML::NamespaceSupport->new(); + $self->{ns_prefix} = 'aaa'; + } + + + # Wrap top level arrayref in a hash + + if(UNIVERSAL::isa($ref, 'ARRAY')) { + $ref = { anon => $ref }; + } + + + # Extract rootname from top level hash if keeproot enabled + + if($self->{opt}->{keeproot}) { + my(@keys) = keys(%$ref); + if(@keys == 1) { + $ref = $ref->{$keys[0]}; + $self->{opt}->{rootname} = $keys[0]; + } + } + + # Ensure there are no top level attributes if we're not adding root elements + + elsif($self->{opt}->{rootname} eq '') { + if(UNIVERSAL::isa($ref, 'HASH')) { + my $refsave = $ref; + $ref = {}; + foreach (keys(%$refsave)) { + if(ref($refsave->{$_})) { + $ref->{$_} = $refsave->{$_}; + } + else { + $ref->{$_} = [ $refsave->{$_} ]; + } + } + } + } + + + # Encode the hashref and write to file if necessary + + $self->{_ancestors} = []; + my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, ''); + delete $self->{_ancestors}; + + if($self->{opt}->{xmldecl}) { + $xml = $self->{opt}->{xmldecl} . "\n" . $xml; + } + + if($self->{opt}->{outputfile}) { + if(ref($self->{opt}->{outputfile})) { + return($self->{opt}->{outputfile}->print($xml)); + } + else { + local(*OUT); + open(OUT, '>', "$self->{opt}->{outputfile}") || + croak "open($self->{opt}->{outputfile}): $!"; + binmode(OUT, ':utf8') if($] >= 5.008); + print OUT $xml || croak "print: $!"; + close(OUT); + } + } + elsif($self->{opt}->{handler}) { + require XML::SAX; + my $sp = XML::SAX::ParserFactory->parser( + Handler => $self->{opt}->{handler} + ); + return($sp->parse_string($xml)); + } + else { + return($xml); + } +} + + +############################################################################## +# Method: handle_options() +# +# Helper routine for both XMLin() and XMLout(). Both routines handle their +# first argument and assume all other args are options handled by this routine. +# Saves a hash of options in $self->{opt}. +# +# If default options were passed to the constructor, they will be retrieved +# here and merged with options supplied to the method call. +# +# First argument should be the string 'in' or the string 'out'. +# +# Remaining arguments should be name=>value pairs. Sets up default values +# for options not supplied. Unrecognised options are a fatal error. +# + +sub handle_options { + my $self = shift; + my $dirn = shift; + + + # Determine valid options based on context + + my %known_opt; + if($dirn eq 'in') { + @known_opt{@KnownOptIn} = @KnownOptIn; + } + else { + @known_opt{@KnownOptOut} = @KnownOptOut; + } + + + # Store supplied options in hashref and weed out invalid ones + + if(@_ % 2) { + croak "Options must be name=>value pairs (odd number supplied)"; + } + my %raw_opt = @_; + my $opt = {}; + $self->{opt} = $opt; + + while(my($key, $val) = each %raw_opt) { + my $lkey = lc($key); + $lkey =~ s/_//g; + croak "Unrecognised option: $key" unless($known_opt{$lkey}); + $opt->{$lkey} = $val; + } + + + # Merge in options passed to constructor + + foreach (keys(%known_opt)) { + unless(exists($opt->{$_})) { + if(exists($self->{def_opt}->{$_})) { + $opt->{$_} = $self->{def_opt}->{$_}; + } + } + } + + + # Set sensible defaults if not supplied + + if(exists($opt->{rootname})) { + unless(defined($opt->{rootname})) { + $opt->{rootname} = ''; + } + } + else { + $opt->{rootname} = $DefRootName; + } + + if($opt->{xmldecl} and $opt->{xmldecl} eq '1') { + $opt->{xmldecl} = $DefXmlDecl; + } + + if(exists($opt->{contentkey})) { + if($opt->{contentkey} =~ m{^-(.*)$}) { + $opt->{contentkey} = $1; + $opt->{collapseagain} = 1; + } + } + else { + $opt->{contentkey} = $DefContentKey; + } + + unless(exists($opt->{normalisespace})) { + $opt->{normalisespace} = $opt->{normalizespace}; + } + $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace})); + + # Cleanups for values assumed to be arrays later + + if($opt->{searchpath}) { + unless(ref($opt->{searchpath})) { + $opt->{searchpath} = [ $opt->{searchpath} ]; + } + } + else { + $opt->{searchpath} = [ ]; + } + + if($opt->{cache} and !ref($opt->{cache})) { + $opt->{cache} = [ $opt->{cache} ]; + } + if($opt->{cache}) { + $_ = lc($_) foreach (@{$opt->{cache}}); + } + + if(exists($opt->{parseropts})) { + if($^W) { + carp "Warning: " . + "'ParserOpts' is deprecated, contact the author if you need it"; + } + } + else { + $opt->{parseropts} = [ ]; + } + + + # Special cleanup for {forcearray} which could be regex, arrayref or boolean + # or left to default to 0 + + if(exists($opt->{forcearray})) { + if(ref($opt->{forcearray}) eq 'Regexp') { + $opt->{forcearray} = [ $opt->{forcearray} ]; + } + + if(ref($opt->{forcearray}) eq 'ARRAY') { + my @force_list = @{$opt->{forcearray}}; + if(@force_list) { + $opt->{forcearray} = {}; + foreach my $tag (@force_list) { + if(ref($tag) eq 'Regexp') { + push @{$opt->{forcearray}->{_regex}}, $tag; + } + else { + $opt->{forcearray}->{$tag} = 1; + } + } + } + else { + $opt->{forcearray} = 0; + } + } + else { + $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 ); + } + } + else { + if($StrictMode and $dirn eq 'in') { + croak "No value specified for 'ForceArray' option in call to XML$dirn()"; + } + $opt->{forcearray} = 0; + } + + + # Special cleanup for {keyattr} which could be arrayref or hashref or left + # to default to arrayref + + if(exists($opt->{keyattr})) { + if(ref($opt->{keyattr})) { + if(ref($opt->{keyattr}) eq 'HASH') { + + # Make a copy so we can mess with it + + $opt->{keyattr} = { %{$opt->{keyattr}} }; + + + # Convert keyattr => { elem => '+attr' } + # to keyattr => { elem => [ 'attr', '+' ] } + + foreach my $el (keys(%{$opt->{keyattr}})) { + if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) { + $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ]; + if($StrictMode and $dirn eq 'in') { + next if($opt->{forcearray} == 1); + next if(ref($opt->{forcearray}) eq 'HASH' + and $opt->{forcearray}->{$el}); + croak "<$el> set in KeyAttr but not in ForceArray"; + } + } + else { + delete($opt->{keyattr}->{$el}); # Never reached (famous last words?) + } + } + } + else { + if(@{$opt->{keyattr}} == 0) { + delete($opt->{keyattr}); + } + } + } + else { + $opt->{keyattr} = [ $opt->{keyattr} ]; + } + } + else { + if($StrictMode) { + croak "No value specified for 'KeyAttr' option in call to XML$dirn()"; + } + $opt->{keyattr} = [ @DefKeyAttr ]; + } + + + # Special cleanup for {valueattr} which could be arrayref or hashref + + if(exists($opt->{valueattr})) { + if(ref($opt->{valueattr}) eq 'ARRAY') { + $opt->{valueattrlist} = {}; + $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} }); + } + } + + # make sure there's nothing weird in {grouptags} + + if($opt->{grouptags} and !UNIVERSAL::isa($opt->{grouptags}, 'HASH')) { + croak "Illegal value for 'GroupTags' option - expected a hashref"; + } + + + # Check the {variables} option is valid and initialise variables hash + + if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) { + croak "Illegal value for 'Variables' option - expected a hashref"; + } + + if($opt->{variables}) { + $self->{_var_values} = { %{$opt->{variables}} }; + } + elsif($opt->{varattr}) { + $self->{_var_values} = {}; + } + +} + + +############################################################################## +# Method: find_xml_file() +# +# Helper routine for XMLin(). +# Takes a filename, and a list of directories, attempts to locate the file in +# the directories listed. +# Returns a full pathname on success; croaks on failure. +# + +sub find_xml_file { + my $self = shift; + my $file = shift; + my @search_path = @_; + + + my($filename, $filedir) = + File::Basename::fileparse($file); + + if($filename ne $file) { # Ignore searchpath if dir component + return($file) if(-e $file); + } + else { + my($path); + foreach $path (@search_path) { + my $fullpath = File::Spec->catfile($path, $file); + return($fullpath) if(-e $fullpath); + } + } + + # If user did not supply a search path, default to current directory + + if(!@search_path) { + return($file) if(-e $file); + croak "File does not exist: $file"; + } + + croak "Could not find $file in ", join(':', @search_path); +} + + +############################################################################## +# Method: collapse() +# +# Helper routine for XMLin(). This routine really comprises the 'smarts' (or +# value add) of this module. +# +# Takes the parse tree that XML::Parser produced from the supplied XML and +# recurses through it 'collapsing' unnecessary levels of indirection (nested +# arrays etc) to produce a data structure that is easier to work with. +# +# Elements in the original parser tree are represented as an element name +# followed by an arrayref. The first element of the array is a hashref +# containing the attributes. The rest of the array contains a list of any +# nested elements as name+arrayref pairs: +# +# <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ] +# +# The special element name '0' (zero) flags text content. +# +# This routine cuts down the noise by discarding any text content consisting of +# only whitespace and then moves the nested elements into the attribute hash +# using the name of the nested element as the hash key and the collapsed +# version of the nested element as the value. Multiple nested elements with +# the same name will initially be represented as an arrayref, but this may be +# 'folded' into a hashref depending on the value of the keyattr option. +# + +sub collapse { + my $self = shift; + + + # Start with the hash of attributes + + my $attr = shift; + if($self->{opt}->{noattr}) { # Discard if 'noattr' set + $attr = {}; + } + elsif($self->{opt}->{normalisespace} == 2) { + while(my($key, $value) = each %$attr) { + $attr->{$key} = $self->normalise_space($value) + } + } + + + # Do variable substitutions + + if(my $var = $self->{_var_values}) { + while(my($key, $val) = each(%$attr)) { + $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge; + $attr->{$key} = $val; + } + } + + + # Roll up 'value' attributes (but only if no nested elements) + + if(!@_ and keys %$attr == 1) { + my($k) = keys %$attr; + if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) { + return $attr->{$k}; + } + } + + + # Add any nested elements + + my($key, $val); + while(@_) { + $key = shift; + $val = shift; + + if(ref($val)) { + $val = $self->collapse(@$val); + next if(!defined($val) and $self->{opt}->{suppressempty}); + } + elsif($key eq '0') { + next if($val =~ m{^\s*$}s); # Skip all whitespace content + + $val = $self->normalise_space($val) + if($self->{opt}->{normalisespace} == 2); + + # do variable substitutions + + if(my $var = $self->{_var_values}) { + $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge; + } + + + # look for variable definitions + + if(my $var = $self->{opt}->{varattr}) { + if(exists $attr->{$var}) { + $self->set_var($attr->{$var}, $val); + } + } + + + # Collapse text content in element with no attributes to a string + + if(!%$attr and !@_) { + return($self->{opt}->{forcecontent} ? + { $self->{opt}->{contentkey} => $val } : $val + ); + } + $key = $self->{opt}->{contentkey}; + } + + + # Combine duplicate attributes into arrayref if required + + if(exists($attr->{$key})) { + if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) { + push(@{$attr->{$key}}, $val); + } + else { + $attr->{$key} = [ $attr->{$key}, $val ]; + } + } + elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { + $attr->{$key} = [ $val ]; + } + else { + if( $key ne $self->{opt}->{contentkey} + and ( + ($self->{opt}->{forcearray} == 1) + or ( + (ref($self->{opt}->{forcearray}) eq 'HASH') + and ( + $self->{opt}->{forcearray}->{$key} + or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}}) + ) + ) + ) + ) { + $attr->{$key} = [ $val ]; + } + else { + $attr->{$key} = $val; + } + } + + } + + + # Turn arrayrefs into hashrefs if key fields present + + if($self->{opt}->{keyattr}) { + while(($key,$val) = each %$attr) { + if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { + $attr->{$key} = $self->array_to_hash($key, $val); + } + } + } + + + # disintermediate grouped tags + + if($self->{opt}->{grouptags}) { + while(my($key, $val) = each(%$attr)) { + next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); + next unless(exists($self->{opt}->{grouptags}->{$key})); + + my($child_key, $child_val) = %$val; + + if($self->{opt}->{grouptags}->{$key} eq $child_key) { + $attr->{$key}= $child_val; + } + } + } + + + # Fold hashes containing a single anonymous array up into just the array + + my $count = scalar keys %$attr; + if($count == 1 + and exists $attr->{anon} + and UNIVERSAL::isa($attr->{anon}, 'ARRAY') + ) { + return($attr->{anon}); + } + + + # Do the right thing if hash is empty, otherwise just return it + + if(!%$attr and exists($self->{opt}->{suppressempty})) { + if(defined($self->{opt}->{suppressempty}) and + $self->{opt}->{suppressempty} eq '') { + return(''); + } + return(undef); + } + + + # Roll up named elements with named nested 'value' attributes + + if($self->{opt}->{valueattr}) { + while(my($key, $val) = each(%$attr)) { + next unless($self->{opt}->{valueattr}->{$key}); + next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); + my($k) = keys %$val; + next unless($k eq $self->{opt}->{valueattr}->{$key}); + $attr->{$key} = $val->{$k}; + } + } + + return($attr) + +} + + +############################################################################## +# Method: set_var() +# +# Called when a variable definition is encountered in the XML. (A variable +# definition looks like <element attrname="name">value</element> where attrname +# matches the varattr setting). +# + +sub set_var { + my($self, $name, $value) = @_; + + $self->{_var_values}->{$name} = $value; +} + + +############################################################################## +# Method: get_var() +# +# Called during variable substitution to get the value for the named variable. +# + +sub get_var { + my($self, $name) = @_; + + my $value = $self->{_var_values}->{$name}; + return $value if(defined($value)); + + return '${' . $name . '}'; +} + + +############################################################################## +# Method: normalise_space() +# +# Strips leading and trailing whitespace and collapses sequences of whitespace +# characters to a single space. +# + +sub normalise_space { + my($self, $text) = @_; + + $text =~ s/^\s+//s; + $text =~ s/\s+$//s; + $text =~ s/\s\s+/ /sg; + + return $text; +} + + +############################################################################## +# Method: array_to_hash() +# +# Helper routine for collapse(). +# Attempts to 'fold' an array of hashes into an hash of hashes. Returns a +# reference to the hash on success or the original array if folding is +# not possible. Behaviour is controlled by 'keyattr' option. +# + +sub array_to_hash { + my $self = shift; + my $name = shift; + my $arrayref = shift; + + my $hashref = {}; + + my($i, $key, $val, $flag); + + + # Handle keyattr => { .... } + + if(ref($self->{opt}->{keyattr}) eq 'HASH') { + return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name})); + ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}}; + for($i = 0; $i < @$arrayref; $i++) { + if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and + exists($arrayref->[$i]->{$key}) + ) { + $val = $arrayref->[$i]->{$key}; + if(ref($val)) { + if($StrictMode) { + croak "<$name> element has non-scalar '$key' key attribute"; + } + if($^W) { + carp "Warning: <$name> element has non-scalar '$key' key attribute"; + } + return($arrayref); + } + $val = $self->normalise_space($val) + if($self->{opt}->{normalisespace} == 1); + $hashref->{$val} = { %{$arrayref->[$i]} }; + $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-'); + delete $hashref->{$val}->{$key} unless($flag eq '+'); + } + else { + croak "<$name> element has no '$key' key attribute" if($StrictMode); + carp "Warning: <$name> element has no '$key' key attribute" if($^W); + return($arrayref); + } + } + } + + + # Or assume keyattr => [ .... ] + + else { + ELEMENT: for($i = 0; $i < @$arrayref; $i++) { + return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH')); + + foreach $key (@{$self->{opt}->{keyattr}}) { + if(defined($arrayref->[$i]->{$key})) { + $val = $arrayref->[$i]->{$key}; + return($arrayref) if(ref($val)); + $val = $self->normalise_space($val) + if($self->{opt}->{normalisespace} == 1); + $hashref->{$val} = { %{$arrayref->[$i]} }; + delete $hashref->{$val}->{$key}; + next ELEMENT; + } + } + + return($arrayref); # No keyfield matched + } + } + + # collapse any hashes which now only have a 'content' key + + if($self->{opt}->{collapseagain}) { + $hashref = $self->collapse_content($hashref); + } + + return($hashref); +} + + +############################################################################## +# Method: collapse_content() +# +# Helper routine for array_to_hash +# +# Arguments expected are: +# - an XML::Simple object +# - a hasref +# the hashref is a former array, turned into a hash by array_to_hash because +# of the presence of key attributes +# at this point collapse_content avoids over-complicated structures like +# dir => { libexecdir => { content => '$exec_prefix/libexec' }, +# localstatedir => { content => '$prefix' }, +# } +# into +# dir => { libexecdir => '$exec_prefix/libexec', +# localstatedir => '$prefix', +# } + +sub collapse_content { + my $self = shift; + my $hashref = shift; + + my $contentkey = $self->{opt}->{contentkey}; + + # first go through the values,checking that they are fit to collapse + foreach my $val (values %$hashref) { + return $hashref unless ( (ref($val) eq 'HASH') + and (keys %$val == 1) + and (exists $val->{$contentkey}) + ); + } + + # now collapse them + foreach my $key (keys %$hashref) { + $hashref->{$key}= $hashref->{$key}->{$contentkey}; + } + + return $hashref; +} + + +############################################################################## +# Method: value_to_xml() +# +# Helper routine for XMLout() - recurses through a data structure building up +# and returning an XML representation of that structure as a string. +# +# Arguments expected are: +# - the data structure to be encoded (usually a reference) +# - the XML tag name to use for this item +# - a string of spaces for use as the current indent level +# + +sub value_to_xml { + my $self = shift;; + + + # Grab the other arguments + + my($ref, $name, $indent) = @_; + + my $named = (defined($name) and $name ne '' ? 1 : 0); + + my $nl = "\n"; + + my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack! + if($self->{opt}->{noindent}) { + $indent = ''; + $nl = ''; + } + + + # Convert to XML + + if(ref($ref)) { + croak "circular data structures not supported" + if(grep($_ == $ref, @{$self->{_ancestors}})); + push @{$self->{_ancestors}}, $ref; + } + else { + if($named) { + return(join('', + $indent, '<', $name, '>', + ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)), + '</', $name, ">", $nl + )); + } + else { + return("$ref$nl"); + } + } + + + # Unfold hash to array if possible + + if(UNIVERSAL::isa($ref, 'HASH') # It is a hash + and keys %$ref # and it's not empty + and $self->{opt}->{keyattr} # and folding is enabled + and !$is_root # and its not the root element + ) { + $ref = $self->hash_to_array($name, $ref); + } + + + my @result = (); + my($key, $value); + + + # Handle hashrefs + + if(UNIVERSAL::isa($ref, 'HASH')) { + + # Reintermediate grouped values if applicable + + if($self->{opt}->{grouptags}) { + $ref = $self->copy_hash($ref); + while(my($key, $val) = each %$ref) { + if($self->{opt}->{grouptags}->{$key}) { + $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val }; + } + } + } + + + # Scan for namespace declaration attributes + + my $nsdecls = ''; + my $default_ns_uri; + if($self->{nsup}) { + $ref = $self->copy_hash($ref); + $self->{nsup}->push_context(); + + # Look for default namespace declaration first + + if(exists($ref->{xmlns})) { + $self->{nsup}->declare_prefix('', $ref->{xmlns}); + $nsdecls .= qq( xmlns="$ref->{xmlns}"); + delete($ref->{xmlns}); + } + $default_ns_uri = $self->{nsup}->get_uri(''); + + + # Then check all the other keys + + foreach my $qname (keys(%$ref)) { + my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); + if($uri) { + if($uri eq $xmlns_ns) { + $self->{nsup}->declare_prefix($lname, $ref->{$qname}); + $nsdecls .= qq( xmlns:$lname="$ref->{$qname}"); + delete($ref->{$qname}); + } + } + } + + # Translate any remaining Clarkian names + + foreach my $qname (keys(%$ref)) { + my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); + if($uri) { + if($default_ns_uri and $uri eq $default_ns_uri) { + $ref->{$lname} = $ref->{$qname}; + delete($ref->{$qname}); + } + else { + my $prefix = $self->{nsup}->get_prefix($uri); + unless($prefix) { + # $self->{nsup}->declare_prefix(undef, $uri); + # $prefix = $self->{nsup}->get_prefix($uri); + $prefix = $self->{ns_prefix}++; + $self->{nsup}->declare_prefix($prefix, $uri); + $nsdecls .= qq( xmlns:$prefix="$uri"); + } + $ref->{"$prefix:$lname"} = $ref->{$qname}; + delete($ref->{$qname}); + } + } + } + } + + + my @nested = (); + my $text_content = undef; + if($named) { + push @result, $indent, '<', $name, $nsdecls; + } + + if(keys %$ref) { + my $first_arg = 1; + foreach my $key ($self->sorted_keys($name, $ref)) { + my $value = $ref->{$key}; + next if(substr($key, 0, 1) eq '-'); + if(!defined($value)) { + next if $self->{opt}->{suppressempty}; + unless(exists($self->{opt}->{suppressempty}) + and !defined($self->{opt}->{suppressempty}) + ) { + carp 'Use of uninitialized value' if($^W); + } + if($key eq $self->{opt}->{contentkey}) { + $text_content = ''; + } + else { + $value = exists($self->{opt}->{suppressempty}) ? {} : ''; + } + } + + if(!ref($value) + and $self->{opt}->{valueattr} + and $self->{opt}->{valueattr}->{$key} + ) { + $value = { $self->{opt}->{valueattr}->{$key} => $value }; + } + + if(ref($value) or $self->{opt}->{noattr}) { + push @nested, + $self->value_to_xml($value, $key, "$indent "); + } + else { + $value = $self->escape_value($value) unless($self->{opt}->{noescape}); + if($key eq $self->{opt}->{contentkey}) { + $text_content = $value; + } + else { + push @result, "\n$indent " . ' ' x length($name) + if($self->{opt}->{attrindent} and !$first_arg); + push @result, ' ', $key, '="', $value , '"'; + $first_arg = 0; + } + } + } + } + else { + $text_content = ''; + } + + if(@nested or defined($text_content)) { + if($named) { + push @result, ">"; + if(defined($text_content)) { + push @result, $text_content; + $nested[0] =~ s/^\s+// if(@nested); + } + else { + push @result, $nl; + } + if(@nested) { + push @result, @nested, $indent; + } + push @result, '</', $name, ">", $nl; + } + else { + push @result, @nested; # Special case if no root elements + } + } + else { + push @result, " />", $nl; + } + $self->{nsup}->pop_context() if($self->{nsup}); + } + + + # Handle arrayrefs + + elsif(UNIVERSAL::isa($ref, 'ARRAY')) { + foreach $value (@$ref) { + if(!ref($value)) { + push @result, + $indent, '<', $name, '>', + ($self->{opt}->{noescape} ? $value : $self->escape_value($value)), + '</', $name, ">$nl"; + } + elsif(UNIVERSAL::isa($value, 'HASH')) { + push @result, $self->value_to_xml($value, $name, $indent); + } + else { + push @result, + $indent, '<', $name, ">$nl", + $self->value_to_xml($value, 'anon', "$indent "), + $indent, '</', $name, ">$nl"; + } + } + } + + else { + croak "Can't encode a value of type: " . ref($ref); + } + + + pop @{$self->{_ancestors}} if(ref($ref)); + + return(join('', @result)); +} + + +############################################################################## +# Method: sorted_keys() +# +# Returns the keys of the referenced hash sorted into alphabetical order, but +# with the 'key' key (as in KeyAttr) first, if there is one. +# + +sub sorted_keys { + my($self, $name, $ref) = @_; + + return keys %$ref if $self->{opt}->{nosort}; + + my %hash = %$ref; + my $keyattr = $self->{opt}->{keyattr}; + + my @key; + + if(ref $keyattr eq 'HASH') { + if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) { + push @key, $keyattr->{$name}->[0]; + delete $hash{$keyattr->{$name}->[0]}; + } + } + elsif(ref $keyattr eq 'ARRAY') { + foreach (@{$keyattr}) { + if(exists $hash{$_}) { + push @key, $_; + delete $hash{$_}; + last; + } + } + } + + return(@key, sort keys %hash); +} + +############################################################################## +# Method: escape_value() +# +# Helper routine for automatically escaping values for XMLout(). +# Expects a scalar data value. Returns escaped version. +# + +sub escape_value { + my($self, $data) = @_; + + return '' unless(defined($data)); + + $data =~ s/&/&/sg; + $data =~ s/</</sg; + $data =~ s/>/>/sg; + $data =~ s/"/"/sg; + + my $level = $self->{opt}->{numericescape} or return $data; + + return $self->numeric_escape($data, $level); +} + +sub numeric_escape { + my($self, $data, $level) = @_; + + use utf8; # required for 5.6 + + if($self->{opt}->{numericescape} eq '2') { + $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse; + } + else { + $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse; + } + + return $data; +} + + +############################################################################## +# Method: hash_to_array() +# +# Helper routine for value_to_xml(). +# Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a +# reference to the array on success or the original hash if unfolding is +# not possible. +# + +sub hash_to_array { + my $self = shift; + my $parent = shift; + my $hashref = shift; + + my $arrayref = []; + + my($key, $value); + + my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref; + foreach $key (@keys) { + $value = $hashref->{$key}; + return($hashref) unless(UNIVERSAL::isa($value, 'HASH')); + + if(ref($self->{opt}->{keyattr}) eq 'HASH') { + return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent})); + push @$arrayref, $self->copy_hash( + $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key + ); + } + else { + push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value }); + } + } + + return($arrayref); +} + + +############################################################################## +# Method: copy_hash() +# +# Helper routine for hash_to_array(). When unfolding a hash of hashes into +# an array of hashes, we need to copy the key from the outer hash into the +# inner hash. This routine makes a copy of the original hash so we don't +# destroy the original data structure. You might wish to override this +# method if you're using tied hashes and don't want them to get untied. +# + +sub copy_hash { + my($self, $orig, @extra) = @_; + + return { @extra, %$orig }; +} + +############################################################################## +# Methods required for building trees from SAX events +############################################################################## + +sub start_document { + my $self = shift; + + $self->handle_options('in') unless($self->{opt}); + + $self->{lists} = []; + $self->{curlist} = $self->{tree} = []; +} + + +sub start_element { + my $self = shift; + my $element = shift; + + my $name = $element->{Name}; + if($self->{opt}->{nsexpand}) { + $name = $element->{LocalName} || ''; + if($element->{NamespaceURI}) { + $name = '{' . $element->{NamespaceURI} . '}' . $name; + } + } + my $attributes = {}; + if($element->{Attributes}) { # Might be undef + foreach my $attr (values %{$element->{Attributes}}) { + if($self->{opt}->{nsexpand}) { + my $name = $attr->{LocalName} || ''; + if($attr->{NamespaceURI}) { + $name = '{' . $attr->{NamespaceURI} . '}' . $name + } + $name = 'xmlns' if($name eq $bad_def_ns_jcn); + $attributes->{$name} = $attr->{Value}; + } + else { + $attributes->{$attr->{Name}} = $attr->{Value}; + } + } + } + my $newlist = [ $attributes ]; + push @{ $self->{lists} }, $self->{curlist}; + push @{ $self->{curlist} }, $name => $newlist; + $self->{curlist} = $newlist; +} + + +sub characters { + my $self = shift; + my $chars = shift; + + my $text = $chars->{Data}; + my $clist = $self->{curlist}; + my $pos = $#$clist; + + if ($pos > 0 and $clist->[$pos - 1] eq '0') { + $clist->[$pos] .= $text; + } + else { + push @$clist, 0 => $text; + } +} + + +sub end_element { + my $self = shift; + + $self->{curlist} = pop @{ $self->{lists} }; +} + + +sub end_document { + my $self = shift; + + delete($self->{curlist}); + delete($self->{lists}); + + my $tree = $self->{tree}; + delete($self->{tree}); + + + # Return tree as-is to XMLin() + + return($tree) if($self->{nocollapse}); + + + # Or collapse it before returning it to SAX parser class + + if($self->{opt}->{keeproot}) { + $tree = $self->collapse({}, @$tree); + } + else { + $tree = $self->collapse(@{$tree->[1]}); + } + + if($self->{opt}->{datahandler}) { + return($self->{opt}->{datahandler}->($self, $tree)); + } + + return($tree); +} + +*xml_in = \&XMLin; +*xml_out = \&XMLout; + +1; + +__END__ + +=head1 QUICK START + +Say you have a script called B<foo> and a file of configuration options +called B<foo.xml> containing this: + + <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug"> + <server name="sahara" osname="solaris" osversion="2.6"> + <address>10.0.0.101</address> + <address>10.0.1.101</address> + </server> + <server name="gobi" osname="irix" osversion="6.5"> + <address>10.0.0.102</address> + </server> + <server name="kalahari" osname="linux" osversion="2.0.34"> + <address>10.0.0.103</address> + <address>10.0.1.103</address> + </server> + </config> + +The following lines of code in B<foo>: + + use XML::Simple; + + my $config = XMLin(); + +will 'slurp' the configuration options into the hashref $config (because no +arguments are passed to C<XMLin()> the name and location of the XML file will +be inferred from name and location of the script). You can dump out the +contents of the hashref using Data::Dumper: + + use Data::Dumper; + + print Dumper($config); + +which will produce something like this (formatting has been adjusted for +brevity): + + { + 'logdir' => '/var/log/foo/', + 'debugfile' => '/tmp/foo.debug', + 'server' => { + 'sahara' => { + 'osversion' => '2.6', + 'osname' => 'solaris', + 'address' => [ '10.0.0.101', '10.0.1.101' ] + }, + 'gobi' => { + 'osversion' => '6.5', + 'osname' => 'irix', + 'address' => '10.0.0.102' + }, + 'kalahari' => { + 'osversion' => '2.0.34', + 'osname' => 'linux', + 'address' => [ '10.0.0.103', '10.0.1.103' ] + } + } + } + +Your script could then access the name of the log directory like this: + + print $config->{logdir}; + +similarly, the second address on the server 'kalahari' could be referenced as: + + print $config->{server}->{kalahari}->{address}->[1]; + +What could be simpler? (Rhetorical). + +For simple requirements, that's really all there is to it. If you want to +store your XML in a different directory or file, or pass it in as a string or +even pass it in via some derivative of an IO::Handle, you'll need to check out +L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that +neat little transformation that produced $config->{server}) you'll find options +for that as well. + +If you want to generate XML (for example to write a modified version of +$config back out as XML), check out C<XMLout()>. + +If your needs are not so simple, this may not be the module for you. In that +case, you might want to read L<"WHERE TO FROM HERE?">. + +=head1 DESCRIPTION + +The XML::Simple module provides a simple API layer on top of an underlying XML +parsing module (either XML::Parser or one of the SAX2 parser modules). Two +functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicity +request the lower case versions of the function names: C<xml_in()> and +C<xml_out()>. + +The simplest approach is to call these two functions directly, but an +optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below) +allows them to be called as methods of an B<XML::Simple> object. The object +interface can also be used at either end of a SAX pipeline. + +=head2 XMLin() + +Parses XML formatted data and returns a reference to a data structure which +contains the same information in a more readily accessible form. (Skip +down to L<"EXAMPLES"> below, for more sample code). + +C<XMLin()> accepts an optional XML specifier followed by zero or more 'name => +value' option pairs. The XML specifier can be one of the following: + +=over 4 + +=item A filename + +If the filename contains no directory components C<XMLin()> will look for the +file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the +current directory if the SearchPath option is not defined. eg: + + $ref = XMLin('/etc/params.xml'); + +Note, the filename '-' can be used to parse from STDIN. + +=item undef + +If there is no XML specifier, C<XMLin()> will check the script directory and +each of the SearchPath directories for a file with the same name as the script +but with the extension '.xml'. Note: if you wish to specify options, you +must specify the value 'undef'. eg: + + $ref = XMLin(undef, ForceArray => 1); + +=item A string of XML + +A string containing XML (recognised by the presence of '<' and '>' characters) +will be parsed directly. eg: + + $ref = XMLin('<opt username="bob" password="flurp" />'); + +=item An IO::Handle object + +An IO::Handle object will be read to EOF and its contents parsed. eg: + + $fh = new IO::File('/etc/params.xml'); + $ref = XMLin($fh); + +=back + +=head2 XMLout() + +Takes a data structure (generally a hashref) and returns an XML encoding of +that structure. If the resulting XML is parsed using C<XMLin()>, it should +return a data structure equivalent to the original (see caveats below). + +The C<XMLout()> function can also be used to output the XML as SAX events +see the C<Handler> option and L<"SAX SUPPORT"> for more details). + +When translating hashes to XML, hash keys which have a leading '-' will be +silently skipped. This is the approved method for marking elements of a +data structure which should be ignored by C<XMLout>. (Note: If these items +were not skipped the key names would be emitted as element or attribute names +with a leading '-' which would not be valid XML). + +=head2 Caveats + +Some care is required in creating data structures which will be passed to +C<XMLout()>. Hash keys from the data structure will be encoded as either XML +element names or attribute names. Therefore, you should use hash key names +which conform to the relatively strict XML naming rules: + +Names in XML must begin with a letter. The remaining characters may be +letters, digits, hyphens (-), underscores (_) or full stops (.). It is also +allowable to include one colon (:) in an element name but this should only be +used when working with namespaces (B<XML::Simple> can only usefully work with +namespaces when teamed with a SAX Parser). + +You can use other punctuation characters in hash values (just not in hash +keys) however B<XML::Simple> does not support dumping binary data. + +If you break these rules, the current implementation of C<XMLout()> will +simply emit non-compliant XML which will be rejected if you try to read it +back in. (A later version of B<XML::Simple> might take a more proactive +approach). + +Note also that although you can nest hashes and arrays to arbitrary levels, +circular data structures are not supported and will cause C<XMLout()> to die. + +If you wish to 'round-trip' arbitrary data structures from Perl to XML and back +to Perl, then you should probably disable array folding (using the KeyAttr +option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the +expected results, you may prefer to use L<XML::Dumper> which is designed for +exactly that purpose. + +Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs. + + +=head1 OPTIONS + +B<XML::Simple> supports a number of options (in fact as each release of +B<XML::Simple> adds more options, the module's claim to the name 'Simple' +becomes increasingly tenuous). If you find yourself repeatedly having to +specify the same options, you might like to investigate L<"OPTIONAL OO +INTERFACE"> below. + +If you can't be bothered reading the documentation, refer to +L<"STRICT MODE"> to automatically catch common mistakes. + +Because there are so many options, it's hard for new users to know which ones +are important, so here are the two you really need to know about: + +=over 4 + +=item * + +check out C<ForceArray> because you'll almost certainly want to turn it on + +=item * + +make sure you know what the C<KeyAttr> option does and what its default value is +because it may surprise you otherwise (note in particular that 'KeyAttr' +affects both C<XMLin> and C<XMLout>) + +=back + +The option name headings below have a trailing 'comment' - a hash followed by +two pieces of metadata: + +=over 4 + +=item * + +Options are marked with 'I<in>' if they are recognised by C<XMLin()> and +'I<out>' if they are recognised by C<XMLout()>. + +=item * + +Each option is also flagged to indicate whether it is: + + 'important' - don't use the module until you understand this one + 'handy' - you can skip this on the first time through + 'advanced' - you can skip this on the second time through + 'SAX only' - don't worry about this unless you're using SAX (or + alternatively if you need this, you also need SAX) + 'seldom used' - you'll probably never use this unless you were the + person that requested the feature + +=back + +The options are listed alphabetically: + +Note: option names are no longer case sensitive so you can use the mixed case +versions shown here; all lower case as required by versions 2.03 and earlier; +or you can add underscores between the words (eg: key_attr). + + +=head2 AttrIndent => 1 I<# out - handy> + +When you are using C<XMLout()>, enable this option to have attributes printed +one-per-line with sensible indentation rather than all on one line. + +=head2 Cache => [ cache schemes ] I<# in - advanced> + +Because loading the B<XML::Parser> module and parsing an XML file can consume a +significant number of CPU cycles, it is often desirable to cache the output of +C<XMLin()> for later reuse. + +When parsing from a named file, B<XML::Simple> supports a number of caching +schemes. The 'Cache' option may be used to specify one or more schemes (using +an anonymous array). Each scheme will be tried in turn in the hope of finding +a cached pre-parsed representation of the XML file. If no cached copy is +found, the file will be parsed and the first cache scheme in the list will be +used to save a copy of the results. The following cache schemes have been +implemented: + +=over 4 + +=item storable + +Utilises B<Storable.pm> to read/write a cache file with the same name as the +XML file but with the extension .stor + +=item memshare + +When a file is first parsed, a copy of the resulting data structure is retained +in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse +the same file will return a reference to this structure. This cached version +will persist only for the life of the Perl interpreter (which in the case of +mod_perl for example, may be some significant time). + +Because each caller receives a reference to the same data structure, a change +made by one caller will be visible to all. For this reason, the reference +returned should be treated as read-only. + +=item memcopy + +This scheme works identically to 'memshare' (above) except that each caller +receives a reference to a new data structure which is a copy of the cached +version. Copying the data structure will add a little processing overhead, +therefore this scheme should only be used where the caller intends to modify +the data structure (or wishes to protect itself from others who might). This +scheme uses B<Storable.pm> to perform the copy. + +=back + +Warning! The memory-based caching schemes compare the timestamp on the file to +the time when it was last parsed. If the file is stored on an NFS filesystem +(or other network share) and the clock on the file server is not exactly +synchronised with the clock where your script is run, updates to the source XML +file may appear to be ignored. + +=head2 ContentKey => 'keyname' I<# in+out - seldom used> + +When text content is parsed to a hash value, this option let's you specify a +name for the hash key to override the default 'content'. So for example: + + XMLin('<opt one="1">Text</opt>', ContentKey => 'text') + +will parse to: + + { 'one' => 1, 'text' => 'Text' } + +instead of: + + { 'one' => 1, 'content' => 'Text' } + +C<XMLout()> will also honour the value of this option when converting a hashref +to XML. + +You can also prefix your selected key name with a '-' character to have +C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after +array folding. For example: + + XMLin( + '<opt><item name="one">First</item><item name="two">Second</item></opt>', + KeyAttr => {item => 'name'}, + ForceArray => [ 'item' ], + ContentKey => '-content' + ) + +will parse to: + + { + 'item' => { + 'one' => 'First' + 'two' => 'Second' + } + } + +rather than this (without the '-'): + + { + 'item' => { + 'one' => { 'content' => 'First' } + 'two' => { 'content' => 'Second' } + } + } + +=head2 DataHandler => code_ref I<# in - SAX only> + +When you use an B<XML::Simple> object as a SAX handler, it will return a +'simple tree' data structure in the same format as C<XMLin()> would return. If +this option is set (to a subroutine reference), then when the tree is built the +subroutine will be called and passed two arguments: a reference to the +B<XML::Simple> object and a reference to the data tree. The return value from +the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for +more details). + +=head2 ForceArray => 1 I<# in - important> + +This option should be set to '1' to force nested elements to be represented +as arrays even when there is only one. Eg, with ForceArray enabled, this +XML: + + <opt> + <name>value</name> + </opt> + +would parse to this: + + { + 'name' => [ + 'value' + ] + } + +instead of this (the default): + + { + 'name' => 'value' + } + +This option is especially useful if the data structure is likely to be written +back out as XML and the default behaviour of rolling single nested elements up +into attributes is not desirable. + +If you are using the array folding feature, you should almost certainly enable +this option. If you do not, single nested elements will not be parsed to +arrays and therefore will not be candidates for folding to a hash. (Given that +the default value of 'KeyAttr' enables array folding, the default value of this +option should probably also have been enabled too - sorry). + +=head2 ForceArray => [ names ] I<# in - important> + +This alternative (and preferred) form of the 'ForceArray' option allows you to +specify a list of element names which should always be forced into an array +representation, rather than the 'all or nothing' approach above. + +It is also possible (since version 2.05) to include compiled regular +expressions in the list - any element names which match the pattern will be +forced to arrays. If the list contains only a single regex, then it is not +necessary to enclose it in an arrayref. Eg: + + ForceArray => qr/_list$/ + +=head2 ForceContent => 1 I<# in - seldom used> + +When C<XMLin()> parses elements which have text content as well as attributes, +the text content must be represented as a hash value rather than a simple +scalar. This option allows you to force text content to always parse to +a hash value even when there are no attributes. So for example: + + XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) + +will parse to: + + { + 'x' => { 'content' => 'text1' }, + 'y' => { 'a' => 2, 'content' => 'text2' } + } + +instead of: + + { + 'x' => 'text1', + 'y' => { 'a' => 2, 'content' => 'text2' } + } + +=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy> + +You can use this option to eliminate extra levels of indirection in your Perl +data structure. For example this XML: + + <opt> + <searchpath> + <dir>/usr/bin</dir> + <dir>/usr/local/bin</dir> + <dir>/usr/X11/bin</dir> + </searchpath> + </opt> + +Would normally be read into a structure like this: + + { + searchpath => { + dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] + } + } + +But when read in with the appropriate value for 'GroupTags': + + my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); + +It will return this simpler structure: + + { + searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] + } + +The grouping element (C<< <searchpath> >> in the example) must not contain any +attributes or elements other than the grouped element. + +You can specify multiple 'grouping element' to 'grouped element' mappings in +the same hashref. If this option is combined with C<KeyAttr>, the array +folding will occur first and then the grouped element names will be eliminated. + +C<XMLout> will also use the grouptag mappings to re-introduce the tags around +the grouped elements. Beware though that this will occur in all places that +the 'grouping tag' name occurs - you probably don't want to use the same name +for elements as well as attributes. + +=head2 Handler => object_ref I<# out - SAX only> + +Use the 'Handler' option to have C<XMLout()> generate SAX events rather than +returning a string of XML. For more details see L<"SAX SUPPORT"> below. + +Note: the current implementation of this option generates a string of XML +and uses a SAX parser to translate it into SAX events. The normal encoding +rules apply here - your data must be UTF8 encoded unless you specify an +alternative encoding via the 'XMLDecl' option; and by the time the data reaches +the handler object, it will be in UTF8 form regardless of the encoding you +supply. A future implementation of this option may generate the events +directly. + +=head2 KeepRoot => 1 I<# in+out - handy> + +In its attempt to return a data structure free of superfluous detail and +unnecessary levels of indirection, C<XMLin()> normally discards the root +element name. Setting the 'KeepRoot' option to '1' will cause the root element +name to be retained. So after executing this code: + + $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) + +You'll be able to reference the tempdir as +C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default +C<$config-E<gt>{tempdir}>. + +Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the +data structure already contains a root element name and it is not necessary to +add another. + +=head2 KeyAttr => [ list ] I<# in+out - important> + +This option controls the 'array folding' feature which translates nested +elements from an array to a hash. It also controls the 'unfolding' of hashes +to arrays. + +For example, this XML: + + <opt> + <user login="grep" fullname="Gary R Epstein" /> + <user login="stty" fullname="Simon T Tyson" /> + </opt> + +would, by default, parse to this: + + { + 'user' => [ + { + 'login' => 'grep', + 'fullname' => 'Gary R Epstein' + }, + { + 'login' => 'stty', + 'fullname' => 'Simon T Tyson' + } + ] + } + +If the option 'KeyAttr => "login"' were used to specify that the 'login' +attribute is a key, the same XML would parse to: + + { + 'user' => { + 'stty' => { + 'fullname' => 'Simon T Tyson' + }, + 'grep' => { + 'fullname' => 'Gary R Epstein' + } + } + } + +The key attribute names should be supplied in an arrayref if there is more +than one. C<XMLin()> will attempt to match attribute names in the order +supplied. C<XMLout()> will use the first attribute name supplied when +'unfolding' a hash into an array. + +Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do +not want folding on input or unfolding on output you must setting this option +to an empty list to disable the feature. + +Note 2: If you wish to use this option, you should also enable the +C<ForceArray> option. Without 'ForceArray', a single nested element will be +rolled up into a scalar rather than an array and therefore will not be folded +(since only arrays get folded). + +=head2 KeyAttr => { list } I<# in+out - important> + +This alternative (and preferred) method of specifiying the key attributes +allows more fine grained control over which elements are folded and on which +attributes. For example the option 'KeyAttr => { package => 'id' } will cause +any package elements to be folded on the 'id' attribute. No other elements +which have an 'id' attribute will be folded at all. + +Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">) +if this syntax is used and an element which does not have the specified key +attribute is encountered (eg: a 'package' element without an 'id' attribute, to +use the example above). Warnings will only be generated if B<-w> is in force. + +Two further variations are made possible by prefixing a '+' or a '-' character +to the attribute name: + +The option 'KeyAttr => { user => "+login" }' will cause this XML: + + <opt> + <user login="grep" fullname="Gary R Epstein" /> + <user login="stty" fullname="Simon T Tyson" /> + </opt> + +to parse to this data structure: + + { + 'user' => { + 'stty' => { + 'fullname' => 'Simon T Tyson', + 'login' => 'stty' + }, + 'grep' => { + 'fullname' => 'Gary R Epstein', + 'login' => 'grep' + } + } + } + +The '+' indicates that the value of the key attribute should be copied rather +than moved to the folded hash key. + +A '-' prefix would produce this result: + + { + 'user' => { + 'stty' => { + 'fullname' => 'Simon T Tyson', + '-login' => 'stty' + }, + 'grep' => { + 'fullname' => 'Gary R Epstein', + '-login' => 'grep' + } + } + } + +As described earlier, C<XMLout> will ignore hash keys starting with a '-'. + +=head2 NoAttr => 1 I<# in+out - handy> + +When used with C<XMLout()>, the generated XML will contain no attributes. +All hash key/values will be represented as nested elements instead. + +When used with C<XMLin()>, any attributes in the XML will be ignored. + +=head2 NoEscape => 1 I<# out - seldom used> + +By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and +'"' to '<', '>', '&' and '"' respectively. Use this option to +suppress escaping (presumably because you've already escaped the data in some +more sophisticated manner). + +=head2 NoIndent => 1 I<# out - seldom used> + +Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode. +With this option enabled, the XML output will all be on one line (unless there +are newlines in the data) - this may be easier for downstream processing. + +=head2 NoSort => 1 I<# out - seldom used> + +Newer versions of XML::Simple sort elements and attributes alphabetically (*), +by default. Enable this option to suppress the sorting - possibly for +backwards compatibility. + +* Actually, sorting is alphabetical but 'key' attribute or element names (as in +'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements +are sorted alphabetically by the value of the key field. + +=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy> + +This option controls how whitespace in text content is handled. Recognised +values for the option are: + +=over 4 + +=item * + +0 = (default) whitespace is passed through unaltered (except of course for the +normalisation of whitespace in attribute values which is mandated by the XML +recommendation) + +=item * + +1 = whitespace is normalised in any value used as a hash key (normalising means +removing leading and trailing whitespace and collapsing sequences of whitespace +characters to a single space) + +=item * + +2 = whitespace is normalised in all text content + +=back + +Note: you can spell this option with a 'z' if that is more natural for you. + +=head2 NSExpand => 1 I<# in+out handy - SAX only> + +This option controls namespace expansion - the translation of element and +attribute names of the form 'prefix:name' to '{uri}name'. For example the +element name 'xsl:template' might be expanded to: +'{http://www.w3.org/1999/XSL/Transform}template'. + +By default, C<XMLin()> will return element names and attribute names exactly as +they appear in the XML. Setting this option to 1 will cause all element and +attribute names to be expanded to include their namespace prefix. + +I<Note: You must be using a SAX parser for this option to work (ie: it does not +work with XML::Parser)>. + +This option also controls whether C<XMLout()> performs the reverse translation +from '{uri}name' back to 'prefix:name'. The default is no translation. If +your data contains expanded names, you should set this option to 1 otherwise +C<XMLout> will emit XML which is not well formed. + +I<Note: You must have the XML::NamespaceSupport module installed if you want +C<XMLout()> to translate URIs back to prefixes>. + +=head2 NumericEscape => 0 | 1 | 2 I<# out - handy> + +Use this option to have 'high' (non-ASCII) characters in your Perl data +structure converted to numeric entities (eg: €) in the XML output. Three +levels are possible: + +0 - default: no numeric escaping (OK if you're writing out UTF8) + +1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output + +2 - all characters above 0x7F are escaped (good for plain ASCII output) + +=head2 OutputFile => <file specifier> I<# out - handy> + +The default behaviour of C<XMLout()> is to return the XML as a string. If you +wish to write the XML to a file, simply supply the filename using the +'OutputFile' option. + +This option also accepts an IO handle object - especially useful in Perl 5.8.0 +and later for output using an encoding other than UTF-8, eg: + + open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; + XMLout($ref, OutputFile => $fh); + +=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this> + +I<Note: This option is now officially deprecated. If you find it useful, email +the author with an example of what you use it for. Do not use this option to +set the ProtocolEncoding, that's just plain wrong - fix the XML>. + +This option allows you to pass parameters to the constructor of the underlying +XML::Parser object (which of course assumes you're not using SAX). + +=head2 RootName => 'string' I<# out - handy> + +By default, when C<XMLout()> generates XML, the root element will be named +'opt'. This option allows you to specify an alternative name. + +Specifying either undef or the empty string for the RootName option will +produce XML with no root elements. In most cases the resulting XML fragment +will not be 'well formed' and therefore could not be read back in by C<XMLin()>. +Nevertheless, the option has been found to be useful in certain circumstances. + +=head2 SearchPath => [ list ] I<# in - handy> + +If you pass C<XMLin()> a filename, but the filename include no directory +component, you can use this option to specify which directories should be +searched to locate the file. You might use this option to search first in the +user's home directory, then in a global directory such as /etc. + +If a filename is provided to C<XMLin()> but SearchPath is not defined, the +file is assumed to be in the current directory. + +If the first parameter to C<XMLin()> is undefined, the default SearchPath +will contain only the directory in which the script itself is located. +Otherwise the default SearchPath will be empty. + +=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy> + +This option controls what C<XMLin()> should do with empty elements (no +attributes and no content). The default behaviour is to represent them as +empty hashes. Setting this option to a true value (eg: 1) will cause empty +elements to be skipped altogether. Setting the option to 'undef' or the empty +string will cause empty elements to be represented as the undefined value or +the empty string respectively. The latter two alternatives are a little +easier to test for in your code than a hash with no keys. + +The option also controls what C<XMLout()> does with undefined values. Setting +the option to undef causes undefined values to be output as empty elements +(rather than empty attributes), it also suppresses the generation of warnings +about undefined values. Setting the option to a true value (eg: 1) causes +undefined values to be skipped altogether on output. + +=head2 ValueAttr => [ names ] I<# in - handy> + +Use this option to deal elements which always have a single attribute and no +content. Eg: + + <opt> + <colour value="red" /> + <size value="XXL" /> + </opt> + +Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: + + { + colour => 'red', + size => 'XXL' + } + +instead of this (the default): + + { + colour => { value => 'red' }, + size => { value => 'XXL' } + } + +Note: This form of the ValueAttr option is not compatible with C<XMLout()> - +since the attribute name is discarded at parse time, the original XML cannot be +reconstructed. + +=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy> + +This (preferred) form of the ValueAttr option requires you to specify both +the element and the attribute names. This is not only safer, it also allows +the original XML to be reconstructed by C<XMLout()>. + +Note: You probably don't want to use this option and the NoAttr option at the +same time. + +=head2 Variables => { name => value } I<# in - handy> + +This option allows variables in the XML to be expanded when the file is read. +(there is no facility for putting the variable names back if you regenerate +XML using C<XMLout>). + +A 'variable' is any text of the form C<${name}> which occurs in an attribute +value or in the text content of an element. If 'name' matches a key in the +supplied hashref, C<${name}> will be replaced with the corresponding value from +the hashref. If no matching key is found, the variable will not be replaced. + +=head2 VarAttr => 'attr_name' I<# in - handy> + +In addition to the variables defined using C<Variables>, this option allows +variables to be defined in the XML. A variable definition consists of an +element with an attribute called 'attr_name' (the value of the C<VarAttr> +option). The value of the attribute will be used as the variable name and the +text content of the element will be used as the value. A variable defined in +this way will override a variable defined using the C<Variables> option. For +example: + + XMLin( '<opt> + <dir name="prefix">/usr/local/apache</dir> + <dir name="exec_prefix">${prefix}</dir> + <dir name="bindir">${exec_prefix}/bin</dir> + </opt>', + VarAttr => 'name', ContentKey => '-content' + ); + +produces the following data structure: + + { + dir => { + prefix => '/usr/local/apache', + exec_prefix => '/usr/local/apache', + bindir => '/usr/local/apache/bin', + } + } + +=head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy> + +If you want the output from C<XMLout()> to start with the optional XML +declaration, simply set the option to '1'. The default XML declaration is: + + <?xml version='1.0' standalone='yes'?> + +If you want some other string (for example to declare an encoding value), set +the value of this option to the complete string you require. + + +=head1 OPTIONAL OO INTERFACE + +The procedural interface is both simple and convenient however there are a +couple of reasons why you might prefer to use the object oriented (OO) +interface: + +=over 4 + +=item * + +to define a set of default values which should be used on all subsequent calls +to C<XMLin()> or C<XMLout()> + +=item * + +to override methods in B<XML::Simple> to provide customised behaviour + +=back + +The default values for the options described above are unlikely to suit +everyone. The OO interface allows you to effectively override B<XML::Simple>'s +defaults with your preferred values. It works like this: + +First create an XML::Simple parser object with your preferred defaults: + + my $xs = new XML::Simple(ForceArray => 1, KeepRoot => 1); + +then call C<XMLin()> or C<XMLout()> as a method of that object: + + my $ref = $xs->XMLin($xml); + my $xml = $xs->XMLout($ref); + +You can also specify options when you make the method calls and these values +will be merged with the values specified when the object was created. Values +specified in a method call take precedence. + +Overriding methods is a more advanced topic but might be useful if for example +you wished to provide an alternative routine for escaping character data (the +escape_value method) or for building the initial parse tree (the build_tree +method). + +Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be +called as C<xml_in()> or C<xml_out()>. The method names are aliased so the +only difference is the aesthetics. + +=head1 STRICT MODE + +If you import the B<XML::Simple> routines like this: + + use XML::Simple qw(:strict); + +the following common mistakes will be detected and treated as fatal errors + +=over 4 + +=item * + +Failing to explicitly set the C<KeyAttr> option - if you can't be bothered +reading about this option, turn it off with: KeyAttr => [ ] + +=item * + +Failing to explicitly set the C<ForceArray> option - if you can't be bothered +reading about this option, set it to the safest mode with: ForceArray => 1 + +=item * + +Setting ForceArray to an array, but failing to list all the elements from the +KeyAttr hash. + +=item * + +Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains +one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested +element). Note: if strict mode is not set but -w is, this condition triggers a +warning. + +=item * + +Data error - as above, but value of key attribute (eg: partnum) is not a +scalar string (due to nested elements etc). This will also trigger a warning +if strict mode is not enabled. + +=back + +=head1 SAX SUPPORT + +From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API +for XML) - specifically SAX2. + +In a typical SAX application, an XML parser (or SAX 'driver') module generates +SAX events (start of element, character data, end of element, etc) as it parses +an XML document and a 'handler' module processes the events to extract the +required data. This simple model allows for some interesting and powerful +possibilities: + +=over 4 + +=item * + +Applications written to the SAX API can extract data from huge XML documents +without the memory overheads of a DOM or tree API. + +=item * + +The SAX API allows for plug and play interchange of parser modules without +having to change your code to fit a new module's API. A number of SAX parsers +are available with capabilities ranging from extreme portability to blazing +performance. + +=item * + +A SAX 'filter' module can implement both a handler interface for receiving +data and a generator interface for passing modified data on to a downstream +handler. Filters can be chained together in 'pipelines'. + +=item * + +One filter module might split a data stream to direct data to two or more +downstream handlers. + +=item * + +Generating SAX events is not the exclusive preserve of XML parsing modules. +For example, a module might extract data from a relational database using DBI +and pass it on to a SAX pipeline for filtering and formatting. + +=back + +B<XML::Simple> can operate at either end of a SAX pipeline. For example, +you can take a data structure in the form of a hashref and pass it into a +SAX pipeline using the 'Handler' option on C<XMLout()>: + + use XML::Simple; + use Some::SAX::Filter; + use XML::SAX::Writer; + + my $ref = { + .... # your data here + }; + + my $writer = XML::SAX::Writer->new(); + my $filter = Some::SAX::Filter->new(Handler => $writer); + my $simple = XML::Simple->new(Handler => $filter); + $simple->XMLout($ref); + +You can also put B<XML::Simple> at the opposite end of the pipeline to take +advantage of the simple 'tree' data structure once the relevant data has been +isolated through filtering: + + use XML::SAX; + use Some::SAX::Filter; + use XML::Simple; + + my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); + my $filter = Some::SAX::Filter->new(Handler => $simple); + my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); + + my $ref = $parser->parse_uri('some_huge_file.xml'); + + print $ref->{part}->{'555-1234'}; + +You can build a filter by using an XML::Simple object as a handler and setting +its DataHandler option to point to a routine which takes the resulting tree, +modifies it and sends it off as SAX events to a downstream handler: + + my $writer = XML::SAX::Writer->new(); + my $filter = XML::Simple->new( + DataHandler => sub { + my $simple = shift; + my $data = shift; + + # Modify $data here + + $simple->XMLout($data, Handler => $writer); + } + ); + my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); + + $parser->parse_uri($filename); + +I<Note: In this last example, the 'Handler' option was specified in the call to +C<XMLout()> but it could also have been specified in the constructor>. + +=head1 ENVIRONMENT + +If you don't care which parser module B<XML::Simple> uses then skip this +section entirely (it looks more complicated than it really is). + +B<XML::Simple> will default to using a B<SAX> parser if one is available or +B<XML::Parser> if SAX is not available. + +You can dictate which parser module is used by setting either the environment +variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable +$XML::Simple::PREFERRED_PARSER to contain the module name. The following rules +are used: + +=over 4 + +=item * + +The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use +its default rules, you can set the package variable to an empty string. + +=item * + +If the 'preferred parser' is set to the string 'XML::Parser', then +L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not +installed). + +=item * + +If the 'preferred parser' is set to some other value, then it is assumed to be +the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.> +If L<XML::SAX> is not installed, or the requested parser module is not +installed, then C<XMLin()> will die. + +=item * + +If the 'preferred parser' is not defined at all (the normal default +state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is +installed, then a parser module will be selected according to +L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX +parser installed). + +=item * + +if the 'preferred parser' is not defined and B<XML::SAX> is not +installed, then B<XML::Parser> will be used. C<XMLin()> will die if +L<XML::Parser> is not installed. + +=back + +Note: The B<XML::SAX> distribution includes an XML parser written entirely in +Perl. It is very portable but it is not very fast. You should consider +installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your +platform. + +=head1 ERROR HANDLING + +The XML standard is very clear on the issue of non-compliant documents. An +error in parsing any single element (for example a missing end tag) must cause +the whole document to be rejected. B<XML::Simple> will die with an appropriate +message if it encounters a parsing error. + +If dying is not appropriate for your application, you should arrange to call +C<XMLin()> in an eval block and look for errors in $@. eg: + + my $config = eval { XMLin() }; + PopUpMessage($@) if($@); + +Note, there is a common misconception that use of B<eval> will significantly +slow down a script. While that may be true when the code being eval'd is in a +string, it is not true of code like the sample above. + +=head1 EXAMPLES + +When C<XMLin()> reads the following very simple piece of XML: + + <opt username="testuser" password="frodo"></opt> + +it returns the following data structure: + + { + 'username' => 'testuser', + 'password' => 'frodo' + } + +The identical result could have been produced with this alternative XML: + + <opt username="testuser" password="frodo" /> + +Or this (although see 'ForceArray' option for variations): + + <opt> + <username>testuser</username> + <password>frodo</password> + </opt> + +Repeated nested elements are represented as anonymous arrays: + + <opt> + <person firstname="Joe" lastname="Smith"> + <email>joe@smith.com</email> + <email>jsmith@yahoo.com</email> + </person> + <person firstname="Bob" lastname="Smith"> + <email>bob@smith.com</email> + </person> + </opt> + + { + 'person' => [ + { + 'email' => [ + 'joe@smith.com', + 'jsmith@yahoo.com' + ], + 'firstname' => 'Joe', + 'lastname' => 'Smith' + }, + { + 'email' => 'bob@smith.com', + 'firstname' => 'Bob', + 'lastname' => 'Smith' + } + ] + } + +Nested elements with a recognised key attribute are transformed (folded) from +an array into a hash keyed on the value of that attribute (see the C<KeyAttr> +option): + + <opt> + <person key="jsmith" firstname="Joe" lastname="Smith" /> + <person key="tsmith" firstname="Tom" lastname="Smith" /> + <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> + </opt> + + { + 'person' => { + 'jbloggs' => { + 'firstname' => 'Joe', + 'lastname' => 'Bloggs' + }, + 'tsmith' => { + 'firstname' => 'Tom', + 'lastname' => 'Smith' + }, + 'jsmith' => { + 'firstname' => 'Joe', + 'lastname' => 'Smith' + } + } + } + + +The <anon> tag can be used to form anonymous arrays: + + <opt> + <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> + <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> + <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> + <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> + </opt> + + { + 'head' => [ + [ 'Col 1', 'Col 2', 'Col 3' ] + ], + 'data' => [ + [ 'R1C1', 'R1C2', 'R1C3' ], + [ 'R2C1', 'R2C2', 'R2C3' ], + [ 'R3C1', 'R3C2', 'R3C3' ] + ] + } + +Anonymous arrays can be nested to arbirtrary levels and as a special case, if +the surrounding tags for an XML document contain only an anonymous array the +arrayref will be returned directly rather than the usual hashref: + + <opt> + <anon><anon>Col 1</anon><anon>Col 2</anon></anon> + <anon><anon>R1C1</anon><anon>R1C2</anon></anon> + <anon><anon>R2C1</anon><anon>R2C2</anon></anon> + </opt> + + [ + [ 'Col 1', 'Col 2' ], + [ 'R1C1', 'R1C2' ], + [ 'R2C1', 'R2C2' ] + ] + +Elements which only contain text content will simply be represented as a +scalar. Where an element has both attributes and text content, the element +will be represented as a hashref with the text content in the 'content' key +(see the C<ContentKey> option): + + <opt> + <one>first</one> + <two attr="value">second</two> + </opt> + + { + 'one' => 'first', + 'two' => { 'attr' => 'value', 'content' => 'second' } + } + +Mixed content (elements which contain both text content and nested elements) +will be not be represented in a useful way - element order and significant +whitespace will be lost. If you need to work with mixed content, then +XML::Simple is not the right tool for your job - check out the next section. + +=head1 WHERE TO FROM HERE? + +B<XML::Simple> is able to present a simple API because it makes some +assumptions on your behalf. These include: + +=over 4 + +=item * + +You're not interested in text content consisting only of whitespace + +=item * + +You don't mind that when things get slurped into a hash the order is lost + +=item * + +You don't want fine-grained control of the formatting of generated XML + +=item * + +You would never use a hash key that was not a legal XML element name + +=item * + +You don't need help converting between different encodings + +=back + +In a serious XML project, you'll probably outgrow these assumptions fairly +quickly. This section of the document used to offer some advice on chosing a +more powerful option. That advice has now grown into the 'Perl-XML FAQ' +document which you can find at: L<http://perl-xml.sourceforge.net/faq/> + +The advice in the FAQ boils down to a quick explanation of tree versus +event based parsers and then recommends: + +For event based parsing, use SAX (do not set out to write any new code for +XML::Parser's handler API - it is obselete). + +For tree-based parsing, you could choose between the 'Perlish' approach of +L<XML::Twig> and more standards based DOM implementations - preferably one with +XPath support. + + +=head1 SEE ALSO + +B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>. + +To generate documents with namespaces, L<XML::NamespaceSupport> is required. + +The optional caching functions require L<Storable>. + +Answers to Frequently Asked Questions about XML::Simple are bundled with this +distribution as: L<XML::Simple::FAQ> + +=head1 COPYRIGHT + +Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt> + +This library is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut + + diff --git a/lib/XML/Stream.pm b/lib/XML/Stream.pm new file mode 100644 index 0000000..f95f784 --- /dev/null +++ b/lib/XML/Stream.pm @@ -0,0 +1,3268 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream; + +=head1 NAME + +XML::Stream - Creates and XML Stream connection and parses return data + +=head1 SYNOPSIS + + XML::Stream is an attempt at solidifying the use of XML via streaming. + +=head1 DESCRIPTION + + This module provides the user with methods to connect to a remote + server, send a stream of XML to the server, and receive/parse an XML + stream from the server. It is primarily based work for the Etherx XML + router developed by the Jabber Development Team. For more information + about this project visit http://etherx.jabber.org/stream/. + + XML::Stream gives the user the ability to define a central callback + that will be used to handle the tags received from the server. These + tags are passed in the format defined at instantiation time. + the closing tag of an object is seen, the tree is finished and passed + to the call back function. What the user does with it from there is up + to them. + + For a detailed description of how this module works, and about the data + structure that it returns, please view the source of Stream.pm and + look at the detailed description at the end of the file. + + + NOTE: The parser that XML::Stream::Parser provides, as are most Perl + parsers, is synchronous. If you are in the middle of parsing a + packet and call a user defined callback, the Parser is blocked until + your callback finishes. This means you cannot be operating on a + packet, send out another packet and wait for a response to that packet. + It will never get to you. Threading might solve this, but as we all + know threading in Perl is not quite up to par yet. This issue will be + revisted in the future. + + + +=head1 METHODS + + new(debug=>string, - creates the XML::Stream object. debug + debugfh=>FileHandle, should be set to the path for the debug log + debuglevel=>0|1|N, to be written. If set to "stdout" then the + debugtime=>0|1, debug will go there. Also, you can specify + style=>string) a filehandle that already exists byt using + debugfh. debuglevel determines the amount + of debug to generate. 0 is the least, 1 is + a little more, N is the limit you want. + debugtime determines wether a timestamp + should be preappended to the entry. style + defines the way the data structure is + returned. The two available styles are: + + tree - XML::Parser Tree format + node - XML::Stream::Node format + + For more information see the respective man + pages. + + Connect(hostname=>string, - opens a tcp connection to the + port=>integer, specified server and sends the proper + to=>string, opening XML Stream tag. hostname, + from=>string, port, and namespace are required. + myhostname=>string, namespaces allows you to use + namespace=>string, XML::Stream::Namespace objects. + namespaces=>array, to is needed if you want the stream + connectiontype=>string, to attribute to be something other + ssl=>0|1, than the hostname you are connecting + srv=>string) to. from is needed if you want the + stream from attribute to be something + other than the hostname you are + connecting from. myhostname should + not be needed but if the module + cannot determine your hostname + properly (check the debug log), set + this to the correct value, or if you + want the other side of the stream to + think that you are someone else. The + type determines the kind of + connection that is made: + "tcpip" - TCP/IP (default) + "stdinout" - STDIN/STDOUT + "http" - HTTP + HTTP recognizes proxies if the ENV + variables http_proxy or https_proxy + are set. ssl specifies if an SLL + socket should be used for encrypted + communications. This function + returns the same hash from GetRoot() + below. Make sure you get the SID + (Session ID) since you have to use it + to call most other functions in here. + + If srv is specified AND Net::DNS is + installed and can be loaded, then + an SRV query is sent to srv.hostname + and the results processed to replace + the hostname and port. If the lookup + fails, or Net::DNS cannot be loaded, + then hostname and port are left alone + as the defaults. + + + OpenFile(string) - opens a filehandle to the argument specified, and + pretends that it is a stream. It will ignore the + outer tag, and not check if it was a + <stream:stream/>. This is useful for writing a + program that has to parse any XML file that is + basically made up of small packets (like RDF). + + Disconnect(sid) - sends the proper closing XML tag and closes the + specified socket down. + + Process(integer) - waits for data to be available on the socket. If + a timeout is specified then the Process function + waits that period of time before returning nothing. + If a timeout period is not specified then the + function blocks until data is received. The + function returns a hash with session ids as the key, + and status values or data as the hash values. + + SetCallBacks(node=>function, - sets the callback that should be + update=>function) called in various situations. node + is used to handle the data structures + that are built for each top level tag. + Update is used for when Process is + blocking waiting for data, but you + want your original code to be updated. + + GetRoot(sid) - returns the attributes that the stream:stream tag sent + by the other end listed in a hash for the specified + session. + + GetSock(sid) - returns a pointer to the IO::Socket object for the + specified session. + + Send(sid, - sends the string over the specified connection as is. + string) This does no checking if valid XML was sent or not. + Best behavior when sending information. + + GetErrorCode(sid) - returns a string for the specified session that + will hopefully contain some useful information + about why Process or Connect returned an undef + to you. + + XPath(node,path) - returns an array of results that match the xpath. + node can be any of the three types (Tree, Node). + +=head1 VARIABLES + + $NONBLOCKING - tells the Parser to enter into a nonblocking state. This + might cause some funky behavior since you can get nested + callbacks while things are waiting. 1=on, 0=off(default). + +=head1 EXAMPLES + + ########################## + # simple example + + use XML::Stream qw( Tree ); + + $stream = new XML::Stream; + + my $status = $stream->Connect(hostname => "jabber.org", + port => 5222, + namespace => "jabber:client"); + + if (!defined($status)) { + print "ERROR: Could not connect to server\n"; + print " (",$stream->GetErrorCode(),")\n"; + exit(0); + } + + while($node = $stream->Process()) { + # do something with $node + } + + $stream->Disconnect(); + + + ########################### + # example using a handler + + use XML::Stream qw( Tree ); + + $stream = new XML::Stream; + $stream->SetCallBacks(node=>\&noder); + $stream->Connect(hostname => "jabber.org", + port => 5222, + namespace => "jabber:client", + timeout => undef) || die $!; + + # Blocks here forever, noder is called for incoming + # packets when they arrive. + while(defined($stream->Process())) { } + + print "ERROR: Stream died (",$stream->GetErrorCode(),")\n"; + + sub noder + { + my $sid = shift; + my $node = shift; + # do something with $node + } + +=head1 AUTHOR + +Tweaked, tuned, and brightness changes by Ryan Eatmon, reatmon@ti.com +in May of 2000. +Colorized, and Dolby Surround sound added by Thomas Charron, +tcharron@jabber.org +By Jeremie in October of 1999 for http://etherx.jabber.org/streams/ + +=head1 COPYRIGHT + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +use 5.006_001; +use strict; +use Sys::Hostname; +use IO::Socket; +use IO::Select; +use FileHandle; +use Carp; +use POSIX; +use Authen::SASL; +use MIME::Base64; +use utf8; +use Encode; + +$SIG{PIPE} = "IGNORE"; + +use vars qw($VERSION $PAC $SSL $NONBLOCKING %HANDLERS $NETDNS %XMLNS ); + +############################################################################## +# Define the namespaces in an easy/constant manner. +#----------------------------------------------------------------------------- +# 0.9 +#----------------------------------------------------------------------------- +$XMLNS{'stream'} = "http://etherx.jabber.org/streams"; + +#----------------------------------------------------------------------------- +# 1.0 +#----------------------------------------------------------------------------- +$XMLNS{'xmppstreams'} = "urn:ietf:params:xml:ns:xmpp-streams"; +$XMLNS{'xmpp-bind'} = "urn:ietf:params:xml:ns:xmpp-bind"; +$XMLNS{'xmpp-sasl'} = "urn:ietf:params:xml:ns:xmpp-sasl"; +$XMLNS{'xmpp-session'} = "urn:ietf:params:xml:ns:xmpp-session"; +$XMLNS{'xmpp-tls'} = "urn:ietf:params:xml:ns:xmpp-tls"; +############################################################################## + + +if (eval "require Net::DNS;" ) +{ + require Net::DNS; + import Net::DNS; + $NETDNS = 1; +} +else +{ + $NETDNS = 0; +} + + +$VERSION = "1.22"; +$NONBLOCKING = 0; + +use XML::Stream::Namespace; +use XML::Stream::Parser; +use XML::Stream::XPath; + +############################################################################## +# +# Setup the exportable objects +# +############################################################################## +require Exporter; +my @ISA = qw(Exporter); +my @EXPORT_OK = qw(Tree Node); + +sub import +{ + my $class = shift; + + foreach my $module (@_) + { + eval "use XML::Stream::$module;"; + die($@) if ($@); + + my $lc = lc($module); + + eval("\$HANDLERS{\$lc}->{startElement} = \\&XML::Stream::${module}::_handle_element;"); + eval("\$HANDLERS{\$lc}->{endElement} = \\&XML::Stream::${module}::_handle_close;"); + eval("\$HANDLERS{\$lc}->{characters} = \\&XML::Stream::${module}::_handle_cdata;"); + } +} + + +sub new +{ + my $proto = shift; + my $self = { }; + + bless($self,$proto); + + my %args; + while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); } + + $self->{DATASTYLE} = "tree"; + $self->{DATASTYLE} = delete($args{style}) if exists($args{style}); + + if ((($self->{DATASTYLE} eq "tree") && !defined($XML::Stream::Tree::LOADED)) || + (($self->{DATASTYLE} eq "node") && !defined($XML::Stream::Node::LOADED)) + ) + { + croak("The style that you have chosen was not defined when you \"use\"d the module.\n"); + } + + $self->{DEBUGARGS} = \%args; + + $self->{DEBUGTIME} = 0; + $self->{DEBUGTIME} = $args{debugtime} if exists($args{debugtime}); + + $self->{DEBUGLEVEL} = 0; + $self->{DEBUGLEVEL} = $args{debuglevel} if exists($args{debuglevel}); + + $self->{DEBUGFILE} = ""; + + if (exists($args{debugfh}) && ($args{debugfh} ne "")) + { + $self->{DEBUGFILE} = $args{debugfh}; + $self->{DEBUG} = 1; + } + if ((exists($args{debugfh}) && ($args{debugfh} eq "")) || + (exists($args{debug}) && ($args{debug} ne ""))) + { + $self->{DEBUG} = 1; + if (lc($args{debug}) eq "stdout") + { + $self->{DEBUGFILE} = new FileHandle(">&STDERR"); + $self->{DEBUGFILE}->autoflush(1); + } + else + { + if (-e $args{debug}) + { + if (-w $args{debug}) + { + $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); + $self->{DEBUGFILE}->autoflush(1); + } + else + { + print "WARNING: debug file ($args{debug}) is not writable by you\n"; + print " No debug information being saved.\n"; + $self->{DEBUG} = 0; + } + } + else + { + $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); + if (defined($self->{DEBUGFILE})) + { + $self->{DEBUGFILE}->autoflush(1); + } + else + { + print "WARNING: debug file ($args{debug}) does not exist \n"; + print " and is not writable by you.\n"; + print " No debug information being saved.\n"; + $self->{DEBUG} = 0; + } + } + } + } + + my $hostname = hostname(); + my $address = gethostbyname($hostname) || 'localhost'; + my $fullname = gethostbyaddr($address,AF_INET) || $hostname; + + $self->debug(1,"new: hostname = ($fullname)"); + + #--------------------------------------------------------------------------- + # Setup the defaults that the module will work with. + #--------------------------------------------------------------------------- + $self->{SIDS}->{default}->{hostname} = ""; + $self->{SIDS}->{default}->{port} = ""; + $self->{SIDS}->{default}->{sock} = 0; + $self->{SIDS}->{default}->{ssl} = (exists($args{ssl}) ? $args{ssl} : 0); + $self->{SIDS}->{default}->{namespace} = ""; + $self->{SIDS}->{default}->{myhostname} = $fullname; + $self->{SIDS}->{default}->{derivedhostname} = $fullname; + $self->{SIDS}->{default}->{id} = ""; + + #--------------------------------------------------------------------------- + # We are only going to use one callback, let the user call other callbacks + # on his own. + #--------------------------------------------------------------------------- + $self->SetCallBacks(node=>sub { $self->_node(@_) }); + + $self->{IDCOUNT} = 0; + + return $self; +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| Incoming Connection Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# Listen - starts the stream by listening on a port for someone to connect, +# and send the opening stream tag, and then sending a response based +# on if the received header was correct for this stream. Server +# name, port, and namespace are required otherwise we don't know +# where to listen and what namespace to accept. +# +############################################################################## +sub Listen +{ + my $self = shift; + my %args; + while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); } + + my $serverid = "server$args{port}"; + + return if exists($self->{SIDS}->{$serverid}); + + push(@{$self->{SIDS}->{server}},$serverid); + + foreach my $key (keys(%{$self->{SIDS}->{default}})) + { + $self->{SIDS}->{$serverid}->{$key} = $self->{SIDS}->{default}->{$key}; + } + + foreach my $key (keys(%args)) + { + $self->{SIDS}->{$serverid}->{$key} = $args{$key}; + } + + $self->debug(1,"Listen: start"); + + if ($self->{SIDS}->{$serverid}->{namespace} eq "") + { + $self->SetErrorCode($serverid,"Namespace not specified"); + return; + } + + #--------------------------------------------------------------------------- + # Check some things that we have to know in order get the connection up + # and running. Server hostname, port number, namespace, etc... + #--------------------------------------------------------------------------- + if ($self->{SIDS}->{$serverid}->{hostname} eq "") + { + $self->SetErrorCode("$serverid","Server hostname not specified"); + return; + } + if ($self->{SIDS}->{$serverid}->{port} eq "") + { + $self->SetErrorCode("$serverid","Server port not specified"); + return; + } + if ($self->{SIDS}->{$serverid}->{myhostname} eq "") + { + $self->{SIDS}->{$serverid}->{myhostname} = $self->{SIDS}->{$serverid}->{derivedhostname}; + } + + #------------------------------------------------------------------------- + # Open the connection to the listed server and port. If that fails then + # abort ourselves and let the user check $! on his own. + #------------------------------------------------------------------------- + + while($self->{SIDS}->{$serverid}->{sock} == 0) + { + $self->{SIDS}->{$serverid}->{sock} = + new IO::Socket::INET(LocalHost=>$self->{SIDS}->{$serverid}->{hostname}, + LocalPort=>$self->{SIDS}->{$serverid}->{port}, + Reuse=>1, + Listen=>10, + Proto=>'tcp'); + select(undef,undef,undef,.1); + } + $self->{SIDS}->{$serverid}->{status} = 1; + $self->nonblock($self->{SIDS}->{$serverid}->{sock}); + $self->{SIDS}->{$serverid}->{sock}->autoflush(1); + + $self->{SELECT} = + new IO::Select($self->{SIDS}->{$serverid}->{sock}); + $self->{SIDS}->{$serverid}->{select} = + new IO::Select($self->{SIDS}->{$serverid}->{sock}); + + $self->{SOCKETS}->{$self->{SIDS}->{$serverid}->{sock}} = "$serverid"; + + return $serverid; +} + + +############################################################################## +# +# ConnectionAccept - accept an incoming connection. +# +############################################################################## +sub ConnectionAccept +{ + my $self = shift; + my $serverid = shift; + + my $sid = $self->NewSID(); + + $self->debug(1,"ConnectionAccept: sid($sid)"); + + $self->{SIDS}->{$sid}->{sock} = $self->{SIDS}->{$serverid}->{sock}->accept(); + + $self->nonblock($self->{SIDS}->{$sid}->{sock}); + $self->{SIDS}->{$sid}->{sock}->autoflush(1); + + $self->debug(3,"ConnectionAccept: sid($sid) client($self->{SIDS}->{$sid}->{sock}) server($self->{SIDS}->{$serverid}->{sock})"); + + $self->{SELECT}->add($self->{SIDS}->{$sid}->{sock}); + + #------------------------------------------------------------------------- + # Create the XML::Stream::Parser and register our callbacks + #------------------------------------------------------------------------- + $self->{SIDS}->{$sid}->{parser} = + new XML::Stream::Parser(%{$self->{DEBUGARGS}}, + nonblocking=>$NONBLOCKING, + sid=>$sid, + style=>$self->{DATASTYLE}, + Handlers=>{ + startElement=>sub{ $self->_handle_root(@_) }, + endElement=>sub{ &{$HANDLERS{$self->{DATASTYLE}}->{endElement}}($self,@_) }, + characters=>sub{ &{$HANDLERS{$self->{DATASTYLE}}->{characters}}($self,@_) }, + } + ); + + $self->{SIDS}->{$sid}->{select} = + new IO::Select($self->{SIDS}->{$sid}->{sock}); + $self->{SIDS}->{$sid}->{connectiontype} = "tcpip"; + $self->{SOCKETS}->{$self->{SIDS}->{$sid}->{sock}} = $sid; + + $self->InitConnection($sid,$serverid); + + #--------------------------------------------------------------------------- + # Grab the init time so that we can check if we get data in the timeout + # period or not. + #--------------------------------------------------------------------------- + $self->{SIDS}->{$sid}->{activitytimeout} = time; + + return $sid; +} + + +############################################################################## +# +# Respond - If this is a listening socket then we need to respond to the +# opening <stream:stream/>. +# +############################################################################## +sub Respond +{ + my $self = shift; + my $sid = shift; + my $serverid = $self->{SIDS}->{$sid}->{serverid}; + + my $root = $self->GetRoot($sid); + + if ($root->{xmlns} ne $self->{SIDS}->{$serverid}->{namespace}) + { + my $error = $self->StreamError($sid,"invalid-namespace","Invalid namespace specified"); + $self->Send($sid,$error); + + $self->{SIDS}->{$sid}->{sock}->flush(); + select(undef,undef,undef,1); + $self->Disconnect($sid); + } + + #--------------------------------------------------------------------------- + # Next, we build the opening handshake. + #--------------------------------------------------------------------------- + my %stream_args; + + $stream_args{from} = + (exists($self->{SIDS}->{$serverid}->{from}) ? + $self->{SIDS}->{$serverid}->{from} : + $self->{SIDS}->{$serverid}->{hostname} + ); + + $stream_args{to} = $self->GetRoot($sid)->{from}; + $stream_args{id} = $sid; + $stream_args{namespaces} = $self->{SIDS}->{$serverid}->{namespaces}; + + my $stream = + $self->StreamHeader( + xmlns=>$self->{SIDS}->{$serverid}->{namespace}, + xmllang=>"en", + %stream_args + ); + + #--------------------------------------------------------------------------- + # Then we send the opening handshake. + #--------------------------------------------------------------------------- + $self->Send($sid,$stream); + delete($self->{SIDS}->{$sid}->{activitytimeout}); +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| Outgoing Connection Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# Connect - starts the stream by connecting to the server, sending the opening +# stream tag, and then waiting for a response and verifying that it +# is correct for this stream. Server name, port, and namespace are +# required otherwise we don't know where to send the stream to... +# +############################################################################## +sub Connect +{ + my $self = shift; + + foreach my $key (keys(%{$self->{SIDS}->{default}})) + { + $self->{SIDS}->{newconnection}->{$key} = $self->{SIDS}->{default}->{$key}; + } + while($#_ >= 0) { $self->{SIDS}->{newconnection}->{ lc pop(@_) } = pop(@_); } + + my $timeout = exists($self->{SIDS}->{newconnection}->{timeout}) ? + delete($self->{SIDS}->{newconnection}->{timeout}) : + ""; + + $self->debug(4,"Connect: timeout($timeout)"); + + + if (exists($self->{SIDS}->{newconnection}->{srv})) + { + $self->debug(1,"Connect: srv requested"); + if ($NETDNS) + { + my $res = new Net::DNS::Resolver(); + my $query = $res->query($self->{SIDS}->{newconnection}->{srv}.".".$self->{SIDS}->{newconnection}->{hostname},"SRV"); + + if ($query) + { + $self->{SIDS}->{newconnection}->{hostname} = ($query->answer)[0]->target(); + $self->{SIDS}->{newconnection}->{port} = ($query->answer)[0]->port(); + $self->debug(1,"Connect: srv host: $self->{SIDS}->{newconnection}->{hostname}"); + $self->debug(1,"Connect: srv post: $self->{SIDS}->{newconnection}->{port}"); + } + else + { + $self->debug(1,"Connect: srv query failed"); + } + } + else + { + $self->debug(1,"Connect: srv query failed"); + } + delete($self->{SIDS}->{newconnection}->{srv}); + } + + $self->{SIDS}->{newconnection}->{connectiontype} = "tcpip" + unless exists($self->{SIDS}->{newconnection}->{connectiontype}); + + $self->debug(1,"Connect: type($self->{SIDS}->{newconnection}->{connectiontype})"); + + if ($self->{SIDS}->{newconnection}->{namespace} eq "") + { + $self->SetErrorCode("newconnection","Namespace not specified"); + return; + } + + #--------------------------------------------------------------------------- + # TCP/IP + #--------------------------------------------------------------------------- + if ($self->{SIDS}->{newconnection}->{connectiontype} eq "tcpip") + { + #----------------------------------------------------------------------- + # Check some things that we have to know in order get the connection up + # and running. Server hostname, port number, namespace, etc... + #----------------------------------------------------------------------- + if ($self->{SIDS}->{newconnection}->{hostname} eq "") + { + $self->SetErrorCode("newconnection","Server hostname not specified"); + return; + } + if ($self->{SIDS}->{newconnection}->{port} eq "") + { + $self->SetErrorCode("newconnection","Server port not specified"); + return; + } + if ($self->{SIDS}->{newconnection}->{myhostname} eq "") + { + $self->{SIDS}->{newconnection}->{myhostname} = $self->{SIDS}->{newconnection}->{derivedhostname}; + } + + #----------------------------------------------------------------------- + # Open the connection to the listed server and port. If that fails then + # abort ourselves and let the user check $! on his own. + #----------------------------------------------------------------------- + $self->{SIDS}->{newconnection}->{sock} = + new IO::Socket::INET(PeerAddr=>$self->{SIDS}->{newconnection}->{hostname}, + PeerPort=>$self->{SIDS}->{newconnection}->{port}, + Proto=>"tcp", + (($timeout ne "") ? ( Timeout=>$timeout ) : ()), + ); + return unless $self->{SIDS}->{newconnection}->{sock}; + + if ($self->{SIDS}->{newconnection}->{ssl} == 1) + { + $self->debug(1,"Connect: Convert normal socket to SSL"); + $self->debug(1,"Connect: sock($self->{SIDS}->{newconnection}->{sock})"); + $self->LoadSSL(); + $self->{SIDS}->{newconnection}->{sock} = + IO::Socket::SSL::socketToSSL($self->{SIDS}->{newconnection}->{sock}, + {SSL_verify_mode=>0x00}); + $self->debug(1,"Connect: ssl_sock($self->{SIDS}->{newconnection}->{sock})"); + $self->debug(1,"Connect: SSL: We are secure") if ($self->{SIDS}->{newconnection}->{sock}); + } + return unless $self->{SIDS}->{newconnection}->{sock}; + } + + #--------------------------------------------------------------------------- + # STDIN/OUT + #--------------------------------------------------------------------------- + if ($self->{SIDS}->{newconnection}->{connectiontype} eq "stdinout") + { + $self->{SIDS}->{newconnection}->{sock} = + new FileHandle(">&STDOUT"); + } + + #--------------------------------------------------------------------------- + # HTTP + #--------------------------------------------------------------------------- + if ($self->{SIDS}->{newconnection}->{connectiontype} eq "http") + { + #----------------------------------------------------------------------- + # Check some things that we have to know in order get the connection up + # and running. Server hostname, port number, namespace, etc... + #----------------------------------------------------------------------- + if ($self->{SIDS}->{newconnection}->{hostname} eq "") + { + $self->SetErrorCode("newconnection","Server hostname not specified"); + return; + } + if ($self->{SIDS}->{newconnection}->{port} eq "") + { + $self->SetErrorCode("newconnection","Server port not specified"); + return; + } + if ($self->{SIDS}->{newconnection}->{myhostname} eq "") + { + $self->{SIDS}->{newconnection}->{myhostname} = $self->{SIDS}->{newconnection}->{derivedhostname}; + } + + if (!defined($PAC)) + { + eval("use HTTP::ProxyAutoConfig;"); + if ($@) + { + $PAC = 0; + } + else + { + require HTTP::ProxyAutoConfig; + $PAC = new HTTP::ProxyAutoConfig(); + } + } + + if ($PAC eq "0") { + if (exists($ENV{"http_proxy"})) + { + my($host,$port) = ($ENV{"http_proxy"} =~ /^(\S+)\:(\d+)$/); + $self->{SIDS}->{newconnection}->{httpproxyhostname} = $host; + $self->{SIDS}->{newconnection}->{httpproxyport} = $port; + $self->{SIDS}->{newconnection}->{httpproxyhostname} =~ s/^http\:\/\///; + } + if (exists($ENV{"https_proxy"})) + { + my($host,$port) = ($ENV{"https_proxy"} =~ /^(\S+)\:(\d+)$/); + $self->{SIDS}->{newconnection}->{httpsproxyhostname} = $host; + $self->{SIDS}->{newconnection}->{httpsproxyport} = $port; + $self->{SIDS}->{newconnection}->{httpsproxyhostname} =~ s/^https?\:\/\///; + } + } + else + { + my $proxy = $PAC->FindProxy("http://".$self->{SIDS}->{newconnection}->{hostname}); + if ($proxy ne "DIRECT") + { + ($self->{SIDS}->{newconnection}->{httpproxyhostname},$self->{SIDS}->{newconnection}->{httpproxyport}) = ($proxy =~ /^PROXY ([^:]+):(\d+)$/); + } + + $proxy = $PAC->FindProxy("https://".$self->{SIDS}->{newconnection}->{hostname}); + + if ($proxy ne "DIRECT") + { + ($self->{SIDS}->{newconnection}->{httpsproxyhostname},$self->{SIDS}->{newconnection}->{httpsproxyport}) = ($proxy =~ /^PROXY ([^:]+):(\d+)$/); + } + } + + $self->debug(1,"Connect: http_proxy($self->{SIDS}->{newconnection}->{httpproxyhostname}:$self->{SIDS}->{newconnection}->{httpproxyport})") + if (exists($self->{SIDS}->{newconnection}->{httpproxyhostname}) && + defined($self->{SIDS}->{newconnection}->{httpproxyhostname}) && + exists($self->{SIDS}->{newconnection}->{httpproxyport}) && + defined($self->{SIDS}->{newconnection}->{httpproxyport})); + $self->debug(1,"Connect: https_proxy($self->{SIDS}->{newconnection}->{httpsproxyhostname}:$self->{SIDS}->{newconnection}->{httpsproxyport})") + if (exists($self->{SIDS}->{newconnection}->{httpsproxyhostname}) && + defined($self->{SIDS}->{newconnection}->{httpsproxyhostname}) && + exists($self->{SIDS}->{newconnection}->{httpsproxyport}) && + defined($self->{SIDS}->{newconnection}->{httpsproxyport})); + + #----------------------------------------------------------------------- + # Open the connection to the listed server and port. If that fails then + # abort ourselves and let the user check $! on his own. + #----------------------------------------------------------------------- + my $connect = "CONNECT $self->{SIDS}->{newconnection}->{hostname}:$self->{SIDS}->{newconnection}->{port} HTTP/1.1\r\nHost: $self->{SIDS}->{newconnection}->{hostname}\r\n\r\n"; + my $put = "PUT http://$self->{SIDS}->{newconnection}->{hostname}:$self->{SIDS}->{newconnection}->{port} HTTP/1.1\r\nHost: $self->{SIDS}->{newconnection}->{hostname}\r\nProxy-Connection: Keep-Alive\r\n\r\n"; + + my $connected = 0; + #----------------------------------------------------------------------- + # Combo #0 - The user didn't specify a proxy + #----------------------------------------------------------------------- + if (!exists($self->{SIDS}->{newconnection}->{httpproxyhostname}) && + !exists($self->{SIDS}->{newconnection}->{httpsproxyhostname})) + { + + $self->debug(1,"Connect: Combo #0: User did not specify a proxy... connecting DIRECT"); + + $self->debug(1,"Connect: Combo #0: Create normal socket"); + $self->{SIDS}->{newconnection}->{sock} = + new IO::Socket::INET(PeerAddr=>$self->{SIDS}->{newconnection}->{hostname}, + PeerPort=>$self->{SIDS}->{newconnection}->{port}, + Proto=>"tcp", + (($timeout ne "") ? ( Timeout=>$timeout ) : ()), + ); + $connected = defined($self->{SIDS}->{newconnection}->{sock}); + $self->debug(1,"Connect: Combo #0: connected($connected)"); + # if ($connected) + # { + # $self->{SIDS}->{newconnection}->{sock}->syswrite($put,length($put),0); + # my $buff; + # $self->{SIDS}->{newconnection}->{sock}->sysread($buff,4*POSIX::BUFSIZ); + # my ($code) = ($buff =~ /^\S+\s+(\S+)\s+/); + # $self->debug(1,"Connect: Combo #1: buff($buff)"); + # $connected = 0 if ($code !~ /2\d\d/); + # } + # $self->debug(1,"Connect: Combo #0: connected($connected)"); + } + + #----------------------------------------------------------------------- + # Combo #1 - PUT through http_proxy + #----------------------------------------------------------------------- + if (!$connected && + exists($self->{SIDS}->{newconnection}->{httpproxyhostname}) && + ($self->{SIDS}->{newconnection}->{ssl} == 0)) + { + + $self->debug(1,"Connect: Combo #1: PUT through http_proxy"); + $self->{SIDS}->{newconnection}->{sock} = + new IO::Socket::INET(PeerAddr=>$self->{SIDS}->{newconnection}->{httpproxyhostname}, + PeerPort=>$self->{SIDS}->{newconnection}->{httpproxyport}, + Proto=>"tcp", + (($timeout ne "") ? ( Timeout=>$timeout ) : ()), + ); + $connected = defined($self->{SIDS}->{newconnection}->{sock}); + $self->debug(1,"Connect: Combo #1: connected($connected)"); + if ($connected) + { + $self->debug(1,"Connect: Combo #1: send($put)"); + $self->{SIDS}->{newconnection}->{sock}->syswrite($put,length($put),0); + my $buff; + $self->{SIDS}->{newconnection}->{sock}->sysread($buff,4*POSIX::BUFSIZ); + my ($code) = ($buff =~ /^\S+\s+(\S+)\s+/); + $self->debug(1,"Connect: Combo #1: buff($buff)"); + $connected = 0 if ($code !~ /2\d\d/); + } + $self->debug(1,"Connect: Combo #1: connected($connected)"); + } + #----------------------------------------------------------------------- + # Combo #2 - CONNECT through http_proxy + #----------------------------------------------------------------------- + if (!$connected && + exists($self->{SIDS}->{newconnection}->{httpproxyhostname}) && + ($self->{SIDS}->{newconnection}->{ssl} == 0)) + { + + $self->debug(1,"Connect: Combo #2: CONNECT through http_proxy"); + $self->{SIDS}->{newconnection}->{sock} = + new IO::Socket::INET(PeerAddr=>$self->{SIDS}->{newconnection}->{httpproxyhostname}, + PeerPort=>$self->{SIDS}->{newconnection}->{httpproxyport}, + Proto=>"tcp", + (($timeout ne "") ? ( Timeout=>$timeout ) : ()), + ); + $connected = defined($self->{SIDS}->{newconnection}->{sock}); + $self->debug(1,"Connect: Combo #2: connected($connected)"); + if ($connected) + { + $self->{SIDS}->{newconnection}->{sock}->syswrite($connect,length($connect),0); + my $buff; + $self->{SIDS}->{newconnection}->{sock}->sysread($buff,4*POSIX::BUFSIZ); + my ($code) = ($buff =~ /^\S+\s+(\S+)\s+/); + $self->debug(1,"Connect: Combo #2: buff($buff)"); + $connected = 0 if ($code !~ /2\d\d/); + } + $self->debug(1,"Connect: Combo #2: connected($connected)"); + } + + #----------------------------------------------------------------------- + # Combo #3 - CONNECT through https_proxy + #----------------------------------------------------------------------- + if (!$connected && + exists($self->{SIDS}->{newconnection}->{httpsproxyhostname})) + { + $self->debug(1,"Connect: Combo #3: CONNECT through https_proxy"); + $self->{SIDS}->{newconnection}->{sock} = + new IO::Socket::INET(PeerAddr=>$self->{SIDS}->{newconnection}->{httpsproxyhostname}, + PeerPort=>$self->{SIDS}->{newconnection}->{httpsproxyport}, + Proto=>"tcp"); + $connected = defined($self->{SIDS}->{newconnection}->{sock}); + $self->debug(1,"Connect: Combo #3: connected($connected)"); + if ($connected) + { + $self->{SIDS}->{newconnection}->{sock}->syswrite($connect,length($connect),0); + my $buff; + $self->{SIDS}->{newconnection}->{sock}->sysread($buff,4*POSIX::BUFSIZ); + my ($code) = ($buff =~ /^\S+\s+(\S+)\s+/); + $self->debug(1,"Connect: Combo #3: buff($buff)"); + $connected = 0 if ($code !~ /2\d\d/); + } + $self->debug(1,"Connect: Combo #3: connected($connected)"); + } + + #----------------------------------------------------------------------- + # We have failed + #----------------------------------------------------------------------- + if (!$connected) + { + $self->debug(1,"Connect: No connection... I have failed... I.. must... end it all..."); + $self->SetErrorCode("newconnection","Unable to open a connection to destination. Please check your http_proxy and/or https_proxy environment variables."); + return; + } + + return unless $self->{SIDS}->{newconnection}->{sock}; + + $self->debug(1,"Connect: We are connected"); + + if (($self->{SIDS}->{newconnection}->{ssl} == 1) && + (ref($self->{SIDS}->{newconnection}->{sock}) eq "IO::Socket::INET")) + { + $self->debug(1,"Connect: Convert normal socket to SSL"); + $self->debug(1,"Connect: sock($self->{SIDS}->{newconnection}->{sock})"); + $self->LoadSSL(); + $self->{SIDS}->{newconnection}->{sock} = + IO::Socket::SSL::socketToSSL($self->{SIDS}->{newconnection}->{sock}, + {SSL_verify_mode=>0x00}); + $self->debug(1,"Connect: ssl_sock($self->{SIDS}->{newconnection}->{sock})"); + $self->debug(1,"Connect: SSL: We are secure") if ($self->{SIDS}->{newconnection}->{sock}); + } + return unless $self->{SIDS}->{newconnection}->{sock}; + } + + $self->debug(1,"Connect: Got a connection"); + + $self->{SIDS}->{newconnection}->{sock}->autoflush(1); + + return $self->OpenStream("newconnection",$timeout); +} + + +############################################################################## +# +# OpenStream - Send the opening stream and save the root element info. +# +############################################################################## +sub OpenStream +{ + my $self = shift; + my $currsid = shift; + my $timeout = shift; + $timeout = "" unless defined($timeout); + + $self->InitConnection($currsid,$currsid); + + #--------------------------------------------------------------------------- + # Next, we build the opening handshake. + #--------------------------------------------------------------------------- + my %stream_args; + + if (($self->{SIDS}->{$currsid}->{connectiontype} eq "tcpip") || + ($self->{SIDS}->{$currsid}->{connectiontype} eq "http")) + { + $stream_args{to}= $self->{SIDS}->{$currsid}->{hostname} + unless exists($self->{SIDS}->{$currsid}->{to}); + + $stream_args{to} = $self->{SIDS}->{$currsid}->{to} + if exists($self->{SIDS}->{$currsid}->{to}); + + $stream_args{from} = $self->{SIDS}->{$currsid}->{myhostname} + if (!exists($self->{SIDS}->{$currsid}->{from}) && + ($self->{SIDS}->{$currsid}->{myhostname} ne "") + ); + + $stream_args{from} = $self->{SIDS}->{$currsid}->{from} + if exists($self->{SIDS}->{$currsid}->{from}); + + $stream_args{id} = $self->{SIDS}->{$currsid}->{id} + if (exists($self->{SIDS}->{$currsid}->{id}) && + ($self->{SIDS}->{$currsid}->{id} ne "") + ); + + $stream_args{namespaces} = $self->{SIDS}->{$currsid}->{namespaces}; + } + + my $stream = + $self->StreamHeader( + xmlns=>$self->{SIDS}->{$currsid}->{namespace}, + xmllang=>"en", + %stream_args + ); + + #--------------------------------------------------------------------------- + # Create the XML::Stream::Parser and register our callbacks + #--------------------------------------------------------------------------- + $self->{SIDS}->{$currsid}->{parser} = + new XML::Stream::Parser(%{$self->{DEBUGARGS}}, + nonblocking=>$NONBLOCKING, + sid=>$currsid, + style=>$self->{DATASTYLE}, + Handlers=>{ + startElement=>sub{ $self->_handle_root(@_) }, + endElement=>sub{ &{$HANDLERS{$self->{DATASTYLE}}->{endElement}}($self,@_) }, + characters=>sub{ &{$HANDLERS{$self->{DATASTYLE}}->{characters}}($self,@_) }, + } + ); + + $self->{SIDS}->{$currsid}->{select} = + new IO::Select($self->{SIDS}->{$currsid}->{sock}); + + if (($self->{SIDS}->{$currsid}->{connectiontype} eq "tcpip") || + ($self->{SIDS}->{$currsid}->{connectiontype} eq "http")) + { + $self->{SELECT} = new IO::Select($self->{SIDS}->{$currsid}->{sock}); + $self->{SOCKETS}->{$self->{SIDS}->{$currsid}->{sock}} = "newconnection"; + } + + if ($self->{SIDS}->{$currsid}->{connectiontype} eq "stdinout") + { + $self->{SELECT} = new IO::Select(*STDIN); + $self->{SOCKETS}->{$self->{SIDS}->{$currsid}->{sock}} = $currsid; + $self->{SOCKETS}->{*STDIN} = $currsid; + $self->{SIDS}->{$currsid}->{select}->add(*STDIN); + } + + $self->{SIDS}->{$currsid}->{status} = 0; + + #--------------------------------------------------------------------------- + # Then we send the opening handshake. + #--------------------------------------------------------------------------- + $self->Send($currsid,$stream) || return; + + #--------------------------------------------------------------------------- + # Before going on let's make sure that the server responded with a valid + # root tag and that the stream is open. + #--------------------------------------------------------------------------- + my $buff = ""; + my $timeEnd = ($timeout eq "") ? "" : time + $timeout; + while($self->{SIDS}->{$currsid}->{status} == 0) + { + my $now = time; + my $wait = (($timeEnd eq "") || ($timeEnd - $now > 10)) ? 10 : + $timeEnd - $now; + + $self->debug(5,"Connect: can_read(",join(",",$self->{SIDS}->{$currsid}->{select}->can_read(0)),")"); + if ($self->{SIDS}->{$currsid}->{select}->can_read($wait)) + { + $self->{SIDS}->{$currsid}->{status} = -1 + unless defined($buff = $self->Read($currsid)); + return unless($self->{SIDS}->{$currsid}->{status} == 0); + return unless($self->ParseStream($currsid,$buff) == 1); + } + else + { + if ($timeout ne "") + { + if (time >= $timeEnd) + { + $self->SetErrorCode($currsid,"Timeout limit reached"); + return; + } + } + } + + return if($self->{SIDS}->{$currsid}->{select}->has_exception(0)); + } + return if($self->{SIDS}->{$currsid}->{status} != 1); + + $self->debug(3,"Connect: status($self->{SIDS}->{$currsid}->{status})"); + + my $sid = $self->GetRoot($currsid)->{id}; + $| = 1; + foreach my $key (keys(%{$self->{SIDS}->{$currsid}})) + { + $self->{SIDS}->{$sid}->{$key} = $self->{SIDS}->{$currsid}->{$key}; + } + $self->{SIDS}->{$sid}->{parser}->setSID($sid); + + if (($self->{SIDS}->{$sid}->{connectiontype} eq "tcpip") || + ($self->{SIDS}->{$sid}->{connectiontype} eq "http")) + { + $self->{SOCKETS}->{$self->{SIDS}->{$currsid}->{sock}} = $sid; + } + + if ($self->{SIDS}->{$sid}->{connectiontype} eq "stdinout") + { + $self->{SOCKETS}->{$self->{SIDS}->{$currsid}->{sock}} = $sid; + $self->{SOCKETS}->{*STDIN} = $sid; + } + + delete($self->{SIDS}->{$currsid}); + + if (exists($self->GetRoot($sid)->{version}) && + ($self->GetRoot($sid)->{version} ne "")) + { + while(!$self->ReceivedStreamFeatures($sid)) + { + $self->Process(1); + } + } + + return $self->GetRoot($sid); +} + + +############################################################################## +# +# OpenFile - starts the stream by opening a file and setting it up so that +# Process reads from the filehandle to get the incoming stream. +# +############################################################################## +sub OpenFile +{ + my $self = shift; + my $file = shift; + + $self->debug(1,"OpenFile: file($file)"); + + $self->{SIDS}->{newconnection}->{connectiontype} = "file"; + + $self->{SIDS}->{newconnection}->{sock} = new FileHandle($file); + $self->{SIDS}->{newconnection}->{sock}->autoflush(1); + + $self->RegisterPrefix("newconnection",&ConstXMLNS("stream"),"stream"); + + #--------------------------------------------------------------------------- + # Create the XML::Stream::Parser and register our callbacks + #--------------------------------------------------------------------------- + $self->{SIDS}->{newconnection}->{parser} = + new XML::Stream::Parser(%{$self->{DEBUGARGS}}, + nonblocking=>$NONBLOCKING, + sid=>"newconnection", + style=>$self->{DATASTYLE}, + Handlers=>{ + startElement=>sub{ $self->_handle_root(@_) }, + endElement=>sub{ &{$HANDLERS{$self->{DATASTYLE}}->{endElement}}($self,@_) }, + characters=>sub{ &{$HANDLERS{$self->{DATASTYLE}}->{characters}}($self,@_) }, + } + ); + + $self->{SIDS}->{newconnection}->{select} = + new IO::Select($self->{SIDS}->{newconnection}->{sock}); + + $self->{SELECT} = new IO::Select($self->{SIDS}->{newconnection}->{sock}); + + $self->{SIDS}->{newconnection}->{status} = 0; + + my $buff = ""; + while($self->{SIDS}->{newconnection}->{status} == 0) + { + $self->debug(5,"OpenFile: can_read(",join(",",$self->{SIDS}->{newconnection}->{select}->can_read(0)),")"); + if ($self->{SIDS}->{newconnection}->{select}->can_read(0)) + { + $self->{SIDS}->{newconnection}->{status} = -1 + unless defined($buff = $self->Read("newconnection")); + return unless($self->{SIDS}->{newconnection}->{status} == 0); + return unless($self->ParseStream("newconnection",$buff) == 1); + } + + return if($self->{SIDS}->{newconnection}->{select}->has_exception(0) && + $self->{SIDS}->{newconnection}->{sock}->error()); + } + return if($self->{SIDS}->{newconnection}->{status} != 1); + + + my $sid = $self->NewSID(); + foreach my $key (keys(%{$self->{SIDS}->{newconnection}})) + { + $self->{SIDS}->{$sid}->{$key} = $self->{SIDS}->{newconnection}->{$key}; + } + $self->{SIDS}->{$sid}->{parser}->setSID($sid); + + $self->{SOCKETS}->{$self->{SIDS}->{newconnection}->{sock}} = $sid; + + delete($self->{SIDS}->{newconnection}); + + return $sid; +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| Common Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# Disconnect - sends the closing XML tag and shuts down the socket. +# +############################################################################## +sub Disconnect +{ + my $self = shift; + my $sid = shift; + + $self->Send($sid,"</stream:stream>"); + close($self->{SIDS}->{$sid}->{sock}) + if (($self->{SIDS}->{$sid}->{connectiontype} eq "tcpip") || + ($self->{SIDS}->{$sid}->{connectiontype} eq "http")); + delete($self->{SOCKETS}->{$self->{SIDS}->{$sid}->{sock}}); + foreach my $key (keys(%{$self->{SIDS}->{$sid}})) + { + delete($self->{SIDS}->{$sid}->{$key}); + } + delete($self->{SIDS}->{$sid}); +} + + +############################################################################## +# +# InitConnection - Initialize the connection data structure +# +############################################################################## +sub InitConnection +{ + my $self = shift; + my $sid = shift; + my $serverid = shift; + + #--------------------------------------------------------------------------- + # Set the default STATUS so that we can keep track of it throughout the + # session. + # 1 = no errors + # 0 = no data has been received yet + # -1 = error from handlers + # -2 = error but keep the connection alive so that we can send some info. + #--------------------------------------------------------------------------- + $self->{SIDS}->{$sid}->{status} = 0; + + #--------------------------------------------------------------------------- + # A storage place for when we don't have a callback registered and we need + # to stockpile the nodes we receive until Process is called and we return + # them. + #--------------------------------------------------------------------------- + $self->{SIDS}->{$sid}->{nodes} = (); + + #--------------------------------------------------------------------------- + # If there is an error on the stream, then we need a place to indicate that. + #--------------------------------------------------------------------------- + $self->{SIDS}->{$sid}->{streamerror} = {}; + + #--------------------------------------------------------------------------- + # Grab the init time so that we can keep the connection alive by sending " " + #--------------------------------------------------------------------------- + $self->{SIDS}->{$sid}->{keepalive} = time; + + #--------------------------------------------------------------------------- + # Keep track of the "server" we are connected to so we can check stuff + # later. + #--------------------------------------------------------------------------- + $self->{SIDS}->{$sid}->{serverid} = $serverid; + + #--------------------------------------------------------------------------- + # Mark the stream:features as MIA. + #--------------------------------------------------------------------------- + $self->{SIDS}->{$sid}->{streamfeatures}->{received} = 0; + + #--------------------------------------------------------------------------- + # First acitivty is the connection... duh. =) + #--------------------------------------------------------------------------- + $self->MarkActivity($sid); +} + + +############################################################################## +# +# ParseStream - takes the incoming stream and makes sure that only full +# XML tags gets passed to the parser. If a full tag has not +# read yet, then the Stream saves the incomplete part and +# sends the rest to the parser. +# +############################################################################## +sub ParseStream +{ + my $self = shift; + my $sid = shift; + my $stream = shift; + + $stream = "" unless defined($stream); + + $self->debug(3,"ParseStream: sid($sid) stream($stream)"); + + $self->{SIDS}->{$sid}->{parser}->parse($stream); + + if (exists($self->{SIDS}->{$sid}->{streamerror}->{type})) + { + $self->debug(3,"ParseStream: ERROR($self->{SIDS}->{$sid}->{streamerror}->{type})"); + $self->SetErrorCode($sid,$self->{SIDS}->{$sid}->{streamerror}); + return 0; + } + + return 1; +} + + +############################################################################## +# +# Process - checks for data on the socket and returns a status code depending +# on if there was data or not. If a timeout is not defined in the +# call then the timeout defined in Connect() is used. If a timeout +# of 0 is used then the call blocks until it gets some data, +# otherwise it returns after the timeout period. +# +############################################################################## +sub Process +{ + my $self = shift; + my $timeout = shift; + $timeout = "" unless defined($timeout); + + $self->debug(4,"Process: timeout($timeout)"); + #--------------------------------------------------------------------------- + # We need to keep track of what's going on in the function and tell the + # outside world about it so let's return something useful. We track this + # information based on sid: + # -1 connection closed and error + # 0 connection open but no data received. + # 1 connection open and data received. + # array connection open and the data that has been collected + # over time (No CallBack specified) + #--------------------------------------------------------------------------- + my %status; + foreach my $sid (keys(%{$self->{SIDS}})) + { + next if ($sid eq "default"); + $self->debug(5,"Process: initialize sid($sid) status to 0"); + $status{$sid} = 0; + } + + #--------------------------------------------------------------------------- + # Either block until there is data and we have parsed it all, or wait a + # certain period of time and then return control to the user. + #--------------------------------------------------------------------------- + my $block = 1; + my $timeEnd = ($timeout eq "") ? "" : time + $timeout; + while($block == 1) + { + $self->debug(4,"Process: let's wait for data"); + + my $now = time; + my $wait = (($timeEnd eq "") || ($timeEnd - $now > 10)) ? 10 : + $timeEnd - $now; + + foreach my $connection ($self->{SELECT}->can_read($wait)) + { + $self->debug(4,"Process: connection($connection)"); + $self->debug(4,"Process: sid($self->{SOCKETS}->{$connection})"); + $self->debug(4,"Process: connection_status($self->{SIDS}->{$self->{SOCKETS}->{$connection}}->{status})"); + + next unless (($self->{SIDS}->{$self->{SOCKETS}->{$connection}}->{status} == 1) || + exists($self->{SIDS}->{$self->{SOCKETS}->{$connection}}->{activitytimeout})); + + my $processit = 1; + if (exists($self->{SIDS}->{server})) + { + foreach my $serverid (@{$self->{SIDS}->{server}}) + { + if (exists($self->{SIDS}->{$serverid}->{sock}) && + ($connection == $self->{SIDS}->{$serverid}->{sock})) + { + my $sid = $self->ConnectionAccept($serverid); + $status{$sid} = 0; + $processit = 0; + last; + } + } + } + if ($processit == 1) + { + my $sid = $self->{SOCKETS}->{$connection}; + $self->debug(4,"Process: there's something to read"); + $self->debug(4,"Process: connection($connection) sid($sid)"); + my $buff; + $self->debug(4,"Process: read"); + $status{$sid} = 1; + $self->{SIDS}->{$sid}->{status} = -1 + if (!defined($buff = $self->Read($sid))); + $buff = "" unless defined($buff); + $self->debug(4,"Process: connection_status($self->{SIDS}->{$sid}->{status})"); + $status{$sid} = -1 unless($self->{SIDS}->{$sid}->{status} == 1); + $self->debug(4,"Process: parse($buff)"); + $status{$sid} = -1 unless($self->ParseStream($sid,$buff) == 1); + } + $block = 0; + } + + if ($timeout ne "") + { + if (time >= $timeEnd) + { + $self->debug(4,"Process: Everyone out of the pool! Time to stop blocking."); + $block = 0; + } + } + + $self->debug(4,"Process: timeout($timeout)"); + + if (exists($self->{CB}->{update})) + { + $self->debug(4,"Process: Calling user defined update function"); + &{$self->{CB}->{update}}(); + } + + $block = 1 if $self->{SELECT}->can_read(0); + + #--------------------------------------------------------------------- + # Check for connections that need to be kept alive + #--------------------------------------------------------------------- + $self->debug(4,"Process: check for keepalives"); + foreach my $sid (keys(%{$self->{SIDS}})) + { + next if ($sid eq "default"); + next if ($sid =~ /^server/); + next if ($status{$sid} == -1); + if ((time - $self->{SIDS}->{$sid}->{keepalive}) > 10) + { + $self->IgnoreActivity($sid,1); + $self->{SIDS}->{$sid}->{status} = -1 + if !defined($self->Send($sid," ")); + $status{$sid} = -1 unless($self->{SIDS}->{$sid}->{status} == 1); + if ($status{$sid} == -1) + { + $self->debug(2,"Process: Keep-Alive failed. What the hell happened?!?!"); + $self->debug(2,"Process: connection_status($self->{SIDS}->{$sid}->{status})"); + } + $self->IgnoreActivity($sid,0); + } + } + #--------------------------------------------------------------------- + # Check for connections that have timed out. + #--------------------------------------------------------------------- + $self->debug(4,"Process: check for timeouts"); + foreach my $sid (keys(%{$self->{SIDS}})) + { + next if ($sid eq "default"); + next if ($sid =~ /^server/); + + if (exists($self->{SIDS}->{$sid}->{activitytimeout})) + { + $self->debug(4,"Process: sid($sid) time(",time,") timeout($self->{SIDS}->{$sid}->{activitytimeout})"); + } + else + { + $self->debug(4,"Process: sid($sid) time(",time,") timeout(undef)"); + } + + $self->Respond($sid) + if (exists($self->{SIDS}->{$sid}->{activitytimeout}) && + defined($self->GetRoot($sid))); + $self->Disconnect($sid) + if (exists($self->{SIDS}->{$sid}->{activitytimeout}) && + ((time - $self->{SIDS}->{$sid}->{activitytimeout}) > 10) && + ($self->{SIDS}->{$sid}->{status} != 1)); + } + + + #--------------------------------------------------------------------- + # If any of the connections have status == -1 then return so that the + # user can handle it. + #--------------------------------------------------------------------- + foreach my $sid (keys(%status)) + { + if ($status{$sid} == -1) + { + $self->debug(4,"Process: sid($sid) is broken... let's tell someone and watch it hit the fan... =)"); + $block = 0; + } + } + + $self->debug(2,"Process: block($block)"); + } + + #--------------------------------------------------------------------------- + # If the Select has an error then shut this party down. + #--------------------------------------------------------------------------- + foreach my $connection ($self->{SELECT}->has_exception(0)) + { + $self->debug(4,"Process: has_exception sid($self->{SOCKETS}->{$connection})"); + $status{$self->{SOCKETS}->{$connection}} = -1; + } + + #--------------------------------------------------------------------------- + # If there are data structures that have not been collected return + # those, otherwise return the status which indicates if nodes were read or + # not. + #--------------------------------------------------------------------------- + foreach my $sid (keys(%status)) + { + $status{$sid} = $self->{SIDS}->{$sid}->{nodes} + if (($status{$sid} == 1) && + ($#{$self->{SIDS}->{$sid}->{nodes}} > -1)); + } + + return %status; +} + + +############################################################################## +# +# Read - Takes the data from the server and returns a string +# +############################################################################## +sub Read +{ + my $self = shift; + my $sid = shift; + my $buff; + my $status = 1; + + $self->debug(3,"Read: sid($sid)"); + $self->debug(3,"Read: connectionType($self->{SIDS}->{$sid}->{connectiontype})"); + $self->debug(3,"Read: socket($self->{SIDS}->{$sid}->{sock})"); + + return if ($self->{SIDS}->{$sid}->{status} == -1); + + if (!defined($self->{SIDS}->{$sid}->{sock})) + { + $self->{SIDS}->{$sid}->{status} = -1; + $self->SetErrorCode($sid,"Socket does not defined."); + return; + } + + $self->{SIDS}->{$sid}->{sock}->flush(); + + $status = $self->{SIDS}->{$sid}->{sock}->sysread($buff,4*POSIX::BUFSIZ) + if (($self->{SIDS}->{$sid}->{connectiontype} eq "tcpip") || + ($self->{SIDS}->{$sid}->{connectiontype} eq "http") || + ($self->{SIDS}->{$sid}->{connectiontype} eq "file")); + $status = sysread(STDIN,$buff,1024) + if ($self->{SIDS}->{$sid}->{connectiontype} eq "stdinout"); + + $buff =~ s/^HTTP[\S\s]+\n\n// if ($self->{SIDS}->{$sid}->{connectiontype} eq "http"); + $self->debug(1,"Read: buff($buff)"); + $self->debug(3,"Read: status($status)") if defined($status); + $self->debug(3,"Read: status(undef)") unless defined($status); + $self->{SIDS}->{$sid}->{keepalive} = time + unless (($buff eq "") || !defined($status) || ($status == 0)); + if (defined($status) && ($status != 0)) + { + $buff = Encode::decode_utf8($buff); + return $buff; + } + #return $buff unless (!defined($status) || ($status == 0)); + $self->debug(1,"Read: ERROR"); + return; +} + + +############################################################################## +# +# Send - Takes the data string and sends it to the server +# +############################################################################## +sub Send +{ + my $self = shift; + my $sid = shift; + $self->debug(1,"Send: (@_)"); + $self->debug(3,"Send: sid($sid)"); + $self->debug(3,"Send: status($self->{SIDS}->{$sid}->{status})"); + + $self->{SIDS}->{$sid}->{keepalive} = time; + + return if ($self->{SIDS}->{$sid}->{status} == -1); + + if (!defined($self->{SIDS}->{$sid}->{sock})) + { + $self->debug(3,"Send: socket not defined"); + $self->{SIDS}->{$sid}->{status} = -1; + $self->SetErrorCode($sid,"Socket not defined."); + return; + } + else + { + $self->debug(3,"Send: socket($self->{SIDS}->{$sid}->{sock})"); + } + + $self->{SIDS}->{$sid}->{sock}->flush(); + + if ($self->{SIDS}->{$sid}->{select}->can_write(0)) + { + $self->debug(3,"Send: can_write"); + + $self->{SENDSTRING} = Encode::encode_utf8(join("",@_)); + + $self->{SENDWRITTEN} = 0; + $self->{SENDOFFSET} = 0; + $self->{SENDLENGTH} = length($self->{SENDSTRING}); + while ($self->{SENDLENGTH}) + { + $self->{SENDWRITTEN} = $self->{SIDS}->{$sid}->{sock}->syswrite($self->{SENDSTRING},$self->{SENDLENGTH},$self->{SENDOFFSET}); + + if (!defined($self->{SENDWRITTEN})) + { + $self->debug(4,"Send: SENDWRITTEN(undef)"); + $self->debug(4,"Send: Ok... what happened? Did we lose the connection?"); + $self->{SIDS}->{$sid}->{status} = -1; + $self->SetErrorCode($sid,"Socket died for an unknown reason."); + return; + } + + $self->debug(4,"Send: SENDWRITTEN($self->{SENDWRITTEN})"); + + $self->{SENDLENGTH} -= $self->{SENDWRITTEN}; + $self->{SENDOFFSET} += $self->{SENDWRITTEN}; + } + } + else + { + $self->debug(3,"Send: can't write..."); + } + + return if($self->{SIDS}->{$sid}->{select}->has_exception(0)); + + $self->debug(3,"Send: no exceptions"); + + $self->{SIDS}->{$sid}->{keepalive} = time; + + $self->MarkActivity($sid); + + return 1; +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| Feature Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# ProcessStreamFeatures - process the <stream:featutres/> block. +# +############################################################################## +sub ProcessStreamFeatures +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + $self->{SIDS}->{$sid}->{streamfeatures}->{received} = 1; + + #------------------------------------------------------------------------- + # SASL - 1.0 + #------------------------------------------------------------------------- + my @sasl = &XPath($node,'*[@xmlns="'.&ConstXMLNS('xmpp-sasl').'"]'); + if ($#sasl > -1) + { + if (&XPath($sasl[0],"name()") eq "mechanisms") + { + my @mechanisms = &XPath($sasl[0],"mechanism/text()"); + $self->{SIDS}->{$sid}->{streamfeatures}->{'xmpp-sasl'} = \@mechanisms; + } + } + + #------------------------------------------------------------------------- + # XMPP-TLS - 1.0 + #------------------------------------------------------------------------- + my @tls = &XPath($node,'*[@xmlns="'.&ConstXMLNS('xmpp-tls').'"]'); + if ($#tls > -1) + { + if (&XPath($tls[0],"name()") eq "starttls") + { + $self->{SIDS}->{$sid}->{streamfeatures}->{'xmpp-tls'} = 1; + my @required = &XPath($tls[0],"required"); + if ($#required > -1) + { + $self->{SIDS}->{$sid}->{streamfeatures}->{'xmpp-tls'} = "required"; + } + } + } + + #------------------------------------------------------------------------- + # XMPP-Bind - 1.0 + #------------------------------------------------------------------------- + my @bind = &XPath($node,'*[@xmlns="'.&ConstXMLNS('xmpp-bind').'"]'); + if ($#bind > -1) + { + $self->{SIDS}->{$sid}->{streamfeatures}->{'xmpp-bind'} = 1; + } + + #------------------------------------------------------------------------- + # XMPP-Session - 1.0 + #------------------------------------------------------------------------- + my @session = &XPath($node,'*[@xmlns="'.&ConstXMLNS('xmpp-session').'"]'); + if ($#session > -1) + { + $self->{SIDS}->{$sid}->{streamfeatures}->{'xmpp-session'} = 1; + } + +} + + +############################################################################## +# +# GetStreamFeature - Return the value of the stream feature (if any). +# +############################################################################## +sub GetStreamFeature +{ + my $self = shift; + my $sid = shift; + my $feature = shift; + + return unless exists($self->{SIDS}->{$sid}->{streamfeatures}->{$feature}); + return $self->{SIDS}->{$sid}->{streamfeatures}->{$feature}; +} + + +############################################################################## +# +# ReceivedStreamFeatures - Have we received the stream:features yet? +# +############################################################################## +sub ReceivedStreamFeatures +{ + my $self = shift; + my $sid = shift; + my $feature = shift; + + return $self->{SIDS}->{$sid}->{streamfeatures}->{received}; +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| TLS Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# ProcessTLSPacket - process a TLS based packet. +# +############################################################################## +sub ProcessTLSPacket +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + my $tag = &XPath($node,"name()"); + + if ($tag eq "failure") + { + $self->TLSClientFailure($sid,$node); + } + + if ($tag eq "proceed") + { + $self->TLSClientProceed($sid,$node); + } +} + + +############################################################################## +# +# StartTLS - client function to have the socket start TLS. +# +############################################################################## +sub StartTLS +{ + my $self = shift; + my $sid = shift; + my $timeout = shift; + $timeout = 120 unless defined($timeout); + $timeout = 120 if ($timeout eq ""); + + $self->TLSStartTLS($sid); + + my $endTime = time + $timeout; + while(!$self->TLSClientDone($sid) && ($endTime >= time)) + { + $self->Process(1); + } + + if (!$self->TLSClientSecure($sid)) + { + return; + } + + return $self->OpenStream($sid,$timeout); +} + + +############################################################################## +# +# TLSStartTLS - send a <starttls/> in the TLS namespace. +# +############################################################################## +sub TLSStartTLS +{ + my $self = shift; + my $sid = shift; + + $self->Send($sid,"<starttls xmlns='".&ConstXMLNS('xmpp-tls')."'/>"); +} + + +############################################################################## +# +# TLSClientProceed - handle a <proceed/> packet. +# +############################################################################## +sub TLSClientProceed +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + $self->debug(1,"TLSClientProceed: Convert normal socket to SSL"); + $self->debug(1,"TLSClientProceed: sock($self->{SIDS}->{$sid}->{sock})"); + if (!$self->LoadSSL()) + { + $self->{SIDS}->{$sid}->{tls}->{error} = "Could not load IO::Socket::SSL."; + $self->{SIDS}->{$sid}->{tls}->{done} = 1; + return; + } + + IO::Socket::SSL->start_SSL($self->{SIDS}->{$sid}->{sock},{SSL_verify_mode=>0x00}); + + $self->debug(1,"TLSClientProceed: ssl_sock($self->{SIDS}->{$sid}->{sock})"); + $self->debug(1,"TLSClientProceed: SSL: We are secure") + if ($self->{SIDS}->{$sid}->{sock}); + + $self->{SIDS}->{$sid}->{tls}->{done} = 1; + $self->{SIDS}->{$sid}->{tls}->{secure} = 1; +} + + +############################################################################## +# +# TLSClientSecure - return 1 if the socket is secure, 0 otherwise. +# +############################################################################## +sub TLSClientSecure +{ + my $self = shift; + my $sid = shift; + + return $self->{SIDS}->{$sid}->{tls}->{secure}; +} + + +############################################################################## +# +# TLSClientDone - return 1 if the TLS process is done +# +############################################################################## +sub TLSClientDone +{ + my $self = shift; + my $sid = shift; + + return $self->{SIDS}->{$sid}->{tls}->{done}; +} + + +############################################################################## +# +# TLSClientError - return the TLS error if any +# +############################################################################## +sub TLSClientError +{ + my $self = shift; + my $sid = shift; + + return $self->{SIDS}->{$sid}->{tls}->{error}; +} + + +############################################################################## +# +# TLSClientFailure - handle a <failure/> +# +############################################################################## +sub TLSClientFailure +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + my $type = &XPath($node,"*/name()"); + + $self->{SIDS}->{$sid}->{tls}->{error} = $type; + $self->{SIDS}->{$sid}->{tls}->{done} = 1; +} + + +############################################################################## +# +# TLSFailure - Send a <failure/> in the TLS namespace +# +############################################################################## +sub TLSFailure +{ + my $self = shift; + my $sid = shift; + my $type = shift; + + $self->Send($sid,"<failure xmlns='".&ConstXMLNS('xmpp-tls')."'><${type}/></failure>"); +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| SASL Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# ProcessSASLPacket - process a SASL based packet. +# +############################################################################## +sub ProcessSASLPacket +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + my $tag = &XPath($node,"name()"); + + if ($tag eq "challenge") + { + $self->SASLAnswerChallenge($sid,$node); + } + + if ($tag eq "failure") + { + $self->SASLClientFailure($sid,$node); + } + + if ($tag eq "success") + { + $self->SASLClientSuccess($sid,$node); + } +} + + +############################################################################## +# +# SASLAnswerChallenge - when we get a <challenge/> we need to do the grunt +# work to return a <response/>. +# +############################################################################## +sub SASLAnswerChallenge +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + my $challenge64 = &XPath($node,"text()"); + my $challenge = MIME::Base64::decode_base64($challenge64); + + #------------------------------------------------------------------------- + # As far as I can tell, if the challenge contains rspauth, then we authed. + # If you try to send that to Authen::SASL, it will spew warnings about + # the missing qop, nonce, etc... However, in order for jabberd2 to think + # that you answered, you have to send back an empty response. Not sure + # which approach is right... So let's hack for now. + #------------------------------------------------------------------------- + my $response = ""; + if ($challenge !~ /rspauth\=/) + { + $response = $self->{SIDS}->{$sid}->{sasl}->{client}->client_step($challenge); + } + + my $response64 = MIME::Base64::encode_base64($response,""); + $self->SASLResponse($sid,$response64); +} + + +############################################################################## +# +# SASLAuth - send an <auth/> in the SASL namespace +# +############################################################################## +sub SASLAuth +{ + my $self = shift; + my $sid = shift; + + my $first_step = $self->{SIDS}->{$sid}->{sasl}->{client}->client_start(); + my $first_step64 = MIME::Base64::encode_base64($first_step,""); + + $self->Send($sid,"<auth xmlns='".&ConstXMLNS('xmpp-sasl')."' mechanism='".$self->{SIDS}->{$sid}->{sasl}->{client}->mechanism()."'>".$first_step64."</auth>"); +} + + +############################################################################## +# +# SASLChallenge - Send a <challenge/> in the SASL namespace +# +############################################################################## +sub SASLChallenge +{ + my $self = shift; + my $sid = shift; + my $challenge = shift; + + $self->Send($sid,"<challenge xmlns='".&ConstXMLNS('xmpp-sasl')."'>${challenge}</challenge>"); +} + + +############################################################################### +# +# SASLClient - This is a helper function to perform all of the required steps +# for doing SASL with the server. +# +############################################################################### +sub SASLClient +{ + my $self = shift; + my $sid = shift; + my $username = shift; + my $password = shift; + + my $mechanisms = $self->GetStreamFeature($sid,"xmpp-sasl"); + + return unless defined($mechanisms); + + my $sasl = new Authen::SASL(mechanism=>join(" ",@{$mechanisms}), + callback=>{ + authname => $username."@".$self->{SIDS}->{$sid}->{hostname}, + + user => $username, + pass => $password + } + ); + + $self->{SIDS}->{$sid}->{sasl}->{client} = $sasl->client_new(); + $self->{SIDS}->{$sid}->{sasl}->{username} = $username; + $self->{SIDS}->{$sid}->{sasl}->{password} = $password; + $self->{SIDS}->{$sid}->{sasl}->{authed} = 0; + $self->{SIDS}->{$sid}->{sasl}->{done} = 0; + + $self->SASLAuth($sid); +} + + +############################################################################## +# +# SASLClientAuthed - return 1 if we authed via SASL, 0 otherwise +# +############################################################################## +sub SASLClientAuthed +{ + my $self = shift; + my $sid = shift; + + return $self->{SIDS}->{$sid}->{sasl}->{authed}; +} + + +############################################################################## +# +# SASLClientDone - return 1 if the SASL process is finished +# +############################################################################## +sub SASLClientDone +{ + my $self = shift; + my $sid = shift; + + return $self->{SIDS}->{$sid}->{sasl}->{done}; +} + + +############################################################################## +# +# SASLClientError - return the error if any +# +############################################################################## +sub SASLClientError +{ + my $self = shift; + my $sid = shift; + + return $self->{SIDS}->{$sid}->{sasl}->{error}; +} + + +############################################################################## +# +# SASLClientFailure - handle a received <failure/> +# +############################################################################## +sub SASLClientFailure +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + my $type = &XPath($node,"*/name()"); + + $self->{SIDS}->{$sid}->{sasl}->{error} = $type; + $self->{SIDS}->{$sid}->{sasl}->{done} = 1; +} + + +############################################################################## +# +# SASLClientSuccess - handle a received <success/> +# +############################################################################## +sub SASLClientSuccess +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + $self->{SIDS}->{$sid}->{sasl}->{authed} = 1; + $self->{SIDS}->{$sid}->{sasl}->{done} = 1; +} + + +############################################################################## +# +# SASLFailure - Send a <failure/> tag in the SASL namespace +# +############################################################################## +sub SASLFailure +{ + my $self = shift; + my $sid = shift; + my $type = shift; + + $self->Send($sid,"<failure xmlns='".&ConstXMLNS('xmpp-sasl')."'><${type}/></failure>"); +} + + +############################################################################## +# +# SASLResponse - Send a <response/> tag in the SASL namespace +# +############################################################################## +sub SASLResponse +{ + my $self = shift; + my $sid = shift; + my $response = shift; + + $self->Send($sid,"<response xmlns='".&ConstXMLNS('xmpp-sasl')."'>${response}</response>"); +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| Packet Handlers +#| +#+---------------------------------------------------------------------------- +############################################################################## + + +############################################################################## +# +# ProcessStreamPacket - process the <stream:XXXX/> packet +# +############################################################################## +sub ProcessStreamPacket +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + my $tag = &XPath($node,"name()"); + my $stream_prefix = $self->StreamPrefix($sid); + my ($type) = ($tag =~ /^${stream_prefix}\:(.+)$/); + + $self->ProcessStreamError($sid,$node) if ($type eq "error"); + $self->ProcessStreamFeatures($sid,$node) if ($type eq "features"); +} + + +############################################################################## +# +# _handle_root - handles a root tag and checks that it is a stream:stream tag +# with the proper namespace. If not then it sets the STATUS +# to -1 and let's the outer code know that an error occurred. +# Then it changes the Start tag handlers to the methond listed +# in $self->{DATASTYLE} +# +############################################################################## +sub _handle_root +{ + my $self = shift; + my ($sax, $tag, %att) = @_; + my $sid = $sax->getSID(); + + $self->debug(2,"_handle_root: sid($sid) sax($sax) tag($tag) att(",%att,")"); + + $self->{SIDS}->{$sid}->{rootTag} = $tag; + + if ($self->{SIDS}->{$sid}->{connectiontype} ne "file") + { + #--------------------------------------------------------------------- + # Make sure we are receiving a valid stream on the same namespace. + #--------------------------------------------------------------------- + + $self->debug(3,"_handle_root: ($self->{SIDS}->{$self->{SIDS}->{$sid}->{serverid}}->{namespace})"); + $self->{SIDS}->{$sid}->{status} = + ((($tag eq "stream:stream") && + exists($att{'xmlns'}) && + ($att{'xmlns'} eq $self->{SIDS}->{$self->{SIDS}->{$sid}->{serverid}}->{namespace}) + ) ? + 1 : + -1 + ); + $self->debug(3,"_handle_root: status($self->{SIDS}->{$sid}->{status})"); + } + else + { + $self->{SIDS}->{$sid}->{status} = 1; + } + + #------------------------------------------------------------------------- + # Get the root tag attributes and save them for later. You never know when + # you'll need to check the namespace or the from attributes sent by the + # server. + #------------------------------------------------------------------------- + $self->{SIDS}->{$sid}->{root} = \%att; + + #------------------------------------------------------------------------- + # Run through the various xmlns:*** attributes and register the namespace + # to prefix map. + #------------------------------------------------------------------------- + foreach my $key (keys(%att)) + { + if ($key =~ /^xmlns\:(.+?)$/) + { + $self->debug(5,"_handle_root: RegisterPrefix: prefix($att{$key}) ns($1)"); + $self->RegisterPrefix($sid,$att{$key},$1); + } + } + + #------------------------------------------------------------------------- + # Sometimes we will get an error, so let's parse the tag assuming that we + # got a stream:error + #------------------------------------------------------------------------- + my $stream_prefix = $self->StreamPrefix($sid); + $self->debug(5,"_handle_root: stream_prefix($stream_prefix)"); + + if ($tag eq $stream_prefix.":error") + { + &XML::Stream::Tree::_handle_element($self,$sax,$tag,%att) + if ($self->{DATASTYLE} eq "tree"); + &XML::Stream::Node::_handle_element($self,$sax,$tag,%att) + if ($self->{DATASTYLE} eq "node"); + } + + #--------------------------------------------------------------------------- + # Now that we have gotten a root tag, let's look for the tags that make up + # the stream. Change the handler for a Start tag to another function. + #--------------------------------------------------------------------------- + $sax->setHandlers(startElement=>sub{ &{$HANDLERS{$self->{DATASTYLE}}->{startElement}}($self,@_) }, + endElement=>sub{ &{$HANDLERS{$self->{DATASTYLE}}->{endElement}}($self,@_) }, + characters=>sub{ &{$HANDLERS{$self->{DATASTYLE}}->{characters}}($self,@_) }, + ); +} + + +############################################################################## +# +# _node - internal callback for nodes. All it does is place the nodes in a +# list so that Process() can return them later. +# +############################################################################## +sub _node +{ + my $self = shift; + my $sid = shift; + my @node = shift; + + if (ref($node[0]) eq "XML::Stream::Node") + { + push(@{$self->{SIDS}->{$sid}->{nodes}},$node[0]); + } + else + { + push(@{$self->{SIDS}->{$sid}->{nodes}},\@node); + } +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| Error Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# GetErrorCode - if you are returned an undef, you can call this function +# and hopefully learn more information about the problem. +# +############################################################################## +sub GetErrorCode +{ + my $self = shift; + my $sid = shift; + + $sid = "newconnection" unless defined($sid); + + $self->debug(3,"GetErrorCode: sid($sid)"); + return ((exists($self->{SIDS}->{$sid}->{errorcode}) && + (ref($self->{SIDS}->{$sid}->{errorcode}) eq "HASH")) ? + $self->{SIDS}->{$sid}->{errorcode} : + { type=>"system", + text=>$!, + } + ); +} + + +############################################################################## +# +# SetErrorCode - sets the error code so that the caller can find out more +# information about the problem +# +############################################################################## +sub SetErrorCode +{ + my $self = shift; + my $sid = shift; + my $errorcode = shift; + + $self->{SIDS}->{$sid}->{errorcode} = $errorcode; +} + + +############################################################################## +# +# ProcessStreamError - Take the XML packet and extract out the error. +# +############################################################################## +sub ProcessStreamError +{ + my $self = shift; + my $sid = shift; + my $node = shift; + + $self->{SIDS}->{$sid}->{streamerror}->{type} = "unknown"; + $self->{SIDS}->{$sid}->{streamerror}->{node} = $node; + + #------------------------------------------------------------------------- + # Check for older 0.9 streams and handle the errors for them. + #------------------------------------------------------------------------- + if (!exists($self->{SIDS}->{$sid}->{root}->{version}) || + ($self->{SIDS}->{$sid}->{root}->{version} eq "") || + ($self->{SIDS}->{$sid}->{root}->{version} < 1.0) + ) + { + $self->{SIDS}->{$sid}->{streamerror}->{text} = + &XPath($node,"text()"); + return; + } + + #------------------------------------------------------------------------- + # Otherwise we are in XMPP land with real stream errors. + #------------------------------------------------------------------------- + my @errors = &XPath($node,'*[@xmlns="'.&ConstXMLNS("xmppstreams").'"]'); + + my $type; + my $text; + foreach my $error (@errors) + { + if (&XPath($error,"name()") eq "text") + { + $self->{SIDS}->{$sid}->{streamerror}->{text} = + &XPath($error,"text()"); + } + else + { + $self->{SIDS}->{$sid}->{streamerror}->{type} = + &XPath($error,"name()"); + } + } +} + + +############################################################################## +# +# StreamError - Given a type and text, generate a <stream:error/> packet to +# send back to the other side. +# +############################################################################## +sub StreamError +{ + my $self = shift; + my $sid = shift; + my $type = shift; + my $text = shift; + + my $root = $self->GetRoot($sid); + my $stream_base = $self->StreamPrefix($sid); + my $error = "<${stream_base}:error>"; + + if (exists($root->{version}) && ($root->{version} ne "")) + { + $error .= "<${type} xmlns='".&ConstXMLNS('xmppstreams')."'/>"; + if (defined($text)) + { + $error .= "<text xmlns='".&ConstXMLNS('xmppstreams')."'>"; + $error .= $text; + $error .= "</text>"; + } + } + else + { + $error .= $text; + } + + $error .= "</${stream_base}:error>"; + + return $error; +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| Activity Monitoring Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# IgnoreActivity - Set the flag that will ignore the activity monitor. +# +############################################################################## +sub IgnoreActivity +{ + my $self = shift; + my $sid = shift; + my $ignoreActivity = shift; + $ignoreActivity = 1 unless defined($ignoreActivity); + + $self->debug(3,"IgnoreActivity: ignoreActivity($ignoreActivity)"); + $self->debug(4,"IgnoreActivity: sid($sid)"); + + $self->{SIDS}->{$sid}->{ignoreActivity} = $ignoreActivity; +} + + +############################################################################## +# +# LastActivity - Return the time of the last activity. +# +############################################################################## +sub LastActivity +{ + my $self = shift; + my $sid = shift; + + $self->debug(3,"LastActivity: sid($sid)"); + $self->debug(1,"LastActivity: lastActivity($self->{SIDS}->{$sid}->{lastActivity})"); + + return $self->{SIDS}->{$sid}->{lastActivity}; +} + + +############################################################################## +# +# MarkActivity - Record the current time for this sid. +# +############################################################################## +sub MarkActivity +{ + my $self = shift; + my $sid = shift; + + return if (exists($self->{SIDS}->{$sid}->{ignoreActivity}) && + ($self->{SIDS}->{$sid}->{ignoreActivity} == 1)); + + $self->debug(3,"MarkActivity: sid($sid)"); + + $self->{SIDS}->{$sid}->{lastActivity} = time; +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| XML Node Interface functions +#| +#| These are generic wrappers around the Tree and Node data types. The +#| problem being that the Tree class cannot support methods. +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# SetXMLData - takes a host of arguments and sets a portion of the specified +# data strucure with that data. The function works in two +# modes "single" or "multiple". "single" denotes that the +# function should locate the current tag that matches this +# data and overwrite it's contents with data passed in. +# "multiple" denotes that a new tag should be created even if +# others exist. +# +# type - single or multiple +# XMLTree - pointer to XML::Stream data object (tree or node) +# tag - name of tag to create/modify (if blank assumes +# working with top level tag) +# data - CDATA to set for tag +# attribs - attributes to ADD to tag +# +############################################################################## +sub SetXMLData +{ + return &XML::Stream::Node::SetXMLData(@_) if (ref($_[1]) eq "XML::Stream::Node"); + return &XML::Stream::Tree::SetXMLData(@_) if (ref($_[1]) eq "ARRAY"); +} + + +############################################################################## +# +# GetXMLData - takes a host of arguments and returns various data structures +# that match them. +# +# type - "existence" - returns 1 or 0 if the tag exists in the +# top level. +# "value" - returns either the CDATA of the tag, or the +# value of the attribute depending on which is +# sought. This ignores any mark ups to the data +# and just returns the raw CDATA. +# "value array" - returns an array of strings representing +# all of the CDATA in the specified tag. +# This ignores any mark ups to the data +# and just returns the raw CDATA. +# "tree" - returns a data structure that represents the +# XML with the specified tag as the root tag. +# Depends on the format that you are working with. +# "tree array" - returns an array of data structures each +# with the specified tag as the root tag. +# "child array" - returns a list of all children nodes +# not including CDATA nodes. +# "attribs" - returns a hash with the attributes, and +# their values, for the things that match +# the parameters +# "count" - returns the number of things that match +# the arguments +# "tag" - returns the root tag of this tree +# XMLTree - pointer to XML::Stream data structure +# tag - tag to pull data from. If blank then the top level +# tag is accessed. +# attrib - attribute value to retrieve. Ignored for types +# "value array", "tree", "tree array". If paired +# with value can be used to filter tags based on +# attributes and values. +# value - only valid if an attribute is supplied. Used to +# filter for tags that only contain this attribute. +# Useful to search through multiple tags that all +# reference different name spaces. +# +############################################################################## +sub GetXMLData +{ + return &XML::Stream::Node::GetXMLData(@_) if (ref($_[1]) eq "XML::Stream::Node"); + return &XML::Stream::Tree::GetXMLData(@_) if (ref($_[1]) eq "ARRAY"); +} + + +############################################################################## +# +# XPath - run an xpath query on a node and return back the result. +# +############################################################################## +sub XPath +{ + my $tree = shift; + my $path = shift; + + my $query = new XML::Stream::XPath::Query($path); + my $result = $query->execute($tree); + if ($result->check()) + { + my %attribs = $result->getAttribs(); + return %attribs if (scalar(keys(%attribs)) > 0); + + my @values = $result->getValues(); + @values = $result->getList() unless ($#values > -1); + return @values if wantarray; + return $values[0]; + } + return; +} + + +############################################################################## +# +# XPathCheck - run an xpath query on a node and return 1 or 0 if the path is +# valid. +# +############################################################################## +sub XPathCheck +{ + my $tree = shift; + my $path = shift; + + my $query = new XML::Stream::XPath::Query($path); + my $result = $query->execute($tree); + return $result->check(); +} + + +############################################################################## +# +# XML2Config - takes an XML data tree and turns it into a hash of hashes. +# This only works for certain kinds of XML trees like this: +# +# <foo> +# <bar>1</bar> +# <x> +# <y>foo</y> +# </x> +# <z>5</z> +# <z>6</z> +# </foo> +# +# The resulting hash would be: +# +# $hash{bar} = 1; +# $hash{x}->{y} = "foo"; +# $hash{z}->[0] = 5; +# $hash{z}->[1] = 6; +# +# Good for config files. +# +############################################################################## +sub XML2Config +{ + return &XML::Stream::Node::XML2Config(@_) if (ref($_[0]) eq "XML::Stream::Node"); + return &XML::Stream::Tree::XML2Config(@_) if (ref($_[0]) eq "ARRAY"); +} + + +############################################################################## +# +# Config2XML - takes a hash and produces an XML string from it. If the hash +# looks like this: +# +# $hash{bar} = 1; +# $hash{x}->{y} = "foo"; +# $hash{z}->[0] = 5; +# $hash{z}->[1] = 6; +# +# The resulting xml would be: +# +# <foo> +# <bar>1</bar> +# <x> +# <y>foo</y> +# </x> +# <z>5</z> +# <z>6</z> +# </foo> +# +# Good for config files. +# +############################################################################## +sub Config2XML +{ + my ($tag,$hash,$indent) = @_; + $indent = "" unless defined($indent); + + my $xml; + + if (ref($hash) eq "ARRAY") + { + foreach my $item (@{$hash}) + { + $xml .= &XML::Stream::Config2XML($tag,$item,$indent); + } + } + else + { + if ((ref($hash) eq "HASH") && ((scalar keys(%{$hash})) == 0)) + { + $xml .= "$indent<$tag/>\n"; + } + else + { + if (ref($hash) eq "") + { + if ($hash eq "") + { + return "$indent<$tag/>\n"; + } + else + { + return "$indent<$tag>$hash</$tag>\n"; + } + } + else + { + $xml .= "$indent<$tag>\n"; + foreach my $item (sort {$a cmp $b} keys(%{$hash})) + { + $xml .= &XML::Stream::Config2XML($item,$hash->{$item}," $indent"); + } + $xml .= "$indent</$tag>\n"; + } + } + } + return $xml; +} + + +############################################################################## +# +# EscapeXML - Simple function to make sure that no bad characters make it into +# in the XML string that might cause the string to be +# misinterpreted. +# +############################################################################## +sub EscapeXML +{ + my $data = shift; + + if (defined($data)) + { + $data =~ s/&/&/g; + $data =~ s/</</g; + $data =~ s/>/>/g; + $data =~ s/\"/"/g; + $data =~ s/\'/'/g; + } + + return $data; +} + + +############################################################################## +# +# UnescapeXML - Simple function to take an escaped string and return it to +# normal. +# +############################################################################## +sub UnescapeXML +{ + my $data = shift; + + if (defined($data)) + { + $data =~ s/&/&/g; + $data =~ s/</</g; + $data =~ s/>/>/g; + $data =~ s/"/\"/g; + $data =~ s/'/\'/g; + } + + return $data; +} + + +############################################################################## +# +# BuildXML - takes one of the data formats that XML::Stream supports and call +# the proper BuildXML_xxx function on it. +# +############################################################################## +sub BuildXML +{ + return &XML::Stream::Node::BuildXML(@_) if (ref($_[0]) eq "XML::Stream::Node"); + return &XML::Stream::Tree::BuildXML(@_) if (ref($_[0]) eq "ARRAY"); + return &XML::Stream::Tree::BuildXML(@_) if (ref($_[1]) eq "ARRAY"); +} + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| Namespace/Prefix Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# ConstXMLNS - Return the namespace from the constant string. +# +############################################################################## +sub ConstXMLNS +{ + my $const = shift; + + return $XMLNS{$const}; +} + + +############################################################################## +# +# StreamPrefix - Return the prefix of the <stream:stream/> +# +############################################################################## +sub StreamPrefix +{ + my $self = shift; + my $sid = shift; + + return $self->ns2prefix($sid,&ConstXMLNS("stream")); +} + + +############################################################################## +# +# RegisterPrefix - setup the map for namespace to prefix +# +############################################################################## +sub RegisterPrefix +{ + my $self = shift; + my $sid = shift; + my $ns = shift; + my $prefix = shift; + + $self->{SIDS}->{$sid}->{ns2prefix}->{$ns} = $prefix; +} + + +############################################################################## +# +# ns2prefix - for a stream, return the prefix for the given namespace +# +############################################################################## +sub ns2prefix +{ + my $self = shift; + my $sid = shift; + my $ns = shift; + + return $self->{SIDS}->{$sid}->{ns2prefix}->{$ns}; +} + + + + +############################################################################## +#+---------------------------------------------------------------------------- +#| +#| Helper Functions +#| +#+---------------------------------------------------------------------------- +############################################################################## + +############################################################################## +# +# GetRoot - returns the hash of attributes for the root <stream:stream/> tag +# so that any attributes returned can be accessed. from and any +# xmlns:foobar might be important. +# +############################################################################## +sub GetRoot +{ + my $self = shift; + my $sid = shift; + return unless exists($self->{SIDS}->{$sid}->{root}); + return $self->{SIDS}->{$sid}->{root}; +} + + +############################################################################## +# +# GetSock - returns the Socket so that an outside function can access it if +# desired. +# +############################################################################## +sub GetSock +{ + my $self = shift; + my $sid = shift; + return $self->{SIDS}->{$sid}->{sock}; +} + + +############################################################################## +# +# LoadSSL - simple call to set everything up for SSL one time. +# +############################################################################## +sub LoadSSL +{ + my $self = shift; + + $self->debug(1,"LoadSSL: Load the IO::Socket::SSL module"); + + if (defined($SSL) && ($SSL == 1)) + { + $self->debug(1,"LoadSSL: Success"); + return 1; + } + + if (defined($SSL) && ($SSL == 0)) + { + $self->debug(1,"LoadSSL: Failure"); + return; + } + + my $SSL_Version = "0.81"; + eval "use IO::Socket::SSL $SSL_Version"; + if ($@) + { + croak("You requested that XML::Stream turn the socket into an SSL socket, but you don't have the correct version of IO::Socket::SSL v$SSL_Version."); + } + IO::Socket::SSL::context_init({SSL_verify_mode=>0x00}); + $SSL = 1; + + $self->debug(1,"LoadSSL: Success"); + return 1; +} + + +############################################################################## +# +# Host2SID - For a server this allows you to lookup the SID of a stream server +# based on the hostname that is is listening on. +# +############################################################################## +sub Host2SID +{ + my $self = shift; + my $hostname = shift; + + foreach my $sid (keys(%{$self->{SIDS}})) + { + next if ($sid eq "default"); + next if ($sid =~ /^server/); + + return $sid if ($self->{SIDS}->{$sid}->{hostname} eq $hostname); + } + return; +} + + +############################################################################## +# +# NewSID - returns a session ID to send to an incoming stream in the return +# header. By default it just increments a counter and returns that, +# or you can define a function and set it using the SetCallBacks +# function. +# +############################################################################## +sub NewSID +{ + my $self = shift; + return &{$self->{CB}->{sid}}() if (exists($self->{CB}->{sid}) && + defined($self->{CB}->{sid})); + return $$.time.$self->{IDCOUNT}++; +} + + +########################################################################### +# +# SetCallBacks - Takes a hash with top level tags to look for as the keys +# and pointers to functions as the values. +# +########################################################################### +sub SetCallBacks +{ + my $self = shift; + while($#_ >= 0) { + my $func = pop(@_); + my $tag = pop(@_); + if (($tag eq "node") && !defined($func)) + { + $self->SetCallBacks(node=>sub { $self->_node(@_) }); + } + else + { + $self->debug(1,"SetCallBacks: tag($tag) func($func)"); + $self->{CB}->{$tag} = $func; + } + } +} + + +############################################################################## +# +# StreamHeader - Given the arguments, return the opening stream header. +# +############################################################################## +sub StreamHeader +{ + my $self = shift; + my (%args) = @_; + + my $stream; + $stream .= "<?xml version='1.0'?>"; + $stream .= "<stream:stream "; + $stream .= "version='1.0' "; + $stream .= "xmlns:stream='".&ConstXMLNS("stream")."' "; + $stream .= "xmlns='$args{xmlns}' "; + $stream .= "to='$args{to}' " if exists($args{to}); + $stream .= "from='$args{from}' " if exists($args{from}); + $stream .= "xml:lang='$args{xmllang}' " if exists($args{xmllang}); + + foreach my $ns (@{$args{namespaces}}) + { + $stream .= " ".$ns->GetStream(); + } + + $stream .= ">"; + + return $stream; +} + + +########################################################################### +# +# debug - prints the arguments to the debug log if debug is turned on. +# +########################################################################### +sub debug +{ + return if ($_[1] > $_[0]->{DEBUGLEVEL}); + my $self = shift; + my ($limit,@args) = @_; + return if ($self->{DEBUGFILE} eq ""); + my $fh = $self->{DEBUGFILE}; + if ($self->{DEBUGTIME} == 1) + { + my ($sec,$min,$hour) = localtime(time); + print $fh sprintf("[%02d:%02d:%02d] ",$hour,$min,$sec); + } + print $fh "XML::Stream: @args\n"; +} + + +############################################################################## +# +# nonblock - set the socket to be non-blocking. +# +############################################################################## +sub nonblock +{ + my $self = shift; + my $socket = shift; + + #-------------------------------------------------------------------------- + # Code copied from POE::Wheel::SocketFactory... + # Win32 does things one way... + #-------------------------------------------------------------------------- + if ($^O eq "MSWin32") + { + ioctl( $socket, 0x80000000 | (4 << 16) | (ord('f') << 8) | 126, 1) || + croak("Can't make socket nonblocking (win32): $!"); + return; + } + + #-------------------------------------------------------------------------- + # And UNIX does them another + #-------------------------------------------------------------------------- + my $flags = fcntl($socket, F_GETFL, 0) + or die "Can't get flags for socket: $!\n"; + fcntl($socket, F_SETFL, $flags | O_NONBLOCK) + or die "Can't make socket nonblocking: $!\n"; +} + + +############################################################################## +# +# printData - debugging function to print out any data structure in an +# organized manner. Very useful for debugging XML::Parser::Tree +# objects. This is a private function that will only exist in +# in the development version. +# +############################################################################## +sub printData +{ + print &sprintData(@_); +} + + +############################################################################## +# +# sprintData - debugging function to build a string out of any data structure +# in an organized manner. Very useful for debugging +# XML::Parser::Tree objects and perl hashes of hashes. +# +# This is a private function. +# +############################################################################## +sub sprintData +{ + my ($preString,$data) = @_; + + my $outString = ""; + + if (ref($data) eq "HASH") + { + my $key; + foreach $key (sort { $a cmp $b } keys(%{$data})) + { + if (ref($$data{$key}) eq "") + { + my $value = defined($$data{$key}) ? $$data{$key} : ""; + $outString .= $preString."{'$key'} = \"".$value."\";\n"; + } + else + { + if (ref($$data{$key}) =~ /Net::Jabber/) + { + $outString .= $preString."{'$key'} = ".ref($$data{$key}).";\n"; + } + else + { + $outString .= $preString."{'$key'};\n"; + $outString .= &sprintData($preString."{'$key'}->",$$data{$key}); + } + } + } + } + else + { + if (ref($data) eq "ARRAY") + { + my $index; + foreach $index (0..$#{$data}) + { + if (ref($$data[$index]) eq "") + { + $outString .= $preString."[$index] = \"$$data[$index]\";\n"; + } + else + { + if (ref($$data[$index]) =~ /Net::Jabber/) + { + $outString .= $preString."[$index] = ".ref($$data[$index]).";\n"; + } + else + { + $outString .= $preString."[$index];\n"; + $outString .= &sprintData($preString."[$index]->",$$data[$index]); + } + } + } + } + else + { + if (ref($data) eq "REF") + { + $outString .= &sprintData($preString."->",$$data); + } + else + { + if (ref($data) eq "") + { + $outString .= $preString." = \"$data\";\n"; + } + else + { + $outString .= $preString." = ".ref($data).";\n"; + } + } + } + } + + return $outString; +} + + +1; diff --git a/lib/XML/Stream/Namespace.pm b/lib/XML/Stream/Namespace.pm new file mode 100644 index 0000000..a9aee25 --- /dev/null +++ b/lib/XML/Stream/Namespace.pm @@ -0,0 +1,190 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream::Namespace; + +=head1 NAME + +XML::Stream::Namespace - Object to make defining Namespaces easier in + XML::Stream. + +=head1 SYNOPSIS + +XML::Stream::Namespace is a helper package to XML::Stream. It provides +a clean way of defining Namespaces for XML::Stream to use when connecting. + +=head1 DESCRIPTION + + This module allows you to set and read elements from an XML::Stream + Namespace. + +=head1 METHODS + + SetNamespace("mynamespace"); + SetXMLNS("http://www.mynamespace.com/xmlns"); + SetAttributes(attrib1=>"value1", + attrib2=>"value2"); + + GetNamespace() returns "mynamespace" + GetXMLNS() returns "http://www.mynamespace.com/xmlns" + GetAttributes() returns a hash ( attrib1=>"value1",attrib2=>"value2") + GetStream() returns the following string: + "xmlns:mynamespace='http://www.nynamespace.com/xmlns' + mynamespace:attrib1='value1' + mynamespace:attrib2='value2'" + +=head1 EXAMPLES + + + $myNamespace = new XML::Stream::Namespace("mynamspace"); + $myNamespace->SetXMLNS("http://www.mynamespace.org/xmlns"); + $myNamespace->SetAttributes(foo=>"bar", + bob=>"vila"); + + $stream = new XML::Stream; + $stream->Connect(name=>"foo.bar.org", + port=>1234, + namespace=>"foo:bar", + namespaces=>[ $myNamespace ]); + + # + # The above Connect will send the following as the opening string + # of the stream to foo.bar.org:1234... + # + # <stream:stream + # xmlns:stream="http://etherx.jabber.org/streams" + # to="foo.bar.org" + # xmlns="foo:bar" + # xmlns:mynamespace="http://www.mynamespace.org/xmlns" + # mynamespace:foo="bar" + # mynamespace:bob="vila"> + # + + +=head1 AUTHOR + +Written by Ryan Eatmon in February 2000 +Idea By Thomas Charron in January of 2000 for http://etherx.jabber.org/streams/ + +=head1 COPYRIGHT + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +use strict; +use Carp; +use vars qw( $VERSION ); + +$VERSION = "1.22"; + +sub new +{ + my $proto = shift; + my $self = { }; + + ($self->{Namespace}) = @_ if ($#_ > -1); + + $self->{Attributes} = {}; + + bless($self,$proto); + return $self; +} + + +sub SetNamespace +{ + my $self = shift; + my ($namespace) = @_; + + $self->{Namespace} = $namespace; +} + + +sub SetXMLNS +{ + my $self = shift; + my ($xmlns) = @_; + + $self->{XMLNS} = $xmlns; +} + + +sub SetAttributes +{ + my $self = shift; + my %att = @_; + + my $key; + foreach $key (keys(%att)) + { + $self->{Attributes}->{$key} = $att{$key}; + } +} + + +sub GetNamespace +{ + my $self = shift; + + return $self->{Namespace}; +} + +sub GetXMLNS +{ + my $self = shift; + + return $self->{XMLNS}; +} + +sub GetAttributes +{ + my $self = shift; + my ($attrib) = @_; + + return $self->{Attributes} if ($attrib eq ""); + return $self->{Attributes}->{$attrib}; +} + + +sub GetStream +{ + my $self = shift; + + my $string = ""; + + $string .= "xmlns:".$self->GetNamespace(); + $string .= "='".$self->GetXMLNS()."'"; + my $attrib; + foreach $attrib (keys(%{$self->GetAttributes()})) + { + $string .= " ".$self->GetNamespace().":"; + $string .= $attrib; + $string .= "='".$self->GetAttributes($attrib)."'"; + } + + return $string; +} + +1; + diff --git a/lib/XML/Stream/Node.pm b/lib/XML/Stream/Node.pm new file mode 100644 index 0000000..4dca834 --- /dev/null +++ b/lib/XML/Stream/Node.pm @@ -0,0 +1,944 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream::Node; + +=head1 NAME + +XML::Stream::Node - Functions to make building and parsing the tree easier +to work with. + +=head1 SYNOPSIS + + Just a collection of functions that do not need to be in memory if you +choose one of the other methods of data storage. + + This creates a hierarchy of Perl objects and provides various methods +to manipulate the structure of the tree. It is much like the C library +libxml. + +=head1 FORMAT + +The result of parsing: + + <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> + +would be: + + [ tag: foo + att: {} + children: [ tag: head + att: {id=>"a"} + children: [ tag: "__xmlstream__:node:cdata" + children: "Hello " + ] + [ tag: em + children: [ tag: "__xmlstream__:node:cdata" + children: "there" + ] + ] + ] + [ tag: bar + children: [ tag: "__xmlstream__:node:cdata" + children: "Howdy " + ] + [ tag: ref + ] + ] + [ tag: "__xmlstream__:node:cdata" + children: "do" + ] + ] + +=head1 METHODS + + new() - creates a new node. If you specify tag, then the root + new(tag) tag is set. If you specify data, then cdata is added + new(tag,cdata) to the node as well. Returns the created node. + + get_tag() - returns the root tag of the node. + + set_tag(tag) - set the root tag of the node to tag. + + add_child(node) - adds the specified node as a child to the current + add_child(tag) node, or creates a new node with the specified tag + add_child(tag,cdata) as the root node. Returns the node added. + + remove_child(node) - removes the child node from the current node. + + remove_cdata() - removes all of the cdata children from the current node. + + add_cdata(string) - adds the string as cdata onto the current nodes + child list. + + get_cdata() - returns all of the cdata children concatenated together + into one string. + + get_attrib(attrib) - returns the value of the attrib if it is valid, + or returns undef is attrib is not a real + attribute. + + put_attrib(hash) - for each key/value pair specified, create an + attribute in the node. + + remove_attrib(attrib) - remove the specified attribute from the node. + + add_raw_xml(string,[string,...]) - directly add a string into the XML + packet as the last child, with no + translation. + + get_raw_xml() - return all of the XML in a single string, undef if there + is no raw XML to include. + + remove_raw_xml() - remove all raw XML strings. + + children() - return all of the children of the node in a list. + + attrib() - returns a hash containing all of the attributes on this + node. + + copy() - return a recursive copy of the node. + + XPath(path) - run XML::Stream::XPath on this node. + + XPathCheck(path) - run XML::Stream::XPath on this node and return 1 or 0 + to see if it matches or not. + + GetXML() - return the node in XML string form. + +=head1 AUTHOR + +By Ryan Eatmon in June 2002 for http://jabber.org/ + +=head1 COPYRIGHT + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +use vars qw( $VERSION $LOADED ); + +$VERSION = "1.22"; +$LOADED = 1; + +sub new +{ + my $proto = shift; + my $class = ref($proto) || $proto; + + if (ref($_[0]) eq "XML::Stream::Node") + { + return $_[0]; + } + + my $self = {}; + bless($self, $proto); + + my ($tag,$data) = @_; + + $self->set_tag($tag) if defined($tag); + $self->add_cdata($data) if defined($data); + $self->remove_raw_xml(); + + return $self; +} + + +sub debug +{ + my $self = shift; + my ($indent) = @_; + + $indent = "" unless defined($indent); + + if ($self->{TAG} eq "__xmlstream__:node:cdata") + { + print $indent,"cdata(",join("",@{$self->{CHILDREN}}),")\n"; + } + else + { + print $indent,"packet($self):\n"; + print $indent,"tag: <$self->{TAG}\n"; + if (scalar(keys(%{$self->{ATTRIBS}})) > 0) + { + print $indent,"attribs:\n"; + foreach my $key (sort {$a cmp $b} keys(%{$self->{ATTRIBS}})) + { + print $indent," $key = '$self->{ATTRIBS}->{$key}'\n"; + } + } + if ($#{$self->{CHILDREN}} == -1) + { + print $indent," />\n"; + } + else + { + print $indent," >\n"; + print $indent,"children:\n"; + foreach my $child (@{$self->{CHILDREN}}) + { + $child->debug($indent." "); + } + } + print $indent," </$self->{TAG}>\n"; + } +} + + +sub children +{ + my $self = shift; + + return () unless exists($self->{CHILDREN}); + return @{$self->{CHILDREN}}; +} + + +sub add_child +{ + my $self = shift; + + my $child = new XML::Stream::Node(@_); + push(@{$self->{CHILDREN}},$child); + return $child; +} + + +sub remove_child +{ + my $self = shift; + my $child = shift; + + foreach my $index (0..$#{$self->{CHILDREN}}) + { + if ($child == $self->{CHILDREN}->[$index]) + { + splice(@{$self->{CHILDREN}},$index,1); + last; + } + } +} + + +sub add_cdata +{ + my $self = shift; + my $child = new XML::Stream::Node("__xmlstream__:node:cdata"); + foreach my $cdata (@_) + { + push(@{$child->{CHILDREN}},$cdata); + } + push(@{$self->{CHILDREN}},$child); + return $child; +} + + +sub get_cdata +{ + my $self = shift; + + my $cdata = ""; + foreach my $child (@{$self->{CHILDREN}}) + { + $cdata .= join("",$child->children()) + if ($child->get_tag() eq "__xmlstream__:node:cdata"); + } + + return $cdata; +} + + +sub remove_cdata +{ + my $self = shift; + + my @remove = (); + foreach my $index (0..$#{$self->{CHILDREN}}) + { + if ($self->{CHILDREN}->[$index]->get_tag() eq "__xmlstream__:node:cdata") + { + + unshift(@remove,$index); + } + } + foreach my $index (@remove) + { + splice(@{$self->{CHILDREN}},$index,1); + } +} + + +sub attrib +{ + my $self = shift; + return () unless exists($self->{ATTRIBS}); + return %{$self->{ATTRIBS}}; +} + + +sub get_attrib +{ + my $self = shift; + my ($key) = @_; + + return unless exists($self->{ATTRIBS}->{$key}); + return $self->{ATTRIBS}->{$key}; +} + + +sub put_attrib +{ + my $self = shift; + my (%att) = @_; + + foreach my $key (keys(%att)) + { + $self->{ATTRIBS}->{$key} = $att{$key}; + } +} + + +sub remove_attrib +{ + my $self = shift; + my ($key) = @_; + + return unless exists($self->{ATTRIBS}->{$key}); + delete($self->{ATTRIBS}->{$key}); +} + + +sub add_raw_xml +{ + my $self = shift; + my (@raw) = @_; + + push(@{$self->{RAWXML}},@raw); +} + +sub get_raw_xml +{ + my $self = shift; + + return if ($#{$self->{RAWXML}} == -1); + return join("",@{$self->{RAWXML}}); +} + + +sub remove_raw_xml +{ + my $self = shift; + $self->{RAWXML} = []; +} + + +sub get_tag +{ + my $self = shift; + + return $self->{TAG}; +} + + +sub set_tag +{ + my $self = shift; + my ($tag) = @_; + + $self->{TAG} = $tag; +} + + +sub XPath +{ + my $self = shift; + my @results = &XML::Stream::XPath($self,@_); + return unless ($#results > -1); + return $results[0] unless wantarray; + return @results; +} + + +sub XPathCheck +{ + my $self = shift; + return &XML::Stream::XPathCheck($self,@_); +} + + +sub GetXML +{ + my $self = shift; + + return &BuildXML($self,@_); +} + + +sub copy +{ + my $self = shift; + + my $new_node = new XML::Stream::Node(); + $new_node->set_tag($self->get_tag()); + $new_node->put_attrib($self->attrib()); + + foreach my $child ($self->children()) + { + if ($child->get_tag() eq "__xmlstream__:node:cdata") + { + $new_node->add_cdata($self->get_cdata()); + } + else + { + $new_node->add_child($child->copy()); + } + } + + return $new_node; +} + + + + + +############################################################################## +# +# _handle_element - handles the main tag elements sent from the server. +# On an open tag it creates a new XML::Parser::Node so +# that _handle_cdata and _handle_element can add data +# and tags to it later. +# +############################################################################## +sub _handle_element +{ + my $self; + $self = $_[0] if (ref($_[0]) eq "XML::Stream::Parser"); + $self = shift unless (ref($_[0]) eq "XML::Stream::Parser"); + my ($sax, $tag, %att) = @_; + my $sid = $sax->getSID(); + + $self->debug(2,"Node: _handle_element: sid($sid) sax($sax) tag($tag) att(",%att,")"); + + my $node = new XML::Stream::Node($tag); + $node->put_attrib(%att); + + $self->debug(2,"Node: _handle_element: check(",$#{$self->{SIDS}->{$sid}->{node}},")"); + + if ($#{$self->{SIDS}->{$sid}->{node}} >= 0) + { + $self->{SIDS}->{$sid}->{node}->[$#{$self->{SIDS}->{$sid}->{node}}]-> + add_child($node); + } + + push(@{$self->{SIDS}->{$sid}->{node}},$node); +} + + +############################################################################## +# +# _handle_cdata - handles the CDATA that is encountered. Also, in the +# spirit of XML::Parser::Node it combines any sequential +# CDATA into one tag. +# +############################################################################## +sub _handle_cdata +{ + my $self; + $self = $_[0] if (ref($_[0]) eq "XML::Stream::Parser"); + $self = shift unless (ref($_[0]) eq "XML::Stream::Parser"); + my ($sax, $cdata) = @_; + my $sid = $sax->getSID(); + + $self->debug(2,"Node: _handle_cdata: sid($sid) sax($sax) cdata($cdata)"); + + return if ($#{$self->{SIDS}->{$sid}->{node}} == -1); + + $self->debug(2,"Node: _handle_cdata: sax($sax) cdata($cdata)"); + + $self->{SIDS}->{$sid}->{node}->[$#{$self->{SIDS}->{$sid}->{node}}]-> + add_cdata($cdata); +} + + +############################################################################## +# +# _handle_close - when we see a close tag we need to pop the last element +# from the list and push it onto the end of the previous +# element. This is how we build our hierarchy. +# +############################################################################## +sub _handle_close +{ + my $self; + $self = $_[0] if (ref($_[0]) eq "XML::Stream::Parser"); + $self = shift unless (ref($_[0]) eq "XML::Stream::Parser"); + my ($sax, $tag) = @_; + my $sid = $sax->getSID(); + + $self->debug(2,"Node: _handle_close: sid($sid) sax($sax) tag($tag)"); + + $self->debug(2,"Node: _handle_close: check(",$#{$self->{SIDS}->{$sid}->{node}},")"); + + if ($#{$self->{SIDS}->{$sid}->{node}} == -1) + { + $self->debug(2,"Node: _handle_close: rootTag($self->{SIDS}->{$sid}->{rootTag}) tag($tag)"); + if ($self->{SIDS}->{$sid}->{rootTag} ne $tag) + { + $self->{SIDS}->{$sid}->{streamerror} = "Root tag mis-match: <$self->{SIDS}->{$sid}->{rootTag}> ... </$tag>\n"; + } + return; + } + + my $CLOSED = pop @{$self->{SIDS}->{$sid}->{node}}; + + $self->debug(2,"Node: _handle_close: check2(",$#{$self->{SIDS}->{$sid}->{node}},")"); + + if($#{$self->{SIDS}->{$sid}->{node}} == -1) + { + push @{$self->{SIDS}->{$sid}->{node}}, $CLOSED; + + if (ref($self) ne "XML::Stream::Parser") + { + my $stream_prefix = $self->StreamPrefix($sid); + + if(defined($self->{SIDS}->{$sid}->{node}->[0]) && + ($self->{SIDS}->{$sid}->{node}->[0]->get_tag() =~ /^${stream_prefix}\:/)) + { + my $node = $self->{SIDS}->{$sid}->{node}->[0]; + $self->{SIDS}->{$sid}->{node} = []; + $self->ProcessStreamPacket($sid,$node); + } + else + { + my $node = $self->{SIDS}->{$sid}->{node}->[0]; + $self->{SIDS}->{$sid}->{node} = []; + + my @special = + &XML::Stream::XPath( + $node, + '[@xmlns="'.&XML::Stream::ConstXMLNS("xmpp-sasl").'" or @xmlns="'.&XML::Stream::ConstXMLNS("xmpp-tls").'"]' + ); + if ($#special > -1) + { + my $xmlns = $node->get_attrib("xmlns"); + + $self->ProcessSASLPacket($sid,$node) + if ($xmlns eq &XML::Stream::ConstXMLNS("xmpp-sasl")); + $self->ProcessTLSPacket($sid,$node) + if ($xmlns eq &XML::Stream::ConstXMLNS("xmpp-tls")); + } + else + { + &{$self->{CB}->{node}}($sid,$node); + } + } + } + } +} + + +############################################################################## +# +# SetXMLData - takes a host of arguments and sets a portion of the specified +# XML::Parser::Node object with that data. The function works +# in two modes "single" or "multiple". "single" denotes that +# the function should locate the current tag that matches this +# data and overwrite it's contents with data passed in. +# "multiple" denotes that a new tag should be created even if +# others exist. +# +# type - single or multiple +# XMLTree - pointer to XML::Stream Node object +# tag - name of tag to create/modify (if blank assumes +# working with top level tag) +# data - CDATA to set for tag +# attribs - attributes to ADD to tag +# +############################################################################## +sub SetXMLData +{ + my ($type,$XMLTree,$tag,$data,$attribs) = @_; + + if ($tag ne "") + { + if ($type eq "single") + { + foreach my $child ($XMLTree->children()) + { + if ($$XMLTree[1]->[$child] eq $tag) + { + $XMLTree->remove_child($child); + + my $newChild = $XMLTree->add_child($tag); + $newChild->put_attrib(%{$attribs}); + $newChild->add_cdata($data) if ($data ne ""); + return; + } + } + } + my $newChild = $XMLTree->add_child($tag); + $newChild->put_attrib(%{$attribs}); + $newChild->add_cdata($data) if ($data ne ""); + } + else + { + $XMLTree->put_attrib(%{$attribs}); + $XMLTree->add_cdata($data) if ($data ne ""); + } +} + + +############################################################################## +# +# GetXMLData - takes a host of arguments and returns various data structures +# that match them. +# +# type - "existence" - returns 1 or 0 if the tag exists in the +# top level. +# "value" - returns either the CDATA of the tag, or the +# value of the attribute depending on which is +# sought. This ignores any mark ups to the data +# and just returns the raw CDATA. +# "value array" - returns an array of strings representing +# all of the CDATA in the specified tag. +# This ignores any mark ups to the data +# and just returns the raw CDATA. +# "tree" - returns an XML::Parser::Node object with the +# specified tag as the root tag. +# "tree array" - returns an array of XML::Parser::Node +# objects each with the specified tag as +# the root tag. +# "child array" - returns a list of all children nodes +# not including CDATA nodes. +# "attribs" - returns a hash with the attributes, and +# their values, for the things that match +# the parameters +# "count" - returns the number of things that match +# the arguments +# "tag" - returns the root tag of this tree +# XMLTree - pointer to XML::Parser::Node object +# tag - tag to pull data from. If blank then the top level +# tag is accessed. +# attrib - attribute value to retrieve. Ignored for types +# "value array", "tree", "tree array". If paired +# with value can be used to filter tags based on +# attributes and values. +# value - only valid if an attribute is supplied. Used to +# filter for tags that only contain this attribute. +# Useful to search through multiple tags that all +# reference different name spaces. +# +############################################################################## +sub GetXMLData +{ + my ($type,$XMLTree,$tag,$attrib,$value) = @_; + + $tag = "" if !defined($tag); + $attrib = "" if !defined($attrib); + $value = "" if !defined($value); + + my $skipthis = 0; + + #------------------------------------------------------------------------- + # Check if a child tag in the root tag is being requested. + #------------------------------------------------------------------------- + if ($tag ne "") + { + my $count = 0; + my @array; + foreach my $child ($XMLTree->children()) + { + if (($child->get_tag() eq $tag) || ($tag eq "*")) + { + #------------------------------------------------------------- + # Filter out tags that do not contain the attribute and value. + #------------------------------------------------------------- + next if (($value ne "") && ($attrib ne "") && $child->get_attrib($attrib) && ($XMLTree->get_attrib($attrib) ne $value)); + next if (($attrib ne "") && !$child->get_attrib($attrib)); + + #------------------------------------------------------------- + # Check for existence + #------------------------------------------------------------- + if ($type eq "existence") + { + return 1; + } + #------------------------------------------------------------- + # Return the raw CDATA value without mark ups, or the value of + # the requested attribute. + #------------------------------------------------------------- + if ($type eq "value") + { + if ($attrib eq "") + { + my $str = $child->get_cdata(); + return $str; + } + return $XMLTree->get_attrib($attrib) + if defined($XMLTree->get_attrib($attrib)); + } + #------------------------------------------------------------- + # Return an array of values that represent the raw CDATA without + # mark up tags for the requested tags. + #------------------------------------------------------------- + if ($type eq "value array") + { + if ($attrib eq "") + { + my $str = $child->get_cdata(); + push(@array,$str); + } + else + { + push(@array, $XMLTree->get_attrib($attrib)) + if defined($XMLTree->get_attrib($attrib)); + } + } + #------------------------------------------------------------- + # Return a pointer to a new XML::Parser::Tree object that has + # the requested tag as the root tag. + #------------------------------------------------------------- + if ($type eq "tree") + { + return $child; + } + #------------------------------------------------------------- + # Return an array of pointers to XML::Parser::Tree objects + # that have the requested tag as the root tags. + #------------------------------------------------------------- + if ($type eq "tree array") + { + push(@array,$child); + } + #------------------------------------------------------------- + # Return an array of pointers to XML::Parser::Tree objects + # that have the requested tag as the root tags. + #------------------------------------------------------------- + if ($type eq "child array") + { + push(@array,$child) if ($child->get_tag() ne "__xmlstream__:node:cdata"); + } + #------------------------------------------------------------- + # Return a count of the number of tags that match + #------------------------------------------------------------- + if ($type eq "count") + { + $count++; + } + #------------------------------------------------------------- + # Return the attribute hash that matches this tag + #------------------------------------------------------------- + if ($type eq "attribs") + { + return $XMLTree->attrib(); + } + } + } + #--------------------------------------------------------------------- + # If we are returning arrays then return array. + #--------------------------------------------------------------------- + if (($type eq "tree array") || ($type eq "value array") || + ($type eq "child array")) + { + return @array; + } + + #--------------------------------------------------------------------- + # If we are returning then count, then do so + #--------------------------------------------------------------------- + if ($type eq "count") + { + return $count; + } + } + else + { + #--------------------------------------------------------------------- + # This is the root tag, so handle things a level up. + #--------------------------------------------------------------------- + + #--------------------------------------------------------------------- + # Return the raw CDATA value without mark ups, or the value of the + # requested attribute. + #--------------------------------------------------------------------- + if ($type eq "value") + { + if ($attrib eq "") + { + my $str = $XMLTree->get_cdata(); + return $str; + } + return $XMLTree->get_attrib($attrib) + if $XMLTree->get_attrib($attrib); + } + #--------------------------------------------------------------------- + # Return a pointer to a new XML::Parser::Tree object that has the + # requested tag as the root tag. + #--------------------------------------------------------------------- + if ($type eq "tree") + { + return $XMLTree; + } + + #--------------------------------------------------------------------- + # Return the 1 if the specified attribute exists in the root tag. + #--------------------------------------------------------------------- + if ($type eq "existence") + { + if ($attrib ne "") + { + return ($XMLTree->get_attrib($attrib) eq $value) if ($value ne ""); + return defined($XMLTree->get_attrib($attrib)); + } + return 0; + } + + #--------------------------------------------------------------------- + # Return the attribute hash that matches this tag + #--------------------------------------------------------------------- + if ($type eq "attribs") + { + return $XMLTree->attrib(); + } + #--------------------------------------------------------------------- + # Return the tag of this node + #--------------------------------------------------------------------- + if ($type eq "tag") + { + return $XMLTree->get_tag(); + } + } + #------------------------------------------------------------------------- + # Return 0 if this was a request for existence, or "" if a request for + # a "value", or [] for "tree", "value array", and "tree array". + #------------------------------------------------------------------------- + return 0 if ($type eq "existence"); + return "" if ($type eq "value"); + return []; +} + + +############################################################################## +# +# BuildXML - takes an XML::Parser::Tree object and builds the XML string +# that it represents. +# +############################################################################## +sub BuildXML +{ + my ($node,$rawXML) = @_; + + my $str = "<".$node->get_tag(); + + my %attrib = $node->attrib(); + + foreach my $att (sort {$a cmp $b} keys(%attrib)) + { + $str .= " ".$att."='".&XML::Stream::EscapeXML($attrib{$att})."'"; + } + + my @children = $node->children(); + if (($#children > -1) || + (defined($rawXML) && ($rawXML ne "")) || + (defined($node->get_raw_xml()) && ($node->get_raw_xml() ne "")) + ) + { + $str .= ">"; + foreach my $child (@children) + { + if ($child->get_tag() eq "__xmlstream__:node:cdata") + { + $str .= &XML::Stream::EscapeXML(join("",$child->children())); + } + else + { + $str .= &XML::Stream::Node::BuildXML($child); + } + } + $str .= $node->get_raw_xml() + if (defined($node->get_raw_xml()) && + ($node->get_raw_xml() ne "") + ); + $str .= $rawXML if (defined($rawXML) && ($rawXML ne "")); + $str .= "</".$node->get_tag().">"; + } + else + { + $str .= "/>"; + } + + return $str; +} + + +############################################################################## +# +# XML2Config - takes an XML data tree and turns it into a hash of hashes. +# This only works for certain kinds of XML trees like this: +# +# <foo> +# <bar>1</bar> +# <x> +# <y>foo</y> +# </x> +# <z>5</z> +# </foo> +# +# The resulting hash would be: +# +# $hash{bar} = 1; +# $hash{x}->{y} = "foo"; +# $hash{z} = 5; +# +# Good for config files. +# +############################################################################## +sub XML2Config +{ + my ($XMLTree) = @_; + + my %hash; + foreach my $tree (&XML::Stream::GetXMLData("tree array",$XMLTree,"*")) + { + if ($tree->get_tag() eq "__xmlstream__:node:cdata") + { + my $str = join("",$tree->children()); + return $str unless ($str =~ /^\s*$/); + } + else + { + if (&XML::Stream::GetXMLData("count",$XMLTree,$tree->get_tag()) > 1) + { + push(@{$hash{$tree->get_tag()}},&XML::Stream::XML2Config($tree)); + } + else + { + $hash{$tree->get_tag()} = &XML::Stream::XML2Config($tree); + } + } + } + return \%hash; +} + + +1; diff --git a/lib/XML/Stream/Parser.pm b/lib/XML/Stream/Parser.pm new file mode 100644 index 0000000..9ca7832 --- /dev/null +++ b/lib/XML/Stream/Parser.pm @@ -0,0 +1,567 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream::Parser; + +=head1 NAME + + XML::Stream::Parser - SAX XML Parser for XML Streams + +=head1 SYNOPSIS + + Light weight XML parser that builds XML::Parser::Tree objects from the + incoming stream and passes them to a function to tell whoever is using + it that there are new packets. + +=head1 DESCRIPTION + + This module provides a very light weight parser + +=head1 METHODS + +=head1 EXAMPLES + +=head1 AUTHOR + +By Ryan Eatmon in January of 2001 for http://jabber.org/ + +=head1 COPYRIGHT + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +use strict; +use vars qw( $VERSION ); + +$VERSION = "1.22"; + +sub new +{ + my $self = { }; + + bless($self); + + my %args; + while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); } + + $self->{PARSING} = 0; + $self->{DOC} = 0; + $self->{XML} = ""; + $self->{CNAME} = (); + $self->{CURR} = 0; + + $args{nonblocking} = 0 unless exists($args{nonblocking}); + + $self->{NONBLOCKING} = delete($args{nonblocking}); + + $self->{DEBUGTIME} = 0; + $self->{DEBUGTIME} = $args{debugtime} if exists($args{debugtime}); + + $self->{DEBUGLEVEL} = 0; + $self->{DEBUGLEVEL} = $args{debuglevel} if exists($args{debuglevel}); + + $self->{DEBUGFILE} = ""; + + if (exists($args{debugfh}) && ($args{debugfh} ne "")) + { + $self->{DEBUGFILE} = $args{debugfh}; + $self->{DEBUG} = 1; + } + + if ((exists($args{debugfh}) && ($args{debugfh} eq "")) || + (exists($args{debug}) && ($args{debug} ne ""))) + { + $self->{DEBUG} = 1; + if (lc($args{debug}) eq "stdout") + { + $self->{DEBUGFILE} = new FileHandle(">&STDERR"); + $self->{DEBUGFILE}->autoflush(1); + } + else + { + if (-e $args{debug}) + { + if (-w $args{debug}) + { + $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); + $self->{DEBUGFILE}->autoflush(1); + } + else + { + print "WARNING: debug file ($args{debug}) is not writable by you\n"; + print " No debug information being saved.\n"; + $self->{DEBUG} = 0; + } + } + else + { + $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); + if (defined($self->{DEBUGFILE})) + { + $self->{DEBUGFILE}->autoflush(1); + } + else + { + print "WARNING: debug file ($args{debug}) does not exist \n"; + print " and is not writable by you.\n"; + print " No debug information being saved.\n"; + $self->{DEBUG} = 0; + } + } + } + } + + $self->{SID} = exists($args{sid}) ? $args{sid} : "__xmlstream__:sid"; + + $self->{STYLE} = (exists($args{style}) ? lc($args{style}) : "tree"); + $self->{DTD} = (exists($args{dtd}) ? lc($args{dtd}) : 0); + + if ($self->{STYLE} eq "tree") + { + $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; + $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; + $self->{HANDLER}->{startElement} = sub{ &XML::Stream::Tree::_handle_element(@_); }; + $self->{HANDLER}->{endElement} = sub{ &XML::Stream::Tree::_handle_close(@_); }; + $self->{HANDLER}->{characters} = sub{ &XML::Stream::Tree::_handle_cdata(@_); }; + } + elsif ($self->{STYLE} eq "node") + { + $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; + $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; + $self->{HANDLER}->{startElement} = sub{ &XML::Stream::Node::_handle_element(@_); }; + $self->{HANDLER}->{endElement} = sub{ &XML::Stream::Node::_handle_close(@_); }; + $self->{HANDLER}->{characters} = sub{ &XML::Stream::Node::_handle_cdata(@_); }; + } + $self->setHandlers(%{$args{handlers}}); + + $self->{XMLONHOLD} = ""; + + return $self; +} + + +########################################################################### +# +# debug - prints the arguments to the debug log if debug is turned on. +# +########################################################################### +sub debug +{ + return if ($_[1] > $_[0]->{DEBUGLEVEL}); + my $self = shift; + my ($limit,@args) = @_; + return if ($self->{DEBUGFILE} eq ""); + my $fh = $self->{DEBUGFILE}; + if ($self->{DEBUGTIME} == 1) + { + my ($sec,$min,$hour) = localtime(time); + print $fh sprintf("[%02d:%02d:%02d] ",$hour,$min,$sec); + } + print $fh "XML::Stream::Parser: $self->{STYLE}: @args\n"; +} + + +sub setSID +{ + my $self = shift; + my $sid = shift; + $self->{SID} = $sid; +} + + +sub getSID +{ + my $self = shift; + return $self->{SID}; +} + + +sub setHandlers +{ + my $self = shift; + my (%handlers) = @_; + + foreach my $handler (keys(%handlers)) + { + $self->{HANDLER}->{$handler} = $handlers{$handler}; + } +} + + +sub parse +{ + my $self = shift; + my $xml = shift; + + return unless defined($xml); + return if ($xml eq ""); + + if ($self->{XMLONHOLD} ne "") + { + $self->{XML} = $self->{XMLONHOLD}; + $self->{XMLONHOLD} = ""; + } + + # XXX change this to not use regex? + while($xml =~ s/<\!--.*?-->//gs) {} + + $self->{XML} .= $xml; + + return if ($self->{PARSING} == 1); + + $self->{PARSING} = 1; + + if(!$self->{DOC} == 1) + { + my $start = index($self->{XML},"<"); + + if ((substr($self->{XML},$start,3) eq "<?x") || + (substr($self->{XML},$start,3) eq "<?X")) + { + my $close = index($self->{XML},"?>"); + if ($close == -1) + { + $self->{PARSING} = 0; + return; + } + $self->{XML} = substr($self->{XML},$close+2,length($self->{XML})-$close-2); + } + + &{$self->{HANDLER}->{startDocument}}($self); + $self->{DOC} = 1; + } + + while(1) + { + if (length($self->{XML}) == 0) + { + $self->{PARSING} = 0; + return $self->returnData(0); + } + my $eclose = -1; + $eclose = index($self->{XML},"</".$self->{CNAME}->[$self->{CURR}].">") + if ($#{$self->{CNAME}} > -1); + + if ($eclose == 0) + { + $self->{XML} = substr($self->{XML},length($self->{CNAME}->[$self->{CURR}])+3,length($self->{XML})-length($self->{CNAME}->[$self->{CURR}])-3); + + $self->{PARSING} = 0 if ($self->{NONBLOCKING} == 1); + &{$self->{HANDLER}->{endElement}}($self,$self->{CNAME}->[$self->{CURR}]); + $self->{PARSING} = 1 if ($self->{NONBLOCKING} == 1); + + $self->{CURR}--; + if ($self->{CURR} == 0) + { + $self->{DOC} = 0; + $self->{PARSING} = 0; + &{$self->{HANDLER}->{endDocument}}($self); + return $self->returnData(0); + } + next; + } + + my $estart = index($self->{XML},"<"); + my $cdatastart = index($self->{XML},"<![CDATA["); + if (($estart == 0) && ($cdatastart != 0)) + { + my $close = index($self->{XML},">"); + if ($close == -1) + { + $self->{PARSING} = 0; + return $self->returnData(0); + } + my $empty = (substr($self->{XML},$close-1,1) eq "/"); + my $starttag = substr($self->{XML},1,$close-($empty ? 2 : 1)); + my $nextspace = index($starttag," "); + my $attribs; + my $name; + if ($nextspace != -1) + { + $name = substr($starttag,0,$nextspace); + $attribs = substr($starttag,$nextspace+1,length($starttag)-$nextspace-1); + } + else + { + $name = $starttag; + } + + my %attribs = $self->attribution($attribs); + if (($self->{DTD} == 1) && (exists($attribs{xmlns}))) + { + } + + &{$self->{HANDLER}->{startElement}}($self,$name,%attribs); + + if($empty == 1) + { + &{$self->{HANDLER}->{endElement}}($self,$name); + } + else + { + $self->{CURR}++; + $self->{CNAME}->[$self->{CURR}] = $name; + } + + $self->{XML} = substr($self->{XML},$close+1,length($self->{XML})-$close-1); + next; + } + + if ($cdatastart == 0) + { + my $cdataclose = index($self->{XML},"]]>"); + if ($cdataclose == -1) + { + $self->{PARSING} = 0; + return $self->returnData(0); + } + + &{$self->{HANDLER}->{characters}}($self,substr($self->{XML},9,$cdataclose-9)); + + $self->{XML} = substr($self->{XML},$cdataclose+3,length($self->{XML})-$cdataclose-3); + next; + } + + if ($estart == -1) + { + $self->{XMLONHOLD} = $self->{XML}; + $self->{XML} = ""; + } + elsif (($cdatastart == -1) || ($cdatastart > $estart)) + { + &{$self->{HANDLER}->{characters}}($self,$self->entityCheck(substr($self->{XML},0,$estart))); + $self->{XML} = substr($self->{XML},$estart,length($self->{XML})-$estart); + } + } +} + + +sub attribution +{ + my $self = shift; + my $str = shift; + + $str = "" unless defined($str); + + my %attribs; + + while(1) + { + my $eq = index($str,"="); + if((length($str) == 0) || ($eq == -1)) + { + return %attribs; + } + + my $ids; + my $id; + my $id1 = index($str,"\'"); + my $id2 = index($str,"\""); + if((($id1 < $id2) && ($id1 != -1)) || ($id2 == -1)) + { + $ids = $id1; + $id = "\'"; + } + if((($id2 < $id1) && ($id1 == -1)) || ($id2 != -1)) + { + $ids = $id2; + $id = "\""; + } + + my $nextid = index($str,$id,$ids+1); + my $val = substr($str,$ids+1,$nextid-$ids-1); + my $key = substr($str,0,$eq); + + while($key =~ s/\s//) {} + + $attribs{$key} = $self->entityCheck($val); + $str = substr($str,$nextid+1,length($str)-$nextid-1); + } + + return %attribs; +} + + +sub entityCheck +{ + my $self = shift; + my $str = shift; + + while($str =~ s/\<\;/\</) {} + while($str =~ s/\>\;/\>/) {} + while($str =~ s/\"\;/\"/) {} + while($str =~ s/\&apos\;/\'/) {} + while($str =~ s/\&\;/\&/) {} + + return $str; +} + + +sub parsefile +{ + my $self = shift; + my $fileName = shift; + + open(FILE,"<",$fileName); + my $file; + while(<FILE>) { $file .= $_; } + $self->parse($file); + close(FILE); + + return $self->returnData(); +} + + +sub returnData +{ + my $self = shift; + my $clearData = shift; + $clearData = 1 unless defined($clearData); + + my $sid = $self->{SID}; + + if ($self->{STYLE} eq "tree") + { + return unless exists($self->{SIDS}->{$sid}->{tree}); + my @tree = @{$self->{SIDS}->{$sid}->{tree}}; + delete($self->{SIDS}->{$sid}->{tree}) if ($clearData == 1); + return ( \@tree ); + } + if ($self->{STYLE} eq "node") + { + return unless exists($self->{SIDS}->{$sid}->{node}); + my $node = $self->{SIDS}->{$sid}->{node}->[0]; + delete($self->{SIDS}->{$sid}->{node}) if ($clearData == 1); + return $node; + } +} + + +sub startDocument +{ + my $self = shift; +} + + +sub endDocument +{ + my $self = shift; +} + + +sub startElement +{ + my $self = shift; + my ($sax, $tag, %att) = @_; + + return unless ($self->{DOC} == 1); + + if ($self->{STYLE} eq "debug") + { + print "$self->{DEBUGHEADER} \\\\ (",join(" ",%att),")\n"; + $self->{DEBUGHEADER} .= $tag." "; + } + else + { + my @NEW; + if($#{$self->{TREE}} < 0) + { + push @{$self->{TREE}}, $tag; + } + else + { + push @{ $self->{TREE}[ $#{$self->{TREE}}]}, $tag; + } + push @NEW, \%att; + push @{$self->{TREE}}, \@NEW; + } +} + + +sub characters +{ + my $self = shift; + my ($sax, $cdata) = @_; + + return unless ($self->{DOC} == 1); + + if ($self->{STYLE} eq "debug") + { + my $str = $cdata; + $str =~ s/\n/\#10\;/g; + print "$self->{DEBUGHEADER} || $str\n"; + } + else + { + return if ($#{$self->{TREE}} == -1); + + my $pos = $#{$self->{TREE}}; + + if ($pos > 0 && $self->{TREE}[$pos - 1] eq "0") + { + $self->{TREE}[$pos - 1] .= $cdata; + } + else + { + push @{$self->{TREE}[$#{$self->{TREE}}]}, 0; + push @{$self->{TREE}[$#{$self->{TREE}}]}, $cdata; + } + } +} + + +sub endElement +{ + my $self = shift; + my ($sax, $tag) = @_; + + return unless ($self->{DOC} == 1); + + if ($self->{STYLE} eq "debug") + { + $self->{DEBUGHEADER} =~ s/\S+\ $//; + print "$self->{DEBUGHEADER} //\n"; + } + else + { + my $CLOSED = pop @{$self->{TREE}}; + + if($#{$self->{TREE}} < 1) + { + push @{$self->{TREE}}, $CLOSED; + + if($self->{TREE}->[0] eq "stream:error") + { + $self->{STREAMERROR} = $self->{TREE}[1]->[2]; + } + } + else + { + push @{$self->{TREE}[$#{$self->{TREE}}]}, $CLOSED; + } + } +} + + +1; diff --git a/lib/XML/Stream/Parser/DTD.pm b/lib/XML/Stream/Parser/DTD.pm new file mode 100644 index 0000000..25dc888 --- /dev/null +++ b/lib/XML/Stream/Parser/DTD.pm @@ -0,0 +1,769 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream::Parser::DTD; + +=head1 NAME + + XML::Stream::Parser::DTD - XML DTD Parser and Verifier + +=head1 SYNOPSIS + + This is a work in progress. I had need for a DTD parser and verifier + and so am working on it here. If you are reading this then you are + snooping. =) + +=head1 DESCRIPTION + + This module provides the initial code for a DTD parser and verifier. + +=head1 METHODS + +=head1 EXAMPLES + +=head1 AUTHOR + +By Ryan Eatmon in February of 2001 for http://jabber.org/ + +=head1 COPYRIGHT + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +use strict; +use vars qw( $VERSION ); + +$VERSION = "1.22"; + +sub new +{ + my $self = { }; + + bless($self); + + my %args; + while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); } + + $self->{URI} = $args{uri}; + + $self->{PARSING} = 0; + $self->{DOC} = 0; + $self->{XML} = ""; + $self->{CNAME} = (); + $self->{CURR} = 0; + + $self->{ENTITY}->{"<"} = "<"; + $self->{ENTITY}->{">"} = ">"; + $self->{ENTITY}->{"""} = "\""; + $self->{ENTITY}->{"'"} = "'"; + $self->{ENTITY}->{"&"} = "&"; + + $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; + $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; + $self->{HANDLER}->{startElement} = sub{ $self->startElement(@_); }; + $self->{HANDLER}->{endElement} = sub{ $self->endElement(@_); }; + + $self->{STYLE} = "debug"; + + open(DTD,$args{uri}); + my $dtd = join("",<DTD>); + close(DTD); + + $self->parse($dtd); + + return $self; +} + + +sub parse +{ + my $self = shift; + my $xml = shift; + + while($xml =~ s/<\!--.*?-->//gs) {} + while($xml =~ s/\n//g) {} + + $self->{XML} .= $xml; + + return if ($self->{PARSING} == 1); + + $self->{PARSING} = 1; + + if(!$self->{DOC} == 1) + { + my $start = index($self->{XML},"<"); + + if (substr($self->{XML},$start,3) =~ /^<\?x$/i) + { + my $close = index($self->{XML},"?>"); + if ($close == -1) + { + $self->{PARSING} = 0; + return; + } + $self->{XML} = substr($self->{XML},$close+2,length($self->{XML})-$close-2); + } + + &{$self->{HANDLER}->{startDocument}}($self); + $self->{DOC} = 1; + } + + while(1) + { + + if (length($self->{XML}) == 0) + { + $self->{PARSING} = 0; + return; + } + + my $estart = index($self->{XML},"<"); + if ($estart == -1) + { + $self->{PARSING} = 0; + return; + } + + my $close = index($self->{XML},">"); + my $dtddata = substr($self->{XML},$estart+1,$close-1); + my $nextspace = index($dtddata," "); + my $attribs; + + my $type = substr($dtddata,0,$nextspace); + $dtddata = substr($dtddata,$nextspace+1,length($dtddata)-$nextspace-1); + $nextspace = index($dtddata," "); + + if ($type eq "!ENTITY") + { + $self->entity($type,$dtddata); + } + else + { + my $tag = substr($dtddata,0,$nextspace); + $dtddata = substr($dtddata,$nextspace+1,length($dtddata)-$nextspace-1); + $nextspace = index($dtddata," "); + + $self->element($type,$tag,$dtddata) if ($type eq "!ELEMENT"); + $self->attlist($type,$tag,$dtddata) if ($type eq "!ATTLIST"); + } + + $self->{XML} = substr($self->{XML},$close+1,length($self->{XML})-$close-1); + next; + } +} + + +sub startDocument +{ + my $self = shift; +} + + +sub endDocument +{ + my $self = shift; +} + + +sub entity +{ + my $self = shift; + my ($type, $data) = @_; + + foreach my $entity (keys(%{$self->{ENTITY}})) + { + $data =~ s/$entity/$self->{ENTITY}->{$entity}/g; + } + + my ($symbol,$tag,undef,$string) = ($data =~ /^\s*(\S+)\s+(\S+)\s+(\"|\')([^\3]*)\3\s*$/); + $self->{ENTITY}->{"${symbol}${tag}\;"} = $string; +} + +sub element +{ + my $self = shift; + my ($type, $tag, $data) = @_; + + foreach my $entity (keys(%{$self->{ENTITY}})) + { + $data =~ s/$entity/$self->{ENTITY}->{$entity}/g; + } + + $self->{COUNTER}->{$tag} = 0 unless exists($self->{COUNTER}->{$tag}); + + $self->parsegrouping($tag,\$self->{ELEMENT}->{$tag},$data); + $self->flattendata(\$self->{ELEMENT}->{$tag}); + +} + + +sub flattendata +{ + my $self = shift; + my $dstr = shift; + + if ($$dstr->{type} eq "list") + { + foreach my $index (0..$#{$$dstr->{list}}) + { + $self->flattendata(\$$dstr->{list}->[$index]); + } + + if (!exists($$dstr->{repeat}) && ($#{$$dstr->{list}} == 0)) + { + $$dstr = $$dstr->{list}->[0]; + } + } +} + +sub parsegrouping +{ + my $self = shift; + my ($tag,$dstr,$data) = @_; + + $data =~ s/^\s*//; + $data =~ s/\s*$//; + + if ($data =~ /[\*\+\?]$/) + { + ($$dstr->{repeat}) = ($data =~ /(.)$/); + $data =~ s/.$//; + } + + if ($data =~ /^\(.*\)$/) + { + my ($seperator) = ($data =~ /^\(\s*\S+\s*(\,|\|)/); + $$dstr->{ordered} = "yes" if ($seperator eq ","); + $$dstr->{ordered} = "no" if ($seperator eq "|"); + + my $count = 0; + $$dstr->{type} = "list"; + foreach my $grouping ($self->groupinglist($data,$seperator)) + { + $self->parsegrouping($tag,\$$dstr->{list}->[$count],$grouping); + $count++; + } + } + else + { + $$dstr->{type} = "element"; + $$dstr->{element} = $data; + $self->{COUNTER}->{$data} = 0 unless exists($self->{COUNTER}->{$data}); + $self->{COUNTER}->{$data}++; + $self->{CHILDREN}->{$tag}->{$data} = 1; + } +} + + +sub attlist +{ + my $self = shift; + my ($type, $tag, $data) = @_; + + foreach my $entity (keys(%{$self->{ENTITY}})) + { + $data =~ s/$entity/$self->{ENTITY}->{$entity}/g; + } + + while($data ne "") + { + my ($att) = ($data =~ /^\s*(\S+)/); + $data =~ s/^\s*\S+\s*//; + + my $value; + if ($data =~ /^\(/) + { + $value = $self->getgrouping($data); + $data = substr($data,length($value)+1,length($data)); + $data =~ s/^\s*//; + $self->{ATTLIST}->{$tag}->{$att}->{type} = "list"; + foreach my $val (split(/\s*\|\s*/,substr($value,1,length($value)-2))) { +$self->{ATTLIST}->{$tag}->{$att}->{value}->{$val} = 1; + } + } + else + { + ($value) = ($data =~ /^(\S+)/); + $data =~ s/^\S+\s*//; + $self->{ATTLIST}->{$tag}->{$att}->{type} = $value; + } + + my $default; + if ($data =~ /^\"|^\'/) + { + my($sq,$val) = ($data =~ /^(\"|\')([^\"\']*)\1/); + $default = $val; + $data =~ s/^$sq$val$sq\s*//; + } + else + { + ($default) = ($data =~ /^(\S+)/); + $data =~ s/^\S+\s*//; + } + + $self->{ATTLIST}->{$tag}->{$att}->{default} = $default; + } +} + + + +sub getgrouping +{ + my $self = shift; + my ($data) = @_; + + my $count = 0; + my $parens = 0; + foreach my $char (split("",$data)) + { + $parens++ if ($char eq "("); + $parens-- if ($char eq ")"); + $count++; + last if ($parens == 0); + } + return substr($data,0,$count); +} + + +sub groupinglist +{ + my $self = shift; + my ($grouping,$seperator) = @_; + + my @list; + my $item = ""; + my $parens = 0; + my $word = ""; + $grouping = substr($grouping,1,length($grouping)-2) if ($grouping =~ /^\(/); + foreach my $char (split("",$grouping)) + { + $parens++ if ($char eq "("); + $parens-- if ($char eq ")"); + if (($parens == 0) && ($char eq $seperator)) + { + push(@list,$word); + $word = ""; + } + else + { + $word .= $char; + } + } + push(@list,$word) unless ($word eq ""); + return @list; +} + + +sub root +{ + my $self = shift; + my $tag = shift; + my @root; + foreach my $tag (keys(%{$self->{COUNTER}})) + { + push(@root,$tag) if ($self->{COUNTER}->{$tag} == 0); + } + + print "ERROR: Too many root tags... Check the DTD...\n" + if ($#root > 0); + return $root[0]; +} + + +sub children +{ + my $self = shift; + my ($tag,$tree) = @_; + + return unless exists ($self->{CHILDREN}->{$tag}); + return if (exists($self->{CHILDREN}->{$tag}->{EMPTY})); + if (defined($tree)) + { + my @current; + foreach my $current (&XML::Stream::GetXMLData("tree array",$tree,"*","","")) + { + push(@current,$$current[0]); + } + return $self->allowedchildren($self->{ELEMENT}->{$tag},\@current); + } + return $self->allowedchildren($self->{ELEMENT}->{$tag}); +} + + +sub allowedchildren +{ + my $self = shift; + my ($dstr,$current) = @_; + + my @allowed; + + if ($dstr->{type} eq "element") + { + my $test = (defined($current) && $#{@{$current}} > -1) ? $$current[0] : ""; + shift(@{$current}) if ($dstr->{element} eq $test); + if ($self->repeatcheck($dstr,$test) == 1) + { + return $dstr->{element}; + } + } + else + { + foreach my $index (0..$#{$dstr->{list}}) + { + push(@allowed,$self->allowedchildren($dstr->{list}->[$index],$current)); + } + } + + return @allowed; +} + + +sub repeatcheck +{ + my $self = shift; + my ($dstr,$tag) = @_; + + $dstr = $self->{ELEMENT}->{$dstr} if exists($self->{ELEMENT}->{$dstr}); + +# print "repeatcheck: tag($tag)\n"; +# print "repeatcheck: repeat($dstr->{repeat})\n" +# if exists($dstr->{repeat}); + + my $return = 0; + $return = ((!defined($tag) || + ($tag eq $dstr->{element})) ? + 0 : + 1) + if (!exists($dstr->{repeat}) || + ($dstr->{repeat} eq "?")); + $return = ((defined($tag) || + (exists($dstr->{ordered}) && + ($dstr->{ordered} eq "yes"))) ? + 1 : + 0) + if (exists($dstr->{repeat}) && + (($dstr->{repeat} eq "+") || + ($dstr->{repeat} eq "*"))); + +# print "repeatcheck: return($return)\n"; + return $return; +} + + +sub required +{ + my $self = shift; + my ($dstr,$tag,$count) = @_; + + $dstr = $self->{ELEMENT}->{$dstr} if exists($self->{ELEMENT}->{$dstr}); + + if ($dstr->{type} eq "element") + { + return 0 if ($dstr->{element} ne $tag); + return 1 if !exists($dstr->{repeat}); + return 1 if (($dstr->{repeat} eq "+") && ($count == 1)) ; + } + else + { + return 0 if (($dstr->{repeat} eq "*") || ($dstr->{repeat} eq "?")); + my $test = 0; + foreach my $index (0..$#{$dstr->{list}}) + { + $test = $test | $self->required($dstr->{list}->[$index],$tag,$count); + } + return $test; + } + return 0; +} + + +sub addchild +{ + my $self = shift; + my ($tag,$child,$tree) = @_; + +# print "addchild: tag($tag) child($child)\n"; + + my @current; + if (defined($tree)) + { +# &Net::Jabber::printData("\$tree",$tree); + + @current = &XML::Stream::GetXMLData("index array",$tree,"*","",""); + +# &Net::Jabber::printData("\$current",\@current); + } + + my @newBranch = $self->addchildrecurse($self->{ELEMENT}->{$tag},$child,\@current); + + return $tree unless ("@newBranch" ne ""); + +# &Net::Jabber::printData("\$newBranch",\@newBranch); + + my $location = shift(@newBranch); + + if ($location eq "end") + { + splice(@{$$tree[1]},@{$$tree[1]},0,@newBranch); + } + else + { + splice(@{$$tree[1]},$location,0,@newBranch); + } + return $tree; +} + + +sub addcdata +{ + my $self = shift; + my ($tag,$child,$tree) = @_; + +# print "addchild: tag($tag) child($child)\n"; + + my @current; + if (defined($tree)) + { +# &Net::Jabber::printData("\$tree",$tree); + + @current = &XML::Stream::GetXMLData("index array",$tree,"*","",""); + +# &Net::Jabber::printData("\$current",\@current); + } + + my @newBranch = $self->addchildrecurse($self->{ELEMENT}->{$tag},$child,\@current); + + return $tree unless ("@newBranch" ne ""); + +# &Net::Jabber::printData("\$newBranch",\@newBranch); + + my $location = shift(@newBranch); + + if ($location eq "end") + { + splice(@{$$tree[1]},@{$$tree[1]},0,@newBranch); + } + else + { + splice(@{$$tree[1]},$location,0,@newBranch); + } + return $tree; +} + + +sub addchildrecurse +{ + my $self = shift; + my ($dstr,$child,$current) = @_; + +# print "addchildrecurse: child($child) type($dstr->{type})\n"; + + if ($dstr->{type} eq "element") + { +# print "addchildrecurse: tag($dstr->{element})\n"; + my $count = 0; + while(($#{@{$current}} > -1) && ($dstr->{element} eq $$current[0])) + { + shift(@{$current}); + shift(@{$current}); + $count++; + } + if (($dstr->{element} eq $child) && + ($self->repeatcheck($dstr,(($count > 0) ? $child : "")) == 1)) + { + my @return = ( "end" , $self->newbranch($child)); + @return = ($$current[1], $self->newbranch($child)) + if ($#{@{$current}} > -1); +# print "addchildrecurse: Found the spot! (",join(",",@return),")\n"; + + return @return; + } + } + else + { + foreach my $index (0..$#{$dstr->{list}}) + { + my @newBranch = $self->addchildrecurse($dstr->{list}->[$index],$child,$current); + return @newBranch if ("@newBranch" ne ""); + } + } +# print "Let's blow....\n"; + return; +} + + +sub deletechild +{ + my $self = shift; + my ($tag,$parent,$parenttree,$tree) = @_; + + return $tree unless exists($self->{ELEMENT}->{$tag}); + return $tree if $self->required($parent,$tag,&XML::Stream::GetXMLData("count",$parenttree,$tag)); + + return []; +} + + + +sub newbranch +{ + my $self = shift; + my $tag = shift; + + $tag = $self->root() unless defined($tag); + + my @tree = (); + + return ("0","") if ($tag eq "#PCDATA"); + + push(@tree,$tag); + push(@tree,[ {} ]); + + foreach my $att ($self->attribs($tag)) + { + $tree[1]->[0]->{$att} = "" + if (($self->{ATTLIST}->{$tag}->{$att}->{default} eq "#REQUIRED") && + ($self->{ATTLIST}->{$tag}->{$att}->{type} eq "CDATA")); + } + + push(@{$tree[1]},$self->recursebranch($self->{ELEMENT}->{$tag})); + return @tree; +} + + +sub recursebranch +{ + my $self = shift; + my $dstr = shift; + + my @tree; + if (($dstr->{type} eq "element") && + ($dstr->{element} ne "EMPTY")) + { + @tree = $self->newbranch($dstr->{element}) + if (!exists($dstr->{repeat}) || + ($dstr->{repeat} eq "+")); + } + else + { + foreach my $index (0..$#{$dstr->{list}}) + { + push(@tree,$self->recursebranch($dstr->{list}->[$index])) +if (!exists($dstr->{repeat}) || + ($dstr->{repeat} eq "+")); + } + } + return @tree; +} + + +sub attribs +{ + my $self = shift; + my ($tag,$tree) = @_; + + return unless exists ($self->{ATTLIST}->{$tag}); + + if (defined($tree)) + { + my %current = &XML::Stream::GetXMLData("attribs",$tree,"","",""); + return $self->allowedattribs($tag,\%current); + } + return $self->allowedattribs($tag); +} + + +sub allowedattribs +{ + my $self = shift; + my ($tag,$current) = @_; + + my %allowed; + foreach my $att (keys(%{$self->{ATTLIST}->{$tag}})) + { + $allowed{$att} = 1 unless (defined($current) && + exists($current->{$att})); + } + return sort {$a cmp $b} keys(%allowed); +} + + +sub attribvalue +{ + my $self = shift; + my $tag = shift; + my $att = shift; + + return $self->{ATTLIST}->{$tag}->{$att}->{type} + if ($self->{ATTLIST}->{$tag}->{$att}->{type} ne "list"); + return sort {$a cmp $b} keys(%{$self->{ATTLIST}->{$tag}->{$att}->{value}}); +} + + +sub addattrib +{ + my $self = shift; + my ($tag,$att,$tree) = @_; + + return $tree unless exists($self->{ATTLIST}->{$tag}); + return $tree unless exists($self->{ATTLIST}->{$tag}->{$att}); + + my $default = $self->{ATTLIST}->{$tag}->{$att}->{default}; + $default = "" if ($default eq "#REQUIRED"); + $default = "" if ($default eq "#IMPLIED"); + + $$tree[1]->[0]->{$att} = $default; + + return $tree; +} + + +sub attribrequired +{ + my $self = shift; + my ($tag,$att) = @_; + + return 0 unless exists($self->{ATTLIST}->{$tag}); + return 0 unless exists($self->{ATTLIST}->{$tag}->{$att}); + + return 1 if ($self->{ATTLIST}->{$tag}->{$att}->{default} eq "#REQUIRED"); + return 0; +} + + +sub deleteattrib +{ + my $self = shift; + my ($tag,$att,$tree) = @_; + + return $tree unless exists($self->{ATTLIST}->{$tag}); + return $tree unless exists($self->{ATTLIST}->{$tag}->{$att}); + + return if $self->attribrequired($tag,$att); + + delete($$tree[1]->[0]->{$att}); + + return $tree; +} + diff --git a/lib/XML/Stream/Tree.pm b/lib/XML/Stream/Tree.pm new file mode 100644 index 0000000..b52269c --- /dev/null +++ b/lib/XML/Stream/Tree.pm @@ -0,0 +1,682 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream::Tree; + +=head1 NAME + +XML::Stream::Tree - Functions to make building and parsing the tree easier +to work with. + +=head1 SYNOPSIS + + Just a collection of functions that do not need to be in memory if you +choose one of the other methods of data storage. + +=head1 FORMAT + +The result of parsing: + + <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> + +would be: + Tag Content + ================================================================== + [foo, [{}, + head, [{id => "a"}, + 0, "Hello ", + em, [{}, + 0, "there" + ] + ], + bar, [{}, + 0, "Howdy", + ref, [{}] + ], + 0, "do" + ] + ] + +The above was copied from the XML::Parser man page. Many thanks to +Larry and Clark. + +=head1 AUTHOR + +By Ryan Eatmon in March 2001 for http://jabber.org/ + +=head1 COPYRIGHT + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +use vars qw( $VERSION $LOADED ); + +$VERSION = "1.22"; +$LOADED = 1; + +############################################################################## +# +# _handle_element - handles the main tag elements sent from the server. +# On an open tag it creates a new XML::Parser::Tree so +# that _handle_cdata and _handle_element can add data +# and tags to it later. +# +############################################################################## +sub _handle_element +{ + my $self; + $self = $_[0] if (ref($_[0]) eq "XML::Stream::Parser"); + $self = shift unless (ref($_[0]) eq "XML::Stream::Parser"); + my ($sax, $tag, %att) = @_; + my $sid = $sax->getSID(); + + $self->debug(2,"_handle_element: sid($sid) sax($sax) tag($tag) att(",%att,")"); + + my @NEW; + if($#{$self->{SIDS}->{$sid}->{tree}} < 0) + { + push @{$self->{SIDS}->{$sid}->{tree}}, $tag; + } + else + { + push @{ $self->{SIDS}->{$sid}->{tree}[ $#{$self->{SIDS}->{$sid}->{tree}}]}, $tag; + } + push @NEW, \%att; + push @{$self->{SIDS}->{$sid}->{tree}}, \@NEW; +} + + +############################################################################## +# +# _handle_cdata - handles the CDATA that is encountered. Also, in the +# spirit of XML::Parser::Tree it combines any sequential +# CDATA into one tag. +# +############################################################################## +sub _handle_cdata +{ + my $self; + $self = $_[0] if (ref($_[0]) eq "XML::Stream::Parser"); + $self = shift unless (ref($_[0]) eq "XML::Stream::Parser"); + my ($sax, $cdata) = @_; + my $sid = $sax->getSID(); + + $self->debug(2,"_handle_cdata: sid($sid) sax($sax) cdata($cdata)"); + + return if ($#{$self->{SIDS}->{$sid}->{tree}} == -1); + + $self->debug(2,"_handle_cdata: sax($sax) cdata($cdata)"); + + my $pos = $#{$self->{SIDS}->{$sid}->{tree}}; + $self->debug(2,"_handle_cdata: pos($pos)"); + + if ($pos > 0 && $self->{SIDS}->{$sid}->{tree}[$pos - 1] eq "0") + { + $self->debug(2,"_handle_cdata: append cdata"); + $self->{SIDS}->{$sid}->{tree}[$pos - 1] .= $cdata; + } + else + { + $self->debug(2,"_handle_cdata: new cdata"); + push @{$self->{SIDS}->{$sid}->{tree}[$#{$self->{SIDS}->{$sid}->{tree}}]}, 0; + push @{$self->{SIDS}->{$sid}->{tree}[$#{$self->{SIDS}->{$sid}->{tree}}]}, $cdata; + } +} + + +############################################################################## +# +# _handle_close - when we see a close tag we need to pop the last element +# from the list and push it onto the end of the previous +# element. This is how we build our hierarchy. +# +############################################################################## +sub _handle_close +{ + my $self; + $self = $_[0] if (ref($_[0]) eq "XML::Stream::Parser"); + $self = shift unless (ref($_[0]) eq "XML::Stream::Parser"); + my ($sax, $tag) = @_; + my $sid = $sax->getSID(); + + $self->debug(2,"_handle_close: sid($sid) sax($sax) tag($tag)"); + + my $CLOSED = pop @{$self->{SIDS}->{$sid}->{tree}}; + + $self->debug(2,"_handle_close: check(",$#{$self->{SIDS}->{$sid}->{tree}},")"); + + if ($#{$self->{SIDS}->{$sid}->{tree}} == -1) + { + if ($self->{SIDS}->{$sid}->{rootTag} ne $tag) + { + $self->{SIDS}->{$sid}->{streamerror} = "Root tag mis-match: <$self->{SIDS}->{$sid}->{rootTag}> ... </$tag>\n"; + } + return; + } + + if($#{$self->{SIDS}->{$sid}->{tree}} < 1) + { + + push @{$self->{SIDS}->{$sid}->{tree}}, $CLOSED; + + if (ref($self) ne "XML::Stream::Parser") + { + my $stream_prefix = $self->StreamPrefix($sid); + + if(defined($self->{SIDS}->{$sid}->{tree}->[0]) && + ($self->{SIDS}->{$sid}->{tree}->[0] =~ /^${stream_prefix}\:/)) + { + my @tree = @{$self->{SIDS}->{$sid}->{tree}}; + $self->{SIDS}->{$sid}->{tree} = []; + $self->ProcessStreamPacket($sid,\@tree); + } + else + { + my @tree = @{$self->{SIDS}->{$sid}->{tree}}; + $self->{SIDS}->{$sid}->{tree} = []; + + my @special = + &XML::Stream::XPath( + \@tree, + '[@xmlns="'.&XML::Stream::ConstXMLNS("xmpp-sasl").'" or @xmlns="'.&XML::Stream::ConstXMLNS("xmpp-tls").'"]' + ); + if ($#special > -1) + { + my $xmlns = &GetXMLData("value",\@tree,"","xmlns"); + + $self->ProcessSASLPacket($sid,\@tree) + if ($xmlns eq &XML::Stream::ConstXMLNS("xmpp-sasl")); + $self->ProcessTLSPacket($sid,\@tree) + if ($xmlns eq &XML::Stream::ConstXMLNS("xmpp-tls")); + } + else + { + &{$self->{CB}->{node}}($sid,\@tree); + } + } + } + } + else + { + push @{$self->{SIDS}->{$sid}->{tree}[$#{$self->{SIDS}->{$sid}->{tree}}]}, $CLOSED; + } +} + + +############################################################################## +# +# SetXMLData - takes a host of arguments and sets a portion of the specified +# XML::Parser::Tree object with that data. The function works +# in two modes "single" or "multiple". "single" denotes that +# the function should locate the current tag that matches this +# data and overwrite it's contents with data passed in. +# "multiple" denotes that a new tag should be created even if +# others exist. +# +# type - single or multiple +# XMLTree - pointer to XML::Stream Tree object +# tag - name of tag to create/modify (if blank assumes +# working with top level tag) +# data - CDATA to set for tag +# attribs - attributes to ADD to tag +# +############################################################################## +sub SetXMLData +{ + my ($type,$XMLTree,$tag,$data,$attribs) = @_; + my ($key); + + if ($tag ne "") + { + if ($type eq "single") + { + my ($child); + foreach $child (1..$#{$$XMLTree[1]}) + { + if ($$XMLTree[1]->[$child] eq $tag) + { + if ($data ne "") + { + #todo: add code to handle writing the cdata again and appending it. + $$XMLTree[1]->[$child+1]->[1] = 0; + $$XMLTree[1]->[$child+1]->[2] = $data; + } + foreach $key (keys(%{$attribs})) + { + $$XMLTree[1]->[$child+1]->[0]->{$key} = $$attribs{$key}; + } + return; + } + } + } + $$XMLTree[1]->[($#{$$XMLTree[1]}+1)] = $tag; + $$XMLTree[1]->[($#{$$XMLTree[1]}+1)]->[0] = {}; + foreach $key (keys(%{$attribs})) + { + $$XMLTree[1]->[$#{$$XMLTree[1]}]->[0]->{$key} = $$attribs{$key}; + } + if ($data ne "") + { + $$XMLTree[1]->[$#{$$XMLTree[1]}]->[1] = 0; + $$XMLTree[1]->[$#{$$XMLTree[1]}]->[2] = $data; + } + } + else + { + foreach $key (keys(%{$attribs})) + { + $$XMLTree[1]->[0]->{$key} = $$attribs{$key}; + } + if ($data ne "") + { + if (($#{$$XMLTree[1]} > 0) && + ($$XMLTree[1]->[($#{$$XMLTree[1]}-1)] eq "0")) + { + $$XMLTree[1]->[$#{$$XMLTree[1]}] .= $data; + } + else + { + $$XMLTree[1]->[($#{$$XMLTree[1]}+1)] = 0; + $$XMLTree[1]->[($#{$$XMLTree[1]}+1)] = $data; + } + } + } +} + + +############################################################################## +# +# GetXMLData - takes a host of arguments and returns various data structures +# that match them. +# +# type - "existence" - returns 1 or 0 if the tag exists in the +# top level. +# "value" - returns either the CDATA of the tag, or the +# value of the attribute depending on which is +# sought. This ignores any mark ups to the data +# and just returns the raw CDATA. +# "value array" - returns an array of strings representing +# all of the CDATA in the specified tag. +# This ignores any mark ups to the data +# and just returns the raw CDATA. +# "tree" - returns an XML::Parser::Tree object with the +# specified tag as the root tag. +# "tree array" - returns an array of XML::Parser::Tree +# objects each with the specified tag as +# the root tag. +# "child array" - returns a list of all children nodes +# not including CDATA nodes. +# "attribs" - returns a hash with the attributes, and +# their values, for the things that match +# the parameters +# "count" - returns the number of things that match +# the arguments +# "tag" - returns the root tag of this tree +# XMLTree - pointer to XML::Parser::Tree object +# tag - tag to pull data from. If blank then the top level +# tag is accessed. +# attrib - attribute value to retrieve. Ignored for types +# "value array", "tree", "tree array". If paired +# with value can be used to filter tags based on +# attributes and values. +# value - only valid if an attribute is supplied. Used to +# filter for tags that only contain this attribute. +# Useful to search through multiple tags that all +# reference different name spaces. +# +############################################################################## +sub GetXMLData +{ + my ($type,$XMLTree,$tag,$attrib,$value) = @_; + + $tag = "" if !defined($tag); + $attrib = "" if !defined($attrib); + $value = "" if !defined($value); + + my $skipthis = 0; + + #--------------------------------------------------------------------------- + # Check if a child tag in the root tag is being requested. + #--------------------------------------------------------------------------- + if ($tag ne "") + { + my $count = 0; + my @array; + foreach my $child (1..$#{$$XMLTree[1]}) + { + next if (($child/2) !~ /\./); + if (($$XMLTree[1]->[$child] eq $tag) || ($tag eq "*")) + { + next if (ref($$XMLTree[1]->[$child]) eq "ARRAY"); + + #--------------------------------------------------------------------- + # Filter out tags that do not contain the attribute and value. + #--------------------------------------------------------------------- + next if (($value ne "") && ($attrib ne "") && exists($$XMLTree[1]->[$child+1]->[0]->{$attrib}) && ($$XMLTree[1]->[$child+1]->[0]->{$attrib} ne $value)); + next if (($attrib ne "") && ((ref($$XMLTree[1]->[$child+1]) ne "ARRAY") || !exists($$XMLTree[1]->[$child+1]->[0]->{$attrib}))); + + #--------------------------------------------------------------------- + # Check for existence + #--------------------------------------------------------------------- + if ($type eq "existence") + { + return 1; + } + + #--------------------------------------------------------------------- + # Return the raw CDATA value without mark ups, or the value of the + # requested attribute. + #--------------------------------------------------------------------- + if ($type eq "value") + { + if ($attrib eq "") + { + my $str = ""; + my $next = 0; + my $index; + foreach $index (1..$#{$$XMLTree[1]->[$child+1]}) { + if ($next == 1) { $next = 0; next; } + if ($$XMLTree[1]->[$child+1]->[$index] eq "0") { + $str .= $$XMLTree[1]->[$child+1]->[$index+1]; + $next = 1; + } + } + return $str; + } + return $$XMLTree[1]->[$child+1]->[0]->{$attrib} + if (exists $$XMLTree[1]->[$child+1]->[0]->{$attrib}); + } + #--------------------------------------------------------------------- + # Return an array of values that represent the raw CDATA without + # mark up tags for the requested tags. + #--------------------------------------------------------------------- + if ($type eq "value array") + { + if ($attrib eq "") + { + my $str = ""; + my $next = 0; + my $index; + foreach $index (1..$#{$$XMLTree[1]->[$child+1]}) + { + if ($next == 1) { $next = 0; next; } + if ($$XMLTree[1]->[$child+1]->[$index] eq "0") + { + $str .= $$XMLTree[1]->[$child+1]->[$index+1]; + $next = 1; + } + } + push(@array,$str); + } + else + { + push(@array,$$XMLTree[1]->[$child+1]->[0]->{$attrib}) + if (exists $$XMLTree[1]->[$child+1]->[0]->{$attrib}); + } + } + #--------------------------------------------------------------------- + # Return a pointer to a new XML::Parser::Tree object that has the + # requested tag as the root tag. + #--------------------------------------------------------------------- + if ($type eq "tree") + { + my @tree = ( $$XMLTree[1]->[$child] , $$XMLTree[1]->[$child+1] ); + return @tree; + } + #--------------------------------------------------------------------- + # Return an array of pointers to XML::Parser::Tree objects that have + # the requested tag as the root tags. + #--------------------------------------------------------------------- + if ($type eq "tree array") + { + my @tree = ( $$XMLTree[1]->[$child] , $$XMLTree[1]->[$child+1] ); + push(@array,\@tree); + } + #--------------------------------------------------------------------- + # Return a count of the number of tags that match + #--------------------------------------------------------------------- + if ($type eq "count") + { + if ($$XMLTree[1]->[$child] eq "0") + { + $skipthis = 1; + next; + } + if ($skipthis == 1) + { + $skipthis = 0; + next; + } + $count++; + } + #--------------------------------------------------------------------- + # Return a count of the number of tags that match + #--------------------------------------------------------------------- + if ($type eq "child array") + { + my @tree = ( $$XMLTree[1]->[$child] , $$XMLTree[1]->[$child+1] ); + push(@array,\@tree) if ($tree[0] ne "0"); + } + #--------------------------------------------------------------------- + # Return the attribute hash that matches this tag + #--------------------------------------------------------------------- + if ($type eq "attribs") + { + return (%{$$XMLTree[1]->[$child+1]->[0]}); + } + } + } + #------------------------------------------------------------------------- + # If we are returning arrays then return array. + #------------------------------------------------------------------------- + if (($type eq "tree array") || ($type eq "value array") || + ($type eq "child array")) + { + return @array; + } + + #------------------------------------------------------------------------- + # If we are returning then count, then do so + #------------------------------------------------------------------------- + if ($type eq "count") + { + return $count; + } + } + else + { + #------------------------------------------------------------------------- + # This is the root tag, so handle things a level up. + #------------------------------------------------------------------------- + + #------------------------------------------------------------------------- + # Return the raw CDATA value without mark ups, or the value of the + # requested attribute. + #------------------------------------------------------------------------- + if ($type eq "value") + { + if ($attrib eq "") + { + my $str = ""; + my $next = 0; + my $index; + foreach $index (1..$#{$$XMLTree[1]}) + { + if ($next == 1) { $next = 0; next; } + if ($$XMLTree[1]->[$index] eq "0") + { + $str .= $$XMLTree[1]->[$index+1]; + $next = 1; + } + } + return $str; + } + return $$XMLTree[1]->[0]->{$attrib} + if (exists $$XMLTree[1]->[0]->{$attrib}); + } + #------------------------------------------------------------------------- + # Return a pointer to a new XML::Parser::Tree object that has the + # requested tag as the root tag. + #------------------------------------------------------------------------- + if ($type eq "tree") + { + my @tree = @{$$XMLTree}; + return @tree; + } + + #------------------------------------------------------------------------- + # Return the 1 if the specified attribute exists in the root tag. + #------------------------------------------------------------------------- + if ($type eq "existence") + { + return 1 if (($attrib ne "") && (exists($$XMLTree[1]->[0]->{$attrib}))); + } + + #------------------------------------------------------------------------- + # Return the attribute hash that matches this tag + #------------------------------------------------------------------------- + if ($type eq "attribs") + { + return %{$$XMLTree[1]->[0]}; + } + #------------------------------------------------------------------------- + # Return the tag of this node + #------------------------------------------------------------------------- + if ($type eq "tag") + { + return $$XMLTree[0]; + } + } + #--------------------------------------------------------------------------- + # Return 0 if this was a request for existence, or "" if a request for + # a "value", or [] for "tree", "value array", and "tree array". + #--------------------------------------------------------------------------- + return 0 if ($type eq "existence"); + return "" if ($type eq "value"); + return []; +} + + +############################################################################## +# +# BuildXML - takes an XML::Parser::Tree object and builds the XML string +# that it represents. +# +############################################################################## +sub BuildXML +{ + my ($parseTree,$rawXML) = @_; + + return "" if $#{$parseTree} == -1; + + my $str = ""; + if (ref($parseTree->[0]) eq "") + { + if ($parseTree->[0] eq "0") + { + return &XML::Stream::EscapeXML($parseTree->[1]); + } + + $str = "<".$parseTree->[0]; + foreach my $att (sort {$a cmp $b} keys(%{$parseTree->[1]->[0]})) + { + $str .= " ".$att."='".&XML::Stream::EscapeXML($parseTree->[1]->[0]->{$att})."'"; + } + + if (($#{$parseTree->[1]} > 0) || (defined($rawXML) && ($rawXML ne ""))) + { + $str .= ">"; + + my $index = 1; + while($index <= $#{$parseTree->[1]}) + { + my @newTree = ( $parseTree->[1]->[$index], $parseTree->[1]->[$index+1] ); + $str .= &XML::Stream::Tree::BuildXML(\@newTree); + $index += 2; + } + + $str .= $rawXML if defined($rawXML); + $str .= "</".$parseTree->[0].">"; + } + else + { + $str .= "/>"; + } + + } + + return $str; +} + + +############################################################################## +# +# XML2Config - takes an XML data tree and turns it into a hash of hashes. +# This only works for certain kinds of XML trees like this: +# +# <foo> +# <bar>1</bar> +# <x> +# <y>foo</y> +# </x> +# <z>5</z> +# </foo> +# +# The resulting hash would be: +# +# $hash{bar} = 1; +# $hash{x}->{y} = "foo"; +# $hash{z} = 5; +# +# Good for config files. +# +############################################################################## +sub XML2Config +{ + my ($XMLTree) = @_; + + my %hash; + foreach my $tree (&XML::Stream::GetXMLData("tree array",$XMLTree,"*")) + { + if ($tree->[0] eq "0") + { + return $tree->[1] unless ($tree->[1] =~ /^\s*$/); + } + else + { + if (&XML::Stream::GetXMLData("count",$XMLTree,$tree->[0]) > 1) + { + push(@{$hash{$tree->[0]}},&XML::Stream::XML2Config($tree)); + } + else + { + $hash{$tree->[0]} = &XML::Stream::XML2Config($tree); + } + } + } + return \%hash; +} + + +1; diff --git a/lib/XML/Stream/XPath.pm b/lib/XML/Stream/XPath.pm new file mode 100644 index 0000000..164a7a7 --- /dev/null +++ b/lib/XML/Stream/XPath.pm @@ -0,0 +1,50 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream::XPath; + +use 5.006_001; +use strict; +use vars qw( $VERSION %FUNCTIONS ); + +$VERSION = "1.22"; + +use XML::Stream::XPath::Value; +use XML::Stream::XPath::Op; +use XML::Stream::XPath::Query; + +sub AddFunction +{ + my $function = shift; + my $code = shift; + if (!defined($code)) + { + delete($FUNCTIONS{$code}); + return; + } + + $FUNCTIONS{$function} = $code; +} + + +1; + diff --git a/lib/XML/Stream/XPath/Op.pm b/lib/XML/Stream/XPath/Op.pm new file mode 100644 index 0000000..4209a5c --- /dev/null +++ b/lib/XML/Stream/XPath/Op.pm @@ -0,0 +1,919 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + + +############################################################################## +# +# Op - Base Op class +# +############################################################################## +package XML::Stream::XPath::Op; + +use 5.006_001; +use strict; +use vars qw( $VERSION ); + +$VERSION = "1.22"; + +sub new +{ + my $proto = shift; + return &allocate($proto,@_); +} + +sub allocate +{ + my $proto = shift; + my $self = { }; + + bless($self,$proto); + + $self->{TYPE} = shift; + $self->{VALUE} = shift; + + return $self; +} + +sub getValue +{ + my $self = shift; + return $self->{VALUE}; +} + +sub calcStr +{ + my $self = shift; + return $self->{VALUE}; +} + +sub getType +{ + my $self = shift; + return $self->{TYPE}; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + return 1; +} + +sub display +{ + my $self = shift; + my $space = shift; + $space = "" unless defined($space); + + print $space,"OP: type($self->{TYPE}) value($self->{VALUE})\n"; +} + + + +############################################################################## +# +# PositionOp - class to handle [0] ops +# +############################################################################## +package XML::Stream::XPath::PositionOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $self = $proto->allocate("POSITION",""); + $self->{POS} = shift; + + return $self; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + my @elems = $$ctxt->getList(); + my @valid_elems; + if ($#elems+1 < $self->{POS}) + { + return; + } + + push(@valid_elems, $elems[$self->{POS}-1]); + + $$ctxt->setList(@valid_elems); + + return 1; +} + + + +############################################################################## +# +# ContextOp - class to handle [...] ops +# +############################################################################## +package XML::Stream::XPath::ContextOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $self = $proto->allocate("CONTEXT",""); + $self->{OP} = shift; + return $self; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + my @elems = $$ctxt->getList(); + my @valid_elems; + foreach my $elem (@elems) + { + my $tmp_ctxt = new XML::Stream::XPath::Value($elem); + $tmp_ctxt->in_context(1); + if ($self->{OP}->isValid(\$tmp_ctxt)) + { + push(@valid_elems,$elem); + } + } + + $$ctxt->setList(@valid_elems); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub display +{ + my $self = shift; + my $space = shift; + $space = "" unless defined($space); + + print "${space}OP: type(CONTEXT) op: \n"; + $self->{OP}->display("$space "); +} + + + + +############################################################################## +# +# AllOp - class to handle // ops +# +############################################################################## +package XML::Stream::XPath::AllOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $name = shift; + my $self = $proto->allocate("ALL",$name); + return $self; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + my @elems = $$ctxt->getList(); + + if ($#elems == -1) + { + return; + } + + my @valid_elems; + + foreach my $elem (@elems) + { + push(@valid_elems,$self->descend($elem)); + } + + $$ctxt->setList(@valid_elems); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub descend +{ + my $self = shift; + my $elem = shift; + + my @valid_elems; + + if (($self->{VALUE} eq "*") || (&XML::Stream::GetXMLData("tag",$elem) eq $self->{VALUE})) + { + push(@valid_elems,$elem); + } + + foreach my $child (&XML::Stream::GetXMLData("child array",$elem,"*")) + { + push(@valid_elems,$self->descend($child)); + } + + return @valid_elems; +} + + + +############################################################################## +# +# NodeOp - class to handle ops based on node names +# +############################################################################## +package XML::Stream::XPath::NodeOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $name = shift; + my $is_root = shift; + $is_root = 0 unless defined($is_root); + my $self = $proto->allocate("NODE",$name); + $self->{ISROOT} = $is_root; + return $self; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + if ($self->{ISROOT}) + { + my $elem = $$ctxt->getFirstElem(); + if (&XML::Stream::GetXMLData("tag",$elem) ne $self->{VALUE}) + { + return; + } + return 1; + } + + my @valid_elems; + + foreach my $elem ($$ctxt->getList()) + { + my $valid = 0; + + foreach my $child (&XML::Stream::GetXMLData("child array",$elem,"*")) + { + if (($self->{VALUE} eq "*") || + (&XML::Stream::GetXMLData("tag",$child) eq $self->{VALUE})) + { + if ($$ctxt->in_context()) + { + $valid = 1; + } + else + { + push(@valid_elems,$child); + } + } + } + if ($valid) + { + push(@valid_elems,$elem); + } + } + + $$ctxt->setList(@valid_elems); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub calcStr +{ + my $self = shift; + my $elem = shift; + return &XML::Stream::GetXMLData("value",$elem); +} + + +############################################################################## +# +# EqualOp - class to handle [ x = y ] ops +# +############################################################################## +package XML::Stream::XPath::EqualOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $self = $proto->allocate("EQUAL",""); + $self->{OP_L} = shift; + $self->{OP_R} = shift; + return $self; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + my $tmp_ctxt = new XML::Stream::XPath::Value(); + $tmp_ctxt->setList($$ctxt->getList()); + $tmp_ctxt->in_context(0); + + if (!$self->{OP_L}->isValid(\$tmp_ctxt) || !$self->{OP_R}->isValid(\$tmp_ctxt)) + { + return; + } + + my @valid_elems; + foreach my $elem ($tmp_ctxt->getList) + { + if ($self->{OP_L}->calcStr($elem) eq $self->{OP_R}->calcStr($elem)) + { + push(@valid_elems,$elem); + } + } + + if ( $#valid_elems > -1) + { + @valid_elems = $$ctxt->getList(); + } + + $$ctxt->setList(@valid_elems); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub display +{ + my $self = shift; + my $space = shift; + $space = "" unless defined($space); + + print $space,"OP: type(EQUAL)\n"; + print $space," op_l: "; + $self->{OP_L}->display($space." "); + + print $space," op_r: "; + $self->{OP_R}->display($space." "); +} + + + +############################################################################## +# +# NotEqualOp - class to handle [ x != y ] ops +# +############################################################################## +package XML::Stream::XPath::NotEqualOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $self = $proto->allocate("NOTEQUAL",""); + $self->{OP_L} = shift; + $self->{OP_R} = shift; + return $self; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + my $tmp_ctxt = new XML::Stream::XPath::Value(); + $tmp_ctxt->setList($$ctxt->getList()); + $tmp_ctxt->in_context(0); + + if (!$self->{OP_L}->isValid(\$tmp_ctxt) || !$self->{OP_R}->isValid(\$tmp_ctxt)) + { + return; + } + + my @valid_elems; + foreach my $elem ($tmp_ctxt->getList) + { + if ($self->{OP_L}->calcStr($elem) ne $self->{OP_R}->calcStr($elem)) + { + push(@valid_elems,$elem); + } + } + + if ( $#valid_elems > -1) + { + @valid_elems = $$ctxt->getList(); + } + + $$ctxt->setList(@valid_elems); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub display +{ + my $self = shift; + my $space = shift; + $space = "" unless defined($space); + + print $space,"OP: type(NOTEQUAL)\n"; + print $space," op_l: "; + $self->{OP_L}->display($space." "); + + print $space," op_r: "; + $self->{OP_R}->display($space." "); +} + + + +############################################################################## +# +# AttributeOp - class to handle @foo ops. +# +############################################################################## +package XML::Stream::XPath::AttributeOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $name = shift; + my $self = $proto->allocate("ATTRIBUTE",$name); + return $self; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + my @elems = $$ctxt->getList(); + my @valid_elems; + my @values; + my %attribs; + + foreach my $elem (@elems) + { + if ($self->{VALUE} ne "*") + { + if (&XML::Stream::GetXMLData("value",$elem,"",$self->{VALUE})) + { + $self->{VAL} = $self->calcStr($elem); + push(@valid_elems,$elem); + push(@values,$self->{VAL}); + } + } + else + { + my %attrib = &XML::Stream::GetXMLData("attribs",$elem); + if (scalar(keys(%attrib)) > 0) + { + push(@valid_elems,$elem); + foreach my $key (keys(%attrib)) + { + $attribs{$key} = $attrib{$key}; + } + } + } + } + + $$ctxt->setList(@valid_elems); + $$ctxt->setValues(@values); + $$ctxt->setAttribs(%attribs); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub getValue +{ + my $self = shift; + return $self->{VAL}; +} + + +sub calcStr +{ + my $self = shift; + my $elem = shift; + return &XML::Stream::GetXMLData("value",$elem,"",$self->{VALUE}); +} + + + + +############################################################################## +# +# AndOp - class to handle [ .... and .... ] ops +# +############################################################################## +package XML::Stream::XPath::AndOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $self = $proto->allocate("AND","and"); + $self->{OP_L} = shift; + $self->{OP_R} = shift; + return $self; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + my $opl = $self->{OP_L}->isValid($ctxt); + my $opr = $self->{OP_R}->isValid($ctxt); + + if ($opl && $opr) + { + return 1; + } + else + { + return; + } +} + + +sub display +{ + my $self = shift; + my $space = shift; + $space = "" unless defined($space); + + print $space,"OP: type(AND)\n"; + print $space," op_l: \n"; + $self->{OP_L}->display($space." "); + + print $space," op_r: \n"; + $self->{OP_R}->display($space." "); +} + + + +############################################################################## +# +# OrOp - class to handle [ .... or .... ] ops +# +############################################################################## +package XML::Stream::XPath::OrOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $self = $proto->allocate("OR","or"); + $self->{OP_L} = shift; + $self->{OP_R} = shift; + return $self; +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + my @elems = $$ctxt->getList(); + my @valid_elems; + + foreach my $elem (@elems) + { + my $tmp_ctxt_l = new XML::Stream::XPath::Value($elem); + $tmp_ctxt_l->in_context(1); + my $tmp_ctxt_r = new XML::Stream::XPath::Value($elem); + $tmp_ctxt_r->in_context(1); + + my $opl = $self->{OP_L}->isValid(\$tmp_ctxt_l); + my $opr = $self->{OP_R}->isValid(\$tmp_ctxt_r); + + if ($opl || $opr) + { + push(@valid_elems,$elem); + } + } + + $$ctxt->setList(@valid_elems); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub display +{ + my $self = shift; + my $space = shift; + $space = "" unless defined($space); + + print "${space}OP: type(OR)\n"; + print "$space op_l: "; + $self->{OP_L}->display("$space "); + + print "$space op_r: "; + $self->{OP_R}->display("$space "); +} + + + +############################################################################## +# +# FunctionOp - class to handle xxxx(...) ops +# +############################################################################## +package XML::Stream::XPath::FunctionOp; + +use vars qw (@ISA); +@ISA = ( "XML::Stream::XPath::Op" ); + +sub new +{ + my $proto = shift; + my $function = shift; + my $self = $proto->allocate("FUNCTION",$function); + $self->{CLOSED} = 0; + return $self; +} + + +sub addArg +{ + my $self = shift; + my $arg = shift; + + push(@{$self->{ARGOPS}},$arg); +} + + +sub isValid +{ + my $self = shift; + my $ctxt = shift; + + my $result; + eval("\$result = &{\$XML::Stream::XPath::FUNCTIONS{\$self->{VALUE}}}(\$ctxt,\@{\$self->{ARGOPS}});"); + return $result; +} + + +sub calcStr +{ + my $self = shift; + my $elem = shift; + + my $result; + eval("\$result = &{\$XML::Stream::XPath::VALUES{\$self->{VALUE}}}(\$elem);"); + return $result; + +} + + +sub display +{ + my $self = shift; + my $space = shift; + $space = "" unless defined($space); + + print $space,"OP: type(FUNCTION)\n"; + print $space," $self->{VALUE}(\n"; + foreach my $arg (@{$self->{ARGOPS}}) + { + print $arg,"\n"; + $arg->display($space." "); + } + print "$space )\n"; +} + + +sub function_name +{ + my $ctxt = shift; + my (@args) = @_; + + my @elems = $$ctxt->getList(); + my @valid_elems; + my @valid_values; + foreach my $elem (@elems) + { + my $text = &value_name($elem); + if (defined($text)) + { + push(@valid_elems,$elem); + push(@valid_values,$text); + } + } + + $$ctxt->setList(@valid_elems); + $$ctxt->setValues(@valid_values); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub function_not +{ + my $ctxt = shift; + my (@args) = @_; + + my @elems = $$ctxt->getList(); + my @valid_elems; + foreach my $elem (@elems) + { + my $tmp_ctxt = new XML::Stream::XPath::Value($elem); + $tmp_ctxt->in_context(1); + if (!($args[0]->isValid(\$tmp_ctxt))) + { + push(@valid_elems,$elem); + } + } + + $$ctxt->setList(@valid_elems); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub function_text +{ + my $ctxt = shift; + my (@args) = @_; + + my @elems = $$ctxt->getList(); + my @valid_elems; + my @valid_values; + foreach my $elem (@elems) + { + my $text = &value_text($elem); + if (defined($text)) + { + push(@valid_elems,$elem); + push(@valid_values,$text); + } + } + + $$ctxt->setList(@valid_elems); + $$ctxt->setValues(@valid_values); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub function_startswith +{ + my $ctxt = shift; + my (@args) = @_; + + my @elems = $$ctxt->getList(); + my @valid_elems; + foreach my $elem (@elems) + { + my $val1 = $args[0]->calcStr($elem); + my $val2 = $args[1]->calcStr($elem); + + if (substr($val1,0,length($val2)) eq $val2) + { + push(@valid_elems,$elem); + } + } + + $$ctxt->setList(@valid_elems); + + if ($#valid_elems == -1) + { + return; + } + + return 1; +} + + +sub value_name +{ + my $elem = shift; + return &XML::Stream::GetXMLData("tag",$elem); +} + + +sub value_text +{ + my $elem = shift; + return &XML::Stream::GetXMLData("value",$elem); +} + + + +$XML::Stream::XPath::FUNCTIONS{'name'} = \&function_name; +$XML::Stream::XPath::FUNCTIONS{'not'} = \&function_not; +$XML::Stream::XPath::FUNCTIONS{'text'} = \&function_text; +$XML::Stream::XPath::FUNCTIONS{'starts-with'} = \&function_startswith; + +$XML::Stream::XPath::VALUES{'name'} = \&value_name; +$XML::Stream::XPath::VALUES{'text'} = \&value_text; + +1; + + diff --git a/lib/XML/Stream/XPath/Query.pm b/lib/XML/Stream/XPath/Query.pm new file mode 100644 index 0000000..c4831fe --- /dev/null +++ b/lib/XML/Stream/XPath/Query.pm @@ -0,0 +1,374 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream::XPath::Query; + +use 5.006_001; +use strict; +use Carp; +use vars qw( $VERSION ); + +$VERSION = "1.22"; + +sub new +{ + my $proto = shift; + my $self = { }; + + bless($self,$proto); + + $self->{TOKENS} = [ '/','[',']','@','"',"'",'=','!','(',')',':',' ',',']; + $self->{QUERY} = shift; + + if (!defined($self->{QUERY}) || ($self->{QUERY} eq "")) + { + confess("No query string specified"); + } + + $self->parseQuery(); + + return $self; +} + + +sub getNextToken +{ + my $self = shift; + my $pos = shift; + + my @toks = grep{ $_ eq substr($self->{QUERY},$$pos,1)} @{$self->{TOKENS}}; + while( $#toks == -1 ) + { + $$pos++; + if ($$pos > length($self->{QUERY})) + { + $$pos = length($self->{QUERY}); + return 0; + } + @toks = grep{ $_ eq substr($self->{QUERY},$$pos,1)} @{$self->{TOKENS}}; + } + + return $toks[0]; +} + + +sub getNextIdentifier +{ + my $self = shift; + my $pos = shift; + my $sp = $$pos; + $self->getNextToken($pos); + return substr($self->{QUERY},$sp,$$pos-$sp); +} + + +sub getOp +{ + my $self = shift; + my $pos = shift; + my $in_context = shift; + $in_context = 0 unless defined($in_context); + + my $ret_op; + + my $loop = 1; + while( $loop ) + { + my $pos_start = $$pos; + + my $token = $self->getNextToken($pos); + if (($token eq "0") && $in_context) + { + return; + } + + my $token_start = ++$$pos; + my $ident; + + if (defined($token)) + { + + if ($pos_start != ($token_start-1)) + { + $$pos = $pos_start; + my $temp_ident = $self->getNextIdentifier($pos); + $ret_op = new XML::Stream::XPath::NodeOp($temp_ident,"0"); + } + elsif ($token eq "/") + { + if (substr($self->{QUERY},$token_start,1) eq "/") + { + $$pos++; + my $temp_ident = $self->getNextIdentifier($pos); + $ret_op = new XML::Stream::XPath::AllOp($temp_ident); + } + else + { + my $temp_ident = $self->getNextIdentifier($pos); + if ($temp_ident ne "") + { + $ret_op = new XML::Stream::XPath::NodeOp($temp_ident,($pos_start == 0 ? "1" : "0")); + } + } + } + elsif ($token eq "\@") + { + $ret_op = new XML::Stream::XPath::AttributeOp($self->getNextIdentifier($pos)); + } + elsif ($token eq "]") + { + if ($in_context eq "[") + { + $ret_op = pop(@{$self->{OPS}}); + $in_context = 0; + } + else + { + confess("Found ']' but not in context"); + return; + } + } + elsif (($token eq "\"") || ($token eq "\'")) + { + $$pos = index($self->{QUERY},$token,$token_start); + $ret_op = new XML::Stream::XPath::Op("LITERAL",substr($self->{QUERY},$token_start,$$pos-$token_start)); + $$pos++; + } + elsif ($token eq " ") + { + $ident = $self->getNextIdentifier($pos); + if ($ident eq "and") + { + $$pos++; + my $tmp_op = $self->getOp($pos,$in_context); + if (!defined($tmp_op)) + { + confess("Invalid 'and' operation"); + return; + } + $ret_op = new XML::Stream::XPath::AndOp($self->{OPS}->[$#{$self->{OPS}}],$tmp_op); + $in_context = 0; + pop(@{$self->{OPS}}); + } + elsif ($ident eq "or") + { + $$pos++; + my $tmp_op = $self->getOp($pos,$in_context); + if (!defined($tmp_op)) + { + confess("Invalid 'or' operation"); + return; + } + $ret_op = new XML::Stream::XPath::OrOp($self->{OPS}->[$#{$self->{OPS}}],$tmp_op); + $in_context = 0; + pop(@{$self->{OPS}}); + } + } + elsif ($token eq "[") + { + if ($self->getNextToken($pos) eq "]") + { + if ($$pos == $token_start) + { + confess("Nothing in the []"); + return; + } + + $$pos = $token_start; + my $val = $self->getNextIdentifier($pos); + if ($val =~ /^\d+$/) + { + $ret_op = new XML::Stream::XPath::PositionOp($val); + $$pos++; + } + else + { + $$pos = $pos_start + 1; + $ret_op = new XML::Stream::XPath::ContextOp($self->getOp($pos,$token)); + } + } + else + { + $$pos = $pos_start + 1; + $ret_op = new XML::Stream::XPath::ContextOp($self->getOp($pos,$token)); + } + } + elsif ($token eq "(") + { + #------------------------------------------------------------- + # The function name would have been mistaken for a NodeOp. + # Pop it off the back and get the function name. + #------------------------------------------------------------- + my $op = pop(@{$self->{OPS}}); + if ($op->getType() ne "NODE") + { + confess("No function name specified."); + } + my $function = $op->getValue(); + if (!exists($XML::Stream::XPath::FUNCTIONS{$function})) + { + confess("Undefined function \"$function\""); + } + $ret_op = new XML::Stream::XPath::FunctionOp($function); + + my $op_pos = $#{$self->{OPS}} + 1; + + $self->getOp($pos,$token); + + foreach my $arg ($op_pos..$#{$self->{OPS}}) + { + $ret_op->addArg($self->{OPS}->[$arg]); + } + + splice(@{$self->{OPS}},$op_pos); + + } + elsif ($token eq ")") + { + if ($in_context eq "(") + { + $ret_op = undef; + $in_context = 0; + } + else + { + confess("Found ')' but not in context"); + } + } + elsif ($token eq ",") + { + if ($in_context ne "(") + { + confess("Found ',' but not in a function"); + } + + } + elsif ($token eq "=") + { + my $tmp_op; + while(!defined($tmp_op)) + { + $tmp_op = $self->getOp($pos); + } + $ret_op = new XML::Stream::XPath::EqualOp($self->{OPS}->[$#{$self->{OPS}}],$tmp_op); + pop(@{$self->{OPS}}); + } + elsif ($token eq "!") + { + if (substr($self->{QUERY},$token_start,1) ne "=") + { + confess("Badly formed !="); + } + $$pos++; + + my $tmp_op; + while(!defined($tmp_op)) + { + $tmp_op = $self->getOp($pos); + } + $ret_op = new XML::Stream::XPath::NotEqualOp($self->{OPS}->[$#{$self->{OPS}}],$tmp_op); + pop(@{$self->{OPS}}); + } + else + { + confess("Unhandled \"$token\""); + } + + if ($in_context) + { + if (defined($ret_op)) + { + push(@{$self->{OPS}},$ret_op); + } + $ret_op = undef; + } + } + else + { + confess("Token undefined"); + } + + $loop = 0 unless $in_context; + } + + return $ret_op; +} + + +sub parseQuery +{ + my $self = shift; + my $query = shift; + + my $op; + my $pos = 0; + while($pos < length($self->{QUERY})) + { + $op = $self->getOp(\$pos); + if (defined($op)) + { + push(@{$self->{OPS}},$op); + } + } + + #foreach my $op (@{$self->{OPS}}) + #{ + # $op->display(); + #} + + return 1; +} + + +sub execute +{ + my $self = shift; + my $root = shift; + + my $ctxt = new XML::Stream::XPath::Value($root); + + foreach my $op (@{$self->{OPS}}) + { + if (!$op->isValid(\$ctxt)) + { + $ctxt->setValid(0); + return $ctxt; + } + } + + $ctxt->setValid(1); + return $ctxt; +} + + +sub check +{ + my $self = shift; + my $root = shift; + + my $ctxt = $self->execute($root); + return $ctxt->check(); +} + + +1; + diff --git a/lib/XML/Stream/XPath/Value.pm b/lib/XML/Stream/XPath/Value.pm new file mode 100644 index 0000000..425e183 --- /dev/null +++ b/lib/XML/Stream/XPath/Value.pm @@ -0,0 +1,153 @@ +############################################################################## +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# +# Jabber +# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ +# +############################################################################## + +package XML::Stream::XPath::Value; + +use 5.006_001; +use strict; +use vars qw( $VERSION ); + +$VERSION = "1.22"; + +sub new +{ + my $proto = shift; + my $self = { }; + + bless($self,$proto); + + $self->setList(@_); + $self->setValues(); + $self->setAttribs(); + $self->setValid(0); + $self->in_context(0); + + return $self; +} + + +sub setList +{ + my $self = shift; + my (@values) = @_; + $self->{LIST} = \@values; +} + + +sub getList +{ + my $self = shift; + return unless ($#{$self->{LIST}} > -1); + return @{$self->{LIST}}; +} + + +sub getFirstElem +{ + my $self = shift; + return unless ($#{$self->{LIST}} > -1); + return $self->{LIST}->[0]; +} + + +sub setValues +{ + my $self = shift; + my (@values) = @_; + $self->{VALUES} = \@values; +} + + +sub getValues +{ + my $self = shift; + return unless ($#{$self->{VALUES}} > -1); + return $self->{VALUES}->[0] if !wantarray; + return @{$self->{VALUES}}; +} + + +sub setAttribs +{ + my $self = shift; + my (%attribs) = @_; + $self->{ATTRIBS} = \%attribs; +} + + +sub getAttribs +{ + my $self = shift; + return unless (scalar(keys(%{$self->{ATTRIBS}})) > 0); + return %{$self->{ATTRIBS}}; +} + + +sub setValid +{ + my $self = shift; + my $valid = shift; + $self->{VALID} = $valid; +} + + +sub check +{ + my $self = shift; + return $self->{VALID}; +} + + +sub in_context +{ + my $self = shift; + my $in_context = shift; + + if (defined($in_context)) + { + $self->{INCONTEXT} = $in_context; + } + return $self->{INCONTEXT}; +} + + +sub display +{ + my $self = shift; + if (0) + { + print "VALUE: list(",join(",",@{$self->{LIST}}),")\n"; + } + else + { + print "VALUE: list(\n"; + foreach my $elem (@{$self->{LIST}}) + { + print "VALUE: ",$elem->GetXML(),"\n"; + } + print "VALUE: )\n"; + } + print "VALUE: values(",join(",",@{$self->{VALUES}}),")\n"; +} + +1; + |
