summaryrefslogtreecommitdiff
path: root/lib/Net/IP/Match
diff options
context:
space:
mode:
authorAndreas Brachold <vdr07@deltab.de>2007-08-13 18:41:27 +0000
committerAndreas Brachold <vdr07@deltab.de>2007-08-13 18:41:27 +0000
commitbcbf441e09fb502cf64924ff2530fa144bdf52c5 (patch)
treef377707a2dac078db8cd0c7d7abfe69ac1006d71 /lib/Net/IP/Match
downloadxxv-bcbf441e09fb502cf64924ff2530fa144bdf52c5.tar.gz
xxv-bcbf441e09fb502cf64924ff2530fa144bdf52c5.tar.bz2
* Move files to trunk
Diffstat (limited to 'lib/Net/IP/Match')
-rw-r--r--lib/Net/IP/Match/Regexp.pm293
1 files changed, 293 insertions, 0 deletions
diff --git a/lib/Net/IP/Match/Regexp.pm b/lib/Net/IP/Match/Regexp.pm
new file mode 100644
index 0000000..9d89837
--- /dev/null
+++ b/lib/Net/IP/Match/Regexp.pm
@@ -0,0 +1,293 @@
+package Net::IP::Match::Regexp;
+
+require 5.006;
+use strict;
+use warnings;
+
+use base 'Exporter';
+our @EXPORT = qw();
+our @EXPORT_OK = qw( create_iprange_regexp match_ip );
+our $VERSION = '0.91';
+
+=head1 NAME
+
+Net::IP::Match::Regexp - Efficiently match IPv4 addresses against IPv4 ranges via regexp
+
+=head1 LICENSE
+
+Copyright 2005 Clotho Advanced Media, Inc., <cpan@clotho.com>
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 SYNOPSIS
+
+ use Net::IP::Match::Regexp qw( create_iprange_regexp match_ip );
+
+ my $regexp = create_iprange_regexp(
+ qw( 10.0.0.0/8 87.134.66.128 87.134.87.0/24 145.97.0.0/16 )
+ );
+ if (match_ip("209.249.163.62", $regexp)) {
+ ...
+ }
+
+=head1 DESCRIPTION
+
+This module allows you to check an IP address against one or more IP
+ranges. It employs Perl's highly optimized regular expression engine
+to do the hard work, so it is very fast. It is optimized for speed by
+doing the match against a pre-computed regexp, which implicitly checks
+the broadest IP ranges first. An advantage is that the regexp can be
+comuted and stored in advance (in source code, in a database table,
+etc) and reused, saving much time if the IP ranges don't change too
+often. The match can optionally report a value instead of just a
+boolean, which makes module useful for mapping IP ranges to names or
+codes or anything else.
+
+=head1 SEE ALSO
+
+There are several other CPAN modules that perform a similar function.
+This one is faster than the other ones that I've found and tried.
+Here is a synopsis of those others:
+
+=head2 Net::IP::Match
+
+Optimized for speed by taking a "source filter" approach. That is, it
+modifies your source code at run time, kind of like a C preprocessor.
+A huge limitation is that the IP ranges must be hard-coded into your
+program.
+
+=head2 Net::IP::Match::XS
+
+(Also released as Net::IP::CMatch)
+
+Optimized for speed by doing the match in C instead of in Perl. This
+module loses efficiency, however, because the IP ranges must be
+re-parsed every invokation.
+
+=head2 Net::IP::Match::Resolver
+
+Uses Net::IP::Match::XS to implement a range-to-name map.
+
+
+=head1 PERFORMANCE
+
+I ran a test on my Mac G5 to compare this module to
+Net::IP::Match::XS. The test was intended to be a realistic net
+filter case: 100,000 random IPs tested against 300 semi-random IP
+ranges. Times are in seconds.
+
+ Module | Setup time | Run time
+ -----------------------+------------+----------
+ Net::IP::Match::Regexp | 0.057 | 1.663
+ Net::IP::Match::XS | n/a | 4.238
+
+=head1 IMPLEMENTATION
+
+The speed of this module comes from the short-circuit nature of
+regular expressions. The setup function turns all of the IP ranges
+into binary strings, and mixes them into a regexp with C<|> choices
+between ones and zeros. This regexp can then be passed to the match
+function. When an unambiguous match is found, the regexp sets a
+variable (via the regexp $^R feature) and terminates. That variable
+becomes the return value for the match, typically a true value.
+
+Here's an example of the regexp for a single range, that of my company's subnet:
+
+ print create_iprange_regexp("209.249.163.0/25")'
+ # ^1101000111111001101000110(?{'1'})
+
+If I add another range, say a NAT LAN, I get:
+
+ print create_iprange_regexp("209.249.163.0/25", "192.168.0.0/16")'
+ # ^110(?:0000010101000(?{'1'})|1000111111001101000110(?{'1'}))
+
+Note that for a 192.168.x.x address, the regexp checks at most the
+first 16 bits (1100000010101000) whereas for a 209.249.163.x address,
+it goes out to 15 bits (1101000111111001101000110). The cool part is
+that for an IP address that starts in the lower half (say 127.0.0.1)
+only needs to check the first bit (0) to see that the regexp won't
+match.
+
+If mapped return values are specified for the ranges, they get embedded
+into the regexp like so:
+
+ print create_iprange_regexp({"209.249.163.0/25" => "clotho.com",
+ "192.168.0.0/16" => "localhost"})'
+ # ^110(?:0000010101000(?{'localhost'})|1000111111001101000110(?{'clotho.com'}))
+
+This could be implemented in C to be even faster. In C, it would
+probably be better to use a binary tree instead of a regexp. However,
+a goal of this module is to create a serializable representation of
+the range data, and the regexp is perfect for that. So, we'll
+probably never do a C version.
+
+=head1 COMPATIBILITY
+
+Because this module relies on the C<(?{ code })> feature of regexps,
+it won't work on old Perl versions. I've successfully tested this
+module on Perl 5.6.0, 5.8.1 and 5.8.6. In theory, the code regexp
+feature should work in 5.005, but I've used "our" and the like, so it
+won't work there. I don't have a 5.005 to test anyway...
+
+=head1 FUNCTIONS
+
+=over
+
+=cut
+
+
+=item create_iprange_regexp IPRANGE | MAP, ...
+
+This function digests IP ranges into a regular expression that can
+subsequently be used to efficiently test single IP addresses. It
+returns a regular expression string that can be passed to match_ip().
+
+The simple way to use this is to pass a list of IP ranges as
+C<aaa.bbb.ccc.ddd/n>. When used this way, the return value of the
+match_ip() function will be simply C<1> or C<undef>.
+
+The more complex way is to pass a hash reference of IP range => return
+value pairs. When used this way, the return value of the match_ip()
+function will be the specified return value or C<undef> for no match.
+
+For example:
+
+ my $re1 = create_iprange_regexp("209.249.163.0/25", "127.0.0.1/32");
+ print match_ip("209.249.163.62", $re1); # prints "1"
+
+ my $re2 = create_iprange_regexp({"209.249.163.0/25" => "clotho.com",
+ "127.0.0.1/32" => "localhost"});
+ print match_ip("209.249.163.62", $re2); # prints "clotho.com"
+
+Note that these two styles can be mixed (a rarely used feature).
+These two examples are equivalent:
+
+ create_iprange_regexp("127.0.0.1/32",
+ {"209.249.163.0/25" => "clotho.com"},
+ "10.0.0.0/8",
+ {"192.168.0.0/16" => "LAN"});
+
+ create_iprange_regexp({"127.0.0.1/32" => 1,
+ "209.249.163.0/25" => "clotho.com",
+ "10.0.0.0/8" => 1,
+ "192.168.0.0/16" => "LAN"});
+
+Special note: the value string will be wrapped in single-quotes in the
+regexp. Therefore, you must double-escape any single quotes in that value.
+For example:
+
+ create_iprange_regexp({"208.201.239.36/31" => "O\\'Reilly publishing"});
+
+Warning: This function does no checking for validity of IP ranges. It
+happily accepts C<1000.0.0.0/-38>. Hopefully a future version will
+validate the ranges, perhaps via Net::CIDR or Net::IP.
+
+=cut
+
+sub create_iprange_regexp
+{
+ # If an argument is a hash ref, flatten it
+ # If an argument is a scalar, make it a key and give it a value of 1
+ my %map = map {ref $_ ? %$_ : ($_ => 1)} @_;
+
+ # The tree is a temporary construct. It has three possible
+ # properties: 0, 1, and code. The code is the return value for a
+ # match.
+ my %tree;
+
+ for my $range (keys %map)
+ {
+ my ($ip,$mask) = split /\//, $range;
+
+ my $tree = \%tree;
+ my @bits = split //, unpack("B32", pack("C4", split(/\./, $ip)));
+ for my $val (@bits[0..$mask-1])
+ {
+ # If this case is hit, it means that our IP range is a subset
+ # of some other range.
+ last if ($tree->{code});
+
+ $tree->{$val} ||= {};
+ $tree = $tree->{$val};
+ }
+ # If the code is already set, it's a non-fatal error (bad data)
+ $tree->{code} ||= $map{$range};
+
+ # prune redundant branches
+ # this is only important if the range data is poor
+ delete $tree->{0};
+ delete $tree->{1};
+ }
+
+ # Recurse into the tree making it into a regexp
+ my $re = "^".tree2re(\%tree);
+ return $re;
+}
+
+=item match_ip IP_ADDR, REGEXP
+
+Given a single IP address as a string of the form C<aaa.bbb.ccc.ddd>
+and a regular expression string (typically the output of
+create_iprange_regexp()), this function returns a pre-specified value
+(typically C<1>) if the IP is in one of the ranges, or C<undef> if no
+ranges match.
+
+See create_ipranges_regexp() for more details about the return value
+of this function.
+
+Warning: This function does no checking for validity of the IP address.
+
+=cut
+
+sub match_ip
+{
+ my ($ip,$re) = @_;
+
+ local $^R;
+ use re 'eval';
+ unpack("B32", pack("C4", split(/\./, $ip))) =~ /$re/;
+ return $^R;
+}
+
+# Helper function. This recurses to build the regular expression
+# string from a tree of IP ranges constructed by
+# create_iprange_regexp().
+
+sub tree2re
+{
+ my $tree = shift;
+
+ if ($tree->{code})
+ {
+ return "(?{'$$tree{code}'})";
+ }
+ elsif ($tree->{0} && $tree->{1})
+ {
+ return "(?:0".tree2re($tree->{0})."|1".tree2re($tree->{1}).")";
+ }
+ elsif ($tree->{0})
+ {
+ return "0".tree2re($tree->{0});
+ }
+ elsif ($tree->{1})
+ {
+ return "1".tree2re($tree->{1});
+ }
+ else
+ {
+ die "Internal error";
+ }
+}
+
+1;
+
+__END__
+
+=back
+
+=head1 AUTHOR
+
+Clotho Advanced Media, Inc. I<cpan@clotho.com>
+
+Primary developer: Chris Dolan