summaryrefslogtreecommitdiff
path: root/lib/URI/Escape.pm
diff options
context:
space:
mode:
authorAndreas Brachold <vdr07@deltab.de>2007-11-11 06:55:13 +0000
committerAndreas Brachold <vdr07@deltab.de>2007-11-11 06:55:13 +0000
commit3282be229999dc36c197b264d63063a18d136331 (patch)
tree98a42db29d955b39e7bed1b599fdcc56c3a29de9 /lib/URI/Escape.pm
parentcfdd733c17cfa4f1a43b827a656e9e53cc2524ac (diff)
downloadxxv-3282be229999dc36c197b264d63063a18d136331.tar.gz
xxv-3282be229999dc36c197b264d63063a18d136331.tar.bz2
* Update installation list with required modules
* Remove unused/doubled provided external perl moduls
Diffstat (limited to 'lib/URI/Escape.pm')
-rw-r--r--lib/URI/Escape.pm218
1 files changed, 0 insertions, 218 deletions
diff --git a/lib/URI/Escape.pm b/lib/URI/Escape.pm
deleted file mode 100644
index 5f0de55..0000000
--- a/lib/URI/Escape.pm
+++ /dev/null
@@ -1,218 +0,0 @@
-#
-# $Id: Escape.pm,v 3.28 2004/11/05 13:58:31 gisle Exp $
-#
-
-package URI::Escape;
-use strict;
-
-=head1 NAME
-
-URI::Escape - Escape and unescape unsafe characters
-
-=head1 SYNOPSIS
-
- use URI::Escape;
- $safe = uri_escape("10% is enough\n");
- $verysafe = uri_escape("foo", "\0-\377");
- $str = uri_unescape($safe);
-
-=head1 DESCRIPTION
-
-This module provides functions to escape and unescape URI strings as
-defined by RFC 2396 (and updated by RFC 2732).
-A URI consists of a restricted set of characters,
-denoted as C<uric> in RFC 2396. The restricted set of characters
-consists of digits, letters, and a few graphic symbols chosen from
-those common to most of the character encodings and input facilities
-available to Internet users:
-
- "A" .. "Z", "a" .. "z", "0" .. "9",
- ";", "/", "?", ":", "@", "&", "=", "+", "$", ",", "[", "]", # reserved
- "-", "_", ".", "!", "~", "*", "'", "(", ")"
-
-In addition, any byte (octet) can be represented in a URI by an escape
-sequence: a triplet consisting of the character "%" followed by two
-hexadecimal digits. A byte can also be represented directly by a
-character, using the US-ASCII character for that octet (iff the
-character is part of C<uric>).
-
-Some of the C<uric> characters are I<reserved> for use as delimiters
-or as part of certain URI components. These must be escaped if they are
-to be treated as ordinary data. Read RFC 2396 for further details.
-
-The functions provided (and exported by default) from this module are:
-
-=over 4
-
-=item uri_escape( $string )
-
-=item uri_escape( $string, $unsafe )
-
-Replaces each unsafe character in the $string with the corresponding
-escape sequence and returns the result. The $string argument should
-be a string of bytes. The uri_escape() function will croak if given a
-characters with code above 255. Use uri_escape_utf8() if you know you
-have such chars or/and want chars in the 128 .. 255 range treated as
-UTF-8.
-
-The uri_escape() function takes an optional second argument that
-overrides the set of characters that are to be escaped. The set is
-specified as a string that can be used in a regular expression
-character class (between [ ]). E.g.:
-
- "\x00-\x1f\x7f-\xff" # all control and hi-bit characters
- "a-z" # all lower case characters
- "^A-Za-z" # everything not a letter
-
-The default set of characters to be escaped is all those which are
-I<not> part of the C<uric> character class shown above as well as the
-reserved characters. I.e. the default is:
-
- "^A-Za-z0-9\-_.!~*'()"
-
-=item uri_escape_utf8( $string )
-
-=item uri_escape_utf8( $string, $unsafe )
-
-Works like uri_escape(), but will encode chars as UTF-8 before
-escaping them. This makes this function able do deal with characters
-with code above 255 in $string. Note that chars in the 128 .. 255
-range will be escaped differently by this function compared to what
-uri_escape() would. For chars in the 0 .. 127 range there is no
-difference.
-
-The call:
-
- $uri = uri_escape_utf8($string);
-
-will be the same as:
-
- use Encode qw(encode);
- $uri = uri_escape(encode("UTF-8", $string));
-
-but will even work for perl-5.6 for chars in the 128 .. 255 range.
-
-Note: Javascript has a function called escape() that produce the
-sequence "%uXXXX" for chars in the 256 .. 65535 range. This function
-has really nothing to do with URI escaping but some folks got confused
-since it "does the right thing" in the 0 .. 255 range. Because of
-this you sometimes see "URIs" with these kind of escapes. The
-JavaScript encodeURI() function is similar to uri_escape_utf8().
-
-=item uri_unescape($string,...)
-
-Returns a string with each %XX sequence replaced with the actual byte
-(octet).
-
-This does the same as:
-
- $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
-
-but does not modify the string in-place as this RE would. Using the
-uri_unescape() function instead of the RE might make the code look
-cleaner and is a few characters less to type.
-
-In a simple benchmark test I did,
-calling the function (instead of the inline RE above) if a few chars
-were unescaped was something like 40% slower, and something like 700% slower if none were. If
-you are going to unescape a lot of times it might be a good idea to
-inline the RE.
-
-If the uri_unescape() function is passed multiple strings, then each
-one is returned unescaped.
-
-=back
-
-The module can also export the C<%escapes> hash, which contains the
-mapping from all 256 bytes to the corresponding escape codes. Lookup
-in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))>
-each time.
-
-=head1 SEE ALSO
-
-L<URI>
-
-
-=head1 COPYRIGHT
-
-Copyright 1995-2004 Gisle Aas.
-
-This program is free software; you can redistribute it and/or modify
-it under the same terms as Perl itself.
-
-=cut
-
-use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
-use vars qw(%escapes);
-
-require Exporter;
-@ISA = qw(Exporter);
-@EXPORT = qw(uri_escape uri_unescape);
-@EXPORT_OK = qw(%escapes uri_escape_utf8);
-$VERSION = sprintf("%d.%02d", q$Revision: 3.28 $ =~ /(\d+)\.(\d+)/);
-
-use Carp ();
-
-# Build a char->hex map
-for (0..255) {
- $escapes{chr($_)} = sprintf("%%%02X", $_);
-}
-
-my %subst; # compiled patternes
-
-sub uri_escape
-{
- my($text, $patn) = @_;
- return undef unless defined $text;
- if (defined $patn){
- unless (exists $subst{$patn}) {
- # Because we can't compile the regex we fake it with a cached sub
- (my $tmp = $patn) =~ s,/,\\/,g;
- eval "\$subst{\$patn} = sub {\$_[0] =~ s/([$tmp])/\$escapes{\$1} || _fail_hi(\$1)/ge; }";
- Carp::croak("uri_escape: $@") if $@;
- }
- &{$subst{$patn}}($text);
- } else {
- # Default unsafe characters. RFC 2732 ^(uric - reserved)
- $text =~ s/([^A-Za-z0-9\-_.!~*'()])/$escapes{$1} || _fail_hi($1)/ge;
- }
- $text;
-}
-
-sub _fail_hi {
- my $chr = shift;
- Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr));
-}
-
-sub uri_escape_utf8
-{
- my $text = shift;
- if ($] < 5.008) {
- $text =~ s/([^\0-\x7F])/do {my $o = ord($1); sprintf("%c%c", 0xc0 | ($o >> 6), 0x80 | ($o & 0x3f)) }/ge;
- }
- else {
- utf8::encode($text);
- }
-
- return uri_escape($text, @_);
-}
-
-sub uri_unescape
-{
- # Note from RFC1630: "Sequences which start with a percent sign
- # but are not followed by two hexadecimal characters are reserved
- # for future extension"
- my $str = shift;
- if (@_ && wantarray) {
- # not executed for the common case of a single argument
- my @str = ($str, @_); # need to copy
- foreach (@str) {
- s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
- }
- return @str;
- }
- $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
- $str;
-}
-
-1;