made the pack completely portable and wrote relevent bat files to go with it

This commit is contained in:
Draqoken
2025-04-09 17:04:56 +03:00
parent 5e77d7e9cf
commit 5e4144c3c0
7417 changed files with 2181044 additions and 19 deletions

View File

@@ -0,0 +1,458 @@
package HTTP::Config;
use strict;
use warnings;
our $VERSION = '7.00';
use URI;
sub new {
my $class = shift;
return bless [], $class;
}
sub entries {
my $self = shift;
@$self;
}
sub empty {
my $self = shift;
not @$self;
}
sub add {
if (@_ == 2) {
my $self = shift;
push(@$self, shift);
return;
}
my($self, %spec) = @_;
push(@$self, \%spec);
return;
}
sub find2 {
my($self, %spec) = @_;
my @found;
my @rest;
ITEM:
for my $item (@$self) {
for my $k (keys %spec) {
no warnings 'uninitialized';
if (!exists $item->{$k} || $spec{$k} ne $item->{$k}) {
push(@rest, $item);
next ITEM;
}
}
push(@found, $item);
}
return \@found unless wantarray;
return \@found, \@rest;
}
sub find {
my $self = shift;
my $f = $self->find2(@_);
return @$f if wantarray;
return $f->[0];
}
sub remove {
my($self, %spec) = @_;
my($removed, $rest) = $self->find2(%spec);
@$self = @$rest if @$removed;
return @$removed;
}
my %MATCH = (
m_scheme => sub {
my($v, $uri) = @_;
return $uri->_scheme eq $v; # URI known to be canonical
},
m_secure => sub {
my($v, $uri) = @_;
my $secure = $uri->can("secure") ? $uri->secure : $uri->_scheme eq "https";
return $secure == !!$v;
},
m_host_port => sub {
my($v, $uri) = @_;
return unless $uri->can("host_port");
return $uri->host_port eq $v, 7;
},
m_host => sub {
my($v, $uri) = @_;
return unless $uri->can("host");
return $uri->host eq $v, 6;
},
m_port => sub {
my($v, $uri) = @_;
return unless $uri->can("port");
return $uri->port eq $v;
},
m_domain => sub {
my($v, $uri) = @_;
return unless $uri->can("host");
my $h = $uri->host;
$h = "$h.local" unless $h =~ /\./;
$v = ".$v" unless $v =~ /^\./;
return length($v), 5 if substr($h, -length($v)) eq $v;
return 0;
},
m_path => sub {
my($v, $uri) = @_;
return unless $uri->can("path");
return $uri->path eq $v, 4;
},
m_path_prefix => sub {
my($v, $uri) = @_;
return unless $uri->can("path");
my $path = $uri->path;
my $len = length($v);
return $len, 3 if $path eq $v;
return 0 if length($path) <= $len;
$v .= "/" unless $v =~ m,/\z,,;
return $len, 3 if substr($path, 0, length($v)) eq $v;
return 0;
},
m_path_match => sub {
my($v, $uri) = @_;
return unless $uri->can("path");
return $uri->path =~ $v;
},
m_uri__ => sub {
my($v, $k, $uri) = @_;
return unless $uri->can($k);
return 1 unless defined $v;
return $uri->$k eq $v;
},
m_method => sub {
my($v, $uri, $request) = @_;
return $request && $request->method eq $v;
},
m_proxy => sub {
my($v, $uri, $request) = @_;
return $request && ($request->{proxy} || "") eq $v;
},
m_code => sub {
my($v, $uri, $request, $response) = @_;
$v =~ s/xx\z//;
return unless $response;
return length($v), 2 if substr($response->code, 0, length($v)) eq $v;
},
m_media_type => sub { # for request too??
my($v, $uri, $request, $response) = @_;
return unless $response;
return 1, 1 if $v eq "*/*";
my $ct = $response->content_type;
return 2, 1 if $v =~ s,/\*\z,, && $ct =~ m,^\Q$v\E/,;
return 3, 1 if $v eq "html" && $response->content_is_html;
return 4, 1 if $v eq "xhtml" && $response->content_is_xhtml;
return 10, 1 if $v eq $ct;
return 0;
},
m_header__ => sub {
my($v, $k, $uri, $request, $response) = @_;
return unless $request;
my $req_header = $request->header($k);
return 1 if defined($req_header) && $req_header eq $v;
if ($response) {
my $res_header = $response->header($k);
return 1 if defined($res_header) && $res_header eq $v;
}
return 0;
},
m_response_attr__ => sub {
my($v, $k, $uri, $request, $response) = @_;
return unless $response;
return 1 if !defined($v) && exists $response->{$k};
return 0 unless exists $response->{$k};
return 1 if $response->{$k} eq $v;
return 0;
},
);
sub matching {
my $self = shift;
if (@_ == 1) {
if ($_[0]->can("request")) {
unshift(@_, $_[0]->request);
unshift(@_, undef) unless defined $_[0];
}
unshift(@_, $_[0]->uri_canonical) if $_[0] && $_[0]->can("uri_canonical");
}
my($uri, $request, $response) = @_;
$uri = URI->new($uri) unless ref($uri);
my @m;
ITEM:
for my $item (@$self) {
my $order;
for my $ikey (keys %$item) {
my $mkey = $ikey;
my $k;
$k = $1 if $mkey =~ s/__(.*)/__/;
if (my $m = $MATCH{$mkey}) {
#print "$ikey $mkey\n";
my($c, $o);
my @arg = (
defined($k) ? $k : (),
$uri, $request, $response
);
my $v = $item->{$ikey};
$v = [$v] unless ref($v) eq "ARRAY";
for (@$v) {
($c, $o) = $m->($_, @arg);
#print " - $_ ==> $c $o\n";
last if $c;
}
next ITEM unless $c;
$order->[$o || 0] += $c;
}
}
$order->[7] ||= 0;
$item->{_order} = join(".", reverse map sprintf("%03d", $_ || 0), @$order);
push(@m, $item);
}
@m = sort { $b->{_order} cmp $a->{_order} } @m;
delete $_->{_order} for @m;
return @m if wantarray;
return $m[0];
}
sub add_item {
my $self = shift;
my $item = shift;
return $self->add(item => $item, @_);
}
sub remove_items {
my $self = shift;
return map $_->{item}, $self->remove(@_);
}
sub matching_items {
my $self = shift;
return map $_->{item}, $self->matching(@_);
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Config - Configuration for request and response objects
=head1 VERSION
version 7.00
=head1 SYNOPSIS
use HTTP::Config;
my $c = HTTP::Config->new;
$c->add(m_domain => ".example.com", m_scheme => "http", verbose => 1);
use HTTP::Request;
my $request = HTTP::Request->new(GET => "http://www.example.com");
if (my @m = $c->matching($request)) {
print "Yadayada\n" if $m[0]->{verbose};
}
=head1 DESCRIPTION
An C<HTTP::Config> object is a list of entries that
can be matched against request or request/response pairs. Its
purpose is to hold configuration data that can be looked up given a
request or response object.
Each configuration entry is a hash. Some keys specify matching to
occur against attributes of request/response objects. Other keys can
be used to hold user data.
The following methods are provided:
=over 4
=item $conf = HTTP::Config->new
Constructs a new empty C<HTTP::Config> object and returns it.
=item $conf->entries
Returns the list of entries in the configuration object.
In scalar context returns the number of entries.
=item $conf->empty
Return true if there are no entries in the configuration object.
This is just a shorthand for C<< not $conf->entries >>.
=item $conf->add( %matchspec, %other )
=item $conf->add( \%entry )
Adds a new entry to the configuration.
You can either pass separate key/value pairs or a hash reference.
=item $conf->remove( %spec )
Removes (and returns) the entries that have matches for all the key/value pairs in %spec.
If %spec is empty this will match all entries; so it will empty the configuration object.
=item $conf->matching( $uri, $request, $response )
=item $conf->matching( $uri )
=item $conf->matching( $request )
=item $conf->matching( $response )
Returns the entries that match the given $uri, $request and $response triplet.
If called with a single $request object then the $uri is obtained by calling its 'uri_canonical' method.
If called with a single $response object, then the request object is obtained by calling its 'request' method;
and then the $uri is obtained as if a single $request was provided.
The entries are returned with the most specific matches first.
In scalar context returns the most specific match or C<undef> in none match.
=item $conf->add_item( $item, %matchspec )
=item $conf->remove_items( %spec )
=item $conf->matching_items( $uri, $request, $response )
Wrappers that hides the entries themselves.
=back
=head2 Matching
The following keys on a configuration entry specify matching. For all
of these you can provide an array of values instead of a single value.
The entry matches if at least one of the values in the array matches.
Entries that require match against a response object attribute will never match
unless a response object was provided.
=over
=item m_scheme => $scheme
Matches if the URI uses the specified scheme; e.g. "http".
=item m_secure => $bool
If $bool is TRUE; matches if the URI uses a secure scheme. If $bool
is FALSE; matches if the URI does not use a secure scheme. An example
of a secure scheme is "https".
=item m_host_port => "$hostname:$port"
Matches if the URI's host_port method return the specified value.
=item m_host => $hostname
Matches if the URI's host method returns the specified value.
=item m_port => $port
Matches if the URI's port method returns the specified value.
=item m_domain => ".$domain"
Matches if the URI's host method return a value that within the given
domain. The hostname "www.example.com" will for instance match the
domain ".com".
=item m_path => $path
Matches if the URI's path method returns the specified value.
=item m_path_prefix => $path
Matches if the URI's path is the specified path or has the specified
path as prefix.
=item m_path_match => $Regexp
Matches if the regular expression matches the URI's path. Eg. qr/\.html$/.
=item m_method => $method
Matches if the request method matches the specified value. Eg. "GET" or "POST".
=item m_code => $digit
=item m_code => $status_code
Matches if the response status code matches. If a single digit is
specified; matches for all response status codes beginning with that digit.
=item m_proxy => $url
Matches if the request is to be sent to the given Proxy server.
=item m_media_type => "*/*"
=item m_media_type => "text/*"
=item m_media_type => "html"
=item m_media_type => "xhtml"
=item m_media_type => "text/html"
Matches if the response media type matches.
With a value of "html" matches if $response->content_is_html returns TRUE.
With a value of "xhtml" matches if $response->content_is_xhtml returns TRUE.
=item m_uri__I<$method> => undef
Matches if the URI object provides the method.
=item m_uri__I<$method> => $string
Matches if the URI's $method method returns the given value.
=item m_header__I<$field> => $string
Matches if either the request or the response have a header $field with the given value.
=item m_response_attr__I<$key> => undef
=item m_response_attr__I<$key> => $string
Matches if the response object has that key, or the entry has the given value.
=back
=head1 SEE ALSO
L<URI>, L<HTTP::Request>, L<HTTP::Response>
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: Configuration for request and response objects

View File

@@ -0,0 +1,658 @@
use 5.008001;
use strict;
use warnings;
package HTTP::CookieJar;
# ABSTRACT: A minimalist HTTP user agent cookie jar
our $VERSION = '0.014';
use Carp ();
use HTTP::Date ();
my $HAS_MPS = eval { require Mozilla::PublicSuffix; 1 };
#pod =construct new
#pod
#pod my $jar = HTTP::CookieJar->new;
#pod
#pod Return a new, empty cookie jar
#pod
#pod =cut
sub new {
my ($class) = @_;
bless { store => {} }, $class;
}
#pod =method add
#pod
#pod $jar->add(
#pod "http://www.example.com/", "lang=en-US; Path=/; Domain=example.com"
#pod );
#pod
#pod Given a request URL and a C<Set-Cookie> header string, attempts to adds the
#pod cookie to the jar. If the cookie is expired, instead it deletes any matching
#pod cookie from the jar. A C<Max-Age> attribute will be converted to an absolute
#pod C<Expires> attribute.
#pod
#pod It will throw an exception if the request URL is missing or invalid. Returns true if
#pod successful cookie processing or undef/empty-list on failure.
#pod
#pod =cut
sub add {
my ( $self, $request, $cookie ) = @_;
return unless defined $cookie and length $cookie;
my ( $scheme, $host, $port, $request_path ) = eval { _split_url($request) };
Carp::croak($@) if $@;
return unless my $parse = _parse_cookie($cookie);
my $name = $parse->{name};
# check and normalize domain
if ( exists $parse->{domain} ) {
_normalize_domain( $host, $parse ) or return;
}
else {
$parse->{domain} = $host;
$parse->{hostonly} = 1;
}
my $domain = $parse->{domain};
# normalize path
if ( !exists $parse->{path} || substr( $parse->{path}, 0, 1 ) ne "/" ) {
$parse->{path} = _default_path($request_path);
}
my $path = $parse->{path};
# set timestamps and normalize expires
my $now = $parse->{creation_time} = $parse->{last_access_time} = time;
if ( exists $parse->{'max-age'} ) {
# "If delta-seconds is less than or equal to zero (0), let expiry-time
# be the earliest representable date and time."
$parse->{expires} = $parse->{'max-age'} <= 0
? 0
: $now + $parse->{'max-age'};
delete $parse->{'max-age'};
}
# update creation time from old cookie, if exists
if ( my $old = $self->{store}{$domain}{$path}{$name} ) {
$parse->{creation_time} = $old->{creation_time};
}
# if cookie has expired, purge any old matching cookie, too
if ( defined $parse->{expires} && $parse->{expires} < $now ) {
delete $self->{store}{$domain}{$path}{$name};
}
else {
$self->{store}{$domain}{$path}{$name} = $parse;
}
return 1;
}
#pod =method clear
#pod
#pod $jar->clear
#pod
#pod Empties the cookie jar.
#pod
#pod =cut
sub clear {
my ($self) = @_;
$self->{store} = {};
return 1;
}
#pod =method cookies_for
#pod
#pod my @cookies = $jar->cookies_for("http://www.example.com/foo/bar");
#pod
#pod Given a request URL, returns a list of hash references representing cookies
#pod that should be sent. The hash references are copies -- changing values
#pod will not change the cookies in the jar.
#pod
#pod Cookies set C<secure> will only be returned if the request scheme is C<https>.
#pod Expired cookies will not be returned.
#pod
#pod Keys of a cookie hash reference might include:
#pod
#pod =for :list
#pod * name -- the name of the cookie
#pod * value -- the value of the cookie
#pod * domain -- the domain name to which the cookie applies
#pod * path -- the path to which the cookie applies
#pod * expires -- if present, when the cookie expires in epoch seconds
#pod * secure -- if present, the cookie was set C<Secure>
#pod * httponly -- if present, the cookie was set C<HttpOnly>
#pod * hostonly -- if present, the cookie may only be used with the domain as a host
#pod * creation_time -- epoch time when the cookie was first stored
#pod * last_access_time -- epoch time when the cookie was last accessed (i.e. "now")
#pod
#pod Keep in mind that C<httponly> means it should only be used in requests and not
#pod made available via Javascript, etc. This is pretty meaningless for Perl user
#pod agents.
#pod
#pod Generally, user agents should use the C<cookie_header> method instead.
#pod
#pod It will throw an exception if the request URL is missing or invalid.
#pod
#pod =cut
sub cookies_for {
my ( $self, $request ) = @_;
my @found = $self->_cookies_for($request);
return map { {%$_} } @found;
}
# _cookies_for returns originals, not copies, which helps in testing
sub _cookies_for {
my ( $self, $request ) = @_;
my ( $scheme, $host, $port, $request_path ) = eval { _split_url($request) };
Carp::croak($@) if $@;
my @found;
my $now = time;
for my $cookie ( $self->_all_cookies ) {
next if $cookie->{hostonly} && $host ne $cookie->{domain};
next if $cookie->{secure} && $scheme ne 'https';
next if defined( $cookie->{expires} ) && $cookie->{expires} < $now;
next unless _domain_match( $host, $cookie->{domain} );
next unless _path_match( $request_path, $cookie->{path} );
$cookie->{last_access_time} = time;
push @found, $cookie;
}
@found = sort {
length( $b->{path} ) <=> length( $a->{path} )
|| $a->{creation_time} <=> $b->{creation_time}
} @found;
return @found;
}
#pod =method cookie_header
#pod
#pod my $header = $jar->cookie_header("http://www.example.com/foo/bar");
#pod
#pod Given a request URL, returns a correctly-formatted string with all relevant
#pod cookies for the request. This string is ready to be used in a C<Cookie> header
#pod in an HTTP request. E.g.:
#pod
#pod SID=31d4d96e407aad42; lang=en-US
#pod
#pod It follows the same exclusion rules as C<cookies_for>.
#pod
#pod If the request is invalid or no cookies apply, it will return an empty string.
#pod
#pod =cut
sub cookie_header {
my ( $self, $req ) = @_;
return join( "; ", map { "$_->{name}=$_->{value}" } $self->cookies_for($req) );
}
#pod =method dump_cookies
#pod
#pod my @list = $jar->dump_cookies;
#pod my @list = $jar->dump_cookies( { persistent => 1 } );
#pod
#pod Returns a list of raw cookies in string form. The strings resemble what
#pod would be received from C<Set-Cookie> headers, but with additional internal
#pod fields. The list is only intended for use with C<load_cookies> to allow
#pod cookie jar persistence.
#pod
#pod If a hash reference with a true C<persistent> key is given as an argument,
#pod cookies without an C<Expires> time (i.e. "session cookies") will be omitted.
#pod
#pod Here is a trivial example of saving a cookie jar file with L<Path::Tiny>:
#pod
#pod path("jar.txt")->spew( join "\n", $jar->dump_cookies );
#pod
#pod =cut
sub dump_cookies {
my ( $self, $args ) = @_;
my @list;
for my $c ( $self->_all_cookies ) {
my @parts = "$c->{name}=$c->{value}";
if ( defined $c->{expires} ) {
push @parts, 'Expires=' . HTTP::Date::time2str( $c->{expires} );
}
else {
next if $args->{persistent};
}
for my $attr (qw/Domain Path Creation_Time Last_Access_Time/) {
push @parts, "$attr=$c->{lc $attr}" if defined $c->{ lc $attr };
}
for my $attr (qw/Secure HttpOnly HostOnly/) {
push @parts, $attr if $c->{ lc $attr };
}
push @list, join( "; ", @parts );
}
return @list;
}
#pod =method load_cookies
#pod
#pod $jar->load_cookies( @cookies );
#pod
#pod Given a list of cookie strings from C<dump_cookies>, it adds them to
#pod the cookie jar. Cookies added in this way will supersede any existing
#pod cookies with similar domain, path and name.
#pod
#pod It returns the jar object for convenience when loading a new object:
#pod
#pod my $jar = HTTP::CookieJar->new->load_cookies( @cookies );
#pod
#pod Here is a trivial example of loading a cookie jar file with L<Path::Tiny>:
#pod
#pod my $jar = HTTP::CookieJar->new->load_cookies(
#pod path("jar.txt")->lines
#pod );
#pod
#pod =cut
sub load_cookies {
my ( $self, @cookies ) = @_;
for my $cookie (@cookies) {
my $p = _parse_cookie( $cookie, 1 );
next unless exists $p->{domain} && exists $p->{path};
$p->{$_} = time for grep { !defined $p->{$_} } qw/creation_time last_access_time/;
$self->{store}{ $p->{domain} }{ $p->{path} }{ $p->{name} } = $p;
}
return $self;
}
#--------------------------------------------------------------------------#
# private methods
#--------------------------------------------------------------------------#
# return flattened list of all cookies
sub _all_cookies {
return map { values %$_ } map { values %$_ } values %{ $_[0]->{store} };
}
#--------------------------------------------------------------------------#
# Helper subroutines
#--------------------------------------------------------------------------#
my $pub_re = qr/(?:domain|path|expires|max-age|httponly|secure)/;
my $pvt_re = qr/(?:$pub_re|creation_time|last_access_time|hostonly)/;
sub _parse_cookie {
my ( $cookie, $private ) = @_;
$cookie = '' unless defined $cookie;
my ( $kvp, @attrs ) = split /;/, $cookie;
$kvp = '' unless defined $kvp;
my ( $name, $value ) =
map { s/^\s*//; s/\s*$//; $_ } split( /=/, $kvp, 2 ); ## no critic
return unless defined $name and length $name;
$value = '' unless defined $value;
my $parse = { name => $name, value => $value };
for my $s (@attrs) {
next unless defined $s && $s =~ /\S/;
my ( $k, $v ) = map { s/^\s*//; s/\s*$//; $_ } split( /=/, $s, 2 ); ## no critic
$k = lc $k;
next unless $private ? ( $k =~ m/^$pvt_re$/ ) : ( $k =~ m/^$pub_re$/ );
$v = 1 if $k =~ m/^(?:httponly|secure|hostonly)$/; # boolean flag if present
$v = HTTP::Date::str2time($v) || 0 if $k eq 'expires'; # convert to epoch
next unless length $v;
$v =~ s{^\.}{} if $k eq 'domain'; # strip leading dot
$v =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if $k eq 'path'; # unescape
$parse->{$k} = $v;
}
return $parse;
}
sub _domain_match {
my ( $string, $dom_string ) = @_;
return 1 if $dom_string eq $string;
return unless $string =~ /[a-z]/i; # non-numeric
if ( $string =~ s{\Q$dom_string\E$}{} ) {
return substr( $string, -1, 1 ) eq '.'; # "foo."
}
return;
}
sub _normalize_domain {
my ( $host, $parse ) = @_;
if ($HAS_MPS) {
my $host_pub_suff = eval { Mozilla::PublicSuffix::public_suffix($host) };
$host_pub_suff = '' unless defined $host_pub_suff;
if ( _domain_match( $host_pub_suff, $parse->{domain} ) ) {
if ( $parse->{domain} eq $host ) {
return $parse->{hostonly} = 1;
}
else {
return;
}
}
}
if ( $parse->{domain} !~ m{\.} && $parse->{domain} eq $host ) {
return $parse->{hostonly} = 1;
}
return _domain_match( $host, $parse->{domain} );
}
sub _default_path {
my ($path) = @_;
return "/" if !length $path || substr( $path, 0, 1 ) ne "/";
my ($default) = $path =~ m{^(.*)/}; # greedy to last /
return length($default) ? $default : "/";
}
sub _path_match {
my ( $req_path, $cookie_path ) = @_;
return 1 if $req_path eq $cookie_path;
if ( $req_path =~ m{^\Q$cookie_path\E(.*)} ) {
my $rest = $1;
return 1 if substr( $cookie_path, -1, 1 ) eq '/';
return 1 if substr( $rest, 0, 1 ) eq '/';
}
return;
}
sub _split_url {
my $url = shift;
die(qq/No URL provided\n/) unless defined $url and length $url;
# URI regex adapted from the URI module
# XXX path_query here really chops at ? or # to get just the path and not the query
my ( $scheme, $authority, $path_query ) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#?]*)>
or die(qq/Cannot parse URL: '$url'\n/);
$scheme = lc $scheme;
$path_query = "/$path_query" unless $path_query =~ m<\A/>;
$path_query =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
my $host = ( length($authority) ) ? lc $authority : 'localhost';
$host =~ s/\A[^@]*@//; # userinfo
my $port = do {
$host =~ s/:([0-9]*)\z// && length $1
? $1
: ( $scheme eq 'http' ? 80 : $scheme eq 'https' ? 443 : undef );
};
return ( $scheme, $host, $port, $path_query );
}
1;
# vim: ts=4 sts=4 sw=4 et:
__END__
=pod
=encoding UTF-8
=head1 NAME
HTTP::CookieJar - A minimalist HTTP user agent cookie jar
=head1 VERSION
version 0.014
=head1 SYNOPSIS
use HTTP::CookieJar;
my $jar = HTTP::CookieJar->new;
# add cookie received from a request
$jar->add( "http://www.example.com/", "CUSTOMER=WILE_E_COYOTE; Path=/; Domain=example.com" );
# extract cookie header for a given request
my $cookie = $jar->cookie_header( "http://www.example.com/" );
=head1 DESCRIPTION
This module implements a minimalist HTTP user agent cookie jar in
conformance with L<RFC 6265|http://tools.ietf.org/html/rfc6265>.
Unlike the commonly used L<HTTP::Cookies> module, this module does
not require use of L<HTTP::Request> and L<HTTP::Response> objects.
An LWP-compatible adapter is available as L<HTTP::CookieJar::LWP>.
=head1 CONSTRUCTORS
=head2 new
my $jar = HTTP::CookieJar->new;
Return a new, empty cookie jar
=head1 METHODS
=head2 add
$jar->add(
"http://www.example.com/", "lang=en-US; Path=/; Domain=example.com"
);
Given a request URL and a C<Set-Cookie> header string, attempts to adds the
cookie to the jar. If the cookie is expired, instead it deletes any matching
cookie from the jar. A C<Max-Age> attribute will be converted to an absolute
C<Expires> attribute.
It will throw an exception if the request URL is missing or invalid. Returns true if
successful cookie processing or undef/empty-list on failure.
=head2 clear
$jar->clear
Empties the cookie jar.
=head2 cookies_for
my @cookies = $jar->cookies_for("http://www.example.com/foo/bar");
Given a request URL, returns a list of hash references representing cookies
that should be sent. The hash references are copies -- changing values
will not change the cookies in the jar.
Cookies set C<secure> will only be returned if the request scheme is C<https>.
Expired cookies will not be returned.
Keys of a cookie hash reference might include:
=over 4
=item *
name -- the name of the cookie
=item *
value -- the value of the cookie
=item *
domain -- the domain name to which the cookie applies
=item *
path -- the path to which the cookie applies
=item *
expires -- if present, when the cookie expires in epoch seconds
=item *
secure -- if present, the cookie was set C<Secure>
=item *
httponly -- if present, the cookie was set C<HttpOnly>
=item *
hostonly -- if present, the cookie may only be used with the domain as a host
=item *
creation_time -- epoch time when the cookie was first stored
=item *
last_access_time -- epoch time when the cookie was last accessed (i.e. "now")
=back
Keep in mind that C<httponly> means it should only be used in requests and not
made available via Javascript, etc. This is pretty meaningless for Perl user
agents.
Generally, user agents should use the C<cookie_header> method instead.
It will throw an exception if the request URL is missing or invalid.
=head2 cookie_header
my $header = $jar->cookie_header("http://www.example.com/foo/bar");
Given a request URL, returns a correctly-formatted string with all relevant
cookies for the request. This string is ready to be used in a C<Cookie> header
in an HTTP request. E.g.:
SID=31d4d96e407aad42; lang=en-US
It follows the same exclusion rules as C<cookies_for>.
If the request is invalid or no cookies apply, it will return an empty string.
=head2 dump_cookies
my @list = $jar->dump_cookies;
my @list = $jar->dump_cookies( { persistent => 1 } );
Returns a list of raw cookies in string form. The strings resemble what
would be received from C<Set-Cookie> headers, but with additional internal
fields. The list is only intended for use with C<load_cookies> to allow
cookie jar persistence.
If a hash reference with a true C<persistent> key is given as an argument,
cookies without an C<Expires> time (i.e. "session cookies") will be omitted.
Here is a trivial example of saving a cookie jar file with L<Path::Tiny>:
path("jar.txt")->spew( join "\n", $jar->dump_cookies );
=head2 load_cookies
$jar->load_cookies( @cookies );
Given a list of cookie strings from C<dump_cookies>, it adds them to
the cookie jar. Cookies added in this way will supersede any existing
cookies with similar domain, path and name.
It returns the jar object for convenience when loading a new object:
my $jar = HTTP::CookieJar->new->load_cookies( @cookies );
Here is a trivial example of loading a cookie jar file with L<Path::Tiny>:
my $jar = HTTP::CookieJar->new->load_cookies(
path("jar.txt")->lines
);
=for Pod::Coverage method_names_here
=head1 LIMITATIONS AND CAVEATS
=head2 RFC 6265 vs prior standards
This modules adheres as closely as possible to the user-agent rules
of RFC 6265. Therefore, it does not handle nor generate C<Set-Cookie2>
and C<Cookie2> headers, implement C<.local> suffixes, or do path/domain
matching in accord with prior RFC's.
=head2 Internationalized domain names
Internationalized domain names given in requests must be properly
encoded in ASCII form.
=head2 Public suffixes
If L<Mozilla::PublicSuffix> is installed, cookie domains will be checked
against the public suffix list. Public suffix cookies are only allowed
as host-only cookies.
=head2 Third-party cookies
According to RFC 6265, a cookie may be accepted only if has no C<Domain>
attribute (in which case it is "host-only") or if the C<Domain> attribute is a
suffix of the request URL. This effectively prohibits Site A from setting a
cookie for unrelated Site B, which is one potential third-party cookie vector.
=head1 SEE ALSO
=over 4
=item *
L<HTTP::Cookies>
=item *
L<Mojo::UserAgent::CookieJar>
=back
=for :stopwords cpan testmatrix url bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan
=head1 SUPPORT
=head2 Bugs / Feature Requests
Please report any bugs or feature requests through the issue tracker
at L<https://github.com/dagolden/HTTP-CookieJar/issues>.
You will be notified automatically of any progress on your issue.
=head2 Source Code
This is open source software. The code repository is available for
public review and contribution under the terms of the license.
L<https://github.com/dagolden/HTTP-CookieJar>
git clone https://github.com/dagolden/HTTP-CookieJar.git
=head1 AUTHOR
David Golden <dagolden@cpan.org>
=head1 CONTRIBUTORS
=for stopwords Dan Book David Golden jvolkening
=over 4
=item *
Dan Book <grinnz@grinnz.com>
=item *
David Golden <xdg@xdg.me>
=item *
jvolkening <jdv@base2bio.com>
=back
=head1 COPYRIGHT AND LICENSE
This software is Copyright (c) 2013 by David Golden.
This is free software, licensed under:
The Apache License, Version 2.0, January 2004
=cut

View File

@@ -0,0 +1,126 @@
use 5.008001;
use strict;
use warnings;
package HTTP::CookieJar::LWP;
# ABSTRACT: LWP adapter for HTTP::CookieJar
our $VERSION = '0.014';
use parent 'HTTP::CookieJar';
sub add_cookie_header {
my ( $self, $request ) = @_;
my $url = _get_url( $request, $request->uri );
return unless ( $url->scheme =~ /^https?\z/ );
my $header = $self->cookie_header($url);
$request->header( Cookie => $header );
return $request;
}
sub extract_cookies {
my ( $self, $response ) = @_;
my $request = $response->request
or return;
my $url = _get_url( $request, $request->uri );
$self->add( $url, $_ ) for $response->_header("Set-Cookie");
return $response;
}
#--------------------------------------------------------------------------#
# helper subroutines
#--------------------------------------------------------------------------#
sub _get_url {
my ( $request, $url ) = @_;
my $new_url = $url->clone;
if ( my $h = $request->header("Host") ) {
$h =~ s/:\d+$//; # might have a port as well
$new_url->host($h);
}
return $new_url;
}
sub _url_path {
my $url = shift;
my $path;
if ( $url->can('epath') ) {
$path = $url->epath; # URI::URL method
}
else {
$path = $url->path; # URI::_generic method
}
$path = "/" unless length $path;
$path;
}
sub _normalize_path # so that plain string compare can be used
{
my $x;
$_[0] =~ s/%([0-9a-fA-F][0-9a-fA-F])/
$x = uc($1);
$x eq "2F" || $x eq "25" ? "%$x" :
pack("C", hex($x));
/eg;
$_[0] =~ s/([\0-\x20\x7f-\xff])/sprintf("%%%02X",ord($1))/eg;
}
1;
# vim: ts=4 sts=4 sw=4 et:
__END__
=pod
=encoding UTF-8
=head1 NAME
HTTP::CookieJar::LWP - LWP adapter for HTTP::CookieJar
=head1 VERSION
version 0.014
=head1 SYNOPSIS
use LWP::UserAgent;
use HTTP::CookieJar::LWP;
my $ua = LWP::UserAgent->new(
cookie_jar => HTTP::CookieJar::LWP->new
);
=head1 DESCRIPTION
This module is an experimental adapter to make L<HTTP::CookieJar> work with
L<LWP>. It implements the two methods that C<LWP> calls from L<HTTP::Cookies>.
It is not a general-purpose drop-in replacement for C<HTTP::Cookies> in any
other way.
=for Pod::Coverage method_names_here
add_cookie_header
extract_cookies
=head1 AUTHOR
David Golden <dagolden@cpan.org>
=head1 COPYRIGHT AND LICENSE
This software is Copyright (c) 2013 by David Golden.
This is free software, licensed under:
The Apache License, Version 2.0, January 2004
=cut

View File

@@ -0,0 +1,900 @@
package HTTP::Cookies;
use strict;
use HTTP::Date qw(str2time parse_date time2str);
use HTTP::Headers::Util qw(_split_header_words join_header_words);
our $EPOCH_OFFSET;
our $VERSION = '6.11';
# Legacy: because "use "HTTP::Cookies" used be the ONLY way
# to load the class HTTP::Cookies::Netscape.
require HTTP::Cookies::Netscape;
$EPOCH_OFFSET = 0; # difference from Unix epoch
# A HTTP::Cookies object is a hash. The main attribute is the
# COOKIES 3 level hash: $self->{COOKIES}{$domain}{$path}{$key}.
sub new
{
my $class = shift;
my $self = bless {
COOKIES => {},
}, $class;
my %cnf = @_;
for (keys %cnf) {
$self->{lc($_)} = $cnf{$_};
}
$self->load;
$self;
}
sub add_cookie_header
{
my $self = shift;
my $request = shift || return;
my $url = $request->uri;
my $scheme = $url->scheme;
unless ($scheme =~ /^https?\z/) {
return;
}
my $domain = _host($request, $url);
$domain = "$domain.local" unless $domain =~ /\./;
my $secure_request = ($scheme eq "https");
my $req_path = _url_path($url);
my $req_port = $url->port;
my $now = time();
_normalize_path($req_path) if $req_path =~ /%/;
my @cval; # cookie values for the "Cookie" header
my $set_ver;
my $netscape_only = 0; # An exact domain match applies to any cookie
while ($domain =~ /\./) {
# Checking $domain for cookies"
my $cookies = $self->{COOKIES}{$domain};
next unless $cookies;
if ($self->{delayload} && defined($cookies->{'//+delayload'})) {
my $cookie_data = $cookies->{'//+delayload'}{'cookie'};
delete $self->{COOKIES}{$domain};
$self->load_cookie($cookie_data->[1]);
$cookies = $self->{COOKIES}{$domain};
next unless $cookies; # should not really happen
}
# Want to add cookies corresponding to the most specific paths
# first (i.e. longest path first)
my $path;
for $path (sort {length($b) <=> length($a) } keys %$cookies) {
if (index($req_path, $path) != 0) {
next;
}
my($key,$array);
while (($key,$array) = each %{$cookies->{$path}}) {
my($version,$val,$port,$path_spec,$secure,$expires) = @$array;
if ($secure && !$secure_request) {
next;
}
if ($expires && $expires < $now) {
next;
}
if ($port) {
my $found;
if ($port =~ s/^_//) {
# The corresponding Set-Cookie attribute was empty
$found++ if $port eq $req_port;
$port = "";
}
else {
my $p;
for $p (split(/,/, $port)) {
$found++, last if $p eq $req_port;
}
}
unless ($found) {
next;
}
}
if ($version > 0 && $netscape_only) {
next;
}
# set version number of cookie header.
# XXX: What should it be if multiple matching
# Set-Cookie headers have different versions themselves
if (!$set_ver++) {
if ($version >= 1) {
push(@cval, "\$Version=$version");
}
elsif (!$self->{hide_cookie2}) {
$request->header(Cookie2 => '$Version="1"');
}
}
# do we need to quote the value
if ($val =~ /\W/ && $version) {
$val =~ s/([\\\"])/\\$1/g;
$val = qq("$val");
}
# and finally remember this cookie
push(@cval, "$key=$val");
if ($version >= 1) {
push(@cval, qq(\$Path="$path")) if $path_spec;
push(@cval, qq(\$Domain="$domain")) if $domain =~ /^\./;
if (defined $port) {
my $p = '$Port';
$p .= qq(="$port") if length $port;
push(@cval, $p);
}
}
}
}
} continue {
# Try with a more general domain, alternately stripping
# leading name components and leading dots. When this
# results in a domain with no leading dot, it is for
# Netscape cookie compatibility only:
#
# a.b.c.net Any cookie
# .b.c.net Any cookie
# b.c.net Netscape cookie only
# .c.net Any cookie
if ($domain =~ s/^\.+//) {
$netscape_only = 1;
}
else {
$domain =~ s/[^.]*//;
$netscape_only = 0;
}
}
if (@cval) {
if (my $old = $request->header("Cookie")) {
unshift(@cval, $old);
}
$request->header(Cookie => join("; ", @cval));
if (my $hash = $request->{_http_cookies}) {
%$hash = (map split(/=/, $_, 2), @cval);
}
}
$request;
}
sub get_cookies
{
my $self = shift;
my $url = shift;
$url = "https://$url" unless $url =~ m,^[a-zA-Z][a-zA-Z0-9.+\-]*:,;
require HTTP::Request;
my $req = HTTP::Request->new(GET => $url);
my $cookies = $req->{_http_cookies} = {};
$self->add_cookie_header($req);
if (@_) {
return map $cookies->{$_}, @_ if wantarray;
return $cookies->{$_[0]};
}
return $cookies;
}
sub extract_cookies
{
my $self = shift;
my $response = shift || return;
my @set = _split_header_words($response->_header("Set-Cookie2"));
my @ns_set = $response->_header("Set-Cookie");
return $response unless @set || @ns_set; # quick exit
my $request = $response->request;
my $url = $request->uri;
my $req_host = _host($request, $url);
$req_host = "$req_host.local" unless $req_host =~ /\./;
my $req_port = $url->port;
my $req_path = _url_path($url);
_normalize_path($req_path) if $req_path =~ /%/;
if (@ns_set) {
# The old Netscape cookie format for Set-Cookie
# http://curl.haxx.se/rfc/cookie_spec.html
# can for instance contain an unquoted "," in the expires
# field, so we have to use this ad-hoc parser.
my $now = time();
# Build a hash of cookies that was present in Set-Cookie2
# headers. We need to skip them if we also find them in a
# Set-Cookie header.
my %in_set2;
for (@set) {
$in_set2{$_->[0]}++;
}
my $set;
for $set (@ns_set) {
$set =~ s/^\s+//;
my @cur;
my $param;
my $expires;
my $first_param = 1;
for $param (@{_split_text($set)}) {
next unless length($param);
my($k,$v) = split(/\s*=\s*/, $param, 2);
if (defined $v) {
$v =~ s/\s+$//;
#print "$k => $v\n";
}
else {
$k =~ s/\s+$//;
#print "$k => undef";
}
if (!$first_param && lc($k) eq "expires") {
my $etime = str2time($v);
if (defined $etime) {
push(@cur, "Max-Age" => $etime - $now);
$expires++;
}
else {
# parse_date can deal with years outside the range of time_t,
my($year, $mon, $day, $hour, $min, $sec, $tz) = parse_date($v);
if ($year) {
my $thisyear = (gmtime)[5] + 1900;
if ($year < $thisyear) {
push(@cur, "Max-Age" => -1); # any negative value will do
$expires++;
}
elsif ($year >= $thisyear + 10) {
# the date is at least 10 years into the future, just replace
# it with something approximate
push(@cur, "Max-Age" => 10 * 365 * 24 * 60 * 60);
$expires++;
}
}
}
}
elsif (!$first_param && lc($k) eq 'max-age') {
$expires++;
}
elsif (!$first_param && lc($k) =~ /^(?:version|discard|ns-cookie)/) {
# ignore
}
else {
push(@cur, $k => $v);
}
$first_param = 0;
}
next unless @cur;
next if $in_set2{$cur[0]};
# push(@cur, "Port" => $req_port);
push(@cur, "Discard" => undef) unless $expires;
push(@cur, "Version" => 0);
push(@cur, "ns-cookie" => 1);
push(@set, \@cur);
}
}
SET_COOKIE:
for my $set (@set) {
next unless @$set >= 2;
my $key = shift @$set;
my $val = shift @$set;
my %hash;
while (@$set) {
my $k = shift @$set;
my $v = shift @$set;
my $lc = lc($k);
# don't loose case distinction for unknown fields
$k = $lc if $lc =~ /^(?:discard|domain|max-age|
path|port|secure|version)$/x;
if ($k eq "discard" || $k eq "secure") {
$v = 1 unless defined $v;
}
next if exists $hash{$k}; # only first value is significant
$hash{$k} = $v;
};
my %orig_hash = %hash;
my $version = delete $hash{version};
$version = 1 unless defined($version);
my $discard = delete $hash{discard};
my $secure = delete $hash{secure};
my $maxage = delete $hash{'max-age'};
my $ns_cookie = delete $hash{'ns-cookie'};
# Check domain
my $domain = delete $hash{domain};
$domain = lc($domain) if defined $domain;
if (defined($domain)
&& $domain ne $req_host && $domain ne ".$req_host") {
if ($domain !~ /\./ && $domain ne "local") {
next SET_COOKIE;
}
$domain = ".$domain" unless $domain =~ /^\./;
if ($domain =~ /\.\d+$/) {
next SET_COOKIE;
}
my $len = length($domain);
unless (substr($req_host, -$len) eq $domain) {
next SET_COOKIE;
}
my $hostpre = substr($req_host, 0, length($req_host) - $len);
if ($hostpre =~ /\./ && !$ns_cookie) {
next SET_COOKIE;
}
}
else {
$domain = $req_host;
}
my $path = delete $hash{path};
my $path_spec;
if (defined $path && $path ne '') {
$path_spec++;
_normalize_path($path) if $path =~ /%/;
if (!$ns_cookie &&
substr($req_path, 0, length($path)) ne $path) {
next SET_COOKIE;
}
}
else {
$path = $req_path;
$path =~ s,/[^/]*$,,;
$path = "/" unless length($path);
}
my $port;
if (exists $hash{port}) {
$port = delete $hash{port};
if (defined $port) {
$port =~ s/\s+//g;
my $found;
for my $p (split(/,/, $port)) {
unless ($p =~ /^\d+$/) {
next SET_COOKIE;
}
$found++ if $p eq $req_port;
}
unless ($found) {
next SET_COOKIE;
}
}
else {
$port = "_$req_port";
}
}
$self->set_cookie($version,$key,$val,$path,$domain,$port,$path_spec,$secure,$maxage,$discard, \%hash)
if $self->set_cookie_ok(\%orig_hash);
}
$response;
}
sub set_cookie_ok
{
1;
}
sub set_cookie
{
my $self = shift;
my($version,
$key, $val, $path, $domain, $port,
$path_spec, $secure, $maxage, $discard, $rest) = @_;
# path and key can not be empty (key can't start with '$')
return $self if !defined($path) || $path !~ m,^/, ||
!defined($key) || $key =~ m,^\$,;
# ensure legal port
if (defined $port) {
return $self unless $port =~ /^_?\d+(?:,\d+)*$/;
}
my $expires;
if (defined $maxage) {
if ($maxage <= 0) {
delete $self->{COOKIES}{$domain}{$path}{$key};
return $self;
}
$expires = time() + $maxage;
}
$version = 0 unless defined $version;
my @array = ($version, $val,$port,
$path_spec,
$secure, $expires, $discard);
push(@array, {%$rest}) if defined($rest) && %$rest;
# trim off undefined values at end
pop(@array) while !defined $array[-1];
$self->{COOKIES}{$domain}{$path}{$key} = \@array;
$self;
}
sub save
{
my $self = shift;
my %args = (
file => $self->{'file'},
ignore_discard => $self->{'ignore_discard'},
@_ == 1 ? ( file => $_[0] ) : @_
);
Carp::croak('Unexpected argument to save method') if keys %args > 2;
my $file = $args{'file'} || return;
open(my $fh, '>', $file) or die "Can't open $file: $!";
print {$fh} "#LWP-Cookies-1.0\n";
print {$fh} $self->as_string(!$args{'ignore_discard'});
close $fh or die "Can't close $file: $!";
1;
}
sub load
{
my $self = shift;
my $file = shift || $self->{'file'} || return;
local $/ = "\n"; # make sure we got standard record separator
open(my $fh, '<', $file) or return;
# check that we have the proper header
my $magic = <$fh>;
chomp $magic;
unless ($magic =~ /^#LWP-Cookies-\d+\.\d+/) {
warn "$file does not seem to contain cookies";
return;
}
# go through the file
while (my $line = <$fh>) {
chomp $line;
next unless $line =~ s/^Set-Cookie3:\s*//;
my $cookie;
for $cookie (_split_header_words($line)) {
my($key,$val) = splice(@$cookie, 0, 2);
my %hash;
while (@$cookie) {
my $k = shift @$cookie;
my $v = shift @$cookie;
$hash{$k} = $v;
}
my $version = delete $hash{version};
my $path = delete $hash{path};
my $domain = delete $hash{domain};
my $port = delete $hash{port};
my $expires = str2time(delete $hash{expires});
my $path_spec = exists $hash{path_spec}; delete $hash{path_spec};
my $secure = exists $hash{secure}; delete $hash{secure};
my $discard = exists $hash{discard}; delete $hash{discard};
my @array = ($version, $val, $port, $path_spec, $secure, $expires,
$discard);
push(@array, \%hash) if %hash;
$self->{COOKIES}{$domain}{$path}{$key} = \@array;
}
}
1;
}
sub revert
{
my $self = shift;
$self->clear->load;
$self;
}
sub clear
{
my $self = shift;
if (@_ == 0) {
$self->{COOKIES} = {};
}
elsif (@_ == 1) {
delete $self->{COOKIES}{$_[0]};
}
elsif (@_ == 2) {
delete $self->{COOKIES}{$_[0]}{$_[1]};
}
elsif (@_ == 3) {
delete $self->{COOKIES}{$_[0]}{$_[1]}{$_[2]};
}
else {
require Carp;
Carp::carp('Usage: $c->clear([domain [,path [,key]]])');
}
$self;
}
sub clear_temporary_cookies
{
my($self) = @_;
$self->scan(sub {
if($_[9] or # "Discard" flag set
not $_[8]) { # No expire field?
$_[8] = -1; # Set the expire/max_age field
$self->set_cookie(@_); # Clear the cookie
}
});
}
sub DESTROY
{
my $self = shift;
local($., $@, $!, $^E, $?);
$self->save if $self->{'autosave'};
}
sub scan
{
my($self, $cb) = @_;
my($domain,$path,$key);
for $domain (sort keys %{$self->{COOKIES}}) {
for $path (sort keys %{$self->{COOKIES}{$domain}}) {
for $key (sort keys %{$self->{COOKIES}{$domain}{$path}}) {
my($version,$val,$port,$path_spec,
$secure,$expires,$discard,$rest) =
@{$self->{COOKIES}{$domain}{$path}{$key}};
$rest = {} unless defined($rest);
&$cb($version,$key,$val,$path,$domain,$port,
$path_spec,$secure,$expires,$discard,$rest);
}
}
}
}
sub as_string
{
my($self, $skip_discard) = @_;
my @res;
$self->scan(sub {
my($version,$key,$val,$path,$domain,$port,
$path_spec,$secure,$expires,$discard,$rest) = @_;
return if $discard && $skip_discard;
my @h = ($key, $val);
push(@h, "path", $path);
push(@h, "domain" => $domain);
push(@h, "port" => $port) if defined $port;
push(@h, "path_spec" => undef) if $path_spec;
push(@h, "secure" => undef) if $secure;
push(@h, "expires" => HTTP::Date::time2isoz($expires)) if $expires;
push(@h, "discard" => undef) if $discard;
my $k;
for $k (sort keys %$rest) {
push(@h, $k, $rest->{$k});
}
push(@h, "version" => $version);
push(@res, "Set-Cookie3: " . join_header_words(\@h));
});
join("\n", @res, "");
}
sub _host
{
my($request, $url) = @_;
if (my $h = $request->header("Host")) {
$h =~ s/:\d+$//; # might have a port as well
return lc($h);
}
return lc($url->host);
}
sub _url_path
{
my $url = shift;
my $path;
if($url->can('epath')) {
$path = $url->epath; # URI::URL method
}
else {
$path = $url->path; # URI::_generic method
}
$path = "/" unless length $path;
$path;
}
sub _normalize_path # so that plain string compare can be used
{
my $x;
$_[0] =~ s/%([0-9a-fA-F][0-9a-fA-F])/
$x = uc($1);
$x eq "2F" || $x eq "25" ? "%$x" :
pack("C", hex($x));
/eg;
$_[0] =~ s/([\0-\x20\x7f-\xff])/sprintf("%%%02X",ord($1))/eg;
}
# deals with splitting values by ; and the fact that they could
# be in quotes which can also have escaping.
sub _split_text {
my $val = shift;
my @vals = grep { $_ ne q{} } split(/([;\\"])/, $val);
my @chunks;
# divide it up into chunks to be processed.
my $in_string = 0;
my @current_string;
for(my $i = 0; $i < @vals; $i++) {
my $chunk = $vals[$i];
if($in_string) {
if($chunk eq q{\\}) {
# don't care about next char probably.
# having said that, probably need to be appending to the chunks
# just dropping this.
$i++;
if($i < @vals) {
push @current_string, $vals[$i];
}
} elsif($chunk eq q{"}) {
$in_string = 0;
}
else {
push @current_string, $chunk;
}
} else {
if($chunk eq q{"}) {
$in_string = 1;
}
elsif($chunk eq q{;}) {
push @chunks, join(q{}, @current_string);
@current_string = ();
}
else {
push @current_string, $chunk;
}
}
}
push @chunks, join(q{}, @current_string) if @current_string;
s/^\s+// for @chunks;
return \@chunks;
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Cookies - HTTP cookie jars
=head1 VERSION
version 6.11
=head1 SYNOPSIS
use HTTP::Cookies;
$cookie_jar = HTTP::Cookies->new(
file => "$ENV{'HOME'}/lwp_cookies.dat",
autosave => 1,
);
use LWP;
my $browser = LWP::UserAgent->new;
$browser->cookie_jar($cookie_jar);
Or for an empty and temporary cookie jar:
use LWP;
my $browser = LWP::UserAgent->new;
$browser->cookie_jar( {} );
=head1 DESCRIPTION
This class is for objects that represent a "cookie jar" -- that is, a
database of all the HTTP cookies that a given LWP::UserAgent object
knows about.
Cookies are a general mechanism which server side connections can use
to both store and retrieve information on the client side of the
connection. For more information about cookies refer to
L<Cookie Spec|http://curl.haxx.se/rfc/cookie_spec.html> and
L<Cookie Central|http://www.cookiecentral.com>. This module also implements the
new style cookies described in L<RFC 2965|https://tools.ietf.org/html/rfc2965>.
The two variants of cookies are supposed to be able to coexist happily.
Instances of the class I<HTTP::Cookies> are able to store a collection
of Set-Cookie2: and Set-Cookie: headers and are able to use this
information to initialize Cookie-headers in I<HTTP::Request> objects.
The state of a I<HTTP::Cookies> object can be saved in and restored from
files.
=head1 LIMITATIONS
This module does not support L<< Public Suffix|https://publicsuffix.org/
>> encouraged by a more recent standard, L<< RFC
6265|https://tools.ietf.org/html/rfc6265 >>.
This module's shortcomings mean that a malicious Web site can set
cookies to track your user agent across all sites under a top level
domain. See F<< t/publicsuffix.t >> in this module's distribution for
details.
L<< HTTP::CookieJar::LWP >> supports Public Suffix, but only provides a
limited subset of this module's functionality and L<< does not
support|HTTP::CookieJar/LIMITATIONS-AND-CAVEATS >> standards older than
I<RFC 6265>.
=head1 METHODS
The following methods are provided:
=over 4
=item $cookie_jar = HTTP::Cookies->new
The constructor takes hash style parameters. The following
parameters are recognized:
file: name of the file to restore cookies from and save cookies to
autosave: save during destruction (bool)
ignore_discard: save even cookies that are requested to be discarded (bool)
hide_cookie2: do not add Cookie2 header to requests
Future parameters might include (not yet implemented):
max_cookies 300
max_cookies_per_domain 20
max_cookie_size 4096
no_cookies list of domain names that we never return cookies to
=item $cookie_jar->get_cookies( $url_or_domain )
=item $cookie_jar->get_cookies( $url_or_domain, $cookie_key,... )
Returns a hash of the cookies that applies to the given URL. If a
domainname is given as argument, then a prefix of "https://" is assumed.
If one or more $cookie_key parameters are provided return the given values,
or C<undef> if the cookie isn't available.
=item $cookie_jar->add_cookie_header( $request )
The add_cookie_header() method will set the appropriate Cookie:-header
for the I<HTTP::Request> object given as argument. The $request must
have a valid url attribute before this method is called.
=item $cookie_jar->extract_cookies( $response )
The extract_cookies() method will look for Set-Cookie: and
Set-Cookie2: headers in the I<HTTP::Response> object passed as
argument. Any of these headers that are found are used to update
the state of the $cookie_jar.
=item $cookie_jar->set_cookie( $version, $key, $val, $path, $domain, $port, $path_spec, $secure, $maxage, $discard, \%rest )
The set_cookie() method updates the state of the $cookie_jar. The
$key, $val, $domain, $port and $path arguments are strings. The
$path_spec, $secure, $discard arguments are boolean values. The $maxage
value is a number indicating number of seconds that this cookie will
live. A value of $maxage <= 0 will delete this cookie. The $version argument
sets the version of the cookie; the default value is 0 ( original Netscape
spec ). Setting $version to another value indicates the RFC to which the
cookie conforms (e.g. version 1 for RFC 2109). %rest defines various other
attributes like "Comment" and "CommentURL".
=item $cookie_jar->save
=item $cookie_jar->save( $file )
=item $cookie_jar->save( file => $file, ignore_discard => $ignore_discard )
This method saves the state of the $cookie_jar to a file.
The state can then be restored later using the load() method. If a
filename is not specified we will use the name specified during
construction. If the $ignore_discard value is true (or not specified,
but attribute I<ignore_discard> was set at cookie jar construction),
then we will even save cookies that are marked to be discarded.
The default is to save a sequence of "Set-Cookie3" lines.
"Set-Cookie3" is a proprietary LWP format, not known to be compatible
with any browser. The I<HTTP::Cookies::Netscape> sub-class can
be used to save in a format compatible with Netscape.
=item $cookie_jar->load
=item $cookie_jar->load( $file )
This method reads the cookies from the file and adds them to the
$cookie_jar. The file must be in the format written by the save()
method.
=item $cookie_jar->revert
This method empties the $cookie_jar and re-loads the $cookie_jar
from the last save file.
=item $cookie_jar->clear
=item $cookie_jar->clear( $domain )
=item $cookie_jar->clear( $domain, $path )
=item $cookie_jar->clear( $domain, $path, $key )
Invoking this method without arguments will empty the whole
$cookie_jar. If given a single argument only cookies belonging to
that domain will be removed. If given two arguments, cookies
belonging to the specified path within that domain are removed. If
given three arguments, then the cookie with the specified key, path
and domain is removed.
=item $cookie_jar->clear_temporary_cookies
Discard all temporary cookies. Scans for all cookies in the jar
with either no expire field or a true C<discard> flag. To be
called when the user agent shuts down according to RFC 2965.
=item $cookie_jar->scan( \&callback )
The argument is a subroutine that will be invoked for each cookie
stored in the $cookie_jar. The subroutine will be invoked with
the following arguments:
0 version
1 key
2 val
3 path
4 domain
5 port
6 path_spec
7 secure
8 expires
9 discard
10 hash
=item $cookie_jar->as_string
=item $cookie_jar->as_string( $skip_discardables )
The as_string() method will return the state of the $cookie_jar
represented as a sequence of "Set-Cookie3" header lines separated by
"\n". If $skip_discardables is TRUE, it will not return lines for
cookies with the I<Discard> attribute.
=back
=head1 SEE ALSO
L<HTTP::Cookies::Netscape>, L<HTTP::Cookies::Microsoft>
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2002 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: HTTP cookie jars

View File

@@ -0,0 +1,324 @@
package HTTP::Cookies::Microsoft;
use strict;
our $VERSION = '6.11';
require HTTP::Cookies;
our @ISA=qw(HTTP::Cookies);
sub load_cookies_from_file
{
my ($file) = @_;
my @cookies;
open (my $fh, '<', $file) || return;
while (my $key = <$fh>) {
chomp $key;
my ($value, $domain_path, $flags, $lo_expire, $hi_expire);
my ($lo_create, $hi_create, $sep);
chomp($value = <$fh>);
chomp($domain_path= <$fh>);
chomp($flags = <$fh>); # 0x0001 bit is for secure
chomp($lo_expire = <$fh>);
chomp($hi_expire = <$fh>);
chomp($lo_create = <$fh>);
chomp($hi_create = <$fh>);
chomp($sep = <$fh>);
if (!defined($key) || !defined($value) || !defined($domain_path) ||
!defined($flags) || !defined($hi_expire) || !defined($lo_expire) ||
!defined($hi_create) || !defined($lo_create) || !defined($sep) ||
($sep ne '*'))
{
last;
}
if ($domain_path =~ /^([^\/]+)(\/.*)$/) {
my $domain = $1;
my $path = $2;
push @cookies, {
KEY => $key, VALUE => $value, DOMAIN => $domain,
PATH => $path, FLAGS =>$flags, HIXP =>$hi_expire,
LOXP => $lo_expire, HICREATE => $hi_create,
LOCREATE => $lo_create
};
}
}
return \@cookies;
}
sub get_user_name
{
use Win32;
use locale;
my $user = lc(Win32::LoginName());
return $user;
}
# MSIE stores create and expire times as Win32 FILETIME,
# which is 64 bits of 100 nanosecond intervals since Jan 01 1601
#
# But Cookies code expects time in 32-bit value expressed
# in seconds since Jan 01 1970
#
sub epoch_time_offset_from_win32_filetime
{
my ($high, $low) = @_;
#--------------------------------------------------------
# USEFUL CONSTANT
#--------------------------------------------------------
# 0x019db1de 0xd53e8000 is 1970 Jan 01 00:00:00 in Win32 FILETIME
#
# 100 nanosecond intervals == 0.1 microsecond intervals
my $filetime_low32_1970 = 0xd53e8000;
my $filetime_high32_1970 = 0x019db1de;
#------------------------------------
# ALGORITHM
#------------------------------------
# To go from 100 nanosecond intervals to seconds since 00:00 Jan 01 1970:
#
# 1. Adjust 100 nanosecond intervals to Jan 01 1970 base
# 2. Divide by 10 to get to microseconds (1/millionth second)
# 3. Divide by 1000000 (10 ^ 6) to get to seconds
#
# We can combine Step 2 & 3 into one divide.
#
# After much trial and error, I came up with the following code which
# avoids using Math::BigInt or floating pt, but still gives correct answers
# If the filetime is before the epoch, return 0
if (($high < $filetime_high32_1970) ||
(($high == $filetime_high32_1970) && ($low < $filetime_low32_1970)))
{
return 0;
}
# Can't multiply by 0x100000000, (1 << 32),
# without Perl issuing an integer overflow warning
#
# So use two multiplies by 0x10000 instead of one multiply by 0x100000000
#
# The result is the same.
#
my $date1970 = (($filetime_high32_1970 * 0x10000) * 0x10000) + $filetime_low32_1970;
my $time = (($high * 0x10000) * 0x10000) + $low;
$time -= $date1970;
$time /= 10000000;
return $time;
}
sub load_cookie
{
my($self, $file) = @_;
my $now = time() - $HTTP::Cookies::EPOCH_OFFSET;
my $cookie_data;
if (-f $file)
{
# open the cookie file and get the data
$cookie_data = load_cookies_from_file($file);
foreach my $cookie (@{$cookie_data})
{
my $secure = ($cookie->{FLAGS} & 1) != 0;
my $expires = epoch_time_offset_from_win32_filetime($cookie->{HIXP}, $cookie->{LOXP});
$self->set_cookie(undef, $cookie->{KEY}, $cookie->{VALUE},
$cookie->{PATH}, $cookie->{DOMAIN}, undef,
0, $secure, $expires-$now, 0);
}
}
}
sub load
{
my($self, $cookie_index) = @_;
my $now = time() - $HTTP::Cookies::EPOCH_OFFSET;
my $cookie_dir = '';
my $delay_load = (defined($self->{'delayload'}) && $self->{'delayload'});
my $user_name = get_user_name();
my $data;
$cookie_index ||= $self->{'file'} || return;
if ($cookie_index =~ /[\\\/][^\\\/]+$/) {
$cookie_dir = $` . "\\";
}
open (my $fh, '<:raw', $cookie_index) || return;
if (256 != read($fh, $data, 256)) {
warn "$cookie_index file is not large enough";
return;
}
# Cookies' index.dat file starts with 32 bytes of signature
# followed by an offset to the first record, stored as a little-endian DWORD
my ($sig, $size) = unpack('a32 V', $data);
# check that sig is valid (only tested in IE6.0)
if (($sig !~ /^Client UrlCache MMF Ver 5\.2/) || (0x4000 != $size)) {
warn "$cookie_index ['$sig' $size] does not seem to contain cookies";
return;
}
# move the file ptr to start of the first record
if (0 == seek($fh, $size, 0)) {
return;
}
# Cookies are usually stored in 'URL ' records in two contiguous 0x80 byte sectors (256 bytes)
# so read in two 0x80 byte sectors and adjust if not a Cookie.
while (256 == read($fh, $data, 256)) {
# each record starts with a 4-byte signature
# and a count (little-endian DWORD) of 0x80 byte sectors for the record
($sig, $size) = unpack('a4 V', $data);
# Cookies are found in 'URL ' records
if ('URL ' ne $sig) {
# skip over uninteresting record: I've seen 'HASH' and 'LEAK' records
if (($sig eq 'HASH') || ($sig eq 'LEAK')) {
# '-2' takes into account the two 0x80 byte sectors we've just read in
if (($size > 0) && ($size != 2)) {
if (0 == seek($fh, ($size-2)*0x80, 1)) {
# Seek failed. Something's wrong. Gonna stop.
last;
}
}
}
next;
}
#$REMOVE Need to check if URL records in Cookies' index.dat will
# ever use more than two 0x80 byte sectors
if ($size > 2) {
my $more_data = ($size-2)*0x80;
if ($more_data != read($fh, $data, $more_data, 256)) {
last;
}
}
(my $user_name2 = $user_name) =~ s/ /_/g;
if ($data =~ /Cookie:\Q$user_name\E@([\x21-\xFF]+).*?((?:\Q$user_name\E|\Q$user_name2\E)@[\x21-\xFF]+\.txt)/) {
my $cookie_file = $cookie_dir . $2; # form full pathname
if (!$delay_load) {
$self->load_cookie($cookie_file);
}
else {
my $domain = $1;
# grab only the domain name, drop everything from the first dir sep on
if ($domain =~ m{[\\/]}) {
$domain = $`;
}
# set the delayload cookie for this domain with
# the cookie_file as cookie for later-loading info
$self->set_cookie(undef, 'cookie', $cookie_file, '//+delayload', $domain, undef, 0, 0, $now+86_400, 0);
}
}
}
1;
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Cookies::Microsoft - Access to Microsoft cookies files
=head1 VERSION
version 6.11
=head1 SYNOPSIS
use LWP;
use HTTP::Cookies::Microsoft;
use Win32::TieRegistry(Delimiter => "/");
my $cookies_dir = $Registry->
{"CUser/Software/Microsoft/Windows/CurrentVersion/Explorer/Shell Folders/Cookies"};
$cookie_jar = HTTP::Cookies::Microsoft->new(
file => "$cookies_dir\\index.dat",
'delayload' => 1,
);
my $browser = LWP::UserAgent->new;
$browser->cookie_jar( $cookie_jar );
=head1 DESCRIPTION
This is a subclass of C<HTTP::Cookies> which
loads Microsoft Internet Explorer 5.x and 6.x for Windows (MSIE)
cookie files.
See the documentation for L<HTTP::Cookies>.
=head1 METHODS
The following methods are provided:
=over 4
=item $cookie_jar = HTTP::Cookies::Microsoft->new;
The constructor takes hash style parameters. In addition
to the regular HTTP::Cookies parameters, HTTP::Cookies::Microsoft
recognizes the following:
delayload: delay loading of cookie data until a request
is actually made. This results in faster
runtime unless you use most of the cookies
since only the domain's cookie data
is loaded on demand.
=back
=head1 CAVEATS
Please note that the code DOESN'T support saving to the MSIE
cookie file format.
=head1 AUTHOR
Johnny Lee <typo_pl@hotmail.com>
=head1 COPYRIGHT
Copyright 2002 Johnny Lee
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2002 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: Access to Microsoft cookies files

View File

@@ -0,0 +1,133 @@
package HTTP::Cookies::Netscape;
use strict;
our $VERSION = '6.11';
require HTTP::Cookies;
our @ISA=qw(HTTP::Cookies);
sub load
{
my ($self, $file) = @_;
$file ||= $self->{'file'} || return;
local $/ = "\n"; # make sure we got standard record separator
open (my $fh, '<', $file) || return;
my $magic = <$fh>;
chomp $magic;
unless ($magic =~ /^#(?: Netscape)? HTTP Cookie File/) {
warn "$file does not look like a netscape cookies file";
return;
}
my $now = time() - $HTTP::Cookies::EPOCH_OFFSET;
while (my $line = <$fh>) {
chomp($line);
$line =~ s/\s*\#HttpOnly_//;
next if $line =~ /^\s*\#/;
next if $line =~ /^\s*$/;
$line =~ tr/\n\r//d;
my($domain,$bool1,$path,$secure, $expires,$key,$val) = split(/\t/, $line);
$secure = ($secure eq "TRUE");
$self->set_cookie(undef, $key, $val, $path, $domain, undef, 0, $secure, $expires-$now, 0);
}
1;
}
sub save
{
my $self = shift;
my %args = (
file => $self->{'file'},
ignore_discard => $self->{'ignore_discard'},
@_ == 1 ? ( file => $_[0] ) : @_
);
Carp::croak('Unexpected argument to save method') if keys %args > 2;
my $file = $args{'file'} || return;
open(my $fh, '>', $file) || return;
# Use old, now broken link to the old cookie spec just in case something
# else (not us!) requires the comment block exactly this way.
print {$fh} <<EOT;
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.
EOT
my $now = time - $HTTP::Cookies::EPOCH_OFFSET;
$self->scan(sub {
my ($version, $key, $val, $path, $domain, $port, $path_spec, $secure, $expires, $discard, $rest) = @_;
return if $discard && !$args{'ignore_discard'};
$expires = $expires ? $expires - $HTTP::Cookies::EPOCH_OFFSET : 0;
return if $now > $expires;
$secure = $secure ? "TRUE" : "FALSE";
my $bool = $domain =~ /^\./ ? "TRUE" : "FALSE";
print {$fh} join("\t", $domain, $bool, $path, $secure, $expires, $key, $val), "\n";
});
1;
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Cookies::Netscape - Access to Netscape cookies files
=head1 VERSION
version 6.11
=head1 SYNOPSIS
use LWP;
use HTTP::Cookies::Netscape;
$cookie_jar = HTTP::Cookies::Netscape->new(
file => "c:/program files/netscape/users/ZombieCharity/cookies.txt",
);
my $browser = LWP::UserAgent->new;
$browser->cookie_jar( $cookie_jar );
=head1 DESCRIPTION
This is a subclass of C<HTTP::Cookies> that reads (and optionally
writes) Netscape/Mozilla cookie files.
See the documentation for L<HTTP::Cookies>.
=head1 CAVEATS
Please note that the Netscape/Mozilla cookie file format can't store
all the information available in the Set-Cookie2 headers, so you will
probably lose some information if you save in this format.
At time of writing, this module seems to work fine with Mozilla
Phoenix/Firebird.
=head1 SEE ALSO
L<HTTP::Cookies::Microsoft>
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2002 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: Access to Netscape cookies files

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,419 @@
package HTTP::Date;
use strict;
our $VERSION = '6.06';
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(time2str str2time);
our @EXPORT_OK = qw(parse_date time2iso time2isoz);
require Time::Local;
our ( @DoW, @MoY, %MoY );
@DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
@MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
@MoY{@MoY} = ( 1 .. 12 );
my %GMT_ZONE = ( GMT => 1, UTC => 1, UT => 1, Z => 1 );
sub time2str (;$) {
my $time = shift;
$time = time unless defined $time;
my ( $sec, $min, $hour, $mday, $mon, $year, $wday ) = gmtime($time);
sprintf(
"%s, %02d %s %04d %02d:%02d:%02d GMT",
$DoW[$wday],
$mday, $MoY[$mon], $year + 1900,
$hour, $min, $sec
);
}
sub str2time ($;$) {
my $str = shift;
return undef unless defined $str;
# fast exit for strictly conforming string
if ( $str
=~ /^[SMTWF][a-z][a-z], (\d\d) ([JFMAJSOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$/
) {
return eval {
my $t = Time::Local::timegm( $6, $5, $4, $1, $MoY{$2} - 1, $3 );
$t < 0 ? undef : $t;
};
}
my @d = parse_date($str);
return undef unless @d;
$d[1]--; # month
my $tz = pop(@d);
unless ( defined $tz ) {
unless ( defined( $tz = shift ) ) {
return eval {
my $frac = $d[-1];
$frac -= ( $d[-1] = int($frac) );
my $t = Time::Local::timelocal( reverse @d ) + $frac;
$t < 0 ? undef : $t;
};
}
}
my $offset = 0;
if ( $GMT_ZONE{ uc $tz } ) {
# offset already zero
}
elsif ( $tz =~ /^([-+])?(\d\d?):?(\d\d)?$/ ) {
$offset = 3600 * $2;
$offset += 60 * $3 if $3;
$offset *= -1 if $1 && $1 eq '-';
}
else {
eval { require Time::Zone } || return undef;
$offset = Time::Zone::tz_offset($tz);
return undef unless defined $offset;
}
return eval {
my $frac = $d[-1];
$frac -= ( $d[-1] = int($frac) );
my $t = Time::Local::timegm( reverse @d ) + $frac;
$t < 0 ? undef : $t - $offset;
};
}
sub parse_date ($) {
local ($_) = shift;
return unless defined;
# More lax parsing below
s/^\s+//; # kill leading space
s/^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*//i; # Useless weekday
my ( $day, $mon, $yr, $hr, $min, $sec, $tz, $ampm );
# Then we are able to check for most of the formats with this regexp
(
( $day, $mon, $yr, $hr, $min, $sec, $tz )
= /^
(\d\d?) # day
(?:\s+|[-\/])
(\w+) # month
(?:\s+|[-\/])
(\d+) # year
(?:
(?:\s+|:) # separator before clock
(\d\d?):(\d\d) # hour:min
(?::(\d\d))? # optional seconds
)? # optional clock
\s*
([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
\s*
(?:\(\w+\)|\w{3,})? # ASCII representation of timezone.
\s*$
/x
)
||
# Try the ctime and asctime format
(
( $mon, $day, $hr, $min, $sec, $tz, $yr )
= /^
(\w{1,3}) # month
\s+
(\d\d?) # day
\s+
(\d\d?):(\d\d) # hour:min
(?::(\d\d))? # optional seconds
\s+
(?:([A-Za-z]+)\s+)? # optional timezone
(\d+) # year
\s*$ # allow trailing whitespace
/x
)
||
# Then the Unix 'ls -l' date format
(
( $mon, $day, $yr, $hr, $min, $sec )
= /^
(\w{3}) # month
\s+
(\d\d?) # day
\s+
(?:
(\d\d\d\d) | # year
(\d{1,2}):(\d{2}) # hour:min
(?::(\d\d))? # optional seconds
)
\s*$
/x
)
||
# ISO 8601 format '1996-02-29 12:00:00 -0100' and variants
(
( $yr, $mon, $day, $hr, $min, $sec, $tz )
= /^
(\d{4}) # year
[-\/]?
(\d\d?) # numerical month
[-\/]?
(\d\d?) # day
(?:
(?:\s+|[-:Tt]) # separator before clock
(\d\d?):?(\d\d) # hour:min
(?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
)? # optional clock
\s*
([-+]?\d\d?:?(:?\d\d)?
|Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
\s*$
/x
)
||
# Windows 'dir': '11-12-96 03:52PM' and four-digit year variant
(
( $mon, $day, $yr, $hr, $min, $ampm )
= /^
(\d{2}) # numerical month
-
(\d{2}) # day
-
(\d{2,4}) # year
\s+
(\d\d?):(\d\d)([APap][Mm]) # hour:min AM or PM
\s*$
/x
)
|| return; # unrecognized format
# Translate month name to number
$mon
= $MoY{$mon}
|| $MoY{"\u\L$mon"}
|| ( $mon =~ /^\d\d?$/ && $mon >= 1 && $mon <= 12 && int($mon) )
|| return;
# If the year is missing, we assume first date before the current,
# because of the formats we support such dates are mostly present
# on "ls -l" listings.
unless ( defined $yr ) {
my $cur_mon;
( $cur_mon, $yr ) = (localtime)[ 4, 5 ];
$yr += 1900;
$cur_mon++;
$yr-- if $mon > $cur_mon;
}
elsif ( length($yr) < 3 ) {
# Find "obvious" year
my $cur_yr = (localtime)[5] + 1900;
my $m = $cur_yr % 100;
my $tmp = $yr;
$yr += $cur_yr - $m;
$m -= $tmp;
$yr += ( $m > 0 ) ? 100 : -100
if abs($m) > 50;
}
# Make sure clock elements are defined
$hr = 0 unless defined($hr);
$min = 0 unless defined($min);
$sec = 0 unless defined($sec);
# Compensate for AM/PM
if ($ampm) {
$ampm = uc $ampm;
$hr = 0 if $hr == 12 && $ampm eq 'AM';
$hr += 12 if $ampm eq 'PM' && $hr != 12;
}
return ( $yr, $mon, $day, $hr, $min, $sec, $tz )
if wantarray;
if ( defined $tz ) {
$tz = "Z" if $tz =~ /^(GMT|UTC?|[-+]?0+)$/;
}
else {
$tz = "";
}
return sprintf(
"%04d-%02d-%02d %02d:%02d:%02d%s",
$yr, $mon, $day, $hr, $min, $sec, $tz
);
}
sub time2iso (;$) {
my $time = shift;
$time = time unless defined $time;
my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime($time);
sprintf(
"%04d-%02d-%02d %02d:%02d:%02d",
$year + 1900, $mon + 1, $mday, $hour, $min, $sec
);
}
sub time2isoz (;$) {
my $time = shift;
$time = time unless defined $time;
my ( $sec, $min, $hour, $mday, $mon, $year ) = gmtime($time);
sprintf(
"%04d-%02d-%02d %02d:%02d:%02dZ",
$year + 1900, $mon + 1, $mday, $hour, $min, $sec
);
}
1;
# ABSTRACT: HTTP::Date - date conversion routines
#
__END__
=pod
=encoding UTF-8
=head1 NAME
HTTP::Date - HTTP::Date - date conversion routines
=head1 VERSION
version 6.06
=head1 SYNOPSIS
use HTTP::Date;
$string = time2str($time); # Format as GMT ASCII time
$time = str2time($string); # convert ASCII date to machine time
=head1 DESCRIPTION
This module provides functions that deal the date formats used by the
HTTP protocol (and then some more). Only the first two functions,
time2str() and str2time(), are exported by default.
=over 4
=item time2str( [$time] )
The time2str() function converts a machine time (seconds since epoch)
to a string. If the function is called without an argument or with an
undefined argument, it will use the current time.
The string returned is in the format preferred for the HTTP protocol.
This is a fixed length subset of the format defined by RFC 1123,
represented in Universal Time (GMT). An example of a time stamp
in this format is:
Sun, 06 Nov 1994 08:49:37 GMT
=item str2time( $str [, $zone] )
The str2time() function converts a string to machine time. It returns
C<undef> if the format of $str is unrecognized, otherwise whatever the
C<Time::Local> functions can make out of the parsed time. Dates
before the system's epoch may not work on all operating systems. The
time formats recognized are the same as for parse_date().
The function also takes an optional second argument that specifies the
default time zone to use when converting the date. This parameter is
ignored if the zone is found in the date string itself. If this
parameter is missing, and the date string format does not contain any
zone specification, then the local time zone is assumed.
If the zone is not "C<GMT>" or numerical (like "C<-0800>" or
"C<+0100>"), then the C<Time::Zone> module must be installed in order
to get the date recognized.
=item parse_date( $str )
This function will try to parse a date string, and then return it as a
list of numerical values followed by a (possible undefined) time zone
specifier; ($year, $month, $day, $hour, $min, $sec, $tz). The $year
will be the full 4-digit year, and $month numbers start with 1 (for January).
In scalar context the numbers are interpolated in a string of the
"YYYY-MM-DD hh:mm:ss TZ"-format and returned.
If the date is unrecognized, then the empty list is returned (C<undef> in
scalar context).
The function is able to parse the following formats:
"Wed, 09 Feb 1994 22:23:32 GMT" -- HTTP format
"Thu Feb 3 17:03:55 GMT 1994" -- ctime(3) format
"Thu Feb 3 00:00:00 1994", -- ANSI C asctime() format
"Tuesday, 08-Feb-94 14:15:29 GMT" -- old rfc850 HTTP format
"Tuesday, 08-Feb-1994 14:15:29 GMT" -- broken rfc850 HTTP format
"03/Feb/1994:17:03:55 -0700" -- common logfile format
"09 Feb 1994 22:23:32 GMT" -- HTTP format (no weekday)
"08-Feb-94 14:15:29 GMT" -- rfc850 format (no weekday)
"08-Feb-1994 14:15:29 GMT" -- broken rfc850 format (no weekday)
"1994-02-03 14:15:29 -0100" -- ISO 8601 format
"1994-02-03 14:15:29" -- zone is optional
"1994-02-03" -- only date
"1994-02-03T14:15:29" -- Use T as separator
"19940203T141529Z" -- ISO 8601 compact format
"19940203" -- only date
"08-Feb-94" -- old rfc850 HTTP format (no weekday, no time)
"08-Feb-1994" -- broken rfc850 HTTP format (no weekday, no time)
"09 Feb 1994" -- proposed new HTTP format (no weekday, no time)
"03/Feb/1994" -- common logfile format (no time, no offset)
"Feb 3 1994" -- Unix 'ls -l' format
"Feb 3 17:03" -- Unix 'ls -l' format
"11-15-96 03:52PM" -- Windows 'dir' format
"11-15-1996 03:52PM" -- Windows 'dir' format with four-digit year
The parser ignores leading and trailing whitespace. It also allow the
seconds to be missing and the month to be numerical in most formats.
If the year is missing, then we assume that the date is the first
matching date I<before> current month. If the year is given with only
2 digits, then parse_date() will select the century that makes the
year closest to the current date.
=item time2iso( [$time] )
Same as time2str(), but returns a "YYYY-MM-DD hh:mm:ss"-formatted
string representing time in the local time zone.
=item time2isoz( [$time] )
Same as time2str(), but returns a "YYYY-MM-DD hh:mm:ssZ"-formatted
string representing Universal Time.
=back
=head1 SEE ALSO
L<perlfunc/time>, L<Time::Zone>
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1995 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut

View File

@@ -0,0 +1,879 @@
package HTTP::Headers;
use strict;
use warnings;
our $VERSION = '7.00';
use Clone qw(clone);
use Carp ();
# The $TRANSLATE_UNDERSCORE variable controls whether '_' can be used
# as a replacement for '-' in header field names.
our $TRANSLATE_UNDERSCORE = 1 unless defined $TRANSLATE_UNDERSCORE;
# "Good Practice" order of HTTP message headers:
# - General-Headers
# - Request-Headers
# - Response-Headers
# - Entity-Headers
my @general_headers = qw(
Cache-Control Connection Date Pragma Trailer Transfer-Encoding Upgrade
Via Warning
);
my @request_headers = qw(
Accept Accept-Charset Accept-Encoding Accept-Language
Authorization Expect From Host
If-Match If-Modified-Since If-None-Match If-Range If-Unmodified-Since
Max-Forwards Proxy-Authorization Range Referer TE User-Agent
);
my @response_headers = qw(
Accept-Ranges Age ETag Location Proxy-Authenticate Retry-After Server
Vary WWW-Authenticate
);
my @entity_headers = qw(
Allow Content-Encoding Content-Language Content-Length Content-Location
Content-MD5 Content-Range Content-Type Expires Last-Modified
);
my %entity_header = map { lc($_) => 1 } @entity_headers;
my @header_order = (
@general_headers,
@request_headers,
@response_headers,
@entity_headers,
);
# Make alternative representations of @header_order. This is used
# for sorting and case matching.
my %header_order;
my %standard_case;
{
my $i = 0;
for (@header_order) {
my $lc = lc $_;
$header_order{$lc} = ++$i;
$standard_case{$lc} = $_;
}
}
sub new
{
my($class) = shift;
my $self = bless {}, $class;
$self->header(@_) if @_; # set up initial headers
$self;
}
sub header
{
my $self = shift;
Carp::croak('Usage: $h->header($field, ...)') unless @_;
my(@old);
my %seen;
while (@_) {
my $field = shift;
my $op = @_ ? ($seen{lc($field)}++ ? 'PUSH' : 'SET') : 'GET';
@old = $self->_header($field, shift, $op);
}
return @old if wantarray;
return $old[0] if @old <= 1;
join(", ", @old);
}
sub clear
{
my $self = shift;
%$self = ();
}
sub push_header
{
my $self = shift;
return $self->_header(@_, 'PUSH_H') if @_ == 2;
while (@_) {
$self->_header(splice(@_, 0, 2), 'PUSH_H');
}
}
sub init_header
{
Carp::croak('Usage: $h->init_header($field, $val)') if @_ != 3;
shift->_header(@_, 'INIT');
}
sub remove_header
{
my($self, @fields) = @_;
my $field;
my @values;
foreach $field (@fields) {
$field =~ tr/_/-/ if $field !~ /^:/ && $TRANSLATE_UNDERSCORE;
my $v = delete $self->{lc $field};
push(@values, ref($v) eq 'ARRAY' ? @$v : $v) if defined $v;
}
return @values;
}
sub remove_content_headers
{
my $self = shift;
unless (defined(wantarray)) {
# fast branch that does not create return object
delete @$self{grep $entity_header{$_} || /^content-/, keys %$self};
return;
}
my $c = ref($self)->new;
for my $f (grep $entity_header{$_} || /^content-/, keys %$self) {
$c->{$f} = delete $self->{$f};
}
if (exists $self->{'::std_case'}) {
$c->{'::std_case'} = $self->{'::std_case'};
}
$c;
}
sub _header
{
my($self, $field, $val, $op) = @_;
Carp::croak("Illegal field name '$field'")
if rindex($field, ':') > 1 || !length($field);
unless ($field =~ /^:/) {
$field =~ tr/_/-/ if $TRANSLATE_UNDERSCORE;
my $old = $field;
$field = lc $field;
unless($standard_case{$field} || $self->{'::std_case'}{$field}) {
# generate a %std_case entry for this field
$old =~ s/\b(\w)/\u$1/g;
$self->{'::std_case'}{$field} = $old;
}
}
$op ||= defined($val) ? 'SET' : 'GET';
if ($op eq 'PUSH_H') {
# Like PUSH but where we don't care about the return value
if (exists $self->{$field}) {
my $h = $self->{$field};
if (ref($h) eq 'ARRAY') {
push(@$h, ref($val) eq "ARRAY" ? @$val : $val);
}
else {
$self->{$field} = [$h, ref($val) eq "ARRAY" ? @$val : $val]
}
return;
}
$self->{$field} = $val;
return;
}
my $h = $self->{$field};
my @old = ref($h) eq 'ARRAY' ? @$h : (defined($h) ? ($h) : ());
unless ($op eq 'GET' || ($op eq 'INIT' && @old)) {
if (defined($val)) {
my @new = ($op eq 'PUSH') ? @old : ();
if (ref($val) ne 'ARRAY') {
push(@new, $val);
}
else {
push(@new, @$val);
}
$self->{$field} = @new > 1 ? \@new : $new[0];
}
elsif ($op ne 'PUSH') {
delete $self->{$field};
}
}
@old;
}
sub _sorted_field_names
{
my $self = shift;
return [ sort {
($header_order{$a} || 999) <=> ($header_order{$b} || 999) ||
$a cmp $b
} grep !/^::/, keys %$self ];
}
sub header_field_names {
my $self = shift;
return map $standard_case{$_} || $self->{'::std_case'}{$_} || $_, @{ $self->_sorted_field_names },
if wantarray;
return grep !/^::/, keys %$self;
}
sub scan
{
my($self, $sub) = @_;
my $key;
for $key (@{ $self->_sorted_field_names }) {
my $vals = $self->{$key};
if (ref($vals) eq 'ARRAY') {
my $val;
for $val (@$vals) {
$sub->($standard_case{$key} || $self->{'::std_case'}{$key} || $key, $val);
}
}
else {
$sub->($standard_case{$key} || $self->{'::std_case'}{$key} || $key, $vals);
}
}
}
sub flatten {
my($self)=@_;
(
map {
my $k = $_;
map {
( $k => $_ )
} $self->header($_);
} $self->header_field_names
);
}
sub as_string
{
my($self, $endl) = @_;
$endl = "\n" unless defined $endl;
my @result = ();
for my $key (@{ $self->_sorted_field_names }) {
next if index($key, '_') == 0;
my $vals = $self->{$key};
if ( ref($vals) eq 'ARRAY' ) {
for my $val (@$vals) {
$val = '' if not defined $val;
my $field = $standard_case{$key} || $self->{'::std_case'}{$key} || $key;
$field =~ s/^://;
if ( index($val, "\n") >= 0 ) {
$val = _process_newline($val, $endl);
}
push @result, $field . ': ' . $val;
}
}
else {
$vals = '' if not defined $vals;
my $field = $standard_case{$key} || $self->{'::std_case'}{$key} || $key;
$field =~ s/^://;
if ( index($vals, "\n") >= 0 ) {
$vals = _process_newline($vals, $endl);
}
push @result, $field . ': ' . $vals;
}
}
join($endl, @result, '');
}
sub _process_newline {
local $_ = shift;
my $endl = shift;
# must handle header values with embedded newlines with care
s/\s+$//; # trailing newlines and space must go
s/\n(\x0d?\n)+/\n/g; # no empty lines
s/\n([^\040\t])/\n $1/g; # initial space for continuation
s/\n/$endl/g; # substitute with requested line ending
$_;
}
sub _date_header
{
require HTTP::Date;
my($self, $header, $time) = @_;
my($old) = $self->_header($header);
if (defined $time) {
$self->_header($header, HTTP::Date::time2str($time));
}
$old =~ s/;.*// if defined($old);
HTTP::Date::str2time($old);
}
sub date { shift->_date_header('Date', @_); }
sub expires { shift->_date_header('Expires', @_); }
sub if_modified_since { shift->_date_header('If-Modified-Since', @_); }
sub if_unmodified_since { shift->_date_header('If-Unmodified-Since', @_); }
sub last_modified { shift->_date_header('Last-Modified', @_); }
# This is used as a private LWP extension. The Client-Date header is
# added as a timestamp to a response when it has been received.
sub client_date { shift->_date_header('Client-Date', @_); }
# The retry_after field is dual format (can also be a expressed as
# number of seconds from now), so we don't provide an easy way to
# access it until we have know how both these interfaces can be
# addressed. One possibility is to return a negative value for
# relative seconds and a positive value for epoch based time values.
#sub retry_after { shift->_date_header('Retry-After', @_); }
sub content_type {
my $self = shift;
my $ct = $self->{'content-type'};
$self->{'content-type'} = shift if @_;
$ct = $ct->[0] if ref($ct) eq 'ARRAY';
return '' unless defined($ct) && length($ct);
my @ct = split(/;\s*/, $ct, 2);
for ($ct[0]) {
s/\s+//g;
$_ = lc($_);
}
wantarray ? @ct : $ct[0];
}
sub content_type_charset {
my $self = shift;
require HTTP::Headers::Util;
my $h = $self->{'content-type'};
$h = $h->[0] if ref($h);
$h = "" unless defined $h;
my @v = HTTP::Headers::Util::split_header_words($h);
if (@v) {
my($ct, undef, %ct_param) = @{$v[0]};
my $charset = $ct_param{charset};
if ($ct) {
$ct = lc($ct);
$ct =~ s/\s+//;
}
if ($charset) {
$charset = uc($charset);
$charset =~ s/^\s+//; $charset =~ s/\s+\z//;
undef($charset) if $charset eq "";
}
return $ct, $charset if wantarray;
return $charset;
}
return undef, undef if wantarray;
return undef;
}
sub content_is_text {
my $self = shift;
return $self->content_type =~ m,^text/,;
}
sub content_is_html {
my $self = shift;
return $self->content_type eq 'text/html' || $self->content_is_xhtml;
}
sub content_is_xhtml {
my $ct = shift->content_type;
return $ct eq "application/xhtml+xml" ||
$ct eq "application/vnd.wap.xhtml+xml";
}
sub content_is_xml {
my $ct = shift->content_type;
return 1 if $ct eq "text/xml";
return 1 if $ct eq "application/xml";
return 1 if $ct =~ /\+xml$/;
return 0;
}
sub referer {
my $self = shift;
if (@_ && $_[0] =~ /#/) {
# Strip fragment per RFC 2616, section 14.36.
my $uri = shift;
if (ref($uri)) {
$uri = $uri->clone;
$uri->fragment(undef);
}
else {
$uri =~ s/\#.*//;
}
unshift @_, $uri;
}
($self->_header('Referer', @_))[0];
}
*referrer = \&referer; # on tchrist's request
sub title { (shift->_header('Title', @_))[0] }
sub content_encoding { (shift->_header('Content-Encoding', @_))[0] }
sub content_language { (shift->_header('Content-Language', @_))[0] }
sub content_length { (shift->_header('Content-Length', @_))[0] }
sub user_agent { (shift->_header('User-Agent', @_))[0] }
sub server { (shift->_header('Server', @_))[0] }
sub from { (shift->_header('From', @_))[0] }
sub warning { (shift->_header('Warning', @_))[0] }
sub www_authenticate { (shift->_header('WWW-Authenticate', @_))[0] }
sub authorization { (shift->_header('Authorization', @_))[0] }
sub proxy_authenticate { (shift->_header('Proxy-Authenticate', @_))[0] }
sub proxy_authorization { (shift->_header('Proxy-Authorization', @_))[0] }
sub authorization_basic { shift->_basic_auth("Authorization", @_) }
sub proxy_authorization_basic { shift->_basic_auth("Proxy-Authorization", @_) }
sub _basic_auth {
require MIME::Base64;
my($self, $h, $user, $passwd) = @_;
my($old) = $self->_header($h);
if (defined $user) {
Carp::croak("Basic authorization user name can't contain ':'")
if $user =~ /:/;
$passwd = '' unless defined $passwd;
$self->_header($h => 'Basic ' .
MIME::Base64::encode("$user:$passwd", ''));
}
if (defined $old && $old =~ s/^\s*Basic\s+//) {
my $val = MIME::Base64::decode($old);
return $val unless wantarray;
return split(/:/, $val, 2);
}
return;
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Headers - Class encapsulating HTTP Message headers
=head1 VERSION
version 7.00
=head1 SYNOPSIS
require HTTP::Headers;
$h = HTTP::Headers->new;
$h->header('Content-Type' => 'text/plain'); # set
$ct = $h->header('Content-Type'); # get
$h->remove_header('Content-Type'); # delete
=head1 DESCRIPTION
The C<HTTP::Headers> class encapsulates HTTP-style message headers.
The headers consist of attribute-value pairs also called fields, which
may be repeated, and which are printed in a particular order. The
field names are cases insensitive.
Instances of this class are usually created as member variables of the
C<HTTP::Request> and C<HTTP::Response> classes, internal to the
library.
The following methods are available:
=over 4
=item $h = HTTP::Headers->new
Constructs a new C<HTTP::Headers> object. You might pass some initial
attribute-value pairs as parameters to the constructor. I<E.g.>:
$h = HTTP::Headers->new(
Date => 'Thu, 03 Feb 1994 00:00:00 GMT',
Content_Type => 'text/html; version=3.2',
Content_Base => 'http://www.perl.org/');
The constructor arguments are passed to the C<header> method which is
described below.
=item $h->clone
Returns a copy of this C<HTTP::Headers> object.
=item $h->header( $field )
=item $h->header( $field => $value )
=item $h->header( $f1 => $v1, $f2 => $v2, ... )
Get or set the value of one or more header fields. The header field
name ($field) is not case sensitive. To make the life easier for perl
users who wants to avoid quoting before the => operator, you can use
'_' as a replacement for '-' in header names.
The header() method accepts multiple ($field => $value) pairs, which
means that you can update several fields with a single invocation.
The $value argument may be a plain string or a reference to an array
of strings for a multi-valued field. If the $value is provided as
C<undef> then the field is removed. If the $value is not given, then
that header field will remain unchanged. In addition to being a string,
$value may be something that stringifies.
The old value (or values) of the last of the header fields is returned.
If no such field exists C<undef> will be returned.
A multi-valued field will be returned as separate values in list
context and will be concatenated with ", " as separator in scalar
context. The HTTP spec (RFC 2616) promises that joining multiple
values in this way will not change the semantic of a header field, but
in practice there are cases like old-style Netscape cookies (see
L<HTTP::Cookies>) where "," is used as part of the syntax of a single
field value.
Examples:
$header->header(MIME_Version => '1.0',
User_Agent => 'My-Web-Client/0.01');
$header->header(Accept => "text/html, text/plain, image/*");
$header->header(Accept => [qw(text/html text/plain image/*)]);
@accepts = $header->header('Accept'); # get multiple values
$accepts = $header->header('Accept'); # get values as a single string
=item $h->push_header( $field => $value )
=item $h->push_header( $f1 => $v1, $f2 => $v2, ... )
Add a new field value for the specified header field. Previous values
for the same field are retained.
As for the header() method, the field name ($field) is not case
sensitive and '_' can be used as a replacement for '-'.
The $value argument may be a scalar or a reference to a list of
scalars.
$header->push_header(Accept => 'image/jpeg');
$header->push_header(Accept => [map "image/$_", qw(gif png tiff)]);
=item $h->init_header( $field => $value )
Set the specified header to the given value, but only if no previous
value for that field is set.
The header field name ($field) is not case sensitive and '_'
can be used as a replacement for '-'.
The $value argument may be a scalar or a reference to a list of
scalars.
=item $h->remove_header( $field, ... )
This function removes the header fields with the specified names.
The header field names ($field) are not case sensitive and '_'
can be used as a replacement for '-'.
The return value is the values of the fields removed. In scalar
context the number of fields removed is returned.
Note that if you pass in multiple field names then it is generally not
possible to tell which of the returned values belonged to which field.
=item $h->remove_content_headers
This will remove all the header fields used to describe the content of
a message. All header field names prefixed with C<Content-> fall
into this category, as well as C<Allow>, C<Expires> and
C<Last-Modified>. RFC 2616 denotes these fields as I<Entity Header
Fields>.
The return value is a new C<HTTP::Headers> object that contains the
removed headers only.
=item $h->clear
This will remove all header fields.
=item $h->header_field_names
Returns the list of distinct names for the fields present in the
header. The field names have case as suggested by HTTP spec, and the
names are returned in the recommended "Good Practice" order.
In scalar context return the number of distinct field names.
=item $h->scan( \&process_header_field )
Apply a subroutine to each header field in turn. The callback routine
is called with two parameters; the name of the field and a single
value (a string). If a header field is multi-valued, then the
routine is called once for each value. The field name passed to the
callback routine has case as suggested by HTTP spec, and the headers
will be visited in the recommended "Good Practice" order.
Any return values of the callback routine are ignored. The loop can
be broken by raising an exception (C<die>), but the caller of scan()
would have to trap the exception itself.
=item $h->flatten()
Returns the list of pairs of keys and values.
=item $h->as_string
=item $h->as_string( $eol )
Return the header fields as a formatted MIME header. Since it
internally uses the C<scan> method to build the string, the result
will use case as suggested by HTTP spec, and it will follow
recommended "Good Practice" of ordering the header fields. Long header
values are not folded.
The optional $eol parameter specifies the line ending sequence to
use. The default is "\n". Embedded "\n" characters in header field
values will be substituted with this line ending sequence.
=back
=head1 CONVENIENCE METHODS
The most frequently used headers can also be accessed through the
following convenience methods. Most of these methods can both be used to read
and to set the value of a header. The header value is set if you pass
an argument to the method. The old header value is always returned.
If the given header did not exist then C<undef> is returned.
Methods that deal with dates/times always convert their value to system
time (seconds since Jan 1, 1970) and they also expect this kind of
value when the header value is set.
=over 4
=item $h->date
This header represents the date and time at which the message was
originated. I<E.g.>:
$h->date(time); # set current date
=item $h->expires
This header gives the date and time after which the entity should be
considered stale.
=item $h->if_modified_since
=item $h->if_unmodified_since
These header fields are used to make a request conditional. If the requested
resource has (or has not) been modified since the time specified in this field,
then the server will return a C<304 Not Modified> response instead of
the document itself.
=item $h->last_modified
This header indicates the date and time at which the resource was last
modified. I<E.g.>:
# check if document is more than 1 hour old
if (my $last_mod = $h->last_modified) {
if ($last_mod < time - 60*60) {
...
}
}
=item $h->content_type
The Content-Type header field indicates the media type of the message
content. I<E.g.>:
$h->content_type('text/html');
The value returned will be converted to lower case, and potential
parameters will be chopped off and returned as a separate value if in
an array context. If there is no such header field, then the empty
string is returned. This makes it safe to do the following:
if ($h->content_type eq 'text/html') {
# we enter this place even if the real header value happens to
# be 'TEXT/HTML; version=3.0'
...
}
=item $h->content_type_charset
Returns the upper-cased charset specified in the Content-Type header. In list
context return the lower-cased bare content type followed by the upper-cased
charset. Both values will be C<undef> if not specified in the header.
=item $h->content_is_text
Returns TRUE if the Content-Type header field indicate that the
content is textual.
=item $h->content_is_html
Returns TRUE if the Content-Type header field indicate that the
content is some kind of HTML (including XHTML). This method can't be
used to set Content-Type.
=item $h->content_is_xhtml
Returns TRUE if the Content-Type header field indicate that the
content is XHTML. This method can't be used to set Content-Type.
=item $h->content_is_xml
Returns TRUE if the Content-Type header field indicate that the
content is XML. This method can't be used to set Content-Type.
=item $h->content_encoding
The Content-Encoding header field is used as a modifier to the
media type. When present, its value indicates what additional
encoding mechanism has been applied to the resource.
=item $h->content_length
A decimal number indicating the size in bytes of the message content.
=item $h->content_language
The natural language(s) of the intended audience for the message
content. The value is one or more language tags as defined by RFC
1766. Eg. "no" for some kind of Norwegian and "en-US" for English the
way it is written in the US.
=item $h->title
The title of the document. In libwww-perl this header will be
initialized automatically from the E<lt>TITLE>...E<lt>/TITLE> element
of HTML documents. I<This header is no longer part of the HTTP
standard.>
=item $h->user_agent
This header field is used in request messages and contains information
about the user agent originating the request. I<E.g.>:
$h->user_agent('Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0)');
=item $h->server
The server header field contains information about the software being
used by the originating server program handling the request.
=item $h->from
This header should contain an Internet e-mail address for the human
user who controls the requesting user agent. The address should be
machine-usable, as defined by RFC822. E.g.:
$h->from('King Kong <king@kong.com>');
I<This header is no longer part of the HTTP standard.>
=item $h->referer
Used to specify the address (URI) of the document from which the
requested resource address was obtained.
The "Free On-line Dictionary of Computing" as this to say about the
word I<referer>:
<World-Wide Web> A misspelling of "referrer" which
somehow made it into the {HTTP} standard. A given {web
page}'s referer (sic) is the {URL} of whatever web page
contains the link that the user followed to the current
page. Most browsers pass this information as part of a
request.
(1998-10-19)
By popular demand C<referrer> exists as an alias for this method so you
can avoid this misspelling in your programs and still send the right
thing on the wire.
When setting the referrer, this method removes the fragment from the
given URI if it is present, as mandated by RFC2616. Note that
the removal does I<not> happen automatically if using the header(),
push_header() or init_header() methods to set the referrer.
=item $h->www_authenticate
This header must be included as part of a C<401 Unauthorized> response.
The field value consist of a challenge that indicates the
authentication scheme and parameters applicable to the requested URI.
=item $h->proxy_authenticate
This header must be included in a C<407 Proxy Authentication Required>
response.
=item $h->authorization
=item $h->proxy_authorization
A user agent that wishes to authenticate itself with a server or a
proxy, may do so by including these headers.
=item $h->authorization_basic
This method is used to get or set an authorization header that use the
"Basic Authentication Scheme". In array context it will return two
values; the user name and the password. In scalar context it will
return I<"uname:password"> as a single string value.
When used to set the header value, it expects two arguments. I<E.g.>:
$h->authorization_basic($uname, $password);
The method will croak if the $uname contains a colon ':'.
=item $h->proxy_authorization_basic
Same as authorization_basic() but will set the "Proxy-Authorization"
header instead.
=back
=head1 NON-CANONICALIZED FIELD NAMES
The header field name spelling is normally canonicalized including the
'_' to '-' translation. There are some application where this is not
appropriate. Prefixing field names with ':' allow you to force a
specific spelling. For example if you really want a header field name
to show up as C<foo_bar> instead of "Foo-Bar", you might set it like
this:
$h->header(":foo_bar" => 1);
These field names are returned with the ':' intact for
$h->header_field_names and the $h->scan callback, but the colons do
not show in $h->as_string.
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: Class encapsulating HTTP Message headers

View File

@@ -0,0 +1,127 @@
package HTTP::Headers::Auth;
use strict;
use warnings;
our $VERSION = '7.00';
use HTTP::Headers;
package
HTTP::Headers;
BEGIN {
# we provide a new (and better) implementations below
undef(&www_authenticate);
undef(&proxy_authenticate);
}
require HTTP::Headers::Util;
sub _parse_authenticate
{
my @ret;
for (HTTP::Headers::Util::split_header_words(@_)) {
if (!defined($_->[1])) {
# this is a new auth scheme
push(@ret, shift(@$_) => {});
shift @$_;
}
if (@ret) {
# this a new parameter pair for the last auth scheme
while (@$_) {
my $k = shift @$_;
my $v = shift @$_;
$ret[-1]{$k} = $v;
}
}
else {
# something wrong, parameter pair without any scheme seen
# IGNORE
}
}
@ret;
}
sub _authenticate
{
my $self = shift;
my $header = shift;
my @old = $self->_header($header);
if (@_) {
$self->remove_header($header);
my @new = @_;
while (@new) {
my $a_scheme = shift(@new);
if ($a_scheme =~ /\s/) {
# assume complete valid value, pass it through
$self->push_header($header, $a_scheme);
}
else {
my @param;
if (@new) {
my $p = $new[0];
if (ref($p) eq "ARRAY") {
@param = @$p;
shift(@new);
}
elsif (ref($p) eq "HASH") {
@param = %$p;
shift(@new);
}
}
my $val = ucfirst(lc($a_scheme));
if (@param) {
my $sep = " ";
while (@param) {
my $k = shift @param;
my $v = shift @param;
if ($v =~ /[^0-9a-zA-Z]/ || lc($k) eq "realm") {
# must quote the value
$v =~ s,([\\\"]),\\$1,g;
$v = qq("$v");
}
$val .= "$sep$k=$v";
$sep = ", ";
}
}
$self->push_header($header, $val);
}
}
}
return unless defined wantarray;
wantarray ? _parse_authenticate(@old) : join(", ", @old);
}
sub www_authenticate { shift->_authenticate("WWW-Authenticate", @_) }
sub proxy_authenticate { shift->_authenticate("Proxy-Authenticate", @_) }
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
HTTP::Headers::Auth
=head1 VERSION
version 7.00
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut

View File

@@ -0,0 +1,123 @@
package HTTP::Headers::ETag;
use strict;
use warnings;
our $VERSION = '7.00';
require HTTP::Date;
require HTTP::Headers;
package
HTTP::Headers;
sub _etags
{
my $self = shift;
my $header = shift;
my @old = _split_etag_list($self->_header($header));
if (@_) {
$self->_header($header => join(", ", _split_etag_list(@_)));
}
wantarray ? @old : join(", ", @old);
}
sub etag { shift->_etags("ETag", @_); }
sub if_match { shift->_etags("If-Match", @_); }
sub if_none_match { shift->_etags("If-None-Match", @_); }
sub if_range {
# Either a date or an entity-tag
my $self = shift;
my @old = $self->_header("If-Range");
if (@_) {
my $new = shift;
if (!defined $new) {
$self->remove_header("If-Range");
}
elsif ($new =~ /^\d+$/) {
$self->_date_header("If-Range", $new);
}
else {
$self->_etags("If-Range", $new);
}
}
return unless defined(wantarray);
for (@old) {
my $t = HTTP::Date::str2time($_);
$_ = $t if $t;
}
wantarray ? @old : join(", ", @old);
}
# Split a list of entity tag values. The return value is a list
# consisting of one element per entity tag. Suitable for parsing
# headers like C<If-Match>, C<If-None-Match>. You might even want to
# use it on C<ETag> and C<If-Range> entity tag values, because it will
# normalize them to the common form.
#
# entity-tag = [ weak ] opaque-tag
# weak = "W/"
# opaque-tag = quoted-string
sub _split_etag_list
{
my(@val) = @_;
my @res;
for (@val) {
while (length) {
my $weak = "";
$weak = "W/" if s,^\s*[wW]/,,;
my $etag = "";
if (s/^\s*(\"[^\"\\]*(?:\\.[^\"\\]*)*\")//) {
push(@res, "$weak$1");
}
elsif (s/^\s*,//) {
push(@res, qq(W/"")) if $weak;
}
elsif (s/^\s*([^,\s]+)//) {
$etag = $1;
$etag =~ s/([\"\\])/\\$1/g;
push(@res, qq($weak"$etag"));
}
elsif (s/^\s+// || !length) {
push(@res, qq(W/"")) if $weak;
}
else {
die "This should not happen: '$_'";
}
}
}
@res;
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
HTTP::Headers::ETag
=head1 VERSION
version 7.00
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut

View File

@@ -0,0 +1,213 @@
package HTTP::Headers::Util;
use strict;
use warnings;
our $VERSION = '7.00';
use Exporter 5.57 'import';
our @EXPORT_OK=qw(split_header_words _split_header_words join_header_words);
sub split_header_words {
my @res = &_split_header_words;
for my $arr (@res) {
for (my $i = @$arr - 2; $i >= 0; $i -= 2) {
$arr->[$i] = lc($arr->[$i]);
}
}
return @res;
}
sub _split_header_words
{
my(@val) = @_;
my @res;
for (@val) {
my @cur;
while (length) {
if (s/^\s*(=*[^\s=;,]+)//) { # 'token' or parameter 'attribute'
push(@cur, $1);
# a quoted value
if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
my $val = $1;
$val =~ s/\\(.)/$1/g;
push(@cur, $val);
# some unquoted value
}
elsif (s/^\s*=\s*([^;,\s]*)//) {
my $val = $1;
$val =~ s/\s+$//;
push(@cur, $val);
# no value, a lone token
}
else {
push(@cur, undef);
}
}
elsif (s/^\s*,//) {
push(@res, [@cur]) if @cur;
@cur = ();
}
elsif (s/^\s*;// || s/^\s+// || s/^=//) {
# continue
}
else {
die "This should not happen: '$_'";
}
}
push(@res, \@cur) if @cur;
}
@res;
}
sub join_header_words
{
@_ = ([@_]) if @_ && !ref($_[0]);
my @res;
for (@_) {
my @cur = @$_;
my @attr;
while (@cur) {
my $k = shift @cur;
my $v = shift @cur;
if (defined $v) {
if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) {
$v =~ s/([\"\\])/\\$1/g; # escape " and \
$k .= qq(="$v");
}
else {
# token
$k .= "=$v";
}
}
push(@attr, $k);
}
push(@res, join("; ", @attr)) if @attr;
}
join(", ", @res);
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Headers::Util - Header value parsing utility functions
=head1 VERSION
version 7.00
=head1 SYNOPSIS
use HTTP::Headers::Util qw(split_header_words);
@values = split_header_words($h->header("Content-Type"));
=head1 DESCRIPTION
This module provides a few functions that helps parsing and
construction of valid HTTP header values. None of the functions are
exported by default.
The following functions are available:
=over 4
=item split_header_words( @header_values )
This function will parse the header values given as argument into a
list of anonymous arrays containing key/value pairs. The function
knows how to deal with ",", ";" and "=" as well as quoted values after
"=". A list of space separated tokens are parsed as if they were
separated by ";".
If the @header_values passed as argument contains multiple values,
then they are treated as if they were a single value separated by
comma ",".
This means that this function is useful for parsing header fields that
follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
the requirement for tokens).
headers = #header
header = (token | parameter) *( [";"] (token | parameter))
token = 1*<any CHAR except CTLs or separators>
separators = "(" | ")" | "<" | ">" | "@"
| "," | ";" | ":" | "\" | <">
| "/" | "[" | "]" | "?" | "="
| "{" | "}" | SP | HT
quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
qdtext = <any TEXT except <">>
quoted-pair = "\" CHAR
parameter = attribute "=" value
attribute = token
value = token | quoted-string
Each I<header> is represented by an anonymous array of key/value
pairs. The keys will be all be forced to lower case.
The value for a simple token (not part of a parameter) is C<undef>.
Syntactically incorrect headers will not necessarily be parsed as you
would want.
This is easier to describe with some examples:
split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz');
split_header_words('text/html; charset="iso-8859-1"');
split_header_words('Basic realm="\\"foo\\\\bar\\""');
will return
[foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
['text/html' => undef, charset => 'iso-8859-1']
[basic => undef, realm => "\"foo\\bar\""]
If you don't want the function to convert tokens and attribute keys to
lower case you can call it as C<_split_header_words> instead (with a
leading underscore).
=item join_header_words( @arrays )
This will do the opposite of the conversion done by split_header_words().
It takes a list of anonymous arrays as arguments (or a list of
key/value pairs) and produces a single header value. Attribute values
are quoted if needed.
Example:
join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
join_header_words("text/plain" => undef, charset => "iso-8859/1");
will both return the string:
text/plain; charset="iso-8859/1"
=back
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: Header value parsing utility functions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,528 @@
package HTTP::Negotiate;
$VERSION = "6.01";
sub Version { $VERSION; }
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(choose);
require HTTP::Headers;
$DEBUG = 0;
sub choose ($;$)
{
my($variants, $request) = @_;
my(%accept);
unless (defined $request) {
# Create a request object from the CGI environment variables
$request = HTTP::Headers->new;
$request->header('Accept', $ENV{HTTP_ACCEPT})
if $ENV{HTTP_ACCEPT};
$request->header('Accept-Charset', $ENV{HTTP_ACCEPT_CHARSET})
if $ENV{HTTP_ACCEPT_CHARSET};
$request->header('Accept-Encoding', $ENV{HTTP_ACCEPT_ENCODING})
if $ENV{HTTP_ACCEPT_ENCODING};
$request->header('Accept-Language', $ENV{HTTP_ACCEPT_LANGUAGE})
if $ENV{HTTP_ACCEPT_LANGUAGE};
}
# Get all Accept values from the request. Build a hash initialized
# like this:
#
# %accept = ( type => { 'audio/*' => { q => 0.2, mbx => 20000 },
# 'audio/basic' => { q => 1 },
# },
# language => { 'no' => { q => 1 },
# }
# );
$request->scan(sub {
my($key, $val) = @_;
my $type;
if ($key =~ s/^Accept-//) {
$type = lc($key);
}
elsif ($key eq "Accept") {
$type = "type";
}
else {
return;
}
$val =~ s/\s+//g;
my $default_q = 1;
for my $name (split(/,/, $val)) {
my(%param, $param);
if ($name =~ s/;(.*)//) {
for $param (split(/;/, $1)) {
my ($pk, $pv) = split(/=/, $param, 2);
$param{lc $pk} = $pv;
}
}
$name = lc $name;
if (defined $param{'q'}) {
$param{'q'} = 1 if $param{'q'} > 1;
$param{'q'} = 0 if $param{'q'} < 0;
}
else {
$param{'q'} = $default_q;
# This makes sure that the first ones are slightly better off
# and therefore more likely to be chosen.
$default_q -= 0.0001;
}
$accept{$type}{$name} = \%param;
}
});
# Check if any of the variants specify a language. We do this
# because it influences how we treat those without (they default to
# 0.5 instead of 1).
my $any_lang = 0;
for $var (@$variants) {
if ($var->[5]) {
$any_lang = 1;
last;
}
}
if ($DEBUG) {
print "Negotiation parameters in the request\n";
for $type (keys %accept) {
print " $type:\n";
for $name (keys %{$accept{$type}}) {
print " $name\n";
for $pv (keys %{$accept{$type}{$name}}) {
print " $pv = $accept{$type}{$name}{$pv}\n";
}
}
}
}
my @Q = (); # This is where we collect the results of the
# quality calculations
# Calculate quality for all the variants that are available.
for (@$variants) {
my($id, $qs, $ct, $enc, $cs, $lang, $bs) = @$_;
$qs = 1 unless defined $qs;
$ct = '' unless defined $ct;
$bs = 0 unless defined $bs;
$lang = lc($lang) if $lang; # lg tags are always case-insensitive
if ($DEBUG) {
print "\nEvaluating $id (ct='$ct')\n";
printf " qs = %.3f\n", $qs;
print " enc = $enc\n" if $enc && !ref($enc);
print " enc = @$enc\n" if $enc && ref($enc);
print " cs = $cs\n" if $cs;
print " lang = $lang\n" if $lang;
print " bs = $bs\n" if $bs;
}
# Calculate encoding quality
my $qe = 1;
# If the variant has no assigned Content-Encoding, or if no
# Accept-Encoding field is present, then the value assigned
# is "qe=1". If *all* of the variant's content encodings
# are listed in the Accept-Encoding field, then the value
# assigned is "qw=1". If *any* of the variant's content
# encodings are not listed in the provided Accept-Encoding
# field, then the value assigned is "qe=0"
if (exists $accept{'encoding'} && $enc) {
my @enc = ref($enc) ? @$enc : ($enc);
for (@enc) {
print "Is encoding $_ accepted? " if $DEBUG;
unless(exists $accept{'encoding'}{$_}) {
print "no\n" if $DEBUG;
$qe = 0;
last;
}
else {
print "yes\n" if $DEBUG;
}
}
}
# Calculate charset quality
my $qc = 1;
# If the variant's media-type has no charset parameter,
# or the variant's charset is US-ASCII, or if no Accept-Charset
# field is present, then the value assigned is "qc=1". If the
# variant's charset is listed in the Accept-Charset field,
# then the value assigned is "qc=1. Otherwise, if the variant's
# charset is not listed in the provided Accept-Encoding field,
# then the value assigned is "qc=0".
if (exists $accept{'charset'} && $cs && $cs ne 'us-ascii' ) {
$qc = 0 unless $accept{'charset'}{$cs};
}
# Calculate language quality
my $ql = 1;
if ($lang && exists $accept{'language'}) {
my @lang = ref($lang) ? @$lang : ($lang);
# If any of the variant's content languages are listed
# in the Accept-Language field, the the value assigned is
# the largest of the "q" parameter values for those language
# tags.
my $q = undef;
for (@lang) {
next unless exists $accept{'language'}{$_};
my $this_q = $accept{'language'}{$_}{'q'};
$q = $this_q unless defined $q;
$q = $this_q if $this_q > $q;
}
if(defined $q) {
$DEBUG and print " -- Exact language match at q=$q\n";
}
else {
# If there was no exact match and at least one of
# the Accept-Language field values is a complete
# subtag prefix of the content language tag(s), then
# the "q" parameter value of the largest matching
# prefix is used.
$DEBUG and print " -- No exact language match\n";
my $selected = undef;
for $al (keys %{ $accept{'language'} }) {
if (index($al, "$lang-") == 0) {
# $lang starting with $al isn't enough, or else
# Accept-Language: hu (Hungarian) would seem
# to accept a document in hup (Hupa)
$DEBUG and print " -- $al ISA $lang\n";
$selected = $al unless defined $selected;
$selected = $al if length($al) > length($selected);
}
else {
$DEBUG and print " -- $lang isn't a $al\n";
}
}
$q = $accept{'language'}{$selected}{'q'} if $selected;
# If none of the variant's content language tags or
# tag prefixes are listed in the provided
# Accept-Language field, then the value assigned
# is "ql=0.001"
$q = 0.001 unless defined $q;
}
$ql = $q;
}
else {
$ql = 0.5 if $any_lang && exists $accept{'language'};
}
my $q = 1;
my $mbx = undef;
# If no Accept field is given, then the value assigned is "q=1".
# If at least one listed media range matches the variant's media
# type, then the "q" parameter value assigned to the most specific
# of those matched is used (e.g. "text/html;version=3.0" is more
# specific than "text/html", which is more specific than "text/*",
# which in turn is more specific than "*/*"). If not media range
# in the provided Accept field matches the variant's media type,
# then the value assigned is "q=0".
if (exists $accept{'type'} && $ct) {
# First we clean up our content-type
$ct =~ s/\s+//g;
my $params = "";
$params = $1 if $ct =~ s/;(.*)//;
my($type, $subtype) = split("/", $ct, 2);
my %param = ();
for $param (split(/;/, $params)) {
my($pk,$pv) = split(/=/, $param, 2);
$param{$pk} = $pv;
}
my $sel_q = undef;
my $sel_mbx = undef;
my $sel_specificness = 0;
ACCEPT_TYPE:
for $at (keys %{ $accept{'type'} }) {
print "Consider $at...\n" if $DEBUG;
my($at_type, $at_subtype) = split("/", $at, 2);
# Is it a match on the type
next if $at_type ne '*' && $at_type ne $type;
next if $at_subtype ne '*' && $at_subtype ne $subtype;
my $specificness = 0;
$specificness++ if $at_type ne '*';
$specificness++ if $at_subtype ne '*';
# Let's see if content-type parameters also match
while (($pk, $pv) = each %param) {
print "Check if $pk = $pv is true\n" if $DEBUG;
next unless exists $accept{'type'}{$at}{$pk};
next ACCEPT_TYPE
unless $accept{'type'}{$at}{$pk} eq $pv;
print "yes it is!!\n" if $DEBUG;
$specificness++;
}
print "Hurray, type match with specificness = $specificness\n"
if $DEBUG;
if (!defined($sel_q) || $sel_specificness < $specificness) {
$sel_q = $accept{'type'}{$at}{'q'};
$sel_mbx = $accept{'type'}{$at}{'mbx'};
$sel_specificness = $specificness;
}
}
$q = $sel_q || 0;
$mbx = $sel_mbx;
}
my $Q;
if (!defined($mbx) || $mbx >= $bs) {
$Q = $qs * $qe * $qc * $ql * $q;
}
else {
$Q = 0;
print "Variant's size is too large ==> Q=0\n" if $DEBUG;
}
if ($DEBUG) {
$mbx = "undef" unless defined $mbx;
printf "Q=%.4f", $Q;
print " (q=$q, mbx=$mbx, qe=$qe, qc=$qc, ql=$ql, qs=$qs)\n";
}
push(@Q, [$id, $Q, $bs]);
}
@Q = sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @Q;
return @Q if wantarray;
return undef unless @Q;
return undef if $Q[0][1] == 0;
$Q[0][0];
}
1;
__END__
=head1 NAME
HTTP::Negotiate - choose a variant to serve
=head1 SYNOPSIS
use HTTP::Negotiate qw(choose);
# ID QS Content-Type Encoding Char-Set Lang Size
$variants =
[['var1', 1.000, 'text/html', undef, 'iso-8859-1', 'en', 3000],
['var2', 0.950, 'text/plain', 'gzip', 'us-ascii', 'no', 400],
['var3', 0.3, 'image/gif', undef, undef, undef, 43555],
];
@preferred = choose($variants, $request_headers);
$the_one = choose($variants);
=head1 DESCRIPTION
This module provides a complete implementation of the HTTP content
negotiation algorithm specified in F<draft-ietf-http-v11-spec-00.ps>
chapter 12. Content negotiation allows for the selection of a
preferred content representation based upon attributes of the
negotiable variants and the value of the various Accept* header fields
in the request.
The variants are ordered by preference by calling the function
choose().
The first parameter is reference to an array of the variants to
choose among.
Each element in this array is an array with the values [$id, $qs,
$content_type, $content_encoding, $charset, $content_language,
$content_length] whose meanings are described
below. The $content_encoding and $content_language can be either a
single scalar value or an array reference if there are several values.
The second optional parameter is either a HTTP::Headers or a HTTP::Request
object which is searched for "Accept*" headers. If this
parameter is missing, then the accept specification is initialized
from the CGI environment variables HTTP_ACCEPT, HTTP_ACCEPT_CHARSET,
HTTP_ACCEPT_ENCODING and HTTP_ACCEPT_LANGUAGE.
In an array context, choose() returns a list of [variant
identifier, calculated quality, size] tuples. The values are sorted by
quality, highest quality first. If the calculated quality is the same
for two variants, then they are sorted by size (smallest first). I<E.g.>:
(['var1', 1, 2000], ['var2', 0.3, 512], ['var3', 0.3, 1024]);
Note that also zero quality variants are included in the return list
even if these should never be served to the client.
In a scalar context, it returns the identifier of the variant with the
highest score or C<undef> if none have non-zero quality.
If the $HTTP::Negotiate::DEBUG variable is set to TRUE, then a lot of
noise is generated on STDOUT during evaluation of choose().
=head1 VARIANTS
A variant is described by a list of the following values. If the
attribute does not make sense or is unknown for a variant, then use
C<undef> instead.
=over 3
=item identifier
This is a string that you use as the name for the variant. This
identifier for the preferred variants returned by choose().
=item qs
This is a number between 0.000 and 1.000 that describes the "source
quality". This is what F<draft-ietf-http-v11-spec-00.ps> says about this
value:
Source quality is measured by the content provider as representing the
amount of degradation from the original source. For example, a
picture in JPEG form would have a lower qs when translated to the XBM
format, and much lower qs when translated to an ASCII-art
representation. Note, however, that this is a function of the source
- an original piece of ASCII-art may degrade in quality if it is
captured in JPEG form. The qs values should be assigned to each
variant by the content provider; if no qs value has been assigned, the
default is generally "qs=1".
=item content-type
This is the media type of the variant. The media type does not
include a charset attribute, but might contain other parameters.
Examples are:
text/html
text/html;version=2.0
text/plain
image/gif
image/jpg
=item content-encoding
This is one or more content encodings that has been applied to the
variant. The content encoding is generally used as a modifier to the
content media type. The most common content encodings are:
gzip
compress
=item content-charset
This is the character set used when the variant contains text.
The charset value should generally be C<undef> or one of these:
us-ascii
iso-8859-1 ... iso-8859-9
iso-2022-jp
iso-2022-jp-2
iso-2022-kr
unicode-1-1
unicode-1-1-utf-7
unicode-1-1-utf-8
=item content-language
This describes one or more languages that are used in the variant.
Language is described like this in F<draft-ietf-http-v11-spec-00.ps>: A
language is in this context a natural language spoken, written, or
otherwise conveyed by human beings for communication of information to
other human beings. Computer languages are explicitly excluded.
The language tags are defined by RFC 3066. Examples
are:
no Norwegian
en International English
en-US US English
en-cockney
=item content-length
This is the number of bytes used to represent the content.
=back
=head1 ACCEPT HEADERS
The following Accept* headers can be used for describing content
preferences in a request (This description is an edited extract from
F<draft-ietf-http-v11-spec-00.ps>):
=over 3
=item Accept
This header can be used to indicate a list of media ranges which are
acceptable as a response to the request. The "*" character is used to
group media types into ranges, with "*/*" indicating all media types
and "type/*" indicating all subtypes of that type.
The parameter q is used to indicate the quality factor, which
represents the user's preference for that range of media types. The
parameter mbx gives the maximum acceptable size of the response
content. The default values are: q=1 and mbx=infinity. If no Accept
header is present, then the client accepts all media types with q=1.
For example:
Accept: audio/*;q=0.2;mbx=200000, audio/basic
would mean: "I prefer audio/basic (of any size), but send me any audio
type if it is the best available after an 80% mark-down in quality and
its size is less than 200000 bytes"
=item Accept-Charset
Used to indicate what character sets are acceptable for the response.
The "us-ascii" character set is assumed to be acceptable for all user
agents. If no Accept-Charset field is given, the default is that any
charset is acceptable. Example:
Accept-Charset: iso-8859-1, unicode-1-1
=item Accept-Encoding
Restricts the Content-Encoding values which are acceptable in the
response. If no Accept-Encoding field is present, the server may
assume that the client will accept any content encoding. An empty
Accept-Encoding means that no content encoding is acceptable. Example:
Accept-Encoding: compress, gzip
=item Accept-Language
This field is similar to Accept, but restricts the set of natural
languages that are preferred in a response. Each language may be
given an associated quality value which represents an estimate of the
user's comprehension of that language. For example:
Accept-Language: no, en-gb;q=0.8, de;q=0.55
would mean: "I prefer Norwegian, but will accept British English (with
80% comprehension) or German (with 55% comprehension).
=back
=head1 COPYRIGHT
Copyright 1996,2001 Gisle Aas.
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=head1 AUTHOR
Gisle Aas <gisle@aas.no>
=cut

View File

@@ -0,0 +1,352 @@
package HTTP::Request;
use strict;
use warnings;
our $VERSION = '7.00';
use parent 'HTTP::Message';
sub new
{
my($class, $method, $uri, $header, $content) = @_;
my $self = $class->SUPER::new($header, $content);
$self->method($method);
$self->uri($uri);
$self;
}
sub parse
{
my($class, $str) = @_;
Carp::carp('Undefined argument to parse()') if $^W && ! defined $str;
my $request_line;
if (defined $str && $str =~ s/^(.*)\n//) {
$request_line = $1;
}
else {
$request_line = $str;
$str = "";
}
my $self = $class->SUPER::parse($str);
if (defined $request_line) {
my($method, $uri, $protocol) = split(' ', $request_line);
$self->method($method);
$self->uri($uri) if defined($uri);
$self->protocol($protocol) if $protocol;
}
$self;
}
sub clone
{
my $self = shift;
my $clone = bless $self->SUPER::clone, ref($self);
$clone->method($self->method);
$clone->uri($self->uri);
$clone;
}
sub method
{
shift->_elem('_method', @_);
}
sub uri
{
my $self = shift;
my $old = $self->{'_uri'};
if (@_) {
my $uri = shift;
if (!defined $uri) {
# that's ok
}
elsif (ref $uri) {
Carp::croak("A URI can't be a " . ref($uri) . " reference")
if ref($uri) eq 'HASH' or ref($uri) eq 'ARRAY';
Carp::croak("Can't use a " . ref($uri) . " object as a URI")
unless $uri->can('scheme') && $uri->can('canonical');
$uri = $uri->clone;
unless ($HTTP::URI_CLASS eq "URI") {
# Argh!! Hate this... old LWP legacy!
eval { local $SIG{__DIE__}; $uri = $uri->abs; };
die $@ if $@ && $@ !~ /Missing base argument/;
}
}
else {
$uri = $HTTP::URI_CLASS->new($uri);
}
$self->{'_uri'} = $uri;
delete $self->{'_uri_canonical'};
}
$old;
}
*url = \&uri; # legacy
sub uri_canonical
{
my $self = shift;
my $uri = $self->{_uri};
if (defined (my $canon = $self->{_uri_canonical})) {
# early bailout if these are the exact same string;
# rely on stringification of the URI objects
return $canon if $canon eq $uri;
}
# otherwise we need to refresh the memoized value
$self->{_uri_canonical} = $uri->canonical;
}
sub accept_decodable
{
my $self = shift;
$self->header("Accept-Encoding", scalar($self->decodable));
}
sub as_string
{
my $self = shift;
my($eol) = @_;
$eol = "\n" unless defined $eol;
my $req_line = $self->method || "-";
my $uri = $self->uri;
$uri = (defined $uri) ? $uri->as_string : "-";
$req_line .= " $uri";
my $proto = $self->protocol;
$req_line .= " $proto" if $proto;
return join($eol, $req_line, $self->SUPER::as_string(@_));
}
sub dump
{
my $self = shift;
my @pre = ($self->method || "-", $self->uri || "-");
if (my $prot = $self->protocol) {
push(@pre, $prot);
}
return $self->SUPER::dump(
preheader => join(" ", @pre),
@_,
);
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Request - HTTP style request message
=head1 VERSION
version 7.00
=head1 SYNOPSIS
require HTTP::Request;
$request = HTTP::Request->new(GET => 'http://www.example.com/');
and usually used like this:
$ua = LWP::UserAgent->new;
$response = $ua->request($request);
=head1 DESCRIPTION
C<HTTP::Request> is a class encapsulating HTTP style requests,
consisting of a request line, some headers, and a content body. Note
that the LWP library uses HTTP style requests even for non-HTTP
protocols. Instances of this class are usually passed to the
request() method of an C<LWP::UserAgent> object.
C<HTTP::Request> is a subclass of C<HTTP::Message> and therefore
inherits its methods. The following additional methods are available:
=over 4
=item $r = HTTP::Request->new( $method, $uri )
=item $r = HTTP::Request->new( $method, $uri, $header )
=item $r = HTTP::Request->new( $method, $uri, $header, $content )
Constructs a new C<HTTP::Request> object describing a request on the
object $uri using method $method. The $method argument must be a
string. The $uri argument can be either a string, or a reference to a
C<URI> object. The optional $header argument should be a reference to
an C<HTTP::Headers> object or a plain array reference of key/value
pairs. The optional $content argument should be a string of bytes.
=item $r = HTTP::Request->parse( $str )
This constructs a new request object by parsing the given string.
=item $r->method
=item $r->method( $val )
This is used to get/set the method attribute. The method should be a
short string like "GET", "HEAD", "PUT", "PATCH" or "POST".
=item $r->uri
=item $r->uri( $val )
This is used to get/set the uri attribute. The $val can be a
reference to a URI object or a plain string. If a string is given,
then it should be parsable as an absolute URI.
=item $r->header( $field )
=item $r->header( $field => $value )
This is used to get/set header values and it is inherited from
C<HTTP::Headers> via C<HTTP::Message>. See L<HTTP::Headers> for
details and other similar methods that can be used to access the
headers.
=item $r->accept_decodable
This will set the C<Accept-Encoding> header to the list of encodings
that decoded_content() can decode.
=item $r->content
=item $r->content( $bytes )
This is used to get/set the content and it is inherited from the
C<HTTP::Message> base class. See L<HTTP::Message> for details and
other methods that can be used to access the content.
Note that the content should be a string of bytes. Strings in perl
can contain characters outside the range of a byte. The C<Encode>
module can be used to turn such strings into a string of bytes.
=item $r->as_string
=item $r->as_string( $eol )
Method returning a textual representation of the request.
=back
=head1 EXAMPLES
Creating requests to be sent with L<LWP::UserAgent> or others can be easy. Here
are a few examples.
=head2 Simple POST
Here, we'll create a simple POST request that could be used to send JSON data
to an endpoint.
#!/usr/bin/env perl
use strict;
use warnings;
use HTTP::Request ();
use JSON::MaybeXS qw(encode_json);
my $url = 'https://www.example.com/api/user/123';
my $header = ['Content-Type' => 'application/json; charset=UTF-8'];
my $data = {foo => 'bar', baz => 'quux'};
my $encoded_data = encode_json($data);
my $r = HTTP::Request->new('POST', $url, $header, $encoded_data);
# at this point, we could send it via LWP::UserAgent
# my $ua = LWP::UserAgent->new();
# my $res = $ua->request($r);
=head2 Batch POST Request
Some services, like Google, allow multiple requests to be sent in one batch.
L<https://developers.google.com/drive/v3/web/batch> for example. Using the
C<add_part> method from L<HTTP::Message> makes this simple.
#!/usr/bin/env perl
use strict;
use warnings;
use HTTP::Request ();
use JSON::MaybeXS qw(encode_json);
my $auth_token = 'auth_token';
my $batch_url = 'https://www.googleapis.com/batch';
my $url = 'https://www.googleapis.com/drive/v3/files/fileId/permissions?fields=id';
my $url_no_email = 'https://www.googleapis.com/drive/v3/files/fileId/permissions?fields=id&sendNotificationEmail=false';
# generate a JSON post request for one of the batch entries
my $req1 = build_json_request($url, {
emailAddress => 'example@appsrocks.com',
role => "writer",
type => "user",
});
# generate a JSON post request for one of the batch entries
my $req2 = build_json_request($url_no_email, {
domain => "appsrocks.com",
role => "reader",
type => "domain",
});
# generate a multipart request to send all of the other requests
my $r = HTTP::Request->new('POST', $batch_url, [
'Accept-Encoding' => 'gzip',
# if we don't provide a boundary here, HTTP::Message will generate
# one for us. We could use UUID::uuid() here if we wanted.
'Content-Type' => 'multipart/mixed; boundary=END_OF_PART'
]);
# add the two POST requests to the main request
$r->add_part($req1, $req2);
# at this point, we could send it via LWP::UserAgent
# my $ua = LWP::UserAgent->new();
# my $res = $ua->request($r);
exit();
sub build_json_request {
my ($url, $href) = @_;
my $header = ['Authorization' => "Bearer $auth_token", 'Content-Type' => 'application/json; charset=UTF-8'];
return HTTP::Request->new('POST', $url, $header, encode_json($href));
}
=head1 SEE ALSO
L<HTTP::Headers>, L<HTTP::Message>, L<HTTP::Request::Common>,
L<HTTP::Response>
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: HTTP style request message

View File

@@ -0,0 +1,577 @@
package HTTP::Request::Common;
use strict;
use warnings;
our $VERSION = '7.00';
our $DYNAMIC_FILE_UPLOAD ||= 0; # make it defined (don't know why)
our $READ_BUFFER_SIZE = 8192;
use Exporter 5.57 'import';
our @EXPORT =qw(GET HEAD PUT PATCH POST OPTIONS);
our @EXPORT_OK = qw($DYNAMIC_FILE_UPLOAD DELETE);
require HTTP::Request;
use Carp();
use File::Spec;
my $CRLF = "\015\012"; # "\r\n" is not portable
sub GET { _simple_req('GET', @_); }
sub HEAD { _simple_req('HEAD', @_); }
sub DELETE { _simple_req('DELETE', @_); }
sub PATCH { request_type_with_data('PATCH', @_); }
sub POST { request_type_with_data('POST', @_); }
sub PUT { request_type_with_data('PUT', @_); }
sub OPTIONS { request_type_with_data('OPTIONS', @_); }
sub request_type_with_data
{
my $type = shift;
my $url = shift;
my $req = HTTP::Request->new($type => $url);
my $content;
$content = shift if @_ and ref $_[0];
my($k, $v);
while (($k,$v) = splice(@_, 0, 2)) {
if (lc($k) eq 'content') {
$content = $v;
}
else {
$req->push_header($k, $v);
}
}
my $ct = $req->header('Content-Type');
unless ($ct) {
$ct = 'application/x-www-form-urlencoded';
}
elsif ($ct eq 'form-data') {
$ct = 'multipart/form-data';
}
if (ref $content) {
if ($ct =~ m,^multipart/form-data\s*(;|$),i) {
require HTTP::Headers::Util;
my @v = HTTP::Headers::Util::split_header_words($ct);
Carp::carp("Multiple Content-Type headers") if @v > 1;
@v = @{$v[0]};
my $boundary;
my $boundary_index;
for (my @tmp = @v; @tmp;) {
my($k, $v) = splice(@tmp, 0, 2);
if ($k eq "boundary") {
$boundary = $v;
$boundary_index = @v - @tmp - 1;
last;
}
}
($content, $boundary) = form_data($content, $boundary, $req);
if ($boundary_index) {
$v[$boundary_index] = $boundary;
}
else {
push(@v, boundary => $boundary);
}
$ct = HTTP::Headers::Util::join_header_words(@v);
}
else {
# We use a temporary URI object to format
# the application/x-www-form-urlencoded content.
require URI;
my $url = URI->new('http:');
$url->query_form(ref($content) eq "HASH" ? %$content : @$content);
$content = $url->query;
}
}
$req->header('Content-Type' => $ct); # might be redundant
if (defined($content)) {
$req->header('Content-Length' =>
length($content)) unless ref($content);
$req->content($content);
}
else {
$req->header('Content-Length' => 0);
}
$req;
}
sub _simple_req
{
my($method, $url) = splice(@_, 0, 2);
my $req = HTTP::Request->new($method => $url);
my($k, $v);
my $content;
while (($k,$v) = splice(@_, 0, 2)) {
if (lc($k) eq 'content') {
$req->add_content($v);
$content++;
}
else {
$req->push_header($k, $v);
}
}
if ($content && !defined($req->header("Content-Length"))) {
$req->header("Content-Length", length(${$req->content_ref}));
}
$req;
}
sub form_data # RFC1867
{
my($data, $boundary, $req) = @_;
my @data = ref($data) eq "HASH" ? %$data : @$data; # copy
my $fhparts;
my @parts;
while (my ($k,$v) = splice(@data, 0, 2)) {
if (!ref($v)) {
$k =~ s/([\\\"])/\\$1/g; # escape quotes and backslashes
no warnings 'uninitialized';
push(@parts,
qq(Content-Disposition: form-data; name="$k"$CRLF$CRLF$v));
}
else {
my($file, $usename, @headers) = @$v;
unless (defined $usename) {
$usename = $file;
$usename = (File::Spec->splitpath($usename))[-1] if defined($usename);
}
$k =~ s/([\\\"])/\\$1/g;
my $disp = qq(form-data; name="$k");
if (defined($usename) and length($usename)) {
$usename =~ s/([\\\"])/\\$1/g;
$disp .= qq(; filename="$usename");
}
my $content = "";
my $h = HTTP::Headers->new(@headers);
if ($file) {
open(my $fh, "<", $file) or Carp::croak("Can't open file $file: $!");
binmode($fh);
if ($DYNAMIC_FILE_UPLOAD) {
# will read file later, close it now in order to
# not accumulate to many open file handles
close($fh);
$content = \$file;
}
else {
local($/) = undef; # slurp files
$content = <$fh>;
close($fh);
}
unless ($h->header("Content-Type")) {
require LWP::MediaTypes;
LWP::MediaTypes::guess_media_type($file, $h);
}
}
if ($h->header("Content-Disposition")) {
# just to get it sorted first
$disp = $h->header("Content-Disposition");
$h->remove_header("Content-Disposition");
}
if ($h->header("Content")) {
$content = $h->header("Content");
$h->remove_header("Content");
}
my $head = join($CRLF, "Content-Disposition: $disp",
$h->as_string($CRLF),
"");
if (ref $content) {
push(@parts, [$head, $$content]);
$fhparts++;
}
else {
push(@parts, $head . $content);
}
}
}
return ("", "none") unless @parts;
my $content;
if ($fhparts) {
$boundary = boundary(10) # hopefully enough randomness
unless $boundary;
# add the boundaries to the @parts array
for (1..@parts-1) {
splice(@parts, $_*2-1, 0, "$CRLF--$boundary$CRLF");
}
unshift(@parts, "--$boundary$CRLF");
push(@parts, "$CRLF--$boundary--$CRLF");
# See if we can generate Content-Length header
my $length = 0;
for (@parts) {
if (ref $_) {
my ($head, $f) = @$_;
my $file_size;
unless ( -f $f && ($file_size = -s _) ) {
# The file is either a dynamic file like /dev/audio
# or perhaps a file in the /proc file system where
# stat may return a 0 size even though reading it
# will produce data. So we cannot make
# a Content-Length header.
undef $length;
last;
}
$length += $file_size + length $head;
}
else {
$length += length;
}
}
$length && $req->header('Content-Length' => $length);
# set up a closure that will return content piecemeal
$content = sub {
for (;;) {
unless (@parts) {
defined $length && $length != 0 &&
Carp::croak "length of data sent did not match calculated Content-Length header. Probably because uploaded file changed in size during transfer.";
return;
}
my $p = shift @parts;
unless (ref $p) {
$p .= shift @parts while @parts && !ref($parts[0]);
defined $length && ($length -= length $p);
return $p;
}
my($buf, $fh) = @$p;
unless (ref($fh)) {
my $file = $fh;
undef($fh);
open($fh, "<", $file) || Carp::croak("Can't open file $file: $!");
binmode($fh);
}
my $buflength = length $buf;
my $n = read($fh, $buf, $READ_BUFFER_SIZE, $buflength);
if ($n) {
$buflength += $n;
unshift(@parts, ["", $fh]);
}
else {
close($fh);
}
if ($buflength) {
defined $length && ($length -= $buflength);
return $buf
}
}
};
}
else {
$boundary = boundary() unless $boundary;
my $bno = 0;
CHECK_BOUNDARY:
{
for (@parts) {
if (index($_, $boundary) >= 0) {
# must have a better boundary
$boundary = boundary(++$bno);
redo CHECK_BOUNDARY;
}
}
last;
}
$content = "--$boundary$CRLF" .
join("$CRLF--$boundary$CRLF", @parts) .
"$CRLF--$boundary--$CRLF";
}
wantarray ? ($content, $boundary) : $content;
}
sub boundary
{
my $size = shift || return "xYzZY";
require MIME::Base64;
my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), "");
$b =~ s/[\W]/X/g; # ensure alnum only
$b;
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Request::Common - Construct common HTTP::Request objects
=head1 VERSION
version 7.00
=head1 SYNOPSIS
use HTTP::Request::Common;
$ua = LWP::UserAgent->new;
$ua->request(GET 'http://www.sn.no/');
$ua->request(POST 'http://somewhere/foo', foo => bar, bar => foo);
$ua->request(PATCH 'http://somewhere/foo', foo => bar, bar => foo);
$ua->request(PUT 'http://somewhere/foo', foo => bar, bar => foo);
$ua->request(OPTIONS 'http://somewhere/foo', foo => bar, bar => foo);
=head1 DESCRIPTION
This module provides functions that return newly created C<HTTP::Request>
objects. These functions are usually more convenient to use than the
standard C<HTTP::Request> constructor for the most common requests.
Note that L<LWP::UserAgent> has several convenience methods, including
C<get>, C<head>, C<delete>, C<post> and C<put>.
The following functions are provided:
=over 4
=item GET $url
=item GET $url, Header => Value,...
The C<GET> function returns an L<HTTP::Request> object initialized with
the "GET" method and the specified URL. It is roughly equivalent to the
following call
HTTP::Request->new(
GET => $url,
HTTP::Headers->new(Header => Value,...),
)
but is less cluttered. What is different is that a header named
C<Content> will initialize the content part of the request instead of
setting a header field. Note that GET requests should normally not
have a content, so this hack makes more sense for the C<PUT>, C<PATCH>
and C<POST> functions described below.
The C<get(...)> method of L<LWP::UserAgent> exists as a shortcut for
C<< $ua->request(GET ...) >>.
=item HEAD $url
=item HEAD $url, Header => Value,...
Like GET() but the method in the request is "HEAD".
The C<head(...)> method of L<LWP::UserAgent> exists as a shortcut for
C<< $ua->request(HEAD ...) >>.
=item DELETE $url
=item DELETE $url, Header => Value,...
Like C<GET> but the method in the request is C<DELETE>. This function
is not exported by default.
=item PATCH $url
=item PATCH $url, Header => Value,...
=item PATCH $url, $form_ref, Header => Value,...
=item PATCH $url, Header => Value,..., Content => $form_ref
=item PATCH $url, Header => Value,..., Content => $content
The same as C<POST> below, but the method in the request is C<PATCH>.
=item PUT $url
=item PUT $url, Header => Value,...
=item PUT $url, $form_ref, Header => Value,...
=item PUT $url, Header => Value,..., Content => $form_ref
=item PUT $url, Header => Value,..., Content => $content
The same as C<POST> below, but the method in the request is C<PUT>
=item OPTIONS $url
=item OPTIONS $url, Header => Value,...
=item OPTIONS $url, $form_ref, Header => Value,...
=item OPTIONS $url, Header => Value,..., Content => $form_ref
=item OPTIONS $url, Header => Value,..., Content => $content
The same as C<POST> below, but the method in the request is C<OPTIONS>
This was added in version 6.21, so you should require that in your code:
use HTTP::Request::Common 6.21;
=item POST $url
=item POST $url, Header => Value,...
=item POST $url, $form_ref, Header => Value,...
=item POST $url, Header => Value,..., Content => $form_ref
=item POST $url, Header => Value,..., Content => $content
C<POST>, C<PATCH> and C<PUT> all work with the same parameters.
%data = ( title => 'something', body => something else' );
$ua = LWP::UserAgent->new();
$request = HTTP::Request::Common::POST( $url, [ %data ] );
$response = $ua->request($request);
They take a second optional array or hash reference
parameter C<$form_ref>. The content can also be specified
directly using the C<Content> pseudo-header, and you may also provide
the C<$form_ref> this way.
The C<Content> pseudo-header steals a bit of the header field namespace as
there is no way to directly specify a header that is actually called
"Content". If you really need this you must update the request
returned in a separate statement.
The C<$form_ref> argument can be used to pass key/value pairs for the
form content. By default we will initialize a request using the
C<application/x-www-form-urlencoded> content type. This means that
you can emulate an HTML E<lt>form> POSTing like this:
POST 'http://www.perl.org/survey.cgi',
[ name => 'Gisle Aas',
email => 'gisle@aas.no',
gender => 'M',
born => '1964',
perc => '3%',
];
This will create an L<HTTP::Request> object that looks like this:
POST http://www.perl.org/survey.cgi
Content-Length: 66
Content-Type: application/x-www-form-urlencoded
name=Gisle%20Aas&email=gisle%40aas.no&gender=M&born=1964&perc=3%25
Multivalued form fields can be specified by either repeating the field
name or by passing the value as an array reference.
The POST method also supports the C<multipart/form-data> content used
for I<Form-based File Upload> as specified in RFC 1867. You trigger
this content format by specifying a content type of C<'form-data'> as
one of the request headers. If one of the values in the C<$form_ref> is
an array reference, then it is treated as a file part specification
with the following interpretation:
[ $file, $filename, Header => Value... ]
[ undef, $filename, Header => Value,..., Content => $content ]
The first value in the array ($file) is the name of a file to open.
This file will be read and its content placed in the request. The
routine will croak if the file can't be opened. Use an C<undef> as
$file value if you want to specify the content directly with a
C<Content> header. The $filename is the filename to report in the
request. If this value is undefined, then the basename of the $file
will be used. You can specify an empty string as $filename if you
want to suppress sending the filename when you provide a $file value.
If a $file is provided by no C<Content-Type> header, then C<Content-Type>
and C<Content-Encoding> will be filled in automatically with the values
returned by C<LWP::MediaTypes::guess_media_type()>
Sending my F<~/.profile> to the survey used as example above can be
achieved by this:
POST 'http://www.perl.org/survey.cgi',
Content_Type => 'form-data',
Content => [ name => 'Gisle Aas',
email => 'gisle@aas.no',
gender => 'M',
born => '1964',
init => ["$ENV{HOME}/.profile"],
]
This will create an L<HTTP::Request> object that almost looks this (the
boundary and the content of your F<~/.profile> is likely to be
different):
POST http://www.perl.org/survey.cgi
Content-Length: 388
Content-Type: multipart/form-data; boundary="6G+f"
--6G+f
Content-Disposition: form-data; name="name"
Gisle Aas
--6G+f
Content-Disposition: form-data; name="email"
gisle@aas.no
--6G+f
Content-Disposition: form-data; name="gender"
M
--6G+f
Content-Disposition: form-data; name="born"
1964
--6G+f
Content-Disposition: form-data; name="init"; filename=".profile"
Content-Type: text/plain
PATH=/local/perl/bin:$PATH
export PATH
--6G+f--
If you set the C<$DYNAMIC_FILE_UPLOAD> variable (exportable) to some TRUE
value, then you get back a request object with a subroutine closure as
the content attribute. This subroutine will read the content of any
files on demand and return it in suitable chunks. This allow you to
upload arbitrary big files without using lots of memory. You can even
upload infinite files like F</dev/audio> if you wish; however, if
the file is not a plain file, there will be no C<Content-Length> header
defined for the request. Not all servers (or server
applications) like this. Also, if the file(s) change in size between
the time the C<Content-Length> is calculated and the time that the last
chunk is delivered, the subroutine will C<Croak>.
The C<post(...)> method of L<LWP::UserAgent> exists as a shortcut for
C<< $ua->request(POST ...) >>.
=back
=head1 SEE ALSO
L<HTTP::Request>, L<LWP::UserAgent>
Also, there are some examples in L<HTTP::Request/"EXAMPLES"> that you might
find useful. For example, batch requests are explained there.
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: Construct common HTTP::Request objects

View File

@@ -0,0 +1,671 @@
package HTTP::Response;
use strict;
use warnings;
our $VERSION = '7.00';
use parent 'HTTP::Message';
use HTTP::Status ();
sub new
{
my($class, $rc, $msg, $header, $content) = @_;
my $self = $class->SUPER::new($header, $content);
$self->code($rc);
$self->message($msg);
$self;
}
sub parse
{
my($class, $str) = @_;
Carp::carp('Undefined argument to parse()') if $^W && ! defined $str;
my $status_line;
if (defined $str && $str =~ s/^(.*)\n//) {
$status_line = $1;
}
else {
$status_line = $str;
$str = "";
}
$status_line =~ s/\r\z// if defined $status_line;
my $self = $class->SUPER::parse($str);
if (defined $status_line) {
my($protocol, $code, $message);
if ($status_line =~ /^\d{3} /) {
# Looks like a response created by HTTP::Response->new
($code, $message) = split(' ', $status_line, 2);
} else {
($protocol, $code, $message) = split(' ', $status_line, 3);
}
$self->protocol($protocol) if $protocol;
$self->code($code) if defined($code);
$self->message($message) if defined($message);
}
$self;
}
sub clone
{
my $self = shift;
my $clone = bless $self->SUPER::clone, ref($self);
$clone->code($self->code);
$clone->message($self->message);
$clone->request($self->request->clone) if $self->request;
# we don't clone previous
$clone;
}
sub code { shift->_elem('_rc', @_); }
sub message { shift->_elem('_msg', @_); }
sub previous { shift->_elem('_previous',@_); }
sub request { shift->_elem('_request', @_); }
sub status_line
{
my $self = shift;
my $code = $self->{'_rc'} || "000";
my $mess = $self->{'_msg'} || HTTP::Status::status_message($code) || "Unknown code";
return "$code $mess";
}
sub base
{
my $self = shift;
my $base = (
$self->header('Content-Base'), # used to be HTTP/1.1
$self->header('Base'), # HTTP/1.0
)[0];
if ($base && $base =~ /^$URI::scheme_re:/o) {
# already absolute
return $HTTP::URI_CLASS->new($base);
}
my $req = $self->request;
if ($req) {
# if $base is undef here, the return value is effectively
# just a copy of $self->request->uri.
return $HTTP::URI_CLASS->new_abs($base, $req->uri);
}
# can't find an absolute base
return undef;
}
sub redirects {
my $self = shift;
my @r;
my $r = $self;
while (my $p = $r->previous) {
push(@r, $p);
$r = $p;
}
return @r unless wantarray;
return reverse @r;
}
sub filename
{
my $self = shift;
my $file;
my $cd = $self->header('Content-Disposition');
if ($cd) {
require HTTP::Headers::Util;
if (my @cd = HTTP::Headers::Util::split_header_words($cd)) {
my ($disposition, undef, %cd_param) = @{$cd[-1]};
$file = $cd_param{filename};
# RFC 2047 encoded?
if ($file && $file =~ /^=\?(.+?)\?(.+?)\?(.+)\?=$/) {
my $charset = $1;
my $encoding = uc($2);
my $encfile = $3;
if ($encoding eq 'Q' || $encoding eq 'B') {
local($SIG{__DIE__});
eval {
if ($encoding eq 'Q') {
$encfile =~ s/_/ /g;
require MIME::QuotedPrint;
$encfile = MIME::QuotedPrint::decode($encfile);
}
else { # $encoding eq 'B'
require MIME::Base64;
$encfile = MIME::Base64::decode($encfile);
}
require Encode;
require Encode::Locale;
Encode::from_to($encfile, $charset, "locale_fs");
};
$file = $encfile unless $@;
}
}
}
}
unless (defined($file) && length($file)) {
my $uri;
if (my $cl = $self->header('Content-Location')) {
$uri = URI->new($cl);
}
elsif (my $request = $self->request) {
$uri = $request->uri;
}
if ($uri) {
$file = ($uri->path_segments)[-1];
}
}
if ($file) {
$file =~ s,.*[\\/],,; # basename
}
if ($file && !length($file)) {
$file = undef;
}
$file;
}
sub as_string
{
my $self = shift;
my($eol) = @_;
$eol = "\n" unless defined $eol;
my $status_line = $self->status_line;
my $proto = $self->protocol;
$status_line = "$proto $status_line" if $proto;
return join($eol, $status_line, $self->SUPER::as_string(@_));
}
sub dump
{
my $self = shift;
my $status_line = $self->status_line;
my $proto = $self->protocol;
$status_line = "$proto $status_line" if $proto;
return $self->SUPER::dump(
preheader => $status_line,
@_,
);
}
sub is_info { HTTP::Status::is_info (shift->{'_rc'}); }
sub is_success { HTTP::Status::is_success (shift->{'_rc'}); }
sub is_redirect { HTTP::Status::is_redirect (shift->{'_rc'}); }
sub is_error { HTTP::Status::is_error (shift->{'_rc'}); }
sub is_client_error { HTTP::Status::is_client_error (shift->{'_rc'}); }
sub is_server_error { HTTP::Status::is_server_error (shift->{'_rc'}); }
sub error_as_HTML
{
my $self = shift;
my $title = 'An Error Occurred';
my $body = $self->status_line;
$body =~ s/&/&amp;/g;
$body =~ s/</&lt;/g;
return <<EOM;
<html>
<head><title>$title</title></head>
<body>
<h1>$title</h1>
<p>$body</p>
</body>
</html>
EOM
}
sub current_age
{
my $self = shift;
my $time = shift;
# Implementation of RFC 2616 section 13.2.3
# (age calculations)
my $response_time = $self->client_date;
my $date = $self->date;
my $age = 0;
if ($response_time && $date) {
$age = $response_time - $date; # apparent_age
$age = 0 if $age < 0;
}
my $age_v = $self->header('Age');
if ($age_v && $age_v > $age) {
$age = $age_v; # corrected_received_age
}
if ($response_time) {
my $request = $self->request;
if ($request) {
my $request_time = $request->date;
if ($request_time && $request_time < $response_time) {
# Add response_delay to age to get 'corrected_initial_age'
$age += $response_time - $request_time;
}
}
$age += ($time || time) - $response_time;
}
return $age;
}
sub freshness_lifetime
{
my($self, %opt) = @_;
# First look for the Cache-Control: max-age=n header
for my $cc ($self->header('Cache-Control')) {
for my $cc_dir (split(/\s*,\s*/, $cc)) {
return $1 if $cc_dir =~ /^max-age\s*=\s*(\d+)/i;
}
}
# Next possibility is to look at the "Expires" header
my $date = $self->date || $self->client_date || $opt{time} || time;
if (my $expires = $self->expires) {
return $expires - $date;
}
# Must apply heuristic expiration
return undef if exists $opt{heuristic_expiry} && !$opt{heuristic_expiry};
# Default heuristic expiration parameters
$opt{h_min} ||= 60;
$opt{h_max} ||= 24 * 3600;
$opt{h_lastmod_fraction} ||= 0.10; # 10% since last-mod suggested by RFC2616
$opt{h_default} ||= 3600;
# Should give a warning if more than 24 hours according to
# RFC 2616 section 13.2.4. Here we just make this the default
# maximum value.
if (my $last_modified = $self->last_modified) {
my $h_exp = ($date - $last_modified) * $opt{h_lastmod_fraction};
return $opt{h_min} if $h_exp < $opt{h_min};
return $opt{h_max} if $h_exp > $opt{h_max};
return $h_exp;
}
# default when all else fails
return $opt{h_min} if $opt{h_min} > $opt{h_default};
return $opt{h_default};
}
sub is_fresh
{
my($self, %opt) = @_;
$opt{time} ||= time;
my $f = $self->freshness_lifetime(%opt);
return undef unless defined($f);
return $f > $self->current_age($opt{time});
}
sub fresh_until
{
my($self, %opt) = @_;
$opt{time} ||= time;
my $f = $self->freshness_lifetime(%opt);
return undef unless defined($f);
return $f - $self->current_age($opt{time}) + $opt{time};
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Response - HTTP style response message
=head1 VERSION
version 7.00
=head1 SYNOPSIS
Response objects are returned by the request() method of the C<LWP::UserAgent>:
# ...
$response = $ua->request($request);
if ($response->is_success) {
print $response->decoded_content;
}
else {
print STDERR $response->status_line, "\n";
}
=head1 DESCRIPTION
The C<HTTP::Response> class encapsulates HTTP style responses. A
response consists of a response line, some headers, and a content
body. Note that the LWP library uses HTTP style responses even for
non-HTTP protocol schemes. Instances of this class are usually
created and returned by the request() method of an C<LWP::UserAgent>
object.
C<HTTP::Response> is a subclass of C<HTTP::Message> and therefore
inherits its methods. The following additional methods are available:
=over 4
=item $r = HTTP::Response->new( $code )
=item $r = HTTP::Response->new( $code, $msg )
=item $r = HTTP::Response->new( $code, $msg, $header )
=item $r = HTTP::Response->new( $code, $msg, $header, $content )
Constructs a new C<HTTP::Response> object describing a response with
response code $code and optional message $msg. The optional $header
argument should be a reference to an C<HTTP::Headers> object or a
plain array reference of key/value pairs. The optional $content
argument should be a string of bytes. The meanings of these arguments are
described below.
=item $r = HTTP::Response->parse( $str )
This constructs a new response object by parsing the given string.
=item $r->code
=item $r->code( $code )
This is used to get/set the code attribute. The code is a 3 digit
number that encode the overall outcome of an HTTP response. The
C<HTTP::Status> module provide constants that provide mnemonic names
for the code attribute.
=item $r->message
=item $r->message( $message )
This is used to get/set the message attribute. The message is a short
human readable single line string that explains the response code.
=item $r->header( $field )
=item $r->header( $field => $value )
This is used to get/set header values and it is inherited from
C<HTTP::Headers> via C<HTTP::Message>. See L<HTTP::Headers> for
details and other similar methods that can be used to access the
headers.
=item $r->content
=item $r->content( $bytes )
This is used to get/set the raw content and it is inherited from the
C<HTTP::Message> base class. See L<HTTP::Message> for details and
other methods that can be used to access the content.
=item $r->decoded_content( %options )
This will return the content after any C<Content-Encoding> and
charsets have been decoded. See L<HTTP::Message> for details.
=item $r->request
=item $r->request( $request )
This is used to get/set the request attribute. The request attribute
is a reference to the request that caused this response. It does
not have to be the same request passed to the $ua->request() method,
because there might have been redirects and authorization retries in
between.
=item $r->previous
=item $r->previous( $response )
This is used to get/set the previous attribute. The previous
attribute is used to link together chains of responses. You get
chains of responses if the first response is redirect or unauthorized.
The value is C<undef> if this is the first response in a chain.
Note that the method $r->redirects is provided as a more convenient
way to access the response chain.
=item $r->status_line
Returns the string "E<lt>code> E<lt>message>". If the message attribute
is not set then the official name of E<lt>code> (see L<HTTP::Status>)
is substituted.
=item $r->base
Returns the base URI for this response. The return value will be a
reference to a URI object.
The base URI is obtained from one the following sources (in priority
order):
=over 4
=item 1.
Embedded in the document content, for instance <BASE HREF="...">
in HTML documents.
=item 2.
A "Content-Base:" header in the response.
For backwards compatibility with older HTTP implementations we will
also look for the "Base:" header.
=item 3.
The URI used to request this response. This might not be the original
URI that was passed to $ua->request() method, because we might have
received some redirect responses first.
=back
If none of these sources provide an absolute URI, undef is returned.
B<Note>: previous versions of HTTP::Response would also consider
a "Content-Location:" header,
as L<RFC 2616|https://www.rfc-editor.org/rfc/rfc2616> said it should be.
But this was never widely implemented by browsers,
and now L<RFC 7231|https://www.rfc-editor.org/rfc/rfc7231>
says it should no longer be considered.
When the LWP protocol modules produce the HTTP::Response object, then any base
URI embedded in the document (step 1) will already have initialized the
"Content-Base:" header. (See L<LWP::UserAgent/parse_head>). This means that
this method only performs the last 2 steps (the content is not always available
either).
=item $r->filename
Returns a filename for this response. Note that doing sanity checks
on the returned filename (eg. removing characters that cannot be used
on the target filesystem where the filename would be used, and
laundering it for security purposes) are the caller's responsibility;
the only related thing done by this method is that it makes a simple
attempt to return a plain filename with no preceding path segments.
The filename is obtained from one the following sources (in priority
order):
=over 4
=item 1.
A "Content-Disposition:" header in the response. Proper decoding of
RFC 2047 encoded filenames requires the C<MIME::QuotedPrint> (for "Q"
encoding), C<MIME::Base64> (for "B" encoding), and C<Encode> modules.
=item 2.
A "Content-Location:" header in the response.
=item 3.
The URI used to request this response. This might not be the original
URI that was passed to $ua->request() method, because we might have
received some redirect responses first.
=back
If a filename cannot be derived from any of these sources, undef is
returned.
=item $r->as_string
=item $r->as_string( $eol )
Returns a textual representation of the response.
=item $r->is_info
=item $r->is_success
=item $r->is_redirect
=item $r->is_error
=item $r->is_client_error
=item $r->is_server_error
These methods indicate if the response was informational, successful, a
redirection, or an error. See L<HTTP::Status> for the meaning of these.
=item $r->error_as_HTML
Returns a string containing a complete HTML document indicating what
error occurred. This method should only be called when $r->is_error
is TRUE.
=item $r->redirects
Returns the list of redirect responses that lead up to this response
by following the $r->previous chain. The list order is oldest first.
In scalar context return the number of redirect responses leading up
to this one.
=item $r->current_age
Calculates the "current age" of the response as specified by RFC 2616
section 13.2.3. The age of a response is the time since it was sent
by the origin server. The returned value is a number representing the
age in seconds.
=item $r->freshness_lifetime( %opt )
Calculates the "freshness lifetime" of the response as specified by
RFC 2616 section 13.2.4. The "freshness lifetime" is the length of
time between the generation of a response and its expiration time.
The returned value is the number of seconds until expiry.
If the response does not contain an "Expires" or a "Cache-Control"
header, then this function will apply some simple heuristic based on
the "Last-Modified" header to determine a suitable lifetime. The
following options might be passed to control the heuristics:
=over
=item heuristic_expiry => $bool
If passed as a FALSE value, don't apply heuristics and just return
C<undef> when "Expires" or "Cache-Control" is lacking.
=item h_lastmod_fraction => $num
This number represent the fraction of the difference since the
"Last-Modified" timestamp to make the expiry time. The default is
C<0.10>, the suggested typical setting of 10% in RFC 2616.
=item h_min => $sec
This is the lower limit of the heuristic expiry age to use. The
default is C<60> (1 minute).
=item h_max => $sec
This is the upper limit of the heuristic expiry age to use. The
default is C<86400> (24 hours).
=item h_default => $sec
This is the expiry age to use when nothing else applies. The default
is C<3600> (1 hour) or "h_min" if greater.
=back
=item $r->is_fresh( %opt )
Returns TRUE if the response is fresh, based on the values of
freshness_lifetime() and current_age(). If the response is no longer
fresh, then it has to be re-fetched or re-validated by the origin
server.
Options might be passed to control expiry heuristics, see the
description of freshness_lifetime().
=item $r->fresh_until( %opt )
Returns the time (seconds since epoch) when this entity is no longer fresh.
Options might be passed to control expiry heuristics, see the
description of freshness_lifetime().
=back
=head1 SEE ALSO
L<HTTP::Headers>, L<HTTP::Message>, L<HTTP::Status>, L<HTTP::Request>
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: HTTP style response message

View File

@@ -0,0 +1,389 @@
package HTTP::Status;
use strict;
use warnings;
our $VERSION = '7.00';
use Exporter 5.57 'import';
our @EXPORT = qw(is_info is_success is_redirect is_error status_message);
our @EXPORT_OK = qw(is_client_error is_server_error is_cacheable_by_default status_constant_name status_codes);
# Note also addition of mnemonics to @EXPORT below
# Unmarked codes are from RFC 7231 (2017-12-20)
# See also:
# https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml
my %StatusCode = (
100 => 'Continue',
101 => 'Switching Protocols',
102 => 'Processing', # RFC 2518: WebDAV
103 => 'Early Hints', # RFC 8297: Indicating Hints
# 104 .. 199
200 => 'OK',
201 => 'Created',
202 => 'Accepted',
203 => 'Non-Authoritative Information',
204 => 'No Content',
205 => 'Reset Content',
206 => 'Partial Content', # RFC 7233: Range Requests
207 => 'Multi-Status', # RFC 4918: WebDAV
208 => 'Already Reported', # RFC 5842: WebDAV bindings
# 209 .. 225
226 => 'IM Used', # RFC 3229: Delta encoding
# 227 .. 299
300 => 'Multiple Choices',
301 => 'Moved Permanently',
302 => 'Found',
303 => 'See Other',
304 => 'Not Modified', # RFC 7232: Conditional Request
305 => 'Use Proxy',
306 => '(Unused)', # RFC 9110: Previously used and reserved
307 => 'Temporary Redirect',
308 => 'Permanent Redirect', # RFC 7528: Permanent Redirect
# 309 .. 399
400 => 'Bad Request',
401 => 'Unauthorized', # RFC 7235: Authentication
402 => 'Payment Required',
403 => 'Forbidden',
404 => 'Not Found',
405 => 'Method Not Allowed',
406 => 'Not Acceptable',
407 => 'Proxy Authentication Required', # RFC 7235: Authentication
408 => 'Request Timeout',
409 => 'Conflict',
410 => 'Gone',
411 => 'Length Required',
412 => 'Precondition Failed', # RFC 7232: Conditional Request
413 => 'Content Too Large',
414 => 'URI Too Long',
415 => 'Unsupported Media Type',
416 => 'Range Not Satisfiable', # RFC 7233: Range Requests
417 => 'Expectation Failed',
418 => "I'm a teapot", # RFC 2324: RFC9110 reserved it
# 419 .. 420
421 => 'Misdirected Request', # RFC 7540: HTTP/2
422 => 'Unprocessable Content', # RFC 9110: WebDAV
423 => 'Locked', # RFC 4918: WebDAV
424 => 'Failed Dependency', # RFC 4918: WebDAV
425 => 'Too Early', # RFC 8470: Using Early Data in HTTP
426 => 'Upgrade Required',
# 427
428 => 'Precondition Required', # RFC 6585: Additional Codes
429 => 'Too Many Requests', # RFC 6585: Additional Codes
# 430
431 => 'Request Header Fields Too Large', # RFC 6585: Additional Codes
# 432 .. 450
451 => 'Unavailable For Legal Reasons', # RFC 7725: Legal Obstacles
# 452 .. 499
500 => 'Internal Server Error',
501 => 'Not Implemented',
502 => 'Bad Gateway',
503 => 'Service Unavailable',
504 => 'Gateway Timeout',
505 => 'HTTP Version Not Supported',
506 => 'Variant Also Negotiates', # RFC 2295: Transparant Ngttn
507 => 'Insufficient Storage', # RFC 4918: WebDAV
508 => 'Loop Detected', # RFC 5842: WebDAV bindings
# 509
510 => 'Not Extended', # RFC 2774: Extension Framework
511 => 'Network Authentication Required', # RFC 6585: Additional Codes
# Keep some unofficial codes that used to be in this distribution
449 => 'Retry with', # microsoft
509 => 'Bandwidth Limit Exceeded', # Apache / cPanel
);
my %StatusCodeName;
my $mnemonicCode = '';
my ($code, $message);
while (($code, $message) = each %StatusCode) {
next if $message eq '(Unused)';
# create mnemonic subroutines
$message =~ s/I'm/I am/;
$message =~ tr/a-z \-/A-Z__/;
my $constant_name = "HTTP_".$message;
$mnemonicCode .= "sub $constant_name () { $code }\n";
$mnemonicCode .= "*RC_$message = \\&HTTP_$message;\n"; # legacy
$mnemonicCode .= "push(\@EXPORT_OK, 'HTTP_$message');\n";
$mnemonicCode .= "push(\@EXPORT, 'RC_$message');\n";
$StatusCodeName{$code} = $constant_name
}
eval $mnemonicCode; # only one eval for speed
die if $@;
# backwards compatibility
*RC_MOVED_TEMPORARILY = \&RC_FOUND; # 302 was renamed in the standard
push(@EXPORT, "RC_MOVED_TEMPORARILY");
my %compat = (
UNPROCESSABLE_ENTITY => \&HTTP_UNPROCESSABLE_CONTENT,
PAYLOAD_TOO_LARGE => \&HTTP_CONTENT_TOO_LARGE,
REQUEST_ENTITY_TOO_LARGE => \&HTTP_CONTENT_TOO_LARGE,
REQUEST_URI_TOO_LARGE => \&HTTP_URI_TOO_LONG,
REQUEST_RANGE_NOT_SATISFIABLE => \&HTTP_RANGE_NOT_SATISFIABLE,
NO_CODE => \&HTTP_TOO_EARLY,
UNORDERED_COLLECTION => \&HTTP_TOO_EARLY,
);
foreach my $name (keys %compat) {
push(@EXPORT, "RC_$name");
push(@EXPORT_OK, "HTTP_$name");
no strict 'refs';
*{"RC_$name"} = $compat{$name};
*{"HTTP_$name"} = $compat{$name};
}
our %EXPORT_TAGS = (
constants => [grep /^HTTP_/, @EXPORT_OK],
is => [grep /^is_/, @EXPORT, @EXPORT_OK],
);
sub status_message ($) { $StatusCode{$_[0]}; }
sub status_constant_name ($) {
exists($StatusCodeName{$_[0]}) ? $StatusCodeName{$_[0]} : undef;
}
sub is_info ($) { $_[0] && $_[0] >= 100 && $_[0] < 200; }
sub is_success ($) { $_[0] && $_[0] >= 200 && $_[0] < 300; }
sub is_redirect ($) { $_[0] && $_[0] >= 300 && $_[0] < 400; }
sub is_error ($) { $_[0] && $_[0] >= 400 && $_[0] < 600; }
sub is_client_error ($) { $_[0] && $_[0] >= 400 && $_[0] < 500; }
sub is_server_error ($) { $_[0] && $_[0] >= 500 && $_[0] < 600; }
sub is_cacheable_by_default ($) { $_[0] && ( $_[0] == 200 # OK
|| $_[0] == 203 # Non-Authoritative Information
|| $_[0] == 204 # No Content
|| $_[0] == 206 # Not Acceptable
|| $_[0] == 300 # Multiple Choices
|| $_[0] == 301 # Moved Permanently
|| $_[0] == 308 # Permanent Redirect
|| $_[0] == 404 # Not Found
|| $_[0] == 405 # Method Not Allowed
|| $_[0] == 410 # Gone
|| $_[0] == 414 # Request-URI Too Large
|| $_[0] == 451 # Unavailable For Legal Reasons
|| $_[0] == 501 # Not Implemented
);
}
sub status_codes { %StatusCode; }
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Status - HTTP Status code processing
=head1 VERSION
version 7.00
=head1 SYNOPSIS
use HTTP::Status qw(:constants :is status_message);
if ($rc != HTTP_OK) {
print status_message($rc), "\n";
}
if (is_success($rc)) { ... }
if (is_error($rc)) { ... }
if (is_redirect($rc)) { ... }
=head1 DESCRIPTION
I<HTTP::Status> is a library of routines for defining and
classifying HTTP status codes for libwww-perl. Status codes are
used to encode the overall outcome of an HTTP response message. Codes
correspond to those defined in RFC 2616 and RFC 2518.
=head1 CONSTANTS
The following constant functions can be used as mnemonic status code
names. None of these are exported by default. Use the C<:constants>
tag to import them all.
HTTP_CONTINUE (100)
HTTP_SWITCHING_PROTOCOLS (101)
HTTP_PROCESSING (102)
HTTP_EARLY_HINTS (103)
HTTP_OK (200)
HTTP_CREATED (201)
HTTP_ACCEPTED (202)
HTTP_NON_AUTHORITATIVE_INFORMATION (203)
HTTP_NO_CONTENT (204)
HTTP_RESET_CONTENT (205)
HTTP_PARTIAL_CONTENT (206)
HTTP_MULTI_STATUS (207)
HTTP_ALREADY_REPORTED (208)
HTTP_IM_USED (226)
HTTP_MULTIPLE_CHOICES (300)
HTTP_MOVED_PERMANENTLY (301)
HTTP_FOUND (302)
HTTP_SEE_OTHER (303)
HTTP_NOT_MODIFIED (304)
HTTP_USE_PROXY (305)
HTTP_TEMPORARY_REDIRECT (307)
HTTP_PERMANENT_REDIRECT (308)
HTTP_BAD_REQUEST (400)
HTTP_UNAUTHORIZED (401)
HTTP_PAYMENT_REQUIRED (402)
HTTP_FORBIDDEN (403)
HTTP_NOT_FOUND (404)
HTTP_METHOD_NOT_ALLOWED (405)
HTTP_NOT_ACCEPTABLE (406)
HTTP_PROXY_AUTHENTICATION_REQUIRED (407)
HTTP_REQUEST_TIMEOUT (408)
HTTP_CONFLICT (409)
HTTP_GONE (410)
HTTP_LENGTH_REQUIRED (411)
HTTP_PRECONDITION_FAILED (412)
HTTP_CONTENT_TOO_LARGE (413)
HTTP_URI_TOO_LONG (414)
HTTP_UNSUPPORTED_MEDIA_TYPE (415)
HTTP_RANGE_NOT_SATISFIABLE (416)
HTTP_EXPECTATION_FAILED (417)
HTTP_MISDIRECTED REQUEST (421)
HTTP_UNPROCESSABLE_CONTENT (422)
HTTP_LOCKED (423)
HTTP_FAILED_DEPENDENCY (424)
HTTP_TOO_EARLY (425)
HTTP_UPGRADE_REQUIRED (426)
HTTP_PRECONDITION_REQUIRED (428)
HTTP_TOO_MANY_REQUESTS (429)
HTTP_REQUEST_HEADER_FIELDS_TOO_LARGE (431)
HTTP_UNAVAILABLE_FOR_LEGAL_REASONS (451)
HTTP_INTERNAL_SERVER_ERROR (500)
HTTP_NOT_IMPLEMENTED (501)
HTTP_BAD_GATEWAY (502)
HTTP_SERVICE_UNAVAILABLE (503)
HTTP_GATEWAY_TIMEOUT (504)
HTTP_HTTP_VERSION_NOT_SUPPORTED (505)
HTTP_VARIANT_ALSO_NEGOTIATES (506)
HTTP_INSUFFICIENT_STORAGE (507)
HTTP_LOOP_DETECTED (508)
HTTP_NOT_EXTENDED (510)
HTTP_NETWORK_AUTHENTICATION_REQUIRED (511)
=head1 FUNCTIONS
The following additional functions are provided. Most of them are
exported by default. The C<:is> import tag can be used to import all
the classification functions.
=over 4
=item status_message( $code )
The status_message() function will translate status codes to human
readable strings. The string is the same as found in the constant
names above.
For example, C<status_message(303)> will return C<"Not Found">.
If the $code is not registered in the L<list of IANA HTTP Status
Codes|https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml>
then C<undef> is returned.
=item status_constant_name( $code )
The status_constant_name() function will translate a status code
to a string which has the name of the constant for that status code.
For example, C<status_constant_name(404)> will return C<"HTTP_NOT_FOUND">.
If the C<$code> is not registered in the L<list of IANA HTTP Status
Codes|https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml>
then C<undef> is returned.
=item is_info( $code )
Return TRUE if C<$code> is an I<Informational> status code (1xx). This
class of status code indicates a provisional response which can't have
any content.
=item is_success( $code )
Return TRUE if C<$code> is a I<Successful> status code (2xx).
=item is_redirect( $code )
Return TRUE if C<$code> is a I<Redirection> status code (3xx). This class of
status code indicates that further action needs to be taken by the
user agent in order to fulfill the request.
=item is_error( $code )
Return TRUE if C<$code> is an I<Error> status code (4xx or 5xx). The function
returns TRUE for both client and server error status codes.
=item is_client_error( $code )
Return TRUE if C<$code> is a I<Client Error> status code (4xx). This class
of status code is intended for cases in which the client seems to have
erred.
This function is B<not> exported by default.
=item is_server_error( $code )
Return TRUE if C<$code> is a I<Server Error> status code (5xx). This class
of status codes is intended for cases in which the server is aware
that it has erred or is incapable of performing the request.
This function is B<not> exported by default.
=item is_cacheable_by_default( $code )
Return TRUE if C<$code> indicates that a response is cacheable by default, and
it can be reused by a cache with heuristic expiration. All other status codes
are not cacheable by default. See L<RFC 7231 - HTTP/1.1 Semantics and Content,
Section 6.1. Overview of Status Codes|https://tools.ietf.org/html/rfc7231#section-6.1>.
This function is B<not> exported by default.
=item status_codes
Returns a hash mapping numerical HTTP status code (e.g. 200) to text status messages (e.g. "OK")
This function is B<not> exported by default.
=back
=head1 SEE ALSO
L<IANA HTTP Status Codes|https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml>
=head1 BUGS
For legacy reasons all the C<HTTP_> constants are exported by default
with the prefix C<RC_>. It's recommended to use explicit imports and
the C<:constants> tag instead of relying on this.
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: HTTP Status code processing