made the pack completely portable and wrote relevent bat files to go with it

This commit is contained in:
Draqoken
2025-04-09 17:04:56 +03:00
parent 5e77d7e9cf
commit 5e4144c3c0
7417 changed files with 2181044 additions and 19 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,2 @@
#!/bin/sh
exec "/usr/bin/gpg-wks-client" "$@"

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,24 @@
#!/bin/sh
#set -vx
# At this time, while this script is trivial, we ignore any parameters given.
# However, for backwards compatibility reasons, future versions of this script must
# support the syntax "update-ca-trust extract" trigger the generation of output
# files in $DEST.
DEST=/etc/pki/ca-trust/extracted
# OpenSSL PEM bundle that includes trust flags
# (BEGIN TRUSTED CERTIFICATE)
/usr/bin/p11-kit extract --format=openssl-bundle --filter=certificates --overwrite --comment $DEST/openssl/ca-bundle.trust.crt
/usr/bin/p11-kit extract --format=pem-bundle --filter=ca-anchors --overwrite --comment --purpose server-auth $DEST/pem/tls-ca-bundle.pem
/usr/bin/p11-kit extract --format=pem-bundle --filter=ca-anchors --overwrite --comment --purpose email $DEST/pem/email-ca-bundle.pem
/usr/bin/p11-kit extract --format=pem-bundle --filter=ca-anchors --overwrite --comment --purpose code-signing $DEST/pem/objsign-ca-bundle.pem
/usr/bin/p11-kit extract --format=java-cacerts --filter=ca-anchors --overwrite --purpose server-auth $DEST/java/cacerts
# The usual symbolic links are not present to keep these file in sync in MSYS2, so copying is necessary
mkdir -p /usr/ssl/certs
cp -f $DEST/pem/tls-ca-bundle.pem /usr/ssl/certs/ca-bundle.crt
cp -f $DEST/pem/tls-ca-bundle.pem /usr/ssl/cert.pem
cp -f $DEST/openssl/ca-bundle.trust.crt /usr/ssl/certs/ca-bundle.trust.crt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,217 @@
package B::Showlex;
our $VERSION = '1.05';
use strict;
use B qw(svref_2object comppadlist class);
use B::Terse ();
use B::Concise ();
#
# Invoke as
# perl -MO=Showlex,foo bar.pl
# to see the names of lexical variables used by &foo
# or as
# perl -MO=Showlex bar.pl
# to see the names of file scope lexicals used by bar.pl
#
# borrowed from B::Concise
our $walkHandle = \*STDOUT;
sub walk_output { # updates $walkHandle
$walkHandle = B::Concise::walk_output(@_);
#print "got $walkHandle";
#print $walkHandle "using it";
$walkHandle;
}
sub shownamearray {
my ($name, $av) = @_;
my @els = $av->ARRAY;
my $count = @els;
my $i;
print $walkHandle "$name has $count entries\n";
for ($i = 0; $i < $count; $i++) {
my $sv = $els[$i];
if (class($sv) ne "SPECIAL") {
printf $walkHandle "$i: (0x%lx) %s\n",
$$sv, $sv->PVX // "undef" || "const";
} else {
printf $walkHandle "$i: %s\n", $sv->terse;
#printf $walkHandle "$i: %s\n", B::Concise::concise_sv($sv);
}
}
}
sub showvaluearray {
my ($name, $av) = @_;
my @els = $av->ARRAY;
my $count = @els;
my $i;
print $walkHandle "$name has $count entries\n";
for ($i = 0; $i < $count; $i++) {
printf $walkHandle "$i: %s\n", $els[$i]->terse;
#print $walkHandle "$i: %s\n", B::Concise::concise_sv($els[$i]);
}
}
sub showlex {
my ($objname, $namesav, $valsav) = @_;
shownamearray("Pad of lexical names for $objname", $namesav);
showvaluearray("Pad of lexical values for $objname", $valsav);
}
my ($newlex, $nosp1); # rendering state vars
sub padname_terse {
my $name = shift;
return $name->terse if class($name) eq 'SPECIAL';
my $str = $name->PVX;
return sprintf "(0x%lx) %s",
$$name,
length $str ? qq'"$str"' : defined $str ? "const" : 'undef';
}
sub newlex { # drop-in for showlex
my ($objname, $names, $vals) = @_;
my @names = $names->ARRAY;
my @vals = $vals->ARRAY;
my $count = @names;
print $walkHandle "$objname Pad has $count entries\n";
printf $walkHandle "0: %s\n", padname_terse($names[0]) unless $nosp1;
for (my $i = 1; $i < $count; $i++) {
printf $walkHandle "$i: %s = %s\n", padname_terse($names[$i]),
$vals[$i]->terse,
unless $nosp1
and class($names[$i]) eq 'SPECIAL' || !$names[$i]->LEN;
}
}
sub showlex_obj {
my ($objname, $obj) = @_;
$objname =~ s/^&main::/&/;
showlex($objname, svref_2object($obj)->PADLIST->ARRAY) if !$newlex;
newlex ($objname, svref_2object($obj)->PADLIST->ARRAY) if $newlex;
}
sub showlex_main {
showlex("comppadlist", comppadlist->ARRAY) if !$newlex;
newlex ("main", comppadlist->ARRAY) if $newlex;
}
sub compile {
my @options = grep(/^-/, @_);
my @args = grep(!/^-/, @_);
for my $o (@options) {
$newlex = 1 if $o eq "-newlex";
$nosp1 = 1 if $o eq "-nosp";
}
return \&showlex_main unless @args;
return sub {
my $objref;
foreach my $objname (@args) {
next unless $objname; # skip nulls w/o carping
if (ref $objname) {
print $walkHandle "B::Showlex::compile($objname)\n";
$objref = $objname;
} else {
$objname = "main::$objname" unless $objname =~ /::/;
print $walkHandle "$objname:\n";
no strict 'refs';
die "err: unknown function ($objname)\n"
unless *{$objname}{CODE};
$objref = \&$objname;
}
showlex_obj($objname, $objref);
}
}
}
1;
__END__
=head1 NAME
B::Showlex - Show lexical variables used in functions or files
=head1 SYNOPSIS
perl -MO=Showlex[,-OPTIONS][,SUBROUTINE] foo.pl
=head1 DESCRIPTION
When a comma-separated list of subroutine names is given as options, Showlex
prints the lexical variables used in those subroutines. Otherwise, it prints
the file-scope lexicals in the file.
=head1 EXAMPLES
Traditional form:
$ perl -MO=Showlex -e 'my ($i,$j,$k)=(1,"foo")'
Pad of lexical names for comppadlist has 4 entries
0: (0x8caea4) undef
1: (0x9db0fb0) $i
2: (0x9db0f38) $j
3: (0x9db0f50) $k
Pad of lexical values for comppadlist has 5 entries
0: SPECIAL #1 &PL_sv_undef
1: NULL (0x9da4234)
2: NULL (0x9db0f2c)
3: NULL (0x9db0f44)
4: NULL (0x9da4264)
-e syntax OK
New-style form:
$ perl -MO=Showlex,-newlex -e 'my ($i,$j,$k)=(1,"foo")'
main Pad has 4 entries
0: (0x8caea4) undef
1: (0xa0c4fb8) "$i" = NULL (0xa0b8234)
2: (0xa0c4f40) "$j" = NULL (0xa0c4f34)
3: (0xa0c4f58) "$k" = NULL (0xa0c4f4c)
-e syntax OK
New form, no specials, outside O framework:
$ perl -MB::Showlex -e \
'my ($i,$j,$k)=(1,"foo"); B::Showlex::compile(-newlex,-nosp)->()'
main Pad has 4 entries
1: (0x998ffb0) "$i" = IV (0x9983234) 1
2: (0x998ff68) "$j" = PV (0x998ff5c) "foo"
3: (0x998ff80) "$k" = NULL (0x998ff74)
Note that this example shows the values of the lexicals, whereas the other
examples did not (as they're compile-time only).
=head2 OPTIONS
The C<-newlex> option produces a more readable C<< name => value >> format,
and is shown in the second example above.
The C<-nosp> option eliminates reporting of SPECIALs, such as C<0: SPECIAL
#1 &PL_sv_undef> above. Reporting of SPECIALs can sometimes overwhelm
your declared lexicals.
=head1 SEE ALSO
L<B::Showlex> can also be used outside of the O framework, as in the third
example. See L<B::Concise> for a fuller explanation of reasons.
=head1 TODO
Some of the reported info, such as hex addresses, is not particularly
valuable. Other information would be more useful for the typical
programmer, such as line-numbers, pad-slot reuses, etc.. Given this,
-newlex is not a particularly good flag-name.
=head1 AUTHOR
Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>
=cut

View File

@@ -0,0 +1,104 @@
package B::Terse;
our $VERSION = '1.09';
use strict;
use B qw(class @specialsv_name);
use B::Concise qw(concise_subref set_style_standard);
use Carp;
sub terse {
my ($order, $subref) = @_;
set_style_standard("terse");
if ($order eq "exec") {
concise_subref('exec', $subref);
} else {
concise_subref('basic', $subref);
}
}
sub compile {
my @args = @_;
my $order = @args ? shift(@args) : "";
$order = "-exec" if $order eq "exec";
unshift @args, $order if $order ne "";
B::Concise::compile("-terse", @args);
}
sub indent {
my ($level) = @_ ? shift : 0;
return " " x $level;
}
sub B::SV::terse {
my($sv, $level) = (@_, 0);
my %info;
B::Concise::concise_sv($sv, \%info);
my $s = indent($level)
. B::Concise::fmt_line(\%info, $sv,
"#svclass~(?((#svaddr))?)~#svval", 0);
chomp $s;
print "$s\n" unless defined wantarray;
$s;
}
sub B::NULL::terse {
my ($sv, $level) = (@_, 0);
my $s = indent($level) . sprintf "%s (0x%lx)", class($sv), $$sv;
print "$s\n" unless defined wantarray;
$s;
}
sub B::SPECIAL::terse {
my ($sv, $level) = (@_, 0);
my $s = indent($level)
. sprintf( "%s #%d %s", class($sv), $$sv, $specialsv_name[$$sv]);
print "$s\n" unless defined wantarray;
$s;
}
1;
__END__
=head1 NAME
B::Terse - Walk Perl syntax tree, printing terse info about ops
=head1 SYNOPSIS
perl -MO=Terse[,OPTIONS] foo.pl
=head1 DESCRIPTION
This module prints the contents of the parse tree, but without as much
information as CPAN module B::Debug. For comparison, C<print "Hello, world.">
produced 96 lines of output from B::Debug, but only 6 from B::Terse.
This module is useful for people who are writing their own back end,
or who are learning about the Perl internals. It's not useful to the
average programmer.
This version of B::Terse is really just a wrapper that calls L<B::Concise>
with the B<-terse> option. It is provided for compatibility with old scripts
(and habits) but using B::Concise directly is now recommended instead.
For compatibility with the old B::Terse, this module also adds a
method named C<terse> to B::OP and B::SV objects. The B::SV method is
largely compatible with the old one, though authors of new software
might be advised to choose a more user-friendly output format. The
B::OP C<terse> method, however, doesn't work well. Since B::Terse was
first written, much more information in OPs has migrated to the
scratchpad datastructure, but the C<terse> interface doesn't have any
way of getting to the correct pad. As a kludge, the new version will
always use the pad for the main program, but for OPs in subroutines
this will give the wrong answer or crash.
=head1 AUTHOR
The original version of B::Terse was written by Malcolm Beattie,
E<lt>mbeattie@sable.ox.ac.ukE<gt>. This wrapper was written by Stephen
McCamant, E<lt>smcc@MIT.EDUE<gt>.
=cut

View File

@@ -0,0 +1,496 @@
package B::Xref;
our $VERSION = '1.07';
=head1 NAME
B::Xref - Generates cross reference reports for Perl programs
=head1 SYNOPSIS
perl -MO=Xref[,OPTIONS] foo.pl
=head1 DESCRIPTION
The B::Xref module is used to generate a cross reference listing of all
definitions and uses of variables, subroutines and formats in a Perl program.
It is implemented as a backend for the Perl compiler.
The report generated is in the following format:
File filename1
Subroutine subname1
Package package1
object1 line numbers
object2 line numbers
...
Package package2
...
Each B<File> section reports on a single file. Each B<Subroutine> section
reports on a single subroutine apart from the special cases
"(definitions)" and "(main)". These report, respectively, on subroutine
definitions found by the initial symbol table walk and on the main part of
the program or module external to all subroutines.
The report is then grouped by the B<Package> of each variable,
subroutine or format with the special case "(lexicals)" meaning
lexical variables. Each B<object> name (implicitly qualified by its
containing B<Package>) includes its type character(s) at the beginning
where possible. Lexical variables are easier to track and even
included dereferencing information where possible.
The C<line numbers> are a comma separated list of line numbers (some
preceded by code letters) where that object is used in some way.
Simple uses aren't preceded by a code letter. Introductions (such as
where a lexical is first defined with C<my>) are indicated with the
letter "i". Subroutine and method calls are indicated by the character
"&". Subroutine definitions are indicated by "s" and format
definitions by "f".
For instance, here's part of the report from the I<pod2man> program that
comes with Perl:
Subroutine clear_noremap
Package (lexical)
$ready_to_print i1069, 1079
Package main
$& 1086
$. 1086
$0 1086
$1 1087
$2 1085, 1085
$3 1085, 1085
$ARGV 1086
%HTML_Escapes 1085, 1085
This shows the variables used in the subroutine C<clear_noremap>. The
variable C<$ready_to_print> is a my() (lexical) variable,
B<i>ntroduced (first declared with my()) on line 1069, and used on
line 1079. The variable C<$&> from the main package is used on 1086,
and so on.
A line number may be prefixed by a single letter:
=over 4
=item i
Lexical variable introduced (declared with my()) for the first time.
=item &
Subroutine or method call.
=item s
Subroutine defined.
=item r
Format defined.
=back
The most useful option the cross referencer has is to save the report
to a separate file. For instance, to save the report on
I<myperlprogram> to the file I<report>:
$ perl -MO=Xref,-oreport myperlprogram
=head1 OPTIONS
Option words are separated by commas (not whitespace) and follow the
usual conventions of compiler backend options.
=over 8
=item C<-oFILENAME>
Directs output to C<FILENAME> instead of standard output.
=item C<-r>
Raw output. Instead of producing a human-readable report, outputs a line
in machine-readable form for each definition/use of a variable/sub/format.
=item C<-d>
Don't output the "(definitions)" sections.
=item C<-D[tO]>
(Internal) debug options, probably only useful if C<-r> included.
The C<t> option prints the object on the top of the stack as it's
being tracked. The C<O> option prints each operator as it's being
processed in the execution order of the program.
=back
=head1 BUGS
Non-lexical variables are quite difficult to track through a program.
Sometimes the type of a non-lexical variable's use is impossible to
determine. Introductions of non-lexical non-scalars don't seem to be
reported properly.
=head1 AUTHOR
Malcolm Beattie, mbeattie@sable.ox.ac.uk.
=cut
use strict;
use Config;
use B qw(peekop class comppadlist main_start svref_2object walksymtable
OPpLVAL_INTRO SVf_POK SVf_ROK OPpOUR_INTRO cstring
);
sub UNKNOWN { ["?", "?", "?"] }
my @pad; # lexicals in current pad
# as ["(lexical)", type, name]
my %done; # keyed by $$op: set when each $op is done
my $top = UNKNOWN; # shadows top element of stack as
# [pack, type, name] (pack can be "(lexical)")
my $file; # shadows current filename
my $line; # shadows current line number
my $subname; # shadows current sub name
my %table; # Multi-level hash to record all uses etc.
my @todo = (); # List of CVs that need processing
my %code = (intro => "i", used => "",
subdef => "s", subused => "&",
formdef => "f", meth => "->");
# Options
my ($debug_op, $debug_top, $nodefs, $raw);
sub process {
my ($var, $event) = @_;
my ($pack, $type, $name) = @$var;
if ($type eq "*") {
if ($event eq "used") {
return;
} elsif ($event eq "subused") {
$type = "&";
}
}
$type =~ s/(.)\*$/$1/g;
if ($raw) {
printf "%-16s %-12s %5d %-12s %4s %-16s %s\n",
$file, $subname, $line, $pack, $type, $name, $event;
} else {
# Wheee
push(@{$table{$file}->{$subname}->{$pack}->{$type.$name}->{$event}},
$line);
}
}
sub load_pad {
my $padlist = shift;
my ($namelistav, $vallistav, @namelist, $ix);
@pad = ();
return if class($padlist) =~ '^(?:SPECIAL|NULL)\z';
($namelistav,$vallistav) = $padlist->ARRAY;
@namelist = $namelistav->ARRAY;
for ($ix = 1; $ix < @namelist; $ix++) {
my $namesv = $namelist[$ix];
next if class($namesv) eq "SPECIAL";
my ($type, $name) = $namesv->PV =~ /^(.)([^\0]*)(\0.*)?$/;
$pad[$ix] = ["(lexical)", $type || '?', $name || '?'];
}
if ($Config{useithreads}) {
my (@vallist);
@vallist = $vallistav->ARRAY;
for ($ix = 1; $ix < @vallist; $ix++) {
my $valsv = $vallist[$ix];
next unless class($valsv) eq "GV";
next if class($valsv->STASH) eq 'SPECIAL';
# these pad GVs don't have corresponding names, so same @pad
# array can be used without collisions
$pad[$ix] = [$valsv->STASH->NAME, "*", $valsv->NAME];
}
}
}
sub xref {
my $start = shift;
my $op;
for ($op = $start; $$op; $op = $op->next) {
last if $done{$$op}++;
warn sprintf("top = [%s, %s, %s]\n", @$top) if $debug_top;
warn peekop($op), "\n" if $debug_op;
my $opname = $op->name;
if ($opname =~ /^(or|and|mapwhile|grepwhile|range|cond_expr)$/) {
xref($op->other);
} elsif ($opname eq "match" || $opname eq "subst") {
xref($op->pmreplstart);
} elsif ($opname eq "substcont") {
xref($op->other->pmreplstart);
$op = $op->other;
redo;
} elsif ($opname eq "enterloop") {
xref($op->redoop);
xref($op->nextop);
xref($op->lastop);
} elsif ($opname eq "subst") {
xref($op->pmreplstart);
} else {
no strict 'refs';
my $ppname = "pp_$opname";
&$ppname($op) if defined(&$ppname);
}
}
}
sub xref_cv {
my $cv = shift;
my $pack = $cv->GV->STASH->NAME;
$subname = ($pack eq "main" ? "" : "$pack\::") . $cv->GV->NAME;
load_pad($cv->PADLIST);
xref($cv->START);
$subname = "(main)";
}
sub xref_object {
my $cvref = shift;
xref_cv(svref_2object($cvref));
}
sub xref_main {
$subname = "(main)";
load_pad(comppadlist);
xref(main_start);
while (@todo) {
xref_cv(shift @todo);
}
}
sub pp_nextstate {
my $op = shift;
$file = $op->file;
$line = $op->line;
$top = UNKNOWN;
}
sub pp_padrange {
my $op = shift;
my $count = $op->private & 127;
for my $i (0..$count-1) {
$top = $pad[$op->targ + $i];
process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used");
}
}
sub pp_padsv {
my $op = shift;
$top = $pad[$op->targ];
process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used");
}
sub pp_padav { pp_padsv(@_) }
sub pp_padhv { pp_padsv(@_) }
sub deref {
my ($op, $var, $as) = @_;
$var->[1] = $as . $var->[1];
process($var, $op->private & OPpOUR_INTRO ? "intro" : "used");
}
sub pp_rv2cv { deref(shift, $top, "&"); }
sub pp_rv2hv { deref(shift, $top, "%"); }
sub pp_rv2sv { deref(shift, $top, "\$"); }
sub pp_rv2av { deref(shift, $top, "\@"); }
sub pp_rv2gv { deref(shift, $top, "*"); }
sub pp_gvsv {
my $op = shift;
my $gv;
if ($Config{useithreads}) {
$top = $pad[$op->padix];
$top = UNKNOWN unless $top;
$top->[1] = '$';
}
else {
$gv = $op->gv;
$top = [$gv->STASH->NAME, '$', $gv->SAFENAME];
}
process($top, $op->private & OPpLVAL_INTRO ||
$op->private & OPpOUR_INTRO ? "intro" : "used");
}
sub pp_gv {
my $op = shift;
my $gv;
if ($Config{useithreads}) {
$top = $pad[$op->padix];
$top = UNKNOWN unless $top;
$top->[1] = '*';
}
else {
$gv = $op->gv;
if ($gv->FLAGS & SVf_ROK) { # sub ref
my $cv = $gv->RV;
$top = [$cv->STASH->NAME, '*', B::safename($cv->NAME_HEK)]
}
else {
$top = [$gv->STASH->NAME, '*', $gv->SAFENAME];
}
}
process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used");
}
sub pp_const {
my $op = shift;
my $sv = $op->sv;
# constant could be in the pad (under useithreads)
if ($$sv) {
$top = ["?", "",
(class($sv) ne "SPECIAL" && $sv->FLAGS & SVf_POK)
? cstring($sv->PV) : "?"];
}
else {
$top = $pad[$op->targ];
$top = UNKNOWN unless $top;
}
}
sub pp_method {
my $op = shift;
$top = ["(method)", "->".$top->[1], $top->[2]];
}
sub pp_entersub {
my $op = shift;
if ($top->[1] eq "m") {
process($top, "meth");
} else {
process($top, "subused");
}
$top = UNKNOWN;
}
#
# Stuff for cross referencing definitions of variables and subs
#
sub B::GV::xref {
my $gv = shift;
my $cv = $gv->CV;
if ($$cv) {
#return if $done{$$cv}++;
$file = $gv->FILE;
$line = $gv->LINE;
process([$gv->STASH->NAME, "&", $gv->NAME], "subdef");
push(@todo, $cv);
}
my $form = $gv->FORM;
if ($$form) {
return if $done{$$form}++;
$file = $gv->FILE;
$line = $gv->LINE;
process([$gv->STASH->NAME, "", $gv->NAME], "formdef");
}
}
sub xref_definitions {
my ($pack, %exclude);
return if $nodefs;
$subname = "(definitions)";
foreach $pack (qw(B O AutoLoader DynaLoader XSLoader Config DB VMS
strict vars FileHandle Exporter Carp PerlIO::Layer
attributes utf8 warnings)) {
$exclude{$pack."::"} = 1;
}
no strict qw(vars refs);
walksymtable(\%{"main::"}, "xref", sub { !defined($exclude{$_[0]}) });
}
sub output {
return if $raw;
my ($file, $subname, $pack, $name, $ev, $perfile, $persubname,
$perpack, $pername, $perev);
foreach $file (sort(keys(%table))) {
$perfile = $table{$file};
print "File $file\n";
foreach $subname (sort(keys(%$perfile))) {
$persubname = $perfile->{$subname};
print " Subroutine $subname\n";
foreach $pack (sort(keys(%$persubname))) {
$perpack = $persubname->{$pack};
print " Package $pack\n";
foreach $name (sort(keys(%$perpack))) {
$pername = $perpack->{$name};
my @lines;
foreach $ev (qw(intro formdef subdef meth subused used)) {
$perev = $pername->{$ev};
if (defined($perev) && @$perev) {
my $code = $code{$ev};
push(@lines, map("$code$_", @$perev));
}
}
printf " %-16s %s\n", $name, join(", ", @lines);
}
}
}
}
}
sub compile {
my @options = @_;
my ($option, $opt, $arg);
OPTION:
while ($option = shift @options) {
if ($option =~ /^-(.)(.*)/) {
$opt = $1;
$arg = $2;
} else {
unshift @options, $option;
last OPTION;
}
if ($opt eq "-" && $arg eq "-") {
shift @options;
last OPTION;
} elsif ($opt eq "o") {
$arg ||= shift @options;
open(STDOUT, '>', $arg) or return "$arg: $!\n";
} elsif ($opt eq "d") {
$nodefs = 1;
} elsif ($opt eq "r") {
$raw = 1;
} elsif ($opt eq "D") {
$arg ||= shift @options;
foreach $arg (split(//, $arg)) {
if ($arg eq "o") {
B->debug(1);
} elsif ($arg eq "O") {
$debug_op = 1;
} elsif ($arg eq "t") {
$debug_top = 1;
}
}
}
}
if (@options) {
return sub {
my $objname;
xref_definitions();
foreach $objname (@options) {
$objname = "main::$objname" unless $objname =~ /::/;
eval "xref_object(\\&$objname)";
die "xref_object(\\&$objname) failed: $@" if $@;
}
output();
}
} else {
return sub {
xref_definitions();
xref_main();
output();
}
}
}
1;

View File

@@ -0,0 +1,390 @@
package Compress::Raw::Bzip2;
use strict ;
use warnings ;
require 5.006 ;
require Exporter;
use Carp ;
use bytes ;
our ($VERSION, $XS_VERSION, @ISA, @EXPORT, $AUTOLOAD);
$VERSION = '2.204_001';
$XS_VERSION = $VERSION;
$VERSION = eval $VERSION;
@ISA = qw(Exporter);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
BZ_RUN
BZ_FLUSH
BZ_FINISH
BZ_OK
BZ_RUN_OK
BZ_FLUSH_OK
BZ_FINISH_OK
BZ_STREAM_END
BZ_SEQUENCE_ERROR
BZ_PARAM_ERROR
BZ_MEM_ERROR
BZ_DATA_ERROR
BZ_DATA_ERROR_MAGIC
BZ_IO_ERROR
BZ_UNEXPECTED_EOF
BZ_OUTBUFF_FULL
BZ_CONFIG_ERROR
);
sub AUTOLOAD {
my($constname);
($constname = $AUTOLOAD) =~ s/.*:://;
my ($error, $val) = constant($constname);
Carp::croak $error if $error;
no strict 'refs';
*{$AUTOLOAD} = sub { $val };
goto &{$AUTOLOAD};
}
use constant FLAG_APPEND => 1 ;
use constant FLAG_CRC => 2 ;
use constant FLAG_ADLER => 4 ;
use constant FLAG_CONSUME_INPUT => 8 ;
eval {
require XSLoader;
XSLoader::load('Compress::Raw::Bzip2', $XS_VERSION);
1;
}
or do {
require DynaLoader;
local @ISA = qw(DynaLoader);
bootstrap Compress::Raw::Bzip2 $XS_VERSION ;
};
#sub Compress::Raw::Bzip2::new
#{
# my $class = shift ;
# my ($ptr, $status) = _new(@_);
# return wantarray ? (undef, $status) : undef
# unless $ptr ;
# my $obj = bless [$ptr], $class ;
# return wantarray ? ($obj, $status) : $obj;
#}
#
#package Compress::Raw::Bunzip2 ;
#
#sub Compress::Raw::Bunzip2::new
#{
# my $class = shift ;
# my ($ptr, $status) = _new(@_);
# return wantarray ? (undef, $status) : undef
# unless $ptr ;
# my $obj = bless [$ptr], $class ;
# return wantarray ? ($obj, $status) : $obj;
#}
sub Compress::Raw::Bzip2::STORABLE_freeze
{
my $type = ref shift;
croak "Cannot freeze $type object\n";
}
sub Compress::Raw::Bzip2::STORABLE_thaw
{
my $type = ref shift;
croak "Cannot thaw $type object\n";
}
sub Compress::Raw::Bunzip2::STORABLE_freeze
{
my $type = ref shift;
croak "Cannot freeze $type object\n";
}
sub Compress::Raw::Bunzip2::STORABLE_thaw
{
my $type = ref shift;
croak "Cannot thaw $type object\n";
}
package Compress::Raw::Bzip2;
1;
__END__
=head1 NAME
Compress::Raw::Bzip2 - Low-Level Interface to bzip2 compression library
=head1 SYNOPSIS
use Compress::Raw::Bzip2 ;
my ($bz, $status) = new Compress::Raw::Bzip2 [OPTS]
or die "Cannot create bzip2 object: $bzerno\n";
$status = $bz->bzdeflate($input, $output);
$status = $bz->bzflush($output);
$status = $bz->bzclose($output);
my ($bz, $status) = new Compress::Raw::Bunzip2 [OPTS]
or die "Cannot create bunzip2 object: $bzerno\n";
$status = $bz->bzinflate($input, $output);
my $version = Compress::Raw::Bzip2::bzlibversion();
=head1 DESCRIPTION
C<Compress::Raw::Bzip2> provides an interface to the in-memory
compression/uncompression functions from the bzip2 compression library.
Although the primary purpose for the existence of C<Compress::Raw::Bzip2>
is for use by the C<IO::Compress::Bzip2> and C<IO::Compress::Bunzip2>
modules, it can be used on its own for simple compression/uncompression
tasks.
=head1 Compression
=head2 ($z, $status) = new Compress::Raw::Bzip2 $appendOutput, $blockSize100k, $workfactor;
Creates a new compression object.
If successful, it will return the initialised compression object, C<$z>
and a C<$status> of C<BZ_OK> in a list context. In scalar context it
returns the deflation object, C<$z>, only.
If not successful, the returned compression object, C<$z>, will be
I<undef> and C<$status> will hold the a I<bzip2> error code.
Below is a list of the valid options:
=over 5
=item B<$appendOutput>
Controls whether the compressed data is appended to the output buffer in
the C<bzdeflate>, C<bzflush> and C<bzclose> methods.
Defaults to 1.
=item B<$blockSize100k>
To quote the bzip2 documentation
blockSize100k specifies the block size to be used for compression. It
should be a value between 1 and 9 inclusive, and the actual block size
used is 100000 x this figure. 9 gives the best compression but takes
most memory.
Defaults to 1.
=item B<$workfactor>
To quote the bzip2 documentation
This parameter controls how the compression phase behaves when
presented with worst case, highly repetitive, input data. If
compression runs into difficulties caused by repetitive data, the
library switches from the standard sorting algorithm to a fallback
algorithm. The fallback is slower than the standard algorithm by
perhaps a factor of three, but always behaves reasonably, no matter how
bad the input.
Lower values of workFactor reduce the amount of effort the standard
algorithm will expend before resorting to the fallback. You should set
this parameter carefully; too low, and many inputs will be handled by
the fallback algorithm and so compress rather slowly, too high, and
your average-to-worst case compression times can become very large. The
default value of 30 gives reasonable behaviour over a wide range of
circumstances.
Allowable values range from 0 to 250 inclusive. 0 is a special case,
equivalent to using the default value of 30.
Defaults to 0.
=back
=head2 $status = $bz->bzdeflate($input, $output);
Reads the contents of C<$input>, compresses it and writes the compressed
data to C<$output>.
Returns C<BZ_RUN_OK> on success and a C<bzip2> error code on failure.
If C<appendOutput> is enabled in the constructor for the bzip2 object, the
compressed data will be appended to C<$output>. If not enabled, C<$output>
will be truncated before the compressed data is written to it.
=head2 $status = $bz->bzflush($output);
Flushes any pending compressed data to C<$output>.
Returns C<BZ_RUN_OK> on success and a C<bzip2> error code on failure.
=head2 $status = $bz->bzclose($output);
Terminates the compressed data stream and flushes any pending compressed
data to C<$output>.
Returns C<BZ_STREAM_END> on success and a C<bzip2> error code on failure.
=head2 Example
=head1 Uncompression
=head2 ($z, $status) = new Compress::Raw::Bunzip2 $appendOutput, $consumeInput, $small, $verbosity, $limitOutput;
If successful, it will return the initialised uncompression object, C<$z>
and a C<$status> of C<BZ_OK> in a list context. In scalar context it
returns the deflation object, C<$z>, only.
If not successful, the returned uncompression object, C<$z>, will be
I<undef> and C<$status> will hold the a I<bzip2> error code.
Below is a list of the valid options:
=over 5
=item B<$appendOutput>
Controls whether the compressed data is appended to the output buffer in the
C<bzinflate>, C<bzflush> and C<bzclose> methods.
Defaults to 1.
=item B<$consumeInput>
=item B<$small>
To quote the bzip2 documentation
If small is nonzero, the library will use an alternative decompression
algorithm which uses less memory but at the cost of decompressing more
slowly (roughly speaking, half the speed, but the maximum memory
requirement drops to around 2300k).
Defaults to 0.
=item B<$limitOutput>
The C<LimitOutput> option changes the behavior of the C<< $i->bzinflate >>
method so that the amount of memory used by the output buffer can be
limited.
When C<LimitOutput> is used the size of the output buffer used will either
be the 16k or the amount of memory already allocated to C<$output>,
whichever is larger. Predicting the output size available is tricky, so
don't rely on getting an exact output buffer size.
When C<LimitOutout> is not specified C<< $i->bzinflate >> will use as much
memory as it takes to write all the uncompressed data it creates by
uncompressing the input buffer.
If C<LimitOutput> is enabled, the C<ConsumeInput> option will also be
enabled.
This option defaults to false.
=item B<$verbosity>
This parameter is ignored.
Defaults to 0.
=back
=head2 $status = $z->bzinflate($input, $output);
Uncompresses C<$input> and writes the uncompressed data to C<$output>.
Returns C<BZ_OK> if the uncompression was successful, but the end of the
compressed data stream has not been reached. Returns C<BZ_STREAM_END> on
successful uncompression and the end of the compression stream has been
reached.
If C<consumeInput> is enabled in the constructor for the bunzip2 object,
C<$input> will have all compressed data removed from it after
uncompression. On C<BZ_OK> return this will mean that C<$input> will be an
empty string; when C<BZ_STREAM_END> C<$input> will either be an empty
string or will contain whatever data immediately followed the compressed
data stream.
If C<appendOutput> is enabled in the constructor for the bunzip2 object,
the uncompressed data will be appended to C<$output>. If not enabled,
C<$output> will be truncated before the uncompressed data is written to it.
=head1 Misc
=head2 my $version = Compress::Raw::Bzip2::bzlibversion();
Returns the version of the underlying bzip2 library.
=head1 Constants
The following bzip2 constants are exported by this module
BZ_RUN
BZ_FLUSH
BZ_FINISH
BZ_OK
BZ_RUN_OK
BZ_FLUSH_OK
BZ_FINISH_OK
BZ_STREAM_END
BZ_SEQUENCE_ERROR
BZ_PARAM_ERROR
BZ_MEM_ERROR
BZ_DATA_ERROR
BZ_DATA_ERROR_MAGIC
BZ_IO_ERROR
BZ_UNEXPECTED_EOF
BZ_OUTBUFF_FULL
BZ_CONFIG_ERROR
=head1 SUPPORT
General feedback/questions/bug reports should be sent to
L<https://github.com/pmqs/Compress-Raw-Bzip2/issues> (preferred) or
L<https://rt.cpan.org/Public/Dist/Display.html?Name=Compress-Raw-Bzip2>.
=head1 SEE ALSO
L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzip>, L<IO::Uncompress::UnLzip>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Compress::Zstd>, L<IO::Uncompress::UnZstd>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
L<IO::Compress::FAQ|IO::Compress::FAQ>
L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
L<Archive::Tar|Archive::Tar>,
L<IO::Zlib|IO::Zlib>
The primary site for the bzip2 program is L<https://sourceware.org/bzip2/>.
See the module L<Compress::Bzip2|Compress::Bzip2>
=head1 AUTHOR
This module was written by Paul Marquess, C<pmqs@cpan.org>.
=head1 MODIFICATION HISTORY
See the Changes file.
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2005-2023 Paul Marquess. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,111 @@
# This file was created by configpm when Perl was built. Any changes
# made to this file will be lost the next time perl is built.
# for a description of the variables, please have a look at the
# Glossary file, as written in the Porting folder, or use the url:
# https://github.com/Perl/perl5/blob/blead/Porting/Glossary
package Config;
use strict;
use warnings;
our ( %Config, $VERSION );
$VERSION = "5.038002";
# Skip @Config::EXPORT because it only contains %Config, which we special
# case below as it's not a function. @Config::EXPORT won't change in the
# lifetime of Perl 5.
my %Export_Cache = (myconfig => 1, config_sh => 1, config_vars => 1,
config_re => 1, compile_date => 1, local_patches => 1,
bincompat_options => 1, non_bincompat_options => 1,
header_files => 1);
@Config::EXPORT = qw(%Config);
@Config::EXPORT_OK = keys %Export_Cache;
# Need to stub all the functions to make code such as print Config::config_sh
# keep working
sub bincompat_options;
sub compile_date;
sub config_re;
sub config_sh;
sub config_vars;
sub header_files;
sub local_patches;
sub myconfig;
sub non_bincompat_options;
# Define our own import method to avoid pulling in the full Exporter:
sub import {
shift;
@_ = @Config::EXPORT unless @_;
my @funcs = grep $_ ne '%Config', @_;
my $export_Config = @funcs < @_ ? 1 : 0;
no strict 'refs';
my $callpkg = caller(0);
foreach my $func (@funcs) {
die qq{"$func" is not exported by the Config module\n}
unless $Export_Cache{$func};
*{$callpkg.'::'.$func} = \&{$func};
}
*{"$callpkg\::Config"} = \%Config if $export_Config;
return;
}
die "$0: Perl lib version (5.38.2) doesn't match executable '$^X' version ($])"
unless $^V;
$^V eq 5.38.2
or die sprintf "%s: Perl lib version (5.38.2) doesn't match executable '$^X' version (%vd)", $0, $^V;
sub FETCH {
my($self, $key) = @_;
# check for cached value (which may be undef so we use exists not defined)
return exists $self->{$key} ? $self->{$key} : $self->fetch_string($key);
}
sub TIEHASH {
bless $_[1], $_[0];
}
sub DESTROY { }
sub AUTOLOAD {
require 'Config_heavy.pl';
goto \&launcher unless $Config::AUTOLOAD =~ /launcher$/;
die "&Config::AUTOLOAD failed on $Config::AUTOLOAD";
}
# tie returns the object, so the value returned to require will be true.
tie %Config, 'Config', {
archlibexp => '/usr/lib/perl5/core_perl',
archname => 'x86_64-msys-thread-multi',
cc => 'gcc',
d_readlink => undef,
d_symlink => 'define',
dlext => 'dll',
dlsrc => 'dl_dlopen.xs',
dont_use_nlink => undef,
exe_ext => '.exe',
inc_version_list => ' ',
intsize => '4',
ldlibpthname => 'PATH',
libpth => '/usr/lib',
osname => 'msys',
osvers => '3.5.7-882031da.x86_64',
path_sep => ':',
privlibexp => '/usr/share/perl5/core_perl',
scriptdir => '/usr/bin/core_perl',
sitearchexp => '/usr/lib/perl5/site_perl',
sitelibexp => '/usr/share/perl5/site_perl',
so => 'dll',
useithreads => 'define',
usevendorprefix => 'define',
version => '5.38.2',
};

View File

@@ -0,0 +1,12 @@
######################################################################
# WARNING: 'lib/Config_git.pl' is generated by make_patchnum.pl
# DO NOT EDIT DIRECTLY - edit make_patchnum.pl instead
######################################################################
$Config::Git_Data=<<'ENDOFGIT';
git_commit_id=''
git_describe=''
git_branch=''
git_uncommitted_changes=''
git_commit_id_title=''
ENDOFGIT

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,838 @@
package Cwd;
use strict;
use Exporter;
our $VERSION = '3.89';
my $xs_version = $VERSION;
$VERSION =~ tr/_//d;
our @ISA = qw/ Exporter /;
our @EXPORT = qw(cwd getcwd fastcwd fastgetcwd);
push @EXPORT, qw(getdcwd) if $^O eq 'MSWin32';
our @EXPORT_OK = qw(chdir abs_path fast_abs_path realpath fast_realpath);
# sys_cwd may keep the builtin command
# All the functionality of this module may provided by builtins,
# there is no sense to process the rest of the file.
# The best choice may be to have this in BEGIN, but how to return from BEGIN?
if ($^O eq 'os2') {
local $^W = 0;
*cwd = defined &sys_cwd ? \&sys_cwd : \&_os2_cwd;
*getcwd = \&cwd;
*fastgetcwd = \&cwd;
*fastcwd = \&cwd;
*fast_abs_path = \&sys_abspath if defined &sys_abspath;
*abs_path = \&fast_abs_path;
*realpath = \&fast_abs_path;
*fast_realpath = \&fast_abs_path;
return 1;
}
# Need to look up the feature settings on VMS. The preferred way is to use the
# VMS::Feature module, but that may not be available to dual life modules.
my $use_vms_feature;
BEGIN {
if ($^O eq 'VMS') {
if (eval { local $SIG{__DIE__};
local @INC = @INC;
pop @INC if $INC[-1] eq '.';
require VMS::Feature; }) {
$use_vms_feature = 1;
}
}
}
# Need to look up the UNIX report mode. This may become a dynamic mode
# in the future.
sub _vms_unix_rpt {
my $unix_rpt;
if ($use_vms_feature) {
$unix_rpt = VMS::Feature::current("filename_unix_report");
} else {
my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || '';
$unix_rpt = $env_unix_rpt =~ /^[ET1]/i;
}
return $unix_rpt;
}
# Need to look up the EFS character set mode. This may become a dynamic
# mode in the future.
sub _vms_efs {
my $efs;
if ($use_vms_feature) {
$efs = VMS::Feature::current("efs_charset");
} else {
my $env_efs = $ENV{'DECC$EFS_CHARSET'} || '';
$efs = $env_efs =~ /^[ET1]/i;
}
return $efs;
}
# If loading the XS stuff doesn't work, we can fall back to pure perl
if(! defined &getcwd && defined &DynaLoader::boot_DynaLoader) { # skipped on miniperl
require XSLoader;
XSLoader::load( __PACKAGE__, $xs_version);
}
# Big nasty table of function aliases
my %METHOD_MAP =
(
VMS =>
{
cwd => '_vms_cwd',
getcwd => '_vms_cwd',
fastcwd => '_vms_cwd',
fastgetcwd => '_vms_cwd',
abs_path => '_vms_abs_path',
fast_abs_path => '_vms_abs_path',
},
MSWin32 =>
{
# We assume that &_NT_cwd is defined as an XSUB or in the core.
cwd => '_NT_cwd',
getcwd => '_NT_cwd',
fastcwd => '_NT_cwd',
fastgetcwd => '_NT_cwd',
abs_path => 'fast_abs_path',
realpath => 'fast_abs_path',
},
dos =>
{
cwd => '_dos_cwd',
getcwd => '_dos_cwd',
fastgetcwd => '_dos_cwd',
fastcwd => '_dos_cwd',
abs_path => 'fast_abs_path',
},
# QNX4. QNX6 has a $os of 'nto'.
qnx =>
{
cwd => '_qnx_cwd',
getcwd => '_qnx_cwd',
fastgetcwd => '_qnx_cwd',
fastcwd => '_qnx_cwd',
abs_path => '_qnx_abs_path',
fast_abs_path => '_qnx_abs_path',
},
cygwin =>
{
getcwd => 'cwd',
fastgetcwd => 'cwd',
fastcwd => 'cwd',
abs_path => 'fast_abs_path',
realpath => 'fast_abs_path',
},
msys =>
{
getcwd => 'cwd',
fastgetcwd => 'cwd',
fastcwd => 'cwd',
abs_path => 'fast_abs_path',
realpath => 'fast_abs_path',
},
amigaos =>
{
getcwd => '_backtick_pwd',
fastgetcwd => '_backtick_pwd',
fastcwd => '_backtick_pwd',
abs_path => 'fast_abs_path',
}
);
$METHOD_MAP{NT} = $METHOD_MAP{MSWin32};
# Find the pwd command in the expected locations. We assume these
# are safe. This prevents _backtick_pwd() consulting $ENV{PATH}
# so everything works under taint mode.
my $pwd_cmd;
if($^O ne 'MSWin32') {
foreach my $try ('/bin/pwd',
'/usr/bin/pwd',
'/QOpenSys/bin/pwd', # OS/400 PASE.
) {
if( -x $try ) {
$pwd_cmd = $try;
last;
}
}
}
# Android has a built-in pwd. Using $pwd_cmd will DTRT if
# this perl was compiled with -Dd_useshellcmds, which is the
# default for Android, but the block below is needed for the
# miniperl running on the host when cross-compiling, and
# potentially for native builds with -Ud_useshellcmds.
if ($^O =~ /android/) {
# If targetsh is executable, then we're either a full
# perl, or a miniperl for a native build.
if ( exists($Config::Config{targetsh}) && -x $Config::Config{targetsh}) {
$pwd_cmd = "$Config::Config{targetsh} -c pwd"
}
else {
my $sh = $Config::Config{sh} || (-x '/system/bin/sh' ? '/system/bin/sh' : 'sh');
$pwd_cmd = "$sh -c pwd"
}
}
my $found_pwd_cmd = defined($pwd_cmd);
# Lazy-load Carp
sub _carp { require Carp; Carp::carp(@_) }
sub _croak { require Carp; Carp::croak(@_) }
# The 'natural and safe form' for UNIX (pwd may be setuid root)
sub _backtick_pwd {
# Localize %ENV entries in a way that won't create new hash keys.
# Under AmigaOS we don't want to localize as it stops perl from
# finding 'sh' in the PATH.
my @localize = grep exists $ENV{$_}, qw(IFS CDPATH ENV BASH_ENV) if $^O ne "amigaos";
local @ENV{@localize} if @localize;
# empty PATH is the same as "." on *nix, so localize it to /something/
# we won't *use* the path as code above turns $pwd_cmd into a specific
# executable, but it will blow up anyway under taint. We could set it to
# anything absolute. Perhaps "/" would be better.
local $ENV{PATH}= "/usr/bin"
if $^O ne "amigaos";
my $cwd = `$pwd_cmd`;
# Belt-and-suspenders in case someone said "undef $/".
local $/ = "\n";
# `pwd` may fail e.g. if the disk is full
chomp($cwd) if defined $cwd;
$cwd;
}
# Since some ports may predefine cwd internally (e.g., NT)
# we take care not to override an existing definition for cwd().
unless ($METHOD_MAP{$^O}{cwd} or defined &cwd) {
if( $found_pwd_cmd )
{
*cwd = \&_backtick_pwd;
}
else {
# getcwd() might have an empty prototype
*cwd = sub { getcwd(); };
}
}
if ($^O eq 'cygwin' || $^O eq 'msys') {
# We need to make sure cwd() is called with no args, because it's
# got an arg-less prototype and will die if args are present.
local $^W = 0;
my $orig_cwd = \&cwd;
*cwd = sub { &$orig_cwd() }
}
# set a reasonable (and very safe) default for fastgetcwd, in case it
# isn't redefined later (20001212 rspier)
*fastgetcwd = \&cwd;
# A non-XS version of getcwd() - also used to bootstrap the perl build
# process, when miniperl is running and no XS loading happens.
sub _perl_getcwd
{
abs_path('.');
}
# By John Bazik
#
# Usage: $cwd = &fastcwd;
#
# This is a faster version of getcwd. It's also more dangerous because
# you might chdir out of a directory that you can't chdir back into.
sub fastcwd_ {
my($odev, $oino, $cdev, $cino, $tdev, $tino);
my(@path, $path);
local(*DIR);
my($orig_cdev, $orig_cino) = stat('.');
($cdev, $cino) = ($orig_cdev, $orig_cino);
for (;;) {
my $direntry;
($odev, $oino) = ($cdev, $cino);
CORE::chdir('..') || return undef;
($cdev, $cino) = stat('.');
last if $odev == $cdev && $oino eq $cino;
opendir(DIR, '.') || return undef;
for (;;) {
$direntry = readdir(DIR);
last unless defined $direntry;
next if $direntry eq '.';
next if $direntry eq '..';
($tdev, $tino) = lstat($direntry);
last unless $tdev != $odev || $tino ne $oino;
}
closedir(DIR);
return undef unless defined $direntry; # should never happen
unshift(@path, $direntry);
}
$path = '/' . join('/', @path);
if ($^O eq 'apollo') { $path = "/".$path; }
# At this point $path may be tainted (if tainting) and chdir would fail.
# Untaint it then check that we landed where we started.
$path =~ /^(.*)\z/s # untaint
&& CORE::chdir($1) or return undef;
($cdev, $cino) = stat('.');
die "Unstable directory path, current directory changed unexpectedly"
if $cdev != $orig_cdev || $cino ne $orig_cino;
$path;
}
if (not defined &fastcwd) { *fastcwd = \&fastcwd_ }
# Keeps track of current working directory in PWD environment var
# Usage:
# use Cwd 'chdir';
# chdir $newdir;
my $chdir_init = 0;
sub chdir_init {
if ($ENV{'PWD'} and $^O ne 'os2' and $^O ne 'dos' and $^O ne 'MSWin32') {
my($dd,$di) = stat('.');
my($pd,$pi) = stat($ENV{'PWD'});
if (!defined $dd or !defined $pd or $di ne $pi or $dd != $pd) {
$ENV{'PWD'} = cwd();
}
}
else {
my $wd = cwd();
$wd = Win32::GetFullPathName($wd) if $^O eq 'MSWin32';
$ENV{'PWD'} = $wd;
}
# Strip an automounter prefix (where /tmp_mnt/foo/bar == /foo/bar)
if ($^O ne 'MSWin32' and $ENV{'PWD'} =~ m|(/[^/]+(/[^/]+/[^/]+))(.*)|s) {
my($pd,$pi) = stat($2);
my($dd,$di) = stat($1);
if (defined $pd and defined $dd and $di ne $pi and $dd == $pd) {
$ENV{'PWD'}="$2$3";
}
}
$chdir_init = 1;
}
sub chdir {
my $newdir = @_ ? shift : ''; # allow for no arg (chdir to HOME dir)
if ($^O eq "cygwin" || $^O eq "msys") {
$newdir =~ s|\A///+|//|;
$newdir =~ s|(?<=[^/])//+|/|g;
}
elsif ($^O ne 'MSWin32') {
$newdir =~ s|///*|/|g;
}
chdir_init() unless $chdir_init;
my $newpwd;
if ($^O eq 'MSWin32') {
# get the full path name *before* the chdir()
$newpwd = Win32::GetFullPathName($newdir);
}
return 0 unless CORE::chdir $newdir;
if ($^O eq 'VMS') {
return $ENV{'PWD'} = $ENV{'DEFAULT'}
}
elsif ($^O eq 'MSWin32') {
$ENV{'PWD'} = $newpwd;
return 1;
}
if (ref $newdir eq 'GLOB') { # in case a file/dir handle is passed in
$ENV{'PWD'} = cwd();
} elsif ($newdir =~ m#^/#s) {
$ENV{'PWD'} = $newdir;
} else {
my @curdir = split(m#/#,$ENV{'PWD'});
@curdir = ('') unless @curdir;
my $component;
foreach $component (split(m#/#, $newdir)) {
next if $component eq '.';
pop(@curdir),next if $component eq '..';
push(@curdir,$component);
}
$ENV{'PWD'} = join('/',@curdir) || '/';
}
1;
}
sub _perl_abs_path
{
my $start = @_ ? shift : '.';
my($dotdots, $cwd, @pst, @cst, $dir, @tst);
unless (@cst = stat( $start ))
{
return undef;
}
unless (-d _) {
# Make sure we can be invoked on plain files, not just directories.
# NOTE that this routine assumes that '/' is the only directory separator.
my ($dir, $file) = $start =~ m{^(.*)/(.+)$}
or return cwd() . '/' . $start;
# Can't use "-l _" here, because the previous stat was a stat(), not an lstat().
if (-l $start) {
my $link_target = readlink($start);
die "Can't resolve link $start: $!" unless defined $link_target;
require File::Spec;
$link_target = $dir . '/' . $link_target
unless File::Spec->file_name_is_absolute($link_target);
return abs_path($link_target);
}
return $dir ? abs_path($dir) . "/$file" : "/$file";
}
$cwd = '';
$dotdots = $start;
do
{
$dotdots .= '/..';
@pst = @cst;
local *PARENT;
unless (opendir(PARENT, $dotdots))
{
return undef;
}
unless (@cst = stat($dotdots))
{
my $e = $!;
closedir(PARENT);
$! = $e;
return undef;
}
if ($pst[0] == $cst[0] && $pst[1] eq $cst[1])
{
$dir = undef;
}
else
{
do
{
unless (defined ($dir = readdir(PARENT)))
{
closedir(PARENT);
require Errno;
$! = Errno::ENOENT();
return undef;
}
$tst[0] = $pst[0]+1 unless (@tst = lstat("$dotdots/$dir"))
}
while ($dir eq '.' || $dir eq '..' || $tst[0] != $pst[0] ||
$tst[1] ne $pst[1]);
}
$cwd = (defined $dir ? "$dir" : "" ) . "/$cwd" ;
closedir(PARENT);
} while (defined $dir);
chop($cwd) unless $cwd eq '/'; # drop the trailing /
$cwd;
}
my $Curdir;
sub fast_abs_path {
local $ENV{PWD} = $ENV{PWD} || ''; # Guard against clobberage
my $cwd = getcwd();
defined $cwd or return undef;
require File::Spec;
my $path = @_ ? shift : ($Curdir ||= File::Spec->curdir);
# Detaint else we'll explode in taint mode. This is safe because
# we're not doing anything dangerous with it.
($path) = $path =~ /(.*)/s;
($cwd) = $cwd =~ /(.*)/s;
unless (-e $path) {
require Errno;
$! = Errno::ENOENT();
return undef;
}
unless (-d _) {
# Make sure we can be invoked on plain files, not just directories.
my ($vol, $dir, $file) = File::Spec->splitpath($path);
return File::Spec->catfile($cwd, $path) unless length $dir;
if (-l $path) {
my $link_target = readlink($path);
defined $link_target or return undef;
$link_target = File::Spec->catpath($vol, $dir, $link_target)
unless File::Spec->file_name_is_absolute($link_target);
return fast_abs_path($link_target);
}
return $dir eq File::Spec->rootdir
? File::Spec->catpath($vol, $dir, $file)
: fast_abs_path(File::Spec->catpath($vol, $dir, '')) . '/' . $file;
}
if (!CORE::chdir($path)) {
return undef;
}
my $realpath = getcwd();
if (! ((-d $cwd) && (CORE::chdir($cwd)))) {
_croak("Cannot chdir back to $cwd: $!");
}
$realpath;
}
# added function alias to follow principle of least surprise
# based on previous aliasing. --tchrist 27-Jan-00
*fast_realpath = \&fast_abs_path;
# --- PORTING SECTION ---
# VMS: $ENV{'DEFAULT'} points to default directory at all times
# 06-Mar-1996 Charles Bailey bailey@newman.upenn.edu
# Note: Use of Cwd::chdir() causes the logical name PWD to be defined
# in the process logical name table as the default device and directory
# seen by Perl. This may not be the same as the default device
# and directory seen by DCL after Perl exits, since the effects
# the CRTL chdir() function persist only until Perl exits.
sub _vms_cwd {
return $ENV{'DEFAULT'};
}
sub _vms_abs_path {
return $ENV{'DEFAULT'} unless @_;
my $path = shift;
my $efs = _vms_efs;
my $unix_rpt = _vms_unix_rpt;
if (defined &VMS::Filespec::vmsrealpath) {
my $path_unix = 0;
my $path_vms = 0;
$path_unix = 1 if ($path =~ m#(?<=\^)/#);
$path_unix = 1 if ($path =~ /^\.\.?$/);
$path_vms = 1 if ($path =~ m#[\[<\]]#);
$path_vms = 1 if ($path =~ /^--?$/);
my $unix_mode = $path_unix;
if ($efs) {
# In case of a tie, the Unix report mode decides.
if ($path_vms == $path_unix) {
$unix_mode = $unix_rpt;
} else {
$unix_mode = 0 if $path_vms;
}
}
if ($unix_mode) {
# Unix format
return VMS::Filespec::unixrealpath($path);
}
# VMS format
my $new_path = VMS::Filespec::vmsrealpath($path);
# Perl expects directories to be in directory format
$new_path = VMS::Filespec::pathify($new_path) if -d $path;
return $new_path;
}
# Fallback to older algorithm if correct ones are not
# available.
if (-l $path) {
my $link_target = readlink($path);
die "Can't resolve link $path: $!" unless defined $link_target;
return _vms_abs_path($link_target);
}
# may need to turn foo.dir into [.foo]
my $pathified = VMS::Filespec::pathify($path);
$path = $pathified if defined $pathified;
return VMS::Filespec::rmsexpand($path);
}
sub _os2_cwd {
my $pwd = `cmd /c cd`;
chomp $pwd;
$pwd =~ s:\\:/:g ;
$ENV{'PWD'} = $pwd;
return $pwd;
}
sub _win32_cwd_simple {
my $pwd = `cd`;
chomp $pwd;
$pwd =~ s:\\:/:g ;
$ENV{'PWD'} = $pwd;
return $pwd;
}
sub _win32_cwd {
my $pwd;
$pwd = Win32::GetCwd();
$pwd =~ s:\\:/:g ;
$ENV{'PWD'} = $pwd;
return $pwd;
}
*_NT_cwd = defined &Win32::GetCwd ? \&_win32_cwd : \&_win32_cwd_simple;
sub _dos_cwd {
my $pwd;
if (!defined &Dos::GetCwd) {
chomp($pwd = `command /c cd`);
$pwd =~ s:\\:/:g ;
} else {
$pwd = Dos::GetCwd();
}
$ENV{'PWD'} = $pwd;
return $pwd;
}
sub _qnx_cwd {
local $ENV{PATH} = '';
local $ENV{CDPATH} = '';
local $ENV{ENV} = '';
my $pwd = `/usr/bin/fullpath -t`;
chomp $pwd;
$ENV{'PWD'} = $pwd;
return $pwd;
}
sub _qnx_abs_path {
local $ENV{PATH} = '';
local $ENV{CDPATH} = '';
local $ENV{ENV} = '';
my $path = @_ ? shift : '.';
local *REALPATH;
defined( open(REALPATH, '-|') || exec '/usr/bin/fullpath', '-t', $path ) or
die "Can't open /usr/bin/fullpath: $!";
my $realpath = <REALPATH>;
close REALPATH;
chomp $realpath;
return $realpath;
}
# Now that all the base-level functions are set up, alias the
# user-level functions to the right places
if (exists $METHOD_MAP{$^O}) {
my $map = $METHOD_MAP{$^O};
foreach my $name (keys %$map) {
local $^W = 0; # assignments trigger 'subroutine redefined' warning
no strict 'refs';
*{$name} = \&{$map->{$name}};
}
}
# built-in from 5.30
*getcwd = \&Internals::getcwd
if !defined &getcwd && defined &Internals::getcwd;
# In case the XS version doesn't load.
*abs_path = \&_perl_abs_path unless defined &abs_path;
*getcwd = \&_perl_getcwd unless defined &getcwd;
# added function alias for those of us more
# used to the libc function. --tchrist 27-Jan-00
*realpath = \&abs_path;
1;
__END__
=head1 NAME
Cwd - get pathname of current working directory
=head1 SYNOPSIS
use Cwd;
my $dir = getcwd;
use Cwd 'abs_path';
my $abs_path = abs_path($file);
=head1 DESCRIPTION
This module provides functions for determining the pathname of the
current working directory. It is recommended that getcwd (or another
*cwd() function) be used in I<all> code to ensure portability.
By default, it exports the functions cwd(), getcwd(), fastcwd(), and
fastgetcwd() (and, on Win32, getdcwd()) into the caller's namespace.
=head2 getcwd and friends
Each of these functions are called without arguments and return the
absolute path of the current working directory.
=over 4
=item getcwd
my $cwd = getcwd();
Returns the current working directory. On error returns C<undef>,
with C<$!> set to indicate the error.
Exposes the POSIX function getcwd(3) or re-implements it if it's not
available.
=item cwd
my $cwd = cwd();
The cwd() is the most natural form for the current architecture. For
most systems it is identical to `pwd` (but without the trailing line
terminator).
=item fastcwd
my $cwd = fastcwd();
A more dangerous version of getcwd(), but potentially faster.
It might conceivably chdir() you out of a directory that it can't
chdir() you back into. If fastcwd encounters a problem it will return
undef but will probably leave you in a different directory. For a
measure of extra security, if everything appears to have worked, the
fastcwd() function will check that it leaves you in the same directory
that it started in. If it has changed it will C<die> with the message
"Unstable directory path, current directory changed
unexpectedly". That should never happen.
=item fastgetcwd
my $cwd = fastgetcwd();
The fastgetcwd() function is provided as a synonym for cwd().
=item getdcwd
my $cwd = getdcwd();
my $cwd = getdcwd('C:');
The getdcwd() function is also provided on Win32 to get the current working
directory on the specified drive, since Windows maintains a separate current
working directory for each drive. If no drive is specified then the current
drive is assumed.
This function simply calls the Microsoft C library _getdcwd() function.
=back
=head2 abs_path and friends
These functions are exported only on request. They each take a single
argument and return the absolute pathname for it. If no argument is
given they'll use the current working directory.
=over 4
=item abs_path
my $abs_path = abs_path($file);
Uses the same algorithm as getcwd(). Symbolic links and relative-path
components ("." and "..") are resolved to return the canonical
pathname, just like realpath(3). On error returns C<undef>, with C<$!>
set to indicate the error.
=item realpath
my $abs_path = realpath($file);
A synonym for abs_path().
=item fast_abs_path
my $abs_path = fast_abs_path($file);
A more dangerous, but potentially faster version of abs_path.
=back
=head2 $ENV{PWD}
If you ask to override your chdir() built-in function,
use Cwd qw(chdir);
then your PWD environment variable will be kept up to date. Note that
it will only be kept up to date if all packages which use chdir import
it from Cwd.
=head1 NOTES
=over 4
=item *
Since the path separators are different on some operating systems ('/'
on Unix, ':' on MacPerl, etc...) we recommend you use the File::Spec
modules wherever portability is a concern.
=item *
Actually, on Mac OS, the C<getcwd()>, C<fastgetcwd()> and C<fastcwd()>
functions are all aliases for the C<cwd()> function, which, on Mac OS,
calls `pwd`. Likewise, the C<abs_path()> function is an alias for
C<fast_abs_path()>.
=back
=head1 AUTHOR
Maintained by perl5-porters <F<perl5-porters@perl.org>>.
=head1 COPYRIGHT
Copyright (c) 2004 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
Portions of the C code in this library are copyright (c) 1994 by the
Regents of the University of California. All rights reserved. The
license on this code is compatible with the licensing of the rest of
the distribution - please see the source code in F<Cwd.xs> for the
details.
=head1 SEE ALSO
L<File::chdir>
=cut

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,585 @@
# Devel::Peek - A data debugging tool for the XS programmer
# The documentation is after the __END__
package Devel::Peek;
$VERSION = '1.33';
$XS_VERSION = $VERSION;
$VERSION = eval $VERSION;
require Exporter;
require XSLoader;
@EXPORT = qw(Dump mstat DeadCode DumpArray DumpWithOP DumpProg
fill_mstats mstats_fillhash mstats2hash runops_debug debug_flags);
@EXPORT_OK = qw(SvREFCNT CvGV);
%EXPORT_TAGS = ('ALL' => [@EXPORT, @EXPORT_OK]);
XSLoader::load();
sub import {
my $c = shift;
my $ops_rx = qr/^:opd(=[stP]*)?\b/;
my @db = grep m/$ops_rx/, @_;
@_ = grep !m/$ops_rx/, @_;
if (@db) {
die "Too many :opd options" if @db > 1;
runops_debug(1);
my $flags = ($db[0] =~ m/$ops_rx/ and $1);
$flags = 'st' unless defined $flags;
my $f = 0;
$f |= 2 if $flags =~ /s/;
$f |= 8 if $flags =~ /t/;
$f |= 64 if $flags =~ /P/;
$^D |= $f if $f;
}
unshift @_, $c;
goto &Exporter::import;
}
sub DumpWithOP ($;$) {
local($Devel::Peek::dump_ops)=1;
my $depth = @_ > 1 ? $_[1] : 4 ;
Dump($_[0],$depth);
}
$D_flags = 'psltocPmfrxuLHXDSTR';
sub debug_flags (;$) {
my $out = "";
for my $i (0 .. length($D_flags)-1) {
$out .= substr $D_flags, $i, 1 if $^D & (1<<$i);
}
my $arg = shift;
my $num = $arg;
if (defined $arg and $arg =~ /\D/) {
die "unknown flags in debug_flags()" if $arg =~ /[^-$D_flags]/;
my ($on,$off) = split /-/, "$arg-";
$num = $^D;
$num |= (1<<index($D_flags, $_)) for split //, $on;
$num &= ~(1<<index($D_flags, $_)) for split //, $off;
}
$^D = $num if defined $arg;
$out
}
sub B::Deparse::pp_Devel_Peek_Dump {
my ($deparse,$op,$cx) = @_;
my @kids = $deparse->deparse($op->first, 6);
my $sib = $op->first->sibling;
if (ref $sib ne 'B::NULL') {
push @kids, $deparse->deparse($sib, 6);
}
return "Devel::Peek::Dump(" . join(", ", @kids) . ")";
}
1;
__END__
=head1 NAME
Devel::Peek - A data debugging tool for the XS programmer
=head1 SYNOPSIS
use Devel::Peek;
Dump( $a );
Dump( $a, 5 );
Dump( @a );
Dump( %h );
DumpArray( 5, $a, $b, ... );
mstat "Point 5";
use Devel::Peek ':opd=st';
=head1 DESCRIPTION
Devel::Peek contains functions which allows raw Perl datatypes to be
manipulated from a Perl script. This is used by those who do XS programming
to check that the data they are sending from C to Perl looks as they think
it should look. The trick, then, is to know what the raw datatype is
supposed to look like when it gets to Perl. This document offers some tips
and hints to describe good and bad raw data.
It is very possible that this document will fall far short of being useful
to the casual reader. The reader is expected to understand the material in
the first few sections of L<perlguts>.
Devel::Peek supplies a C<Dump()> function which can dump a raw Perl
datatype, and C<mstat("marker")> function to report on memory usage
(if perl is compiled with corresponding option). The function
DeadCode() provides statistics on the data "frozen" into inactive
C<CV>. Devel::Peek also supplies C<SvREFCNT()> which can query reference
counts on SVs. This document will take a passive, and safe, approach
to data debugging and for that it will describe only the C<Dump()>
function.
All output is to STDERR.
The C<Dump()> function takes one or two arguments: something to dump, and
an optional limit for recursion and array elements (default is 4). The
first argument is evaluated in rvalue scalar context, with exceptions for
@array and %hash, which dump the array or hash itself. So C<Dump @array>
works, as does C<Dump $foo>. And C<Dump pos> will call C<pos> in rvalue
context, whereas C<Dump ${\pos}> will call it in lvalue context.
Function C<DumpArray()> allows dumping of multiple values (useful when you
need to analyze returns of functions).
The global variable $Devel::Peek::pv_limit can be set to limit the
number of character printed in various string values. Setting it to 0
means no limit.
If C<use Devel::Peek> directive has a C<:opd=FLAGS> argument,
this switches on debugging of opcode dispatch. C<FLAGS> should be a
combination of C<s>, C<t>, and C<P> (see
L<< B<-D> flags in perlrun|perlrun/B<-D>I<letters> >>).
C<:opd> is a shortcut for C<:opd=st>.
=head2 Runtime debugging
C<CvGV($cv)> return one of the globs associated to a subroutine reference $cv.
debug_flags() returns a string representation of C<$^D> (similar to
what is allowed for B<-D> flag). When called with a numeric argument,
sets $^D to the corresponding value. When called with an argument of
the form C<"flags-flags">, set on/off bits of C<$^D> corresponding to
letters before/after C<->. (The returned value is for C<$^D> before
the modification.)
runops_debug() returns true if the current I<opcode dispatcher> is the
debugging one. When called with an argument, switches to debugging or
non-debugging dispatcher depending on the argument (active for
newly-entered subs/etc only). (The returned value is for the dispatcher before the modification.)
=head2 Memory footprint debugging
When perl is compiled with support for memory footprint debugging
(default with Perl's malloc()), Devel::Peek provides an access to this API.
Use mstat() function to emit a memory state statistic to the terminal.
For more information on the format of output of mstat() see
L<perldebguts/Using $ENV{PERL_DEBUG_MSTATS}>.
Three additional functions allow access to this statistic from Perl.
First, use C<mstats_fillhash(%hash)> to get the information contained
in the output of mstat() into %hash. The field of this hash are
minbucket nbuckets sbrk_good sbrk_slack sbrked_remains sbrks
start_slack topbucket topbucket_ev topbucket_odd total total_chain
total_sbrk totfree
Two additional fields C<free>, C<used> contain array references which
provide per-bucket count of free and used chunks. Two other fields
C<mem_size>, C<available_size> contain array references which provide
the information about the allocated size and usable size of chunks in
each bucket. Again, see L<perldebguts/Using $ENV{PERL_DEBUG_MSTATS}>
for details.
Keep in mind that only the first several "odd-numbered" buckets are
used, so the information on size of the "odd-numbered" buckets which are
not used is probably meaningless.
The information in
mem_size available_size minbucket nbuckets
is the property of a particular build of perl, and does not depend on
the current process. If you do not provide the optional argument to
the functions mstats_fillhash(), fill_mstats(), mstats2hash(), then
the information in fields C<mem_size>, C<available_size> is not
updated.
C<fill_mstats($buf)> is a much cheaper call (both speedwise and
memory-wise) which collects the statistic into $buf in
machine-readable form. At a later moment you may need to call
C<mstats2hash($buf, %hash)> to use this information to fill %hash.
All three APIs C<fill_mstats($buf)>, C<mstats_fillhash(%hash)>, and
C<mstats2hash($buf, %hash)> are designed to allocate no memory if used
I<the second time> on the same $buf and/or %hash.
So, if you want to collect memory info in a cycle, you may call
$#buf = 999;
fill_mstats($_) for @buf;
mstats_fillhash(%report, 1); # Static info too
foreach (@buf) {
# Do something...
fill_mstats $_; # Collect statistic
}
foreach (@buf) {
mstats2hash($_, %report); # Preserve static info
# Do something with %report
}
=head1 EXAMPLES
The following examples don't attempt to show everything as that would be a
monumental task, and, frankly, we don't want this manpage to be an internals
document for Perl. The examples do demonstrate some basics of the raw Perl
datatypes, and should suffice to get most determined people on their way.
There are no guidewires or safety nets, nor blazed trails, so be prepared to
travel alone from this point and on and, if at all possible, don't fall into
the quicksand (it's bad for business).
Oh, one final bit of advice: take L<perlguts> with you. When you return we
expect to see it well-thumbed.
=head2 A simple scalar string
Let's begin by looking a simple scalar which is holding a string.
use Devel::Peek;
$a = 42; $a = "hello";
Dump $a;
The output:
SV = PVIV(0xbc288) at 0xbe9a8
REFCNT = 1
FLAGS = (POK,pPOK)
IV = 42
PV = 0xb2048 "hello"\0
CUR = 5
LEN = 8
This says C<$a> is an SV, a scalar. The scalar type is a PVIV, which is
capable of holding an integer (IV) and/or a string (PV) value. The scalar's
head is allocated at address 0xbe9a8, while the body is at 0xbc288.
Its reference count is 1. It has the C<POK> flag set, meaning its
current PV field is valid. Because POK is set we look at the PV item
to see what is in the scalar. The \0 at the end indicate that this
PV is properly NUL-terminated.
Note that the IV field still contains its old numeric value, but because
FLAGS doesn't have IOK set, we must ignore the IV item.
CUR indicates the number of characters in the PV. LEN indicates the
number of bytes allocated for the PV (at least one more than CUR, because
LEN includes an extra byte for the end-of-string marker, then usually
rounded up to some efficient allocation unit).
=head2 A simple scalar number
If the scalar contains a number the raw SV will be leaner.
use Devel::Peek;
$a = 42;
Dump $a;
The output:
SV = IV(0xbc818) at 0xbe9a8
REFCNT = 1
FLAGS = (IOK,pIOK)
IV = 42
This says C<$a> is an SV, a scalar. The scalar is an IV, a number. Its
reference count is 1. It has the C<IOK> flag set, meaning it is currently
being evaluated as a number. Because IOK is set we look at the IV item to
see what is in the scalar.
=head2 A simple scalar with an extra reference
If the scalar from the previous example had an extra reference:
use Devel::Peek;
$a = 42;
$b = \$a;
Dump $a;
The output:
SV = IV(0xbe860) at 0xbe9a8
REFCNT = 2
FLAGS = (IOK,pIOK)
IV = 42
Notice that this example differs from the previous example only in its
reference count. Compare this to the next example, where we dump C<$b>
instead of C<$a>.
=head2 A reference to a simple scalar
This shows what a reference looks like when it references a simple scalar.
use Devel::Peek;
$a = 42;
$b = \$a;
Dump $b;
The output:
SV = IV(0xf041c) at 0xbe9a0
REFCNT = 1
FLAGS = (ROK)
RV = 0xbab08
SV = IV(0xbe860) at 0xbe9a8
REFCNT = 2
FLAGS = (IOK,pIOK)
IV = 42
Starting from the top, this says C<$b> is an SV. The scalar is an IV,
which is capable of holding an integer or reference value.
It has the C<ROK> flag set, meaning it is a reference (rather than an
integer or string). Notice that Dump
follows the reference and shows us what C<$b> was referencing. We see the
same C<$a> that we found in the previous example.
Note that the value of C<RV> coincides with the numbers we see when we
stringify $b. The addresses inside IV() are addresses of
C<X***> structures which hold the current state of an C<SV>. This
address may change during lifetime of an SV.
=head2 A reference to an array
This shows what a reference to an array looks like.
use Devel::Peek;
$a = [42];
Dump $a;
The output:
SV = IV(0xc85998) at 0xc859a8
REFCNT = 1
FLAGS = (ROK)
RV = 0xc70de8
SV = PVAV(0xc71e10) at 0xc70de8
REFCNT = 1
FLAGS = ()
ARRAY = 0xc7e820
FILL = 0
MAX = 0
FLAGS = (REAL)
Elt No. 0
SV = IV(0xc70f88) at 0xc70f98
REFCNT = 1
FLAGS = (IOK,pIOK)
IV = 42
This says C<$a> is a reference (ROK), which points to
another SV which is a PVAV, an array. The array has one element,
element zero, which is another SV. The field C<FILL> above indicates
the last element in the array, similar to C<$#$a>.
If C<$a> pointed to an array of two elements then we would see the
following.
use Devel::Peek 'Dump';
$a = [42,24];
Dump $a;
The output:
SV = IV(0x158c998) at 0x158c9a8
REFCNT = 1
FLAGS = (ROK)
RV = 0x1577de8
SV = PVAV(0x1578e10) at 0x1577de8
REFCNT = 1
FLAGS = ()
ARRAY = 0x1585820
FILL = 1
MAX = 1
FLAGS = (REAL)
Elt No. 0
SV = IV(0x1577f88) at 0x1577f98
REFCNT = 1
FLAGS = (IOK,pIOK)
IV = 42
Elt No. 1
SV = IV(0x158be88) at 0x158be98
REFCNT = 1
FLAGS = (IOK,pIOK)
IV = 24
Note that C<Dump> will not report I<all> the elements in the array,
only several first (depending on how deep it already went into the
report tree).
=head2 A reference to a hash
The following shows the raw form of a reference to a hash.
use Devel::Peek;
$a = {hello=>42};
Dump $a;
The output:
SV = IV(0x55cb50b50fb0) at 0x55cb50b50fc0
REFCNT = 1
FLAGS = (ROK)
RV = 0x55cb50b2b758
SV = PVHV(0x55cb50b319c0) at 0x55cb50b2b758
REFCNT = 1
FLAGS = (SHAREKEYS)
ARRAY = 0x55cb50b941a0 (0:7, 1:1)
hash quality = 100.0%
KEYS = 1
FILL = 1
MAX = 7
Elt "hello" HASH = 0x3128ece4
SV = IV(0x55cb50b464f8) at 0x55cb50b46508
REFCNT = 1
FLAGS = (IOK,pIOK)
IV = 42
This shows C<$a> is a reference pointing to an SV. That SV is a PVHV, a hash.
The "quality" of a hash is defined as the total number of comparisons needed
to access every element once, relative to the expected number needed for a
random hash. The value can go over 100%.
The total number of comparisons is equal to the sum of the squares of the
number of entries in each bucket. For a random hash of C<<n>> keys into
C<<k>> buckets, the expected value is:
n + n(n-1)/2k
=head2 Dumping a large array or hash
The C<Dump()> function, by default, dumps up to 4 elements from a
toplevel array or hash. This number can be increased by supplying a
second argument to the function.
use Devel::Peek;
$a = [10,11,12,13,14];
Dump $a;
Notice that C<Dump()> prints only elements 10 through 13 in the above code.
The following code will print all of the elements.
use Devel::Peek 'Dump';
$a = [10,11,12,13,14];
Dump $a, 5;
=head2 A reference to an SV which holds a C pointer
This is what you really need to know as an XS programmer, of course. When
an XSUB returns a pointer to a C structure that pointer is stored in an SV
and a reference to that SV is placed on the XSUB stack. So the output from
an XSUB which uses something like the T_PTROBJ map might look something like
this:
SV = IV(0xf381c) at 0xc859a8
REFCNT = 1
FLAGS = (ROK)
RV = 0xb8ad8
SV = PVMG(0xbb3c8) at 0xc859a0
REFCNT = 1
FLAGS = (OBJECT,IOK,pIOK)
IV = 729160
NV = 0
PV = 0
STASH = 0xc1d10 "CookBookB::Opaque"
This shows that we have an SV which is a reference, which points at another
SV. In this case that second SV is a PVMG, a blessed scalar. Because it is
blessed it has the C<OBJECT> flag set. Note that an SV which holds a C
pointer also has the C<IOK> flag set. The C<STASH> is set to the package
name which this SV was blessed into.
The output from an XSUB which uses something like the T_PTRREF map, which
doesn't bless the object, might look something like this:
SV = IV(0xf381c) at 0xc859a8
REFCNT = 1
FLAGS = (ROK)
RV = 0xb8ad8
SV = PVMG(0xbb3c8) at 0xc859a0
REFCNT = 1
FLAGS = (IOK,pIOK)
IV = 729160
NV = 0
PV = 0
=head2 A reference to a subroutine
Looks like this:
SV = IV(0x24d2dd8) at 0x24d2de8
REFCNT = 1
FLAGS = (TEMP,ROK)
RV = 0x24e79d8
SV = PVCV(0x24e5798) at 0x24e79d8
REFCNT = 2
FLAGS = ()
COMP_STASH = 0x22c9c50 "main"
START = 0x22eed60 ===> 0
ROOT = 0x22ee490
GVGV::GV = 0x22de9d8 "MY" :: "top_targets"
FILE = "(eval 5)"
DEPTH = 0
FLAGS = 0x0
OUTSIDE_SEQ = 93
PADLIST = 0x22e9ed8
PADNAME = 0x22e9ec0(0x22eed00) PAD = 0x22e9ea8(0x22eecd0)
OUTSIDE = 0x22c9fb0 (MAIN)
This shows that
=over 4
=item *
the subroutine is not an XSUB (since C<START> and C<ROOT> are
non-zero, and C<XSUB> is not listed, and is thus null);
=item *
that it was compiled in the package C<main>;
=item *
under the name C<MY::top_targets>;
=item *
inside a 5th eval in the program;
=item *
it is not currently executed (because C<DEPTH> is 0);
=item *
it has no prototype (C<PROTOTYPE> field is missing).
=back
=head1 EXPORTS
C<Dump>, C<mstat>, C<DeadCode>, C<DumpArray>, C<DumpWithOP> and
C<DumpProg>, C<fill_mstats>, C<mstats_fillhash>, C<mstats2hash> by
default. Additionally available C<SvREFCNT>, C<SvREFCNT_inc> and
C<SvREFCNT_dec>.
=head1 BUGS
Readers have been known to skip important parts of L<perlguts>, causing much
frustration for all.
=head1 AUTHOR
Ilya Zakharevich ilya@math.ohio-state.edu
Copyright (c) 1995-98 Ilya Zakharevich. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
Author of this software makes no claim whatsoever about suitability,
reliability, edability, editability or usability of this product, and
should not be kept liable for any damage resulting from the use of
it. If you can use it, you are in luck, if not, I should not be kept
responsible. Keep a handy copy of your backup tape at hand.
=head1 SEE ALSO
L<perlguts>, and L<perlguts>, again.
=cut

View File

@@ -0,0 +1,388 @@
package Digest::MD5;
use strict;
use warnings;
our $VERSION = '2.58_01';
require Exporter;
*import = \&Exporter::import;
our @EXPORT_OK = qw(md5 md5_hex md5_base64);
our @ISA;
eval {
require Digest::base;
@ISA = qw/Digest::base/;
};
if ($@) {
my $err = $@;
*add_bits = sub { die $err };
}
eval {
require XSLoader;
XSLoader::load('Digest::MD5', $VERSION);
};
if ($@) {
my $olderr = $@;
eval {
# Try to load the pure perl version
require Digest::Perl::MD5;
Digest::Perl::MD5->import(qw(md5 md5_hex md5_base64));
unshift(@ISA, "Digest::Perl::MD5"); # make OO interface work
};
if ($@) {
# restore the original error
die $olderr;
}
}
else {
*reset = \&new;
}
1;
__END__
=head1 NAME
Digest::MD5 - Perl interface to the MD5 Algorithm
=head1 SYNOPSIS
# Functional style
use Digest::MD5 qw(md5 md5_hex md5_base64);
$digest = md5($data);
$digest = md5_hex($data);
$digest = md5_base64($data);
# OO style
use Digest::MD5;
$ctx = Digest::MD5->new;
$ctx->add($data);
$ctx->addfile($file_handle);
$digest = $ctx->digest;
$digest = $ctx->hexdigest;
$digest = $ctx->b64digest;
=head1 DESCRIPTION
The C<Digest::MD5> module allows you to use the RSA Data Security
Inc. MD5 Message Digest algorithm from within Perl programs. The
algorithm takes as input a message of arbitrary length and produces as
output a 128-bit "fingerprint" or "message digest" of the input.
Note that the MD5 algorithm is not as strong as it used to be. It has
since 2005 been easy to generate different messages that produce the
same MD5 digest. It still seems hard to generate messages that
produce a given digest, but it is probably wise to move to stronger
algorithms for applications that depend on the digest to uniquely identify
a message.
The C<Digest::MD5> module provide a procedural interface for simple
use, as well as an object oriented interface that can handle messages
of arbitrary length and which can read files directly.
=head1 FUNCTIONS
The following functions are provided by the C<Digest::MD5> module.
None of these functions are exported by default.
=over 4
=item md5($data,...)
This function will concatenate all arguments, calculate the MD5 digest
of this "message", and return it in binary form. The returned string
will be 16 bytes long.
The result of md5("a", "b", "c") will be exactly the same as the
result of md5("abc").
=item md5_hex($data,...)
Same as md5(), but will return the digest in hexadecimal form. The
length of the returned string will be 32 and it will only contain
characters from this set: '0'..'9' and 'a'..'f'.
=item md5_base64($data,...)
Same as md5(), but will return the digest as a base64 encoded string.
The length of the returned string will be 22 and it will only contain
characters from this set: 'A'..'Z', 'a'..'z', '0'..'9', '+' and
'/'.
Note that the base64 encoded string returned is not padded to be a
multiple of 4 bytes long. If you want interoperability with other
base64 encoded md5 digests you might want to append the redundant
string "==" to the result.
=back
=head1 METHODS
The object oriented interface to C<Digest::MD5> is described in this
section. After a C<Digest::MD5> object has been created, you will add
data to it and finally ask for the digest in a suitable format. A
single object can be used to calculate multiple digests.
The following methods are provided:
=over 4
=item $md5 = Digest::MD5->new
The constructor returns a new C<Digest::MD5> object which encapsulate
the state of the MD5 message-digest algorithm.
If called as an instance method (i.e. $md5->new) it will just reset the
state the object to the state of a newly created object. No new
object is created in this case.
=item $md5->reset
This is just an alias for $md5->new.
=item $md5->clone
This a copy of the $md5 object. It is useful when you do not want to
destroy the digests state, but need an intermediate value of the
digest, e.g. when calculating digests iteratively on a continuous data
stream. Example:
my $md5 = Digest::MD5->new;
while (<>) {
$md5->add($_);
print "Line $.: ", $md5->clone->hexdigest, "\n";
}
=item $md5->add($data,...)
The $data provided as argument are appended to the message we
calculate the digest for. The return value is the $md5 object itself.
All these lines will have the same effect on the state of the $md5
object:
$md5->add("a"); $md5->add("b"); $md5->add("c");
$md5->add("a")->add("b")->add("c");
$md5->add("a", "b", "c");
$md5->add("abc");
=item $md5->addfile($io_handle)
The $io_handle will be read until EOF and its content appended to the
message we calculate the digest for. The return value is the $md5
object itself.
The addfile() method will croak() if it fails reading data for some
reason. If it croaks it is unpredictable what the state of the $md5
object will be in. The addfile() method might have been able to read
the file partially before it failed. It is probably wise to discard
or reset the $md5 object if this occurs.
In most cases you want to make sure that the $io_handle is in
C<binmode> before you pass it as argument to the addfile() method.
=item $md5->add_bits($data, $nbits)
=item $md5->add_bits($bitstring)
Since the MD5 algorithm is byte oriented you might only add bits as
multiples of 8, so you probably want to just use add() instead. The
add_bits() method is provided for compatibility with other digest
implementations. See L<Digest> for description of the arguments
that add_bits() take.
=item $md5->digest
Return the binary digest for the message. The returned string will be
16 bytes long.
Note that the C<digest> operation is effectively a destructive,
read-once operation. Once it has been performed, the C<Digest::MD5>
object is automatically C<reset> and can be used to calculate another
digest value. Call $md5->clone->digest if you want to calculate the
digest without resetting the digest state.
=item $md5->hexdigest
Same as $md5->digest, but will return the digest in hexadecimal
form. The length of the returned string will be 32 and it will only
contain characters from this set: '0'..'9' and 'a'..'f'.
=item $md5->b64digest
Same as $md5->digest, but will return the digest as a base64 encoded
string. The length of the returned string will be 22 and it will only
contain characters from this set: 'A'..'Z', 'a'..'z', '0'..'9', '+'
and '/'.
The base64 encoded string returned is not padded to be a multiple of 4
bytes long. If you want interoperability with other base64 encoded
md5 digests you might want to append the string "==" to the result.
=item @ctx = $md5->context
=item $md5->context(@ctx)
Saves or restores the internal state.
When called with no arguments, returns a list:
number of blocks processed,
a 16-byte internal state buffer,
then optionally up to 63 bytes of unprocessed data if there are any.
When passed those same arguments, restores the state.
This is only useful for specialised operations.
=back
=head1 EXAMPLES
The simplest way to use this library is to import the md5_hex()
function (or one of its cousins):
use Digest::MD5 qw(md5_hex);
print "Digest is ", md5_hex("foobarbaz"), "\n";
The above example would print out the message:
Digest is 6df23dc03f9b54cc38a0fc1483df6e21
The same checksum can also be calculated in OO style:
use Digest::MD5;
$md5 = Digest::MD5->new;
$md5->add('foo', 'bar');
$md5->add('baz');
$digest = $md5->hexdigest;
print "Digest is $digest\n";
With OO style, you can break the message arbitrarily. This means that we
are no longer limited to have space for the whole message in memory, i.e.
we can handle messages of any size.
This is useful when calculating checksum for files:
use Digest::MD5;
my $filename = shift || "/etc/passwd";
open (my $fh, '<', $filename) or die "Can't open '$filename': $!";
binmode($fh);
$md5 = Digest::MD5->new;
while (<$fh>) {
$md5->add($_);
}
close($fh);
print $md5->b64digest, " $filename\n";
Or we can use the addfile method for more efficient reading of
the file:
use Digest::MD5;
my $filename = shift || "/etc/passwd";
open (my $fh, '<', $filename) or die "Can't open '$filename': $!";
binmode ($fh);
print Digest::MD5->new->addfile($fh)->hexdigest, " $filename\n";
Since the MD5 algorithm is only defined for strings of bytes, it can not be
used on strings that contains chars with ordinal number above 255 (Unicode
strings). The MD5 functions and methods will croak if you try to feed them
such input data:
use Digest::MD5 qw(md5_hex);
my $str = "abc\x{300}";
print md5_hex($str), "\n"; # croaks
# Wide character in subroutine entry
What you can do is calculate the MD5 checksum of the UTF-8
representation of such strings. This is achieved by filtering the
string through encode_utf8() function:
use Digest::MD5 qw(md5_hex);
use Encode qw(encode_utf8);
my $str = "abc\x{300}";
print md5_hex(encode_utf8($str)), "\n";
# 8c2d46911f3f5a326455f0ed7a8ed3b3
=head1 SEE ALSO
L<Digest>,
L<Digest::MD2>,
L<Digest::SHA>,
L<Digest::HMAC>
L<md5sum(1)>
RFC 1321
http://en.wikipedia.org/wiki/MD5
The paper "How to Break MD5 and Other Hash Functions" by Xiaoyun Wang
and Hongbo Yu.
=head1 COPYRIGHT
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
Copyright 1998-2003 Gisle Aas.
Copyright 1995-1996 Neil Winton.
Copyright 1991-1992 RSA Data Security, Inc.
The MD5 algorithm is defined in RFC 1321. This implementation is
derived from the reference C code in RFC 1321 which is covered by
the following copyright statement:
=over 4
=item
Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
rights reserved.
License to copy and use this software is granted provided that it
is identified as the "RSA Data Security, Inc. MD5 Message-Digest
Algorithm" in all material mentioning or referencing this software
or this function.
License is also granted to make and use derivative works provided
that such works are identified as "derived from the RSA Data
Security, Inc. MD5 Message-Digest Algorithm" in all material
mentioning or referencing the derived work.
RSA Data Security, Inc. makes no representations concerning either
the merchantability of this software or the suitability of this
software for any particular purpose. It is provided "as is"
without express or implied warranty of any kind.
These notices must be retained in any copies of any part of this
documentation and/or software.
=back
This copyright does not prohibit distribution of any version of Perl
containing this extension under the terms of the GNU or Artistic
licenses.
=head1 AUTHORS
The original C<MD5> interface was written by Neil Winton
(C<N.Winton@axion.bt.co.uk>).
The C<Digest::MD5> module is written by Gisle Aas <gisle@ActiveState.com>.
=cut

View File

@@ -0,0 +1,830 @@
package Digest::SHA;
require 5.003000;
use strict;
use warnings;
use vars qw($VERSION @ISA @EXPORT_OK $errmsg);
use Fcntl qw(O_RDONLY O_RDWR);
use Cwd qw(getcwd);
use integer;
$VERSION = '6.04';
require Exporter;
@ISA = qw(Exporter);
@EXPORT_OK = qw(
$errmsg
hmac_sha1 hmac_sha1_base64 hmac_sha1_hex
hmac_sha224 hmac_sha224_base64 hmac_sha224_hex
hmac_sha256 hmac_sha256_base64 hmac_sha256_hex
hmac_sha384 hmac_sha384_base64 hmac_sha384_hex
hmac_sha512 hmac_sha512_base64 hmac_sha512_hex
hmac_sha512224 hmac_sha512224_base64 hmac_sha512224_hex
hmac_sha512256 hmac_sha512256_base64 hmac_sha512256_hex
sha1 sha1_base64 sha1_hex
sha224 sha224_base64 sha224_hex
sha256 sha256_base64 sha256_hex
sha384 sha384_base64 sha384_hex
sha512 sha512_base64 sha512_hex
sha512224 sha512224_base64 sha512224_hex
sha512256 sha512256_base64 sha512256_hex);
# Inherit from Digest::base if possible
eval {
require Digest::base;
push(@ISA, 'Digest::base');
};
# The following routines aren't time-critical, so they can be left in Perl
sub new {
my($class, $alg) = @_;
$alg =~ s/\D+//g if defined $alg;
if (ref($class)) { # instance method
if (!defined($alg) || ($alg == $class->algorithm)) {
sharewind($class);
return($class);
}
return shainit($class, $alg) ? $class : undef;
}
$alg = 1 unless defined $alg;
return $class->newSHA($alg);
}
BEGIN { *reset = \&new }
sub add_bits {
my($self, $data, $nbits) = @_;
unless (defined $nbits) {
$nbits = length($data);
$data = pack("B*", $data);
}
$nbits = length($data) * 8 if $nbits > length($data) * 8;
shawrite($data, $nbits, $self);
return($self);
}
sub _bail {
my $msg = shift;
$errmsg = $!;
$msg .= ": $!";
require Carp;
Carp::croak($msg);
}
{
my $_can_T_filehandle;
sub _istext {
local *FH = shift;
my $file = shift;
if (! defined $_can_T_filehandle) {
local $^W = 0;
my $istext = eval { -T FH };
$_can_T_filehandle = $@ ? 0 : 1;
return $_can_T_filehandle ? $istext : -T $file;
}
return $_can_T_filehandle ? -T FH : -T $file;
}
}
sub _addfile {
my ($self, $handle) = @_;
my $n;
my $buf = "";
while (($n = read($handle, $buf, 4096))) {
$self->add($buf);
}
_bail("Read failed") unless defined $n;
$self;
}
sub addfile {
my ($self, $file, $mode) = @_;
return(_addfile($self, $file)) unless ref(\$file) eq 'SCALAR';
$mode = defined($mode) ? $mode : "";
my ($binary, $UNIVERSAL, $BITS) =
map { $_ eq $mode } ("b", "U", "0");
## Always interpret "-" to mean STDIN; otherwise use
## sysopen to handle full range of POSIX file names.
## If $file is a directory, force an EISDIR error
## by attempting to open with mode O_RDWR
local *FH;
if ($file eq '-') {
if (-d STDIN) {
sysopen(FH, getcwd(), O_RDWR)
or _bail('Open failed');
}
open(FH, '< -')
or _bail('Open failed');
}
else {
sysopen(FH, $file, -d $file ? O_RDWR : O_RDONLY)
or _bail('Open failed');
}
if ($BITS) {
my ($n, $buf) = (0, "");
while (($n = read(FH, $buf, 4096))) {
$buf =~ tr/01//cd;
$self->add_bits($buf);
}
_bail("Read failed") unless defined $n;
close(FH);
return($self);
}
binmode(FH) if $binary || $UNIVERSAL;
if ($UNIVERSAL && _istext(*FH, $file)) {
$self->_addfileuniv(*FH);
}
else { $self->_addfilebin(*FH) }
close(FH);
$self;
}
sub getstate {
my $self = shift;
my $alg = $self->algorithm or return;
my $state = $self->_getstate or return;
my $nD = $alg <= 256 ? 8 : 16;
my $nH = $alg <= 256 ? 32 : 64;
my $nB = $alg <= 256 ? 64 : 128;
my($H, $block, $blockcnt, $lenhh, $lenhl, $lenlh, $lenll) =
$state =~ /^(.{$nH})(.{$nB})(.{4})(.{4})(.{4})(.{4})(.{4})$/s;
for ($alg, $H, $block, $blockcnt, $lenhh, $lenhl, $lenlh, $lenll) {
return unless defined $_;
}
my @s = ();
push(@s, "alg:" . $alg);
push(@s, "H:" . join(":", unpack("H*", $H) =~ /.{$nD}/g));
push(@s, "block:" . join(":", unpack("H*", $block) =~ /.{2}/g));
push(@s, "blockcnt:" . unpack("N", $blockcnt));
push(@s, "lenhh:" . unpack("N", $lenhh));
push(@s, "lenhl:" . unpack("N", $lenhl));
push(@s, "lenlh:" . unpack("N", $lenlh));
push(@s, "lenll:" . unpack("N", $lenll));
join("\n", @s) . "\n";
}
sub putstate {
my($class, $state) = @_;
my %s = ();
for (split(/\n/, $state)) {
s/^\s+//;
s/\s+$//;
next if (/^(#|$)/);
my @f = split(/[:\s]+/);
my $tag = shift(@f);
$s{$tag} = join('', @f);
}
# H and block may contain arbitrary values, but check everything else
grep { $_ == $s{'alg'} } (1,224,256,384,512,512224,512256) or return;
length($s{'H'}) == ($s{'alg'} <= 256 ? 64 : 128) or return;
length($s{'block'}) == ($s{'alg'} <= 256 ? 128 : 256) or return;
{
no integer;
for (qw(blockcnt lenhh lenhl lenlh lenll)) {
0 <= $s{$_} or return;
$s{$_} <= 4294967295 or return;
}
$s{'blockcnt'} < ($s{'alg'} <= 256 ? 512 : 1024) or return;
}
my $packed_state = (
pack("H*", $s{'H'}) .
pack("H*", $s{'block'}) .
pack("N", $s{'blockcnt'}) .
pack("N", $s{'lenhh'}) .
pack("N", $s{'lenhl'}) .
pack("N", $s{'lenlh'}) .
pack("N", $s{'lenll'})
);
return $class->new($s{'alg'})->_putstate($packed_state);
}
sub dump {
my $self = shift;
my $file = shift;
my $state = $self->getstate or return;
$file = "-" if (!defined($file) || $file eq "");
local *FH;
open(FH, "> $file") or return;
print FH $state;
close(FH);
return($self);
}
sub load {
my $class = shift;
my $file = shift;
$file = "-" if (!defined($file) || $file eq "");
local *FH;
open(FH, "< $file") or return;
my $str = join('', <FH>);
close(FH);
$class->putstate($str);
}
eval {
require XSLoader;
XSLoader::load('Digest::SHA', $VERSION);
1;
} or do {
require DynaLoader;
push @ISA, 'DynaLoader';
Digest::SHA->bootstrap($VERSION);
};
1;
__END__
=head1 NAME
Digest::SHA - Perl extension for SHA-1/224/256/384/512
=head1 SYNOPSIS
In programs:
# Functional interface
use Digest::SHA qw(sha1 sha1_hex sha1_base64 ...);
$digest = sha1($data);
$digest = sha1_hex($data);
$digest = sha1_base64($data);
$digest = sha256($data);
$digest = sha384_hex($data);
$digest = sha512_base64($data);
# Object-oriented
use Digest::SHA;
$sha = Digest::SHA->new($alg);
$sha->add($data); # feed data into stream
$sha->addfile(*F);
$sha->addfile($filename);
$sha->add_bits($bits);
$sha->add_bits($data, $nbits);
$sha_copy = $sha->clone; # make copy of digest object
$state = $sha->getstate; # save current state to string
$sha->putstate($state); # restore previous $state
$digest = $sha->digest; # compute digest
$digest = $sha->hexdigest;
$digest = $sha->b64digest;
From the command line:
$ shasum files
$ shasum --help
=head1 SYNOPSIS (HMAC-SHA)
# Functional interface only
use Digest::SHA qw(hmac_sha1 hmac_sha1_hex ...);
$digest = hmac_sha1($data, $key);
$digest = hmac_sha224_hex($data, $key);
$digest = hmac_sha256_base64($data, $key);
=head1 ABSTRACT
Digest::SHA is a complete implementation of the NIST Secure Hash Standard.
It gives Perl programmers a convenient way to calculate SHA-1, SHA-224,
SHA-256, SHA-384, SHA-512, SHA-512/224, and SHA-512/256 message digests.
The module can handle all types of input, including partial-byte data.
=head1 DESCRIPTION
Digest::SHA is written in C for speed. If your platform lacks a
C compiler, you can install the functionally equivalent (but much
slower) L<Digest::SHA::PurePerl> module.
The programming interface is easy to use: it's the same one found
in CPAN's L<Digest> module. So, if your applications currently
use L<Digest::MD5> and you'd prefer the stronger security of SHA,
it's a simple matter to convert them.
The interface provides two ways to calculate digests: all-at-once,
or in stages. To illustrate, the following short program computes
the SHA-256 digest of "hello world" using each approach:
use Digest::SHA qw(sha256_hex);
$data = "hello world";
@frags = split(//, $data);
# all-at-once (Functional style)
$digest1 = sha256_hex($data);
# in-stages (OOP style)
$state = Digest::SHA->new(256);
for (@frags) { $state->add($_) }
$digest2 = $state->hexdigest;
print $digest1 eq $digest2 ?
"whew!\n" : "oops!\n";
To calculate the digest of an n-bit message where I<n> is not a
multiple of 8, use the I<add_bits()> method. For example, consider
the 446-bit message consisting of the bit-string "110" repeated
148 times, followed by "11". Here's how to display its SHA-1
digest:
use Digest::SHA;
$bits = "110" x 148 . "11";
$sha = Digest::SHA->new(1)->add_bits($bits);
print $sha->hexdigest, "\n";
Note that for larger bit-strings, it's more efficient to use the
two-argument version I<add_bits($data, $nbits)>, where I<$data> is
in the customary packed binary format used for Perl strings.
The module also lets you save intermediate SHA states to a string. The
I<getstate()> method generates portable, human-readable text describing
the current state of computation. You can subsequently restore that
state with I<putstate()> to resume where the calculation left off.
To see what a state description looks like, just run the following:
use Digest::SHA;
print Digest::SHA->new->add("Shaw" x 1962)->getstate;
As an added convenience, the Digest::SHA module offers routines to
calculate keyed hashes using the HMAC-SHA-1/224/256/384/512
algorithms. These services exist in functional form only, and
mimic the style and behavior of the I<sha()>, I<sha_hex()>, and
I<sha_base64()> functions.
# Test vector from draft-ietf-ipsec-ciph-sha-256-01.txt
use Digest::SHA qw(hmac_sha256_hex);
print hmac_sha256_hex("Hi There", chr(0x0b) x 32), "\n";
=head1 UNICODE AND SIDE EFFECTS
Perl supports Unicode strings as of version 5.6. Such strings may
contain wide characters, namely, characters whose ordinal values are
greater than 255. This can cause problems for digest algorithms such
as SHA that are specified to operate on sequences of bytes.
The rule by which Digest::SHA handles a Unicode string is easy
to state, but potentially confusing to grasp: the string is interpreted
as a sequence of byte values, where each byte value is equal to the
ordinal value (viz. code point) of its corresponding Unicode character.
That way, the Unicode string 'abc' has exactly the same digest value as
the ordinary string 'abc'.
Since a wide character does not fit into a byte, the Digest::SHA
routines croak if they encounter one. Whereas if a Unicode string
contains no wide characters, the module accepts it quite happily.
The following code illustrates the two cases:
$str1 = pack('U*', (0..255));
print sha1_hex($str1); # ok
$str2 = pack('U*', (0..256));
print sha1_hex($str2); # croaks
Be aware that the digest routines silently convert UTF-8 input into its
equivalent byte sequence in the native encoding (cf. utf8::downgrade).
This side effect influences only the way Perl stores the data internally,
but otherwise leaves the actual value of the data intact.
=head1 NIST STATEMENT ON SHA-1
NIST acknowledges that the work of Prof. Xiaoyun Wang constitutes a
practical collision attack on SHA-1. Therefore, NIST encourages the
rapid adoption of the SHA-2 hash functions (e.g. SHA-256) for applications
requiring strong collision resistance, such as digital signatures.
ref. L<http://csrc.nist.gov/groups/ST/hash/statement.html>
=head1 PADDING OF BASE64 DIGESTS
By convention, CPAN Digest modules do B<not> pad their Base64 output.
Problems can occur when feeding such digests to other software that
expects properly padded Base64 encodings.
For the time being, any necessary padding must be done by the user.
Fortunately, this is a simple operation: if the length of a Base64-encoded
digest isn't a multiple of 4, simply append "=" characters to the end
of the digest until it is:
while (length($b64_digest) % 4) {
$b64_digest .= '=';
}
To illustrate, I<sha256_base64("abc")> is computed to be
ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0
which has a length of 43. So, the properly padded version is
ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=
=head1 EXPORT
None by default.
=head1 EXPORTABLE FUNCTIONS
Provided your C compiler supports a 64-bit type (e.g. the I<long
long> of C99, or I<__int64> used by Microsoft C/C++), all of these
functions will be available for use. Otherwise, you won't be able
to perform the SHA-384 and SHA-512 transforms, both of which require
64-bit operations.
I<Functional style>
=over 4
=item B<sha1($data, ...)>
=item B<sha224($data, ...)>
=item B<sha256($data, ...)>
=item B<sha384($data, ...)>
=item B<sha512($data, ...)>
=item B<sha512224($data, ...)>
=item B<sha512256($data, ...)>
Logically joins the arguments into a single string, and returns
its SHA-1/224/256/384/512 digest encoded as a binary string.
=item B<sha1_hex($data, ...)>
=item B<sha224_hex($data, ...)>
=item B<sha256_hex($data, ...)>
=item B<sha384_hex($data, ...)>
=item B<sha512_hex($data, ...)>
=item B<sha512224_hex($data, ...)>
=item B<sha512256_hex($data, ...)>
Logically joins the arguments into a single string, and returns
its SHA-1/224/256/384/512 digest encoded as a hexadecimal string.
=item B<sha1_base64($data, ...)>
=item B<sha224_base64($data, ...)>
=item B<sha256_base64($data, ...)>
=item B<sha384_base64($data, ...)>
=item B<sha512_base64($data, ...)>
=item B<sha512224_base64($data, ...)>
=item B<sha512256_base64($data, ...)>
Logically joins the arguments into a single string, and returns
its SHA-1/224/256/384/512 digest encoded as a Base64 string.
It's important to note that the resulting string does B<not> contain
the padding characters typical of Base64 encodings. This omission is
deliberate, and is done to maintain compatibility with the family of
CPAN Digest modules. See L</"PADDING OF BASE64 DIGESTS"> for details.
=back
I<OOP style>
=over 4
=item B<new($alg)>
Returns a new Digest::SHA object. Allowed values for I<$alg> are 1,
224, 256, 384, 512, 512224, or 512256. It's also possible to use
common string representations of the algorithm (e.g. "sha256",
"SHA-384"). If the argument is missing, SHA-1 will be used by
default.
Invoking I<new> as an instance method will reset the object to the
initial state associated with I<$alg>. If the argument is missing,
the object will continue using the same algorithm that was selected
at creation.
=item B<reset($alg)>
This method has exactly the same effect as I<new($alg)>. In fact,
I<reset> is just an alias for I<new>.
=item B<hashsize>
Returns the number of digest bits for this object. The values are
160, 224, 256, 384, 512, 224, and 256 for SHA-1, SHA-224, SHA-256,
SHA-384, SHA-512, SHA-512/224 and SHA-512/256, respectively.
=item B<algorithm>
Returns the digest algorithm for this object. The values are 1,
224, 256, 384, 512, 512224, and 512256 for SHA-1, SHA-224, SHA-256,
SHA-384, SHA-512, SHA-512/224, and SHA-512/256, respectively.
=item B<clone>
Returns a duplicate copy of the object.
=item B<add($data, ...)>
Logically joins the arguments into a single string, and uses it to
update the current digest state. In other words, the following
statements have the same effect:
$sha->add("a"); $sha->add("b"); $sha->add("c");
$sha->add("a")->add("b")->add("c");
$sha->add("a", "b", "c");
$sha->add("abc");
The return value is the updated object itself.
=item B<add_bits($data, $nbits)>
=item B<add_bits($bits)>
Updates the current digest state by appending bits to it. The
return value is the updated object itself.
The first form causes the most-significant I<$nbits> of I<$data>
to be appended to the stream. The I<$data> argument is in the
customary binary format used for Perl strings.
The second form takes an ASCII string of "0" and "1" characters as
its argument. It's equivalent to
$sha->add_bits(pack("B*", $bits), length($bits));
So, the following two statements do the same thing:
$sha->add_bits("111100001010");
$sha->add_bits("\xF0\xA0", 12);
Note that SHA-1 and SHA-2 use I<most-significant-bit ordering>
for their internal state. This means that
$sha3->add_bits("110");
is equivalent to
$sha3->add_bits("1")->add_bits("1")->add_bits("0");
=item B<addfile(*FILE)>
Reads from I<FILE> until EOF, and appends that data to the current
state. The return value is the updated object itself.
=item B<addfile($filename [, $mode])>
Reads the contents of I<$filename>, and appends that data to the current
state. The return value is the updated object itself.
By default, I<$filename> is simply opened and read; no special modes
or I/O disciplines are used. To change this, set the optional I<$mode>
argument to one of the following values:
"b" read file in binary mode
"U" use universal newlines
"0" use BITS mode
The "U" mode is modeled on Python's "Universal Newlines" concept, whereby
DOS and Mac OS line terminators are converted internally to UNIX newlines
before processing. This ensures consistent digest values when working
simultaneously across multiple file systems. B<The "U" mode influences
only text files>, namely those passing Perl's I<-T> test; binary files
are processed with no translation whatsoever.
The BITS mode ("0") interprets the contents of I<$filename> as a logical
stream of bits, where each ASCII '0' or '1' character represents a 0 or
1 bit, respectively. All other characters are ignored. This provides
a convenient way to calculate the digest values of partial-byte data
by using files, rather than having to write separate programs employing
the I<add_bits> method.
=item B<getstate>
Returns a string containing a portable, human-readable representation
of the current SHA state.
=item B<putstate($str)>
Returns a Digest::SHA object representing the SHA state contained
in I<$str>. The format of I<$str> matches the format of the output
produced by method I<getstate>. If called as a class method, a new
object is created; if called as an instance method, the object is reset
to the state contained in I<$str>.
=item B<dump($filename)>
Writes the output of I<getstate> to I<$filename>. If the argument is
missing, or equal to the empty string, the state information will be
written to STDOUT.
=item B<load($filename)>
Returns a Digest::SHA object that results from calling I<putstate> on
the contents of I<$filename>. If the argument is missing, or equal to
the empty string, the state information will be read from STDIN.
=item B<digest>
Returns the digest encoded as a binary string.
Note that the I<digest> method is a read-once operation. Once it
has been performed, the Digest::SHA object is automatically reset
in preparation for calculating another digest value. Call
I<$sha-E<gt>clone-E<gt>digest> if it's necessary to preserve the
original digest state.
=item B<hexdigest>
Returns the digest encoded as a hexadecimal string.
Like I<digest>, this method is a read-once operation. Call
I<$sha-E<gt>clone-E<gt>hexdigest> if it's necessary to preserve
the original digest state.
=item B<b64digest>
Returns the digest encoded as a Base64 string.
Like I<digest>, this method is a read-once operation. Call
I<$sha-E<gt>clone-E<gt>b64digest> if it's necessary to preserve
the original digest state.
It's important to note that the resulting string does B<not> contain
the padding characters typical of Base64 encodings. This omission is
deliberate, and is done to maintain compatibility with the family of
CPAN Digest modules. See L</"PADDING OF BASE64 DIGESTS"> for details.
=back
I<HMAC-SHA-1/224/256/384/512>
=over 4
=item B<hmac_sha1($data, $key)>
=item B<hmac_sha224($data, $key)>
=item B<hmac_sha256($data, $key)>
=item B<hmac_sha384($data, $key)>
=item B<hmac_sha512($data, $key)>
=item B<hmac_sha512224($data, $key)>
=item B<hmac_sha512256($data, $key)>
Returns the HMAC-SHA-1/224/256/384/512 digest of I<$data>/I<$key>,
with the result encoded as a binary string. Multiple I<$data>
arguments are allowed, provided that I<$key> is the last argument
in the list.
=item B<hmac_sha1_hex($data, $key)>
=item B<hmac_sha224_hex($data, $key)>
=item B<hmac_sha256_hex($data, $key)>
=item B<hmac_sha384_hex($data, $key)>
=item B<hmac_sha512_hex($data, $key)>
=item B<hmac_sha512224_hex($data, $key)>
=item B<hmac_sha512256_hex($data, $key)>
Returns the HMAC-SHA-1/224/256/384/512 digest of I<$data>/I<$key>,
with the result encoded as a hexadecimal string. Multiple I<$data>
arguments are allowed, provided that I<$key> is the last argument
in the list.
=item B<hmac_sha1_base64($data, $key)>
=item B<hmac_sha224_base64($data, $key)>
=item B<hmac_sha256_base64($data, $key)>
=item B<hmac_sha384_base64($data, $key)>
=item B<hmac_sha512_base64($data, $key)>
=item B<hmac_sha512224_base64($data, $key)>
=item B<hmac_sha512256_base64($data, $key)>
Returns the HMAC-SHA-1/224/256/384/512 digest of I<$data>/I<$key>,
with the result encoded as a Base64 string. Multiple I<$data>
arguments are allowed, provided that I<$key> is the last argument
in the list.
It's important to note that the resulting string does B<not> contain
the padding characters typical of Base64 encodings. This omission is
deliberate, and is done to maintain compatibility with the family of
CPAN Digest modules. See L</"PADDING OF BASE64 DIGESTS"> for details.
=back
=head1 SEE ALSO
L<Digest>, L<Digest::SHA::PurePerl>
The Secure Hash Standard (Draft FIPS PUB 180-4) can be found at:
L<http://csrc.nist.gov/publications/drafts/fips180-4/Draft-FIPS180-4_Feb2011.pdf>
The Keyed-Hash Message Authentication Code (HMAC):
L<http://csrc.nist.gov/publications/fips/fips198/fips-198a.pdf>
=head1 AUTHOR
Mark Shelor <mshelor@cpan.org>
=head1 ACKNOWLEDGMENTS
The author is particularly grateful to
Gisle Aas
H. Merijn Brand
Sean Burke
Chris Carey
Alexandr Ciornii
Chris David
Jim Doble
Thomas Drugeon
Julius Duque
Jeffrey Friedl
Robert Gilmour
Brian Gladman
Jarkko Hietaniemi
Adam Kennedy
Mark Lawrence
Andy Lester
Alex Muntada
Steve Peters
Chris Skiscim
Martin Thurn
Gunnar Wolf
Adam Woodbury
"who by trained skill rescued life from such great billows and such thick
darkness and moored it in so perfect a calm and in so brilliant a light"
- Lucretius
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2003-2022 Mark Shelor
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
L<perlartistic>
=cut

View File

@@ -0,0 +1,776 @@
# Generated from DynaLoader_pm.PL, this file is unique for every OS
use strict;
package DynaLoader;
# And Gandalf said: 'Many folk like to know beforehand what is to
# be set on the table; but those who have laboured to prepare the
# feast like to keep their secret; for wonder makes the words of
# praise louder.'
# (Quote from Tolkien suggested by Anno Siegel.)
#
# See pod text at end of file for documentation.
# See also ext/DynaLoader/README in source tree for other information.
#
# Tim.Bunce@ig.co.uk, August 1994
BEGIN {
our $VERSION = '1.54';
}
# Note: in almost any other piece of code "our" would have been a better
# option than "use vars", but DynaLoader's bootstrap files need the
# side effect of the variable being declared in any scope whose current
# package is DynaLoader, not just the current lexical one.
use vars qw(@dl_library_path @dl_resolve_using @dl_require_symbols
$dl_debug @dl_librefs @dl_modules @dl_shared_objects
$dl_dlext $dl_so $dlsrc @args $module @dirs $file $bscode);
use Config;
# enable debug/trace messages from DynaLoader perl code
$dl_debug = $ENV{PERL_DL_DEBUG} || 0 unless defined $dl_debug;
#
# Flags to alter dl_load_file behaviour. Assigned bits:
# 0x01 make symbols available for linking later dl_load_file's.
# (only known to work on Solaris 2 using dlopen(RTLD_GLOBAL))
# (ignored under VMS; effect is built-in to image linking)
# (ignored under Android; the linker always uses RTLD_LOCAL)
#
# This is called as a class method $module->dl_load_flags. The
# definition here will be inherited and result on "default" loading
# behaviour unless a sub-class of DynaLoader defines its own version.
#
sub dl_load_flags { 0x00 }
($dl_dlext, $dl_so, $dlsrc) = @Config::Config{qw(dlext so dlsrc)};
# Some systems need special handling to expand file specifications
# (VMS support by Charles Bailey <bailey@HMIVAX.HUMGEN.UPENN.EDU>)
# See dl_expandspec() for more details. Should be harmless but
# inefficient to define on systems that don't need it.
my $do_expand = 0;
@dl_require_symbols = (); # names of symbols we need
@dl_library_path = (); # path to look for files
#XSLoader.pm may have added elements before we were required
#@dl_shared_objects = (); # shared objects for symbols we have
#@dl_librefs = (); # things we have loaded
#@dl_modules = (); # Modules we have loaded
# Initialise @dl_library_path with the 'standard' library path
# for this platform as determined by Configure.
push(@dl_library_path, split(' ', $Config::Config{libpth}));
my $ldlibpthname = $Config::Config{ldlibpthname};
my $ldlibpthname_defined = defined $Config::Config{ldlibpthname};
my $pthsep = $Config::Config{path_sep};
# Add to @dl_library_path any extra directories we can gather from environment
# during runtime.
if ($ldlibpthname_defined &&
exists $ENV{$ldlibpthname}) {
push(@dl_library_path, split(/$pthsep/, $ENV{$ldlibpthname}));
}
# E.g. HP-UX supports both its native SHLIB_PATH *and* LD_LIBRARY_PATH.
if ($ldlibpthname_defined &&
$ldlibpthname ne 'LD_LIBRARY_PATH' &&
exists $ENV{LD_LIBRARY_PATH}) {
push(@dl_library_path, split(/$pthsep/, $ENV{LD_LIBRARY_PATH}));
}
# No prizes for guessing why we don't say 'bootstrap DynaLoader;' here.
# NOTE: All dl_*.xs (including dl_none.xs) define a dl_error() XSUB
boot_DynaLoader('DynaLoader') if defined(&boot_DynaLoader) &&
!defined(&dl_error);
if ($dl_debug) {
print STDERR "DynaLoader.pm loaded (@INC, @dl_library_path)\n";
print STDERR "DynaLoader not linked into this perl\n"
unless defined(&boot_DynaLoader);
}
1; # End of main code
sub croak { require Carp; Carp::croak(@_) }
sub bootstrap_inherit {
my $module = $_[0];
no strict qw/refs vars/;
local *isa = *{"$module\::ISA"};
local @isa = (@isa, 'DynaLoader');
# Cannot goto due to delocalization. Will report errors on a wrong line?
bootstrap(@_);
}
sub bootstrap {
# use local vars to enable $module.bs script to edit values
local(@args) = @_;
local($module) = $args[0];
local(@dirs, $file);
unless ($module) {
require Carp;
Carp::confess("Usage: DynaLoader::bootstrap(module)");
}
# A common error on platforms which don't support dynamic loading.
# Since it's fatal and potentially confusing we give a detailed message.
croak("Can't load module $module, dynamic loading not available in this perl.\n".
" (You may need to build a new perl executable which either supports\n".
" dynamic loading or has the $module module statically linked into it.)\n")
unless defined(&dl_load_file);
my @modparts = split(/::/,$module);
my $modfname = $modparts[-1];
my $modfname_orig = $modfname; # For .bs file search
# Some systems have restrictions on files names for DLL's etc.
# mod2fname returns appropriate file base name (typically truncated)
# It may also edit @modparts if required.
$modfname = &mod2fname(\@modparts) if defined &mod2fname;
my $modpname = join('/',@modparts);
print STDERR "DynaLoader::bootstrap for $module ",
"(auto/$modpname/$modfname.$dl_dlext)\n"
if $dl_debug;
my $dir;
foreach (@INC) {
$dir = "$_/auto/$modpname";
next unless -d $dir; # skip over uninteresting directories
# check for common cases to avoid autoload of dl_findfile
my $try = "$dir/$modfname.$dl_dlext";
last if $file = ($do_expand) ? dl_expandspec($try) : ((-f $try) && $try);
# no luck here, save dir for possible later dl_findfile search
push @dirs, $dir;
}
# last resort, let dl_findfile have a go in all known locations
$file = dl_findfile(map("-L$_",@dirs,@INC), $modfname) unless $file;
croak("Can't locate loadable object for module $module in \@INC (\@INC contains: @INC)")
unless $file; # wording similar to error from 'require'
my $bootname = "boot_$module";
$bootname =~ s/\W/_/g;
@dl_require_symbols = ($bootname);
# Execute optional '.bootstrap' perl script for this module.
# The .bs file can be used to configure @dl_resolve_using etc to
# match the needs of the individual module on this architecture.
# N.B. The .bs file does not following the naming convention used
# by mod2fname.
my $bs = "$dir/$modfname_orig";
$bs =~ s/(\.\w+)?(;\d*)?$/\.bs/; # look for .bs 'beside' the library
if (-s $bs) { # only read file if it's not empty
print STDERR "BS: $bs ($^O, $dlsrc)\n" if $dl_debug;
eval { local @INC = ('.'); do $bs; };
warn "$bs: $@\n" if $@;
}
my $boot_symbol_ref;
# Many dynamic extension loading problems will appear to come from
# this section of code: XYZ failed at line 123 of DynaLoader.pm.
# Often these errors are actually occurring in the initialisation
# C code of the extension XS file. Perl reports the error as being
# in this perl code simply because this was the last perl code
# it executed.
my $flags = $module->dl_load_flags;
my $libref = dl_load_file($file, $flags) or
croak("Can't load '$file' for module $module: ".dl_error());
push(@dl_librefs,$libref); # record loaded object
$boot_symbol_ref = dl_find_symbol($libref, $bootname) or
croak("Can't find '$bootname' symbol in $file\n");
push(@dl_modules, $module); # record loaded module
boot:
my $xs = dl_install_xsub("${module}::bootstrap", $boot_symbol_ref, $file);
# See comment block above
push(@dl_shared_objects, $file); # record files loaded
&$xs(@args);
}
sub dl_findfile {
# This function does not automatically consider the architecture
# or the perl library auto directories.
my (@args) = @_;
my (@dirs, $dir); # which directories to search
my (@found); # full paths to real files we have found
#my $dl_ext= 'dll'; # $Config::Config{'dlext'} suffix for perl extensions
#my $dl_so = 'dll'; # $Config::Config{'so'} suffix for shared libraries
print STDERR "dl_findfile(@args)\n" if $dl_debug;
# accumulate directories but process files as they appear
arg: foreach(@args) {
# Special fast case: full filepath requires no search
if (m:/: && -f $_) {
push(@found,$_);
last arg unless wantarray;
next;
}
# Deal with directories first:
# Using a -L prefix is the preferred option (faster and more robust)
if ( s{^-L}{} ) { push(@dirs, $_); next; }
# Otherwise we try to try to spot directories by a heuristic
# (this is a more complicated issue than it first appears)
if (m:/: && -d $_) { push(@dirs, $_); next; }
# Only files should get this far...
my(@names, $name); # what filenames to look for
if ( s{^-l}{} ) { # convert -lname to appropriate library name
push(@names, "lib$_.$dl_so", "lib$_.a");
} else { # Umm, a bare name. Try various alternatives:
# these should be ordered with the most likely first
push(@names,"$_.$dl_dlext") unless m/\.$dl_dlext$/o;
push(@names,"$_.$dl_so") unless m/\.$dl_so$/o;
push(@names,"msys-$_.$dl_so") unless m:/:;
push(@names,"lib$_.$dl_so") unless m:/:;
push(@names, $_);
}
my $dirsep = '/';
foreach $dir (@dirs, @dl_library_path) {
next unless -d $dir;
foreach $name (@names) {
my($file) = "$dir$dirsep$name";
print STDERR " checking in $dir for $name\n" if $dl_debug;
if ($do_expand && ($file = dl_expandspec($file))) {
push @found, $file;
next arg; # no need to look any further
}
elsif (-f $file) {
push(@found, $file);
next arg; # no need to look any further
}
}
}
}
if ($dl_debug) {
foreach(@dirs) {
print STDERR " dl_findfile ignored non-existent directory: $_\n" unless -d $_;
}
print STDERR "dl_findfile found: @found\n";
}
return $found[0] unless wantarray;
@found;
}
sub dl_expandspec {
my($spec) = @_;
# Optional function invoked if DynaLoader.pm sets $do_expand.
# Most systems do not require or use this function.
# Some systems may implement it in the dl_*.xs file in which case
# this Perl version should be excluded at build time.
# This function is designed to deal with systems which treat some
# 'filenames' in a special way. For example VMS 'Logical Names'
# (something like unix environment variables - but different).
# This function should recognise such names and expand them into
# full file paths.
# Must return undef if $spec is invalid or file does not exist.
my $file = $spec; # default output to input
return undef unless -f $file;
print STDERR "dl_expandspec($spec) => $file\n" if $dl_debug;
$file;
}
sub dl_find_symbol_anywhere
{
my $sym = shift;
my $libref;
foreach $libref (@dl_librefs) {
my $symref = dl_find_symbol($libref,$sym,1);
return $symref if $symref;
}
return undef;
}
__END__
=head1 NAME
DynaLoader - Dynamically load C libraries into Perl code
=head1 SYNOPSIS
package YourPackage;
require DynaLoader;
@ISA = qw(... DynaLoader ...);
__PACKAGE__->bootstrap;
# optional method for 'global' loading
sub dl_load_flags { 0x01 }
=head1 DESCRIPTION
This document defines a standard generic interface to the dynamic
linking mechanisms available on many platforms. Its primary purpose is
to implement automatic dynamic loading of Perl modules.
This document serves as both a specification for anyone wishing to
implement the DynaLoader for a new platform and as a guide for
anyone wishing to use the DynaLoader directly in an application.
The DynaLoader is designed to be a very simple high-level
interface that is sufficiently general to cover the requirements
of SunOS, HP-UX, Linux, VMS and other platforms.
It is also hoped that the interface will cover the needs of OS/2, NT
etc and also allow pseudo-dynamic linking (using C<ld -A> at runtime).
It must be stressed that the DynaLoader, by itself, is practically
useless for accessing non-Perl libraries because it provides almost no
Perl-to-C 'glue'. There is, for example, no mechanism for calling a C
library function or supplying arguments. A C::DynaLib module
is available from CPAN sites which performs that function for some
common system types. And since the year 2000, there's also Inline::C,
a module that allows you to write Perl subroutines in C. Also available
from your local CPAN site.
DynaLoader Interface Summary
@dl_library_path
@dl_resolve_using
@dl_require_symbols
$dl_debug
$dl_dlext
@dl_librefs
@dl_modules
@dl_shared_objects
Implemented in:
bootstrap($modulename) Perl
@filepaths = dl_findfile(@names) Perl
$flags = $modulename->dl_load_flags Perl
$symref = dl_find_symbol_anywhere($symbol) Perl
$libref = dl_load_file($filename, $flags) C
$status = dl_unload_file($libref) C
$symref = dl_find_symbol($libref, $symbol) C
@symbols = dl_undef_symbols() C
dl_install_xsub($name, $symref [, $filename]) C
$message = dl_error C
=over 4
=item @dl_library_path
The standard/default list of directories in which dl_findfile() will
search for libraries etc. Directories are searched in order:
$dl_library_path[0], [1], ... etc
@dl_library_path is initialised to hold the list of 'normal' directories
(F</usr/lib>, etc) determined by B<Configure> (C<$Config{'libpth'}>). This should
ensure portability across a wide range of platforms.
@dl_library_path should also be initialised with any other directories
that can be determined from the environment at runtime (such as
LD_LIBRARY_PATH for SunOS).
After initialisation @dl_library_path can be manipulated by an
application using push and unshift before calling dl_findfile().
Unshift can be used to add directories to the front of the search order
either to save search time or to override libraries with the same name
in the 'normal' directories.
The load function that dl_load_file() calls may require an absolute
pathname. The dl_findfile() function and @dl_library_path can be
used to search for and return the absolute pathname for the
library/object that you wish to load.
=item @dl_resolve_using
A list of additional libraries or other shared objects which can be
used to resolve any undefined symbols that might be generated by a
later call to load_file().
This is only required on some platforms which do not handle dependent
libraries automatically. For example the Socket Perl extension
library (F<auto/Socket/Socket.so>) contains references to many socket
functions which need to be resolved when it's loaded. Most platforms
will automatically know where to find the 'dependent' library (e.g.,
F</usr/lib/libsocket.so>). A few platforms need to be told the
location of the dependent library explicitly. Use @dl_resolve_using
for this.
Example usage:
@dl_resolve_using = dl_findfile('-lsocket');
=item @dl_require_symbols
A list of one or more symbol names that are in the library/object file
to be dynamically loaded. This is only required on some platforms.
=item @dl_librefs
An array of the handles returned by successful calls to dl_load_file(),
made by bootstrap, in the order in which they were loaded.
Can be used with dl_find_symbol() to look for a symbol in any of
the loaded files.
=item @dl_modules
An array of module (package) names that have been bootstrap'ed.
=item @dl_shared_objects
An array of file names for the shared objects that were loaded.
=item dl_error()
Syntax:
$message = dl_error();
Error message text from the last failed DynaLoader function. Note
that, similar to errno in unix, a successful function call does not
reset this message.
Implementations should detect the error as soon as it occurs in any of
the other functions and save the corresponding message for later
retrieval. This will avoid problems on some platforms (such as SunOS)
where the error message is very temporary (e.g., dlerror()).
=item $dl_debug
Internal debugging messages are enabled when $dl_debug is set true.
Currently setting $dl_debug only affects the Perl side of the
DynaLoader. These messages should help an application developer to
resolve any DynaLoader usage problems.
$dl_debug is set to C<$ENV{'PERL_DL_DEBUG'}> if defined.
For the DynaLoader developer/porter there is a similar debugging
variable added to the C code (see dlutils.c) and enabled if Perl was
built with the B<-DDEBUGGING> flag. This can also be set via the
PERL_DL_DEBUG environment variable. Set to 1 for minimal information or
higher for more.
=item $dl_dlext
When specified (localised) in a module's F<.pm> file, indicates the extension
which the module's loadable object will have. For example:
local $DynaLoader::dl_dlext = 'unusual_ext';
would indicate that the module's loadable object has an extension of
C<unusual_ext> instead of the more usual C<$Config{dlext}>. NOTE: This also
requires that the module's F<Makefile.PL> specify (in C<WriteMakefile()>):
DLEXT => 'unusual_ext',
=item dl_findfile()
Syntax:
@filepaths = dl_findfile(@names)
Determine the full paths (including file suffix) of one or more
loadable files given their generic names and optionally one or more
directories. Searches directories in @dl_library_path by default and
returns an empty list if no files were found.
Names can be specified in a variety of platform independent forms. Any
names in the form B<-lname> are converted into F<libname.*>, where F<.*> is
an appropriate suffix for the platform.
If a name does not already have a suitable prefix and/or suffix then
the corresponding file will be searched for by trying combinations of
prefix and suffix appropriate to the platform: "$name.o", "lib$name.*"
and "$name".
If any directories are included in @names they are searched before
@dl_library_path. Directories may be specified as B<-Ldir>. Any other
names are treated as filenames to be searched for.
Using arguments of the form C<-Ldir> and C<-lname> is recommended.
Example:
@dl_resolve_using = dl_findfile(qw(-L/usr/5lib -lposix));
=item dl_expandspec()
Syntax:
$filepath = dl_expandspec($spec)
Some unusual systems, such as VMS, require special filename handling in
order to deal with symbolic names for files (i.e., VMS's Logical Names).
To support these systems a dl_expandspec() function can be implemented
either in the F<dl_*.xs> file or code can be added to the dl_expandspec()
function in F<DynaLoader.pm>. See F<DynaLoader_pm.PL> for more information.
=item dl_load_file()
Syntax:
$libref = dl_load_file($filename, $flags)
Dynamically load $filename, which must be the path to a shared object
or library. An opaque 'library reference' is returned as a handle for
the loaded object. Returns undef on error.
The $flags argument to alters dl_load_file behaviour.
Assigned bits:
0x01 make symbols available for linking later dl_load_file's.
(only known to work on Solaris 2 using dlopen(RTLD_GLOBAL))
(ignored under VMS; this is a normal part of image linking)
(On systems that provide a handle for the loaded object such as SunOS
and HPUX, $libref will be that handle. On other systems $libref will
typically be $filename or a pointer to a buffer containing $filename.
The application should not examine or alter $libref in any way.)
This is the function that does the real work. It should use the
current values of @dl_require_symbols and @dl_resolve_using if required.
SunOS: dlopen($filename)
HP-UX: shl_load($filename)
Linux: dld_create_reference(@dl_require_symbols); dld_link($filename)
VMS: lib$find_image_symbol($filename,$dl_require_symbols[0])
(The dlopen() function is also used by Solaris and some versions of
Linux, and is a common choice when providing a "wrapper" on other
mechanisms as is done in the OS/2 port.)
=item dl_unload_file()
Syntax:
$status = dl_unload_file($libref)
Dynamically unload $libref, which must be an opaque 'library reference' as
returned from dl_load_file. Returns one on success and zero on failure.
This function is optional and may not necessarily be provided on all platforms.
If it is defined and perl is compiled with the C macro C<DL_UNLOAD_ALL_AT_EXIT>
defined, then it is called automatically when the interpreter exits for
every shared object or library loaded by DynaLoader::bootstrap. All such
library references are stored in @dl_librefs by DynaLoader::Bootstrap as it
loads the libraries. The files are unloaded in last-in, first-out order.
This unloading is usually necessary when embedding a shared-object perl (e.g.
one configured with -Duseshrplib) within a larger application, and the perl
interpreter is created and destroyed several times within the lifetime of the
application. In this case it is possible that the system dynamic linker will
unload and then subsequently reload the shared libperl without relocating any
references to it from any files DynaLoaded by the previous incarnation of the
interpreter. As a result, any shared objects opened by DynaLoader may point to
a now invalid 'ghost' of the libperl shared object, causing apparently random
memory corruption and crashes. This behaviour is most commonly seen when using
Apache and mod_perl built with the APXS mechanism.
SunOS: dlclose($libref)
HP-UX: ???
Linux: ???
VMS: ???
(The dlclose() function is also used by Solaris and some versions of
Linux, and is a common choice when providing a "wrapper" on other
mechanisms as is done in the OS/2 port.)
=item dl_load_flags()
Syntax:
$flags = dl_load_flags $modulename;
Designed to be a method call, and to be overridden by a derived class
(i.e. a class which has DynaLoader in its @ISA). The definition in
DynaLoader itself returns 0, which produces standard behavior from
dl_load_file().
=item dl_find_symbol()
Syntax:
$symref = dl_find_symbol($libref, $symbol)
Return the address of the symbol $symbol or C<undef> if not found. If the
target system has separate functions to search for symbols of different
types then dl_find_symbol() should search for function symbols first and
then other types.
The exact manner in which the address is returned in $symref is not
currently defined. The only initial requirement is that $symref can
be passed to, and understood by, dl_install_xsub().
SunOS: dlsym($libref, $symbol)
HP-UX: shl_findsym($libref, $symbol)
Linux: dld_get_func($symbol) and/or dld_get_symbol($symbol)
VMS: lib$find_image_symbol($libref,$symbol)
=item dl_find_symbol_anywhere()
Syntax:
$symref = dl_find_symbol_anywhere($symbol)
Applies dl_find_symbol() to the members of @dl_librefs and returns
the first match found.
=item dl_undef_symbols()
Example
@symbols = dl_undef_symbols()
Return a list of symbol names which remain undefined after load_file().
Returns C<()> if not known. Don't worry if your platform does not provide
a mechanism for this. Most do not need it and hence do not provide it,
they just return an empty list.
=item dl_install_xsub()
Syntax:
dl_install_xsub($perl_name, $symref [, $filename])
Create a new Perl external subroutine named $perl_name using $symref as
a pointer to the function which implements the routine. This is simply
a direct call to newXS()/newXS_flags(). Returns a reference to the installed
function.
The $filename parameter is used by Perl to identify the source file for
the function if required by die(), caller() or the debugger. If
$filename is not defined then "DynaLoader" will be used.
=item bootstrap()
Syntax:
bootstrap($module [...])
This is the normal entry point for automatic dynamic loading in Perl.
It performs the following actions:
=over 8
=item *
locates an auto/$module directory by searching @INC
=item *
uses dl_findfile() to determine the filename to load
=item *
sets @dl_require_symbols to C<("boot_$module")>
=item *
executes an F<auto/$module/$module.bs> file if it exists
(typically used to add to @dl_resolve_using any files which
are required to load the module on the current platform)
=item *
calls dl_load_flags() to determine how to load the file.
=item *
calls dl_load_file() to load the file
=item *
calls dl_undef_symbols() and warns if any symbols are undefined
=item *
calls dl_find_symbol() for "boot_$module"
=item *
calls dl_install_xsub() to install it as "${module}::bootstrap"
=item *
calls &{"${module}::bootstrap"} to bootstrap the module (actually
it uses the function reference returned by dl_install_xsub for speed)
=back
All arguments to bootstrap() are passed to the module's bootstrap function.
The default code generated by F<xsubpp> expects $module [, $version]
If the optional $version argument is not given, it defaults to
C<$XS_VERSION // $VERSION> in the module's symbol table. The default code
compares the Perl-space version with the version of the compiled XS code,
and croaks with an error if they do not match.
=back
=head1 AUTHOR
Tim Bunce, 11 August 1994.
This interface is based on the work and comments of (in no particular
order): Larry Wall, Robert Sanders, Dean Roehrich, Jeff Okamoto, Anno
Siegel, Thomas Neumann, Paul Marquess, Charles Bailey, myself and others.
Larry Wall designed the elegant inherited bootstrap mechanism and
implemented the first Perl 5 dynamic loader using it.
Solaris global loading added by Nick Ing-Simmons with design/coding
assistance from Tim Bunce, January 1996.
=cut

View File

@@ -0,0 +1,976 @@
#
# $Id: Encode.pm,v 3.19 2022/08/04 04:42:30 dankogai Exp $
#
package Encode;
use strict;
use warnings;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
our $VERSION;
BEGIN {
$VERSION = sprintf "%d.%02d", q$Revision: 3.19 $ =~ /(\d+)/g;
require XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
}
use Exporter 5.57 'import';
use Carp ();
our @CARP_NOT = qw(Encode::Encoder);
# Public, encouraged API is exported by default
our @EXPORT = qw(
decode decode_utf8 encode encode_utf8 str2bytes bytes2str
encodings find_encoding find_mime_encoding clone_encoding
);
our @FB_FLAGS = qw(
DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
PERLQQ HTMLCREF XMLCREF STOP_AT_PARTIAL
);
our @FB_CONSTS = qw(
FB_DEFAULT FB_CROAK FB_QUIET FB_WARN
FB_PERLQQ FB_HTMLCREF FB_XMLCREF
);
our @EXPORT_OK = (
qw(
_utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
),
@FB_FLAGS, @FB_CONSTS,
);
our %EXPORT_TAGS = (
all => [ @EXPORT, @EXPORT_OK ],
default => [ @EXPORT ],
fallbacks => [ @FB_CONSTS ],
fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
);
# Documentation moved after __END__ for speed - NI-S
our $ON_EBCDIC = ( ord("A") == 193 );
use Encode::Alias ();
use Encode::MIME::Name;
use Storable;
# Make a %Encoding package variable to allow a certain amount of cheating
our %Encoding;
our %ExtModule;
require Encode::Config;
# See
# https://bugzilla.redhat.com/show_bug.cgi?id=435505#c2
# to find why sig handlers inside eval{} are disabled.
eval {
local $SIG{__DIE__};
local $SIG{__WARN__};
local @INC = @INC;
pop @INC if @INC && $INC[-1] eq '.';
require Encode::ConfigLocal;
};
sub encodings {
my %enc;
my $arg = $_[1] || '';
if ( $arg eq ":all" ) {
%enc = ( %Encoding, %ExtModule );
}
else {
%enc = %Encoding;
for my $mod ( map { m/::/ ? $_ : "Encode::$_" } @_ ) {
DEBUG and warn $mod;
for my $enc ( keys %ExtModule ) {
$ExtModule{$enc} eq $mod and $enc{$enc} = $mod;
}
}
}
return sort { lc $a cmp lc $b }
grep { !/^(?:Internal|Unicode|Guess)$/o } keys %enc;
}
sub perlio_ok {
my $obj = ref( $_[0] ) ? $_[0] : find_encoding( $_[0] );
$obj->can("perlio_ok") and return $obj->perlio_ok();
return 0; # safety net
}
sub define_encoding {
my $obj = shift;
my $name = shift;
$Encoding{$name} = $obj;
my $lc = lc($name);
define_alias( $lc => $obj ) unless $lc eq $name;
while (@_) {
my $alias = shift;
define_alias( $alias, $obj );
}
my $class = ref($obj);
push @Encode::CARP_NOT, $class unless grep { $_ eq $class } @Encode::CARP_NOT;
push @Encode::Encoding::CARP_NOT, $class unless grep { $_ eq $class } @Encode::Encoding::CARP_NOT;
return $obj;
}
sub getEncoding {
my ( $class, $name, $skip_external ) = @_;
defined($name) or return;
$name =~ s/\s+//g; # https://rt.cpan.org/Ticket/Display.html?id=65796
ref($name) && $name->can('renew') and return $name;
exists $Encoding{$name} and return $Encoding{$name};
my $lc = lc $name;
exists $Encoding{$lc} and return $Encoding{$lc};
my $oc = $class->find_alias($name);
defined($oc) and return $oc;
$lc ne $name and $oc = $class->find_alias($lc);
defined($oc) and return $oc;
unless ($skip_external) {
if ( my $mod = $ExtModule{$name} || $ExtModule{$lc} ) {
$mod =~ s,::,/,g;
$mod .= '.pm';
eval { require $mod; };
exists $Encoding{$name} and return $Encoding{$name};
}
}
return;
}
# HACK: These two functions must be defined in Encode and because of
# cyclic dependency between Encode and Encode::Alias, Exporter does not work
sub find_alias {
goto &Encode::Alias::find_alias;
}
sub define_alias {
goto &Encode::Alias::define_alias;
}
sub find_encoding($;$) {
my ( $name, $skip_external ) = @_;
return __PACKAGE__->getEncoding( $name, $skip_external );
}
sub find_mime_encoding($;$) {
my ( $mime_name, $skip_external ) = @_;
my $name = Encode::MIME::Name::get_encode_name( $mime_name );
return find_encoding( $name, $skip_external );
}
sub resolve_alias($) {
my $obj = find_encoding(shift);
defined $obj and return $obj->name;
return;
}
sub clone_encoding($) {
my $obj = find_encoding(shift);
ref $obj or return;
return Storable::dclone($obj);
}
onBOOT;
if ($ON_EBCDIC) {
package Encode::UTF_EBCDIC;
use parent 'Encode::Encoding';
my $obj = bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC";
Encode::define_encoding($obj, 'Unicode');
sub decode {
my ( undef, $str, $chk ) = @_;
my $res = '';
for ( my $i = 0 ; $i < length($str) ; $i++ ) {
$res .=
chr(
utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) )
);
}
$_[1] = '' if $chk;
return $res;
}
sub encode {
my ( undef, $str, $chk ) = @_;
my $res = '';
for ( my $i = 0 ; $i < length($str) ; $i++ ) {
$res .=
chr(
utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) )
);
}
$_[1] = '' if $chk;
return $res;
}
}
{
# https://rt.cpan.org/Public/Bug/Display.html?id=103253
package Encode::XS;
use parent 'Encode::Encoding';
}
{
package Encode::utf8;
use parent 'Encode::Encoding';
my %obj = (
'utf8' => { Name => 'utf8' },
'utf-8-strict' => { Name => 'utf-8-strict', strict_utf8 => 1 }
);
for ( keys %obj ) {
bless $obj{$_} => __PACKAGE__;
Encode::define_encoding( $obj{$_} => $_ );
}
sub cat_decode {
# ($obj, $dst, $src, $pos, $trm, $chk)
# currently ignores $chk
my ( undef, undef, undef, $pos, $trm ) = @_;
my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
use bytes;
if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) {
$$rdst .=
substr( $$rsrc, $pos, $npos - $pos + length($trm) );
$$rpos = $npos + length($trm);
return 1;
}
$$rdst .= substr( $$rsrc, $pos );
$$rpos = length($$rsrc);
return '';
}
}
1;
__END__
=head1 NAME
Encode - character encodings in Perl
=head1 SYNOPSIS
use Encode qw(decode encode);
$characters = decode('UTF-8', $octets, Encode::FB_CROAK);
$octets = encode('UTF-8', $characters, Encode::FB_CROAK);
=head2 Table of Contents
Encode consists of a collection of modules whose details are too extensive
to fit in one document. This one itself explains the top-level APIs
and general topics at a glance. For other topics and more details,
see the documentation for these modules:
=over 2
=item L<Encode::Alias> - Alias definitions to encodings
=item L<Encode::Encoding> - Encode Implementation Base Class
=item L<Encode::Supported> - List of Supported Encodings
=item L<Encode::CN> - Simplified Chinese Encodings
=item L<Encode::JP> - Japanese Encodings
=item L<Encode::KR> - Korean Encodings
=item L<Encode::TW> - Traditional Chinese Encodings
=back
=head1 DESCRIPTION
The C<Encode> module provides the interface between Perl strings
and the rest of the system. Perl strings are sequences of
I<characters>.
The repertoire of characters that Perl can represent is a superset of those
defined by the Unicode Consortium. On most platforms the ordinal
values of a character as returned by C<ord(I<S>)> is the I<Unicode
codepoint> for that character. The exceptions are platforms where
the legacy encoding is some variant of EBCDIC rather than a superset
of ASCII; see L<perlebcdic>.
During recent history, data is moved around a computer in 8-bit chunks,
often called "bytes" but also known as "octets" in standards documents.
Perl is widely used to manipulate data of many types: not only strings of
characters representing human or computer languages, but also "binary"
data, being the machine's representation of numbers, pixels in an image, or
just about anything.
When Perl is processing "binary data", the programmer wants Perl to
process "sequences of bytes". This is not a problem for Perl: because a
byte has 256 possible values, it easily fits in Perl's much larger
"logical character".
This document mostly explains the I<how>. L<perlunitut> and L<perlunifaq>
explain the I<why>.
=head2 TERMINOLOGY
=head3 character
A character in the range 0 .. 2**32-1 (or more);
what Perl's strings are made of.
=head3 byte
A character in the range 0..255;
a special case of a Perl character.
=head3 octet
8 bits of data, with ordinal values 0..255;
term for bytes passed to or from a non-Perl context, such as a disk file,
standard I/O stream, database, command-line argument, environment variable,
socket etc.
=head1 THE PERL ENCODING API
=head2 Basic methods
=head3 encode
$octets = encode(ENCODING, STRING[, CHECK])
Encodes the scalar value I<STRING> from Perl's internal form into
I<ENCODING> and returns a sequence of octets. I<ENCODING> can be either a
canonical name or an alias. For encoding names and aliases, see
L</"Defining Aliases">. For CHECK, see L</"Handling Malformed Data">.
B<CAVEAT>: the input scalar I<STRING> might be modified in-place depending
on what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be
left unchanged.
For example, to convert a string from Perl's internal format into
ISO-8859-1, also known as Latin1:
$octets = encode("iso-8859-1", $string);
B<CAVEAT>: When you run C<$octets = encode("UTF-8", $string)>, then
$octets I<might not be equal to> $string. Though both contain the
same data, the UTF8 flag for $octets is I<always> off. When you
encode anything, the UTF8 flag on the result is always off, even when it
contains a completely valid UTF-8 string. See L</"The UTF8 flag"> below.
If the $string is C<undef>, then C<undef> is returned.
C<str2bytes> may be used as an alias for C<encode>.
=head3 decode
$string = decode(ENCODING, OCTETS[, CHECK])
This function returns the string that results from decoding the scalar
value I<OCTETS>, assumed to be a sequence of octets in I<ENCODING>, into
Perl's internal form. As with encode(),
I<ENCODING> can be either a canonical name or an alias. For encoding names
and aliases, see L</"Defining Aliases">; for I<CHECK>, see L</"Handling
Malformed Data">.
B<CAVEAT>: the input scalar I<OCTETS> might be modified in-place depending
on what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be
left unchanged.
For example, to convert ISO-8859-1 data into a string in Perl's
internal format:
$string = decode("iso-8859-1", $octets);
B<CAVEAT>: When you run C<$string = decode("UTF-8", $octets)>, then $string
I<might not be equal to> $octets. Though both contain the same data, the
UTF8 flag for $string is on. See L</"The UTF8 flag">
below.
If the $string is C<undef>, then C<undef> is returned.
C<bytes2str> may be used as an alias for C<decode>.
=head3 find_encoding
[$obj =] find_encoding(ENCODING)
Returns the I<encoding object> corresponding to I<ENCODING>. Returns
C<undef> if no matching I<ENCODING> is find. The returned object is
what does the actual encoding or decoding.
$string = decode($name, $bytes);
is in fact
$string = do {
$obj = find_encoding($name);
croak qq(encoding "$name" not found) unless ref $obj;
$obj->decode($bytes);
};
with more error checking.
You can therefore save time by reusing this object as follows;
my $enc = find_encoding("iso-8859-1");
while(<>) {
my $string = $enc->decode($_);
... # now do something with $string;
}
Besides L</decode> and L</encode>, other methods are
available as well. For instance, C<name()> returns the canonical
name of the encoding object.
find_encoding("latin1")->name; # iso-8859-1
See L<Encode::Encoding> for details.
=head3 find_mime_encoding
[$obj =] find_mime_encoding(MIME_ENCODING)
Returns the I<encoding object> corresponding to I<MIME_ENCODING>. Acts
same as C<find_encoding()> but C<mime_name()> of returned object must
match to I<MIME_ENCODING>. So as opposite of C<find_encoding()>
canonical names and aliases are not used when searching for object.
find_mime_encoding("utf8"); # returns undef because "utf8" is not valid I<MIME_ENCODING>
find_mime_encoding("utf-8"); # returns encode object "utf-8-strict"
find_mime_encoding("UTF-8"); # same as "utf-8" because I<MIME_ENCODING> is case insensitive
find_mime_encoding("utf-8-strict"); returns undef because "utf-8-strict" is not valid I<MIME_ENCODING>
=head3 from_to
[$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
Converts I<in-place> data between two encodings. The data in $octets
must be encoded as octets and I<not> as characters in Perl's internal
format. For example, to convert ISO-8859-1 data into Microsoft's CP1250
encoding:
from_to($octets, "iso-8859-1", "cp1250");
and to convert it back:
from_to($octets, "cp1250", "iso-8859-1");
Because the conversion happens in place, the data to be
converted cannot be a string constant: it must be a scalar variable.
C<from_to()> returns the length of the converted string in octets on success,
and C<undef> on error.
B<CAVEAT>: The following operations may look the same, but are not:
from_to($data, "iso-8859-1", "UTF-8"); #1
$data = decode("iso-8859-1", $data); #2
Both #1 and #2 make $data consist of a completely valid UTF-8 string,
but only #2 turns the UTF8 flag on. #1 is equivalent to:
$data = encode("UTF-8", decode("iso-8859-1", $data));
See L</"The UTF8 flag"> below.
Also note that:
from_to($octets, $from, $to, $check);
is equivalent to:
$octets = encode($to, decode($from, $octets), $check);
Yes, it does I<not> respect the $check during decoding. It is
deliberately done that way. If you need minute control, use C<decode>
followed by C<encode> as follows:
$octets = encode($to, decode($from, $octets, $check_from), $check_to);
=head3 encode_utf8
$octets = encode_utf8($string);
B<WARNING>: L<This function can produce invalid UTF-8!|/UTF-8 vs. utf8 vs. UTF8>
Do not use it for data exchange.
Unless you want Perl's older "lax" mode, prefer
C<$octets = encode("UTF-8", $string)>.
Equivalent to C<$octets = encode("utf8", $string)>. The characters in
$string are encoded in Perl's internal format, and the result is returned
as a sequence of octets. Because all possible characters in Perl have a
(loose, not strict) utf8 representation, this function cannot fail.
=head3 decode_utf8
$string = decode_utf8($octets [, CHECK]);
B<WARNING>: L<This function accepts invalid UTF-8!|/UTF-8 vs. utf8 vs. UTF8>
Do not use it for data exchange.
Unless you want Perl's older "lax" mode, prefer
C<$string = decode("UTF-8", $octets [, CHECK])>.
Equivalent to C<$string = decode("utf8", $octets [, CHECK])>.
The sequence of octets represented by $octets is decoded
from (loose, not strict) utf8 into a sequence of logical characters.
Because not all sequences of octets are valid not strict utf8,
it is quite possible for this function to fail.
For CHECK, see L</"Handling Malformed Data">.
B<CAVEAT>: the input I<$octets> might be modified in-place depending on
what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be
left unchanged.
=head2 Listing available encodings
use Encode;
@list = Encode->encodings();
Returns a list of canonical names of available encodings that have already
been loaded. To get a list of all available encodings including those that
have not yet been loaded, say:
@all_encodings = Encode->encodings(":all");
Or you can give the name of a specific module:
@with_jp = Encode->encodings("Encode::JP");
When "C<::>" is not in the name, "C<Encode::>" is assumed.
@ebcdic = Encode->encodings("EBCDIC");
To find out in detail which encodings are supported by this package,
see L<Encode::Supported>.
=head2 Defining Aliases
To add a new alias to a given encoding, use:
use Encode;
use Encode::Alias;
define_alias(NEWNAME => ENCODING);
After that, I<NEWNAME> can be used as an alias for I<ENCODING>.
I<ENCODING> may be either the name of an encoding or an
I<encoding object>.
Before you do that, first make sure the alias is nonexistent using
C<resolve_alias()>, which returns the canonical name thereof.
For example:
Encode::resolve_alias("latin1") eq "iso-8859-1" # true
Encode::resolve_alias("iso-8859-12") # false; nonexistent
Encode::resolve_alias($name) eq $name # true if $name is canonical
C<resolve_alias()> does not need C<use Encode::Alias>; it can be
imported via C<use Encode qw(resolve_alias)>.
See L<Encode::Alias> for details.
=head2 Finding IANA Character Set Registry names
The canonical name of a given encoding does not necessarily agree with
IANA Character Set Registry, commonly seen as C<< Content-Type:
text/plain; charset=I<WHATEVER> >>. For most cases, the canonical name
works, but sometimes it does not, most notably with "utf-8-strict".
As of C<Encode> version 2.21, a new method C<mime_name()> is therefore added.
use Encode;
my $enc = find_encoding("UTF-8");
warn $enc->name; # utf-8-strict
warn $enc->mime_name; # UTF-8
See also: L<Encode::Encoding>
=head1 Encoding via PerlIO
If your perl supports C<PerlIO> (which is the default), you can use a
C<PerlIO> layer to decode and encode directly via a filehandle. The
following two examples are fully identical in functionality:
### Version 1 via PerlIO
open(INPUT, "< :encoding(shiftjis)", $infile)
|| die "Can't open < $infile for reading: $!";
open(OUTPUT, "> :encoding(euc-jp)", $outfile)
|| die "Can't open > $output for writing: $!";
while (<INPUT>) { # auto decodes $_
print OUTPUT; # auto encodes $_
}
close(INPUT) || die "can't close $infile: $!";
close(OUTPUT) || die "can't close $outfile: $!";
### Version 2 via from_to()
open(INPUT, "< :raw", $infile)
|| die "Can't open < $infile for reading: $!";
open(OUTPUT, "> :raw", $outfile)
|| die "Can't open > $output for writing: $!";
while (<INPUT>) {
from_to($_, "shiftjis", "euc-jp", 1); # switch encoding
print OUTPUT; # emit raw (but properly encoded) data
}
close(INPUT) || die "can't close $infile: $!";
close(OUTPUT) || die "can't close $outfile: $!";
In the first version above, you let the appropriate encoding layer
handle the conversion. In the second, you explicitly translate
from one encoding to the other.
Unfortunately, it may be that encodings are not C<PerlIO>-savvy. You can check
to see whether your encoding is supported by C<PerlIO> by invoking the
C<perlio_ok> method on it:
Encode::perlio_ok("hz"); # false
find_encoding("euc-cn")->perlio_ok; # true wherever PerlIO is available
use Encode qw(perlio_ok); # imported upon request
perlio_ok("euc-jp")
Fortunately, all encodings that come with C<Encode> core are C<PerlIO>-savvy
except for C<hz> and C<ISO-2022-kr>. For the gory details, see
L<Encode::Encoding> and L<Encode::PerlIO>.
=head1 Handling Malformed Data
The optional I<CHECK> argument tells C<Encode> what to do when
encountering malformed data. Without I<CHECK>, C<Encode::FB_DEFAULT>
(== 0) is assumed.
As of version 2.12, C<Encode> supports coderef values for C<CHECK>;
see below.
B<NOTE:> Not all encodings support this feature.
Some encodings ignore the I<CHECK> argument. For example,
L<Encode::Unicode> ignores I<CHECK> and it always croaks on error.
=head2 List of I<CHECK> values
=head3 FB_DEFAULT
I<CHECK> = Encode::FB_DEFAULT ( == 0)
If I<CHECK> is 0, encoding and decoding replace any malformed character
with a I<substitution character>. When you encode, I<SUBCHAR> is used.
When you decode, the Unicode REPLACEMENT CHARACTER, code point U+FFFD, is
used. If the data is supposed to be UTF-8, an optional lexical warning of
warning category C<"utf8"> is given.
=head3 FB_CROAK
I<CHECK> = Encode::FB_CROAK ( == 1)
If I<CHECK> is 1, methods immediately die with an error
message. Therefore, when I<CHECK> is 1, you should trap
exceptions with C<eval{}>, unless you really want to let it C<die>.
=head3 FB_QUIET
I<CHECK> = Encode::FB_QUIET
If I<CHECK> is set to C<Encode::FB_QUIET>, encoding and decoding immediately
return the portion of the data that has been processed so far when an
error occurs. The data argument is overwritten with everything
after that point; that is, the unprocessed portion of the data. This is
handy when you have to call C<decode> repeatedly in the case where your
source data may contain partial multi-byte character sequences,
(that is, you are reading with a fixed-width buffer). Here's some sample
code to do exactly that:
my($buffer, $string) = ("", "");
while (read($fh, $buffer, 256, length($buffer))) {
$string .= decode($encoding, $buffer, Encode::FB_QUIET);
# $buffer now contains the unprocessed partial character
}
=head3 FB_WARN
I<CHECK> = Encode::FB_WARN
This is the same as C<FB_QUIET> above, except that instead of being silent
on errors, it issues a warning. This is handy for when you are debugging.
B<CAVEAT>: All warnings from Encode module are reported, independently of
L<pragma warnings|warnings> settings. If you want to follow settings of
lexical warnings configured by L<pragma warnings|warnings> then append
also check value C<ENCODE::ONLY_PRAGMA_WARNINGS>. This value is available
since Encode version 2.99.
=head3 FB_PERLQQ FB_HTMLCREF FB_XMLCREF
=over 2
=item perlqq mode (I<CHECK> = Encode::FB_PERLQQ)
=item HTML charref mode (I<CHECK> = Encode::FB_HTMLCREF)
=item XML charref mode (I<CHECK> = Encode::FB_XMLCREF)
=back
For encodings that are implemented by the C<Encode::XS> module, C<CHECK> C<==>
C<Encode::FB_PERLQQ> puts C<encode> and C<decode> into C<perlqq> fallback mode.
When you decode, C<\xI<HH>> is inserted for a malformed character, where
I<HH> is the hex representation of the octet that could not be decoded to
utf8. When you encode, C<\x{I<HHHH>}> will be inserted, where I<HHHH> is
the Unicode code point (in any number of hex digits) of the character that
cannot be found in the character repertoire of the encoding.
The HTML/XML character reference modes are about the same. In place of
C<\x{I<HHHH>}>, HTML uses C<&#I<NNN>;> where I<NNN> is a decimal number, and
XML uses C<&#xI<HHHH>;> where I<HHHH> is the hexadecimal number.
In C<Encode> 2.10 or later, C<LEAVE_SRC> is also implied.
=head3 The bitmask
These modes are all actually set via a bitmask. Here is how the C<FB_I<XXX>>
constants are laid out. You can import the C<FB_I<XXX>> constants via
C<use Encode qw(:fallbacks)>, and you can import the generic bitmask
constants via C<use Encode qw(:fallback_all)>.
FB_DEFAULT FB_CROAK FB_QUIET FB_WARN FB_PERLQQ
DIE_ON_ERR 0x0001 X
WARN_ON_ERR 0x0002 X
RETURN_ON_ERR 0x0004 X X
LEAVE_SRC 0x0008 X
PERLQQ 0x0100 X
HTMLCREF 0x0200
XMLCREF 0x0400
=head3 LEAVE_SRC
Encode::LEAVE_SRC
If the C<Encode::LEAVE_SRC> bit is I<not> set but I<CHECK> is set, then the
source string to encode() or decode() will be overwritten in place.
If you're not interested in this, then bitwise-OR it with the bitmask.
=head2 coderef for CHECK
As of C<Encode> 2.12, C<CHECK> can also be a code reference which takes the
ordinal value of the unmapped character as an argument and returns
octets that represent the fallback character. For instance:
$ascii = encode("ascii", $utf8, sub{ sprintf "<U+%04X>", shift });
Acts like C<FB_PERLQQ> but U+I<XXXX> is used instead of C<\x{I<XXXX>}>.
Fallback for C<decode> must return decoded string (sequence of characters)
and takes a list of ordinal values as its arguments. So for
example if you wish to decode octets as UTF-8, and use ISO-8859-15 as
a fallback for bytes that are not valid UTF-8, you could write
$str = decode 'UTF-8', $octets, sub {
my $tmp = join '', map chr, @_;
return decode 'ISO-8859-15', $tmp;
};
=head1 Defining Encodings
To define a new encoding, use:
use Encode qw(define_encoding);
define_encoding($object, CANONICAL_NAME [, alias...]);
I<CANONICAL_NAME> will be associated with I<$object>. The object
should provide the interface described in L<Encode::Encoding>.
If more than two arguments are provided, additional
arguments are considered aliases for I<$object>.
See L<Encode::Encoding> for details.
=head1 The UTF8 flag
Before the introduction of Unicode support in Perl, The C<eq> operator
just compared the strings represented by two scalars. Beginning with
Perl 5.8, C<eq> compares two strings with simultaneous consideration of
I<the UTF8 flag>. To explain why we made it so, I quote from page 402 of
I<Programming Perl, 3rd ed.>
=over 2
=item Goal #1:
Old byte-oriented programs should not spontaneously break on the old
byte-oriented data they used to work on.
=item Goal #2:
Old byte-oriented programs should magically start working on the new
character-oriented data when appropriate.
=item Goal #3:
Programs should run just as fast in the new character-oriented mode
as in the old byte-oriented mode.
=item Goal #4:
Perl should remain one language, rather than forking into a
byte-oriented Perl and a character-oriented Perl.
=back
When I<Programming Perl, 3rd ed.> was written, not even Perl 5.6.0 had been
born yet, many features documented in the book remained unimplemented for a
long time. Perl 5.8 corrected much of this, and the introduction of the
UTF8 flag is one of them. You can think of there being two fundamentally
different kinds of strings and string-operations in Perl: one a
byte-oriented mode for when the internal UTF8 flag is off, and the other a
character-oriented mode for when the internal UTF8 flag is on.
This UTF8 flag is not visible in Perl scripts, exactly for the same reason
you cannot (or rather, you I<don't have to>) see whether a scalar contains
a string, an integer, or a floating-point number. But you can still peek
and poke these if you will. See the next section.
=head2 Messing with Perl's Internals
The following API uses parts of Perl's internals in the current
implementation. As such, they are efficient but may change in a future
release.
=head3 is_utf8
is_utf8(STRING [, CHECK])
[INTERNAL] Tests whether the UTF8 flag is turned on in the I<STRING>.
If I<CHECK> is true, also checks whether I<STRING> contains well-formed
UTF-8. Returns true if successful, false otherwise.
Typically only necessary for debugging and testing. Don't use this flag as
a marker to distinguish character and binary data, that should be decided
for each variable when you write your code.
B<CAVEAT>: If I<STRING> has UTF8 flag set, it does B<NOT> mean that
I<STRING> is UTF-8 encoded and vice-versa.
As of Perl 5.8.1, L<utf8> also has the C<utf8::is_utf8> function.
=head3 _utf8_on
_utf8_on(STRING)
[INTERNAL] Turns the I<STRING>'s internal UTF8 flag B<on>. The I<STRING>
is I<not> checked for containing only well-formed UTF-8. Do not use this
unless you I<know with absolute certainty> that the STRING holds only
well-formed UTF-8. Returns the previous state of the UTF8 flag (so please
don't treat the return value as indicating success or failure), or C<undef>
if I<STRING> is not a string.
B<NOTE>: For security reasons, this function does not work on tainted values.
=head3 _utf8_off
_utf8_off(STRING)
[INTERNAL] Turns the I<STRING>'s internal UTF8 flag B<off>. Do not use
frivolously. Returns the previous state of the UTF8 flag, or C<undef> if
I<STRING> is not a string. Do not treat the return value as indicative of
success or failure, because that isn't what it means: it is only the
previous setting.
B<NOTE>: For security reasons, this function does not work on tainted values.
=head1 UTF-8 vs. utf8 vs. UTF8
....We now view strings not as sequences of bytes, but as sequences
of numbers in the range 0 .. 2**32-1 (or in the case of 64-bit
computers, 0 .. 2**64-1) -- Programming Perl, 3rd ed.
That has historically been Perl's notion of UTF-8, as that is how UTF-8 was
first conceived by Ken Thompson when he invented it. However, thanks to
later revisions to the applicable standards, official UTF-8 is now rather
stricter than that. For example, its range is much narrower (0 .. 0x10_FFFF
to cover only 21 bits instead of 32 or 64 bits) and some sequences
are not allowed, like those used in surrogate pairs, the 31 non-character
code points 0xFDD0 .. 0xFDEF, the last two code points in I<any> plane
(0xI<XX>_FFFE and 0xI<XX>_FFFF), all non-shortest encodings, etc.
The former default in which Perl would always use a loose interpretation of
UTF-8 has now been overruled:
From: Larry Wall <larry@wall.org>
Date: December 04, 2004 11:51:58 JST
To: perl-unicode@perl.org
Subject: Re: Make Encode.pm support the real UTF-8
Message-Id: <20041204025158.GA28754@wall.org>
On Fri, Dec 03, 2004 at 10:12:12PM +0000, Tim Bunce wrote:
: I've no problem with 'utf8' being perl's unrestricted uft8 encoding,
: but "UTF-8" is the name of the standard and should give the
: corresponding behaviour.
For what it's worth, that's how I've always kept them straight in my
head.
Also for what it's worth, Perl 6 will mostly default to strict but
make it easy to switch back to lax.
Larry
Got that? As of Perl 5.8.7, B<"UTF-8"> means UTF-8 in its current
sense, which is conservative and strict and security-conscious, whereas
B<"utf8"> means UTF-8 in its former sense, which was liberal and loose and
lax. C<Encode> version 2.10 or later thus groks this subtle but critically
important distinction between C<"UTF-8"> and C<"utf8">.
encode("utf8", "\x{FFFF_FFFF}", 1); # okay
encode("UTF-8", "\x{FFFF_FFFF}", 1); # croaks
This distinction is also important for decoding. In the following,
C<$s> stores character U+200000, which exceeds UTF-8's allowed range.
C<$s> thus stores an invalid Unicode code point:
$s = decode("utf8", "\xf8\x88\x80\x80\x80");
C<"UTF-8">, by contrast, will either coerce the input to something valid:
$s = decode("UTF-8", "\xf8\x88\x80\x80\x80"); # U+FFFD
.. or croak:
decode("UTF-8", "\xf8\x88\x80\x80\x80", FB_CROAK|LEAVE_SRC);
In the C<Encode> module, C<"UTF-8"> is actually a canonical name for
C<"utf-8-strict">. That hyphen between the C<"UTF"> and the C<"8"> is
critical; without it, C<Encode> goes "liberal" and (perhaps overly-)permissive:
find_encoding("UTF-8")->name # is 'utf-8-strict'
find_encoding("utf-8")->name # ditto. names are case insensitive
find_encoding("utf_8")->name # ditto. "_" are treated as "-"
find_encoding("UTF8")->name # is 'utf8'.
Perl's internal UTF8 flag is called "UTF8", without a hyphen. It indicates
whether a string is internally encoded as "utf8", also without a hyphen.
=head1 SEE ALSO
L<Encode::Encoding>,
L<Encode::Supported>,
L<Encode::PerlIO>,
L<encoding>,
L<perlebcdic>,
L<perlfunc/open>,
L<perlunicode>, L<perluniintro>, L<perlunifaq>, L<perlunitut>
L<utf8>,
the Perl Unicode Mailing List L<http://lists.perl.org/list/perl-unicode.html>
=head1 MAINTAINER
This project was originated by the late Nick Ing-Simmons and later
maintained by Dan Kogai I<< <dankogai@cpan.org> >>. See AUTHORS
for a full list of people involved. For any questions, send mail to
I<< <perl-unicode@perl.org> >> so that we can all share.
While Dan Kogai retains the copyright as a maintainer, credit
should go to all those involved. See AUTHORS for a list of those
who submitted code to the project.
=head1 COPYRIGHT
Copyright 2002-2014 Dan Kogai I<< <dankogai@cpan.org> >>.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,399 @@
package Encode::Alias;
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 2.25 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use Exporter 'import';
# Public, encouraged API is exported by default
our @EXPORT =
qw (
define_alias
find_alias
);
our @Alias; # ordered matching list
our %Alias; # cached known aliases
sub find_alias {
my $class = shift;
my $find = shift;
unless ( exists $Alias{$find} ) {
$Alias{$find} = undef; # Recursion guard
for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
my $alias = $Alias[$i];
my $val = $Alias[ $i + 1 ];
my $new;
if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
DEBUG and warn "eval $val";
$new = eval $val;
DEBUG and $@ and warn "$val, $@";
}
elsif ( ref($alias) eq 'CODE' ) {
DEBUG and warn "$alias", "->", "($find)";
$new = $alias->($find);
}
elsif ( lc($find) eq lc($alias) ) {
$new = $val;
}
if ( defined($new) ) {
next if $new eq $find; # avoid (direct) recursion on bugs
DEBUG and warn "$alias, $new";
my $enc =
( ref($new) ) ? $new : Encode::find_encoding($new);
if ($enc) {
$Alias{$find} = $enc;
last;
}
}
}
# case insensitive search when canonical is not in all lowercase
# RT ticket #7835
unless ( $Alias{$find} ) {
my $lcfind = lc($find);
for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
{
$lcfind eq lc($name) or next;
$Alias{$find} = Encode::find_encoding($name);
DEBUG and warn "$find => $name";
}
}
}
if (DEBUG) {
my $name;
if ( my $e = $Alias{$find} ) {
$name = $e->name;
}
else {
$name = "";
}
warn "find_alias($class, $find)->name = $name";
}
return $Alias{$find};
}
sub define_alias {
while (@_) {
my $alias = shift;
my $name = shift;
unshift( @Alias, $alias => $name ) # newer one has precedence
if defined $alias;
if ( ref($alias) ) {
# clear %Alias cache to allow overrides
my @a = keys %Alias;
for my $k (@a) {
if ( ref($alias) eq 'Regexp' && $k =~ $alias ) {
DEBUG and warn "delete \$Alias\{$k\}";
delete $Alias{$k};
}
elsif ( ref($alias) eq 'CODE' && $alias->($k) ) {
DEBUG and warn "delete \$Alias\{$k\}";
delete $Alias{$k};
}
}
}
elsif (defined $alias) {
DEBUG and warn "delete \$Alias\{$alias\}";
delete $Alias{$alias};
}
elsif (DEBUG) {
require Carp;
Carp::croak("undef \$alias");
}
}
}
# HACK: Encode must be used after define_alias is declarated as Encode calls define_alias
use Encode ();
# Allow latin-1 style names as well
# 0 1 2 3 4 5 6 7 8 9 10
our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
# Allow winlatin1 style names as well
our %Winlatin2cp = (
'latin1' => 1252,
'latin2' => 1250,
'cyrillic' => 1251,
'greek' => 1253,
'turkish' => 1254,
'hebrew' => 1255,
'arabic' => 1256,
'baltic' => 1257,
'vietnamese' => 1258,
);
init_aliases();
sub undef_aliases {
@Alias = ();
%Alias = ();
}
sub init_aliases {
undef_aliases();
# Try all-lower-case version should all else fails
define_alias( qr/^(.*)$/ => '"\L$1"' );
# UTF/UCS stuff
define_alias( qr/^(unicode-1-1-)?UTF-?7$/i => '"UTF-7"' );
define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
define_alias(
qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
qr/^UCS-?4-?(BE|LE|)?$/i => 'uc("UTF-32$1")',
qr/^iso-10646-1$/i => '"UCS-2BE"'
);
define_alias(
qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
qr/^UTF-?(16|32)$/i => '"UTF-$1"',
);
# ASCII
define_alias( qr/^(?:US-?)ascii$/i => '"ascii"' );
define_alias( 'C' => 'ascii' );
define_alias( qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"' );
# Allow variants of iso-8859-1 etc.
define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
# ISO-8859-8-I => ISO-8859-8
# https://en.wikipedia.org/wiki/ISO-8859-8-I
define_alias( qr/\biso[-_]8859[-_]8[-_]I$/i => '"iso-8859-8"' );
# At least HP-UX has these.
define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
# More HP stuff.
define_alias(
qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
'"${1}8"' );
# The Official name of ASCII.
define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
# This is a font issue, not an encoding issue.
# (The currency symbol of the Latin 1 upper half
# has been redefined as the euro symbol.)
define_alias( qr/^(.+)\@euro$/i => '"$1"' );
define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
);
define_alias(
qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
hebrew|arabic|baltic|vietnamese)$/ix =>
'"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
);
# Common names for non-latin preferred MIME names
define_alias(
'ascii' => 'US-ascii',
'cyrillic' => 'iso-8859-5',
'arabic' => 'iso-8859-6',
'greek' => 'iso-8859-7',
'hebrew' => 'iso-8859-8',
'thai' => 'iso-8859-11',
);
# RT #20781
define_alias(qr/\btis-?620\b/i => '"iso-8859-11"');
# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
# And Microsoft has their own naming (again, surprisingly).
# And windows-* is registered in IANA!
define_alias(
qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );
# Sometimes seen with a leading zero.
# define_alias( qr/\bcp037\b/i => '"cp37"');
# Mac Mappings
# predefined in *.ucm; unneeded
# define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
define_alias( qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"' );
# http://rt.cpan.org/Ticket/Display.html?id=36326
define_alias( qr/^macintosh$/i => '"MacRoman"' );
# https://rt.cpan.org/Ticket/Display.html?id=78125
define_alias( qr/^macce$/i => '"MacCentralEurRoman"' );
# Ououououou. gone. They are different!
# define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
# Standardize on the dashed versions.
define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );
unless ($Encode::ON_EBCDIC) {
# for Encode::CN
define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
# define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
# CP936 doesn't have vendor-addon for GBK, so they're identical.
define_alias( qr/^gbk$/i => '"cp936"' );
# This fixes gb2312 vs. euc-cn confusion, practically
define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
# for Encode::JP
define_alias( qr/\bjis$/i => '"7bit-jis"' );
define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
define_alias( qr/\bujis$/i => '"euc-jp"' );
define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
define_alias( qr/\bsjis$/i => '"shiftjis"' );
define_alias( qr/\bwindows-31j$/i => '"cp932"' );
# for Encode::KR
define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
# This fixes ksc5601 vs. euc-kr confusion, practically
define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
# for Encode::TW
define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
}
# https://github.com/dankogai/p5-encode/issues/37
define_alias(qr/cp65000/i => '"UTF-7"');
define_alias(qr/cp65001/i => '"utf-8-strict"');
# utf8 is blessed :)
define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' );
# At last, Map white space and _ to '-'
define_alias( qr/^([^\s_]+)[\s_]+([^\s_]*)$/i => '"$1-$2"' );
}
1;
__END__
# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
# TODO: HP-UX '15' encodings japanese15 korean15 roi15
# TODO: Cyrillic encoding ISO-IR-111 (useful?)
# TODO: Armenian encoding ARMSCII-8
# TODO: Hebrew encoding ISO-8859-8-1
# TODO: Thai encoding TCVN
# TODO: Vietnamese encodings VPS
# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
# Kannada Khmer Korean Laotian Malayalam Mongolian
# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
=head1 NAME
Encode::Alias - alias definitions to encodings
=head1 SYNOPSIS
use Encode;
use Encode::Alias;
define_alias( "newName" => ENCODING);
define_alias( qr/.../ => ENCODING);
define_alias( sub { return ENCODING if ...; } );
=head1 DESCRIPTION
Allows newName to be used as an alias for ENCODING. ENCODING may be
either the name of an encoding or an encoding object (as described
in L<Encode>).
Currently the first argument to define_alias() can be specified in the
following ways:
=over 4
=item As a simple string.
=item As a qr// compiled regular expression, e.g.:
define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
in order to allow C<$1> etc. to be substituted. The example is one
way to alias names as used in X11 fonts to the MIME names for the
iso-8859-* family. Note the double quotes inside the single quotes.
(or, you don't have to do this yourself because this example is predefined)
If you are using a regex here, you have to use the quotes as shown or
it won't work. Also note that regex handling is tricky even for the
experienced. Use this feature with caution.
=item As a code reference, e.g.:
define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
The same effect as the example above in a different way. The coderef
takes the alias name as an argument and returns a canonical name on
success or undef if not. Note the second argument is ignored if provided.
Use this with even more caution than the regex version.
=back
=head3 Changes in code reference aliasing
As of Encode 1.87, the older form
define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
no longer works.
Encode up to 1.86 internally used "local $_" to implement this older
form. But consider the code below;
use Encode;
$_ = "eeeee" ;
while (/(e)/g) {
my $utf = decode('aliased-encoding-name', $1);
print "position:",pos,"\n";
}
Prior to Encode 1.86 this fails because of "local $_".
=head2 Alias overloading
You can override predefined aliases by simply applying define_alias().
The new alias is always evaluated first, and when necessary,
define_alias() flushes the internal cache to make the new definition
available.
# redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
# superset of SHIFT_JIS
define_alias( qr/shift.*jis$/i => '"cp932"' );
define_alias( qr/sjis$/i => '"cp932"' );
If you want to zap all predefined aliases, you can use
Encode::Alias->undef_aliases;
to do so. And
Encode::Alias->init_aliases;
gets the factory settings back.
Note that define_alias() will not be able to override the canonical name
of encodings. Encodings are first looked up by canonical name before
potential aliases are tried.
=head1 SEE ALSO
L<Encode>, L<Encode::Supported>
=cut

View File

@@ -0,0 +1,120 @@
package Encode::Byte;
use strict;
use warnings;
use Encode;
our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
1;
__END__
=head1 NAME
Encode::Byte - Single Byte Encodings
=head1 SYNOPSIS
use Encode qw/encode decode/;
$greek = encode("iso-8859-7", $utf8); # loads Encode::Byte implicitly
$utf8 = decode("iso-8859-7", $greek); # ditto
=head1 ABSTRACT
This module implements various single byte encodings. For most cases it uses
\x80-\xff (upper half) to map non-ASCII characters. Encodings
supported are as follows.
Canonical Alias Description
--------------------------------------------------------------------
# ISO 8859 series
(iso-8859-1 is in built-in)
iso-8859-2 latin2 [ISO]
iso-8859-3 latin3 [ISO]
iso-8859-4 latin4 [ISO]
iso-8859-5 [ISO]
iso-8859-6 [ISO]
iso-8859-7 [ISO]
iso-8859-8 [ISO]
iso-8859-9 latin5 [ISO]
iso-8859-10 latin6 [ISO]
iso-8859-11
(iso-8859-12 is nonexistent)
iso-8859-13 latin7 [ISO]
iso-8859-14 latin8 [ISO]
iso-8859-15 latin9 [ISO]
iso-8859-16 latin10 [ISO]
# Cyrillic
koi8-f
koi8-r cp878 [RFC1489]
koi8-u [RFC2319]
# Vietnamese
viscii
# all cp* are also available as ibm-*, ms-*, and windows-*
# also see L<http://msdn.microsoft.com/en-us/library/aa752010%28VS.85%29.aspx>
cp424
cp437
cp737
cp775
cp850
cp852
cp855
cp856
cp857
cp860
cp861
cp862
cp863
cp864
cp865
cp866
cp869
cp874
cp1006
cp1250 WinLatin2
cp1251 WinCyrillic
cp1252 WinLatin1
cp1253 WinGreek
cp1254 WinTurkish
cp1255 WinHebrew
cp1256 WinArabic
cp1257 WinBaltic
cp1258 WinVietnamese
# Macintosh
# Also see L<http://developer.apple.com/technotes/tn/tn1150.html>
MacArabic
MacCentralEurRoman
MacCroatian
MacCyrillic
MacFarsi
MacGreek
MacHebrew
MacIcelandic
MacRoman
MacRomanian
MacRumanian
MacSami
MacThai
MacTurkish
MacUkrainian
# More vendor encodings
AdobeStandardEncoding
nextstep
hp-roman8
=head1 DESCRIPTION
To find how to use this module in detail, see L<Encode>.
=head1 SEE ALSO
L<Encode>
=cut

View File

@@ -0,0 +1,66 @@
#
# $Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $
#
package Encode::CJKConstants;
use strict;
use warnings;
our $RCSID = q$Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $;
our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Carp;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw();
our @EXPORT_OK = qw(%CHARCODE %ESC %RE);
our %EXPORT_TAGS = ( 'all' => [ @EXPORT_OK, @EXPORT ] );
my %_0208 = (
1978 => '\e\$\@',
1983 => '\e\$B',
1990 => '\e&\@\e\$B',
);
our %CHARCODE = (
UNDEF_EUC => "\xa2\xae", # <20><> in EUC
UNDEF_SJIS => "\x81\xac", # <20><> in SJIS
UNDEF_JIS => "\xa2\xf7", # <20><> -- used in unicode
UNDEF_UNICODE => "\x20\x20", # <20><> -- used in unicode
);
our %ESC = (
GB_2312 => "\e\$A",
JIS_0208 => "\e\$B",
JIS_0212 => "\e\$(D",
KSC_5601 => "\e\$(C",
ASC => "\e\(B",
KANA => "\e\(I",
'2022_KR' => "\e\$)C",
);
our %RE = (
ASCII => '[\x00-\x7f]',
BIN => '[\x00-\x06\x7f\xff]',
EUC_0212 => '\x8f[\xa1-\xfe][\xa1-\xfe]',
EUC_C => '[\xa1-\xfe][\xa1-\xfe]',
EUC_KANA => '\x8e[\xa1-\xdf]',
JIS_0208 => "$_0208{1978}|$_0208{1983}|$_0208{1990}",
JIS_0212 => "\e" . '\$\(D',
ISO_ASC => "\e" . '\([BJ]',
JIS_KANA => "\e" . '\(I',
'2022_KR' => "\e" . '\$\)C',
SJIS_C => '[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]',
SJIS_KANA => '[\xa1-\xdf]',
UTF8 => '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
);
1;
=head1 NAME
Encode::CJKConstants.pm -- Internally used by Encode::??::ISO_2022_*
=cut

View File

@@ -0,0 +1,74 @@
package Encode::CN;
BEGIN {
if ( ord("A") == 193 ) {
die "Encode::CN not supported on EBCDIC\n";
}
}
use strict;
use warnings;
use Encode;
our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
# Relocated from Encode.pm
use Encode::CN::HZ;
# use Encode::CN::2022_CN;
1;
__END__
=head1 NAME
Encode::CN - China-based Chinese Encodings
=head1 SYNOPSIS
use Encode qw/encode decode/;
$euc_cn = encode("euc-cn", $utf8); # loads Encode::CN implicitly
$utf8 = decode("euc-cn", $euc_cn); # ditto
=head1 DESCRIPTION
This module implements China-based Chinese charset encodings.
Encodings supported are as follows.
Canonical Alias Description
--------------------------------------------------------------------
euc-cn /\beuc.*cn$/i EUC (Extended Unix Character)
/\bcn.*euc$/i
/\bGB[-_ ]?2312(?:\D.*$|$)/i (see below)
gb2312-raw The raw (low-bit) GB2312 character map
gb12345-raw Traditional chinese counterpart to
GB2312 (raw)
iso-ir-165 GB2312 + GB6345 + GB8565 + additions
MacChineseSimp GB2312 + Apple Additions
cp936 Code Page 936, also known as GBK
(Extended GuoBiao)
hz 7-bit escaped GB2312 encoding
--------------------------------------------------------------------
To find how to use this module in detail, see L<Encode>.
=head1 NOTES
Due to size concerns, C<GB 18030> (an extension to C<GBK>) is distributed
separately on CPAN, under the name L<Encode::HanExtra>. That module
also contains extra Taiwan-based encodings.
=head1 BUGS
When you see C<charset=gb2312> on mails and web pages, they really
mean C<euc-cn> encodings. To fix that, C<gb2312> is aliased to C<euc-cn>.
Use C<gb2312-raw> when you really mean it.
The ASCII region (0x00-0x7f) is preserved for all encodings, even though
this conflicts with mappings by the Unicode Consortium.
=head1 SEE ALSO
L<Encode>
=cut

View File

@@ -0,0 +1,201 @@
package Encode::CN::HZ;
use strict;
use warnings;
use utf8 ();
use vars qw($VERSION);
$VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
use parent qw(Encode::Encoding);
__PACKAGE__->Define('hz');
# HZ is a combination of ASCII and escaped GB, so we implement it
# with the GB2312(raw) encoding here. Cf. RFCs 1842 & 1843.
# not ported for EBCDIC. Which should be used, "~" or "\x7E"?
sub needs_lines { 1 }
sub decode ($$;$) {
my ( $obj, $str, $chk ) = @_;
return undef unless defined $str;
my $GB = Encode::find_encoding('gb2312-raw');
my $ret = substr($str, 0, 0); # to propagate taintedness
my $in_ascii = 1; # default mode is ASCII.
while ( length $str ) {
if ($in_ascii) { # ASCII mode
if ( $str =~ s/^([\x00-\x7D\x7F]+)// ) { # no '~' => ASCII
$ret .= $1;
# EBCDIC should need ascii2native, but not ported.
}
elsif ( $str =~ s/^\x7E\x7E// ) { # escaped tilde
$ret .= '~';
}
elsif ( $str =~ s/^\x7E\cJ// ) { # '\cJ' == LF in ASCII
1; # no-op
}
elsif ( $str =~ s/^\x7E\x7B// ) { # '~{'
$in_ascii = 0; # to GB
}
else { # encounters an invalid escape, \x80 or greater
last;
}
}
else { # GB mode; the byte ranges are as in RFC 1843.
no warnings 'uninitialized';
if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
my $prefix = $1;
$ret .= $GB->decode( $prefix, $chk );
}
elsif ( $str =~ s/^\x7E\x7D// ) { # '~}'
$in_ascii = 1;
}
else { # invalid
last;
}
}
}
$_[1] = '' if $chk; # needs_lines guarantees no partial character
return $ret;
}
sub cat_decode {
my ( $obj, undef, $src, $pos, $trm, $chk ) = @_;
my ( $rdst, $rsrc, $rpos ) = \@_[ 1 .. 3 ];
my $GB = Encode::find_encoding('gb2312-raw');
my $ret = '';
my $in_ascii = 1; # default mode is ASCII.
my $ini_pos = pos($$rsrc);
substr( $src, 0, $pos ) = '';
my $ini_len = bytes::length($src);
# $trm is the first of the pair '~~', then 2nd tilde is to be removed.
# XXX: Is better C<$src =~ s/^\x7E// or die if ...>?
$src =~ s/^\x7E// if $trm eq "\x7E";
while ( length $src ) {
my $now;
if ($in_ascii) { # ASCII mode
if ( $src =~ s/^([\x00-\x7D\x7F])// ) { # no '~' => ASCII
$now = $1;
}
elsif ( $src =~ s/^\x7E\x7E// ) { # escaped tilde
$now = '~';
}
elsif ( $src =~ s/^\x7E\cJ// ) { # '\cJ' == LF in ASCII
next;
}
elsif ( $src =~ s/^\x7E\x7B// ) { # '~{'
$in_ascii = 0; # to GB
next;
}
else { # encounters an invalid escape, \x80 or greater
last;
}
}
else { # GB mode; the byte ranges are as in RFC 1843.
if ( $src =~ s/^((?:[\x21-\x77][\x21-\x7F])+)// ) {
$now = $GB->decode( $1, $chk );
}
elsif ( $src =~ s/^\x7E\x7D// ) { # '~}'
$in_ascii = 1;
next;
}
else { # invalid
last;
}
}
next if !defined $now;
$ret .= $now;
if ( $now eq $trm ) {
$$rdst .= $ret;
$$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
pos($$rsrc) = $ini_pos;
return 1;
}
}
$$rdst .= $ret;
$$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
pos($$rsrc) = $ini_pos;
return ''; # terminator not found
}
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
return undef unless defined $str;
my $GB = Encode::find_encoding('gb2312-raw');
my $ret = substr($str, 0, 0); # to propagate taintedness;
my $in_ascii = 1; # default mode is ASCII.
no warnings 'utf8'; # $str may be malformed UTF8 at the end of a chunk.
while ( length $str ) {
if ( $str =~ s/^([[:ascii:]]+)// ) {
my $tmp = $1;
$tmp =~ s/~/~~/g; # escapes tildes
if ( !$in_ascii ) {
$ret .= "\x7E\x7D"; # '~}'
$in_ascii = 1;
}
$ret .= pack 'a*', $tmp; # remove UTF8 flag.
}
elsif ( $str =~ s/(.)// ) {
my $s = $1;
my $tmp = $GB->encode( $s, $chk || 0 );
last if !defined $tmp;
if ( length $tmp == 2 ) { # maybe a valid GB char (XXX)
if ($in_ascii) {
$ret .= "\x7E\x7B"; # '~{'
$in_ascii = 0;
}
$ret .= $tmp;
}
elsif ( length $tmp ) { # maybe FALLBACK in ASCII (XXX)
if ( !$in_ascii ) {
$ret .= "\x7E\x7D"; # '~}'
$in_ascii = 1;
}
$ret .= $tmp;
}
}
else { # if $str is malformed UTF8 *and* if length $str != 0.
last;
}
}
$_[1] = $str if $chk;
# The state at the end of the chunk is discarded, even if in GB mode.
# That results in the combination of GB-OUT and GB-IN, i.e. "~}~{".
# Parhaps it is harmless, but further investigations may be required...
if ( !$in_ascii ) {
$ret .= "\x7E\x7D"; # '~}'
$in_ascii = 1;
}
utf8::encode($ret); # https://rt.cpan.org/Ticket/Display.html?id=35120
return $ret;
}
1;
__END__
=head1 NAME
Encode::CN::HZ -- internally used by Encode::CN
=cut

View File

@@ -0,0 +1,170 @@
#
# Demand-load module list
#
package Encode::Config;
our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use strict;
use warnings;
our %ExtModule = (
# Encode::Byte
#iso-8859-1 is in Encode.pm itself
'iso-8859-2' => 'Encode::Byte',
'iso-8859-3' => 'Encode::Byte',
'iso-8859-4' => 'Encode::Byte',
'iso-8859-5' => 'Encode::Byte',
'iso-8859-6' => 'Encode::Byte',
'iso-8859-7' => 'Encode::Byte',
'iso-8859-8' => 'Encode::Byte',
'iso-8859-9' => 'Encode::Byte',
'iso-8859-10' => 'Encode::Byte',
'iso-8859-11' => 'Encode::Byte',
'iso-8859-13' => 'Encode::Byte',
'iso-8859-14' => 'Encode::Byte',
'iso-8859-15' => 'Encode::Byte',
'iso-8859-16' => 'Encode::Byte',
'koi8-f' => 'Encode::Byte',
'koi8-r' => 'Encode::Byte',
'koi8-u' => 'Encode::Byte',
'viscii' => 'Encode::Byte',
'cp424' => 'Encode::Byte',
'cp437' => 'Encode::Byte',
'cp737' => 'Encode::Byte',
'cp775' => 'Encode::Byte',
'cp850' => 'Encode::Byte',
'cp852' => 'Encode::Byte',
'cp855' => 'Encode::Byte',
'cp856' => 'Encode::Byte',
'cp857' => 'Encode::Byte',
'cp858' => 'Encode::Byte',
'cp860' => 'Encode::Byte',
'cp861' => 'Encode::Byte',
'cp862' => 'Encode::Byte',
'cp863' => 'Encode::Byte',
'cp864' => 'Encode::Byte',
'cp865' => 'Encode::Byte',
'cp866' => 'Encode::Byte',
'cp869' => 'Encode::Byte',
'cp874' => 'Encode::Byte',
'cp1006' => 'Encode::Byte',
'cp1250' => 'Encode::Byte',
'cp1251' => 'Encode::Byte',
'cp1252' => 'Encode::Byte',
'cp1253' => 'Encode::Byte',
'cp1254' => 'Encode::Byte',
'cp1255' => 'Encode::Byte',
'cp1256' => 'Encode::Byte',
'cp1257' => 'Encode::Byte',
'cp1258' => 'Encode::Byte',
'AdobeStandardEncoding' => 'Encode::Byte',
'MacArabic' => 'Encode::Byte',
'MacCentralEurRoman' => 'Encode::Byte',
'MacCroatian' => 'Encode::Byte',
'MacCyrillic' => 'Encode::Byte',
'MacFarsi' => 'Encode::Byte',
'MacGreek' => 'Encode::Byte',
'MacHebrew' => 'Encode::Byte',
'MacIcelandic' => 'Encode::Byte',
'MacRoman' => 'Encode::Byte',
'MacRomanian' => 'Encode::Byte',
'MacRumanian' => 'Encode::Byte',
'MacSami' => 'Encode::Byte',
'MacThai' => 'Encode::Byte',
'MacTurkish' => 'Encode::Byte',
'MacUkrainian' => 'Encode::Byte',
'nextstep' => 'Encode::Byte',
'hp-roman8' => 'Encode::Byte',
#'gsm0338' => 'Encode::Byte',
'gsm0338' => 'Encode::GSM0338',
# Encode::EBCDIC
'cp37' => 'Encode::EBCDIC',
'cp500' => 'Encode::EBCDIC',
'cp875' => 'Encode::EBCDIC',
'cp1026' => 'Encode::EBCDIC',
'cp1047' => 'Encode::EBCDIC',
'posix-bc' => 'Encode::EBCDIC',
# Encode::Symbol
'dingbats' => 'Encode::Symbol',
'symbol' => 'Encode::Symbol',
'AdobeSymbol' => 'Encode::Symbol',
'AdobeZdingbat' => 'Encode::Symbol',
'MacDingbats' => 'Encode::Symbol',
'MacSymbol' => 'Encode::Symbol',
# Encode::Unicode
'UCS-2BE' => 'Encode::Unicode',
'UCS-2LE' => 'Encode::Unicode',
'UTF-16' => 'Encode::Unicode',
'UTF-16BE' => 'Encode::Unicode',
'UTF-16LE' => 'Encode::Unicode',
'UTF-32' => 'Encode::Unicode',
'UTF-32BE' => 'Encode::Unicode',
'UTF-32LE' => 'Encode::Unicode',
'UTF-7' => 'Encode::Unicode::UTF7',
);
unless ( ord("A") == 193 ) {
%ExtModule = (
%ExtModule,
'euc-cn' => 'Encode::CN',
'gb12345-raw' => 'Encode::CN',
'gb2312-raw' => 'Encode::CN',
'hz' => 'Encode::CN',
'iso-ir-165' => 'Encode::CN',
'cp936' => 'Encode::CN',
'MacChineseSimp' => 'Encode::CN',
'7bit-jis' => 'Encode::JP',
'euc-jp' => 'Encode::JP',
'iso-2022-jp' => 'Encode::JP',
'iso-2022-jp-1' => 'Encode::JP',
'jis0201-raw' => 'Encode::JP',
'jis0208-raw' => 'Encode::JP',
'jis0212-raw' => 'Encode::JP',
'cp932' => 'Encode::JP',
'MacJapanese' => 'Encode::JP',
'shiftjis' => 'Encode::JP',
'euc-kr' => 'Encode::KR',
'iso-2022-kr' => 'Encode::KR',
'johab' => 'Encode::KR',
'ksc5601-raw' => 'Encode::KR',
'cp949' => 'Encode::KR',
'MacKorean' => 'Encode::KR',
'big5-eten' => 'Encode::TW',
'big5-hkscs' => 'Encode::TW',
'cp950' => 'Encode::TW',
'MacChineseTrad' => 'Encode::TW',
#'big5plus' => 'Encode::HanExtra',
#'euc-tw' => 'Encode::HanExtra',
#'gb18030' => 'Encode::HanExtra',
'MIME-Header' => 'Encode::MIME::Header',
'MIME-B' => 'Encode::MIME::Header',
'MIME-Q' => 'Encode::MIME::Header',
'MIME-Header-ISO_2022_JP' => 'Encode::MIME::Header::ISO_2022_JP',
);
}
#
# Why not export ? to keep ConfigLocal Happy!
#
while ( my ( $enc, $mod ) = each %ExtModule ) {
$Encode::ExtModule{$enc} = $mod;
}
1;
__END__
=head1 NAME
Encode::Config -- internally used by Encode
=cut

View File

@@ -0,0 +1,45 @@
package Encode::EBCDIC;
use strict;
use warnings;
use Encode;
our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
1;
__END__
=head1 NAME
Encode::EBCDIC - EBCDIC Encodings
=head1 SYNOPSIS
use Encode qw/encode decode/;
$posix_bc = encode("posix-bc", $utf8); # loads Encode::EBCDIC implicitly
$utf8 = decode("", $posix_bc); # ditto
=head1 ABSTRACT
This module implements various EBCDIC-Based encodings. Encodings
supported are as follows.
Canonical Alias Description
--------------------------------------------------------------------
cp37
cp500
cp875
cp1026
cp1047
posix-bc
=head1 DESCRIPTION
To find how to use this module in detail, see L<Encode>.
=head1 SEE ALSO
L<Encode>, L<perlebcdic>
=cut

View File

@@ -0,0 +1,253 @@
#
# $Id: Encoder.pm,v 2.3 2013/09/14 07:51:59 dankogai Exp $
#
package Encode::Encoder;
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw ( encoder );
our $AUTOLOAD;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use Encode qw(encode decode find_encoding from_to);
use Carp;
sub new {
my ( $class, $data, $encname ) = @_;
unless ($encname) {
$encname = Encode::is_utf8($data) ? 'utf8' : '';
}
else {
my $obj = find_encoding($encname)
or croak __PACKAGE__, ": unknown encoding: $encname";
$encname = $obj->name;
}
my $self = {
data => $data,
encoding => $encname,
};
bless $self => $class;
}
sub encoder { __PACKAGE__->new(@_) }
sub data {
my ( $self, $data ) = @_;
if ( defined $data ) {
$self->{data} = $data;
return $data;
}
else {
return $self->{data};
}
}
sub encoding {
my ( $self, $encname ) = @_;
if ($encname) {
my $obj = find_encoding($encname)
or confess __PACKAGE__, ": unknown encoding: $encname";
$self->{encoding} = $obj->name;
return $self;
}
else {
return $self->{encoding};
}
}
sub bytes {
my ( $self, $encname ) = @_;
$encname ||= $self->{encoding};
my $obj = find_encoding($encname)
or confess __PACKAGE__, ": unknown encoding: $encname";
$self->{data} = $obj->decode( $self->{data}, 1 );
$self->{encoding} = '';
return $self;
}
sub DESTROY { # defined so it won't autoload.
DEBUG and warn shift;
}
sub AUTOLOAD {
my $self = shift;
my $type = ref($self)
or confess "$self is not an object";
my $myname = $AUTOLOAD;
$myname =~ s/.*://; # strip fully-qualified portion
my $obj = find_encoding($myname)
or confess __PACKAGE__, ": unknown encoding: $myname";
DEBUG and warn $self->{encoding}, " => ", $obj->name;
if ( $self->{encoding} ) {
from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
}
else {
$self->{data} = $obj->encode( $self->{data}, 1 );
}
$self->{encoding} = $obj->name;
return $self;
}
use overload
q("") => sub { $_[0]->{data} },
q(0+) => sub { use bytes(); bytes::length( $_[0]->{data} ) },
fallback => 1,
;
1;
__END__
=head1 NAME
Encode::Encoder -- Object Oriented Encoder
=head1 SYNOPSIS
use Encode::Encoder;
# Encode::encode("ISO-8859-1", $data);
Encode::Encoder->new($data)->iso_8859_1; # OOP way
# shortcut
use Encode::Encoder qw(encoder);
encoder($data)->iso_8859_1;
# you can stack them!
encoder($data)->iso_8859_1->base64; # provided base64() is defined
# you can use it as a decoder as well
encoder($base64)->bytes('base64')->latin1;
# stringified
print encoder($data)->utf8->latin1; # prints the string in latin1
# numified
encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
=head1 ABSTRACT
B<Encode::Encoder> allows you to use Encode in an object-oriented
style. This is not only more intuitive than a functional approach,
but also handier when you want to stack encodings. Suppose you want
your UTF-8 string converted to Latin1 then Base64: you can simply say
my $base64 = encoder($utf8)->latin1->base64;
instead of
my $latin1 = encode("latin1", $utf8);
my $base64 = encode_base64($utf8);
or the lazier and more convoluted
my $base64 = encode_base64(encode("latin1", $utf8));
=head1 Description
Here is how to use this module.
=over 4
=item *
There are at least two instance variables stored in a hash reference,
{data} and {encoding}.
=item *
When there is no method, it takes the method name as the name of the
encoding and encodes the instance I<data> with I<encoding>. If successful,
the instance I<encoding> is set accordingly.
=item *
You can retrieve the result via -E<gt>data but usually you don't have to
because the stringify operator ("") is overridden to do exactly that.
=back
=head2 Predefined Methods
This module predefines the methods below:
=over 4
=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
returns an encoder object. Its data is initialized with $data if
present, and its encoding is set to $encoding if present.
When $encoding is omitted, it defaults to utf8 if $data is already in
utf8 or "" (empty string) otherwise.
=item encoder()
is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand.
=item $e-E<gt>data([$data])
When $data is present, sets the instance data to $data and returns the
object itself. Otherwise, the current instance data is returned.
=item $e-E<gt>encoding([$encoding])
When $encoding is present, sets the instance encoding to $encoding and
returns the object itself. Otherwise, the current instance encoding is
returned.
=item $e-E<gt>bytes([$encoding])
decodes instance data from $encoding, or the instance encoding if
omitted. If the conversion is successful, the instance encoding
will be set to "".
The name I<bytes> was deliberately picked to avoid namespace tainting
-- this module may be used as a base class so method names that appear
in Encode::Encoding are avoided.
=back
=head2 Example: base64 transcoder
This module is designed to work with L<Encode::Encoding>.
To make the Base64 transcoder example above really work, you could
write a module like this:
package Encode::Base64;
use parent 'Encode::Encoding';
__PACKAGE__->Define('base64');
use MIME::Base64;
sub encode{
my ($obj, $data) = @_;
return encode_base64($data);
}
sub decode{
my ($obj, $data) = @_;
return decode_base64($data);
}
1;
__END__
And your caller module would be something like this:
use Encode::Encoder;
use Encode::Base64;
# now you can really do the following
encoder($data)->iso_8859_1->base64;
encoder($base64)->bytes('base64')->latin1;
=head2 Operator Overloading
This module overloads two operators, stringify ("") and numify (0+).
Stringify dumps the data inside the object.
Numify returns the number of bytes in the instance data.
They come in handy when you want to print or find the size of data.
=head1 SEE ALSO
L<Encode>,
L<Encode::Encoding>
=cut

View File

@@ -0,0 +1,356 @@
package Encode::Encoding;
# Base class for classes which implement encodings
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
our @CARP_NOT = qw(Encode Encode::Encoder);
use Carp ();
use Encode ();
use Encode::MIME::Name;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
sub Define {
my $obj = shift;
my $canonical = shift;
$obj = bless { Name => $canonical }, $obj unless ref $obj;
# warn "$canonical => $obj\n";
Encode::define_encoding( $obj, $canonical, @_ );
}
sub name { return shift->{'Name'} }
sub mime_name {
return Encode::MIME::Name::get_mime_name(shift->name);
}
sub renew {
my $self = shift;
my $clone = bless {%$self} => ref($self);
$clone->{renewed}++; # so the caller can see it
DEBUG and warn $clone->{renewed};
return $clone;
}
sub renewed { return $_[0]->{renewed} || 0 }
*new_sequence = \&renew;
sub needs_lines { 0 }
sub perlio_ok {
return eval { require PerlIO::encoding } ? 1 : 0;
}
# (Temporary|legacy) methods
sub toUnicode { shift->decode(@_) }
sub fromUnicode { shift->encode(@_) }
#
# Needs to be overloaded or just croak
#
sub encode {
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
Carp::croak( $class . "->encode() not defined!" );
}
sub decode {
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
Carp::croak( $class . "->encode() not defined!" );
}
sub DESTROY { }
1;
__END__
=head1 NAME
Encode::Encoding - Encode Implementation Base Class
=head1 SYNOPSIS
package Encode::MyEncoding;
use parent qw(Encode::Encoding);
__PACKAGE__->Define(qw(myCanonical myAlias));
=head1 DESCRIPTION
As mentioned in L<Encode>, encodings are (in the current
implementation at least) defined as objects. The mapping of encoding
name to object is via the C<%Encode::Encoding> hash. Though you can
directly manipulate this hash, it is strongly encouraged to use this
base class module and add encode() and decode() methods.
=head2 Methods you should implement
You are strongly encouraged to implement methods below, at least
either encode() or decode().
=over 4
=item -E<gt>encode($string [,$check])
MUST return the octet sequence representing I<$string>.
=over 2
=item *
If I<$check> is true, it SHOULD modify I<$string> in place to remove
the converted part (i.e. the whole string unless there is an error).
If perlio_ok() is true, SHOULD becomes MUST.
=item *
If an error occurs, it SHOULD return the octet sequence for the
fragment of string that has been converted and modify $string in-place
to remove the converted part leaving it starting with the problem
fragment. If perlio_ok() is true, SHOULD becomes MUST.
=item *
If I<$check> is false then C<encode> MUST make a "best effort" to
convert the string - for example, by using a replacement character.
=back
=item -E<gt>decode($octets [,$check])
MUST return the string that I<$octets> represents.
=over 2
=item *
If I<$check> is true, it SHOULD modify I<$octets> in place to remove
the converted part (i.e. the whole sequence unless there is an
error). If perlio_ok() is true, SHOULD becomes MUST.
=item *
If an error occurs, it SHOULD return the fragment of string that has
been converted and modify $octets in-place to remove the converted
part leaving it starting with the problem fragment. If perlio_ok() is
true, SHOULD becomes MUST.
=item *
If I<$check> is false then C<decode> should make a "best effort" to
convert the string - for example by using Unicode's "\x{FFFD}" as a
replacement character.
=back
=back
If you want your encoding to work with L<encoding> pragma, you should
also implement the method below.
=over 4
=item -E<gt>cat_decode($destination, $octets, $offset, $terminator [,$check])
MUST decode I<$octets> with I<$offset> and concatenate it to I<$destination>.
Decoding will terminate when $terminator (a string) appears in output.
I<$offset> will be modified to the last $octets position at end of decode.
Returns true if $terminator appears output, else returns false.
=back
=head2 Other methods defined in Encode::Encodings
You do not have to override methods shown below unless you have to.
=over 4
=item -E<gt>name
Predefined As:
sub name { return shift->{'Name'} }
MUST return the string representing the canonical name of the encoding.
=item -E<gt>mime_name
Predefined As:
sub mime_name{
return Encode::MIME::Name::get_mime_name(shift->name);
}
MUST return the string representing the IANA charset name of the encoding.
=item -E<gt>renew
Predefined As:
sub renew {
my $self = shift;
my $clone = bless { %$self } => ref($self);
$clone->{renewed}++;
return $clone;
}
This method reconstructs the encoding object if necessary. If you need
to store the state during encoding, this is where you clone your object.
PerlIO ALWAYS calls this method to make sure it has its own private
encoding object.
=item -E<gt>renewed
Predefined As:
sub renewed { $_[0]->{renewed} || 0 }
Tells whether the object is renewed (and how many times). Some
modules emit C<Use of uninitialized value in null operation> warning
unless the value is numeric so return 0 for false.
=item -E<gt>perlio_ok()
Predefined As:
sub perlio_ok {
return eval { require PerlIO::encoding } ? 1 : 0;
}
If your encoding does not support PerlIO for some reasons, just;
sub perlio_ok { 0 }
=item -E<gt>needs_lines()
Predefined As:
sub needs_lines { 0 };
If your encoding can work with PerlIO but needs line buffering, you
MUST define this method so it returns true. 7bit ISO-2022 encodings
are one example that needs this. When this method is missing, false
is assumed.
=back
=head2 Example: Encode::ROT13
package Encode::ROT13;
use strict;
use parent qw(Encode::Encoding);
__PACKAGE__->Define('rot13');
sub encode($$;$){
my ($obj, $str, $chk) = @_;
$str =~ tr/A-Za-z/N-ZA-Mn-za-m/;
$_[1] = '' if $chk; # this is what in-place edit means
return $str;
}
# Jr pna or ynml yvxr guvf;
*decode = \&encode;
1;
=head1 Why the heck Encode API is different?
It should be noted that the I<$check> behaviour is different from the
outer public API. The logic is that the "unchecked" case is useful
when the encoding is part of a stream which may be reporting errors
(e.g. STDERR). In such cases, it is desirable to get everything
through somehow without causing additional errors which obscure the
original one. Also, the encoding is best placed to know what the
correct replacement character is, so if that is the desired behaviour
then letting low level code do it is the most efficient.
By contrast, if I<$check> is true, the scheme above allows the
encoding to do as much as it can and tell the layer above how much
that was. What is lacking at present is a mechanism to report what
went wrong. The most likely interface will be an additional method
call to the object, or perhaps (to avoid forcing per-stream objects
on otherwise stateless encodings) an additional parameter.
It is also highly desirable that encoding classes inherit from
C<Encode::Encoding> as a base class. This allows that class to define
additional behaviour for all encoding objects.
package Encode::MyEncoding;
use parent qw(Encode::Encoding);
__PACKAGE__->Define(qw(myCanonical myAlias));
to create an object with C<< bless {Name => ...}, $class >>, and call
define_encoding. They inherit their C<name> method from
C<Encode::Encoding>.
=head2 Compiled Encodings
For the sake of speed and efficiency, most of the encodings are now
supported via a I<compiled form>: XS modules generated from UCM
files. Encode provides the enc2xs tool to achieve that. Please see
L<enc2xs> for more details.
=head1 SEE ALSO
L<perlmod>, L<enc2xs>
=begin future
=over 4
=item Scheme 1
The fixup routine gets passed the remaining fragment of string being
processed. It modifies it in place to remove bytes/characters it can
understand and returns a string used to represent them. For example:
sub fixup {
my $ch = substr($_[0],0,1,'');
return sprintf("\x{%02X}",ord($ch);
}
This scheme is close to how the underlying C code for Encode works,
but gives the fixup routine very little context.
=item Scheme 2
The fixup routine gets passed the original string, an index into
it of the problem area, and the output string so far. It appends
what it wants to the output string and returns a new index into the
original string. For example:
sub fixup {
# my ($s,$i,$d) = @_;
my $ch = substr($_[0],$_[1],1);
$_[2] .= sprintf("\x{%02X}",ord($ch);
return $_[1]+1;
}
This scheme gives maximal control to the fixup routine but is more
complicated to code, and may require that the internals of Encode be tweaked to
keep the original string intact.
=item Other Schemes
Hybrids of the above.
Multiple return values rather than in-place modifications.
Index into the string could be C<pos($str)> allowing C<s/\G...//>.
=back
=end future
=cut

View File

@@ -0,0 +1,293 @@
#
# $Id: GSM0338.pm,v 2.10 2021/05/24 10:56:53 dankogai Exp $
#
package Encode::GSM0338;
use strict;
use warnings;
use Carp;
use vars qw($VERSION);
$VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
use parent qw(Encode::Encoding);
__PACKAGE__->Define('gsm0338');
use utf8;
# Mapping table according to 3GPP TS 23.038 version 16.0.0 Release 16 and ETSI TS 123 038 V16.0.0 (2020-07)
# https://www.etsi.org/deliver/etsi_ts/123000_123099/123038/16.00.00_60/ts_123038v160000p.pdf (page 20 and 22)
our %UNI2GSM = (
"\x{000A}" => "\x0A", # LINE FEED
"\x{000C}" => "\x1B\x0A", # FORM FEED
"\x{000D}" => "\x0D", # CARRIAGE RETURN
"\x{0020}" => "\x20", # SPACE
"\x{0021}" => "\x21", # EXCLAMATION MARK
"\x{0022}" => "\x22", # QUOTATION MARK
"\x{0023}" => "\x23", # NUMBER SIGN
"\x{0024}" => "\x02", # DOLLAR SIGN
"\x{0025}" => "\x25", # PERCENT SIGN
"\x{0026}" => "\x26", # AMPERSAND
"\x{0027}" => "\x27", # APOSTROPHE
"\x{0028}" => "\x28", # LEFT PARENTHESIS
"\x{0029}" => "\x29", # RIGHT PARENTHESIS
"\x{002A}" => "\x2A", # ASTERISK
"\x{002B}" => "\x2B", # PLUS SIGN
"\x{002C}" => "\x2C", # COMMA
"\x{002D}" => "\x2D", # HYPHEN-MINUS
"\x{002E}" => "\x2E", # FULL STOP
"\x{002F}" => "\x2F", # SOLIDUS
"\x{0030}" => "\x30", # DIGIT ZERO
"\x{0031}" => "\x31", # DIGIT ONE
"\x{0032}" => "\x32", # DIGIT TWO
"\x{0033}" => "\x33", # DIGIT THREE
"\x{0034}" => "\x34", # DIGIT FOUR
"\x{0035}" => "\x35", # DIGIT FIVE
"\x{0036}" => "\x36", # DIGIT SIX
"\x{0037}" => "\x37", # DIGIT SEVEN
"\x{0038}" => "\x38", # DIGIT EIGHT
"\x{0039}" => "\x39", # DIGIT NINE
"\x{003A}" => "\x3A", # COLON
"\x{003B}" => "\x3B", # SEMICOLON
"\x{003C}" => "\x3C", # LESS-THAN SIGN
"\x{003D}" => "\x3D", # EQUALS SIGN
"\x{003E}" => "\x3E", # GREATER-THAN SIGN
"\x{003F}" => "\x3F", # QUESTION MARK
"\x{0040}" => "\x00", # COMMERCIAL AT
"\x{0041}" => "\x41", # LATIN CAPITAL LETTER A
"\x{0042}" => "\x42", # LATIN CAPITAL LETTER B
"\x{0043}" => "\x43", # LATIN CAPITAL LETTER C
"\x{0044}" => "\x44", # LATIN CAPITAL LETTER D
"\x{0045}" => "\x45", # LATIN CAPITAL LETTER E
"\x{0046}" => "\x46", # LATIN CAPITAL LETTER F
"\x{0047}" => "\x47", # LATIN CAPITAL LETTER G
"\x{0048}" => "\x48", # LATIN CAPITAL LETTER H
"\x{0049}" => "\x49", # LATIN CAPITAL LETTER I
"\x{004A}" => "\x4A", # LATIN CAPITAL LETTER J
"\x{004B}" => "\x4B", # LATIN CAPITAL LETTER K
"\x{004C}" => "\x4C", # LATIN CAPITAL LETTER L
"\x{004D}" => "\x4D", # LATIN CAPITAL LETTER M
"\x{004E}" => "\x4E", # LATIN CAPITAL LETTER N
"\x{004F}" => "\x4F", # LATIN CAPITAL LETTER O
"\x{0050}" => "\x50", # LATIN CAPITAL LETTER P
"\x{0051}" => "\x51", # LATIN CAPITAL LETTER Q
"\x{0052}" => "\x52", # LATIN CAPITAL LETTER R
"\x{0053}" => "\x53", # LATIN CAPITAL LETTER S
"\x{0054}" => "\x54", # LATIN CAPITAL LETTER T
"\x{0055}" => "\x55", # LATIN CAPITAL LETTER U
"\x{0056}" => "\x56", # LATIN CAPITAL LETTER V
"\x{0057}" => "\x57", # LATIN CAPITAL LETTER W
"\x{0058}" => "\x58", # LATIN CAPITAL LETTER X
"\x{0059}" => "\x59", # LATIN CAPITAL LETTER Y
"\x{005A}" => "\x5A", # LATIN CAPITAL LETTER Z
"\x{005B}" => "\x1B\x3C", # LEFT SQUARE BRACKET
"\x{005C}" => "\x1B\x2F", # REVERSE SOLIDUS
"\x{005D}" => "\x1B\x3E", # RIGHT SQUARE BRACKET
"\x{005E}" => "\x1B\x14", # CIRCUMFLEX ACCENT
"\x{005F}" => "\x11", # LOW LINE
"\x{0061}" => "\x61", # LATIN SMALL LETTER A
"\x{0062}" => "\x62", # LATIN SMALL LETTER B
"\x{0063}" => "\x63", # LATIN SMALL LETTER C
"\x{0064}" => "\x64", # LATIN SMALL LETTER D
"\x{0065}" => "\x65", # LATIN SMALL LETTER E
"\x{0066}" => "\x66", # LATIN SMALL LETTER F
"\x{0067}" => "\x67", # LATIN SMALL LETTER G
"\x{0068}" => "\x68", # LATIN SMALL LETTER H
"\x{0069}" => "\x69", # LATIN SMALL LETTER I
"\x{006A}" => "\x6A", # LATIN SMALL LETTER J
"\x{006B}" => "\x6B", # LATIN SMALL LETTER K
"\x{006C}" => "\x6C", # LATIN SMALL LETTER L
"\x{006D}" => "\x6D", # LATIN SMALL LETTER M
"\x{006E}" => "\x6E", # LATIN SMALL LETTER N
"\x{006F}" => "\x6F", # LATIN SMALL LETTER O
"\x{0070}" => "\x70", # LATIN SMALL LETTER P
"\x{0071}" => "\x71", # LATIN SMALL LETTER Q
"\x{0072}" => "\x72", # LATIN SMALL LETTER R
"\x{0073}" => "\x73", # LATIN SMALL LETTER S
"\x{0074}" => "\x74", # LATIN SMALL LETTER T
"\x{0075}" => "\x75", # LATIN SMALL LETTER U
"\x{0076}" => "\x76", # LATIN SMALL LETTER V
"\x{0077}" => "\x77", # LATIN SMALL LETTER W
"\x{0078}" => "\x78", # LATIN SMALL LETTER X
"\x{0079}" => "\x79", # LATIN SMALL LETTER Y
"\x{007A}" => "\x7A", # LATIN SMALL LETTER Z
"\x{007B}" => "\x1B\x28", # LEFT CURLY BRACKET
"\x{007C}" => "\x1B\x40", # VERTICAL LINE
"\x{007D}" => "\x1B\x29", # RIGHT CURLY BRACKET
"\x{007E}" => "\x1B\x3D", # TILDE
"\x{00A1}" => "\x40", # INVERTED EXCLAMATION MARK
"\x{00A3}" => "\x01", # POUND SIGN
"\x{00A4}" => "\x24", # CURRENCY SIGN
"\x{00A5}" => "\x03", # YEN SIGN
"\x{00A7}" => "\x5F", # SECTION SIGN
"\x{00BF}" => "\x60", # INVERTED QUESTION MARK
"\x{00C4}" => "\x5B", # LATIN CAPITAL LETTER A WITH DIAERESIS
"\x{00C5}" => "\x0E", # LATIN CAPITAL LETTER A WITH RING ABOVE
"\x{00C6}" => "\x1C", # LATIN CAPITAL LETTER AE
"\x{00C7}" => "\x09", # LATIN CAPITAL LETTER C WITH CEDILLA
"\x{00C9}" => "\x1F", # LATIN CAPITAL LETTER E WITH ACUTE
"\x{00D1}" => "\x5D", # LATIN CAPITAL LETTER N WITH TILDE
"\x{00D6}" => "\x5C", # LATIN CAPITAL LETTER O WITH DIAERESIS
"\x{00D8}" => "\x0B", # LATIN CAPITAL LETTER O WITH STROKE
"\x{00DC}" => "\x5E", # LATIN CAPITAL LETTER U WITH DIAERESIS
"\x{00DF}" => "\x1E", # LATIN SMALL LETTER SHARP S
"\x{00E0}" => "\x7F", # LATIN SMALL LETTER A WITH GRAVE
"\x{00E4}" => "\x7B", # LATIN SMALL LETTER A WITH DIAERESIS
"\x{00E5}" => "\x0F", # LATIN SMALL LETTER A WITH RING ABOVE
"\x{00E6}" => "\x1D", # LATIN SMALL LETTER AE
"\x{00E8}" => "\x04", # LATIN SMALL LETTER E WITH GRAVE
"\x{00E9}" => "\x05", # LATIN SMALL LETTER E WITH ACUTE
"\x{00EC}" => "\x07", # LATIN SMALL LETTER I WITH GRAVE
"\x{00F1}" => "\x7D", # LATIN SMALL LETTER N WITH TILDE
"\x{00F2}" => "\x08", # LATIN SMALL LETTER O WITH GRAVE
"\x{00F6}" => "\x7C", # LATIN SMALL LETTER O WITH DIAERESIS
"\x{00F8}" => "\x0C", # LATIN SMALL LETTER O WITH STROKE
"\x{00F9}" => "\x06", # LATIN SMALL LETTER U WITH GRAVE
"\x{00FC}" => "\x7E", # LATIN SMALL LETTER U WITH DIAERESIS
"\x{0393}" => "\x13", # GREEK CAPITAL LETTER GAMMA
"\x{0394}" => "\x10", # GREEK CAPITAL LETTER DELTA
"\x{0398}" => "\x19", # GREEK CAPITAL LETTER THETA
"\x{039B}" => "\x14", # GREEK CAPITAL LETTER LAMDA
"\x{039E}" => "\x1A", # GREEK CAPITAL LETTER XI
"\x{03A0}" => "\x16", # GREEK CAPITAL LETTER PI
"\x{03A3}" => "\x18", # GREEK CAPITAL LETTER SIGMA
"\x{03A6}" => "\x12", # GREEK CAPITAL LETTER PHI
"\x{03A8}" => "\x17", # GREEK CAPITAL LETTER PSI
"\x{03A9}" => "\x15", # GREEK CAPITAL LETTER OMEGA
"\x{20AC}" => "\x1B\x65", # EURO SIGN
);
our %GSM2UNI = reverse %UNI2GSM;
our $ESC = "\x1b";
sub decode ($$;$) {
my ( $obj, $bytes, $chk ) = @_;
return undef unless defined $bytes;
my $str = substr( $bytes, 0, 0 ); # to propagate taintedness;
while ( length $bytes ) {
my $seq = '';
my $c;
do {
$c = substr( $bytes, 0, 1, '' );
$seq .= $c;
} while ( length $bytes and $c eq $ESC );
my $u =
exists $GSM2UNI{$seq} ? $GSM2UNI{$seq}
: ( $chk && ref $chk eq 'CODE' ) ? $chk->( unpack 'C*', $seq )
: "\x{FFFD}";
if ( not exists $GSM2UNI{$seq} and $chk and not ref $chk ) {
if ( substr( $seq, 0, 1 ) eq $ESC
and ( $chk & Encode::STOP_AT_PARTIAL ) )
{
$bytes .= $seq;
last;
}
croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq )
. ' does not map to Unicode'
if $chk & Encode::DIE_ON_ERR;
carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq )
. ' does not map to Unicode'
if $chk & Encode::WARN_ON_ERR;
if ( $chk & Encode::RETURN_ON_ERR ) {
$bytes .= $seq;
last;
}
}
$str .= $u;
}
$_[1] = $bytes if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC );
return $str;
}
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
return undef unless defined $str;
my $bytes = substr( $str, 0, 0 ); # to propagate taintedness
while ( length $str ) {
my $u = substr( $str, 0, 1, '' );
my $c;
my $seq =
exists $UNI2GSM{$u} ? $UNI2GSM{$u}
: ( $chk && ref $chk eq 'CODE' ) ? $chk->( ord($u) )
: $UNI2GSM{'?'};
if ( not exists $UNI2GSM{$u} and $chk and not ref $chk ) {
croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name )
if $chk & Encode::DIE_ON_ERR;
carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name )
if $chk & Encode::WARN_ON_ERR;
if ( $chk & Encode::RETURN_ON_ERR ) {
$str .= $u;
last;
}
}
$bytes .= $seq;
}
$_[1] = $str if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC );
return $bytes;
}
1;
__END__
=head1 NAME
Encode::GSM0338 -- ETSI GSM 03.38 Encoding
=head1 SYNOPSIS
use Encode qw/encode decode/;
$gsm0338 = encode("gsm0338", $unicode); # loads Encode::GSM0338 implicitly
$unicode = decode("gsm0338", $gsm0338); # ditto
=head1 DESCRIPTION
GSM0338 is for GSM handsets. Though it shares alphanumerals with ASCII,
control character ranges and other parts are mapped very differently,
mainly to store Greek characters. There are also escape sequences
(starting with 0x1B) to cover e.g. the Euro sign.
This was once handled by L<Encode::Bytes> but because of all those
unusual specifications, Encode 2.20 has relocated the support to
this module.
This module implements only I<GSM 7 bit Default Alphabet> and
I<GSM 7 bit default alphabet extension table> according to standard
3GPP TS 23.038 version 16. Therefore I<National Language Single Shift>
and I<National Language Locking Shift> are not implemented nor supported.
=head2 Septets
This modules operates with octets (like any other Encode module) and not
with packed septets (unlike other GSM standards). Therefore for processing
binary SMS or parts of GSM TPDU payload (3GPP TS 23.040) it is needed to do
conversion between octets and packed septets. For this purpose perl's C<pack>
and C<unpack> functions may be useful:
$bytes = substr(pack('(b*)*', unpack '(A7)*', unpack 'b*', $septets), 0, $num_of_septets);
$unicode = decode('GSM0338', $bytes);
$bytes = encode('GSM0338', $unicode);
$septets = pack 'b*', join '', map { substr $_, 0, 7 } unpack '(A8)*', unpack 'b*', $bytes;
$num_of_septets = length $bytes;
Please note that for correct decoding of packed septets it is required to
know number of septets packed in binary buffer as binary buffer is always
padded with zero bits and 7 zero bits represents character C<@>. Number
of septets is also stored in TPDU payload when dealing with 3GPP TS 23.040.
=head1 BUGS
Encode::GSM0338 2.7 and older versions (part of Encode 3.06) incorrectly
handled zero bytes (character C<@>). This was fixed in Encode::GSM0338
version 2.8 (part of Encode 3.07).
=head1 SEE ALSO
L<3GPP TS 23.038|https://www.3gpp.org/dynareport/23038.htm>
L<ETSI TS 123 038 V16.0.0 (2020-07)|https://www.etsi.org/deliver/etsi_ts/123000_123099/123038/16.00.00_60/ts_123038v160000p.pdf>
L<Encode>
=cut

View File

@@ -0,0 +1,356 @@
package Encode::Guess;
use strict;
use warnings;
use Encode qw(:fallbacks find_encoding);
our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
my $Canon = 'Guess';
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
our %DEF_SUSPECTS = map { $_ => find_encoding($_) } qw(ascii utf8);
my $obj = bless {
Name => $Canon,
Suspects => {%DEF_SUSPECTS},
} => __PACKAGE__;
Encode::define_encoding($obj, $Canon);
use parent qw(Encode::Encoding);
sub needs_lines { 1 }
sub perlio_ok { 0 }
our @EXPORT = qw(guess_encoding);
our $NoUTFAutoGuess = 0;
our $UTF8_BOM = pack( "C3", 0xef, 0xbb, 0xbf );
sub import { # Exporter not used so we do it on our own
my $callpkg = caller;
for my $item (@EXPORT) {
no strict 'refs';
*{"$callpkg\::$item"} = \&{"$item"};
}
set_suspects(@_);
}
sub set_suspects {
my $class = shift;
my $self = ref($class) ? $class : $Encode::Encoding{$Canon};
$self->{Suspects} = {%DEF_SUSPECTS};
$self->add_suspects(@_);
}
sub add_suspects {
my $class = shift;
my $self = ref($class) ? $class : $Encode::Encoding{$Canon};
for my $c (@_) {
my $e = find_encoding($c) or die "Unknown encoding: $c";
$self->{Suspects}{ $e->name } = $e;
DEBUG and warn "Added: ", $e->name;
}
}
sub decode($$;$) {
my ( $obj, $octet, $chk ) = @_;
my $guessed = guess( $obj, $octet );
unless ( ref($guessed) ) {
require Carp;
Carp::croak($guessed);
}
my $utf8 = $guessed->decode( $octet, $chk || 0 );
$_[1] = $octet if $chk;
return $utf8;
}
sub guess_encoding {
guess( $Encode::Encoding{$Canon}, @_ );
}
sub guess {
my $class = shift;
my $obj = ref($class) ? $class : $Encode::Encoding{$Canon};
my $octet = shift;
# sanity check
return "Empty string, empty guess" unless defined $octet and length $octet;
# cheat 0: utf8 flag;
if ( Encode::is_utf8($octet) ) {
return find_encoding('utf8') unless $NoUTFAutoGuess;
Encode::_utf8_off($octet);
}
# cheat 1: BOM
use Encode::Unicode;
unless ($NoUTFAutoGuess) {
my $BOM = pack( 'C3', unpack( "C3", $octet ) );
return find_encoding('utf8')
if ( defined $BOM and $BOM eq $UTF8_BOM );
$BOM = unpack( 'N', $octet );
return find_encoding('UTF-32')
if ( defined $BOM and ( $BOM == 0xFeFF or $BOM == 0xFFFe0000 ) );
$BOM = unpack( 'n', $octet );
return find_encoding('UTF-16')
if ( defined $BOM and ( $BOM == 0xFeFF or $BOM == 0xFFFe ) );
if ( $octet =~ /\x00/o )
{ # if \x00 found, we assume UTF-(16|32)(BE|LE)
my $utf;
my ( $be, $le ) = ( 0, 0 );
if ( $octet =~ /\x00\x00/o ) { # UTF-32(BE|LE) assumed
$utf = "UTF-32";
for my $char ( unpack( 'N*', $octet ) ) {
$char & 0x0000ffff and $be++;
$char & 0xffff0000 and $le++;
}
}
else { # UTF-16(BE|LE) assumed
$utf = "UTF-16";
for my $char ( unpack( 'n*', $octet ) ) {
$char & 0x00ff and $be++;
$char & 0xff00 and $le++;
}
}
DEBUG and warn "$utf, be == $be, le == $le";
$be == $le
and return
"Encodings ambiguous between $utf BE and LE ($be, $le)";
$utf .= ( $be > $le ) ? 'BE' : 'LE';
return find_encoding($utf);
}
}
my %try = %{ $obj->{Suspects} };
for my $c (@_) {
my $e = find_encoding($c) or die "Unknown encoding: $c";
$try{ $e->name } = $e;
DEBUG and warn "Added: ", $e->name;
}
my $nline = 1;
for my $line ( split /\r\n?|\n/, $octet ) {
# cheat 2 -- \e in the string
if ( $line =~ /\e/o ) {
my @keys = keys %try;
delete @try{qw/utf8 ascii/};
for my $k (@keys) {
ref( $try{$k} ) eq 'Encode::XS' and delete $try{$k};
}
}
my %ok = %try;
# warn join(",", keys %try);
for my $k ( keys %try ) {
my $scratch = $line;
$try{$k}->decode( $scratch, FB_QUIET );
if ( $scratch eq '' ) {
DEBUG and warn sprintf( "%4d:%-24s ok\n", $nline, $k );
}
else {
use bytes ();
DEBUG
and warn sprintf( "%4d:%-24s not ok; %d bytes left\n",
$nline, $k, bytes::length($scratch) );
delete $ok{$k};
}
}
%ok or return "No appropriate encodings found!";
if ( scalar( keys(%ok) ) == 1 ) {
my ($retval) = values(%ok);
return $retval;
}
%try = %ok;
$nline++;
}
$try{ascii}
or return "Encodings too ambiguous: " . join( " or ", keys %try );
return $try{ascii};
}
1;
__END__
=head1 NAME
Encode::Guess -- Guesses encoding from data
=head1 SYNOPSIS
# if you are sure $data won't contain anything bogus
use Encode;
use Encode::Guess qw/euc-jp shiftjis 7bit-jis/;
my $utf8 = decode("Guess", $data);
my $data = encode("Guess", $utf8); # this doesn't work!
# more elaborate way
use Encode::Guess;
my $enc = guess_encoding($data, qw/euc-jp shiftjis 7bit-jis/);
ref($enc) or die "Can't guess: $enc"; # trap error this way
$utf8 = $enc->decode($data);
# or
$utf8 = decode($enc->name, $data)
=head1 ABSTRACT
Encode::Guess enables you to guess in what encoding a given data is
encoded, or at least tries to.
=head1 DESCRIPTION
By default, it checks only ascii, utf8 and UTF-16/32 with BOM.
use Encode::Guess; # ascii/utf8/BOMed UTF
To use it more practically, you have to give the names of encodings to
check (I<suspects> as follows). The name of suspects can either be
canonical names or aliases.
CAVEAT: Unlike UTF-(16|32), BOM in utf8 is NOT AUTOMATICALLY STRIPPED.
# tries all major Japanese Encodings as well
use Encode::Guess qw/euc-jp shiftjis 7bit-jis/;
If the C<$Encode::Guess::NoUTFAutoGuess> variable is set to a true
value, no heuristics will be applied to UTF8/16/32, and the result
will be limited to the suspects and C<ascii>.
=over 4
=item Encode::Guess->set_suspects
You can also change the internal suspects list via C<set_suspects>
method.
use Encode::Guess;
Encode::Guess->set_suspects(qw/euc-jp shiftjis 7bit-jis/);
=item Encode::Guess->add_suspects
Or you can use C<add_suspects> method. The difference is that
C<set_suspects> flushes the current suspects list while
C<add_suspects> adds.
use Encode::Guess;
Encode::Guess->add_suspects(qw/euc-jp shiftjis 7bit-jis/);
# now the suspects are euc-jp,shiftjis,7bit-jis, AND
# euc-kr,euc-cn, and big5-eten
Encode::Guess->add_suspects(qw/euc-kr euc-cn big5-eten/);
=item Encode::decode("Guess" ...)
When you are content with suspects list, you can now
my $utf8 = Encode::decode("Guess", $data);
=item Encode::Guess->guess($data)
But it will croak if:
=over
=item *
Two or more suspects remain
=item *
No suspects left
=back
So you should instead try this;
my $decoder = Encode::Guess->guess($data);
On success, $decoder is an object that is documented in
L<Encode::Encoding>. So you can now do this;
my $utf8 = $decoder->decode($data);
On failure, $decoder now contains an error message so the whole thing
would be as follows;
my $decoder = Encode::Guess->guess($data);
die $decoder unless ref($decoder);
my $utf8 = $decoder->decode($data);
=item guess_encoding($data, [, I<list of suspects>])
You can also try C<guess_encoding> function which is exported by
default. It takes $data to check and it also takes the list of
suspects by option. The optional suspect list is I<not reflected> to
the internal suspects list.
my $decoder = guess_encoding($data, qw/euc-jp euc-kr euc-cn/);
die $decoder unless ref($decoder);
my $utf8 = $decoder->decode($data);
# check only ascii, utf8 and UTF-(16|32) with BOM
my $decoder = guess_encoding($data);
=back
=head1 CAVEATS
=over 4
=item *
Because of the algorithm used, ISO-8859 series and other single-byte
encodings do not work well unless either one of ISO-8859 is the only
one suspect (besides ascii and utf8).
use Encode::Guess;
# perhaps ok
my $decoder = guess_encoding($data, 'latin1');
# definitely NOT ok
my $decoder = guess_encoding($data, qw/latin1 greek/);
The reason is that Encode::Guess guesses encoding by trial and error.
It first splits $data into lines and tries to decode the line for each
suspect. It keeps it going until all but one encoding is eliminated
out of suspects list. ISO-8859 series is just too successful for most
cases (because it fills almost all code points in \x00-\xff).
=item *
Do not mix national standard encodings and the corresponding vendor
encodings.
# a very bad idea
my $decoder
= guess_encoding($data, qw/shiftjis MacJapanese cp932/);
The reason is that vendor encoding is usually a superset of national
standard so it becomes too ambiguous for most cases.
=item *
On the other hand, mixing various national standard encodings
automagically works unless $data is too short to allow for guessing.
# This is ok if $data is long enough
my $decoder =
guess_encoding($data, qw/euc-cn
euc-jp shiftjis 7bit-jis
euc-kr
big5-eten/);
=item *
DO NOT PUT TOO MANY SUSPECTS! Don't you try something like this!
my $decoder = guess_encoding($data,
Encode->encodings(":all"));
=back
It is, after all, just a guess. You should alway be explicit when it
comes to encodings. But there are some, especially Japanese,
environment that guess-coding is a must. Use this module with care.
=head1 TO DO
Encode::Guess does not work on EBCDIC platforms.
=head1 SEE ALSO
L<Encode>, L<Encode::Encoding>
=cut

View File

@@ -0,0 +1,95 @@
package Encode::JP;
BEGIN {
if ( ord("A") == 193 ) {
die "Encode::JP not supported on EBCDIC\n";
}
}
use strict;
use warnings;
use Encode;
our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
use Encode::JP::JIS7;
1;
__END__
=head1 NAME
Encode::JP - Japanese Encodings
=head1 SYNOPSIS
use Encode qw/encode decode/;
$euc_jp = encode("euc-jp", $utf8); # loads Encode::JP implicitly
$utf8 = decode("euc-jp", $euc_jp); # ditto
=head1 ABSTRACT
This module implements Japanese charset encodings. Encodings
supported are as follows.
Canonical Alias Description
--------------------------------------------------------------------
euc-jp /\beuc.*jp$/i EUC (Extended Unix Character)
/\bjp.*euc/i
/\bujis$/i
shiftjis /\bshift.*jis$/i Shift JIS (aka MS Kanji)
/\bsjis$/i
7bit-jis /\bjis$/i 7bit JIS
iso-2022-jp ISO-2022-JP [RFC1468]
= 7bit JIS with all Halfwidth Kana
converted to Fullwidth
iso-2022-jp-1 ISO-2022-JP-1 [RFC2237]
= ISO-2022-JP with JIS X 0212-1990
support. See below
MacJapanese Shift JIS + Apple vendor mappings
cp932 /\bwindows-31j$/i Code Page 932
= Shift JIS + MS/IBM vendor mappings
jis0201-raw JIS0201, raw format
jis0208-raw JIS0208, raw format
jis0212-raw JIS0212, raw format
--------------------------------------------------------------------
=head1 DESCRIPTION
To find out how to use this module in detail, see L<Encode>.
=head1 Note on ISO-2022-JP(-1)?
ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which
adds support for JIS X 0212-1990. That means you can use the same
code to decode to utf8 but not vice versa.
$utf8 = decode('iso-2022-jp-1', $stream);
and
$utf8 = decode('iso-2022-jp', $stream);
yield the same result but
$with_0212 = encode('iso-2022-jp-1', $utf8);
is now different from
$without_0212 = encode('iso-2022-jp', $utf8 );
In the latter case, characters that map to 0212 are first converted
to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu' or
'geta mark') then fed to the decoding engine. U+FFFD is not used,
in order to preserve text layout as much as possible.
=head1 BUGS
The ASCII region (0x00-0x7f) is preserved for all encodings, even
though this conflicts with mappings by the Unicode Consortium.
=head1 SEE ALSO
L<Encode>
=cut

View File

@@ -0,0 +1,176 @@
#
# $Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $
#
package Encode::JP::H2Z;
use strict;
use warnings;
our $RCSID = q$Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $;
our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode::CJKConstants qw(:all);
use vars qw(%_D2Z $_PAT_D2Z
%_Z2D $_PAT_Z2D
%_H2Z $_PAT_H2Z
%_Z2H $_PAT_Z2H);
%_H2Z = (
"\x8e\xa1" => "\xa1\xa3", #<23><>
"\x8e\xa2" => "\xa1\xd6", #<23><>
"\x8e\xa3" => "\xa1\xd7", #<23><>
"\x8e\xa4" => "\xa1\xa2", #<23><>
"\x8e\xa5" => "\xa1\xa6", #<23><>
"\x8e\xa6" => "\xa5\xf2", #<23><>
"\x8e\xa7" => "\xa5\xa1", #<23><>
"\x8e\xa8" => "\xa5\xa3", #<23><>
"\x8e\xa9" => "\xa5\xa5", #<23><>
"\x8e\xaa" => "\xa5\xa7", #<23><>
"\x8e\xab" => "\xa5\xa9", #<23><>
"\x8e\xac" => "\xa5\xe3", #<23><>
"\x8e\xad" => "\xa5\xe5", #<23><>
"\x8e\xae" => "\xa5\xe7", #<23><>
"\x8e\xaf" => "\xa5\xc3", #<23><>
"\x8e\xb0" => "\xa1\xbc", #<23><>
"\x8e\xb1" => "\xa5\xa2", #<23><>
"\x8e\xb2" => "\xa5\xa4", #<23><>
"\x8e\xb3" => "\xa5\xa6", #<23><>
"\x8e\xb4" => "\xa5\xa8", #<23><>
"\x8e\xb5" => "\xa5\xaa", #<23><>
"\x8e\xb6" => "\xa5\xab", #<23><>
"\x8e\xb7" => "\xa5\xad", #<23><>
"\x8e\xb8" => "\xa5\xaf", #<23><>
"\x8e\xb9" => "\xa5\xb1", #<23><>
"\x8e\xba" => "\xa5\xb3", #<23><>
"\x8e\xbb" => "\xa5\xb5", #<23><>
"\x8e\xbc" => "\xa5\xb7", #<23><>
"\x8e\xbd" => "\xa5\xb9", #<23><>
"\x8e\xbe" => "\xa5\xbb", #<23><>
"\x8e\xbf" => "\xa5\xbd", #<23><>
"\x8e\xc0" => "\xa5\xbf", #<23><>
"\x8e\xc1" => "\xa5\xc1", #<23><>
"\x8e\xc2" => "\xa5\xc4", #<23><>
"\x8e\xc3" => "\xa5\xc6", #<23><>
"\x8e\xc4" => "\xa5\xc8", #<23><>
"\x8e\xc5" => "\xa5\xca", #<23><>
"\x8e\xc6" => "\xa5\xcb", #<23><>
"\x8e\xc7" => "\xa5\xcc", #<23><>
"\x8e\xc8" => "\xa5\xcd", #<23><>
"\x8e\xc9" => "\xa5\xce", #<23><>
"\x8e\xca" => "\xa5\xcf", #<23><>
"\x8e\xcb" => "\xa5\xd2", #<23><>
"\x8e\xcc" => "\xa5\xd5", #<23><>
"\x8e\xcd" => "\xa5\xd8", #<23><>
"\x8e\xce" => "\xa5\xdb", #<23><>
"\x8e\xcf" => "\xa5\xde", #<23><>
"\x8e\xd0" => "\xa5\xdf", #<23><>
"\x8e\xd1" => "\xa5\xe0", #<23><>
"\x8e\xd2" => "\xa5\xe1", #<23><>
"\x8e\xd3" => "\xa5\xe2", #<23><>
"\x8e\xd4" => "\xa5\xe4", #<23><>
"\x8e\xd5" => "\xa5\xe6", #<23><>
"\x8e\xd6" => "\xa5\xe8", #<23><>
"\x8e\xd7" => "\xa5\xe9", #<23><>
"\x8e\xd8" => "\xa5\xea", #<23><>
"\x8e\xd9" => "\xa5\xeb", #<23><>
"\x8e\xda" => "\xa5\xec", #<23><>
"\x8e\xdb" => "\xa5\xed", #<23><>
"\x8e\xdc" => "\xa5\xef", #<23><>
"\x8e\xdd" => "\xa5\xf3", #<23><>
"\x8e\xde" => "\xa1\xab", #<23><>
"\x8e\xdf" => "\xa1\xac", #<23><>
);
%_D2Z = (
"\x8e\xb6\x8e\xde" => "\xa5\xac", #<23><>
"\x8e\xb7\x8e\xde" => "\xa5\xae", #<23><>
"\x8e\xb8\x8e\xde" => "\xa5\xb0", #<23><>
"\x8e\xb9\x8e\xde" => "\xa5\xb2", #<23><>
"\x8e\xba\x8e\xde" => "\xa5\xb4", #<23><>
"\x8e\xbb\x8e\xde" => "\xa5\xb6", #<23><>
"\x8e\xbc\x8e\xde" => "\xa5\xb8", #<23><>
"\x8e\xbd\x8e\xde" => "\xa5\xba", #<23><>
"\x8e\xbe\x8e\xde" => "\xa5\xbc", #<23><>
"\x8e\xbf\x8e\xde" => "\xa5\xbe", #<23><>
"\x8e\xc0\x8e\xde" => "\xa5\xc0", #<23><>
"\x8e\xc1\x8e\xde" => "\xa5\xc2", #<23><>
"\x8e\xc2\x8e\xde" => "\xa5\xc5", #<23><>
"\x8e\xc3\x8e\xde" => "\xa5\xc7", #<23><>
"\x8e\xc4\x8e\xde" => "\xa5\xc9", #<23><>
"\x8e\xca\x8e\xde" => "\xa5\xd0", #<23><>
"\x8e\xcb\x8e\xde" => "\xa5\xd3", #<23><>
"\x8e\xcc\x8e\xde" => "\xa5\xd6", #<23><>
"\x8e\xcd\x8e\xde" => "\xa5\xd9", #<23><>
"\x8e\xce\x8e\xde" => "\xa5\xdc", #<23><>
"\x8e\xca\x8e\xdf" => "\xa5\xd1", #<23><>
"\x8e\xcb\x8e\xdf" => "\xa5\xd4", #<23><>
"\x8e\xcc\x8e\xdf" => "\xa5\xd7", #<23><>
"\x8e\xcd\x8e\xdf" => "\xa5\xda", #<23><>
"\x8e\xce\x8e\xdf" => "\xa5\xdd", #<23><>
"\x8e\xb3\x8e\xde" => "\xa5\xf4", #<23><>
);
# init only once;
#$_PAT_D2Z = join("|", keys %_D2Z);
#$_PAT_H2Z = join("|", keys %_H2Z);
%_Z2H = reverse %_H2Z;
%_Z2D = reverse %_D2Z;
#$_PAT_Z2H = join("|", keys %_Z2H);
#$_PAT_Z2D = join("|", keys %_Z2D);
sub h2z {
no warnings qw(uninitialized);
my $r_str = shift;
my ($keep_dakuten) = @_;
my $n = 0;
unless ($keep_dakuten) {
$n = (
$$r_str =~ s(
($RE{EUC_KANA}
(?:\x8e[\xde\xdf])?)
){
my $str = $1;
$_D2Z{$str} || $_H2Z{$str} ||
# in case dakuten and handakuten are side-by-side!
$_H2Z{substr($str,0,2)} . $_H2Z{substr($str,2,2)};
}eogx
);
}
else {
$n = (
$$r_str =~ s(
($RE{EUC_KANA})
){
$_H2Z{$1};
}eogx
);
}
$n;
}
sub z2h {
my $r_str = shift;
my $n = (
$$r_str =~ s(
($RE{EUC_C}|$RE{EUC_0212}|$RE{EUC_KANA})
){
$_Z2D{$1} || $_Z2H{$1} || $1;
}eogx
);
$n;
}
1;
__END__
=head1 NAME
Encode::JP::H2Z -- internally used by Encode::JP::2022_JP*
=cut

View File

@@ -0,0 +1,168 @@
package Encode::JP::JIS7;
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
for my $name ( '7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1' ) {
my $h2z = ( $name eq '7bit-jis' ) ? 0 : 1;
my $jis0212 = ( $name eq 'iso-2022-jp' ) ? 0 : 1;
my $obj = bless {
Name => $name,
h2z => $h2z,
jis0212 => $jis0212,
} => __PACKAGE__;
Encode::define_encoding($obj, $name);
}
use parent qw(Encode::Encoding);
# we override this to 1 so PerlIO works
sub needs_lines { 1 }
use Encode::CJKConstants qw(:all);
#
# decode is identical for all 2022 variants
#
sub decode($$;$) {
my ( $obj, $str, $chk ) = @_;
return undef unless defined $str;
my $residue = '';
if ($chk) {
$str =~ s/([^\x00-\x7f].*)$//so and $residue = $1;
}
$residue .= jis_euc( \$str );
$_[1] = $residue if $chk;
return Encode::decode( 'euc-jp', $str, FB_PERLQQ );
}
#
# encode is different
#
sub encode($$;$) {
require Encode::JP::H2Z;
my ( $obj, $utf8, $chk ) = @_;
return undef unless defined $utf8;
# empty the input string in the stack so perlio is ok
$_[1] = '' if $chk;
my ( $h2z, $jis0212 ) = @$obj{qw(h2z jis0212)};
my $octet = Encode::encode( 'euc-jp', $utf8, $chk || 0 );
$h2z and &Encode::JP::H2Z::h2z( \$octet );
euc_jis( \$octet, $jis0212 );
return $octet;
}
#
# cat_decode
#
my $re_scan_jis_g = qr{
\G ( ($RE{JIS_0212}) | $RE{JIS_0208} |
($RE{ISO_ASC}) | ($RE{JIS_KANA}) | )
([^\e]*)
}x;
sub cat_decode { # ($obj, $dst, $src, $pos, $trm, $chk)
my ( $obj, undef, undef, $pos, $trm ) = @_; # currently ignores $chk
my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
local ${^ENCODING};
use bytes;
my $opos = pos($$rsrc);
pos($$rsrc) = $pos;
while ( $$rsrc =~ /$re_scan_jis_g/gc ) {
my ( $esc, $esc_0212, $esc_asc, $esc_kana, $chunk ) =
( $1, $2, $3, $4, $5 );
unless ($chunk) { $esc or last; next; }
if ( $esc && !$esc_asc ) {
$chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
if ($esc_kana) {
$chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
}
elsif ($esc_0212) {
$chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
}
$chunk = Encode::decode( 'euc-jp', $chunk, 0 );
}
elsif ( ( my $npos = index( $chunk, $trm ) ) >= 0 ) {
$$rdst .= substr( $chunk, 0, $npos + length($trm) );
$$rpos += length($esc) + $npos + length($trm);
pos($$rsrc) = $opos;
return 1;
}
$$rdst .= $chunk;
$$rpos = pos($$rsrc);
}
$$rpos = pos($$rsrc);
pos($$rsrc) = $opos;
return '';
}
# JIS<->EUC
my $re_scan_jis = qr{
(?:($RE{JIS_0212})|$RE{JIS_0208}|($RE{ISO_ASC})|($RE{JIS_KANA}))([^\e]*)
}x;
sub jis_euc {
local ${^ENCODING};
my $r_str = shift;
$$r_str =~ s($re_scan_jis)
{
my ($esc_0212, $esc_asc, $esc_kana, $chunk) =
($1, $2, $3, $4);
if (!$esc_asc) {
$chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
if ($esc_kana) {
$chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
}
elsif ($esc_0212) {
$chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
}
}
$chunk;
}geox;
my ($residue) = ( $$r_str =~ s/(\e.*)$//so );
return $residue;
}
sub euc_jis {
no warnings qw(uninitialized);
local ${^ENCODING};
my $r_str = shift;
my $jis0212 = shift;
$$r_str =~ s{
((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
}{
my $chunk = $1;
my $esc =
( $chunk =~ tr/\x8E//d ) ? $ESC{KANA} :
( $chunk =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
$ESC{JIS_0208};
if ($esc eq $ESC{JIS_0212} && !$jis0212){
# fallback to '?'
$chunk =~ tr/\xA1-\xFE/\x3F/;
}else{
$chunk =~ tr/\xA1-\xFE/\x21-\x7E/;
}
$esc . $chunk . $ESC{ASC};
}geox;
$$r_str =~ s/\Q$ESC{ASC}\E
(\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox;
$$r_str;
}
1;
__END__
=head1 NAME
Encode::JP::JIS7 -- internally used by Encode::JP
=cut

View File

@@ -0,0 +1,69 @@
package Encode::KR;
BEGIN {
if ( ord("A") == 193 ) {
die "Encode::KR not supported on EBCDIC\n";
}
}
use strict;
use warnings;
use Encode;
our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
use Encode::KR::2022_KR;
1;
__END__
=head1 NAME
Encode::KR - Korean Encodings
=head1 SYNOPSIS
use Encode qw/encode decode/;
$euc_kr = encode("euc-kr", $utf8); # loads Encode::KR implicitly
$utf8 = decode("euc-kr", $euc_kr); # ditto
=head1 DESCRIPTION
This module implements Korean charset encodings. Encodings supported
are as follows.
Canonical Alias Description
--------------------------------------------------------------------
euc-kr /\beuc.*kr$/i EUC (Extended Unix Character)
/\bkr.*euc$/i
ksc5601-raw Korean standard code set (as is)
cp949 /(?:x-)?uhc$/i
/(?:x-)?windows-949$/i
/\bks_c_5601-1987$/i
Code Page 949 (EUC-KR + 8,822
(additional Hangul syllables)
MacKorean EUC-KR + Apple Vendor Mappings
johab JOHAB A supplementary encoding defined in
Annex 3 of KS X 1001:1998
iso-2022-kr iso-2022-kr [RFC1557]
--------------------------------------------------------------------
To find how to use this module in detail, see L<Encode>.
=head1 BUGS
When you see C<charset=ks_c_5601-1987> on mails and web pages, they really
mean "cp949" encodings. To fix that, the following aliases are set;
qr/(?:x-)?uhc$/i => '"cp949"'
qr/(?:x-)?windows-949$/i => '"cp949"'
qr/ks_c_5601-1987$/i => '"cp949"'
The ASCII region (0x00-0x7f) is preserved for all encodings, even
though this conflicts with mappings by the Unicode Consortium.
=head1 SEE ALSO
L<Encode>
=cut

View File

@@ -0,0 +1,83 @@
package Encode::KR::2022_KR;
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
use parent qw(Encode::Encoding);
__PACKAGE__->Define('iso-2022-kr');
sub needs_lines { 1 }
sub perlio_ok {
return 0; # for the time being
}
sub decode {
my ( $obj, $str, $chk ) = @_;
return undef unless defined $str;
my $res = $str;
my $residue = iso_euc( \$res );
# This is for PerlIO
$_[1] = $residue if $chk;
return Encode::decode( 'euc-kr', $res, FB_PERLQQ );
}
sub encode {
my ( $obj, $utf8, $chk ) = @_;
return undef unless defined $utf8;
# empty the input string in the stack so perlio is ok
$_[1] = '' if $chk;
my $octet = Encode::encode( 'euc-kr', $utf8, FB_PERLQQ );
euc_iso( \$octet );
return $octet;
}
use Encode::CJKConstants qw(:all);
# ISO<->EUC
sub iso_euc {
my $r_str = shift;
$$r_str =~ s/$RE{'2022_KR'}//gox; # remove the designator
$$r_str =~ s{ # replace characters in GL
\x0e # between SO(\x0e) and SI(\x0f)
([^\x0f]*) # with characters in GR
\x0f
}
{
my $out= $1;
$out =~ tr/\x21-\x7e/\xa1-\xfe/;
$out;
}geox;
my ($residue) = ( $$r_str =~ s/(\e.*)$//so );
return $residue;
}
sub euc_iso {
no warnings qw(uninitialized);
my $r_str = shift;
substr( $$r_str, 0, 0 ) =
$ESC{'2022_KR'}; # put the designator at the beg.
$$r_str =~
s{ # move KS X 1001 characters in GR to GL
($RE{EUC_C}+) # and enclose them with SO and SI
}{
my $str = $1;
$str =~ tr/\xA1-\xFE/\x21-\x7E/;
"\x0e" . $str . "\x0f";
}geox;
$$r_str;
}
1;
__END__
=head1 NAME
Encode::KR::2022_KR -- internally used by Encode::KR
=cut

View File

@@ -0,0 +1,431 @@
package Encode::MIME::Header;
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 2.29 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Carp ();
use Encode ();
use MIME::Base64 ();
my %seed = (
decode_b => 1, # decodes 'B' encoding ?
decode_q => 1, # decodes 'Q' encoding ?
encode => 'B', # encode with 'B' or 'Q' ?
charset => 'UTF-8', # encode charset
bpl => 75, # bytes per line
);
my @objs;
push @objs, bless {
%seed,
Name => 'MIME-Header',
} => __PACKAGE__;
push @objs, bless {
%seed,
decode_q => 0,
Name => 'MIME-B',
} => __PACKAGE__;
push @objs, bless {
%seed,
decode_b => 0,
encode => 'Q',
Name => 'MIME-Q',
} => __PACKAGE__;
Encode::define_encoding($_, $_->{Name}) foreach @objs;
use parent qw(Encode::Encoding);
sub needs_lines { 1 }
sub perlio_ok { 0 }
# RFC 2047 and RFC 2231 grammar
my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/;
my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
my $re_encoding = qr/[QqBb]/;
my $re_encoded_text = qr/[^\?]*/;
my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
# in strict mode check also for valid base64 characters and also for valid quoted printable codes
my $re_encoding_strict_b = qr/[Bb]/;
my $re_encoding_strict_q = qr/[Qq]/;
my $re_encoded_text_strict_b = qr/(?:[0-9A-Za-z\+\/]{4})*(?:[0-9A-Za-z\+\/]{2}==|[0-9A-Za-z\+\/]{3}=|)/;
my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB
my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
my $re_newline = qr/(?:\r\n|[\r\n])/;
# in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
# except in comments when separator between words and comment round brackets can be omitted
my $re_word_begin_strict = qr/(?:(?:[ \t]|\A)\(?|(?:[^\\]|\A)\)\()/;
my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
my $re_word_end_strict = qr/(?:\)\(|\)?(?:$re_newline?[ \t]|\z))/;
my $re_match = qr/()((?:$re_encoded_word\s*)*$re_encoded_word)()/;
my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
our $STRICT_DECODE = 0;
sub decode($$;$) {
my ($obj, $str, $chk) = @_;
return undef unless defined $str;
my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
my $stop = 0;
my $output = substr($str, 0, 0); # to propagate taintedness
# decode each line separately, match whole continuous folded line at one call
1 while not $stop and $str =~ s{^((?:[^\r\n]*(?:$re_newline[ \t])?)*)($re_newline)?}{
my $line = $1;
my $sep = defined $2 ? $2 : '';
$stop = 1 unless length($line) or length($sep);
# in non strict mode append missing '=' padding characters for b words
# fixes below concatenation of consecutive encoded mime words
1 while not $STRICT_DECODE and $line =~ s/(=\?$re_charset(?:\*$re_language)?\?[Bb]\?)((?:[^\?]{4})*[^\?]{1,3})(\?=)/$1.$2.('='x(4-length($2)%4)).$3/se;
# NOTE: this code partially could break $chk support
# in non strict mode concat consecutive encoded mime words with same charset, language and encoding
# fixes breaking inside multi-byte characters
1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
# process sequence of encoded MIME words at once
1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
my $begin = $1 . $2;
my $words = $3;
$begin =~ tr/\r\n//d;
$output .= $begin;
# decode one MIME word
1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
$output .= $1;
my $orig = $2;
my $charset = $3;
my ($mime_enc, $text) = split /\?/, $5;
$text =~ tr/\r\n//d;
my $enc = Encode::find_mime_encoding($charset);
# in non strict mode allow also perl encoding aliases
if ( not defined $enc and not $STRICT_DECODE ) {
# make sure that decoded string will be always strict UTF-8
$charset = 'UTF-8' if lc($charset) eq 'utf8';
$enc = Encode::find_encoding($charset);
}
if ( not defined $enc ) {
Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
$stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
$output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
$stop ? $orig : '';
} else {
if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
my $decoded = _decode_b($enc, $text, $chk);
$stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
$output .= (defined $decoded ? $decoded : $text) unless $stop;
$stop ? $orig : '';
} elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
my $decoded = _decode_q($enc, $text, $chk);
$stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
$output .= (defined $decoded ? $decoded : $text) unless $stop;
$stop ? $orig : '';
} else {
Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
$stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
$output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
$stop ? $orig : '';
}
}
}se;
if ( not $stop ) {
$output .= $words;
$words = '';
}
$words;
}se;
if ( not $stop ) {
$line =~ tr/\r\n//d;
$output .= $line . $sep;
$line = '';
$sep = '';
}
$line . $sep;
}se;
$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
return $output;
}
sub _decode_b {
my ($enc, $text, $chk) = @_;
# MIME::Base64::decode ignores everything after a '=' padding character
# in non strict mode split string after each sequence of padding characters and decode each substring
my $octets = $STRICT_DECODE ?
MIME::Base64::decode($text) :
join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
return _decode_octets($enc, $octets, $chk);
}
sub _decode_q {
my ($enc, $text, $chk) = @_;
$text =~ s/_/ /go;
$text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
return _decode_octets($enc, $text, $chk);
}
sub _decode_octets {
my ($enc, $octets, $chk) = @_;
$chk = 0 unless defined $chk;
$chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
my $output = $enc->decode($octets, $chk);
return undef if not ref $chk and $chk and $octets ne '';
return $output;
}
sub encode($$;$) {
my ($obj, $str, $chk) = @_;
return undef unless defined $str;
my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
return $output . substr($str, 0, 0); # to propagate taintedness
}
sub _fold_line {
my ($obj, $line) = @_;
my $bpl = $obj->{bpl};
my $output = '';
while ( length($line) ) {
if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
$output .= $1;
$output .= "\r\n" . $2 if length($line);
} elsif ( $line =~ s/(\s)(.*)$// ) {
$output .= $line;
$line = $2;
$output .= "\r\n" . $1 if length($line);
} else {
$output .= $line;
last;
}
}
return $output;
}
sub _encode_string {
my ($obj, $str, $chk) = @_;
my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
my $enc = Encode::find_mime_encoding($obj->{charset});
my $enc_chk = $chk;
$enc_chk = 0 unless defined $enc_chk;
$enc_chk |= Encode::LEAVE_SRC if not ref $enc_chk and $enc_chk;
my @result = ();
my $octets = '';
while ( length( my $chr = substr($str, 0, 1, '') ) ) {
my $seq = $enc->encode($chr, $enc_chk);
if ( not length($seq) ) {
substr($str, 0, 0, $chr);
last;
}
if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
push @result, $obj->_encode_word($octets);
$octets = '';
}
$octets .= $seq;
}
length($octets) and push @result, $obj->_encode_word($octets);
$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
return join(' ', @result);
}
sub _encode_word {
my ($obj, $octets) = @_;
my $charset = $obj->{charset};
my $encode = $obj->{encode};
my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
return "=?$charset?$encode?$text?=";
}
sub _encoded_word_len {
my ($obj, $octets) = @_;
my $charset = $obj->{charset};
my $encode = $obj->{encode};
my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
return length("=?$charset?$encode??=") + $text_len;
}
sub _encode_b {
my ($octets) = @_;
return MIME::Base64::encode($octets, '');
}
sub _encoded_b_len {
my ($octets) = @_;
return ( length($octets) + 2 ) / 3 * 4;
}
my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
sub _encode_q {
my ($octets) = @_;
$octets =~ s{($re_invalid_q_char)}{
join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
}egox;
$octets =~ s/ /_/go;
return $octets;
}
sub _encoded_q_len {
my ($octets) = @_;
my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
}
1;
__END__
=head1 NAME
Encode::MIME::Header -- MIME encoding for an unstructured email header
=head1 SYNOPSIS
use Encode qw(encode decode);
my $mime_str = encode("MIME-Header", "Sample:Text \N{U+263A}");
# $mime_str is "=?UTF-8?B?U2FtcGxlOlRleHQg4pi6?="
my $mime_q_str = encode("MIME-Q", "Sample:Text \N{U+263A}");
# $mime_q_str is "=?UTF-8?Q?Sample=3AText_=E2=98=BA?="
my $str = decode("MIME-Header",
"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n " .
"=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="
);
# $str is "If you can read this you understand the example."
use Encode qw(decode :fallbacks);
use Encode::MIME::Header;
local $Encode::MIME::Header::STRICT_DECODE = 1;
my $strict_string = decode("MIME-Header", $mime_string, FB_CROAK);
# use strict decoding and croak on errors
=head1 ABSTRACT
This module implements L<RFC 2047|https://tools.ietf.org/html/rfc2047> MIME
encoding for an unstructured field body of the email header. It can also be
used for L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token. However,
it cannot be used directly for the whole header with the field name or for the
structured header fields like From, To, Cc, Message-Id, etc... There are 3
encoding names supported by this module: C<MIME-Header>, C<MIME-B> and
C<MIME-Q>.
=head1 DESCRIPTION
Decode method takes an unstructured field body of the email header (or
L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token) as its input and
decodes each MIME encoded-word from input string to a sequence of bytes
according to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
L<RFC 2231|https://tools.ietf.org/html/rfc2231>. Subsequently, each sequence
of bytes with the corresponding MIME charset is decoded with
L<the Encode module|Encode> and finally, one output string is returned. Text
parts of the input string which do not contain MIME encoded-word stay
unmodified in the output string. Folded newlines between two consecutive MIME
encoded-words are discarded, others are preserved in the output string.
C<MIME-B> can decode Base64 variant, C<MIME-Q> can decode Quoted-Printable
variant and C<MIME-Header> can decode both of them. If L<Encode module|Encode>
does not support particular MIME charset or chosen variant then an action based
on L<CHECK flags|Encode/Handling Malformed Data> is performed (by default, the
MIME encoded-word is not decoded).
Encode method takes a scalar string as its input and uses
L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for encoding it to UTF-8
bytes. Then a sequence of UTF-8 bytes is encoded into MIME encoded-words
(C<MIME-Header> and C<MIME-B> use a Base64 variant while C<MIME-Q> uses a
Quoted-Printable variant) where each MIME encoded-word is limited to 75
characters. MIME encoded-words are separated by C<CRLF SPACE> and joined to
one output string. Output string is suitable for unstructured field body of
the email header.
Both encode and decode methods propagate
L<CHECK flags|Encode/Handling Malformed Data> when encoding and decoding the
MIME charset.
=head1 BUGS
Versions prior to 2.22 (part of Encode 2.83) have a malfunctioning decoder
and encoder. The MIME encoder infamously inserted additional spaces or
discarded white spaces between consecutive MIME encoded-words, which led to
invalid MIME headers produced by this module. The MIME decoder had a tendency
to discard white spaces, incorrectly interpret data or attempt to decode Base64
MIME encoded-words as Quoted-Printable. These problems were fixed in version
2.22. It is highly recommended not to use any version prior 2.22!
Versions prior to 2.24 (part of Encode 2.87) ignored
L<CHECK flags|Encode/Handling Malformed Data>. The MIME encoder used
L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for input Unicode
strings which could lead to invalid UTF-8 sequences. MIME decoder used also
L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> decoder and additionally
called the decode method with a C<Encode::FB_PERLQQ> flag (thus user-specified
L<CHECK flags|Encode/Handling Malformed Data> were ignored). Moreover, it
automatically croaked when a MIME encoded-word contained unknown encoding.
Since version 2.24, this module uses
L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder and decoder. And
L<CHECK flags|Encode/Handling Malformed Data> are correctly propagated.
Since version 2.22 (part of Encode 2.83), the MIME encoder should be fully
compliant to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
L<RFC 2231|https://tools.ietf.org/html/rfc2231>. Due to the aforementioned
bugs in previous versions of the MIME encoder, there is a I<less strict>
compatible mode for the MIME decoder which is used by default. It should be
able to decode MIME encoded-words encoded by pre 2.22 versions of this module.
However, note that this is not correct according to
L<RFC 2047|https://tools.ietf.org/html/rfc2047>.
In default I<not strict> mode the MIME decoder attempts to decode every substring
which looks like a MIME encoded-word. Therefore, the MIME encoded-words do not
need to be separated by white space. To enforce a correct I<strict> mode, set
variable C<$Encode::MIME::Header::STRICT_DECODE> to 1 e.g. by localizing:
use Encode::MIME::Header;
local $Encode::MIME::Header::STRICT_DECODE = 1;
=head1 AUTHORS
Pali E<lt>pali@cpan.orgE<gt>
=head1 SEE ALSO
L<Encode>,
L<RFC 822|https://tools.ietf.org/html/rfc822>,
L<RFC 2047|https://tools.ietf.org/html/rfc2047>,
L<RFC 2231|https://tools.ietf.org/html/rfc2231>
=cut

View File

@@ -0,0 +1,133 @@
package Encode::MIME::Header::ISO_2022_JP;
use strict;
use warnings;
use parent qw(Encode::MIME::Header);
my $obj =
bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
__PACKAGE__;
Encode::define_encoding($obj, 'MIME-Header-ISO_2022_JP');
use constant HEAD => '=?ISO-2022-JP?B?';
use constant TAIL => '?=';
use Encode::CJKConstants qw(%RE);
our $VERSION = do { my @r = ( q$Revision: 1.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
# I owe the below codes totally to
# Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
sub encode {
my $self = shift;
my $str = shift;
return undef unless defined $str;
utf8::encode($str) if ( Encode::is_utf8($str) );
Encode::from_to( $str, 'utf8', 'euc-jp' );
my ($trailing_crlf) = ( $str =~ /(\n|\r|\x0d\x0a)$/o );
$str = _mime_unstructured_header( $str, $self->{bpl} );
not $trailing_crlf and $str =~ s/(\n|\r|\x0d\x0a)$//o;
return $str;
}
sub _mime_unstructured_header {
my ( $oldheader, $bpl ) = @_;
my $crlf = $oldheader =~ /\n$/;
my ( $header, @words, @wordstmp, $i ) = ('');
$oldheader =~ s/\s+$//;
@wordstmp = split /\s+/, $oldheader;
for ( $i = 0 ; $i < $#wordstmp ; $i++ ) {
if ( $wordstmp[$i] !~ /^[\x21-\x7E]+$/
and $wordstmp[ $i + 1 ] !~ /^[\x21-\x7E]+$/ )
{
$wordstmp[ $i + 1 ] = "$wordstmp[$i] $wordstmp[$i + 1]";
}
else {
push( @words, $wordstmp[$i] );
}
}
push( @words, $wordstmp[-1] );
for my $word (@words) {
if ( $word =~ /^[\x21-\x7E]+$/ ) {
$header =~ /(?:.*\n)*(.*)/;
if ( length($1) + length($word) > $bpl ) {
$header .= "\n $word";
}
else {
$header .= $word;
}
}
else {
$header = _add_encoded_word( $word, $header, $bpl );
}
$header =~ /(?:.*\n)*(.*)/;
if ( length($1) == $bpl ) {
$header .= "\n ";
}
else {
$header .= ' ';
}
}
$header =~ s/\n? $//mg;
$crlf ? "$header\n" : $header;
}
sub _add_encoded_word {
my ( $str, $line, $bpl ) = @_;
my $result = '';
while ( length($str) ) {
my $target = $str;
$str = '';
if (
length($line) + 22 +
( $target =~ /^(?:$RE{EUC_0212}|$RE{EUC_C})/o ) * 8 > $bpl )
{
$line =~ s/[ \t\n\r]*$/\n/;
$result .= $line;
$line = ' ';
}
while (1) {
my $iso_2022_jp = $target;
Encode::from_to( $iso_2022_jp, 'euc-jp', 'iso-2022-jp' );
my $encoded =
HEAD . MIME::Base64::encode_base64( $iso_2022_jp, '' ) . TAIL;
if ( length($encoded) + length($line) > $bpl ) {
$target =~
s/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|$RE{ASCII})$//o;
$str = $1 . $str;
}
else {
$line .= $encoded;
last;
}
}
}
$result . $line;
}
1;
__END__

View File

@@ -0,0 +1,103 @@
package Encode::MIME::Name;
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 1.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
# NOTE: This table must be 1:1 mapping
our %MIME_NAME_OF = (
'AdobeStandardEncoding' => 'Adobe-Standard-Encoding',
'AdobeSymbol' => 'Adobe-Symbol-Encoding',
'ascii' => 'US-ASCII',
'big5-hkscs' => 'Big5-HKSCS',
'cp1026' => 'IBM1026',
'cp1047' => 'IBM1047',
'cp1250' => 'windows-1250',
'cp1251' => 'windows-1251',
'cp1252' => 'windows-1252',
'cp1253' => 'windows-1253',
'cp1254' => 'windows-1254',
'cp1255' => 'windows-1255',
'cp1256' => 'windows-1256',
'cp1257' => 'windows-1257',
'cp1258' => 'windows-1258',
'cp37' => 'IBM037',
'cp424' => 'IBM424',
'cp437' => 'IBM437',
'cp500' => 'IBM500',
'cp775' => 'IBM775',
'cp850' => 'IBM850',
'cp852' => 'IBM852',
'cp855' => 'IBM855',
'cp857' => 'IBM857',
'cp860' => 'IBM860',
'cp861' => 'IBM861',
'cp862' => 'IBM862',
'cp863' => 'IBM863',
'cp864' => 'IBM864',
'cp865' => 'IBM865',
'cp866' => 'IBM866',
'cp869' => 'IBM869',
'cp936' => 'GBK',
'euc-cn' => 'EUC-CN',
'euc-jp' => 'EUC-JP',
'euc-kr' => 'EUC-KR',
#'gb2312-raw' => 'GB2312', # no, you're wrong, I18N::Charset
'hp-roman8' => 'hp-roman8',
'hz' => 'HZ-GB-2312',
'iso-2022-jp' => 'ISO-2022-JP',
'iso-2022-jp-1' => 'ISO-2022-JP-1',
'iso-2022-kr' => 'ISO-2022-KR',
'iso-8859-1' => 'ISO-8859-1',
'iso-8859-10' => 'ISO-8859-10',
'iso-8859-13' => 'ISO-8859-13',
'iso-8859-14' => 'ISO-8859-14',
'iso-8859-15' => 'ISO-8859-15',
'iso-8859-16' => 'ISO-8859-16',
'iso-8859-2' => 'ISO-8859-2',
'iso-8859-3' => 'ISO-8859-3',
'iso-8859-4' => 'ISO-8859-4',
'iso-8859-5' => 'ISO-8859-5',
'iso-8859-6' => 'ISO-8859-6',
'iso-8859-7' => 'ISO-8859-7',
'iso-8859-8' => 'ISO-8859-8',
'iso-8859-9' => 'ISO-8859-9',
#'jis0201-raw' => 'JIS_X0201',
#'jis0208-raw' => 'JIS_C6226-1983',
#'jis0212-raw' => 'JIS_X0212-1990',
'koi8-r' => 'KOI8-R',
'koi8-u' => 'KOI8-U',
#'ksc5601-raw' => 'KS_C_5601-1987',
'shiftjis' => 'Shift_JIS',
'UTF-16' => 'UTF-16',
'UTF-16BE' => 'UTF-16BE',
'UTF-16LE' => 'UTF-16LE',
'UTF-32' => 'UTF-32',
'UTF-32BE' => 'UTF-32BE',
'UTF-32LE' => 'UTF-32LE',
'UTF-7' => 'UTF-7',
'utf-8-strict' => 'UTF-8',
'viscii' => 'VISCII',
);
# NOTE: %MIME_NAME_OF is still 1:1 mapping
our %ENCODE_NAME_OF = map { uc $MIME_NAME_OF{$_} => $_ } keys %MIME_NAME_OF;
# Add additional 1:N mapping
$MIME_NAME_OF{'utf8'} = 'UTF-8';
sub get_mime_name($) { $MIME_NAME_OF{$_[0]} };
sub get_encode_name($) { $ENCODE_NAME_OF{uc $_[0]} };
1;
__END__
=head1 NAME
Encode::MIME::NAME -- internally used by Encode
=head1 SEE ALSO
L<I18N::Charset>
=cut

View File

@@ -0,0 +1,44 @@
package Encode::Symbol;
use strict;
use warnings;
use Encode;
our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
1;
__END__
=head1 NAME
Encode::Symbol - Symbol Encodings
=head1 SYNOPSIS
use Encode qw/encode decode/;
$symbol = encode("symbol", $utf8); # loads Encode::Symbol implicitly
$utf8 = decode("", $symbol); # ditto
=head1 ABSTRACT
This module implements symbol and dingbats encodings. Encodings
supported are as follows.
Canonical Alias Description
--------------------------------------------------------------------
symbol
dingbats
AdobeZDingbat
AdobeSymbol
MacDingbats
=head1 DESCRIPTION
To find out how to use this module in detail, see L<Encode>.
=head1 SEE ALSO
L<Encode>
=cut

View File

@@ -0,0 +1,75 @@
package Encode::TW;
BEGIN {
if ( ord("A") == 193 ) {
die "Encode::TW not supported on EBCDIC\n";
}
}
use strict;
use warnings;
use Encode;
our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
1;
__END__
=head1 NAME
Encode::TW - Taiwan-based Chinese Encodings
=head1 SYNOPSIS
use Encode qw/encode decode/;
$big5 = encode("big5", $utf8); # loads Encode::TW implicitly
$utf8 = decode("big5", $big5); # ditto
=head1 DESCRIPTION
This module implements tradition Chinese charset encodings as used
in Taiwan and Hong Kong.
Encodings supported are as follows.
Canonical Alias Description
--------------------------------------------------------------------
big5-eten /\bbig-?5$/i Big5 encoding (with ETen extensions)
/\bbig5-?et(en)?$/i
/\btca-?big5$/i
big5-hkscs /\bbig5-?hk(scs)?$/i
/\bhk(scs)?-?big5$/i
Big5 + Cantonese characters in Hong Kong
MacChineseTrad Big5 + Apple Vendor Mappings
cp950 Code Page 950
= Big5 + Microsoft vendor mappings
--------------------------------------------------------------------
To find out how to use this module in detail, see L<Encode>.
=head1 NOTES
Due to size concerns, C<EUC-TW> (Extended Unix Character), C<CCCII>
(Chinese Character Code for Information Interchange), C<BIG5PLUS>
(CMEX's Big5+) and C<BIG5EXT> (CMEX's Big5e) are distributed separately
on CPAN, under the name L<Encode::HanExtra>. That module also contains
extra China-based encodings.
=head1 BUGS
Since the original C<big5> encoding (1984) is not supported anywhere
(glibc and DOS-based systems uses C<big5> to mean C<big5-eten>; Microsoft
uses C<big5> to mean C<cp950>), a conscious decision was made to alias
C<big5> to C<big5-eten>, which is the de facto superset of the original
big5.
The C<CNS11643> encoding files are not complete. For common C<CNS11643>
manipulation, please use C<EUC-TW> in L<Encode::HanExtra>, which contains
planes 1-7.
The ASCII region (0x00-0x7f) is preserved for all encodings, even
though this conflicts with mappings by the Unicode Consortium.
=head1 SEE ALSO
L<Encode>
=cut

View File

@@ -0,0 +1,272 @@
package Encode::Unicode;
use strict;
use warnings;
our $VERSION = do { my @r = ( q$Revision: 2.20 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
#
# Object Generator 8 transcoders all at once!
#
use Encode ();
our %BOM_Unknown = map { $_ => 1 } qw(UTF-16 UTF-32);
for my $name (
qw(UTF-16 UTF-16BE UTF-16LE
UTF-32 UTF-32BE UTF-32LE
UCS-2BE UCS-2LE)
)
{
my ( $size, $endian, $ucs2, $mask );
$name =~ /^(\w+)-(\d+)(\w*)$/o;
if ( $ucs2 = ( $1 eq 'UCS' ) ) {
$size = 2;
}
else {
$size = $2 / 8;
}
$endian = ( $3 eq 'BE' ) ? 'n' : ( $3 eq 'LE' ) ? 'v' : '';
$size == 4 and $endian = uc($endian);
my $obj = bless {
Name => $name,
size => $size,
endian => $endian,
ucs2 => $ucs2,
} => __PACKAGE__;
Encode::define_encoding($obj, $name);
}
use parent qw(Encode::Encoding);
sub renew {
my $self = shift;
$BOM_Unknown{ $self->name } or return $self;
my $clone = bless {%$self} => ref($self);
$clone->{renewed}++; # so the caller knows it is renewed.
return $clone;
}
1;
__END__
=head1 NAME
Encode::Unicode -- Various Unicode Transformation Formats
=cut
=head1 SYNOPSIS
use Encode qw/encode decode/;
$ucs2 = encode("UCS-2BE", $utf8);
$utf8 = decode("UCS-2BE", $ucs2);
=head1 ABSTRACT
This module implements all Character Encoding Schemes of Unicode that
are officially documented by Unicode Consortium (except, of course,
for UTF-8, which is a native format in perl).
=over 4
=item L<http://www.unicode.org/glossary/> says:
I<Character Encoding Scheme> A character encoding form plus byte
serialization. There are Seven character encoding schemes in Unicode:
UTF-8, UTF-16, UTF-16BE, UTF-16LE, UTF-32 (UCS-4), UTF-32BE (UCS-4BE) and
UTF-32LE (UCS-4LE), and UTF-7.
Since UTF-7 is a 7-bit (re)encoded version of UTF-16BE, It is not part of
Unicode's Character Encoding Scheme. It is separately implemented in
Encode::Unicode::UTF7. For details see L<Encode::Unicode::UTF7>.
=item Quick Reference
Decodes from ord(N) Encodes chr(N) to...
octet/char BOM S.P d800-dfff ord > 0xffff \x{1abcd} ==
---------------+-----------------+------------------------------
UCS-2BE 2 N N is bogus Not Available
UCS-2LE 2 N N bogus Not Available
UTF-16 2/4 Y Y is S.P S.P BE/LE
UTF-16BE 2/4 N Y S.P S.P 0xd82a,0xdfcd
UTF-16LE 2/4 N Y S.P S.P 0x2ad8,0xcddf
UTF-32 4 Y - is bogus As is BE/LE
UTF-32BE 4 N - bogus As is 0x0001abcd
UTF-32LE 4 N - bogus As is 0xcdab0100
UTF-8 1-4 - - bogus >= 4 octets \xf0\x9a\af\8d
---------------+-----------------+------------------------------
=back
=head1 Size, Endianness, and BOM
You can categorize these CES by 3 criteria: size of each character,
endianness, and Byte Order Mark.
=head2 by size
UCS-2 is a fixed-length encoding with each character taking 16 bits.
It B<does not> support I<surrogate pairs>. When a surrogate pair
is encountered during decode(), its place is filled with \x{FFFD}
if I<CHECK> is 0, or the routine croaks if I<CHECK> is 1. When a
character whose ord value is larger than 0xFFFF is encountered,
its place is filled with \x{FFFD} if I<CHECK> is 0, or the routine
croaks if I<CHECK> is 1.
UTF-16 is almost the same as UCS-2 but it supports I<surrogate pairs>.
When it encounters a high surrogate (0xD800-0xDBFF), it fetches the
following low surrogate (0xDC00-0xDFFF) and C<desurrogate>s them to
form a character. Bogus surrogates result in death. When \x{10000}
or above is encountered during encode(), it C<ensurrogate>s them and
pushes the surrogate pair to the output stream.
UTF-32 (UCS-4) is a fixed-length encoding with each character taking 32 bits.
Since it is 32-bit, there is no need for I<surrogate pairs>.
=head2 by endianness
The first (and now failed) goal of Unicode was to map all character
repertoires into a fixed-length integer so that programmers are happy.
Since each character is either a I<short> or I<long> in C, you have to
pay attention to the endianness of each platform when you pass data
to one another.
Anything marked as BE is Big Endian (or network byte order) and LE is
Little Endian (aka VAX byte order). For anything not marked either
BE or LE, a character called Byte Order Mark (BOM) indicating the
endianness is prepended to the string.
CAVEAT: Though BOM in utf8 (\xEF\xBB\xBF) is valid, it is meaningless
and as of this writing Encode suite just leave it as is (\x{FeFF}).
=over 4
=item BOM as integer when fetched in network byte order
16 32 bits/char
-------------------------
BE 0xFeFF 0x0000FeFF
LE 0xFFFe 0xFFFe0000
-------------------------
=back
This modules handles the BOM as follows.
=over 4
=item *
When BE or LE is explicitly stated as the name of encoding, BOM is
simply treated as a normal character (ZERO WIDTH NO-BREAK SPACE).
=item *
When BE or LE is omitted during decode(), it checks if BOM is at the
beginning of the string; if one is found, the endianness is set to
what the BOM says.
=item *
Default Byte Order
When no BOM is found, Encode 2.76 and blow croaked. Since Encode
2.77, it falls back to BE accordingly to RFC2781 and the Unicode
Standard version 8.0
=item *
When BE or LE is omitted during encode(), it returns a BE-encoded
string with BOM prepended. So when you want to encode a whole text
file, make sure you encode() the whole text at once, not line by line
or each line, not file, will have a BOM prepended.
=item *
C<UCS-2> is an exception. Unlike others, this is an alias of UCS-2BE.
UCS-2 is already registered by IANA and others that way.
=back
=head1 Surrogate Pairs
To say the least, surrogate pairs were the biggest mistake of the
Unicode Consortium. But according to the late Douglas Adams in I<The
Hitchhiker's Guide to the Galaxy> Trilogy, C<In the beginning the
Universe was created. This has made a lot of people very angry and
been widely regarded as a bad move>. Their mistake was not of this
magnitude so let's forgive them.
(I don't dare make any comparison with Unicode Consortium and the
Vogons here ;) Or, comparing Encode to Babel Fish is completely
appropriate -- if you can only stick this into your ear :)
Surrogate pairs were born when the Unicode Consortium finally
admitted that 16 bits were not big enough to hold all the world's
character repertoires. But they already made UCS-2 16-bit. What
do we do?
Back then, the range 0xD800-0xDFFF was not allocated. Let's split
that range in half and use the first half to represent the C<upper
half of a character> and the second half to represent the C<lower
half of a character>. That way, you can represent 1024 * 1024 =
1048576 more characters. Now we can store character ranges up to
\x{10ffff} even with 16-bit encodings. This pair of half-character is
now called a I<surrogate pair> and UTF-16 is the name of the encoding
that embraces them.
Here is a formula to ensurrogate a Unicode character \x{10000} and
above;
$hi = ($uni - 0x10000) / 0x400 + 0xD800;
$lo = ($uni - 0x10000) % 0x400 + 0xDC00;
And to desurrogate;
$uni = 0x10000 + ($hi - 0xD800) * 0x400 + ($lo - 0xDC00);
Note this move has made \x{D800}-\x{DFFF} into a forbidden zone but
perl does not prohibit the use of characters within this range. To perl,
every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I<a character>.
(*) or \x{ffff_ffff_ffff_ffff} if your perl is compiled with 64-bit
integer support!
=head1 Error Checking
Unlike most encodings which accept various ways to handle errors,
Unicode encodings simply croaks.
% perl -MEncode -e'$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \
-e'Encode::from_to($_, "utf16","shift_jis", 0); print'
UTF-16:Malformed LO surrogate d8d9 at /path/to/Encode.pm line 184.
% perl -MEncode -e'$a = "BOM missing"' \
-e' Encode::from_to($a, "utf16", "shift_jis", 0); print'
UTF-16:Unrecognised BOM 424f at /path/to/Encode.pm line 184.
Unlike other encodings where mappings are not one-to-one against
Unicode, UTFs are supposed to map 100% against one another. So Encode
is more strict on UTFs.
Consider that "division by zero" of Encode :)
=head1 SEE ALSO
L<Encode>, L<Encode::Unicode::UTF7>, L<https://www.unicode.org/glossary/>,
L<https://www.unicode.org/faq/utf_bom.html>,
RFC 2781 L<http://www.ietf.org/rfc/rfc2781.txt>,
The whole Unicode standard L<https://www.unicode.org/standard/standard.html>
Ch. 6 pp. 275 of C<Programming Perl (3rd Edition)>
by Tom Christiansen, brian d foy & Larry Wall;
O'Reilly & Associates; ISBN 978-0-596-00492-7
=cut

View File

@@ -0,0 +1,133 @@
#
# $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $
#
package Encode::Unicode::UTF7;
use strict;
use warnings;
use parent qw(Encode::Encoding);
__PACKAGE__->Define('UTF-7');
our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use MIME::Base64;
use Encode qw(find_encoding);
#
# Algorithms taken from Unicode::String by Gisle Aas
#
our $OPTIONAL_DIRECT_CHARS = 1;
my $specials = quotemeta "\'(),-./:?";
$OPTIONAL_DIRECT_CHARS
and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
# \s will not work because it matches U+3000 DEOGRAPHIC SPACE
# We use qr/[\n\r\t\ ] instead
my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
my $e_utf16 = find_encoding("UTF-16BE");
sub needs_lines { 1 }
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
return undef unless defined $str;
my $len = length($str);
pos($str) = 0;
my $bytes = substr($str, 0, 0); # to propagate taintedness
while ( pos($str) < $len ) {
if ( $str =~ /\G($re_asis+)/ogc ) {
my $octets = $1;
utf8::downgrade($octets);
$bytes .= $octets;
}
elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
if ( $1 eq "+" ) {
$bytes .= "+-";
}
else {
my $s = $1;
my $base64 = encode_base64( $e_utf16->encode($s), '' );
$base64 =~ s/=+$//;
$bytes .= "+$base64-";
}
}
else {
die "This should not happen! (pos=" . pos($str) . ")";
}
}
$_[1] = '' if $chk;
return $bytes;
}
sub decode($$;$) {
use re 'taint';
my ( $obj, $bytes, $chk ) = @_;
return undef unless defined $bytes;
my $len = length($bytes);
my $str = substr($bytes, 0, 0); # to propagate taintedness;
pos($bytes) = 0;
no warnings 'uninitialized';
while ( pos($bytes) < $len ) {
if ( $bytes =~ /\G([^+]+)/ogc ) {
$str .= $1;
}
elsif ( $bytes =~ /\G\+-/ogc ) {
$str .= "+";
}
elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) {
my $base64 = $1;
my $pad = length($base64) % 4;
$base64 .= "=" x ( 4 - $pad ) if $pad;
$str .= $e_utf16->decode( decode_base64($base64) );
}
elsif ( $bytes =~ /\G\+/ogc ) {
$^W and warn "Bad UTF7 data escape";
$str .= "+";
}
else {
die "This should not happen " . pos($bytes);
}
}
$_[1] = '' if $chk;
return $str;
}
1;
__END__
=head1 NAME
Encode::Unicode::UTF7 -- UTF-7 encoding
=head1 SYNOPSIS
use Encode qw/encode decode/;
$utf7 = encode("UTF-7", $utf8);
$utf8 = decode("UTF-7", $ucs2);
=head1 ABSTRACT
This module implements UTF-7 encoding documented in RFC 2152. UTF-7,
as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It
is designed to be MTA-safe and expected to be a standard way to
exchange Unicoded mails via mails. But with the advent of UTF-8 and
8-bit compliant MTAs, UTF-7 is hardly ever used.
UTF-7 was not supported by Encode until version 1.95 because of that.
But Unicode::String, a module by Gisle Aas which adds Unicode supports
to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
so Encode can supersede Unicode::String 100%.
=head1 In Practice
When you want to encode Unicode for mails and web pages, however, do
not use UTF-7 unless you are sure your recipients and readers can
handle it. Very few MUAs and WWW Browsers support these days (only
Mozilla seems to support one). For general cases, use UTF-8 for
message body and MIME-Header for header instead.
=head1 SEE ALSO
L<Encode>, L<Encode::Unicode>, L<Unicode::String>
RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
=cut

View File

@@ -0,0 +1,285 @@
# -*- buffer-read-only: t -*-
#
# This file is auto-generated by ext/Errno/Errno_pm.PL.
# ***ANY*** changes here will be lost.
#
package Errno;
use Exporter 'import';
use strict;
use Config;
"$Config{'archname'}-$Config{'osvers'}" eq
"x86_64-msys-thread-multi-3.5.7-882031da.x86_64" or
die "Errno architecture (x86_64-msys-thread-multi-3.5.7-882031da.x86_64) does not match executable architecture ($Config{'archname'}-$Config{'osvers'})";
our $VERSION = "1.37";
$VERSION = eval $VERSION;
my %err;
BEGIN {
%err = (
EPERM => 1,
ENOENT => 2,
ESRCH => 3,
EINTR => 4,
EIO => 5,
ENXIO => 6,
E2BIG => 7,
ENOEXEC => 8,
EBADF => 9,
ECHILD => 10,
EAGAIN => 11,
EWOULDBLOCK => 11,
ENOMEM => 12,
EACCES => 13,
EFAULT => 14,
ENOTBLK => 15,
EBUSY => 16,
EEXIST => 17,
EXDEV => 18,
ENODEV => 19,
ENOTDIR => 20,
EISDIR => 21,
EINVAL => 22,
ENFILE => 23,
EMFILE => 24,
ENOTTY => 25,
ETXTBSY => 26,
EFBIG => 27,
ENOSPC => 28,
ESPIPE => 29,
EROFS => 30,
EMLINK => 31,
EPIPE => 32,
EDOM => 33,
ERANGE => 34,
ENOMSG => 35,
EIDRM => 36,
ECHRNG => 37,
EL2NSYNC => 38,
EL3HLT => 39,
EL3RST => 40,
ELNRNG => 41,
EUNATCH => 42,
ENOCSI => 43,
EL2HLT => 44,
EDEADLK => 45,
ENOLCK => 46,
EBADE => 50,
EBADR => 51,
EXFULL => 52,
ENOANO => 53,
EBADRQC => 54,
EBADSLT => 55,
EDEADLOCK => 56,
EBFONT => 57,
ENOSTR => 60,
ENODATA => 61,
ETIME => 62,
ENOSR => 63,
ENONET => 64,
ENOPKG => 65,
EREMOTE => 66,
ENOLINK => 67,
EADV => 68,
ESRMNT => 69,
ECOMM => 70,
EPROTO => 71,
EMULTIHOP => 74,
ELBIN => 75,
EDOTDOT => 76,
EBADMSG => 77,
EFTYPE => 79,
ENOTUNIQ => 80,
EBADFD => 81,
EREMCHG => 82,
ELIBACC => 83,
ELIBBAD => 84,
ELIBSCN => 85,
ELIBMAX => 86,
ELIBEXEC => 87,
ENOSYS => 88,
ENMFILE => 89,
ENOTEMPTY => 90,
ENAMETOOLONG => 91,
ELOOP => 92,
EOPNOTSUPP => 95,
EPFNOSUPPORT => 96,
ECONNRESET => 104,
ENOBUFS => 105,
EAFNOSUPPORT => 106,
EPROTOTYPE => 107,
ENOTSOCK => 108,
ENOPROTOOPT => 109,
ESHUTDOWN => 110,
ECONNREFUSED => 111,
EADDRINUSE => 112,
ECONNABORTED => 113,
ENETUNREACH => 114,
ENETDOWN => 115,
ETIMEDOUT => 116,
EHOSTDOWN => 117,
EHOSTUNREACH => 118,
EINPROGRESS => 119,
EALREADY => 120,
EDESTADDRREQ => 121,
EMSGSIZE => 122,
EPROTONOSUPPORT => 123,
ESOCKTNOSUPPORT => 124,
EADDRNOTAVAIL => 125,
ENETRESET => 126,
EISCONN => 127,
ENOTCONN => 128,
ETOOMANYREFS => 129,
EPROCLIM => 130,
EUSERS => 131,
EDQUOT => 132,
ESTALE => 133,
ENOTSUP => 134,
ENOMEDIUM => 135,
ENOSHARE => 136,
ECASECLASH => 137,
EILSEQ => 138,
EOVERFLOW => 139,
ECANCELED => 140,
ENOTRECOVERABLE => 141,
EOWNERDEAD => 142,
ESTRPIPE => 143,
);
# Generate proxy constant subroutines for all the values.
# Well, almost all the values. Unfortunately we can't assume that at this
# point that our symbol table is empty, as code such as if the parser has
# seen code such as C<exists &Errno::EINVAL>, it will have created the
# typeglob.
# Doing this before defining @EXPORT_OK etc means that even if a platform is
# crazy enough to define EXPORT_OK as an error constant, everything will
# still work, because the parser will upgrade the PCS to a real typeglob.
# We rely on the subroutine definitions below to update the internal caches.
# Don't use %each, as we don't want a copy of the value.
foreach my $name (keys %err) {
if ($Errno::{$name}) {
# We expect this to be reached fairly rarely, so take an approach
# which uses the least compile time effort in the common case:
eval "sub $name() { $err{$name} }; 1" or die $@;
} else {
$Errno::{$name} = \$err{$name};
}
}
}
our @EXPORT_OK = keys %err;
our %EXPORT_TAGS = (
POSIX => [qw(
E2BIG EACCES EADDRINUSE EADDRNOTAVAIL EAFNOSUPPORT EAGAIN EALREADY
EBADF EBUSY ECHILD ECONNABORTED ECONNREFUSED ECONNRESET EDEADLK
EDESTADDRREQ EDOM EDQUOT EEXIST EFAULT EFBIG EHOSTDOWN EHOSTUNREACH
EINPROGRESS EINTR EINVAL EIO EISCONN EISDIR ELOOP EMFILE EMLINK
EMSGSIZE ENAMETOOLONG ENETDOWN ENETRESET ENETUNREACH ENFILE ENOBUFS
ENODEV ENOENT ENOEXEC ENOLCK ENOMEM ENOPROTOOPT ENOSPC ENOSYS ENOTBLK
ENOTCONN ENOTDIR ENOTEMPTY ENOTSOCK ENOTTY ENXIO EOPNOTSUPP EPERM
EPFNOSUPPORT EPIPE EPROCLIM EPROTONOSUPPORT EPROTOTYPE ERANGE EREMOTE
EROFS ESHUTDOWN ESOCKTNOSUPPORT ESPIPE ESRCH ESTALE ETIMEDOUT
ETOOMANYREFS ETXTBSY EUSERS EWOULDBLOCK EXDEV
)],
);
sub TIEHASH { bless \%err }
sub FETCH {
my (undef, $errname) = @_;
return "" unless exists $err{$errname};
my $errno = $err{$errname};
return $errno == $! ? $errno : 0;
}
sub STORE {
require Carp;
Carp::confess("ERRNO hash is read only!");
}
# This is the true return value
*CLEAR = *DELETE = \*STORE; # Typeglob aliasing uses less space
sub NEXTKEY {
each %err;
}
sub FIRSTKEY {
my $s = scalar keys %err; # initialize iterator
each %err;
}
sub EXISTS {
my (undef, $errname) = @_;
exists $err{$errname};
}
sub _tie_it {
tie %{$_[0]}, __PACKAGE__;
}
__END__
=head1 NAME
Errno - System errno constants
=head1 SYNOPSIS
use Errno qw(EINTR EIO :POSIX);
=head1 DESCRIPTION
C<Errno> defines and conditionally exports all the error constants
defined in your system F<errno.h> include file. It has a single export
tag, C<:POSIX>, which will export all POSIX defined error numbers.
On Windows, C<Errno> also defines and conditionally exports all the
Winsock error constants defined in your system F<WinError.h> include
file. These are included in a second export tag, C<:WINSOCK>.
C<Errno> also makes C<%!> magic such that each element of C<%!> has a
non-zero value only if C<$!> is set to that value. For example:
my $fh;
unless (open($fh, "<", "/fangorn/spouse")) {
if ($!{ENOENT}) {
warn "Get a wife!\n";
} else {
warn "This path is barred: $!";
}
}
If a specified constant C<EFOO> does not exist on the system, C<$!{EFOO}>
returns C<"">. You may use C<exists $!{EFOO}> to check whether the
constant is available on the system.
Perl automatically loads C<Errno> the first time you use C<%!>, so you don't
need an explicit C<use>.
=head1 CAVEATS
Importing a particular constant may not be very portable, because the
import will fail on platforms that do not have that constant. A more
portable way to set C<$!> to a valid value is to use:
if (exists &Errno::EFOO) {
$! = &Errno::EFOO;
}
=head1 AUTHOR
Graham Barr <gbarr@pobox.com>
=head1 COPYRIGHT
Copyright (c) 1997-8 Graham Barr. All rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut
# ex: set ro:

View File

@@ -0,0 +1,191 @@
package Fcntl;
=head1 NAME
Fcntl - load the C Fcntl.h defines
=head1 SYNOPSIS
use Fcntl;
use Fcntl qw(:DEFAULT :flock);
=head1 DESCRIPTION
This module is just a translation of the C F<fcntl.h> file.
Unlike the old mechanism of requiring a translated F<fcntl.ph>
file, this uses the B<h2xs> program (see the Perl source distribution)
and your native C compiler. This means that it has a
far more likely chance of getting the numbers right.
=head1 NOTE
Only C<#define> symbols get translated; you must still correctly
pack up your own arguments to pass as args for locking functions, etc.
=head1 EXPORTED SYMBOLS
By default your system's F_* and O_* constants (eg, F_DUPFD and
O_CREAT) and the FD_CLOEXEC constant are exported into your namespace.
You can request that the flock() constants (LOCK_SH, LOCK_EX, LOCK_NB
and LOCK_UN) be provided by using the tag C<:flock>. See L<Exporter>.
You can request that the old constants (FAPPEND, FASYNC, FCREAT,
FDEFER, FEXCL, FNDELAY, FNONBLOCK, FSYNC, FTRUNC) be provided for
compatibility reasons by using the tag C<:Fcompat>. For new
applications the newer versions of these constants are suggested
(O_APPEND, O_ASYNC, O_CREAT, O_DEFER, O_EXCL, O_NDELAY, O_NONBLOCK,
O_SYNC, O_TRUNC).
For ease of use also the SEEK_* constants (for seek() and sysseek(),
e.g. SEEK_END) and the S_I* constants (for chmod() and stat()) are
available for import. They can be imported either separately or using
the tags C<:seek> and C<:mode>.
Please refer to your native fcntl(2), open(2), fseek(3), lseek(2)
(equal to Perl's seek() and sysseek(), respectively), and chmod(2)
documentation to see what constants are implemented in your system.
See L<perlopentut> to learn about the uses of the O_* constants
with sysopen().
See L<perlfunc/seek> and L<perlfunc/sysseek> about the SEEK_* constants.
See L<perlfunc/stat> about the S_I* constants.
=cut
use strict;
use Exporter 'import';
require XSLoader;
our $VERSION = '1.15';
XSLoader::load();
# Named groups of exports
our %EXPORT_TAGS = (
'flock' => [qw(LOCK_SH LOCK_EX LOCK_NB LOCK_UN)],
'Fcompat' => [qw(FAPPEND FASYNC FCREAT FDEFER FDSYNC FEXCL FLARGEFILE
FNDELAY FNONBLOCK FRSYNC FSYNC FTRUNC)],
'seek' => [qw(SEEK_SET SEEK_CUR SEEK_END)],
'mode' => [qw(S_ISUID S_ISGID S_ISVTX S_ISTXT
_S_IFMT S_IFREG S_IFDIR S_IFLNK
S_IFSOCK S_IFBLK S_IFCHR S_IFIFO S_IFWHT S_ENFMT
S_IRUSR S_IWUSR S_IXUSR S_IRWXU
S_IRGRP S_IWGRP S_IXGRP S_IRWXG
S_IROTH S_IWOTH S_IXOTH S_IRWXO
S_IREAD S_IWRITE S_IEXEC
S_ISREG S_ISDIR S_ISLNK S_ISSOCK
S_ISBLK S_ISCHR S_ISFIFO
S_ISWHT S_ISENFMT
S_IFMT S_IMODE
)],
);
# Items to export into callers namespace by default
# (move infrequently used names to @EXPORT_OK below)
our @EXPORT =
qw(
FD_CLOEXEC
F_ALLOCSP
F_ALLOCSP64
F_COMPAT
F_DUP2FD
F_DUPFD
F_EXLCK
F_FREESP
F_FREESP64
F_FSYNC
F_FSYNC64
F_GETFD
F_GETFL
F_GETLK
F_GETLK64
F_GETOWN
F_NODNY
F_POSIX
F_RDACC
F_RDDNY
F_RDLCK
F_RWACC
F_RWDNY
F_SETFD
F_SETFL
F_SETLK
F_SETLK64
F_SETLKW
F_SETLKW64
F_SETOWN
F_SHARE
F_SHLCK
F_UNLCK
F_UNSHARE
F_WRACC
F_WRDNY
F_WRLCK
O_ACCMODE
O_ALIAS
O_APPEND
O_ASYNC
O_BINARY
O_CREAT
O_DEFER
O_DIRECT
O_DIRECTORY
O_DSYNC
O_EXCL
O_EXLOCK
O_LARGEFILE
O_NDELAY
O_NOCTTY
O_NOFOLLOW
O_NOINHERIT
O_NONBLOCK
O_RANDOM
O_RAW
O_RDONLY
O_RDWR
O_RSRC
O_RSYNC
O_SEQUENTIAL
O_SHLOCK
O_SYNC
O_TEMPORARY
O_TEXT
O_TRUNC
O_WRONLY
);
# Other items we are prepared to export if requested
our @EXPORT_OK = (qw(
DN_ACCESS
DN_ATTRIB
DN_CREATE
DN_DELETE
DN_MODIFY
DN_MULTISHOT
DN_RENAME
F_GETLEASE
F_GETPIPE_SZ
F_GETSIG
F_NOTIFY
F_SETLEASE
F_SETPIPE_SZ
F_SETSIG
LOCK_MAND
LOCK_READ
LOCK_RW
LOCK_WRITE
O_ALT_IO
O_EVTONLY
O_IGNORE_CTTY
O_NOATIME
O_NOLINK
O_NOSIGPIPE
O_NOTRANS
O_SYMLINK
O_TTY_INIT
), map {@{$_}} values %EXPORT_TAGS);
1;

View File

@@ -0,0 +1,304 @@
#!perl -w
#
# Documentation at the __END__
#
package File::DosGlob;
our $VERSION = '1.12';
use strict;
use warnings;
require XSLoader;
XSLoader::load();
sub doglob {
my $cond = shift;
my @retval = ();
my $fix_drive_relative_paths;
OUTER:
for my $pat (@_) {
my @matched = ();
my @globdirs = ();
my $head = '.';
my $sepchr = '/';
my $tail;
next OUTER unless defined $pat and $pat ne '';
# if arg is within quotes strip em and do no globbing
if ($pat =~ /^"(.*)"\z/s) {
$pat = $1;
if ($cond eq 'd') { push(@retval, $pat) if -d $pat }
else { push(@retval, $pat) if -e $pat }
next OUTER;
}
# wildcards with a drive prefix such as h:*.pm must be changed
# to h:./*.pm to expand correctly
if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) {
substr($pat,0,2) = $1 . "./";
$fix_drive_relative_paths = 1;
}
if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) {
($head, $sepchr, $tail) = ($1,$2,$3);
push (@retval, $pat), next OUTER if $tail eq '';
if ($head =~ /[*?]/) {
@globdirs = doglob('d', $head);
push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)),
next OUTER if @globdirs;
}
$head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s;
$pat = $tail;
}
#
# If file component has no wildcards, we can avoid opendir
unless ($pat =~ /[*?]/) {
$head = '' if $head eq '.';
$head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
$head .= $pat;
if ($cond eq 'd') { push(@retval,$head) if -d $head }
else { push(@retval,$head) if -e $head }
next OUTER;
}
opendir(D, $head) or next OUTER;
my @leaves = readdir D;
closedir D;
# VMS-format filespecs, especially if they contain extended characters,
# are unlikely to match patterns correctly, so Unixify them.
if ($^O eq 'VMS') {
require VMS::Filespec;
@leaves = map {$_ =~ s/\.$//; VMS::Filespec::unixify($_)} @leaves;
}
$head = '' if $head eq '.';
$head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
# escape regex metachars but not glob chars
$pat =~ s:([].+^\-\${}()[|]):\\$1:g;
# and convert DOS-style wildcards to regex
$pat =~ s/\*/.*/g;
$pat =~ s/\?/.?/g;
my $matchsub = sub { $_[0] =~ m|^$pat\z|is };
INNER:
for my $e (@leaves) {
next INNER if $e eq '.' or $e eq '..';
next INNER if $cond eq 'd' and ! -d "$head$e";
push(@matched, "$head$e"), next INNER if &$matchsub($e);
#
# [DOS compatibility special case]
# Failed, add a trailing dot and try again, but only
# if name does not have a dot in it *and* pattern
# has a dot *and* name is shorter than 9 chars.
#
if (index($e,'.') == -1 and length($e) < 9
and index($pat,'\\.') != -1) {
push(@matched, "$head$e"), next INNER if &$matchsub("$e.");
}
}
push @retval, @matched if @matched;
}
if ($fix_drive_relative_paths) {
s|^([A-Za-z]:)\./|$1| for @retval;
}
return @retval;
}
#
# this can be used to override CORE::glob in a specific
# package by saying C<use File::DosGlob 'glob';> in that
# namespace.
#
# context (keyed by second cxix arg provided by core)
our %entries;
sub glob {
my($pat,$cxix) = ($_[0], _callsite());
my @pat;
# glob without args defaults to $_
$pat = $_ unless defined $pat;
# if we're just beginning, do it all first
if (!$entries{$cxix}) {
# extract patterns
if ($pat =~ /\s/) {
require Text::ParseWords;
@pat = Text::ParseWords::parse_line('\s+',0,$pat);
}
else {
push @pat, $pat;
}
# Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3.
# abc3 will be the original {3} (and drop the {}).
# abc1 abc2 will be put in @appendpat.
# This was just the easiest way, not nearly the best.
REHASH: {
my @appendpat = ();
for (@pat) {
# There must be a "," I.E. abc{efg} is not what we want.
while ( /^(.*)(?<!\\)\{(.*?)(?<!\\)\,.*?(?<!\\)\}(.*)$/ ) {
my ($start, $match, $end) = ($1, $2, $3);
#print "Got: \n\t$start\n\t$match\n\t$end\n";
my $tmp = "$start$match$end";
while ( $tmp =~ s/^(.*?)(?<!\\)\{(?:.*(?<!\\)\,)?(.*\Q$match\E.*?)(?:(?<!\\)\,.*)?(?<!\\)\}(.*)$/$1$2$3/ ) {
# these expansions will be performed by the original,
# when we call REHASH.
}
push @appendpat, ("$tmp");
s/^\Q$start\E(?<!\\)\{\Q$match\E(?<!\\)\,/$start\{/;
if ( /^\Q$start\E(?<!\\)\{(?!.*?(?<!\\)\,.*?\Q$end\E$)(.*)(?<!\\)\}\Q$end\E$/ ) {
$match = $1;
#print "GOT: \n\t$start\n\t$match\n\t$end\n\n";
$_ = "$start$match$end";
}
}
#print "Sould have "GOT" vs "Got"!\n";
#FIXME: There should be checking for this.
# How or what should be done about failure is beyond me.
}
if ( $#appendpat != -1
) {
#FIXME: Max loop, no way! :")
for ( @appendpat ) {
push @pat, $_;
}
goto REHASH;
}
}
for ( @pat ) {
s/\\([{},])/$1/g;
}
$entries{$cxix} = [doglob(1,@pat)];
}
# chuck it all out, quick or slow
if (wantarray) {
return @{delete $entries{$cxix}};
}
else {
if (scalar @{$entries{$cxix}}) {
return shift @{$entries{$cxix}};
}
else {
# return undef for EOL
delete $entries{$cxix};
return undef;
}
}
}
{
no strict 'refs';
sub import {
my $pkg = shift;
return unless @_;
my $sym = shift;
my $callpkg = ($sym =~ s/^GLOBAL_//s ? 'CORE::GLOBAL' : caller(0));
*{$callpkg.'::'.$sym} = \&{$pkg.'::'.$sym} if $sym eq 'glob';
}
}
1;
__END__
=head1 NAME
File::DosGlob - DOS like globbing and then some
=head1 SYNOPSIS
require 5.004;
# override CORE::glob in current package
use File::DosGlob 'glob';
# override CORE::glob in ALL packages (use with extreme caution!)
use File::DosGlob 'GLOBAL_glob';
@perlfiles = glob "..\\pe?l/*.p?";
print <..\\pe?l/*.p?>;
# from the command line (overrides only in main::)
> perl -MFile::DosGlob=glob -e "print <../pe*/*p?>"
=head1 DESCRIPTION
A module that implements DOS-like globbing with a few enhancements.
It is largely compatible with perlglob.exe (the M$ setargv.obj
version) in all but one respect--it understands wildcards in
directory components.
For example, C<< <..\\l*b\\file/*glob.p?> >> will work as expected (in
that it will find something like '..\lib\File/DosGlob.pm' alright).
Note that all path components are case-insensitive, and that
backslashes and forward slashes are both accepted, and preserved.
You may have to double the backslashes if you are putting them in
literally, due to double-quotish parsing of the pattern by perl.
Spaces in the argument delimit distinct patterns, so
C<glob('*.exe *.dll')> globs all filenames that end in C<.exe>
or C<.dll>. If you want to put in literal spaces in the glob
pattern, you can escape them with either double quotes, or backslashes.
e.g. C<glob('c:/"Program Files"/*/*.dll')>, or
C<glob('c:/Program\ Files/*/*.dll')>. The argument is tokenized using
C<Text::ParseWords::parse_line()>, so see L<Text::ParseWords> for details
of the quoting rules used.
Extending it to csh patterns is left as an exercise to the reader.
=head1 EXPORTS (by request only)
glob()
=head1 BUGS
Should probably be built into the core, and needs to stop
pandering to DOS habits. Needs a dose of optimization too.
=head1 AUTHOR
Gurusamy Sarathy <gsar@activestate.com>
=head1 HISTORY
=over 4
=item *
Support for globally overriding glob() (GSAR 3-JUN-98)
=item *
Scalar context, independent iterator context fixes (GSAR 15-SEP-97)
=item *
A few dir-vs-file optimizations result in glob importation being
10 times faster than using perlglob.exe, and using perlglob.bat is
only twice as slow as perlglob.exe (GSAR 28-MAY-97)
=item *
Several cleanups prompted by lack of compatible perlglob.exe
under Borland (GSAR 27-MAY-97)
=item *
Initial version (GSAR 20-FEB-97)
=back
=head1 SEE ALSO
perl
perlglob.bat
Text::ParseWords
=cut

View File

@@ -0,0 +1,405 @@
package File::Glob;
use strict;
our($DEFAULT_FLAGS);
require XSLoader;
# NOTE: The glob() export is only here for compatibility with 5.6.0.
# csh_glob() should not be used directly, unless you know what you're doing.
our %EXPORT_TAGS = (
'glob' => [ qw(
GLOB_ABEND
GLOB_ALPHASORT
GLOB_ALTDIRFUNC
GLOB_BRACE
GLOB_CSH
GLOB_ERR
GLOB_ERROR
GLOB_LIMIT
GLOB_MARK
GLOB_NOCASE
GLOB_NOCHECK
GLOB_NOMAGIC
GLOB_NOSORT
GLOB_NOSPACE
GLOB_QUOTE
GLOB_TILDE
bsd_glob
) ],
);
$EXPORT_TAGS{bsd_glob} = [@{$EXPORT_TAGS{glob}}];
our @EXPORT_OK = (@{$EXPORT_TAGS{'glob'}}, 'csh_glob');
our $VERSION = '1.40';
sub import {
require Exporter;
local $Exporter::ExportLevel = $Exporter::ExportLevel + 1;
Exporter::import(grep {
my $passthrough;
if ($_ eq ':case') {
$DEFAULT_FLAGS &= ~GLOB_NOCASE()
}
elsif ($_ eq ':nocase') {
$DEFAULT_FLAGS |= GLOB_NOCASE();
}
elsif ($_ eq ':globally') {
no warnings 'redefine';
*CORE::GLOBAL::glob = \&File::Glob::csh_glob;
}
elsif ($_ eq ':bsd_glob') {
no strict; *{caller."::glob"} = \&bsd_glob_override;
$passthrough = 1;
}
else {
$passthrough = 1;
}
$passthrough;
} @_);
}
XSLoader::load();
$DEFAULT_FLAGS = GLOB_CSH();
if ($^O =~ /^(?:MSWin32|VMS|os2|riscos)$/) {
$DEFAULT_FLAGS |= GLOB_NOCASE();
}
1;
__END__
=head1 NAME
File::Glob - Perl extension for BSD glob routine
=head1 SYNOPSIS
use File::Glob ':bsd_glob';
@list = bsd_glob('*.[ch]');
$homedir = bsd_glob('~gnat', GLOB_TILDE | GLOB_ERR);
if (GLOB_ERROR) {
# an error occurred reading $homedir
}
## override the core glob (CORE::glob() does this automatically
## by default anyway, since v5.6.0)
use File::Glob ':globally';
my @sources = <*.{c,h,y}>;
## override the core glob, forcing case sensitivity
use File::Glob qw(:globally :case);
my @sources = <*.{c,h,y}>;
## override the core glob forcing case insensitivity
use File::Glob qw(:globally :nocase);
my @sources = <*.{c,h,y}>;
## glob on all files in home directory
use File::Glob ':globally';
my @sources = <~gnat/*>;
=head1 DESCRIPTION
The glob angle-bracket operator C<< <> >> is a pathname generator that
implements the rules for file name pattern matching used by Unix-like shells
such as the Bourne shell or C shell.
File::Glob::bsd_glob() implements the FreeBSD glob(3) routine, which is
a superset of the POSIX glob() (described in IEEE Std 1003.2 "POSIX.2").
bsd_glob() takes a mandatory C<pattern> argument, and an optional
C<flags> argument, and returns a list of filenames matching the
pattern, with interpretation of the pattern modified by the C<flags>
variable.
Since v5.6.0, Perl's CORE::glob() is implemented in terms of bsd_glob().
Note that they don't share the same prototype--CORE::glob() only accepts
a single argument. Due to historical reasons, CORE::glob() will also
split its argument on whitespace, treating it as multiple patterns,
whereas bsd_glob() considers them as one pattern. But see C<:bsd_glob>
under L</EXPORTS>, below.
=head2 META CHARACTERS
\ Quote the next metacharacter
[] Character class
{} Multiple pattern
* Match any string of characters
? Match any single character
~ User name home directory
The metanotation C<a{b,c,d}e> is a shorthand for C<abe ace ade>. Left to
right order is preserved, with results of matches being sorted separately
at a low level to preserve this order. As a special case C<{>, C<}>, and
C<{}> are passed undisturbed.
=head2 EXPORTS
See also the L</POSIX FLAGS> below, which can be exported individually.
=head3 C<:bsd_glob>
The C<:bsd_glob> export tag exports bsd_glob() and the constants listed
below. It also overrides glob() in the calling package with one that
behaves like bsd_glob() with regard to spaces (the space is treated as part
of a file name), but supports iteration in scalar context; i.e., it
preserves the core function's feature of returning the next item each time
it is called.
=head3 C<:glob>
The C<:glob> tag, now discouraged, is the old version of C<:bsd_glob>. It
exports the same constants and functions, but its glob() override does not
support iteration; it returns the last file name in scalar context. That
means this will loop forever:
use File::Glob ':glob';
while (my $file = <* copy.txt>) {
...
}
=head3 C<bsd_glob>
This function, which is included in the two export tags listed above,
takes one or two arguments. The first is the glob pattern. The
second, if given, is a set of flags ORed together. The available
flags and the default set of flags are listed below under L</POSIX FLAGS>.
Remember that to use the named constants for flags you must import
them, for example with C<:bsd_glob> described above. If not imported,
and C<use strict> is not in effect, then the constants will be
treated as bareword strings, which won't do what you what.
=head3 C<:nocase> and C<:case>
These two export tags globally modify the default flags that bsd_glob()
and, except on VMS, Perl's built-in C<glob> operator use. C<GLOB_NOCASE>
is turned on or off, respectively.
=head3 C<csh_glob>
The csh_glob() function can also be exported, but you should not use it
directly unless you really know what you are doing. It splits the pattern
into words and feeds each one to bsd_glob(). Perl's own glob() function
uses this internally.
=head2 POSIX FLAGS
If no flags argument is give then C<GLOB_CSH> is set, and on VMS and
Windows systems, C<GLOB_NOCASE> too. Otherwise the flags to use are
determined solely by the flags argument. The POSIX defined flags are:
=over 4
=item C<GLOB_ERR>
Force bsd_glob() to return an error when it encounters a directory it
cannot open or read. Ordinarily bsd_glob() continues to find matches.
=item C<GLOB_LIMIT>
Make bsd_glob() return an error (GLOB_NOSPACE) when the pattern expands
to a size bigger than the system constant C<ARG_MAX> (usually found in
limits.h). If your system does not define this constant, bsd_glob() uses
C<sysconf(_SC_ARG_MAX)> or C<_POSIX_ARG_MAX> where available (in that
order). You can inspect these values using the standard C<POSIX>
extension.
=item C<GLOB_MARK>
Each pathname that is a directory that matches the pattern has a slash
appended.
=item C<GLOB_NOCASE>
By default, file names are assumed to be case sensitive; this flag
makes bsd_glob() treat case differences as not significant.
=item C<GLOB_NOCHECK>
If the pattern does not match any pathname, then bsd_glob() returns a list
consisting of only the pattern. If C<GLOB_QUOTE> is set, its effect
is present in the pattern returned.
=item C<GLOB_NOSORT>
By default, the pathnames are sorted in ascending ASCII order; this
flag prevents that sorting (speeding up bsd_glob()).
=back
The FreeBSD extensions to the POSIX standard are the following flags:
=over 4
=item C<GLOB_BRACE>
Pre-process the string to expand C<{pat,pat,...}> strings like csh(1).
The pattern '{}' is left unexpanded for historical reasons (and csh(1)
does the same thing to ease typing of find(1) patterns).
=item C<GLOB_NOMAGIC>
Same as C<GLOB_NOCHECK> but it only returns the pattern if it does not
contain any of the special characters "*", "?" or "[". C<NOMAGIC> is
provided to simplify implementing the historic csh(1) globbing
behaviour and should probably not be used anywhere else.
=item C<GLOB_QUOTE>
Use the backslash ('\') character for quoting: every occurrence of a
backslash followed by a character in the pattern is replaced by that
character, avoiding any special interpretation of the character.
(But see below for exceptions on DOSISH systems).
=item C<GLOB_TILDE>
Expand patterns that start with '~' to user name home directories.
=item C<GLOB_CSH>
For convenience, C<GLOB_CSH> is a synonym for
C<GLOB_BRACE | GLOB_NOMAGIC | GLOB_QUOTE | GLOB_TILDE | GLOB_ALPHASORT>.
=back
The POSIX provided C<GLOB_APPEND>, C<GLOB_DOOFFS>, and the FreeBSD
extensions C<GLOB_ALTDIRFUNC>, and C<GLOB_MAGCHAR> flags have not been
implemented in the Perl version because they involve more complex
interaction with the underlying C structures.
The following flag has been added in the Perl implementation for
csh compatibility:
=over 4
=item C<GLOB_ALPHASORT>
If C<GLOB_NOSORT> is not in effect, sort filenames is alphabetical
order (case does not matter) rather than in ASCII order.
=back
=head1 DIAGNOSTICS
bsd_glob() returns a list of matching paths, possibly zero length. If an
error occurred, &File::Glob::GLOB_ERROR will be non-zero and C<$!> will be
set. &File::Glob::GLOB_ERROR is guaranteed to be zero if no error occurred,
or one of the following values otherwise:
=over 4
=item C<GLOB_NOSPACE>
An attempt to allocate memory failed.
=item C<GLOB_ABEND>
The glob was stopped because an error was encountered.
=back
In the case where bsd_glob() has found some matching paths, but is
interrupted by an error, it will return a list of filenames B<and>
set &File::Glob::ERROR.
Note that bsd_glob() deviates from POSIX and FreeBSD glob(3) behaviour
by not considering C<ENOENT> and C<ENOTDIR> as errors - bsd_glob() will
continue processing despite those errors, unless the C<GLOB_ERR> flag is
set.
Be aware that all filenames returned from File::Glob are tainted.
=head1 NOTES
=over 4
=item *
If you want to use multiple patterns, e.g. C<bsd_glob("a* b*")>, you should
probably throw them in a set as in C<bsd_glob("{a*,b*}")>. This is because
the argument to bsd_glob() isn't subjected to parsing by the C shell.
Remember that you can use a backslash to escape things.
=item *
On DOSISH systems, backslash is a valid directory separator character.
In this case, use of backslash as a quoting character (via GLOB_QUOTE)
interferes with the use of backslash as a directory separator. The
best (simplest, most portable) solution is to use forward slashes for
directory separators, and backslashes for quoting. However, this does
not match "normal practice" on these systems. As a concession to user
expectation, therefore, backslashes (under GLOB_QUOTE) only quote the
glob metacharacters '[', ']', '{', '}', '-', '~', and backslash itself.
All other backslashes are passed through unchanged.
=item *
Win32 users should use the real slash. If you really want to use
backslashes, consider using Sarathy's File::DosGlob, which comes with
the standard Perl distribution.
=back
=head1 SEE ALSO
L<perlfunc/glob>, glob(3)
=head1 AUTHOR
The Perl interface was written by Nathan Torkington E<lt>gnat@frii.comE<gt>,
and is released under the artistic license. Further modifications were
made by Greg Bacon E<lt>gbacon@cs.uah.eduE<gt>, Gurusamy Sarathy
E<lt>gsar@activestate.comE<gt>, and Thomas Wegner
E<lt>wegner_thomas@yahoo.comE<gt>. The C glob code has the
following copyright:
Copyright (c) 1989, 1993 The Regents of the University of California.
All rights reserved.
This code is derived from software contributed to Berkeley by
Guido van Rossum.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
=over 4
=item 1.
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
=item 2.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
=item 3.
Neither the name of the University nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
=back
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
=cut

View File

@@ -0,0 +1,342 @@
package File::Spec;
use strict;
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
my %module = (
MSWin32 => 'Win32',
os2 => 'OS2',
VMS => 'VMS',
NetWare => 'Win32', # Yes, File::Spec::Win32 works on NetWare.
symbian => 'Win32', # Yes, File::Spec::Win32 works on symbian.
dos => 'OS2', # Yes, File::Spec::OS2 works on DJGPP.
cygwin => 'Cygwin',
msys => 'Cygwin',
amigaos => 'AmigaOS');
my $module = $module{$^O} || 'Unix';
require "File/Spec/$module.pm";
our @ISA = ("File::Spec::$module");
1;
__END__
=head1 NAME
File::Spec - portably perform operations on file names
=head1 SYNOPSIS
use File::Spec;
my $x = File::Spec->catfile('a', 'b', 'c');
which returns 'a/b/c' under Unix. Or:
use File::Spec::Functions;
my $x = catfile('a', 'b', 'c');
=head1 DESCRIPTION
This module is designed to support operations commonly performed on file
specifications (usually called "file names", but not to be confused with the
contents of a file, or Perl's file handles), such as concatenating several
directory and file names into a single path, or determining whether a path
is rooted. It is based on code directly taken from MakeMaker 5.17, code
written by Andreas KE<ouml>nig, Andy Dougherty, Charles Bailey, Ilya
Zakharevich, Paul Schinder, and others.
Since these functions are different for most operating systems, each set of
OS specific routines is available in a separate module, including:
File::Spec::Unix
File::Spec::Mac
File::Spec::OS2
File::Spec::Win32
File::Spec::VMS
The module appropriate for the current OS is automatically loaded by
File::Spec. Since some modules (like VMS) make use of facilities available
only under that OS, it may not be possible to load all modules under all
operating systems.
Since File::Spec is object oriented, subroutines should not be called directly,
as in:
File::Spec::catfile('a','b');
but rather as class methods:
File::Spec->catfile('a','b');
For simple uses, L<File::Spec::Functions> provides convenient functional
forms of these methods.
=head1 METHODS
=over 2
=item canonpath
X<canonpath>
No physical check on the filesystem, but a logical cleanup of a
path.
$cpath = File::Spec->canonpath( $path ) ;
Note that this does *not* collapse F<x/../y> sections into F<y>. This
is by design. If F</foo> on your system is a symlink to F</bar/baz>,
then F</foo/../quux> is actually F</bar/quux>, not F</quux> as a naive
F<../>-removal would give you. If you want to do this kind of
processing, you probably want C<Cwd>'s C<realpath()> function to
actually traverse the filesystem cleaning up paths like this.
=item catdir
X<catdir>
Concatenate two or more directory names to form a complete path ending
with a directory. But remove the trailing slash from the resulting
string, because it doesn't look good, isn't necessary and confuses
OS/2. Of course, if this is the root directory, don't cut off the
trailing slash :-)
$path = File::Spec->catdir( @directories );
=item catfile
X<catfile>
Concatenate one or more directory names and a filename to form a
complete path ending with a filename
$path = File::Spec->catfile( @directories, $filename );
=item curdir
X<curdir>
Returns a string representation of the current directory.
$curdir = File::Spec->curdir();
=item devnull
X<devnull>
Returns a string representation of the null device.
$devnull = File::Spec->devnull();
=item rootdir
X<rootdir>
Returns a string representation of the root directory.
$rootdir = File::Spec->rootdir();
=item tmpdir
X<tmpdir>
Returns a string representation of the first writable directory from a
list of possible temporary directories. Returns the current directory
if no writable temporary directories are found. The list of directories
checked depends on the platform; e.g. File::Spec::Unix checks C<$ENV{TMPDIR}>
(unless taint is on) and F</tmp>.
$tmpdir = File::Spec->tmpdir();
=item updir
X<updir>
Returns a string representation of the parent directory.
$updir = File::Spec->updir();
=item no_upwards
Given a list of files in a directory (such as from C<readdir()>),
strip out C<'.'> and C<'..'>.
B<SECURITY NOTE:> This does NOT filter paths containing C<'..'>, like
C<'../../../../etc/passwd'>, only literal matches to C<'.'> and C<'..'>.
@paths = File::Spec->no_upwards( readdir $dirhandle );
=item case_tolerant
Returns a true or false value indicating, respectively, that alphabetic
case is not or is significant when comparing file specifications.
Cygwin and Win32 accept an optional drive argument.
$is_case_tolerant = File::Spec->case_tolerant();
=item file_name_is_absolute
Takes as its argument a path, and returns true if it is an absolute path.
$is_absolute = File::Spec->file_name_is_absolute( $path );
This does not consult the local filesystem on Unix, Win32, OS/2, or
Mac OS (Classic). It does consult the working environment for VMS
(see L<File::Spec::VMS/file_name_is_absolute>).
=item path
X<path>
Takes no argument. Returns the environment variable C<PATH> (or the local
platform's equivalent) as a list.
@PATH = File::Spec->path();
=item join
X<join, path>
join is the same as catfile.
=item splitpath
X<splitpath> X<split, path>
Splits a path in to volume, directory, and filename portions. On systems
with no concept of volume, returns '' for volume.
($volume,$directories,$file) =
File::Spec->splitpath( $path );
($volume,$directories,$file) =
File::Spec->splitpath( $path, $no_file );
For systems with no syntax differentiating filenames from directories,
assumes that the last file is a path unless C<$no_file> is true or a
trailing separator or F</.> or F</..> is present. On Unix, this means that C<$no_file>
true makes this return ( '', $path, '' ).
The directory portion may or may not be returned with a trailing '/'.
The results can be passed to L</catpath()> to get back a path equivalent to
(usually identical to) the original path.
=item splitdir
X<splitdir> X<split, dir>
The opposite of L</catdir>.
@dirs = File::Spec->splitdir( $directories );
C<$directories> must be only the directory portion of the path on systems
that have the concept of a volume or that have path syntax that differentiates
files from directories.
Unlike just splitting the directories on the separator, empty
directory names (C<''>) can be returned, because these are significant
on some OSes.
=item catpath()
Takes volume, directory and file portions and returns an entire path. Under
Unix, C<$volume> is ignored, and directory and file are concatenated. A '/' is
inserted if need be. On other OSes, C<$volume> is significant.
$full_path = File::Spec->catpath( $volume, $directory, $file );
=item abs2rel
X<abs2rel> X<absolute, path> X<relative, path>
Takes a destination path and an optional base path returns a relative path
from the base path to the destination path:
$rel_path = File::Spec->abs2rel( $path ) ;
$rel_path = File::Spec->abs2rel( $path, $base ) ;
If C<$base> is not present or '', then L<Cwd::cwd()|Cwd> is used. If C<$base> is
relative, then it is converted to absolute form using
L</rel2abs()>. This means that it is taken to be relative to
L<Cwd::cwd()|Cwd>.
On systems with the concept of volume, if C<$path> and C<$base> appear to be
on two different volumes, we will not attempt to resolve the two
paths, and we will instead simply return C<$path>. Note that previous
versions of this module ignored the volume of C<$base>, which resulted in
garbage results part of the time.
On systems that have a grammar that indicates filenames, this ignores the
C<$base> filename as well. Otherwise all path components are assumed to be
directories.
If C<$path> is relative, it is converted to absolute form using L</rel2abs()>.
This means that it is taken to be relative to L<Cwd::cwd()|Cwd>.
No checks against the filesystem are made. On VMS, there is
interaction with the working environment, as logicals and
macros are expanded.
Based on code written by Shigio Yamaguchi.
=item rel2abs()
X<rel2abs> X<absolute, path> X<relative, path>
Converts a relative path to an absolute path.
$abs_path = File::Spec->rel2abs( $path ) ;
$abs_path = File::Spec->rel2abs( $path, $base ) ;
If C<$base> is not present or '', then L<Cwd::cwd()|Cwd> is used. If C<$base> is relative,
then it is converted to absolute form using L</rel2abs()>. This means that it
is taken to be relative to L<Cwd::cwd()|Cwd>.
On systems with the concept of volume, if C<$path> and C<$base> appear to be
on two different volumes, we will not attempt to resolve the two
paths, and we will instead simply return C<$path>. Note that previous
versions of this module ignored the volume of C<$base>, which resulted in
garbage results part of the time.
On systems that have a grammar that indicates filenames, this ignores the
C<$base> filename as well. Otherwise all path components are assumed to be
directories.
If C<$path> is absolute, it is cleaned up and returned using L</canonpath>.
No checks against the filesystem are made. On VMS, there is
interaction with the working environment, as logicals and
macros are expanded.
Based on code written by Shigio Yamaguchi.
=back
For further information, please see L<File::Spec::Unix>,
L<File::Spec::Mac>, L<File::Spec::OS2>, L<File::Spec::Win32>, or
L<File::Spec::VMS>.
=head1 SEE ALSO
L<File::Spec::Unix>, L<File::Spec::Mac>, L<File::Spec::OS2>,
L<File::Spec::Win32>, L<File::Spec::VMS>, L<File::Spec::Functions>,
L<ExtUtils::MakeMaker>
=head1 AUTHOR
Maintained by perl5-porters <F<perl5-porters@perl.org>>.
The vast majority of the code was written by
Kenneth Albanowski C<< <kjahds@kjahds.com> >>,
Andy Dougherty C<< <doughera@lafayette.edu> >>,
Andreas KE<ouml>nig C<< <A.Koenig@franz.ww.TU-Berlin.DE> >>,
Tim Bunce C<< <Tim.Bunce@ig.co.uk> >>.
VMS support by Charles Bailey C<< <bailey@newman.upenn.edu> >>.
OS/2 support by Ilya Zakharevich C<< <ilya@math.ohio-state.edu> >>.
Mac support by Paul Schinder C<< <schinder@pobox.com> >>, and
Thomas Wegner C<< <wegner_thomas@yahoo.com> >>.
abs2rel() and rel2abs() written by Shigio Yamaguchi C<< <shigio@tamacom.com> >>,
modified by Barrie Slaymaker C<< <barries@slaysys.com> >>.
splitpath(), splitdir(), catpath() and catdir() by Barrie Slaymaker.
=head1 COPYRIGHT
Copyright (c) 2004-2013 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,61 @@
package File::Spec::AmigaOS;
use strict;
require File::Spec::Unix;
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
our @ISA = qw(File::Spec::Unix);
=head1 NAME
File::Spec::AmigaOS - File::Spec for AmigaOS
=head1 SYNOPSIS
require File::Spec::AmigaOS; # Done automatically by File::Spec
# if needed
=head1 DESCRIPTION
Methods for manipulating file specifications.
=head1 METHODS
=over 2
=item tmpdir
Returns $ENV{TMPDIR} or if that is unset, "/t".
=cut
my $tmpdir;
sub tmpdir {
return $tmpdir if defined $tmpdir;
$tmpdir = $_[0]->_tmpdir( $ENV{TMPDIR}, "/t" );
}
=item file_name_is_absolute
Returns true if there's a colon in the file name,
or if it begins with a slash.
=cut
sub file_name_is_absolute {
my ($self, $file) = @_;
# Not 100% robust as a "/" must not preceded a ":"
# but this cannot happen in a well formed path.
return $file =~ m{^/|:}s;
}
=back
All the other methods are from L<File::Spec::Unix>.
=cut
1;

View File

@@ -0,0 +1,163 @@
package File::Spec::Cygwin;
use strict;
require File::Spec::Unix;
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
our @ISA = qw(File::Spec::Unix);
=head1 NAME
File::Spec::Cygwin - methods for Cygwin file specs
=head1 SYNOPSIS
require File::Spec::Cygwin; # Done internally by File::Spec if needed
=head1 DESCRIPTION
See L<File::Spec> and L<File::Spec::Unix>. This package overrides the
implementation of these methods, not the semantics.
This module is still in beta. Cygwin-knowledgeable folks are invited
to offer patches and suggestions.
=cut
=pod
=over 4
=item canonpath
Any C<\> (backslashes) are converted to C</> (forward slashes),
and then File::Spec::Unix canonpath() is called on the result.
=cut
sub canonpath {
my($self,$path) = @_;
return unless defined $path;
$path =~ s|\\|/|g;
# Handle network path names beginning with double slash
my $node = '';
if ( $path =~ s@^(//[^/]+)(?:/|\z)@/@s ) {
$node = $1;
}
return $node . $self->SUPER::canonpath($path);
}
sub catdir {
my $self = shift;
return unless @_;
# Don't create something that looks like a //network/path
if ($_[0] and ($_[0] eq '/' or $_[0] eq '\\')) {
shift;
return $self->SUPER::catdir('', @_);
}
$self->SUPER::catdir(@_);
}
=pod
=item file_name_is_absolute
True is returned if the file name begins with C<drive_letter:>,
and if not, File::Spec::Unix file_name_is_absolute() is called.
=cut
sub file_name_is_absolute {
my ($self,$file) = @_;
return 1 if $file =~ m{^([a-z]:)?[\\/]}is; # C:/test
return $self->SUPER::file_name_is_absolute($file);
}
=item tmpdir (override)
Returns a string representation of the first existing directory
from the following list:
$ENV{TMPDIR}
/tmp
$ENV{'TMP'}
$ENV{'TEMP'}
C:/temp
If running under taint mode, and if the environment
variables are tainted, they are not used.
=cut
sub tmpdir {
my $cached = $_[0]->_cached_tmpdir(qw 'TMPDIR TMP TEMP');
return $cached if defined $cached;
$_[0]->_cache_tmpdir(
$_[0]->_tmpdir(
$ENV{TMPDIR}, "/tmp", $ENV{'TMP'}, $ENV{'TEMP'}, 'C:/temp'
),
qw 'TMPDIR TMP TEMP'
);
}
=item case_tolerant
Override Unix. Cygwin case-tolerance depends on managed mount settings and
as with MsWin32 on GetVolumeInformation() $ouFsFlags == FS_CASE_SENSITIVE,
indicating the case significance when comparing file specifications.
Default: 1
=cut
sub case_tolerant {
return 1 unless ($^O eq 'cygwin' || $^O eq 'msys')
and defined &Cygwin::mount_flags;
my $drive = shift;
if (! $drive) {
my @flags = split(/,/, Cygwin::mount_flags('/msys'));
my $prefix = pop(@flags);
if (! $prefix || $prefix eq 'cygdrive') {
$drive = '/cygdrive/c';
} elsif ($prefix eq '/') {
$drive = '/c';
} else {
$drive = "$prefix/c";
}
}
my $mntopts = Cygwin::mount_flags($drive);
if ($mntopts and ($mntopts =~ /,managed/)) {
return 0;
}
eval {
local @INC = @INC;
pop @INC if $INC[-1] eq '.';
require Win32API::File;
} or return 1;
my $osFsType = "\0"x256;
my $osVolName = "\0"x256;
my $ouFsFlags = 0;
Win32API::File::GetVolumeInformation($drive, $osVolName, 256, [], [], $ouFsFlags, $osFsType, 256 );
if ($ouFsFlags & Win32API::File::FS_CASE_SENSITIVE()) { return 0; }
else { return 1; }
}
=back
=head1 COPYRIGHT
Copyright (c) 2004,2007 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
1;

View File

@@ -0,0 +1,78 @@
package File::Spec::Epoc;
use strict;
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
require File::Spec::Unix;
our @ISA = qw(File::Spec::Unix);
=head1 NAME
File::Spec::Epoc - methods for Epoc file specs
=head1 SYNOPSIS
require File::Spec::Epoc; # Done internally by File::Spec if needed
=head1 DESCRIPTION
See File::Spec::Unix for a documentation of the methods provided
there. This package overrides the implementation of these methods, not
the semantics.
This package is still a work in progress. ;-)
=cut
sub case_tolerant {
return 1;
}
=pod
=over 4
=item canonpath()
No physical check on the filesystem, but a logical cleanup of a
path. On UNIX eliminated successive slashes and successive "/.".
=back
=cut
sub canonpath {
my ($self,$path) = @_;
return unless defined $path;
$path =~ s|/+|/|g; # xx////xx -> xx/xx
$path =~ s|(/\.)+/|/|g; # xx/././xx -> xx/xx
$path =~ s|^(\./)+||s unless $path eq "./"; # ./xx -> xx
$path =~ s|^/(\.\./)+|/|s; # /../../xx -> xx
$path =~ s|/\Z(?!\n)|| unless $path eq "/"; # xx/ -> xx
return $path;
}
=pod
=head1 AUTHOR
o.flebbe@gmx.de
=head1 COPYRIGHT
Copyright (c) 2004 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
See L<File::Spec> and L<File::Spec::Unix>. This package overrides the
implementation of these methods, not the semantics.
=cut
1;

View File

@@ -0,0 +1,128 @@
package File::Spec::Functions;
use File::Spec;
use strict;
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(
canonpath
catdir
catfile
curdir
rootdir
updir
no_upwards
file_name_is_absolute
path
);
our @EXPORT_OK = qw(
devnull
tmpdir
splitpath
splitdir
catpath
abs2rel
rel2abs
case_tolerant
);
our %EXPORT_TAGS = ( ALL => [ @EXPORT_OK, @EXPORT ] );
require File::Spec::Unix;
my %udeps = (
canonpath => [],
catdir => [qw(canonpath)],
catfile => [qw(canonpath catdir)],
case_tolerant => [],
curdir => [],
devnull => [],
rootdir => [],
updir => [],
);
foreach my $meth (@EXPORT, @EXPORT_OK) {
my $sub = File::Spec->can($meth);
no strict 'refs';
if (exists($udeps{$meth}) && $sub == File::Spec::Unix->can($meth) &&
!(grep {
File::Spec->can($_) != File::Spec::Unix->can($_)
} @{$udeps{$meth}}) &&
defined(&{"File::Spec::Unix::_fn_$meth"})) {
*{$meth} = \&{"File::Spec::Unix::_fn_$meth"};
} else {
*{$meth} = sub {&$sub('File::Spec', @_)};
}
}
1;
__END__
=head1 NAME
File::Spec::Functions - portably perform operations on file names
=head1 SYNOPSIS
use File::Spec::Functions;
$x = catfile('a','b');
=head1 DESCRIPTION
This module exports convenience functions for all of the class methods
provided by File::Spec.
For a reference of available functions, please consult L<File::Spec::Unix>,
which contains the entire set, and which is inherited by the modules for
other platforms. For further information, please see L<File::Spec::Mac>,
L<File::Spec::OS2>, L<File::Spec::Win32>, or L<File::Spec::VMS>.
=head2 Exports
The following functions are exported by default.
canonpath
catdir
catfile
curdir
rootdir
updir
no_upwards
file_name_is_absolute
path
The following functions are exported only by request.
devnull
tmpdir
splitpath
splitdir
catpath
abs2rel
rel2abs
case_tolerant
All the functions may be imported using the C<:ALL> tag.
=head1 COPYRIGHT
Copyright (c) 2004 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
File::Spec, File::Spec::Unix, File::Spec::Mac, File::Spec::OS2,
File::Spec::Win32, File::Spec::VMS, ExtUtils::MakeMaker
=cut

View File

@@ -0,0 +1,765 @@
package File::Spec::Mac;
use strict;
use Cwd ();
require File::Spec::Unix;
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
our @ISA = qw(File::Spec::Unix);
sub case_tolerant { 1 }
=head1 NAME
File::Spec::Mac - File::Spec for Mac OS (Classic)
=head1 SYNOPSIS
require File::Spec::Mac; # Done internally by File::Spec if needed
=head1 DESCRIPTION
Methods for manipulating file specifications.
=head1 METHODS
=over 2
=item canonpath
On Mac OS, there's nothing to be done. Returns what it's given.
=cut
sub canonpath {
my ($self,$path) = @_;
return $path;
}
=item catdir()
Concatenate two or more directory names to form a path separated by colons
(":") ending with a directory. Resulting paths are B<relative> by default,
but can be forced to be absolute (but avoid this, see below). Automatically
puts a trailing ":" on the end of the complete path, because that's what's
done in MacPerl's environment and helps to distinguish a file path from a
directory path.
B<IMPORTANT NOTE:> Beginning with version 1.3 of this module, the resulting
path is relative by default and I<not> absolute. This decision was made due
to portability reasons. Since C<File::Spec-E<gt>catdir()> returns relative paths
on all other operating systems, it will now also follow this convention on Mac
OS. Note that this may break some existing scripts.
The intended purpose of this routine is to concatenate I<directory names>.
But because of the nature of Macintosh paths, some additional possibilities
are allowed to make using this routine give reasonable results for some
common situations. In other words, you are also allowed to concatenate
I<paths> instead of directory names (strictly speaking, a string like ":a"
is a path, but not a name, since it contains a punctuation character ":").
So, beside calls like
catdir("a") = ":a:"
catdir("a","b") = ":a:b:"
catdir() = "" (special case)
calls like the following
catdir(":a:") = ":a:"
catdir(":a","b") = ":a:b:"
catdir(":a:","b") = ":a:b:"
catdir(":a:",":b:") = ":a:b:"
catdir(":") = ":"
are allowed.
Here are the rules that are used in C<catdir()>; note that we try to be as
compatible as possible to Unix:
=over 2
=item 1.
The resulting path is relative by default, i.e. the resulting path will have a
leading colon.
=item 2.
A trailing colon is added automatically to the resulting path, to denote a
directory.
=item 3.
Generally, each argument has one leading ":" and one trailing ":"
removed (if any). They are then joined together by a ":". Special
treatment applies for arguments denoting updir paths like "::lib:",
see (4), or arguments consisting solely of colons ("colon paths"),
see (5).
=item 4.
When an updir path like ":::lib::" is passed as argument, the number
of directories to climb up is handled correctly, not removing leading
or trailing colons when necessary. E.g.
catdir(":::a","::b","c") = ":::a::b:c:"
catdir(":::a::","::b","c") = ":::a:::b:c:"
=item 5.
Adding a colon ":" or empty string "" to a path at I<any> position
doesn't alter the path, i.e. these arguments are ignored. (When a ""
is passed as the first argument, it has a special meaning, see
(6)). This way, a colon ":" is handled like a "." (curdir) on Unix,
while an empty string "" is generally ignored (see
L<File::Spec::Unix/canonpath()> ). Likewise, a "::" is handled like a ".."
(updir), and a ":::" is handled like a "../.." etc. E.g.
catdir("a",":",":","b") = ":a:b:"
catdir("a",":","::",":b") = ":a::b:"
=item 6.
If the first argument is an empty string "" or is a volume name, i.e. matches
the pattern /^[^:]+:/, the resulting path is B<absolute>.
=item 7.
Passing an empty string "" as the first argument to C<catdir()> is
like passingC<File::Spec-E<gt>rootdir()> as the first argument, i.e.
catdir("","a","b") is the same as
catdir(rootdir(),"a","b").
This is true on Unix, where C<catdir("","a","b")> yields "/a/b" and
C<rootdir()> is "/". Note that C<rootdir()> on Mac OS is the startup
volume, which is the closest in concept to Unix' "/". This should help
to run existing scripts originally written for Unix.
=item 8.
For absolute paths, some cleanup is done, to ensure that the volume
name isn't immediately followed by updirs. This is invalid, because
this would go beyond "root". Generally, these cases are handled like
their Unix counterparts:
Unix:
Unix->catdir("","") = "/"
Unix->catdir("",".") = "/"
Unix->catdir("","..") = "/" # can't go
# beyond root
Unix->catdir("",".","..","..","a") = "/a"
Mac:
Mac->catdir("","") = rootdir() # (e.g. "HD:")
Mac->catdir("",":") = rootdir()
Mac->catdir("","::") = rootdir() # can't go
# beyond root
Mac->catdir("",":","::","::","a") = rootdir() . "a:"
# (e.g. "HD:a:")
However, this approach is limited to the first arguments following
"root" (again, see L<File::Spec::Unix/canonpath()>. If there are more
arguments that move up the directory tree, an invalid path going
beyond root can be created.
=back
As you've seen, you can force C<catdir()> to create an absolute path
by passing either an empty string or a path that begins with a volume
name as the first argument. However, you are strongly encouraged not
to do so, since this is done only for backward compatibility. Newer
versions of File::Spec come with a method called C<catpath()> (see
below), that is designed to offer a portable solution for the creation
of absolute paths. It takes volume, directory and file portions and
returns an entire path. While C<catdir()> is still suitable for the
concatenation of I<directory names>, you are encouraged to use
C<catpath()> to concatenate I<volume names> and I<directory
paths>. E.g.
$dir = File::Spec->catdir("tmp","sources");
$abs_path = File::Spec->catpath("MacintoshHD:", $dir,"");
yields
"MacintoshHD:tmp:sources:" .
=cut
sub catdir {
my $self = shift;
return '' unless @_;
my @args = @_;
my $first_arg;
my $relative;
# take care of the first argument
if ($args[0] eq '') { # absolute path, rootdir
shift @args;
$relative = 0;
$first_arg = $self->rootdir;
} elsif ($args[0] =~ /^[^:]+:/) { # absolute path, volume name
$relative = 0;
$first_arg = shift @args;
# add a trailing ':' if need be (may be it's a path like HD:dir)
$first_arg = "$first_arg:" unless ($first_arg =~ /:\Z(?!\n)/);
} else { # relative path
$relative = 1;
if ( $args[0] =~ /^::+\Z(?!\n)/ ) {
# updir colon path ('::', ':::' etc.), don't shift
$first_arg = ':';
} elsif ($args[0] eq ':') {
$first_arg = shift @args;
} else {
# add a trailing ':' if need be
$first_arg = shift @args;
$first_arg = "$first_arg:" unless ($first_arg =~ /:\Z(?!\n)/);
}
}
# For all other arguments,
# (a) ignore arguments that equal ':' or '',
# (b) handle updir paths specially:
# '::' -> concatenate '::'
# '::' . '::' -> concatenate ':::' etc.
# (c) add a trailing ':' if need be
my $result = $first_arg;
while (@args) {
my $arg = shift @args;
unless (($arg eq '') || ($arg eq ':')) {
if ($arg =~ /^::+\Z(?!\n)/ ) { # updir colon path like ':::'
my $updir_count = length($arg) - 1;
while ((@args) && ($args[0] =~ /^::+\Z(?!\n)/) ) { # while updir colon path
$arg = shift @args;
$updir_count += (length($arg) - 1);
}
$arg = (':' x $updir_count);
} else {
$arg =~ s/^://s; # remove a leading ':' if any
$arg = "$arg:" unless ($arg =~ /:\Z(?!\n)/); # ensure trailing ':'
}
$result .= $arg;
}#unless
}
if ( ($relative) && ($result !~ /^:/) ) {
# add a leading colon if need be
$result = ":$result";
}
unless ($relative) {
# remove updirs immediately following the volume name
$result =~ s/([^:]+:)(:*)(.*)\Z(?!\n)/$1$3/;
}
return $result;
}
=item catfile
Concatenate one or more directory names and a filename to form a
complete path ending with a filename. Resulting paths are B<relative>
by default, but can be forced to be absolute (but avoid this).
B<IMPORTANT NOTE:> Beginning with version 1.3 of this module, the
resulting path is relative by default and I<not> absolute. This
decision was made due to portability reasons. Since
C<File::Spec-E<gt>catfile()> returns relative paths on all other
operating systems, it will now also follow this convention on Mac OS.
Note that this may break some existing scripts.
The last argument is always considered to be the file portion. Since
C<catfile()> uses C<catdir()> (see above) for the concatenation of the
directory portions (if any), the following with regard to relative and
absolute paths is true:
catfile("") = ""
catfile("file") = "file"
but
catfile("","") = rootdir() # (e.g. "HD:")
catfile("","file") = rootdir() . file # (e.g. "HD:file")
catfile("HD:","file") = "HD:file"
This means that C<catdir()> is called only when there are two or more
arguments, as one might expect.
Note that the leading ":" is removed from the filename, so that
catfile("a","b","file") = ":a:b:file" and
catfile("a","b",":file") = ":a:b:file"
give the same answer.
To concatenate I<volume names>, I<directory paths> and I<filenames>,
you are encouraged to use C<catpath()> (see below).
=cut
sub catfile {
my $self = shift;
return '' unless @_;
my $file = pop @_;
return $file unless @_;
my $dir = $self->catdir(@_);
$file =~ s/^://s;
return $dir.$file;
}
=item curdir
Returns a string representing the current directory. On Mac OS, this is ":".
=cut
sub curdir {
return ":";
}
=item devnull
Returns a string representing the null device. On Mac OS, this is "Dev:Null".
=cut
sub devnull {
return "Dev:Null";
}
=item rootdir
Returns the empty string. Mac OS has no real root directory.
=cut
sub rootdir { '' }
=item tmpdir
Returns the contents of $ENV{TMPDIR}, if that directory exits or the
current working directory otherwise. Under MacPerl, $ENV{TMPDIR} will
contain a path like "MacintoshHD:Temporary Items:", which is a hidden
directory on your startup volume.
=cut
sub tmpdir {
my $cached = $_[0]->_cached_tmpdir('TMPDIR');
return $cached if defined $cached;
$_[0]->_cache_tmpdir($_[0]->_tmpdir( $ENV{TMPDIR} ), 'TMPDIR');
}
=item updir
Returns a string representing the parent directory. On Mac OS, this is "::".
=cut
sub updir {
return "::";
}
=item file_name_is_absolute
Takes as argument a path and returns true, if it is an absolute path.
If the path has a leading ":", it's a relative path. Otherwise, it's an
absolute path, unless the path doesn't contain any colons, i.e. it's a name
like "a". In this particular case, the path is considered to be relative
(i.e. it is considered to be a filename). Use ":" in the appropriate place
in the path if you want to distinguish unambiguously. As a special case,
the filename '' is always considered to be absolute. Note that with version
1.2 of File::Spec::Mac, this does no longer consult the local filesystem.
E.g.
File::Spec->file_name_is_absolute("a"); # false (relative)
File::Spec->file_name_is_absolute(":a:b:"); # false (relative)
File::Spec->file_name_is_absolute("MacintoshHD:");
# true (absolute)
File::Spec->file_name_is_absolute(""); # true (absolute)
=cut
sub file_name_is_absolute {
my ($self,$file) = @_;
if ($file =~ /:/) {
return (! ($file =~ m/^:/s) );
} elsif ( $file eq '' ) {
return 1 ;
} else {
return 0; # i.e. a file like "a"
}
}
=item path
Returns the null list for the MacPerl application, since the concept is
usually meaningless under Mac OS. But if you're using the MacPerl tool under
MPW, it gives back $ENV{Commands} suitably split, as is done in
:lib:ExtUtils:MM_Mac.pm.
=cut
sub path {
#
# The concept is meaningless under the MacPerl application.
# Under MPW, it has a meaning.
#
return unless exists $ENV{Commands};
return split(/,/, $ENV{Commands});
}
=item splitpath
($volume,$directories,$file) = File::Spec->splitpath( $path );
($volume,$directories,$file) = File::Spec->splitpath( $path,
$no_file );
Splits a path into volume, directory, and filename portions.
On Mac OS, assumes that the last part of the path is a filename unless
$no_file is true or a trailing separator ":" is present.
The volume portion is always returned with a trailing ":". The directory portion
is always returned with a leading (to denote a relative path) and a trailing ":"
(to denote a directory). The file portion is always returned I<without> a leading ":".
Empty portions are returned as empty string ''.
The results can be passed to C<catpath()> to get back a path equivalent to
(usually identical to) the original path.
=cut
sub splitpath {
my ($self,$path, $nofile) = @_;
my ($volume,$directory,$file);
if ( $nofile ) {
( $volume, $directory ) = $path =~ m|^((?:[^:]+:)?)(.*)|s;
}
else {
$path =~
m|^( (?: [^:]+: )? )
( (?: .*: )? )
( .* )
|xs;
$volume = $1;
$directory = $2;
$file = $3;
}
$volume = '' unless defined($volume);
$directory = ":$directory" if ( $volume && $directory ); # take care of "HD::dir"
if ($directory) {
# Make sure non-empty directories begin and end in ':'
$directory .= ':' unless (substr($directory,-1) eq ':');
$directory = ":$directory" unless (substr($directory,0,1) eq ':');
} else {
$directory = '';
}
$file = '' unless defined($file);
return ($volume,$directory,$file);
}
=item splitdir
The opposite of C<catdir()>.
@dirs = File::Spec->splitdir( $directories );
$directories should be only the directory portion of the path on systems
that have the concept of a volume or that have path syntax that differentiates
files from directories. Consider using C<splitpath()> otherwise.
Unlike just splitting the directories on the separator, empty directory names
(C<"">) can be returned. Since C<catdir()> on Mac OS always appends a trailing
colon to distinguish a directory path from a file path, a single trailing colon
will be ignored, i.e. there's no empty directory name after it.
Hence, on Mac OS, both
File::Spec->splitdir( ":a:b::c:" ); and
File::Spec->splitdir( ":a:b::c" );
yield:
( "a", "b", "::", "c")
while
File::Spec->splitdir( ":a:b::c::" );
yields:
( "a", "b", "::", "c", "::")
=cut
sub splitdir {
my ($self, $path) = @_;
my @result = ();
my ($head, $sep, $tail, $volume, $directories);
return @result if ( (!defined($path)) || ($path eq '') );
return (':') if ($path eq ':');
( $volume, $sep, $directories ) = $path =~ m|^((?:[^:]+:)?)(:*)(.*)|s;
# deprecated, but handle it correctly
if ($volume) {
push (@result, $volume);
$sep .= ':';
}
while ($sep || $directories) {
if (length($sep) > 1) {
my $updir_count = length($sep) - 1;
for (my $i=0; $i<$updir_count; $i++) {
# push '::' updir_count times;
# simulate Unix '..' updirs
push (@result, '::');
}
}
$sep = '';
if ($directories) {
( $head, $sep, $tail ) = $directories =~ m|^((?:[^:]+)?)(:*)(.*)|s;
push (@result, $head);
$directories = $tail;
}
}
return @result;
}
=item catpath
$path = File::Spec->catpath($volume,$directory,$file);
Takes volume, directory and file portions and returns an entire path. On Mac OS,
$volume, $directory and $file are concatenated. A ':' is inserted if need be. You
may pass an empty string for each portion. If all portions are empty, the empty
string is returned. If $volume is empty, the result will be a relative path,
beginning with a ':'. If $volume and $directory are empty, a leading ":" (if any)
is removed form $file and the remainder is returned. If $file is empty, the
resulting path will have a trailing ':'.
=cut
sub catpath {
my ($self,$volume,$directory,$file) = @_;
if ( (! $volume) && (! $directory) ) {
$file =~ s/^:// if $file;
return $file ;
}
# We look for a volume in $volume, then in $directory, but not both
my ($dir_volume, $dir_dirs) = $self->splitpath($directory, 1);
$volume = $dir_volume unless length $volume;
my $path = $volume; # may be ''
$path .= ':' unless (substr($path, -1) eq ':'); # ensure trailing ':'
if ($directory) {
$directory = $dir_dirs if $volume;
$directory =~ s/^://; # remove leading ':' if any
$path .= $directory;
$path .= ':' unless (substr($path, -1) eq ':'); # ensure trailing ':'
}
if ($file) {
$file =~ s/^://; # remove leading ':' if any
$path .= $file;
}
return $path;
}
=item abs2rel
Takes a destination path and an optional base path and returns a relative path
from the base path to the destination path:
$rel_path = File::Spec->abs2rel( $path ) ;
$rel_path = File::Spec->abs2rel( $path, $base ) ;
Note that both paths are assumed to have a notation that distinguishes a
directory path (with trailing ':') from a file path (without trailing ':').
If $base is not present or '', then the current working directory is used.
If $base is relative, then it is converted to absolute form using C<rel2abs()>.
This means that it is taken to be relative to the current working directory.
If $path and $base appear to be on two different volumes, we will not
attempt to resolve the two paths, and we will instead simply return
$path. Note that previous versions of this module ignored the volume
of $base, which resulted in garbage results part of the time.
If $base doesn't have a trailing colon, the last element of $base is
assumed to be a filename. This filename is ignored. Otherwise all path
components are assumed to be directories.
If $path is relative, it is converted to absolute form using C<rel2abs()>.
This means that it is taken to be relative to the current working directory.
Based on code written by Shigio Yamaguchi.
=cut
# maybe this should be done in canonpath() ?
sub _resolve_updirs {
my $path = shift @_;
my $proceed;
# resolve any updirs, e.g. "HD:tmp::file" -> "HD:file"
do {
$proceed = ($path =~ s/^(.*):[^:]+::(.*?)\z/$1:$2/);
} while ($proceed);
return $path;
}
sub abs2rel {
my($self,$path,$base) = @_;
# Clean up $path
if ( ! $self->file_name_is_absolute( $path ) ) {
$path = $self->rel2abs( $path ) ;
}
# Figure out the effective $base and clean it up.
if ( !defined( $base ) || $base eq '' ) {
$base = Cwd::getcwd();
}
elsif ( ! $self->file_name_is_absolute( $base ) ) {
$base = $self->rel2abs( $base ) ;
$base = _resolve_updirs( $base ); # resolve updirs in $base
}
else {
$base = _resolve_updirs( $base );
}
# Split up paths - ignore $base's file
my ( $path_vol, $path_dirs, $path_file ) = $self->splitpath( $path );
my ( $base_vol, $base_dirs ) = $self->splitpath( $base );
return $path unless lc( $path_vol ) eq lc( $base_vol );
# Now, remove all leading components that are the same
my @pathchunks = $self->splitdir( $path_dirs );
my @basechunks = $self->splitdir( $base_dirs );
while ( @pathchunks &&
@basechunks &&
lc( $pathchunks[0] ) eq lc( $basechunks[0] ) ) {
shift @pathchunks ;
shift @basechunks ;
}
# @pathchunks now has the directories to descend in to.
# ensure relative path, even if @pathchunks is empty
$path_dirs = $self->catdir( ':', @pathchunks );
# @basechunks now contains the number of directories to climb out of.
$base_dirs = (':' x @basechunks) . ':' ;
return $self->catpath( '', $self->catdir( $base_dirs, $path_dirs ), $path_file ) ;
}
=item rel2abs
Converts a relative path to an absolute path:
$abs_path = File::Spec->rel2abs( $path ) ;
$abs_path = File::Spec->rel2abs( $path, $base ) ;
Note that both paths are assumed to have a notation that distinguishes a
directory path (with trailing ':') from a file path (without trailing ':').
If $base is not present or '', then $base is set to the current working
directory. If $base is relative, then it is converted to absolute form
using C<rel2abs()>. This means that it is taken to be relative to the
current working directory.
If $base doesn't have a trailing colon, the last element of $base is
assumed to be a filename. This filename is ignored. Otherwise all path
components are assumed to be directories.
If $path is already absolute, it is returned and $base is ignored.
Based on code written by Shigio Yamaguchi.
=cut
sub rel2abs {
my ($self,$path,$base) = @_;
if ( ! $self->file_name_is_absolute($path) ) {
# Figure out the effective $base and clean it up.
if ( !defined( $base ) || $base eq '' ) {
$base = Cwd::getcwd();
}
elsif ( ! $self->file_name_is_absolute($base) ) {
$base = $self->rel2abs($base) ;
}
# Split up paths
# ignore $path's volume
my ( $path_dirs, $path_file ) = ($self->splitpath($path))[1,2] ;
# ignore $base's file part
my ( $base_vol, $base_dirs ) = $self->splitpath($base) ;
# Glom them together
$path_dirs = ':' if ($path_dirs eq '');
$base_dirs =~ s/:$//; # remove trailing ':', if any
$base_dirs = $base_dirs . $path_dirs;
$path = $self->catpath( $base_vol, $base_dirs, $path_file );
}
return $path;
}
=back
=head1 AUTHORS
See the authors list in I<File::Spec>. Mac OS support by Paul Schinder
<schinder@pobox.com> and Thomas Wegner <wegner_thomas@yahoo.com>.
=head1 COPYRIGHT
Copyright (c) 2004 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
See L<File::Spec> and L<File::Spec::Unix>. This package overrides the
implementation of these methods, not the semantics.
=cut
1;

View File

@@ -0,0 +1,271 @@
package File::Spec::OS2;
use strict;
use Cwd ();
require File::Spec::Unix;
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
our @ISA = qw(File::Spec::Unix);
sub devnull {
return "/dev/nul";
}
sub case_tolerant {
return 1;
}
sub file_name_is_absolute {
my ($self,$file) = @_;
return scalar($file =~ m{^([a-z]:)?[\\/]}is);
}
sub path {
my $path = $ENV{PATH};
$path =~ s:\\:/:g;
my @path = split(';',$path);
foreach (@path) { $_ = '.' if $_ eq '' }
return @path;
}
sub tmpdir {
my $cached = $_[0]->_cached_tmpdir(qw 'TMPDIR TEMP TMP');
return $cached if defined $cached;
my @d = @ENV{qw(TMPDIR TEMP TMP)}; # function call could autovivivy
$_[0]->_cache_tmpdir(
$_[0]->_tmpdir( @d, '/tmp', '/' ), qw 'TMPDIR TEMP TMP'
);
}
sub catdir {
my $self = shift;
my @args = @_;
foreach (@args) {
tr[\\][/];
# append a backslash to each argument unless it has one there
$_ .= "/" unless m{/$};
}
return $self->canonpath(join('', @args));
}
sub canonpath {
my ($self,$path) = @_;
return unless defined $path;
$path =~ s/^([a-z]:)/\l$1/s;
$path =~ s|\\|/|g;
$path =~ s|([^/])/+|$1/|g; # xx////xx -> xx/xx
$path =~ s|(/\.)+/|/|g; # xx/././xx -> xx/xx
$path =~ s|^(\./)+(?=[^/])||s; # ./xx -> xx
$path =~ s|/\Z(?!\n)||
unless $path =~ m#^([a-z]:)?/\Z(?!\n)#si;# xx/ -> xx
$path =~ s{^/\.\.$}{/}; # /.. -> /
1 while $path =~ s{^/\.\.}{}; # /../xx -> /xx
return $path;
}
sub splitpath {
my ($self,$path, $nofile) = @_;
my ($volume,$directory,$file) = ('','','');
if ( $nofile ) {
$path =~
m{^( (?:[a-zA-Z]:|(?:\\\\|//)[^\\/]+[\\/][^\\/]+)? )
(.*)
}xs;
$volume = $1;
$directory = $2;
}
else {
$path =~
m{^ ( (?: [a-zA-Z]: |
(?:\\\\|//)[^\\/]+[\\/][^\\/]+
)?
)
( (?:.*[\\\\/](?:\.\.?\Z(?!\n))?)? )
(.*)
}xs;
$volume = $1;
$directory = $2;
$file = $3;
}
return ($volume,$directory,$file);
}
sub splitdir {
my ($self,$directories) = @_ ;
split m|[\\/]|, $directories, -1;
}
sub catpath {
my ($self,$volume,$directory,$file) = @_;
# If it's UNC, make sure the glue separator is there, reusing
# whatever separator is first in the $volume
$volume .= $1
if ( $volume =~ m@^([\\/])[\\/][^\\/]+[\\/][^\\/]+\Z(?!\n)@s &&
$directory =~ m@^[^\\/]@s
) ;
$volume .= $directory ;
# If the volume is not just A:, make sure the glue separator is
# there, reusing whatever separator is first in the $volume if possible.
if ( $volume !~ m@^[a-zA-Z]:\Z(?!\n)@s &&
$volume =~ m@[^\\/]\Z(?!\n)@ &&
$file =~ m@[^\\/]@
) {
$volume =~ m@([\\/])@ ;
my $sep = $1 ? $1 : '/' ;
$volume .= $sep ;
}
$volume .= $file ;
return $volume ;
}
sub abs2rel {
my($self,$path,$base) = @_;
# Clean up $path
if ( ! $self->file_name_is_absolute( $path ) ) {
$path = $self->rel2abs( $path ) ;
} else {
$path = $self->canonpath( $path ) ;
}
# Figure out the effective $base and clean it up.
if ( !defined( $base ) || $base eq '' ) {
$base = Cwd::getcwd();
} elsif ( ! $self->file_name_is_absolute( $base ) ) {
$base = $self->rel2abs( $base ) ;
} else {
$base = $self->canonpath( $base ) ;
}
# Split up paths
my ( $path_volume, $path_directories, $path_file ) = $self->splitpath( $path, 1 ) ;
my ( $base_volume, $base_directories ) = $self->splitpath( $base, 1 ) ;
return $path unless $path_volume eq $base_volume;
# Now, remove all leading components that are the same
my @pathchunks = $self->splitdir( $path_directories );
my @basechunks = $self->splitdir( $base_directories );
while ( @pathchunks &&
@basechunks &&
lc( $pathchunks[0] ) eq lc( $basechunks[0] )
) {
shift @pathchunks ;
shift @basechunks ;
}
# No need to catdir, we know these are well formed.
$path_directories = CORE::join( '/', @pathchunks );
$base_directories = CORE::join( '/', @basechunks );
# $base_directories now contains the directories the resulting relative
# path must ascend out of before it can descend to $path_directory. So,
# replace all names with $parentDir
#FA Need to replace between backslashes...
$base_directories =~ s|[^\\/]+|..|g ;
# Glue the two together, using a separator if necessary, and preventing an
# empty result.
#FA Must check that new directories are not empty.
if ( $path_directories ne '' && $base_directories ne '' ) {
$path_directories = "$base_directories/$path_directories" ;
} else {
$path_directories = "$base_directories$path_directories" ;
}
return $self->canonpath(
$self->catpath( "", $path_directories, $path_file )
) ;
}
sub rel2abs {
my ($self,$path,$base ) = @_;
if ( ! $self->file_name_is_absolute( $path ) ) {
if ( !defined( $base ) || $base eq '' ) {
$base = Cwd::getcwd();
}
elsif ( ! $self->file_name_is_absolute( $base ) ) {
$base = $self->rel2abs( $base ) ;
}
else {
$base = $self->canonpath( $base ) ;
}
my ( $path_directories, $path_file ) =
($self->splitpath( $path, 1 ))[1,2] ;
my ( $base_volume, $base_directories ) =
$self->splitpath( $base, 1 ) ;
$path = $self->catpath(
$base_volume,
$self->catdir( $base_directories, $path_directories ),
$path_file
) ;
}
return $self->canonpath( $path ) ;
}
1;
__END__
=head1 NAME
File::Spec::OS2 - methods for OS/2 file specs
=head1 SYNOPSIS
require File::Spec::OS2; # Done internally by File::Spec if needed
=head1 DESCRIPTION
See L<File::Spec> and L<File::Spec::Unix>. This package overrides the
implementation of these methods, not the semantics.
Amongst the changes made for OS/2 are...
=over 4
=item tmpdir
Modifies the list of places temp directory information is looked for.
$ENV{TMPDIR}
$ENV{TEMP}
$ENV{TMP}
/tmp
/
=item splitpath
Volumes can be drive letters or UNC sharenames (\\server\share).
=back
=head1 COPYRIGHT
Copyright (c) 2004 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,575 @@
package File::Spec::Unix;
use strict;
use Cwd ();
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
=head1 NAME
File::Spec::Unix - File::Spec for Unix, base for other File::Spec modules
=head1 SYNOPSIS
require File::Spec::Unix; # Done automatically by File::Spec
=head1 DESCRIPTION
Methods for manipulating file specifications. Other File::Spec
modules, such as File::Spec::Mac, inherit from File::Spec::Unix and
override specific methods.
=head1 METHODS
=over 2
=item canonpath()
No physical check on the filesystem, but a logical cleanup of a
path. On UNIX eliminates successive slashes and successive "/.".
$cpath = File::Spec->canonpath( $path ) ;
Note that this does *not* collapse F<x/../y> sections into F<y>. This
is by design. If F</foo> on your system is a symlink to F</bar/baz>,
then F</foo/../quux> is actually F</bar/quux>, not F</quux> as a naive
F<../>-removal would give you. If you want to do this kind of
processing, you probably want C<Cwd>'s C<realpath()> function to
actually traverse the filesystem cleaning up paths like this.
=cut
sub _pp_canonpath {
my ($self,$path) = @_;
return unless defined $path;
# Handle POSIX-style node names beginning with double slash (qnx, nto)
# (POSIX says: "a pathname that begins with two successive slashes
# may be interpreted in an implementation-defined manner, although
# more than two leading slashes shall be treated as a single slash.")
my $node = '';
my $double_slashes_special = $^O eq 'qnx' || $^O eq 'nto';
if ( $double_slashes_special
&& ( $path =~ s{^(//[^/]+)/?\z}{}s || $path =~ s{^(//[^/]+)/}{/}s ) ) {
$node = $1;
}
# This used to be
# $path =~ s|/+|/|g unless ($^O eq 'cygwin' || $^O eq 'msys');
# but that made tests 29, 30, 35, 46, and 213 (as of #13272) to fail
# (Mainly because trailing "" directories didn't get stripped).
# Why would cygwin avoid collapsing multiple slashes into one? --jhi
$path =~ s|/{2,}|/|g; # xx////xx -> xx/xx
$path =~ s{(?:/\.)+(?:/|\z)}{/}g; # xx/././xx -> xx/xx
$path =~ s|^(?:\./)+||s unless $path eq "./"; # ./xx -> xx
$path =~ s|^/(?:\.\./)+|/|; # /../../xx -> xx
$path =~ s|^/\.\.$|/|; # /.. -> /
$path =~ s|/\z|| unless $path eq "/"; # xx/ -> xx
return "$node$path";
}
*canonpath = \&_pp_canonpath unless defined &canonpath;
=item catdir()
Concatenate two or more directory names to form a complete path ending
with a directory. But remove the trailing slash from the resulting
string, because it doesn't look good, isn't necessary and confuses
OS2. Of course, if this is the root directory, don't cut off the
trailing slash :-)
=cut
sub _pp_catdir {
my $self = shift;
$self->canonpath(join('/', @_, '')); # '' because need a trailing '/'
}
*catdir = \&_pp_catdir unless defined &catdir;
=item catfile
Concatenate one or more directory names and a filename to form a
complete path ending with a filename
=cut
sub _pp_catfile {
my $self = shift;
my $file = $self->canonpath(pop @_);
return $file unless @_;
my $dir = $self->catdir(@_);
$dir .= "/" unless substr($dir,-1) eq "/";
return $dir.$file;
}
*catfile = \&_pp_catfile unless defined &catfile;
=item curdir
Returns a string representation of the current directory. "." on UNIX.
=cut
sub curdir { '.' }
use constant _fn_curdir => ".";
=item devnull
Returns a string representation of the null device. "/dev/null" on UNIX.
=cut
sub devnull { '/dev/null' }
use constant _fn_devnull => "/dev/null";
=item rootdir
Returns a string representation of the root directory. "/" on UNIX.
=cut
sub rootdir { '/' }
use constant _fn_rootdir => "/";
=item tmpdir
Returns a string representation of the first writable directory from
the following list or the current directory if none from the list are
writable:
$ENV{TMPDIR}
/tmp
If running under taint mode, and if $ENV{TMPDIR}
is tainted, it is not used.
=cut
my ($tmpdir, %tmpenv);
# Cache and return the calculated tmpdir, recording which env vars
# determined it.
sub _cache_tmpdir {
@tmpenv{@_[2..$#_]} = @ENV{@_[2..$#_]};
return $tmpdir = $_[1];
}
# Retrieve the cached tmpdir, checking first whether relevant env vars have
# changed and invalidated the cache.
sub _cached_tmpdir {
shift;
local $^W;
return if grep $ENV{$_} ne $tmpenv{$_}, @_;
return $tmpdir;
}
sub _tmpdir {
my $self = shift;
my @dirlist = @_;
my $taint = do { no strict 'refs'; ${"\cTAINT"} };
if ($taint) { # Check for taint mode on perl >= 5.8.0
require Scalar::Util;
@dirlist = grep { ! Scalar::Util::tainted($_) } @dirlist;
}
elsif ($] < 5.007) { # No ${^TAINT} before 5.8
@dirlist = grep { !defined($_) || eval { eval('1'.substr $_,0,0) } }
@dirlist;
}
foreach (@dirlist) {
next unless defined && -d && -w _;
$tmpdir = $_;
last;
}
$tmpdir = $self->curdir unless defined $tmpdir;
$tmpdir = defined $tmpdir && $self->canonpath($tmpdir);
if ( !$self->file_name_is_absolute($tmpdir) ) {
# See [perl #120593] for the full details
# If possible, return a full path, rather than '.' or 'lib', but
# jump through some hoops to avoid returning a tainted value.
($tmpdir) = grep {
$taint ? ! Scalar::Util::tainted($_) :
$] < 5.007 ? eval { eval('1'.substr $_,0,0) } : 1
} $self->rel2abs($tmpdir), $tmpdir;
}
return $tmpdir;
}
sub tmpdir {
my $cached = $_[0]->_cached_tmpdir('TMPDIR');
return $cached if defined $cached;
$_[0]->_cache_tmpdir($_[0]->_tmpdir( $ENV{TMPDIR}, "/tmp" ), 'TMPDIR');
}
=item updir
Returns a string representation of the parent directory. ".." on UNIX.
=cut
sub updir { '..' }
use constant _fn_updir => "..";
=item no_upwards
Given a list of file names, strip out those that refer to a parent
directory. (Does not strip symlinks, only '.', '..', and equivalents.)
=cut
sub no_upwards {
my $self = shift;
return grep(!/^\.{1,2}\z/s, @_);
}
=item case_tolerant
Returns a true or false value indicating, respectively, that alphabetic
is not or is significant when comparing file specifications.
=cut
sub case_tolerant { 0 }
use constant _fn_case_tolerant => 0;
=item file_name_is_absolute
Takes as argument a path and returns true if it is an absolute path.
This does not consult the local filesystem on Unix, Win32, OS/2 or Mac
OS (Classic). It does consult the working environment for VMS (see
L<File::Spec::VMS/file_name_is_absolute>).
=cut
sub file_name_is_absolute {
my ($self,$file) = @_;
return scalar($file =~ m:^/:s);
}
=item path
Takes no argument, returns the environment variable PATH as an array.
=cut
sub path {
return () unless exists $ENV{PATH};
my @path = split(':', $ENV{PATH});
foreach (@path) { $_ = '.' if $_ eq '' }
return @path;
}
=item join
join is the same as catfile.
=cut
sub join {
my $self = shift;
return $self->catfile(@_);
}
=item splitpath
($volume,$directories,$file) = File::Spec->splitpath( $path );
($volume,$directories,$file) = File::Spec->splitpath( $path,
$no_file );
Splits a path into volume, directory, and filename portions. On systems
with no concept of volume, returns '' for volume.
For systems with no syntax differentiating filenames from directories,
assumes that the last file is a path unless $no_file is true or a
trailing separator or /. or /.. is present. On Unix this means that $no_file
true makes this return ( '', $path, '' ).
The directory portion may or may not be returned with a trailing '/'.
The results can be passed to L</catpath()> to get back a path equivalent to
(usually identical to) the original path.
=cut
sub splitpath {
my ($self,$path, $nofile) = @_;
my ($volume,$directory,$file) = ('','','');
if ( $nofile ) {
$directory = $path;
}
else {
$path =~ m|^ ( (?: .* / (?: \.\.?\z )? )? ) ([^/]*) |xs;
$directory = $1;
$file = $2;
}
return ($volume,$directory,$file);
}
=item splitdir
The opposite of L</catdir()>.
@dirs = File::Spec->splitdir( $directories );
$directories must be only the directory portion of the path on systems
that have the concept of a volume or that have path syntax that differentiates
files from directories.
Unlike just splitting the directories on the separator, empty
directory names (C<''>) can be returned, because these are significant
on some OSs.
On Unix,
File::Spec->splitdir( "/a/b//c/" );
Yields:
( '', 'a', 'b', '', 'c', '' )
=cut
sub splitdir {
return split m|/|, $_[1], -1; # Preserve trailing fields
}
=item catpath()
Takes volume, directory and file portions and returns an entire path. Under
Unix, $volume is ignored, and directory and file are concatenated. A '/' is
inserted if needed (though if the directory portion doesn't start with
'/' it is not added). On other OSs, $volume is significant.
=cut
sub catpath {
my ($self,$volume,$directory,$file) = @_;
if ( $directory ne '' &&
$file ne '' &&
substr( $directory, -1 ) ne '/' &&
substr( $file, 0, 1 ) ne '/'
) {
$directory .= "/$file" ;
}
else {
$directory .= $file ;
}
return $directory ;
}
=item abs2rel
Takes a destination path and an optional base path returns a relative path
from the base path to the destination path:
$rel_path = File::Spec->abs2rel( $path ) ;
$rel_path = File::Spec->abs2rel( $path, $base ) ;
If $base is not present or '', then L<cwd()|Cwd> is used. If $base is
relative, then it is converted to absolute form using
L</rel2abs()>. This means that it is taken to be relative to
L<cwd()|Cwd>.
On systems that have a grammar that indicates filenames, this ignores the
$base filename. Otherwise all path components are assumed to be
directories.
If $path is relative, it is converted to absolute form using L</rel2abs()>.
This means that it is taken to be relative to L<cwd()|Cwd>.
No checks against the filesystem are made, so the result may not be correct if
C<$base> contains symbolic links. (Apply
L<Cwd::abs_path()|Cwd/abs_path> beforehand if that
is a concern.) On VMS, there is interaction with the working environment, as
logicals and macros are expanded.
Based on code written by Shigio Yamaguchi.
=cut
sub abs2rel {
my($self,$path,$base) = @_;
$base = Cwd::getcwd() unless defined $base and length $base;
($path, $base) = map $self->canonpath($_), $path, $base;
my $path_directories;
my $base_directories;
if (grep $self->file_name_is_absolute($_), $path, $base) {
($path, $base) = map $self->rel2abs($_), $path, $base;
my ($path_volume) = $self->splitpath($path, 1);
my ($base_volume) = $self->splitpath($base, 1);
# Can't relativize across volumes
return $path unless $path_volume eq $base_volume;
$path_directories = ($self->splitpath($path, 1))[1];
$base_directories = ($self->splitpath($base, 1))[1];
# For UNC paths, the user might give a volume like //foo/bar that
# strictly speaking has no directory portion. Treat it as if it
# had the root directory for that volume.
if (!length($base_directories) and $self->file_name_is_absolute($base)) {
$base_directories = $self->rootdir;
}
}
else {
my $wd= ($self->splitpath(Cwd::getcwd(), 1))[1];
$path_directories = $self->catdir($wd, $path);
$base_directories = $self->catdir($wd, $base);
}
# Now, remove all leading components that are the same
my @pathchunks = $self->splitdir( $path_directories );
my @basechunks = $self->splitdir( $base_directories );
if ($base_directories eq $self->rootdir) {
return $self->curdir if $path_directories eq $self->rootdir;
shift @pathchunks;
return $self->canonpath( $self->catpath('', $self->catdir( @pathchunks ), '') );
}
my @common;
while (@pathchunks && @basechunks && $self->_same($pathchunks[0], $basechunks[0])) {
push @common, shift @pathchunks ;
shift @basechunks ;
}
return $self->curdir unless @pathchunks || @basechunks;
# @basechunks now contains the directories the resulting relative path
# must ascend out of before it can descend to $path_directory. If there
# are updir components, we must descend into the corresponding directories
# (this only works if they are no symlinks).
my @reverse_base;
while( defined(my $dir= shift @basechunks) ) {
if( $dir ne $self->updir ) {
unshift @reverse_base, $self->updir;
push @common, $dir;
}
elsif( @common ) {
if( @reverse_base && $reverse_base[0] eq $self->updir ) {
shift @reverse_base;
pop @common;
}
else {
unshift @reverse_base, pop @common;
}
}
}
my $result_dirs = $self->catdir( @reverse_base, @pathchunks );
return $self->canonpath( $self->catpath('', $result_dirs, '') );
}
sub _same {
$_[1] eq $_[2];
}
=item rel2abs()
Converts a relative path to an absolute path.
$abs_path = File::Spec->rel2abs( $path ) ;
$abs_path = File::Spec->rel2abs( $path, $base ) ;
If $base is not present or '', then L<cwd()|Cwd> is used. If $base is
relative, then it is converted to absolute form using
L</rel2abs()>. This means that it is taken to be relative to
L<cwd()|Cwd>.
On systems that have a grammar that indicates filenames, this ignores
the $base filename. Otherwise all path components are assumed to be
directories.
If $path is absolute, it is cleaned up and returned using L</canonpath()>.
No checks against the filesystem are made. On VMS, there is
interaction with the working environment, as logicals and
macros are expanded.
Based on code written by Shigio Yamaguchi.
=cut
sub rel2abs {
my ($self,$path,$base ) = @_;
# Clean up $path
if ( ! $self->file_name_is_absolute( $path ) ) {
# Figure out the effective $base and clean it up.
if ( !defined( $base ) || $base eq '' ) {
$base = Cwd::getcwd();
}
elsif ( ! $self->file_name_is_absolute( $base ) ) {
$base = $self->rel2abs( $base ) ;
}
else {
$base = $self->canonpath( $base ) ;
}
# Glom them together
$path = $self->catdir( $base, $path ) ;
}
return $self->canonpath( $path ) ;
}
=back
=head1 COPYRIGHT
Copyright (c) 2004 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
Please submit bug reports at L<https://github.com/Perl/perl5/issues>.
=head1 SEE ALSO
L<File::Spec>
=cut
# Internal method to reduce xx\..\yy -> yy
sub _collapse {
my($fs, $path) = @_;
my $updir = $fs->updir;
my $curdir = $fs->curdir;
my($vol, $dirs, $file) = $fs->splitpath($path);
my @dirs = $fs->splitdir($dirs);
pop @dirs if @dirs && $dirs[-1] eq '';
my @collapsed;
foreach my $dir (@dirs) {
if( $dir eq $updir and # if we have an updir
@collapsed and # and something to collapse
length $collapsed[-1] and # and its not the rootdir
$collapsed[-1] ne $updir and # nor another updir
$collapsed[-1] ne $curdir # nor the curdir
)
{ # then
pop @collapsed; # collapse
}
else { # else
push @collapsed, $dir; # just hang onto it
}
}
return $fs->catpath($vol,
$fs->catdir(@collapsed),
$file
);
}
1;

View File

@@ -0,0 +1,569 @@
package File::Spec::VMS;
use strict;
use Cwd ();
require File::Spec::Unix;
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
our @ISA = qw(File::Spec::Unix);
use File::Basename;
use VMS::Filespec;
=head1 NAME
File::Spec::VMS - methods for VMS file specs
=head1 SYNOPSIS
require File::Spec::VMS; # Done internally by File::Spec if needed
=head1 DESCRIPTION
See File::Spec::Unix for a documentation of the methods provided
there. This package overrides the implementation of these methods, not
the semantics.
The default behavior is to allow either VMS or Unix syntax on input and to
return VMS syntax on output unless Unix syntax has been explicitly requested
via the C<DECC$FILENAME_UNIX_REPORT> CRTL feature.
=over 4
=cut
# Need to look up the feature settings. The preferred way is to use the
# VMS::Feature module, but that may not be available to dual life modules.
my $use_feature;
BEGIN {
if (eval { local $SIG{__DIE__};
local @INC = @INC;
pop @INC if $INC[-1] eq '.';
require VMS::Feature; }) {
$use_feature = 1;
}
}
# Need to look up the UNIX report mode. This may become a dynamic mode
# in the future.
sub _unix_rpt {
my $unix_rpt;
if ($use_feature) {
$unix_rpt = VMS::Feature::current("filename_unix_report");
} else {
my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || '';
$unix_rpt = $env_unix_rpt =~ /^[ET1]/i;
}
return $unix_rpt;
}
=item canonpath (override)
Removes redundant portions of file specifications and returns results
in native syntax unless Unix filename reporting has been enabled.
=cut
sub canonpath {
my($self,$path) = @_;
return undef unless defined $path;
my $unix_rpt = $self->_unix_rpt;
if ($path =~ m|/|) {
my $pathify = $path =~ m|/\Z(?!\n)|;
$path = $self->SUPER::canonpath($path);
return $path if $unix_rpt;
$path = $pathify ? vmspath($path) : vmsify($path);
}
$path =~ s/(?<!\^)</[/; # < and > ==> [ and ]
$path =~ s/(?<!\^)>/]/;
$path =~ s/(?<!\^)\]\[\./\.\]\[/g; # ][. ==> .][
$path =~ s/(?<!\^)\[000000\.\]\[/\[/g; # [000000.][ ==> [
$path =~ s/(?<!\^)\[000000\./\[/g; # [000000. ==> [
$path =~ s/(?<!\^)\.\]\[000000\]/\]/g; # .][000000] ==> ]
$path =~ s/(?<!\^)\.\]\[/\./g; # foo.][bar ==> foo.bar
1 while ($path =~ s/(?<!\^)([\[\.])(-+)\.(-+)([\.\]])/$1$2$3$4/);
# That loop does the following
# with any amount of dashes:
# .-.-. ==> .--.
# [-.-. ==> [--.
# .-.-] ==> .--]
# [-.-] ==> [--]
1 while ($path =~ s/(?<!\^)([\[\.])(?:\^.|[^\]\.])+\.-(-+)([\]\.])/$1$2$3/);
# That loop does the following
# with any amount (minimum 2)
# of dashes:
# .foo.--. ==> .-.
# .foo.--] ==> .-]
# [foo.--. ==> [-.
# [foo.--] ==> [-]
#
# And then, the remaining cases
$path =~ s/(?<!\^)\[\.-/[-/; # [.- ==> [-
$path =~ s/(?<!\^)\.(?:\^.|[^\]\.])+\.-\./\./g; # .foo.-. ==> .
$path =~ s/(?<!\^)\[(?:\^.|[^\]\.])+\.-\./\[/g; # [foo.-. ==> [
$path =~ s/(?<!\^)\.(?:\^.|[^\]\.])+\.-\]/\]/g; # .foo.-] ==> ]
# [foo.-] ==> [000000]
$path =~ s/(?<!\^)\[(?:\^.|[^\]\.])+\.-\]/\[000000\]/g;
# [] ==>
$path =~ s/(?<!\^)\[\]// unless $path eq '[]';
return $unix_rpt ? unixify($path) : $path;
}
=item catdir (override)
Concatenates a list of file specifications, and returns the result as a
native directory specification unless the Unix filename reporting feature
has been enabled. No check is made for "impossible" cases (e.g. elements
other than the first being absolute filespecs).
=cut
sub catdir {
my $self = shift;
my $dir = pop;
my $unix_rpt = $self->_unix_rpt;
my @dirs = grep {defined() && length()} @_;
my $rslt;
if (@dirs) {
my $path = (@dirs == 1 ? $dirs[0] : $self->catdir(@dirs));
my ($spath,$sdir) = ($path,$dir);
$spath =~ s/\.dir\Z(?!\n)//i; $sdir =~ s/\.dir\Z(?!\n)//i;
if ($unix_rpt) {
$spath = unixify($spath) unless $spath =~ m#/#;
$sdir= unixify($sdir) unless $sdir =~ m#/#;
return $self->SUPER::catdir($spath, $sdir)
}
$rslt = vmspath( unixify($spath) . '/' . unixify($sdir));
# Special case for VMS absolute directory specs: these will have
# had device prepended during trip through Unix syntax in
# eliminate_macros(), since Unix syntax has no way to express
# "absolute from the top of this device's directory tree".
if ($spath =~ /^[\[<][^.\-]/s) { $rslt =~ s/^[^\[<]+//s; }
} else {
# Single directory. Return an empty string on null input; otherwise
# just return a canonical path.
if (not defined $dir or not length $dir) {
$rslt = '';
} else {
$rslt = $unix_rpt ? $dir : vmspath($dir);
}
}
return $self->canonpath($rslt);
}
=item catfile (override)
Concatenates a list of directory specifications with a filename specification
to build a path.
=cut
sub catfile {
my $self = shift;
my $tfile = pop();
my $file = $self->canonpath($tfile);
my @files = grep {defined() && length()} @_;
my $unix_rpt = $self->_unix_rpt;
my $rslt;
if (@files) {
my $path = (@files == 1 ? $files[0] : $self->catdir(@files));
my $spath = $path;
# Something building a VMS path in pieces may try to pass a
# directory name in filename format, so normalize it.
$spath =~ s/\.dir\Z(?!\n)//i;
# If the spath ends with a directory delimiter and the file is bare,
# then just concatenate them.
if ($spath =~ /^(?<!\^)[^\)\]\/:>]+\)\Z(?!\n)/s && basename($file) eq $file) {
$rslt = "$spath$file";
} else {
$rslt = unixify($spath);
$rslt .= (defined($rslt) && length($rslt) ? '/' : '') . unixify($file);
$rslt = vmsify($rslt) unless $unix_rpt;
}
}
else {
# Only passed a single file?
my $xfile = (defined($file) && length($file)) ? $file : '';
$rslt = $unix_rpt ? $xfile : vmsify($xfile);
}
return $self->canonpath($rslt) unless $unix_rpt;
# In Unix report mode, do not strip off redundant path information.
return $rslt;
}
=item curdir (override)
Returns a string representation of the current directory: '[]' or '.'
=cut
sub curdir {
my $self = shift @_;
return '.' if ($self->_unix_rpt);
return '[]';
}
=item devnull (override)
Returns a string representation of the null device: '_NLA0:' or '/dev/null'
=cut
sub devnull {
my $self = shift @_;
return '/dev/null' if ($self->_unix_rpt);
return "_NLA0:";
}
=item rootdir (override)
Returns a string representation of the root directory: 'SYS$DISK:[000000]'
or '/'
=cut
sub rootdir {
my $self = shift @_;
if ($self->_unix_rpt) {
# Root may exist, try it first.
my $try = '/';
my ($dev1, $ino1) = stat('/');
my ($dev2, $ino2) = stat('.');
# Perl falls back to '.' if it can not determine '/'
if (($dev1 != $dev2) || ($ino1 != $ino2)) {
return $try;
}
# Fall back to UNIX format sys$disk.
return '/sys$disk/';
}
return 'SYS$DISK:[000000]';
}
=item tmpdir (override)
Returns a string representation of the first writable directory
from the following list or '' if none are writable:
/tmp if C<DECC$FILENAME_UNIX_REPORT> is enabled.
sys$scratch:
$ENV{TMPDIR}
If running under taint mode, and if $ENV{TMPDIR}
is tainted, it is not used.
=cut
sub tmpdir {
my $self = shift @_;
my $tmpdir = $self->_cached_tmpdir('TMPDIR');
return $tmpdir if defined $tmpdir;
if ($self->_unix_rpt) {
$tmpdir = $self->_tmpdir('/tmp', '/sys$scratch', $ENV{TMPDIR});
}
else {
$tmpdir = $self->_tmpdir( 'sys$scratch:', $ENV{TMPDIR} );
}
$self->_cache_tmpdir($tmpdir, 'TMPDIR');
}
=item updir (override)
Returns a string representation of the parent directory: '[-]' or '..'
=cut
sub updir {
my $self = shift @_;
return '..' if ($self->_unix_rpt);
return '[-]';
}
=item case_tolerant (override)
VMS file specification syntax is case-tolerant.
=cut
sub case_tolerant {
return 1;
}
=item path (override)
Translate logical name DCL$PATH as a searchlist, rather than trying
to C<split> string value of C<$ENV{'PATH'}>.
=cut
sub path {
my (@dirs,$dir,$i);
while ($dir = $ENV{'DCL$PATH;' . $i++}) { push(@dirs,$dir); }
return @dirs;
}
=item file_name_is_absolute (override)
Checks for VMS directory spec as well as Unix separators.
=cut
sub file_name_is_absolute {
my ($self,$file) = @_;
# If it's a logical name, expand it.
$file = $ENV{$file} while $file =~ /^[\w\$\-]+\Z(?!\n)/s && $ENV{$file};
return scalar($file =~ m!^/!s ||
$file =~ m![<\[][^.\-\]>]! ||
$file =~ /^[A-Za-z0-9_\$\-\~]+(?<!\^):/);
}
=item splitpath (override)
($volume,$directories,$file) = File::Spec->splitpath( $path );
($volume,$directories,$file) = File::Spec->splitpath( $path,
$no_file );
Passing a true value for C<$no_file> indicates that the path being
split only contains directory components, even on systems where you
can usually (when not supporting a foreign syntax) tell the difference
between directories and files at a glance.
=cut
sub splitpath {
my($self,$path, $nofile) = @_;
my($dev,$dir,$file) = ('','','');
my $vmsify_path = vmsify($path);
if ( $nofile ) {
#vmsify('d1/d2/d3') returns '[.d1.d2]d3'
#vmsify('/d1/d2/d3') returns 'd1:[d2]d3'
if( $vmsify_path =~ /(.*)\](.+)/ ){
$vmsify_path = $1.'.'.$2.']';
}
$vmsify_path =~ /(.+:)?(.*)/s;
$dir = defined $2 ? $2 : ''; # dir can be '0'
return ($1 || '',$dir,$file);
}
else {
$vmsify_path =~ /(.+:)?([\[<].*[\]>])?(.*)/s;
return ($1 || '',$2 || '',$3);
}
}
=item splitdir (override)
Split a directory specification into the components.
=cut
sub splitdir {
my($self,$dirspec) = @_;
my @dirs = ();
return @dirs if ( (!defined $dirspec) || ('' eq $dirspec) );
$dirspec =~ s/(?<!\^)</[/; # < and > ==> [ and ]
$dirspec =~ s/(?<!\^)>/]/;
$dirspec =~ s/(?<!\^)\]\[\./\.\]\[/g; # ][. ==> .][
$dirspec =~ s/(?<!\^)\[000000\.\]\[/\[/g; # [000000.][ ==> [
$dirspec =~ s/(?<!\^)\[000000\./\[/g; # [000000. ==> [
$dirspec =~ s/(?<!\^)\.\]\[000000\]/\]/g; # .][000000] ==> ]
$dirspec =~ s/(?<!\^)\.\]\[/\./g; # foo.][bar ==> foo.bar
while ($dirspec =~ s/(^|[\[\<\.])\-(\-+)($|[\]\>\.])/$1-.$2$3/g) {}
# That loop does the following
# with any amount of dashes:
# .--. ==> .-.-.
# [--. ==> [-.-.
# .--] ==> .-.-]
# [--] ==> [-.-]
$dirspec = "[$dirspec]" unless $dirspec =~ /(?<!\^)[\[<]/; # make legal
$dirspec =~ s/^(\[|<)\./$1/;
@dirs = split /(?<!\^)\./, vmspath($dirspec);
$dirs[0] =~ s/^[\[<]//s; $dirs[-1] =~ s/[\]>]\Z(?!\n)//s;
@dirs;
}
=item catpath (override)
Construct a complete filespec.
=cut
sub catpath {
my($self,$dev,$dir,$file) = @_;
# We look for a volume in $dev, then in $dir, but not both
my ($dir_volume, $dir_dir, $dir_file) = $self->splitpath($dir);
$dev = $dir_volume unless length $dev;
$dir = length $dir_file ? $self->catfile($dir_dir, $dir_file) : $dir_dir;
if ($dev =~ m|^(?<!\^)/+([^/]+)|) { $dev = "$1:"; }
else { $dev .= ':' unless $dev eq '' or $dev =~ /:\Z(?!\n)/; }
if (length($dev) or length($dir)) {
$dir = "[$dir]" unless $dir =~ /(?<!\^)[\[<\/]/;
$dir = vmspath($dir);
}
$dir = '' if length($dev) && ($dir eq '[]' || $dir eq '<>');
"$dev$dir$file";
}
=item abs2rel (override)
Attempt to convert an absolute file specification to a relative specification.
=cut
sub abs2rel {
my $self = shift;
my($path,$base) = @_;
$base = Cwd::getcwd() unless defined $base and length $base;
# If there is no device or directory syntax on $base, make sure it
# is treated as a directory.
$base = vmspath($base) unless $base =~ m{(?<!\^)[\[<:]};
for ($path, $base) { $_ = $self->rel2abs($_) }
# Are we even starting $path on the same (node::)device as $base? Note that
# logical paths or nodename differences may be on the "same device"
# but the comparison that ignores device differences so as to concatenate
# [---] up directory specs is not even a good idea in cases where there is
# a logical path difference between $path and $base nodename and/or device.
# Hence we fall back to returning the absolute $path spec
# if there is a case blind device (or node) difference of any sort
# and we do not even try to call $parse() or consult %ENV for $trnlnm()
# (this module needs to run on non VMS platforms after all).
my ($path_volume, $path_directories, $path_file) = $self->splitpath($path);
my ($base_volume, $base_directories, $base_file) = $self->splitpath($base);
return $self->canonpath( $path ) unless lc($path_volume) eq lc($base_volume);
# Now, remove all leading components that are the same
my @pathchunks = $self->splitdir( $path_directories );
my $pathchunks = @pathchunks;
unshift(@pathchunks,'000000') unless $pathchunks[0] eq '000000';
my @basechunks = $self->splitdir( $base_directories );
my $basechunks = @basechunks;
unshift(@basechunks,'000000') unless $basechunks[0] eq '000000';
while ( @pathchunks &&
@basechunks &&
lc( $pathchunks[0] ) eq lc( $basechunks[0] )
) {
shift @pathchunks ;
shift @basechunks ;
}
# @basechunks now contains the directories to climb out of,
# @pathchunks now has the directories to descend in to.
if ((@basechunks > 0) || ($basechunks != $pathchunks)) {
$path_directories = join '.', ('-' x @basechunks, @pathchunks) ;
}
else {
$path_directories = join '.', @pathchunks;
}
$path_directories = '['.$path_directories.']';
return $self->canonpath( $self->catpath( '', $path_directories, $path_file ) ) ;
}
=item rel2abs (override)
Return an absolute file specification from a relative one.
=cut
sub rel2abs {
my $self = shift ;
my ($path,$base ) = @_;
return undef unless defined $path;
if ($path =~ m/\//) {
$path = ( -d $path || $path =~ m/\/\z/ # educated guessing about
? vmspath($path) # whether it's a directory
: vmsify($path) );
}
$base = vmspath($base) if defined $base && $base =~ m/\//;
# Clean up and split up $path
if ( ! $self->file_name_is_absolute( $path ) ) {
# Figure out the effective $base and clean it up.
if ( !defined( $base ) || $base eq '' ) {
$base = Cwd::getcwd();
}
elsif ( ! $self->file_name_is_absolute( $base ) ) {
$base = $self->rel2abs( $base ) ;
}
else {
$base = $self->canonpath( $base ) ;
}
# Split up paths
my ( $path_directories, $path_file ) =
($self->splitpath( $path ))[1,2] ;
my ( $base_volume, $base_directories ) =
$self->splitpath( $base ) ;
$path_directories = '' if $path_directories eq '[]' ||
$path_directories eq '<>';
my $sep = '' ;
$sep = '.'
if ( $base_directories =~ m{[^.\]>]\Z(?!\n)} &&
$path_directories =~ m{^[^.\[<]}s
) ;
$base_directories = "$base_directories$sep$path_directories";
$base_directories =~ s{\.?[\]>][\[<]\.?}{.};
$path = $self->catpath( $base_volume, $base_directories, $path_file );
}
return $self->canonpath( $path ) ;
}
=back
=head1 COPYRIGHT
Copyright (c) 2004-14 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
See L<File::Spec> and L<File::Spec::Unix>. This package overrides the
implementation of these methods, not the semantics.
An explanation of VMS file specs can be found at
L<http://h71000.www7.hp.com/doc/731FINAL/4506/4506pro_014.html#apps_locating_naming_files>.
=cut
1;

View File

@@ -0,0 +1,439 @@
package File::Spec::Win32;
use strict;
use Cwd ();
require File::Spec::Unix;
our $VERSION = '3.88';
$VERSION =~ tr/_//d;
our @ISA = qw(File::Spec::Unix);
# Some regexes we use for path splitting
my $DRIVE_RX = '[a-zA-Z]:';
my $UNC_RX = '(?:\\\\\\\\|//)[^\\\\/]+[\\\\/][^\\\\/]+';
my $VOL_RX = "(?:$DRIVE_RX|$UNC_RX)";
=head1 NAME
File::Spec::Win32 - methods for Win32 file specs
=head1 SYNOPSIS
require File::Spec::Win32; # Done internally by File::Spec if needed
=head1 DESCRIPTION
See File::Spec::Unix for a documentation of the methods provided
there. This package overrides the implementation of these methods, not
the semantics.
=over 4
=item devnull
Returns a string representation of the null device.
=cut
sub devnull {
return "nul";
}
sub rootdir { '\\' }
=item tmpdir
Returns a string representation of the first existing directory
from the following list:
$ENV{TMPDIR}
$ENV{TEMP}
$ENV{TMP}
SYS:/temp
C:\system\temp
C:/temp
/tmp
/
The SYS:/temp is preferred in Novell NetWare and the C:\system\temp
for Symbian (the File::Spec::Win32 is used also for those platforms).
If running under taint mode, and if the environment
variables are tainted, they are not used.
=cut
sub tmpdir {
my $tmpdir = $_[0]->_cached_tmpdir(qw(TMPDIR TEMP TMP));
return $tmpdir if defined $tmpdir;
$tmpdir = $_[0]->_tmpdir( map( $ENV{$_}, qw(TMPDIR TEMP TMP) ),
'SYS:/temp',
'C:\system\temp',
'C:/temp',
'/tmp',
'/' );
$_[0]->_cache_tmpdir($tmpdir, qw(TMPDIR TEMP TMP));
}
=item case_tolerant
MSWin32 case-tolerance depends on GetVolumeInformation() $ouFsFlags == FS_CASE_SENSITIVE,
indicating the case significance when comparing file specifications.
Since XP FS_CASE_SENSITIVE is effectively disabled for the NT subsubsystem.
See L<http://cygwin.com/ml/cygwin/2007-07/msg00891.html>
Default: 1
=cut
sub case_tolerant {
eval {
local @INC = @INC;
pop @INC if $INC[-1] eq '.';
require Win32API::File;
} or return 1;
my $drive = shift || "C:";
my $osFsType = "\0"x256;
my $osVolName = "\0"x256;
my $ouFsFlags = 0;
Win32API::File::GetVolumeInformation($drive, $osVolName, 256, [], [], $ouFsFlags, $osFsType, 256 );
if ($ouFsFlags & Win32API::File::FS_CASE_SENSITIVE()) { return 0; }
else { return 1; }
}
=item file_name_is_absolute
As of right now, this returns 2 if the path is absolute with a
volume, 1 if it's absolute with no volume, 0 otherwise.
=cut
sub file_name_is_absolute {
my ($self,$file) = @_;
if ($file =~ m{^($VOL_RX)}o) {
my $vol = $1;
return ($vol =~ m{^$UNC_RX}o ? 2
: $file =~ m{^$DRIVE_RX[\\/]}o ? 2
: 0);
}
return $file =~ m{^[\\/]} ? 1 : 0;
}
=item catfile
Concatenate one or more directory names and a filename to form a
complete path ending with a filename
=cut
sub catfile {
shift;
# Legacy / compatibility support
#
shift, return _canon_cat( "/", @_ )
if !@_ || $_[0] eq "";
# Compatibility with File::Spec <= 3.26:
# catfile('A:', 'foo') should return 'A:\foo'.
return _canon_cat( ($_[0].'\\'), @_[1..$#_] )
if $_[0] =~ m{^$DRIVE_RX\z}o;
return _canon_cat( @_ );
}
sub catdir {
shift;
# Legacy / compatibility support
#
return ""
unless @_;
shift, return _canon_cat( "/", @_ )
if $_[0] eq "";
# Compatibility with File::Spec <= 3.26:
# catdir('A:', 'foo') should return 'A:\foo'.
return _canon_cat( ($_[0].'\\'), @_[1..$#_] )
if $_[0] =~ m{^$DRIVE_RX\z}o;
return _canon_cat( @_ );
}
sub path {
my @path = split(';', $ENV{PATH});
s/"//g for @path;
@path = grep length, @path;
unshift(@path, ".");
return @path;
}
=item canonpath
No physical check on the filesystem, but a logical cleanup of a
path. On UNIX eliminated successive slashes and successive "/.".
On Win32 makes
dir1\dir2\dir3\..\..\dir4 -> \dir\dir4 and even
dir1\dir2\dir3\...\dir4 -> \dir\dir4
=cut
sub canonpath {
# Legacy / compatibility support
#
return $_[1] if !defined($_[1]) or $_[1] eq '';
return _canon_cat( $_[1] );
}
=item splitpath
($volume,$directories,$file) = File::Spec->splitpath( $path );
($volume,$directories,$file) = File::Spec->splitpath( $path,
$no_file );
Splits a path into volume, directory, and filename portions. Assumes that
the last file is a path unless the path ends in '\\', '\\.', '\\..'
or $no_file is true. On Win32 this means that $no_file true makes this return
( $volume, $path, '' ).
Separators accepted are \ and /.
Volumes can be drive letters or UNC sharenames (\\server\share).
The results can be passed to L</catpath> to get back a path equivalent to
(usually identical to) the original path.
=cut
sub splitpath {
my ($self,$path, $nofile) = @_;
my ($volume,$directory,$file) = ('','','');
if ( $nofile ) {
$path =~
m{^ ( $VOL_RX ? ) (.*) }sox;
$volume = $1;
$directory = $2;
}
else {
$path =~
m{^ ( $VOL_RX ? )
( (?:.*[\\/](?:\.\.?\Z(?!\n))?)? )
(.*)
}sox;
$volume = $1;
$directory = $2;
$file = $3;
}
return ($volume,$directory,$file);
}
=item splitdir
The opposite of L<catdir()|File::Spec/catdir>.
@dirs = File::Spec->splitdir( $directories );
$directories must be only the directory portion of the path on systems
that have the concept of a volume or that have path syntax that differentiates
files from directories.
Unlike just splitting the directories on the separator, leading empty and
trailing directory entries can be returned, because these are significant
on some OSs. So,
File::Spec->splitdir( "/a/b/c" );
Yields:
( '', 'a', 'b', '', 'c', '' )
=cut
sub splitdir {
my ($self,$directories) = @_ ;
#
# split() likes to forget about trailing null fields, so here we
# check to be sure that there will not be any before handling the
# simple case.
#
if ( $directories !~ m|[\\/]\Z(?!\n)| ) {
return split( m|[\\/]|, $directories );
}
else {
#
# since there was a trailing separator, add a file name to the end,
# then do the split, then replace it with ''.
#
my( @directories )= split( m|[\\/]|, "${directories}dummy" ) ;
$directories[ $#directories ]= '' ;
return @directories ;
}
}
=item catpath
Takes volume, directory and file portions and returns an entire path. Under
Unix, $volume is ignored, and this is just like catfile(). On other OSs,
the $volume become significant.
=cut
sub catpath {
my ($self,$volume,$directory,$file) = @_;
# If it's UNC, make sure the glue separator is there, reusing
# whatever separator is first in the $volume
my $v;
$volume .= $v
if ( (($v) = $volume =~ m@^([\\/])[\\/][^\\/]+[\\/][^\\/]+\Z(?!\n)@s) &&
$directory =~ m@^[^\\/]@s
) ;
$volume .= $directory ;
# If the volume is not just A:, make sure the glue separator is
# there, reusing whatever separator is first in the $volume if possible.
if ( $volume !~ m@^[a-zA-Z]:\Z(?!\n)@s &&
$volume =~ m@[^\\/]\Z(?!\n)@ &&
$file =~ m@[^\\/]@
) {
$volume =~ m@([\\/])@ ;
my $sep = $1 ? $1 : '\\' ;
$volume .= $sep ;
}
$volume .= $file ;
return $volume ;
}
sub _same {
lc($_[1]) eq lc($_[2]);
}
sub rel2abs {
my ($self,$path,$base ) = @_;
my $is_abs = $self->file_name_is_absolute($path);
# Check for volume (should probably document the '2' thing...)
return $self->canonpath( $path ) if $is_abs == 2;
if ($is_abs) {
# It's missing a volume, add one
my $vol = ($self->splitpath( Cwd::getcwd() ))[0];
return $self->canonpath( $vol . $path );
}
if ( !defined( $base ) || $base eq '' ) {
$base = Cwd::getdcwd( ($self->splitpath( $path ))[0] ) if defined &Cwd::getdcwd ;
$base = Cwd::getcwd() unless defined $base ;
}
elsif ( ! $self->file_name_is_absolute( $base ) ) {
$base = $self->rel2abs( $base ) ;
}
else {
$base = $self->canonpath( $base ) ;
}
my ( $path_directories, $path_file ) =
($self->splitpath( $path, 1 ))[1,2] ;
my ( $base_volume, $base_directories ) =
$self->splitpath( $base, 1 ) ;
$path = $self->catpath(
$base_volume,
$self->catdir( $base_directories, $path_directories ),
$path_file
) ;
return $self->canonpath( $path ) ;
}
=back
=head2 Note For File::Spec::Win32 Maintainers
Novell NetWare inherits its File::Spec behaviour from File::Spec::Win32.
=head1 COPYRIGHT
Copyright (c) 2004,2007 by the Perl 5 Porters. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
See L<File::Spec> and L<File::Spec::Unix>. This package overrides the
implementation of these methods, not the semantics.
=cut
sub _canon_cat # @path -> path
{
my ($first, @rest) = @_;
my $volume = $first =~ s{ \A ([A-Za-z]:) ([\\/]?) }{}x # drive letter
? ucfirst( $1 ).( $2 ? "\\" : "" )
: $first =~ s{ \A (?:\\\\|//) ([^\\/]+)
(?: [\\/] ([^\\/]+) )?
[\\/]? }{}xs # UNC volume
? "\\\\$1".( defined $2 ? "\\$2" : "" )."\\"
: $first =~ s{ \A [\\/] }{}x # root dir
? "\\"
: "";
my $path = join "\\", $first, @rest;
$path =~ tr#\\/#\\\\#s; # xx/yy --> xx\yy & xx\\yy --> xx\yy
# xx/././yy --> xx/yy
$path =~ s{(?:
(?:\A|\\) # at begin or after a slash
\.
(?:\\\.)* # and more
(?:\\|\z) # at end or followed by slash
)+ # performance boost -- I do not know why
}{\\}gx;
# xx\yy\..\zz --> xx\zz
while ( $path =~ s{(?:
(?:\A|\\) # at begin or after a slash
[^\\]+ # rip this 'yy' off
\\\.\.
(?<!\A\.\.\\\.\.) # do *not* replace ^..\..
(?<!\\\.\.\\\.\.) # do *not* replace \..\..
(?:\\|\z) # at end or followed by slash
)+ # performance boost -- I do not know why
}{\\}sx ) {}
$path =~ s#\A\\##; # \xx --> xx NOTE: this is *not* root
$path =~ s#\\\z##; # xx\ --> xx
if ( $volume =~ m#\\\z# )
{ # <vol>\.. --> <vol>\
$path =~ s{ \A # at begin
\.\.
(?:\\\.\.)* # and more
(?:\\|\z) # at end or followed by slash
}{}x;
return $1 # \\HOST\SHARE\ --> \\HOST\SHARE
if $path eq ""
and $volume =~ m#\A(\\\\.*)\\\z#s;
}
return $path ne "" || $volume ? $volume.$path : ".";
}
1;

View File

@@ -0,0 +1,538 @@
# Call.pm
#
# Copyright (c) 1995-2011 Paul Marquess. All rights reserved.
# Copyright (c) 2011-2014, 2018-2022 Reini Urban. All rights reserved.
# Copyright (c) 2014-2017 cPanel Inc. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
package Filter::Util::Call ;
require 5.006 ; # our
require Exporter;
use XSLoader ();
use strict;
use warnings;
our @ISA = qw(Exporter);
our @EXPORT = qw( filter_add filter_del filter_read filter_read_exact) ;
our $VERSION = "1.64" ;
our $XS_VERSION = $VERSION;
$VERSION = eval $VERSION;
sub filter_read_exact($)
{
my ($size) = @_ ;
my ($left) = $size ;
my ($status) ;
unless ( $size > 0 ) {
require Carp;
Carp::croak("filter_read_exact: size parameter must be > 0");
}
# try to read a block which is exactly $size bytes long
while ($left and ($status = filter_read($left)) > 0) {
$left = $size - length $_ ;
}
# EOF with pending data is a special case
return 1 if $status == 0 and length $_ ;
return $status ;
}
sub filter_add($)
{
my($obj) = @_ ;
# Did we get a code reference?
my $coderef = (ref $obj eq 'CODE');
# If the parameter isn't already a reference, make it one.
if (!$coderef and (!ref($obj) or ref($obj) =~ /^ARRAY|HASH$/)) {
$obj = bless (\$obj, (caller)[0]);
}
# finish off the installation of the filter in C.
Filter::Util::Call::real_import($obj, (caller)[0], $coderef) ;
}
XSLoader::load('Filter::Util::Call');
1;
__END__
=head1 NAME
Filter::Util::Call - Perl Source Filter Utility Module
=head1 SYNOPSIS
use Filter::Util::Call ;
=head1 DESCRIPTION
This module provides you with the framework to write I<Source Filters>
in Perl.
An alternate interface to Filter::Util::Call is now available. See
L<Filter::Simple> for more details.
A I<Perl Source Filter> is implemented as a Perl module. The structure
of the module can take one of two broadly similar formats. To
distinguish between them, the first will be referred to as I<method
filter> and the second as I<closure filter>.
Here is a skeleton for the I<method filter>:
package MyFilter ;
use Filter::Util::Call ;
sub import
{
my($type, @arguments) = @_ ;
filter_add([]) ;
}
sub filter
{
my($self) = @_ ;
my($status) ;
$status = filter_read() ;
$status ;
}
1 ;
and this is the equivalent skeleton for the I<closure filter>:
package MyFilter ;
use Filter::Util::Call ;
sub import
{
my($type, @arguments) = @_ ;
filter_add(
sub
{
my($status) ;
$status = filter_read() ;
$status ;
} )
}
1 ;
To make use of either of the two filter modules above, place the line
below in a Perl source file.
use MyFilter;
In fact, the skeleton modules shown above are fully functional I<Source
Filters>, albeit fairly useless ones. All they does is filter the
source stream without modifying it at all.
As you can see both modules have a broadly similar structure. They both
make use of the C<Filter::Util::Call> module and both have an C<import>
method. The difference between them is that the I<method filter>
requires a I<filter> method, whereas the I<closure filter> gets the
equivalent of a I<filter> method with the anonymous sub passed to
I<filter_add>.
To make proper use of the I<closure filter> shown above you need to
have a good understanding of the concept of a I<closure>. See
L<perlref> for more details on the mechanics of I<closures>.
=head2 B<use Filter::Util::Call>
The following functions are exported by C<Filter::Util::Call>:
filter_add()
filter_read()
filter_read_exact()
filter_del()
=head2 B<import()>
The C<import> method is used to create an instance of the filter. It is
called indirectly by Perl when it encounters the C<use MyFilter> line
in a source file (See L<perlfunc/import> for more details on
C<import>).
It will always have at least one parameter automatically passed by Perl
- this corresponds to the name of the package. In the example above it
will be C<"MyFilter">.
Apart from the first parameter, import can accept an optional list of
parameters. These can be used to pass parameters to the filter. For
example:
use MyFilter qw(a b c) ;
will result in the C<@_> array having the following values:
@_ [0] => "MyFilter"
@_ [1] => "a"
@_ [2] => "b"
@_ [3] => "c"
Before terminating, the C<import> function must explicitly install the
filter by calling C<filter_add>.
=head2 B<filter_add()>
The function, C<filter_add>, actually installs the filter. It takes one
parameter which should be a reference. The kind of reference used will
dictate which of the two filter types will be used.
If a CODE reference is used then a I<closure filter> will be assumed.
If a CODE reference is not used, a I<method filter> will be assumed.
In a I<method filter>, the reference can be used to store context
information. The reference will be I<blessed> into the package by
C<filter_add>, unless the reference was already blessed.
See the filters at the end of this documents for examples of using
context information using both I<method filters> and I<closure
filters>.
=head2 B<filter() and anonymous sub>
Both the C<filter> method used with a I<method filter> and the
anonymous sub used with a I<closure filter> is where the main
processing for the filter is done.
The big difference between the two types of filter is that the I<method
filter> uses the object passed to the method to store any context data,
whereas the I<closure filter> uses the lexical variables that are
maintained by the closure.
Note that the single parameter passed to the I<method filter>,
C<$self>, is the same reference that was passed to C<filter_add>
blessed into the filter's package. See the example filters later on for
details of using C<$self>.
Here is a list of the common features of the anonymous sub and the
C<filter()> method.
=over 5
=item B<$_>
Although C<$_> doesn't actually appear explicitly in the sample filters
above, it is implicitly used in a number of places.
Firstly, when either C<filter> or the anonymous sub are called, a local
copy of C<$_> will automatically be created. It will always contain the
empty string at this point.
Next, both C<filter_read> and C<filter_read_exact> will append any
source data that is read to the end of C<$_>.
Finally, when C<filter> or the anonymous sub are finished processing,
they are expected to return the filtered source using C<$_>.
This implicit use of C<$_> greatly simplifies the filter.
=item B<$status>
The status value that is returned by the user's C<filter> method or
anonymous sub and the C<filter_read> and C<read_exact> functions take
the same set of values, namely:
< 0 Error
= 0 EOF
> 0 OK
=item B<filter_read> and B<filter_read_exact>
These functions are used by the filter to obtain either a line or block
from the next filter in the chain or the actual source file if there
aren't any other filters.
The function C<filter_read> takes two forms:
$status = filter_read() ;
$status = filter_read($size) ;
The first form is used to request a I<line>, the second requests a
I<block>.
In line mode, C<filter_read> will append the next source line to the
end of the C<$_> scalar.
In block mode, C<filter_read> will append a block of data which is <=
C<$size> to the end of the C<$_> scalar. It is important to emphasise
the that C<filter_read> will not necessarily read a block which is
I<precisely> C<$size> bytes.
If you need to be able to read a block which has an exact size, you can
use the function C<filter_read_exact>. It works identically to
C<filter_read> in block mode, except it will try to read a block which
is exactly C<$size> bytes in length. The only circumstances when it
will not return a block which is C<$size> bytes long is on EOF or
error.
It is I<very> important to check the value of C<$status> after I<every>
call to C<filter_read> or C<filter_read_exact>.
=item B<filter_del>
The function, C<filter_del>, is used to disable the current filter. It
does not affect the running of the filter. All it does is tell Perl not
to call filter any more.
See L<Example 4: Using filter_del> for details.
=item I<real_import>
Internal function which adds the filter, based on the L<filter_add>
argument type.
=item I<unimport()>
May be used to disable a filter, but is rarely needed. See L<filter_del>.
=back
=head1 LIMITATIONS
See L<perlfilter/LIMITATIONS> for an overview of the general problems
filtering code in a textual line-level only.
=over
=item __DATA__ is ignored
The content from the __DATA__ block is not filtered.
This is a serious limitation, e.g. for the L<Switch> module.
See L<http://search.cpan.org/perldoc?Switch#LIMITATIONS> for more.
=item Max. codesize limited to 32-bit
Currently internal buffer lengths are limited to 32-bit only.
=back
=head1 EXAMPLES
Here are a few examples which illustrate the key concepts - as such
most of them are of little practical use.
The C<examples> sub-directory has copies of all these filters
implemented both as I<method filters> and as I<closure filters>.
=head2 Example 1: A simple filter.
Below is a I<method filter> which is hard-wired to replace all
occurrences of the string C<"Joe"> to C<"Jim">. Not particularly
Useful, but it is the first example and I wanted to keep it simple.
package Joe2Jim ;
use Filter::Util::Call ;
sub import
{
my($type) = @_ ;
filter_add(bless []) ;
}
sub filter
{
my($self) = @_ ;
my($status) ;
s/Joe/Jim/g
if ($status = filter_read()) > 0 ;
$status ;
}
1 ;
Here is an example of using the filter:
use Joe2Jim ;
print "Where is Joe?\n" ;
And this is what the script above will print:
Where is Jim?
=head2 Example 2: Using the context
The previous example was not particularly useful. To make it more
general purpose we will make use of the context data and allow any
arbitrary I<from> and I<to> strings to be used. This time we will use a
I<closure filter>. To reflect its enhanced role, the filter is called
C<Subst>.
package Subst ;
use Filter::Util::Call ;
use Carp ;
sub import
{
croak("usage: use Subst qw(from to)")
unless @_ == 3 ;
my ($self, $from, $to) = @_ ;
filter_add(
sub
{
my ($status) ;
s/$from/$to/
if ($status = filter_read()) > 0 ;
$status ;
})
}
1 ;
and is used like this:
use Subst qw(Joe Jim) ;
print "Where is Joe?\n" ;
=head2 Example 3: Using the context within the filter
Here is a filter which a variation of the C<Joe2Jim> filter. As well as
substituting all occurrences of C<"Joe"> to C<"Jim"> it keeps a count
of the number of substitutions made in the context object.
Once EOF is detected (C<$status> is zero) the filter will insert an
extra line into the source stream. When this extra line is executed it
will print a count of the number of substitutions actually made.
Note that C<$status> is set to C<1> in this case.
package Count ;
use Filter::Util::Call ;
sub filter
{
my ($self) = @_ ;
my ($status) ;
if (($status = filter_read()) > 0 ) {
s/Joe/Jim/g ;
++ $$self ;
}
elsif ($$self >= 0) { # EOF
$_ = "print q[Made ${$self} substitutions\n]" ;
$status = 1 ;
$$self = -1 ;
}
$status ;
}
sub import
{
my ($self) = @_ ;
my ($count) = 0 ;
filter_add(\$count) ;
}
1 ;
Here is a script which uses it:
use Count ;
print "Hello Joe\n" ;
print "Where is Joe\n" ;
Outputs:
Hello Jim
Where is Jim
Made 2 substitutions
=head2 Example 4: Using filter_del
Another variation on a theme. This time we will modify the C<Subst>
filter to allow a starting and stopping pattern to be specified as well
as the I<from> and I<to> patterns. If you know the I<vi> editor, it is
the equivalent of this command:
:/start/,/stop/s/from/to/
When used as a filter we want to invoke it like this:
use NewSubst qw(start stop from to) ;
Here is the module.
package NewSubst ;
use Filter::Util::Call ;
use Carp ;
sub import
{
my ($self, $start, $stop, $from, $to) = @_ ;
my ($found) = 0 ;
croak("usage: use Subst qw(start stop from to)")
unless @_ == 5 ;
filter_add(
sub
{
my ($status) ;
if (($status = filter_read()) > 0) {
$found = 1
if $found == 0 and /$start/ ;
if ($found) {
s/$from/$to/ ;
filter_del() if /$stop/ ;
}
}
$status ;
} )
}
1 ;
=head1 Filter::Simple
If you intend using the Filter::Call functionality, I would strongly
recommend that you check out Damian Conway's excellent Filter::Simple
module. Damian's module provides a much cleaner interface than
Filter::Util::Call. Although it doesn't allow the fine control that
Filter::Util::Call does, it should be adequate for the majority of
applications. It's available at
http://search.cpan.org/dist/Filter-Simple/
=head1 AUTHOR
Paul Marquess
=head1 DATE
26th January 1996
=head1 LICENSE
Copyright (c) 1995-2011 Paul Marquess. All rights reserved.
Copyright (c) 2011-2014, 2018-2022 Reini Urban. All rights reserved.
Copyright (c) 2014-2017 cPanel Inc. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,856 @@
package Hash::Util;
require 5.007003;
use strict;
use Carp;
use warnings;
no warnings 'uninitialized';
use warnings::register;
no warnings 'experimental::builtin';
use builtin qw(reftype);
require Exporter;
our @EXPORT_OK = qw(
fieldhash fieldhashes
all_keys
lock_keys unlock_keys
lock_value unlock_value
lock_hash unlock_hash
lock_keys_plus
hash_locked hash_unlocked
hashref_locked hashref_unlocked
hidden_keys legal_keys
lock_ref_keys unlock_ref_keys
lock_ref_value unlock_ref_value
lock_hashref unlock_hashref
lock_ref_keys_plus
hidden_ref_keys legal_ref_keys
hash_seed hash_value hv_store
bucket_stats bucket_stats_formatted bucket_info bucket_array
lock_hash_recurse unlock_hash_recurse
lock_hashref_recurse unlock_hashref_recurse
hash_traversal_mask
bucket_ratio
used_buckets
num_buckets
);
BEGIN {
# make sure all our XS routines are available early so their prototypes
# are correctly applied in the following code.
our $VERSION = '0.30';
require XSLoader;
XSLoader::load();
}
sub import {
my $class = shift;
if ( grep /fieldhash/, @_ ) {
require Hash::Util::FieldHash;
Hash::Util::FieldHash->import(':all'); # for re-export
}
unshift @_, $class;
goto &Exporter::import;
}
=head1 NAME
Hash::Util - A selection of general-utility hash subroutines
=head1 SYNOPSIS
# Restricted hashes
use Hash::Util qw(
fieldhash fieldhashes
all_keys
lock_keys unlock_keys
lock_value unlock_value
lock_hash unlock_hash
lock_keys_plus
hash_locked hash_unlocked
hashref_locked hashref_unlocked
hidden_keys legal_keys
lock_ref_keys unlock_ref_keys
lock_ref_value unlock_ref_value
lock_hashref unlock_hashref
lock_ref_keys_plus
hidden_ref_keys legal_ref_keys
hash_seed hash_value hv_store
bucket_stats bucket_info bucket_array
lock_hash_recurse unlock_hash_recurse
lock_hashref_recurse unlock_hashref_recurse
hash_traversal_mask
);
my %hash = (foo => 42, bar => 23);
# Ways to restrict a hash
lock_keys(%hash);
lock_keys(%hash, @keyset);
lock_keys_plus(%hash, @additional_keys);
# Ways to inspect the properties of a restricted hash
my @legal = legal_keys(%hash);
my @hidden = hidden_keys(%hash);
my $ref = all_keys(%hash,@keys,@hidden);
my $is_locked = hash_locked(%hash);
# Remove restrictions on the hash
unlock_keys(%hash);
# Lock individual values in a hash
lock_value (%hash, 'foo');
unlock_value(%hash, 'foo');
# Ways to change the restrictions on both keys and values
lock_hash (%hash);
unlock_hash(%hash);
my $hashes_are_randomised = hash_seed() !~ /^\0+$/;
my $int_hash_value = hash_value( 'string' );
my $mask= hash_traversal_mask(%hash);
hash_traversal_mask(%hash,1234);
=head1 DESCRIPTION
C<Hash::Util> and C<Hash::Util::FieldHash> contain special functions
for manipulating hashes that don't really warrant a keyword.
C<Hash::Util> contains a set of functions that support
L<restricted hashes|/"Restricted hashes">. These are described in
this document. C<Hash::Util::FieldHash> contains an (unrelated)
set of functions that support the use of hashes in
I<inside-out classes>, described in L<Hash::Util::FieldHash>.
By default C<Hash::Util> does not export anything.
=head2 Restricted hashes
5.8.0 introduces the ability to restrict a hash to a certain set of
keys. No keys outside of this set can be added. It also introduces
the ability to lock an individual key so it cannot be deleted and the
ability to ensure that an individual value cannot be changed.
This is intended to largely replace the deprecated pseudo-hashes.
=over 4
=item B<lock_keys>
=item B<unlock_keys>
lock_keys(%hash);
lock_keys(%hash, @keys);
Restricts the given %hash's set of keys to @keys. If @keys is not
given it restricts it to its current keyset. No more keys can be
added. delete() and exists() will still work, but will not alter
the set of allowed keys. B<Note>: the current implementation prevents
the hash from being bless()ed while it is in a locked state. Any attempt
to do so will raise an exception. Of course you can still bless()
the hash before you call lock_keys() so this shouldn't be a problem.
unlock_keys(%hash);
Removes the restriction on the %hash's keyset.
B<Note> that if any of the values of the hash have been locked they will not
be unlocked after this sub executes.
Both routines return a reference to the hash operated on.
=cut
sub lock_ref_keys {
my($hash, @keys) = @_;
_clear_placeholders(%$hash);
if( @keys ) {
my %keys = map { ($_ => 1) } @keys;
my %original_keys = map { ($_ => 1) } keys %$hash;
foreach my $k (keys %original_keys) {
croak "Hash has key '$k' which is not in the new key set"
unless $keys{$k};
}
foreach my $k (@keys) {
$hash->{$k} = undef unless exists $hash->{$k};
}
Internals::SvREADONLY %$hash, 1;
foreach my $k (@keys) {
delete $hash->{$k} unless $original_keys{$k};
}
}
else {
Internals::SvREADONLY %$hash, 1;
}
return $hash;
}
sub unlock_ref_keys {
my $hash = shift;
Internals::SvREADONLY %$hash, 0;
return $hash;
}
sub lock_keys (\%;@) { lock_ref_keys(@_) }
sub unlock_keys (\%) { unlock_ref_keys(@_) }
#=item B<_clear_placeholders>
#
# This function removes any placeholder keys from a hash. See Perl_hv_clear_placeholders()
# in hv.c for what it does exactly. It is currently exposed as XS by universal.c and
# injected into the Hash::Util namespace.
#
# It is not intended for use outside of this module, and may be changed
# or removed without notice or deprecation cycle.
#
#=cut
#
# sub _clear_placeholders {} # just in case someone searches...
=item B<lock_keys_plus>
lock_keys_plus(%hash,@additional_keys)
Similar to C<lock_keys()>, with the difference being that the optional key list
specifies keys that may or may not be already in the hash. Essentially this is
an easier way to say
lock_keys(%hash,@additional_keys,keys %hash);
Returns a reference to %hash
=cut
sub lock_ref_keys_plus {
my ($hash,@keys) = @_;
my @delete;
_clear_placeholders(%$hash);
foreach my $key (@keys) {
unless (exists($hash->{$key})) {
$hash->{$key}=undef;
push @delete,$key;
}
}
Internals::SvREADONLY(%$hash,1);
delete @{$hash}{@delete};
return $hash
}
sub lock_keys_plus(\%;@) { lock_ref_keys_plus(@_) }
=item B<lock_value>
=item B<unlock_value>
lock_value (%hash, $key);
unlock_value(%hash, $key);
Locks and unlocks the value for an individual key of a hash. The value of a
locked key cannot be changed.
Unless %hash has already been locked the key/value could be deleted
regardless of this setting.
Returns a reference to the %hash.
=cut
sub lock_ref_value {
my($hash, $key) = @_;
# I'm doubtful about this warning, as it seems not to be true.
# Marking a value in the hash as RO is useful, regardless
# of the status of the hash itself.
carp "Cannot usefully lock values in an unlocked hash"
if !Internals::SvREADONLY(%$hash) && warnings::enabled;
Internals::SvREADONLY $hash->{$key}, 1;
return $hash
}
sub unlock_ref_value {
my($hash, $key) = @_;
Internals::SvREADONLY $hash->{$key}, 0;
return $hash
}
sub lock_value (\%$) { lock_ref_value(@_) }
sub unlock_value (\%$) { unlock_ref_value(@_) }
=item B<lock_hash>
=item B<unlock_hash>
lock_hash(%hash);
lock_hash() locks an entire hash, making all keys and values read-only.
No value can be changed, no keys can be added or deleted.
unlock_hash(%hash);
unlock_hash() does the opposite of lock_hash(). All keys and values
are made writable. All values can be changed and keys can be added
and deleted.
Returns a reference to the %hash.
=cut
sub lock_hashref {
my $hash = shift;
lock_ref_keys($hash);
foreach my $value (values %$hash) {
Internals::SvREADONLY($value,1);
}
return $hash;
}
sub unlock_hashref {
my $hash = shift;
foreach my $value (values %$hash) {
Internals::SvREADONLY($value, 0);
}
unlock_ref_keys($hash);
return $hash;
}
sub lock_hash (\%) { lock_hashref(@_) }
sub unlock_hash (\%) { unlock_hashref(@_) }
=item B<lock_hash_recurse>
=item B<unlock_hash_recurse>
lock_hash_recurse(%hash);
lock_hash() locks an entire hash and any hashes it references recursively,
making all keys and values read-only. No value can be changed, no keys can
be added or deleted.
This method B<only> recurses into hashes that are referenced by another hash.
Thus a Hash of Hashes (HoH) will all be restricted, but a Hash of Arrays of
Hashes (HoAoH) will only have the top hash restricted.
unlock_hash_recurse(%hash);
unlock_hash_recurse() does the opposite of lock_hash_recurse(). All keys and
values are made writable. All values can be changed and keys can be added
and deleted. Identical recursion restrictions apply as to lock_hash_recurse().
Returns a reference to the %hash.
=cut
sub lock_hashref_recurse {
my $hash = shift;
lock_ref_keys($hash);
foreach my $value (values %$hash) {
my $type = reftype($value);
if (defined($type) and $type eq 'HASH') {
lock_hashref_recurse($value);
}
Internals::SvREADONLY($value,1);
}
return $hash
}
sub unlock_hashref_recurse {
my $hash = shift;
foreach my $value (values %$hash) {
my $type = reftype($value);
if (defined($type) and $type eq 'HASH') {
unlock_hashref_recurse($value);
}
Internals::SvREADONLY($value,0);
}
unlock_ref_keys($hash);
return $hash;
}
sub lock_hash_recurse (\%) { lock_hashref_recurse(@_) }
sub unlock_hash_recurse (\%) { unlock_hashref_recurse(@_) }
=item B<hashref_locked>
=item B<hash_locked>
hashref_locked(\%hash) and print "Hash is locked!\n";
hash_locked(%hash) and print "Hash is locked!\n";
Returns true if the hash and its keys are locked.
=cut
sub hashref_locked {
my $hash=shift;
Internals::SvREADONLY(%$hash);
}
sub hash_locked(\%) { hashref_locked(@_) }
=item B<hashref_unlocked>
=item B<hash_unlocked>
hashref_unlocked(\%hash) and print "Hash is unlocked!\n";
hash_unlocked(%hash) and print "Hash is unlocked!\n";
Returns true if the hash and its keys are unlocked.
=cut
sub hashref_unlocked {
my $hash=shift;
!Internals::SvREADONLY(%$hash);
}
sub hash_unlocked(\%) { hashref_unlocked(@_) }
=for demerphqs_editor
sub legal_ref_keys{}
sub hidden_ref_keys{}
sub all_keys{}
=cut
sub legal_keys(\%) { legal_ref_keys(@_) }
sub hidden_keys(\%){ hidden_ref_keys(@_) }
=item B<legal_keys>
my @keys = legal_keys(%hash);
Returns the list of the keys that are legal in a restricted hash.
In the case of an unrestricted hash this is identical to calling
keys(%hash).
=item B<hidden_keys>
my @keys = hidden_keys(%hash);
Returns the list of the keys that are legal in a restricted hash but
do not have a value associated to them. Thus if 'foo' is a
"hidden" key of the %hash it will return false for both C<defined>
and C<exists> tests.
In the case of an unrestricted hash this will return an empty list.
B<NOTE> this is an experimental feature that is heavily dependent
on the current implementation of restricted hashes. Should the
implementation change, this routine may become meaningless, in which
case it will return an empty list.
=item B<all_keys>
all_keys(%hash,@keys,@hidden);
Populates the arrays @keys with the all the keys that would pass
an C<exists> tests, and populates @hidden with the remaining legal
keys that have not been utilized.
Returns a reference to the hash.
In the case of an unrestricted hash this will be equivalent to
$ref = do {
@keys = keys %hash;
@hidden = ();
\%hash
};
B<NOTE> this is an experimental feature that is heavily dependent
on the current implementation of restricted hashes. Should the
implementation change this routine may become meaningless in which
case it will behave identically to how it would behave on an
unrestricted hash.
=item B<hash_seed>
my $hash_seed = hash_seed();
hash_seed() returns the seed bytes used to randomise hash ordering.
B<Note that the hash seed is sensitive information>: by knowing it one
can craft a denial-of-service attack against Perl code, even remotely,
see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
B<Do not disclose the hash seed> to people who don't need to know it.
See also L<perlrun/PERL_HASH_SEED_DEBUG>.
Prior to Perl 5.17.6 this function returned a UV, it now returns a string,
which may be of nearly any size as determined by the hash function your
Perl has been built with. Possible sizes may be but are not limited to
4 bytes (for most hash algorithms) and 16 bytes (for siphash).
=item B<hash_value>
my $hash_value = hash_value($string);
my $hash_value = hash_value($string, $seed);
C<hash_value($string)>
returns
the current perl's internal hash value for a given string.
C<hash_value($string, $seed)>
returns the hash value as if computed with a different seed.
If the custom seed is too short, the function errors out.
The minimum length of the seed is implementation-dependent.
Returns a 32-bit integer
representing the hash value of the string passed in.
The 1-parameter value is only reliable
for the lifetime of the process.
It may be different
depending on invocation, environment variables, perl version,
architectures, and build options.
B<Note that the hash value of a given string is sensitive information>:
by knowing it one can deduce the hash seed which in turn can allow one to
craft a denial-of-service attack against Perl code, even remotely,
see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
B<Do not disclose the hash value of a string> to people who don't need to
know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>.
=item B<bucket_info>
Return a set of basic information about a hash.
my ($keys, $buckets, $used, @length_counts)= bucket_info($hash);
Fields are as follows:
0: Number of keys in the hash
1: Number of buckets in the hash
2: Number of used buckets in the hash
rest : list of counts, Kth element is the number of buckets
with K keys in it.
See also bucket_stats() and bucket_array().
=item B<bucket_stats>
Returns a list of statistics about a hash.
my ($keys, $buckets, $used, $quality, $utilization_ratio,
$collision_pct, $mean, $stddev, @length_counts)
= bucket_stats($hashref);
Fields are as follows:
0: Number of keys in the hash
1: Number of buckets in the hash
2: Number of used buckets in the hash
3: Hash Quality Score
4: Percent of buckets used
5: Percent of keys which are in collision
6: Mean bucket length of occupied buckets
7: Standard Deviation of bucket lengths of occupied buckets
rest : list of counts, Kth element is the number of buckets
with K keys in it.
See also bucket_info() and bucket_array().
Note that Hash Quality Score would be 1 for an ideal hash, numbers
close to and below 1 indicate good hashing, and number significantly
above indicate a poor score. In practice it should be around 0.95 to 1.05.
It is defined as:
$score= sum( $count[$length] * ($length * ($length + 1) / 2) )
/
( ( $keys / 2 * $buckets ) *
( $keys + ( 2 * $buckets ) - 1 ) )
The formula is from the Red Dragon book (reformulated to use the data available)
and is documented at L<http://www.strchr.com/hash_functions>
=item B<bucket_array>
my $array= bucket_array(\%hash);
Returns a packed representation of the bucket array associated with a hash. Each element
of the array is either an integer K, in which case it represents K empty buckets, or
a reference to another array which contains the keys that are in that bucket.
B<Note that the information returned by bucket_array is sensitive information>:
by knowing it one can directly attack perl's hash function which in turn may allow
one to craft a denial-of-service attack against Perl code, even remotely,
see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
B<Do not disclose the output of this function> to people who don't need to
know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly
for debugging and diagnostics purposes only, it is hard to imagine a reason why it
would be used in production code.
=cut
sub bucket_stats {
my ($hash) = @_;
my ($keys, $buckets, $used, @length_counts) = bucket_info($hash);
my $sum;
my $score;
for (1 .. $#length_counts) {
$sum += ($length_counts[$_] * $_);
$score += $length_counts[$_] * ( $_ * ($_ + 1 ) / 2 );
}
$score = $score /
(( $keys / (2 * $buckets )) * ( $keys + ( 2 * $buckets ) - 1 ))
if $keys;
my ($mean, $stddev)= (0, 0);
if ($used) {
$mean= $sum / $used;
$sum= 0;
$sum += ($length_counts[$_] * (($_-$mean)**2)) for 1 .. $#length_counts;
$stddev= sqrt($sum/$used);
}
return $keys, $buckets, $used, $keys ? ($score, $used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : ();
}
=item B<bucket_stats_formatted>
print bucket_stats_formatted($hashref);
Return a formatted report of the information returned by bucket_stats().
An example report looks like this:
Keys: 50 Buckets: 33/64 Quality-Score: 1.01 (Good)
Utilized Buckets: 51.56% Optimal: 78.12% Keys In Collision: 34.00%
Chain Length - mean: 1.52 stddev: 0.66
Buckets 64 [0000000000000000000000000000000111111111111111111122222222222333]
Len 0 Pct: 48.44 [###############################]
Len 1 Pct: 29.69 [###################]
Len 2 Pct: 17.19 [###########]
Len 3 Pct: 4.69 [###]
Keys 50 [11111111111111111111111111111111122222222222222333]
Pos 1 Pct: 66.00 [#################################]
Pos 2 Pct: 28.00 [##############]
Pos 3 Pct: 6.00 [###]
The first set of stats gives some summary statistical information,
including the quality score translated into "Good", "Poor" and "Bad",
(score<=1.05, score<=1.2, score>1.2). See the documentation in
bucket_stats() for more details.
The two sets of barcharts give stats and a visual indication of performance
of the hash.
The first gives data on bucket chain lengths and provides insight on how
much work a fetch *miss* will take. In this case we have to inspect every item
in a bucket before we can be sure the item is not in the list. The performance
for an insert is equivalent to this case, as is a delete where the item
is not in the hash.
The second gives data on how many keys are at each depth in the chain, and
gives an idea of how much work a fetch *hit* will take. The performance for
an update or delete of an item in the hash is equivalent to this case.
Note that these statistics are summary only. Actual performance will depend
on real hit/miss ratios accessing the hash. If you are concerned by hit ratios
you are recommended to "oversize" your hash by using something like:
keys(%hash)= keys(%hash) << $k;
With $k chosen carefully, and likely to be a small number like 1 or 2. In
theory the larger the bucket array the less chance of collision.
=cut
sub _bucket_stats_formatted_bars {
my ($total, $ary, $start_idx, $title, $row_title)= @_;
my $return = "";
my $max_width= $total > 64 ? 64 : $total;
my $bar_width= $max_width / $total;
my $str= "";
if ( @$ary < 10) {
for my $idx ($start_idx .. $#$ary) {
$str .= $idx x sprintf("%.0f", ($ary->[$idx] * $bar_width));
}
} else {
$str= "-" x $max_width;
}
$return .= sprintf "%-7s %6d [%s]\n",$title, $total, $str;
foreach my $idx ($start_idx .. $#$ary) {
$return .= sprintf "%-.3s %3d %6.2f%% %6d [%s]\n",
$row_title,
$idx,
$ary->[$idx] / $total * 100,
$ary->[$idx],
"#" x sprintf("%.0f", ($ary->[$idx] * $bar_width)),
;
}
return $return;
}
sub bucket_stats_formatted {
my ($hashref)= @_;
my ($keys, $buckets, $used, $score, $utilization_ratio, $collision_pct,
$mean, $stddev, @length_counts) = bucket_stats($hashref);
my $return= sprintf "Keys: %d Buckets: %d/%d Quality-Score: %.2f (%s)\n"
. "Utilized Buckets: %.2f%% Optimal: %.2f%% Keys In Collision: %.2f%%\n"
. "Chain Length - mean: %.2f stddev: %.2f\n",
$keys, $used, $buckets, $score, $score <= 1.05 ? "Good" : $score < 1.2 ? "Poor" : "Bad",
$utilization_ratio * 100,
$keys/$buckets * 100,
$collision_pct * 100,
$mean, $stddev;
my @key_depth;
$key_depth[$_]= $length_counts[$_] + ( $key_depth[$_+1] || 0 )
for reverse 1 .. $#length_counts;
if ($keys) {
$return .= _bucket_stats_formatted_bars($buckets, \@length_counts, 0, "Buckets", "Len");
$return .= _bucket_stats_formatted_bars($keys, \@key_depth, 1, "Keys", "Pos");
}
return $return
}
=item B<hv_store>
my $sv = 0;
hv_store(%hash,$key,$sv) or die "Failed to alias!";
$hash{$key} = 1;
print $sv; # prints 1
Stores an alias to a variable in a hash instead of copying the value.
=item B<hash_traversal_mask>
As of Perl 5.18 every hash has its own hash traversal order, and this order
changes every time a new element is inserted into the hash. This functionality
is provided by maintaining an unsigned integer mask (U32) which is xor'ed
with the actual bucket id during a traversal of the hash buckets using keys(),
values() or each().
You can use this subroutine to get and set the traversal mask for a specific
hash. Setting the mask ensures that a given hash will produce the same key
order. B<Note> that this does B<not> guarantee that B<two> hashes will produce
the same key order for the same hash seed and traversal mask, items that
collide into one bucket may have different orders regardless of this setting.
=item B<bucket_ratio>
This function behaves the same way that scalar(%hash) behaved prior to
Perl 5.25. Specifically if the hash is tied, then it calls the SCALAR tied
hash method, if untied then if the hash is empty it return 0, otherwise it
returns a string containing the number of used buckets in the hash,
followed by a slash, followed by the total number of buckets in the hash.
my %hash=("foo"=>1);
print Hash::Util::bucket_ratio(%hash); # prints "1/8"
=item B<used_buckets>
This function returns the count of used buckets in the hash. It is expensive
to calculate and the value is NOT cached, so avoid use of this function
in production code.
=item B<num_buckets>
This function returns the total number of buckets the hash holds, or would
hold if the array were created. (When a hash is freshly created the array
may not be allocated even though this value will be non-zero.)
=back
=head2 Operating on references to hashes.
Most subroutines documented in this module have equivalent versions
that operate on references to hashes instead of native hashes.
The following is a list of these subs. They are identical except
in name and in that instead of taking a %hash they take a $hashref,
and additionally are not prototyped.
=over 4
=item lock_ref_keys
=item unlock_ref_keys
=item lock_ref_keys_plus
=item lock_ref_value
=item unlock_ref_value
=item lock_hashref
=item unlock_hashref
=item lock_hashref_recurse
=item unlock_hashref_recurse
=item hash_ref_unlocked
=item legal_ref_keys
=item hidden_ref_keys
=back
=head1 CAVEATS
Note that the trapping of the restricted operations is not atomic:
for example
eval { %hash = (illegal_key => 1) }
leaves the C<%hash> empty rather than with its original contents.
=head1 BUGS
The interface exposed by this module is very close to the current
implementation of restricted hashes. Over time it is expected that
this behavior will be extended and the interface abstracted further.
=head1 AUTHOR
Michael G Schwern <schwern@pobox.com> on top of code by Nick
Ing-Simmons and Jeffrey Friedl.
hv_store() is from Array::RefElem, Copyright 2000 Gisle Aas.
Additional code by Yves Orton.
Description of C<hash_value($string, $seed)>
by Christopher Yeleighton <ne01026@shark.2a.pl>
=head1 SEE ALSO
L<Scalar::Util>, L<List::Util> and L<perlsec/"Algorithmic Complexity Attacks">.
L<Hash::Util::FieldHash>.
=cut
1;

View File

@@ -0,0 +1,860 @@
package Hash::Util::FieldHash;
use strict;
use warnings;
no warnings 'experimental::builtin';
use builtin qw(reftype);
our $VERSION = '1.26';
use Exporter 'import';
our %EXPORT_TAGS = (
'all' => [ qw(
fieldhash
fieldhashes
idhash
idhashes
id
id_2obj
register
)],
);
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
{
require XSLoader;
my %ob_reg; # private object registry
sub _ob_reg { \ %ob_reg }
XSLoader::load();
}
sub fieldhash (\%) {
for ( shift ) {
return unless ref() && reftype( $_) eq 'HASH';
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0);
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 2) == 2;
return;
}
}
sub idhash (\%) {
for ( shift ) {
return unless ref() && reftype( $_) eq 'HASH';
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0);
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1) == 1;
return;
}
}
sub fieldhashes { map &fieldhash( $_), @_ }
sub idhashes { map &idhash( $_), @_ }
1;
__END__
=head1 NAME
Hash::Util::FieldHash - Support for Inside-Out Classes
=head1 SYNOPSIS
### Create fieldhashes
use Hash::Util qw(fieldhash fieldhashes);
# Create a single field hash
fieldhash my %foo;
# Create three at once...
fieldhashes \ my(%foo, %bar, %baz);
# ...or any number
fieldhashes @hashrefs;
### Create an idhash and register it for garbage collection
use Hash::Util::FieldHash qw(idhash register);
idhash my %name;
my $object = \ do { my $o };
# register the idhash for garbage collection with $object
register($object, \ %name);
# the following entry will be deleted when $object goes out of scope
$name{$object} = 'John Doe';
### Register an ordinary hash for garbage collection
use Hash::Util::FieldHash qw(id register);
my %name;
my $object = \ do { my $o };
# register the hash %name for garbage collection of $object's id
register $object, \ %name;
# the following entry will be deleted when $object goes out of scope
$name{id $object} = 'John Doe';
=head1 FUNCTIONS
C<Hash::Util::FieldHash> offers a number of functions in support of
L<The Inside-out Technique> of class construction.
=over
=item id
id($obj)
Returns the reference address of a reference $obj. If $obj is
not a reference, returns $obj.
This function is a stand-in replacement for
L<Scalar::Util::refaddr|Scalar::Util/refaddr>,
that is, it returns
the reference address of its argument as a numeric value. The only
difference is that C<refaddr()> returns C<undef> when given a
non-reference while C<id()> returns its argument unchanged.
C<id()> also uses a caching technique that makes it faster when
the id of an object is requested often, but slower if it is needed
only once or twice.
=item id_2obj
$obj = id_2obj($id)
If C<$id> is the id of a registered object (see L</register>), returns
the object, otherwise an undefined value. For registered objects this
is the inverse function of C<id()>.
=item register
register($obj)
register($obj, @hashrefs)
In the first form, registers an object to work with for the function
C<id_2obj()>. In the second form, it additionally marks the given
hashrefs down for garbage collection. This means that when the object
goes out of scope, any entries in the given hashes under the key of
C<id($obj)> will be deleted from the hashes.
It is a fatal error to register a non-reference $obj. Any non-hashrefs
among the following arguments are silently ignored.
It is I<not> an error to register the same object multiple times with
varying sets of hashrefs. Any hashrefs that are not registered yet
will be added, others ignored.
Registry also implies thread support. When a new thread is created,
all references are replaced with new ones, including all objects.
If a hash uses the reference address of an object as a key, that
connection would be broken. With a registered object, its id will
be updated in all hashes registered with it.
=item idhash
idhash my %hash
Makes an idhash from the argument, which must be a hash.
An I<idhash> works like a normal hash, except that it stringifies a
I<reference used as a key> differently. A reference is stringified
as if the C<id()> function had been invoked on it, that is, its
reference address in decimal is used as the key.
=item idhashes
idhashes \ my(%hash, %gnash, %trash)
idhashes \ @hashrefs
Creates many idhashes from its hashref arguments. Returns those
arguments that could be converted or their number in scalar context.
=item fieldhash
fieldhash %hash;
Creates a single fieldhash. The argument must be a hash. Returns
a reference to the given hash if successful, otherwise nothing.
A I<fieldhash> is, in short, an idhash with auto-registry. When an
object (or, indeed, any reference) is used as a fieldhash key, the
fieldhash is automatically registered for garbage collection with
the object, as if C<register $obj, \ %fieldhash> had been called.
=item fieldhashes
fieldhashes @hashrefs;
Creates any number of field hashes. Arguments must be hash references.
Returns the converted hashrefs in list context, their number in scalar
context.
=back
=head1 DESCRIPTION
A word on terminology: I shall use the term I<field> for a scalar
piece of data that a class associates with an object. Other terms that
have been used for this concept are "object variable", "(object) property",
"(object) attribute" and more. Especially "attribute" has some currency
among Perl programmer, but that clashes with the C<attributes> pragma. The
term "field" also has some currency in this sense and doesn't seem
to conflict with other Perl terminology.
In Perl, an object is a blessed reference. The standard way of associating
data with an object is to store the data inside the object's body, that is,
the piece of data pointed to by the reference.
In consequence, if two or more classes want to access an object they
I<must> agree on the type of reference and also on the organization of
data within the object body. Failure to agree on the type results in
immediate death when the wrong method tries to access an object. Failure
to agree on data organization may lead to one class trampling over the
data of another.
This object model leads to a tight coupling between subclasses.
If one class wants to inherit from another (and both classes access
object data), the classes must agree about implementation details.
Inheritance can only be used among classes that are maintained together,
in a single source or not.
In particular, it is not possible to write general-purpose classes
in this technique, classes that can advertise themselves as "Put me
on your @ISA list and use my methods". If the other class has different
ideas about how the object body is used, there is trouble.
For reference C<Name_hash> in L</Example 1> shows the standard implementation of
a simple class C<Name> in the well-known hash based way. It also demonstrates
the predictable failure to construct a common subclass C<NamedFile>
of C<Name> and the class C<IO::File> (whose objects I<must> be globrefs).
Thus, techniques are of interest that store object data I<not> in
the object body but some other place.
=head2 The Inside-out Technique
With I<inside-out> classes, each class declares a (typically lexical)
hash for each field it wants to use. The reference address of an
object is used as the hash key. By definition, the reference address
is unique to each object so this guarantees a place for each field that
is private to the class and unique to each object. See C<Name_id>
in L</Example 1> for a simple example.
In comparison to the standard implementation where the object is a
hash and the fields correspond to hash keys, here the fields correspond
to hashes, and the object determines the hash key. Thus the hashes
appear to be turned I<inside out>.
The body of an object is never examined by an inside-out class, only
its reference address is used. This allows for the body of an actual
object to be I<anything at all> while the object methods of the class
still work as designed. This is a key feature of inside-out classes.
=head2 Problems of Inside-out
Inside-out classes give us freedom of inheritance, but as usual there
is a price.
Most obviously, there is the necessity of retrieving the reference
address of an object for each data access. It's a minor inconvenience,
but it does clutter the code.
More important (and less obvious) is the necessity of garbage
collection. When a normal object dies, anything stored in the
object body is garbage-collected by perl. With inside-out objects,
Perl knows nothing about the data stored in field hashes by a class,
but these must be deleted when the object goes out of scope. Thus
the class must provide a C<DESTROY> method to take care of that.
In the presence of multiple classes it can be non-trivial
to make sure that every relevant destructor is called for
every object. Perl calls the first one it finds on the
inheritance tree (if any) and that's it.
A related issue is thread-safety. When a new thread is created,
the Perl interpreter is cloned, which implies that all reference
addresses in use will be replaced with new ones. Thus, if a class
tries to access a field of a cloned object its (cloned) data will
still be stored under the now invalid reference address of the
original in the parent thread. A general C<CLONE> method must
be provided to re-establish the association.
=head2 Solutions
C<Hash::Util::FieldHash> addresses these issues on several
levels.
The C<id()> function is provided in addition to the
existing C<Scalar::Util::refaddr()>. Besides its short name
it can be a little faster under some circumstances (and a
bit slower under others). Benchmark if it matters. The
working of C<id()> also allows the use of the class name
as a I<generic object> as described L<further down|/"The Generic Object">.
The C<id()> function is incorporated in I<id hashes> in the sense
that it is called automatically on every key that is used with
the hash. No explicit call is necessary.
The problems of garbage collection and thread safety are both
addressed by the function C<register()>. It registers an object
together with any number of hashes. Registry means that when the
object dies, an entry in any of the hashes under the reference
address of this object will be deleted. This guarantees garbage
collection in these hashes. It also means that on thread
cloning the object's entries in registered hashes will be
replaced with updated entries whose key is the cloned object's
reference address. Thus the object-data association becomes
thread-safe.
Object registry is best done when the object is initialized
for use with a class. That way, garbage collection and thread
safety are established for every object and every field that is
initialized.
Finally, I<field hashes> incorporate all these functions in one
package. Besides automatically calling the C<id()> function
on every object used as a key, the object is registered with
the field hash on first use. Classes based on field hashes
are fully garbage-collected and thread safe without further
measures.
=head2 More Problems
Another problem that occurs with inside-out classes is serialization.
Since the object data is not in its usual place, standard routines
like C<Storable::freeze()>, C<Storable::thaw()> and
C<Data::Dumper::Dumper()> can't deal with it on their own. Both
C<Data::Dumper> and C<Storable> provide the necessary hooks to
make things work, but the functions or methods used by the hooks
must be provided by each inside-out class.
A general solution to the serialization problem would require another
level of registry, one that associates I<classes> and fields.
So far, the functions of C<Hash::Util::FieldHash> are unaware of
any classes, which I consider a feature. Therefore C<Hash::Util::FieldHash>
doesn't address the serialization problems.
=head2 The Generic Object
Classes based on the C<id()> function (and hence classes based on
C<idhash()> and C<fieldhash()>) show a peculiar behavior in that
the class name can be used like an object. Specifically, methods
that set or read data associated with an object continue to work as
class methods, just as if the class name were an object, distinct from
all other objects, with its own data. This object may be called
the I<generic object> of the class.
This works because field hashes respond to keys that are not references
like a normal hash would and use the string offered as the hash key.
Thus, if a method is called as a class method, the field hash is presented
with the class name instead of an object and blithely uses it as a key.
Since the keys of real objects are decimal numbers, there is no
conflict and the slot in the field hash can be used like any other.
The C<id()> function behaves correspondingly with respect to non-reference
arguments.
Two possible uses (besides ignoring the property) come to mind.
A singleton class could be implemented this using the generic object.
If necessary, an C<init()> method could die or ignore calls with
actual objects (references), so only the generic object will ever exist.
Another use of the generic object would be as a template. It is
a convenient place to store class-specific defaults for various
fields to be used in actual object initialization.
Usually, the feature can be entirely ignored. Calling I<object
methods> as I<class methods> normally leads to an error and isn't used
routinely anywhere. It may be a problem that this error isn't
indicated by a class with a generic object.
=head2 How to use Field Hashes
Traditionally, the definition of an inside-out class contains a bare
block inside which a number of lexical hashes are declared and the
basic accessor methods defined, usually through C<Scalar::Util::refaddr>.
Further methods may be defined outside this block. There has to be
a DESTROY method and, for thread support, a CLONE method.
When field hashes are used, the basic structure remains the same.
Each lexical hash will be made a field hash. The call to C<refaddr>
can be omitted from the accessor methods. DESTROY and CLONE methods
are not necessary.
If you have an existing inside-out class, simply making all hashes
field hashes with no other change should make no difference. Through
the calls to C<refaddr> or equivalent, the field hashes never get to
see a reference and work like normal hashes. Your DESTROY (and
CLONE) methods are still needed.
To make the field hashes kick in, it is easiest to redefine C<refaddr>
as
sub refaddr { shift }
instead of importing it from C<Scalar::Util>. It should now be possible
to disable DESTROY and CLONE. Note that while it isn't disabled,
DESTROY will be called before the garbage collection of field hashes,
so it will be invoked with a functional object and will continue to
function.
It is not desirable to import the functions C<fieldhash> and/or
C<fieldhashes> into every class that is going to use them. They
are only used once to set up the class. When the class is up and running,
these functions serve no more purpose.
If there are only a few field hashes to declare, it is simplest to
use Hash::Util::FieldHash;
early and call the functions qualified:
Hash::Util::FieldHash::fieldhash my %foo;
Otherwise, import the functions into a convenient package like
C<HUF> or, more general, C<Aux>
{
package Aux;
use Hash::Util::FieldHash ':all';
}
and call
Aux::fieldhash my %foo;
as needed.
=head2 Garbage-Collected Hashes
Garbage collection in a field hash means that entries will "spontaneously"
disappear when the object that created them disappears. That must be
borne in mind, especially when looping over a field hash. If anything
you do inside the loop could cause an object to go out of scope, a
random key may be deleted from the hash you are looping over. That
can throw the loop iterator, so it's best to cache a consistent snapshot
of the keys and/or values and loop over that. You will still have to
check that a cached entry still exists when you get to it.
Garbage collection can be confusing when keys are created in a field hash
from normal scalars as well as references. Once a reference is I<used> with
a field hash, the entry will be collected, even if it was later overwritten
with a plain scalar key (every positive integer is a candidate). This
is true even if the original entry was deleted in the meantime. In fact,
deletion from a field hash, and also a test for existence constitute
I<use> in this sense and create a liability to delete the entry when
the reference goes out of scope. If you happen to create an entry
with an identical key from a string or integer, that will be collected
instead. Thus, mixed use of references and plain scalars as field hash
keys is not entirely supported.
=head1 EXAMPLES
The examples show a very simple class that implements a I<name>, consisting
of a first and last name (no middle initial). The name class has four
methods:
=over
=item * C<init()>
An object method that initializes the first and last name to its
two arguments. If called as a class method, C<init()> creates an
object in the given class and initializes that.
=item * C<first()>
Retrieve the first name
=item * C<last()>
Retrieve the last name
=item * C<name()>
Retrieve the full name, the first and last name joined by a blank.
=back
The examples show this class implemented with different levels of
support by C<Hash::Util::FieldHash>. All supported combinations
are shown. The difference between implementations is often quite
small. The implementations are:
=over
=item * C<Name_hash>
A conventional (not inside-out) implementation where an object is
a hash that stores the field values, without support by
C<Hash::Util::FieldHash>. This implementation doesn't allow
arbitrary inheritance.
=item * C<Name_id>
Inside-out implementation based on the C<id()> function. It needs
a C<DESTROY> method. For thread support a C<CLONE> method (not shown)
would also be needed. Instead of C<Hash::Util::FieldHash::id()> the
function C<Scalar::Util::refaddr> could be used with very little
functional difference. This is the basic pattern of an inside-out
class.
=item * C<Name_idhash>
Idhash-based inside-out implementation. Like C<Name_id> it needs
a C<DESTROY> method and would need C<CLONE> for thread support.
=item * C<Name_id_reg>
Inside-out implementation based on the C<id()> function with explicit
object registry. No destructor is needed and objects are thread safe.
=item * C<Name_idhash_reg>
Idhash-based inside-out implementation with explicit object registry.
No destructor is needed and objects are thread safe.
=item * C<Name_fieldhash>
FieldHash-based inside-out implementation. Object registry happens
automatically. No destructor is needed and objects are thread safe.
=back
These examples are realized in the code below, which could be copied
to a file F<Example.pm>.
=head2 Example 1
use strict; use warnings;
{
package Name_hash; # standard implementation: the
# object is a hash
sub init {
my $obj = shift;
my ($first, $last) = @_;
# create an object if called as class method
$obj = bless {}, $obj unless ref $obj;
$obj->{ first} = $first;
$obj->{ last} = $last;
$obj;
}
sub first { shift()->{ first} }
sub last { shift()->{ last} }
sub name {
my $n = shift;
join ' ' => $n->first, $n->last;
}
}
{
package Name_id;
use Hash::Util::FieldHash qw(id);
my (%first, %last);
sub init {
my $obj = shift;
my ($first, $last) = @_;
# create an object if called as class method
$obj = bless \ my $o, $obj unless ref $obj;
$first{ id $obj} = $first;
$last{ id $obj} = $last;
$obj;
}
sub first { $first{ id shift()} }
sub last { $last{ id shift()} }
sub name {
my $n = shift;
join ' ' => $n->first, $n->last;
}
sub DESTROY {
my $id = id shift;
delete $first{ $id};
delete $last{ $id};
}
}
{
package Name_idhash;
use Hash::Util::FieldHash;
Hash::Util::FieldHash::idhashes( \ my (%first, %last) );
sub init {
my $obj = shift;
my ($first, $last) = @_;
# create an object if called as class method
$obj = bless \ my $o, $obj unless ref $obj;
$first{ $obj} = $first;
$last{ $obj} = $last;
$obj;
}
sub first { $first{ shift()} }
sub last { $last{ shift()} }
sub name {
my $n = shift;
join ' ' => $n->first, $n->last;
}
sub DESTROY {
my $n = shift;
delete $first{ $n};
delete $last{ $n};
}
}
{
package Name_id_reg;
use Hash::Util::FieldHash qw(id register);
my (%first, %last);
sub init {
my $obj = shift;
my ($first, $last) = @_;
# create an object if called as class method
$obj = bless \ my $o, $obj unless ref $obj;
register( $obj, \ (%first, %last) );
$first{ id $obj} = $first;
$last{ id $obj} = $last;
$obj;
}
sub first { $first{ id shift()} }
sub last { $last{ id shift()} }
sub name {
my $n = shift;
join ' ' => $n->first, $n->last;
}
}
{
package Name_idhash_reg;
use Hash::Util::FieldHash qw(register);
Hash::Util::FieldHash::idhashes \ my (%first, %last);
sub init {
my $obj = shift;
my ($first, $last) = @_;
# create an object if called as class method
$obj = bless \ my $o, $obj unless ref $obj;
register( $obj, \ (%first, %last) );
$first{ $obj} = $first;
$last{ $obj} = $last;
$obj;
}
sub first { $first{ shift()} }
sub last { $last{ shift()} }
sub name {
my $n = shift;
join ' ' => $n->first, $n->last;
}
}
{
package Name_fieldhash;
use Hash::Util::FieldHash;
Hash::Util::FieldHash::fieldhashes \ my (%first, %last);
sub init {
my $obj = shift;
my ($first, $last) = @_;
# create an object if called as class method
$obj = bless \ my $o, $obj unless ref $obj;
$first{ $obj} = $first;
$last{ $obj} = $last;
$obj;
}
sub first { $first{ shift()} }
sub last { $last{ shift()} }
sub name {
my $n = shift;
join ' ' => $n->first, $n->last;
}
}
1;
To exercise the various implementations the script L<below|/"Example 2"> can
be used.
It sets up a class C<Name> that is a mirror of one of the implementation
classes C<Name_hash>, C<Name_id>, ..., C<Name_fieldhash>. That determines
which implementation is run.
The script first verifies the function of the C<Name> class.
In the second step, the free inheritability of the implementation
(or lack thereof) is demonstrated. For this purpose it constructs
a class called C<NamedFile> which is a common subclass of C<Name> and
the standard class C<IO::File>. This puts inheritability to the test
because objects of C<IO::File> I<must> be globrefs. Objects of C<NamedFile>
should behave like a file opened for reading and also support the C<name()>
method. This class juncture works with exception of the C<Name_hash>
implementation, where object initialization fails because of the
incompatibility of object bodies.
=head2 Example 2
use strict; use warnings; $| = 1;
use Example;
{
package Name;
use parent 'Name_id'; # define here which implementation to run
}
# Verify that the base package works
my $n = Name->init(qw(Albert Einstein));
print $n->name, "\n";
print "\n";
# Create a named file handle (See definition below)
my $nf = NamedFile->init(qw(/tmp/x Filomena File));
# use as a file handle...
for ( 1 .. 3 ) {
my $l = <$nf>;
print "line $_: $l";
}
# ...and as a Name object
print "...brought to you by ", $nf->name, "\n";
exit;
# Definition of NamedFile
package NamedFile;
use parent 'Name';
use parent 'IO::File';
sub init {
my $obj = shift;
my ($file, $first, $last) = @_;
$obj = $obj->IO::File::new() unless ref $obj;
$obj->open($file) or die "Can't read '$file': $!";
$obj->Name::init($first, $last);
}
__END__
=head1 GUTS
To make C<Hash::Util::FieldHash> work, there were two changes to
F<perl> itself. C<PERL_MAGIC_uvar> was made available for hashes,
and weak references now call uvar C<get> magic after a weakref has been
cleared. The first feature is used to make field hashes intercept
their keys upon access. The second one triggers garbage collection.
=head2 The C<PERL_MAGIC_uvar> interface for hashes
C<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and
C<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which
defines the interface. The call happens for hashes with "uvar" magic
if the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set>
fields. Hashes are unaffected if (and as long as) these fields
hold different values.
Upon the call, the C<mg_obj> field will hold the hash key to be accessed.
Upon return, the C<SV*> value in C<mg_obj> will be used in place of the
original key in the hash access. The integer index value in the first
parameter will be the C<action> value from C<hv_fetch_common>, or -1
if the call is from C<hv_delete_common>.
This is a template for a function suitable for the C<uf_val> field in
a C<ufuncs> structure for this call. The C<uf_set> and C<uf_index>
fields are irrelevant.
IV watch_key(pTHX_ IV action, SV* field) {
MAGIC* mg = mg_find(field, PERL_MAGIC_uvar);
SV* keysv = mg->mg_obj;
/* Do whatever you need to. If you decide to
supply a different key newkey, return it like this
*/
sv_2mortal(newkey);
mg->mg_obj = newkey;
return 0;
}
=head2 Weakrefs call uvar magic
When a weak reference is stored in an C<SV> that has "uvar" magic, C<set>
magic is called after the reference has gone stale. This hook can be
used to trigger further garbage-collection activities associated with
the referenced object.
=head2 How field hashes work
The three features of key hashes, I<key replacement>, I<thread support>,
and I<garbage collection> are supported by a data structure called
the I<object registry>. This is a private hash where every object
is stored. An "object" in this sense is any reference (blessed or
unblessed) that has been used as a field hash key.
The object registry keeps track of references that have been used as
field hash keys. The keys are generated from the reference address
like in a field hash (though the registry isn't a field hash). Each
value is a weak copy of the original reference, stored in an C<SV> that
is itself magical (C<PERL_MAGIC_uvar> again). The magical structure
holds a list (another hash, really) of field hashes that the reference
has been used with. When the weakref becomes stale, the magic is
activated and uses the list to delete the reference from all field
hashes it has been used with. After that, the entry is removed from
the object registry itself. Implicitly, that frees the magic structure
and the storage it has been using.
Whenever a reference is used as a field hash key, the object registry
is checked and a new entry is made if necessary. The field hash is
then added to the list of fields this reference has used.
The object registry is also used to repair a field hash after thread
cloning. Here, the entire object registry is processed. For every
reference found there, the field hashes it has used are visited and
the entry is updated.
=head2 Internal function Hash::Util::FieldHash::_fieldhash
# test if %hash is a field hash
my $result = _fieldhash \ %hash, 0;
# make %hash a field hash
my $result = _fieldhash \ %hash, 1;
C<_fieldhash> is the internal function used to create field hashes.
It takes two arguments, a hashref and a mode. If the mode is boolean
false, the hash is not changed but tested if it is a field hash. If
the hash isn't a field hash the return value is boolean false. If it
is, the return value indicates the mode of field hash. When called with
a boolean true mode, it turns the given hash into a field hash of this
mode, returning the mode of the created field hash. C<_fieldhash>
does not erase the given hash.
Currently there is only one type of field hash, and only the boolean
value of the mode makes a difference, but that may change.
=head1 AUTHOR
Anno Siegel (ANNO) wrote the xs code and the changes in perl proper
Jerry Hedden (JDHEDDEN) made it faster
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2006-2007 by (Anno Siegel)
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.7 or,
at your option, any later version of Perl 5 you may have available.
=cut

View File

@@ -0,0 +1,292 @@
package I18N::Langinfo;
use 5.006;
use strict;
use warnings;
use Carp;
use Exporter 'import';
require XSLoader;
our @EXPORT = qw(langinfo);
our @EXPORT_OK = qw(
ABDAY_1
ABDAY_2
ABDAY_3
ABDAY_4
ABDAY_5
ABDAY_6
ABDAY_7
ABMON_1
ABMON_10
ABMON_11
ABMON_12
ABMON_2
ABMON_3
ABMON_4
ABMON_5
ABMON_6
ABMON_7
ABMON_8
ABMON_9
ALT_DIGITS
AM_STR
CODESET
CRNCYSTR
DAY_1
DAY_2
DAY_3
DAY_4
DAY_5
DAY_6
DAY_7
D_FMT
D_T_FMT
ERA
ERA_D_FMT
ERA_D_T_FMT
ERA_T_FMT
MON_1
MON_10
MON_11
MON_12
MON_2
MON_3
MON_4
MON_5
MON_6
MON_7
MON_8
MON_9
NOEXPR
NOSTR
PM_STR
RADIXCHAR
THOUSEP
T_FMT
T_FMT_AMPM
YESEXPR
YESSTR
);
our $VERSION = '0.22';
XSLoader::load();
1;
__END__
=head1 NAME
I18N::Langinfo - query locale information
=head1 SYNOPSIS
use I18N::Langinfo;
=head1 DESCRIPTION
The langinfo() function queries various locale information that can be
used to localize output and user interfaces. It uses the current underlying
locale, regardless of whether or not it was called from within the scope of
S<C<use locale>>. The langinfo() function requires
one numeric argument that identifies the locale constant to query:
if no argument is supplied, C<$_> is used. The numeric constants
appropriate to be used as arguments are exportable from I18N::Langinfo.
The following example will import the langinfo() function itself and
three constants to be used as arguments to langinfo(): a constant for
the abbreviated first day of the week (the numbering starts from
Sunday = 1) and two more constants for the affirmative and negative
answers for a yes/no question in the current locale.
use I18N::Langinfo qw(langinfo ABDAY_1 YESSTR NOSTR);
my ($abday_1, $yesstr, $nostr) =
map { langinfo($_) } (ABDAY_1, YESSTR, NOSTR);
print "$abday_1? [$yesstr/$nostr] ";
In other words, in the "C" (or English) locale the above will probably
print something like:
Sun? [yes/no]
but under a French locale
dim? [oui/non]
The usually available constants are as follows.
=over 4
=item *
For abbreviated and full length days of the week and months of the year:
ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 ABDAY_6 ABDAY_7
ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12
DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7
MON_1 MON_2 MON_3 MON_4 MON_5 MON_6
MON_7 MON_8 MON_9 MON_10 MON_11 MON_12
=item *
For the date-time, date, and time formats used by the strftime() function
(see L<POSIX>):
D_T_FMT D_FMT T_FMT
=item *
For the locales for which it makes sense to have ante meridiem and post
meridiem time formats:
AM_STR PM_STR T_FMT_AMPM
=item *
For the character code set being used (such as "ISO8859-1", "cp850",
"koi8-r", "sjis", "utf8", etc.), and for the currency string:
CODESET CRNCYSTR
=item *
For an alternate representation of digits, for the
radix character used between the integer and the fractional part
of decimal numbers, the group separator string for large-ish floating point
numbers (yes, the final two are redundant with
L<POSIX::localeconv()|POSIX/localeconv>):
ALT_DIGITS RADIXCHAR THOUSEP
=item *
For the affirmative and negative responses and expressions:
YESSTR YESEXPR NOSTR NOEXPR
=item *
For the eras based on typically some ruler, such as the Japanese Emperor
(naturally only defined in the appropriate locales):
ERA ERA_D_FMT ERA_D_T_FMT ERA_T_FMT
=back
=head2 For systems without C<nl_langinfo>
This module originally was just a wrapper for the libc C<nl_langinfo>
function, and did not work on systems lacking it, such as Windows.
Starting in Perl 5.28, this module works on all platforms. When
C<nl_langinfo> is not available, it uses various methods to construct
what that function, if present, would return. But there are potential
glitches. These are the items that could be different:
=over
=item C<ERA>
Unimplemented, so returns C<"">.
=item C<CODESET>
This should work properly for Windows platforms. On almost all other modern
platforms, it will reliably return "UTF-8" if that is the code set.
Otherwise, it depends on the locale's name. If that is of the form
C<foo.bar>, it will assume C<bar> is the code set; and it also knows about the
two locales "C" and "POSIX". If none of those apply it returns C<"">.
=item C<YESEXPR>
=item C<YESSTR>
=item C<NOEXPR>
=item C<NOSTR>
Only the values for English are returned. C<YESSTR> and C<NOSTR> have been
removed from POSIX 2008, and are retained here for backwards compatibility.
Your platform's C<nl_langinfo> may not support them.
=item C<D_FMT>
Always evaluates to C<%x>, the locale's appropriate date representation.
=item C<T_FMT>
Always evaluates to C<%X>, the locale's appropriate time representation.
=item C<D_T_FMT>
Always evaluates to C<%c>, the locale's appropriate date and time
representation.
=item C<CRNCYSTR>
The return may be incorrect for those rare locales where the currency symbol
replaces the radix character. If you have examples of it needing to work
differently, please file a report at L<https://github.com/Perl/perl5/issues>.
=item C<ALT_DIGITS>
Currently this gives the same results as Linux does. If you have examples of
it needing to work differently, please file a report at
L<https://github.com/Perl/perl5/issues>.
=item C<ERA_D_FMT>
=item C<ERA_T_FMT>
=item C<ERA_D_T_FMT>
=item C<T_FMT_AMPM>
These are derived by using C<strftime()>, and not all versions of that function
know about them. C<""> is returned for these on such systems.
=back
See your L<nl_langinfo(3)> for more information about the available
constants. (Often this means having to look directly at the
F<langinfo.h> C header file.)
=head2 EXPORT
By default only the C<langinfo()> function is exported.
=head1 BUGS
Before Perl 5.28, the returned values are unreliable for the C<RADIXCHAR> and
C<THOUSEP> locale constants.
Starting in 5.28, changing locales on threaded builds is supported on systems
that offer thread-safe locale functions. These include POSIX 2008 systems and
Windows starting with Visual Studio 2005, and this module will work properly
in such situations. However, on threaded builds on Windows prior to Visual
Studio 2015, retrieving the items C<CRNCYSTR> and C<THOUSEP> can result in a
race with a thread that has converted to use the global locale. It is quite
uncommon for a thread to have done this. It would be possible to construct a
workaround for this; patches welcome: see L<perlapi/switch_to_global_locale>.
=head1 SEE ALSO
L<perllocale>, L<POSIX/localeconv>, L<POSIX/setlocale>, L<nl_langinfo(3)>.
=head1 AUTHOR
Jarkko Hietaniemi, E<lt>jhi@hut.fiE<gt>. Now maintained by Perl 5 porters.
=head1 COPYRIGHT AND LICENSE
Copyright 2001 by Jarkko Hietaniemi
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,70 @@
#
package IO;
use XSLoader ();
use Carp;
use strict;
use warnings;
our $VERSION = "1.52";
XSLoader::load 'IO', $VERSION;
sub import {
shift;
warnings::warnif('deprecated', qq{Parameterless "use IO" deprecated})
if @_ == 0 ;
my @l = @_ ? @_ : qw(Handle Seekable File Pipe Socket Dir);
local @INC = @INC;
pop @INC if $INC[-1] eq '.';
eval join("", map { "require IO::" . (/(\w+)/)[0] . ";\n" } @l)
or croak $@;
}
1;
__END__
=head1 NAME
IO - load various IO modules
=head1 SYNOPSIS
use IO qw(Handle File); # loads IO modules, here IO::Handle, IO::File
use IO; # DEPRECATED
=head1 DESCRIPTION
C<IO> provides a simple mechanism to load several of the IO modules
in one go. The IO modules belonging to the core are:
IO::Handle
IO::Seekable
IO::File
IO::Pipe
IO::Socket
IO::Dir
IO::Select
IO::Poll
Some other IO modules don't belong to the perl core but can be loaded
as well if they have been installed from CPAN. You can discover which
ones exist with this query: L<https://metacpan.org/search?q=IO%3A%3A>.
For more information on any of these modules, please see its respective
documentation.
=head1 DEPRECATED
use IO; # loads all the modules listed below
The loaded modules are IO::Handle, IO::Seekable, IO::File, IO::Pipe,
IO::Socket, IO::Dir. You should instead explicitly import the IO
modules you want.
=cut

View File

@@ -0,0 +1,247 @@
# IO::Dir.pm
#
# Copyright (c) 1997-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
package IO::Dir;
use 5.008_001;
use strict;
use Carp;
use Symbol;
use Exporter;
use IO::File;
use Tie::Hash;
use File::stat;
use File::Spec;
our @ISA = qw(Tie::Hash Exporter);
our $VERSION = "1.52";
our @EXPORT_OK = qw(DIR_UNLINK);
sub DIR_UNLINK () { 1 }
sub new {
@_ >= 1 && @_ <= 2 or croak 'usage: IO::Dir->new([DIRNAME])';
my $class = shift;
my $dh = gensym;
if (@_) {
IO::Dir::open($dh, $_[0])
or return undef;
}
bless $dh, $class;
}
sub DESTROY {
my ($dh) = @_;
local($., $@, $!, $^E, $?);
no warnings 'io';
closedir($dh);
}
sub open {
@_ == 2 or croak 'usage: $dh->open(DIRNAME)';
my ($dh, $dirname) = @_;
return undef
unless opendir($dh, $dirname);
# a dir name should always have a ":" in it; assume dirname is
# in current directory
$dirname = ':' . $dirname if ( ($^O eq 'MacOS') && ($dirname !~ /:/) );
${*$dh}{io_dir_path} = $dirname;
1;
}
sub close {
@_ == 1 or croak 'usage: $dh->close()';
my ($dh) = @_;
closedir($dh);
}
sub read {
@_ == 1 or croak 'usage: $dh->read()';
my ($dh) = @_;
readdir($dh);
}
sub seek {
@_ == 2 or croak 'usage: $dh->seek(POS)';
my ($dh,$pos) = @_;
seekdir($dh,$pos);
}
sub tell {
@_ == 1 or croak 'usage: $dh->tell()';
my ($dh) = @_;
telldir($dh);
}
sub rewind {
@_ == 1 or croak 'usage: $dh->rewind()';
my ($dh) = @_;
rewinddir($dh);
}
sub TIEHASH {
my($class,$dir,$options) = @_;
my $dh = $class->new($dir)
or return undef;
$options ||= 0;
${*$dh}{io_dir_unlink} = $options & DIR_UNLINK;
$dh;
}
sub FIRSTKEY {
my($dh) = @_;
$dh->rewind;
scalar $dh->read;
}
sub NEXTKEY {
my($dh) = @_;
scalar $dh->read;
}
sub EXISTS {
my($dh,$key) = @_;
-e File::Spec->catfile(${*$dh}{io_dir_path}, $key);
}
sub FETCH {
my($dh,$key) = @_;
&lstat(File::Spec->catfile(${*$dh}{io_dir_path}, $key));
}
sub STORE {
my($dh,$key,$data) = @_;
my($atime,$mtime) = ref($data) ? @$data : ($data,$data);
my $file = File::Spec->catfile(${*$dh}{io_dir_path}, $key);
unless(-e $file) {
my $io = IO::File->new($file,O_CREAT | O_RDWR);
$io->close if $io;
}
utime($atime,$mtime, $file);
}
sub DELETE {
my($dh,$key) = @_;
# Only unlink if unlink-ing is enabled
return 0
unless ${*$dh}{io_dir_unlink};
my $file = File::Spec->catfile(${*$dh}{io_dir_path}, $key);
-d $file
? rmdir($file)
: unlink($file);
}
1;
__END__
=head1 NAME
IO::Dir - supply object methods for directory handles
=head1 SYNOPSIS
use IO::Dir;
my $d = IO::Dir->new(".");
if (defined $d) {
while (defined($_ = $d->read)) { something($_); }
$d->rewind;
while (defined($_ = $d->read)) { something_else($_); }
undef $d;
}
tie my %dir, 'IO::Dir', ".";
foreach (keys %dir) {
print $_, " " , $dir{$_}->size,"\n";
}
=head1 DESCRIPTION
The C<IO::Dir> package provides two interfaces to perl's directory reading
routines.
The first interface is an object approach. C<IO::Dir> provides an object
constructor and methods, which are just wrappers around perl's built in
directory reading routines.
=over 4
=item new ( [ DIRNAME ] )
C<new> is the constructor for C<IO::Dir> objects. It accepts one optional
argument which, if given, C<new> will pass to C<open>
=back
The following methods are wrappers for the directory related functions built
into perl (the trailing 'dir' has been removed from the names). See L<perlfunc>
for details of these functions.
=over 4
=item open ( DIRNAME )
=item read ()
=item seek ( POS )
=item tell ()
=item rewind ()
=item close ()
=back
C<IO::Dir> also provides an interface to reading directories via a tied
hash. The tied hash extends the interface beyond just the directory
reading routines by the use of C<lstat>, from the C<File::stat> package,
C<unlink>, C<rmdir> and C<utime>.
=over 4
=item tie %hash, 'IO::Dir', DIRNAME [, OPTIONS ]
=back
The keys of the hash will be the names of the entries in the directory.
Reading a value from the hash will be the result of calling
C<File::stat::lstat>. Deleting an element from the hash will
delete the corresponding file or subdirectory,
provided that C<DIR_UNLINK> is included in the C<OPTIONS>.
Assigning to an entry in the hash will cause the time stamps of the file
to be modified. If the file does not exist then it will be created. Assigning
a single integer to a hash element will cause both the access and
modification times to be changed to that value. Alternatively a reference to
an array of two values can be passed. The first array element will be used to
set the access time and the second element will be used to set the modification
time.
=head1 SEE ALSO
L<File::stat>
=head1 AUTHOR
Graham Barr. Currently maintained by the Perl Porters. Please report all
bugs at L<https://github.com/Perl/perl5/issues>.
=head1 COPYRIGHT
Copyright (c) 1997-2003 Graham Barr <gbarr@pobox.com>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,202 @@
#
package IO::File;
=head1 NAME
IO::File - supply object methods for filehandles
=head1 SYNOPSIS
use IO::File;
my $fh = IO::File->new();
if ($fh->open("< file")) {
print <$fh>;
$fh->close;
}
my $fh = IO::File->new("> file");
if (defined $fh) {
print $fh "bar\n";
$fh->close;
}
my $fh = IO::File->new("file", "r");
if (defined $fh) {
print <$fh>;
undef $fh; # automatically closes the file
}
my $fh = IO::File->new("file", O_WRONLY|O_APPEND);
if (defined $fh) {
print $fh "corge\n";
my $pos = $fh->getpos;
$fh->setpos($pos);
undef $fh; # automatically closes the file
}
autoflush STDOUT 1;
=head1 DESCRIPTION
C<IO::File> inherits from C<IO::Handle> and C<IO::Seekable>. It extends
these classes with methods that are specific to file handles.
=head1 CONSTRUCTOR
=over 4
=item new ( FILENAME [,MODE [,PERMS]] )
Creates an C<IO::File>. If it receives any parameters, they are passed to
the method C<open>; if the open fails, the object is destroyed. Otherwise,
it is returned to the caller.
=item new_tmpfile
Creates an C<IO::File> opened for read/write on a newly created temporary
file. On systems where this is possible, the temporary file is anonymous
(i.e. it is unlinked after creation, but held open). If the temporary
file cannot be created or opened, the C<IO::File> object is destroyed.
Otherwise, it is returned to the caller.
=back
=head1 METHODS
=over 4
=item open( FILENAME [,MODE [,PERMS]] )
=item open( FILENAME, IOLAYERS )
C<open> accepts one, two or three parameters. With one parameter,
it is just a front end for the built-in C<open> function. With two or three
parameters, the first parameter is a filename that may include
whitespace or other special characters, and the second parameter is
the open mode, optionally followed by a file permission value.
If C<IO::File::open> receives a Perl mode string ("E<gt>", "+E<lt>", etc.)
or an ANSI C fopen() mode string ("w", "r+", etc.), it uses the basic
Perl C<open> operator (but protects any special characters).
If C<IO::File::open> is given a numeric mode, it passes that mode
and the optional permissions value to the Perl C<sysopen> operator.
The permissions default to 0666.
If C<IO::File::open> is given a mode that includes the C<:> character,
it passes all the three arguments to the three-argument C<open> operator.
For convenience, C<IO::File> exports the O_XXX constants from the
Fcntl module, if this module is available.
=item binmode( [LAYER] )
C<binmode> sets C<binmode> on the underlying C<IO> object, as documented
in C<perldoc -f binmode>.
C<binmode> accepts one optional parameter, which is the layer to be
passed on to the C<binmode> call.
=back
=head1 NOTE
Some operating systems may perform C<IO::File::new()> or C<IO::File::open()>
on a directory without errors. This behavior is not portable and not
suggested for use. Using C<opendir()> and C<readdir()> or C<IO::Dir> are
suggested instead.
=head1 SEE ALSO
L<perlfunc>,
L<perlop/"I/O Operators">,
L<IO::Handle>,
L<IO::Seekable>,
L<IO::Dir>
=head1 HISTORY
Derived from FileHandle.pm by Graham Barr E<lt>F<gbarr@pobox.com>E<gt>.
=cut
use 5.008_001;
use strict;
use Carp;
use Symbol;
use SelectSaver;
use IO::Seekable;
require Exporter;
our @ISA = qw(IO::Handle IO::Seekable Exporter);
our $VERSION = "1.52";
our @EXPORT = @IO::Seekable::EXPORT;
eval {
# Make all Fcntl O_XXX constants available for importing
require Fcntl;
my @O = grep /^O_/, @Fcntl::EXPORT;
Fcntl->import(@O); # first we import what we want to export
push(@EXPORT, @O);
};
################################################
## Constructor
##
sub new {
my $type = shift;
my $class = ref($type) || $type || "IO::File";
@_ >= 0 && @_ <= 3
or croak "usage: $class->new([FILENAME [,MODE [,PERMS]]])";
my $fh = $class->SUPER::new();
if (@_) {
$fh->open(@_)
or return undef;
}
$fh;
}
################################################
## Open
##
sub open {
@_ >= 2 && @_ <= 4 or croak 'usage: $fh->open(FILENAME [,MODE [,PERMS]])';
my ($fh, $file) = @_;
if (@_ > 2) {
my ($mode, $perms) = @_[2, 3];
if ($mode =~ /^\d+$/) {
defined $perms or $perms = 0666;
return sysopen($fh, $file, $mode, $perms);
} elsif ($mode =~ /:/) {
return open($fh, $mode, $file) if @_ == 3;
croak 'usage: $fh->open(FILENAME, IOLAYERS)';
} else {
return open($fh, IO::Handle::_open_mode_string($mode), $file);
}
}
open($fh, $file);
}
################################################
## Binmode
##
sub binmode {
( @_ == 1 or @_ == 2 ) or croak 'usage $fh->binmode([LAYER])';
my($fh, $layer) = @_;
return binmode $$fh unless $layer;
return binmode $$fh, $layer;
}
1;

View File

@@ -0,0 +1,631 @@
package IO::Handle;
=head1 NAME
IO::Handle - supply object methods for I/O handles
=head1 SYNOPSIS
use IO::Handle;
my $io = IO::Handle->new();
if ($io->fdopen(fileno(STDIN),"r")) {
print $io->getline;
$io->close;
}
my $io = IO::Handle->new();
if ($io->fdopen(fileno(STDOUT),"w")) {
$io->print("Some text\n");
}
# setvbuf is not available by default on Perls 5.8.0 and later.
use IO::Handle '_IOLBF';
$io->setvbuf(my $buffer_var, _IOLBF, 1024);
undef $io; # automatically closes the file if it's open
autoflush STDOUT 1;
=head1 DESCRIPTION
C<IO::Handle> is the base class for all other IO handle classes. It is
not intended that objects of C<IO::Handle> would be created directly,
but instead C<IO::Handle> is inherited from by several other classes
in the IO hierarchy.
If you are reading this documentation, looking for a replacement for
the C<FileHandle> package, then I suggest you read the documentation
for C<IO::File> too.
=head1 CONSTRUCTOR
=over 4
=item new ()
Creates a new C<IO::Handle> object.
=item new_from_fd ( FD, MODE )
Creates an C<IO::Handle> like C<new> does.
It requires two parameters, which are passed to the method C<fdopen>;
if the fdopen fails, the object is destroyed. Otherwise, it is returned
to the caller.
=back
=head1 METHODS
See L<perlfunc> for complete descriptions of each of the following
supported C<IO::Handle> methods, which are just front ends for the
corresponding built-in functions:
$io->close
$io->eof
$io->fcntl( FUNCTION, SCALAR )
$io->fileno
$io->format_write( [FORMAT_NAME] )
$io->getc
$io->ioctl( FUNCTION, SCALAR )
$io->read ( BUF, LEN, [OFFSET] )
$io->print ( ARGS )
$io->printf ( FMT, [ARGS] )
$io->say ( ARGS )
$io->stat
$io->sysread ( BUF, LEN, [OFFSET] )
$io->syswrite ( BUF, [LEN, [OFFSET]] )
$io->truncate ( LEN )
See L<perlvar> for complete descriptions of each of the following
supported C<IO::Handle> methods. All of them return the previous
value of the attribute and takes an optional single argument that when
given will set the value. If no argument is given the previous value
is unchanged (except for $io->autoflush will actually turn ON
autoflush by default).
$io->autoflush ( [BOOL] ) $|
$io->format_page_number( [NUM] ) $%
$io->format_lines_per_page( [NUM] ) $=
$io->format_lines_left( [NUM] ) $-
$io->format_name( [STR] ) $~
$io->format_top_name( [STR] ) $^
$io->input_line_number( [NUM]) $.
The following methods are not supported on a per-filehandle basis.
IO::Handle->format_line_break_characters( [STR] ) $:
IO::Handle->format_formfeed( [STR]) $^L
IO::Handle->output_field_separator( [STR] ) $,
IO::Handle->output_record_separator( [STR] ) $\
IO::Handle->input_record_separator( [STR] ) $/
Furthermore, for doing normal I/O you might need these:
=over 4
=item $io->fdopen ( FD, MODE )
C<fdopen> is like an ordinary C<open> except that its first parameter
is not a filename but rather a file handle name, an IO::Handle object,
or a file descriptor number. (For the documentation of the C<open>
method, see L<IO::File>.)
=item $io->opened
Returns true if the object is currently a valid file descriptor, false
otherwise.
=item $io->getline
This works like <$io> described in L<perlop/"I/O Operators">
except that it's more readable and can be safely called in a
list context but still returns just one line. If used as the conditional
within a C<while> or C-style C<for> loop, however, you will need to
emulate the functionality of <$io> with C<< defined($_ = $io->getline) >>.
=item $io->getlines
This works like <$io> when called in a list context to read all
the remaining lines in a file, except that it's more readable.
It will also croak() if accidentally called in a scalar context.
=item $io->ungetc ( ORD )
Pushes a character with the given ordinal value back onto the given
handle's input stream. Only one character of pushback per handle is
guaranteed.
=item $io->write ( BUF, LEN [, OFFSET ] )
This C<write> is somewhat like C<write> found in C, in that it is the
opposite of read. The wrapper for the perl C<write> function is
called C<format_write>. However, whilst the C C<write> function returns
the number of bytes written, this C<write> function simply returns true
if successful (like C<print>). A more C-like C<write> is C<syswrite>
(see above).
=item $io->error
Returns a true value if the given handle has experienced any errors
since it was opened or since the last call to C<clearerr>, or if the
handle is invalid. It only returns false for a valid handle with no
outstanding errors.
=item $io->clearerr
Clear the given handle's error indicator. Returns -1 if the handle is
invalid, 0 otherwise.
=item $io->sync
C<sync> synchronizes a file's in-memory state with that on the
physical medium. C<sync> does not operate at the perlio api level, but
operates on the file descriptor (similar to sysread, sysseek and
systell). This means that any data held at the perlio api level will not
be synchronized. To synchronize data that is buffered at the perlio api
level you must use the flush method. C<sync> is not implemented on all
platforms. Returns "0 but true" on success, C<undef> on error, C<undef>
for an invalid handle. See L<fsync(3c)>.
=item $io->flush
C<flush> causes perl to flush any buffered data at the perlio api level.
Any unread data in the buffer will be discarded, and any unwritten data
will be written to the underlying file descriptor. Returns "0 but true"
on success, C<undef> on error.
=item $io->printflush ( ARGS )
Turns on autoflush, print ARGS and then restores the autoflush status of the
C<IO::Handle> object. Returns the return value from print.
=item $io->blocking ( [ BOOL ] )
If called with an argument C<blocking> will turn on non-blocking IO if
C<BOOL> is false, and turn it off if C<BOOL> is true.
C<blocking> will return the value of the previous setting, or the
current setting if C<BOOL> is not given.
If an error occurs C<blocking> will return undef and C<$!> will be set.
=back
If the C functions setbuf() and/or setvbuf() are available, then
C<IO::Handle::setbuf> and C<IO::Handle::setvbuf> set the buffering
policy for an IO::Handle. The calling sequences for the Perl functions
are the same as their C counterparts--including the constants C<_IOFBF>,
C<_IOLBF>, and C<_IONBF> for setvbuf()--except that the buffer parameter
specifies a scalar variable to use as a buffer. You should only
change the buffer before any I/O, or immediately after calling flush.
WARNING: The IO::Handle::setvbuf() is not available by default on
Perls 5.8.0 and later because setvbuf() is rather specific to using
the stdio library, while Perl prefers the new perlio subsystem instead.
WARNING: A variable used as a buffer by C<setbuf> or C<setvbuf> B<must not
be modified> in any way until the IO::Handle is closed or C<setbuf> or
C<setvbuf> is called again, or memory corruption may result! Remember that
the order of global destruction is undefined, so even if your buffer
variable remains in scope until program termination, it may be undefined
before the file IO::Handle is closed. Note that you need to import the
constants C<_IOFBF>, C<_IOLBF>, and C<_IONBF> explicitly. Like C, setbuf
returns nothing. setvbuf returns "0 but true", on success, C<undef> on
failure.
Lastly, there is a special method for working under B<-T> and setuid/gid
scripts:
=over 4
=item $io->untaint
Marks the object as taint-clean, and as such data read from it will also
be considered taint-clean. Note that this is a very trusting action to
take, and appropriate consideration for the data source and potential
vulnerability should be kept in mind. Returns 0 on success, -1 if setting
the taint-clean flag failed. (eg invalid handle)
=back
=head1 NOTE
An C<IO::Handle> object is a reference to a symbol/GLOB reference (see
the L<Symbol> package). Some modules that
inherit from C<IO::Handle> may want to keep object related variables
in the hash table part of the GLOB. In an attempt to prevent modules
trampling on each other I propose the that any such module should prefix
its variables with its own name separated by _'s. For example the IO::Socket
module keeps a C<timeout> variable in 'io_socket_timeout'.
=head1 SEE ALSO
L<perlfunc>,
L<perlop/"I/O Operators">,
L<IO::File>
=head1 BUGS
Due to backwards compatibility, all filehandles resemble objects
of class C<IO::Handle>, or actually classes derived from that class.
They actually aren't. Which means you can't derive your own
class from C<IO::Handle> and inherit those methods.
=head1 HISTORY
Derived from FileHandle.pm by Graham Barr E<lt>F<gbarr@pobox.com>E<gt>
=cut
use 5.008_001;
use strict;
use Carp;
use Symbol;
use SelectSaver;
use IO (); # Load the XS module
require Exporter;
our @ISA = qw(Exporter);
our $VERSION = "1.52";
our @EXPORT_OK = qw(
autoflush
output_field_separator
output_record_separator
input_record_separator
input_line_number
format_page_number
format_lines_per_page
format_lines_left
format_name
format_top_name
format_line_break_characters
format_formfeed
format_write
print
printf
say
getline
getlines
printflush
flush
SEEK_SET
SEEK_CUR
SEEK_END
_IOFBF
_IOLBF
_IONBF
);
################################################
## Constructors, destructors.
##
sub new {
my $class = ref($_[0]) || $_[0] || "IO::Handle";
if (@_ != 1) {
# Since perl will automatically require IO::File if needed, but
# also initialises IO::File's @ISA as part of the core we must
# ensure IO::File is loaded if IO::Handle is. This avoids effect-
# ively "half-loading" IO::File.
if ($] > 5.013 && $class eq 'IO::File' && !$INC{"IO/File.pm"}) {
require IO::File;
shift;
return IO::File::->new(@_);
}
croak "usage: $class->new()";
}
my $io = gensym;
bless $io, $class;
}
sub new_from_fd {
my $class = ref($_[0]) || $_[0] || "IO::Handle";
@_ == 3 or croak "usage: $class->new_from_fd(FD, MODE)";
my $io = gensym;
shift;
IO::Handle::fdopen($io, @_)
or return undef;
bless $io, $class;
}
#
# There is no need for DESTROY to do anything, because when the
# last reference to an IO object is gone, Perl automatically
# closes its associated files (if any). However, to avoid any
# attempts to autoload DESTROY, we here define it to do nothing.
#
sub DESTROY {}
################################################
## Open and close.
##
sub _open_mode_string {
my ($mode) = @_;
$mode =~ /^\+?(<|>>?)$/
or $mode =~ s/^r(\+?)$/$1</
or $mode =~ s/^w(\+?)$/$1>/
or $mode =~ s/^a(\+?)$/$1>>/
or croak "IO::Handle: bad open mode: $mode";
$mode;
}
sub fdopen {
@_ == 3 or croak 'usage: $io->fdopen(FD, MODE)';
my ($io, $fd, $mode) = @_;
local(*GLOB);
if (ref($fd) && "$fd" =~ /GLOB\(/o) {
# It's a glob reference; Alias it as we cannot get name of anon GLOBs
my $n = qualify(*GLOB);
*GLOB = *{*$fd};
$fd = $n;
} elsif ($fd =~ m#^\d+$#) {
# It's an FD number; prefix with "=".
$fd = "=$fd";
}
open($io, _open_mode_string($mode) . '&' . $fd)
? $io : undef;
}
sub close {
@_ == 1 or croak 'usage: $io->close()';
my($io) = @_;
close($io);
}
################################################
## Normal I/O functions.
##
# flock
# select
sub opened {
@_ == 1 or croak 'usage: $io->opened()';
defined fileno($_[0]);
}
sub fileno {
@_ == 1 or croak 'usage: $io->fileno()';
fileno($_[0]);
}
sub getc {
@_ == 1 or croak 'usage: $io->getc()';
getc($_[0]);
}
sub eof {
@_ == 1 or croak 'usage: $io->eof()';
eof($_[0]);
}
sub print {
@_ or croak 'usage: $io->print(ARGS)';
my $this = shift;
print $this @_;
}
sub printf {
@_ >= 2 or croak 'usage: $io->printf(FMT,[ARGS])';
my $this = shift;
printf $this @_;
}
sub say {
@_ or croak 'usage: $io->say(ARGS)';
my $this = shift;
local $\ = "\n";
print $this @_;
}
sub truncate {
@_ == 2 or croak 'usage: $io->truncate(LEN)';
truncate($_[0], $_[1]);
}
sub read {
@_ == 3 || @_ == 4 or croak 'usage: $io->read(BUF, LEN [, OFFSET])';
read($_[0], $_[1], $_[2], $_[3] || 0);
}
sub sysread {
@_ == 3 || @_ == 4 or croak 'usage: $io->sysread(BUF, LEN [, OFFSET])';
sysread($_[0], $_[1], $_[2], $_[3] || 0);
}
sub write {
@_ >= 2 && @_ <= 4 or croak 'usage: $io->write(BUF [, LEN [, OFFSET]])';
local($\) = "";
$_[2] = length($_[1]) unless defined $_[2];
print { $_[0] } substr($_[1], $_[3] || 0, $_[2]);
}
sub syswrite {
@_ >= 2 && @_ <= 4 or croak 'usage: $io->syswrite(BUF [, LEN [, OFFSET]])';
if (defined($_[2])) {
syswrite($_[0], $_[1], $_[2], $_[3] || 0);
} else {
syswrite($_[0], $_[1]);
}
}
sub stat {
@_ == 1 or croak 'usage: $io->stat()';
stat($_[0]);
}
################################################
## State modification functions.
##
sub autoflush {
my $old = SelectSaver->new(qualify($_[0], caller));
my $prev = $|;
$| = @_ > 1 ? $_[1] : 1;
$prev;
}
sub output_field_separator {
carp "output_field_separator is not supported on a per-handle basis"
if ref($_[0]);
my $prev = $,;
$, = $_[1] if @_ > 1;
$prev;
}
sub output_record_separator {
carp "output_record_separator is not supported on a per-handle basis"
if ref($_[0]);
my $prev = $\;
$\ = $_[1] if @_ > 1;
$prev;
}
sub input_record_separator {
carp "input_record_separator is not supported on a per-handle basis"
if ref($_[0]);
my $prev = $/;
$/ = $_[1] if @_ > 1;
$prev;
}
sub input_line_number {
local $.;
() = tell qualify($_[0], caller) if ref($_[0]);
my $prev = $.;
$. = $_[1] if @_ > 1;
$prev;
}
sub format_page_number {
my $old;
$old = SelectSaver->new(qualify($_[0], caller)) if ref($_[0]);
my $prev = $%;
$% = $_[1] if @_ > 1;
$prev;
}
sub format_lines_per_page {
my $old;
$old = SelectSaver->new(qualify($_[0], caller)) if ref($_[0]);
my $prev = $=;
$= = $_[1] if @_ > 1;
$prev;
}
sub format_lines_left {
my $old;
$old = SelectSaver->new(qualify($_[0], caller)) if ref($_[0]);
my $prev = $-;
$- = $_[1] if @_ > 1;
$prev;
}
sub format_name {
my $old;
$old = SelectSaver->new(qualify($_[0], caller)) if ref($_[0]);
my $prev = $~;
$~ = qualify($_[1], caller) if @_ > 1;
$prev;
}
sub format_top_name {
my $old;
$old = SelectSaver->new(qualify($_[0], caller)) if ref($_[0]);
my $prev = $^;
$^ = qualify($_[1], caller) if @_ > 1;
$prev;
}
sub format_line_break_characters {
carp "format_line_break_characters is not supported on a per-handle basis"
if ref($_[0]);
my $prev = $:;
$: = $_[1] if @_ > 1;
$prev;
}
sub format_formfeed {
carp "format_formfeed is not supported on a per-handle basis"
if ref($_[0]);
my $prev = $^L;
$^L = $_[1] if @_ > 1;
$prev;
}
sub formline {
my $io = shift;
my $picture = shift;
local($^A) = $^A;
local($\) = "";
formline($picture, @_);
print $io $^A;
}
sub format_write {
@_ < 3 || croak 'usage: $io->write( [FORMAT_NAME] )';
if (@_ == 2) {
my ($io, $fmt) = @_;
my $oldfmt = $io->format_name(qualify($fmt,caller));
CORE::write($io);
$io->format_name($oldfmt);
} else {
CORE::write($_[0]);
}
}
sub fcntl {
@_ == 3 || croak 'usage: $io->fcntl( OP, VALUE );';
my ($io, $op) = @_;
return fcntl($io, $op, $_[2]);
}
sub ioctl {
@_ == 3 || croak 'usage: $io->ioctl( OP, VALUE );';
my ($io, $op) = @_;
return ioctl($io, $op, $_[2]);
}
# this sub is for compatibility with older releases of IO that used
# a sub called constant to determine if a constant existed -- GMB
#
# The SEEK_* and _IO?BF constants were the only constants at that time
# any new code should just check defined(&CONSTANT_NAME)
sub constant {
no strict 'refs';
my $name = shift;
(($name =~ /^(SEEK_(SET|CUR|END)|_IO[FLN]BF)$/) && defined &{$name})
? &{$name}() : undef;
}
# so that flush.pl can be deprecated
sub printflush {
my $io = shift;
my $old;
$old = SelectSaver->new(qualify($io, caller)) if ref($io);
local $| = 1;
if(ref($io)) {
print $io @_;
}
else {
print @_;
}
}
1;

View File

@@ -0,0 +1,256 @@
# IO::Pipe.pm
#
# Copyright (c) 1996-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
package IO::Pipe;
use 5.008_001;
use IO::Handle;
use strict;
use Carp;
use Symbol;
our $VERSION = "1.52";
sub new {
my $type = shift;
my $class = ref($type) || $type || "IO::Pipe";
@_ == 0 || @_ == 2 or croak "usage: $class->([READFH, WRITEFH])";
my $me = bless gensym(), $class;
my($readfh,$writefh) = @_ ? @_ : $me->handles;
pipe($readfh, $writefh)
or return undef;
@{*$me} = ($readfh, $writefh);
$me;
}
sub handles {
@_ == 1 or croak 'usage: $pipe->handles()';
(IO::Pipe::End->new(), IO::Pipe::End->new());
}
my $do_spawn = $^O eq 'os2' || $^O eq 'MSWin32';
sub _doit {
my $me = shift;
my $rw = shift;
my $pid = $do_spawn ? 0 : fork();
if($pid) { # Parent
return $pid;
}
elsif(defined $pid) { # Child or spawn
my $fh;
my $io = $rw ? \*STDIN : \*STDOUT;
my ($mode, $save) = $rw ? "r" : "w";
if ($do_spawn) {
require Fcntl;
$save = IO::Handle->new_from_fd($io, $mode);
my $handle = shift;
# Close in child:
unless ($^O eq 'MSWin32') {
fcntl($handle, Fcntl::F_SETFD(), 1) or croak "fcntl: $!";
}
$fh = $rw ? ${*$me}[0] : ${*$me}[1];
} else {
shift;
$fh = $rw ? $me->reader() : $me->writer(); # close the other end
}
bless $io, "IO::Handle";
$io->fdopen($fh, $mode);
$fh->close;
if ($do_spawn) {
$pid = eval { system 1, @_ }; # 1 == P_NOWAIT
my $err = $!;
$io->fdopen($save, $mode);
$save->close or croak "Cannot close $!";
croak "IO::Pipe: Cannot spawn-NOWAIT: $err" if not $pid or $pid < 0;
return $pid;
} else {
exec @_ or
croak "IO::Pipe: Cannot exec: $!";
}
}
else {
croak "IO::Pipe: Cannot fork: $!";
}
# NOT Reached
}
sub reader {
@_ >= 1 or croak 'usage: $pipe->reader( [SUB_COMMAND_ARGS] )';
my $me = shift;
return undef
unless(ref($me) || ref($me = $me->new));
my $fh = ${*$me}[0];
my $pid;
$pid = $me->_doit(0, $fh, @_)
if(@_);
close ${*$me}[1];
bless $me, ref($fh);
*$me = *$fh; # Alias self to handle
$me->fdopen($fh->fileno,"r")
unless defined($me->fileno);
bless $fh; # Really wan't un-bless here
${*$me}{'io_pipe_pid'} = $pid
if defined $pid;
$me;
}
sub writer {
@_ >= 1 or croak 'usage: $pipe->writer( [SUB_COMMAND_ARGS] )';
my $me = shift;
return undef
unless(ref($me) || ref($me = $me->new));
my $fh = ${*$me}[1];
my $pid;
$pid = $me->_doit(1, $fh, @_)
if(@_);
close ${*$me}[0];
bless $me, ref($fh);
*$me = *$fh; # Alias self to handle
$me->fdopen($fh->fileno,"w")
unless defined($me->fileno);
bless $fh; # Really wan't un-bless here
${*$me}{'io_pipe_pid'} = $pid
if defined $pid;
$me;
}
package IO::Pipe::End;
our(@ISA);
@ISA = qw(IO::Handle);
sub close {
my $fh = shift;
my $r = $fh->SUPER::close(@_);
waitpid(${*$fh}{'io_pipe_pid'},0)
if(defined ${*$fh}{'io_pipe_pid'});
$r;
}
1;
__END__
=head1 NAME
IO::Pipe - supply object methods for pipes
=head1 SYNOPSIS
use IO::Pipe;
$pipe = IO::Pipe->new();
if($pid = fork()) { # Parent
$pipe->reader();
while(<$pipe>) {
...
}
}
elsif(defined $pid) { # Child
$pipe->writer();
print $pipe ...
}
or
$pipe = IO::Pipe->new();
$pipe->reader(qw(ls -l));
while(<$pipe>) {
...
}
=head1 DESCRIPTION
C<IO::Pipe> provides an interface to creating pipes between
processes.
=head1 CONSTRUCTOR
=over 4
=item new ( [READER, WRITER] )
Creates an C<IO::Pipe>, which is a reference to a newly created symbol
(see the L<Symbol> package). C<IO::Pipe::new> optionally takes two
arguments, which should be objects blessed into C<IO::Handle>, or a
subclass thereof. These two objects will be used for the system call
to C<pipe>. If no arguments are given then method C<handles> is called
on the new C<IO::Pipe> object.
These two handles are held in the array part of the GLOB until either
C<reader> or C<writer> is called.
=back
=head1 METHODS
=over 4
=item reader ([ARGS])
The object is re-blessed into a sub-class of C<IO::Handle>, and becomes a
handle at the reading end of the pipe. If C<ARGS> are given then C<fork>
is called and C<ARGS> are passed to exec.
=item writer ([ARGS])
The object is re-blessed into a sub-class of C<IO::Handle>, and becomes a
handle at the writing end of the pipe. If C<ARGS> are given then C<fork>
is called and C<ARGS> are passed to exec.
=item handles ()
This method is called during construction by C<IO::Pipe::new>
on the newly created C<IO::Pipe> object. It returns an array of two objects
blessed into C<IO::Pipe::End>, or a subclass thereof.
=back
=head1 SEE ALSO
L<IO::Handle>
=head1 AUTHOR
Graham Barr. Currently maintained by the Perl Porters. Please report all
bugs at L<https://github.com/Perl/perl5/issues>.
=head1 COPYRIGHT
Copyright (c) 1996-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,208 @@
# IO::Poll.pm
#
# Copyright (c) 1997-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
package IO::Poll;
use strict;
use IO::Handle;
use Exporter ();
our @ISA = qw(Exporter);
our $VERSION = "1.52";
our @EXPORT = qw( POLLIN
POLLOUT
POLLERR
POLLHUP
POLLNVAL
);
our @EXPORT_OK = qw(
POLLPRI
POLLRDNORM
POLLWRNORM
POLLRDBAND
POLLWRBAND
POLLNORM
);
# [0] maps fd's to requested masks
# [1] maps fd's to returned masks
# [2] maps fd's to handles
sub new {
my $class = shift;
my $self = bless [{},{},{}], $class;
$self;
}
sub mask {
my $self = shift;
my $io = shift;
my $fd = fileno($io);
return unless defined $fd;
if (@_) {
my $mask = shift;
if($mask) {
$self->[0]{$fd}{$io} = $mask; # the error events are always returned
$self->[1]{$fd} = 0; # output mask
$self->[2]{$io} = $io; # remember handle
} else {
delete $self->[0]{$fd}{$io};
unless(%{$self->[0]{$fd}}) {
# We no longer have any handles for this FD
delete $self->[1]{$fd};
delete $self->[0]{$fd};
}
delete $self->[2]{$io};
}
}
return unless exists $self->[0]{$fd} and exists $self->[0]{$fd}{$io};
return $self->[0]{$fd}{$io};
}
sub poll {
my($self,$timeout) = @_;
$self->[1] = {};
my($fd,$mask,$iom);
my @poll = ();
while(($fd,$iom) = each %{$self->[0]}) {
$mask = 0;
$mask |= $_ for values(%$iom);
push(@poll,$fd => $mask);
}
my $ret = _poll(defined($timeout) ? $timeout * 1000 : -1,@poll);
return $ret
unless $ret > 0;
while(@poll) {
my($fd,$got) = splice(@poll,0,2);
$self->[1]{$fd} = $got if $got;
}
return $ret;
}
sub events {
my $self = shift;
my $io = shift;
my $fd = fileno($io);
exists $self->[1]{$fd} and exists $self->[0]{$fd}{$io}
? $self->[1]{$fd} & ($self->[0]{$fd}{$io}|POLLHUP|POLLERR|POLLNVAL)
: 0;
}
sub remove {
my $self = shift;
my $io = shift;
$self->mask($io,0);
}
sub handles {
my $self = shift;
return values %{$self->[2]} unless @_;
my $events = shift || 0;
my($fd,$ev,$io,$mask);
my @handles = ();
while(($fd,$ev) = each %{$self->[1]}) {
while (($io,$mask) = each %{$self->[0]{$fd}}) {
$mask |= POLLHUP|POLLERR|POLLNVAL; # must allow these
push @handles,$self->[2]{$io} if ($ev & $mask) & $events;
}
}
return @handles;
}
1;
__END__
=head1 NAME
IO::Poll - Object interface to system poll call
=head1 SYNOPSIS
use IO::Poll qw(POLLRDNORM POLLWRNORM POLLIN POLLHUP);
$poll = IO::Poll->new();
$poll->mask($input_handle => POLLIN);
$poll->mask($output_handle => POLLOUT);
$poll->poll($timeout);
$ev = $poll->events($input);
=head1 DESCRIPTION
C<IO::Poll> is a simple interface to the system level poll routine.
=head1 METHODS
=over 4
=item mask ( IO [, EVENT_MASK ] )
If EVENT_MASK is given, then, if EVENT_MASK is non-zero, IO is added to the
list of file descriptors and the next call to poll will check for
any event specified in EVENT_MASK. If EVENT_MASK is zero then IO will be
removed from the list of file descriptors.
If EVENT_MASK is not given then the return value will be the current
event mask value for IO.
=item poll ( [ TIMEOUT ] )
Call the system level poll routine. If TIMEOUT is not specified then the
call will block. Returns the number of handles which had events
happen, or -1 on error.
=item events ( IO )
Returns the event mask which represents the events that happened on IO
during the last call to C<poll>.
=item remove ( IO )
Remove IO from the list of file descriptors for the next poll.
=item handles( [ EVENT_MASK ] )
Returns a list of handles. If EVENT_MASK is not given then a list of all
handles known will be returned. If EVENT_MASK is given then a list
of handles will be returned which had one of the events specified by
EVENT_MASK happen during the last call ti C<poll>
=back
=head1 SEE ALSO
L<poll(2)>, L<IO::Handle>, L<IO::Select>
=head1 AUTHOR
Graham Barr. Currently maintained by the Perl Porters. Please report all
bugs at L<https://github.com/Perl/perl5/issues>.
=head1 COPYRIGHT
Copyright (c) 1997-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,126 @@
#
package IO::Seekable;
=head1 NAME
IO::Seekable - supply seek based methods for I/O objects
=head1 SYNOPSIS
use IO::Seekable;
package IO::Something;
@ISA = qw(IO::Seekable);
=head1 DESCRIPTION
C<IO::Seekable> does not have a constructor of its own as it is intended to
be inherited by other C<IO::Handle> based objects. It provides methods
which allow seeking of the file descriptors.
=over 4
=item $io->getpos
Returns an opaque value that represents the current position of the
IO::File, or C<undef> if this is not possible (eg an unseekable stream such
as a terminal, pipe or socket). If the fgetpos() function is available in
your C library it is used to implements getpos, else perl emulates getpos
using C's ftell() function.
=item $io->setpos
Uses the value of a previous getpos call to return to a previously visited
position. Returns "0 but true" on success, C<undef> on failure.
=back
See L<perlfunc> for complete descriptions of each of the following
supported C<IO::Seekable> methods, which are just front ends for the
corresponding built-in functions:
=over 4
=item $io->seek ( POS, WHENCE )
Seek the IO::File to position POS, relative to WHENCE:
=over 8
=item WHENCE=0 (SEEK_SET)
POS is absolute position. (Seek relative to the start of the file)
=item WHENCE=1 (SEEK_CUR)
POS is an offset from the current position. (Seek relative to current)
=item WHENCE=2 (SEEK_END)
POS is an offset from the end of the file. (Seek relative to end)
=back
The SEEK_* constants can be imported from the C<Fcntl> module if you
don't wish to use the numbers C<0> C<1> or C<2> in your code.
Returns C<1> upon success, C<0> otherwise.
=item $io->sysseek( POS, WHENCE )
Similar to $io->seek, but sets the IO::File's position using the system
call lseek(2) directly, so will confuse most perl IO operators except
sysread and syswrite (see L<perlfunc> for full details)
Returns the new position, or C<undef> on failure. A position
of zero is returned as the string C<"0 but true">
=item $io->tell
Returns the IO::File's current position, or -1 on error.
=back
=head1 SEE ALSO
L<perlfunc>,
L<perlop/"I/O Operators">,
L<IO::Handle>
L<IO::File>
=head1 HISTORY
Derived from FileHandle.pm by Graham Barr E<lt>gbarr@pobox.comE<gt>
=cut
use 5.008_001;
use Carp;
use strict;
use IO::Handle ();
# XXX we can't get these from IO::Handle or we'll get prototype
# mismatch warnings on C<use POSIX; use IO::File;> :-(
use Fcntl qw(SEEK_SET SEEK_CUR SEEK_END);
require Exporter;
our @EXPORT = qw(SEEK_SET SEEK_CUR SEEK_END);
our @ISA = qw(Exporter);
our $VERSION = "1.52";
sub seek {
@_ == 3 or croak 'usage: $io->seek(POS, WHENCE)';
seek($_[0], $_[1], $_[2]);
}
sub sysseek {
@_ == 3 or croak 'usage: $io->sysseek(POS, WHENCE)';
sysseek($_[0], $_[1], $_[2]);
}
sub tell {
@_ == 1 or croak 'usage: $io->tell()';
tell($_[0]);
}
1;

View File

@@ -0,0 +1,417 @@
# IO::Select.pm
#
# Copyright (c) 1997-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
package IO::Select;
use strict;
use warnings::register;
require Exporter;
our $VERSION = "1.52";
our @ISA = qw(Exporter); # This is only so we can do version checking
sub VEC_BITS () {0}
sub FD_COUNT () {1}
sub FIRST_FD () {2}
sub new
{
my $self = shift;
my $type = ref($self) || $self;
my $vec = bless [undef,0], $type;
$vec->add(@_)
if @_;
$vec;
}
sub add
{
shift->_update('add', @_);
}
sub remove
{
shift->_update('remove', @_);
}
sub exists
{
my $vec = shift;
my $fno = $vec->_fileno(shift);
return undef unless defined $fno;
$vec->[$fno + FIRST_FD];
}
sub _fileno
{
my($self, $f) = @_;
return unless defined $f;
$f = $f->[0] if ref($f) eq 'ARRAY';
if($f =~ /^[0-9]+$/) { # plain file number
return $f;
}
elsif(defined(my $fd = fileno($f))) {
return $fd;
}
else {
# Neither a plain file number nor an opened filehandle; but maybe it was
# previously registered and has since been closed. ->remove still wants to
# know what fileno it had
foreach my $i ( FIRST_FD .. $#$self ) {
return $i - FIRST_FD if defined $self->[$i] && $self->[$i] == $f;
}
return undef;
}
}
sub _update
{
my $vec = shift;
my $add = shift eq 'add';
my $bits = $vec->[VEC_BITS];
$bits = '' unless defined $bits;
my $count = 0;
my $f;
foreach $f (@_)
{
my $fn = $vec->_fileno($f);
if ($add) {
next unless defined $fn;
my $i = $fn + FIRST_FD;
if (defined $vec->[$i]) {
$vec->[$i] = $f; # if array rest might be different, so we update
next;
}
$vec->[FD_COUNT]++;
vec($bits, $fn, 1) = 1;
$vec->[$i] = $f;
} else { # remove
if ( ! defined $fn ) { # remove if fileno undef'd
$fn = 0;
for my $fe (@{$vec}[FIRST_FD .. $#$vec]) {
if (defined($fe) && $fe == $f) {
$vec->[FD_COUNT]--;
$fe = undef;
vec($bits, $fn, 1) = 0;
last;
}
++$fn;
}
}
else {
my $i = $fn + FIRST_FD;
next unless defined $vec->[$i];
$vec->[FD_COUNT]--;
vec($bits, $fn, 1) = 0;
$vec->[$i] = undef;
}
}
$count++;
}
$vec->[VEC_BITS] = $vec->[FD_COUNT] ? $bits : undef;
$count;
}
sub can_read
{
my $vec = shift;
my $timeout = shift;
my $r = $vec->[VEC_BITS];
defined($r) && (select($r,undef,undef,$timeout) > 0)
? handles($vec, $r)
: ();
}
sub can_write
{
my $vec = shift;
my $timeout = shift;
my $w = $vec->[VEC_BITS];
defined($w) && (select(undef,$w,undef,$timeout) > 0)
? handles($vec, $w)
: ();
}
sub has_exception
{
my $vec = shift;
my $timeout = shift;
my $e = $vec->[VEC_BITS];
defined($e) && (select(undef,undef,$e,$timeout) > 0)
? handles($vec, $e)
: ();
}
sub has_error
{
warnings::warn("Call to deprecated method 'has_error', use 'has_exception'")
if warnings::enabled();
goto &has_exception;
}
sub count
{
my $vec = shift;
$vec->[FD_COUNT];
}
sub bits
{
my $vec = shift;
$vec->[VEC_BITS];
}
sub as_string # for debugging
{
my $vec = shift;
my $str = ref($vec) . ": ";
my $bits = $vec->bits;
my $count = $vec->count;
$str .= defined($bits) ? unpack("b*", $bits) : "undef";
$str .= " $count";
my @handles = @$vec;
splice(@handles, 0, FIRST_FD);
for (@handles) {
$str .= " " . (defined($_) ? "$_" : "-");
}
$str;
}
sub _max
{
my($a,$b,$c) = @_;
$a > $b
? $a > $c
? $a
: $c
: $b > $c
? $b
: $c;
}
sub select
{
shift
if defined $_[0] && !ref($_[0]);
my($r,$w,$e,$t) = @_;
my @result = ();
my $rb = defined $r ? $r->[VEC_BITS] : undef;
my $wb = defined $w ? $w->[VEC_BITS] : undef;
my $eb = defined $e ? $e->[VEC_BITS] : undef;
if(select($rb,$wb,$eb,$t) > 0)
{
my @r = ();
my @w = ();
my @e = ();
my $i = _max(defined $r ? scalar(@$r)-1 : 0,
defined $w ? scalar(@$w)-1 : 0,
defined $e ? scalar(@$e)-1 : 0);
for( ; $i >= FIRST_FD ; $i--)
{
my $j = $i - FIRST_FD;
push(@r, $r->[$i])
if defined $rb && defined $r->[$i] && vec($rb, $j, 1);
push(@w, $w->[$i])
if defined $wb && defined $w->[$i] && vec($wb, $j, 1);
push(@e, $e->[$i])
if defined $eb && defined $e->[$i] && vec($eb, $j, 1);
}
@result = (\@r, \@w, \@e);
}
@result;
}
sub handles
{
my $vec = shift;
my $bits = shift;
my @h = ();
my $i;
my $max = scalar(@$vec) - 1;
for ($i = FIRST_FD; $i <= $max; $i++)
{
next unless defined $vec->[$i];
push(@h, $vec->[$i])
if !defined($bits) || vec($bits, $i - FIRST_FD, 1);
}
@h;
}
1;
__END__
=head1 NAME
IO::Select - OO interface to the select system call
=head1 SYNOPSIS
use IO::Select;
$s = IO::Select->new();
$s->add(\*STDIN);
$s->add($some_handle);
@ready = $s->can_read($timeout);
@ready = IO::Select->new(@handles)->can_read(0);
=head1 DESCRIPTION
The C<IO::Select> package implements an object approach to the system C<select>
function call. It allows the user to see what IO handles, see L<IO::Handle>,
are ready for reading, writing or have an exception pending.
=head1 CONSTRUCTOR
=over 4
=item new ( [ HANDLES ] )
The constructor creates a new object and optionally initialises it with a set
of handles.
=back
=head1 METHODS
=over 4
=item add ( HANDLES )
Add the list of handles to the C<IO::Select> object. It is these values that
will be returned when an event occurs. C<IO::Select> keeps these values in a
cache which is indexed by the C<fileno> of the handle, so if more than one
handle with the same C<fileno> is specified then only the last one is cached.
Each handle can be an C<IO::Handle> object, an integer or an array
reference where the first element is an C<IO::Handle> or an integer.
=item remove ( HANDLES )
Remove all the given handles from the object. This method also works
by the C<fileno> of the handles. So the exact handles that were added
need not be passed, just handles that have an equivalent C<fileno>
=item exists ( HANDLE )
Returns a true value (actually the handle itself) if it is present.
Returns undef otherwise.
=item handles
Return an array of all registered handles.
=item can_read ( [ TIMEOUT ] )
Return an array of handles that are ready for reading. C<TIMEOUT> is the
maximum amount of time to wait before returning an empty list (with C<$!>
unchanged), in seconds, possibly fractional. If C<TIMEOUT> is not given
and any handles are registered then the call will block indefinitely.
Upon error, an empty list is returned, with C<$!> set to indicate the
error. To distinguish between timeout and error, set C<$!> to zero
before calling this method, and check it after an empty list is returned.
=item can_write ( [ TIMEOUT ] )
Same as C<can_read> except check for handles that can be written to.
=item has_exception ( [ TIMEOUT ] )
Same as C<can_read> except check for handles that have an exception
condition, for example pending out-of-band data.
=item count ()
Returns the number of handles that the object will check for when
one of the C<can_> methods is called or the object is passed to
the C<select> static method.
=item bits()
Return the bit string suitable as argument to the core select() call.
=item select ( READ, WRITE, EXCEPTION [, TIMEOUT ] )
C<select> is a static method, that is you call it with the package name
like C<new>. C<READ>, C<WRITE> and C<EXCEPTION> are either C<undef> or
C<IO::Select> objects. C<TIMEOUT> is optional and has the same effect as
for the core select call.
If at least one handle is ready for the specified kind of operation,
the result will be an array of 3 elements, each a reference to an array
which will hold the handles that are ready for reading, writing and
have exceptions respectively. Upon timeout, an empty list is returned,
with C<$!> unchanged. Upon error, an empty list is returned, with C<$!>
set to indicate the error. To distinguish between timeout and error,
set C<$!> to zero before calling this method, and check it after an
empty list is returned.
=back
=head1 EXAMPLE
Here is a short example which shows how C<IO::Select> could be used
to write a server which communicates with several sockets while also
listening for more connections on a listen socket
use IO::Select;
use IO::Socket;
$lsn = IO::Socket::INET->new(Listen => 1, LocalPort => 8080);
$sel = IO::Select->new( $lsn );
while(@ready = $sel->can_read) {
foreach $fh (@ready) {
if($fh == $lsn) {
# Create a new socket
$new = $lsn->accept;
$sel->add($new);
}
else {
# Process socket
# Maybe we have finished with the socket
$sel->remove($fh);
$fh->close;
}
}
}
=head1 AUTHOR
Graham Barr. Currently maintained by the Perl Porters. Please report all
bugs at L<https://github.com/Perl/perl5/issues>.
=head1 COPYRIGHT
Copyright (c) 1997-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,933 @@
# IO::Socket.pm
#
# Copyright (c) 1997-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
package IO::Socket;
use 5.008_001;
use IO::Handle;
use Socket 1.3;
use Carp;
use strict;
use Exporter;
use Errno;
# legacy
require IO::Socket::INET;
require IO::Socket::UNIX if ($^O ne 'epoc' && $^O ne 'symbian');
our @ISA = qw(IO::Handle);
our $VERSION = "1.52";
our @EXPORT_OK = qw(sockatmark);
our $errstr;
sub import {
my $pkg = shift;
if (@_ && $_[0] eq 'sockatmark') { # not very extensible but for now, fast
Exporter::export_to_level('IO::Socket', 1, $pkg, 'sockatmark');
} else {
my $callpkg = caller;
Exporter::export 'Socket', $callpkg, @_;
}
}
sub new {
my($class,%arg) = @_;
my $sock = $class->SUPER::new();
$sock->autoflush(1);
${*$sock}{'io_socket_timeout'} = delete $arg{Timeout};
return scalar(%arg) ? $sock->configure(\%arg)
: $sock;
}
my @domain2pkg;
sub register_domain {
my($p,$d) = @_;
$domain2pkg[$d] = $p;
}
sub configure {
my($sock,$arg) = @_;
my $domain = delete $arg->{Domain};
croak 'IO::Socket: Cannot configure a generic socket'
unless defined $domain;
croak "IO::Socket: Unsupported socket domain"
unless defined $domain2pkg[$domain];
croak "IO::Socket: Cannot configure socket in domain '$domain'"
unless ref($sock) eq "IO::Socket";
bless($sock, $domain2pkg[$domain]);
$sock->configure($arg);
}
sub socket {
@_ == 4 or croak 'usage: $sock->socket(DOMAIN, TYPE, PROTOCOL)';
my($sock,$domain,$type,$protocol) = @_;
socket($sock,$domain,$type,$protocol) or
return undef;
${*$sock}{'io_socket_domain'} = $domain;
${*$sock}{'io_socket_type'} = $type;
# "A value of 0 for protocol will let the system select an
# appropriate protocol"
# so we need to look up what the system selected,
# not cache PF_UNSPEC.
${*$sock}{'io_socket_proto'} = $protocol if $protocol;
$sock;
}
sub socketpair {
@_ == 4 || croak 'usage: IO::Socket->socketpair(DOMAIN, TYPE, PROTOCOL)';
my($class,$domain,$type,$protocol) = @_;
my $sock1 = $class->new();
my $sock2 = $class->new();
socketpair($sock1,$sock2,$domain,$type,$protocol) or
return ();
${*$sock1}{'io_socket_type'} = ${*$sock2}{'io_socket_type'} = $type;
${*$sock1}{'io_socket_proto'} = ${*$sock2}{'io_socket_proto'} = $protocol;
($sock1,$sock2);
}
sub connect {
@_ == 2 or croak 'usage: $sock->connect(NAME)';
my $sock = shift;
my $addr = shift;
my $timeout = ${*$sock}{'io_socket_timeout'};
my $err;
my $blocking;
$blocking = $sock->blocking(0) if $timeout;
if (!connect($sock, $addr)) {
if (defined $timeout && ($!{EINPROGRESS} || $!{EWOULDBLOCK})) {
require IO::Select;
my $sel = IO::Select->new( $sock );
undef $!;
my($r,$w,$e) = IO::Select::select(undef,$sel,$sel,$timeout);
if(@$e[0]) {
# Windows return from select after the timeout in case of
# WSAECONNREFUSED(10061) if exception set is not used.
# This behavior is different from Linux.
# Using the exception
# set we now emulate the behavior in Linux
# - Karthik Rajagopalan
$err = $sock->getsockopt(SOL_SOCKET,SO_ERROR);
$errstr = $@ = "connect: $err";
}
elsif(!@$w[0]) {
$err = $! || (exists &Errno::ETIMEDOUT ? &Errno::ETIMEDOUT : 1);
$errstr = $@ = "connect: timeout";
}
elsif (!connect($sock,$addr) &&
not ($!{EISCONN} || ($^O eq 'MSWin32' &&
($! == (($] < 5.019004) ? 10022 : Errno::EINVAL))))
) {
# Some systems refuse to re-connect() to
# an already open socket and set errno to EISCONN.
# Windows sets errno to WSAEINVAL (10022) (pre-5.19.4) or
# EINVAL (22) (5.19.4 onwards).
$err = $!;
$errstr = $@ = "connect: $!";
}
}
elsif ($blocking || !($!{EINPROGRESS} || $!{EWOULDBLOCK})) {
$err = $!;
$errstr = $@ = "connect: $!";
}
}
$sock->blocking(1) if $blocking;
$! = $err if $err;
$err ? undef : $sock;
}
# Enable/disable blocking IO on sockets.
# Without args return the current status of blocking,
# with args change the mode as appropriate, returning the
# old setting, or in case of error during the mode change
# undef.
sub blocking {
my $sock = shift;
return $sock->SUPER::blocking(@_)
if $^O ne 'MSWin32' && $^O ne 'VMS';
# Windows handles blocking differently
#
# http://groups.google.co.uk/group/perl.perl5.porters/browse_thread/thread/b4e2b1d88280ddff/630b667a66e3509f?#630b667a66e3509f
# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/winsock/winsock/ioctlsocket_2.asp
#
# 0x8004667e is FIONBIO
#
# which is used to set blocking behaviour.
# NOTE:
# This is a little confusing, the perl keyword for this is
# 'blocking' but the OS level behaviour is 'non-blocking', probably
# because sockets are blocking by default.
# Therefore internally we have to reverse the semantics.
my $orig= !${*$sock}{io_sock_nonblocking};
return $orig unless @_;
my $block = shift;
if ( !$block != !$orig ) {
${*$sock}{io_sock_nonblocking} = $block ? 0 : 1;
ioctl($sock, 0x8004667e, pack("L!",${*$sock}{io_sock_nonblocking}))
or return undef;
}
return $orig;
}
sub close {
@_ == 1 or croak 'usage: $sock->close()';
my $sock = shift;
${*$sock}{'io_socket_peername'} = undef;
$sock->SUPER::close();
}
sub bind {
@_ == 2 or croak 'usage: $sock->bind(NAME)';
my $sock = shift;
my $addr = shift;
return bind($sock, $addr) ? $sock
: undef;
}
sub listen {
@_ >= 1 && @_ <= 2 or croak 'usage: $sock->listen([QUEUE])';
my($sock,$queue) = @_;
$queue = 5
unless $queue && $queue > 0;
return listen($sock, $queue) ? $sock
: undef;
}
sub accept {
@_ == 1 || @_ == 2 or croak 'usage $sock->accept([PKG])';
my $sock = shift;
my $pkg = shift || $sock;
my $timeout = ${*$sock}{'io_socket_timeout'};
my $new = $pkg->new(Timeout => $timeout);
my $peer = undef;
if(defined $timeout) {
require IO::Select;
my $sel = IO::Select->new( $sock );
unless ($sel->can_read($timeout)) {
$errstr = $@ = 'accept: timeout';
$! = (exists &Errno::ETIMEDOUT ? &Errno::ETIMEDOUT : 1);
return;
}
}
$peer = accept($new,$sock)
or return;
${*$new}{$_} = ${*$sock}{$_} for qw( io_socket_domain io_socket_type io_socket_proto );
return wantarray ? ($new, $peer)
: $new;
}
sub sockname {
@_ == 1 or croak 'usage: $sock->sockname()';
getsockname($_[0]);
}
sub peername {
@_ == 1 or croak 'usage: $sock->peername()';
my($sock) = @_;
${*$sock}{'io_socket_peername'} ||= getpeername($sock);
}
sub connected {
@_ == 1 or croak 'usage: $sock->connected()';
my($sock) = @_;
getpeername($sock);
}
sub send {
@_ >= 2 && @_ <= 4 or croak 'usage: $sock->send(BUF, [FLAGS, [TO]])';
my $sock = $_[0];
my $flags = $_[2] || 0;
my $peer;
if ($_[3]) {
# the caller explicitly requested a TO, so use it
# this is non-portable for "connected" UDP sockets
$peer = $_[3];
}
elsif (!defined getpeername($sock)) {
# we're not connected, so we require a peer from somewhere
$peer = $sock->peername;
croak 'send: Cannot determine peer address'
unless(defined $peer);
}
my $r = $peer
? send($sock, $_[1], $flags, $peer)
: send($sock, $_[1], $flags);
# remember who we send to, if it was successful
${*$sock}{'io_socket_peername'} = $peer
if(@_ == 4 && defined $r);
$r;
}
sub recv {
@_ == 3 || @_ == 4 or croak 'usage: $sock->recv(BUF, LEN [, FLAGS])';
my $sock = $_[0];
my $len = $_[2];
my $flags = $_[3] || 0;
# remember who we recv'd from
${*$sock}{'io_socket_peername'} = recv($sock, $_[1]='', $len, $flags);
}
sub shutdown {
@_ == 2 or croak 'usage: $sock->shutdown(HOW)';
my($sock, $how) = @_;
${*$sock}{'io_socket_peername'} = undef;
shutdown($sock, $how);
}
sub setsockopt {
@_ == 4 or croak '$sock->setsockopt(LEVEL, OPTNAME, OPTVAL)';
setsockopt($_[0],$_[1],$_[2],$_[3]);
}
my $intsize = length(pack("i",0));
sub getsockopt {
@_ == 3 or croak '$sock->getsockopt(LEVEL, OPTNAME)';
my $r = getsockopt($_[0],$_[1],$_[2]);
# Just a guess
$r = unpack("i", $r)
if(defined $r && length($r) == $intsize);
$r;
}
sub sockopt {
my $sock = shift;
@_ == 1 ? $sock->getsockopt(SOL_SOCKET,@_)
: $sock->setsockopt(SOL_SOCKET,@_);
}
sub atmark {
@_ == 1 or croak 'usage: $sock->atmark()';
my($sock) = @_;
sockatmark($sock);
}
sub timeout {
@_ == 1 || @_ == 2 or croak 'usage: $sock->timeout([VALUE])';
my($sock,$val) = @_;
my $r = ${*$sock}{'io_socket_timeout'};
${*$sock}{'io_socket_timeout'} = defined $val ? 0 + $val : $val
if(@_ == 2);
$r;
}
sub sockdomain {
@_ == 1 or croak 'usage: $sock->sockdomain()';
my $sock = shift;
if (!defined(${*$sock}{'io_socket_domain'})) {
my $addr = $sock->sockname();
${*$sock}{'io_socket_domain'} = sockaddr_family($addr)
if (defined($addr));
}
${*$sock}{'io_socket_domain'};
}
sub socktype {
@_ == 1 or croak 'usage: $sock->socktype()';
my $sock = shift;
${*$sock}{'io_socket_type'} = $sock->sockopt(Socket::SO_TYPE)
if (!defined(${*$sock}{'io_socket_type'}) && defined(eval{Socket::SO_TYPE}));
${*$sock}{'io_socket_type'}
}
sub protocol {
@_ == 1 or croak 'usage: $sock->protocol()';
my($sock) = @_;
${*$sock}{'io_socket_proto'} = $sock->sockopt(Socket::SO_PROTOCOL)
if (!defined(${*$sock}{'io_socket_proto'}) && defined(eval{Socket::SO_PROTOCOL}));
${*$sock}{'io_socket_proto'};
}
1;
__END__
=head1 NAME
IO::Socket - Object interface to socket communications
=head1 SYNOPSIS
use strict;
use warnings;
use IO::Socket qw(AF_INET AF_UNIX);
# create a new AF_INET socket
my $sock = IO::Socket->new(Domain => AF_INET);
# which is the same as
$sock = IO::Socket::INET->new();
# create a new AF_UNIX socket
$sock = IO::Socket->new(Domain => AF_UNIX);
# which is the same as
$sock = IO::Socket::UNIX->new();
=head1 DESCRIPTION
C<IO::Socket> provides an object-oriented, L<IO::Handle>-based interface to
creating and using sockets via L<Socket>, which provides a near one-to-one
interface to the C socket library.
C<IO::Socket> is a base class that really only defines methods for those
operations which are common to all types of sockets. Operations which are
specific to a particular socket domain have methods defined in subclasses of
C<IO::Socket>. See L<IO::Socket::INET>, L<IO::Socket::UNIX>, and
L<IO::Socket::IP> for examples of such a subclass.
C<IO::Socket> will export all functions (and constants) defined by L<Socket>.
=head1 CONSTRUCTOR ARGUMENTS
Given that C<IO::Socket> doesn't have attributes in the traditional sense, the
following arguments, rather than attributes, can be passed into the
constructor.
Constructor arguments should be passed in C<< Key => 'Value' >> pairs.
The only required argument is L<IO::Socket/"Domain">.
=head2 Blocking
my $sock = IO::Socket->new(..., Blocking => 1);
$sock = IO::Socket->new(..., Blocking => 0);
If defined but false, the socket will be set to non-blocking mode. If not
specified it defaults to C<1> (blocking mode).
=head2 Domain
my $sock = IO::Socket->new(Domain => IO::Socket::AF_INET);
$sock = IO::Socket->new(Domain => IO::Socket::AF_UNIX);
The socket domain will define which subclass of C<IO::Socket> to use. The two
options available along with this distribution are C<AF_INET> and C<AF_UNIX>.
C<AF_INET> is for the internet address family of sockets and is handled via
L<IO::Socket::INET>. C<AF_INET> sockets are bound to an internet address and
port.
C<AF_UNIX> is for the unix domain socket and is handled via
L<IO::Socket::UNIX>. C<AF_UNIX> sockets are bound to the file system as their
address name space.
This argument is B<required>. All other arguments are optional.
=head2 Listen
my $sock = IO::Socket->new(..., Listen => 5);
Listen should be an integer value or left unset.
If provided, this argument will place the socket into listening mode. New
connections can then be accepted using the L<IO::Socket/"accept"> method. The
value given is used as the C<listen(2)> queue size.
If the C<Listen> argument is given, but false, the queue size will be set to
5.
=head2 Timeout
my $sock = IO::Socket->new(..., Timeout => 5);
The timeout value, in seconds, for this socket connection. How exactly this
value is utilized is defined in the socket domain subclasses that make use of
the value.
=head2 Type
my $sock = IO::Socket->new(..., Type => IO::Socket::SOCK_STREAM);
The socket type that will be used. These are usually C<SOCK_STREAM>,
C<SOCK_DGRAM>, or C<SOCK_RAW>. If this argument is left undefined an attempt
will be made to infer the type from the service name.
For example, you'll usually use C<SOCK_STREAM> with a C<tcp> connection and
C<SOCK_DGRAM> with a C<udp> connection.
=head1 CONSTRUCTORS
C<IO::Socket> extends the L<IO::Handle> constructor.
=head2 new
my $sock = IO::Socket->new();
# get a new IO::Socket::INET instance
$sock = IO::Socket->new(Domain => IO::Socket::AF_INET);
# get a new IO::Socket::UNIX instance
$sock = IO::Socket->new(Domain => IO::Socket::AF_UNIX);
# Domain is the only required argument
$sock = IO::Socket->new(
Domain => IO::Socket::AF_INET, # AF_INET, AF_UNIX
Type => IO::Socket::SOCK_STREAM, # SOCK_STREAM, SOCK_DGRAM, ...
Proto => 'tcp', # 'tcp', 'udp', IPPROTO_TCP, IPPROTO_UDP
# and so on...
);
Creates an C<IO::Socket>, which is a reference to a newly created symbol (see
the L<Symbol> package). C<new> optionally takes arguments, these arguments
are defined in L<IO::Socket/"CONSTRUCTOR ARGUMENTS">.
Any of the L<IO::Socket/"CONSTRUCTOR ARGUMENTS"> may be passed to the
constructor, but if any arguments are provided, then one of them must be
the L<IO::Socket/"Domain"> argument. The L<IO::Socket/"Domain"> argument can,
by default, be either C<AF_INET> or C<AF_UNIX>. Other domains can be used if a
proper subclass for the domain family is registered. All other arguments will
be passed to the C<configuration> method of the package for that domain.
If the constructor fails it will return C<undef> and set the C<$errstr> package
variable to contain an error message.
$sock = IO::Socket->new(...)
or die "Cannot create socket - $IO::Socket::errstr\n";
For legacy reasons the error message is also set into the global C<$@>
variable, and you may still find older code which looks here instead.
$sock = IO::Socket->new(...)
or die "Cannot create socket - $@\n";
=head1 METHODS
C<IO::Socket> inherits all methods from L<IO::Handle> and implements the
following new ones.
=head2 accept
my $client_sock = $sock->accept();
my $inet_sock = $sock->accept('IO::Socket::INET');
The accept method will perform the system call C<accept> on the socket and
return a new object. The new object will be created in the same class as the
listen socket, unless a specific package name is specified. This object can be
used to communicate with the client that was trying to connect.
This differs slightly from the C<accept> function in L<perlfunc>.
In a scalar context the new socket is returned, or C<undef> upon
failure. In a list context a two-element array is returned containing
the new socket and the peer address; the list will be empty upon failure.
=head2 atmark
my $integer = $sock->atmark();
# read in some data on a given socket
my $data;
$sock->read($data, 1024) until $sock->atmark;
# or, export the function to use:
use IO::Socket 'sockatmark';
$sock->read($data, 1024) until sockatmark($sock);
True if the socket is currently positioned at the urgent data mark, false
otherwise. If your system doesn't yet implement C<sockatmark> this will throw
an exception.
If your system does not support C<sockatmark>, the C<use> declaration will
fail at compile time.
=head2 autoflush
# by default, autoflush will be turned on when referenced
$sock->autoflush(); # turns on autoflush
# turn off autoflush
$sock->autoflush(0);
# turn on autoflush
$sock->autoflush(1);
This attribute isn't overridden from L<IO::Handle>'s implementation. However,
since we turn it on by default, it's worth mentioning here.
=head2 bind
use Socket qw(pack_sockaddr_in);
my $port = 3000;
my $ip_address = '0.0.0.0';
my $packed_addr = pack_sockaddr_in($port, $ip_address);
$sock->bind($packed_addr);
Binds a network address to a socket, just as C<bind(2)> does. Returns true if
it succeeded, false otherwise. You should provide a packed address of the
appropriate type for the socket.
=head2 connected
my $peer_addr = $sock->connected();
if ($peer_addr) {
say "We're connected to $peer_addr";
}
If the socket is in a connected state, the peer address is returned. If the
socket is not in a connected state, C<undef> is returned.
Note that this method considers a half-open TCP socket to be "in a connected
state". Specifically, it does not distinguish between the
B<ESTABLISHED> and B<CLOSE-WAIT> TCP states; it returns the peer address,
rather than C<undef>, in either case. Thus, in general, it cannot
be used to reliably learn whether the peer has initiated a graceful shutdown
because in most cases (see below) the local TCP state machine remains in
B<CLOSE-WAIT> until the local application calls L<IO::Socket/"shutdown"> or
C<close>. Only at that point does this function return C<undef>.
The "in most cases" hedge is because local TCP state machine behavior may
depend on the peer's socket options. In particular, if the peer socket has
C<SO_LINGER> enabled with a zero timeout, then the peer's C<close> will
generate a C<RST> segment. Upon receipt of that segment, the local TCP
transitions immediately to B<CLOSED>, and in that state, this method I<will>
return C<undef>.
=head2 getsockopt
my $value = $sock->getsockopt(SOL_SOCKET, SO_REUSEADDR);
my $buf = $socket->getsockopt(SOL_SOCKET, SO_RCVBUF);
say "Receive buffer is $buf bytes";
Get an option associated with the socket. Levels other than C<SOL_SOCKET>
may be specified here. As a convenience, this method will unpack a byte buffer
of the correct size back into a number.
=head2 listen
$sock->listen(5);
Does the same thing that the C<listen(2)> system call does. Returns true if it
succeeded, false otherwise. Listens to a socket with a given queue size.
=head2 peername
my $sockaddr_in = $sock->peername();
Returns the packed C<sockaddr> address of the other end of the socket
connection. It calls C<getpeername>.
=head2 protocol
my $proto = $sock->protocol();
Returns the number for the protocol being used on the socket, if
known. If the protocol is unknown, as with an C<AF_UNIX> socket, zero
is returned.
=head2 recv
my $buffer = "";
my $length = 1024;
my $flags = 0; # default. optional
$sock->recv($buffer, $length);
$sock->recv($buffer, $length, $flags);
Similar in functionality to L<perlfunc/recv>.
Receives a message on a socket. Attempts to receive C<$length> characters of
data into C<$buffer> from the specified socket. C<$buffer> will be grown or
shrunk to the length actually read. Takes the same flags as the system call of
the same name. Returns the address of the sender if socket's protocol supports
this; returns an empty string otherwise. If there's an error, returns
C<undef>. This call is actually implemented in terms of the C<recvfrom(2)>
system call.
Flags are ORed together values, such as C<MSG_BCAST>, C<MSG_OOB>,
C<MSG_TRUNC>. The default value for the flags is C<0>.
The cached value of L<IO::Socket/"peername"> is updated with the result of
C<recv>.
B<Note:> In Perl v5.30 and newer, if the socket has been marked as C<:utf8>,
C<recv> will throw an exception. The C<:encoding(...)> layer implicitly
introduces the C<:utf8> layer. See L<perlfunc/binmode>.
B<Note:> In Perl versions older than v5.30, depending on the status of the
socket, either (8-bit) bytes or characters are received. By default all
sockets operate on bytes, but for example if the socket has been changed
using L<perlfunc/binmode> to operate with the C<:encoding(UTF-8)> I/O layer
(see the L<perlfunc/open> pragma), the I/O will operate on UTF8-encoded
Unicode characters, not bytes. Similarly for the C<:encoding> layer: in
that case pretty much any characters can be read.
=head2 send
my $message = "Hello, world!";
my $flags = 0; # defaults to zero
my $to = '0.0.0.0'; # optional destination
my $sent = $sock->send($message);
$sent = $sock->send($message, $flags);
$sent = $sock->send($message, $flags, $to);
Similar in functionality to L<perlfunc/send>.
Sends a message on a socket. Attempts to send the scalar message to the
socket. Takes the same flags as the system call of the same name. On
unconnected sockets, you must specify a destination to send to, in which case
it does a C<sendto(2)> syscall. Returns the number of characters sent, or
C<undef> on error. The C<sendmsg(2)> syscall is currently unimplemented.
The C<flags> option is optional and defaults to C<0>.
After a successful send with C<$to>, further calls to C<send> on an
unconnected socket without C<$to> will send to the same address, and C<$to>
will be used as the result of L<IO::Socket/"peername">.
B<Note:> In Perl v5.30 and newer, if the socket has been marked as C<:utf8>,
C<send> will throw an exception. The C<:encoding(...)> layer implicitly
introduces the C<:utf8> layer. See L<perlfunc/binmode>.
B<Note:> In Perl versions older than v5.30, depending on the status of the
socket, either (8-bit) bytes or characters are sent. By default all
sockets operate on bytes, but for example if the socket has been changed
using L<perlfunc/binmode> to operate with the C<:encoding(UTF-8)> I/O layer
(see the L<perlfunc/open> pragma), the I/O will operate on UTF8-encoded
Unicode characters, not bytes. Similarly for the C<:encoding> layer: in
that case pretty much any characters can be sent.
=head2 setsockopt
$sock->setsockopt(SOL_SOCKET, SO_REUSEADDR, 1);
$sock->setsockopt(SOL_SOCKET, SO_RCVBUF, 64*1024);
Set option associated with the socket. Levels other than C<SOL_SOCKET>
may be specified here. As a convenience, this method will convert a number
into a packed byte buffer.
=head2 shutdown
$sock->shutdown(SHUT_RD); # we stopped reading data
$sock->shutdown(SHUT_WR); # we stopped writing data
$sock->shutdown(SHUT_RDWR); # we stopped using this socket
Shuts down a socket connection in the manner indicated by the value passed in,
which has the same interpretation as in the syscall of the same name.
This is useful with sockets when you want to tell the other side you're done
writing but not done reading, or vice versa. It's also a more insistent form
of C<close> because it also disables the file descriptor in any
forked copies in other processes.
Returns C<1> for success; on error, returns C<undef> if the socket is
not a valid filehandle, or returns C<0> and sets C<$!> for any other failure.
=head2 sockdomain
my $domain = $sock->sockdomain();
Returns the number for the socket domain type. For example, for
an C<AF_INET> socket the value of C<&AF_INET> will be returned.
=head2 socket
my $sock = IO::Socket->new(); # no values given
# now let's actually get a socket with the socket method
# domain, type, and protocol are required
$sock = $sock->socket(AF_INET, SOCK_STREAM, 'tcp');
Opens a socket of the specified kind and returns it. Domain, type, and
protocol are specified the same as for the syscall of the same name.
=head2 socketpair
my ($r, $w) = $sock->socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC);
($r, $w) = IO::Socket::UNIX
->socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC);
Will return a list of two sockets created (read and write), or an empty list
on failure.
Differs slightly from C<socketpair> in L<perlfunc> in that the argument list
is a bit simpler.
=head2 sockname
my $packed_addr = $sock->sockname();
Returns the packed C<sockaddr> address of this end of the connection. It's the
same as C<getsockname(2)>.
=head2 sockopt
my $value = $sock->sockopt(SO_REUSEADDR);
$sock->sockopt(SO_REUSEADDR, 1);
Unified method to both set and get options in the C<SOL_SOCKET> level. If
called with one argument then L<IO::Socket/"getsockopt"> is called, otherwise
L<IO::Socket/"setsockopt"> is called.
=head2 socktype
my $type = $sock->socktype();
Returns the number for the socket type. For example, for
a C<SOCK_STREAM> socket the value of C<&SOCK_STREAM> will be returned.
=head2 timeout
my $seconds = $sock->timeout();
my $old_val = $sock->timeout(5); # set new and return old value
Set or get the timeout value (in seconds) associated with this socket.
If called without any arguments then the current setting is returned. If
called with an argument the current setting is changed and the previous
value returned.
This method is available to all C<IO::Socket> implementations but may or may
not be used by the individual domain subclasses.
=head1 EXAMPLES
Let's create a TCP server on C<localhost:3333>.
use strict;
use warnings;
use feature 'say';
use IO::Socket qw(AF_INET AF_UNIX SOCK_STREAM SHUT_WR);
my $server = IO::Socket->new(
Domain => AF_INET,
Type => SOCK_STREAM,
Proto => 'tcp',
LocalHost => '0.0.0.0',
LocalPort => 3333,
ReusePort => 1,
Listen => 5,
) || die "Can't open socket: $IO::Socket::errstr";
say "Waiting on 3333";
while (1) {
# waiting for a new client connection
my $client = $server->accept();
# get information about a newly connected client
my $client_address = $client->peerhost();
my $client_port = $client->peerport();
say "Connection from $client_address:$client_port";
# read up to 1024 characters from the connected client
my $data = "";
$client->recv($data, 1024);
say "received data: $data";
# write response data to the connected client
$data = "ok";
$client->send($data);
# notify client that response has been sent
$client->shutdown(SHUT_WR);
}
$server->close();
A client for such a server could be
use strict;
use warnings;
use feature 'say';
use IO::Socket qw(AF_INET AF_UNIX SOCK_STREAM SHUT_WR);
my $client = IO::Socket->new(
Domain => AF_INET,
Type => SOCK_STREAM,
proto => 'tcp',
PeerPort => 3333,
PeerHost => '0.0.0.0',
) || die "Can't open socket: $IO::Socket::errstr";
say "Sending Hello World!";
my $size = $client->send("Hello World!");
say "Sent data of length: $size";
$client->shutdown(SHUT_WR);
my $buffer;
$client->recv($buffer, 1024);
say "Got back $buffer";
$client->close();
=head1 LIMITATIONS
On some systems, for an IO::Socket object created with C<new_from_fd>,
or created with L<IO::Socket/"accept"> from such an object, the
L<IO::Socket/"protocol">, L<IO::Socket/"sockdomain"> and
L<IO::Socket/"socktype"> methods may return C<undef>.
=head1 SEE ALSO
L<Socket>, L<IO::Handle>, L<IO::Socket::INET>, L<IO::Socket::UNIX>,
L<IO::Socket::IP>
=head1 AUTHOR
Graham Barr. atmark() by Lincoln Stein. Currently maintained by the Perl 5
Porters. Please report all bugs at L<https://github.com/Perl/perl5/issues>.
=head1 COPYRIGHT
Copyright (c) 1997-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
The atmark() implementation: Copyright 2001, Lincoln Stein <lstein@cshl.org>.
This module is distributed under the same terms as Perl itself.
Feel free to use, modify and redistribute it as long as you retain
the correct attribution.
=cut

View File

@@ -0,0 +1,471 @@
# IO::Socket::INET.pm
#
# Copyright (c) 1997-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
package IO::Socket::INET;
use strict;
use IO::Socket;
use Socket;
use Carp;
use Exporter;
use Errno;
our @ISA = qw(IO::Socket);
our $VERSION = "1.52";
my $EINVAL = exists(&Errno::EINVAL) ? Errno::EINVAL() : 1;
IO::Socket::INET->register_domain( AF_INET );
my %socket_type = ( tcp => SOCK_STREAM,
udp => SOCK_DGRAM,
icmp => SOCK_RAW
);
my %proto_number;
$proto_number{tcp} = Socket::IPPROTO_TCP() if defined &Socket::IPPROTO_TCP;
$proto_number{udp} = Socket::IPPROTO_UDP() if defined &Socket::IPPROTO_UDP;
$proto_number{icmp} = Socket::IPPROTO_ICMP() if defined &Socket::IPPROTO_ICMP;
my %proto_name = reverse %proto_number;
sub new {
my $class = shift;
unshift(@_, "PeerAddr") if @_ == 1;
return $class->SUPER::new(@_);
}
sub _cache_proto {
my @proto = @_;
for (map lc($_), $proto[0], split(' ', $proto[1])) {
$proto_number{$_} = $proto[2];
}
$proto_name{$proto[2]} = $proto[0];
}
sub _get_proto_number {
my $name = lc(shift);
return undef unless defined $name;
return $proto_number{$name} if exists $proto_number{$name};
my @proto = eval { getprotobyname($name) };
return undef unless @proto;
_cache_proto(@proto);
return $proto[2];
}
sub _get_proto_name {
my $num = shift;
return undef unless defined $num;
return $proto_name{$num} if exists $proto_name{$num};
my @proto = eval { getprotobynumber($num) };
return undef unless @proto;
_cache_proto(@proto);
return $proto[0];
}
sub _sock_info {
my($addr,$port,$proto) = @_;
my $origport = $port;
my @serv = ();
$port = $1
if(defined $addr && $addr =~ s,:([\w\(\)/]+)$,,);
if(defined $proto && $proto =~ /\D/) {
my $num = _get_proto_number($proto);
unless (defined $num) {
$IO::Socket::errstr = $@ = "Bad protocol '$proto'";
return;
}
$proto = $num;
}
if(defined $port) {
my $defport = ($port =~ s,\((\d+)\)$,,) ? $1 : undef;
my $pnum = ($port =~ m,^(\d+)$,)[0];
@serv = getservbyname($port, _get_proto_name($proto) || "")
if ($port =~ m,\D,);
$port = $serv[2] || $defport || $pnum;
unless (defined $port) {
$IO::Socket::errstr = $@ = "Bad service '$origport'";
return;
}
$proto = _get_proto_number($serv[3]) if @serv && !$proto;
}
return ($addr || undef,
$port || undef,
$proto || undef
);
}
sub _error {
my $sock = shift;
my $err = shift;
{
local($!);
my $title = ref($sock).": ";
$IO::Socket::errstr = $@ = join("", $_[0] =~ /^$title/ ? "" : $title, @_);
$sock->close()
if(defined fileno($sock));
}
$! = $err;
return undef;
}
sub _get_addr {
my($sock,$addr_str, $multi) = @_;
my @addr;
if ($multi && $addr_str !~ /^\d+(?:\.\d+){3}$/) {
(undef, undef, undef, undef, @addr) = gethostbyname($addr_str);
} else {
my $h = inet_aton($addr_str);
push(@addr, $h) if defined $h;
}
@addr;
}
sub configure {
my($sock,$arg) = @_;
my($lport,$rport,$laddr,$raddr,$proto,$type);
$arg->{LocalAddr} = $arg->{LocalHost}
if exists $arg->{LocalHost} && !exists $arg->{LocalAddr};
($laddr,$lport,$proto) = _sock_info($arg->{LocalAddr},
$arg->{LocalPort},
$arg->{Proto})
or return _error($sock, $!, $@);
$laddr = defined $laddr ? inet_aton($laddr)
: INADDR_ANY;
return _error($sock, $EINVAL, "Bad hostname '",$arg->{LocalAddr},"'")
unless(defined $laddr);
$arg->{PeerAddr} = $arg->{PeerHost}
if exists $arg->{PeerHost} && !exists $arg->{PeerAddr};
unless(exists $arg->{Listen}) {
($raddr,$rport,$proto) = _sock_info($arg->{PeerAddr},
$arg->{PeerPort},
$proto)
or return _error($sock, $!, $@);
}
$proto ||= _get_proto_number('tcp');
$type = $arg->{Type} || $socket_type{lc _get_proto_name($proto)};
my @raddr = ();
if(defined $raddr) {
@raddr = $sock->_get_addr($raddr, $arg->{MultiHomed});
return _error($sock, $EINVAL, "Bad hostname '",$arg->{PeerAddr},"'")
unless @raddr;
}
while(1) {
$sock->socket(AF_INET, $type, $proto) or
return _error($sock, $!, "$!");
if (defined $arg->{Blocking}) {
defined $sock->blocking($arg->{Blocking})
or return _error($sock, $!, "$!");
}
if ($arg->{Reuse} || $arg->{ReuseAddr}) {
$sock->sockopt(SO_REUSEADDR,1) or
return _error($sock, $!, "$!");
}
if ($arg->{ReusePort}) {
$sock->sockopt(SO_REUSEPORT,1) or
return _error($sock, $!, "$!");
}
if ($arg->{Broadcast}) {
$sock->sockopt(SO_BROADCAST,1) or
return _error($sock, $!, "$!");
}
if($lport || ($laddr ne INADDR_ANY) || exists $arg->{Listen}) {
$sock->bind($lport || 0, $laddr) or
return _error($sock, $!, "$!");
}
if(exists $arg->{Listen}) {
$sock->listen($arg->{Listen} || 5) or
return _error($sock, $!, "$!");
last;
}
# don't try to connect unless we're given a PeerAddr
last unless exists($arg->{PeerAddr});
$raddr = shift @raddr;
return _error($sock, $EINVAL, 'Cannot determine remote port')
unless($rport || $type == SOCK_DGRAM || $type == SOCK_RAW);
last
unless($type == SOCK_STREAM || defined $raddr);
return _error($sock, $EINVAL, "Bad hostname '",$arg->{PeerAddr},"'")
unless defined $raddr;
# my $timeout = ${*$sock}{'io_socket_timeout'};
# my $before = time() if $timeout;
undef $@;
if ($sock->connect(pack_sockaddr_in($rport, $raddr))) {
# ${*$sock}{'io_socket_timeout'} = $timeout;
return $sock;
}
return _error($sock, $!, $@ || "Timeout")
unless @raddr;
# if ($timeout) {
# my $new_timeout = $timeout - (time() - $before);
# return _error($sock,
# (exists(&Errno::ETIMEDOUT) ? Errno::ETIMEDOUT() : $EINVAL),
# "Timeout") if $new_timeout <= 0;
# ${*$sock}{'io_socket_timeout'} = $new_timeout;
# }
}
$sock;
}
sub connect {
@_ == 2 || @_ == 3 or
croak 'usage: $sock->connect(NAME) or $sock->connect(PORT, ADDR)';
my $sock = shift;
return $sock->SUPER::connect(@_ == 1 ? shift : pack_sockaddr_in(@_));
}
sub bind {
@_ == 2 || @_ == 3 or
croak 'usage: $sock->bind(NAME) or $sock->bind(PORT, ADDR)';
my $sock = shift;
return $sock->SUPER::bind(@_ == 1 ? shift : pack_sockaddr_in(@_))
}
sub sockaddr {
@_ == 1 or croak 'usage: $sock->sockaddr()';
my($sock) = @_;
my $name = $sock->sockname;
$name ? (sockaddr_in($name))[1] : undef;
}
sub sockport {
@_ == 1 or croak 'usage: $sock->sockport()';
my($sock) = @_;
my $name = $sock->sockname;
$name ? (sockaddr_in($name))[0] : undef;
}
sub sockhost {
@_ == 1 or croak 'usage: $sock->sockhost()';
my($sock) = @_;
my $addr = $sock->sockaddr;
$addr ? inet_ntoa($addr) : undef;
}
sub peeraddr {
@_ == 1 or croak 'usage: $sock->peeraddr()';
my($sock) = @_;
my $name = $sock->peername;
$name ? (sockaddr_in($name))[1] : undef;
}
sub peerport {
@_ == 1 or croak 'usage: $sock->peerport()';
my($sock) = @_;
my $name = $sock->peername;
$name ? (sockaddr_in($name))[0] : undef;
}
sub peerhost {
@_ == 1 or croak 'usage: $sock->peerhost()';
my($sock) = @_;
my $addr = $sock->peeraddr;
$addr ? inet_ntoa($addr) : undef;
}
1;
__END__
=head1 NAME
IO::Socket::INET - Object interface for AF_INET domain sockets
=head1 SYNOPSIS
use IO::Socket::INET;
=head1 DESCRIPTION
C<IO::Socket::INET> provides an object interface to creating and using sockets
in the AF_INET domain. It is built upon the L<IO::Socket> interface and
inherits all the methods defined by L<IO::Socket>.
=head1 CONSTRUCTOR
=over 4
=item new ( [ARGS] )
Creates an C<IO::Socket::INET> object, which is a reference to a
newly created symbol (see the L<Symbol> package). C<new>
optionally takes arguments, these arguments are in key-value pairs.
In addition to the key-value pairs accepted by L<IO::Socket>,
C<IO::Socket::INET> provides.
PeerAddr Remote host address <hostname>[:<port>]
PeerHost Synonym for PeerAddr
PeerPort Remote port or service <service>[(<no>)] | <no>
LocalAddr Local host bind address hostname[:port]
LocalHost Synonym for LocalAddr
LocalPort Local host bind port <service>[(<no>)] | <no>
Proto Protocol name (or number) "tcp" | "udp" | ...
Type Socket type SOCK_STREAM | SOCK_DGRAM | ...
Listen Queue size for listen
ReuseAddr Set SO_REUSEADDR before binding
Reuse Set SO_REUSEADDR before binding (deprecated,
prefer ReuseAddr)
ReusePort Set SO_REUSEPORT before binding
Broadcast Set SO_BROADCAST before binding
Timeout Timeout value for various operations
MultiHomed Try all addresses for multi-homed hosts
Blocking Determine if connection will be blocking mode
If C<Listen> is defined then a listen socket is created, else if the
socket type, which is derived from the protocol, is SOCK_STREAM then
connect() is called. If the C<Listen> argument is given, but false,
the queue size will be set to 5.
Although it is not illegal, the use of C<MultiHomed> on a socket
which is in non-blocking mode is of little use. This is because the
first connect will never fail with a timeout as the connect call
will not block.
The C<PeerAddr> can be a hostname or the IP-address on the
"xx.xx.xx.xx" form. The C<PeerPort> can be a number or a symbolic
service name. The service name might be followed by a number in
parenthesis which is used if the service is not known by the system.
The C<PeerPort> specification can also be embedded in the C<PeerAddr>
by preceding it with a ":".
If C<Proto> is not given and you specify a symbolic C<PeerPort> port,
then the constructor will try to derive C<Proto> from the service
name. As a last resort C<Proto> "tcp" is assumed. The C<Type>
parameter will be deduced from C<Proto> if not specified.
If the constructor is only passed a single argument, it is assumed to
be a C<PeerAddr> specification.
If C<Blocking> is set to 0, the connection will be in nonblocking mode.
If not specified it defaults to 1 (blocking mode).
Examples:
$sock = IO::Socket::INET->new(PeerAddr => 'www.perl.org',
PeerPort => 'http(80)',
Proto => 'tcp');
$sock = IO::Socket::INET->new(PeerAddr => 'localhost:smtp(25)');
$sock = IO::Socket::INET->new(Listen => 5,
LocalAddr => 'localhost',
LocalPort => 9000,
Proto => 'tcp');
$sock = IO::Socket::INET->new('127.0.0.1:25');
$sock = IO::Socket::INET->new(
PeerPort => 9999,
PeerAddr => inet_ntoa(INADDR_BROADCAST),
Proto => 'udp',
LocalAddr => 'localhost',
Broadcast => 1 )
or die "Can't bind : $IO::Socket::errstr\n";
If the constructor fails it will return C<undef> and set the
C<$IO::Socket::errstr> package variable to contain an error message.
$sock = IO::Socket::INET->new(...)
or die "Cannot create socket - $IO::Socket::errstr\n";
For legacy reasons the error message is also set into the global C<$@>
variable, and you may still find older code which looks here instead.
$sock = IO::Socket::INET->new(...)
or die "Cannot create socket - $@\n";
=back
=head2 METHODS
=over 4
=item sockaddr ()
Return the address part of the sockaddr structure for the socket
=item sockport ()
Return the port number that the socket is using on the local host
=item sockhost ()
Return the address part of the sockaddr structure for the socket in a
text form xx.xx.xx.xx
=item peeraddr ()
Return the address part of the sockaddr structure for the socket on
the peer host
=item peerport ()
Return the port number for the socket on the peer host.
=item peerhost ()
Return the address part of the sockaddr structure for the socket on the
peer host in a text form xx.xx.xx.xx
=back
=head1 SEE ALSO
L<Socket>, L<IO::Socket>
=head1 AUTHOR
Graham Barr. Currently maintained by the Perl Porters. Please report all
bugs at L<https://github.com/Perl/perl5/issues>.
=head1 COPYRIGHT
Copyright (c) 1996-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,173 @@
# IO::Socket::UNIX.pm
#
# Copyright (c) 1997-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
package IO::Socket::UNIX;
use strict;
use IO::Socket;
use Carp;
our @ISA = qw(IO::Socket);
our $VERSION = "1.52";
IO::Socket::UNIX->register_domain( AF_UNIX );
sub new {
my $class = shift;
unshift(@_, "Peer") if @_ == 1;
return $class->SUPER::new(@_);
}
sub configure {
my($sock,$arg) = @_;
my($bport,$cport);
my $type = $arg->{Type} || SOCK_STREAM;
$sock->socket(AF_UNIX, $type, 0) or
return undef;
if(exists $arg->{Blocking}) {
$sock->blocking($arg->{Blocking}) or
return undef;
}
if(exists $arg->{Local}) {
my $addr = sockaddr_un($arg->{Local});
$sock->bind($addr) or
return undef;
}
if(exists $arg->{Listen} && $type != SOCK_DGRAM) {
$sock->listen($arg->{Listen} || 5) or
return undef;
}
elsif(exists $arg->{Peer}) {
my $addr = sockaddr_un($arg->{Peer});
$sock->connect($addr) or
return undef;
}
$sock;
}
sub hostpath {
@_ == 1 or croak 'usage: $sock->hostpath()';
my $n = $_[0]->sockname || return undef;
(sockaddr_un($n))[0];
}
sub peerpath {
@_ == 1 or croak 'usage: $sock->peerpath()';
my $n = $_[0]->peername || return undef;
(sockaddr_un($n))[0];
}
1; # Keep require happy
__END__
=head1 NAME
IO::Socket::UNIX - Object interface for AF_UNIX domain sockets
=head1 SYNOPSIS
use IO::Socket::UNIX;
my $SOCK_PATH = "$ENV{HOME}/unix-domain-socket-test.sock";
# Server:
my $server = IO::Socket::UNIX->new(
Type => SOCK_STREAM(),
Local => $SOCK_PATH,
Listen => 1,
);
my $count = 1;
while (my $conn = $server->accept()) {
$conn->print("Hello " . ($count++) . "\n");
}
# Client:
my $client = IO::Socket::UNIX->new(
Type => SOCK_STREAM(),
Peer => $SOCK_PATH,
);
# Now read and write from $client
=head1 DESCRIPTION
C<IO::Socket::UNIX> provides an object interface to creating and using sockets
in the AF_UNIX domain. It is built upon the L<IO::Socket> interface and
inherits all the methods defined by L<IO::Socket>.
=head1 CONSTRUCTOR
=over 4
=item new ( [ARGS] )
Creates an C<IO::Socket::UNIX> object, which is a reference to a
newly created symbol (see the L<Symbol> package). C<new>
optionally takes arguments, these arguments are in key-value pairs.
In addition to the key-value pairs accepted by L<IO::Socket>,
C<IO::Socket::UNIX> provides.
Type Type of socket (eg SOCK_STREAM or SOCK_DGRAM)
Local Path to local fifo
Peer Path to peer fifo
Listen Queue size for listen
If the constructor is only passed a single argument, it is assumed to
be a C<Peer> specification.
If the C<Listen> argument is given, but false, the queue size will be set to 5.
If the constructor fails it will return C<undef> and set the
C<$IO::Socket::errstr> package variable to contain an error message.
$sock = IO::Socket::UNIX->new(...)
or die "Cannot create socket - $IO::Socket::errstr\n";
For legacy reasons the error message is also set into the global C<$@>
variable, and you may still find older code which looks here instead.
$sock = IO::Socket::UNIX->new(...)
or die "Cannot create socket - $@\n";
=back
=head1 METHODS
=over 4
=item hostpath()
Returns the pathname to the fifo at the local end
=item peerpath()
Returns the pathanme to the fifo at the peer end
=back
=head1 SEE ALSO
L<Socket>, L<IO::Socket>
=head1 AUTHOR
Graham Barr. Currently maintained by the Perl Porters. Please report all
bugs at L<https://github.com/Perl/perl5/issues>.
=head1 COPYRIGHT
Copyright (c) 1996-8 Graham Barr <gbarr@pobox.com>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,238 @@
################################################################################
#
# Version 2.x, Copyright (C) 2007-2013, Marcus Holland-Moritz <mhx@cpan.org>.
# Version 1.x, Copyright (C) 1997, Graham Barr <gbarr@pobox.com>.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
################################################################################
package IPC::Msg;
use IPC::SysV qw(IPC_STAT IPC_SET IPC_RMID);
use strict;
use vars qw($VERSION);
use Carp;
$VERSION = '2.09';
# Figure out if we have support for native sized types
my $N = do { my $foo = eval { pack "L!", 0 }; $@ ? '' : '!' };
{
package IPC::Msg::stat;
use Class::Struct qw(struct);
struct 'IPC::Msg::stat' => [
uid => '$',
gid => '$',
cuid => '$',
cgid => '$',
mode => '$',
qnum => '$',
qbytes => '$',
lspid => '$',
lrpid => '$',
stime => '$',
rtime => '$',
ctime => '$',
];
}
sub new {
@_ == 3 || croak 'IPC::Msg->new( KEY , FLAGS )';
my $class = shift;
my $id = msgget($_[0],$_[1]);
defined($id)
? bless \$id, $class
: undef;
}
sub id {
my $self = shift;
$$self;
}
sub stat {
my $self = shift;
my $data = "";
msgctl($$self,IPC_STAT,$data) or
return undef;
IPC::Msg::stat->new->unpack($data);
}
sub set {
my $self = shift;
my $ds;
if(@_ == 1) {
$ds = shift;
}
else {
croak 'Bad arg count' if @_ % 2;
my %arg = @_;
$ds = $self->stat
or return undef;
my($key,$val);
$ds->$key($val)
while(($key,$val) = each %arg);
}
msgctl($$self,IPC_SET,$ds->pack);
}
sub remove {
my $self = shift;
(msgctl($$self,IPC_RMID,0), undef $$self)[0];
}
sub rcv {
@_ <= 5 && @_ >= 3 or croak '$msg->rcv( BUF, LEN, TYPE, FLAGS )';
my $self = shift;
my $buf = "";
msgrcv($$self,$buf,$_[1],$_[2] || 0, $_[3] || 0) or
return;
my $type;
($type,$_[0]) = unpack("l$N a*",$buf);
$type;
}
sub snd {
@_ <= 4 && @_ >= 3 or croak '$msg->snd( TYPE, BUF, FLAGS )';
my $self = shift;
msgsnd($$self,pack("l$N a*",$_[0],$_[1]), $_[2] || 0);
}
1;
__END__
=head1 NAME
IPC::Msg - SysV Msg IPC object class
=head1 SYNOPSIS
use IPC::SysV qw(IPC_PRIVATE S_IRUSR S_IWUSR);
use IPC::Msg;
$msg = IPC::Msg->new(IPC_PRIVATE, S_IRUSR | S_IWUSR);
$msg->snd($msgtype, $msgdata);
$msg->rcv($buf, 256);
$ds = $msg->stat;
$msg->remove;
=head1 DESCRIPTION
A class providing an object based interface to SysV IPC message queues.
=head1 METHODS
=over 4
=item new ( KEY , FLAGS )
Creates a new message queue associated with C<KEY>. A new queue is
created if
=over 4
=item *
C<KEY> is equal to C<IPC_PRIVATE>
=item *
C<KEY> does not already have a message queue associated with
it, and C<I<FLAGS> & IPC_CREAT> is true.
=back
On creation of a new message queue C<FLAGS> is used to set the
permissions. Be careful not to set any flags that the Sys V
IPC implementation does not allow: in some systems setting
execute bits makes the operations fail.
=item id
Returns the system message queue identifier.
=item rcv ( BUF, LEN [, TYPE [, FLAGS ]] )
Read a message from the queue. Returns the type of the message read.
See L<msgrcv(2)>. The BUF becomes tainted.
=item remove
Remove and destroy the message queue from the system.
=item set ( STAT )
=item set ( NAME => VALUE [, NAME => VALUE ...] )
C<set> will set the following values of the C<stat> structure associated
with the message queue.
uid
gid
mode (oly the permission bits)
qbytes
C<set> accepts either a stat object, as returned by the C<stat> method,
or a list of I<name>-I<value> pairs.
=item snd ( TYPE, MSG [, FLAGS ] )
Place a message on the queue with the data from C<MSG> and with type C<TYPE>.
See L<msgsnd(2)>.
=item stat
Returns an object of type C<IPC::Msg::stat> which is a sub-class of
C<Class::Struct>. It provides the following fields. For a description
of these fields see you system documentation.
uid
gid
cuid
cgid
mode
qnum
qbytes
lspid
lrpid
stime
rtime
ctime
=back
=head1 SEE ALSO
L<IPC::SysV>, L<Class::Struct>
=head1 AUTHORS
Graham Barr <gbarr@pobox.com>,
Marcus Holland-Moritz <mhx@cpan.org>
=head1 COPYRIGHT
Version 2.x, Copyright (C) 2007-2013, Marcus Holland-Moritz.
Version 1.x, Copyright (c) 1997, Graham Barr.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@@ -0,0 +1,314 @@
################################################################################
#
# Version 2.x, Copyright (C) 2007-2013, Marcus Holland-Moritz <mhx@cpan.org>.
# Version 1.x, Copyright (C) 1997, Graham Barr <gbarr@pobox.com>.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
################################################################################
package IPC::Semaphore;
use IPC::SysV qw(GETNCNT GETZCNT GETVAL SETVAL GETPID GETALL SETALL
IPC_STAT IPC_SET IPC_RMID);
use strict;
use vars qw($VERSION);
use Carp;
$VERSION = '2.09';
# Figure out if we have support for native sized types
my $N = do { my $foo = eval { pack "L!", 0 }; $@ ? '' : '!' };
{
package IPC::Semaphore::stat;
use Class::Struct qw(struct);
struct 'IPC::Semaphore::stat' => [
uid => '$',
gid => '$',
cuid => '$',
cgid => '$',
mode => '$',
ctime => '$',
otime => '$',
nsems => '$',
];
}
sub new {
@_ == 4 || croak __PACKAGE__ . '->new( KEY, NSEMS, FLAGS )';
my $class = shift;
my $id = semget($_[0],$_[1],$_[2]);
defined($id)
? bless \$id, $class
: undef;
}
sub id {
my $self = shift;
$$self;
}
sub remove {
my $self = shift;
my $result = semctl($$self,0,IPC_RMID,0);
undef $$self;
$result;
}
sub getncnt {
@_ == 2 || croak '$sem->getncnt( SEM )';
my $self = shift;
my $sem = shift;
my $v = semctl($$self,$sem,GETNCNT,0);
$v ? 0 + $v : undef;
}
sub getzcnt {
@_ == 2 || croak '$sem->getzcnt( SEM )';
my $self = shift;
my $sem = shift;
my $v = semctl($$self,$sem,GETZCNT,0);
$v ? 0 + $v : undef;
}
sub getval {
@_ == 2 || croak '$sem->getval( SEM )';
my $self = shift;
my $sem = shift;
my $v = semctl($$self,$sem,GETVAL,0);
$v ? 0 + $v : undef;
}
sub getpid {
@_ == 2 || croak '$sem->getpid( SEM )';
my $self = shift;
my $sem = shift;
my $v = semctl($$self,$sem,GETPID,0);
$v ? 0 + $v : undef;
}
sub op {
@_ >= 4 || croak '$sem->op( OPLIST )';
my $self = shift;
croak 'Bad arg count' if @_ % 3;
my $data = pack("s$N*",@_);
semop($$self,$data);
}
sub stat {
my $self = shift;
my $data = "";
semctl($$self,0,IPC_STAT,$data)
or return undef;
IPC::Semaphore::stat->new->unpack($data);
}
sub set {
my $self = shift;
my $ds;
if(@_ == 1) {
$ds = shift;
}
else {
croak 'Bad arg count' if @_ % 2;
my %arg = @_;
$ds = $self->stat
or return undef;
my($key,$val);
$ds->$key($val)
while(($key,$val) = each %arg);
}
my $v = semctl($$self,0,IPC_SET,$ds->pack);
$v ? 0 + $v : undef;
}
sub getall {
my $self = shift;
my $data = "";
semctl($$self,0,GETALL,$data)
or return ();
(unpack("s$N*",$data));
}
sub setall {
my $self = shift;
my $data = pack("s$N*",@_);
semctl($$self,0,SETALL,$data);
}
sub setval {
@_ == 3 || croak '$sem->setval( SEM, VAL )';
my $self = shift;
my $sem = shift;
my $val = shift;
semctl($$self,$sem,SETVAL,$val);
}
1;
__END__
=head1 NAME
IPC::Semaphore - SysV Semaphore IPC object class
=head1 SYNOPSIS
use IPC::SysV qw(IPC_PRIVATE S_IRUSR S_IWUSR IPC_CREAT);
use IPC::Semaphore;
$sem = IPC::Semaphore->new(IPC_PRIVATE, 10, S_IRUSR | S_IWUSR | IPC_CREAT);
$sem->setall( (0) x 10);
@sem = $sem->getall;
$ncnt = $sem->getncnt;
$zcnt = $sem->getzcnt;
$ds = $sem->stat;
$sem->remove;
=head1 DESCRIPTION
A class providing an object based interface to SysV IPC semaphores.
=head1 METHODS
=over 4
=item new ( KEY , NSEMS , FLAGS )
Create a new semaphore set associated with C<KEY>. C<NSEMS> is the number
of semaphores in the set. A new set is created if
=over 4
=item *
C<KEY> is equal to C<IPC_PRIVATE>
=item *
C<KEY> does not already have a semaphore identifier
associated with it, and C<I<FLAGS> & IPC_CREAT> is true.
=back
On creation of a new semaphore set C<FLAGS> is used to set the
permissions. Be careful not to set any flags that the Sys V
IPC implementation does not allow: in some systems setting
execute bits makes the operations fail.
=item getall
Returns the values of the semaphore set as an array.
=item getncnt ( SEM )
Returns the number of processes waiting for the semaphore C<SEM> to
become greater than its current value
=item getpid ( SEM )
Returns the process id of the last process that performed an operation
on the semaphore C<SEM>.
=item getval ( SEM )
Returns the current value of the semaphore C<SEM>.
=item getzcnt ( SEM )
Returns the number of processes waiting for the semaphore C<SEM> to
become zero.
=item id
Returns the system identifier for the semaphore set.
=item op ( OPLIST )
C<OPLIST> is a list of operations to pass to C<semop>. C<OPLIST> is
a concatenation of smaller lists, each which has three values. The
first is the semaphore number, the second is the operation and the last
is a flags value. See L<semop(2)> for more details. For example
$sem->op(
0, -1, IPC_NOWAIT,
1, 1, IPC_NOWAIT
);
=item remove
Remove and destroy the semaphore set from the system.
=item set ( STAT )
=item set ( NAME => VALUE [, NAME => VALUE ...] )
C<set> will set the following values of the C<stat> structure associated
with the semaphore set.
uid
gid
mode (only the permission bits)
C<set> accepts either a stat object, as returned by the C<stat> method,
or a list of I<name>-I<value> pairs.
=item setall ( VALUES )
Sets all values in the semaphore set to those given on the C<VALUES> list.
C<VALUES> must contain the correct number of values.
=item setval ( N , VALUE )
Set the C<N>th value in the semaphore set to C<VALUE>
=item stat
Returns an object of type C<IPC::Semaphore::stat> which is a sub-class of
C<Class::Struct>. It provides the following fields. For a description
of these fields see your system documentation.
uid
gid
cuid
cgid
mode
ctime
otime
nsems
=back
=head1 SEE ALSO
L<IPC::SysV>, L<Class::Struct>, L<semget(2)>, L<semctl(2)>, L<semop(2)>
=head1 AUTHORS
Graham Barr <gbarr@pobox.com>,
Marcus Holland-Moritz <mhx@cpan.org>
=head1 COPYRIGHT
Version 2.x, Copyright (C) 2007-2013, Marcus Holland-Moritz.
Version 1.x, Copyright (c) 1997, Graham Barr.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

Some files were not shown because too many files have changed in this diff Show More