| 1 | #!/import/archperf/ws/devtools/4/v8plus/bin/perl |
| 2 | eval 'exec /import/archperf/ws/devtools/4/v8plus/bin/perl -S $0 ${1+"$@"}' |
| 3 | if $running_under_some_shell; |
| 4 | #!./perl |
| 5 | # $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $ |
| 6 | # |
| 7 | use 5.8.0; |
| 8 | use strict; |
| 9 | use Encode ; |
| 10 | use Encode::Alias; |
| 11 | my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio); |
| 12 | |
| 13 | use File::Basename; |
| 14 | my $name = basename($0); |
| 15 | |
| 16 | use Getopt::Long qw(:config no_ignore_case); |
| 17 | |
| 18 | my %Opt; |
| 19 | |
| 20 | help() |
| 21 | unless |
| 22 | GetOptions(\%Opt, |
| 23 | 'from|f=s', |
| 24 | 'to|t=s', |
| 25 | 'list|l', |
| 26 | 'string|s=s', |
| 27 | 'check|C=i', |
| 28 | 'c', |
| 29 | 'perlqq|p', |
| 30 | 'debug|D', |
| 31 | 'scheme|S=s', |
| 32 | 'resolve|r=s', |
| 33 | 'help', |
| 34 | ); |
| 35 | |
| 36 | $Opt{help} and help(); |
| 37 | $Opt{list} and list_encodings(); |
| 38 | my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG}; |
| 39 | defined $Opt{resolve} and resolve_encoding($Opt{resolve}); |
| 40 | $Opt{from} || $Opt{to} || help(); |
| 41 | my $from = $Opt{from} || $locale or help("from_encoding unspecified"); |
| 42 | my $to = $Opt{to} || $locale or help("to_encoding unspecified"); |
| 43 | $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit; |
| 44 | my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to'; |
| 45 | $Opt{check} ||= $Opt{c}; |
| 46 | $Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ; |
| 47 | |
| 48 | if ($Opt{debug}){ |
| 49 | my $cfrom = Encode->getEncoding($from)->name; |
| 50 | my $cto = Encode->getEncoding($to)->name; |
| 51 | print <<"EOT"; |
| 52 | Scheme: $scheme |
| 53 | From: $from => $cfrom |
| 54 | To: $to => $cto |
| 55 | EOT |
| 56 | } |
| 57 | |
| 58 | # we do not use <> (or ARGV) for the sake of binmode() |
| 59 | @ARGV or push @ARGV, \*STDIN; |
| 60 | |
| 61 | unless ($scheme eq 'perlio'){ |
| 62 | binmode STDOUT; |
| 63 | for my $argv (@ARGV){ |
| 64 | my $ifh = ref $argv ? $argv : undef; |
| 65 | $ifh or open $ifh, "<", $argv or next; |
| 66 | binmode $ifh; |
| 67 | if ($scheme eq 'from_to'){ # default |
| 68 | while(<$ifh>){ |
| 69 | Encode::from_to($_, $from, $to, $Opt{check}); |
| 70 | print; |
| 71 | } |
| 72 | }elsif ($scheme eq 'decode_encode'){ # step-by-step |
| 73 | while(<$ifh>){ |
| 74 | my $decoded = decode($from, $_, $Opt{check}); |
| 75 | my $encoded = encode($to, $decoded); |
| 76 | print $encoded; |
| 77 | } |
| 78 | } else { # won't reach |
| 79 | die "$name: unknown scheme: $scheme"; |
| 80 | } |
| 81 | } |
| 82 | }else{ |
| 83 | # NI-S favorite |
| 84 | binmode STDOUT => "raw:encoding($to)"; |
| 85 | for my $argv (@ARGV){ |
| 86 | my $ifh = ref $argv ? $argv : undef; |
| 87 | $ifh or open $ifh, "<", $argv or next; |
| 88 | binmode $ifh => "raw:encoding($from)"; |
| 89 | print while(<$ifh>); |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | sub list_encodings{ |
| 94 | print join("\n", Encode->encodings(":all")), "\n"; |
| 95 | exit 0; |
| 96 | } |
| 97 | |
| 98 | sub resolve_encoding { |
| 99 | if (my $alias = Encode::resolve_alias($_[0])) { |
| 100 | print $alias, "\n"; |
| 101 | exit 0; |
| 102 | } else { |
| 103 | warn "$name: $_[0] is not known to Encode\n"; |
| 104 | exit 1; |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | sub help{ |
| 109 | my $message = shift; |
| 110 | $message and print STDERR "$name error: $message\n"; |
| 111 | print STDERR <<"EOT"; |
| 112 | $name [-f from_encoding] [-t to_encoding] [-s string] [files...] |
| 113 | $name -l |
| 114 | $name -r encoding_alias |
| 115 | -l,--list |
| 116 | lists all available encodings |
| 117 | -r,--resolve encoding_alias |
| 118 | resolve encoding to its (Encode) canonical name |
| 119 | -f,--from from_encoding |
| 120 | when omitted, the current locale will be used |
| 121 | -t,--to to_encoding |
| 122 | when omitted, the current locale will be used |
| 123 | -s,--string string |
| 124 | "string" will be the input instead of STDIN or files |
| 125 | The following are mainly of interest to Encode hackers: |
| 126 | -D,--debug show debug information |
| 127 | -C N | -c | -p check the validity of the input |
| 128 | -S,--scheme scheme use the scheme for conversion |
| 129 | EOT |
| 130 | exit; |
| 131 | } |
| 132 | |
| 133 | __END__ |
| 134 | |
| 135 | =head1 NAME |
| 136 | |
| 137 | piconv -- iconv(1), reinvented in perl |
| 138 | |
| 139 | =head1 SYNOPSIS |
| 140 | |
| 141 | piconv [-f from_encoding] [-t to_encoding] [-s string] [files...] |
| 142 | piconv -l |
| 143 | piconv [-C N|-c|-p] |
| 144 | piconv -S scheme ... |
| 145 | piconv -r encoding |
| 146 | piconv -D ... |
| 147 | piconv -h |
| 148 | |
| 149 | =head1 DESCRIPTION |
| 150 | |
| 151 | B<piconv> is perl version of B<iconv>, a character encoding converter |
| 152 | widely available for various Unixen today. This script was primarily |
| 153 | a technology demonstrator for Perl 5.8.0, but you can use piconv in the |
| 154 | place of iconv for virtually any case. |
| 155 | |
| 156 | piconv converts the character encoding of either STDIN or files |
| 157 | specified in the argument and prints out to STDOUT. |
| 158 | |
| 159 | Here is the list of options. Each option can be in short format (-f) |
| 160 | or long (--from). |
| 161 | |
| 162 | =over 4 |
| 163 | |
| 164 | =item -f,--from from_encoding |
| 165 | |
| 166 | Specifies the encoding you are converting from. Unlike B<iconv>, |
| 167 | this option can be omitted. In such cases, the current locale is used. |
| 168 | |
| 169 | =item -t,--to to_encoding |
| 170 | |
| 171 | Specifies the encoding you are converting to. Unlike B<iconv>, |
| 172 | this option can be omitted. In such cases, the current locale is used. |
| 173 | |
| 174 | Therefore, when both -f and -t are omitted, B<piconv> just acts |
| 175 | like B<cat>. |
| 176 | |
| 177 | =item -s,--string I<string> |
| 178 | |
| 179 | uses I<string> instead of file for the source of text. |
| 180 | |
| 181 | =item -l,--list |
| 182 | |
| 183 | Lists all available encodings, one per line, in case-insensitive |
| 184 | order. Note that only the canonical names are listed; many aliases |
| 185 | exist. For example, the names are case-insensitive, and many standard |
| 186 | and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850" |
| 187 | instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported> |
| 188 | for a full discussion. |
| 189 | |
| 190 | =item -C,--check I<N> |
| 191 | |
| 192 | Check the validity of the stream if I<N> = 1. When I<N> = -1, something |
| 193 | interesting happens when it encounters an invalid character. |
| 194 | |
| 195 | =item -c |
| 196 | |
| 197 | Same as C<-C 1>. |
| 198 | |
| 199 | =item -p,--perlqq |
| 200 | |
| 201 | Same as C<-C -1>. |
| 202 | |
| 203 | =item -h,--help |
| 204 | |
| 205 | Show usage. |
| 206 | |
| 207 | =item -D,--debug |
| 208 | |
| 209 | Invokes debugging mode. Primarily for Encode hackers. |
| 210 | |
| 211 | =item -S,--scheme scheme |
| 212 | |
| 213 | Selects which scheme is to be used for conversion. Available schemes |
| 214 | are as follows: |
| 215 | |
| 216 | =over 4 |
| 217 | |
| 218 | =item from_to |
| 219 | |
| 220 | Uses Encode::from_to for conversion. This is the default. |
| 221 | |
| 222 | =item decode_encode |
| 223 | |
| 224 | Input strings are decode()d then encode()d. A straight two-step |
| 225 | implementation. |
| 226 | |
| 227 | =item perlio |
| 228 | |
| 229 | The new perlIO layer is used. NI-S' favorite. |
| 230 | |
| 231 | =back |
| 232 | |
| 233 | Like the I<-D> option, this is also for Encode hackers. |
| 234 | |
| 235 | =back |
| 236 | |
| 237 | =head1 SEE ALSO |
| 238 | |
| 239 | L<iconv/1> |
| 240 | L<locale/3> |
| 241 | L<Encode> |
| 242 | L<Encode::Supported> |
| 243 | L<Encode::Alias> |
| 244 | L<PerlIO> |
| 245 | |
| 246 | =cut |