Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | #!/import/archperf/ws/devtools/4/amd64/bin/perl |
2 | eval 'exec /import/archperf/ws/devtools/4/amd64/bin/perl -S $0 ${1+"$@"}' | |
3 | if $running_under_some_shell; | |
4 | #!./perl | |
5 | # $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $ | |
6 | # | |
7 | use 5.8.0; | |
8 | use strict; | |
9 | use Encode ; | |
10 | use Encode::Alias; | |
11 | my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio); | |
12 | ||
13 | use File::Basename; | |
14 | my $name = basename($0); | |
15 | ||
16 | use Getopt::Long qw(:config no_ignore_case); | |
17 | ||
18 | my %Opt; | |
19 | ||
20 | help() | |
21 | unless | |
22 | GetOptions(\%Opt, | |
23 | 'from|f=s', | |
24 | 'to|t=s', | |
25 | 'list|l', | |
26 | 'string|s=s', | |
27 | 'check|C=i', | |
28 | 'c', | |
29 | 'perlqq|p', | |
30 | 'debug|D', | |
31 | 'scheme|S=s', | |
32 | 'resolve|r=s', | |
33 | 'help', | |
34 | ); | |
35 | ||
36 | $Opt{help} and help(); | |
37 | $Opt{list} and list_encodings(); | |
38 | my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG}; | |
39 | defined $Opt{resolve} and resolve_encoding($Opt{resolve}); | |
40 | $Opt{from} || $Opt{to} || help(); | |
41 | my $from = $Opt{from} || $locale or help("from_encoding unspecified"); | |
42 | my $to = $Opt{to} || $locale or help("to_encoding unspecified"); | |
43 | $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit; | |
44 | my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to'; | |
45 | $Opt{check} ||= $Opt{c}; | |
46 | $Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ; | |
47 | ||
48 | if ($Opt{debug}){ | |
49 | my $cfrom = Encode->getEncoding($from)->name; | |
50 | my $cto = Encode->getEncoding($to)->name; | |
51 | print <<"EOT"; | |
52 | Scheme: $scheme | |
53 | From: $from => $cfrom | |
54 | To: $to => $cto | |
55 | EOT | |
56 | } | |
57 | ||
58 | # we do not use <> (or ARGV) for the sake of binmode() | |
59 | @ARGV or push @ARGV, \*STDIN; | |
60 | ||
61 | unless ($scheme eq 'perlio'){ | |
62 | binmode STDOUT; | |
63 | for my $argv (@ARGV){ | |
64 | my $ifh = ref $argv ? $argv : undef; | |
65 | $ifh or open $ifh, "<", $argv or next; | |
66 | binmode $ifh; | |
67 | if ($scheme eq 'from_to'){ # default | |
68 | while(<$ifh>){ | |
69 | Encode::from_to($_, $from, $to, $Opt{check}); | |
70 | print; | |
71 | } | |
72 | }elsif ($scheme eq 'decode_encode'){ # step-by-step | |
73 | while(<$ifh>){ | |
74 | my $decoded = decode($from, $_, $Opt{check}); | |
75 | my $encoded = encode($to, $decoded); | |
76 | print $encoded; | |
77 | } | |
78 | } else { # won't reach | |
79 | die "$name: unknown scheme: $scheme"; | |
80 | } | |
81 | } | |
82 | }else{ | |
83 | # NI-S favorite | |
84 | binmode STDOUT => "raw:encoding($to)"; | |
85 | for my $argv (@ARGV){ | |
86 | my $ifh = ref $argv ? $argv : undef; | |
87 | $ifh or open $ifh, "<", $argv or next; | |
88 | binmode $ifh => "raw:encoding($from)"; | |
89 | print while(<$ifh>); | |
90 | } | |
91 | } | |
92 | ||
93 | sub list_encodings{ | |
94 | print join("\n", Encode->encodings(":all")), "\n"; | |
95 | exit 0; | |
96 | } | |
97 | ||
98 | sub resolve_encoding { | |
99 | if (my $alias = Encode::resolve_alias($_[0])) { | |
100 | print $alias, "\n"; | |
101 | exit 0; | |
102 | } else { | |
103 | warn "$name: $_[0] is not known to Encode\n"; | |
104 | exit 1; | |
105 | } | |
106 | } | |
107 | ||
108 | sub help{ | |
109 | my $message = shift; | |
110 | $message and print STDERR "$name error: $message\n"; | |
111 | print STDERR <<"EOT"; | |
112 | $name [-f from_encoding] [-t to_encoding] [-s string] [files...] | |
113 | $name -l | |
114 | $name -r encoding_alias | |
115 | -l,--list | |
116 | lists all available encodings | |
117 | -r,--resolve encoding_alias | |
118 | resolve encoding to its (Encode) canonical name | |
119 | -f,--from from_encoding | |
120 | when omitted, the current locale will be used | |
121 | -t,--to to_encoding | |
122 | when omitted, the current locale will be used | |
123 | -s,--string string | |
124 | "string" will be the input instead of STDIN or files | |
125 | The following are mainly of interest to Encode hackers: | |
126 | -D,--debug show debug information | |
127 | -C N | -c | -p check the validity of the input | |
128 | -S,--scheme scheme use the scheme for conversion | |
129 | EOT | |
130 | exit; | |
131 | } | |
132 | ||
133 | __END__ | |
134 | ||
135 | =head1 NAME | |
136 | ||
137 | piconv -- iconv(1), reinvented in perl | |
138 | ||
139 | =head1 SYNOPSIS | |
140 | ||
141 | piconv [-f from_encoding] [-t to_encoding] [-s string] [files...] | |
142 | piconv -l | |
143 | piconv [-C N|-c|-p] | |
144 | piconv -S scheme ... | |
145 | piconv -r encoding | |
146 | piconv -D ... | |
147 | piconv -h | |
148 | ||
149 | =head1 DESCRIPTION | |
150 | ||
151 | B<piconv> is perl version of B<iconv>, a character encoding converter | |
152 | widely available for various Unixen today. This script was primarily | |
153 | a technology demonstrator for Perl 5.8.0, but you can use piconv in the | |
154 | place of iconv for virtually any case. | |
155 | ||
156 | piconv converts the character encoding of either STDIN or files | |
157 | specified in the argument and prints out to STDOUT. | |
158 | ||
159 | Here is the list of options. Each option can be in short format (-f) | |
160 | or long (--from). | |
161 | ||
162 | =over 4 | |
163 | ||
164 | =item -f,--from from_encoding | |
165 | ||
166 | Specifies the encoding you are converting from. Unlike B<iconv>, | |
167 | this option can be omitted. In such cases, the current locale is used. | |
168 | ||
169 | =item -t,--to to_encoding | |
170 | ||
171 | Specifies the encoding you are converting to. Unlike B<iconv>, | |
172 | this option can be omitted. In such cases, the current locale is used. | |
173 | ||
174 | Therefore, when both -f and -t are omitted, B<piconv> just acts | |
175 | like B<cat>. | |
176 | ||
177 | =item -s,--string I<string> | |
178 | ||
179 | uses I<string> instead of file for the source of text. | |
180 | ||
181 | =item -l,--list | |
182 | ||
183 | Lists all available encodings, one per line, in case-insensitive | |
184 | order. Note that only the canonical names are listed; many aliases | |
185 | exist. For example, the names are case-insensitive, and many standard | |
186 | and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850" | |
187 | instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported> | |
188 | for a full discussion. | |
189 | ||
190 | =item -C,--check I<N> | |
191 | ||
192 | Check the validity of the stream if I<N> = 1. When I<N> = -1, something | |
193 | interesting happens when it encounters an invalid character. | |
194 | ||
195 | =item -c | |
196 | ||
197 | Same as C<-C 1>. | |
198 | ||
199 | =item -p,--perlqq | |
200 | ||
201 | Same as C<-C -1>. | |
202 | ||
203 | =item -h,--help | |
204 | ||
205 | Show usage. | |
206 | ||
207 | =item -D,--debug | |
208 | ||
209 | Invokes debugging mode. Primarily for Encode hackers. | |
210 | ||
211 | =item -S,--scheme scheme | |
212 | ||
213 | Selects which scheme is to be used for conversion. Available schemes | |
214 | are as follows: | |
215 | ||
216 | =over 4 | |
217 | ||
218 | =item from_to | |
219 | ||
220 | Uses Encode::from_to for conversion. This is the default. | |
221 | ||
222 | =item decode_encode | |
223 | ||
224 | Input strings are decode()d then encode()d. A straight two-step | |
225 | implementation. | |
226 | ||
227 | =item perlio | |
228 | ||
229 | The new perlIO layer is used. NI-S' favorite. | |
230 | ||
231 | =back | |
232 | ||
233 | Like the I<-D> option, this is also for Encode hackers. | |
234 | ||
235 | =back | |
236 | ||
237 | =head1 SEE ALSO | |
238 | ||
239 | L<iconv/1> | |
240 | L<locale/3> | |
241 | L<Encode> | |
242 | L<Encode::Supported> | |
243 | L<Encode::Alias> | |
244 | L<PerlIO> | |
245 | ||
246 | =cut |