Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | #!/import/bw/tools/local/perl-5.8.0/bin/perl |
2 | eval 'exec /import/bw/tools/local/perl-5.8.0/bin/perl -S $0 ${1+"$@"}' | |
3 | if $running_under_some_shell; | |
4 | #!./perl | |
5 | # $Id: piconv,v 1.25 2002/06/01 18:07:49 dankogai Exp dankogai $ | |
6 | # | |
7 | use 5.8.0; | |
8 | use strict; | |
9 | use Encode ; | |
10 | use Encode::Alias; | |
# Conversion schemes that may be selected with -S.
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);

use Getopt::Std;

# Parse the command line.  help() and list_encodings() both exit, so
# -h and -l never reach the conversion code.
my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt);
$Opt{h} and help();
$Opt{l} and list_encodings();

# Fallback encoding taken from the environment.  LC_ALL overrides
# LC_CTYPE, which overrides LANG (POSIX precedence).
my $locale = $ENV{LC_ALL} || $ENV{LC_CTYPE} || $ENV{LANG};
# A locale name looks like language[_territory][.codeset][@modifier];
# only the codeset part names an encoding.  Values without a codeset
# are left untouched.
$locale =~ s/\A[^.]*\.([^@]+).*\z/$1/s if $locale;

# At least one of -f / -t is required; otherwise show the usage and exit.
$Opt{f} || $Opt{t} || help();
my $from = $Opt{f} || $locale or help("from_encoding unspecified");
my $to = $Opt{t} || $locale or help("to_encoding unspecified");

# -s: convert the given string instead of reading input.  Test with
# defined() and do not chain on from_to()'s return value, so that a
# string such as "0", or one that converts to an empty result, is still
# handled here instead of silently falling through to reading STDIN.
if (defined $Opt{s}){
    Encode::from_to($Opt{s}, $from, $to);
    print $Opt{s};
    exit;
}

# An absent or unknown -S value falls back to the default scheme.
my $scheme = (defined $Opt{S} && exists $Scheme{$Opt{S}})
    ? $Opt{S} : 'from_to';

# Check value handed to Encode: -C N, else -c, and -p overrides both.
$Opt{C} ||= $Opt{c};
$Opt{C} = Encode::FB_PERLQQ if $Opt{p};
26 | ||
# -D: report the selected scheme and the canonical name each encoding
# argument resolves to.
if ($Opt{D}){
    # Fail with the offending name rather than with "Can't call method
    # "name" on an undefined value" when an encoding cannot be resolved.
    my $from_enc = Encode::find_encoding($from)
        or die "Unknown encoding '$from'\n";
    my $to_enc = Encode::find_encoding($to)
        or die "Unknown encoding '$to'\n";
    my $cfrom = $from_enc->name;
    my $cto = $to_enc->name;
    print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
To: $to => $cto
EOT
}
36 | ||
# Run the conversion with the selected scheme.  Input is the files named
# on the command line, or STDIN when there are none; output is STDOUT.

# default: convert each line in place with Encode::from_to
if ($scheme eq 'from_to'){
    while(<>){
        Encode::from_to($_, $from, $to, $Opt{C}); print;
    };
# step-by-step: decode to Perl's internal form, then encode
}elsif ($scheme eq 'decode_encode'){
    while(<>){
        my $decoded = decode($from, $_, $Opt{C});
        my $encoded = encode($to, $decoded);
        print $encoded;
    };
# NI-S favorite: let PerlIO encoding layers do the work
}elsif ($scheme eq 'perlio'){
    binmode(STDOUT, ":encoding($to)");
    # The decoding layer must sit on the handle that is actually read.
    # A layer on STDIN alone has no effect on files named in @ARGV, so
    # each input is opened explicitly with the layer applied.
    my $stdin_layered;
    for my $file (@ARGV ? @ARGV : '-'){
        my $in;
        if ($file eq '-'){
            $in = \*STDIN;
            # push the layer only once, even if "-" is listed repeatedly
            binmode($in, ":encoding($from)") unless $stdin_layered++;
        }elsif (!open($in, "<:encoding($from)", $file)){
            # like <>, complain and carry on with the next file
            warn "Can't open $file: $!\n";
            next;
        }
        print while <$in>;
        close($in) unless $file eq '-';
    }
}else{ # won't reach
    die "unknown scheme: $scheme";
}
57 | ||
# Print the name of every encoding Encode reports for ":all", one per
# line, then terminate the program.
sub list_encodings {
    my $listing = join "\n", Encode->encodings(":all");
    print "$listing\n";
    exit;
}
62 | ||
# Print the usage summary to STDERR and terminate the program.
#
#   $message - optional error text; when given it is printed before the
#              usage, prefixed with the program name.
#
# Never returns.  The exit status is 1 when an error message was given,
# so that callers and shell scripts can detect the failure, and 0 when
# the usage was merely requested.
sub help{
    my $message = shift;
    use File::Basename;
    my $name = basename($0);
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
-l lists all available encodings (the canonical names, many aliases exist)
-f from_encoding When omitted, the current locale will be used.
-t to_encoding When omitted, the current locale will be used.
-s string "string" will be converted instead of STDIN.
EOT
    exit($message ? 1 : 0);
}
78 | ||
79 | __END__ | |
80 | ||
81 | =head1 NAME | |
82 | ||
83 | piconv -- iconv(1), reinvented in perl | |
84 | ||
85 | =head1 SYNOPSIS | |
86 | ||
87 | piconv [-f from_encoding] [-t to_encoding] [-s string] [files...] | |
88 | piconv -l | |
89 | ||
90 | =head1 DESCRIPTION | |
91 | ||
B<piconv> is a Perl version of B<iconv>, a character encoding converter
widely available for various Unixen today. This script was primarily
a technology demonstrator for Perl 5.8.0, but you can use piconv in the
place of iconv for virtually any case.
96 | ||
97 | piconv converts the character encoding of either STDIN or files | |
98 | specified in the argument and prints out to STDOUT. | |
99 | ||
100 | Here is the list of options. | |
101 | ||
102 | =over 4 | |
103 | ||
104 | =item -f from_encoding | |
105 | ||
106 | Specifies the encoding you are converting from. Unlike B<iconv>, | |
107 | this option can be omitted. In such cases, the current locale is used. | |
108 | ||
109 | =item -t to_encoding | |
110 | ||
111 | Specifies the encoding you are converting to. Unlike B<iconv>, | |
112 | this option can be omitted. In such cases, the current locale is used. | |
113 | ||
Note, however, that at least one of C<-f> and C<-t> must be given: when
both are omitted, B<piconv> prints its usage message and exits instead
of acting like B<cat>.
116 | ||
117 | =item -s I<string> | |
118 | ||
Uses I<string> instead of a file as the source of text. Note that this
differs from B<iconv>, whose C<-s> option suppresses warnings about
invalid characters.
120 | ||
121 | =item -l | |
122 | ||
123 | Lists all available encodings, one per line, in case-insensitive | |
124 | order. Note that only the canonical names are listed; many aliases | |
125 | exist. For example, the names are case-insensitive, and many standard | |
126 | and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850" | |
127 | instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported> | |
128 | for a full discussion. | |
129 | ||
130 | =item -C I<N> | |
131 | ||
132 | Check the validity of the stream if I<N> = 1. When I<N> = -1, something | |
133 | interesting happens when it encounters an invalid character. | |
134 | ||
135 | =item -c | |
136 | ||
137 | Same as C<-C 1>. | |
138 | ||
139 | =item -p | |
140 | ||
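Uses C<Encode::FB_PERLQQ> as the check value, overriding C<-C> and C<-c>:
invalid characters are rendered as Perl-style hexadecimal escapes
(C<\xHH> or C<\x{HHHH}>).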
142 | ||
143 | =item -h | |
144 | ||
145 | Show usage. | |
146 | ||
147 | =item -D | |
148 | ||
149 | Invokes debugging mode. Primarily for Encode hackers. | |
150 | ||
151 | =item -S scheme | |
152 | ||
153 | Selects which scheme is to be used for conversion. Available schemes | |
154 | are as follows: | |
155 | ||
156 | =over 4 | |
157 | ||
158 | =item from_to | |
159 | ||
160 | Uses Encode::from_to for conversion. This is the default. | |
161 | ||
162 | =item decode_encode | |
163 | ||
164 | Input strings are decode()d then encode()d. A straight two-step | |
165 | implementation. | |
166 | ||
167 | =item perlio | |
168 | ||
169 | The new perlIO layer is used. NI-S' favorite. | |
170 | ||
171 | =back | |
172 | ||
173 | Like the I<-D> option, this is also for Encode hackers. | |
174 | ||
175 | =back | |
176 | ||
177 | =head1 SEE ALSO | |
178 | ||
179 | L<iconv(1)> | |
180 | L<locale(3)> | |
181 | L<Encode> | |
182 | L<Encode::Supported> | |
183 | L<Encode::Alias> | |
184 | L<PerlIO> | |
185 | ||
186 | =cut |