Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # |
2 | # $Id: UTF7.pm,v 2.1 2004/05/25 16:27:14 dankogai Exp $ | |
3 | # | |
4 | package Encode::Unicode::UTF7; | |
5 | use strict; | |
6 | no warnings 'redefine'; | |
7 | use base qw(Encode::Encoding); | |
8 | __PACKAGE__->Define('UTF-7'); | |
9 | our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; | |
10 | use MIME::Base64; | |
11 | use Encode; | |
12 | ||
13 | # | |
14 | # Algorithms taken from Unicode::String by Gisle Aas | |
15 | # | |
16 | ||
17 | our $OPTIONAL_DIRECT_CHARS = 1; | |
18 | my $specials = quotemeta "\'(),-./:?"; | |
19 | $OPTIONAL_DIRECT_CHARS and | |
20 | $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}"; | |
21 | # \s will not work because it matches U+3000 DEOGRAPHIC SPACE | |
22 | # We use qr/[\n\r\t\ ] instead | |
23 | my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/; | |
24 | my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/; | |
25 | my $e_utf16 = find_encoding("UTF-16BE"); | |
26 | ||
27 | sub needs_lines { 1 }; | |
28 | ||
29 | sub encode($$;$){ | |
30 | my ($obj, $str, $chk) = @_; | |
31 | my $len = length($str); | |
32 | pos($str) = 0; | |
33 | my $bytes = ''; | |
34 | while (pos($str) < $len){ | |
35 | if ($str =~ /\G($re_asis+)/ogc){ | |
36 | $bytes .= $1; | |
37 | }elsif($str =~ /\G($re_encoded+)/ogsc){ | |
38 | if ($1 eq "+"){ | |
39 | $bytes .= "+-"; | |
40 | }else{ | |
41 | my $s = $1; | |
42 | my $base64 = encode_base64($e_utf16->encode($s), ''); | |
43 | $base64 =~ s/=+$//; | |
44 | $bytes .= "+$base64-"; | |
45 | } | |
46 | }else{ | |
47 | die "This should not happen! (pos=" . pos($str) . ")"; | |
48 | } | |
49 | } | |
50 | $_[1] = '' if $chk; | |
51 | return $bytes; | |
52 | } | |
53 | ||
54 | sub decode{ | |
55 | my ($obj, $bytes, $chk) = @_; | |
56 | my $len = length($bytes); | |
57 | my $str = ""; | |
58 | while (pos($bytes) < $len) { | |
59 | if ($bytes =~ /\G([^+]+)/ogc) { | |
60 | $str .= $1; | |
61 | }elsif($bytes =~ /\G\+-/ogc) { | |
62 | $str .= "+"; | |
63 | }elsif($bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc) { | |
64 | my $base64 = $1; | |
65 | my $pad = length($base64) % 4; | |
66 | $base64 .= "=" x (4 - $pad) if $pad; | |
67 | $str .= $e_utf16->decode(decode_base64($base64)); | |
68 | }elsif($bytes =~ /\G\+/ogc) { | |
69 | $^W and warn "Bad UTF7 data escape"; | |
70 | $str .= "+"; | |
71 | }else{ | |
72 | die "This should not happen " . pos($bytes); | |
73 | } | |
74 | } | |
75 | $_[1] = '' if $chk; | |
76 | return $str; | |
77 | } | |
78 | 1; | |
79 | __END__ | |
80 | ||
81 | =head1 NAME | |
82 | ||
83 | Encode::Unicode::UTF7 -- UTF-7 encoding | |
84 | ||
85 | =head1 SYNOPSIS | |
86 | ||
87 | use Encode qw/encode decode/; | |
88 | $utf7 = encode("UTF-7", $utf8); | |
89 | $utf8 = decode("UTF-7", $ucs2); | |
90 | ||
91 | =head1 ABSTRACT | |
92 | ||
93 | This module implements UTF-7 encoding documented in RFC 2152. UTF-7, | |
94 | as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It | |
95 | is designed to be MTA-safe and expected to be a standard way to | |
96 | exchange Unicoded mails via mails. But with the advent of UTF-8 and | |
97 | 8-bit compliant MTAs, UTF-7 is hardly ever used. | |
98 | ||
99 | UTF-7 was not supported by Encode until version 1.95 because of that. | |
100 | But Unicode::String, a module by Gisle Aas which adds Unicode supports | |
101 | to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added | |
102 | so Encode can supersede Unicode::String 100%. | |
103 | ||
104 | =head1 In Practice | |
105 | ||
106 | When you want to encode Unicode for mails and web pages, however, do | |
107 | not use UTF-7 unless you are sure your recipients and readers can | |
108 | handle it. Very few MUAs and WWW Browsers support these days (only | |
109 | Mozilla seems to support one). For general cases, use UTF-8 for | |
110 | message body and MIME-Header for header instead. | |
111 | ||
112 | =head1 SEE ALSO | |
113 | ||
114 | L<Encode>, L<Encode::Unicode>, L<Unicode::String> | |
115 | ||
116 | RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt> | |
117 | ||
118 | =cut |