package Encode
::MIME
::Header
;
# Build $VERSION from the revision keyword string: every run of digits is
# captured, the first is printed as-is and each following one is zero-padded
# to two digits (so revision 2.1 yields "2.01").
our $VERSION = do { my @r = (q
$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x
$#r, @r };
use Encode
qw(find_encoding encode_utf8 decode_utf8);
decode_b
=> '1', # true: 'B' (Base64) encoded-words may be decoded; false makes decoding them croak
decode_q
=> '1', # true: 'Q' encoded-words may be decoded; false makes decoding them croak
encode
=> 'B', # which encoding to use when encoding: 'B' or 'Q'
bpl
=> 75, # bytes per line: the length limit checked when folding encoded output
# Entries in %Encode::Encoding for the three encoding names MIME-Header,
# MIME-B and MIME-Q. (The values being assigned are not visible in this part
# of the file.)
$Encode::Encoding
{'MIME-Header'} =
$Encode::Encoding
{'MIME-B'} =
$Encode::Encoding
{'MIME-Q'} =
use base
qw(Encode::Encoding);
my ($obj, $str, $chk) = @_;
# Drop the whitespace between adjacent encoded-words ("?= =?" becomes "?==?").
$str =~ s/\?=\s+=\?/\?==\?/gos;
# Unfold a multi-line header into a single line: remove each line break
# (CRLF, bare CR or bare LF) together with the one space or tab that follows.
# The group is non-capturing, and CRLF is listed first so that it is removed
# as a unit; nothing other than the line break and the following blank is
# consumed.
$str =~ s/(?:\r\n|[\r\n])[ \t]//gos;
([0-9A
-Za
-z\
-_
]+) # charset (encoding)
(.*?
) # Base64-encodede contents
$obj->{decode_b
} or croak
qq(MIME
"B" unsupported
);
$obj->{decode_q
} or croak
qq(MIME
"Q" unsupported
);
croak
qq(MIME
"$2" encoding is nonexistent
!);
# Look up the Encode object for the charset named in $enc; an unknown name is fatal.
my $d = find_encoding
($enc) or croak
qq(Unknown encoding
"$enc");
# Undo the Base64 layer of the next argument to obtain the raw octets.
my $db64 = decode_base64
(shift);
# Turn the octets into a string: decode_utf8 when the encoding's name is
# 'utf8', otherwise the encoding's own decode with FB_PERLQQ as the check
# argument.
return $d->name eq 'utf8' ?
Encode
::decode_utf8
($db64) : $d->decode($db64, Encode
::FB_PERLQQ
);
# Look up the Encode object for the charset named in $enc; an unknown name is fatal.
my $d = find_encoding
($enc) or croak
qq(Unknown encoding
"$enc");
# Replace every "=XX" hex escape in $q with the single byte it denotes.
# NOTE(review): RFC 2047 'Q' text also uses "_" to stand for a space; no such
# translation is visible in this fragment -- confirm it is handled.
$q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
# Turn the octets into a string: decode_utf8 when the encoding's name is
# 'utf8', otherwise the encoding's own decode with FB_PERLQQ as the check
# argument.
return $d->name eq 'utf8' ?
Encode
::decode_utf8
($q) : $d->decode($q, Encode
::FB_PERLQQ
);
unpack("C*", qq{()<>@
,;:\"\'/[]?
.=}));
(?
:[0-9A
-Za
-z\
-_
]+) # charset (encoding)
\?(?
:[QqBb
])\? # delimiter
(?
:.*?
) # Base64-encodede contents
# Alternation of the encoded-word pattern and the especials pattern; used
# further down as the delimiter when a line is split into words.
my $re_especials = qr{$re_encoded_word|$especials}xo;
my ($obj, $str, $chk) = @_;
# Split the input into lines. CRLF is tried before the single-character
# alternatives so that a "\r\n" pair counts as one separator rather than two
# (which would produce a spurious empty line between them).
for my $line (split /\r\n|[\r\n]/o, $str){
# Split the line on encoded-words and especials; the capturing parentheses
# keep the delimiters in the resulting list.
for my $word (split /($re_especials)/o, $line){
# A word is encoded when it contains a non-ASCII character or already has
# the form of an encoded-word.
if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
push @word, $obj->_encode($word);
# The current sub-line plus the next word would exceed the bytes-per-line limit.
if (bytes
::length($subline) + bytes
::length($word) > $obj->{bpl
}){
# Keep the pending sub-line, if any.
$subline and push @subline, $subline;
# Sub-lines of one input line are joined with a newline followed by one space.
push @line, join("\n " => @subline);
return join("\n", @line);
# Fixed prefix and suffix wrapped around the encoded text of an encoded-word.
use constant {
    HEAD => '=?UTF-8?',
    TAIL => '?=',
};

# Dispatch table: encoding letter => code reference that encodes one chunk.
use constant SINGLE => {
    B => \&_encode_b,
    Q => \&_encode_q,
};
# Budget for the payload of one encoded-word: the bytes-per-line limit minus
# HEAD, TAIL and two more characters (presumably the "B?" / "Q?" marker --
# confirm).
my $llen = ($o->{bpl
} - length(HEAD
) - 2 - length(TAIL
));
# Scale the budget down to raw input size: 3/4 for 'B', 1/3 otherwise. The
# constants are written with ".0" to force floating-point arithmetic.
# -- dankogai
$llen *= $enc eq 'B' ?
3.0/4.0 : 1.0/3.0;
# Consume $str one character at a time; substr with a '' replacement removes
# the character from the front of $str.
while(length(my $chr = substr($str, 0, 1, ''))){
# Adding this character would push the chunk past the budget.
if (bytes
::length($chunk) + bytes
::length($chr) > $llen){
push @result, SINGLE
->{$enc}($chunk);
# Encode whatever is left in $chunk.
# NOTE(review): a leftover chunk equal to the string "0" is false and would
# be skipped by this truth test.
$chunk and push @result, SINGLE
->{$enc}($chunk);
# UTF-8 encode the next argument, Base64 it with an empty line terminator, and
# wrap the result between HEAD . 'B?' and TAIL.
my $octets = encode_utf8(shift);
HEAD . 'B?' . encode_base64($octets, '') . TAIL;
join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
# Wrap the chunk between HEAD . 'Q?' and TAIL and pass the whole encoded-word
# through decode_utf8 before returning it.
return decode_utf8
(HEAD
. 'Q?' . $chunk . TAIL
);
Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
use Encode qw/encode decode/;
$utf8 = decode('MIME-Header', $header);
$header = encode('MIME-Header', $utf8);
This module implements RFC 2047 MIME Header Encoding. There are 3
variant encoding names: C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
differences are described below.
----------------------------------------------
MIME-Header Both B and Q =?UTF-8?B?....?=
MIME-B B only; Q croaks =?UTF-8?B?....?=
MIME-Q Q only; B croaks =?UTF-8?Q?....?=
When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
is extracted and decoded for I<X> encoding (B for Base64, Q for
Quoted-Printable). Then the decoded chunk is fed to
decode(I<encoding>). So long as I<encoding> is supported by Encode,
any source encoding is fine.
When you encode, it just encodes the UTF-8 string with I<X> encoding and
wraps the result in =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
encode are left as is and long lines are folded within 76 bytes per line.
It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
and =?ISO-8859-1?=, but that makes the implementation too complicated.
These days major mail agents all support =?UTF-8? so I think it is
good enough.
Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
Makamaka. There are still too many MUAs, especially on cellular phone
handsets, which do not grok UTF-8.
RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other