git.subgeniuskitty.com - OpenSPARC-T2-SAM/.git/blame_incremental - sam-t2/devtools/amd64/lib/perl5/5.8.8/Encode/Unicode/UTF7.pm

... / ...

Commit	Line	Data
	1	#
	2	# $Id: UTF7.pm,v 2.1 2004/05/25 16:27:14 dankogai Exp $
	3	#
	4	package Encode::Unicode::UTF7;
	5	use strict;
	6	no warnings 'redefine';
	7	use base qw(Encode::Encoding);
	8	__PACKAGE__->Define('UTF-7');
	9	our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
	10	use MIME::Base64;
	11	use Encode;
	12
	13	#
	14	# Algorithms taken from Unicode::String by Gisle Aas
	15	#
	16
	17	our $OPTIONAL_DIRECT_CHARS = 1;
	18	my $specials = quotemeta "\'(),-./:?";
	19	$OPTIONAL_DIRECT_CHARS and
	20	$specials .= quotemeta "!\"#$%&*;<=>@[]^_`{\|}";
	21	# \s will not work because it matches U+3000 DEOGRAPHIC SPACE
	22	# We use qr/[\n\r\t\ ] instead
	23	my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
	24	my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
	25	my $e_utf16 = find_encoding("UTF-16BE");
	26
	27	sub needs_lines { 1 };
	28
	29	sub encode($$;$){
	30	my ($obj, $str, $chk) = @_;
	31	my $len = length($str);
	32	pos($str) = 0;
	33	my $bytes = '';
	34	while (pos($str) < $len){
	35	if ($str =~ /\G($re_asis+)/ogc){
	36	$bytes .= $1;
	37	}elsif($str =~ /\G($re_encoded+)/ogsc){
	38	if ($1 eq "+"){
	39	$bytes .= "+-";
	40	}else{
	41	my $s = $1;
	42	my $base64 = encode_base64($e_utf16->encode($s), '');
	43	$base64 =~ s/=+$//;
	44	$bytes .= "+$base64-";
	45	}
	46	}else{
	47	die "This should not happen! (pos=" . pos($str) . ")";
	48	}
	49	}
	50	$_[1] = '' if $chk;
	51	return $bytes;
	52	}
	53
	54	sub decode{
	55	my ($obj, $bytes, $chk) = @_;
	56	my $len = length($bytes);
	57	my $str = "";
	58	while (pos($bytes) < $len) {
	59	if ($bytes =~ /\G([^+]+)/ogc) {
	60	$str .= $1;
	61	}elsif($bytes =~ /\G\+-/ogc) {
	62	$str .= "+";
	63	}elsif($bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc) {
	64	my $base64 = $1;
	65	my $pad = length($base64) % 4;
	66	$base64 .= "=" x (4 - $pad) if $pad;
	67	$str .= $e_utf16->decode(decode_base64($base64));
	68	}elsif($bytes =~ /\G\+/ogc) {
	69	$^W and warn "Bad UTF7 data escape";
	70	$str .= "+";
	71	}else{
	72	die "This should not happen " . pos($bytes);
	73	}
	74	}
	75	$_[1] = '' if $chk;
	76	return $str;
	77	}
	78	1;
	79	__END__
	80
	81	=head1 NAME
	82
	83	Encode::Unicode::UTF7 -- UTF-7 encoding
	84
	85	=head1 SYNOPSIS
	86
	87	use Encode qw/encode decode/;
	88	$utf7 = encode("UTF-7", $utf8);
	89	$utf8 = decode("UTF-7", $ucs2);
	90
	91	=head1 ABSTRACT
	92
	93	This module implements UTF-7 encoding documented in RFC 2152. UTF-7,
	94	as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It
	95	is designed to be MTA-safe and expected to be a standard way to
	96	exchange Unicoded mails via mails. But with the advent of UTF-8 and
	97	8-bit compliant MTAs, UTF-7 is hardly ever used.
	98
	99	UTF-7 was not supported by Encode until version 1.95 because of that.
	100	But Unicode::String, a module by Gisle Aas which adds Unicode supports
	101	to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
	102	so Encode can supersede Unicode::String 100%.
	103
	104	=head1 In Practice
	105
	106	When you want to encode Unicode for mails and web pages, however, do
	107	not use UTF-7 unless you are sure your recipients and readers can
	108	handle it. Very few MUAs and WWW Browsers support these days (only
	109	Mozilla seems to support one). For general cases, use UTF-8 for
	110	message body and MIME-Header for header instead.
	111
	112	=head1 SEE ALSO
	113
	114	L<Encode>, L<Encode::Unicode>, L<Unicode::String>
	115
	116	RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
	117
	118	=cut