# -*- coding: iso-8859-1 -*-
""" Codec for the Punycode encoding, as specified in RFC 3492

Written by Martin v. Löwis.
"""

import codecs

##################### Encoding #####################################

def segregate(text):
    """3.1 Basic code point segregation

    Split text into its basic (ASCII) and extended (non-ASCII) parts.

    Returns a pair (base, extended): base is the ASCII-encoded
    concatenation of every character of text whose ordinal is below 128,
    in original order; extended is a sorted list of the distinct
    remaining characters.
    """
    base = []
    extended = set()
    for c in text:
        if ord(c) < 128:
            base.append(c)
        else:
            extended.add(c)
    # The encoder walks the extended code points in increasing order, so
    # hand them back sorted and without duplicates.
    return "".join(base).encode("ascii"), sorted(extended)
def selective_len(str, max):
    """Return the length of str, considering only characters below max.

    max is an integer code point; a character is counted when its
    ordinal is strictly less than max.
    """
    return sum(1 for c in str if ord(c) < max)
def selective_find(str, char, index, pos):
    """Return a pair (index, pos), indicating the next occurrence of
    char in str. index is the position of the character considering
    only ordinals up to and including char, and pos is the position in
    the full string. index/pos is the starting position in the full
    string.

    The search resumes at pos + 1, so pass -1 for both index and pos to
    start from the beginning. Returns (-1, -1) when char does not occur
    again.
    """
    for pos in range(pos + 1, len(str)):
        c = str[pos]
        if c == char:
            return index + 1, pos
        if c < char:
            # Characters with a smaller ordinal count towards index;
            # larger ones are skipped entirely.
            index += 1
    return (-1, -1)
def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding

    str is the full input text and extended the sequence of its distinct
    non-ASCII characters, processed in the order given. Returns a list
    with one integer delta per occurrence of an extended character.
    """
    oldchar = 0x80
    result = []
    oldindex = -1
    for c in extended:
        index = pos = -1
        char = ord(c)
        curlen = selective_len(str, char)
        # Advancing from oldchar to char costs one full pass over the
        # (curlen + 1) insertion slots per skipped code point.
        delta = (curlen + 1) * (char - oldchar)
        while True:
            index, pos = selective_find(str, c, index, pos)
            if index == -1:
                break
            delta += index - oldindex
            result.append(delta - 1)
            oldindex = index
            delta = 0
        oldchar = char
    return result
def T(j, bias):
    """Return the digit threshold for position j under the given bias.

    NOTE(review): the def line of this helper is missing from the
    visible source; the name T is an assumption -- confirm upstream.
    """
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    res = 36 * (j + 1) - bias
    if res < 1:
        return 1
    if res > 26:
        return 26
    return res

digits = "abcdefghijklmnopqrstuvwxyz0123456789"

def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers

    Encode the non-negative integer N as a list of base-36 digit
    characters, least significant digit first, using thresholds derived
    from bias.
    """
    result = []
    j = 0
    while True:
        t = T(j, bias)
        if N < t:
            # A digit below the threshold terminates the number.
            result.append(digits[N])
            return result
        result.append(digits[t + ((N - t) % (36 - t))])
        N = (N - t) // (36 - t)
        j += 1
def adapt(delta, first, numchars):
    """Return the bias to use for the next delta (RFC 3492, section 3.4).

    delta is the delta just processed, first is true only for the very
    first delta, and numchars is the number of code points handled so
    far (must be non-zero).
    """
    if first:
        delta //= 700  # damp
    else:
        delta //= 2
    delta += delta // numchars
    # ((base - tmin) * tmax) // 2 == 455
    divisions = 0
    while delta > 455:
        delta = delta // 35 # base - tmin
        divisions += 36
    bias = divisions + (36 * delta // (delta + 38))
    return bias
def generate_integers(baselen, deltas):
    """3.4 Bias adaptation

    Encode every delta in deltas as a generalized variable-length
    integer, re-adapting the bias after each one, and return the
    concatenated digits as a single string. baselen is the number of
    basic code points in the input.
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    result = []
    bias = 72
    for points, delta in enumerate(deltas):
        s = generate_generalized_integer(delta, bias)
        result.extend(s)
        bias = adapt(delta, points == 0, baselen + points + 1)
    return "".join(result)
def punycode_encode(text):
    """Return the Punycode encoding of text.

    The result is the basic (ASCII) code points of text, followed by a
    "-" delimiter and the encoded extended code points. The delimiter is
    emitted only when there is at least one basic code point.
    """
    base, extended = segregate(text)
    # segregate() already ASCII-encodes the basic part; kept as in the
    # original so the value is a byte string before concatenation.
    base = base.encode("ascii")
    deltas = insertion_unsort(text, extended)
    extended = generate_integers(len(base), deltas)
    if base:
        return base + "-" + extended
    return extended
##################### Decoding #####################################
def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers

    Decode one integer from extended, starting at index extpos. Only the
    digits A-Z and 0-9 are accepted, so callers must upper-case the
    input first.

    Returns (newpos, value), where newpos is the index just past the
    digits consumed. With errors other than "strict", malformed input
    yields (newpos, None) instead of raising.

    Raises UnicodeError when errors == "strict" and the input ends
    mid-number or contains a character that is not a digit.
    """
    result = 0
    w = 1
    j = 0
    while True:
        try:
            char = ord(extended[extpos])
        except IndexError:
            if errors == "strict":
                raise UnicodeError("incomplete punicode string")
            return extpos + 1, None
        extpos += 1
        if 0x41 <= char <= 0x5A: # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39:
            digit = char - 22 # 0x30-26
        elif errors == "strict":
            # extpos was already advanced; report the character just read.
            raise UnicodeError("Invalid extended code point '%s'"
                               % extended[extpos - 1])
        else:
            return extpos, None
        # Threshold for this digit position, clamped to [tmin, tmax]
        # (Punycode parameters: tmin = 1, tmax = 26, base = 36).
        t = min(max(36 * (j + 1) - bias, 1), 26)
        result += digit * w
        if digit < t:
            return extpos, result
        w = w * (36 - t)
        j += 1
def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding

    Decode the deltas in extended and insert the resulting characters
    into base, returning the completed text. With errors other than
    "strict", decoding stops at the first malformed delta and the text
    built so far is returned; a code point above U+10FFFF is replaced
    by "?".

    Raises UnicodeError when errors == "strict" and a decoded code point
    exceeds U+10FFFF (or when the delta decoder raises).
    """
    char = 0x80
    pos = -1
    bias = 72
    extpos = 0
    while extpos < len(extended):
        newpos, delta = decode_generalized_number(extended, extpos,
                                                  bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        pos += delta + 1
        char += pos // (len(base) + 1)
        if char > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % char)
            char = ord('?')
        pos = pos % (len(base) + 1)
        base = base[:pos] + unichr(char) + base[pos:]
        bias = adapt(delta, (extpos == 0), len(base))
        extpos = newpos
    return base
def punycode_decode(text, errors):
    """Decode the Punycode string text and return the resulting text.

    Everything before the last "-" is taken as the basic code points;
    the remainder holds the encoded insertions. Without a "-", the whole
    input is treated as encoded insertions. errors is passed on to the
    ASCII decoding of the basic part and to insertion_sort().
    """
    pos = text.rfind("-")
    if pos == -1:
        base = ""
        extended = text
    else:
        base = text[:pos]
        extended = text[pos + 1:]
    base = unicode(base, "ascii", errors)
    # The digit decoder only recognises upper-case letters.
    extended = extended.upper()
    return insertion_sort(base, extended, errors)
class Codec(codecs.Codec):
    """Stateless Punycode encoder/decoder."""

    def encode(self, input, errors='strict'):
        """Return (encoded, len(input)); errors is accepted but not used."""
        res = punycode_encode(input)
        return res, len(input)

    def decode(self, input, errors='strict'):
        """Return (decoded, len(input)).

        Raises UnicodeError when errors is not one of 'strict',
        'replace' or 'ignore'.
        """
        if errors not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling " + errors)
        res = punycode_decode(input, errors)
        return res, len(input)
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining Codec with codecs.StreamWriter; adds nothing of its own."""
    pass
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining Codec with codecs.StreamReader; adds nothing of its own."""
    pass
def getregentry():
    """Return the (encode, decode, StreamReader, StreamWriter) tuple for this codec.

    NOTE(review): the def line is missing from the visible source;
    getregentry is presumably the hook name the codec registry looks
    up -- confirm against the package that loads this module.
    """
    return (Codec().encode, Codec().decode, StreamReader, StreamWriter)