# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
import stringprep
, unicodedata
, re
, codecs
# Label separators recognised when a domain name is split into labels:
# U+002E (full stop), U+3002 (ideographic full stop), U+FF0E (fullwidth
# full stop) and U+FF61 (halfwidth ideographic full stop).
dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")

# Unicode copy of ace_prefix, for prefix checks against unicode labels.
# NOTE(review): ace_prefix is not assigned in the visible part of this file;
# confirm it is defined before this line runs.
uace_prefix = unicode(ace_prefix, "ascii")
# This assumes query strings, so AllowUnassigned is true
if stringprep
.in_table_b1(c
):
newlabel
.append(stringprep
.map_table_b2(c
))
label
= u
"".join(newlabel
)
label
= unicodedata
.normalize("NFKC", label
)
if stringprep
.in_table_c12(c
) or \
stringprep
.in_table_c22(c
) or \
stringprep
.in_table_c3(c
) or \
stringprep
.in_table_c4(c
) or \
stringprep
.in_table_c5(c
) or \
stringprep
.in_table_c6(c
) or \
stringprep
.in_table_c7(c
) or \
stringprep
.in_table_c8(c
) or \
stringprep
.in_table_c9(c
):
raise UnicodeError, "Invalid character %s" % repr(c
)
RandAL
= map(stringprep
.in_table_d1
, label
)
# There is a RandAL char in the string. Further tests must be performed:
# 1) The characters in section 5.8 MUST be prohibited.
# This is table C.8, which was already checked
# 2) If a string contains any RandALCat character, the string
# MUST NOT contain any LCat character.
if filter(stringprep
.in_table_d2
, label
):
raise UnicodeError, "Violation of BIDI requirement 2"
# 3) If a string contains any RandALCat character, a
# RandALCat character MUST be the first character of the
# string, and a RandALCat character MUST be the last
# character of the string.
if not RandAL
[0] or not RandAL
[-1]:
raise UnicodeError, "Violation of BIDI requirement 3"
label
= label
.encode("ascii")
# Skip to step 3: UseSTD3ASCIIRules is false, so skip on to step 8 (the length check).
raise UnicodeError, "label too long"
# Step 3: UseSTD3ASCIIRules is false
label
= label
.encode("ascii")
raise UnicodeError, "label too long"
# Step 5: Check ACE prefix
if label
.startswith(uace_prefix
):
raise UnicodeError, "Label starts with ACE prefix"
# Step 6: Encode with PUNYCODE
label
= label
.encode("punycode")
# Step 7: Prepend ACE prefix
label
= ace_prefix
+ label
raise UnicodeError, "label too long"
# Step 1: Check for ASCII
if isinstance(label
, str):
label
= label
.encode("ascii")
# Step 2: Perform nameprep
# The RFC doesn't say this, but apparently the label should be ASCII now
label
= label
.encode("ascii")
raise UnicodeError, "Invalid character in IDN label"
# Step 3: Check for ACE prefix
if not label
.startswith(ace_prefix
):
return unicode(label
, "ascii")
# Step 4: Remove ACE prefix
label1
= label
[len(ace_prefix
):]
# Step 5: Decode using PUNYCODE
result
= label1
.decode("punycode")
# Step 7: Compare the result of step 6 with the one of step 3
# label2 will already be in lower case.
if label
.lower() != label2
:
raise UnicodeError, ("IDNA does not round-trip", label
, label2
)
# Step 8: return the result of step 5
class Codec(codecs.Codec):
    """Codec that converts dotted names label by label.

    ``encode`` passes every label through ``ToASCII`` and ``decode`` passes
    every label through ``ToUnicode``.  A single trailing separator on the
    input is preserved on the output.
    """

    def encode(self, input, errors='strict'):
        """Encode a dotted name by applying ``ToASCII`` to each label.

        Args:
            input: the name to encode.  It is split on every separator
                matched by the module-level ``dots`` pattern.
            errors: error handling scheme; only ``'strict'`` is accepted.

        Returns:
            A ``(encoded, consumed)`` tuple, where ``encoded`` is the
            converted labels joined with ``"."`` and ``consumed`` is
            ``len(input)``.

        Raises:
            UnicodeError: if ``errors`` is not ``'strict'``.  Exceptions
                raised by ``ToASCII`` propagate unchanged.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling " + errors)

        # Splitting an empty name yields one empty label, which would be
        # mistaken for a trailing separator and returned as ".".
        if not input:
            return "", 0

        labels = dots.split(input)

        # An empty final label means the name ended with a separator; drop
        # the empty label and re-append the separator after joining.
        trailing_dot = ''
        if labels and len(labels[-1]) == 0:
            trailing_dot = '.'
            del labels[-1]

        result = [ToASCII(label) for label in labels]
        return ".".join(result) + trailing_dot, len(input)

    def decode(self, input, errors='strict'):
        """Decode a dotted name by applying ``ToUnicode`` to each label.

        Args:
            input: the name to decode.  Unicode input is split with the
                module-level ``dots`` pattern; any other input is split on
                ``"."`` only.
            errors: error handling scheme; only ``'strict'`` is accepted.

        Returns:
            A ``(decoded, consumed)`` tuple, where ``decoded`` is the
            converted labels joined with ``u"."`` and ``consumed`` is
            ``len(input)``.

        Raises:
            UnicodeError: if ``errors`` is not ``'strict'``.  Exceptions
                raised by ``ToUnicode`` propagate unchanged.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling " + errors)

        # Same empty-input guard as in encode().
        if not input:
            return u"", 0

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, unicode):
            labels = dots.split(input)
        else:
            labels = input.split(".")

        # Preserve a single trailing separator (see encode()).
        trailing_dot = u''
        if labels and len(labels[-1]) == 0:
            trailing_dot = u'.'
            del labels[-1]

        result = [ToUnicode(label) for label in labels]
        return u".".join(result) + trailing_dot, len(input)
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining Codec with codecs.StreamWriter; adds nothing of its own."""
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining Codec with codecs.StreamReader; adds nothing of its own."""
return (Codec().encode
,Codec().decode
,StreamReader
,StreamWriter
)