[OpenSPARC-T2-DV] / tools / src / nas,5.n2.os.2 / lib / python / lib / python2.4 / encodings / idna.py

# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)

import stringprep, unicodedata, re, codecs

# IDNA section 3.1: the characters recognized as label separators --
# U+002E (full stop), U+3002 (ideographic full stop), U+FF0E (fullwidth
# full stop) and U+FF61 (halfwidth ideographic full stop).
dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")

# IDNA section 5: the ACE prefix that marks an encoded label, kept both
# as a byte string and as a Unicode string so either kind of label can be
# tested without an implicit conversion.
ace_prefix = "xn--"
uace_prefix = unicode(ace_prefix, "ascii")

# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
    """Apply the Nameprep profile (RFC 3491) to a single label.

    The steps are: map (stringprep tables B.1 and B.2), normalize (NFKC),
    prohibit (tables C.1.2 and C.2.2 through C.9) and check the
    bidirectional requirements (tables D.1 and D.2).  Unassigned code
    points are not rejected.

    label -- Unicode string holding one label (no separators).

    Returns the prepared Unicode string.  Raises UnicodeError if the label
    contains a prohibited character or violates a bidi requirement.
    """
    # Map: characters in B.1 are mapped to nothing, everything else goes
    # through the B.2 case-folding table.
    label = u"".join([stringprep.map_table_b2(c)
                      for c in label
                      if not stringprep.in_table_b1(c)])

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    prohibited_tables = (
        stringprep.in_table_c12,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )
    for c in label:
        for in_table in prohibited_tables:
            if in_table(c):
                raise UnicodeError("Invalid character %s" % repr(c))

    # Check bidi.  The requirements depend only on the label as a whole,
    # so they are evaluated once rather than once per RandALCat character.
    is_randal = [bool(stringprep.in_table_d1(c)) for c in label]
    if True in is_randal:
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        # This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        # MUST NOT contain any LCat character.
        for c in label:
            if stringprep.in_table_d2(c):
                raise UnicodeError("Violation of BIDI requirement 2")

        # 3) If a string contains any RandALCat character, a
        # RandALCat character MUST be the first character of the
        # string, and a RandALCat character MUST be the last
        # character of the string.
        if not is_randal[0] or not is_randal[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label

def _check_label_size(label):
    # ToASCII step 8: an encoded label must hold between 1 and 63 characters.
    if 0 < len(label) < 64:
        return label
    raise UnicodeError("label empty or too long")

def ToASCII(label):
    """Convert one label to its ASCII form (RFC 3490, section 4.1).

    UseSTD3ASCIIRules is treated as false, so step 3 is a no-op.

    label -- a single label (no separators).

    Returns the result of encoding the label: the label itself when it is
    already ASCII (before or after nameprep), otherwise the ACE prefix
    followed by the Punycode encoding of the prepared label.

    Raises UnicodeError if nameprep rejects the label, if a non-ASCII
    label already starts with the ACE prefix, or if the result is empty
    or longer than 63 characters.
    """
    # Step 1: try ASCII
    try:
        encoded = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 3: UseSTD3ASCIIRules is false, so
        # Skip to step 8.
        return _check_label_size(encoded)

    # Step 2: nameprep
    label = nameprep(label)

    # Step 3: UseSTD3ASCIIRules is false
    # Step 4: try ASCII
    try:
        encoded = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 8.
        return _check_label_size(encoded)

    # Step 5: Check ACE prefix
    if label.startswith(uace_prefix):
        raise UnicodeError("Label starts with ACE prefix")

    # Step 6: Encode with PUNYCODE
    # Step 7: Prepend ACE prefix
    # Step 8: Check size
    return _check_label_size(ace_prefix + label.encode("punycode"))

def ToUnicode(label):
    """Convert one label to its Unicode form (RFC 3490, section 4.2).

    label -- a single label, either a byte string or a Unicode string.

    Returns a Unicode string: the label itself when it does not carry the
    ACE prefix, otherwise the Punycode-decoded remainder of the label.

    Raises UnicodeError if a non-ASCII label is still non-ASCII after
    nameprep, or if the decoded label does not encode back (via ToASCII)
    to the label that was given.  Errors raised by nameprep, ToASCII or
    the punycode codec propagate unchanged.
    """
    # Step 1: Check for ASCII
    if isinstance(label, str):
        pure_ascii = True
    else:
        try:
            label = label.encode("ascii")
            pure_ascii = True
        except UnicodeError:
            pure_ascii = False
    if not pure_ascii:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")

    # Step 3: Check for ACE prefix.  The prefix is "xn--" in any
    # capitalization, so compare it case-insensitively; step 7 below
    # already lower-cases the label for the same reason.
    if label[:len(ace_prefix)].lower() != ace_prefix:
        return unicode(label, "ascii")

    # Step 4: Remove ACE prefix
    label1 = label[len(ace_prefix):]

    # Step 5: Decode using PUNYCODE
    result = label1.decode("punycode")

    # Step 6: Apply ToASCII
    label2 = ToASCII(result)

    # Step 7: Compare the result of step 6 with the one of step 3
    # label2 will already be in lower case.
    if label.lower() != label2:
        raise UnicodeError("IDNA does not round-trip", label, label2)

    # Step 8: return the result of step 5
    return result

### Codec APIs

class Codec(codecs.Codec):
    """IDNA codec operating on whole names made of separated labels."""

    def encode(self, input, errors='strict'):
        """Encode a name label by label with ToASCII.

        Returns a tuple (encoded name, number of input characters
        consumed).  Only errors='strict' is accepted; anything else
        raises UnicodeError.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling " + errors)

        if not input:
            return "", 0

        labels = dots.split(input)
        # An empty final label means the name ended with a separator;
        # remember it so it can be put back after the join.
        trailing_dot = ''
        if labels and not labels[-1]:
            trailing_dot = '.'
            labels.pop()

        encoded = [ToASCII(label) for label in labels]
        # Join with U+002E
        return ".".join(encoded) + trailing_dot, len(input)

    def decode(self, input, errors='strict'):
        """Decode a name label by label with ToUnicode.

        Returns a tuple (decoded Unicode name, length of the input).
        Only errors='strict' is accepted; anything else raises
        UnicodeError.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling " + errors)

        if not input:
            return u"", 0

        if isinstance(input, unicode):
            # IDNA allows decoding to operate on Unicode strings, too.
            labels = dots.split(input)
        else:
            # Must be ASCII string; the conversion is done only for its
            # validation side effect, the result is discarded.
            input = str(input)
            unicode(input, "ascii")
            labels = input.split(".")

        trailing_dot = u''
        if labels and not labels[-1]:
            trailing_dot = u'.'
            labels.pop()

        decoded = [ToUnicode(label) for label in labels]
        return u".".join(decoded) + trailing_dot, len(input)

class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer for IDNA; Codec is listed first so its encode() is used."""

class StreamReader(Codec,codecs.StreamReader):
    """Stream reader for IDNA; Codec is listed first so its decode() is used."""

### encodings module API

def getregentry():
    """Return the codec registry entry for this encoding.

    The entry is the tuple (encode function, decode function,
    stream reader class, stream writer class).
    """
    # Each bound method comes from its own Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
Commit	Line	Data
86530b38 AT	1	# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
	2
	3	import stringprep, unicodedata, re, codecs
	4
	5	# IDNA section 3.1
	6	dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")
	7
	8	# IDNA section 5
	9	ace_prefix = "xn--"
	10	uace_prefix = unicode(ace_prefix, "ascii")
	11
	12	# This assumes query strings, so AllowUnassigned is true
	13	def nameprep(label):
	14	# Map
	15	newlabel = []
	16	for c in label:
	17	if stringprep.in_table_b1(c):
	18	# Map to nothing
	19	continue
	20	newlabel.append(stringprep.map_table_b2(c))
	21	label = u"".join(newlabel)
	22
	23	# Normalize
	24	label = unicodedata.normalize("NFKC", label)
	25
	26	# Prohibit
	27	for c in label:
	28	if stringprep.in_table_c12(c) or \
	29	stringprep.in_table_c22(c) or \
	30	stringprep.in_table_c3(c) or \
	31	stringprep.in_table_c4(c) or \
	32	stringprep.in_table_c5(c) or \
	33	stringprep.in_table_c6(c) or \
	34	stringprep.in_table_c7(c) or \
	35	stringprep.in_table_c8(c) or \
	36	stringprep.in_table_c9(c):
	37	raise UnicodeError, "Invalid character %s" % repr(c)
	38
	39	# Check bidi
	40	RandAL = map(stringprep.in_table_d1, label)
	41	for c in RandAL:
	42	if c:
	43	# There is a RandAL char in the string. Must perform further
	44	# tests:
	45	# 1) The characters in section 5.8 MUST be prohibited.
	46	# This is table C.8, which was already checked
	47	# 2) If a string contains any RandALCat character, the string
	48	# MUST NOT contain any LCat character.
	49	if filter(stringprep.in_table_d2, label):
	50	raise UnicodeError, "Violation of BIDI requirement 2"
	51
	52	# 3) If a string contains any RandALCat character, a
	53	# RandALCat character MUST be the first character of the
	54	# string, and a RandALCat character MUST be the last
	55	# character of the string.
	56	if not RandAL[0] or not RandAL[-1]:
	57	raise UnicodeError, "Violation of BIDI requirement 3"
	58
	59	return label
	60
	61	def ToASCII(label):
	62	try:
	63	# Step 1: try ASCII
	64	label = label.encode("ascii")
65	except UnicodeError:
66	pass
67	else:
68	# Skip to step 3: UseSTD3ASCIIRules is false, so
69	# Skip to step 8.
70	if 0 < len(label) < 64:
71	return label
72	raise UnicodeError, "label too long"
73
74	# Step 2: nameprep
75	label = nameprep(label)
76
77	# Step 3: UseSTD3ASCIIRules is false
78	# Step 4: try ASCII
79	try:
80	label = label.encode("ascii")
81	except UnicodeError:
82	pass
83	else:
84	# Skip to step 8.
85	if 0 < len(label) < 64:
86	return label
87	raise UnicodeError, "label too long"
88
89	# Step 5: Check ACE prefix
90	if label.startswith(uace_prefix):
91	raise UnicodeError, "Label starts with ACE prefix"
92
93	# Step 6: Encode with PUNYCODE
94	label = label.encode("punycode")
95
96	# Step 7: Prepend ACE prefix
97	label = ace_prefix + label
98
99	# Step 8: Check size
100	if 0 < len(label) < 64:
101	return label
102	raise UnicodeError, "label too long"
103
104	def ToUnicode(label):
105	# Step 1: Check for ASCII
106	if isinstance(label, str):
107	pure_ascii = True
108	else:
109	try:
110	label = label.encode("ascii")
111	pure_ascii = True
112	except UnicodeError:
113	pure_ascii = False
114	if not pure_ascii:
115	# Step 2: Perform nameprep
116	label = nameprep(label)
117	# It doesn't say this, but apparently, it should be ASCII now
118	try:
119	label = label.encode("ascii")
120	except UnicodeError:
121	raise UnicodeError, "Invalid character in IDN label"
122	# Step 3: Check for ACE prefix
123	if not label.startswith(ace_prefix):
124	return unicode(label, "ascii")
125
126	# Step 4: Remove ACE prefix
127	label1 = label[len(ace_prefix):]
128
129	# Step 5: Decode using PUNYCODE
130	result = label1.decode("punycode")
131
132	# Step 6: Apply ToASCII
133	label2 = ToASCII(result)
134
135	# Step 7: Compare the result of step 6 with the one of step 3
136	# label2 will already be in lower case.
137	if label.lower() != label2:
138	raise UnicodeError, ("IDNA does not round-trip", label, label2)
139
140	# Step 8: return the result of step 5
141	return result
142
143	### Codec APIs
144
145	class Codec(codecs.Codec):
146	def encode(self,input,errors='strict'):
147
148	if errors != 'strict':
149	# IDNA is quite clear that implementations must be strict
150	raise UnicodeError, "unsupported error handling "+errors
151
152	if not input:
153	return "", 0
154
155	result = []
156	labels = dots.split(input)
157	if labels and len(labels[-1])==0:
158	trailing_dot = '.'
159	del labels[-1]
160	else:
161	trailing_dot = ''
162	for label in labels:
163	result.append(ToASCII(label))
164	# Join with U+002E
165	return ".".join(result)+trailing_dot, len(input)
166
167	def decode(self,input,errors='strict'):
168
169	if errors != 'strict':
170	raise UnicodeError, "Unsupported error handling "+errors
171
172	if not input:
173	return u"", 0
174
175	# IDNA allows decoding to operate on Unicode strings, too.
176	if isinstance(input, unicode):
177	labels = dots.split(input)
178	else:
179	# Must be ASCII string
180	input = str(input)
181	unicode(input, "ascii")
182	labels = input.split(".")
183
184	if labels and len(labels[-1]) == 0:
185	trailing_dot = u'.'
186	del labels[-1]
187	else:
188	trailing_dot = u''
189
190	result = []
191	for label in labels:
192	result.append(ToUnicode(label))
193
194	return u".".join(result)+trailing_dot, len(input)
195
196	class StreamWriter(Codec,codecs.StreamWriter):
197	pass
198
199	class StreamReader(Codec,codecs.StreamReader):
200	pass
201
202	### encodings module API
203
204	def getregentry():
205
206	return (Codec().encode,Codec().decode,StreamReader,StreamWriter)