Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """ Test script for the Unicode implementation. |
2 | ||
3 | Written by Bill Tutt. | |
4 | Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com) | |
5 | ||
6 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. | |
7 | ||
8 | """#" | |
9 | ||
10 | import unittest | |
11 | ||
12 | from test import test_support | |
13 | ||
14 | class UnicodeNamesTest(unittest.TestCase): | |
15 | ||
16 | def checkletter(self, name, code): | |
17 | # Helper that put all \N escapes inside eval'd raw strings, | |
18 | # to make sure this script runs even if the compiler | |
19 | # chokes on \N escapes | |
20 | res = eval(ur'u"\N{%s}"' % name) | |
21 | self.assertEqual(res, code) | |
22 | return res | |
23 | ||
24 | def test_general(self): | |
25 | # General and case insensitivity test: | |
26 | chars = [ | |
27 | "LATIN CAPITAL LETTER T", | |
28 | "LATIN SMALL LETTER H", | |
29 | "LATIN SMALL LETTER E", | |
30 | "SPACE", | |
31 | "LATIN SMALL LETTER R", | |
32 | "LATIN CAPITAL LETTER E", | |
33 | "LATIN SMALL LETTER D", | |
34 | "SPACE", | |
35 | "LATIN SMALL LETTER f", | |
36 | "LATIN CAPITAL LeTtEr o", | |
37 | "LATIN SMaLl LETTER x", | |
38 | "SPACE", | |
39 | "LATIN SMALL LETTER A", | |
40 | "LATIN SMALL LETTER T", | |
41 | "LATIN SMALL LETTER E", | |
42 | "SPACE", | |
43 | "LATIN SMALL LETTER T", | |
44 | "LATIN SMALL LETTER H", | |
45 | "LATIN SMALL LETTER E", | |
46 | "SpAcE", | |
47 | "LATIN SMALL LETTER S", | |
48 | "LATIN SMALL LETTER H", | |
49 | "LATIN small LETTER e", | |
50 | "LATIN small LETTER e", | |
51 | "LATIN SMALL LETTER P", | |
52 | "FULL STOP" | |
53 | ] | |
54 | string = u"The rEd fOx ate the sheep." | |
55 | ||
56 | self.assertEqual( | |
57 | u"".join([self.checkletter(*args) for args in zip(chars, string)]), | |
58 | string | |
59 | ) | |
60 | ||
61 | def test_ascii_letters(self): | |
62 | import unicodedata | |
63 | ||
64 | for char in "".join(map(chr, xrange(ord("a"), ord("z")))): | |
65 | name = "LATIN SMALL LETTER %s" % char.upper() | |
66 | code = unicodedata.lookup(name) | |
67 | self.assertEqual(unicodedata.name(code), name) | |
68 | ||
69 | def test_hangul_syllables(self): | |
70 | self.checkletter("HANGUL SYLLABLE GA", u"\uac00") | |
71 | self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8") | |
72 | self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0") | |
73 | self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8") | |
74 | self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0") | |
75 | self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88") | |
76 | self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370") | |
77 | self.checkletter("HANGUL SYLLABLE YI", u"\uc758") | |
78 | self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40") | |
79 | self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28") | |
80 | self.checkletter("HANGUL SYLLABLE PAN", u"\ud310") | |
81 | self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8") | |
82 | self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3") | |
83 | ||
84 | import unicodedata | |
85 | self.assertRaises(ValueError, unicodedata.name, u"\ud7a4") | |
86 | ||
87 | def test_cjk_unified_ideographs(self): | |
88 | self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400") | |
89 | self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5") | |
90 | self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00") | |
91 | self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5") | |
92 | self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000") | |
93 | self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6") | |
94 | ||
95 | def test_bmp_characters(self): | |
96 | import unicodedata | |
97 | count = 0 | |
98 | for code in xrange(0x10000): | |
99 | char = unichr(code) | |
100 | name = unicodedata.name(char, None) | |
101 | if name is not None: | |
102 | self.assertEqual(unicodedata.lookup(name), char) | |
103 | count += 1 | |
104 | ||
105 | def test_misc_symbols(self): | |
106 | self.checkletter("PILCROW SIGN", u"\u00b6") | |
107 | self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD") | |
108 | self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F") | |
109 | self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41") | |
110 | ||
111 | def test_errors(self): | |
112 | import unicodedata | |
113 | self.assertRaises(TypeError, unicodedata.name) | |
114 | self.assertRaises(TypeError, unicodedata.name, u'xx') | |
115 | self.assertRaises(TypeError, unicodedata.lookup) | |
116 | self.assertRaises(KeyError, unicodedata.lookup, u'unknown') | |
117 | ||
118 | def test_strict_eror_handling(self): | |
119 | # bogus character name | |
120 | self.assertRaises( | |
121 | UnicodeError, | |
122 | unicode, "\\N{blah}", 'unicode-escape', 'strict' | |
123 | ) | |
124 | # long bogus character name | |
125 | self.assertRaises( | |
126 | UnicodeError, | |
127 | unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict' | |
128 | ) | |
129 | # missing closing brace | |
130 | self.assertRaises( | |
131 | UnicodeError, | |
132 | unicode, "\\N{SPACE", 'unicode-escape', 'strict' | |
133 | ) | |
134 | # missing opening brace | |
135 | self.assertRaises( | |
136 | UnicodeError, | |
137 | unicode, "\\NSPACE", 'unicode-escape', 'strict' | |
138 | ) | |
139 | ||
def test_main():
    """Run every test in UnicodeNamesTest via test_support.run_unittest."""
    test_classes = (UnicodeNamesTest,)
    test_support.run_unittest(*test_classes)
142 | ||
# Run the suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()