Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """ Test script for the unicodedata module. |
2 | ||
3 | Written by Marc-Andre Lemburg (mal@lemburg.com). | |
4 | ||
5 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. | |
6 | ||
7 | """#" | |
8 | import unittest, test.test_support | |
9 | import sha | |
10 | ||
# Codec used by UnicodeMethodsTest.test_method_checksum to turn each joined
# unicode result string into bytes before it is fed to the SHA hash.
encoding = 'utf-8'
12 | ||
13 | ||
14 | ### Run tests | |
15 | ||
class UnicodeMethodsTest(unittest.TestCase):
    """Checksum regression test for the unicode string methods.

    The results of the predicate and case-mapping methods for every code
    point below 65536 are hashed and compared with a recorded digest.
    """

    # update this, if the database changes
    expectedchecksum = 'a37276dc2c158bef6dfd908ad34525c97180fad9'

    def test_method_checksum(self):
        # Each entry pairs a predicate method name with the suffix that is
        # appended to the character for the multi-character variant.
        # The order of this table fixes the order of the hashed fields.
        predicates = (
            ('isalnum', u'abc'),
            ('isalpha', u'abc'),
            ('isdecimal', u'123'),
            ('isdigit', u'123'),
            ('islower', u'abc'),
            ('isnumeric', u'123'),
            ('isspace', u' \t'),
            ('istitle', u'abc'),
            ('isupper', u'ABC'),
        )
        mappings = ('lower', 'upper', 'title')

        digest = sha.sha()
        for codepoint in range(65536):
            ch = unichr(codepoint)
            fields = []

            # Predicates (single char); the boolean result indexes u"01"
            # so that False/True are recorded as u"0"/u"1".
            for method_name, _suffix in predicates:
                fields.append(u"01"[getattr(ch, method_name)()])

            # Predicates (multiple chars)
            for method_name, suffix in predicates:
                fields.append(u"01"[getattr(ch + suffix, method_name)()])

            # Mappings (single char)
            for method_name in mappings:
                fields.append(getattr(ch, method_name)())

            # Mappings (multiple chars)
            fields.append((ch + u'abc').lower())
            fields.append((ch + u'ABC').upper())
            fields.append((ch + u'abc').title())
            fields.append((ch + u'ABC').title())

            digest.update(u''.join(fields).encode(encoding))

        self.assertEqual(digest.hexdigest(), self.expectedchecksum)
63 | ||
class UnicodeDatabaseTest(unittest.TestCase):
    """Base fixture that exposes the unicodedata module as ``self.db``."""

    def setUp(self):
        # The import happens here rather than at module level: if
        # unicodedata is not available the ImportError is raised only for
        # tests using this fixture, and the other test cases still run.
        import unicodedata as database
        self.db = database

    def tearDown(self):
        # Drop the reference stored by setUp().
        delattr(self, 'db')
74 | ||
class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests for the lookup functions of the unicodedata module.

    The module under test is reached through ``self.db``, which the
    UnicodeDatabaseTest base class assigns in setUp().
    """

    # update this, if the database changes
    expectedchecksum = 'cfe20a967a450ebc82ca68c3e4eed344164e11af'

    def test_function_checksum(self):
        # Hash the string form of every property for each code point below
        # 0x10000 and compare the digest with the recorded checksum.
        h = sha.sha()

        for i in range(0x10000):
            char = unichr(i)
            data = [
                # Properties; -1 is passed as the default so that characters
                # without a digit/numeric/decimal value do not raise.
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        # Characters without a digit value return the supplied default.
        self.assertEqual(self.db.digit(u'A', None), None)
        self.assertEqual(self.db.digit(u'9'), 9)
        self.assertEqual(self.db.digit(u'\u215b', None), None)
        self.assertEqual(self.db.digit(u'\u2468'), 9)

        # No argument, a two-character string, and a character with no
        # digit value and no default.
        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, u'xx')
        self.assertRaises(ValueError, self.db.digit, u'x')

    def test_numeric(self):
        # Characters without a numeric value return the supplied default.
        self.assertEqual(self.db.numeric(u'A', None), None)
        self.assertEqual(self.db.numeric(u'9'), 9)
        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)

        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, u'xx')
        self.assertRaises(ValueError, self.db.numeric, u'x')

    def test_decimal(self):
        # Characters without a decimal value return the supplied default.
        self.assertEqual(self.db.decimal(u'A', None), None)
        self.assertEqual(self.db.decimal(u'9'), 9)
        self.assertEqual(self.db.decimal(u'\u215b', None), None)
        self.assertEqual(self.db.decimal(u'\u2468', None), None)

        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, u'xx')
        self.assertRaises(ValueError, self.db.decimal, u'x')

    def test_category(self):
        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
        self.assertEqual(self.db.category(u'a'), 'Ll')
        self.assertEqual(self.db.category(u'A'), 'Lu')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, u'xx')

    def test_bidirectional(self):
        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(u' '), 'WS')
        self.assertEqual(self.db.bidirectional(u'A'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, u'xx')

    def test_decomposition(self):
        self.assertEqual(self.db.decomposition(u'\uFFFE'), '')
        self.assertEqual(self.db.decomposition(u'\u00bc'),
                         '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, u'xx')

    def test_mirrored(self):
        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
        self.assertEqual(self.db.mirrored(u'a'), 0)
        self.assertEqual(self.db.mirrored(u'\u2201'), 1)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, u'xx')

    def test_combining(self):
        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
        self.assertEqual(self.db.combining(u'a'), 0)
        self.assertEqual(self.db.combining(u'\u20e1'), 230)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, u'xx')

    def test_normalize(self):
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
        self.assertEqual(self.db.normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_east_asian_width(self):
        eaw = self.db.east_asian_width
        # A byte string, an empty string and a two-character string are
        # all rejected with TypeError.
        self.assertRaises(TypeError, eaw, 'a')
        self.assertRaises(TypeError, eaw, u'')
        self.assertRaises(TypeError, eaw, u'ra')
        self.assertEqual(eaw(u'\x1e'), 'N')
        self.assertEqual(eaw(u'\x20'), 'Na')
        self.assertEqual(eaw(u'\uC894'), 'W')
        self.assertEqual(eaw(u'\uFF66'), 'H')
        self.assertEqual(eaw(u'\uFF1F'), 'F')
        self.assertEqual(eaw(u'\u2010'), 'A')
188 | ||
class UnicodeMiscTest(UnicodeDatabaseTest):
    """Cross-checks between related unicodedata lookup functions."""

    def _assert_agrees_with_numeric(self, lookup):
        # Walk every code point below 0x10000.  Wherever lookup() yields a
        # value (anything other than the -1 passed as default), numeric()
        # must return the same value for that character.
        matched = 0
        for codepoint in xrange(0x10000):
            ch = unichr(codepoint)
            value = lookup(ch, -1)
            if value == -1:
                continue
            self.assertEqual(value, self.db.numeric(ch))
            matched += 1
        # should have tested at least the ASCII digits
        self.assert_(matched >= 10)

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        self._assert_agrees_with_numeric(self.db.decimal)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        self._assert_agrees_with_numeric(self.db.digit)
216 | ||
def test_main():
    """Run all test case classes of this module through test_support."""
    # Order matches the original call: misc, methods, functions.
    test_classes = (
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest,
    )
    test.test_support.run_unittest(*test_classes)
223 | ||
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()