Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | from test.test_support import verbose, TestFailed, TestSkipped, verify |
2 | import sys | |
3 | import os | |
4 | from unicodedata import normalize | |
5 | ||
# Name of the Unicode 3.2.0 normalization conformance data file.
TESTDATAFILE = "NormalizationTest-3.2.0" + os.extsep + "txt"

# This search allows using a build directory just inside the source
# directory, and saving just one copy of the test data in the source
# tree, rather than having a copy in each build directory.
# There might be a better way to do this.

# Look for the data file in the current directory, then its parent.
# skip_expected ends up True only when the file was found in neither
# place; on success TESTDATAFILE is rebound to the located path.
for path in [os.path.curdir, os.path.pardir]:
    fn = os.path.join(path, TESTDATAFILE)
    skip_expected = not os.path.exists(fn)
    if not skip_expected:
        TESTDATAFILE = fn
        break
19 | ||
class RangeError(Exception):
    """Raised by unistr() when a code point exceeds sys.maxunicode.

    The original was an old-style classic class; deriving from
    Exception makes it a proper exception type (raising non-Exception
    classes is deprecated and removed in later Python versions) while
    keeping `raise RangeError` / `except RangeError` working unchanged.
    """
    pass
22 | ||
def NFC(s):
    """Return the NFC (canonical composition) normal form of s.

    Renamed the parameter from `str`, which shadowed the builtin.
    """
    return normalize("NFC", s)
25 | ||
def NFKC(s):
    """Return the NFKC (compatibility composition) normal form of s.

    Renamed the parameter from `str`, which shadowed the builtin.
    """
    return normalize("NFKC", s)
28 | ||
def NFD(s):
    """Return the NFD (canonical decomposition) normal form of s.

    Renamed the parameter from `str`, which shadowed the builtin.
    """
    return normalize("NFD", s)
31 | ||
def NFKD(s):
    """Return the NFKD (compatibility decomposition) normal form of s.

    Renamed the parameter from `str`, which shadowed the builtin.
    """
    return normalize("NFKD", s)
34 | ||
def unistr(data):
    """Build a unicode string from space-separated hex code points.

    `data` is a field from the conformance file, e.g. "0044 0307".
    Raises RangeError if any code point is above sys.maxunicode
    (i.e. not representable on this build).
    """
    codepoints = [int(field, 16) for field in data.split(" ")]
    for cp in codepoints:
        if cp > sys.maxunicode:
            raise RangeError
    return u"".join(map(unichr, codepoints))
41 | ||
42 | def test_main(): | |
43 | if skip_expected: | |
44 | raise TestSkipped(TESTDATAFILE + " not found, download from " + | |
45 | "http://www.unicode.org/Public/3.2-Update/" + TESTDATAFILE) | |
46 | ||
47 | part1_data = {} | |
48 | for line in open(TESTDATAFILE): | |
49 | if '#' in line: | |
50 | line = line.split('#')[0] | |
51 | line = line.strip() | |
52 | if not line: | |
53 | continue | |
54 | if line.startswith("@Part"): | |
55 | part = line | |
56 | continue | |
57 | try: | |
58 | c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]] | |
59 | except RangeError: | |
60 | # Skip unsupported characters | |
61 | continue | |
62 | ||
63 | if verbose: | |
64 | print line | |
65 | ||
66 | # Perform tests | |
67 | verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line) | |
68 | verify(c4 == NFC(c4) == NFC(c5), line) | |
69 | verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line) | |
70 | verify(c5 == NFD(c4) == NFD(c5), line) | |
71 | verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5), | |
72 | line) | |
73 | verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5), | |
74 | line) | |
75 | ||
76 | # Record part 1 data | |
77 | if part == "@Part1": | |
78 | part1_data[c1] = 1 | |
79 | ||
80 | # Perform tests for all other data | |
81 | for c in range(sys.maxunicode+1): | |
82 | X = unichr(c) | |
83 | if X in part1_data: | |
84 | continue | |
85 | assert X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c | |
86 | ||
87 | # Check for bug 834676 | |
88 | normalize('NFC',u'\ud55c\uae00') | |
89 | ||
# Run the conformance tests when executed directly as a script.
if __name__ == "__main__":
    test_main()