import os
import sys
from test.test_support import verbose, TestFailed, TestSkipped, verify
from unicodedata import normalize
# File name of the Unicode 3.2.0 normalization test data.  The extension
# separator comes from os.extsep rather than a hard-coded ".".
TESTDATAFILE = "NormalizationTest-3.2.0" + os.extsep + "txt"
# This search allows using a build directory just inside the source
# directory, and saving just one copy of the test data in the source
# tree, rather than having a copy in each build directory.
# There might be a better way to do this.
# Look for the data file in the current directory first, then in its parent.
# skip_expected ends up True only when neither location has the file.
for path in [os.path.curdir, os.path.pardir]:
    fn = os.path.join(path, TESTDATAFILE)
    skip_expected = not os.path.exists(fn)
    if not skip_expected:
        # Found: remember the full path (the file is opened through
        # TESTDATAFILE later) and stop, so a miss in a later directory
        # cannot overwrite this hit.
        TESTDATAFILE = fn
        break
# Thin wrappers around unicodedata.normalize, one per normalization form.
# The parameter is named ``str`` (shadowing the builtin) because that is the
# name the existing return statements use; it is kept for compatibility.

def NFC(str):
    """Return *str* normalized to form NFC."""
    return normalize("NFC", str)


def NFKC(str):
    """Return *str* normalized to form NFKC."""
    return normalize("NFKC", str)


def NFD(str):
    """Return *str* normalized to form NFD."""
    return normalize("NFD", str)


def NFKD(str):
    """Return *str* normalized to form NFKD."""
    return normalize("NFKD", str)
def unistr(data):
    """Convert space-separated hexadecimal code points to a unicode string.

    data -- text such as "0044 0307"; it is split on single spaces and each
            field is parsed with int(field, 16).

    Returns the characters for those code points, joined in order.

    Raises ValueError if a field is not valid hexadecimal, or if unichr()
    rejects the code point as out of range for this build.
    """
    codepoints = [int(x, 16) for x in data.split(" ")]
    return u"".join([unichr(x) for x in codepoints])
def test_main():
    """Check unicodedata.normalize against the data in TESTDATAFILE.

    NOTE(review): the ``def`` line for this body is missing from this copy
    of the file.  ``test_main`` is the conventional regrtest entry point --
    confirm it matches the call under the ``__main__`` guard.

    Each data line holds five ';'-separated columns (c1..c5) of hex code
    points followed by a trailing field that is discarded.  For every line
    the relations between the columns and their NFC/NFD/NFKC/NFKD forms are
    verified.  Afterwards every code point that did not appear as c1 in
    @Part1 must be unchanged by all four forms.

    Raises TestSkipped if the data file was not found, TestFailed (through
    verify) if a data line fails, and AssertionError if a code point outside
    @Part1 is changed by normalization.
    """
    if skip_expected:
        raise TestSkipped(TESTDATAFILE + " not found, download from " +
                          "http://www.unicode.org/Public/3.2-Update/" +
                          TESTDATAFILE)

    part = None        # most recent "@Part..." header line seen
    part1_chars = {}   # c1 strings listed under @Part1
    datafile = open(TESTDATAFILE)
    try:
        for line in datafile:
            # Drop the trailing '#' comment and surrounding whitespace;
            # blank and comment-only lines carry no test data.
            line = line.split('#')[0].strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line
                continue

            # Convert first and unpack afterwards, so that a wrong field
            # count still fails loudly instead of being skipped.
            fields = line.split(';')[:-1]
            try:
                columns = [unistr(x) for x in fields]
            except ValueError:
                # Skip unsupported characters
                continue
            c1, c2, c3, c4, c5 = columns

            verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            verify(c4 == NFC(c4) == NFC(c5), line)
            verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            verify(c5 == NFD(c4) == NFD(c5), line)
            verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4)
                   == NFKC(c5), line)
            verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4)
                   == NFKD(c5), line)

            # Remember @Part1 characters; the sweep below must skip them.
            if part == "@Part1":
                part1_chars[c1] = True
    finally:
        datafile.close()

    # Perform tests for all other data
    for c in range(sys.maxunicode + 1):
        X = unichr(c)
        if X in part1_chars:
            continue
        assert X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c

    # The result is discarded: this call only has to complete without
    # raising.
    normalize('NFC', u'\ud55c\uae00')
if __name__
== "__main__":