Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # Test some Unicode file name semantics |
2 | # We don't test many operations on files other than | |
3 | # that their names can be used with Unicode characters. | |
4 | import os, glob, time, shutil | |
5 | import unicodedata | |
6 | ||
7 | import unittest | |
8 | from test.test_support import run_suite, TestSkipped, TESTFN_UNICODE | |
9 | from test.test_support import TESTFN_ENCODING, TESTFN_UNICODE_UNENCODEABLE | |
# Work out a (unicode, encoded) pair of names for the test file.  The whole
# module is skipped when no name survives a round trip through
# TESTFN_ENCODING.
try:
    TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
except (UnicodeError, TypeError):
    # Either the file system encoding is None, or the file name
    # cannot be encoded in the file system encoding.
    raise TestSkipped("No Unicode filesystem semantics on this platform.")

if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    # The file system encoding does not support Latin-1
    # (which test_support assumes), so try the file system
    # encoding instead.
    import sys
    try:
        TESTFN_UNICODE = unicode("@test-\xe0\xf2", sys.getfilesystemencoding())
        TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
        if '?' in TESTFN_ENCODED:
            # MBCS will not report the error properly, so a '?' in the
            # encoded name is treated as an encoding failure and routed
            # into the handler below.
            raise UnicodeError("mbcs encoding problem")
    except (UnicodeError, TypeError):
        raise TestSkipped("Cannot find a suitable filename.")

# Final sanity check: the name actually used must round-trip unchanged.
if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    raise TestSkipped("Cannot find a suitable filename.")
33 | ||
def remove_if_exists(filename):
    """Unlink *filename* when os.path.exists() reports it; otherwise no-op."""
    if not os.path.exists(filename):
        return
    os.unlink(filename)
37 | ||
38 | class TestUnicodeFiles(unittest.TestCase): | |
39 | # The 'do_' functions are the actual tests. They generally assume the | |
40 | # file already exists etc. | |
41 | ||
42 | # Do all the tests we can given only a single filename. The file should | |
43 | # exist. | |
44 | def _do_single(self, filename): | |
45 | self.failUnless(os.path.exists(filename)) | |
46 | self.failUnless(os.path.isfile(filename)) | |
47 | self.failUnless(os.access(filename, os.R_OK)) | |
48 | self.failUnless(os.path.exists(os.path.abspath(filename))) | |
49 | self.failUnless(os.path.isfile(os.path.abspath(filename))) | |
50 | self.failUnless(os.access(os.path.abspath(filename), os.R_OK)) | |
51 | os.chmod(filename, 0777) | |
52 | os.utime(filename, None) | |
53 | os.utime(filename, (time.time(), time.time())) | |
54 | # Copy/rename etc tests using the same filename | |
55 | self._do_copyish(filename, filename) | |
56 | # Filename should appear in glob output | |
57 | self.failUnless( | |
58 | os.path.abspath(filename)==os.path.abspath(glob.glob(filename)[0])) | |
59 | # basename should appear in listdir. | |
60 | path, base = os.path.split(os.path.abspath(filename)) | |
61 | if isinstance(base, str): | |
62 | base = base.decode(TESTFN_ENCODING) | |
63 | file_list = os.listdir(path) | |
64 | # listdir() with a unicode arg may or may not return Unicode | |
65 | # objects, depending on the platform. | |
66 | if file_list and isinstance(file_list[0], str): | |
67 | file_list = [f.decode(TESTFN_ENCODING) for f in file_list] | |
68 | ||
69 | # Normalize the unicode strings, as round-tripping the name via the OS | |
70 | # may return a different (but equivalent) value. | |
71 | base = unicodedata.normalize("NFD", base) | |
72 | file_list = [unicodedata.normalize("NFD", f) for f in file_list] | |
73 | ||
74 | self.failUnless(base in file_list) | |
75 | ||
76 | # Do as many "equivalancy' tests as we can - ie, check that although we | |
77 | # have different types for the filename, they refer to the same file. | |
78 | def _do_equivilent(self, filename1, filename2): | |
79 | # Note we only check "filename1 against filename2" - we don't bother | |
80 | # checking "filename2 against 1", as we assume we are called again with | |
81 | # the args reversed. | |
82 | self.failUnless(type(filename1)!=type(filename2), | |
83 | "No point checking equivalent filenames of the same type") | |
84 | # stat and lstat should return the same results. | |
85 | self.failUnlessEqual(os.stat(filename1), | |
86 | os.stat(filename2)) | |
87 | self.failUnlessEqual(os.lstat(filename1), | |
88 | os.lstat(filename2)) | |
89 | # Copy/rename etc tests using equivalent filename | |
90 | self._do_copyish(filename1, filename2) | |
91 | ||
92 | # Tests that copy, move, etc one file to another. | |
93 | def _do_copyish(self, filename1, filename2): | |
94 | # Should be able to rename the file using either name. | |
95 | self.failUnless(os.path.isfile(filename1)) # must exist. | |
96 | os.rename(filename1, filename2 + ".new") | |
97 | self.failUnless(os.path.isfile(filename1+".new")) | |
98 | os.rename(filename1 + ".new", filename2) | |
99 | self.failUnless(os.path.isfile(filename2)) | |
100 | ||
101 | # Try using shutil on the filenames. | |
102 | try: | |
103 | filename1==filename2 | |
104 | except UnicodeDecodeError: | |
105 | # these filenames can't be compared - shutil.copy tries to do | |
106 | # just that. This is really a bug in 'shutil' - if one of shutil's | |
107 | # 2 params are Unicode and the other isn't, it should coerce the | |
108 | # string to Unicode with the filesystem encoding before comparison. | |
109 | pass | |
110 | else: | |
111 | # filenames can be compared. | |
112 | shutil.copy(filename1, filename2 + ".new") | |
113 | os.unlink(filename1 + ".new") # remove using equiv name. | |
114 | # And a couple of moves, one using each name. | |
115 | shutil.move(filename1, filename2 + ".new") | |
116 | self.failUnless(not os.path.exists(filename2)) | |
117 | shutil.move(filename1 + ".new", filename2) | |
118 | self.failUnless(os.path.exists(filename1)) | |
119 | # Note - due to the implementation of shutil.move, | |
120 | # it tries a rename first. This only fails on Windows when on | |
121 | # different file systems - and this test can't ensure that. | |
122 | # So we test the shutil.copy2 function, which is the thing most | |
123 | # likely to fail. | |
124 | shutil.copy2(filename1, filename2 + ".new") | |
125 | os.unlink(filename1 + ".new") | |
126 | ||
127 | def _do_directory(self, make_name, chdir_name, encoded): | |
128 | cwd = os.getcwd() | |
129 | if os.path.isdir(make_name): | |
130 | os.rmdir(make_name) | |
131 | os.mkdir(make_name) | |
132 | try: | |
133 | os.chdir(chdir_name) | |
134 | try: | |
135 | if not encoded: | |
136 | cwd_result = os.getcwdu() | |
137 | name_result = make_name | |
138 | else: | |
139 | cwd_result = os.getcwd().decode(TESTFN_ENCODING) | |
140 | name_result = make_name.decode(TESTFN_ENCODING) | |
141 | ||
142 | cwd_result = unicodedata.normalize("NFD", cwd_result) | |
143 | name_result = unicodedata.normalize("NFD", name_result) | |
144 | ||
145 | self.failUnlessEqual(os.path.basename(cwd_result),name_result) | |
146 | finally: | |
147 | os.chdir(cwd) | |
148 | finally: | |
149 | os.rmdir(make_name) | |
150 | ||
151 | # The '_test' functions 'entry points with params' - ie, what the | |
152 | # top-level 'test' functions would be if they could take params | |
153 | def _test_single(self, filename): | |
154 | remove_if_exists(filename) | |
155 | f = file(filename, "w") | |
156 | f.close() | |
157 | try: | |
158 | self._do_single(filename) | |
159 | finally: | |
160 | os.unlink(filename) | |
161 | self.failUnless(not os.path.exists(filename)) | |
162 | # and again with os.open. | |
163 | f = os.open(filename, os.O_CREAT) | |
164 | os.close(f) | |
165 | try: | |
166 | self._do_single(filename) | |
167 | finally: | |
168 | os.unlink(filename) | |
169 | ||
170 | def _test_equivalent(self, filename1, filename2): | |
171 | remove_if_exists(filename1) | |
172 | self.failUnless(not os.path.exists(filename2)) | |
173 | f = file(filename1, "w") | |
174 | f.close() | |
175 | try: | |
176 | self._do_equivilent(filename1, filename2) | |
177 | finally: | |
178 | os.unlink(filename1) | |
179 | ||
180 | # The 'test' functions are unittest entry points, and simply call our | |
181 | # _test functions with each of the filename combinations we wish to test | |
182 | def test_single_files(self): | |
183 | self._test_single(TESTFN_ENCODED) | |
184 | self._test_single(TESTFN_UNICODE) | |
185 | if TESTFN_UNICODE_UNENCODEABLE is not None: | |
186 | self._test_single(TESTFN_UNICODE_UNENCODEABLE) | |
187 | ||
188 | def test_equivalent_files(self): | |
189 | self._test_equivalent(TESTFN_ENCODED, TESTFN_UNICODE) | |
190 | self._test_equivalent(TESTFN_UNICODE, TESTFN_ENCODED) | |
191 | ||
192 | def test_directories(self): | |
193 | # For all 'equivilent' combinations: | |
194 | # Make dir with encoded, chdir with unicode, checkdir with encoded | |
195 | # (or unicode/encoded/unicode, etc | |
196 | ext = ".dir" | |
197 | self._do_directory(TESTFN_ENCODED+ext, TESTFN_ENCODED+ext, True) | |
198 | self._do_directory(TESTFN_ENCODED+ext, TESTFN_UNICODE+ext, True) | |
199 | self._do_directory(TESTFN_UNICODE+ext, TESTFN_ENCODED+ext, False) | |
200 | self._do_directory(TESTFN_UNICODE+ext, TESTFN_UNICODE+ext, False) | |
201 | # Our directory name that can't use a non-unicode name. | |
202 | if TESTFN_UNICODE_UNENCODEABLE is not None: | |
203 | self._do_directory(TESTFN_UNICODE_UNENCODEABLE+ext, | |
204 | TESTFN_UNICODE_UNENCODEABLE+ext, | |
205 | False) | |
206 | ||
def test_main():
    """Collect the TestUnicodeFiles tests into a suite and pass it to run_suite()."""
    cases = unittest.makeSuite(TestUnicodeFiles)
    run_suite(unittest.TestSuite([cases]))

# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()