Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """Utilities for comparing files and directories. |
2 | ||
3 | Classes: | |
4 | dircmp | |
5 | ||
6 | Functions: | |
7 | cmp(f1, f2, shallow=1) -> bool | |
8 | cmpfiles(a, b, common) -> ([], [], []) | |
9 | ||
10 | """ | |
11 | ||
12 | import os | |
13 | import stat | |
14 | import warnings | |
15 | from itertools import ifilter, ifilterfalse, imap, izip | |
16 | ||
# Public names exported by a star-import of this module.
__all__ = ["cmp","dircmp","cmpfiles"]

# Results of earlier content comparisons, consulted and filled in by cmp():
# maps (f1, f2) -> (signature of f1, signature of f2, outcome).
_cache = {}
# Chunk size, in bytes, that _do_cmp() passes to each read() call.
BUFSIZE=8*1024
21 | ||
def cmp(f1, f2, shallow=1, use_statcache=None):
    """Compare two files.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- If true (the default), two files whose stat signatures
               (file type, size, modification time) are identical are
               taken to be equal without reading them.  Files of equal
               size whose signatures differ still have their contents
               compared.

    use_statcache -- obsolete argument; passing anything but None only
                     triggers a DeprecationWarning.

    Return value:

    True if the files are the same, False otherwise.  False is also
    returned when either name does not refer to a regular file.

    Raises os.error if either file cannot be stat'ed.

    This function uses a cache for past comparisons and the results,
    with a cache invalidation mechanism relying on stale signatures.
    The cache is emptied once it holds more than 100 entries so that it
    cannot grow without bound in a long-running process.

    """
    if use_statcache is not None:
        # stacklevel=2 attributes the warning to the caller's line rather
        # than to this module.
        warnings.warn("use_statcache argument is deprecated",
                      DeprecationWarning, stacklevel=2)

    s1 = _sig(os.stat(f1))
    s2 = _sig(os.stat(f2))
    # Only regular files can be compared.
    if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
        return False
    if shallow and s1 == s2:
        return True
    # Files of different size cannot have equal contents.
    if s1[1] != s2[1]:
        return False

    # A cached outcome is only trusted while both signatures are unchanged.
    result = _cache.get((f1, f2))
    if result and (s1, s2) == result[:2]:
        return result[2]
    outcome = _do_cmp(f1, f2)
    if len(_cache) > 100:
        # Bound the memory used by the cache; entries are cheap to rebuild.
        _cache.clear()
    _cache[f1, f2] = s1, s2, outcome
    return outcome
63 | ||
def _sig(st):
    """Return the comparison signature of the stat result *st*.

    The signature is the tuple (file type bits, size, modification time).
    """
    file_type = stat.S_IFMT(st.st_mode)
    return (file_type, st.st_size, st.st_mtime)
68 | ||
def _do_cmp(f1, f2):
    """Return True if files f1 and f2 have identical contents.

    Both files are opened in binary mode and read in lock-step, BUFSIZE
    bytes at a time.  The comparison stops at the first differing chunk.
    Both files are closed before returning, including when an error is
    raised while opening or reading.
    """
    bufsize = BUFSIZE
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while True:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return False
                if not b1:
                    # Both files hit end-of-file together: no difference.
                    return True
        finally:
            fp2.close()
    finally:
        fp1.close()
80 | ||
81 | # Directory comparison class. | |
82 | # | |
class dircmp:
    """A class that manages the comparison of 2 directories.

    dircmp(a,b,ignore=None,hide=None)
      A and B are directories.
      IGNORE is a list of names to ignore,
        defaults to ['RCS', 'CVS', 'tags'].
      HIDE is a list of names to hide,
        defaults to [os.curdir, os.pardir].

    High level usage:
      x = dircmp(dir1, dir2)
      x.report() -> prints a report on the differences between dir1 and dir2
       or
      x.report_partial_closure() -> prints report on differences between dir1
            and dir2, and reports on common immediate subdirectories.
      x.report_full_closure() -> like report_partial_closure,
            but fully recursive.

    Attributes:
     left_list, right_list: The files in dir1 and dir2,
        filtered by hide and ignore.
     common: a list of names in both dir1 and dir2.
     left_only, right_only: names only in dir1, dir2.
     common_dirs: subdirectories in both dir1 and dir2.
     common_files: files in both dir1 and dir2.
     common_funny: names in both dir1 and dir2 where the type differs between
        dir1 and dir2, or the name is not stat-able.
     same_files: list of identical files.
     diff_files: list of filenames which differ.
     funny_files: list of files which could not be compared.
     subdirs: a dictionary of dircmp objects, keyed by names in common_dirs.

    None of these attributes is computed by the constructor.  The first
    access to one of them goes through __getattr__, which runs the phase
    method registered for it in methodmap; that method stores its results
    on the instance, so later accesses are ordinary attribute lookups.
    """

    def __init__(self, a, b, ignore=None, hide=None):
        """Remember the two directories and the names to hide and ignore."""
        self.left = a
        self.right = b
        if hide is None:
            self.hide = [os.curdir, os.pardir]  # Names never to be shown
        else:
            self.hide = hide
        if ignore is None:
            self.ignore = ['RCS', 'CVS', 'tags']  # Names ignored in comparison
        else:
            self.ignore = ignore

    def phase0(self):
        """List both directories, dropping hidden and ignored names.

        Sets left_list and right_list, each sorted.
        """
        skip = self.hide + self.ignore
        self.left_list = _filter(os.listdir(self.left), skip)
        self.right_list = _filter(os.listdir(self.right), skip)
        self.left_list.sort()
        self.right_list.sort()

    def phase1(self):
        """Compute common names.

        Sets common, left_only and right_only.  Names are matched by their
        os.path.normcase() form; the lists hold the original spellings
        (for common names, the spelling found in the left directory).
        """
        a = dict(zip(map(os.path.normcase, self.left_list), self.left_list))
        b = dict(zip(map(os.path.normcase, self.right_list), self.right_list))
        self.common = [a[key] for key in a if key in b]
        self.left_only = [a[key] for key in a if key not in b]
        self.right_only = [b[key] for key in b if key not in a]

    def phase2(self):
        """Distinguish files, directories, funnies.

        Sets common_dirs, common_files and common_funny from common.  A
        name is "funny" when it cannot be stat'ed on either side, when its
        type differs between the two sides, or when it is neither a
        directory nor a regular file.
        """
        self.common_dirs = []
        self.common_files = []
        self.common_funny = []

        for x in self.common:
            a_path = os.path.join(self.left, x)
            b_path = os.path.join(self.right, x)

            try:
                a_stat = os.stat(a_path)
                b_stat = os.stat(b_path)
            except os.error:
                # A failure on either side makes the name a funny case.
                self.common_funny.append(x)
                continue

            a_type = stat.S_IFMT(a_stat.st_mode)
            b_type = stat.S_IFMT(b_stat.st_mode)
            if a_type != b_type:
                self.common_funny.append(x)
            elif stat.S_ISDIR(a_type):
                self.common_dirs.append(x)
            elif stat.S_ISREG(a_type):
                self.common_files.append(x)
            else:
                self.common_funny.append(x)

    def phase3(self):
        """Find out differences between common files.

        Sets same_files, diff_files and funny_files from the result of
        cmpfiles() on common_files.
        """
        xx = cmpfiles(self.left, self.right, self.common_files)
        self.same_files, self.diff_files, self.funny_files = xx

    def phase4(self):
        """Find out differences between common subdirectories.

        A new dircmp object is created for each common subdirectory,
        these are stored in the subdirs dictionary indexed by filename.
        The hide and ignore properties are inherited from the parent.
        """
        self.subdirs = {}
        for x in self.common_dirs:
            a_x = os.path.join(self.left, x)
            b_x = os.path.join(self.right, x)
            self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide)

    def phase4_closure(self):
        """Recursively call phase4() on subdirectories."""
        self.phase4()
        for sd in self.subdirs.values():
            sd.phase4_closure()

    def report(self):
        """Print a report on the differences between a and b.

        Each non-empty result list is sorted in place before it is printed.
        """
        # Output format is purposely lousy
        print('diff %s %s' % (self.left, self.right))
        if self.left_only:
            self.left_only.sort()
            print('Only in %s : %s' % (self.left, self.left_only))
        if self.right_only:
            self.right_only.sort()
            print('Only in %s : %s' % (self.right, self.right_only))
        if self.same_files:
            self.same_files.sort()
            print('Identical files : %s' % (self.same_files,))
        if self.diff_files:
            self.diff_files.sort()
            print('Differing files : %s' % (self.diff_files,))
        if self.funny_files:
            self.funny_files.sort()
            print('Trouble with common files : %s' % (self.funny_files,))
        if self.common_dirs:
            self.common_dirs.sort()
            print('Common subdirectories : %s' % (self.common_dirs,))
        if self.common_funny:
            self.common_funny.sort()
            print('Common funny cases : %s' % (self.common_funny,))

    def report_partial_closure(self):
        """Print reports on self and on the immediate common subdirectories."""
        self.report()
        for sd in self.subdirs.values():
            # An empty line separates each subdirectory's report.
            print('')
            sd.report()

    def report_full_closure(self):
        """Report on self and subdirs recursively."""
        self.report()
        for sd in self.subdirs.values():
            # An empty line separates each subdirectory's report.
            print('')
            sd.report_full_closure()

    # Maps each lazily computed attribute to the phase method that sets it.
    methodmap = dict(subdirs=phase4,
                     same_files=phase3, diff_files=phase3, funny_files=phase3,
                     common_dirs=phase2, common_files=phase2, common_funny=phase2,
                     common=phase1, left_only=phase1, right_only=phase1,
                     left_list=phase0, right_list=phase0)

    def __getattr__(self, attr):
        """Compute a result attribute on first access.

        Only called when normal lookup fails.  Runs the phase method
        registered for attr, which stores the attribute on the instance,
        then returns the stored value.  Raises AttributeError for any name
        that is not in methodmap.
        """
        if attr not in self.methodmap:
            raise AttributeError(attr)
        self.methodmap[attr](self)
        return getattr(self, attr)
246 | ||
def cmpfiles(a, b, common, shallow=1, use_statcache=None):
    """Compare common files in two directories.

    a, b -- directory names
    common -- list of file names found in both directories
    shallow -- if true, files with identical stat() information are
               taken to be equal without reading their contents
    use_statcache -- obsolete argument; passing anything but None only
                     triggers a DeprecationWarning

    Returns a tuple of three lists:
      files that compare equal
      files that are different (this includes names that are not
        regular files)
      files that could not be compared because an error was raised
        (for example, the name cannot be stat'ed).

    """
    if use_statcache is not None:
        # stacklevel=2 attributes the warning to the caller's line rather
        # than to this module.
        warnings.warn("use_statcache argument is deprecated",
                      DeprecationWarning, stacklevel=2)
    res = ([], [], [])
    for x in common:
        ax = os.path.join(a, x)
        bx = os.path.join(b, x)
        # _cmp() returns 0, 1 or 2, which selects the list to extend.
        res[_cmp(ax, bx, shallow)].append(x)
    return res
270 | ||
271 | ||
272 | # Compare two files. | |
273 | # Return: | |
274 | # 0 for equal | |
275 | # 1 for different | |
276 | # 2 for funny cases (can't stat, etc.) | |
277 | # | |
def _cmp(a, b, sh, abs=abs, cmp=cmp):
    """Compare files a and b and return an index for cmpfiles().

    Returns 0 (False) when cmp() reports the files equal, 1 (True) when it
    reports them different, and 2 when the comparison raised an error.

    sh is passed to cmp() as its shallow argument.  The abs and cmp
    defaults bind the built-in abs and this module's cmp at definition
    time; callers are not expected to pass them.
    """
    try:
        return not abs(cmp(a, b, sh))
    except (os.error, IOError):
        # os.error covers a failing stat(); IOError covers a failing
        # open() or read() on Python versions where it is a separate class.
        return 2
283 | ||
284 | ||
285 | # Return a copy with items that occur in skip removed. | |
286 | # | |
def _filter(flist, skip):
    """Return a new list of the items of flist that do not occur in skip.

    The order of flist is preserved and neither argument is modified.
    """
    return [item for item in flist if item not in skip]
289 | ||
290 | ||
291 | # Demonstration and testing. | |
292 | # | |
def demo():
    """Compare the two directories named on the command line.

    Prints a report for the two directories; with the -r option the
    report also covers common subdirectories recursively.  Raises
    getopt.GetoptError unless exactly two directory arguments are given.
    """
    import getopt
    import sys

    options, args = getopt.getopt(sys.argv[1:], 'r')
    if len(args) != 2:
        raise getopt.GetoptError('need exactly two args', None)
    comparison = dircmp(args[0], args[1])
    recursive = ('-r', '') in options
    if recursive:
        comparison.report_full_closure()
    else:
        comparison.report()
304 | ||
# Run the demonstration when this file is executed as a script.
if __name__ == '__main__':
    demo()