Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | """Efficiently compare files, boolean outcome only (equal / not equal). |
2 | ||
3 | Tricks (used in this order): | |
4 | - Use the statcache module to avoid statting files more than once | |
5 | - Files with identical type, size & mtime are assumed to be clones | |
6 | - Files with different type or size cannot be identical | |
7 | - We keep a cache of outcomes of earlier comparisons | |
8 | - We don't fork a process to run 'cmp' but read the files ourselves | |
9 | """ | |
10 | ||
11 | import os | |
12 | from stat import * | |
13 | import statcache | |
14 | ||
15 | ||
# The cache.
# Module-level dictionary filled in by cmp().  It is keyed on the two
# file names being compared; each value is a tuple
# (signature of f1, signature of f2, outcome) so that a later call can
# detect stale entries by comparing signatures.
cache = {}
19 | ||
20 | ||
def cmp(f1, f2, shallow=1):
    """Compare two files, using the cache if possible.

    Arguments:
    f1, f2  -- names of the files to compare
    shallow -- if true (the default), two regular files whose type,
               size and mtime all match are reported as identical
               without reading their contents

    Return 1 for identical files, 0 for different.  If either name is
    not a regular file the result is always 0.

    May raise os.error if a stat or open of either file fails;
    exceptions raised while reading the files propagate as well.
    """
    s1, s2 = sig(statcache.stat(f1)), sig(statcache.stat(f2))
    if not S_ISREG(s1[0]) or not S_ISREG(s2[0]):
        # Either is not a plain file -- always report as different
        return 0
    if shallow and s1 == s2:
        # type, size & mtime match -- report same
        return 1
    if s1[:2] != s2[:2]:
        # types or sizes differ -- report different
        return 0
    # Same type and size -- look in the cache.
    # The key is a tuple rather than f1 + ' ' + f2: with string
    # concatenation the pairs ('a b', 'c') and ('a', 'b c') would share
    # one cache slot.
    key = (f1, f2)
    cached = cache.get(key)
    if cached is not None:
        cs1, cs2, outcome = cached
        if s1 == cs1 and s2 == cs2:
            # cached signatures match -- the cached outcome is still valid
            return outcome
        # stale cached signature(s) -- fall through and recompute
    # really compare
    outcome = do_cmp(f1, f2)
    cache[key] = s1, s2, outcome
    return outcome
49 | ||
def sig(st):
    """Build the comparison signature from raw stat data.

    st is a stat result indexable by the ST_* constants.  The result is
    the tuple (file type bits, size, mtime).
    """
    file_type = S_IFMT(st[ST_MODE])
    size = st[ST_SIZE]
    mtime = st[ST_MTIME]
    return (file_type, size, mtime)
53 | ||
def do_cmp(f1, f2):
    """Compare the contents of two files, really.

    f1 and f2 are file names.  Both files are opened in binary mode and
    read in lock step, bufsize bytes at a time.

    Return 1 if the contents are identical, 0 at the first chunk that
    differs.  Exceptions from open() or read() propagate to the caller;
    both files are closed before returning or raising.
    """
    bufsize = 8*1024 # Could be tuned
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while 1:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return 0
                if not b1:
                    # Both chunks are empty (they compared equal above):
                    # end of file reached on both sides with no difference.
                    return 1
        finally:
            fp2.close()
    finally:
        # Also runs when opening f2 fails, so fp1 is never left open.
        fp1.close()