"""Utilities for comparing files and directories.
cmp(f1, f2, shallow=1) -> int
cmpfiles(a, b, common) -> ([], [], [])
from itertools
import ifilter
, ifilterfalse
, imap
, izip
# Names exported by ``from <this module> import *``.
__all__ = ["cmp", "dircmp", "cmpfiles"]
def cmp(f1
, f2
, shallow
=1, use_statcache
=None):
# NOTE(review): this definition is fragmentary in this view: statements are
# split across lines, the docstring delimiters are missing, and the code that
# defines s1/s2 as well as the bodies of the `if` statements below are not
# visible.  Restore the missing lines before changing any logic here.
# The five lines that follow are the surviving docstring text.
shallow -- Just check stat signature (do not read the files).
use_statcache -- obsolete argument.
True if the files are the same, False otherwise.
This function uses a cache for past comparisons and the results,
with a cache invalidation mechanism relying on stale signatures.
# Warn callers that still pass the obsolete use_statcache argument
# (the remaining arguments of the warn() call are not visible here).
if use_statcache
is not None:
warnings
.warn("use_statcache argument is deprecated",
# Tests whether either s1 or s2 has a first element other than
# stat.S_IFREG.  Presumably s1/s2 are the stat signatures of f1 and f2 and
# this rejects non-regular files -- TODO confirm; the action taken is not
# visible here.
if s1
[0] != stat
.S_IFREG
or s2
[0] != stat
.S_IFREG
:
# Look up a previously cached entry for this pair of file names.
result
= _cache
.get((f1
, f2
))
# A cached entry is compared against the current (s1, s2) through its first
# two elements; entries are stored below as (s1, s2, outcome).  What happens
# on a match is not visible here -- presumably the cached outcome is reused.
if result
and (s1
, s2
) == result
[:2]:
# Compare via _do_cmp() and remember the outcome together with both
# signatures, keyed by the pair of file names.
outcome
= _do_cmp(f1
, f2
)
_cache
[f1
, f2
] = s1
, s2
, outcome
# NOTE(review): this return reads st.st_mode from a name `st` that cmp() does
# not define; it looks like the start of a separate helper that builds a stat
# signature.  Its def line and the remaining tuple elements are not visible
# here -- TODO confirm against the complete file.
return (stat
.S_IFMT(st
.st_mode
),
# Directory comparison class.
"""A class that manages the comparison of 2 directories.
dircmp(a,b,ignore=None,hide=None)
IGNORE is a list of names to ignore,
defaults to ['RCS', 'CVS', 'tags'].
HIDE is a list of names to hide,
defaults to [os.curdir, os.pardir].
x.report() -> prints a report on the differences between dir1 and dir2
x.report_partial_closure() -> prints report on differences between dir1
and dir2, and reports on common immediate subdirectories.
x.report_full_closure() -> like report_partial_closure, but recurses into all common subdirectories.
left_list, right_list: The files in dir1 and dir2,
filtered by hide and ignore.
common: a list of names in both dir1 and dir2.
left_only, right_only: names only in dir1, dir2.
common_dirs: subdirectories in both dir1 and dir2.
common_files: files in both dir1 and dir2.
common_funny: names in both dir1 and dir2 where the type differs between
dir1 and dir2, or the name is not stat-able.
same_files: list of identical files.
diff_files: list of filenames which differ.
funny_files: list of files which could not be compared.
subdirs: a dictionary of dircmp objects, keyed by names in common_dirs.
"""
def __init__(self
, a
, b
, ignore
=None, hide
=None): # Initialize
self
.hide
= [os
.curdir
, os
.pardir
] # Names never to be shown
self
.ignore
= ['RCS', 'CVS', 'tags'] # Names ignored in comparison
def phase0(self):
    """List both directories, dropping hidden and ignored names.

    Sets self.left_list and self.right_list to the entries of self.left and
    self.right with every name in self.hide or self.ignore removed.
    """
    skip = self.hide + self.ignore
    self.left_list = _filter(os.listdir(self.left), skip)
    self.right_list = _filter(os.listdir(self.right), skip)
    # os.listdir() returns entries in arbitrary order; sort so that the
    # derived lists and the printed reports are deterministic.
    self.left_list.sort()
    self.right_list.sort()
def phase1(self):
    """Compute the common, left-only and right-only names.

    Names from self.left_list and self.right_list are matched on their
    os.path.normcase() form.  Sets three lists:

    self.common -- names present on both sides (left-hand spelling)
    self.left_only -- names present only in self.left_list
    self.right_only -- names present only in self.right_list
    """
    # Map normalised name -> original spelling for each side.
    left = dict(zip(map(os.path.normcase, self.left_list), self.left_list))
    right = dict(zip(map(os.path.normcase, self.right_list), self.right_list))
    # List comprehensions always produce real lists, as the class
    # documentation promises, and `in` replaces the deprecated has_key().
    self.common = [left[key] for key in left if key in right]
    self.left_only = [left[key] for key in left if key not in right]
    self.right_only = [right[key] for key in right if key not in left]
def phase2(self):
    """Distinguish files, directories and funnies among the common names.

    Each name in self.common is stat'ed on both sides and appended to one of:

    self.common_dirs -- a directory on both sides
    self.common_files -- a regular file on both sides
    self.common_funny -- the type differs between the two sides, the name
                         is not stat-able, or it is neither a directory
                         nor a regular file
    """
    self.common_dirs = []
    self.common_files = []
    self.common_funny = []

    for x in self.common:
        a_path = os.path.join(self.left, x)
        b_path = os.path.join(self.right, x)

        try:
            a_stat = os.stat(a_path)
            b_stat = os.stat(b_path)
        except OSError:
            # Not stat-able on at least one side.
            self.common_funny.append(x)
            continue

        a_type = stat.S_IFMT(a_stat.st_mode)
        b_type = stat.S_IFMT(b_stat.st_mode)
        if a_type != b_type:
            self.common_funny.append(x)
        elif stat.S_ISDIR(a_type):
            self.common_dirs.append(x)
        elif stat.S_ISREG(a_type):
            self.common_files.append(x)
        else:
            self.common_funny.append(x)
def phase3(self):
    """Find out differences between common files.

    Passes self.common_files to cmpfiles() and stores the three lists it
    returns as self.same_files, self.diff_files and self.funny_files.
    """
    same, different, funny = cmpfiles(self.left, self.right,
                                      self.common_files)
    self.same_files = same
    self.diff_files = different
    self.funny_files = funny
def phase4(self):
    """Find out differences between common subdirectories.

    Sets self.subdirs to a dictionary that maps each name in
    self.common_dirs to a new dircmp object for that subdirectory pair.
    The hide and ignore lists are inherited from the parent.
    """
    # Build the mapping in a local and publish it once at the end: reading
    # self.subdirs before it exists would go through __getattr__, which
    # calls phase4() again.
    subdirs = {}
    for x in self.common_dirs:
        a_x = os.path.join(self.left, x)
        b_x = os.path.join(self.right, x)
        subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide)
    self.subdirs = subdirs
def phase4_closure(self):
    """Recursively call phase4() on subdirectories.

    Reads self.subdirs and then descends into every dircmp object stored
    in it, so the whole tree of common subdirectories gets visited.
    """
    for sd in self.subdirs.values():
        sd.phase4_closure()
def report(self
): # Print a report on the differences between a and b
# Output format is purposely lousy
print 'diff', self
.left
, self
.right
print 'Only in', self
.left
, ':', self
.left_only
print 'Only in', self
.right
, ':', self
.right_only
print 'Identical files :', self
.same_files
print 'Differing files :', self
.diff_files
print 'Trouble with common files :', self
.funny_files
print 'Common subdirectories :', self
.common_dirs
print 'Common funny cases :', self
.common_funny
def report_partial_closure(self):
    """Print reports on self and on the immediate common subdirectories.

    Calls self.report() and then report() on every dircmp object in
    self.subdirs; it does not descend any further.
    """
    self.report()
    for sd in self.subdirs.values():
        print  # blank line between consecutive reports
        sd.report()
def report_full_closure(self):
    """Report on self and on all common subdirectories, recursively.

    Calls self.report() and then report_full_closure() on every dircmp
    object in self.subdirs.
    """
    self.report()
    for sd in self.subdirs.values():
        print  # blank line between consecutive reports
        sd.report_full_closure()
# Maps the name of each lazily computed attribute to the phase method that
# sets it; __getattr__ consults this table when an attribute is missing.
methodmap = {
    'subdirs': phase4,
    'same_files': phase3,
    'diff_files': phase3,
    'funny_files': phase3,
    'common_dirs': phase2,
    'common_files': phase2,
    'common_funny': phase2,
    'common': phase1,
    'left_only': phase1,
    'right_only': phase1,
    'left_list': phase0,
    'right_list': phase0,
}
def __getattr__(self
, attr
):
if attr
not in self
.methodmap
:
raise AttributeError, attr
self
.methodmap
[attr
](self
)
return getattr(self
, attr
)
def cmpfiles(a, b, common, shallow=1, use_statcache=None):
    """Compare common files in two directories.

    a, b -- directory names
    common -- list of file names found in both directories
    shallow -- if true, do comparison based solely on stat() information
    use_statcache -- obsolete argument

    Returns a tuple of three lists, indexed by the result of _cmp():
      [0] names whose files compare equal
      [1] names whose files differ
      [2] names for which the comparison gave the "funny" result
          (can't stat, etc.)

    """
    if use_statcache is not None:
        warnings.warn("use_statcache argument is deprecated",
                      DeprecationWarning)
    res = ([], [], [])
    for x in common:
        ax = os.path.join(a, x)
        bx = os.path.join(b, x)
        # _cmp() yields 0, 1 or 2, which selects the list x belongs to.
        res[_cmp(ax, bx, shallow)].append(x)
    return res
# 2 for funny cases (can't stat, etc.)
def _cmp(a, b, sh, abs=abs, cmp=cmp):
    """Classify the comparison of files a and b as a list index.

    a, b -- the two file names
    sh -- passed to cmp() as its third (shallow) argument
    abs, cmp -- bound as defaults when the function is defined; not meant
                to be passed by callers

    Returns a false value (index 0) when cmp() reports the files as the
    same, a true value (index 1) when it reports them as different, and 2
    for funny cases, i.e. when the comparison raised an OS-level error
    (can't stat, etc.).
    """
    try:
        return not abs(cmp(a, b, sh))
    except EnvironmentError:
        # EnvironmentError covers both OSError and IOError, so a file that
        # cannot be stat'ed or read is reported as funny instead of
        # aborting the whole directory comparison.
        return 2
# Return a copy with items that occur in skip removed.
def _filter(flist
, skip
):
return list(ifilterfalse(skip
.__contains
__, flist
))
# Demonstration and testing.
# NOTE(review): this script section is fragmentary in this view: statements are
# split across lines, and the condition guarding the `raise` as well as the
# bodies of the two trailing `if` statements are not visible.  `sys` and
# `getopt` are used here but no import of them is visible -- TODO confirm
# against the complete file.
# Parse the command line; 'r' is the only option letter accepted.
options
, args
= getopt
.getopt(sys
.argv
[1:], 'r')
# The message suggests this is meant to fire only when the number of
# positional arguments is not two; the guarding condition is not visible
# here -- TODO confirm.
raise getopt
.GetoptError('need exactly two args', None)
# Build a dircmp for the two directories named on the command line.
dd
= dircmp(args
[0], args
[1])
# True when -r was given (getopt reports a flag without a value as
# ('-r', '')); the action taken in either case is not visible here.
if ('-r', '') in options
:
# Script entry guard; its body is not visible here.
if __name__
== '__main__':