[OpenSPARC-T2-DV] / tools / src / nas,5.n2.os.2 / lib / python / lib / python2.4 / tabnanny.py

#! /usr/bin/env python

"""The Tab Nanny despises ambiguous indentation.  She knows no mercy.

tabnanny -- Detection of ambiguous indentation

For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
check() described below.

Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
"""

# Released to the public domain, by Tim Peters, 15 April 1998.

# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like.  This will be addressed later.

__version__ = "6"

import os
import sys
import getopt
import tokenize
# process_tokens() below treats tokenize.NL tokens as skippable junk, so a
# tokenize module that does not define NL cannot be used; fail at import time.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

# Names exported by "from tabnanny import *".
__all__ = ["check", "NannyNag", "process_tokens"]

# Module-wide reporting switches.  main() bumps each one once per matching
# command-line flag (-v / -q); check() reads them to decide what to print.
verbose = 0
filename_only = 0

def errprint(*args):
    """Write the str() of each argument to stderr, separated by single
    spaces and terminated by a newline."""
    emit = sys.stderr.write
    for position, item in enumerate(args):
        if position:
            # every argument after the first is preceded by one space
            emit(" " + str(item))
        else:
            emit(str(item))
    emit("\n")

def main():
    """Command-line driver.

    Parses the -q and -v flags from sys.argv (each occurrence increments
    the module-level filename_only / verbose counter), then calls check()
    on every remaining argument.  A getopt failure or a missing argument
    list is reported through errprint() and the function simply returns.
    """
    global verbose, filename_only
    try:
        options, targets = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error, msg:
        errprint(msg)
        return
    for flag, _value in options:
        # neither flag takes a value, so only the flag name matters
        if flag == '-q':
            filename_only += 1
        elif flag == '-v':
            verbose += 1
    if not targets:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for target in targets:
        check(target)

class NannyNag(Exception):
    """
    Raised by process_tokens() when it detects an ambiguous indent.
    Caught and reported by check().
    """
    def __init__(self, lineno, msg, line):
        # lineno: line number at which the problem was detected
        # msg:    human-readable description of the problem
        # line:   text of the offending source line
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """Return the line number stored at construction."""
        return self.lineno

    def get_msg(self):
        """Return the diagnostic message stored at construction."""
        return self.msg

    def get_line(self):
        """Return the offending source line stored at construction."""
        return self.line
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print statement.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print "%r: listing directory" % (file,)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = open(file)
    except IOError, msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print "checking %r ..." % file

    try:
        process_tokens(tokenize.generate_tokens(f.readline))

    except tokenize.TokenError, msg:
        errprint("%r: Token Error: %s" % (file, msg))
        return

    except NannyNag, nag:
        badline = nag.get_lineno()
        line = nag.get_line()
        if verbose:
            print "%r: *** Line %d: trouble in tab city! ***" % (file, badline)
            print "offending line: %r" % (line,)
            print nag.get_msg()
        else:
            if ' ' in file: file = '"' + file + '"'
            if filename_only: print file
            else: print file, badline, repr(line)
        return

    if verbose:
        print "%r: Clean bill of health." % (file,)

class Whitespace:
    """Tab-size-independent description of a string's leading whitespace.

    The constructor scans the leading run of spaces and tabs of a string
    and stores a normal form from which the indentation column can be
    computed for any tab size, and against which two indents can be
    compared for *all* tab sizes at once.
    """

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        """Analyse the leading spaces/tabs of the string ws."""
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b:  length of the current run of spaces
        # n:  leading whitespace characters seen so far
        # nt: tabs seen so far
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n += 1
                b += 1
            elif ch == T:
                n += 1
                nt += 1
                # grow count so that index b exists, then record one
                # "b spaces followed by a tab" group
                if b >= len(count):
                    count.extend([0] * (b - len(count) + 1))
                count[b] += 1
                b = 0
            else:
                # first non-whitespace character ends the indent
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count) - 1, trailing)

    def indent_level(self, tabsize):
        """Return the column this indent reaches for the given tab size.

        tabsize must be an integer >= 1; the result is always an integer.
        """
        # count, il = self.norm
        # for i in range(len(count)):
        #    if count[i]:
        #        il = il + (i//tabsize + 1)*tabsize * count[i]
        # return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            # explicit floor division: plain "/" only floors under classic
            # integer division and would produce fractional columns when
            # true division is in effect
            il = il + i // tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine != theirs:
                a.append((ts, mine, theirs))
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way.  I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n + 1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine >= theirs:
                a.append((ts, mine, theirs))
        return a

def format_witnesses(w):
    """Render the tab sizes of a witness list as readable text.

    w is a sequence of tuples whose first item is a tab size; the result
    is "at tab size" (pluralised when w has more than one entry) followed
    by a space and the comma-separated tab sizes.
    """
    label = "at tab size"
    if len(w) > 1:
        label += "s"
    sizes = [str(entry[0]) for entry in w]
    return label + " " + ', '.join(sizes)

def process_tokens(tokens):
    """Scan a token stream and raise NannyNag on an ambiguous indent.

    tokens is an iterable of 5-tuples (type, string, start, end, line) as
    produced by tokenize.generate_tokens().  Returns None if no problem
    is found.
    """
    junk_types = (tokenize.COMMENT, tokenize.NL)
    # stack of enclosing indents, seeded with the empty (column 0) indent
    indent_stack = [Whitespace("")]
    # true while the next "real" token must line up with the stack top
    expect_match = False

    for tok_type, tok_text, start, _end, line in tokens:
        if tok_type == tokenize.NEWLINE:
            # A statement (or ENDMARKER) will eventually follow, after a
            # possibly empty run of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # Requesting the equality check is wrong if an INDENT shows
            # up, but the INDENT branch cancels it again.
            expect_match = True

        elif tok_type == tokenize.INDENT:
            expect_match = False
            deeper = Whitespace(tok_text)
            enclosing = indent_stack[-1]
            if not enclosing.less(deeper):
                witness = enclosing.not_less_witness(deeper)
                raise NannyNag(
                    start[0],
                    "indent not greater e.g. " + format_witnesses(witness),
                    line)
            indent_stack.append(deeper)

        elif tok_type == tokenize.DEDENT:
            # Nothing to verify on the DEDENT itself: what matters is that
            # once the run of DEDENTs is over, the statement (or ENDMARKER)
            # that caused it matches whatever is left on top of the stack.
            # Do not assert that a NEWLINE was seen first -- when the last
            # source line is indented and has no trailing newline, DEDENTs
            # appear without one.
            expect_match = True
            del indent_stack[-1]

        elif expect_match and tok_type not in junk_types:
            # First real token after a NEWLINE, i.e. the start of the next
            # statement or the ENDMARKER.  "line" carries the statement's
            # leading whitespace; for ENDMARKER it is the empty string,
            # which matches the empty indent the stack was seeded with.
            expect_match = False
            current = Whitespace(line)
            enclosing = indent_stack[-1]
            if not enclosing.equal(current):
                witness = enclosing.not_equal_witness(current)
                raise NannyNag(
                    start[0],
                    "indent not equal e.g. " + format_witnesses(witness),
                    line)


# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Commit	Line	Data
86530b38 AT	1	#! /usr/bin/env python
	2
	3	"""The Tab Nanny despises ambiguous indentation. She knows no mercy.
	4
	5	tabnanny -- Detection of ambiguous indentation
	6
	7	For the time being this module is intended to be called as a script.
	8	However it is possible to import it into an IDE and use the function
	9	check() described below.
	10
	11	Warning: The API provided by this module is likely to change in future
	12	releases; such changes may not be backward compatible.
	13	"""
	14
	15	# Released to the public domain, by Tim Peters, 15 April 1998.
	16
	17	# XXX Note: this is now a standard library module.
	18	# XXX The API needs to undergo changes however; the current code is too
	19	# XXX script-like. This will be addressed later.
	20
	21	__version__ = "6"
	22
	23	import os
	24	import sys
	25	import getopt
	26	import tokenize
	27	if not hasattr(tokenize, 'NL'):
	28	raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
	29
	30	__all__ = ["check", "NannyNag", "process_tokens"]
	31
	32	verbose = 0
	33	filename_only = 0
	34
	35	def errprint(*args):
	36	sep = ""
	37	for arg in args:
	38	sys.stderr.write(sep + str(arg))
	39	sep = " "
	40	sys.stderr.write("\n")
	41
	42	def main():
	43	global verbose, filename_only
	44	try:
	45	opts, args = getopt.getopt(sys.argv[1:], "qv")
	46	except getopt.error, msg:
	47	errprint(msg)
	48	return
	49	for o, a in opts:
	50	if o == '-q':
	51	filename_only = filename_only + 1
	52	if o == '-v':
	53	verbose = verbose + 1
	54	if not args:
	55	errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
	56	return
	57	for arg in args:
	58	check(arg)
	59
	60	class NannyNag(Exception):
	61	"""
	62	Raised by tokeneater() if detecting an ambiguous indent.
	63	Captured and handled in check().
	64	"""
65	def __init__(self, lineno, msg, line):
66	self.lineno, self.msg, self.line = lineno, msg, line
67	def get_lineno(self):
68	return self.lineno
69	def get_msg(self):
70	return self.msg
71	def get_line(self):
72	return self.line
73
74	def check(file):
75	"""check(file_or_dir)
76
77	If file_or_dir is a directory and not a symbolic link, then recursively
78	descend the directory tree named by file_or_dir, checking all .py files
79	along the way. If file_or_dir is an ordinary Python source file, it is
80	checked for whitespace related problems. The diagnostic messages are
81	written to standard output using the print statement.
82	"""
83
84	if os.path.isdir(file) and not os.path.islink(file):
85	if verbose:
86	print "%r: listing directory" % (file,)
87	names = os.listdir(file)
88	for name in names:
89	fullname = os.path.join(file, name)
90	if (os.path.isdir(fullname) and
91	not os.path.islink(fullname) or
92	os.path.normcase(name[-3:]) == ".py"):
93	check(fullname)
94	return
95
96	try:
97	f = open(file)
98	except IOError, msg:
99	errprint("%r: I/O Error: %s" % (file, msg))
100	return
101
102	if verbose > 1:
103	print "checking %r ..." % file
104
105	try:
106	process_tokens(tokenize.generate_tokens(f.readline))
107
108	except tokenize.TokenError, msg:
109	errprint("%r: Token Error: %s" % (file, msg))
110	return
111
112	except NannyNag, nag:
113	badline = nag.get_lineno()
114	line = nag.get_line()
115	if verbose:
116	print "%r: * Line %d: trouble in tab city! *" % (file, badline)
117	print "offending line: %r" % (line,)
118	print nag.get_msg()
119	else:
120	if ' ' in file: file = '"' + file + '"'
121	if filename_only: print file
122	else: print file, badline, repr(line)
123	return
124
125	if verbose:
126	print "%r: Clean bill of health." % (file,)
127
128	class Whitespace:
129	# the characters used for space and tab
130	S, T = ' \t'
131
132	# members:
133	# raw
134	# the original string
135	# n
136	# the number of leading whitespace characters in raw
137	# nt
138	# the number of tabs in raw[:n]
139	# norm
140	# the normal form as a pair (count, trailing), where:
141	# count
142	# a tuple such that raw[:n] contains count[i]
143	# instances of S * i + T
144	# trailing
145	# the number of trailing spaces in raw[:n]
146	# It's A Theorem that m.indent_level(t) ==
147	# n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
148	# is_simple
	149	# true iff raw[:n] is of the form (T*)(S*)
150
151	def __init__(self, ws):
152	self.raw = ws
153	S, T = Whitespace.S, Whitespace.T
154	count = []
155	b = n = nt = 0
156	for ch in self.raw:
157	if ch == S:
158	n = n + 1
159	b = b + 1
160	elif ch == T:
161	n = n + 1
162	nt = nt + 1
163	if b >= len(count):
164	count = count + [0] * (b - len(count) + 1)
165	count[b] = count[b] + 1
166	b = 0
167	else:
168	break
169	self.n = n
170	self.nt = nt
171	self.norm = tuple(count), b
172	self.is_simple = len(count) <= 1
173
174	# return length of longest contiguous run of spaces (whether or not
175	# preceding a tab)
176	def longest_run_of_spaces(self):
177	count, trailing = self.norm
178	return max(len(count)-1, trailing)
179
180	def indent_level(self, tabsize):
181	# count, il = self.norm
182	# for i in range(len(count)):
183	# if count[i]:
	184	# il = il + (i/tabsize + 1)*tabsize * count[i]
185	# return il
186
187	# quicker:
	188	# il = trailing + sum (i/ts + 1)*ts*count[i] =
189	# trailing + ts * sum (i/ts + 1)*count[i] =
190	# trailing + ts * sum i/ts*count[i] + count[i] =
191	# trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
192	# trailing + ts * [(sum i/ts*count[i]) + num_tabs]
193	# and note that i/ts*count[i] is 0 when i < ts
194
195	count, trailing = self.norm
196	il = 0
197	for i in range(tabsize, len(count)):
198	il = il + i/tabsize * count[i]
199	return trailing + tabsize * (il + self.nt)
200
201	# return true iff self.indent_level(t) == other.indent_level(t)
202	# for all t >= 1
203	def equal(self, other):
204	return self.norm == other.norm
205
206	# return a list of tuples (ts, i1, i2) such that
207	# i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
208	# Intended to be used after not self.equal(other) is known, in which
209	# case it will return at least one witnessing tab size.
210	def not_equal_witness(self, other):
211	n = max(self.longest_run_of_spaces(),
212	other.longest_run_of_spaces()) + 1
213	a = []
214	for ts in range(1, n+1):
215	if self.indent_level(ts) != other.indent_level(ts):
216	a.append( (ts,
217	self.indent_level(ts),
218	other.indent_level(ts)) )
219	return a
220
221	# Return True iff self.indent_level(t) < other.indent_level(t)
222	# for all t >= 1.
223	# The algorithm is due to Vincent Broman.
224	# Easy to prove it's correct.
225	# XXXpost that.
226	# Trivial to prove n is sharp (consider T vs ST).
227	# Unknown whether there's a faster general way. I suspected so at
228	# first, but no longer.
229	# For the special (but common!) case where M and N are both of the
	230	# form (T*)(S*), M.less(N) iff M.len() < N.len() and
231	# M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
232	# XXXwrite that up.
	233	# Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
234	def less(self, other):
235	if self.n >= other.n:
236	return False
237	if self.is_simple and other.is_simple:
238	return self.nt <= other.nt
239	n = max(self.longest_run_of_spaces(),
240	other.longest_run_of_spaces()) + 1
241	# the self.n >= other.n test already did it for ts=1
242	for ts in range(2, n+1):
243	if self.indent_level(ts) >= other.indent_level(ts):
244	return False
245	return True
246
247	# return a list of tuples (ts, i1, i2) such that
248	# i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
249	# Intended to be used after not self.less(other) is known, in which
250	# case it will return at least one witnessing tab size.
251	def not_less_witness(self, other):
252	n = max(self.longest_run_of_spaces(),
253	other.longest_run_of_spaces()) + 1
254	a = []
255	for ts in range(1, n+1):
256	if self.indent_level(ts) >= other.indent_level(ts):
257	a.append( (ts,
258	self.indent_level(ts),
259	other.indent_level(ts)) )
260	return a
261
262	def format_witnesses(w):
263	firsts = map(lambda tup: str(tup[0]), w)
264	prefix = "at tab size"
265	if len(w) > 1:
266	prefix = prefix + "s"
267	return prefix + " " + ', '.join(firsts)
268
269	def process_tokens(tokens):
270	INDENT = tokenize.INDENT
271	DEDENT = tokenize.DEDENT
272	NEWLINE = tokenize.NEWLINE
273	JUNK = tokenize.COMMENT, tokenize.NL
274	indents = [Whitespace("")]
275	check_equal = 0
276
277	for (type, token, start, end, line) in tokens:
278	if type == NEWLINE:
279	# a program statement, or ENDMARKER, will eventually follow,
280	# after some (possibly empty) run of tokens of the form
	281	# (NL | COMMENT)* (INDENT | DEDENT+)?
282	# If an INDENT appears, setting check_equal is wrong, and will
283	# be undone when we see the INDENT.
284	check_equal = 1
285
286	elif type == INDENT:
287	check_equal = 0
288	thisguy = Whitespace(token)
289	if not indents[-1].less(thisguy):
290	witness = indents[-1].not_less_witness(thisguy)
291	msg = "indent not greater e.g. " + format_witnesses(witness)
292	raise NannyNag(start[0], msg, line)
293	indents.append(thisguy)
294
295	elif type == DEDENT:
296	# there's nothing we need to check here! what's important is
297	# that when the run of DEDENTs ends, the indentation of the
298	# program statement (or ENDMARKER) that triggered the run is
299	# equal to what's left at the top of the indents stack
300
301	# Ouch! This assert triggers if the last line of the source
302	# is indented and lacks a newline -- then DEDENTs pop out
303	# of thin air.
304	# assert check_equal # else no earlier NEWLINE, or an earlier INDENT
305	check_equal = 1
306
307	del indents[-1]
308
309	elif check_equal and type not in JUNK:
310	# this is the first "real token" following a NEWLINE, so it
311	# must be the first token of the next program statement, or an
312	# ENDMARKER; the "line" argument exposes the leading whitespace
313	# for this statement; in the case of ENDMARKER, line is an empty
314	# string, so will properly match the empty string with which the
315	# "indents" stack was seeded
316	check_equal = 0
317	thisguy = Whitespace(line)
318	if not indents[-1].equal(thisguy):
319	witness = indents[-1].not_equal_witness(thisguy)
320	msg = "indent not equal e.g. " + format_witnesses(witness)
321	raise NannyNag(start[0], msg, line)
322
323
324	if __name__ == '__main__':
325	main()