[OpenSPARC-T2-DV] / tools / src / nas,5.n2.os.2 / lib / python / lib / python2.4 / idlelib / PyParse.py

import re
import sys

# Reason last stmt is continued (or C_NONE if it's not).
C_NONE, C_BACKSLASH, C_STRING, C_BRACKET = range(4)

if 0:   # for throwaway debugging output
    def dump(*stuff):
        sys.__stdout__.write(" ".join(map(str, stuff)) + "\n")

# Find what looks like the start of a popular stmt.

_synchre = re.compile(r"""
    ^
    [ \t]*
    (?: if
    |   for
    |   while
    |   else
    |   def
    |   return
    |   assert
    |   break
    |   class
    |   continue
    |   elif
    |   try
    |   except
    |   raise
    |   import
    |   yield
    )
    \b
""", re.VERBOSE | re.MULTILINE).search

# Match blank line or non-indenting comment line.

_junkre = re.compile(r"""
    [ \t]*
    (?: \# \S .* )?
    \n
""", re.VERBOSE).match

# Match any flavor of string; the terminating quote is optional
# so that we're robust in the face of incomplete program text.

_match_stringre = re.compile(r"""
    \""" [^"\\]* (?:
                     (?: \\. | "(?!"") )
                     [^"\\]*
                 )*
    (?: \""" )?

|   " [^"\\\n]* (?: \\. [^"\\\n]* )* "?

|   ''' [^'\\]* (?:
                   (?: \\. | '(?!'') )
                   [^'\\]*
                )*
    (?: ''' )?

|   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
""", re.VERBOSE | re.DOTALL).match

# Match a line that starts with something interesting;
# used to find the first item of a bracket structure.

_itemre = re.compile(r"""
    [ \t]*
    [^\s#\\]    # if we match, m.end()-1 is the interesting char
""", re.VERBOSE).match

# Match start of stmts that should be followed by a dedent.

_closere = re.compile(r"""
    \s*
    (?: return
    |   break
    |   continue
    |   raise
    |   pass
    )
    \b
""", re.VERBOSE).match

# Chew up non-special chars as quickly as possible.  If match is
# successful, m.end() less 1 is the index of the last boring char
# matched.  If match is unsuccessful, the string starts with an
# interesting char.

_chew_ordinaryre = re.compile(r"""
    [^[\](){}#'"\\]+
""", re.VERBOSE).match

# Build translation table to map uninteresting chars to "x", open
# brackets to "(", and close brackets to ")".

_tran = ['x'] * 256
for ch in "({[":
    _tran[ord(ch)] = '('
for ch in ")}]":
    _tran[ord(ch)] = ')'
for ch in "\"'\\\n#":
    _tran[ord(ch)] = ch
_tran = ''.join(_tran)
del ch

try:
    UnicodeType = type(unicode(""))
except NameError:
    UnicodeType = None

class Parser:

    def __init__(self, indentwidth, tabwidth):
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_str(self, str):
        assert len(str) == 0 or str[-1] == '\n'
        if type(str) is UnicodeType:
            # The parse functions have no idea what to do with Unicode, so
            # replace all Unicode characters with "x".  This is "safe"
            # so long as the only characters germane to parsing the structure
            # of Python are 7-bit ASCII.  It's *necessary* because Unicode
            # strings don't have a .translate() method that supports
            # deletechars.
            uniphooey = str
            str = []
            push = str.append
            for raw in map(ord, uniphooey):
                push(raw < 127 and chr(raw) or "x")
            str = "".join(str)
        self.str = str
        self.study_level = 0

    # Return index of a good place to begin parsing, as close to the
    # end of the string as possible.  This will be the start of some
    # popular stmt like "if" or "def".  Return None if none found:
    # the caller should pass more prior context then, if possible, or
    # if not (the entire program text up until the point of interest
    # has already been tried) pass 0 to set_lo.
    #
    # This will be reliable iff given a reliable is_char_in_string
    # function, meaning that when it says "no", it's absolutely
    # guaranteed that the char is not in a string.
    #
    # Ack, hack: in the shell window this kills us, because there's
    # no way to tell the differences between output, >>> etc and
    # user input.  Indeed, IDLE's first output line makes the rest
    # look like it's in an unclosed paren!:
    # Python 1.5.2 (#0, Apr 13 1999, ...

    def find_good_parse_start(self, use_ps1, is_char_in_string=None,
                              _synchre=_synchre):
        str, pos = self.str, None
        if use_ps1:
            # shell window
            ps1 = '\n' + sys.ps1
            i = str.rfind(ps1)
            if i >= 0:
                pos = i + len(ps1)
                # make it look like there's a newline instead
                # of ps1 at the start -- hacking here once avoids
                # repeated hackery later
                self.str = str[:pos-1] + '\n' + str[pos:]
            return pos

        # File window -- real work.
        if not is_char_in_string:
            # no clue -- make the caller pass everything
            return None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(str)
        for tries in range(5):
            i = str.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = str.rfind('\n', 0, i) + 1  # start of colon line
            m = _synchre(str, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply *aren't*
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(str)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while 1:
            m = _synchre(str, i)
            if m:
                s, i = m.span()
                if not is_char_in_string(s):
                    pos = s
            else:
                break
        return pos

    # Throw away the start of the string.  Intended to be called with
    # find_good_parse_start's result.

    def set_lo(self, lo):
        assert lo == 0 or self.str[lo-1] == '\n'
        if lo > 0:
            self.str = self.str[lo:]

    # As quickly as humanly possible <wink>, find the line numbers (0-
    # based) of the non-continuation lines.
    # Creates self.{goodlines, continuation}.

    def _study1(self):
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        str = self.str
        str = str.translate(_tran)
        str = str.replace('xxxxxxxx', 'x')
        str = str.replace('xxxx', 'x')
        str = str.replace('xx', 'x')
        str = str.replace('xx', 'x')
        str = str.replace('\nx', '\n')
        # note that replacing x\n with \n would be incorrect, because
        # x may be preceded by a backslash

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(str)
        while i < n:
            ch = str[i]
            i = i+1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno = lno + 1
                if level == 0:
                    push_good(lno)
                    # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level = level + 1
                continue

            if ch == ')':
                if level:
                    level = level - 1
                    # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if str[i-1:i+2] == quote * 3:
                    quote = quote * 3
                w = len(quote) - 1
                i = i+w
                while i < n:
                    ch = str[i]
                    i = i+1

                    if ch == 'x':
                        continue

                    if str[i-1:i+w] == quote:
                        i = i+w
                        break

                    if ch == '\n':
                        lno = lno + 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if str[i] == '\n':
                            lno = lno + 1
                        i = i+1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    continuation = C_STRING
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                i = str.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if str[i] == '\n':
                lno = lno + 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i = i+1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if continuation != C_STRING and level > 0:
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        self._study1()
        return self.continuation

    # study1 was sufficient to determine the continuation status,
    # but doing more requires looking at every character.  study2
    # does this for the last interesting statement in the block.
    # Creates:
    #     self.stmt_start, stmt_end
    #         slice indices of last interesting stmt
    #     self.lastch
    #         last non-whitespace character before optional trailing
    #         comment
    #     self.lastopenbracketpos
    #         if continuation is C_BRACKET, index of last open bracket

    def _study2(self):
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        str, goodlines = self.str, self.goodlines
        i = len(goodlines) - 1
        p = len(str)    # index of newest line
        while i:
            assert p
            # p is the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for nothing in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = str.rfind('\n', 0, p-1) + 1
            # The stmt str[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if  _junkre(str, p):
                i = i-1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(str, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and str[i] in " \t\n":
                    i = i-1
                if i >= p:
                    lastch = str[i]
                p = newp
                if p >= q:
                    break

            ch = str[p]

            if ch in "([{":
                push_stack(p)
                lastch = ch
                p = p+1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p = p+1
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                lastch = ch
                p = _match_stringre(str, p, q).end()
                continue

            if ch == '#':
                # consume comment and trailing newline
                p = str.find('\n', p, q) + 1
                assert p > 0
                continue

            assert ch == '\\'
            p = p+1     # beyond backslash
            assert p < q
            if str[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + str[p]
            p = p+1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        if stack:
            self.lastopenbracketpos = stack[-1]

    # Assuming continuation is C_BRACKET, return the number
    # of spaces the next line should be indented.

    def compute_bracket_indent(self):
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        str = self.str
        n = len(str)
        origi = i = str.rfind('\n', 0, j) + 1
        j = j+1     # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(str, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            else:
                # this line is junk; advance to next line
                i = j = str.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while str[j] in " \t":
                j = j+1
            extra = self.indentwidth
        return len(str[i:j].expandtabs(self.tabwidth)) + extra

    # Return number of physical lines in last stmt (whether or not
    # it's an interesting stmt!  this is intended to be called when
    # continuation is C_BACKSLASH).

    def get_num_lines_in_stmt(self):
        self._study1()
        goodlines = self.goodlines
        return goodlines[-1] - goodlines[-2]

    # Assuming continuation is C_BACKSLASH, return the number of spaces
    # the next line should be indented.  Also assuming the new line is
    # the first one following the initial line of the stmt.

    def compute_backslash_indent(self):
        self._study2()
        assert self.continuation == C_BACKSLASH
        str = self.str
        i = self.stmt_start
        while str[i] in " \t":
            i = i+1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = str.find('\n', startpos) + 1
        found = level = 0
        while i < endpos:
            ch = str[i]
            if ch in "([{":
                level = level + 1
                i = i+1
            elif ch in ")]}":
                if level:
                    level = level - 1
                i = i+1
            elif ch == '"' or ch == "'":
                i = _match_stringre(str, i, endpos).end()
            elif ch == '#':
                break
            elif level == 0 and ch == '=' and \
                   (i == 0 or str[i-1] not in "=<>!") and \
                   str[i+1] != '=':
                found = 1
                break
            else:
                i = i+1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i = i+1     # move beyond the =
            found = re.match(r"\s*\\", str[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while str[i] not in " \t\n":
                i = i+1

        return len(str[self.stmt_start:i].expandtabs(\
                                     self.tabwidth)) + 1

    # Return the leading whitespace on the initial line of the last
    # interesting stmt.

    def get_base_indent_string(self):
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        str = self.str
        while j < n and str[j] in " \t":
            j = j + 1
        return str[i:j]

    # Did the last interesting stmt open a block?

    def is_block_opener(self):
        self._study2()
        return self.lastch == ':'

    # Did the last interesting stmt close a block?

    def is_block_closer(self):
        self._study2()
        return _closere(self.str, self.stmt_start) is not None

    # index of last open bracket ({[, or None if none
    lastopenbracketpos = None

    def get_last_open_bracket_pos(self):
        self._study2()
        return self.lastopenbracketpos
Commit	Line	Data
86530b38 AT	1	import re
	2	import sys
	3
	4	# Reason last stmt is continued (or C_NONE if it's not).
	5	C_NONE, C_BACKSLASH, C_STRING, C_BRACKET = range(4)
	6
	7	if 0: # for throwaway debugging output
	8	def dump(*stuff):
	9	sys.__stdout__.write(" ".join(map(str, stuff)) + "\n")
	10
	11	# Find what looks like the start of a popular stmt.
	12
	13	_synchre = re.compile(r"""
	14	^
	15	[ \t]*
	16	(?: if
	17	\| for
	18	\| while
	19	\| else
	20	\| def
	21	\| return
	22	\| assert
	23	\| break
	24	\| class
	25	\| continue
	26	\| elif
	27	\| try
	28	\| except
	29	\| raise
	30	\| import
	31	\| yield
	32	)
	33	\b
	34	""", re.VERBOSE \| re.MULTILINE).search
	35
	36	# Match blank line or non-indenting comment line.
	37
	38	_junkre = re.compile(r"""
	39	[ \t]*
	40	(?: \# \S .* )?
	41	\n
	42	""", re.VERBOSE).match
	43
	44	# Match any flavor of string; the terminating quote is optional
	45	# so that we're robust in the face of incomplete program text.
	46
	47	_match_stringre = re.compile(r"""
	48	\""" [^"\\]* (?:
	49	(?: \\. \| "(?!"") )
	50	[^"\\]*
	51	)*
	52	(?: \""" )?
	53
	54	\| " [^"\\\n]* (?: \\. [^"\\\n]* )* "?
	55
	56	\| ''' [^'\\]* (?:
	57	(?: \\. \| '(?!'') )
	58	[^'\\]*
	59	)*
	60	(?: ''' )?
	61
	62	\| ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
	63	""", re.VERBOSE \| re.DOTALL).match
	64
65	# Match a line that starts with something interesting;
66	# used to find the first item of a bracket structure.
67
68	_itemre = re.compile(r"""
69	[ \t]*
70	[^\s#\\] # if we match, m.end()-1 is the interesting char
71	""", re.VERBOSE).match
72
73	# Match start of stmts that should be followed by a dedent.
74
75	_closere = re.compile(r"""
76	\s*
77	(?: return
78	\| break
79	\| continue
80	\| raise
81	\| pass
82	)
83	\b
84	""", re.VERBOSE).match
85
86	# Chew up non-special chars as quickly as possible. If match is
87	# successful, m.end() less 1 is the index of the last boring char
88	# matched. If match is unsuccessful, the string starts with an
89	# interesting char.
90
91	_chew_ordinaryre = re.compile(r"""
92	[^[\](){}#'"\\]+
93	""", re.VERBOSE).match
94
95	# Build translation table to map uninteresting chars to "x", open
96	# brackets to "(", and close brackets to ")".
97
98	_tran = ['x'] * 256
99	for ch in "({[":
100	_tran[ord(ch)] = '('
101	for ch in ")}]":
102	_tran[ord(ch)] = ')'
103	for ch in "\"'\\\n#":
104	_tran[ord(ch)] = ch
105	_tran = ''.join(_tran)
106	del ch
107
108	try:
109	UnicodeType = type(unicode(""))
110	except NameError:
111	UnicodeType = None
112
113	class Parser:
114
115	def __init__(self, indentwidth, tabwidth):
116	self.indentwidth = indentwidth
117	self.tabwidth = tabwidth
118
119	def set_str(self, str):
120	assert len(str) == 0 or str[-1] == '\n'
121	if type(str) is UnicodeType:
122	# The parse functions have no idea what to do with Unicode, so
123	# replace all Unicode characters with "x". This is "safe"
124	# so long as the only characters germane to parsing the structure
125	# of Python are 7-bit ASCII. It's necessary because Unicode
126	# strings don't have a .translate() method that supports
127	# deletechars.
128	uniphooey = str
129	str = []
130	push = str.append
131	for raw in map(ord, uniphooey):
132	push(raw < 127 and chr(raw) or "x")
133	str = "".join(str)
134	self.str = str
135	self.study_level = 0
136
137	# Return index of a good place to begin parsing, as close to the
138	# end of the string as possible. This will be the start of some
139	# popular stmt like "if" or "def". Return None if none found:
140	# the caller should pass more prior context then, if possible, or
141	# if not (the entire program text up until the point of interest
142	# has already been tried) pass 0 to set_lo.
143	#
144	# This will be reliable iff given a reliable is_char_in_string
145	# function, meaning that when it says "no", it's absolutely
146	# guaranteed that the char is not in a string.
147	#
148	# Ack, hack: in the shell window this kills us, because there's
149	# no way to tell the differences between output, >>> etc and
150	# user input. Indeed, IDLE's first output line makes the rest
151	# look like it's in an unclosed paren!:
152	# Python 1.5.2 (#0, Apr 13 1999, ...
153
154	def find_good_parse_start(self, use_ps1, is_char_in_string=None,
155	_synchre=_synchre):
156	str, pos = self.str, None
157	if use_ps1:
158	# shell window
159	ps1 = '\n' + sys.ps1
160	i = str.rfind(ps1)
161	if i >= 0:
162	pos = i + len(ps1)
163	# make it look like there's a newline instead
164	# of ps1 at the start -- hacking here once avoids
165	# repeated hackery later
166	self.str = str[:pos-1] + '\n' + str[pos:]
167	return pos
168
169	# File window -- real work.
170	if not is_char_in_string:
171	# no clue -- make the caller pass everything
172	return None
173
174	# Peek back from the end for a good place to start,
175	# but don't try too often; pos will be left None, or
176	# bumped to a legitimate synch point.
177	limit = len(str)
178	for tries in range(5):
179	i = str.rfind(":\n", 0, limit)
180	if i < 0:
181	break
182	i = str.rfind('\n', 0, i) + 1 # start of colon line
183	m = _synchre(str, i, limit)
184	if m and not is_char_in_string(m.start()):
185	pos = m.start()
186	break
187	limit = i
188	if pos is None:
189	# Nothing looks like a block-opener, or stuff does
190	# but is_char_in_string keeps returning true; most likely
191	# we're in or near a giant string, the colorizer hasn't
192	# caught up enough to be helpful, or there simply aren't
193	# any interesting stmts. In any of these cases we're
194	# going to have to parse the whole thing to be sure, so
195	# give it one last try from the start, but stop wasting
196	# time here regardless of the outcome.
197	m = _synchre(str)
198	if m and not is_char_in_string(m.start()):
199	pos = m.start()
200	return pos
201
202	# Peeking back worked; look forward until _synchre no longer
203	# matches.
204	i = pos + 1
205	while 1:
206	m = _synchre(str, i)
207	if m:
208	s, i = m.span()
209	if not is_char_in_string(s):
210	pos = s
211	else:
212	break
213	return pos
214
215	# Throw away the start of the string. Intended to be called with
216	# find_good_parse_start's result.
217
218	def set_lo(self, lo):
219	assert lo == 0 or self.str[lo-1] == '\n'
220	if lo > 0:
221	self.str = self.str[lo:]
222
223	# As quickly as humanly possible <wink>, find the line numbers (0-
224	# based) of the non-continuation lines.
225	# Creates self.{goodlines, continuation}.
226
227	def _study1(self):
228	if self.study_level >= 1:
229	return
230	self.study_level = 1
231
232	# Map all uninteresting characters to "x", all open brackets
233	# to "(", all close brackets to ")", then collapse runs of
234	# uninteresting characters. This can cut the number of chars
235	# by a factor of 10-40, and so greatly speed the following loop.
236	str = self.str
237	str = str.translate(_tran)
238	str = str.replace('xxxxxxxx', 'x')
239	str = str.replace('xxxx', 'x')
240	str = str.replace('xx', 'x')
241	str = str.replace('xx', 'x')
242	str = str.replace('\nx', '\n')
243	# note that replacing x\n with \n would be incorrect, because
244	# x may be preceded by a backslash
245
246	# March over the squashed version of the program, accumulating
247	# the line numbers of non-continued stmts, and determining
248	# whether & why the last stmt is a continuation.
249	continuation = C_NONE
250	level = lno = 0 # level is nesting level; lno is line number
251	self.goodlines = goodlines = [0]
252	push_good = goodlines.append
253	i, n = 0, len(str)
254	while i < n:
255	ch = str[i]
256	i = i+1
257
258	# cases are checked in decreasing order of frequency
259	if ch == 'x':
260	continue
261
262	if ch == '\n':
263	lno = lno + 1
264	if level == 0:
265	push_good(lno)
266	# else we're in an unclosed bracket structure
267	continue
268
269	if ch == '(':
270	level = level + 1
271	continue
272
273	if ch == ')':
274	if level:
275	level = level - 1
276	# else the program is invalid, but we can't complain
277	continue
278
279	if ch == '"' or ch == "'":
280	# consume the string
281	quote = ch
282	if str[i-1:i+2] == quote * 3:
283	quote = quote * 3
284	w = len(quote) - 1
285	i = i+w
286	while i < n:
287	ch = str[i]
288	i = i+1
289
290	if ch == 'x':
291	continue
292
293	if str[i-1:i+w] == quote:
294	i = i+w
295	break
296
297	if ch == '\n':
298	lno = lno + 1
299	if w == 0:
300	# unterminated single-quoted string
301	if level == 0:
302	push_good(lno)
303	break
304	continue
305
306	if ch == '\\':
307	assert i < n
308	if str[i] == '\n':
309	lno = lno + 1
310	i = i+1
311	continue
312
313	# else comment char or paren inside string
314
315	else:
316	# didn't break out of the loop, so we're still
317	# inside a string
318	continuation = C_STRING
319	continue # with outer loop
320
321	if ch == '#':
322	# consume the comment
323	i = str.find('\n', i)
324	assert i >= 0
325	continue
326
327	assert ch == '\\'
328	assert i < n
329	if str[i] == '\n':
330	lno = lno + 1
331	if i+1 == n:
332	continuation = C_BACKSLASH
333	i = i+1
334
335	# The last stmt may be continued for all 3 reasons.
336	# String continuation takes precedence over bracket
337	# continuation, which beats backslash continuation.
338	if continuation != C_STRING and level > 0:
339	continuation = C_BRACKET
340	self.continuation = continuation
341
342	# Push the final line number as a sentinel value, regardless of
343	# whether it's continued.
344	assert (continuation == C_NONE) == (goodlines[-1] == lno)
345	if goodlines[-1] != lno:
346	push_good(lno)
347
348	def get_continuation_type(self):
349	self._study1()
350	return self.continuation
351
352	# study1 was sufficient to determine the continuation status,
353	# but doing more requires looking at every character. study2
354	# does this for the last interesting statement in the block.
355	# Creates:
356	# self.stmt_start, stmt_end
357	# slice indices of last interesting stmt
358	# self.lastch
359	# last non-whitespace character before optional trailing
360	# comment
361	# self.lastopenbracketpos
362	# if continuation is C_BRACKET, index of last open bracket
363
364	def _study2(self):
365	if self.study_level >= 2:
366	return
367	self._study1()
368	self.study_level = 2
369
370	# Set p and q to slice indices of last interesting stmt.
371	str, goodlines = self.str, self.goodlines
372	i = len(goodlines) - 1
373	p = len(str) # index of newest line
374	while i:
375	assert p
376	# p is the index of the stmt at line number goodlines[i].
377	# Move p back to the stmt at line number goodlines[i-1].
378	q = p
379	for nothing in range(goodlines[i-1], goodlines[i]):
380	# tricky: sets p to 0 if no preceding newline
381	p = str.rfind('\n', 0, p-1) + 1
382	# The stmt str[p:q] isn't a continuation, but may be blank
383	# or a non-indenting comment line.
384	if _junkre(str, p):
385	i = i-1
386	else:
387	break
388	if i == 0:
389	# nothing but junk!
390	assert p == 0
391	q = p
392	self.stmt_start, self.stmt_end = p, q
393
394	# Analyze this stmt, to find the last open bracket (if any)
395	# and last interesting character (if any).
396	lastch = ""
397	stack = [] # stack of open bracket indices
398	push_stack = stack.append
399	while p < q:
400	# suck up all except ()[]{}'"#\\
401	m = _chew_ordinaryre(str, p, q)
402	if m:
403	# we skipped at least one boring char
404	newp = m.end()
405	# back up over totally boring whitespace
406	i = newp - 1 # index of last boring char
407	while i >= p and str[i] in " \t\n":
408	i = i-1
409	if i >= p:
410	lastch = str[i]
411	p = newp
412	if p >= q:
413	break
414
415	ch = str[p]
416
417	if ch in "([{":
418	push_stack(p)
419	lastch = ch
420	p = p+1
421	continue
422
423	if ch in ")]}":
424	if stack:
425	del stack[-1]
426	lastch = ch
427	p = p+1
428	continue
429
430	if ch == '"' or ch == "'":
431	# consume string
432	# Note that study1 did this with a Python loop, but
433	# we use a regexp here; the reason is speed in both
434	# cases; the string may be huge, but study1 pre-squashed
435	# strings to a couple of characters per line. study1
436	# also needed to keep track of newlines, and we don't
437	# have to.
438	lastch = ch
439	p = _match_stringre(str, p, q).end()
440	continue
441
442	if ch == '#':
443	# consume comment and trailing newline
444	p = str.find('\n', p, q) + 1
445	assert p > 0
446	continue
447
448	assert ch == '\\'
449	p = p+1 # beyond backslash
450	assert p < q
451	if str[p] != '\n':
452	# the program is invalid, but can't complain
453	lastch = ch + str[p]
454	p = p+1 # beyond escaped char
455
456	# end while p < q:
457
458	self.lastch = lastch
459	if stack:
460	self.lastopenbracketpos = stack[-1]
461
462	# Assuming continuation is C_BRACKET, return the number
463	# of spaces the next line should be indented.
464
465	def compute_bracket_indent(self):
466	self._study2()
467	assert self.continuation == C_BRACKET
468	j = self.lastopenbracketpos
469	str = self.str
470	n = len(str)
471	origi = i = str.rfind('\n', 0, j) + 1
472	j = j+1 # one beyond open bracket
473	# find first list item; set i to start of its line
474	while j < n:
475	m = _itemre(str, j)
476	if m:
477	j = m.end() - 1 # index of first interesting char
478	extra = 0
479	break
480	else:
481	# this line is junk; advance to next line
482	i = j = str.find('\n', j) + 1
483	else:
484	# nothing interesting follows the bracket;
485	# reproduce the bracket line's indentation + a level
486	j = i = origi
487	while str[j] in " \t":
488	j = j+1
489	extra = self.indentwidth
490	return len(str[i:j].expandtabs(self.tabwidth)) + extra
491
492	# Return number of physical lines in last stmt (whether or not
493	# it's an interesting stmt! this is intended to be called when
494	# continuation is C_BACKSLASH).
495
496	def get_num_lines_in_stmt(self):
497	self._study1()
498	goodlines = self.goodlines
499	return goodlines[-1] - goodlines[-2]
500
501	# Assuming continuation is C_BACKSLASH, return the number of spaces
502	# the next line should be indented. Also assuming the new line is
503	# the first one following the initial line of the stmt.
504
505	def compute_backslash_indent(self):
506	self._study2()
507	assert self.continuation == C_BACKSLASH
508	str = self.str
509	i = self.stmt_start
510	while str[i] in " \t":
511	i = i+1
512	startpos = i
513
514	# See whether the initial line starts an assignment stmt; i.e.,
515	# look for an = operator
516	endpos = str.find('\n', startpos) + 1
517	found = level = 0
518	while i < endpos:
519	ch = str[i]
520	if ch in "([{":
521	level = level + 1
522	i = i+1
523	elif ch in ")]}":
524	if level:
525	level = level - 1
526	i = i+1
527	elif ch == '"' or ch == "'":
528	i = _match_stringre(str, i, endpos).end()
529	elif ch == '#':
530	break
531	elif level == 0 and ch == '=' and \
532	(i == 0 or str[i-1] not in "=<>!") and \
533	str[i+1] != '=':
534	found = 1
535	break
536	else:
537	i = i+1
538
539	if found:
540	# found a legit =, but it may be the last interesting
541	# thing on the line
542	i = i+1 # move beyond the =
543	found = re.match(r"\s*\\", str[i:endpos]) is None
544
545	if not found:
546	# oh well ... settle for moving beyond the first chunk
547	# of non-whitespace chars
548	i = startpos
549	while str[i] not in " \t\n":
550	i = i+1
551
552	return len(str[self.stmt_start:i].expandtabs(\
553	self.tabwidth)) + 1
554
555	# Return the leading whitespace on the initial line of the last
556	# interesting stmt.
557
558	def get_base_indent_string(self):
559	self._study2()
560	i, n = self.stmt_start, self.stmt_end
561	j = i
562	str = self.str
563	while j < n and str[j] in " \t":
564	j = j + 1
565	return str[i:j]
566
567	# Did the last interesting stmt open a block?
568
569	def is_block_opener(self):
570	self._study2()
571	return self.lastch == ':'
572
573	# Did the last interesting stmt close a block?
574
575	def is_block_closer(self):
576	self._study2()
577	return _closere(self.str, self.stmt_start) is not None
578
579	# index of last open bracket ({[, or None if none
580	lastopenbracketpos = None
581
582	def get_last_open_bracket_pos(self):
583	self._study2()
584	return self.lastopenbracketpos