git.subgeniuskitty.com - OpenSPARC-T2-SAM/.git/blame - sam-t2/devtools/v9/lib/python2.4/regsub.py

Commit	Line	Data
920dae64 AT	1	"""Regexp-based split and replace using the obsolete regex module.
	2
	3	This module is only for backward compatibility. These operations
	4	are now provided by the new regular expression module, "re".
	5
	6	sub(pat, repl, str): replace first occurrence of pattern in string
	7	gsub(pat, repl, str): replace all occurrences of pattern in string
	8	split(str, pat, maxsplit): split string using pattern as delimiter
	9	splitx(str, pat, maxsplit): split string using pattern as delimiter plus
	10	return delimiters
	11	"""
	12
	13	import warnings
	14	warnings.warn("the regsub module is deprecated; please use re.sub()",
	15	DeprecationWarning)
	16
	17	# Ignore further deprecation warnings about this module
	18	warnings.filterwarnings("ignore", "", DeprecationWarning, __name__)
	19
	20	import regex
	21
	22	__all__ = ["sub","gsub","split","splitx","capwords"]
	23
	24	# Replace first occurrence of pattern pat in string str by replacement
	25	# repl. If the pattern isn't found, the string is returned unchanged.
	26	# The replacement may contain references \digit to subpatterns and
	27	# escaped backslashes. The pattern may be a string or an already
	28	# compiled pattern.
	29
	30	def sub(pat, repl, str):
	31	prog = compile(pat)
	32	if prog.search(str) >= 0:
	33	regs = prog.regs
	34	a, b = regs[0]
	35	str = str[:a] + expand(repl, regs, str) + str[b:]
	36	return str
	37
	38
	39	# Replace all (non-overlapping) occurrences of pattern pat in string
	40	# str by replacement repl. The same rules as for sub() apply.
	41	# Empty matches for the pattern are replaced only when not adjacent to
	42	# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
	43
	44	def gsub(pat, repl, str):
	45	prog = compile(pat)
	46	new = ''
	47	start = 0
	48	first = 1
	49	while prog.search(str, start) >= 0:
	50	regs = prog.regs
	51	a, b = regs[0]
	52	if a == b == start and not first:
	53	if start >= len(str) or prog.search(str, start+1) < 0:
	54	break
	55	regs = prog.regs
	56	a, b = regs[0]
	57	new = new + str[start:a] + expand(repl, regs, str)
	58	start = b
	59	first = 0
	60	new = new + str[start:]
	61	return new
	62
	63
	64	# Split string str in fields separated by delimiters matching pattern
65	# pat. Only non-empty matches for the pattern are considered, so e.g.
66	# split('abc', '') returns ['abc'].
67	# The optional 3rd argument sets the number of splits that are performed.
68
69	def split(str, pat, maxsplit = 0):
70	return intsplit(str, pat, maxsplit, 0)
71
def splitx(str, pat, maxsplit = 0):
    """Split str into fields and also return the delimiters.

    Works like split(): only non-empty matches for the pattern are
    considered, so e.g. a pattern of '' leaves 'abc' as ['abc'].  The
    matched delimiters are included in the returned list, each following
    the field it terminates.  The optional third argument sets the number
    of splits that are performed.
    """
    return intsplit(str, pat, maxsplit, retain=1)
81
def intsplit(str, pat, maxsplit, retain):
    """Internal function used to implement split() and splitx().

    Returns the list of fields of str delimited by non-empty matches of
    pat.  When retain is true, every matched delimiter is appended to the
    list right after the field it ends.  A non-zero maxsplit stops the
    scan after that many splits; the unsplit rest of str is always the
    last list element.
    """
    prog = compile(pat)
    fields = []
    field_start = 0     # where the field currently being collected begins
    pos = 0             # where the next search begins
    nsplits = 0
    while prog.search(str, pos) >= 0:
        match_start, match_end = prog.regs[0]
        if match_start == match_end:
            # Empty match: not a delimiter, advance the search position.
            pos += 1
            if pos >= len(str):
                break
            continue
        fields.append(str[field_start:match_start])
        if retain:
            fields.append(str[match_start:match_end])
        field_start = pos = match_end
        nsplits += 1
        if maxsplit and nsplits >= maxsplit:
            break
    fields.append(str[field_start:])
    return fields
106
107
def capwords(str, pat='[^a-zA-Z0-9_]+'):
    """Capitalize the words of str, where pat matches the word separators.

    The string is split with splitx(), so the separators are kept and the
    result is the input with only the words changed by capitalize().
    """
    pieces = splitx(str, pat)
    # splitx() alternates field, delimiter, field, ...: the words sit at
    # the even positions.
    pieces[::2] = [word.capitalize() for word in pieces[::2]]
    return "".join(pieces)
115
116
117	# Internal subroutines:
118	# compile(pat): compile a pattern, caching already compiled patterns
119	# expand(repl, regs, str): expand \digit escapes in replacement string
120
121
# Cache of compiled regular expressions.
#
# Keys are (pattern string, regex syntax in effect when it was compiled);
# values are the corresponding compiled patterns.

cache = {}

def compile(pat):
    """Return a compiled version of pat, using the cache when possible.

    A pattern string seen before under the current regex syntax is served
    from the cache; otherwise it is compiled now and remembered.  Anything
    that is not a plain string is assumed to be an already compiled
    pattern and is returned as is.
    """
    if type(pat) is not str:
        return pat                      # Assume it is a compiled regex
    key = (pat, regex.get_syntax())
    try:
        prog = cache[key]               # Get it from the cache
    except KeyError:
        prog = cache[key] = regex.compile(pat)
    return prog
142
143
def clear_cache():
    """Forget all cached patterns by rebinding cache to a new empty dict."""
    global cache
    cache = dict()
147
148
def expand(repl, regs, str):
    """Expand \\digit escapes in the replacement string repl.

    Each occurrence of \\digit is replaced by the substring of str
    indicated by regs[digit], where regs is a sequence of (start, end)
    index pairs.  To include a literal backslash in the replacement,
    double it; other backslash escapes are left unchanged (i.e. the
    backslash and the following character are both copied).  A single
    trailing backslash is copied as is.

    Returns repl itself when it contains no backslash at all.
    """
    if '\\' not in repl:
        return repl
    # Gather the fragments in a list and join once; this avoids building
    # the result by repeated string concatenation.
    pieces = []
    i = 0
    n = len(repl)
    while i < n:
        c = repl[i]
        i += 1
        if c != '\\' or i >= n:
            # Ordinary character, or a backslash with nothing after it.
            pieces.append(c)
            continue
        c = repl[i]
        i += 1
        if '0' <= c <= '9':
            a, b = regs[int(c)]
            pieces.append(str[a:b])
        elif c == '\\':
            pieces.append(c)
        else:
            pieces.append('\\' + c)
    return ''.join(pieces)
175
176
177	# Test program, reads sequences "pat repl str" from stdin.
178	# Optional argument specifies pattern used to split lines.
179
180	def test():
181	import sys
182	if sys.argv[1:]:
183	delpat = sys.argv[1]
184	else:
185	delpat = '[ \t\n]+'
186	while 1:
187	if sys.stdin.isatty(): sys.stderr.write('--> ')
188	line = sys.stdin.readline()
189	if not line: break
190	if line[-1] == '\n': line = line[:-1]
191	fields = split(line, delpat)
192	if len(fields) != 3:
193	print 'Sorry, not three fields'
194	print 'split:', repr(fields)
195	continue
196	[pat, repl, str] = split(line, delpat)
197	print 'sub :', repr(sub(pat, repl, str))
198	print 'gsub:', repr(gsub(pat, repl, str))