Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / v9 / lib / python2.4 / test / test_re.py
CommitLineData
920dae64
AT
1import sys
2sys.path = ['.'] + sys.path
3
4from test.test_support import verbose, run_unittest
5import re
6from sre import Scanner
7import sys, os, traceback
8from weakref import proxy
9
10# Misc tests from Tim Peters' re.doc
11
# WARNING: Don't change details in these tests if you don't know
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
15
16import unittest
17
18class ReTests(unittest.TestCase):
19
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
25
26 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
31 self.assertEqual(re.search('x', 'aaa'), None)
32 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
36 self.assertEqual(re.match('a+', 'xxx'), None)
37
38 def bump_num(self, matchobj):
39 int_value = int(matchobj.group(0))
40 return str(int_value + 1)
41
42 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
48
49 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
51
52 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
56
57 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
61
62 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
67
68 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
69
70 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
74
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
77 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
85
86 def test_sub_template_numeric_escape(self):
87 # bug 776311 and friends
88 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
89 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
90 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
91 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
92 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
93 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
94 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
95
96 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
97 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
98
99 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
100 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
101 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
102 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
103 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
104
105 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
106 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
107
108 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
109 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
110 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
111 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
112 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
113 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
114 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
115 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
116 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
117 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
118 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
119 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
120
121 # in python2.3 (etc), these loop endlessly in sre_parser.py
122 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
123 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
124 'xz8')
125 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
126 'xza')
127
128 def test_qualified_re_sub(self):
129 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
130 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
131
132 def test_bug_114660(self):
133 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
134 'hello there')
135
136 def test_bug_462270(self):
137 # Test for empty sub() behaviour, see SF bug #462270
138 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
139 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
140
141 def test_symbolic_refs(self):
142 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
143 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
144 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
145 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
146 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
147 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
148 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
149 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
150 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
151
152 def test_re_subn(self):
153 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
154 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
155 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
156 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
157 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
158
159 def test_re_split(self):
160 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
161 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
162 self.assertEqual(re.split("(:*)", ":a:b::c"),
163 ['', ':', 'a', ':', 'b', '::', 'c'])
164 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
165 self.assertEqual(re.split("(:)*", ":a:b::c"),
166 ['', ':', 'a', ':', 'b', ':', 'c'])
167 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
168 ['', ':', 'a', ':b::', 'c'])
169 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
170 ['', None, ':', 'a', None, ':', '', 'b', None, '',
171 None, '::', 'c'])
172 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
173 ['', 'a', '', '', 'c'])
174
175 def test_qualified_re_split(self):
176 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
177 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
178 self.assertEqual(re.split("(:)", ":a:b::c", 2),
179 ['', ':', 'a', ':', 'b::c'])
180 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
181 ['', ':', 'a', ':', 'b::c'])
182
183 def test_re_findall(self):
184 self.assertEqual(re.findall(":+", "abc"), [])
185 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
186 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
187 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
188 (":", ":"),
189 (":", "::")])
190
191 def test_bug_117612(self):
192 self.assertEqual(re.findall(r"(a|(b))", "aba"),
193 [("a", ""),("b", "b"),("a", "")])
194
195 def test_re_match(self):
196 self.assertEqual(re.match('a', 'a').groups(), ())
197 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
198 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
199 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
200 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
201
202 pat = re.compile('((a)|(b))(c)?')
203 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
204 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
205 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
206 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
207 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
208
209 # A single group
210 m = re.match('(a)', 'a')
211 self.assertEqual(m.group(0), 'a')
212 self.assertEqual(m.group(0), 'a')
213 self.assertEqual(m.group(1), 'a')
214 self.assertEqual(m.group(1, 1), ('a', 'a'))
215
216 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
217 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
218 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
219 (None, 'b', None))
220 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
221
222 def test_re_groupref_exists(self):
223 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
224 ('(', 'a'))
225 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
226 (None, 'a'))
227 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
228 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
229 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
230 ('a', 'b'))
231 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
232 (None, 'd'))
233 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
234 (None, 'd'))
235 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
236 ('a', ''))
237
238 # Tests for bug #1177831: exercise groups other than the first group
239 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
240 self.assertEqual(p.match('abc').groups(),
241 ('a', 'b', 'c'))
242 self.assertEqual(p.match('ad').groups(),
243 ('a', None, 'd'))
244 self.assertEqual(p.match('abd'), None)
245 self.assertEqual(p.match('ac'), None)
246
247
248 def test_re_groupref(self):
249 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
250 ('|', 'a'))
251 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
252 (None, 'a'))
253 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
254 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
255 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
256 ('a', 'a'))
257 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
258 (None, None))
259
260 def test_groupdict(self):
261 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
262 'first second').groupdict(),
263 {'first':'first', 'second':'second'})
264
265 def test_expand(self):
266 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
267 "first second")
268 .expand(r"\2 \1 \g<second> \g<first>"),
269 "second first second first")
270
271 def test_repeat_minmax(self):
272 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
273 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
274 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
275 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
276
277 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
278 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
279 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
280 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
281 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
282 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
283 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
284 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
285
286 self.assertEqual(re.match("^x{1}$", "xxx"), None)
287 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
288 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
289 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
290
291 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
292 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
293 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
294 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
295 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
296 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
297 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
298 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
299
300 self.assertEqual(re.match("^x{}$", "xxx"), None)
301 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
302
303 def test_getattr(self):
304 self.assertEqual(re.match("(a)", "a").pos, 0)
305 self.assertEqual(re.match("(a)", "a").endpos, 1)
306 self.assertEqual(re.match("(a)", "a").string, "a")
307 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
308 self.assertNotEqual(re.match("(a)", "a").re, None)
309
310 def test_special_escapes(self):
311 self.assertEqual(re.search(r"\b(b.)\b",
312 "abcd abc bcd bx").group(1), "bx")
313 self.assertEqual(re.search(r"\B(b.)\B",
314 "abc bcd bc abxd").group(1), "bx")
315 self.assertEqual(re.search(r"\b(b.)\b",
316 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
317 self.assertEqual(re.search(r"\B(b.)\B",
318 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
319 self.assertEqual(re.search(r"\b(b.)\b",
320 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
321 self.assertEqual(re.search(r"\B(b.)\B",
322 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
323 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
324 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
325 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
326 self.assertEqual(re.search(r"\b(b.)\b",
327 u"abcd abc bcd bx").group(1), "bx")
328 self.assertEqual(re.search(r"\B(b.)\B",
329 u"abc bcd bc abxd").group(1), "bx")
330 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
331 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
332 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
333 self.assertEqual(re.search(r"\d\D\w\W\s\S",
334 "1aa! a").group(0), "1aa! a")
335 self.assertEqual(re.search(r"\d\D\w\W\s\S",
336 "1aa! a", re.LOCALE).group(0), "1aa! a")
337 self.assertEqual(re.search(r"\d\D\w\W\s\S",
338 "1aa! a", re.UNICODE).group(0), "1aa! a")
339
340 def test_ignore_case(self):
341 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
342 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
343
344 def test_bigcharset(self):
345 self.assertEqual(re.match(u"([\u2222\u2223])",
346 u"\u2222").group(1), u"\u2222")
347 self.assertEqual(re.match(u"([\u2222\u2223])",
348 u"\u2222", re.UNICODE).group(1), u"\u2222")
349
350 def test_anyall(self):
351 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
352 "a\nb")
353 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
354 "a\n\nb")
355
356 def test_non_consuming(self):
357 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
358 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
359 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
360 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
361 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
362 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
363 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
364
365 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
366 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
367 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
368 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
369
370 def test_ignore_case(self):
371 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
372 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
373 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
374 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
375 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
376 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
377 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
378 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
379
380 def test_category(self):
381 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
382
383 def test_getlower(self):
384 import _sre
385 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
386 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
387 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
388
389 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
390 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
391
392 def test_not_literal(self):
393 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
394 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
395
396 def test_search_coverage(self):
397 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
398 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
399
400 def test_re_escape(self):
401 p=""
402 for i in range(0, 256):
403 p = p + chr(i)
404 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
405 True)
406 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
407
408 pat=re.compile(re.escape(p))
409 self.assertEqual(pat.match(p) is not None, True)
410 self.assertEqual(pat.match(p).span(), (0,256))
411
412 def test_pickling(self):
413 import pickle
414 self.pickle_test(pickle)
415 import cPickle
416 self.pickle_test(cPickle)
417
418 def pickle_test(self, pickle):
419 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
420 s = pickle.dumps(oldpat)
421 newpat = pickle.loads(s)
422 self.assertEqual(oldpat, newpat)
423
424 def test_constants(self):
425 self.assertEqual(re.I, re.IGNORECASE)
426 self.assertEqual(re.L, re.LOCALE)
427 self.assertEqual(re.M, re.MULTILINE)
428 self.assertEqual(re.S, re.DOTALL)
429 self.assertEqual(re.X, re.VERBOSE)
430
431 def test_flags(self):
432 for flag in [re.I, re.M, re.X, re.S, re.L]:
433 self.assertNotEqual(re.compile('^pattern$', flag), None)
434
435 def test_sre_character_literals(self):
436 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
437 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
438 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
439 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
440 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
441 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
442 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
443 self.assertRaises(re.error, re.match, "\911", "")
444
445 def test_sre_character_class_literals(self):
446 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
447 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
448 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
449 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
450 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
451 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
452 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
453 self.assertRaises(re.error, re.match, "[\911]", "")
454
455 def test_bug_113254(self):
456 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
457 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
458 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
459
460 def test_bug_527371(self):
461 # bug described in patches 527371/672491
462 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
463 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
464 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
465 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
466 self.assertEqual(re.match("((a))", "a").lastindex, 1)
467
468 def test_bug_545855(self):
469 # bug 545855 -- This pattern failed to cause a compile error as it
470 # should, instead provoking a TypeError.
471 self.assertRaises(re.error, re.compile, 'foo[a-')
472
473 def test_bug_418626(self):
474 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
475 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
476 # pattern '*?' on a long string.
477 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
478 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
479 20003)
480 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
481 # non-simple '*?' still used to hit the recursion limit, before the
482 # non-recursive scheme was implemented.
483 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
484
485 def test_bug_612074(self):
486 pat=u"["+re.escape(u"\u2039")+u"]"
487 self.assertEqual(re.compile(pat) and 1, 1)
488
489 def test_stack_overflow(self):
490 # nasty cases that used to overflow the straightforward recursive
491 # implementation of repeated groups.
492 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
493 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
494 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
495
496 def test_scanner(self):
497 def s_ident(scanner, token): return token
498 def s_operator(scanner, token): return "op%s" % token
499 def s_float(scanner, token): return float(token)
500 def s_int(scanner, token): return int(token)
501
502 scanner = Scanner([
503 (r"[a-zA-Z_]\w*", s_ident),
504 (r"\d+\.\d*", s_float),
505 (r"\d+", s_int),
506 (r"=|\+|-|\*|/", s_operator),
507 (r"\s+", None),
508 ])
509
510 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
511
512 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
513 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
514 'op+', 'bar'], ''))
515
516 def test_bug_448951(self):
517 # bug 448951 (similar to 429357, but with single char match)
518 # (Also test greedy matches.)
519 for op in '','?','*':
520 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
521 (None, None))
522 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
523 ('a:', 'a'))
524
525 def test_bug_725106(self):
526 # capturing groups in alternatives in repeats
527 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
528 ('b', 'a'))
529 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
530 ('c', 'b'))
531 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
532 ('b', None))
533 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
534 ('b', None))
535 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
536 ('b', 'a'))
537 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
538 ('c', 'b'))
539 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
540 ('b', None))
541 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
542 ('b', None))
543
544 def test_bug_725149(self):
545 # mark_stack_base restoring before restoring marks
546 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
547 ('a', None))
548 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
549 ('a', None, None))
550
551 def test_bug_764548(self):
552 # bug 764548, re.compile() barfs on str/unicode subclasses
553 try:
554 unicode
555 except NameError:
556 return # no problem if we have no unicode
557 class my_unicode(unicode): pass
558 pat = re.compile(my_unicode("abc"))
559 self.assertEqual(pat.match("xyz"), None)
560
561 def test_finditer(self):
562 iter = re.finditer(r":+", "a:b::c:::d")
563 self.assertEqual([item.group(0) for item in iter],
564 [":", "::", ":::"])
565
566 def test_bug_926075(self):
567 try:
568 unicode
569 except NameError:
570 return # no problem if we have no unicode
571 self.assert_(re.compile('bug_926075') is not
572 re.compile(eval("u'bug_926075'")))
573
574 def test_bug_931848(self):
575 try:
576 unicode
577 except NameError:
578 pass
579 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
580 self.assertEqual(re.compile(pattern).split("a.b.c"),
581 ['a','b','c'])
582
583 def test_bug_581080(self):
584 iter = re.finditer(r"\s", "a b")
585 self.assertEqual(iter.next().span(), (1,2))
586 self.assertRaises(StopIteration, iter.next)
587
588 scanner = re.compile(r"\s").scanner("a b")
589 self.assertEqual(scanner.search().span(), (1, 2))
590 self.assertEqual(scanner.search(), None)
591
592 def test_bug_817234(self):
593 iter = re.finditer(r".*", "asdf")
594 self.assertEqual(iter.next().span(), (0, 4))
595 self.assertEqual(iter.next().span(), (4, 4))
596 self.assertRaises(StopIteration, iter.next)
597
598
599def run_re_tests():
600 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
601 if verbose:
602 print 'Running re_tests test suite'
603 else:
604 # To save time, only run the first and last 10 tests
605 #tests = tests[:10] + tests[-10:]
606 pass
607
608 for t in tests:
609 sys.stdout.flush()
610 pattern = s = outcome = repl = expected = None
611 if len(t) == 5:
612 pattern, s, outcome, repl, expected = t
613 elif len(t) == 3:
614 pattern, s, outcome = t
615 else:
616 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
617
618 try:
619 obj = re.compile(pattern)
620 except re.error:
621 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
622 else:
623 print '=== Syntax error:', t
624 except KeyboardInterrupt: raise KeyboardInterrupt
625 except:
626 print '*** Unexpected error ***', t
627 if verbose:
628 traceback.print_exc(file=sys.stdout)
629 else:
630 try:
631 result = obj.search(s)
632 except re.error, msg:
633 print '=== Unexpected exception', t, repr(msg)
634 if outcome == SYNTAX_ERROR:
635 # This should have been a syntax error; forget it.
636 pass
637 elif outcome == FAIL:
638 if result is None: pass # No match, as expected
639 else: print '=== Succeeded incorrectly', t
640 elif outcome == SUCCEED:
641 if result is not None:
642 # Matched, as expected, so now we compute the
643 # result string and compare it to our expected result.
644 start, end = result.span(0)
645 vardict={'found': result.group(0),
646 'groups': result.group(),
647 'flags': result.re.flags}
648 for i in range(1, 100):
649 try:
650 gi = result.group(i)
651 # Special hack because else the string concat fails:
652 if gi is None:
653 gi = "None"
654 except IndexError:
655 gi = "Error"
656 vardict['g%d' % i] = gi
657 for i in result.re.groupindex.keys():
658 try:
659 gi = result.group(i)
660 if gi is None:
661 gi = "None"
662 except IndexError:
663 gi = "Error"
664 vardict[i] = gi
665 repl = eval(repl, vardict)
666 if repl != expected:
667 print '=== grouping error', t,
668 print repr(repl) + ' should be ' + repr(expected)
669 else:
670 print '=== Failed incorrectly', t
671
672 # Try the match on a unicode string, and check that it
673 # still succeeds.
674 try:
675 result = obj.search(unicode(s, "latin-1"))
676 if result is None:
677 print '=== Fails on unicode match', t
678 except NameError:
679 continue # 1.5.2
680 except TypeError:
681 continue # unicode test case
682
683 # Try the match on a unicode pattern, and check that it
684 # still succeeds.
685 obj=re.compile(unicode(pattern, "latin-1"))
686 result = obj.search(s)
687 if result is None:
688 print '=== Fails on unicode pattern match', t
689
690 # Try the match with the search area limited to the extent
691 # of the match and see if it still succeeds. \B will
692 # break (because it won't match at the end or start of a
693 # string), so we'll ignore patterns that feature it.
694
695 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
696 and result is not None:
697 obj = re.compile(pattern)
698 result = obj.search(s, result.start(0), result.end(0) + 1)
699 if result is None:
700 print '=== Failed on range-limited match', t
701
702 # Try the match with IGNORECASE enabled, and check that it
703 # still succeeds.
704 obj = re.compile(pattern, re.IGNORECASE)
705 result = obj.search(s)
706 if result is None:
707 print '=== Fails on case-insensitive match', t
708
709 # Try the match with LOCALE enabled, and check that it
710 # still succeeds.
711 obj = re.compile(pattern, re.LOCALE)
712 result = obj.search(s)
713 if result is None:
714 print '=== Fails on locale-sensitive match', t
715
716 # Try the match with UNICODE locale enabled, and check
717 # that it still succeeds.
718 obj = re.compile(pattern, re.UNICODE)
719 result = obj.search(s)
720 if result is None:
721 print '=== Fails on unicode-sensitive match', t
722
def test_main():
    # Entry point for the test driver: run the unittest-based cases first,
    # then the table-driven cases.
    run_unittest(ReTests)
    run_re_tests()
726
# Allow running this test module directly as a script.
if __name__ == "__main__":
    test_main()