Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | # |
2 | # Test suite for the textwrap module. | |
3 | # | |
4 | # Original tests written by Greg Ward <gward@python.net>. | |
5 | # Converted to PyUnit by Peter Hansen <peter@engcorp.com>. | |
6 | # Currently maintained by Greg Ward. | |
7 | # | |
8 | # $Id: test_textwrap.py,v 1.27.4.1 2005/03/05 02:38:33 gward Exp $ | |
9 | # | |
10 | ||
11 | import unittest | |
12 | from test import test_support | |
13 | ||
14 | from textwrap import TextWrapper, wrap, fill, dedent | |
15 | ||
16 | ||
class BaseTestCase(unittest.TestCase):
    '''Parent class with utility methods for textwrap tests.'''

    def show(self, textin):
        '''Render a value for use in an assertion failure message.

        A list is shown one element per line, each prefixed with its
        index; a string is shown as its repr() followed by a newline.
        Any other value falls back to its repr() so that building the
        failure message can never itself raise.
        '''
        if isinstance(textin, list):
            result = '\n'.join([" %d: %r" % (i, item)
                                for i, item in enumerate(textin)])
        elif isinstance(textin, (str, type(u""))):
            # (str, type(u"")) is basestring's two concrete types on
            # Python 2 and collapses to plain str on Python 3.
            result = " %s\n" % repr(textin)
        else:
            result = " %r\n" % (textin,)
        return result

    def check(self, result, expect):
        '''Assert that result equals expect, showing both on failure.'''
        self.assertEqual(result, expect,
                         'expected:\n%s\nbut got:\n%s' % (
                             self.show(expect), self.show(result)))

    def check_wrap(self, text, width, expect, **kwargs):
        '''Assert that wrap(text, width, **kwargs) returns expect.'''
        result = wrap(text, width, **kwargs)
        self.check(result, expect)

    def check_split(self, text, expect):
        '''Assert that self.wrapper._split(text) returns expect.

        Reads self.wrapper, which this class does not create itself.
        '''
        result = self.wrapper._split(text)
        self.assertEqual(result, expect,
                         "\nexpected %r\n"
                         "but got %r" % (expect, result))
45 | ||
46 | ||
class WrapTestCase(BaseTestCase):
    '''Tests of wrap(), fill() and TextWrapper._split() on ordinary text.'''

    def setUp(self):
        self.wrapper = TextWrapper(width=45)

    def test_simple(self):
        # Simple case: just words, spaces, and a bit of punctuation

        text = "Hello there, how are you this fine day? I'm glad to hear it!"

        self.check_wrap(text, 12,
                        ["Hello there,",
                         "how are you",
                         "this fine",
                         "day? I'm",
                         "glad to hear",
                         "it!"])
        self.check_wrap(text, 42,
                        ["Hello there, how are you this fine day?",
                         "I'm glad to hear it!"])
        self.check_wrap(text, 80, [text])

    def test_whitespace(self):
        # Whitespace munging and end-of-sentence detection

        text = ("This is a paragraph that already has\n"
                "line breaks. But some of its lines are much longer "
                "than the others,\n"
                "so it needs to be wrapped.\n"
                "Some lines are \ttabbed too.\n"
                "What a mess!\n")

        # fix_sentence_endings=True puts exactly two spaces after each
        # sentence end, and the tab expands to a second space after
        # "are", so those double spaces below are significant.
        expect = ["This is a paragraph that already has line",
                  "breaks.  But some of its lines are much",
                  "longer than the others, so it needs to be",
                  "wrapped.  Some lines are  tabbed too.  What a",
                  "mess!"]

        wrapper = TextWrapper(45, fix_sentence_endings=True)
        result = wrapper.wrap(text)
        self.check(result, expect)

        result = wrapper.fill(text)
        self.check(result, '\n'.join(expect))

    def test_fix_sentence_endings(self):
        wrapper = TextWrapper(60, fix_sentence_endings=True)

        # SF #847346: ensure that fix_sentence_endings=True does the
        # right thing even on input short enough that it doesn't need to
        # be wrapped.  (Input has one space after the period; the
        # expected output has two.)
        text = "A short line. Note the single space."
        expect = ["A short line.  Note the single space."]
        self.check(wrapper.wrap(text), expect)

        # Test some of the hairy end cases that _fix_sentence_endings()
        # is supposed to handle (the easy stuff is tested in
        # test_whitespace() above).
        text = "Well, Doctor? What do you think?"
        expect = ["Well, Doctor?  What do you think?"]
        self.check(wrapper.wrap(text), expect)

        text = "Well, Doctor?\nWhat do you think?"
        self.check(wrapper.wrap(text), expect)

        text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
        expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
        self.check(wrapper.wrap(text), expect)

        # When the sentence break coincides with a line break the
        # whitespace is dropped, so no double space appears here.
        wrapper.width = 20
        expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
        self.check(wrapper.wrap(text), expect)

        text = 'And she said, "Go to hell!"\nCan you believe that?'
        expect = ['And she said, "Go to',
                  'hell!"  Can you',
                  'believe that?']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 60
        expect = ['And she said, "Go to hell!"  Can you believe that?']
        self.check(wrapper.wrap(text), expect)

    def test_wrap_short(self):
        # Wrapping to make short lines longer

        text = "This is a\nshort paragraph."

        self.check_wrap(text, 20, ["This is a short",
                                   "paragraph."])
        self.check_wrap(text, 40, ["This is a short paragraph."])

    def test_wrap_short_1line(self):
        # Test endcases

        text = "This is a short line."

        self.check_wrap(text, 30, ["This is a short line."])
        self.check_wrap(text, 30, ["(1) This is a short line."],
                        initial_indent="(1) ")

    def test_hyphenated(self):
        # Test breaking hyphenated words

        text = ("this-is-a-useful-feature-for-"
                "reformatting-posts-from-tim-peters'ly")

        self.check_wrap(text, 40,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 41,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 42,
                        ["this-is-a-useful-feature-for-reformatting-",
                         "posts-from-tim-peters'ly"])

    def test_hyphenated_numbers(self):
        # Test that hyphenated numbers (eg. dates) are not broken like words.
        text = ("Python 1.0.0 was released on 1994-01-26. Python 1.0.1 was\n"
                "released on 1994-02-15.")

        self.check_wrap(text, 30, ['Python 1.0.0 was released on',
                                   '1994-01-26. Python 1.0.1 was',
                                   'released on 1994-02-15.'])
        self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
                                   'Python 1.0.1 was released on 1994-02-15.'])

        text = "I do all my shopping at 7-11."
        self.check_wrap(text, 25, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 27, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 29, ["I do all my shopping at 7-11."])

    def test_em_dash(self):
        # Test text with em-dashes
        text = "Em-dashes should be written -- thus."
        self.check_wrap(text, 25,
                        ["Em-dashes should be",
                         "written -- thus."])

        # Probe the boundaries of the properly written em-dash,
        # ie. " -- ".
        self.check_wrap(text, 29,
                        ["Em-dashes should be written",
                         "-- thus."])
        expect = ["Em-dashes should be written --",
                  "thus."]
        self.check_wrap(text, 30, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 36,
                        ["Em-dashes should be written -- thus."])

        # The improperly written em-dash is handled too, because
        # it's adjacent to non-whitespace on both sides.
        text = "You can also do--this or even---this."
        expect = ["You can also do",
                  "--this or even",
                  "---this."]
        self.check_wrap(text, 15, expect)
        self.check_wrap(text, 16, expect)
        expect = ["You can also do--",
                  "this or even---",
                  "this."]
        self.check_wrap(text, 17, expect)
        self.check_wrap(text, 19, expect)
        expect = ["You can also do--this or even",
                  "---this."]
        self.check_wrap(text, 29, expect)
        self.check_wrap(text, 31, expect)
        expect = ["You can also do--this or even---",
                  "this."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 35, expect)

        # All of the above behaviour could be deduced by probing the
        # _split() method.
        text = "Here's an -- em-dash and--here's another---and another!"
        expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
                  "and", "--", "here's", " ", "another", "---",
                  "and", " ", "another!"]
        self.check_split(text, expect)

        text = "and then--bam!--he was gone"
        expect = ["and", " ", "then", "--", "bam!", "--",
                  "he", " ", "was", " ", "gone"]
        self.check_split(text, expect)

    def test_unix_options(self):
        # Test that Unix-style command-line options are wrapped correctly.
        # Both Optik (OptionParser) and Docutils rely on this behaviour!

        text = "You should use the -n option, or --dry-run in its long form."
        self.check_wrap(text, 20,
                        ["You should use the",
                         "-n option, or --dry-",
                         "run in its long",
                         "form."])
        self.check_wrap(text, 21,
                        ["You should use the -n",
                         "option, or --dry-run",
                         "in its long form."])
        expect = ["You should use the -n option, or",
                  "--dry-run in its long form."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 34, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 38, expect)
        expect = ["You should use the -n option, or --dry-",
                  "run in its long form."]
        self.check_wrap(text, 39, expect)
        self.check_wrap(text, 41, expect)
        expect = ["You should use the -n option, or --dry-run",
                  "in its long form."]
        self.check_wrap(text, 42, expect)

        # Again, all of the above can be deduced from _split().
        text = "the -n option, or --dry-run or --dryrun"
        expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
                  "--dry-", "run", " ", "or", " ", "--dryrun"]
        self.check_split(text, expect)

    def test_funky_hyphens(self):
        # Screwy edge cases cooked up by David Goodger.  All reported
        # in SF bug #596434.
        self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
        self.check_split("what the--", ["what", " ", "the--"])
        self.check_split("what the--.", ["what", " ", "the--."])
        self.check_split("--text--.", ["--text--."])

        # When I first read bug #596434, this is what I thought David
        # was talking about.  I was wrong; these have always worked
        # fine.  The real problem is tested in test_funky_parens()
        # below...
        self.check_split("--option", ["--option"])
        self.check_split("--option-opt", ["--option-", "opt"])
        self.check_split("foo --option-opt bar",
                         ["foo", " ", "--option-", "opt", " ", "bar"])

    def test_punct_hyphens(self):
        # Oh bother, SF #965425 found another problem with hyphens --
        # hyphenated words in single quotes weren't handled correctly.
        # In fact, the bug is that *any* punctuation around a hyphenated
        # word was handled incorrectly, except for a leading "--", which
        # was special-cased for Optik and Docutils.  So test a variety
        # of styles of punctuation around a hyphenated word.
        # (Actually this is based on an Optik bug report, #813077).
        self.check_split("the 'wibble-wobble' widget",
                         ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
        self.check_split('the "wibble-wobble" widget',
                         ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
        self.check_split("the (wibble-wobble) widget",
                         ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
        self.check_split("the ['wibble-wobble'] widget",
                         ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])

    def test_funky_parens(self):
        # Second part of SF bug #596434: long option strings inside
        # parentheses.
        self.check_split("foo (--option) bar",
                         ["foo", " ", "(--option)", " ", "bar"])

        # Related stuff -- make sure parens work in simpler contexts.
        self.check_split("foo (bar) baz",
                         ["foo", " ", "(bar)", " ", "baz"])
        self.check_split("blah (ding dong), wubba",
                         ["blah", " ", "(ding", " ", "dong),",
                          " ", "wubba"])

    def test_initial_whitespace(self):
        # SF bug #622849 reported inconsistent handling of leading
        # whitespace; let's test that a bit, shall we?
        text = " This is a sentence with leading whitespace."
        self.check_wrap(text, 50,
                        [" This is a sentence with leading whitespace."])
        self.check_wrap(text, 30,
                        [" This is a sentence with", "leading whitespace."])

    def test_unicode(self):
        # *Very* simple test of wrapping Unicode strings.  I'm sure
        # there's more to it than this, but let's at least make
        # sure textwrap doesn't crash on Unicode input!
        text = u"Hello there, how are you today?"
        self.check_wrap(text, 50, [u"Hello there, how are you today?"])
        self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])

        # type(u"") is `unicode` on Python 2 and `str` on Python 3, so
        # the check needs no version-specific name.  unittest assertions
        # are used because bare `assert` is stripped under -O.
        unicode_type = type(u"")
        olines = self.wrapper.wrap(text)
        self.assertTrue(isinstance(olines, list))
        self.assertTrue(isinstance(olines[0], unicode_type))
        otext = self.wrapper.fill(text)
        self.assertTrue(isinstance(otext, unicode_type))

    def test_split(self):
        # Ensure that the standard _split() method works as advertised
        # in the comments

        text = "Hello there -- you goof-ball, use the -b option!"

        result = self.wrapper._split(text)
        self.check(result,
                   ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
                    "ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])

    def test_bad_width(self):
        # Ensure that width <= 0 is caught.
        text = "Whatever, it doesn't matter."
        self.assertRaises(ValueError, wrap, text, 0)
        self.assertRaises(ValueError, wrap, text, -1)
359 | ||
360 | ||
class LongWordTestCase(BaseTestCase):
    '''Tests of wrapping text that contains a word longer than the width.'''

    def setUp(self):
        self.wrapper = TextWrapper()
        self.text = ('Did you say "supercalifragilisticexpialidocious?"\n'
                     'How *do* you spell that odd word, anyways?\n')

    def test_break_long(self):
        # Wrap text with long words and lots of punctuation

        self.check_wrap(self.text, 30,
                        ['Did you say "supercalifragilis',
                         'ticexpialidocious?" How *do*',
                         'you spell that odd word,',
                         'anyways?'])
        self.check_wrap(self.text, 50,
                        ['Did you say "supercalifragilisticexpialidocious?"',
                         'How *do* you spell that odd word, anyways?'])

        # SF bug 797650.  Prevent an infinite loop by making sure that at
        # least one character gets split off on every pass.
        #
        # The indent is wider than the line width, so every continuation
        # line is the full indent followed by a single character.  The
        # expectation is built from the same `indent` value so the two
        # cannot drift apart.
        indent = ' ' * 15
        self.check_wrap('-' * 10 + 'hello', 10,
                        ['----------'] + [indent + ch for ch in 'hello'],
                        subsequent_indent=indent)

    def test_nobreak_long(self):
        # Test with break_long_words disabled
        self.wrapper.break_long_words = False
        self.wrapper.width = 30
        expect = ['Did you say',
                  '"supercalifragilisticexpialidocious?"',
                  'How *do* you spell that odd',
                  'word, anyways?'
                  ]
        result = self.wrapper.wrap(self.text)
        self.check(result, expect)

        # Same thing with kwargs passed to standalone wrap() function.
        result = wrap(self.text, width=30, break_long_words=False)
        self.check(result, expect)
407 | ||
408 | ||
class IndentTestCases(BaseTestCase):
    '''Tests of the initial_indent and subsequent_indent options.'''

    # called before each test method
    def setUp(self):
        self.text = ('This paragraph will be filled, first without any '
                     'indentation,\n'
                     'and then with some (including a hanging indent).')

    def test_fill(self):
        # Test the fill() method

        expect = '\n'.join(['This paragraph will be filled, first',
                            'without any indentation, and then with',
                            'some (including a hanging indent).'])

        result = fill(self.text, 40)
        self.check(result, expect)

    def test_initial_indent(self):
        # Test initial_indent parameter
        #
        # NOTE(review): the indent width is written as an explicit
        # repeat count.  Five is the smallest width that pushes "first"
        # onto the second line at width 40, which the expected output
        # requires -- confirm against upstream.
        indent = " " * 5

        expect = [indent + "This paragraph will be filled,",
                  "first without any indentation, and then",
                  "with some (including a hanging indent)."]
        result = wrap(self.text, 40, initial_indent=indent)
        self.check(result, expect)

        expect = "\n".join(expect)
        result = fill(self.text, 40, initial_indent=indent)
        self.check(result, expect)

    def test_subsequent_indent(self):
        # Test subsequent_indent parameter
        #
        # NOTE(review): a hanging indent -- continuation lines line up
        # with the text after the bullet.  Widths are spelled out
        # explicitly; a narrower continuation indent would let "with"
        # fit on the second line and break the expectation below.
        bullet = " " * 2 + "* "
        hang = " " * len(bullet)

        expect = '\n'.join([bullet + 'This paragraph will be filled, first',
                            hang + 'without any indentation, and then',
                            hang + 'with some (including a hanging',
                            hang + 'indent).'])

        result = fill(self.text, 40,
                      initial_indent=bullet, subsequent_indent=hang)
        self.check(result, expect)
456 | ||
457 | ||
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
class DedentTestCase(unittest.TestCase):
    '''Tests of textwrap.dedent().'''

    def test_dedent_nomargin(self):
        # No lines indented.
        text = "Hello there.\nHow are you?\nOh good, I'm glad."
        self.assertEqual(dedent(text), text)

        # Similar, with a blank line.
        text = "Hello there.\n\nBoo!"
        self.assertEqual(dedent(text), text)

        # Some lines indented, but overall margin is still zero.
        text = "Hello there.\n This is indented."
        self.assertEqual(dedent(text), text)

        # Again, add a blank line.
        text = "Hello there.\n\n Boo!\n"
        self.assertEqual(dedent(text), text)

    def test_dedent_even(self):
        # All lines indented by two spaces.
        text = "  Hello there.\n  How are ya?\n  Oh good."
        expect = "Hello there.\nHow are ya?\nOh good."
        self.assertEqual(dedent(text), expect)

        # Same, with blank lines.
        text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(dedent(text), expect)

        # Now indent one of the blank lines.
        text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(dedent(text), expect)

    def test_dedent_uneven(self):
        # Lines indented unevenly.
        #
        # NOTE(review): the margin is built explicitly rather than taken
        # from the layout of a triple-quoted literal, so the test does
        # not depend on how this file itself is indented.  The final
        # whitespace-only line (no newline) is part of the input.
        margin = " " * 8
        text = (margin + "def foo():\n" +
                margin + "    while 1:\n" +
                margin + "        return foo\n" +
                margin)
        expect = ("def foo():\n"
                  "    while 1:\n"
                  "        return foo\n")
        self.assertEqual(dedent(text), expect)

        # Uneven indentation with a blank line.
        text = "  Foo\n    Bar\n\n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(dedent(text), expect)

        # Uneven indentation with a whitespace-only line.
        text = "  Foo\n    Bar\n \n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(dedent(text), expect)
518 | ||
519 | ||
520 | ||
def test_main():
    '''Hand every test case class in this module to test_support.run_unittest().'''
    case_classes = (
        WrapTestCase,
        LongWordTestCase,
        IndentTestCases,
        DedentTestCase,
    )
    test_support.run_unittest(*case_classes)


if __name__ == '__main__':
    test_main()