Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | """A collection of string operations (most are no longer used). |
2 | ||
3 | Warning: most of the code you see here isn't normally used nowadays. | |
4 | Beginning with Python 1.6, many of these functions are implemented as | |
5 | methods on the standard string object. They used to be implemented by | |
6 | a built-in module called strop, but strop is now obsolete itself. | |
7 | ||
8 | Public module variables: | |
9 | ||
10 | whitespace -- a string containing all characters considered whitespace | |
11 | lowercase -- a string containing all characters considered lowercase letters | |
12 | uppercase -- a string containing all characters considered uppercase letters | |
13 | letters -- a string containing all characters considered letters | |
14 | digits -- a string containing all characters considered decimal digits | |
15 | hexdigits -- a string containing all characters considered hexadecimal digits | |
16 | octdigits -- a string containing all characters considered octal digits | |
17 | punctuation -- a string containing all characters considered punctuation | |
18 | printable -- a string containing all characters considered printable | |
19 | ||
20 | """ | |
21 | ||
# Character-class strings, in the spirit of C's <ctype.h> predicates.
whitespace = ' \t\n\r\v\f'

# Letters.  The plain names and their ascii_* twins hold the same
# values at this point in the module.
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_lowercase = lowercase
ascii_uppercase = uppercase
letters = lowercase + uppercase
ascii_letters = ascii_lowercase + ascii_uppercase

# Digits for the three usual bases.
octdigits = '01234567'
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'

punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""

# Everything above, combined.
printable = digits + letters + punctuation + whitespace
35 | ||
# Case conversion helpers
# _idmap is the identity table: chr(0) through chr(255), in order.
# The joiner is spelled str('') so that it is a real str even when
# string literals are Unicode (-U).
# Note that Cookie.py bogusly uses _idmap :(
_idmap = str('').join(map(chr, range(256)))
42 | ||
43 | # Functions which aren't available as string methods. | |
44 | ||
# Capitalize every word of a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), capitalize() each word,
    and glue the results back together with sep, or with a single
    space when sep is None (or otherwise false).  With the default
    sep, runs of whitespace characters therefore collapse into a
    single space.

    """
    joiner = sep or ' '
    words = s.split(sep)
    return joiner.join([word.capitalize() for word in words])
57 | ||
58 | ||
# Construct a translation string
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string 256 characters long)
    suitable for use in string.translate.  Each character of frm is
    mapped to the character at the same position in to; every other
    character maps to itself.

    Raises ValueError if frm and to are not of the same length.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        # Build the identity list once; later calls reuse it.
        _idmapL = list(_idmap)
    # Work on a copy so the cached identity list is never modified.
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)
79 | ||
80 | ||
81 | \f | |
82 | #################################################################### | |
83 | import re as _re | |
84 | ||
class _multimap:
    """Lookup helper that consults two mappings in order.

    .{safe_,}substitute() use it to merge the keyword arguments with
    the positional mapping.
    """
    def __init__(self, primary, secondary):
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        # The secondary mapping is consulted only when the primary
        # one raises KeyError.
        try:
            value = self._primary[key]
        except KeyError:
            value = self._secondary[key]
        return value
100 | ||
101 | ||
class _TemplateMetaclass(type):
    # Verbose-mode regex skeleton.  %(delim)s and %(id)s are filled
    # in per class from its delimiter and idpattern attributes.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        if 'pattern' not in dct:
            # No pattern in the class body: build the default one.
            source = _TemplateMetaclass.pattern % {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
        else:
            source = cls.pattern
        # Either way the class ends up holding a compiled pattern.
        cls.pattern = _re.compile(source, _re.IGNORECASE | _re.VERBOSE)
122 | ||
123 | ||
class Template:
    """A string class for supporting $-substitutions.

    The class attributes delimiter and idpattern describe the
    placeholder syntax; the metaclass stores the compiled regular
    expression in the class attribute pattern.
    """
    __metaclass__ = _TemplateMetaclass

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError naming the line and column of a bad placeholder."""
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column is counted within the last line seen so far.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values are looked up in the single optional positional
        mapping and in the keyword arguments; keywords win when both
        supply a name.  A lookup failure in the mapping propagates to
        the caller, an ill-formed placeholder raises ValueError, and
        more than one positional argument raises TypeError.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                val = mapping[named]
                # We use this idiom instead of str() because the latter will
                # fail if val is a Unicode containing non-ASCII characters.
                # val is wrapped in a 1-tuple so that a tuple value is
                # formatted as a whole instead of being unpacked by %.
                return '%s' % (val,)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but tolerant of bad or missing placeholders.

        A placeholder whose name raises KeyError in the mapping is
        left in the result unchanged, and an ill-formed placeholder
        yields the bare delimiter instead of raising ValueError.
        More than one positional argument still raises TypeError.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named')
            if named is not None:
                try:
                    # We use this idiom instead of str() because the latter
                    # will fail if val is a Unicode containing non-ASCII
                    # characters.  The 1-tuple keeps a tuple value from
                    # being unpacked by %.
                    return '%s' % (mapping[named],)
                except KeyError:
                    return self.delimiter + named
            braced = mo.group('braced')
            if braced is not None:
                try:
                    return '%s' % (mapping[braced],)
                except KeyError:
                    return self.delimiter + '{' + braced + '}'
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
206 | ||
207 | ||
208 | \f | |
209 | #################################################################### | |
210 | # NOTE: Everything below here is deprecated. Use string methods instead. | |
211 | # This stuff will go away in Python 3.0. | |
212 | ||
# Backward compatible names for exceptions: all four are plain
# aliases of ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
218 | ||
# Convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return a lowercase copy of the string s, as produced by
    s.lower().

    """
    lowered = s.lower()
    return lowered
227 | ||
# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return an uppercase copy of the string s, as produced by
    s.upper().

    """
    raised = s.upper()
    return raised
236 | ||
# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s in which upper case characters
    have become lowercase and lowercase ones upper case.

    """
    swapped = s.swapcase()
    return swapped
246 | ||
# Strip leading and trailing whitespace (or the given characters)
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Return a copy of the string s without its leading and trailing
    whitespace.
    When chars is given and is not None, the characters in chars are
    removed instead.
    If chars is unicode, s is converted to unicode before stripping.

    """
    stripped = s.strip(chars)
    return stripped
258 | ||
# Strip leading whitespace (or the given characters)
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Return a copy of the string s without its leading whitespace.
    When chars is given and is not None, the characters in chars are
    removed instead.

    """
    stripped = s.lstrip(chars)
    return stripped
268 | ||
# Strip trailing whitespace (or the given characters)
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Return a copy of the string s without its trailing whitespace.
    When chars is given and is not None, the characters in chars are
    removed instead.

    """
    stripped = s.rstrip(chars)
    return stripped
278 | ||
279 | ||
# Split a string into a list of words, scanning from the left
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Return the list of words in the string s, with sep acting as
    the delimiter string.  When maxsplit is given, no more than
    maxsplit splits are made, so at most maxsplit+1 words come back.
    When sep is omitted or None, any whitespace string separates
    words.

    (split and splitfields are synonymous)

    """
    words = s.split(sep, maxsplit)
    return words
splitfields = split
294 | ||
# Split a string into a list of words, scanning from the right
def rsplit(s, sep=None, maxsplit=-1):
    """rsplit(s [,sep [,maxsplit]]) -> list of strings

    Return the list of words in the string s, with sep acting as
    the delimiter string and splitting done from the end of the
    string towards the front.  When maxsplit is given, at most
    maxsplit splits are made.  When sep is omitted or None, any
    whitespace string separates words.
    """
    words = s.rsplit(sep, maxsplit)
    return words
306 | ||
# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return the words in list concatenated into one string, with sep
    placed between neighbouring words.  A single space is used when
    sep is not given.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
320 | ||
# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Same as find, except that ValueError is raised when the
    substring is not found.

    """
    locate = s.index
    return locate(*args)
329 | ||
# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Same as rfind, except that ValueError is raised when the
    substring is not found.

    """
    locate = s.rindex
    return locate(*args)
338 | ||
# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return how many times substring sub occurs in string
    s[start:end].  The optional arguments start and end are
    interpreted as in slice notation.

    """
    tally = s.count(*args)
    return tally
349 | ||
# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s at which substring sub occurs,
    such that sub lies within s[start:end].  The optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    position = s.find(*args)
    return position
362 | ||
# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s at which substring sub occurs,
    such that sub lies within s[start:end].  The optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    position = s.rfind(*args)
    return position
375 | ||
376 | # for a bit of speed | |
377 | _float = float | |
378 | _int = int | |
379 | _long = long | |
380 | ||
# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number that the string s represents.

    """
    number = _float(s)
    return number
389 | ||
390 | ||
# Convert string to integer
def atoi(s , base=10):
    """atoi(s [,base]) -> int

    Return the integer that the string s represents in the given
    base (10 when omitted).  The string s must be made of one or
    more digits, optionally preceded by a sign.  With base 0 the
    base is taken from the leading characters of s: 0 for octal,
    0x or 0X for hexadecimal.  With base 16 a leading 0x or 0X is
    accepted.

    """
    number = _int(s, base)
    return number
404 | ||
405 | ||
# Convert string to long integer
def atol(s, base=10):
    """atol(s [,base]) -> long

    Return the long integer that the string s represents in the
    given base (10 when omitted).  The string s must be made of one
    or more digits, optionally preceded by a sign.  With base 0 the
    base is taken from the leading characters of s: 0 for octal,
    0x or 0X for hexadecimal.  With base 16 a leading 0x or 0X is
    accepted.  A trailing L or l is accepted only when base is 0.

    """
    number = _long(s, base)
    return number
420 | ||
421 | ||
# Left-justify a string
def ljust(s, width, *args):
    """ljust(s, width[, fillchar]) -> string

    Return s left-justified in a field of the given width, padded
    with spaces as needed.  The string is never truncated.  When
    fillchar is supplied it is used for padding instead of spaces.

    """
    call_args = (width,) + args
    return s.ljust(*call_args)
432 | ||
# Right-justify a string
def rjust(s, width, *args):
    """rjust(s, width[, fillchar]) -> string

    Return s right-justified in a field of the given width, padded
    with spaces as needed.  The string is never truncated.  When
    fillchar is supplied it is used for padding instead of spaces.

    """
    call_args = (width,) + args
    return s.rjust(*call_args)
443 | ||
# Center a string
def center(s, width, *args):
    """center(s, width[, fillchar]) -> string

    Return s centered in a field of the given width, padded with
    spaces as needed.  The string is never truncated.  When
    fillchar is supplied it is used for padding instead of spaces.

    """
    call_args = (width,) + args
    return s.center(*call_args)
454 | ||
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad the numeric string x on the left with zeros until it fills
    a field of the given width.  The string x is never truncated.
    An x that is not a string is first converted with repr().

    """
    if isinstance(x, basestring):
        text = x
    else:
        text = repr(x)
    return text.zfill(width)
468 | ||
# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s in which every tab character has
    been replaced by the number of spaces called for by the current
    column and the tabsize (default 8).

    """
    expanded = s.expandtabs(tabsize)
    return expanded
480 | ||
# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Return a copy of the string s from which every character found
    in the optional argument deletions has been removed, and whose
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.  The
    deletions argument is not allowed for Unicode strings.

    """
    if not deletions:
        # Add s[:0] so that if s is Unicode and table is an 8-bit string,
        # table is converted to Unicode.  This means that table *cannot*
        # be a dictionary -- for that feature, use u.translate() directly.
        return s.translate(table + s[:0])
    return s.translate(table, deletions)
499 | ||
# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s in which only the first character
    is capitalized.

    """
    capitalized = s.capitalize()
    return capitalized
509 | ||
# Substring replacement (global)
def replace(s, old, new, maxsplit=-1):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str in which every occurrence of
    substring old has been replaced by new.  When the optional
    argument maxsplit is given, only the first maxsplit occurrences
    are replaced.

    """
    replaced = s.replace(old, new, maxsplit)
    return replaced
520 | ||
521 | ||
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
except ImportError:
    pass                                          # Use the original versions
else:
    # Rebuild letters from the lowercase/uppercase just imported.
    letters = lowercase + uppercase