Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # module 'string' -- A collection of string operations |
2 | ||
3 | # Warning: most of the code you see here isn't normally used nowadays. With | |
4 | # Python 1.6, many of these functions are implemented as methods on the | |
5 | # standard string object. They used to be implemented by a built-in module | |
6 | # called strop, but strop is now obsolete itself. | |
7 | ||
8 | """Common string manipulations. | |
9 | ||
10 | Public module variables: | |
11 | ||
12 | whitespace -- a string containing all characters considered whitespace | |
13 | lowercase -- a string containing all characters considered lowercase letters | |
14 | uppercase -- a string containing all characters considered uppercase letters | |
15 | letters -- a string containing all characters considered letters | |
16 | digits -- a string containing all characters considered decimal digits | |
17 | hexdigits -- a string containing all characters considered hexadecimal digits | |
18 | octdigits -- a string containing all characters considered octal digits | |
19 | ||
20 | """ | |
21 | ||
22 | # Some strings for ctype-style character classification | |
# Character-class strings, one per classification.  `letters` and
# `hexdigits` are derived from the other constants so they cannot drift.
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
30 | ||
31 | # Case conversion helpers | |
# Identity table: the 256 single characters chr(0)..chr(255), in order.
# Built with one join instead of 256 successive concatenations, which also
# avoids leaving a loop variable behind in the module namespace.
_idmap = ''.join(map(chr, range(256)))
35 | ||
36 | # Backward compatible names for exceptions | |
# Backward-compatible exception names: each one is bound to ValueError.
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError
41 | ||
42 | # convert UPPER CASE letters to lower case | |
def lower(s):
    """lower(s) -> string

    Return a copy of the string s converted to lowercase.
    Delegates to the s.lower() method.

    """
    return s.lower()
50 | ||
51 | # Convert lower case letters to UPPER CASE | |
def upper(s):
    """upper(s) -> string

    Return a copy of the string s converted to uppercase.
    Delegates to the s.upper() method.

    """
    return s.upper()
59 | ||
60 | # Swap lower case letters and UPPER CASE | |
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s with upper case characters
    converted to lowercase and vice versa.  Delegates to the
    s.swapcase() method.

    """
    return s.swapcase()
69 | ||
70 | # Strip leading and trailing tabs and spaces | |
def strip(s):
    """strip(s) -> string

    Return a copy of the string s with leading and trailing
    whitespace removed.  Delegates to the s.strip() method.

    """
    return s.strip()
79 | ||
80 | # Strip leading tabs and spaces | |
def lstrip(s):
    """lstrip(s) -> string

    Return a copy of the string s with leading whitespace removed.
    Delegates to the s.lstrip() method.

    """
    return s.lstrip()
88 | ||
89 | # Strip trailing tabs and spaces | |
def rstrip(s):
    """rstrip(s) -> string

    Return a copy of the string s with trailing whitespace
    removed.  Delegates to the s.rstrip() method.

    """
    return s.rstrip()
98 | ||
99 | ||
100 | # Split a string into a list of space/tab-separated words | |
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If sep is not specified (or is None), any
    whitespace string is a separator.  If maxsplit is nonzero it is
    passed on to s.split() to limit the splitting; the default of 0
    means no limit is requested.

    (split and splitfields are synonymous)

    """
    # Forward maxsplit only when a limit was really requested.  A string
    # method that reads 0 as "perform zero splits" would otherwise return
    # the whole string as a single word for the default call.
    if maxsplit:
        return s.split(sep, maxsplit)
    return s.split(sep)
splitfields = split
114 | ||
115 | # Join fields with optional separator | |
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  The default separator is a
    single space.  Delegates to the sep.join() method.

    (joinfields and join are synonymous)

    """
    return sep.join(words)
joinfields = join
128 | ||
129 | # for a little bit of speed | |
# Module-local alias of the apply() builtin ("for a little bit of speed").
# If the builtin is not available, an equivalent two-argument shim keeps
# every _apply(func, args) call site in this module working.
try:
    _apply = apply
except NameError:
    def _apply(func, args):
        return func(*args)
131 | ||
132 | # Find substring, raise exception if not found | |
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    """
    # Hand the optional arguments straight to the string method.
    return s.index(*args)
140 | ||
141 | # Find last substring, raise exception if not found | |
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    """
    # Hand the optional arguments straight to the string method.
    return s.rindex(*args)
149 | ||
150 | # Count non-overlapping occurrences of substring | |
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    """
    # Hand the optional arguments straight to the string method.
    return s.count(*args)
160 | ||
161 | # Find substring, return -1 if not found | |
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Hand the optional arguments straight to the string method.
    return s.find(*args)
173 | ||
174 | # Find last substring, return -1 if not found | |
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Hand the optional arguments straight to the string method.
    return s.rfind(*args)
186 | ||
187 | # for a bit of speed | |
# Module-local aliases of the conversion builtins ("for a bit of speed").
_float = float
_int = int
try:
    _long = long
except NameError:
    # No separate `long` builtin on this interpreter; fall back to int.
    _long = int
_StringType = type('')
192 | ||
193 | # Convert string to float | |
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.
    Raise TypeError if s is not a string; the conversion itself is
    done by float().

    """
    # isinstance (rather than an exact type comparison) keeps the string
    # check valid for string subclasses as well.
    if not isinstance(s, type('')):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return float(s)
205 | ||
206 | # Convert string to integer | |
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    Raise TypeError if no argument is given or if s is not a string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if not isinstance(s, type('')):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Too many arguments are deliberately left for int() to reject: the
    # error message differs but the exception type (TypeError) is the same.
    return int(*args)
231 | ||
232 | ||
233 | # Convert string to long integer | |
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    Raise TypeError if no argument is given or if s is not a string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if not isinstance(s, type('')):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Too many arguments are deliberately left for the converter to reject:
    # the error message differs but the exception type is the same.
    # _long is the module-level alias of the long-integer constructor.
    return _long(*args)
259 | ||
260 | ||
261 | # Left-justify a string | |
def ljust(s, width):
    """ljust(s, width) -> string

    Return a left-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    pad = width - len(s)
    if pad <= 0:
        # Already at least `width` long: return s itself, untouched.
        return s
    return s + ' ' * pad
273 | ||
274 | # Right-justify a string | |
def rjust(s, width):
    """rjust(s, width) -> string

    Return a right-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    pad = width - len(s)
    if pad <= 0:
        # Already at least `width` long: return s itself, untouched.
        return s
    return ' ' * pad + s
286 | ||
287 | # Center a string | |
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    pad = width - len(s)
    if pad <= 0:
        return s
    # divmod keeps the left padding an integer whatever the division
    # semantics of `/` are; a float here would break the ' ' * left below.
    left, odd = divmod(pad, 2)
    if odd and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        left = left + 1
    return ' ' * left + s + ' ' * (pad - left)
303 | ||
304 | # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' | |
305 | # Decadent feature: the argument may be a string or a number | |
306 | # (Use of this is deprecated; it should be a string as with ljust c.s.) | |
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.  A
    leading '-' or '+' sign stays in front of the inserted zeros.

    If x is not a string, repr(x) is padded instead (deprecated use).

    """
    if isinstance(x, type('')):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # Slice rather than index: s[0] would raise IndexError for an empty
    # string, while s[:1] is simply '' and matches neither sign.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    # n was measured with the sign included, so the total comes out at width.
    return sign + '0' * (width - n) + s
322 | ||
323 | # Expand tabs in a string. | |
324 | # Doesn't take non-printing chars into account, but does understand \n. | |
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).  The column restarts at 0
    after each newline; no other character is treated specially.

    """
    pieces = []     # output chunks, joined once at the end
    column = 0      # number of characters emitted since the last newline
    for c in s:
        if c == '\t':
            # Replace the tab by enough spaces to reach the next tab stop.
            c = ' ' * (tabsize - column % tabsize)
        pieces.append(c)
        if c == '\n':
            column = 0
        else:
            column = column + len(c)
    return ''.join(pieces)
342 | ||
343 | # Character translation through look-up table. | |
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of the string s, where all characters occurring
    in the optional argument deletechars are removed, and the
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.

    """
    # Pass deletions along only when there is something to delete, so that
    # string types whose translate() accepts just a table still work.
    if deletions:
        return s.translate(table, deletions)
    return s.translate(table)
354 | ||
355 | # Capitalize a string, e.g. "aBc dEf" -> "Abc def". | |
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s with only its first character
    capitalized.  Delegates to the s.capitalize() method.

    """
    return s.capitalize()
364 | ||
365 | # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". | |
366 | # See also regsub.capwords(). | |
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  When sep is not given, runs of whitespace characters are
    replaced by a single space; otherwise sep is used both to split
    and to join.

    """
    words = []
    for word in s.split(sep):
        words.append(word.capitalize())
    # With no explicit separator the words are rejoined with one space.
    return (sep or ' ').join(words)
377 | ||
378 | # Construct a translation string | |
# Cache of the identity table as a list of characters; filled in on the
# first maketrans() call and copied for every table built afterwards.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length; ValueError is raised otherwise.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        _idmapL = list(_idmap)
    # Work on a copy so the cached identity list is never modified.
    table = _idmapL[:]
    for i in range(len(fromstr)):
        table[ord(fromstr[i])] = tostr[i]
    return ''.join(table)
398 | ||
399 | # Substring replacement (global) | |
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new.  If the optional argument maxsplit is
    given and nonzero, it is passed on to s.replace() to limit the
    number of replacements; the default of 0 requests no limit.

    """
    # Forward the count only when a limit was really requested.  A string
    # method that reads 0 as "replace nothing" would otherwise return s
    # unchanged for the default call.
    if maxsplit:
        return s.replace(old, new, maxsplit)
    return s.replace(old, new)
409 | ||
410 | ||
411 | # XXX: transitional | |
412 | # | |
413 | # If string objects do not have methods, then we need to use the old string.py | |
414 | # library, which uses strop for many more things than just the few outlined | |
415 | # below. | |
# Transitional fallback: when string objects have no methods, replace the
# definitions above with those from the old stringold module.
if not hasattr('', 'upper'):
    from stringold import *
420 | ||
421 | # Try importing optional built-in module "strop" -- if it exists, | |
422 | # it redefines some string operations that are 100-1000 times faster. | |
423 | # It also defines values for whitespace, lowercase and uppercase | |
424 | # that match <ctype.h>'s definitions. | |
425 | ||
# Optional accelerator: if the built-in strop module can be imported, take
# its maketrans and character-class strings and rebuild `letters` from them.
# Only the import sits inside the try, so nothing else can be mistaken for
# a missing module.
try:
    from strop import maketrans, lowercase, uppercase, whitespace
except ImportError:
    pass # Use the original versions
else:
    letters = lowercase + uppercase