Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """Helper class to quickly write a loop over all standard input files. |
2 | ||
3 | Typical use is: | |
4 | ||
5 | import fileinput | |
6 | for line in fileinput.input(): | |
7 | process(line) | |
8 | ||
9 | This iterates over the lines of all files listed in sys.argv[1:], | |
10 | defaulting to sys.stdin if the list is empty. If a filename is '-' it | |
11 | is also replaced by sys.stdin. To specify an alternative list of | |
12 | filenames, pass it as the argument to input(). A single file name is | |
13 | also allowed. | |
14 | ||
15 | Functions filename(), lineno() return the filename and cumulative line | |
16 | number of the line that has just been read; filelineno() returns its | |
17 | line number in the current file; isfirstline() returns true iff the | |
18 | line just read is the first line of its file; isstdin() returns true | |
19 | iff the line was read from sys.stdin. Function nextfile() closes the | |
20 | current file so that the next iteration will read the first line from | |
21 | the next file (if any); lines not read from the file will not count | |
22 | towards the cumulative line count; the filename is not changed until | |
23 | after the first line of the next file has been read. Function close() | |
24 | closes the sequence. | |
25 | ||
26 | Before any lines have been read, filename() returns None and both line | |
27 | numbers are zero; nextfile() has no effect. After all lines have been | |
28 | read, filename() and the line number functions return the values | |
29 | pertaining to the last line read; nextfile() has no effect. | |
30 | ||
31 | All files are opened in text mode. If an I/O error occurs during | |
32 | opening or reading a file, the IOError exception is raised. | |
33 | ||
34 | If sys.stdin is used more than once, the second and further use will | |
35 | return no lines, except perhaps for interactive use, or if it has been | |
36 | explicitly reset (e.g. using sys.stdin.seek(0)). | |
37 | ||
38 | Empty files are opened and immediately closed; the only time their | |
39 | presence in the list of filenames is noticeable at all is when the | |
40 | last file opened is empty. | |
41 | ||
42 | It is possible that the last line of a file doesn't end in a newline | |
43 | character; otherwise lines are returned including the trailing | |
44 | newline. | |
45 | ||
46 | Class FileInput is the implementation; its methods filename(), | |
47 | lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close() | |
48 | correspond to the functions in the module. In addition it has a | |
49 | readline() method which returns the next input line, and a | |
50 | __getitem__() method which implements the sequence behavior. The | |
51 | sequence must be accessed in strictly sequential order; sequence | |
52 | access and readline() cannot be mixed. | |
53 | ||
54 | Optional in-place filtering: if the keyword argument inplace=1 is | |
55 | passed to input() or to the FileInput constructor, the file is moved | |
56 | to a backup file and standard output is directed to the input file. | |
57 | This makes it possible to write a filter that rewrites its input file | |
58 | in place. If the keyword argument backup=".<some extension>" is also | |
59 | given, it specifies the extension for the backup file, and the backup | |
60 | file remains around; by default, the extension is ".bak" and it is | |
61 | deleted when the output file is closed. In-place filtering is | |
62 | disabled when standard input is read. XXX The current implementation | |
63 | does not work for MS-DOS 8+3 filesystems. | |
64 | ||
65 | Performance: this module is unfortunately one of the slower ways of | |
66 | processing large numbers of input lines. Nevertheless, a significant | |
67 | speed-up has been obtained by using readlines(bufsize) instead of | |
68 | readline(). A new keyword argument, bufsize=N, is present on the | |
69 | input() function and the FileInput() class to override the default | |
70 | buffer size. | |
71 | ||
72 | XXX Possible additions: | |
73 | ||
74 | - optional getopt argument processing | |
75 | - specify open mode ('r' or 'rb') | |
76 | - fileno() | |
77 | - isatty() | |
78 | - read(), read(size), even readlines() | |
79 | ||
80 | """ | |
81 | ||
82 | import sys, os | |
83 | ||
# Public names of this module (what a star-import exposes).
__all__ = ["input","close","nextfile","filename","lineno","filelineno",
           "isfirstline","isstdin","FileInput"]

# The FileInput instance installed by the most recent input() call; reset to
# None by close().  The module-level functions delegate to this object.
_state = None

# Size hint handed to readlines() when the caller passes a false bufsize.
DEFAULT_BUFSIZE = 8*1024
90 | ||
def input(files=None, inplace=0, backup="", bufsize=0):
    """input([files[, inplace[, backup[, bufsize]]]])

    Build a FileInput object from the given arguments, install it as the
    global state consulted by the other functions of this module, and
    return it so that the caller can iterate over it.  All parameters are
    forwarded unchanged to the FileInput constructor.

    Raises RuntimeError if the previously installed instance still has a
    file open.
    """
    global _state
    previous = _state
    if previous and previous._file:
        raise RuntimeError("input() already active")
    fresh = FileInput(files, inplace, backup, bufsize)
    _state = fresh
    return fresh
104 | ||
def close():
    """Close the sequence.

    The global state is cleared first, so the module counts as inactive
    even if closing the underlying FileInput object raises.
    """
    global _state
    active, _state = _state, None
    if active:
        active.close()
112 | ||
def nextfile():
    """
    Close the file currently being read, so that the next iteration
    yields the first line of the following file (if there is one).
    Lines left unread in the closed file are not added to the cumulative
    line count, and the reported filename only changes once the first
    line of the next file has been read.  Calling this before the first
    line has been read does nothing (it cannot be used to skip the first
    file), and neither does calling it after the last line of the last
    file has been read.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.nextfile()
    raise RuntimeError("no active input()")
126 | ||
def filename():
    """
    Return the name of the file that is currently being read, or None
    if no line has been read yet.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.filename()
    raise RuntimeError("no active input()")
135 | ||
def lineno():
    """
    Return the cumulative line number of the line most recently read.
    The result is 0 before the first line has been read; once the last
    line of the last file has been read it stays at that line's number.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.lineno()
    raise RuntimeError("no active input()")
145 | ||
def filelineno():
    """
    Return the line number within the current file.  The result is 0
    before the first line has been read; once the last line of the last
    file has been read it stays at that line's number within its file.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.filelineno()
    raise RuntimeError("no active input()")
155 | ||
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.isfirstline()
    raise RuntimeError("no active input()")
164 | ||
def isstdin():
    """
    Return true if the last line was read from sys.stdin, otherwise
    return false.

    Raises RuntimeError when no input() is active.
    """
    state = _state
    if state:
        return state.isstdin()
    raise RuntimeError("no active input()")
173 | ||
class FileInput:
    """class FileInput([files[, inplace[, backup[, bufsize]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(),
    nextfile() and close() correspond to the functions of the same name
    in the module.  In addition it has a readline() method which returns
    the next input line, and a __getitem__() method which implements the
    sequence behavior.  The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.

    Constructor arguments:

    files   -- a single filename, an iterable of filenames, or None to
               use sys.argv[1:]; an empty list means standard input, and
               a filename of '-' also stands for standard input.
    inplace -- if true, each file is renamed to a backup file and
               sys.stdout is redirected to a new file with the original
               name while that file is being read.
    backup  -- extension for the backup file; when empty, the backup is
               named with os.extsep + "bak" and removed once the file is
               finished.
    bufsize -- size hint passed to readlines(); a false value selects
               DEFAULT_BUFSIZE.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        # Set up every state attribute before touching `files`: if the
        # normalization below raises (e.g. a non-iterable argument),
        # __del__ -> close() still finds a fully initialized object.
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0

        # isinstance() rather than an exact type comparison, so that
        # unicode strings and str subclasses count as a single filename
        # instead of being exploded into one "filename" per character.
        if isinstance(files, (str, type(u''))):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining filenames."""
        self.nextfile()
        self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve straight from the buffer without the extra
        # method call into readline().
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Spelling of the iterator protocol used by Python 3.
    __next__ = next

    def __getitem__(self, i):
        """Sequence protocol; i must equal the number of lines read so far.

        Raises RuntimeError for out-of-order access and IndexError once
        the input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Finish the current file and release everything tied to it.

        Restores sys.stdout if it was redirected, closes the in-place
        output file and the input file (standard input is left open),
        removes the backup file unless a backup extension was requested,
        and empties the line buffer.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        # The cleanup steps are nested in try/finally so that a failure
        # while closing one file cannot leak the other handle or leave
        # the temporary backup file behind.
        try:
            if output:
                output.close()
        finally:
            infile = self._file
            self._file = None
            try:
                if infile and not self._isstdin:
                    infile.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: the backup may already be gone.
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next input line, or "" when all input is exhausted."""
        # A loop rather than recursion: every empty file costs one more
        # pass, so a long run of empty files must not deepen the stack.
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next_file()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted; move on to the next one.
                self.nextfile()

    def _open_next_file(self):
        """Pop the first pending filename and open it as the current file."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            # In-place filtering is never applied to standard input.
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
            return
        if not self._inplace:
            # This may raise IOError
            self._file = open(self._filename, "r")
            return

        self._backupfilename = (
            self._filename + (self._backup or os.extsep + "bak"))
        try:
            os.unlink(self._backupfilename)
        except OSError:
            pass
        # The next few lines may raise IOError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, "r")
        try:
            perm = os.fstat(self._file.fileno()).st_mode
        except OSError:
            self._output = open(self._filename, "w")
        else:
            # Recreate the file with the mode bits of the original.
            fd = os.open(self._filename,
                         os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                         perm)
            self._output = os.fdopen(fd, "w")
            try:
                if hasattr(os, 'chmod'):
                    os.chmod(self._filename, perm)
            except OSError:
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
342 | ||
def _test():
    """Command-line self-test: echo every input line with its numbering.

    Options: -i switches on in-place filtering and -b EXT sets the backup
    extension; the remaining arguments are the files to read.
    """
    import getopt
    inplace = 0
    backup = 0
    options, paths = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in options:
        if flag == '-i':
            inplace = 1
        if flag == '-b':
            backup = value
    for text in input(paths, inplace=inplace, backup=backup):
        # Drop one trailing newline and then one trailing carriage return.
        if text.endswith('\n'):
            text = text[:-1]
        if text.endswith('\r'):
            text = text[:-1]
        marker = ""
        if isfirstline():
            marker = "*"
        # A single parenthesized operand, so this prints the same text
        # whether print is a statement or a function.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, text))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

if __name__ == '__main__':
    _test()