Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / v9 / lib / python2.4 / gzip.py
CommitLineData
920dae64
AT
1"""Functions that read and write gzipped files.
2
3The user of the file doesn't have to worry about the compression,
4but random access is not allowed."""
5
6# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
8import struct, sys, time
9import zlib
10import __builtin__
11
12__all__ = ["GzipFile","open"]
13
# Bit masks tested against the flag byte of a gzip member header
# (see _read_gzip_header); FNAME is also the only flag this module writes.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode to record whether the object was opened
# for reading or for writing/appending.
READ, WRITE = 1, 2
17
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    A negative i is treated as the signed reading of a 32-bit pattern and
    is moved into the range [2**31, 2**32) by adding 2**32; non-negative
    values are returned unchanged.  On builds where a plain int cannot
    hold the result, Python promotes it to a long automatically.
    """
    if i < 0:
        # No 'L' suffix needed: the shift is promoted to long on its own
        # when it does not fit in a machine int.
        i += 1 << 32
    return i
26
def LOWU32(i):
    """Return the low-order 32 bits of i as a non-negative integer.

    Used to reduce sizes that may exceed 4GB to the "size mod 2**32"
    value stored in the gzip trailer.
    """
    # The mask literal is positive without an 'L' suffix, so the result
    # is never negative regardless of the sign of i.
    return i & 0xFFFFFFFF
30
def write32(output, value):
    """Write value to output as a signed 32-bit little-endian integer.

    output only needs a write() method; struct.pack raises if value does
    not fit the "<l" format.
    """
    packed = struct.pack("<l", value)
    output.write(packed)
33
def write32u(output, value):
    """Write value to output as an unsigned 32-bit little-endian integer.

    output only needs a write() method.
    """
    # Original author's note: the L format writes the bit pattern
    # correctly whether signed or unsigned.
    packed = struct.pack("<L", value)
    output.write(packed)
38
def read32(input):
    """Read four bytes from input and decode them as a signed 32-bit
    little-endian integer.

    struct.unpack raises if fewer than four bytes are available.
    """
    raw = input.read(4)
    (value,) = struct.unpack("<l", raw)
    return value
41
def open(filename, mode="rb", compresslevel=9):
    """Open a gzip file; shorthand for GzipFile(filename, mode, compresslevel).

    filename is required.  mode defaults to 'rb' and compresslevel
    defaults to 9.  The new GzipFile object is returned.

    """
    gzfile = GzipFile(filename, mode, compresslevel)
    return gzfile
50
class GzipFile:
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened by the constructor itself (when no fileobj was
    # supplied).  close() closes it; a caller-supplied fileobj is left open.
    myfileobj = None
    # Upper bound on the chunk size read() asks _read() to fetch.
    max_read_chunk = 10 * 1024 * 1024

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        Raises IOError if the mode does not start with 'r', 'w' or 'a'.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'): filename = fileobj.name
            else: filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'): mode = fileobj.mode
            else: mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Decompressed data not yet handed to the caller, and its length
            self.extrabuf = ""
            self.extrasize = 0
            self.filename = filename

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # A negative window size asks zlib for a raw deflate stream;
            # the gzip header and trailer are written by this class.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Position in the *uncompressed* stream, as reported by tell()
        self.offset = 0

        if self.mode == WRITE:
            self._write_gzip_header()

    def __repr__(self):
        """Return '<gzip ...>' wrapping the repr of the underlying file."""
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the per-member write state (name, CRC, size counters).

        The stored filename always ends in '.gz'; _write_gzip_header()
        strips that suffix again to obtain the name put in the header.
        """
        if filename[-3:] != '.gz':
            filename = filename + '.gz'
        self.filename = filename
        self.crc = zlib.crc32("")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file."""
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        fname = self.filename[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, long(time.time()))  # modification time
        self.fileobj.write('\002')                 # extra flags byte
        self.fileobj.write('\377')                 # OS byte
        if fname:
            self.fileobj.write(fname + '\000')

    def _init_read(self):
        """Reset the running CRC and size for a new member being read."""
        self.crc = zlib.crc32("")
        self.size = 0

    def _read_header_byte(self):
        """Read one header byte and return it as an integer.

        Raises IOError if the file ends first, so that a truncated header
        is reported as an I/O problem rather than as a TypeError from
        ord('').
        """
        c = self.fileobj.read(1)
        if not c:
            raise IOError('Truncated gzip header')
        return ord(c)

    def _read_gzip_header(self):
        """Consume a gzip member header, leaving the file positioned at the
        start of the compressed data.

        Raises IOError if the magic number or compression method is wrong,
        or if the fixed part of the header is cut short.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = self._read_header_byte()
        if method != 8:
            raise IOError('Unknown compression method')
        flag = self._read_header_byte()
        # Skip modtime (4 bytes), extra flags (1 byte) and OS (1 byte)
        self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = self._read_header_byte()
            xlen = xlen + 256*self._read_header_byte()
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self,data):
        """Compress data and write it to the underlying file.

        Raises IOError (EBADF) if the object was opened for reading and
        ValueError if it has been closed.
        """
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write( self.compress.compress(data) )
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to size bytes of uncompressed data.

        A negative size reads everything up to the end of the file.
        Raises IOError (EBADF) if the object was opened for writing.
        """
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        # _read() signals end of data with EOFError; the request size is
        # doubled on each pass, capped at max_read_chunk.
        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back onto the front of the pending uncompressed data."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read up to size compressed bytes and append the decompressed
        result to the pending buffer.

        Raises EOFError when there is nothing more to read; read() relies
        on that to stop looping.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek( pos ) # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data( uncompress )
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data( uncompress )

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Fold freshly decompressed data into the CRC, size and buffer."""
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC and size stored in the member trailer.

        Raises IOError if either does not match what was decompressed.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check the that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = U32(read32(self.fileobj))   # may exceed 2GB
        if U32(crc32) != U32(self.crc):
            raise IOError("CRC check failed")
        elif isize != LOWU32(self.size):
            raise IOError("Incorrect length of data produced")

    def close(self):
        """Finish the gzip stream and release the underlying file.

        In write mode the remaining compressed data and the CRC/size
        trailer are written first.  A file opened by the constructor is
        closed; a caller-supplied fileobj is left open.  Calling close()
        again is a no-op.
        """
        fileobj = self.fileobj
        # Detach first so a repeated close() (or __del__ after a failed
        # close) never writes a second trailer or dereferences None.
        self.fileobj = None
        try:
            if fileobj is not None and self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, LOWU32(self.size))
        finally:
            # Release the file we opened even if the trailer write failed.
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def __del__(self):
        """Close the object on garbage collection if still open."""
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # __init__ failed before the attributes were set
            return
        self.close()

    def flush(self):
        """Flush the underlying file object."""
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        """Always return False."""
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset, whence=0):
        """Move to the given position in the uncompressed stream.

        whence is 0 (the default) for an absolute offset or 1 for an
        offset relative to the current position; seeking relative to the
        end is not supported and raises ValueError.

        Seeking is emulated: in write mode zero bytes are written up to
        the target (moving backwards raises IOError); in read mode data
        is read and discarded, rewinding first for a backward seek.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for i in range(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

    def readline(self, size=-1):
        """Return the next line, including its trailing newline if any.

        At most size bytes are returned when size is non-negative.  An
        empty string means end of file.
        """
        if size < 0: size = sys.maxint
        bufs = []
        readsize = min(100, size)    # Read from the file in small chunks
        while True:
            if size == 0:
                return "".join(bufs) # Return resulting line

            c = self.read(readsize)
            i = c.find('\n')
            # We set i=size-1 to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if i==-1 and len(c) > size: i=size-1
            elif size <= i: i = size -1

            if i >= 0 or c == '':
                bufs.append(c[:i+1])    # Add portion of last chunk
                self._unread(c[i+1:])   # Push back rest of chunk
                return ''.join(bufs)    # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        """Return a list of lines, stopping once about sizehint bytes have
        been collected or the file is exhausted."""
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxint
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == "":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write each string in the iterable L, without adding newlines."""
        for line in L:
            self.write(line)

    def __iter__(self):
        """Return self; iteration yields lines via next()."""
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of file."""
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration
435
436
437def _test():
438 # Act like gzip; with -d, act like gunzip.
439 # The input file is not deleted, however, nor are any other gzip
440 # options or features supported.
441 args = sys.argv[1:]
442 decompress = args and args[0] == "-d"
443 if decompress:
444 args = args[1:]
445 if not args:
446 args = ["-"]
447 for arg in args:
448 if decompress:
449 if arg == "-":
450 f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
451 g = sys.stdout
452 else:
453 if arg[-3:] != ".gz":
454 print "filename doesn't end in .gz:", repr(arg)
455 continue
456 f = open(arg, "rb")
457 g = __builtin__.open(arg[:-3], "wb")
458 else:
459 if arg == "-":
460 f = sys.stdin
461 g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
462 else:
463 f = __builtin__.open(arg, "rb")
464 g = open(arg + ".gz", "wb")
465 while True:
466 chunk = f.read(1024)
467 if not chunk:
468 break
469 g.write(chunk)
470 if g is not sys.stdout:
471 g.close()
472 if f is not sys.stdin:
473 f.close()
474
# When executed as a script, run the minimal gzip/gunzip driver.
if __name__ == '__main__':
    _test()