git.subgeniuskitty.com - OpenSPARC-T2-SAM/.git/blame - sam-t2/devtools/amd64/lib/python2.4/gzip.py

Commit	Line	Data
920dae64 AT	1	"""Functions that read and write gzipped files.
	2
	3	The user of the file doesn't have to worry about the compression,
	4	but random access is not allowed."""
	5
	6	# based on Andrew Kuchling's minigzip.py distributed with the zlib module
	7
	8	import struct, sys, time
	9	import zlib
	10	import __builtin__
	11
	12	__all__ = ["GzipFile","open"]
	13
	14	FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
	15
	16	READ, WRITE = 1, 2
	17
	18	def U32(i):
	19	"""Return i as an unsigned integer, assuming it fits in 32 bits.
	20
	21	If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
	22	"""
	23	if i < 0:
	24	i += 1L << 32
	25	return i
	26
	27	def LOWU32(i):
	28	"""Return the low-order 32 bits of an int, as a non-negative int."""
	29	return i & 0xFFFFFFFFL
	30
	31	def write32(output, value):
	32	output.write(struct.pack("<l", value))
	33
	34	def write32u(output, value):
	35	# The L format writes the bit pattern correctly whether signed
	36	# or unsigned.
	37	output.write(struct.pack("<L", value))
	38
	39	def read32(input):
	40	return struct.unpack("<l", input.read(4))[0]
	41
	42	def open(filename, mode="rb", compresslevel=9):
	43	"""Shorthand for GzipFile(filename, mode, compresslevel).
	44
	45	The filename argument is required; mode defaults to 'rb'
	46	and compresslevel defaults to 9.
	47
	48	"""
	49	return GzipFile(filename, mode, compresslevel)
	50
	51	class GzipFile:
	52	"""The GzipFile class simulates most of the methods of a file object with
	53	the exception of the readinto() and truncate() methods.
	54
	55	"""
	56
	57	myfileobj = None
	58	max_read_chunk = 10 * 1024 * 1024
	59
	60	def __init__(self, filename=None, mode=None,
	61	compresslevel=9, fileobj=None):
	62	"""Constructor for the GzipFile class.
	63
	64	At least one of fileobj and filename must be given a
65	non-trivial value.
66
67	The new class instance is based on fileobj, which can be a regular
68	file, a StringIO object, or any other object which simulates a file.
69	It defaults to None, in which case filename is opened to provide
70	a file object.
71
72	When fileobj is not None, the filename argument is only used to be
73	included in the gzip file header, which may includes the original
74	filename of the uncompressed file. It defaults to the filename of
75	fileobj, if discernible; otherwise, it defaults to the empty string,
76	and in this case the original filename is not included in the header.
77
78	The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
79	depending on whether the file will be read or written. The default
80	is the mode of fileobj if discernible; otherwise, the default is 'rb'.
81	Be aware that only the 'rb', 'ab', and 'wb' values should be used
82	for cross-platform portability.
83
84	The compresslevel argument is an integer from 1 to 9 controlling the
85	level of compression; 1 is fastest and produces the least compression,
86	and 9 is slowest and produces the most compression. The default is 9.
87
88	"""
89
90	# guarantee the file is opened in binary mode on platforms
91	# that care about that sort of thing
92	if mode and 'b' not in mode:
93	mode += 'b'
94	if fileobj is None:
95	fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
96	if filename is None:
97	if hasattr(fileobj, 'name'): filename = fileobj.name
98	else: filename = ''
99	if mode is None:
100	if hasattr(fileobj, 'mode'): mode = fileobj.mode
101	else: mode = 'rb'
102
103	if mode[0:1] == 'r':
104	self.mode = READ
105	# Set flag indicating start of a new member
106	self._new_member = True
107	self.extrabuf = ""
108	self.extrasize = 0
109	self.filename = filename
110
111	elif mode[0:1] == 'w' or mode[0:1] == 'a':
112	self.mode = WRITE
113	self._init_write(filename)
114	self.compress = zlib.compressobj(compresslevel,
115	zlib.DEFLATED,
116	-zlib.MAX_WBITS,
117	zlib.DEF_MEM_LEVEL,
118	0)
119	else:
120	raise IOError, "Mode " + mode + " not supported"
121
122	self.fileobj = fileobj
123	self.offset = 0
124
125	if self.mode == WRITE:
126	self._write_gzip_header()
127
128	def __repr__(self):
129	s = repr(self.fileobj)
130	return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
131
132	def _init_write(self, filename):
133	if filename[-3:] != '.gz':
134	filename = filename + '.gz'
135	self.filename = filename
136	self.crc = zlib.crc32("")
137	self.size = 0
138	self.writebuf = []
139	self.bufsize = 0
140
141	def _write_gzip_header(self):
142	self.fileobj.write('\037\213') # magic header
143	self.fileobj.write('\010') # compression method
144	fname = self.filename[:-3]
145	flags = 0
146	if fname:
147	flags = FNAME
148	self.fileobj.write(chr(flags))
149	write32u(self.fileobj, long(time.time()))
150	self.fileobj.write('\002')
151	self.fileobj.write('\377')
152	if fname:
153	self.fileobj.write(fname + '\000')
154
155	def _init_read(self):
156	self.crc = zlib.crc32("")
157	self.size = 0
158
159	def _read_gzip_header(self):
160	magic = self.fileobj.read(2)
161	if magic != '\037\213':
162	raise IOError, 'Not a gzipped file'
163	method = ord( self.fileobj.read(1) )
164	if method != 8:
165	raise IOError, 'Unknown compression method'
166	flag = ord( self.fileobj.read(1) )
167	# modtime = self.fileobj.read(4)
168	# extraflag = self.fileobj.read(1)
169	# os = self.fileobj.read(1)
170	self.fileobj.read(6)
171
172	if flag & FEXTRA:
173	# Read & discard the extra field, if present
174	xlen = ord(self.fileobj.read(1))
175	xlen = xlen + 256*ord(self.fileobj.read(1))
176	self.fileobj.read(xlen)
177	if flag & FNAME:
178	# Read and discard a null-terminated string containing the filename
179	while True:
180	s = self.fileobj.read(1)
181	if not s or s=='\000':
182	break
183	if flag & FCOMMENT:
184	# Read and discard a null-terminated string containing a comment
185	while True:
186	s = self.fileobj.read(1)
187	if not s or s=='\000':
188	break
189	if flag & FHCRC:
190	self.fileobj.read(2) # Read & discard the 16-bit header CRC
191
192
193	def write(self,data):
194	if self.mode != WRITE:
195	import errno
196	raise IOError(errno.EBADF, "write() on read-only GzipFile object")
197
198	if self.fileobj is None:
199	raise ValueError, "write() on closed GzipFile object"
200	if len(data) > 0:
201	self.size = self.size + len(data)
202	self.crc = zlib.crc32(data, self.crc)
203	self.fileobj.write( self.compress.compress(data) )
204	self.offset += len(data)
205
206	def read(self, size=-1):
207	if self.mode != READ:
208	import errno
209	raise IOError(errno.EBADF, "read() on write-only GzipFile object")
210
211	if self.extrasize <= 0 and self.fileobj is None:
212	return ''
213
214	readsize = 1024
215	if size < 0: # get the whole thing
216	try:
217	while True:
218	self._read(readsize)
219	readsize = min(self.max_read_chunk, readsize * 2)
220	except EOFError:
221	size = self.extrasize
222	else: # just get some more of it
223	try:
224	while size > self.extrasize:
225	self._read(readsize)
226	readsize = min(self.max_read_chunk, readsize * 2)
227	except EOFError:
228	if size > self.extrasize:
229	size = self.extrasize
230
231	chunk = self.extrabuf[:size]
232	self.extrabuf = self.extrabuf[size:]
233	self.extrasize = self.extrasize - size
234
235	self.offset += size
236	return chunk
237
238	def _unread(self, buf):
239	self.extrabuf = buf + self.extrabuf
240	self.extrasize = len(buf) + self.extrasize
241	self.offset -= len(buf)
242
243	def _read(self, size=1024):
244	if self.fileobj is None:
245	raise EOFError, "Reached EOF"
246
247	if self._new_member:
248	# If the _new_member flag is set, we have to
249	# jump to the next member, if there is one.
250	#
251	# First, check if we're at the end of the file;
252	# if so, it's time to stop; no more members to read.
253	pos = self.fileobj.tell() # Save current position
254	self.fileobj.seek(0, 2) # Seek to end of file
255	if pos == self.fileobj.tell():
256	raise EOFError, "Reached EOF"
257	else:
258	self.fileobj.seek( pos ) # Return to original position
259
260	self._init_read()
261	self._read_gzip_header()
262	self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
263	self._new_member = False
264
265	# Read a chunk of data from the file
266	buf = self.fileobj.read(size)
267
268	# If the EOF has been reached, flush the decompression object
269	# and mark this object as finished.
270
271	if buf == "":
272	uncompress = self.decompress.flush()
273	self._read_eof()
274	self._add_read_data( uncompress )
275	raise EOFError, 'Reached EOF'
276
277	uncompress = self.decompress.decompress(buf)
278	self._add_read_data( uncompress )
279
280	if self.decompress.unused_data != "":
281	# Ending case: we've come to the end of a member in the file,
282	# so seek back to the start of the unused data, finish up
283	# this member, and read a new gzip header.
284	# (The number of bytes to seek back is the length of the unused
285	# data, minus 8 because _read_eof() will rewind a further 8 bytes)
286	self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
287
288	# Check the CRC and file size, and set the flag so we read
289	# a new member on the next call
290	self._read_eof()
291	self._new_member = True
292
293	def _add_read_data(self, data):
294	self.crc = zlib.crc32(data, self.crc)
295	self.extrabuf = self.extrabuf + data
296	self.extrasize = self.extrasize + len(data)
297	self.size = self.size + len(data)
298
299	def _read_eof(self):
300	# We've read to the end of the file, so we have to rewind in order
301	# to reread the 8 bytes containing the CRC and the file size.
302	# We check the that the computed CRC and size of the
303	# uncompressed data matches the stored values. Note that the size
304	# stored is the true file size mod 2**32.
305	self.fileobj.seek(-8, 1)
306	crc32 = read32(self.fileobj)
307	isize = U32(read32(self.fileobj)) # may exceed 2GB
308	if U32(crc32) != U32(self.crc):
309	raise IOError, "CRC check failed"
310	elif isize != LOWU32(self.size):
311	raise IOError, "Incorrect length of data produced"
312
313	def close(self):
314	if self.mode == WRITE:
315	self.fileobj.write(self.compress.flush())
316	write32(self.fileobj, self.crc)
317	# self.size may exceed 2GB, or even 4GB
318	write32u(self.fileobj, LOWU32(self.size))
319	self.fileobj = None
320	elif self.mode == READ:
321	self.fileobj = None
322	if self.myfileobj:
323	self.myfileobj.close()
324	self.myfileobj = None
325
326	def __del__(self):
327	try:
328	if (self.myfileobj is None and
329	self.fileobj is None):
330	return
331	except AttributeError:
332	return
333	self.close()
334
335	def flush(self):
336	self.fileobj.flush()
337
338	def fileno(self):
339	"""Invoke the underlying file object's fileno() method.
340
341	This will raise AttributeError if the underlying file object
342	doesn't support fileno().
343	"""
344	return self.fileobj.fileno()
345
346	def isatty(self):
347	return False
348
349	def tell(self):
350	return self.offset
351
352	def rewind(self):
353	'''Return the uncompressed stream file position indicator to the
354	beginning of the file'''
355	if self.mode != READ:
356	raise IOError("Can't rewind in write mode")
357	self.fileobj.seek(0)
358	self._new_member = True
359	self.extrabuf = ""
360	self.extrasize = 0
361	self.offset = 0
362
363	def seek(self, offset):
364	if self.mode == WRITE:
365	if offset < self.offset:
366	raise IOError('Negative seek in write mode')
367	count = offset - self.offset
368	for i in range(count // 1024):
369	self.write(1024 * '\0')
370	self.write((count % 1024) * '\0')
371	elif self.mode == READ:
372	if offset < self.offset:
373	# for negative seek, rewind and do positive seek
374	self.rewind()
375	count = offset - self.offset
376	for i in range(count // 1024):
377	self.read(1024)
378	self.read(count % 1024)
379
380	def readline(self, size=-1):
381	if size < 0: size = sys.maxint
382	bufs = []
383	readsize = min(100, size) # Read from the file in small chunks
384	while True:
385	if size == 0:
386	return "".join(bufs) # Return resulting line
387
388	c = self.read(readsize)
389	i = c.find('\n')
390	if size is not None:
391	# We set i=size to break out of the loop under two
392	# conditions: 1) there's no newline, and the chunk is
393	# larger than size, or 2) there is a newline, but the
394	# resulting line would be longer than 'size'.
395	if i==-1 and len(c) > size: i=size-1
396	elif size <= i: i = size -1
397
398	if i >= 0 or c == '':
399	bufs.append(c[:i+1]) # Add portion of last chunk
400	self._unread(c[i+1:]) # Push back rest of chunk
401	return ''.join(bufs) # Return resulting line
402
403	# Append chunk to list, decrease 'size',
404	bufs.append(c)
405	size = size - len(c)
406	readsize = min(size, readsize * 2)
407
408	def readlines(self, sizehint=0):
409	# Negative numbers result in reading all the lines
410	if sizehint <= 0:
411	sizehint = sys.maxint
412	L = []
413	while sizehint > 0:
414	line = self.readline()
415	if line == "":
416	break
417	L.append(line)
418	sizehint = sizehint - len(line)
419
420	return L
421
422	def writelines(self, L):
423	for line in L:
424	self.write(line)
425
426	def __iter__(self):
427	return self
428
429	def next(self):
430	line = self.readline()
431	if line:
432	return line
433	else:
434	raise StopIteration
435
436
def _test():
    """Command-line driver: compress each argument, or decompress with -d.

    Acts like gzip; with -d, acts like gunzip.  The input file is not
    deleted, however, nor are any other gzip options or features
    supported.  An argument of "-" (the default when none is given)
    means stdin to stdout.
    """
    args = sys.argv[1:]
    decompress = bool(args) and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        use_std = (arg == "-")
        if decompress:
            if use_std:
                src = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
                dst = sys.stdout
            elif arg[-3:] != ".gz":
                print(" ".join(["filename doesn't end in .gz:", repr(arg)]))
                continue
            else:
                src = open(arg, "rb")
                dst = __builtin__.open(arg[:-3], "wb")
        elif use_std:
            src = sys.stdin
            dst = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
        else:
            src = __builtin__.open(arg, "rb")
            dst = open(arg + ".gz", "wb")
        # Copy in 1 KiB pieces until the source is exhausted.
        chunk = src.read(1024)
        while chunk:
            dst.write(chunk)
            chunk = src.read(1024)
        # Never close the process-wide standard streams.
        if dst is not sys.stdout:
            dst.close()
        if src is not sys.stdin:
            src.close()

if __name__ == '__main__':
    _test()