# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
"""Read from and write to tar format archives."""

# Module metadata. The $Keyword: ... $ strings are version-control keyword
# fields and are kept verbatim.
__version__ = "$Revision: 1.21.2.5 $"
# $Source: /cvsroot/python/python/dist/src/Lib/tarfile.py,v $

__author__ = "Lars Gustäbel (lars@gustaebel.de)"
__date__ = "$Date: 2005/08/27 10:08:21 $"
__cvsid__ = "$Id: tarfile.py,v 1.21.2.5 2005/08/27 10:08:21 loewis Exp $"
__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
# Refuse to import on classic MacOS ("mac" platform).
if sys.platform == "mac":
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax.
    raise ImportError("tarfile does not work for platform==mac")
# Public names exported by a star-import of this module.
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
]
#---------------------------------------------------------
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Maximum size of a file: the largest value that fits in 11 octal digits
# (octal 77777777777). Spelled as an expression because the long-octal
# literal form is Python-2-only syntax; the value is the same.
MAXSIZE_MEMBER = 8 ** 11 - 1

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file
#---------------------------------------------------------
#---------------------------------------------------------
# File types that tarfile can cope with.
# NOTE(review): the tuple was left unterminated after GNUTYPE_LONGLINK.
# GNUTYPE_SPARSE is the only type constant not otherwise listed and is
# assumed to be the missing final element -- confirm against upstream.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)
# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE,
    AREGTYPE,
    CONTTYPE,
    GNUTYPE_SPARSE,
)
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# The values are octal. They are parsed from octal digit strings because a
# bare leading-zero literal is only valid on Python 2, while int(..., 8)
# yields the same integers on every Python version.
S_IFLNK = int("120000", 8)      # symbolic link
S_IFREG = int("100000", 8)      # regular file
S_IFBLK = int("060000", 8)      # block device
S_IFDIR = int("040000", 8)      # directory
S_IFCHR = int("020000", 8)      # character device

TSUID = int("4000", 8)          # set UID on execution
TSGID = int("2000", 8)          # set GID on execution

TUREAD = int("400", 8)          # read by owner
TUWRITE = int("200", 8)         # write by owner
TUEXEC = int("100", 8)          # execute/search by owner
TGREAD = int("040", 8)          # read by group
TGWRITE = int("020", 8)         # write by group
TGEXEC = int("010", 8)          # execute/search by group
TOREAD = int("004", 8)          # read by other
TOWRITE = int("002", 8)         # write by other
TOEXEC = int("001", 8)          # execute/search by other
#---------------------------------------------------------
#---------------------------------------------------------
"""Convert a null-terminated string buffer to a python string.
"""Calculate the checksum for a member's header. It's a simple addition
of all bytes, treating the chksum field as if filled with spaces.
buf is a 512 byte long string buffer which holds the header.
chk
= 256 # chksum field is treated as blanks,
# so the initial value is 8 * ord(" ")
for c
in buf
[:148]: chk
+= ord(c
) # sum up all bytes before chksum
for c
in buf
[156:]: chk
+= ord(c
) # sum up all bytes after chksum
def copyfileobj(src
, dst
, length
=None):
"""Copy length bytes from fileobj src to fileobj dst.
If length is None, copy the entire content.
shutil
.copyfileobj(src
, dst
)
blocks
, remainder
= divmod(length
, BUFSIZE
)
raise IOError, "end of file reached"
buf
= src
.read(remainder
)
raise IOError, "end of file reached"
"""Convert a file's mode to a string of the form
for table
in filemode_table
:
normpath
= lambda path
: os
.path
.normpath(path
).replace(os
.sep
, "/")
normpath
= os
.path
.normpath
class TarError(Exception):
    """Base class for the exceptions defined below."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
#---------------------------
# internal stream interface
#---------------------------
"""Low-level file object. Supports reading and writing.
It is used instead of a regular file object for streaming
def __init__(self
, name
, mode
):
"w": os
.O_WRONLY | os
.O_CREAT | os
.O_TRUNC
,
if hasattr(os
, "O_BINARY"):
self
.fd
= os
.open(name
, mode
)
return os
.read(self
.fd
, size
)
"""Class that serves as an adapter between TarFile and
a stream-like object. The stream-like object only
needs to have a read() or write() method and is accessed
blockwise. Use of gzip or bzip2 compression is possible.
A stream-like object could be for example: sys.stdin,
sys.stdout, a socket, a tape device etc.
_Stream is intended to be used only internally.
def __init__(self
, name
, mode
, type, fileobj
, bufsize
):
"""Construct a _Stream object.
fileobj
= _LowLevelFile(name
, mode
)
raise CompressionError
, "zlib module is not available"
self
.crc
= zlib
.crc32("")
raise CompressionError
, "bz2 module is not available"
self
.cmp = bz2
.BZ2Decompressor()
self
.cmp = bz2
.BZ2Compressor()
def _init_write_gz(self
):
"""Initialize for writing with gzip compression.
self
.cmp = self
.zlib
.compressobj(9, self
.zlib
.DEFLATED
,
timestamp
= struct
.pack("<L", long(time
.time()))
self
.__write
("\037\213\010\010%s\002\377" % timestamp
)
if self
.name
.endswith(".gz"):
self
.name
= self
.name
[:-3]
self
.__write
(self
.name
+ NUL
)
"""Write string s to the stream.
self
.crc
= self
.zlib
.crc32(s
, self
.crc
)
"""Write string s to the stream if a whole new block
while len(self
.buf
) > self
.bufsize
:
self
.fileobj
.write(self
.buf
[:self
.bufsize
])
self
.buf
= self
.buf
[self
.bufsize
:]
"""Close the _Stream object. No operation should be
if self
.mode
== "w" and self
.type != "tar":
self
.buf
+= self
.cmp.flush()
if self
.mode
== "w" and self
.buf
:
self
.fileobj
.write(self
.buf
)
self
.fileobj
.write(struct
.pack("<l", self
.crc
))
self
.fileobj
.write(struct
.pack("<L", self
.pos
& 0xffffFFFFL
))
"""Initialize for reading a gzip compressed fileobj.
self
.cmp = self
.zlib
.decompressobj(-self
.zlib
.MAX_WBITS
)
# taken from gzip.GzipFile with some alterations
if self
.__read
(2) != "\037\213":
raise ReadError
, "not a gzip file"
if self
.__read
(1) != "\010":
raise CompressionError
, "unsupported compression method"
flag
= ord(self
.__read
(1))
xlen
= ord(self
.__read
(1)) + 256 * ord(self
.__read
(1))
"""Return the stream's file pointer position.
"""Set the stream's file pointer to pos. Negative seeking
blocks
, remainder
= divmod(pos
- self
.pos
, self
.bufsize
)
raise StreamError
, "seeking backwards is not allowed"
def read(self
, size
=None):
"""Return the next size number of bytes from the stream.
If size is not defined, return all bytes of the stream
buf
= self
._read
(self
.bufsize
)
"""Return size bytes from the stream.
buf
= self
.__read
(self
.bufsize
)
buf
= self
.cmp.decompress(buf
)
"""Return size bytes from stream. If internal buffer is empty,
read another block from the stream.
buf
= self
.fileobj
.read(self
.bufsize
)
#------------------------
#------------------------
class ExFileObject(object):
"""File-like object for reading an archive member.
Is returned by TarFile.extractfile(). Support for
def __init__(self
, tarfile
, tarinfo
):
self
.fileobj
= tarfile
.fileobj
self
.offset
= tarinfo
.offset_data
self
.sparse
= tarinfo
.sparse
self
.read
= self
._readsparse
self
.read
= self
._readnormal
"""Overloadable read method.
return self
.fileobj
.read(size
)
def readline(self
, size
=-1):
"""Read a line with approx. size. If size is negative,
read a whole line. readline() and read() must not
nl
= self
.linebuffer
.find("\n")
size
-= len(self
.linebuffer
)
while (nl
< 0 and size
> 0):
buf
= self
.read(min(size
, 100))
nl
= self
.linebuffer
.find("\n")
buf
= self
.linebuffer
[:nl
]
self
.linebuffer
= self
.linebuffer
[nl
+ 1:]
"""Return a list with all (following) lines.
def _readnormal(self
, size
=None):
"""Read operation for regular files.
raise ValueError, "file is closed"
self
.fileobj
.seek(self
.offset
+ self
.pos
)
bytesleft
= self
.size
- self
.pos
bytestoread
= min(size
, bytesleft
)
return self
.__read
(bytestoread
)
def _readsparse(self
, size
=None):
"""Read operation for sparse files.
raise ValueError, "file is closed"
size
= self
.size
- self
.pos
buf
= self
._readsparsesection
(size
)
def _readsparsesection(self
, size
):
"""Read a single section of a sparse file.
section
= self
.sparse
.find(self
.pos
)
toread
= min(size
, section
.offset
+ section
.size
- self
.pos
)
if isinstance(section
, _data
):
realpos
= section
.realpos
+ self
.pos
- section
.offset
self
.fileobj
.seek(self
.offset
+ realpos
)
return self
.__read
(toread
)
"""Return the current file position.
def seek(self
, pos
, whence
=0):
"""Seek to a position in the file.
self
.pos
= min(max(pos
, 0), self
.size
)
self
.pos
= max(self
.pos
+ pos
, 0)
self
.pos
= min(self
.pos
+ pos
, self
.size
)
self
.pos
= max(min(self
.size
+ pos
, self
.size
), 0)
"""Close the file object.
"""Informational class which holds the details about an
archive member given by a tar header block.
TarInfo objects are returned by TarFile.getmember(),
TarFile.getmembers() and TarFile.gettarinfo() and are
usually created internally.
def __init__(self
, name
=""):
"""Construct a TarInfo object. name is the optional name
self
.name
= name
# member name (dirnames must end with '/')
self
.mode
= 0666 # file permissions
self
.size
= 0 # file size
self
.mtime
= 0 # modification time
self
.chksum
= 0 # header checksum
self
.type = REGTYPE
# member type
self
.linkname
= "" # link name
self
.uname
= "user" # user name
self
.gname
= "group" # group name
self
.devminor
= 0 #-for use with CHRTYPE and BLKTYPE
self
.prefix
= "" # prefix to filename or holding information
self
.offset
= 0 # the tar header starts here
self
.offset_data
= 0 # the file's data starts here
return "<%s %r at %#x>" % (self
.__class
__.__name
__,self
.name
,id(self
))
"""Construct a TarInfo object from a 512 byte string buffer.
tarinfo
.name
= nts(buf
[0:100])
tarinfo
.mode
= int(buf
[100:108], 8)
tarinfo
.uid
= int(buf
[108:116],8)
tarinfo
.gid
= int(buf
[116:124],8)
# There are two possible codings for the size field we
# have to discriminate, see comment in tobuf() below.
if buf
[124] != chr(0200):
tarinfo
.size
= long(buf
[124:136], 8)
tarinfo
.size
+= ord(buf
[125 + i
])
tarinfo
.mtime
= long(buf
[136:148], 8)
tarinfo
.chksum
= int(buf
[148:156], 8)
tarinfo
.type = buf
[156:157]
tarinfo
.linkname
= nts(buf
[157:257])
tarinfo
.uname
= nts(buf
[265:297])
tarinfo
.gname
= nts(buf
[297:329])
tarinfo
.devmajor
= int(buf
[329:337], 8)
tarinfo
.devminor
= int(buf
[337:345], 8)
tarinfo
.devmajor
= tarinfo
.devmajor
= 0
tarinfo
.prefix
= buf
[345:500]
# The prefix field is used for filenames longer than 100 characters:
# name = prefix + '/' + name
if tarinfo
.type != GNUTYPE_SPARSE
:
tarinfo
.name
= normpath(os
.path
.join(nts(tarinfo
.prefix
), tarinfo
.name
))
# Directory names should have a '/' at the end.
if tarinfo
.isdir() and tarinfo
.name
[-1:] != "/":
frombuf
= classmethod(frombuf
)
"""Return a tar header block as a 512 byte string.
# Prefer the size to be encoded as 11 octal ascii digits
# which is the most portable. If the size exceeds this
# limit (>= 8 GB), encode it as an 88-bit value which is
if self
.size
<= MAXSIZE_MEMBER
:
size
= "%011o" % self
.size
size
= chr(s
& 0377) + size
# The following code was contributed by Detlef Lannert.
for value
, fieldsize
in (
("%07o" % (self
.mode
& 07777), 8),
("%011o" % self
.mtime
, 12),
("%07o" % self
.devmajor
, 8),
("%07o" % self
.devminor
, 8),
parts
.append(value
[:fieldsize
] + (fieldsize
- l
) * NUL
)
chksum
= calc_chksum(buf
)
buf
= buf
[:148] + "%06o\0" % chksum
+ buf
[155:]
buf
+= (BLOCKSIZE
- len(buf
)) * NUL
return self
.type in REGULAR_TYPES
return self
.type == DIRTYPE
return self
.type == SYMTYPE
return self
.type == LNKTYPE
return self
.type == CHRTYPE
return self
.type == BLKTYPE
return self
.type == FIFOTYPE
return self
.type == GNUTYPE_SPARSE
return self
.type in (CHRTYPE
, BLKTYPE
, FIFOTYPE
)
"""The TarFile Class provides an interface to tar archives.
debug = 0                   # Debug verbosity: 0 (no msgs) up to 3 (all msgs).

dereference = False         # If true, add the content of a linked file to
                            # the tar file, else the link.

ignore_zeros = False        # If true, empty or invalid blocks are skipped.

errorlevel = 0              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

posix = False               # If True, generates POSIX.1-1990-compliant
                            # archives (no GNU extensions!)

fileobject = ExFileObject   # Called as self.fileobject(self, tarinfo) to
                            # build the object returned by extractfile().
def __init__(self
, name
=None, mode
="r", fileobj
=None):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode'
If `fileobj' is given, it is used for reading or writing data. If it
can be determined, `mode' is overridden by `fileobj's mode.
`fileobj' is not closed, when TarFile is closed.
if len(mode
) > 1 or mode
not in "raw":
raise ValueError, "mode must be 'r', 'a' or 'w'"
self
.mode
= {"r": "rb", "a": "r+b", "w": "wb"}[mode
]
fileobj
= file(self
.name
, self
.mode
)
if self
.name
is None and hasattr(fileobj
, "name"):
if hasattr(fileobj
, "mode"):
self
.members
= [] # list of members as TarInfo objects
self
._loaded
= False # flag if all members have been read
self
.offset
= 0L # current position in the archive file
self
.inodes
= {} # dictionary caching the inodes of
# archive members already added
self
.firstmember
= self
.next()
# Move to the end of the archive,
# before the first empty block.
self
.fileobj
.seek(- BLOCKSIZE
, 1)
#--------------------------------------------------------------------------
# Below are the classmethods which act as alternate constructors to the
# TarFile class. The open() method is the only one that is needed for
# public use; it is the "super"-constructor and is able to select an
# adequate "sub"-constructor for a particular compression using the mapping
# This concept allows one to subclass TarFile without losing the comfort of
# the super-constructor. A sub-constructor is registered and made available
# by adding it to the mapping in OPEN_METH.
def open(cls
, name
=None, mode
="r", fileobj
=None, bufsize
=20*512):
"""Open a tar archive for reading, writing or appending. Return
an appropriate TarFile class.
'r' open for reading with transparent compression
'r:' open for reading exclusively uncompressed
'r:gz' open for reading with gzip compression
'r:bz2' open for reading with bzip2 compression
'a' or 'a:' open for appending
'w' or 'w:' open for writing without compression
'w:gz' open for writing with gzip compression
'w:bz2' open for writing with bzip2 compression
'r|' open an uncompressed stream of tar blocks for reading
'r|gz' open a gzip compressed stream of tar blocks
'r|bz2' open a bzip2 compressed stream of tar blocks
'w|' open an uncompressed stream for writing
'w|gz' open a gzip compressed stream for writing
'w|bz2' open a bzip2 compressed stream for writing
if not name
and not fileobj
:
raise ValueError, "nothing to open"
filemode
, comptype
= mode
.split(":", 1)
filemode
= filemode
or "r"
comptype
= comptype
or "tar"
# Select the *open() function according to
if comptype
in cls
.OPEN_METH
:
func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
raise CompressionError
, "unknown compression type %r" % comptype
return func(name
, filemode
, fileobj
)
filemode
, comptype
= mode
.split("|", 1)
filemode
= filemode
or "r"
comptype
= comptype
or "tar"
raise ValueError, "mode must be 'r' or 'w'"
_Stream(name
, filemode
, comptype
, fileobj
, bufsize
))
# Find out which *open() is appropriate for opening the file.
for comptype
in cls
.OPEN_METH
:
func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
return func(name
, "r", fileobj
)
except (ReadError
, CompressionError
):
raise ReadError
, "file could not be opened successfully"
return cls
.taropen(name
, mode
, fileobj
)
raise ValueError, "undiscernible mode"
def taropen(cls, name, mode="r", fileobj=None):
    """Open the uncompressed tar archive `name'.

    `mode' must be exactly 'r', 'a' or 'w'; any other value raises
    ValueError. `name', `mode' and `fileobj' are passed unchanged to the
    class constructor and the resulting object is returned.
    """
    # Compare against the three complete mode strings. A substring test
    # against "raw" would also accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)

taropen = classmethod(taropen)
def gzopen(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
"""Open gzip compressed tar archive name for reading or writing.
Appending is not allowed.
if len(mode
) > 1 or mode
not in "rw":
raise ValueError, "mode must be 'r' or 'w'"
except (ImportError, AttributeError):
raise CompressionError
, "gzip module is not available"
pre
, ext
= os
.path
.splitext(name
)
pre
= os
.path
.basename(pre
)
fileobj
= file(name
, mode
+ "b")
t
= cls
.taropen(tarname
, mode
,
gzip
.GzipFile(name
, mode
, compresslevel
, fileobj
)
raise ReadError
, "not a gzip file"
gzopen
= classmethod(gzopen
)
def bz2open(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
"""Open bzip2 compressed tar archive name for reading or writing.
Appending is not allowed.
if len(mode
) > 1 or mode
not in "rw":
raise ValueError, "mode must be 'r' or 'w'."
raise CompressionError
, "bz2 module is not available"
pre
, ext
= os
.path
.splitext(name
)
pre
= os
.path
.basename(pre
)
raise ValueError, "no support for external file objects"
t
= cls
.taropen(tarname
, mode
, bz2
.BZ2File(name
, mode
, compresslevel
=compresslevel
))
raise ReadError
, "not a bzip2 file"
bz2open
= classmethod(bz2open
)
# All *open() methods are registered here.
"tar": "taropen", # uncompressed tar
"gz": "gzopen", # gzip compressed tar
"bz2": "bz2open" # bzip2 compressed tar
#--------------------------------------------------------------------------
# The public methods which TarFile provides:
"""Close the TarFile. In write-mode, two finishing zero blocks are
self
.fileobj
.write(NUL
* (BLOCKSIZE
* 2))
self
.offset
+= (BLOCKSIZE
* 2)
# fill up the end with zero-blocks
# (like option -b20 for tar does)
blocks
, remainder
= divmod(self
.offset
, RECORDSIZE
)
self
.fileobj
.write(NUL
* (RECORDSIZE
- remainder
))
def getmember(self
, name
):
"""Return a TarInfo object for member `name'. If `name' can not be
found in the archive, KeyError is raised. If a member occurs more
than once in the archive, its last occurrence is assumed to be the
tarinfo
= self
._getmember
(name
)
raise KeyError, "filename %r not found" % name
"""Return the members of the archive as a list of TarInfo objects. The
list has the same order as the members in the archive.
if not self
._loaded
: # if we want to obtain a list of
self
._load
() # all members, we first have to
# scan the whole archive.
"""Return the members of the archive as a list of their names. It has
the same order as the list returned by getmembers().
return [tarinfo
.name
for tarinfo
in self
.getmembers()]
def gettarinfo(self
, name
=None, arcname
=None, fileobj
=None):
"""Create a TarInfo object for either the file `name' or the file
object `fileobj' (using os.fstat on its file descriptor). You can
modify some of the TarInfo's attributes before you add it using
addfile(). If given, `arcname' specifies an alternative name for the
# When fileobj is given, replace name by
# Building the name of the member in the archive.
# Backward slashes are converted to forward slashes,
# Absolute paths are turned to relative paths.
arcname
= normpath(arcname
)
drv
, arcname
= os
.path
.splitdrive(arcname
)
while arcname
[0:1] == "/":
# Now, fill the TarInfo object with
# information specific for the file.
# Use os.stat or os.lstat, depending on platform
# and if symlinks shall be resolved.
if hasattr(os
, "lstat") and not self
.dereference
:
statres
= os
.fstat(fileobj
.fileno())
inode
= (statres
.st_ino
, statres
.st_dev
)
if inode
in self
.inodes
and not self
.dereference
:
# Is it a hardlink to an already
linkname
= self
.inodes
[inode
]
# The inode is added only if it's valid.
# For win32 it is always 0.
self
.inodes
[inode
] = arcname
elif stat
.S_ISFIFO(stmd
):
linkname
= os
.readlink(name
)
# Fill the TarInfo object with all
# information we can get.
tarinfo
.uid
= statres
.st_uid
tarinfo
.gid
= statres
.st_gid
tarinfo
.size
= statres
.st_size
tarinfo
.mtime
= statres
.st_mtime
tarinfo
.linkname
= linkname
tarinfo
.uname
= pwd
.getpwuid(tarinfo
.uid
)[0]
tarinfo
.gname
= grp
.getgrgid(tarinfo
.gid
)[0]
if type in (CHRTYPE
, BLKTYPE
):
if hasattr(os
, "major") and hasattr(os
, "minor"):
tarinfo
.devmajor
= os
.major(statres
.st_rdev
)
tarinfo
.devminor
= os
.minor(statres
.st_rdev
)
def list(self
, verbose
=True):
"""Print a table of contents to sys.stdout. If `verbose' is False, only
the names of the members are printed. If it is True, an `ls -l'-like
print filemode(tarinfo
.mode
),
print "%s/%s" % (tarinfo
.uname
or tarinfo
.uid
,
tarinfo
.gname
or tarinfo
.gid
),
if tarinfo
.ischr() or tarinfo
.isblk():
print "%10s" % ("%d,%d" \
% (tarinfo
.devmajor
, tarinfo
.devminor
)),
print "%10d" % tarinfo
.size
,
print "%d-%02d-%02d %02d:%02d:%02d" \
% time
.localtime(tarinfo
.mtime
)[:6],
print "->", tarinfo
.linkname
,
print "link to", tarinfo
.linkname
,
def add(self
, name
, arcname
=None, recursive
=True):
"""Add the file `name' to the archive. `name' may be any type of file
(directory, fifo, symbolic link, etc.). If given, `arcname'
specifies an alternative name for the file in the archive.
Directories are added recursively by default. This can be avoided by
setting `recursive' to False.
# Skip if somebody tries to archive the archive...
if self
.name
is not None \
and os
.path
.abspath(name
) == os
.path
.abspath(self
.name
):
self
._dbg
(2, "tarfile: Skipped %r" % name
)
# Special case: The user wants to add the current
for f
in os
.listdir("."):
self
.add(f
, os
.path
.join(arcname
, f
))
# Create a TarInfo object from the file.
tarinfo
= self
.gettarinfo(name
, arcname
)
self
._dbg
(1, "tarfile: Unsupported type %r" % name
)
# Append the tar header and data to the archive.
for f
in os
.listdir(name
):
self
.add(os
.path
.join(name
, f
), os
.path
.join(arcname
, f
))
def addfile(self
, tarinfo
, fileobj
=None):
"""Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
given, tarinfo.size bytes are read from it and added to the archive.
You can create TarInfo objects using gettarinfo().
On Windows platforms, `fileobj' should always be opened with mode
'rb' to avoid irritation about the file size.
tarinfo
.name
= normpath(tarinfo
.name
)
# directories should end with '/'
tarinfo
.linkname
= normpath(tarinfo
.linkname
)
if tarinfo
.size
> MAXSIZE_MEMBER
:
raise ValueError, "file is too large (>= 8 GB)"
self
._dbg
(2, "tarfile: Created GNU tar largefile header")
if len(tarinfo
.linkname
) > LENGTH_LINK
:
raise ValueError, "linkname is too long (>%d)" \
self
._create
_gnulong
(tarinfo
.linkname
, GNUTYPE_LONGLINK
)
tarinfo
.linkname
= tarinfo
.linkname
[:LENGTH_LINK
-1]
self
._dbg
(2, "tarfile: Created GNU tar extension LONGLINK")
if len(tarinfo
.name
) > LENGTH_NAME
:
prefix
= tarinfo
.name
[:LENGTH_PREFIX
+ 1]
while prefix
and prefix
[-1] != "/":
name
= tarinfo
.name
[len(prefix
):]
if not prefix
or len(name
) > LENGTH_NAME
:
raise ValueError, "name is too long (>%d)" \
self
._create
_gnulong
(tarinfo
.name
, GNUTYPE_LONGNAME
)
tarinfo
.name
= tarinfo
.name
[:LENGTH_NAME
- 1]
self
._dbg
(2, "tarfile: Created GNU tar extension LONGNAME")
self
.fileobj
.write(tarinfo
.tobuf())
# If there's data to follow, append it.
copyfileobj(fileobj
, self
.fileobj
, tarinfo
.size
)
blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
self
.offset
+= blocks
* BLOCKSIZE
self
.members
.append(tarinfo
)
def extract(self
, member
, path
=""):
"""Extract a member from the archive to the current working directory,
using its full name. Its file information is extracted as accurately
as possible. `member' may be a filename or a TarInfo object. You can
specify a different directory using `path'.
if isinstance(member
, TarInfo
):
tarinfo
= self
.getmember(member
)
# Prepare the link target for makelink().
tarinfo
._link
_target
= os
.path
.join(path
, tarinfo
.linkname
)
self
._extract
_member
(tarinfo
, os
.path
.join(path
, tarinfo
.name
))
except EnvironmentError, e
:
self
._dbg
(1, "tarfile: %s" % e
.strerror
)
self
._dbg
(1, "tarfile: %s %r" % (e
.strerror
, e
.filename
))
self
._dbg
(1, "tarfile: %s" % e
)
def extractfile(self
, member
):
"""Extract a member from the archive as a file object. `member' may be
a filename or a TarInfo object. If `member' is a regular file, a
file-like object is returned. If `member' is a link, a file-like
object is constructed from the link's target. If `member' is none of
the above, None is returned.
The file-like object is read-only and provides the following
methods: read(), readline(), readlines(), seek() and tell()
if isinstance(member
, TarInfo
):
tarinfo
= self
.getmember(member
)
return self
.fileobject(self
, tarinfo
)
elif tarinfo
.type not in SUPPORTED_TYPES
:
# If a member's type is unknown, it is treated as a
return self
.fileobject(self
, tarinfo
)
elif tarinfo
.islnk() or tarinfo
.issym():
if isinstance(self
.fileobj
, _Stream
):
# A small but ugly workaround for the case that someone tries
# to extract a (sym)link as a file-object from a non-seekable
raise StreamError
, "cannot extract (sym)link as file object"
# A (sym)link's file object is its target's file object.
return self
.extractfile(self
._getmember
(tarinfo
.linkname
,
# If there's no data associated with the member (directory, chrdev,
# blkdev, etc.), return None instead of a file object.
def _extract_member(self
, tarinfo
, targetpath
):
"""Extract the TarInfo object tarinfo to a physical
# Fetch the TarInfo object for the given name
# and build the destination pathname, replacing
# forward slashes to platform specific separators.
if targetpath
[-1:] == "/":
targetpath
= targetpath
[:-1]
targetpath
= os
.path
.normpath(targetpath
)
# Create all upper directories.
upperdirs
= os
.path
.dirname(targetpath
)
if upperdirs
and not os
.path
.exists(upperdirs
):
self
._extract
_member
(ti
, ti
.name
)
if tarinfo
.islnk() or tarinfo
.issym():
self
._dbg
(1, "%s -> %s" % (tarinfo
.name
, tarinfo
.linkname
))
self
._dbg
(1, tarinfo
.name
)
self
.makefile(tarinfo
, targetpath
)
self
.makedir(tarinfo
, targetpath
)
self
.makefifo(tarinfo
, targetpath
)
elif tarinfo
.ischr() or tarinfo
.isblk():
self
.makedev(tarinfo
, targetpath
)
elif tarinfo
.islnk() or tarinfo
.issym():
self
.makelink(tarinfo
, targetpath
)
elif tarinfo
.type not in SUPPORTED_TYPES
:
self
.makeunknown(tarinfo
, targetpath
)
self
.makefile(tarinfo
, targetpath
)
self
.chown(tarinfo
, targetpath
)
self
.chmod(tarinfo
, targetpath
)
self
.utime(tarinfo
, targetpath
)
#--------------------------------------------------------------------------
# Below are the different file methods. They are called via
# _extract_member() when extract() is called. They can be replaced in a
# subclass to implement other functionality.
def makedir(self
, tarinfo
, targetpath
):
"""Make a directory called targetpath.
except EnvironmentError, e
:
if e
.errno
!= errno
.EEXIST
:
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

    The member's data is obtained through self.extractfile(tarinfo) and
    copied into targetpath, which is opened in binary write mode. Both
    file objects are closed before returning, also when the copy fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # open() is used rather than file(): same behaviour, but the name
        # exists on every Python version.
        target = open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as a regular file.

    The extraction itself is delegated to makefile(); afterwards a
    level-1 debug message reports the member's type.
    """
    self.makefile(tarinfo, targetpath)
    notice = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, notice % tarinfo.type)
def makefifo(self
, tarinfo
, targetpath
):
"""Make a fifo called targetpath.
if hasattr(os
, "mkfifo"):
raise ExtractError
, "fifo not supported by system"
def makedev(self
, tarinfo
, targetpath
):
"""Make a character or block device called targetpath.
if not hasattr(os
, "mknod") or not hasattr(os
, "makedev"):
raise ExtractError
, "special devices not supported by system"
os
.mknod(targetpath
, mode
,
os
.makedev(tarinfo
.devmajor
, tarinfo
.devminor
))
def makelink(self
, tarinfo
, targetpath
):
"""Make a (symbolic) link called targetpath. If it cannot be created
(platform limitation), we try to make a copy of the referenced file
linkpath
= tarinfo
.linkname
os
.symlink(linkpath
, targetpath
)
os
.link(tarinfo
._link
_target
, targetpath
)
linkpath
= os
.path
.join(os
.path
.dirname(tarinfo
.name
),
linkpath
= normpath(linkpath
)
self
._extract
_member
(self
.getmember(linkpath
), targetpath
)
except (EnvironmentError, KeyError), e
:
linkpath
= os
.path
.normpath(linkpath
)
shutil
.copy2(linkpath
, targetpath
)
except EnvironmentError, e
:
raise IOError, "link could not be created"
def chown(self
, tarinfo
, targetpath
):
"""Set owner of targetpath according to tarinfo.
if pwd
and hasattr(os
, "geteuid") and os
.geteuid() == 0:
# We have to be root to do so.
g
= grp
.getgrnam(tarinfo
.gname
)[2]
g
= grp
.getgrgid(tarinfo
.gid
)[2]
u
= pwd
.getpwnam(tarinfo
.uname
)[2]
u
= pwd
.getpwuid(tarinfo
.uid
)[2]
if tarinfo
.issym() and hasattr(os
, "lchown"):
os
.lchown(targetpath
, u
, g
)
if sys
.platform
!= "os2emx":
os
.chown(targetpath
, u
, g
)
except EnvironmentError, e
:
raise ExtractError
, "could not change owner"
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

    Applies tarinfo.mode to targetpath with os.chmod().

    Raises ExtractError if os.chmod() fails with an EnvironmentError.
    """
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       Both access and modification time are set to tarinfo.mtime.
       Nothing is done on platforms without os.utime(), nor for
       directories on win32.

       Raises ExtractError if the timestamps cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    if sys.platform == "win32" and tarinfo.isdir():
        # According to msdn.microsoft.com, it is an error (EACCES)
        # to use utime() on directories.
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
#--------------------------------------------------------------------------
"""Return the next member of the archive as a TarInfo object, when
TarFile is opened for reading. Return None if there is no more
if self
.firstmember
is not None:
self
.fileobj
.seek(self
.offset
)
buf
= self
.fileobj
.read(BLOCKSIZE
)
tarinfo
= TarInfo
.frombuf(buf
)
if buf
.count(NUL
) == BLOCKSIZE
:
self
._dbg
(2, "0x%X: %s block" % (self
.offset
, adj
))
# Block is empty or unreadable.
# The first block is invalid, so this does not
# look like a tar archive we can handle.
raise ReadError
,"empty, unreadable or compressed file"
# We shouldn't rely on this checksum, because some tar programs
# calculate it differently and it is merely validating the
# header block. We could just as well skip this part, which would
# have a slight effect on performance...
if tarinfo
.chksum
!= calc_chksum(buf
):
self
._dbg
(1, "tarfile: Bad Checksum %r" % tarinfo
.name
)
# Set the TarInfo object's offset to the current position of the
# TarFile and set self.offset to the position where the data blocks
# begin.
tarinfo
.offset
= self
.offset
# Check if the TarInfo object has a typeflag for which a callback
# method is registered in the TYPE_METH. If so, then call it.
if tarinfo
.type in self
.TYPE_METH
:
return self
.TYPE_METH
[tarinfo
.type](self
, tarinfo
)
tarinfo
.offset_data
= self
.offset
if tarinfo
.isreg() or tarinfo
.type not in SUPPORTED_TYPES
:
# Skip the following data blocks.
self
.offset
+= self
._block
(tarinfo
.size
)
if tarinfo
.isreg() and tarinfo
.name
[:-1] == "/":
# some old tar programs don't know DIRTYPE
self
.members
.append(tarinfo
)
#--------------------------------------------------------------------------
# Below are some methods which are called for special typeflags in the
# next() method, e.g. for unwrapping GNU longname/longlink blocks. They
# are registered in TYPE_METH below. You can register your own methods
# in that mapping as well.
#
# A registered method is called with a TarInfo object as only argument.
#
# During its execution the method MUST perform the following tasks:
# 1. set tarinfo.offset_data to the position where the data blocks begin,
#    if there is data to follow.
# 2. set self.offset to the position where the next member's header will
#    begin.
# 3. append the tarinfo object to self.members, if it is supposed to appear
#    as a member of the TarFile object.
# 4. return tarinfo or another valid TarInfo object.
def proc_gnulong(self
, tarinfo
):
"""Evaluate the blocks that hold a GNU longname
block
= self
.fileobj
.read(BLOCKSIZE
)
next
.offset
= tarinfo
.offset
if tarinfo
.type == GNUTYPE_LONGNAME
:
elif tarinfo
.type == GNUTYPE_LONGLINK
:
def proc_sparse(self
, tarinfo
):
"""Analyze a GNU sparse header plus extra headers.
# There are 4 possible sparse structs in the
offset
= int(buf
[pos
:pos
+ 12], 8)
numbytes
= int(buf
[pos
+ 12:pos
+ 24], 8)
sp
.append(_hole(lastpos
, offset
- lastpos
))
sp
.append(_data(offset
, numbytes
, realpos
))
lastpos
= offset
+ numbytes
isextended
= ord(buf
[482])
origsize
= int(buf
[483:495], 8)
# If the isextended flag is given,
# there are extra headers to process.
buf
= self
.fileobj
.read(BLOCKSIZE
)
offset
= int(buf
[pos
:pos
+ 12], 8)
numbytes
= int(buf
[pos
+ 12:pos
+ 24], 8)
sp
.append(_hole(lastpos
, offset
- lastpos
))
sp
.append(_data(offset
, numbytes
, realpos
))
lastpos
= offset
+ numbytes
isextended
= ord(buf
[504])
sp
.append(_hole(lastpos
, origsize
- lastpos
))
tarinfo
.offset_data
= self
.offset
self
.offset
+= self
._block
(tarinfo
.size
)
self
.members
.append(tarinfo
)
# The type mapping for the next() method. The keys are single character
# strings, the typeflag. The values are methods which are called when
# next() encounters such a typeflag.
GNUTYPE_LONGNAME
: proc_gnulong
,
GNUTYPE_LONGLINK
: proc_gnulong
,
GNUTYPE_SPARSE
: proc_sparse
#--------------------------------------------------------------------------
"""Round up a byte count by BLOCKSIZE and return it,
e.g. _block(834) => 1024.
blocks
, remainder
= divmod(count
, BLOCKSIZE
)
return blocks
* BLOCKSIZE
def _getmember(self
, name
, tarinfo
=None):
"""Find an archive member by name from bottom to top.
If tarinfo is given, it is used as the starting point.
# Ensure that all members have been loaded.
members
= self
.getmembers()
end
= members
.index(tarinfo
)
for i
in xrange(end
- 1, -1, -1):
if name
== members
[i
].name
:
"""Read through the entire archive file and look for readable
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
       corresponds to TarFile's mode.

       mode is a container of permitted mode characters; None skips the
       mode test. Raises IOError if the archive is closed or if
       self._mode is not among the permitted modes.
    """
    # NOTE(review): the open/closed state is read from self.closed -
    # confirm the attribute name against close()/__init__.
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is not None and self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
"""Provide an iterator object.
return iter(self
.members
)
def _create_gnulong(self
, name
, type):
"""Write a GNU longname/longlink member to the TarFile.
It consists of an extended tar header, with the length
of the longname as size, followed by data blocks,
which contain the longname as a null terminated string.
tarinfo
.name
= "././@LongLink"
self
.fileobj
.write(tarinfo
.tobuf())
blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
self
.offset
+= blocks
* BLOCKSIZE
def _dbg(self
, level
, msg
):
"""Write debugging output to sys.stderr.
for tarinfo in TarFile(...):
def __init__(self
, tarfile
):
"""Construct a TarIter object.
"""Return iterator object.
"""Return the next item using TarFile's next() method.
When all members have been read, set TarFile as _loaded.
# Fix for SF #1100429: Under rare circumstances it can
# happen that getmembers() is called during iteration,
# which will cause TarIter to stop prematurely.
if not self
.tarfile
._loaded
:
tarinfo
= self
.tarfile
.next()
self
.tarfile
._loaded
= True
tarinfo
= self
.tarfile
.members
[self
.index
]
# Helper classes for sparse file support
"""Base class for _data and _hole.
def __init__(self
, offset
, size
):
def __contains__(self, offset):
    """Tell whether offset falls inside this section, i.e. within the
       half-open range [self.offset, self.offset + self.size).
    """
    if offset < self.offset:
        return False
    return offset < self.offset + self.size
"""Represent a data section in a sparse file.
def __init__(self, offset, size, realpos):
    """Set up a data section.

       offset and size are handed to _section.__init__(); realpos is
       kept on the instance as self.realpos.
    """
    _section.__init__(self, offset, size)
    # Without this assignment the realpos argument would be dropped.
    self.realpos = realpos
"""Represent a hole section in a sparse file.
"""Ringbuffer class which increases performance
#---------------------------------------------
# zipfile compatible TarFile class
#---------------------------------------------
TAR_PLAIN
= 0 # zipfile.ZIP_STORED
TAR_GZIPPED
= 8 # zipfile.ZIP_DEFLATED
"""TarFile class compatible with standard module zipfile's
def __init__(self, file, mode="r", compression=TAR_PLAIN):
    """Open a tar archive through a zipfile-like interface.

       file and mode are passed to TarFile.taropen() when compression
       is TAR_PLAIN, or to TarFile.gzopen() when it is TAR_GZIPPED.
       Any other compression value raises ValueError.

       When reading, every member is given the zipfile-style attributes
       filename, file_size and date_time.
    """
    if compression == TAR_PLAIN:
        self.tarfile = TarFile.taropen(file, mode)
    elif compression == TAR_GZIPPED:
        self.tarfile = TarFile.gzopen(file, mode)
    else:
        raise ValueError("unknown compression constant")

    # NOTE(review): members are decorated only for read modes, on the
    # assumption that getmembers() is not usable on an archive opened
    # for writing - confirm.
    if mode[0:1] == "r":
        for m in self.tarfile.getmembers():
            # Mirror the attribute names that writestr() reads back.
            m.filename = m.name
            m.file_size = m.size
            m.date_time = time.gmtime(m.mtime)[:6]
return map(lambda m
: m
.name
, self
.infolist())
return filter(lambda m
: m
.type in REGULAR_TYPES
,
self
.tarfile
.getmembers())
return self
.tarfile
.getmember(name
)
return self
.tarfile
.extractfile(self
.tarfile
.getmember(name
)).read()
def write(self, filename, arcname=None, compress_type=None):
    """Add the file filename to the archive, stored under arcname.

       compress_type is part of the signature but is not used.
    """
    archive = self.tarfile
    archive.add(filename, arcname)
def writestr(self, zinfo, bytes):
    """Add the string bytes to the archive as the member described by
       zinfo.

       zinfo carries zipfile-style attributes (filename, file_size,
       date_time); they are copied to the tar-style attributes name,
       size and mtime before zinfo is passed to self.tarfile.addfile()
       together with a file object wrapping bytes.
    """
    # Function-scope imports: the block must not depend on these names
    # being imported at module level.
    import calendar
    try:
        from StringIO import StringIO
    except ImportError:
        # Interpreters without the StringIO module.
        from io import BytesIO as StringIO

    zinfo.name = zinfo.filename
    zinfo.size = zinfo.file_size
    # date_time is a UTC time tuple, so convert with timegm().
    zinfo.mtime = calendar.timegm(zinfo.date_time)
    self.tarfile.addfile(zinfo, StringIO(bytes))
"""Return True if name points to a tar archive that we
are able to handle, else return False.