Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / amd64 / lib / python2.4 / mimetypes.py
CommitLineData
920dae64
AT
1"""Guess the MIME type of a file.
2
3This module defines two useful functions:
4
5guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.
6
7guess_extension(type, strict=1) -- guess the extension for a given MIME type.
8
9It also contains the following, for tuning the behavior:
10
11Data:
12
13knownfiles -- list of files to parse
14inited -- flag set when init() has been called
15suffix_map -- dictionary mapping suffixes to suffixes
16encodings_map -- dictionary mapping suffixes to encodings
17types_map -- dictionary mapping suffixes to types
18
19Functions:
20
21init([files]) -- parse a list of files, default knownfiles
22read_mime_types(file) -- parse one file, return a dictionary or None
23"""
24
25import os
26import posixpath
27import urllib
28
# Names exported by ``from mimetypes import *``.
__all__ = [
    "guess_type",
    "guess_extension",
    "guess_all_extensions",
    "add_type",
    "read_mime_types",
    "init",
]
33
# Candidate mime.types files; init() reads, in this order, every entry that
# exists as a regular file.
knownfiles = [
    "/etc/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",  # Apache 1.2
    "/usr/local/etc/mime.types",  # Apache 1.3
]

# Flag set to True by init() once it has been called.
inited = False
43
44
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Instance attributes:

    encodings_map -- dictionary mapping suffixes to encodings
    suffix_map -- dictionary mapping suffixes to suffixes
    types_map -- pair of dictionaries (non-strict, strict) mapping
                 suffixes to types
    types_map_inv -- pair of dictionaries (non-strict, strict) mapping
                     types to lists of suffixes
    """

    def __init__(self, filenames=(), strict=True):
        """Create a datastore seeded from the module-level tables.

        The module tables are initialised first if init() has not run yet.
        Each file named in `filenames' is then read with read(), using
        `strict' to select the table the entries are added to.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {})  # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for (ext, type) in types_map.items():
            self.add_type(type, ext, True)
        for (ext, type) in common_types.items():
            self.add_type(type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # The tables are 2-tuples indexed by a boolean; normalise so that any
        # true/false value (not only 0, 1, False, True) selects a valid slot.
        strict = bool(strict)
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                mime_type = url[:semi]
            else:
                mime_type = url[:comma]
            if '=' in mime_type or '/' not in mime_type:
                mime_type = 'text/plain'
            return mime_type, None  # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') until stable.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        # Standard types are always consulted first; the non-standard table
        # is only a fallback when the caller asked for a lenient lookup.
        tables = [self.types_map[True]]
        if not strict:
            tables.append(self.types_map[False])
        lowered = ext.lower()
        for table in tables:
            if ext in table:
                return table[ext], encoding
            if lowered in table:
                return table[lowered], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().  The
        list is empty when no extension is known, and it is always a new
        list that the caller may modify freely.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Copy: appending the non-standard extensions to the stored list
        # would leak them into every later strict lookup.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.

        Errors from open() propagate to the caller.  The file is closed
        even when parsing fails.
        """
        fp = open(filename)
        try:
            self.readfp(fp, strict)
        finally:
            fp.close()

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' is any object with a readline() method.  Each line has the
        form "type ext1 ext2 ..."; a word starting with '#' begins a
        comment that runs to the end of the line.  Lines without any
        extension add nothing.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while 1:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop the first word that starts a comment and everything after.
            for i, word in enumerate(words):
                if word.startswith('#'):
                    del words[i:]
                    break
            if not words:
                continue
            type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(type, '.' + suff, strict)
217
def guess_type(url, strict=True):
    """Guess the MIME type and encoding of the file named by a URL.

    Returns a (type, encoding) tuple.  `type' is a "type/subtype" string
    suitable for a MIME Content-type header, or None when the suffix is
    missing or unknown.  `encoding' names the program used to encode the
    data (e.g. compress or gzip), or is None when there is no encoding.

    All mappings are table driven.  Encoding suffixes are matched case
    sensitively; type suffixes are tried case sensitively first and then
    case insensitively.  The suffixes .tgz, .taz and .tz (case sensitive!)
    are treated as ".tar.gz" through the suffix_map dictionary.

    With a false `strict', a set of commonly found but non-standard types
    is consulted as well.
    """
    # init() rebinds the global name guess_type to a method of the freshly
    # built database, so the call below reaches that method rather than
    # recursing into this placeholder.
    init()
    return guess_type(url, strict)
238
239
def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  If no extension can be guessed for `type', the
    list is empty.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # init() rebinds the global name guess_all_extensions to a method of the
    # freshly built database, so the call below reaches that method rather
    # than recursing into this placeholder.
    init()
    return guess_all_extensions(type, strict)
255
def guess_extension(type, strict=True):
    """Guess a filename extension for the MIME type `type'.

    Returns one extension string, leading dot ('.') included, or None when
    nothing can be guessed.  The extension is not guaranteed to have been
    associated with any particular data stream; it is merely one that
    guess_type() would map back to `type'.

    With a false `strict', a set of commonly found but non-standard types
    is consulted as well.
    """
    # init() rebinds the global name guess_extension to a method of the
    # freshly built database, so the call below reaches that method rather
    # than recursing into this placeholder.
    init()
    return guess_extension(type, strict)
270
def add_type(type, ext, strict=True):
    """Register a mapping between the MIME type `type' and extension `ext'.

    An extension that is already known gets its type replaced by the new
    one.  A type that is already known gets `ext' added to its list of
    known extensions.

    A true `strict' files the mapping under the standard types; a false
    one files it under the non-standard types.
    """
    # init() rebinds the global name add_type to a method of the freshly
    # built database, so the call below reaches that method rather than
    # recursing into this placeholder.
    init()
    return add_type(type, ext, strict)
285
286
def init(files=None):
    """Build the module-wide MIME database and publish it as module globals.

    A new MimeTypes instance is created and every entry of `files' that
    exists as a regular file is parsed into it.  When `files' is None the
    module-level `knownfiles' list is used.

    Afterwards the globals encodings_map, suffix_map, types_map and
    common_types refer to the tables of that instance, and guess_type,
    guess_extension, guess_all_extensions and add_type are rebound to its
    methods.
    """
    global guess_all_extensions, guess_extension, guess_type
    global suffix_map, types_map, encodings_map, common_types
    global add_type, inited
    # Must be set before MimeTypes() is created: its constructor calls
    # init() whenever the flag is still false.
    inited = True
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for path in files:
        if os.path.isfile(path):
            fp = open(path)
            try:
                db.readfp(fp)
            finally:
                # Close explicitly so the handle is not leaked.
                fp.close()
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    guess_all_extensions = db.guess_all_extensions
    guess_extension = db.guess_extension
    guess_type = db.guess_type
    add_type = db.add_type
    common_types = db.types_map[False]
306
307
def read_mime_types(file):
    """Parse one mime.types-format file and return a table of its types.

    `file' is the pathname to read.  The result is the strict
    extension-to-type dictionary of a new MimeTypes instance after the
    file's entries have been added to it, or None if the file cannot be
    opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    try:
        db = MimeTypes()
        db.readfp(f, True)
    finally:
        # Close explicitly so the handle is not leaked.
        f.close()
    return db.types_map[True]
316
317
# Shorthand suffixes and the suffix sequence each one stands for.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}

# Suffixes that denote an encoding rather than a content type.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
}
328
329# Before adding new types, make sure they are either registered with IANA, at
330# http://www.isi.edu/in-notes/iana/assignments/media-types
331# or extensions, i.e. using the x- prefix
332
333# If you add to these, please keep them sorted!
# Standard types, keyed by extension (leading dot included).  Each key is
# listed once: a dict literal keeps only the last value given for a key.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bat': 'text/plain',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.bmp': 'image/x-ms-bmp',
    '.c': 'text/plain',
    # '.cdf' is also seen as 'application/x-cdf'.
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.css': 'text/css',
    '.dll': 'application/octet-stream',
    '.doc': 'application/msword',
    '.dot': 'application/msword',
    '.dvi': 'application/x-dvi',
    '.eml': 'message/rfc822',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.exe': 'application/octet-stream',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.h': 'text/plain',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.js': 'application/x-javascript',
    '.ksh': 'text/plain',
    '.latex': 'application/x-latex',
    '.m1v': 'video/mpeg',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mht': 'message/rfc822',
    '.mhtml': 'message/rfc822',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mp2': 'audio/mpeg',
    '.mp3': 'audio/mpeg',
    '.mpa': 'video/mpeg',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.nws': 'message/rfc822',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.p12': 'application/x-pkcs12',
    '.p7c': 'application/pkcs7-mime',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pfx': 'application/x-pkcs12',
    '.pgm': 'image/x-portable-graymap',
    '.pl': 'text/plain',
    '.png': 'image/png',
    '.pnm': 'image/x-portable-anymap',
    '.pot': 'application/vnd.ms-powerpoint',
    '.ppa': 'application/vnd.ms-powerpoint',
    '.ppm': 'image/x-portable-pixmap',
    '.pps': 'application/vnd.ms-powerpoint',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.ps': 'application/postscript',
    '.pwz': 'application/vnd.ms-powerpoint',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.pyo': 'application/x-python-code',
    '.qt': 'video/quicktime',
    '.ra': 'audio/x-pn-realaudio',
    '.ram': 'application/x-pn-realaudio',
    '.ras': 'image/x-cmu-raster',
    '.rdf': 'application/xml',
    '.rgb': 'image/x-rgb',
    '.roff': 'application/x-troff',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.swf': 'application/x-shockwave-flash',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.vcf': 'text/x-vcard',
    '.wav': 'audio/x-wav',
    '.wiz': 'application/msword',
    '.xbm': 'image/x-xbitmap',
    '.xlb': 'application/vnd.ms-excel',
    # '.xls' is also seen as 'application/excel'.
    '.xls': 'application/vnd.ms-excel',
    '.xml': 'text/xml',
    '.xpm': 'image/x-xpixmap',
    '.xsl': 'application/xml',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
}
457
458# These are non-standard types, commonly found in the wild. They will only
459# match if strict=0 flag is given to the API methods.
460
461# Please sort these too
# Non-standard types, keyed by extension (leading dot included).
common_types = {
    '.jpg': 'image/jpg',
    '.mid': 'audio/midi',
    '.midi': 'audio/midi',
    '.pct': 'image/pict',
    '.pic': 'image/pict',
    '.pict': 'image/pict',
    '.rtf': 'application/rtf',
    '.xul': 'text/xul',
}
472
473
if __name__ == '__main__':
    # Command-line front end: guess a type (or, with -e, an extension) for
    # each argument and print the result.
    import sys
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        # Print the help text, then the optional message, and exit with
        # status `code'.
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error:
        usage(1, sys.exc_info()[1])

    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print("I don't know anything about type %s" % (gtype,))
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print('type: %s encoding: %s' % (guess, encoding))
            else:
                print("I don't know anything about type %s" % (gtype,))