Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | """Open an arbitrary URL. |
2 | ||
3 | See the following document for more info on URLs: | |
4 | "Names and Addresses, URIs, URLs, URNs, URCs", at | |
5 | http://www.w3.org/pub/WWW/Addressing/Overview.html | |
6 | ||
7 | See also the HTTP spec (from which the error codes are derived): | |
8 | "HTTP - Hypertext Transfer Protocol", at | |
9 | http://www.w3.org/pub/WWW/Protocols/ | |
10 | ||
11 | Related standards and specs: | |
12 | - RFC1808: the "relative URL" spec. (authoritative status) | |
13 | - RFC1738 - the "URL standard". (authoritative status) | |
14 | - RFC1630 - the "URI spec". (informational status) | |
15 | ||
16 | The object returned by URLopener().open(file) will differ per | |
protocol.  All you know is that it has methods read(), readline(),
18 | readlines(), fileno(), close() and info(). The read*(), fileno() | |
19 | and close() methods work like those of open files. | |
20 | The info() method returns a mimetools.Message object which can be | |
21 | used to query various info about the object, if available. | |
22 | (mimetools.Message objects are queried with the getheader() method.) | |
23 | """ | |
24 | ||
25 | import string | |
26 | import socket | |
27 | import os | |
28 | import time | |
29 | import sys | |
30 | from urlparse import urljoin as basejoin | |
31 | ||
32 | __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve", | |
33 | "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus", | |
34 | "urlencode", "url2pathname", "pathname2url", "splittag", | |
35 | "localhost", "thishost", "ftperrors", "basejoin", "unwrap", | |
36 | "splittype", "splithost", "splituser", "splitpasswd", "splitport", | |
37 | "splitnport", "splitquery", "splitattr", "splitvalue", | |
38 | "splitgophertype", "getproxies"] | |
39 | ||
40 | __version__ = '1.16' # XXX This version is not always updated :-( | |
41 | ||
42 | MAXFTPCACHE = 10 # Trim the ftp cache beyond this size | |
43 | ||
# Helper for non-unix systems: pick a platform-appropriate pair of
# converters between URL path components and local filesystem paths.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        # On posix a URL path already is a filesystem path; just undo %xx quoting.
        return unquote(pathname)
    def pathname2url(pathname):
        # Inverse of url2pathname: %xx-quote characters unsafe in URLs.
        return quote(pathname)
56 | ||
57 | # This really consists of two pieces: | |
58 | # (1) a class which handles opening of all sorts of URLs | |
59 | # (plus assorted utilities etc.) | |
60 | # (2) a set of functions for parsing URLs | |
61 | # XXX Should these be separated out into different modules? | |
62 | ||
63 | ||
# Shortcut for basic usage
_urlopener = None   # lazily-created shared FancyURLopener (see urlopen())
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    data, if given, is sent as the body of a POST request (http only).
    proxies, if given, is a scheme->proxy-url mapping; it gets a private
    opener, leaving the shared module-level opener untouched.
    """
    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        # First plain call: create and remember the shared opener.
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url into filename (or a fresh temporary file) and return
    a (filename, headers) tuple; see URLopener.retrieve() for details."""
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Discard temporary files and cached data held by the shared opener."""
    if _urlopener:
        _urlopener.cleanup()
88 | ||
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """Raised by retrieve() when fewer bytes arrive than the server's
    Content-Length header promised.

    The partially downloaded (filename, headers) result is kept in the
    ``content`` attribute so the caller can inspect or discard it.
    """

    def __init__(self, message, content):
        IOError.__init__(self, message)
        self.content = content
94 | ||
ftpcache = {}  # module-wide FTP connection cache shared by all URLopeners
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default; __init__ replaces it with a list.  cleanup()
    # checks it, so it must exist even on partially-constructed objects.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        # proxies maps URL scheme -> proxy URL; the default comes from
        # the environment via getproxies().
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        # Optional SSL client-authentication material (used by open_https).
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.
131 | ||
    def __del__(self):
        # Best-effort teardown when the opener is garbage-collected.
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        """Remove temp files created by retrieve() and clear the temp cache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # File already gone (or not removable): ignore.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)
156 | ||
    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches on the URL scheme to an open_<scheme>() method;
        data, if given, is passed through for POST-style requests.
        """
        fullurl = unwrap(toBytes(fullurl))
        if self.tempcache and fullurl in self.tempcache:
            # Previously retrieve()d and cached: serve the local copy.
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'    # schemeless URLs are treated as local files
        if urltype in self.proxies:
            # Route this scheme through its configured proxy.
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')   # e.g. scheme "foo-bar" -> open_foo_bar
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            # Re-raise socket problems as IOError, preserving the traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]
190 | ||
191 | def open_unknown(self, fullurl, data=None): | |
192 | """Overridable interface to open unknown URL type.""" | |
193 | type, url = splittype(fullurl) | |
194 | raise IOError, ('url error', 'unknown url type', type) | |
195 | ||
196 | def open_unknown_proxy(self, proxy, fullurl, data=None): | |
197 | """Overridable interface to open unknown URL type.""" | |
198 | type, url = splittype(fullurl) | |
199 | raise IOError, ('url error', 'invalid proxy for %s' % type, proxy) | |
200 | ||
201 | # External interface | |
202 | def retrieve(self, url, filename=None, reporthook=None, data=None): | |
203 | """retrieve(url) returns (filename, headers) for a local object | |
204 | or (tempfilename, headers) for a remote object.""" | |
205 | url = unwrap(toBytes(url)) | |
206 | if self.tempcache and url in self.tempcache: | |
207 | return self.tempcache[url] | |
208 | type, url1 = splittype(url) | |
209 | if filename is None and (not type or type == 'file'): | |
210 | try: | |
211 | fp = self.open_local_file(url1) | |
212 | hdrs = fp.info() | |
213 | del fp | |
214 | return url2pathname(splithost(url1)[1]), hdrs | |
215 | except IOError, msg: | |
216 | pass | |
217 | fp = self.open(url, data) | |
218 | headers = fp.info() | |
219 | if filename: | |
220 | tfp = open(filename, 'wb') | |
221 | else: | |
222 | import tempfile | |
223 | garbage, path = splittype(url) | |
224 | garbage, path = splithost(path or "") | |
225 | path, garbage = splitquery(path or "") | |
226 | path, garbage = splitattr(path or "") | |
227 | suffix = os.path.splitext(path)[1] | |
228 | (fd, filename) = tempfile.mkstemp(suffix) | |
229 | self.__tempfiles.append(filename) | |
230 | tfp = os.fdopen(fd, 'wb') | |
231 | result = filename, headers | |
232 | if self.tempcache is not None: | |
233 | self.tempcache[url] = result | |
234 | bs = 1024*8 | |
235 | size = -1 | |
236 | read = 0 | |
237 | blocknum = 0 | |
238 | if reporthook: | |
239 | if "content-length" in headers: | |
240 | size = int(headers["Content-Length"]) | |
241 | reporthook(blocknum, bs, size) | |
242 | while 1: | |
243 | block = fp.read(bs) | |
244 | if block == "": | |
245 | break | |
246 | read += len(block) | |
247 | tfp.write(block) | |
248 | blocknum += 1 | |
249 | if reporthook: | |
250 | reporthook(blocknum, bs, size) | |
251 | fp.close() | |
252 | tfp.close() | |
253 | del fp | |
254 | del tfp | |
255 | ||
256 | # raise exception if actual size does not match content-length header | |
257 | if size >= 0 and read < size: | |
258 | raise ContentTooShortError("retrieval incomplete: got only %i out " | |
259 | "of %i bytes" % (read, size), result) | |
260 | ||
261 | return result | |
262 | ||
    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url is either a string (direct request) or a (proxyhost, fullurl)
        tuple set up by open(); data, if not None, is POSTed as an
        urlencoded request body.
        """
        import httplib
        user_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy case: url is (proxyhost, full-url); the request line
            # carries the absolute URL as selector.
            host, selector = url
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Host is exempt from proxying: contact it directly.
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')
        if user_passwd:
            import base64
            auth = base64.encodestring(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-type', 'application/x-www-form-urlencoded')
            h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: h.putheader(*args)
        h.endheaders()
        if data is not None:
            h.send(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == 200:
            return addinfourl(fp, headers, "http:" + url)
        else:
            # Non-200: hand off to the http_error machinery, which may
            # retry, redirect, or raise.
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)
320 | ||
    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.
        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A true result means the handler dealt with the error fully;
            # a false one falls through to the default handler below.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)
335 | ||
336 | def http_error_default(self, url, fp, errcode, errmsg, headers): | |
337 | """Default error handler: close the connection and raise IOError.""" | |
338 | void = fp.read() | |
339 | fp.close() | |
340 | raise IOError, ('http error', errcode, errmsg, headers) | |
341 | ||
    # Only defined when the socket module was built with SSL support.
    if hasattr(socket, "ssl"):
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            Mirrors open_http(), additionally passing the key_file and
            cert_file client credentials from the constructor.
            """
            import httplib
            user_passwd = None
            if isinstance(url, str):
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                # Proxy case: url is (proxyhost, full-url).
                host, selector = url
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if user_passwd:
                import base64
                auth = base64.encodestring(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            for args in self.addheaders: h.putheader(*args)
            h.endheaders()
            if data is not None:
                h.send(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == 200:
                return addinfourl(fp, headers, "https:" + url)
            else:
                # Non-200: delegate to the http_error machinery.
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)
399 | ||
    def open_gopher(self, url):
        """Use Gopher protocol."""
        import gopherlib
        host, selector = splithost(url)
        if not host: raise IOError, ('gopher error', 'no host given')
        host = unquote(host)
        # The first path character encodes the gopher item type.
        type, selector = splitgophertype(selector)
        selector, query = splitquery(selector)
        selector = unquote(selector)
        if query:
            # Search item: the query string is sent along with the selector.
            query = unquote(query)
            fp = gopherlib.send_query(selector, query, host)
        else:
            fp = gopherlib.send_selector(selector, host)
        return addinfourl(fp, noheaders(), "gopher:" + url)
415 | ||
416 | def open_file(self, url): | |
417 | """Use local file or FTP depending on form of URL.""" | |
418 | if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': | |
419 | return self.open_ftp(url) | |
420 | else: | |
421 | return self.open_local_file(url) | |
422 | ||
    def open_local_file(self, url):
        """Use local file.

        Synthesizes Content-Type/Content-Length/Last-modified headers from
        the file's stat info; rejects URLs naming a non-local host.
        """
        import mimetypes, mimetools, email.Utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError, e:
            # Report stat failures as IOError, as callers expect.
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if not host:
            # No authority component at all: plainly a local path.
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        # A host part is acceptable only if it resolves to this machine.
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError, ('local file error', 'not on local host')
457 | ||
    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are cached per (user, host, port, directory) in
        self.ftpcache; a ";type=a|i|d" URL attribute selects the
        transfer mode.
        """
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        # Last component is the file name; the rest is the directory chain.
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # No file name means a directory listing ('D' => ASCII LIST).
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            # Re-raise any ftplib error as IOError, keeping the traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
517 | ||
518 | def open_data(self, url, data=None): | |
519 | """Use "data" URL.""" | |
520 | # ignore POSTed data | |
521 | # | |
522 | # syntax of data URLs: | |
523 | # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data | |
524 | # mediatype := [ type "/" subtype ] *( ";" parameter ) | |
525 | # data := *urlchar | |
526 | # parameter := attribute "=" value | |
527 | import mimetools | |
528 | try: | |
529 | from cStringIO import StringIO | |
530 | except ImportError: | |
531 | from StringIO import StringIO | |
532 | try: | |
533 | [type, data] = url.split(',', 1) | |
534 | except ValueError: | |
535 | raise IOError, ('data error', 'bad data URL') | |
536 | if not type: | |
537 | type = 'text/plain;charset=US-ASCII' | |
538 | semi = type.rfind(';') | |
539 | if semi >= 0 and '=' not in type[semi:]: | |
540 | encoding = type[semi+1:] | |
541 | type = type[:semi] | |
542 | else: | |
543 | encoding = '' | |
544 | msg = [] | |
545 | msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT', | |
546 | time.gmtime(time.time()))) | |
547 | msg.append('Content-type: %s' % type) | |
548 | if encoding == 'base64': | |
549 | import base64 | |
550 | data = base64.decodestring(data) | |
551 | else: | |
552 | data = unquote(data) | |
553 | msg.append('Content-length: %d' % len(data)) | |
554 | msg.append('') | |
555 | msg.append(data) | |
556 | msg = '\n'.join(msg) | |
557 | f = StringIO(msg) | |
558 | headers = mimetools.Message(f, 0) | |
559 | f.fileno = None # needed for addinfourl | |
560 | return addinfourl(f, headers, url) | |
561 | ||
562 | ||
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # "realm@host" -> (user, password)
        self.tries = 0          # consecutive redirects seen (loop guard)
        self.maxtries = 10      # give up after this many redirects
571 | ||
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        # Unlike the base class, hand the error body back to the caller
        # as an ordinary response object.
        return addinfourl(fp, headers, "http:" + url)
575 | ||
    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many consecutive redirects: report a server error
            # instead of looping forever.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result
591 | ||
    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # Follow a redirect: the target comes from the Location (or,
        # failing that, URI) response header; with neither present,
        # return None so http_error() falls through to the default.
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Drain and close the old response before issuing the new request.
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        # Note: the redirected request is re-issued without data, i.e. as
        # a GET, which is the conventional 302 behavior.
        return self.open(newurl)
604 | ||
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        # 307 forbids silently changing the request method, so a POST
        # (data given) is not retried automatically.
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
619 | ||
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        See this URL for a description of the basic authentication scheme:
        http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
        # Only the Basic scheme is supported; anything unparsable falls
        # through to the base-class default handler, which raises IOError.
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Retry through the scheme-specific helper:
        # retry_http_basic_auth or retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)
642 | ||
    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request with user:password spliced into the URL.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]     # strip any credentials already in the URL
        # i also doubles as the clear_cache flag: a nonzero value means
        # the previous (cached) credentials were rejected.
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)
655 | ||
    def retry_https_basic_auth(self, url, realm, data=None):
        # Same idea as retry_http_basic_auth, but re-opens via open_https().
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]     # strip any credentials already in the URL
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = '//' + host + selector
        return self.open_https(newurl, data)
665 | ||
666 | def get_user_passwd(self, host, realm, clear_cache = 0): | |
667 | key = realm + '@' + host.lower() | |
668 | if key in self.auth_cache: | |
669 | if clear_cache: | |
670 | del self.auth_cache[key] | |
671 | else: | |
672 | return self.auth_cache[key] | |
673 | user, passwd = self.prompt_user_passwd(host, realm) | |
674 | if user or passwd: self.auth_cache[key] = (user, passwd) | |
675 | return user, passwd | |
676 | ||
677 | def prompt_user_passwd(self, host, realm): | |
678 | """Override this in a GUI environment!""" | |
679 | import getpass | |
680 | try: | |
681 | user = raw_input("Enter username for %s at %s: " % (realm, | |
682 | host)) | |
683 | passwd = getpass.getpass("Enter password for %s in %s at %s: " % | |
684 | (user, realm, host)) | |
685 | return user, passwd | |
686 | except KeyboardInterrupt: | |
687 | ||
688 | return None, None | |
689 | ||
690 | ||
691 | # Utility functions | |
692 | ||
_localhost = None   # cached result of localhost()
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The (one and only) lookup result is cached for later calls.
    """
    global _localhost
    if _localhost is None:      # first call: resolve and remember
        _localhost = socket.gethostbyname('localhost')
    return _localhost
700 | ||
_thishost = None    # cached result of thishost()
def thishost():
    """Return the IP address of the current host.

    The (one and only) lookup result is cached for later calls.
    """
    global _thishost
    if _thishost is None:       # first call: resolve and remember
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
708 | ||
_ftperrors = None   # cached ftplib.all_errors (ftplib imported lazily)
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        # Import lazily so merely loading this module never pulls in ftplib.
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors
717 | ||
_noheaders = None   # cached empty Message, built on first use
def noheaders():
    """Return an empty mimetools.Message object."""
    global _noheaders
    if _noheaders is None:
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        _noheaders = mimetools.Message(StringIO(), 0)
        _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
731 | ||
732 | ||
733 | # Utility classes | |
734 | ||
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs):
        # Keep the connection parameters so init() can be re-run to
        # reconnect after the server drops an idle connection.
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.init()

    def init(self):
        """(Re)connect, log in and change to the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Return (file-like object, length-or-None) for file.

        type 'd'/'D' requests a directory listing (ASCII mode); any
        other type is used verbatim in a TYPE command before RETR.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Connection probably timed out: reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # 550 means "not a plain file": fall through to LIST below.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])

    def endtransfer(self):
        """Finish any transfer in progress, swallowing protocol errors."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any transfer in progress and close the control connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
807 | ||
808 | class addbase: | |
809 | """Base class for addinfo and addclosehook.""" | |
810 | ||
811 | def __init__(self, fp): | |
812 | self.fp = fp | |
813 | self.read = self.fp.read | |
814 | self.readline = self.fp.readline | |
815 | if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines | |
816 | if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno | |
817 | if hasattr(self.fp, "__iter__"): | |
818 | self.__iter__ = self.fp.__iter__ | |
819 | if hasattr(self.fp, "next"): | |
820 | self.next = self.fp.next | |
821 | ||
822 | def __repr__(self): | |
823 | return '<%s at %r whose fp = %r>' % (self.__class__.__name__, | |
824 | id(self), self.fp) | |
825 | ||
826 | def close(self): | |
827 | self.read = None | |
828 | self.readline = None | |
829 | self.readlines = None | |
830 | self.fileno = None | |
831 | if self.fp: self.fp.close() | |
832 | self.fp = None | |
833 | ||
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    The hook callable (with its arguments) is invoked exactly once,
    after the wrapped file has been closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        # Fire the hook once, then drop the references so a second
        # close() is a no-op and the callable can be collected.
        if self.closehook:
            self.closehook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
848 | ||
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        # Keep the response headers alongside the wrapped file.
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object stored at construction time."""
        return self.headers
858 | ||
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        # Keep both the response headers and the (possibly redirected)
        # URL alongside the wrapped file.
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object stored at construction time."""
        return self.headers

    def geturl(self):
        """Return the URL this response was actually retrieved from."""
        return self.url
872 | ||
873 | ||
874 | # Utilities to parse URLs (most of these return None for missing parts): | |
875 | # unwrap('<URL:type://host/path>') --> 'type://host/path' | |
876 | # splittype('type:opaquestring') --> 'type', 'opaquestring' | |
877 | # splithost('//host[:port]/path') --> 'host[:port]', '/path' | |
878 | # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' | |
879 | # splitpasswd('user:passwd') -> 'user', 'passwd' | |
880 | # splitport('host:port') --> 'host', 'port' | |
881 | # splitquery('/path?query') --> '/path', 'query' | |
882 | # splittag('/path#tag') --> '/path', 'tag' | |
883 | # splitattr('/path;attr1=value1;attr2=value2;...') -> | |
884 | # '/path', ['attr1=value1', 'attr2=value2', ...] | |
885 | # splitvalue('attr=value') --> 'attr', 'value' | |
886 | # splitgophertype('/Xselector') --> 'X', 'selector' | |
887 | # unquote('abc%20def') -> 'abc def' | |
# quote('abc def') -> 'abc%20def'
889 | ||
try:
    unicode
except NameError:
    # No unicode builtin (e.g. a Python built without it): nothing can
    # be a unicode instance.
    def _is_unicode(x):
        return 0
else:
    # unicode exists: a plain isinstance test.
    def _is_unicode(x):
        return isinstance(x, unicode)
898 | ||
def toBytes(url):
    """toBytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.  Non-unicode input is returned untouched.
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
910 | ||
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        # Strip the angle brackets, then a 'URL:' prefix if present.
        url = url[1:-1].strip()
        if url.startswith('URL:'):
            url = url[4:].strip()
    return url
918 | ||
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    # Compile the pattern lazily, once, on first use.
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    m = _typeprog.match(url)
    if m is None:
        return None, url
    scheme = m.group(1)
    # The scheme is case-insensitive; normalize to lower case.
    return scheme.lower(), url[len(scheme) + 1:]
932 | ||
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    # Compile the pattern lazily, once, on first use.
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    m = _hostprog.match(url)
    if m is None:
        return None, url
    return m.group(1), m.group(2)
944 | ||
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    # Compile the pattern lazily, once, on first use.
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    m = _userprog.match(host)
    if m is None:
        return None, host
    # Both halves may be %-escaped; decode them on the way out.
    return map(unquote, m.group(1, 2))
956 | ||
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    # Compile the pattern lazily, once, on first use.
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    m = _passwdprog.match(user)
    if m is None:
        return user, None
    return m.group(1), m.group(2)
968 | ||
969 | # splittag('/path#tag') --> '/path', 'tag' | |
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string, and only when it is all digits;
    otherwise the input is returned unchanged with a None port.
    """
    # Compile the pattern lazily, once, on first use.
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    m = _portprog.match(host)
    if m is None:
        return host, None
    return m.group(1), m.group(2)
981 | ||
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    # Compile the pattern lazily, once, on first use.
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    m = _nportprog.match(host)
    if m is None:
        # No colon at all: keep the caller's default.
        return host, defport
    host, port = m.group(1, 2)
    if port:
        try:
            return host, int(port)
        except ValueError:
            pass
    # Empty or non-numeric port.
    return host, None
1003 | ||
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    # Compile the pattern lazily, once, on first use.  The greedy
    # first group means the split happens at the LAST '?'.
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    m = _queryprog.match(url)
    if m is None:
        return url, None
    return m.group(1), m.group(2)
1015 | ||
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    # Compile the pattern lazily, once, on first use.  The greedy
    # first group means the split happens at the LAST '#'.
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    m = _tagprog.match(url)
    if m is None:
        return url, None
    return m.group(1), m.group(2)
1027 | ||
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    parts = url.split(';')
    return parts[0], parts[1:]
1033 | ||
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    # Compile the pattern lazily, once, on first use.  The split
    # happens at the FIRST '=' ([^=]* cannot span one).
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    m = _valueprog.match(attr)
    if m is None:
        return attr, None
    return m.group(1), m.group(2)
1045 | ||
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'."""
    # A gopher selector is '/<one type char><rest>'; require both the
    # leading slash and at least one character after it.
    if selector.startswith('/') and len(selector) > 1:
        return selector[1], selector[2:]
    return None, selector
1051 | ||
# Map every two-hex-digit string (all-lower and all-upper spellings)
# to the byte it encodes.
_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
_hexdig = '0123456789abcdefABCDEF'

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Replaces each %xx escape with the character it encodes.  Invalid
    escapes (truncated or non-hex) are passed through unchanged with
    their '%' intact.
    """
    res = s.split('%')
    # res[0] has no leading '%'; every later element starts right
    # after one.
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            code = item[:2]
            if len(code) == 2 and code[0] in _hexdig and code[1] in _hexdig:
                # Mixed-case escapes such as '%aB' are valid per
                # RFC 3986 (hex digits are case-insensitive) but are
                # not in the table; decode them explicitly.
                res[i] = chr(int(code, 16)) + item[2:]
            else:
                # Not a valid escape: keep the '%' literal.
                res[i] = '%' + item
    return "".join(res)
1065 | ||
def unquote_plus(s):
    """unquote('%7e/abc+def') -> '~/abc def'"""
    # '+' is the form-encoding of a space; map it first, then undo
    # the %-escapes.
    return unquote(s.replace('+', ' '))
1070 | ||
# Characters that never need quoting, in any component of a URL.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of per-`safe` translation tables, keyed by (safe, always_safe).
_safemaps = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted (see
    RFC 2396 for the full list: ";/?:@&=+$,").  By default this
    function is intended for quoting the path section of a URL, so
    '/' is left unquoted; pass `safe` to choose a different set of
    characters to leave alone.
    """
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        # Build a 256-entry char -> replacement table once per `safe`.
        allowed = safe + always_safe
        safe_map = {}
        for code in range(256):
            ch = chr(code)
            if ch in allowed:
                safe_map[ch] = ch
            else:
                safe_map[ch] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join([safe_map[ch] for ch in s])
1109 | ||
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Temporarily mark spaces as safe so quote() leaves them, then
    # turn them into '+' as form-encoding requires.
    return quote(s, safe + ' ').replace(' ', '+')
1116 | ||
def urlencode(query,doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Returns a string of '&'-joined 'key=value' pairs, with both keys
    and values passed through quote_plus().
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Re-raise as a friendlier TypeError, but keep the original
            # traceback (three-argument raise) so the failure points at
            # the caller's bad argument.
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior: every value is stringified whole, even
        # if it is itself a sequence
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence: one k=elt pair per element
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
1179 | ||
1180 | # Proxy handling | |
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        # Accept HTTP_PROXY as well as http_proxy, but skip empty values.
        lowered = name.lower()
        if value and lowered.endswith('_proxy'):
            proxies[lowered[:-6]] = value
    return proxies
1196 | ||
# Platform-specific proxy discovery: Internet Config on the Mac,
# the registry on Windows, environment variables everywhere else.
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        # No proxy-override information available here; never bypass.
        return 0

    def getproxies():
        # Environment variables win over Internet Config settings.
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings, e.g. "http=h:80;ftp=f:21"
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        """Return 1 if `host` matches the registry's ProxyOverride list."""
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        host = [host]
        try:
            addr = socket.gethostbyname(host[0])
            # BUGFIX: this used to be "addr != host", comparing a string
            # against a list -- always true, so the address was appended
            # even when it duplicated the hostname.
            if addr != host[0]:
                host.append(addr)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # print proxyOverride
        # now check if we match one of the registry values.
        for test in proxyOverride:
            # Translate the override's glob syntax into a regex.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        # No override information available; never bypass.
        return 0
1347 | ||
1348 | # Test and time quote() and unquote() | |
1349 | def test1(): | |
1350 | s = '' | |
1351 | for i in range(256): s = s + chr(i) | |
1352 | s = s*4 | |
1353 | t0 = time.time() | |
1354 | qs = quote(s) | |
1355 | uqs = unquote(qs) | |
1356 | t1 = time.time() | |
1357 | if uqs != s: | |
1358 | print 'Wrong!' | |
1359 | print repr(s) | |
1360 | print repr(qs) | |
1361 | print repr(uqs) | |
1362 | print round(t1 - t0, 3), 'sec' | |
1363 | ||
1364 | ||
def reporthook(blocknum, blocksize, totalsize):
    # Default progress callback for remote transfers: log each block.
    sys.stdout.write("Block number: %d, Block size: %d, Total size: %d\n"
                     % (blocknum, blocksize, totalsize))
1369 | ||
1370 | # Test program | |
1371 | def test(args=[]): | |
1372 | if not args: | |
1373 | args = [ | |
1374 | '/etc/passwd', | |
1375 | 'file:/etc/passwd', | |
1376 | 'file://localhost/etc/passwd', | |
1377 | 'ftp://ftp.python.org/pub/python/README', | |
1378 | ## 'gopher://gopher.micro.umn.edu/1/', | |
1379 | 'http://www.python.org/index.html', | |
1380 | ] | |
1381 | if hasattr(URLopener, "open_https"): | |
1382 | args.append('https://synergy.as.cmu.edu/~geek/') | |
1383 | try: | |
1384 | for url in args: | |
1385 | print '-'*10, url, '-'*10 | |
1386 | fn, h = urlretrieve(url, None, reporthook) | |
1387 | print fn | |
1388 | if h: | |
1389 | print '======' | |
1390 | for k in h.keys(): print k + ':', h[k] | |
1391 | print '======' | |
1392 | fp = open(fn, 'rb') | |
1393 | data = fp.read() | |
1394 | del fp | |
1395 | if '\r' in data: | |
1396 | table = string.maketrans("", "") | |
1397 | data = data.translate(table, "\r") | |
1398 | print data | |
1399 | fn, h = None, None | |
1400 | print '-'*40 | |
1401 | finally: | |
1402 | urlcleanup() | |
1403 | ||
1404 | def main(): | |
1405 | import getopt, sys | |
1406 | try: | |
1407 | opts, args = getopt.getopt(sys.argv[1:], "th") | |
1408 | except getopt.error, msg: | |
1409 | print msg | |
1410 | print "Use -h for help" | |
1411 | return | |
1412 | t = 0 | |
1413 | for o, a in opts: | |
1414 | if o == '-t': | |
1415 | t = t + 1 | |
1416 | if o == '-h': | |
1417 | print "Usage: python urllib.py [-t] [url ...]" | |
1418 | print "-t runs self-test;", | |
1419 | print "otherwise, contents of urls are printed" | |
1420 | return | |
1421 | if t: | |
1422 | if t > 1: | |
1423 | test1() | |
1424 | test(args) | |
1425 | else: | |
1426 | if not args: | |
1427 | print "Use -h for help" | |
1428 | for url in args: | |
1429 | print urlopen(url).read(), | |
1430 | ||
# Run test program when run as a script (see main() for options).
if __name__ == '__main__':
    main()