Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / v9 / lib / python2.4 / urllib2.py
CommitLineData
920dae64
AT
1"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below). It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work. Each Handler implements a particular protocol or
10option. The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL. For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns. The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15deals with digest authentication.
16
urlopen(url, data=None) -- basic usage is the same as the original
18urllib. pass the url and optionally data to post to an HTTP URL, and
19get a file-like object back. One difference is that you can also pass
20a Request instance instead of URL. Raises a URLError (subclass of
21IOError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- function that creates a new OpenerDirector instance.
25will install the default handlers. accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate. if one of the arguments is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- installs a new opener as the default opener.
31
32objects of interest:
33OpenerDirector --
34
35Request -- an object that encapsulates the state of a request. the
36state can be a simple as the URL. it can also include extra HTTP
37headers, e.g. a User-Agent.
38
39BaseHandler --
40
41exceptions:
42URLError-- a subclass of IOError, individual protocols have their own
43specific subclass
44
45HTTPError-- also a valid HTTP response, so you can treat an HTTP error
46as an exceptional event or valid response
47
48internals:
49BaseHandler and parent
50_call_chain conventions
51
52Example usage:
53
54import urllib2
55
56# set up authentication info
57authinfo = urllib2.HTTPBasicAuthHandler()
58authinfo.add_password('realm', 'host', 'username', 'password')
59
60proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
61
62# build a new opener that adds authentication and caching FTP handlers
63opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
64
65# install it
66urllib2.install_opener(opener)
67
68f = urllib2.urlopen('http://www.python.org/')
69
70
71"""
72
73# XXX issues:
74# If an authentication error handler that tries to perform
75# authentication for some reason but fails, how should the error be
76# signalled? The client needs to know the HTTP error code. But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
# pass that information along to the client, too.
80
81# XXX to do:
82# name!
83# documentation (getting there)
84# complex proxies
85# abstract factory for opener
86# ftp errors aren't handled cleanly
87# gopher can return a socket.error
88# check digest against correct (i.e. non-apache) implementation
89
90import base64
91import ftplib
92import gopherlib
93import httplib
94import inspect
95import md5
96import mimetypes
97import mimetools
98import os
99import posixpath
100import random
101import re
102import sha
103import socket
104import sys
105import time
106import urlparse
107import bisect
108import cookielib
109
110try:
111 from cStringIO import StringIO
112except ImportError:
113 from StringIO import StringIO
114
115# not sure how many of these need to be gotten rid of
116from urllib import (unwrap, unquote, splittype, splithost,
117 addinfourl, splitport, splitgophertype, splitquery,
118 splitattr, ftpwrapper, noheaders, splituser, splitpasswd, splitvalue)
119
120# support for FileHandler, proxies via environment variables
121from urllib import localhost, url2pathname, getproxies
122
__version__ = "2.4"  # tracks the Python release this module shipped with
124
# Module-wide default opener, created lazily on first use and
# replaceable via install_opener().
_opener = None

def urlopen(url, data=None):
    """Open url (a URL string or a Request object) with the default
    opener, optionally POSTing data; return a file-like response."""
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data)
131
def install_opener(opener):
    """Make opener the default used by subsequent urlopen() calls."""
    global _opener
    _opener = opener
135
136# do these error classes make sense?
137# make sure all of the IOError stuff is overridden. we just want to be
138# subtypes.
139
class URLError(IOError):
    """Error raised while opening a URL; base for protocol errors.

    Subtypes IOError without sharing its implementation, so __init__
    and __str__ are overridden.  self.args is populated for
    compatibility with other EnvironmentError subclasses, but it does
    not follow the (errno, strerror) layout.
    """
    def __init__(self, reason):
        self.reason = reason
        self.args = (reason,)

    def __str__(self):
        return '<urlopen error %s>' % self.reason
152
class HTTPError(URLError, addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""
    # Private alias so the addinfourl initializer can be invoked
    # selectively from __init__ (name-mangled to this class).
    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        # code -- numeric HTTP status (e.g. 404)
        # msg  -- reason phrase from the status line
        # hdrs -- response headers object
        # fp   -- file object for the response body, or None
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        self.filename = url
        # The addinfourl classes depend on fp being a valid file
        # object.  In some cases, the HTTPError may not have a valid
        # file object.  If this happens, the simplest workaround is to
        # not initialize the base classes.
        if fp is not None:
            self.__super_init(fp, hdrs, url)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)
172
class GopherError(URLError):
    """URLError variant reported by the gopher protocol support."""
175
176
class Request:
    """Encapsulate one URL request: URL, optional POST data, headers,
    and the origin-host/unverifiable state used by cookie handling.

    The scheme and host are parsed lazily by the get_* accessors; the
    __getattr__ fallback triggers that parsing when the name-mangled
    __r_* attributes are touched out of order.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False):
        # NOTE(review): headers={} is a mutable default, but it is only
        # iterated here (items()), never mutated, so sharing is benign.
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None  # URL scheme, filled in lazily by get_type()
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self.data = data  # body to POST, or None for GET
        self.headers = {}
        for key, value in headers.items():
            self.add_header(key, value)
        # Headers that must not survive a redirect (e.g. Content-length).
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = cookielib.request_host(self)
        self.origin_req_host = origin_req_host
        # True for requests the user did not directly ask for (e.g.
        # redirects); consulted by cookielib's cookie policy.
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order. this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                # Calling the accessor populates the attribute.
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError, attr

    def get_method(self):
        # The HTTP method is implied by the presence of a body.
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        return self.__original

    def get_type(self):
        # Parse (and cache) the scheme on first use.
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError, "unknown url type: %s" % self.__original
        return self.type

    def get_host(self):
        # Parse (and cache) the host on first use.
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        # Request-URI sent to the server: the path, or the full URL
        # once set_proxy() has been called.
        return self.__r_host

    def set_proxy(self, host, type):
        # Route through a proxy: the proxy becomes the host and the
        # complete original URL becomes the selector.
        self.host, self.type = host, type
        self.__r_host = self.__original

    def get_origin_req_host(self):
        return self.origin_req_host

    def is_unverifiable(self):
        return self.unverifiable

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        # Regular headers take precedence over unredirected ones.
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
277
class OpenerDirector:
    """Manage a chain of handlers and route requests through them.

    Handlers advertise capabilities through method names:
    ``<protocol>_open``, ``<protocol>_request`` / ``<protocol>_response``
    (processors), and ``<protocol>_error_<code>``; add_handler()
    discovers them by introspection.
    """
    def __init__(self):
        server_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', server_version)]
        # manage the individual handlers
        self.handlers = []
        # protocol -> [handlers providing <protocol>_open]
        self.handle_open = {}
        # protocol -> {code-or-'default': [handlers]}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register every protocol method found on handler."""
        added = False
        for meth in dir(handler):
            # Split e.g. 'http_error_404' into protocol 'http' and
            # condition 'error_404'.
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    # Numeric codes are stored as ints; 'default' stays
                    # a string.
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = getattr(self, "handle_"+condition)
            elif condition in ["response", "request"]:
                kind = protocol
                lookup = getattr(self, "process_"+condition)
            else:
                continue

            # Keep each chain ordered by handler_order (BaseHandler
            # supplies __lt__ for bisect).
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Invoke meth_name on each handler under chain[kind] until one
        # returns a non-None result; None means "I can't, try others".
        # XXX raise an exception if no one else should try to handle
        # this url. return None if you can't but someone else could.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None):
        """Open fullurl (a URL string or Request); return a response."""
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # default_open handlers get first refusal ...
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        # ... then the protocol-specific handlers ...
        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        # ... and finally unknown_open as the catch-all.
        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error-handler chains."""
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK! the HTTP status code from the args
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # Nothing claimed this specific code; fall back to the
            # http_error_default chain (which raises HTTPError).
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
403
404# XXX probably also want an abstract factory that knows when it makes
405# sense to skip a superclass in favor of a subclass and when it might
406# make sense to include both
407
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.  Handlers
    may be given as instances or as classes (which are instantiated).
    """

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        # HTTPS is only offered when Python was built with SSL support.
        default_classes.append(HTTPSHandler)
    skip = []
    for klass in default_classes:
        for check in handlers:
            if inspect.isclass(check):
                if issubclass(check, klass):
                    skip.append(klass)
            elif isinstance(check, klass):
                skip.append(klass)
    # Bug fix: skip may contain the same class more than once when
    # several supplied handlers override the same default; the old
    # unconditional remove() then raised ValueError on the second pass.
    for klass in skip:
        if klass in default_classes:
            default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if inspect.isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
443
class BaseHandler:
    """Common base for the protocol handlers an OpenerDirector manages."""

    # Sort key used by OpenerDirector; lower values run earlier.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the OpenerDirector this handler was added to."""
        self.parent = parent

    def close(self):
        """Does nothing; kept only for backwards compatibility."""

    def __lt__(self, other):
        """Order handlers by handler_order for bisect.insort."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Preserve the old behavior of sorting custom classes that
            # predate handler_order after the default handlers.
            return True
        return self.handler_order < other_order
461
462
class HTTPErrorProcessor(BaseHandler):
    """Route non-success HTTP responses through the error-handler chain."""

    # Must run after every other response processor.
    handler_order = 1000

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # 200 (OK) and 206 (Partial Content) pass through untouched.
        if code in (200, 206):
            return response
        # Anything else goes to the parent's error machinery, which may
        # return a substitute response or raise HTTPError.
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
477
class HTTPDefaultErrorHandler(BaseHandler):
    """Last resort: surface any unhandled HTTP error as an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # No more specific handler claimed this status code.
        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
481
class HTTPRedirectHandler(BaseHandler):
    """Follow 301/302/303/307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"):
            # Strictly (according to RFC 2616), 301 or 302 in response
            # to a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case). In practice,
            # essentially all clients do redirect in this case, so we
            # do the same.
            # The new request is marked unverifiable because the user
            # did not ask for the redirect target directly.
            return Request(newurl,
                           headers=req.headers,
                           origin_req_host=req.get_origin_req_host(),
                           unverifiable=True)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if 'location' in headers:
            newurl = headers.getheaders('location')[0]
        elif 'uri' in headers:
            newurl = headers.getheaders('uri')[0]
        else:
            # No redirect target supplied; decline so another handler
            # may try.
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    # All the retriable redirect codes share one implementation.
    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
561
class ProxyHandler(BaseHandler):
    """Reroute requests through scheme-specific proxies.

    proxies maps scheme -> proxy URL; when omitted, the mapping is
    taken from the environment via urllib.getproxies().
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            # Grow a <scheme>_open method per configured proxy.  The
            # lambda's default arguments freeze the current loop values
            # (the usual late-binding-closure workaround).
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        orig_type = req.get_type()
        type, r_type = splittype(proxy)
        host, XXX = splithost(r_type)
        if '@' in host:
            # Proxy URL embeds credentials: user:password@host.
            user_pass, host = host.split('@', 1)
            if ':' in user_pass:
                user, password = user_pass.split(':', 1)
                user_pass = base64.encodestring('%s:%s' % (unquote(user),
                                                unquote(password))).strip()
            req.add_header('Proxy-authorization', 'Basic ' + user_pass)
        host = unquote(host)
        req.set_proxy(host, type)
        if orig_type == type:
            # let other handlers take care of it
            # XXX this only makes sense if the proxy is before the
            # other handlers
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            return self.parent.open(req)
598
599# feature suggested by Duncan Booth
600# XXX custom is not a good name
class CustomProxy:
    """One proxy rule: a predicate over requests plus a proxy address.

    Either pass func (a callable taking a Request) to the constructor
    or subclass and override handle().
    """

    def __init__(self, proto, func=None, proxy_addr=None):
        self.proto = proto
        self.func = func
        self.addr = proxy_addr

    def handle(self, req):
        """Return a true value when this rule should service req."""
        if self.func and self.func(req):
            return 1

    def get_proxy(self):
        """Return the configured proxy address."""
        return self.addr
614
class CustomProxyHandler(BaseHandler):
    """Route requests through CustomProxy rules registered per scheme."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, *proxies):
        self.proxies = {}
        # Bug fix: CustomProxy objects passed to the constructor used
        # to be silently discarded; register them.
        for cpo in proxies:
            self.add_proxy(cpo)

    def proxy_open(self, req):
        """Open req through the first matching proxy rule, if any."""
        proto = req.get_type()
        try:
            proxies = self.proxies[proto]
        except KeyError:
            return None
        for p in proxies:
            if p.handle(req):
                # Bug fix: Request.set_proxy() requires both the proxy
                # host and the scheme; the type argument was missing,
                # which raised TypeError whenever a rule matched.
                req.set_proxy(p.get_proxy(), proto)
                return self.parent.open(req)
        return None

    def do_proxy(self, p, req):
        # Legacy hook; simply re-opens the request via the parent.
        return self.parent.open(req)

    def add_proxy(self, cpo):
        """Register CustomProxy cpo under its protocol."""
        if cpo.proto in self.proxies:
            self.proxies[cpo.proto].append(cpo)
        else:
            self.proxies[cpo.proto] = [cpo]
642
class HTTPPasswordMgr:
    """Store credentials keyed by realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple-of-reduced-uris: (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register user/passwd for realm at uri (a URI or a sequence)."""
        if isinstance(uri, basestring):
            uri = [uri]
        reduced = tuple([self.reduce_uri(u) for u in uri])
        if not realm in self.passwd:
            self.passwd[realm] = {}
        self.passwd[realm][reduced] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for realm/authuri, else (None, None)."""
        authuri = self.reduce_uri(authuri)
        for uris, authinfo in self.passwd.get(realm, {}).iteritems():
            for uri in uris:
                if self.is_suburi(uri, authuri):
                    return authinfo
        return None, None

    def reduce_uri(self, uri):
        """Accept netloc or URI and extract only the netloc and path"""
        parts = urlparse.urlparse(uri)
        if parts[1]:
            # A full URI: keep netloc and path (path defaults to "/").
            return parts[1], parts[2] or '/'
        # A bare netloc parses into the path slot.
        return parts[2], '/'

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = posixpath.commonprefix((base[1], test[1]))
        return len(prefix) == len(base[1])
686
687
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to a wildcard realm of None."""

    def find_user_password(self, realm, authuri):
        # Try the exact realm first, then the catch-all None realm.
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
696
697
class AbstractBasicAuthHandler:
    """Shared machinery for Basic auth against servers and proxies."""

    # Matches e.g. 'Basic realm="foo"'.
    # XXX there can actually be multiple auth-schemes in a
    # www-authenticate header; only the first is considered here.
    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # authreq is the *name* of the challenge header to inspect.
        # XXX could be multiple headers
        challenge = headers.get(authreq, None)
        if not challenge:
            return None
        mo = AbstractBasicAuthHandler.rx.search(challenge)
        if not mo:
            return None
        scheme, realm = mo.groups()
        if scheme.lower() != 'basic':
            return None
        return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is None:
            return None
        raw = "%s:%s" % (user, pw)
        auth = 'Basic %s' % base64.encodestring(raw).strip()
        if req.headers.get(self.auth_header, None) == auth:
            # These exact credentials were already sent and rejected.
            return None
        req.add_header(self.auth_header, auth)
        return self.parent.open(req)
733
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 challenges with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # Element 1 of the parse tuple is the netloc of the server.
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          host, req, headers)
742
743
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 proxy challenges with Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # The proxy host is the request's (already proxied) host.
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          host, req, headers)
752
753
def randombytes(n):
    """Return n random bytes."""
    # Use /dev/urandom if it is available.  Fall back to random module
    # if not.  It might be worthwhile to extend this function to use
    # other platform-specific mechanisms for getting random bytes.
    if os.path.exists("/dev/urandom"):
        f = open("/dev/urandom")
        # Bug fix: close the file even when read() raises, so the
        # descriptor cannot leak.
        try:
            return f.read(n)
        finally:
            f.close()
    else:
        L = [chr(random.randrange(0, 256)) for i in range(n)]
        return "".join(L)
767
class AbstractDigestAuthHandler:
    """Shared implementation of HTTP Digest authentication (RFC 2617)
    for both server (401) and proxy (407) challenges.

    Known limitations (unchanged from the original):
      - the Authentication-Info header of a successful response is not
        inspected;
      - qop="auth-int" is not implemented (now reported explicitly,
        see get_authorization).
    """

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry req with Digest credentials after a 401/407."""
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            else:
                raise ValueError("AbstractDigestAuthHandler doesn't know "
                                 "about %s"%(scheme))

    def retry_http_digest_auth(self, req, auth):
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # Identical credentials were already rejected; give up
                # rather than loop.
                return None
            req.add_header(self.auth_header, auth_val)
            resp = self.parent.open(req)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = sha.new("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
                                       randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization header value, or return None
        when the challenge cannot be answered."""
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            self.nonce_count += 1
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # Bug fix: this branch used to fall through, leaving respdig
            # (and, below, ncvalue/cnonce) unbound and raising NameError.
            # auth-int remains unimplemented, so fail meaningfully.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base = base + ', opaque="%s"' % opaque
        if entdig:
            base = base + ', digest="%s"' % entdig
        if algorithm != 'MD5':
            base = base + ', algorithm="%s"' % algorithm
        if qop:
            base = base + ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: md5.new(x).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: sha.new(x).hexdigest()
        else:
            # Bug fix: an unrecognized algorithm used to hit NameError on
            # the KD line below; report "unsupported" through the
            # H-is-None convention that get_authorization checks.
            return None, None
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
902
903
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # Element 1 of the parse tuple is the server's netloc.
        host = urlparse.urlparse(req.get_full_url())[1]
        answer = self.http_error_auth_reqed('www-authenticate',
                                            host, req, headers)
        # Reset the counter whether or not the retry succeeded.
        self.reset_retry_count()
        return answer
919
920
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 proxy challenges with Digest credentials."""

    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        host = req.get_host()
        answer = self.http_error_auth_reqed('proxy-authenticate',
                                            host, req, headers)
        # Reset the counter whether or not the retry succeeded.
        self.reset_retry_count()
        return answer
931
class AbstractHTTPHandler(BaseHandler):
    """Request preparation and connection handling shared by the
    HTTP and HTTPS handlers."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        # Forwarded to the httplib connection inside do_open().
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in default headers (Content-type/length for POSTs, Host,
        and the opener-wide extra headers) before sending."""
        host = request.get_host()
        if not host:
            raise URLError('no host given')

        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))

        scheme, sel = splittype(request.get_selector())
        sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            # Behind a proxy the selector is a full URL, so prefer the
            # host embedded in it over the proxy host.
            request.add_unredirected_header('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        h = http_class(host) # will parse host:port
        h.set_debuglevel(self._debuglevel)

        # Regular headers override unredirected ones of the same name.
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error, err: # XXX what error?
            raise URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        r.recv = r.read
        fp = socket._fileobject(r)

        resp = addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
1016
1017
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs via httplib.HTTPConnection."""

    # Reuse the shared request fixup (Host/Content-* headers) verbatim.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send *req* over a plain HTTP connection and return the response."""
        return self.do_open(httplib.HTTPConnection, req)
1024
if hasattr(httplib, 'HTTPS'):
    # Only defined when httplib was built with SSL support.
    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs via httplib.HTTPSConnection."""

        # Same header fixup as for plain HTTP.
        https_request = AbstractHTTPHandler.do_request_

        def https_open(self, req):
            """Send *req* over an SSL connection and return the response."""
            return self.do_open(httplib.HTTPSConnection, req)
1032
class HTTPCookieProcessor(BaseHandler):
    """Handler that attaches cookies to requests and collects them
    from responses, using a cookielib.CookieJar."""

    def __init__(self, cookiejar=None):
        # Fall back to a fresh, empty jar when the caller supplies none.
        if cookiejar is None:
            cookiejar = cookielib.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        """Add any matching Cookie headers to the outgoing request."""
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Record cookies set by the server's response in the jar."""
        self.cookiejar.extract_cookies(response, request)
        return response

    # HTTPS traffic is treated exactly like HTTP.
    https_request = http_request
    https_response = http_response
1049
class UnknownHandler(BaseHandler):
    """Fallback handler: raise URLError for schemes no handler claimed."""

    def unknown_open(self, req):
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
1054
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Values wrapped in double quotes have the quotes stripped.  Returns
    a dict mapping keys to (unquoted) values.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Guard against an empty value (e.g. "k="): without the length
        # check, v[0] would raise IndexError.  Require len(v) > 1 so a
        # lone '"' is left alone rather than stripped to nothing.
        if len(v) > 1 and v[0] == '"' and v[-1] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1064
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Split *s* on commas, except that commas inside a double-quoted
    string do not separate elements.  Inside quotes a backslash escapes
    the next character (the backslash itself is dropped).  Quote marks
    are kept in the output; single quotes have no special meaning.
    Returns a list of stripped element strings.
    """
    items = []
    buf = ''
    in_quotes = False
    pending_escape = False

    for ch in s:
        if pending_escape:
            # Previous char was a backslash inside quotes: take this
            # char literally (the backslash is not emitted).
            buf += ch
            pending_escape = False
        elif in_quotes:
            if ch == '\\':
                pending_escape = True
            else:
                if ch == '"':
                    in_quotes = False
                buf += ch
        elif ch == ',':
            # Unquoted comma ends the current element.
            items.append(buf)
            buf = ''
        else:
            if ch == '"':
                in_quotes = True
            buf += ch

    # Trailing element, if any.
    if buf:
        items.append(buf)

    return [item.strip() for item in items]
1107
class FileHandler(BaseHandler):
    """Handler for file: URLs (local files, or FTP for file://host/...)."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.get_selector()
        # 'file://host/...' with a non-empty host is rerouted as FTP;
        # 'file:///path' (empty host) is opened locally.
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    # Cached lazily on the class in get_names(); shared by all instances.
    names = None
    def get_names(self):
        # NOTE(review): gethostbyname can block on DNS; result is cached
        # for the life of the process.
        if FileHandler.names is None:
            FileHandler.names = (socket.gethostbyname('localhost'),
                                 socket.gethostbyname(socket.gethostname()))
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Open the named local file and return an addinfourl wrapping it
        with synthesized Content-type/Content-length/Last-modified headers.

        Raises URLError when the URL names a non-local host; os.stat and
        open() errors (e.g. missing file) propagate as OSError/IOError.
        """
        import email.Utils
        host = req.get_host()
        file = req.get_selector()
        localfile = url2pathname(file)
        stats = os.stat(localfile)
        size = stats.st_size
        modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(file)[0]
        headers = mimetools.Message(StringIO(
            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        # Only serve the file if the URL has no host, or the host (with no
        # explicit port) resolves to this machine.
        if host:
            host, port = splitport(host)
        if not host or \
            (not port and socket.gethostbyname(host) in self.get_names()):
            return addinfourl(open(localfile, 'rb'),
                              headers, 'file:'+file)
        raise URLError('file not on local host')
1146
class FTPHandler(BaseHandler):
    """Handler for ftp: URLs, built on ftplib via the ftpwrapper helper."""

    def ftp_open(self, req):
        """Retrieve the file or directory named by *req* over FTP.

        Returns an addinfourl with synthesized Content-type and
        Content-length headers.  Raises URLError for hostname lookup
        failures and IOError('ftp error', ...) for FTP-level failures.
        """
        host = req.get_host()
        if not host:
            raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise URLError(msg)
        # Split the selector into path components and per-URL attributes
        # (';type=...' etc.); a trailing empty component means a directory.
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs)
            # Default transfer type: binary ('I') for files, directory
            # listing ('D') when the path ends in '/'.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            # Re-raise with the original traceback preserved.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]

    def connect_ftp(self, user, passwd, host, port, dirs):
        # Hook point: CacheFTPHandler overrides this to reuse connections.
        fw = ftpwrapper(user, passwd, host, port, dirs)
## fw.ftp.set_debuglevel(1)
        return fw
1203
class CacheFTPHandler(FTPHandler):
    """FTP handler that caches connections, keyed by (user, host, port, path).

    Connections idle past their per-entry deadline are closed; at most
    max_conns connections are cached at once.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}      # key -> ftpwrapper connection
        self.timeout = {}    # key -> absolute expiry time (time.time() based)
        self.soonest = 0     # earliest expiry among cached entries
        self.delay = 60      # idle lifetime for a cached connection, seconds
        self.max_conns = 16  # cache size limit

    def setTimeout(self, t):
        """Set the idle lifetime (in seconds) for cached connections."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the maximum number of simultaneously cached connections."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a cached connection for this destination, creating one
        (and pruning the cache) as needed."""
        key = user, host, port, '/'.join(dirs)
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Close expired connections, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            # items() returns a list in Python 2, so deleting entries
            # while looping is safe here.
            for k, v in self.timeout.items():
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # Bug fix: min() raises ValueError on an empty sequence, so
            # only recompute when connections remain.
            if self.timeout:
                self.soonest = min(self.timeout.values())
            else:
                self.soonest = 0

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in self.timeout.items():
                if v == self.soonest:
                    # Bug fix: close the evicted connection (mirrors the
                    # expiry path above); the original leaked it here.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            # Same empty-cache guard as above (reachable when
            # max_conns == 1).
            if self.timeout:
                self.soonest = min(self.timeout.values())
            else:
                self.soonest = 0
1249
class GopherHandler(BaseHandler):
    """Handler for gopher: URLs, built on the gopherlib module."""

    def gopher_open(self, req):
        host = req.get_host()
        if not host:
            raise GopherError('no host given')
        host = unquote(host)
        # Split the selector into gopher item type, path and query.
        selector = req.get_selector()
        gtype, selector = splitgophertype(selector)
        selector, query = splitquery(selector)
        selector = unquote(selector)
        # A query string means a gopher search; otherwise a plain fetch.
        if query:
            query = unquote(query)
            fp = gopherlib.send_query(selector, query, host)
        else:
            fp = gopherlib.send_selector(selector, host)
        return addinfourl(fp, noheaders(), req.get_full_url())
1266
1267#bleck! don't use this yet
1268class OpenerFactory:
1269
1270 default_handlers = [UnknownHandler, HTTPHandler,
1271 HTTPDefaultErrorHandler, HTTPRedirectHandler,
1272 FTPHandler, FileHandler]
1273 handlers = []
1274 replacement_handlers = []
1275
1276 def add_handler(self, h):
1277 self.handlers = self.handlers + [h]
1278
1279 def replace_handler(self, h):
1280 pass
1281
1282 def build_opener(self):
1283 opener = OpenerDirector()
1284 for ph in self.default_handlers:
1285 if inspect.isclass(ph):
1286 ph = ph()
1287 opener.add_handler(ph)