Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / src / nas,5.n2.os.2 / lib / python / lib / python2.4 / cookielib.py
CommitLineData
86530b38
AT
1"""HTTP cookie handling for web clients.
2
3This module has (now fairly distant) origins in Gisle Aas' Perl module
4HTTP::Cookies, from the libwww-perl library.
5
6Docstrings, comments and debug strings in this code refer to the
7attributes of the HTTP cookie system as cookie-attributes, to distinguish
8them clearly from Python attributes.
9
10Class diagram (note that the classes which do not derive from
11FileCookieJar are not distributed with the Python standard library, but
12are available from http://wwwsearch.sf.net/):
13
                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar
25
26"""
27
28import sys, re, urlparse, copy, time, urllib, logging
29from types import StringTypes
30try:
31 import threading as _threading
32except ImportError:
33 import dummy_threading as _threading
34import httplib # only for the default HTTP port
35from calendar import timegm
36
# Shortcut to the debug() method of the "cookielib" logger; the rest of the
# module reports rejected cookies and parse problems through it.
debug = logging.getLogger("cookielib").debug

# Default HTTP port as a string (request_port() returns it as-is, and
# set_ok_port() compares ports as strings).
DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
# Error text for a missing filename.
# NOTE(review): not referenced in this part of the file; presumably used by
# the file-based CookieJar classes -- confirm.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
42
def reraise_unmasked_exceptions(unmasked=()):
    """Re-raise exceptions that must never be swallowed by a catch-all.

    Call this from inside a bare ``except:`` handler.  There are a few
    catch-all except: statements in this module, for catching input that's
    bad in unexpected ways; this function re-raises the exceptions we don't
    want to trap.

    unmasked: tuple of additional exception classes to let through.
        KeyboardInterrupt, SystemExit and MemoryError are always let through.

    Any other exception is swallowed, and its traceback is reported via
    warnings.warn() (attributed to the caller) as a "cookielib bug!".
    """
    import traceback
    import warnings

    unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
    etype = sys.exc_info()[0]
    if issubclass(etype, unmasked):
        raise
    # swallowed an exception: traceback.format_exc() returns exactly the text
    # print_exc() would have written, without needing a StringIO buffer
    warnings.warn("cookielib bug!\n%s" % traceback.format_exc(), stacklevel=2)
57
58
59# Date/time conversion
60# -----------------------------------------------------------------------------
61
EPOCH_YEAR = 1970
def _timegm(tt):
    """Return seconds since the epoch for UTC time tuple tt, or None.

    Only the first six fields (year, month, day, hour, minute, second) are
    range-checked; None is returned when any of them is out of range or the
    year precedes EPOCH_YEAR.
    """
    year, month, mday, hour, minute, second = tt[:6]
    in_range = (year >= EPOCH_YEAR and
                1 <= month <= 12 and
                1 <= mday <= 31 and
                0 <= hour <= 24 and
                0 <= minute <= 59 and
                0 <= second <= 61)
    if not in_range:
        return None
    return timegm(tt)
70
# Abbreviated English day names, indexed like the tm_wday field (Monday is 0).
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# Abbreviated English month names; index 0 is January.
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased copy of MONTHS, used for case-insensitive month lookup.
MONTHS_LOWER = [month.lower() for month in MONTHS]
76
def time2isoz(t=None):
    """Format t (seconds since the epoch) as an ISO-style UTC timestamp.

    When t is omitted, the current time is used.

    The result looks like "YYYY-MM-DD hh:mm:ssZ" and is always expressed in
    Universal Time (UTC, aka GMT), for example:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    # first six fields: year, month, day, hour, minute, second
    fields = time.gmtime(t)[:6]
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % fields
93
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
    # The weekday is followed by a comma, as documented above (the comma was
    # previously missing from the format string).
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon-1], year, hour, minute, sec)
109
110
# Timezone names that denote a zero offset (only the keys matter).
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric offsets: optional sign, 1-2 digit hours, optional ':' and minutes.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    """Return the UTC offset in seconds described by tz, or None.

    tz is either one of the names in UTC_ZONES (offset 0) or a numeric
    offset such as "+0100", "-08:00" or "5".  None is returned for anything
    else.
    """
    if tz in UTC_ZONES:
        return 0
    match = TIMEZONE_RE.search(tz)
    if match is None:
        return None
    sign, hours, minutes = match.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds = seconds + 60 * int(minutes)
    if sign == '-':
        seconds = -seconds
    return seconds
127
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date fields to seconds since the epoch, or None.

    mon may be an English month abbreviation or a month number; hr, min and
    sec may be None (treated as 0); tz may be None (treated as UTC).  Years
    below 1000 are moved into the century that puts them closest to the
    current year.  None is returned for an unusable month, an out-of-range
    date or an unrecognised timezone.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    month_name = mon.lower()
    if month_name in MONTHS_LOWER:
        mon = MONTHS_LOWER.index(month_name) + 1
    else:
        # maybe it's already a number
        try:
            mon = int(mon)
        except ValueError:
            return None
        if not 1 <= mon <= 12:
            return None

    # make sure clock elements are defined
    if hr is None:
        hr = 0
    if min is None:
        min = 0
    if sec is None:
        sec = 0

    yr, day = int(yr), int(day)
    hr, min, sec = int(hr), int(min), int(sec)

    if yr < 1000:
        # find "obvious" year
        this_year = time.localtime(time.time())[0]
        years_into_century = this_year % 100
        delta = years_into_century - yr
        yr = yr + this_year - years_into_century
        if abs(delta) > 50:
            if delta > 0:
                yr = yr + 100
            else:
                yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))
    if t is None:
        return None

    # adjust time using timezone string, to get absolute time since epoch
    if tz is None:
        tz = "UTC"
    offset = offset_from_tz_string(tz.upper())
    if offset is None:
        return None
    return t - offset
180
# Strictly conforming HTTP date, e.g. "Wed, 09 Feb 1994 22:23:32 GMT".
# Both halves are raw strings so that the backslash escapes reach the regex
# engine without relying on Python passing unknown escapes through.
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
# Leading weekday name (abbreviated or full), optionally followed by a comma.
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
       (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        mon = MONTHS_LOWER.index(g[1].lower()) + 1
        # seconds are converted with int() (not float()) so that this path
        # returns an integer, as documented and as the loose path does
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), int(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
258
# Raw string, so that the backslash escapes reach the regex engine untouched.
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Fractional seconds ("14:15:29.5") are accepted and truncated, since the
    result is a whole number of seconds.

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    yr, mon, day, hr, min, sec, tz, _ = m.groups()

    if sec is not None:
        # The regexp allows a fractional part ("29.5" or "29."), which the
        # integer conversion in _str2time would reject with ValueError; keep
        # only the whole seconds.
        sec = sec.split(".")[0]

    return _str2time(day, mon, yr, hr, min, sec, tz)
303
304
305# Header parsing
306# -----------------------------------------------------------------------------
307
def unmatched(match):
    """Return the subject string of match with the matched span cut out."""
    subject = match.string
    return subject[:match.start(0)] + subject[match.end(0):]
312
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    header_values is a sequence of header value strings (not a single
    string).  Within each value, "," separates headers, ";" or whitespace
    separates the pairs of one header, and "=" separates a key from its
    value; values may be quoted strings with backslash escapes.  Multiple
    header values are treated as if they were joined with ",".

    The accepted syntax is, roughly (BNF as from the HTTP/1.1 specification,
    but with relaxed requirements for tokens):

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))
      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header becomes a list of (key, value) tuples; a lone token (not
    part of a parameter) has the value None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    Examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert type(header_values) not in StringTypes
    result = []
    for header in header_values:
        remaining = header
        pairs = []
        while remaining:
            token = HEADER_TOKEN_RE.search(remaining)
            if token is None:
                stripped = remaining.lstrip()
                if stripped.startswith(","):
                    # concatenated headers, as per RFC 2616 section 4.2
                    remaining = stripped[1:]
                    if pairs:
                        result.append(pairs)
                    pairs = []
                else:
                    # skip junk
                    non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", remaining)
                    assert nr_junk_chars > 0, (
                        "split_header_words bug: '%s', '%s', %s" %
                        (header, remaining, pairs))
                    remaining = non_junk
                continue

            name = token.group(1)
            remaining = unmatched(token)
            quoted = HEADER_QUOTED_VALUE_RE.search(remaining)
            if quoted is not None:
                # quoted value: drop the quotes and undo backslash escapes
                remaining = unmatched(quoted)
                value = HEADER_ESCAPE_RE.sub(r"\1", quoted.group(1))
            else:
                bare = HEADER_VALUE_RE.search(remaining)
                if bare is not None:
                    # unquoted value
                    remaining = unmatched(bare)
                    value = bare.group(1).rstrip()
                else:
                    # no value, a lone token
                    value = None
            pairs.append((name, value))
        if pairs:
            result.append(pairs)
    return result
401
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Build one header value from a list of lists of (key, value) pairs.

    This is (almost) the inverse of split_header_words.  Pairs of one inner
    list are joined with "; " and the inner lists with ", ".  A pair whose
    value is None is emitted as the bare key; a value that is not a plain
    word is quoted, with '"' and '\\' backslash-escaped.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    rendered = []
    for group in lists:
        parts = []
        for key, val in group:
            if val is None:
                parts.append(key)
                continue
            if re.search(r"^\w+$", val) is None:
                # escape " and \, then wrap in quotes
                val = '"%s"' % HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", val)
            parts.append("%s=%s" % (key, val))
        if parts:
            rendered.append("; ".join(parts))
    return ", ".join(rendered)
427
def _strip_ns_quotes(text):
    """Remove one leading and one trailing double quote from text, if present."""
    if text.startswith('"'):
        text = text[1:]
    if text.endswith('"'):
        text = text[:-1]
    return text

def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    ns_headers is a sequence of header value strings.  The result is a list
    with one entry per non-empty header; each entry is a list of (key,
    value) tuples, the first being the cookie's own name and value.  Known
    attribute names are lower-cased, a valueless attribute has value None,
    an "expires" value is converted with http2time (None if invalid or
    missing), and ("version", "0") is appended when the header carries no
    version attribute.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "":
                continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, maxsplit=1)
                k = k.lstrip()
            if ii != 0:
                # Only pairs after the first are cookie-attributes; the first
                # pair is the cookie's own name and value.
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.  Will be treated as RFC 2965
                    # cookie in rest of code.
                    # Probably it should be parsed with split_header_words, but
                    # that's too much hassle.
                    version_set = True
                    if v is not None:
                        v = _strip_ns_quotes(v)
                elif k == "expires" and v is not None:
                    # convert expires date to seconds since epoch; a bare
                    # "expires" with no value is left as None instead of
                    # failing on None.startswith
                    v = http2time(_strip_ns_quotes(v))  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
482
483
# Heuristic for "looks like an IPv4 address": text ends in a dot followed
# by digits.
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    if text[0] == "." or text[-1] == ".":
        return False
    return True

def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    if not B.startswith("."):
        return False
    # A must have the form NB, i.e. END with B.  Merely containing B is not
    # enough: "www.acme.com.evil.org" must not match ".acme.com".  N is
    # guaranteed non-empty here because A != B.
    if not A.endswith(B):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
538
def liberal_is_HDN(text):
    """Return True unless text looks like an IP address.

    A deliberately loose host-domain-name test, used when accepting or
    blocking domains.

    """
    return IPV4_RE.search(text) is None
548
def user_domain_match(A, B):
    """Return True if host A matches the user-supplied domain B.

    Used for the block/allow lists.  A and B may be host domain names or IP
    addresses, and are compared case-insensitively.  If B starts with a dot,
    A matches when it ends with B; otherwise (and whenever either side looks
    like an IP address) only an exact match counts.

    """
    host = A.lower()
    pattern = B.lower()
    if liberal_is_HDN(host) and liberal_is_HDN(pattern):
        if pattern.startswith("."):
            return host.endswith(pattern)
        return host == pattern
    # at least one side is an IP address: only equal addresses match
    return host == pattern
568
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the request's full URL, falling back to its
    "Host" header when the URL has no network location.  Any trailing
    ":port" is removed.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urlparse.urlparse(request.get_full_url())[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    return cut_port_re.sub("", netloc, 1).lower()
585
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the request-host
    contains no dot; otherwise the two are identical.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
596
def request_path(request):
    """request-URI, as defined by RFC 2965.

    Built from the path, parameters, query and fragment of the request's
    full URL; the path (including any parameters) is escaped with
    escape_path, and a leading "/" is added when missing.
    """
    pieces = urlparse.urlparse(request.get_full_url())
    path, parameters, query, frag = pieces[2:]
    if parameters:
        path = "%s;%s" % (path, parameters)
    escaped = escape_path(path)
    req_path = urlparse.urlunparse(("", "", escaped, "", query, frag))
    if req_path.startswith("/"):
        return req_path
    # fix bad RFC 2396 absoluteURI
    return "/" + req_path
611
def request_port(request):
    """Return the port of the request's host, as a string.

    DEFAULT_HTTP_PORT is returned when the host carries no ":port" part, and
    None (after a debug message) when the port is not numeric.
    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon+1:]
    try:
        int(port)
    except ValueError:
        debug("nonnumeric port: '%s'", port)
        return None
    return port
625
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: '%' followed by two hex digits (captured).
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Substitution callback: return the matched %-escape with upper-case hex digits."""
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
647
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    first_dot = h.find(".")
    if first_dot < 0:
        return h
    # h has the form A.B; only B (everything after the first dot) matters
    b = h[first_dot+1:]
    if is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
682
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    return not domain_match(request_host(request),
                            reach(request.get_origin_req_host()))
698
699
class Cookie:
    """HTTP Cookie.

    A single class covers both Netscape and RFC 2965 cookies.

    It is intentionally little more than a bag of attributes, so instances
    that violate the cookie standards can be constructed.  Parsing, defaults
    and normalisation are the job of CookieJar.make_cookies (the factory for
    Cookie objects); deciding whether a cookie is accepted from or returned
    to a server is the job of CookiePolicy.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than"Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest):
        """Store the cookie-attributes on the instance.

        version and expires are converted to int unless None; domain is
        lower-cased; rest (the non-standard attributes) is shallow-copied.
        Raises ValueError if port is None while port_specified is True.
        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value

        self.port = port
        self.port_specified = port_specified

        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot

        self.path = path
        self.path_specified = path_specified

        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return true if the non-standard attribute name is present."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard attribute name, or default if absent."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the non-standard attribute name to value."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= now.

        now defaults to the current time; a cookie without an expiry time
        never counts as expired.
        """
        if now is None:
            now = time.time()
        if self.expires is None:
            return False
        if self.expires <= now:
            return True
        return False

    def __str__(self):
        if self.port is None:
            port_part = ""
        else:
            port_part = ":" + self.port
        scope = self.domain + port_part + self.path
        if self.value is None:
            label = self.name
        else:
            label = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (label, scope)

    def __repr__(self):
        attr_names = ("version", "name", "value",
                      "port", "port_specified",
                      "domain", "domain_specified", "domain_initial_dot",
                      "path", "path_specified",
                      "secure", "expires", "discard", "comment", "comment_url",
                      )
        args = ["%s=%s" % (attr_name, repr(getattr(self, attr_name)))
                for attr_name in attr_names]
        args.append("rest=%s" % repr(self._rest))
        return "Cookie(%s)" % ", ".join(args)
792
793
class CookiePolicy:
    """Interface deciding which cookies are accepted and which are returned.

    A policy may also modify cookies, though this is probably a bad idea.

    DefaultCookiePolicy is the subclass implementing the standard rules for
    Netscape and RFC 2965 cookies -- derive from that one for a customised
    policy.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Pre-expired cookies currently never reach this method -- the
        CookieJar class deletes such cookies itself.

        Must be overridden; this base implementation raises
        NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        Must be overridden; this base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation always returns True.
        """
        return True
825
826
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies."""

    # Bit flags for the strict_ns_domain constructor argument; they are
    # tested with "&" in set_ok_domain, so they may be OR-ed together.
    #
    # DomainStrictNoDots: for Netscape cookies too, reject a cookie when the
    # part of the request-host in front of the cookie domain contains a dot
    # (see set_ok_domain).
    DomainStrictNoDots = 1
    # NOTE(review): not referenced by the methods visible in this part of
    # the file; presumably used when returning cookies -- confirm.
    DomainStrictNonDomain = 2
    # DomainRFC2965Match: for Netscape cookies too, require the effective
    # request-host to domain_match() the cookie domain (see set_ok_domain).
    DomainRFC2965Match = 4

    # Preset values: no extra Netscape domain checks / both "strict" flags.
    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
836
837 def __init__(self,
838 blocked_domains=None, allowed_domains=None,
839 netscape=True, rfc2965=False,
840 hide_cookie2=False,
841 strict_domain=False,
842 strict_rfc2965_unverifiable=True,
843 strict_ns_unverifiable=False,
844 strict_ns_domain=DomainLiberal,
845 strict_ns_set_initial_dollar=False,
846 strict_ns_set_path=False,
847 ):
848 """Constructor arguments should be passed as keyword arguments only."""
849 self.netscape = netscape
850 self.rfc2965 = rfc2965
851 self.hide_cookie2 = hide_cookie2
852 self.strict_domain = strict_domain
853 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
854 self.strict_ns_unverifiable = strict_ns_unverifiable
855 self.strict_ns_domain = strict_ns_domain
856 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
857 self.strict_ns_set_path = strict_ns_set_path
858
859 if blocked_domains is not None:
860 self._blocked_domains = tuple(blocked_domains)
861 else:
862 self._blocked_domains = ()
863
864 if allowed_domains is not None:
865 allowed_domains = tuple(allowed_domains)
866 self._allowed_domains = allowed_domains
867
    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple).

        The tuple is empty when no domains are blocked.
        """
        return self._blocked_domains
    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains.

        blocked_domains may be any iterable of domain strings; it is stored
        as a tuple.
        """
        self._blocked_domains = tuple(blocked_domains)
874
875 def is_blocked(self, domain):
876 for blocked_domain in self._blocked_domains:
877 if user_domain_match(domain, blocked_domain):
878 return True
879 return False
880
    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple).

        None means that no allow-list is in force (see is_not_allowed).
        """
        return self._allowed_domains
884 def set_allowed_domains(self, allowed_domains):
885 """Set the sequence of allowed domains, or None."""
886 if allowed_domains is not None:
887 allowed_domains = tuple(allowed_domains)
888 self._allowed_domains = allowed_domains
889
890 def is_not_allowed(self, domain):
891 if self._allowed_domains is None:
892 return False
893 for allowed_domain in self._allowed_domains:
894 if user_domain_match(domain, allowed_domain):
895 return False
896 return True
897
898 def set_ok(self, cookie, request):
899 """
900 If you override .set_ok(), be sure to call this method. If it returns
901 false, so should your subclass (assuming your subclass wants to be more
902 strict about which cookies to accept).
903
904 """
905 debug(" - checking cookie %s=%s", cookie.name, cookie.value)
906
907 assert cookie.name is not None
908
909 for n in "version", "verifiability", "name", "path", "domain", "port":
910 fn_name = "set_ok_"+n
911 fn = getattr(self, fn_name)
912 if not fn(cookie, request):
913 return False
914
915 return True
916
917 def set_ok_version(self, cookie, request):
918 if cookie.version is None:
919 # Version is always set to 0 by parse_ns_headers if it's a Netscape
920 # cookie, so this must be an invalid RFC 2965 cookie.
921 debug(" Set-Cookie2 without version attribute (%s=%s)",
922 cookie.name, cookie.value)
923 return False
924 if cookie.version > 0 and not self.rfc2965:
925 debug(" RFC 2965 cookies are switched off")
926 return False
927 elif cookie.version == 0 and not self.netscape:
928 debug(" Netscape cookies are switched off")
929 return False
930 return True
931
932 def set_ok_verifiability(self, cookie, request):
933 if request.is_unverifiable() and is_third_party(request):
934 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
935 debug(" third-party RFC 2965 cookie during "
936 "unverifiable transaction")
937 return False
938 elif cookie.version == 0 and self.strict_ns_unverifiable:
939 debug(" third-party Netscape cookie during "
940 "unverifiable transaction")
941 return False
942 return True
943
944 def set_ok_name(self, cookie, request):
945 # Try and stop servers setting V0 cookies designed to hack other
946 # servers that know both V0 and V1 protocols.
947 if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
948 cookie.name.startswith("$")):
949 debug(" illegal name (starts with '$'): '%s'", cookie.name)
950 return False
951 return True
952
953 def set_ok_path(self, cookie, request):
954 if cookie.path_specified:
955 req_path = request_path(request)
956 if ((cookie.version > 0 or
957 (cookie.version == 0 and self.strict_ns_set_path)) and
958 not req_path.startswith(cookie.path)):
959 debug(" path attribute %s is not a prefix of request "
960 "path %s", cookie.path, req_path)
961 return False
962 return True
963
    def set_ok_domain(self, cookie, request):
        """Return False if the cookie's domain makes it unacceptable.

        The user block-list and allow-list are always consulted.  The other
        checks run only when the cookie specified a domain; each failing
        check logs the reason via debug() and returns False.
        """
        if self.is_blocked(cookie.domain):
            debug(" domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            debug(" domain %s is not in user allow-list", cookie.domain)
            return False
        if cookie.domain_specified:
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            # Optional check: refuse domains of the form ".<sld>.<cc>" where
            # <sld> is one of a fixed list of generic labels and <cc> has
            # two letters.
            if self.strict_domain and (domain.count(".") >= 2):
                i = domain.rfind(".")
                j = domain.rfind(".", 0, i)
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    if (sld.lower() in [
                        "co", "ac",
                        "com", "edu", "org", "net", "gov", "mil", "int"] and
                        len(tld) == 2):
                        # domain like .co.uk
                        debug(" country-code second level domain %s", domain)
                        return False
            # The domain (ignoring one leading dot) must contain a dot,
            # unless it is exactly ".local".
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                debug(" non-local domain %s contains no embedded dot",
                      domain)
                return False
            # Netscape cookies: the effective request-host, as-is or with a
            # dot prepended, must end with the domain.
            if cookie.version == 0:
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    debug(" effective request-host %s (even with added "
                          "initial dot) does not end end with %s",
                          erhn, domain)
                    return False
            # RFC 2965 cookies (and Netscape cookies when the
            # DomainRFC2965Match flag is set): full domain-match required.
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    debug(" effective request-host %s does not domain-match "
                          "%s", erhn, domain)
                    return False
            # RFC 2965 cookies (and Netscape cookies when the
            # DomainStrictNoDots flag is set): the part of the request-host
            # in front of the domain must not contain a dot (not applied
            # when the request-host looks like an IP address).
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    debug(" host prefix %s for domain %s contains a dot",
                          host_prefix, domain)
                    return False
        return True
1019
def set_ok_port(self, cookie, request):
    """Return True if the cookie's port list is acceptable for this request.

    Only checked when the cookie specified a port list explicitly.  Every
    entry examined must be numeric, and the request port (taken as "80"
    when request_port() returns None) must appear in the comma-separated
    list.

    """
    if not cookie.port_specified:
        return True

    req_port = request_port(request)
    if req_port is None:
        req_port = "80"
    else:
        req_port = str(req_port)

    for candidate in cookie.port.split(","):
        try:
            int(candidate)
        except ValueError:
            debug(" bad port %s (not numeric)", candidate)
            return False
        if candidate == req_port:
            return True

    debug(" request port (%s) not found in %s",
          req_port, cookie.port)
    return False
1040
def return_ok(self, cookie, request):
    """
    Return True if the cookie may be sent back with this request.

    If you override .return_ok(), be sure to call this method.  If it
    returns false, so should your subclass (assuming your subclass wants to
    be more strict about which cookies to return).

    """
    # Path has already been checked by .path_return_ok(), and domain
    # blocking done by .domain_return_ok().
    debug(" - checking cookie %s=%s", cookie.name, cookie.value)

    # Each aspect is delegated to the return_ok_<aspect> method, in this
    # order; the first failing check vetoes the cookie.
    aspects = ("version", "verifiability", "secure", "expires", "port",
               "domain")
    for aspect in aspects:
        check = getattr(self, "return_ok_" + aspect)
        if not check(cookie, request):
            return False
    return True
1058
def return_ok_version(self, cookie, request):
    """Return False if the cookie's protocol version is switched off.

    Cookies with version > 0 need self.rfc2965 to be true; version 0
    cookies need self.netscape to be true.

    """
    if cookie.version > 0:
        if not self.rfc2965:
            debug(" RFC 2965 cookies are switched off")
            return False
    elif cookie.version == 0:
        if not self.netscape:
            debug(" Netscape cookies are switched off")
            return False
    return True
1067
def return_ok_verifiability(self, cookie, request):
    """Return False for a third-party cookie in an unverifiable transaction.

    The veto only applies when the request is unverifiable and
    is_third_party(request) is true, and then only if the strictness
    switch for the cookie's protocol version is set
    (self.strict_rfc2965_unverifiable for version > 0,
    self.strict_ns_unverifiable for version 0).

    """
    if not (request.is_unverifiable() and is_third_party(request)):
        return True

    if cookie.version > 0:
        if self.strict_rfc2965_unverifiable:
            debug(" third-party RFC 2965 cookie during unverifiable "
                  "transaction")
            return False
    elif cookie.version == 0:
        if self.strict_ns_unverifiable:
            debug(" third-party Netscape cookie during unverifiable "
                  "transaction")
            return False
    return True
1079
def return_ok_secure(self, cookie, request):
    """Return False if a secure cookie would go out on a non-https request."""
    if not cookie.secure:
        return True
    if request.get_type() == "https":
        return True
    debug(" secure cookie with non-secure request")
    return False
1085
def return_ok_expires(self, cookie, request):
    """Return False if the cookie has expired as of self._now."""
    if not cookie.is_expired(self._now):
        return True
    debug(" cookie expired")
    return False
1091
def return_ok_port(self, cookie, request):
    """Return False if the request port is not in the cookie's port list.

    A cookie with no port restriction (false cookie.port) is always
    accepted.  The request port is taken as "80" when request_port()
    returns None.

    """
    if not cookie.port:
        return True

    req_port = request_port(request)
    if req_port is None:
        req_port = "80"

    if req_port in cookie.port.split(","):
        return True

    debug(" request port %s does not match cookie port %s",
          req_port, cookie.port)
    return False
1105
def return_ok_domain(self, cookie, request):
    """Return True if the cookie's domain permits returning it to the host.

    Three checks are made against the effective request-host name:

    - for version 0 cookies with no explicit domain, an exact string
      comparison when the DomainStrictNonDomain bit of
      self.strict_ns_domain is set;
    - for version > 0 cookies, domain_match();
    - for version 0 cookies, a suffix comparison against the cookie
      domain.

    The suffix comparison uses the dot-prefixed form of the cookie
    domain so that it can only succeed on a label boundary: a cookie for
    "example.com" must not be returned to "evilexample.com".

    """
    req_host, erhn = eff_request_host(request)
    domain = cookie.domain

    # Dot-prefixed form of the cookie domain, used for the Netscape
    # suffix test below.
    if domain and not domain.startswith("."):
        dotdomain = "." + domain
    else:
        dotdomain = domain

    # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
    if (cookie.version == 0 and
        (self.strict_ns_domain & self.DomainStrictNonDomain) and
        not cookie.domain_specified and domain != erhn):
        debug(" cookie with unspecified domain does not string-compare "
              "equal to request domain")
        return False

    if cookie.version > 0 and not domain_match(erhn, domain):
        debug(" effective request-host name %s does not domain-match "
              "RFC 2965 cookie domain %s", erhn, domain)
        return False
    if cookie.version == 0 and not ("." + erhn).endswith(dotdomain):
        debug(" request-host %s does not match Netscape cookie domain "
              "%s", req_host, domain)
        return False
    return True
1127
def domain_return_ok(self, domain, request):
    """Return False if no cookie stored under domain may go to this request.

    This is a quick pre-filter applied per stored domain before the
    individual cookies are examined.  The request host and effective
    request-host name must end with the cookie domain, and the domain
    must pass the user block-list and allow-list.

    """
    # Liberal check of the domain.  This is here as an optimization to
    # avoid having to load lots of MSIE cookie files unless necessary.
    req_host, erhn = eff_request_host(request)
    if not req_host.startswith("."):
        req_host = "." + req_host
    if not erhn.startswith("."):
        erhn = "." + erhn

    # Compare against the dot-prefixed cookie domain so the suffix test
    # can only succeed on a label boundary ("example.com" must not match
    # the host "evilexample.com").
    if domain and not domain.startswith("."):
        dotdomain = "." + domain
    else:
        dotdomain = domain
    if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
        return False

    if self.is_blocked(domain):
        debug(" domain %s is in user block-list", domain)
        return False
    if self.is_not_allowed(domain):
        debug(" domain %s is not in user allow-list", domain)
        return False

    return True
1149
def path_return_ok(self, path, request):
    """Return True if the request path path-matches the cookie path.

    The paths match when they are identical, or when the cookie path is
    a prefix of the request path that ends on a path-segment boundary:
    either the cookie path itself ends with "/", or the first character
    of the request path after the prefix is "/".  A bare string prefix is
    not sufficient, so a cookie for "/foo" is not returned to "/foobar".

    """
    debug("- checking cookie path=%s", path)
    req_path = request_path(request)
    if req_path == path:
        return True

    prefix_len = len(path)
    if (req_path.startswith(path) and
        (path.endswith("/") or
         req_path[prefix_len:prefix_len + 1] == "/")):
        return True

    debug(" %s does not path-match %s", req_path, path)
    return False
1157
1158
def vals_sorted_by_key(adict):
    """Return the values of adict as a list, ordered by their sorted keys."""
    ordered_keys = sorted(adict.keys())
    return [adict.get(key) for key in ordered_keys]
1163
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Any value that has an "items" attribute is treated as a nested mapping
    and descended into; every other value is yielded as a leaf.

    """
    for item in vals_sorted_by_key(mapping):
        try:
            item.items
        except AttributeError:
            is_leaf = True
        else:
            is_leaf = False

        if is_leaf:
            yield item
        else:
            for nested in deepvalues(item):
                yield nested
1179
1180
# Sentinel passed as the default argument of dict.get(), so that a key
# which is missing can be told apart from a key whose value is None.
class Absent:
    """Marker class: the requested dictionary key was not present."""
1184
class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try
    urllib2.build_opener(HTTPCookieProcessor).open(url).

    Cookies are kept in self._cookies, a nested dictionary keyed by
    domain, then path, then cookie name.  The methods that read or update
    the store on behalf of a request hold self._cookies_lock (a
    re-entrant lock) while doing so, and always release it again, even if
    the policy or the request object raises.  Note that .clear() itself
    does not take the lock.

    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"

    def __init__(self, policy=None):
        """Create an empty jar using policy (DefaultCookiePolicy if None)."""
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        self._cookies_lock = _threading.RLock()
        self._cookies = {}

    def set_policy(self, policy):
        """Replace the cookie policy object used by this jar."""
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        """Return the cookies stored under domain that the policy allows."""
        if not self._policy.domain_return_ok(domain, request):
            return []
        debug("Checking %s for cookies to return", domain)
        cookies = []
        cookies_by_path = self._cookies[domain]
        for path in cookies_by_path.keys():
            if not self._policy.path_return_ok(path, request):
                continue
            cookies_by_name = cookies_by_path[path]
            for cookie in cookies_by_name.values():
                if not self._policy.return_ok(cookie, request):
                    debug(" not returning cookie")
                    continue
                debug(" it's a match")
                cookies.append(cookie)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        like ['foo="bar"; $Path="/"', ...]

        The $Version attribute is also added when appropriate (currently only
        once per request).

        The cookies list is sorted in place.

        """
        # add cookies in order of most specific (ie. longest) path first;
        # the sort is stable, so cookies with equally long paths keep their
        # relative order
        cookies.sort(key=lambda cookie: len(cookie.path), reverse=True)

        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            # different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            # RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (urllib2.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        Expired cookies are cleared from the jar afterwards.

        """
        debug("add_cookie_header")
        self._cookies_lock.acquire()
        try:
            self._policy._now = self._now = int(time.time())

            cookies = self._cookies_for_request(request)

            attrs = self._cookie_attrs(cookies)
            if attrs:
                if not request.has_header("Cookie"):
                    request.add_unredirected_header(
                        "Cookie", "; ".join(attrs))

            # if necessary, advertise that we know RFC 2965
            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
                not request.has_header("Cookie2")):
                for cookie in cookies:
                    if cookie.version != 1:
                        request.add_unredirected_header(
                            "Cookie2", '$Version="1"')
                        break
        finally:
            # release even if the policy or the request object raised
            self._cookies_lock.release()

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        A cookie with a malformed standard attribute (for example a missing
        domain value, or a missing or non-numeric max-age) is dropped from
        the result; the other cookies are unaffected.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        debug(" missing or invalid value for expires "
                              "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    try:
                        v = int(v)
                    except (ValueError, TypeError):
                        # TypeError covers a max-age attribute given
                        # without any value (v is None)
                        debug(" missing or invalid (non-numeric) value for "
                              "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age
                    #   is a request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                        k not in ["port", "comment", "commenturl"]):
                        debug(" missing value for %s attribute", k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        """Build a Cookie from one normalised tuple, filling in defaults.

        Returns None if the cookie should be ignored: either its version
        attribute is not an integer, or its expiry time is not in the
        future (in which case any stored cookie with the same domain, path
        and name is removed from the jar).

        """
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        version = standard.get("version", None)
        if version is not None:
            try:
                version = int(version)
            except ValueError:
                # Ignore just this cookie rather than letting the error
                # discard every cookie parsed from the same headers.
                debug(" invalid (non-numeric) value for version "
                      "attribute: ignoring cookie")
                return None

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i+1]
            if len(path) == 0:
                path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "." + domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present.  Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True
        elif expires <= self._now:
            # Expiry date in past is request to delete cookie.  This can't be
            # in DefaultCookiePolicy, because can't delete cookies there.
            try:
                self.clear(domain, path, name)
            except KeyError:
                pass
            debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                  domain, path, name)
            return None

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

    def _cookies_from_attrs_set(self, attrs_set, request):
        """Return the Cookie objects built from parsed header attributes."""
        cookie_tuples = self._normalized_cookie_tuples(attrs_set)

        cookies = []
        for tup in cookie_tuples:
            cookie = self._cookie_from_cookie_tuple(tup, request)
            if cookie:
                cookies.append(cookie)
        return cookies

    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
        ns_hdrs = headers.getheaders("Set-Cookie")

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        # The catch-all handlers below are deliberate: headers that are bad
        # in unexpected ways must not break the request.
        # reraise_unmasked_exceptions() re-raises the exceptions that should
        # never be trapped.
        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except:
            reraise_unmasked_exceptions()
            cookies = []

        if ns_hdrs and netscape:
            try:
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except:
                reraise_unmasked_exceptions()
                ns_cookies = []

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                lookup = {}
                for cookie in cookies:
                    lookup[(cookie.domain, cookie.path, cookie.name)] = None

                ns_cookies = [
                    ns_cookie for ns_cookie in ns_cookies
                    if (ns_cookie.domain, ns_cookie.path,
                        ns_cookie.name) not in lookup]

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies

    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so."""
        self._cookies_lock.acquire()
        try:
            self._policy._now = self._now = int(time.time())

            if self._policy.set_ok(cookie, request):
                self.set_cookie(cookie)
        finally:
            self._cookies_lock.release()

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set."""
        self._cookies_lock.acquire()
        try:
            by_domain = self._cookies
            if cookie.domain not in by_domain:
                by_domain[cookie.domain] = {}
            by_path = by_domain[cookie.domain]
            if cookie.path not in by_path:
                by_path[cookie.path] = {}
            by_name = by_path[cookie.path]
            by_name[cookie.name] = cookie
        finally:
            self._cookies_lock.release()

    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request."""
        debug("extract_cookies: %s", response.info())
        self._cookies_lock.acquire()
        try:
            self._policy._now = self._now = int(time.time())

            for cookie in self.make_cookies(response, request):
                if self._policy.set_ok(cookie, request):
                    debug(" setting cookie: %s", cookie)
                    self.set_cookie(cookie)
        finally:
            self._cookies_lock.release()

    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists, and ValueError if a
        name is given without both domain and path, or a path without a
        domain.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}

    def clear_session_cookies(self):
        """Discard all session cookies.

        Note that the .save() method won't save session cookies anyway, unless
        you ask otherwise by passing a true ignore_discard argument.

        """
        self._cookies_lock.acquire()
        try:
            for cookie in self:
                if cookie.discard:
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the
        .save() method won't save expired cookies anyway (unless you ask
        otherwise by passing a true ignore_expires argument).

        """
        self._cookies_lock.acquire()
        try:
            now = time.time()
            for cookie in self:
                if cookie.is_expired(now):
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()

    def __iter__(self):
        """Iterate over all contained cookies (see deepvalues())."""
        return deepvalues(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        i = 0
        for cookie in self:
            i = i + 1
        return i

    def __repr__(self):
        r = []
        for cookie in self:
            r.append(repr(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

    def __str__(self):
        r = []
        for cookie in self:
            r.append(str(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))
1688
1689
class LoadError(Exception):
    """Exception type used to signal that loading cookies from a file failed."""
1691
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        Raises ValueError if filename is given but is not string-like.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            # Duck-type check: anything that can be concatenated with a
            # string is accepted as a filename.
            try:
                filename+""
            except:
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Not implemented here: always raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        The file named by filename (or self.filename when filename is None)
        is opened and passed to self._really_load(), and is closed again
        whether or not that succeeds.  Raises ValueError if no filename is
        available.

        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        Raises ValueError if no filename is available.

        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                # put the previous cookies back before propagating
                self._cookies = old_state
                raise
        finally:
            # always release, including when load() fails
            self._cookies_lock.release()
1749
1750from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
1751from _MozillaCookieJar import MozillaCookieJar