Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """HTTP cookie handling for web clients. |
2 | ||
3 | This module has (now fairly distant) origins in Gisle Aas' Perl module | |
4 | HTTP::Cookies, from the libwww-perl library. | |
5 | ||
6 | Docstrings, comments and debug strings in this code refer to the | |
7 | attributes of the HTTP cookie system as cookie-attributes, to distinguish | |
8 | them clearly from Python attributes. | |
9 | ||
10 | Class diagram (note that the classes which do not derive from | |
11 | FileCookieJar are not distributed with the Python standard library, but | |
12 | are available from http://wwwsearch.sf.net/): | |
13 | ||
14 | CookieJar____ | |
15 | / \ \ | |
16 | FileCookieJar \ \ | |
17 | / | \ \ \ | |
18 | MozillaCookieJar | LWPCookieJar \ \ | |
19 | | | \ | |
20 | | ---MSIEBase | \ | |
21 | | / | | \ | |
22 | | / MSIEDBCookieJar BSDDBCookieJar | |
23 | |/ | |
24 | MSIECookieJar | |
25 | ||
26 | """ | |
27 | ||
28 | import sys, re, urlparse, copy, time, urllib, logging | |
29 | from types import StringTypes | |
30 | try: | |
31 | import threading as _threading | |
32 | except ImportError: | |
33 | import dummy_threading as _threading | |
34 | import httplib # only for the default HTTP port | |
35 | from calendar import timegm | |
36 | ||
37 | debug = logging.getLogger("cookielib").debug | |
38 | ||
39 | DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT) | |
40 | MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " | |
41 | "instance initialised with one)") | |
42 | ||
def reraise_unmasked_exceptions(unmasked=()):
    # This module contains a few blanket "except:" clauses that guard
    # against unexpectedly malformed input.  They call this helper before
    # swallowing anything, so that exceptions we never want masked
    # (plus any caller-supplied classes) still propagate.
    unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
    etype = sys.exc_info()[0]
    if issubclass(etype, unmasked):
        raise
    # The exception is about to be swallowed: report it as a likely bug.
    import warnings, traceback, StringIO
    buf = StringIO.StringIO()
    traceback.print_exc(None, buf)
    warnings.warn("cookielib bug!\n%s" % buf.getvalue(), stacklevel=2)
57 | ||
58 | ||
59 | # Date/time conversion | |
60 | # ----------------------------------------------------------------------------- | |
61 | ||
62 | EPOCH_YEAR = 1970 | |
63 | def _timegm(tt): | |
64 | year, month, mday, hour, min, sec = tt[:6] | |
65 | if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and | |
66 | (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): | |
67 | return timegm(tt) | |
68 | else: | |
69 | return None | |
70 | ||
# Weekday and month abbreviations used when formatting/parsing HTTP dates.
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased month names, for case-insensitive month lookup.
MONTHS_LOWER = [month.lower() for month in MONTHS]
76 | ||
77 | def time2isoz(t=None): | |
78 | """Return a string representing time in seconds since epoch, t. | |
79 | ||
80 | If the function is called without an argument, it will use the current | |
81 | time. | |
82 | ||
83 | The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", | |
84 | representing Universal Time (UTC, aka GMT). An example of this format is: | |
85 | ||
86 | 1994-11-24 08:49:37Z | |
87 | ||
88 | """ | |
89 | if t is None: t = time.time() | |
90 | year, mon, mday, hour, min, sec = time.gmtime(t)[:6] | |
91 | return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( | |
92 | year, mon, mday, hour, min, sec) | |
93 | ||
def time2netscape(t=None):
    """Return time t (seconds since epoch) in Netscape cookie date format.

    The result looks like "Wed 09-Feb-1994 22:23:32 GMT" (weekday,
    DD-Mon-YYYY HH:MM:SS, always GMT).  When called without an argument,
    the current time is used.
    """
    if t is None:
        t = time.time()
    tm = time.gmtime(t)
    return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[tm[6]], tm[2], MONTHS[tm[1] - 1], tm[0], tm[3], tm[4], tm[5])
109 | ||
110 | ||
# Timezone names that all mean "zero offset from UTC".
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    """Return the UTC offset of timezone string tz, in seconds.

    Accepts the UTC aliases in UTC_ZONES and numeric offsets such as
    "-0800", "+01:00" or "5".  Returns None for anything unrecognised.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if not m:
        return None
    offset = 3600 * int(m.group(2))
    if m.group(3):
        offset += 60 * int(m.group(3))
    if m.group(1) == '-':
        offset = -offset
    return offset
127 | ||
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Build seconds-since-epoch from broken-out date/time strings.

    mon may be a month name (any case) or a numeric string; hr, min, sec
    and tz may be None (meaning midnight / UTC).  Two-digit years are
    windowed into the century that puts them within 50 years of today.
    Returns None when the date cannot be interpreted.
    """
    # Translate the month name to a number (January == 1); failing that,
    # it may already be numeric.
    try:
        mon = MONTHS_LOWER.index(mon.lower()) + 1
    except ValueError:
        try:
            imon = int(mon)
        except ValueError:
            return None
        if not 1 <= imon <= 12:
            return None
        mon = imon

    # An absent clock means midnight.
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    sec = int(sec)

    if yr < 1000:
        # Truncated (e.g. two-digit) year: choose the century that makes
        # the year closest to the current one.
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # Seconds since epoch for the tuple as if it were UTC (not yet
    # timezone-adjusted); None if out of range.
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # Shift by the timezone offset to get absolute time since epoch.
        if tz is None:
            tz = "UTC"
        offset = offset_from_tz_string(tz.upper())
        if offset is None:
            return None
        t = t - offset

    return t
180 | ||
# Fixed: the second fragment of STRICT_DATE_RE was a non-raw string
# containing \d escapes, which triggers SyntaxWarning on modern CPython
# (and relies on unknown escapes being passed through).  All fragments
# are raw strings now; the patterns themselves are unchanged.
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # Fast exit for a strictly RFC 1123 conforming string.
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        mon = MONTHS_LOWER.index(g[1].lower()) + 1
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), float(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # Strip leading whitespace and the (useless) weekday, if any.
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)

    # tz is the time zone specifier string.
    day, mon, yr, hr, min, sec, tz = [None]*7

    # Loose regexp parse; _str2time does the interpretation.
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is not None:
        day, mon, yr, hr, min, sec, tz = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)
258 | ||
# Fixed: the pattern was a non-raw string containing \d and \s escapes,
# which triggers SyntaxWarning on modern CPython; now a raw string.
# The pattern itself is unchanged.
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})            # year
       [-\/]?
    (\d\d?)            # numerical month
       [-\/]?
    (\d\d?)            # day
    (?:
          (?:\s+|[-:Tt])  # separator before clock
       (\d\d?):?(\d\d)    # hour:min
       (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
    )?                 # optional clock
       \s*
    ([-+]?\d\d?:?(:?\d\d)?
     |Z|z)?            # timezone  (Z is "zero meridian", i.e. GMT)
       \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # Strip leading whitespace.
    text = text.lstrip()

    # tz is the time zone specifier string.
    day, mon, yr, hr, min, sec, tz = [None]*7

    # Loose regexp parse; _str2time does the interpretation.
    m = ISO_DATE_RE.search(text)
    if m is not None:
        # XXX there's an extra bit of the timezone I'm ignoring here: is
        #   this the right thing to do?
        yr, mon, day, hr, min, sec, tz, _ = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)
303 | ||
304 | ||
305 | # Header parsing | |
306 | # ----------------------------------------------------------------------------- | |
307 | ||
def unmatched(match):
    """Return the part of match.string that lies outside the match span."""
    begin, stop = match.span(0)
    text = match.string
    return text[:begin] + text[stop:]
312 | ||
HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert type(header_values) not in StringTypes
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    # Undo backslash escaping inside the quoted string.
                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # Skip junk.  Fixed: the pattern below was a non-raw string
                # containing \s, which triggers SyntaxWarning on modern
                # CPython; now a raw string (same pattern).
                non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
401 | ||
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single
    header value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        words = []
        for key, value in pairs:
            if value is not None:
                if re.search(r"^\w+$", value) is None:
                    # Quote the value, backslash-escaping '"' and '\'.
                    value = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)
                    value = '"%s"' % value
                key = "%s=%s" % (key, value)
            words.append(key)
        if words:
            headers.append("; ".join(words))
    return ", ".join(headers)
427 | ||
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "": continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                # Cookie-attributes (everything after the first pair) get
                # their names lower-cased if recognised.
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.  Will be treated as RFC 2965
                    # cookie in rest of code.
                    # Probably it should be parsed with split_header_words, but
                    # that's too much hassle.
                    version_set = True
                # Fixed: a bare "expires" attribute (no "=value") used to
                # crash on v.startswith since v is None; only convert when
                # a value is actually present.
                if k == "expires" and v is not None:
                    # convert expires date to seconds since epoch
                    if v.startswith('"'): v = v[1:]
                    if v.endswith('"'): v = v[:-1]
                    v = http2time(v)  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
482 | ||
483 | ||
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name.

    XXX This may well be wrong.  Which RFC is HDN defined in, if any (for
    the purposes of RFC 2965)?  What about IPv6?  Remember to look at
    other uses of IPV4_RE too, if this changes.
    """
    # Reject IPv4-ish strings, the empty string, and names with a leading
    # or trailing dot.
    return not (IPV4_RE.search(text) or
                text == "" or
                text[0] == "." or
                text[-1] == ".")
499 | ||
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1: host A's name domain-matches host B's if their
    names string-compare equal (case-insensitively), or if A is a HDN of
    the form NB where N is non-empty and B has the form .B' for HDN B'.
    So x.y.com domain-matches .Y.com but not Y.com; the operation is not
    commutative (a.b.c.com matches .c.com but not the reverse).
    """
    # For IP addresses only the direct string-compare part of the
    # algorithm can apply.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    i = A.rfind(B)
    if i <= 0:
        # A does not have the form NB, or N is the empty string.
        return False
    if not B.startswith("."):
        return False
    return is_HDN(B[1:])
538 | ||
def liberal_is_HDN(text):
    """Return True if text is sort-of like a host domain name.

    Used for the accept/block domain lists: anything that does not look
    like an IPv4 address passes.
    """
    return not IPV4_RE.search(text)
548 | ||
def user_domain_match(A, B):
    """Match domains A and B for the accept/block lists.

    A and B may be host domain names or IP addresses.  A block-list entry
    with a leading dot matches any suffix; otherwise an exact
    (case-insensitive) match is required.
    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # IP addresses only ever match exactly.
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
568 | ||
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.
    """
    host = urlparse.urlparse(request.get_full_url())[1]
    if host == "":
        # No netloc in the URL; fall back to the Host header.
        host = request.get_header("Host", "")
    # Drop any ":port" suffix.
    return cut_port_re.sub("", host, 1).lower()
585 | ||
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.
    """
    req_host = request_host(request)
    erhn = req_host
    # A dotless host that is not an IP address gets ".local" appended.
    if "." not in req_host and not IPV4_RE.search(req_host):
        erhn = req_host + ".local"
    return req_host, erhn
596 | ||
def request_path(request):
    """Return the request-URI, as defined by RFC 2965."""
    url = request.get_full_url()
    # Rebuild path;parameters?query#fragment from the parsed URL, with
    # the path escaped.
    path, parameters, query, frag = urlparse.urlparse(url)[2:]
    if parameters:
        path = "%s;%s" % (path, parameters)
    path = escape_path(path)
    req_path = urlparse.urlunparse(("", "", path, "", query, frag))
    if not req_path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        req_path = "/" + req_path
    return req_path
611 | ||
def request_port(request):
    """Return the request's port as a string.

    Falls back to the default HTTP port when the host has no explicit
    ":port" suffix; returns None for a nonnumeric port.
    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon + 1:]
    try:
        int(port)  # validate only; the port stays a string
    except ValueError:
        debug("nonnumeric port: '%s'", port)
        return None
    return port
625 | ||
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """re.sub callback: normalise a %xx escape to upper-case hex digits."""
    return "%%%s" % match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # (see also draft-fielding-uri-rfc2396bis-03, draft-duerst-iri-05,
    # and RFC 2718 for new URI schemes)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
647 | ||
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    If h is a HDN of the form A.B where A has no embedded dots and B
    either has an embedded dot or is the string "local", the reach is
    ".B"; otherwise the reach of h is h itself.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    first_dot = h.find(".")
    if first_dot >= 0:
        rest = h[first_dot + 1:]
        if is_HDN(h) and (rest.find(".") >= 0 or rest == "local"):
            return "." + rest
    return h
682 | ||
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6: an unverifiable transaction is to a
    third-party host if its request-host U does not domain-match the
    reach R of the request-host O in the origin transaction.
    """
    req_host = request_host(request)
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(req_host, origin_reach)
698 | ||
699 | ||
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    It is deliberately a very simple class: it just holds attributes, and
    it is possible to construct instances that do not comply with the
    cookie standards.  CookieJar.make_cookies is the factory function for
    Cookie objects -- it deals with cookie parsing, supplying defaults,
    and normalising to the representation used here.  CookiePolicy is
    responsible for checking whether cookies should be accepted from and
    returned to the server.

    Note that the port may be present in the headers, but unspecified
    ("Port" rather than "Port=80", for example); if this is the case,
    port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest):

        # version and expires may arrive as strings; store them as ints.
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        # A port flagged as explicitly specified must actually be present.
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Whether the domain given in the cookie-attribute had an initial
        # dot; RFC 2965 (as clarified in draft errata) needs this for the
        # returned $Domain value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url

        # Non-standard cookie-attributes; copied so that later mutation of
        # the caller's mapping does not affect this cookie.
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return whether the named non-standard attribute is present."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the named non-standard attribute, or default."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set a non-standard cookie-attribute."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return whether the cookie is expired at time now (default: now)."""
        if now is None:
            now = time.time()
        return self.expires is not None and self.expires <= now

    def __str__(self):
        if self.port is None:
            port_part = ""
        else:
            port_part = ":" + self.port
        limit = self.domain + port_part + self.path
        if self.value is None:
            namevalue = self.name
        else:
            namevalue = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = ["%s=%s" % (attr, repr(getattr(self, attr)))
                for attr in ("version", "name", "value",
                             "port", "port_specified",
                             "domain", "domain_specified",
                             "domain_initial_dot",
                             "path", "path_specified",
                             "secure", "expires", "discard", "comment",
                             "comment_url")]
        args.append("rest=%s" % repr(self._rest))
        return "Cookie(%s)" % ", ".join(args)
792 | ||
793 | ||
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    Subclasses must implement set_ok() and return_ok(); the two
    *_return_ok() pre-filters below default to permissive (always True).

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server."""
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        Coarse per-domain pre-filter; presumably used to short-circuit
        per-cookie return_ok() checks -- confirm against CookieJar.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        Per-path counterpart of domain_return_ok.
        """
        return True
825 | ||
826 | ||
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies."""

    # Flags for the strict_ns_domain constructor argument; combine them
    # with bitwise OR (|).
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    # Convenience combinations.
    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
836 | ||
837 | def __init__(self, | |
838 | blocked_domains=None, allowed_domains=None, | |
839 | netscape=True, rfc2965=False, | |
840 | hide_cookie2=False, | |
841 | strict_domain=False, | |
842 | strict_rfc2965_unverifiable=True, | |
843 | strict_ns_unverifiable=False, | |
844 | strict_ns_domain=DomainLiberal, | |
845 | strict_ns_set_initial_dollar=False, | |
846 | strict_ns_set_path=False, | |
847 | ): | |
848 | """Constructor arguments should be passed as keyword arguments only.""" | |
849 | self.netscape = netscape | |
850 | self.rfc2965 = rfc2965 | |
851 | self.hide_cookie2 = hide_cookie2 | |
852 | self.strict_domain = strict_domain | |
853 | self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable | |
854 | self.strict_ns_unverifiable = strict_ns_unverifiable | |
855 | self.strict_ns_domain = strict_ns_domain | |
856 | self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar | |
857 | self.strict_ns_set_path = strict_ns_set_path | |
858 | ||
859 | if blocked_domains is not None: | |
860 | self._blocked_domains = tuple(blocked_domains) | |
861 | else: | |
862 | self._blocked_domains = () | |
863 | ||
864 | if allowed_domains is not None: | |
865 | allowed_domains = tuple(allowed_domains) | |
866 | self._allowed_domains = allowed_domains | |
867 | ||
868 | def blocked_domains(self): | |
869 | """Return the sequence of blocked domains (as a tuple).""" | |
870 | return self._blocked_domains | |
871 | def set_blocked_domains(self, blocked_domains): | |
872 | """Set the sequence of blocked domains.""" | |
873 | self._blocked_domains = tuple(blocked_domains) | |
874 | ||
875 | def is_blocked(self, domain): | |
876 | for blocked_domain in self._blocked_domains: | |
877 | if user_domain_match(domain, blocked_domain): | |
878 | return True | |
879 | return False | |
880 | ||
881 | def allowed_domains(self): | |
882 | """Return None, or the sequence of allowed domains (as a tuple).""" | |
883 | return self._allowed_domains | |
884 | def set_allowed_domains(self, allowed_domains): | |
885 | """Set the sequence of allowed domains, or None.""" | |
886 | if allowed_domains is not None: | |
887 | allowed_domains = tuple(allowed_domains) | |
888 | self._allowed_domains = allowed_domains | |
889 | ||
890 | def is_not_allowed(self, domain): | |
891 | if self._allowed_domains is None: | |
892 | return False | |
893 | for allowed_domain in self._allowed_domains: | |
894 | if user_domain_match(domain, allowed_domain): | |
895 | return False | |
896 | return True | |
897 | ||
898 | def set_ok(self, cookie, request): | |
899 | """ | |
900 | If you override .set_ok(), be sure to call this method. If it returns | |
901 | false, so should your subclass (assuming your subclass wants to be more | |
902 | strict about which cookies to accept). | |
903 | ||
904 | """ | |
905 | debug(" - checking cookie %s=%s", cookie.name, cookie.value) | |
906 | ||
907 | assert cookie.name is not None | |
908 | ||
909 | for n in "version", "verifiability", "name", "path", "domain", "port": | |
910 | fn_name = "set_ok_"+n | |
911 | fn = getattr(self, fn_name) | |
912 | if not fn(cookie, request): | |
913 | return False | |
914 | ||
915 | return True | |
916 | ||
917 | def set_ok_version(self, cookie, request): | |
918 | if cookie.version is None: | |
919 | # Version is always set to 0 by parse_ns_headers if it's a Netscape | |
920 | # cookie, so this must be an invalid RFC 2965 cookie. | |
921 | debug(" Set-Cookie2 without version attribute (%s=%s)", | |
922 | cookie.name, cookie.value) | |
923 | return False | |
924 | if cookie.version > 0 and not self.rfc2965: | |
925 | debug(" RFC 2965 cookies are switched off") | |
926 | return False | |
927 | elif cookie.version == 0 and not self.netscape: | |
928 | debug(" Netscape cookies are switched off") | |
929 | return False | |
930 | return True | |
931 | ||
932 | def set_ok_verifiability(self, cookie, request): | |
933 | if request.is_unverifiable() and is_third_party(request): | |
934 | if cookie.version > 0 and self.strict_rfc2965_unverifiable: | |
935 | debug(" third-party RFC 2965 cookie during " | |
936 | "unverifiable transaction") | |
937 | return False | |
938 | elif cookie.version == 0 and self.strict_ns_unverifiable: | |
939 | debug(" third-party Netscape cookie during " | |
940 | "unverifiable transaction") | |
941 | return False | |
942 | return True | |
943 | ||
944 | def set_ok_name(self, cookie, request): | |
945 | # Try and stop servers setting V0 cookies designed to hack other | |
946 | # servers that know both V0 and V1 protocols. | |
947 | if (cookie.version == 0 and self.strict_ns_set_initial_dollar and | |
948 | cookie.name.startswith("$")): | |
949 | debug(" illegal name (starts with '$'): '%s'", cookie.name) | |
950 | return False | |
951 | return True | |
952 | ||
953 | def set_ok_path(self, cookie, request): | |
954 | if cookie.path_specified: | |
955 | req_path = request_path(request) | |
956 | if ((cookie.version > 0 or | |
957 | (cookie.version == 0 and self.strict_ns_set_path)) and | |
958 | not req_path.startswith(cookie.path)): | |
959 | debug(" path attribute %s is not a prefix of request " | |
960 | "path %s", cookie.path, req_path) | |
961 | return False | |
962 | return True | |
963 | ||
964 | def set_ok_domain(self, cookie, request): | |
965 | if self.is_blocked(cookie.domain): | |
966 | debug(" domain %s is in user block-list", cookie.domain) | |
967 | return False | |
968 | if self.is_not_allowed(cookie.domain): | |
969 | debug(" domain %s is not in user allow-list", cookie.domain) | |
970 | return False | |
971 | if cookie.domain_specified: | |
972 | req_host, erhn = eff_request_host(request) | |
973 | domain = cookie.domain | |
974 | if self.strict_domain and (domain.count(".") >= 2): | |
975 | i = domain.rfind(".") | |
976 | j = domain.rfind(".", 0, i) | |
977 | if j == 0: # domain like .foo.bar | |
978 | tld = domain[i+1:] | |
979 | sld = domain[j+1:i] | |
980 | if (sld.lower() in [ | |
981 | "co", "ac", | |
982 | "com", "edu", "org", "net", "gov", "mil", "int"] and | |
983 | len(tld) == 2): | |
984 | # domain like .co.uk | |
985 | debug(" country-code second level domain %s", domain) | |
986 | return False | |
987 | if domain.startswith("."): | |
988 | undotted_domain = domain[1:] | |
989 | else: | |
990 | undotted_domain = domain | |
991 | embedded_dots = (undotted_domain.find(".") >= 0) | |
992 | if not embedded_dots and domain != ".local": | |
993 | debug(" non-local domain %s contains no embedded dot", | |
994 | domain) | |
995 | return False | |
996 | if cookie.version == 0: | |
997 | if (not erhn.endswith(domain) and | |
998 | (not erhn.startswith(".") and | |
999 | not ("."+erhn).endswith(domain))): | |
1000 | debug(" effective request-host %s (even with added " | |
1001 | "initial dot) does not end end with %s", | |
1002 | erhn, domain) | |
1003 | return False | |
1004 | if (cookie.version > 0 or | |
1005 | (self.strict_ns_domain & self.DomainRFC2965Match)): | |
1006 | if not domain_match(erhn, domain): | |
1007 | debug(" effective request-host %s does not domain-match " | |
1008 | "%s", erhn, domain) | |
1009 | return False | |
1010 | if (cookie.version > 0 or | |
1011 | (self.strict_ns_domain & self.DomainStrictNoDots)): | |
1012 | host_prefix = req_host[:-len(domain)] | |
1013 | if (host_prefix.find(".") >= 0 and | |
1014 | not IPV4_RE.search(req_host)): | |
1015 | debug(" host prefix %s for domain %s contains a dot", | |
1016 | host_prefix, domain) | |
1017 | return False | |
1018 | return True | |
1019 | ||
1020 | def set_ok_port(self, cookie, request): | |
1021 | if cookie.port_specified: | |
1022 | req_port = request_port(request) | |
1023 | if req_port is None: | |
1024 | req_port = "80" | |
1025 | else: | |
1026 | req_port = str(req_port) | |
1027 | for p in cookie.port.split(","): | |
1028 | try: | |
1029 | int(p) | |
1030 | except ValueError: | |
1031 | debug(" bad port %s (not numeric)", p) | |
1032 | return False | |
1033 | if p == req_port: | |
1034 | break | |
1035 | else: | |
1036 | debug(" request port (%s) not found in %s", | |
1037 | req_port, cookie.port) | |
1038 | return False | |
1039 | return True | |
1040 | ||
1041 | def return_ok(self, cookie, request): | |
1042 | """ | |
1043 | If you override .return_ok(), be sure to call this method. If it | |
1044 | returns false, so should your subclass (assuming your subclass wants to | |
1045 | be more strict about which cookies to return). | |
1046 | ||
1047 | """ | |
1048 | # Path has already been checked by .path_return_ok(), and domain | |
1049 | # blocking done by .domain_return_ok(). | |
1050 | debug(" - checking cookie %s=%s", cookie.name, cookie.value) | |
1051 | ||
1052 | for n in "version", "verifiability", "secure", "expires", "port", "domain": | |
1053 | fn_name = "return_ok_"+n | |
1054 | fn = getattr(self, fn_name) | |
1055 | if not fn(cookie, request): | |
1056 | return False | |
1057 | return True | |
1058 | ||
1059 | def return_ok_version(self, cookie, request): | |
1060 | if cookie.version > 0 and not self.rfc2965: | |
1061 | debug(" RFC 2965 cookies are switched off") | |
1062 | return False | |
1063 | elif cookie.version == 0 and not self.netscape: | |
1064 | debug(" Netscape cookies are switched off") | |
1065 | return False | |
1066 | return True | |
1067 | ||
1068 | def return_ok_verifiability(self, cookie, request): | |
1069 | if request.is_unverifiable() and is_third_party(request): | |
1070 | if cookie.version > 0 and self.strict_rfc2965_unverifiable: | |
1071 | debug(" third-party RFC 2965 cookie during unverifiable " | |
1072 | "transaction") | |
1073 | return False | |
1074 | elif cookie.version == 0 and self.strict_ns_unverifiable: | |
1075 | debug(" third-party Netscape cookie during unverifiable " | |
1076 | "transaction") | |
1077 | return False | |
1078 | return True | |
1079 | ||
1080 | def return_ok_secure(self, cookie, request): | |
1081 | if cookie.secure and request.get_type() != "https": | |
1082 | debug(" secure cookie with non-secure request") | |
1083 | return False | |
1084 | return True | |
1085 | ||
1086 | def return_ok_expires(self, cookie, request): | |
1087 | if cookie.is_expired(self._now): | |
1088 | debug(" cookie expired") | |
1089 | return False | |
1090 | return True | |
1091 | ||
1092 | def return_ok_port(self, cookie, request): | |
1093 | if cookie.port: | |
1094 | req_port = request_port(request) | |
1095 | if req_port is None: | |
1096 | req_port = "80" | |
1097 | for p in cookie.port.split(","): | |
1098 | if p == req_port: | |
1099 | break | |
1100 | else: | |
1101 | debug(" request port %s does not match cookie port %s", | |
1102 | req_port, cookie.port) | |
1103 | return False | |
1104 | return True | |
1105 | ||
1106 | def return_ok_domain(self, cookie, request): | |
1107 | req_host, erhn = eff_request_host(request) | |
1108 | domain = cookie.domain | |
1109 | ||
1110 | # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't | |
1111 | if (cookie.version == 0 and | |
1112 | (self.strict_ns_domain & self.DomainStrictNonDomain) and | |
1113 | not cookie.domain_specified and domain != erhn): | |
1114 | debug(" cookie with unspecified domain does not string-compare " | |
1115 | "equal to request domain") | |
1116 | return False | |
1117 | ||
1118 | if cookie.version > 0 and not domain_match(erhn, domain): | |
1119 | debug(" effective request-host name %s does not domain-match " | |
1120 | "RFC 2965 cookie domain %s", erhn, domain) | |
1121 | return False | |
1122 | if cookie.version == 0 and not ("."+erhn).endswith(domain): | |
1123 | debug(" request-host %s does not match Netscape cookie domain " | |
1124 | "%s", req_host, domain) | |
1125 | return False | |
1126 | return True | |
1127 | ||
1128 | def domain_return_ok(self, domain, request): | |
1129 | # Liberal check of. This is here as an optimization to avoid | |
1130 | # having to load lots of MSIE cookie files unless necessary. | |
1131 | req_host, erhn = eff_request_host(request) | |
1132 | if not req_host.startswith("."): | |
1133 | req_host = "."+req_host | |
1134 | if not erhn.startswith("."): | |
1135 | erhn = "."+erhn | |
1136 | if not (req_host.endswith(domain) or erhn.endswith(domain)): | |
1137 | #debug(" request domain %s does not match cookie domain %s", | |
1138 | # req_host, domain) | |
1139 | return False | |
1140 | ||
1141 | if self.is_blocked(domain): | |
1142 | debug(" domain %s is in user block-list", domain) | |
1143 | return False | |
1144 | if self.is_not_allowed(domain): | |
1145 | debug(" domain %s is not in user allow-list", domain) | |
1146 | return False | |
1147 | ||
1148 | return True | |
1149 | ||
1150 | def path_return_ok(self, path, request): | |
1151 | debug("- checking cookie path=%s", path) | |
1152 | req_path = request_path(request) | |
1153 | if not req_path.startswith(path): | |
1154 | debug(" %s does not path-match %s", req_path, path) | |
1155 | return False | |
1156 | return True | |
1157 | ||
1158 | ||
def vals_sorted_by_key(adict):
    """Return the values of adict as a list, ordered by sorted key.

    Equivalent to the historical keys()/sort()/map() sequence, but does
    not depend on dict.keys() returning a list or map() returning a list
    (both Python 2-only behaviours), so it also works under Python 3.
    """
    return [adict[key] for key in sorted(adict)]
1163 | ||
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key."""
    for obj in vals_sorted_by_key(mapping):
        # Duck-type test: anything exposing an .items attribute is treated
        # as a nested mapping and recursed into instead of being yielded.
        try:
            obj.items
        except AttributeError:
            yield obj
        else:
            for subobj in deepvalues(obj):
                yield subobj
1179 | ||
1180 | ||
1181 | # Used as second parameter to dict.get() method, to distinguish absent | |
1182 | # dict key from one with a None value. | |
class Absent:
    # Unique sentinel type: lets dict.get() distinguish a key that is
    # absent from one that is present with the value None.
    pass
1184 | ||
class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try
    urllib2.build_opener(HTTPCookieProcessor).open(url).

    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"

    def __init__(self, policy=None):
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        self._cookies_lock = _threading.RLock()
        # Nested map: domain -> path -> name -> Cookie instance.
        self._cookies = {}

    def set_policy(self, policy):
        """Replace the CookiePolicy object used by this jar."""
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        """Return the stored cookies for domain that policy allows for request."""
        cookies = []
        if not self._policy.domain_return_ok(domain, request):
            return []
        debug("Checking %s for cookies to return", domain)
        cookies_by_path = self._cookies[domain]
        for path in cookies_by_path.keys():
            if not self._policy.path_return_ok(path, request):
                continue
            cookies_by_name = cookies_by_path[path]
            for cookie in cookies_by_name.values():
                if not self._policy.return_ok(cookie, request):
                    debug(" not returning cookie")
                    continue
                debug(" it's a match")
                cookies.append(cookie)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        like ['foo="bar"; $Path="/"', ...]

        The $Version attribute is also added when appropriate (currently only
        once per request).

        """
        # add cookies in order of most specific (ie. longest) path first
        def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
        cookies.sort(decreasing_size)

        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            # different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            # RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            # intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (urllib2.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        """
        debug("add_cookie_header")
        self._cookies_lock.acquire()
        # try/finally ensures the lock is released even if policy code
        # raises (matches the discipline already used in set_cookie).
        try:
            # Policy checks all compare against a single "now" per request.
            self._policy._now = self._now = int(time.time())

            cookies = self._cookies_for_request(request)

            attrs = self._cookie_attrs(cookies)
            if attrs:
                if not request.has_header("Cookie"):
                    request.add_unredirected_header(
                        "Cookie", "; ".join(attrs))

            # if necessary, advertise that we know RFC 2965
            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
                not request.has_header("Cookie2")):
                for cookie in cookies:
                    if cookie.version != 1:
                        request.add_unredirected_header("Cookie2", '$Version="1"')
                        break
        finally:
            self._cookies_lock.release()

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch. V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        debug(" missing or invalid value for expires "
                              "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    try:
                        v = int(v)
                    except ValueError:
                        debug(" missing or invalid (non-numeric) value for "
                              "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    # age-calculation rules. Remember that zero Max-Age is
                    # a request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                        k not in ["port", "comment", "commenturl"]):
                        # lazy %-args form, consistent with every other
                        # debug() call in this module
                        debug(" missing value for %s attribute", k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        """Build a Cookie from one normalised tuple, or return None.

        Returns None (after clearing any matching stored cookie) when the
        tuple carries an already-past expiry date.
        """
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None: version = int(version)
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i+1]
            if len(path) == 0: path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "."+domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present. Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True
        elif expires <= self._now:
            # Expiry date in past is request to delete cookie. This can't be
            # in DefaultCookiePolicy, because can't delete cookies there.
            try:
                self.clear(domain, path, name)
            except KeyError:
                pass
            debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                  domain, path, name)
            return None

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

    def _cookies_from_attrs_set(self, attrs_set, request):
        """Turn parsed header attribute lists into Cookie objects."""
        cookie_tuples = self._normalized_cookie_tuples(attrs_set)

        cookies = []
        for tup in cookie_tuples:
            cookie = self._cookie_from_cookie_tuple(tup, request)
            if cookie: cookies.append(cookie)
        return cookies

    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
        ns_hdrs = headers.getheaders("Set-Cookie")

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except:
            # deliberate catch-all for malformed headers; fatal exceptions
            # are re-raised by reraise_unmasked_exceptions
            reraise_unmasked_exceptions()
            cookies = []

        if ns_hdrs and netscape:
            try:
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except:
                reraise_unmasked_exceptions()
                ns_cookies = []

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                lookup = {}
                for cookie in cookies:
                    lookup[(cookie.domain, cookie.path, cookie.name)] = None

                def no_matching_rfc2965(ns_cookie, lookup=lookup):
                    key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
                    return key not in lookup
                ns_cookies = filter(no_matching_rfc2965, ns_cookies)

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies

    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so."""
        self._cookies_lock.acquire()
        # release the lock even if policy code raises
        try:
            self._policy._now = self._now = int(time.time())

            if self._policy.set_ok(cookie, request):
                self.set_cookie(cookie)
        finally:
            self._cookies_lock.release()

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set."""
        c = self._cookies
        self._cookies_lock.acquire()
        try:
            if cookie.domain not in c: c[cookie.domain] = {}
            c2 = c[cookie.domain]
            if cookie.path not in c2: c2[cookie.path] = {}
            c3 = c2[cookie.path]
            c3[cookie.name] = cookie
        finally:
            self._cookies_lock.release()

    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request."""
        debug("extract_cookies: %s", response.info())
        self._cookies_lock.acquire()
        # release the lock even if policy or parsing code raises
        try:
            self._policy._now = self._now = int(time.time())

            for cookie in self.make_cookies(response, request):
                if self._policy.set_ok(cookie, request):
                    debug(" setting cookie: %s", cookie)
                    self.set_cookie(cookie)
        finally:
            self._cookies_lock.release()

    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies. If
        given a single argument, only cookies belonging to that domain will be
        removed. If given two arguments, cookies belonging to the specified
        path within that domain are removed. If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}

    def clear_session_cookies(self):
        """Discard all session cookies.

        Note that the .save() method won't save session cookies anyway, unless
        you ask otherwise by passing a true ignore_discard argument.

        """
        self._cookies_lock.acquire()
        try:
            for cookie in self:
                if cookie.discard:
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the
        .save() method won't save expired cookies anyway (unless you ask
        otherwise by passing a true ignore_expires argument).

        """
        self._cookies_lock.acquire()
        try:
            now = time.time()
            for cookie in self:
                if cookie.is_expired(now):
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()

    def __iter__(self):
        # deepvalues yields the Cookie leaves of the nested dict snapshot
        return deepvalues(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        i = 0
        for cookie in self: i = i + 1
        return i

    def __repr__(self):
        r = []
        for cookie in self: r.append(repr(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

    def __str__(self):
        r = []
        for cookie in self: r.append(str(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))
1688 | ||
1689 | ||
1690 | class LoadError(Exception): pass | |
1691 | ||
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            try:
                filename+""
            # filename+"" raises TypeError for non-string-likes; catching
            # only that (instead of a bare except:) avoids swallowing
            # KeyboardInterrupt / SystemExit, per this module's policy.
            except TypeError:
                raise ValueError("filename must be string-like")
            self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Must be implemented by subclasses (e.g. LWPCookieJar,
        MozillaCookieJar).
        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Raises ValueError if no filename is given here or at construction
        time; may raise LoadError or IOError from the underlying parser.
        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        # try/finally guarantees the lock is released on every exit path.
        # Previously a LoadError/IOError re-raised below (or any unexpected
        # exception) left _cookies_lock held forever, deadlocking the jar.
        try:
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                # Restore the pre-revert cookies so state is unchanged.
                self._cookies = old_state
                raise
        finally:
            self._cookies_lock.release()
1749 | ||
1750 | from _LWPCookieJar import LWPCookieJar, lwp_cookie_str | |
1751 | from _MozillaCookieJar import MozillaCookieJar |