Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """Classes to represent arbitrary sets (including sets of sets). |
2 | ||
3 | This module implements sets using dictionaries whose values are | |
4 | ignored. The usual operations (union, intersection, deletion, etc.) | |
5 | are provided as both methods and operators. | |
6 | ||
7 | Important: sets are not sequences! While they support 'x in s', | |
8 | 'len(s)', and 'for x in s', none of those operations are unique for | |
9 | sequences; for example, mappings support all three as well. The | |
10 | characteristic operation for sequences is subscripting with small | |
11 | integers: s[i], for i in range(len(s)). Sets don't support | |
12 | subscripting at all. Also, sequences allow multiple occurrences and | |
13 | their elements have a definite order; sets on the other hand don't | |
14 | record multiple occurrences and don't remember the order of element | |
15 | insertion (which is why they don't support s[i]). | |
16 | ||
17 | The following classes are provided: | |
18 | ||
19 | BaseSet -- All the operations common to both mutable and immutable | |
20 | sets. This is an abstract class, not meant to be directly | |
21 | instantiated. | |
22 | ||
23 | Set -- Mutable sets, subclass of BaseSet; not hashable. | |
24 | ||
25 | ImmutableSet -- Immutable sets, subclass of BaseSet; hashable. | |
26 | An iterable argument is mandatory to create an ImmutableSet. | |
27 | ||
28 | _TemporarilyImmutableSet -- A wrapper around a Set, hashable, | |
29 | giving the same hash value as the immutable set equivalent | |
30 | would have. Do not use this class directly. | |
31 | ||
32 | Only hashable objects can be added to a Set. In particular, you cannot | |
33 | really add a Set as an element to another Set; if you try, what is | |
34 | actually added is an ImmutableSet built from it (it compares equal to | |
35 | the one you tried adding). | |
36 | ||
37 | When you ask if `x in y' where x is a Set and y is a Set or | |
38 | ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and | |
39 | what's tested is actually `z in y'. | |
40 | ||
41 | """ | |
42 | ||
43 | # Code history: | |
44 | # | |
45 | # - Greg V. Wilson wrote the first version, using a different approach | |
46 | # to the mutable/immutable problem, and inheriting from dict. | |
47 | # | |
48 | # - Alex Martelli modified Greg's version to implement the current | |
49 | # Set/ImmutableSet approach, and make the data an attribute. | |
50 | # | |
51 | # - Guido van Rossum rewrote much of the code, made some API changes, | |
52 | # and cleaned up the docstrings. | |
53 | # | |
54 | # - Raymond Hettinger added a number of speedups and other | |
55 | # improvements. | |
56 | ||
57 | from __future__ import generators | |
58 | try: | |
59 | from itertools import ifilter, ifilterfalse | |
60 | except ImportError: | |
61 | # Code to make the module run under Py2.2 | |
62 | def ifilter(predicate, iterable): | |
63 | if predicate is None: | |
64 | def predicate(x): | |
65 | return x | |
66 | for x in iterable: | |
67 | if predicate(x): | |
68 | yield x | |
69 | def ifilterfalse(predicate, iterable): | |
70 | if predicate is None: | |
71 | def predicate(x): | |
72 | return x | |
73 | for x in iterable: | |
74 | if not predicate(x): | |
75 | yield x | |
76 | try: | |
77 | True, False | |
78 | except NameError: | |
79 | True, False = (0==0, 0!=0) | |
80 | ||
81 | __all__ = ['BaseSet', 'Set', 'ImmutableSet'] | |
82 | ||
83 | class BaseSet(object): | |
84 | """Common base class for mutable and immutable sets.""" | |
85 | ||
86 | __slots__ = ['_data'] | |
87 | ||
88 | # Constructor | |
89 | ||
90 | def __init__(self): | |
91 | """This is an abstract class.""" | |
92 | # Don't call this from a concrete subclass! | |
93 | if self.__class__ is BaseSet: | |
94 | raise TypeError, ("BaseSet is an abstract class. " | |
95 | "Use Set or ImmutableSet.") | |
96 | ||
97 | # Standard protocols: __len__, __repr__, __str__, __iter__ | |
98 | ||
99 | def __len__(self): | |
100 | """Return the number of elements of a set.""" | |
101 | return len(self._data) | |
102 | ||
103 | def __repr__(self): | |
104 | """Return string representation of a set. | |
105 | ||
106 | This looks like 'Set([<list of elements>])'. | |
107 | """ | |
108 | return self._repr() | |
109 | ||
110 | # __str__ is the same as __repr__ | |
111 | __str__ = __repr__ | |
112 | ||
113 | def _repr(self, sorted=False): | |
114 | elements = self._data.keys() | |
115 | if sorted: | |
116 | elements.sort() | |
117 | return '%s(%r)' % (self.__class__.__name__, elements) | |
118 | ||
119 | def __iter__(self): | |
120 | """Return an iterator over the elements or a set. | |
121 | ||
122 | This is the keys iterator for the underlying dict. | |
123 | """ | |
124 | return self._data.iterkeys() | |
125 | ||
126 | # Three-way comparison is not supported. However, because __eq__ is | |
127 | # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and | |
128 | # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this | |
129 | # case). | |
130 | ||
131 | def __cmp__(self, other): | |
132 | raise TypeError, "can't compare sets using cmp()" | |
133 | ||
134 | # Equality comparisons using the underlying dicts. Mixed-type comparisons | |
135 | # are allowed here, where Set == z for non-Set z always returns False, | |
136 | # and Set != z always True. This allows expressions like "x in y" to | |
137 | # give the expected result when y is a sequence of mixed types, not | |
138 | # raising a pointless TypeError just because y contains a Set, or x is | |
139 | # a Set and y contain's a non-set ("in" invokes only __eq__). | |
140 | # Subtle: it would be nicer if __eq__ and __ne__ could return | |
141 | # NotImplemented instead of True or False. Then the other comparand | |
142 | # would get a chance to determine the result, and if the other comparand | |
143 | # also returned NotImplemented then it would fall back to object address | |
144 | # comparison (which would always return False for __eq__ and always | |
145 | # True for __ne__). However, that doesn't work, because this type | |
146 | # *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented, | |
147 | # Python tries __cmp__ next, and the __cmp__ here then raises TypeError. | |
148 | ||
149 | def __eq__(self, other): | |
150 | if isinstance(other, BaseSet): | |
151 | return self._data == other._data | |
152 | else: | |
153 | return False | |
154 | ||
155 | def __ne__(self, other): | |
156 | if isinstance(other, BaseSet): | |
157 | return self._data != other._data | |
158 | else: | |
159 | return True | |
160 | ||
161 | # Copying operations | |
162 | ||
163 | def copy(self): | |
164 | """Return a shallow copy of a set.""" | |
165 | result = self.__class__() | |
166 | result._data.update(self._data) | |
167 | return result | |
168 | ||
169 | __copy__ = copy # For the copy module | |
170 | ||
171 | def __deepcopy__(self, memo): | |
172 | """Return a deep copy of a set; used by copy module.""" | |
173 | # This pre-creates the result and inserts it in the memo | |
174 | # early, in case the deep copy recurses into another reference | |
175 | # to this same set. A set can't be an element of itself, but | |
176 | # it can certainly contain an object that has a reference to | |
177 | # itself. | |
178 | from copy import deepcopy | |
179 | result = self.__class__() | |
180 | memo[id(self)] = result | |
181 | data = result._data | |
182 | value = True | |
183 | for elt in self: | |
184 | data[deepcopy(elt, memo)] = value | |
185 | return result | |
186 | ||
187 | # Standard set operations: union, intersection, both differences. | |
188 | # Each has an operator version (e.g. __or__, invoked with |) and a | |
189 | # method version (e.g. union). | |
190 | # Subtle: Each pair requires distinct code so that the outcome is | |
191 | # correct when the type of other isn't suitable. For example, if | |
192 | # we did "union = __or__" instead, then Set().union(3) would return | |
193 | # NotImplemented instead of raising TypeError (albeit that *why* it | |
194 | # raises TypeError as-is is also a bit subtle). | |
195 | ||
196 | def __or__(self, other): | |
197 | """Return the union of two sets as a new set. | |
198 | ||
199 | (I.e. all elements that are in either set.) | |
200 | """ | |
201 | if not isinstance(other, BaseSet): | |
202 | return NotImplemented | |
203 | return self.union(other) | |
204 | ||
205 | def union(self, other): | |
206 | """Return the union of two sets as a new set. | |
207 | ||
208 | (I.e. all elements that are in either set.) | |
209 | """ | |
210 | result = self.__class__(self) | |
211 | result._update(other) | |
212 | return result | |
213 | ||
214 | def __and__(self, other): | |
215 | """Return the intersection of two sets as a new set. | |
216 | ||
217 | (I.e. all elements that are in both sets.) | |
218 | """ | |
219 | if not isinstance(other, BaseSet): | |
220 | return NotImplemented | |
221 | return self.intersection(other) | |
222 | ||
223 | def intersection(self, other): | |
224 | """Return the intersection of two sets as a new set. | |
225 | ||
226 | (I.e. all elements that are in both sets.) | |
227 | """ | |
228 | if not isinstance(other, BaseSet): | |
229 | other = Set(other) | |
230 | if len(self) <= len(other): | |
231 | little, big = self, other | |
232 | else: | |
233 | little, big = other, self | |
234 | common = ifilter(big._data.has_key, little) | |
235 | return self.__class__(common) | |
236 | ||
237 | def __xor__(self, other): | |
238 | """Return the symmetric difference of two sets as a new set. | |
239 | ||
240 | (I.e. all elements that are in exactly one of the sets.) | |
241 | """ | |
242 | if not isinstance(other, BaseSet): | |
243 | return NotImplemented | |
244 | return self.symmetric_difference(other) | |
245 | ||
246 | def symmetric_difference(self, other): | |
247 | """Return the symmetric difference of two sets as a new set. | |
248 | ||
249 | (I.e. all elements that are in exactly one of the sets.) | |
250 | """ | |
251 | result = self.__class__() | |
252 | data = result._data | |
253 | value = True | |
254 | selfdata = self._data | |
255 | try: | |
256 | otherdata = other._data | |
257 | except AttributeError: | |
258 | otherdata = Set(other)._data | |
259 | for elt in ifilterfalse(otherdata.has_key, selfdata): | |
260 | data[elt] = value | |
261 | for elt in ifilterfalse(selfdata.has_key, otherdata): | |
262 | data[elt] = value | |
263 | return result | |
264 | ||
265 | def __sub__(self, other): | |
266 | """Return the difference of two sets as a new Set. | |
267 | ||
268 | (I.e. all elements that are in this set and not in the other.) | |
269 | """ | |
270 | if not isinstance(other, BaseSet): | |
271 | return NotImplemented | |
272 | return self.difference(other) | |
273 | ||
274 | def difference(self, other): | |
275 | """Return the difference of two sets as a new Set. | |
276 | ||
277 | (I.e. all elements that are in this set and not in the other.) | |
278 | """ | |
279 | result = self.__class__() | |
280 | data = result._data | |
281 | try: | |
282 | otherdata = other._data | |
283 | except AttributeError: | |
284 | otherdata = Set(other)._data | |
285 | value = True | |
286 | for elt in ifilterfalse(otherdata.has_key, self): | |
287 | data[elt] = value | |
288 | return result | |
289 | ||
290 | # Membership test | |
291 | ||
292 | def __contains__(self, element): | |
293 | """Report whether an element is a member of a set. | |
294 | ||
295 | (Called in response to the expression `element in self'.) | |
296 | """ | |
297 | try: | |
298 | return element in self._data | |
299 | except TypeError: | |
300 | transform = getattr(element, "__as_temporarily_immutable__", None) | |
301 | if transform is None: | |
302 | raise # re-raise the TypeError exception we caught | |
303 | return transform() in self._data | |
304 | ||
305 | # Subset and superset test | |
306 | ||
307 | def issubset(self, other): | |
308 | """Report whether another set contains this set.""" | |
309 | self._binary_sanity_check(other) | |
310 | if len(self) > len(other): # Fast check for obvious cases | |
311 | return False | |
312 | for elt in ifilterfalse(other._data.has_key, self): | |
313 | return False | |
314 | return True | |
315 | ||
316 | def issuperset(self, other): | |
317 | """Report whether this set contains another set.""" | |
318 | self._binary_sanity_check(other) | |
319 | if len(self) < len(other): # Fast check for obvious cases | |
320 | return False | |
321 | for elt in ifilterfalse(self._data.has_key, other): | |
322 | return False | |
323 | return True | |
324 | ||
325 | # Inequality comparisons using the is-subset relation. | |
326 | __le__ = issubset | |
327 | __ge__ = issuperset | |
328 | ||
329 | def __lt__(self, other): | |
330 | self._binary_sanity_check(other) | |
331 | return len(self) < len(other) and self.issubset(other) | |
332 | ||
333 | def __gt__(self, other): | |
334 | self._binary_sanity_check(other) | |
335 | return len(self) > len(other) and self.issuperset(other) | |
336 | ||
337 | # Assorted helpers | |
338 | ||
339 | def _binary_sanity_check(self, other): | |
340 | # Check that the other argument to a binary operation is also | |
341 | # a set, raising a TypeError otherwise. | |
342 | if not isinstance(other, BaseSet): | |
343 | raise TypeError, "Binary operation only permitted between sets" | |
344 | ||
345 | def _compute_hash(self): | |
346 | # Calculate hash code for a set by xor'ing the hash codes of | |
347 | # the elements. This ensures that the hash code does not depend | |
348 | # on the order in which elements are added to the set. This is | |
349 | # not called __hash__ because a BaseSet should not be hashable; | |
350 | # only an ImmutableSet is hashable. | |
351 | result = 0 | |
352 | for elt in self: | |
353 | result ^= hash(elt) | |
354 | return result | |
355 | ||
356 | def _update(self, iterable): | |
357 | # The main loop for update() and the subclass __init__() methods. | |
358 | data = self._data | |
359 | ||
360 | # Use the fast update() method when a dictionary is available. | |
361 | if isinstance(iterable, BaseSet): | |
362 | data.update(iterable._data) | |
363 | return | |
364 | ||
365 | value = True | |
366 | ||
367 | if type(iterable) in (list, tuple, xrange): | |
368 | # Optimized: we know that __iter__() and next() can't | |
369 | # raise TypeError, so we can move 'try:' out of the loop. | |
370 | it = iter(iterable) | |
371 | while True: | |
372 | try: | |
373 | for element in it: | |
374 | data[element] = value | |
375 | return | |
376 | except TypeError: | |
377 | transform = getattr(element, "__as_immutable__", None) | |
378 | if transform is None: | |
379 | raise # re-raise the TypeError exception we caught | |
380 | data[transform()] = value | |
381 | else: | |
382 | # Safe: only catch TypeError where intended | |
383 | for element in iterable: | |
384 | try: | |
385 | data[element] = value | |
386 | except TypeError: | |
387 | transform = getattr(element, "__as_immutable__", None) | |
388 | if transform is None: | |
389 | raise # re-raise the TypeError exception we caught | |
390 | data[transform()] = value | |
391 | ||
392 | ||
393 | class ImmutableSet(BaseSet): | |
394 | """Immutable set class.""" | |
395 | ||
396 | __slots__ = ['_hashcode'] | |
397 | ||
398 | # BaseSet + hashing | |
399 | ||
400 | def __init__(self, iterable=None): | |
401 | """Construct an immutable set from an optional iterable.""" | |
402 | self._hashcode = None | |
403 | self._data = {} | |
404 | if iterable is not None: | |
405 | self._update(iterable) | |
406 | ||
407 | def __hash__(self): | |
408 | if self._hashcode is None: | |
409 | self._hashcode = self._compute_hash() | |
410 | return self._hashcode | |
411 | ||
412 | def __getstate__(self): | |
413 | return self._data, self._hashcode | |
414 | ||
415 | def __setstate__(self, state): | |
416 | self._data, self._hashcode = state | |
417 | ||
418 | class Set(BaseSet): | |
419 | """ Mutable set class.""" | |
420 | ||
421 | __slots__ = [] | |
422 | ||
423 | # BaseSet + operations requiring mutability; no hashing | |
424 | ||
425 | def __init__(self, iterable=None): | |
426 | """Construct a set from an optional iterable.""" | |
427 | self._data = {} | |
428 | if iterable is not None: | |
429 | self._update(iterable) | |
430 | ||
431 | def __getstate__(self): | |
432 | # getstate's results are ignored if it is not | |
433 | return self._data, | |
434 | ||
435 | def __setstate__(self, data): | |
436 | self._data, = data | |
437 | ||
438 | def __hash__(self): | |
439 | """A Set cannot be hashed.""" | |
440 | # We inherit object.__hash__, so we must deny this explicitly | |
441 | raise TypeError, "Can't hash a Set, only an ImmutableSet." | |
442 | ||
443 | # In-place union, intersection, differences. | |
444 | # Subtle: The xyz_update() functions deliberately return None, | |
445 | # as do all mutating operations on built-in container types. | |
446 | # The __xyz__ spellings have to return self, though. | |
447 | ||
448 | def __ior__(self, other): | |
449 | """Update a set with the union of itself and another.""" | |
450 | self._binary_sanity_check(other) | |
451 | self._data.update(other._data) | |
452 | return self | |
453 | ||
454 | def union_update(self, other): | |
455 | """Update a set with the union of itself and another.""" | |
456 | self._update(other) | |
457 | ||
458 | def __iand__(self, other): | |
459 | """Update a set with the intersection of itself and another.""" | |
460 | self._binary_sanity_check(other) | |
461 | self._data = (self & other)._data | |
462 | return self | |
463 | ||
464 | def intersection_update(self, other): | |
465 | """Update a set with the intersection of itself and another.""" | |
466 | if isinstance(other, BaseSet): | |
467 | self &= other | |
468 | else: | |
469 | self._data = (self.intersection(other))._data | |
470 | ||
471 | def __ixor__(self, other): | |
472 | """Update a set with the symmetric difference of itself and another.""" | |
473 | self._binary_sanity_check(other) | |
474 | self.symmetric_difference_update(other) | |
475 | return self | |
476 | ||
477 | def symmetric_difference_update(self, other): | |
478 | """Update a set with the symmetric difference of itself and another.""" | |
479 | data = self._data | |
480 | value = True | |
481 | if not isinstance(other, BaseSet): | |
482 | other = Set(other) | |
483 | if self is other: | |
484 | self.clear() | |
485 | for elt in other: | |
486 | if elt in data: | |
487 | del data[elt] | |
488 | else: | |
489 | data[elt] = value | |
490 | ||
491 | def __isub__(self, other): | |
492 | """Remove all elements of another set from this set.""" | |
493 | self._binary_sanity_check(other) | |
494 | self.difference_update(other) | |
495 | return self | |
496 | ||
497 | def difference_update(self, other): | |
498 | """Remove all elements of another set from this set.""" | |
499 | data = self._data | |
500 | if not isinstance(other, BaseSet): | |
501 | other = Set(other) | |
502 | if self is other: | |
503 | self.clear() | |
504 | for elt in ifilter(data.has_key, other): | |
505 | del data[elt] | |
506 | ||
507 | # Python dict-like mass mutations: update, clear | |
508 | ||
509 | def update(self, iterable): | |
510 | """Add all values from an iterable (such as a list or file).""" | |
511 | self._update(iterable) | |
512 | ||
513 | def clear(self): | |
514 | """Remove all elements from this set.""" | |
515 | self._data.clear() | |
516 | ||
517 | # Single-element mutations: add, remove, discard | |
518 | ||
519 | def add(self, element): | |
520 | """Add an element to a set. | |
521 | ||
522 | This has no effect if the element is already present. | |
523 | """ | |
524 | try: | |
525 | self._data[element] = True | |
526 | except TypeError: | |
527 | transform = getattr(element, "__as_immutable__", None) | |
528 | if transform is None: | |
529 | raise # re-raise the TypeError exception we caught | |
530 | self._data[transform()] = True | |
531 | ||
532 | def remove(self, element): | |
533 | """Remove an element from a set; it must be a member. | |
534 | ||
535 | If the element is not a member, raise a KeyError. | |
536 | """ | |
537 | try: | |
538 | del self._data[element] | |
539 | except TypeError: | |
540 | transform = getattr(element, "__as_temporarily_immutable__", None) | |
541 | if transform is None: | |
542 | raise # re-raise the TypeError exception we caught | |
543 | del self._data[transform()] | |
544 | ||
545 | def discard(self, element): | |
546 | """Remove an element from a set if it is a member. | |
547 | ||
548 | If the element is not a member, do nothing. | |
549 | """ | |
550 | try: | |
551 | self.remove(element) | |
552 | except KeyError: | |
553 | pass | |
554 | ||
555 | def pop(self): | |
556 | """Remove and return an arbitrary set element.""" | |
557 | return self._data.popitem()[0] | |
558 | ||
559 | def __as_immutable__(self): | |
560 | # Return a copy of self as an immutable set | |
561 | return ImmutableSet(self) | |
562 | ||
563 | def __as_temporarily_immutable__(self): | |
564 | # Return self wrapped in a temporarily immutable set | |
565 | return _TemporarilyImmutableSet(self) | |
566 | ||
567 | ||
568 | class _TemporarilyImmutableSet(BaseSet): | |
569 | # Wrap a mutable set as if it was temporarily immutable. | |
570 | # This only supplies hashing and equality comparisons. | |
571 | ||
572 | def __init__(self, set): | |
573 | self._set = set | |
574 | self._data = set._data # Needed by ImmutableSet.__eq__() | |
575 | ||
576 | def __hash__(self): | |
577 | return self._set._compute_hash() |