Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """Manage shelves of pickled objects. |
2 | ||
3 | A "shelf" is a persistent, dictionary-like object. The difference | |
4 | with dbm databases is that the values (not the keys!) in a shelf can | |
5 | be essentially arbitrary Python objects -- anything that the "pickle" | |
6 | module can handle. This includes most class instances, recursive data | |
7 | types, and objects containing lots of shared sub-objects. The keys | |
8 | are ordinary strings. | |
9 | ||
10 | To summarize the interface (key is a string, data is an arbitrary | |
11 | object): | |
12 | ||
13 | import shelve | |
14 | d = shelve.open(filename) # open, with (g)dbm filename -- no suffix | |
15 | ||
16 | d[key] = data # store data at key (overwrites old data if | |
17 | # using an existing key) | |
18 | data = d[key] # retrieve a COPY of the data at key (raise | |
19 | # KeyError if no such key) -- NOTE that this | |
20 | # access returns a *copy* of the entry! | |
21 | del d[key] # delete data stored at key (raises KeyError | |
22 | # if no such key) | |
23 | flag = d.has_key(key) # true if the key exists; same as "key in d" | |
24 | list = d.keys() # a list of all existing keys (slow!) | |
25 | ||
26 | d.close() # close it | |
27 | ||
28 | Depending on the implementation, closing a persistent dictionary may | |
29 | or may not be necessary to flush changes to disk. | |
30 | ||
31 | Normally, d[key] returns a COPY of the entry. This needs care when | |
32 | mutable entries are mutated: for example, if d[key] is a list, | |
33 | d[key].append(anitem) | |
34 | does NOT modify the entry d[key] itself, as stored in the persistent | |
35 | mapping -- it only modifies the copy, which is then immediately | |
36 | discarded, so that the append has NO effect whatsoever. To append an | |
37 | item to d[key] in a way that will affect the persistent mapping, use: | |
38 | data = d[key] | |
39 | data.append(anitem) | |
40 | d[key] = data | |
41 | ||
42 | To avoid the problem with mutable entries, you may pass the keyword | |
43 | argument writeback=True in the call to shelve.open. When you use: | |
44 | d = shelve.open(filename, writeback=True) | |
45 | then d keeps a cache of all entries you access, and writes them all back | |
46 | to the persistent mapping when you call d.close(). This ensures that | |
47 | such usage as d[key].append(anitem) works as intended. | |
48 | ||
49 | However, using keyword argument writeback=True may consume vast amounts | |
50 | of memory for the cache, and it may make d.close() very slow, if you | |
51 | access many of d's entries after opening it in this way: d has no way to | |
52 | check which of the entries you access are mutable and/or which ones you | |
53 | actually mutate, so it must cache, and write back at close, all of the | |
54 | entries that you access. You can call d.sync() to write back all the | |
55 | entries in the cache, and empty the cache (d.sync() also synchronizes | |
56 | the persistent dictionary on disk, if feasible). | |
57 | """ | |
58 | ||
59 | # Try using cPickle and cStringIO if available. | |
60 | ||
61 | try: | |
62 | from cPickle import Pickler, Unpickler | |
63 | except ImportError: | |
64 | from pickle import Pickler, Unpickler | |
65 | ||
66 | try: | |
67 | from cStringIO import StringIO | |
68 | except ImportError: | |
69 | from StringIO import StringIO | |
70 | ||
71 | import UserDict | |
72 | import warnings | |
73 | ||
74 | __all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"] | |
75 | ||
class Shelf(UserDict.DictMixin):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object whose values are
    pickled strings.  Values stored through the shelf are pickled before
    being written to that object and unpickled when read back.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False, binary=None):
        """Wrap the dictionary-like object 'dict'.

        protocol  -- pickle protocol used for stored values (default 0).
        writeback -- if true, keep every accessed entry in self.cache and
                     write the cached entries back on sync()/close().
        binary    -- deprecated; when given it is converted with int() and
                     used as the protocol.

        Raises ValueError if both 'protocol' and 'binary' are given.
        """
        self.dict = dict
        if protocol is not None and binary is not None:
            # Call form of raise: valid in both Python 2 and Python 3.
            raise ValueError("can't specify both 'protocol' and 'binary'")
        if binary is not None:
            warnings.warn("The 'binary' argument to Shelf() is deprecated",
                          PendingDeprecationWarning)
            protocol = int(binary)
        if protocol is None:
            protocol = 0
        self._protocol = protocol
        self.writeback = writeback
        self.cache = {}

    def keys(self):
        """Return the keys of the underlying dictionary-like object."""
        return self.dict.keys()

    def __len__(self):
        """Return the number of entries in the underlying object."""
        return len(self.dict)

    def has_key(self, key):
        """Return true if 'key' is present in the underlying object."""
        return self.dict.has_key(key)

    def __contains__(self, key):
        """Support "key in shelf"; same result as has_key(key)."""
        return self.dict.has_key(key)

    def get(self, key, default=None):
        """Return the unpickled value for 'key', or 'default' if absent."""
        if self.dict.has_key(key):
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the value stored at 'key'.

        A cached object is returned if there is one; otherwise the stored
        pickle is loaded (and remembered in the cache when writeback is
        enabled).  A missing key propagates the underlying object's error.
        """
        try:
            value = self.cache[key]
        except KeyError:
            f = StringIO(self.dict[key])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle 'value' with the configured protocol and store it at 'key'."""
        if self.writeback:
            self.cache[key] = value
        f = StringIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key] = f.getvalue()

    def __delitem__(self, key):
        """Delete 'key' from the underlying object and from the cache."""
        del self.dict[key]
        try:
            del self.cache[key]
        except KeyError:
            # The entry was never cached; nothing more to remove.
            pass

    def close(self):
        """Write back cached entries and close the underlying object.

        The underlying object's close() is called when it has one.
        Afterwards self.dict is replaced by 0, so the shelf is unusable.
        """
        self.sync()
        try:
            self.dict.close()
        except AttributeError:
            # The underlying object has no close() method (or the shelf
            # was already closed and self.dict is 0).
            pass
        self.dict = 0

    def __del__(self):
        """Close the shelf when it is garbage collected."""
        if not hasattr(self, 'writeback'):
            # __init__ did not run to completion (an exception was raised
            # before the attributes were set), so sync()/close() would
            # fail with a spurious AttributeError.
            return
        self.close()

    def sync(self):
        """Write cached entries back and sync the underlying object.

        With writeback enabled, every cached entry is re-stored and the
        cache is emptied.  The underlying object's sync() is then called
        if it has one.
        """
        if self.writeback and self.cache:
            # Disable writeback while storing so __setitem__ does not
            # modify the cache that is being iterated over.
            self.writeback = False
            try:
                for key, entry in self.cache.iteritems():
                    self[key] = entry
            finally:
                # Restore the flag even if pickling or storing fails;
                # otherwise the shelf would silently stop caching.
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()
159 | ||
160 | ||
class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    In addition to the Shelf interface this provides the cursor-style
    methods first(), next(), previous(), last() and set_location(),
    which have no counterpart in [g]dbm databases.  Each of them returns
    a (key, value) pair whose value has been unpickled.

    The actual database must be opened using one of the "bsddb"
    modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    bsddb.rnopen) and passed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False, binary=None):
        """Initialize exactly like Shelf, wrapping the database 'dict'."""
        Shelf.__init__(self, dict, protocol, writeback, binary)

    def set_location(self, key):
        """Call the database's set_location(key); return (key, object)."""
        key, pickled = self.dict.set_location(key)
        return (key, Unpickler(StringIO(pickled)).load())

    def next(self):
        """Call the database's next(); return (key, object)."""
        key, pickled = self.dict.next()
        return (key, Unpickler(StringIO(pickled)).load())

    def previous(self):
        """Call the database's previous(); return (key, object)."""
        key, pickled = self.dict.previous()
        return (key, Unpickler(StringIO(pickled)).load())

    def first(self):
        """Call the database's first(); return (key, object)."""
        key, pickled = self.dict.first()
        return (key, Unpickler(StringIO(pickled)).load())

    def last(self):
        """Call the database's last(); return (key, object)."""
        key, pickled = self.dict.last()
        return (key, Unpickler(StringIO(pickled)).load())
201 | ||
202 | ||
class DbfilenameShelf(Shelf):
    """Shelf implementation using the "anydbm" generic dbm interface.

    The constructor takes the filename of the dbm database rather than
    an already opened dictionary-like object.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False, binary=None):
        """Open 'filename' with anydbm.open(filename, flag) and wrap it."""
        # Imported here so anydbm is only loaded when a shelf is opened
        # by filename.
        import anydbm
        database = anydbm.open(filename, flag)
        Shelf.__init__(self, database, protocol, writeback, binary)
213 | ||
214 | ||
def open(filename, flag='c', protocol=None, writeback=False, binary=None):
    """Open a persistent dictionary for reading and writing.

    'filename' is the base filename of the underlying database; as a
    side-effect, an extension may be added to it and more than one file
    may be created.  The optional 'flag' argument is interpreted exactly
    like the flag parameter of anydbm.open().  The optional 'protocol'
    argument selects the version of the pickle protocol (0, 1, or 2).

    The optional 'binary' argument is deprecated; it may be set to True
    to force the use of binary pickles for serializing data values.

    Returns a DbfilenameShelf.  See the module's __doc__ string for an
    overview of the interface.
    """
    shelf = DbfilenameShelf(filename, flag, protocol, writeback, binary)
    return shelf