Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /*- |
2 | * Copyright (c) 1990 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Mike Olson. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. All advertising materials mentioning features or use of this software | |
17 | * must display the following acknowledgement: | |
18 | * This product includes software developed by the University of | |
19 | * California, Berkeley and its contributors. | |
20 | * 4. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | |
23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
35 | */ | |
36 | ||
37 | #if defined(LIBC_SCCS) && !defined(lint) | |
38 | static char sccsid[] = "@(#)big.c 5.2 (Berkeley) 2/22/91"; | |
39 | #endif /* LIBC_SCCS and not lint */ | |
40 | ||
41 | #include <sys/types.h> | |
42 | #include <db.h> | |
43 | #include <stdlib.h> | |
44 | #include <string.h> | |
45 | #include "btree.h" | |
46 | ||
47 | /* | |
48 | * _BT_GETBIG -- Get big data from indirect pages. | |
49 | * | |
50 | * This routine chases indirect blocks for the big object at the | |
51 | * specified page to a buffer, and return the address of the buffer. | |
52 | * | |
53 | * Parameters: | |
54 | * t -- btree with the indirect blocks | |
55 | * pgno -- page number that starts the chain | |
56 | * p -- (char **) to get the address of the buffer containing | |
57 | * the key or datum. | |
58 | * sz -- pointer to an int to get the size of the instantiated | |
59 | * object. | |
60 | * | |
61 | * Returns: | |
62 | * RET_SUCCESS, RET_ERROR. | |
63 | * | |
64 | * Side Effects: | |
65 | * None. | |
66 | */ | |
67 | ||
68 | int | |
69 | _bt_getbig(t, pgno, p, sz) | |
70 | BTREE_P t; | |
71 | pgno_t pgno; | |
72 | char **p; | |
73 | int *sz; | |
74 | { | |
75 | pgno_t save; | |
76 | size_t nbytes; | |
77 | size_t nhere; | |
78 | BTHEADER *h; | |
79 | char *top, *from, *where; | |
80 | ||
81 | save = t->bt_curpage->h_pgno; | |
82 | if (_bt_getpage(t, pgno) == RET_ERROR) | |
83 | return (RET_ERROR); | |
84 | ||
85 | h = t->bt_curpage; | |
86 | ||
87 | bcopy((char *) &(h->h_linp[0]), | |
88 | (char *) &nbytes, | |
89 | (size_t) sizeof(nbytes)); | |
90 | ||
91 | if ((*p = (char *) malloc(nbytes)) == (char *) NULL) | |
92 | return (RET_ERROR); | |
93 | ||
94 | *sz = nbytes; | |
95 | from = ((char *) (&h->h_linp[0])) + sizeof(nbytes); | |
96 | top = ((char *) h) + t->bt_psize; | |
97 | ||
98 | /* need more space for data? */ | |
99 | ||
100 | where = *p; | |
101 | ||
102 | while (nbytes > 0) { | |
103 | nhere = (int) (top - from); | |
104 | if (nhere > nbytes) { | |
105 | (void) bcopy(from, where, (size_t) nbytes); | |
106 | nbytes = 0; | |
107 | } else { | |
108 | (void) bcopy(from, where, nhere); | |
109 | where += nhere; | |
110 | nbytes -= nhere; | |
111 | if (_bt_getpage(t, h->h_nextpg) == RET_ERROR) | |
112 | return (RET_ERROR); | |
113 | h = t->bt_curpage; | |
114 | top = ((char *) h) + t->bt_psize; | |
115 | from = (char *) &(h->h_linp[0]); | |
116 | } | |
117 | } | |
118 | ||
119 | if (_bt_getpage(t, save) == RET_ERROR) | |
120 | return (RET_ERROR); | |
121 | ||
122 | return (RET_SUCCESS); | |
123 | } | |
124 | ||
125 | /* | |
126 | * _BT_DELINDIR -- Delete a chain of indirect blocks from the btree. | |
127 | * | |
128 | * When a large item is deleted from the tree, this routine puts the | |
129 | * space that it occupied onto the free list for later reuse. The | |
130 | * bt_free entry in the btree structure points at the head of this list. | |
131 | * This value is also stored on disk in the btree's metadata. | |
132 | * | |
133 | * Parameters: | |
134 | * t -- btree from which to delete pages | |
135 | * chain -- page number that starts the chain. | |
136 | * | |
137 | * Returns: | |
138 | * RET_SUCCESS, RET_ERROR. | |
139 | * | |
140 | * Side Effects: | |
141 | * Invalidates the current on-disk version of the btree's | |
142 | * metadata (if any), and updates the free list appropriately. | |
143 | */ | |
144 | ||
145 | int | |
146 | _bt_delindir(t, chain) | |
147 | BTREE_P t; | |
148 | pgno_t chain; | |
149 | { | |
150 | BTHEADER *h; | |
151 | pgno_t save; | |
152 | pgno_t oldfree; | |
153 | ||
154 | h = t->bt_curpage; | |
155 | save = h->h_pgno; | |
156 | if (_bt_getpage(t, chain) == RET_ERROR) | |
157 | return (RET_ERROR); | |
158 | ||
159 | /* | |
160 | * If some internal node is pointing at this chain, don't | |
161 | * delete it. | |
162 | */ | |
163 | ||
164 | if (t->bt_curpage->h_flags & F_PRESERVE) { | |
165 | if (_bt_getpage(t, save) == RET_ERROR) | |
166 | return (RET_ERROR); | |
167 | return (RET_SUCCESS); | |
168 | } | |
169 | ||
170 | /* if there's nothing on the free list, this is easy... */ | |
171 | if (t->bt_free == P_NONE) { | |
172 | t->bt_free = chain; | |
173 | } else { | |
174 | oldfree = t->bt_free; | |
175 | ||
176 | /* find the end of the data chain for the deleted datum */ | |
177 | t->bt_free = chain; | |
178 | do { | |
179 | if (_bt_getpage(t, chain) == RET_ERROR) | |
180 | return (RET_ERROR); | |
181 | h = t->bt_curpage; | |
182 | if (h->h_nextpg != P_NONE) | |
183 | chain = h->h_nextpg; | |
184 | } while (h->h_nextpg != P_NONE); | |
185 | ||
186 | /* link freed pages into free list */ | |
187 | h->h_nextpg = oldfree; | |
188 | if (_bt_write(t, h, RELEASE) == RET_ERROR) | |
189 | return (RET_ERROR); | |
190 | if (_bt_getpage(t, oldfree) == RET_ERROR) | |
191 | return (RET_ERROR); | |
192 | h = t->bt_curpage; | |
193 | h->h_prevpg = chain; | |
194 | if (_bt_write(t, h, RELEASE) == RET_ERROR) | |
195 | return (RET_ERROR); | |
196 | } | |
197 | ||
198 | /* restore the tree's current page pointer */ | |
199 | if (_bt_getpage(t, save) == RET_ERROR) | |
200 | return (RET_ERROR); | |
201 | ||
202 | /* we have trashed the tree metadata; rewrite it later */ | |
203 | t->bt_flags &= ~BTF_METAOK; | |
204 | ||
205 | return (RET_SUCCESS); | |
206 | } | |
207 | ||
208 | /* | |
209 | * _BT_INDIRECT -- Write a series of indirect pages for big objects. | |
210 | * | |
211 | * A chain of indirect pages looks like | |
212 | * | |
213 | * +-------------------+ +---------------------+ | |
214 | * |hdr|size| | |hdr| | | |
215 | * +---+----+ | +---+ | | |
216 | * | ... data ... | | ... data ... | ... | |
217 | * | | | | | |
218 | * +-------------------+ +---------------------+ | |
219 | * | |
220 | * where hdr is a standard btree page header (with the indirect bit | |
221 | * set), size on the first page is the real size of the datum, and | |
222 | * data are bytes of the datum, split across as many pages as necessary. | |
223 | * Indirect pages are chained together with the h_prevpg and h_nextpg | |
224 | * entries of the page header struct. | |
225 | * | |
226 | * A single DBT is written per chain, so space on the last page is | |
227 | * wasted. | |
228 | * | |
229 | * We return the page number of the start of the chain. | |
230 | * | |
231 | * When a big object is deleted from a tree, the space that it occupied | |
232 | * is placed on a free list for later reuse. This routine checks that | |
233 | * free list before allocating new pages to the big datum being inserted. | |
234 | * | |
235 | * Parameters: | |
236 | * t -- btree in which to store indirect blocks | |
237 | * data -- DBT with the big datum in it | |
238 | * pgno -- place to put the starting page number of the chain | |
239 | * | |
240 | * Returns: | |
241 | * RET_SUCCESS, RET_ERROR. | |
242 | * | |
243 | * Side Effects: | |
244 | * Current page is changed on return. | |
245 | */ | |
246 | ||
247 | int | |
248 | _bt_indirect(t, data, pgno) | |
249 | BTREE_P t; | |
250 | DBT *data; | |
251 | pgno_t *pgno; | |
252 | { | |
253 | pgno_t prev; | |
254 | char *top; | |
255 | char *where; | |
256 | char *from; | |
257 | size_t dsize; | |
258 | pgno_t nextchn; | |
259 | int ischain; | |
260 | BTHEADER *cur; | |
261 | ||
262 | /* get set for first page in chain */ | |
263 | prev = P_NONE; | |
264 | dsize = data->size; | |
265 | from = (char *) data->data; | |
266 | ||
267 | /* if there are blocks on the free list, use them first */ | |
268 | if ((*pgno = t->bt_free) == P_NONE) { | |
269 | if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) | |
270 | return (RET_ERROR); | |
271 | ||
272 | ischain = 0; | |
273 | *pgno = cur->h_pgno = ++(t->bt_npages); | |
274 | } else { | |
275 | if (_bt_getpage(t, *pgno) == RET_ERROR) | |
276 | return (RET_ERROR); | |
277 | ischain = 1; | |
278 | cur = t->bt_curpage; | |
279 | } | |
280 | ||
281 | cur->h_flags = F_CONT|F_LEAF; | |
282 | (void) bcopy((char *) &dsize, (char *) &cur->h_linp[0], sizeof(size_t)); | |
283 | where = ((char *) (&cur->h_linp[0])) + sizeof(size_t); | |
284 | ||
285 | /* fill and write pages in the chain */ | |
286 | for (;;) { | |
287 | int nhere; | |
288 | ||
289 | top = ((char *) cur) + t->bt_psize; | |
290 | cur->h_prevpg = prev; | |
291 | nextchn = cur->h_nextpg; | |
292 | nhere = (int) (top - where); | |
293 | ||
294 | if (nhere >= dsize) { | |
295 | (void) bcopy(from, where, (int) dsize); | |
296 | cur->h_nextpg = P_NONE; | |
297 | dsize = 0; | |
298 | } else { | |
299 | (void) bcopy(from, where, (int) nhere); | |
300 | dsize -= nhere; | |
301 | from += nhere; | |
302 | if (nextchn == P_NONE) | |
303 | cur->h_nextpg = t->bt_npages + 1; | |
304 | prev = cur->h_pgno; | |
305 | } | |
306 | ||
307 | /* current page is ready to go; write it out */ | |
308 | if (_bt_write(t, cur, RELEASE) == RET_ERROR) | |
309 | return (RET_ERROR); | |
310 | ||
311 | /* free the current page, if appropriate */ | |
312 | if (ISDISK(t) && !ISCACHE(t) && !ischain) { | |
313 | (void) free ((char *) cur); | |
314 | } | |
315 | ||
316 | /* done? */ | |
317 | if (dsize == 0) | |
318 | break; | |
319 | ||
320 | /* allocate another page */ | |
321 | if (nextchn == P_NONE) { | |
322 | if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) | |
323 | return (RET_ERROR); | |
324 | ischain = 0; | |
325 | cur->h_pgno = ++(t->bt_npages); | |
326 | } else { | |
327 | if (_bt_getpage(t, nextchn) == RET_ERROR) | |
328 | return (RET_ERROR); | |
329 | ischain = 1; | |
330 | cur = t->bt_curpage; | |
331 | } | |
332 | cur->h_flags = F_LEAF | F_CONT; | |
333 | ||
334 | where = (char *) (&cur->h_linp[0]); | |
335 | } | |
336 | ||
337 | /* if we used pages from the free list, record changes to it */ | |
338 | if (*pgno == t->bt_free) { | |
339 | t->bt_free = nextchn; | |
340 | t->bt_flags &= ~BTF_METAOK; | |
341 | } | |
342 | ||
343 | return (RET_SUCCESS); | |
344 | } | |
345 | ||
346 | /* | |
347 | * _BT_MARKCHAIN -- Mark a chain of pages as used by an internal node. | |
348 | * | |
349 | * Chains of indirect blocks pointed to by leaf nodes get reclaimed | |
350 | * when the item that points to them gets deleted. Chains pointed | |
351 | * to by internal nodes never get deleted. This routine marks a | |
352 | * chain as pointed to by an internal node. | |
353 | * | |
354 | * Parameters: | |
355 | * t -- tree in which to mark | |
356 | * chain -- number of first page in chain | |
357 | * | |
358 | * Returns: | |
359 | * RET_SUCCESS, RET_ERROR. | |
360 | * | |
361 | * Side Effects: | |
362 | * None. | |
363 | */ | |
364 | ||
365 | int | |
366 | _bt_markchain(t, chain) | |
367 | BTREE_P t; | |
368 | pgno_t chain; | |
369 | { | |
370 | pgno_t save; | |
371 | ||
372 | save = t->bt_curpage->h_pgno; | |
373 | ||
374 | if (_bt_getpage(t, chain) == RET_ERROR) | |
375 | return (RET_ERROR); | |
376 | ||
377 | t->bt_curpage->h_flags |= (F_DIRTY|F_PRESERVE); | |
378 | ||
379 | if (_bt_getpage(t, save) == RET_ERROR) | |
380 | return (RET_ERROR); | |
381 | ||
382 | return (RET_SUCCESS); | |
383 | } |