| 1 | /*- |
| 2 | * Copyright (c) 1990 The Regents of the University of California. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * This code is derived from software contributed to Berkeley by |
| 6 | * Mike Olson. |
| 7 | * |
| 8 | * Redistribution and use in source and binary forms, with or without |
| 9 | * modification, are permitted provided that the following conditions |
| 10 | * are met: |
| 11 | * 1. Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer in the |
| 15 | * documentation and/or other materials provided with the distribution. |
| 16 | * 3. All advertising materials mentioning features or use of this software |
| 17 | * must display the following acknowledgement: |
| 18 | * This product includes software developed by the University of |
| 19 | * California, Berkeley and its contributors. |
| 20 | * 4. Neither the name of the University nor the names of its contributors |
| 21 | * may be used to endorse or promote products derived from this software |
| 22 | * without specific prior written permission. |
| 23 | * |
| 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 34 | * SUCH DAMAGE. |
| 35 | */ |
| 36 | |
| 37 | #if defined(LIBC_SCCS) && !defined(lint) |
| 38 | static char sccsid[] = "@(#)big.c 5.2 (Berkeley) 2/22/91"; |
| 39 | #endif /* LIBC_SCCS and not lint */ |
| 40 | |
| 41 | #include <sys/types.h> |
| 42 | #include <db.h> |
| 43 | #include <stdlib.h> |
| 44 | #include <string.h> |
| 45 | #include "btree.h" |
| 46 | |
| 47 | /* |
| 48 | * _BT_GETBIG -- Get big data from indirect pages. |
| 49 | * |
| 50 | * This routine chases indirect blocks for the big object at the |
| 51 | * specified page to a buffer, and return the address of the buffer. |
| 52 | * |
| 53 | * Parameters: |
| 54 | * t -- btree with the indirect blocks |
| 55 | * pgno -- page number that starts the chain |
| 56 | * p -- (char **) to get the address of the buffer containing |
| 57 | * the key or datum. |
| 58 | * sz -- pointer to an int to get the size of the instantiated |
| 59 | * object. |
| 60 | * |
| 61 | * Returns: |
| 62 | * RET_SUCCESS, RET_ERROR. |
| 63 | * |
| 64 | * Side Effects: |
| 65 | * None. |
| 66 | */ |
| 67 | |
| 68 | int |
| 69 | _bt_getbig(t, pgno, p, sz) |
| 70 | BTREE_P t; |
| 71 | pgno_t pgno; |
| 72 | char **p; |
| 73 | int *sz; |
| 74 | { |
| 75 | pgno_t save; |
| 76 | size_t nbytes; |
| 77 | size_t nhere; |
| 78 | BTHEADER *h; |
| 79 | char *top, *from, *where; |
| 80 | |
| 81 | save = t->bt_curpage->h_pgno; |
| 82 | if (_bt_getpage(t, pgno) == RET_ERROR) |
| 83 | return (RET_ERROR); |
| 84 | |
| 85 | h = t->bt_curpage; |
| 86 | |
| 87 | bcopy((char *) &(h->h_linp[0]), |
| 88 | (char *) &nbytes, |
| 89 | (size_t) sizeof(nbytes)); |
| 90 | |
| 91 | if ((*p = (char *) malloc(nbytes)) == (char *) NULL) |
| 92 | return (RET_ERROR); |
| 93 | |
| 94 | *sz = nbytes; |
| 95 | from = ((char *) (&h->h_linp[0])) + sizeof(nbytes); |
| 96 | top = ((char *) h) + t->bt_psize; |
| 97 | |
| 98 | /* need more space for data? */ |
| 99 | |
| 100 | where = *p; |
| 101 | |
| 102 | while (nbytes > 0) { |
| 103 | nhere = (int) (top - from); |
| 104 | if (nhere > nbytes) { |
| 105 | (void) bcopy(from, where, (size_t) nbytes); |
| 106 | nbytes = 0; |
| 107 | } else { |
| 108 | (void) bcopy(from, where, nhere); |
| 109 | where += nhere; |
| 110 | nbytes -= nhere; |
| 111 | if (_bt_getpage(t, h->h_nextpg) == RET_ERROR) |
| 112 | return (RET_ERROR); |
| 113 | h = t->bt_curpage; |
| 114 | top = ((char *) h) + t->bt_psize; |
| 115 | from = (char *) &(h->h_linp[0]); |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | if (_bt_getpage(t, save) == RET_ERROR) |
| 120 | return (RET_ERROR); |
| 121 | |
| 122 | return (RET_SUCCESS); |
| 123 | } |
| 124 | |
| 125 | /* |
| 126 | * _BT_DELINDIR -- Delete a chain of indirect blocks from the btree. |
| 127 | * |
| 128 | * When a large item is deleted from the tree, this routine puts the |
| 129 | * space that it occupied onto the free list for later reuse. The |
| 130 | * bt_free entry in the btree structure points at the head of this list. |
| 131 | * This value is also stored on disk in the btree's metadata. |
| 132 | * |
| 133 | * Parameters: |
| 134 | * t -- btree from which to delete pages |
| 135 | * chain -- page number that starts the chain. |
| 136 | * |
| 137 | * Returns: |
| 138 | * RET_SUCCESS, RET_ERROR. |
| 139 | * |
| 140 | * Side Effects: |
| 141 | * Invalidates the current on-disk version of the btree's |
| 142 | * metadata (if any), and updates the free list appropriately. |
| 143 | */ |
| 144 | |
| 145 | int |
| 146 | _bt_delindir(t, chain) |
| 147 | BTREE_P t; |
| 148 | pgno_t chain; |
| 149 | { |
| 150 | BTHEADER *h; |
| 151 | pgno_t save; |
| 152 | pgno_t oldfree; |
| 153 | |
| 154 | h = t->bt_curpage; |
| 155 | save = h->h_pgno; |
| 156 | if (_bt_getpage(t, chain) == RET_ERROR) |
| 157 | return (RET_ERROR); |
| 158 | |
| 159 | /* |
| 160 | * If some internal node is pointing at this chain, don't |
| 161 | * delete it. |
| 162 | */ |
| 163 | |
| 164 | if (t->bt_curpage->h_flags & F_PRESERVE) { |
| 165 | if (_bt_getpage(t, save) == RET_ERROR) |
| 166 | return (RET_ERROR); |
| 167 | return (RET_SUCCESS); |
| 168 | } |
| 169 | |
| 170 | /* if there's nothing on the free list, this is easy... */ |
| 171 | if (t->bt_free == P_NONE) { |
| 172 | t->bt_free = chain; |
| 173 | } else { |
| 174 | oldfree = t->bt_free; |
| 175 | |
| 176 | /* find the end of the data chain for the deleted datum */ |
| 177 | t->bt_free = chain; |
| 178 | do { |
| 179 | if (_bt_getpage(t, chain) == RET_ERROR) |
| 180 | return (RET_ERROR); |
| 181 | h = t->bt_curpage; |
| 182 | if (h->h_nextpg != P_NONE) |
| 183 | chain = h->h_nextpg; |
| 184 | } while (h->h_nextpg != P_NONE); |
| 185 | |
| 186 | /* link freed pages into free list */ |
| 187 | h->h_nextpg = oldfree; |
| 188 | if (_bt_write(t, h, RELEASE) == RET_ERROR) |
| 189 | return (RET_ERROR); |
| 190 | if (_bt_getpage(t, oldfree) == RET_ERROR) |
| 191 | return (RET_ERROR); |
| 192 | h = t->bt_curpage; |
| 193 | h->h_prevpg = chain; |
| 194 | if (_bt_write(t, h, RELEASE) == RET_ERROR) |
| 195 | return (RET_ERROR); |
| 196 | } |
| 197 | |
| 198 | /* restore the tree's current page pointer */ |
| 199 | if (_bt_getpage(t, save) == RET_ERROR) |
| 200 | return (RET_ERROR); |
| 201 | |
| 202 | /* we have trashed the tree metadata; rewrite it later */ |
| 203 | t->bt_flags &= ~BTF_METAOK; |
| 204 | |
| 205 | return (RET_SUCCESS); |
| 206 | } |
| 207 | |
| 208 | /* |
| 209 | * _BT_INDIRECT -- Write a series of indirect pages for big objects. |
| 210 | * |
| 211 | * A chain of indirect pages looks like |
| 212 | * |
| 213 | * +-------------------+ +---------------------+ |
| 214 | * |hdr|size| | |hdr| | |
| 215 | * +---+----+ | +---+ | |
| 216 | * | ... data ... | | ... data ... | ... |
| 217 | * | | | | |
| 218 | * +-------------------+ +---------------------+ |
| 219 | * |
| 220 | * where hdr is a standard btree page header (with the indirect bit |
| 221 | * set), size on the first page is the real size of the datum, and |
| 222 | * data are bytes of the datum, split across as many pages as necessary. |
| 223 | * Indirect pages are chained together with the h_prevpg and h_nextpg |
| 224 | * entries of the page header struct. |
| 225 | * |
| 226 | * A single DBT is written per chain, so space on the last page is |
| 227 | * wasted. |
| 228 | * |
| 229 | * We return the page number of the start of the chain. |
| 230 | * |
| 231 | * When a big object is deleted from a tree, the space that it occupied |
| 232 | * is placed on a free list for later reuse. This routine checks that |
| 233 | * free list before allocating new pages to the big datum being inserted. |
| 234 | * |
| 235 | * Parameters: |
| 236 | * t -- btree in which to store indirect blocks |
| 237 | * data -- DBT with the big datum in it |
| 238 | * pgno -- place to put the starting page number of the chain |
| 239 | * |
| 240 | * Returns: |
| 241 | * RET_SUCCESS, RET_ERROR. |
| 242 | * |
| 243 | * Side Effects: |
| 244 | * Current page is changed on return. |
| 245 | */ |
| 246 | |
| 247 | int |
| 248 | _bt_indirect(t, data, pgno) |
| 249 | BTREE_P t; |
| 250 | DBT *data; |
| 251 | pgno_t *pgno; |
| 252 | { |
| 253 | pgno_t prev; |
| 254 | char *top; |
| 255 | char *where; |
| 256 | char *from; |
| 257 | size_t dsize; |
| 258 | pgno_t nextchn; |
| 259 | int ischain; |
| 260 | BTHEADER *cur; |
| 261 | |
| 262 | /* get set for first page in chain */ |
| 263 | prev = P_NONE; |
| 264 | dsize = data->size; |
| 265 | from = (char *) data->data; |
| 266 | |
| 267 | /* if there are blocks on the free list, use them first */ |
| 268 | if ((*pgno = t->bt_free) == P_NONE) { |
| 269 | if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) |
| 270 | return (RET_ERROR); |
| 271 | |
| 272 | ischain = 0; |
| 273 | *pgno = cur->h_pgno = ++(t->bt_npages); |
| 274 | } else { |
| 275 | if (_bt_getpage(t, *pgno) == RET_ERROR) |
| 276 | return (RET_ERROR); |
| 277 | ischain = 1; |
| 278 | cur = t->bt_curpage; |
| 279 | } |
| 280 | |
| 281 | cur->h_flags = F_CONT|F_LEAF; |
| 282 | (void) bcopy((char *) &dsize, (char *) &cur->h_linp[0], sizeof(size_t)); |
| 283 | where = ((char *) (&cur->h_linp[0])) + sizeof(size_t); |
| 284 | |
| 285 | /* fill and write pages in the chain */ |
| 286 | for (;;) { |
| 287 | int nhere; |
| 288 | |
| 289 | top = ((char *) cur) + t->bt_psize; |
| 290 | cur->h_prevpg = prev; |
| 291 | nextchn = cur->h_nextpg; |
| 292 | nhere = (int) (top - where); |
| 293 | |
| 294 | if (nhere >= dsize) { |
| 295 | (void) bcopy(from, where, (int) dsize); |
| 296 | cur->h_nextpg = P_NONE; |
| 297 | dsize = 0; |
| 298 | } else { |
| 299 | (void) bcopy(from, where, (int) nhere); |
| 300 | dsize -= nhere; |
| 301 | from += nhere; |
| 302 | if (nextchn == P_NONE) |
| 303 | cur->h_nextpg = t->bt_npages + 1; |
| 304 | prev = cur->h_pgno; |
| 305 | } |
| 306 | |
| 307 | /* current page is ready to go; write it out */ |
| 308 | if (_bt_write(t, cur, RELEASE) == RET_ERROR) |
| 309 | return (RET_ERROR); |
| 310 | |
| 311 | /* free the current page, if appropriate */ |
| 312 | if (ISDISK(t) && !ISCACHE(t) && !ischain) { |
| 313 | (void) free ((char *) cur); |
| 314 | } |
| 315 | |
| 316 | /* done? */ |
| 317 | if (dsize == 0) |
| 318 | break; |
| 319 | |
| 320 | /* allocate another page */ |
| 321 | if (nextchn == P_NONE) { |
| 322 | if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) |
| 323 | return (RET_ERROR); |
| 324 | ischain = 0; |
| 325 | cur->h_pgno = ++(t->bt_npages); |
| 326 | } else { |
| 327 | if (_bt_getpage(t, nextchn) == RET_ERROR) |
| 328 | return (RET_ERROR); |
| 329 | ischain = 1; |
| 330 | cur = t->bt_curpage; |
| 331 | } |
| 332 | cur->h_flags = F_LEAF | F_CONT; |
| 333 | |
| 334 | where = (char *) (&cur->h_linp[0]); |
| 335 | } |
| 336 | |
| 337 | /* if we used pages from the free list, record changes to it */ |
| 338 | if (*pgno == t->bt_free) { |
| 339 | t->bt_free = nextchn; |
| 340 | t->bt_flags &= ~BTF_METAOK; |
| 341 | } |
| 342 | |
| 343 | return (RET_SUCCESS); |
| 344 | } |
| 345 | |
| 346 | /* |
| 347 | * _BT_MARKCHAIN -- Mark a chain of pages as used by an internal node. |
| 348 | * |
| 349 | * Chains of indirect blocks pointed to by leaf nodes get reclaimed |
| 350 | * when the item that points to them gets deleted. Chains pointed |
| 351 | * to by internal nodes never get deleted. This routine marks a |
| 352 | * chain as pointed to by an internal node. |
| 353 | * |
| 354 | * Parameters: |
| 355 | * t -- tree in which to mark |
| 356 | * chain -- number of first page in chain |
| 357 | * |
| 358 | * Returns: |
| 359 | * RET_SUCCESS, RET_ERROR. |
| 360 | * |
| 361 | * Side Effects: |
| 362 | * None. |
| 363 | */ |
| 364 | |
| 365 | int |
| 366 | _bt_markchain(t, chain) |
| 367 | BTREE_P t; |
| 368 | pgno_t chain; |
| 369 | { |
| 370 | pgno_t save; |
| 371 | |
| 372 | save = t->bt_curpage->h_pgno; |
| 373 | |
| 374 | if (_bt_getpage(t, chain) == RET_ERROR) |
| 375 | return (RET_ERROR); |
| 376 | |
| 377 | t->bt_curpage->h_flags |= (F_DIRTY|F_PRESERVE); |
| 378 | |
| 379 | if (_bt_getpage(t, save) == RET_ERROR) |
| 380 | return (RET_ERROR); |
| 381 | |
| 382 | return (RET_SUCCESS); |
| 383 | } |