BSD 4_3_Net_2 release
[unix-history] / usr / src / lib / libc / db / hash / bigkey.c
CommitLineData
ab4abb4f
KB
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
af359dea
C
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
ab4abb4f
KB
35 */
36
37#if defined(LIBC_SCCS) && !defined(lint)
af359dea 38static char sccsid[] = "@(#)bigkey.c 5.5 (Berkeley) 3/12/91";
ab4abb4f
KB
39#endif /* LIBC_SCCS and not lint */
40
41/******************************************************************************
42
43PACKAGE: hash
44
45DESCRIPTION:
46 Big key/data handling for the hashing package.
47
48ROUTINES:
49 External
50 __big_keydata
51 __big_split
52 __big_insert
53 __big_return
54 __big_delete
55 __find_last_page
56 Internal
57 collect_key
58 collect_data
59******************************************************************************/
60/* Includes */
dbe8e807 61#include <sys/param.h>
ab4abb4f
KB
62#include <assert.h>
63#include <errno.h>
64#include <db.h>
65#include <stdio.h>
fb91cf55
KB
66#include <stdlib.h>
67#include <string.h>
ab4abb4f
KB
68#include "hash.h"
69#include "page.h"
70
71/* Externals */
72/* buf.c */
73extern BUFHEAD *__get_buf();
74
096e4a66
KB
75/* dynahash.c */
76extern u_int call_hash();
77
ab4abb4f
KB
78/* page.c */
79extern BUFHEAD *__add_ovflpage();
80
81/* My externals */
82extern int __big_keydata();
83extern int __big_split();
84extern int __big_insert();
85extern int __big_return();
86extern int __big_delete();
87extern u_short __find_last_page();
88extern int __find_bigpair();
89
90/* My internals */
91static int collect_key();
92static int collect_data();
93
94#ifdef HASH_STATISTICS
95extern long hash_accesses, hash_collisions, hash_expansions, hash_overflows;
96#endif
97/*
98Big_insert
99
100You need to do an insert and the key/data pair is too big
1010 ==> OK
102-1 ==> ERROR
103*/
104extern int
105__big_insert ( bufp, key, val )
106BUFHEAD *bufp;
107DBT *key, *val;
108{
109 char *cp = bufp->page; /* Character pointer of p */
110 register u_short *p = (u_short *)cp;
111 char *key_data, *val_data;
112 int key_size, val_size;
113 int n;
114 u_short space, move_bytes, off;
115
c6ad6315 116 key_data = (char *)key->data;
ab4abb4f 117 key_size = key->size;
c6ad6315 118 val_data = (char *)val->data;
ab4abb4f
KB
119 val_size = val->size;
120
121 /* First move the Key */
122 for ( space = FREESPACE(p) - BIGOVERHEAD;
123 key_size;
124 space = FREESPACE(p) - BIGOVERHEAD ) {
125 move_bytes = MIN(space, key_size);
126 off = OFFSET(p) - move_bytes;
127 bcopy (key_data, cp+off, move_bytes );
128 key_size -= move_bytes;
129 key_data += move_bytes;
130 n = p[0];
131 p[++n] = off;
132 p[0] = ++n;
133 FREESPACE(p) = off - PAGE_META(n);
134 OFFSET(p) = off;
135 p[n] = PARTIAL_KEY;
136 bufp = __add_ovflpage(bufp);
137 if ( !bufp ) {
138 return(-1);
139 }
140 n = p[0];
141 if ( !key_size ) {
142 if ( FREESPACE(p) ) {
143 move_bytes = MIN (FREESPACE(p), val_size);
144 off = OFFSET(p) - move_bytes;
145 p[n] = off;
146 bcopy ( val_data, cp + off, move_bytes );
147 val_data += move_bytes;
148 val_size -= move_bytes;
149 p[n-2] = FULL_KEY_DATA;
150 FREESPACE(p) = FREESPACE(p) - move_bytes;
151 OFFSET(p) = off;
152 }
153 else p[n-2] = FULL_KEY;
154 }
155 p = (u_short *)bufp->page;
156 cp = bufp->page;
157 bufp->flags |= BUF_MOD;
158 }
159
160 /* Now move the data */
161 for ( space = FREESPACE(p) - BIGOVERHEAD;
162 val_size;
163 space = FREESPACE(p) - BIGOVERHEAD ) {
164 move_bytes = MIN(space, val_size);
165 /*
166 Here's the hack to make sure that if the data ends
167 on the same page as the key ends, FREESPACE is
168 at least one
169 */
170 if ( space == val_size && val_size == val->size ) {
171 move_bytes--;
172 }
173 off = OFFSET(p) - move_bytes;
174 bcopy (val_data, cp+off, move_bytes );
175 val_size -= move_bytes;
176 val_data += move_bytes;
177 n = p[0];
178 p[++n] = off;
179 p[0] = ++n;
180 FREESPACE(p) = off - PAGE_META(n);
181 OFFSET(p) = off;
182 if ( val_size ) {
183 p[n] = FULL_KEY;
184 bufp = __add_ovflpage (bufp);
185 if ( !bufp ) {
186 return(-1);
187 }
188 cp = bufp->page;
189 p = (u_short *)cp;
190 } else {
191 p[n] = FULL_KEY_DATA;
192 }
193 bufp->flags |= BUF_MOD;
194 }
195 return(0);
196}
197
198/*
199 Called when bufp's page contains a partial key (index should be 1)
200
201 All pages in the big key/data pair except bufp are freed. We cannot
202 free bufp because the page pointing to it is lost and we can't
203 get rid of its pointer.
204
205 Returns 0 => OK
206 -1 => ERROR
207*/
208extern int
209__big_delete (bufp, ndx)
210BUFHEAD *bufp;
211int ndx;
212{
213 register BUFHEAD *rbufp = bufp;
214 register BUFHEAD *last_bfp = NULL;
215 char *cp;
216 u_short *bp = (u_short *)bufp->page;
217 u_short *xbp;
218 u_short pageno = 0;
219 u_short off, free_sp;
220 int key_done = 0;
221 int n;
222
223 while (!key_done || (bp[2] != FULL_KEY_DATA)) {
224 if ( bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA ) key_done = 1;
225
226 /*
227 If there is freespace left on a FULL_KEY_DATA page,
228 then the data is short and fits entirely on this
229 page, and this is the last page.
230 */
231 if ( bp[2] == FULL_KEY_DATA && FREESPACE(bp) ) break;
232 pageno = bp[bp[0]-1];
233 rbufp->flags |= BUF_MOD;
234 rbufp = __get_buf ( pageno, rbufp, 0 );
235 if ( last_bfp ) __free_ovflpage(last_bfp);
236 last_bfp = rbufp;
237 if ( !rbufp ) return(-1); /* Error */
238 bp = (u_short *)rbufp->page;
239 }
240
241 /*
242 If we get here then rbufp points to the last page of
243 the big key/data pair. Bufp points to the first
244 one -- it should now be empty pointing to the next
245 page after this pair. Can't free it because we don't
246 have the page pointing to it.
247 */
248
249 /* This is information from the last page of the pair */
250 n = bp[0];
251 pageno = bp[n-1];
252
253 /* Now, bp is the first page of the pair */
254 bp = (u_short *)bufp->page;
255 if ( n > 2 ) {
256 /* There is an overflow page */
257 bp[1] = pageno;
258 bp[2] = OVFLPAGE;
259 bufp->ovfl = rbufp->ovfl;
260 } else {
261 /* This is the last page */
262 bufp->ovfl = NULL;
263 }
264 n -= 2;
265 bp[0] = n;
266 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
267 OFFSET(bp) = hashp->BSIZE - 1;
268
269 bufp->flags |= BUF_MOD;
270 if ( rbufp ) __free_ovflpage(rbufp);
271 if ( last_bfp != rbufp ) __free_ovflpage(last_bfp);
272
273 hashp->NKEYS--;
274 return(0);
275}
276
277/*
278 0 = key not found
279 -1 = get next overflow page
280 -2 means key not found and this is big key/data
281 -3 error
282*/
283extern int
284__find_bigpair(bufp, ndx, key, size )
285BUFHEAD *bufp;
286int ndx;
287char *key;
288int size;
289{
290 register u_short *bp = (u_short *)bufp->page;
291 register char *p = bufp->page;
292 int ksize = size;
293 char *kkey = key;
294 u_short bytes;
295
296
297 for ( bytes = hashp->BSIZE - bp[ndx];
298 bytes <= size && bp[ndx+1] == PARTIAL_KEY;
299 bytes = hashp->BSIZE - bp[ndx] ) {
300
301 if ( bcmp ( p+bp[ndx], kkey, bytes ))return(-2);
302 kkey += bytes;
303 ksize -= bytes;
304 bufp = __get_buf ( bp[ndx+2], bufp, 0 );
305 if ( !bufp ) {
306 return(-3);
307 }
308 p = bufp->page;
309 bp = (u_short *)p;
310 ndx = 1;
311 }
312
313 if ( (bytes != ksize) || bcmp ( p+bp[ndx], kkey, bytes )) {
314#ifdef HASH_STATISTICS
315 hash_collisions++;
316#endif
317 return(-2);
318 }
319 else return (ndx);
320}
321
322
323/*
324 Given the buffer pointer of the first overflow page of a big pair,
325 find the end of the big pair
326
327 This will set bpp to the buffer header of the last page of the big pair.
328 It will return the pageno of the overflow page following the last page of
329 the pair; 0 if there isn't any (i.e. big pair is the last key in the
330 bucket)
331*/
332extern u_short
333__find_last_page ( bpp )
334BUFHEAD **bpp;
335{
336 int n;
337 u_short pageno;
338 BUFHEAD *bufp = *bpp;
339 u_short *bp = (u_short *)bufp->page;
340
341 while ( 1 ) {
342 n = bp[0];
343
344 /*
345 This is the last page if:
346 the tag is FULL_KEY_DATA and either
347 only 2 entries
348 OVFLPAGE marker is explicit
349 there is freespace on the page
350 */
351 if ( bp[2] == FULL_KEY_DATA &&
352 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)) ) ) break;
353
354 pageno = bp[n-1];
355 bufp = __get_buf ( pageno, bufp, 0 );
356 if ( !bufp ) return (0); /* Need to indicate an error! */
357 bp = (u_short *)bufp->page;
358 }
359
360 *bpp = bufp;
361 if ( bp[0] > 2 ) return ( bp[3] );
362 else return(0);
363}
364
365
366/*
367 Return the data for the key/data pair
368 that begins on this page at this index
369 (index should always be 1)
370*/
371extern int
372__big_return ( bufp, ndx, val, set_current )
373BUFHEAD *bufp;
374int ndx;
375DBT *val;
376int set_current;
377{
378 BUFHEAD *save_p;
379 u_short save_addr;
380 u_short *bp = (u_short *)bufp->page;
381 u_short off, len;
382 char *cp, *tp;
383
384 while ( bp[ndx+1] == PARTIAL_KEY ) {
385 bufp = __get_buf ( bp[bp[0]-1], bufp, 0 );
386 if ( !bufp ) return(-1);
387 bp = (u_short *)bufp->page;
388 ndx = 1;
389 }
390
391 if ( bp[ndx+1] == FULL_KEY ) {
392 bufp = __get_buf ( bp[bp[0]-1], bufp, 0 );
393 if ( !bufp ) return(-1);
394 bp = (u_short *)bufp->page;
395 save_p = bufp;
396 save_addr = save_p->addr;
397 off = bp[1];
398 len = 0;
399 } else if (!FREESPACE(bp)) {
400 /*
401 This is a hack. We can't distinguish between
402 FULL_KEY_DATA that contains complete data or
403 incomplete data, so we require that if the
404 data is complete, there is at least 1 byte
405 of free space left.
406 */
407 off = bp[bp[0]];
408 len = bp[1] - off;
409 save_p = bufp;
410 save_addr = bufp->addr;
411 bufp = __get_buf ( bp[bp[0]-1], bufp, 0 );
412 if ( !bufp ) return(-1);
413 bp = (u_short *)bufp->page;
414 } else {
415 /* The data is all on one page */
416 tp = (char *)bp;
417 off = bp[bp[0]];
c6ad6315 418 val->data = (u_char *)tp + off;
ab4abb4f
KB
419 val->size = bp[1] - off;
420 if ( set_current ) {
421 if ( bp[0] == 2 ) { /* No more buckets in chain */
422 hashp->cpage = NULL;
423 hashp->cbucket++;
424 hashp->cndx=1;
425 } else {
426 hashp->cpage = __get_buf ( bp[bp[0]-1], bufp, 0 );
427 if ( !hashp->cpage )return(-1);
428 hashp->cndx = 1;
429 if ( !((u_short *)hashp->cpage->page)[0] ) {
430 hashp->cbucket++;
431 hashp->cpage = NULL;
432 }
433 }
434 }
435 return(0);
436 }
437
438 val->size = collect_data ( bufp, len, set_current );
439 if ( val->size == -1 ) {
440 return(-1);
441 }
442 if ( save_p->addr != save_addr ) {
443 /* We are pretty short on buffers */
444 errno = EINVAL; /* OUT OF BUFFERS */
445 return(-1);
446 }
447 bcopy ( (save_p->page)+off, hashp->tmp_buf, len );
c6ad6315 448 val->data = (u_char *)hashp->tmp_buf;
ab4abb4f
KB
449 return(0);
450}
451
452/*
453 Count how big the total datasize is by
454 recursing through the pages. Then allocate
455 a buffer and copy the data as you recurse up.
456*/
457static int
458collect_data ( bufp, len, set )
459BUFHEAD *bufp;
460int len;
461int set;
462{
463 register char *p = bufp->page;
464 register u_short *bp = (u_short *)p;
465 u_short save_addr;
466 int mylen, totlen;
467 BUFHEAD *xbp;
468
469 mylen = hashp->BSIZE - bp[1];
470 save_addr = bufp->addr;
471
472 if ( bp[2] == FULL_KEY_DATA ) { /* End of Data */
473 totlen = len + mylen;
474 if ( hashp->tmp_buf ) free (hashp->tmp_buf);
475 hashp->tmp_buf = (char *)malloc ( totlen );
476 if ( !hashp->tmp_buf ) {
477 return(-1);
478 }
479 if ( set ) {
480 hashp->cndx = 1;
481 if ( bp[0] == 2 ) { /* No more buckets in chain */
482 hashp->cpage = NULL;
483 hashp->cbucket++;
484 } else {
485 hashp->cpage = __get_buf ( bp[bp[0]-1], bufp, 0 );
486 if (!hashp->cpage) {
487 return(-1);
488 } else if ( !((u_short *)hashp->cpage->page)[0] ) {
489 hashp->cbucket++;
490 hashp->cpage = NULL;
491 }
492 }
493 }
494 } else {
495 xbp = __get_buf ( bp[bp[0]-1], bufp, 0 );
496 if ( !xbp || ((totlen = collect_data ( xbp, len + mylen, set )) < 1) ) {
497 return(-1);
498 }
499 }
500 if ( bufp->addr != save_addr ) {
501 errno = EINVAL; /* Out of buffers */
502 return(-1);
503 }
504 bcopy ( (bufp->page) + bp[1], &hashp->tmp_buf[len], mylen );
505 return ( totlen );
506}
507
508/*
509 Fill in the key and data
510 for this big pair
511*/
512extern int
513__big_keydata ( bufp, ndx, key, val, set )
514BUFHEAD *bufp;
515int ndx;
516DBT *key, *val;
517int set;
518{
519 key->size = collect_key ( bufp, 0, val, set );
520 if ( key->size == -1 ) {
521 return (-1);
522 }
c6ad6315 523 key->data = (u_char *)hashp->tmp_key;
ab4abb4f
KB
524 return(0);
525}
526
527/*
528 Count how big the total key size is by
529 recursing through the pages. Then collect
530 the data, allocate a buffer and copy the key as
531 you recurse up.
532*/
533static int
534collect_key ( bufp, len, val, set )
535BUFHEAD *bufp;
536int len;
537DBT *val;
538int set;
539{
540 char *p = bufp->page;
541 u_short *bp = (u_short *)p;
542 u_short save_addr;
543 int mylen, totlen;
544 BUFHEAD *xbp;
545
546 mylen = hashp->BSIZE - bp[1];
547
548 save_addr = bufp->addr;
549 totlen = len + mylen;
550 if ( bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA ) {/* End of Key */
551 if ( hashp->tmp_key ) free (hashp->tmp_key);
552 hashp->tmp_key = (char *)malloc ( totlen );
553 if ( !hashp->tmp_key ) {
554 return(-1);
555 }
556 __big_return ( bufp, 1, val, set );
557 } else {
558 xbp = __get_buf (bp[bp[0]-1], bufp, 0);
559 if ( !xbp || ((totlen = collect_key (xbp, totlen, val, set)) < 1 ) ) {
560 return(-1);
561 }
562 }
563 if ( bufp->addr != save_addr ) {
564 errno = EINVAL; /* MIS -- OUT OF BUFFERS */
565 return (-1);
566 }
567 bcopy ( (bufp->page) + bp[1], &hashp->tmp_key[len], mylen );
568 return ( totlen );
569}
570
571
572/*
573 return 0 => OK
574 -1 => error
575*/
576extern int
577__big_split ( op, np, big_keyp, addr, obucket, ret )
578BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */
579BUFHEAD *np; /* Pointer to new bucket page */
580BUFHEAD *big_keyp; /* Pointer to first page containing the big key/data */
581u_short addr; /* Address of big_keyp */
096e4a66 582u_int obucket; /* Old Bucket */
ab4abb4f
KB
583SPLIT_RETURN *ret;
584{
585 register u_short *prev_pagep;
586 register BUFHEAD *tmpp;
587 register u_short *tp;
588 BUFHEAD *bp = big_keyp;
589 u_short off, free_space;
590 u_short n;
591
592 DBT key, val;
593
096e4a66 594 u_int change;
ab4abb4f
KB
595
596 /* Now figure out where the big key/data goes */
597 if (__big_keydata ( big_keyp, 1, &key, &val, 0 )) {
598 return(-1);
599 }
600 change = (__call_hash ( key.data, key.size ) != obucket );
601
602 if ( ret->next_addr = __find_last_page ( &big_keyp ) ) {
603 if (!(ret->nextp = __get_buf ( ret->next_addr, big_keyp, 0 ))) {
604 return(-1);;
605 }
606 } else {
607 ret->nextp = NULL;
608 }
609
610 /* Now make one of np/op point to the big key/data pair */
611 assert(np->ovfl == NULL);
612 if ( change ) tmpp = np;
613 else tmpp = op;
614
615 tmpp->flags |= BUF_MOD;
616#ifdef DEBUG1
617 fprintf ( stderr, "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
618 (tmpp->ovfl?tmpp->ovfl->addr:0),
619 (bp?bp->addr:0) );
620#endif
621 tmpp->ovfl = bp; /* one of op/np point to big_keyp */
622 tp = (u_short *)tmpp->page;
623 assert ( FREESPACE(tp) >= OVFLSIZE);
624 n = tp[0];
625 off = OFFSET(tp);
626 free_space = FREESPACE(tp);
627 tp[++n] = addr;
628 tp[++n] = OVFLPAGE;
629 tp[0] = n;
630 OFFSET(tp) = off;
631 FREESPACE(tp) = free_space - OVFLSIZE;
632
633 /*
634 Finally, set the new and old return values.
635 BIG_KEYP contains a pointer to the last page of the big key_data pair.
636 Make sure that big_keyp has no following page (2 elements) or create
637 an empty following page.
638 */
639
640 ret->newp = np;
641 ret->oldp = op;
642
643 tp = (u_short *)big_keyp->page;
644 big_keyp->flags |= BUF_MOD;
645 if ( tp[0] > 2 ) {
646 /*
647 There may be either one or two offsets on this page
648 If there is one, then the overflow page is linked on
649 normally and tp[4] is OVFLPAGE. If there are two, tp[4]
650 contains the second offset and needs to get stuffed in
651 after the next overflow page is added
652 */
653 n = tp[4];
654 free_space = FREESPACE(tp);
655 off = OFFSET(tp);
656 tp[0] -= 2;
657 FREESPACE(tp) = free_space + OVFLSIZE;
658 OFFSET(tp) = off;
659 tmpp = __add_ovflpage ( big_keyp );
660 if ( !tmpp ) {
661 return(-1);
662 }
663 tp[4] = n;
664 } else {
665 tmpp = big_keyp;
666 }
667
668 if ( change ) ret->newp = tmpp;
669 else ret->oldp = tmpp;
670
671 return(0);
672}