BSD 4_3_Net_2 development
[unix-history] / .ref-8c8a5b54e79564c14fc7a2823a21a8f048449bcf / usr / src / lib / libc / db / hash / hash_bigkey.c
CommitLineData
ab4abb4f
KB
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * %sccs.include.redist.c%
9 */
10
11#if defined(LIBC_SCCS) && !defined(lint)
096e4a66 12static char sccsid[] = "@(#)hash_bigkey.c 5.5 (Berkeley) %G%";
ab4abb4f
KB
13#endif /* LIBC_SCCS and not lint */
14
15/******************************************************************************
16
17PACKAGE: hash
18
19DESCRIPTION:
20 Big key/data handling for the hashing package.
21
22ROUTINES:
23 External
24 __big_keydata
25 __big_split
26 __big_insert
27 __big_return
28 __big_delete
29 __find_last_page
30 Internal
31 collect_key
32 collect_data
33******************************************************************************/
34/* Includes */
dbe8e807 35#include <sys/param.h>
ab4abb4f
KB
36#include <assert.h>
37#include <errno.h>
38#include <db.h>
39#include <stdio.h>
fb91cf55
KB
40#include <stdlib.h>
41#include <string.h>
ab4abb4f
KB
42#include "hash.h"
43#include "page.h"
44
45/* Externals */
46/* buf.c */
47extern BUFHEAD *__get_buf();
48
096e4a66
KB
49/* dynahash.c */
50extern u_int call_hash();
51
ab4abb4f
KB
52/* page.c */
53extern BUFHEAD *__add_ovflpage();
54
55/* My externals */
56extern int __big_keydata();
57extern int __big_split();
58extern int __big_insert();
59extern int __big_return();
60extern int __big_delete();
61extern u_short __find_last_page();
62extern int __find_bigpair();
63
64/* My internals */
65static int collect_key();
66static int collect_data();
67
68#ifdef HASH_STATISTICS
69extern long hash_accesses, hash_collisions, hash_expansions, hash_overflows;
70#endif
71/*
72Big_insert
73
74You need to do an insert and the key/data pair is too big
750 ==> OK
76-1 ==> ERROR
77*/
78extern int
79__big_insert ( bufp, key, val )
80BUFHEAD *bufp;
81DBT *key, *val;
82{
83 char *cp = bufp->page; /* Character pointer of p */
84 register u_short *p = (u_short *)cp;
85 char *key_data, *val_data;
86 int key_size, val_size;
87 int n;
88 u_short space, move_bytes, off;
89
c6ad6315 90 key_data = (char *)key->data;
ab4abb4f 91 key_size = key->size;
c6ad6315 92 val_data = (char *)val->data;
ab4abb4f
KB
93 val_size = val->size;
94
95 /* First move the Key */
96 for ( space = FREESPACE(p) - BIGOVERHEAD;
97 key_size;
98 space = FREESPACE(p) - BIGOVERHEAD ) {
99 move_bytes = MIN(space, key_size);
100 off = OFFSET(p) - move_bytes;
101 bcopy (key_data, cp+off, move_bytes );
102 key_size -= move_bytes;
103 key_data += move_bytes;
104 n = p[0];
105 p[++n] = off;
106 p[0] = ++n;
107 FREESPACE(p) = off - PAGE_META(n);
108 OFFSET(p) = off;
109 p[n] = PARTIAL_KEY;
110 bufp = __add_ovflpage(bufp);
111 if ( !bufp ) {
112 return(-1);
113 }
114 n = p[0];
115 if ( !key_size ) {
116 if ( FREESPACE(p) ) {
117 move_bytes = MIN (FREESPACE(p), val_size);
118 off = OFFSET(p) - move_bytes;
119 p[n] = off;
120 bcopy ( val_data, cp + off, move_bytes );
121 val_data += move_bytes;
122 val_size -= move_bytes;
123 p[n-2] = FULL_KEY_DATA;
124 FREESPACE(p) = FREESPACE(p) - move_bytes;
125 OFFSET(p) = off;
126 }
127 else p[n-2] = FULL_KEY;
128 }
129 p = (u_short *)bufp->page;
130 cp = bufp->page;
131 bufp->flags |= BUF_MOD;
132 }
133
134 /* Now move the data */
135 for ( space = FREESPACE(p) - BIGOVERHEAD;
136 val_size;
137 space = FREESPACE(p) - BIGOVERHEAD ) {
138 move_bytes = MIN(space, val_size);
139 /*
140 Here's the hack to make sure that if the data ends
141 on the same page as the key ends, FREESPACE is
142 at least one
143 */
144 if ( space == val_size && val_size == val->size ) {
145 move_bytes--;
146 }
147 off = OFFSET(p) - move_bytes;
148 bcopy (val_data, cp+off, move_bytes );
149 val_size -= move_bytes;
150 val_data += move_bytes;
151 n = p[0];
152 p[++n] = off;
153 p[0] = ++n;
154 FREESPACE(p) = off - PAGE_META(n);
155 OFFSET(p) = off;
156 if ( val_size ) {
157 p[n] = FULL_KEY;
158 bufp = __add_ovflpage (bufp);
159 if ( !bufp ) {
160 return(-1);
161 }
162 cp = bufp->page;
163 p = (u_short *)cp;
164 } else {
165 p[n] = FULL_KEY_DATA;
166 }
167 bufp->flags |= BUF_MOD;
168 }
169 return(0);
170}
171
172/*
173 Called when bufp's page contains a partial key (index should be 1)
174
175 All pages in the big key/data pair except bufp are freed. We cannot
176 free bufp because the page pointing to it is lost and we can't
177 get rid of its pointer.
178
179 Returns 0 => OK
180 -1 => ERROR
181*/
182extern int
183__big_delete (bufp, ndx)
184BUFHEAD *bufp;
185int ndx;
186{
187 register BUFHEAD *rbufp = bufp;
188 register BUFHEAD *last_bfp = NULL;
189 char *cp;
190 u_short *bp = (u_short *)bufp->page;
191 u_short *xbp;
192 u_short pageno = 0;
193 u_short off, free_sp;
194 int key_done = 0;
195 int n;
196
197 while (!key_done || (bp[2] != FULL_KEY_DATA)) {
198 if ( bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA ) key_done = 1;
199
200 /*
201 If there is freespace left on a FULL_KEY_DATA page,
202 then the data is short and fits entirely on this
203 page, and this is the last page.
204 */
205 if ( bp[2] == FULL_KEY_DATA && FREESPACE(bp) ) break;
206 pageno = bp[bp[0]-1];
207 rbufp->flags |= BUF_MOD;
208 rbufp = __get_buf ( pageno, rbufp, 0 );
209 if ( last_bfp ) __free_ovflpage(last_bfp);
210 last_bfp = rbufp;
211 if ( !rbufp ) return(-1); /* Error */
212 bp = (u_short *)rbufp->page;
213 }
214
215 /*
216 If we get here then rbufp points to the last page of
217 the big key/data pair. Bufp points to the first
218 one -- it should now be empty pointing to the next
219 page after this pair. Can't free it because we don't
220 have the page pointing to it.
221 */
222
223 /* This is information from the last page of the pair */
224 n = bp[0];
225 pageno = bp[n-1];
226
227 /* Now, bp is the first page of the pair */
228 bp = (u_short *)bufp->page;
229 if ( n > 2 ) {
230 /* There is an overflow page */
231 bp[1] = pageno;
232 bp[2] = OVFLPAGE;
233 bufp->ovfl = rbufp->ovfl;
234 } else {
235 /* This is the last page */
236 bufp->ovfl = NULL;
237 }
238 n -= 2;
239 bp[0] = n;
240 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
241 OFFSET(bp) = hashp->BSIZE - 1;
242
243 bufp->flags |= BUF_MOD;
244 if ( rbufp ) __free_ovflpage(rbufp);
245 if ( last_bfp != rbufp ) __free_ovflpage(last_bfp);
246
247 hashp->NKEYS--;
248 return(0);
249}
250
251/*
252 0 = key not found
253 -1 = get next overflow page
254 -2 means key not found and this is big key/data
255 -3 error
256*/
257extern int
258__find_bigpair(bufp, ndx, key, size )
259BUFHEAD *bufp;
260int ndx;
261char *key;
262int size;
263{
264 register u_short *bp = (u_short *)bufp->page;
265 register char *p = bufp->page;
266 int ksize = size;
267 char *kkey = key;
268 u_short bytes;
269
270
271 for ( bytes = hashp->BSIZE - bp[ndx];
272 bytes <= size && bp[ndx+1] == PARTIAL_KEY;
273 bytes = hashp->BSIZE - bp[ndx] ) {
274
275 if ( bcmp ( p+bp[ndx], kkey, bytes ))return(-2);
276 kkey += bytes;
277 ksize -= bytes;
278 bufp = __get_buf ( bp[ndx+2], bufp, 0 );
279 if ( !bufp ) {
280 return(-3);
281 }
282 p = bufp->page;
283 bp = (u_short *)p;
284 ndx = 1;
285 }
286
287 if ( (bytes != ksize) || bcmp ( p+bp[ndx], kkey, bytes )) {
288#ifdef HASH_STATISTICS
289 hash_collisions++;
290#endif
291 return(-2);
292 }
293 else return (ndx);
294}
295
296
297/*
298 Given the buffer pointer of the first overflow page of a big pair,
299 find the end of the big pair
300
301 This will set bpp to the buffer header of the last page of the big pair.
302 It will return the pageno of the overflow page following the last page of
303 the pair; 0 if there isn't any (i.e. big pair is the last key in the
304 bucket)
305*/
306extern u_short
307__find_last_page ( bpp )
308BUFHEAD **bpp;
309{
310 int n;
311 u_short pageno;
312 BUFHEAD *bufp = *bpp;
313 u_short *bp = (u_short *)bufp->page;
314
315 while ( 1 ) {
316 n = bp[0];
317
318 /*
319 This is the last page if:
320 the tag is FULL_KEY_DATA and either
321 only 2 entries
322 OVFLPAGE marker is explicit
323 there is freespace on the page
324 */
325 if ( bp[2] == FULL_KEY_DATA &&
326 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)) ) ) break;
327
328 pageno = bp[n-1];
329 bufp = __get_buf ( pageno, bufp, 0 );
330 if ( !bufp ) return (0); /* Need to indicate an error! */
331 bp = (u_short *)bufp->page;
332 }
333
334 *bpp = bufp;
335 if ( bp[0] > 2 ) return ( bp[3] );
336 else return(0);
337}
338
339
340/*
341 Return the data for the key/data pair
342 that begins on this page at this index
343 (index should always be 1)
344*/
345extern int
346__big_return ( bufp, ndx, val, set_current )
347BUFHEAD *bufp;
348int ndx;
349DBT *val;
350int set_current;
351{
352 BUFHEAD *save_p;
353 u_short save_addr;
354 u_short *bp = (u_short *)bufp->page;
355 u_short off, len;
356 char *cp, *tp;
357
358 while ( bp[ndx+1] == PARTIAL_KEY ) {
359 bufp = __get_buf ( bp[bp[0]-1], bufp, 0 );
360 if ( !bufp ) return(-1);
361 bp = (u_short *)bufp->page;
362 ndx = 1;
363 }
364
365 if ( bp[ndx+1] == FULL_KEY ) {
366 bufp = __get_buf ( bp[bp[0]-1], bufp, 0 );
367 if ( !bufp ) return(-1);
368 bp = (u_short *)bufp->page;
369 save_p = bufp;
370 save_addr = save_p->addr;
371 off = bp[1];
372 len = 0;
373 } else if (!FREESPACE(bp)) {
374 /*
375 This is a hack. We can't distinguish between
376 FULL_KEY_DATA that contains complete data or
377 incomplete data, so we require that if the
378 data is complete, there is at least 1 byte
379 of free space left.
380 */
381 off = bp[bp[0]];
382 len = bp[1] - off;
383 save_p = bufp;
384 save_addr = bufp->addr;
385 bufp = __get_buf ( bp[bp[0]-1], bufp, 0 );
386 if ( !bufp ) return(-1);
387 bp = (u_short *)bufp->page;
388 } else {
389 /* The data is all on one page */
390 tp = (char *)bp;
391 off = bp[bp[0]];
c6ad6315 392 val->data = (u_char *)tp + off;
ab4abb4f
KB
393 val->size = bp[1] - off;
394 if ( set_current ) {
395 if ( bp[0] == 2 ) { /* No more buckets in chain */
396 hashp->cpage = NULL;
397 hashp->cbucket++;
398 hashp->cndx=1;
399 } else {
400 hashp->cpage = __get_buf ( bp[bp[0]-1], bufp, 0 );
401 if ( !hashp->cpage )return(-1);
402 hashp->cndx = 1;
403 if ( !((u_short *)hashp->cpage->page)[0] ) {
404 hashp->cbucket++;
405 hashp->cpage = NULL;
406 }
407 }
408 }
409 return(0);
410 }
411
412 val->size = collect_data ( bufp, len, set_current );
413 if ( val->size == -1 ) {
414 return(-1);
415 }
416 if ( save_p->addr != save_addr ) {
417 /* We are pretty short on buffers */
418 errno = EINVAL; /* OUT OF BUFFERS */
419 return(-1);
420 }
421 bcopy ( (save_p->page)+off, hashp->tmp_buf, len );
c6ad6315 422 val->data = (u_char *)hashp->tmp_buf;
ab4abb4f
KB
423 return(0);
424}
425
426/*
427 Count how big the total datasize is by
428 recursing through the pages. Then allocate
429 a buffer and copy the data as you recurse up.
430*/
431static int
432collect_data ( bufp, len, set )
433BUFHEAD *bufp;
434int len;
435int set;
436{
437 register char *p = bufp->page;
438 register u_short *bp = (u_short *)p;
439 u_short save_addr;
440 int mylen, totlen;
441 BUFHEAD *xbp;
442
443 mylen = hashp->BSIZE - bp[1];
444 save_addr = bufp->addr;
445
446 if ( bp[2] == FULL_KEY_DATA ) { /* End of Data */
447 totlen = len + mylen;
448 if ( hashp->tmp_buf ) free (hashp->tmp_buf);
449 hashp->tmp_buf = (char *)malloc ( totlen );
450 if ( !hashp->tmp_buf ) {
451 return(-1);
452 }
453 if ( set ) {
454 hashp->cndx = 1;
455 if ( bp[0] == 2 ) { /* No more buckets in chain */
456 hashp->cpage = NULL;
457 hashp->cbucket++;
458 } else {
459 hashp->cpage = __get_buf ( bp[bp[0]-1], bufp, 0 );
460 if (!hashp->cpage) {
461 return(-1);
462 } else if ( !((u_short *)hashp->cpage->page)[0] ) {
463 hashp->cbucket++;
464 hashp->cpage = NULL;
465 }
466 }
467 }
468 } else {
469 xbp = __get_buf ( bp[bp[0]-1], bufp, 0 );
470 if ( !xbp || ((totlen = collect_data ( xbp, len + mylen, set )) < 1) ) {
471 return(-1);
472 }
473 }
474 if ( bufp->addr != save_addr ) {
475 errno = EINVAL; /* Out of buffers */
476 return(-1);
477 }
478 bcopy ( (bufp->page) + bp[1], &hashp->tmp_buf[len], mylen );
479 return ( totlen );
480}
481
482/*
483 Fill in the key and data
484 for this big pair
485*/
486extern int
487__big_keydata ( bufp, ndx, key, val, set )
488BUFHEAD *bufp;
489int ndx;
490DBT *key, *val;
491int set;
492{
493 key->size = collect_key ( bufp, 0, val, set );
494 if ( key->size == -1 ) {
495 return (-1);
496 }
c6ad6315 497 key->data = (u_char *)hashp->tmp_key;
ab4abb4f
KB
498 return(0);
499}
500
501/*
502 Count how big the total key size is by
503 recursing through the pages. Then collect
504 the data, allocate a buffer and copy the key as
505 you recurse up.
506*/
507static int
508collect_key ( bufp, len, val, set )
509BUFHEAD *bufp;
510int len;
511DBT *val;
512int set;
513{
514 char *p = bufp->page;
515 u_short *bp = (u_short *)p;
516 u_short save_addr;
517 int mylen, totlen;
518 BUFHEAD *xbp;
519
520 mylen = hashp->BSIZE - bp[1];
521
522 save_addr = bufp->addr;
523 totlen = len + mylen;
524 if ( bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA ) {/* End of Key */
525 if ( hashp->tmp_key ) free (hashp->tmp_key);
526 hashp->tmp_key = (char *)malloc ( totlen );
527 if ( !hashp->tmp_key ) {
528 return(-1);
529 }
530 __big_return ( bufp, 1, val, set );
531 } else {
532 xbp = __get_buf (bp[bp[0]-1], bufp, 0);
533 if ( !xbp || ((totlen = collect_key (xbp, totlen, val, set)) < 1 ) ) {
534 return(-1);
535 }
536 }
537 if ( bufp->addr != save_addr ) {
538 errno = EINVAL; /* MIS -- OUT OF BUFFERS */
539 return (-1);
540 }
541 bcopy ( (bufp->page) + bp[1], &hashp->tmp_key[len], mylen );
542 return ( totlen );
543}
544
545
546/*
547 return 0 => OK
548 -1 => error
549*/
550extern int
551__big_split ( op, np, big_keyp, addr, obucket, ret )
552BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */
553BUFHEAD *np; /* Pointer to new bucket page */
554BUFHEAD *big_keyp; /* Pointer to first page containing the big key/data */
555u_short addr; /* Address of big_keyp */
096e4a66 556u_int obucket; /* Old Bucket */
ab4abb4f
KB
557SPLIT_RETURN *ret;
558{
559 register u_short *prev_pagep;
560 register BUFHEAD *tmpp;
561 register u_short *tp;
562 BUFHEAD *bp = big_keyp;
563 u_short off, free_space;
564 u_short n;
565
566 DBT key, val;
567
096e4a66 568 u_int change;
ab4abb4f
KB
569
570 /* Now figure out where the big key/data goes */
571 if (__big_keydata ( big_keyp, 1, &key, &val, 0 )) {
572 return(-1);
573 }
574 change = (__call_hash ( key.data, key.size ) != obucket );
575
576 if ( ret->next_addr = __find_last_page ( &big_keyp ) ) {
577 if (!(ret->nextp = __get_buf ( ret->next_addr, big_keyp, 0 ))) {
578 return(-1);;
579 }
580 } else {
581 ret->nextp = NULL;
582 }
583
584 /* Now make one of np/op point to the big key/data pair */
585 assert(np->ovfl == NULL);
586 if ( change ) tmpp = np;
587 else tmpp = op;
588
589 tmpp->flags |= BUF_MOD;
590#ifdef DEBUG1
591 fprintf ( stderr, "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
592 (tmpp->ovfl?tmpp->ovfl->addr:0),
593 (bp?bp->addr:0) );
594#endif
595 tmpp->ovfl = bp; /* one of op/np point to big_keyp */
596 tp = (u_short *)tmpp->page;
597 assert ( FREESPACE(tp) >= OVFLSIZE);
598 n = tp[0];
599 off = OFFSET(tp);
600 free_space = FREESPACE(tp);
601 tp[++n] = addr;
602 tp[++n] = OVFLPAGE;
603 tp[0] = n;
604 OFFSET(tp) = off;
605 FREESPACE(tp) = free_space - OVFLSIZE;
606
607 /*
608 Finally, set the new and old return values.
609 BIG_KEYP contains a pointer to the last page of the big key_data pair.
610 Make sure that big_keyp has no following page (2 elements) or create
611 an empty following page.
612 */
613
614 ret->newp = np;
615 ret->oldp = op;
616
617 tp = (u_short *)big_keyp->page;
618 big_keyp->flags |= BUF_MOD;
619 if ( tp[0] > 2 ) {
620 /*
621 There may be either one or two offsets on this page
622 If there is one, then the overflow page is linked on
623 normally and tp[4] is OVFLPAGE. If there are two, tp[4]
624 contains the second offset and needs to get stuffed in
625 after the next overflow page is added
626 */
627 n = tp[4];
628 free_space = FREESPACE(tp);
629 off = OFFSET(tp);
630 tp[0] -= 2;
631 FREESPACE(tp) = free_space + OVFLSIZE;
632 OFFSET(tp) = off;
633 tmpp = __add_ovflpage ( big_keyp );
634 if ( !tmpp ) {
635 return(-1);
636 }
637 tp[4] = n;
638 } else {
639 tmpp = big_keyp;
640 }
641
642 if ( change ) ret->newp = tmpp;
643 else ret->oldp = tmpp;
644
645 return(0);
646}