* Copyright (c) 1991 The Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* %sccs.include.redist.c%
* @(#)btree.h 5.3 (Berkeley) %G%
#define DEFMAXKEYPAGE (0) /* Maximum keys per page */
#define DEFMINKEYPAGE (2) /* Minimum keys per page */
#define MINCACHE (5) /* Minimum cached pages */
#define MINPSIZE (512) /* Minimum page size */
* Page 0 of a btree file contains a BTMETA structure. The rest of the first
* page is empty, so that all disk operations are page-aligned. This page is
* also used as an out-of-band page, i.e. page pointers that point to nowhere
* point to page 0. The m_nrecs field is used only the RECNO code. This is
* because the btree doesn't really need it and it requires that put or delete
* calls modify the meta data.
#define P_INVALID 0 /* Invalid tree page number. */
#define P_META 0 /* Tree meta-info page number. */
#define P_ROOT 1 /* Tree root page number. */
u_long m_magic
; /* magic number */
u_long m_version
; /* version */
u_long m_psize
; /* page size */
u_long m_free
; /* page number of first free page */
u_long m_nrecs
; /* R: number of records */
#define SAVEMETA (BTF_NODUPS | BTF_RECNO)
u_long m_flags
; /* bt_flags & SAVEMETA */
u_long m_lorder
; /* byte order */
* There are five page layouts in the btree: btree internal pages, btree leaf
* pages, recno internal pages, recno leaf pages and overflow pages. Each type
* of page starts with a page header as typed by PAGE.
pgno_t pgno
; /* this page's page number */
pgno_t prevpg
; /* left sibling */
pgno_t nextpg
; /* right sibling */
#define P_BINTERNAL 0x01 /* btree internal page */
#define P_BLEAF 0x02 /* leaf page */
#define P_OVERFLOW 0x04 /* overflow page */
#define P_RINTERNAL 0x08 /* recno internal page */
#define P_RLEAF 0x10 /* leaf page */
#define P_TYPE 0x1f /* type mask */
#define P_PRESERVE 0x20 /* never delete this chain of pages */
index_t lower
; /* lower bound of free space on page */
index_t upper
; /* upper bound of free space on page */
index_t linp
[1]; /* long-aligned VARIABLE LENGTH DATA */
/* First and next index. */
#define BTDATAOFF (sizeof(PAGE) - sizeof(index_t))
#define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(index_t))
* For pages other than overflow pages, there is an array of offsets into the
* rest of the page immediately following the page header. Each offset is to
* an item which is unique to the type of page. The h_lower offset is just
* past the last filled-in index. The h_upper offset is the first item on the
* page. Offsets are from the beginning of the page.
* If an item is too big to store on a single page, a flag is set and the item
* is a { page, size } pair such that the page is the first page of an overflow
* chain with size bytes of item. Overflow pages are simply bytes without any
* The size and page number fields in the items are long aligned so they can be
* manipulated without copying.
#define LALIGN(l) (((l) + sizeof(u_long) - 1) & ~(sizeof(u_long) - 1))
#define NOVFLSIZE (sizeof(pgno_t) + sizeof(size_t))
* For the btree internal pages, the item is a key. BINTERNALs are {key, pgno}
* pairs, such that the key compares less than or equal to all of the records
* on that page. For a tree without duplicate keys, an internal page with two
* consecutive keys, a and b, will have all records greater than or equal to a
* and less than b stored on the page associated with a. Duplicate keys are
* somewhat special and can cause duplicate internal and leaf page records and
* some minor modifications of the above rule.
typedef struct BINTERNAL
{
size_t ksize
; /* key size */
pgno_t pgno
; /* page number stored on */
#define P_BIGDATA 0x01 /* overflow data */
#define P_BIGKEY 0x02 /* overflow key */
char bytes
[1]; /* data */
/* Get the page's BINTERNAL structure at index indx. */
#define GETBINTERNAL(pg, indx) \
((BINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
#define NBINTERNAL(len) \
LALIGN(sizeof(size_t) + sizeof(pgno_t) + sizeof(u_char) + (len))
/* Copy a BINTERNAL entry to the page. */
#define WR_BINTERNAL(p, size, pgno, flags) { \
* For the recno internal pages, the item is a page number with the number of
* keys found on that page and below.
typedef struct RINTERNAL
{
recno_t nrecs
; /* number of records */
pgno_t pgno
; /* page number stored below */
/* Get the page's RINTERNAL structure at index indx. */
#define GETRINTERNAL(pg, indx) \
((RINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
LALIGN(sizeof(recno_t) + sizeof(pgno_t))
/* Copy a RINTERAL entry to the page. */
#define WR_RINTERNAL(p, nrecs, pgno) { \
/* For the btree leaf pages, the item is a key and data pair. */
size_t ksize
; /* size of key */
size_t dsize
; /* size of data */
u_char flags
; /* P_BIGDATA, P_BIGKEY */
char bytes
[1]; /* data */
/* Get the page's BLEAF structure at index indx. */
#define GETBLEAF(pg, indx) \
((BLEAF *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
LALIGN(sizeof(size_t) + sizeof(size_t) + sizeof(u_char) + \
/* Get the number of bytes in the user's key/data pair. */
#define NBLEAFDBT(ksize, dsize) \
LALIGN(sizeof(size_t) + sizeof(size_t) + sizeof(u_char) + \
/* Copy a BLEAF entry to the page. */
#define WR_BLEAF(p, key, data, flags) { \
*(size_t *)p = key->size; \
*(size_t *)p = data->size; \
bcopy(key->data, p, key->size); \
bcopy(data->data, p, data->size); \
/* For the recno leaf pages, the item is a data entry. */
size_t dsize
; /* size of data */
u_char flags
; /* P_BIGDATA */
/* Get the page's RLEAF structure at index indx. */
#define GETRLEAF(pg, indx) \
((RLEAF *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
LALIGN(sizeof(size_t) + sizeof(u_char) + (p)->dsize)
/* Get the number of bytes from the user's data. */
#define NRLEAFDBT(dsize) \
LALIGN(sizeof(size_t) + sizeof(u_char) + (dsize))
/* Copy a RLEAF entry to the page. */
#define WR_RLEAF(p, data, flags) { \
*(size_t *)p = data->size; \
bcopy(data->data, p, data->size); \
* A record in the tree is either a pointer to a page and an index in the page
* or a page number and an index. These structures are used as a cursor, stack
* entry and search returns as well as to pass records to other routines.
* One comment about searches. Internal page searches must find the largest
* record less than key in the tree so that descents work. Leaf page searches
* must find the smallest record greater than key so that the returned index
* is the record's correct position for insertion.
pgno_t pgno
; /* the page number */
index_t index
; /* the index on the page */
PAGE
*page
; /* the (pinned) page */
index_t index
; /* the index on the page */
/* The in-memory btree/recno data structure. */
MPOOL
*bt_mp
; /* memory pool cookie */
DB
*bt_dbp
; /* pointer to enclosing DB */
EPGNO bt_bcursor
; /* btree cursor */
recno_t bt_rcursor
; /* R: recno cursor */
#define BT_POP(t) (t->bt_sp ? t->bt_stack + --t->bt_sp : NULL)
#define BT_CLR(t) (t->bt_sp = 0)
EPGNO
*bt_stack
; /* stack of parent pages */
u_int bt_sp
; /* current stack pointer */
u_int bt_maxstack
; /* largest stack */
char *bt_kbuf
; /* key buffer */
size_t bt_kbufsz
; /* key buffer size */
char *bt_dbuf
; /* data buffer */
size_t bt_dbufsz
; /* data buffer size */
int bt_fd
; /* tree file descriptor */
FILE *bt_rfp
; /* R: record FILE pointer */
int bt_rfd
; /* R: record file descriptor */
pgno_t bt_free
; /* next free page */
size_t bt_psize
; /* page size */
int bt_maxkeypage
; /* maximum keys per page */
size_t bt_minkeypage
; /* minimum keys per page */
int bt_lorder
; /* byte order */
enum { NOT
, BACK
, FORWARD
, } bt_order
;
EPGNO bt_last
; /* last insert */
/* B: key comparison function */
int (*bt_cmp
) __P((const DBT
*, const DBT
*));
/* B: prefix comparison function */
int (*bt_pfx
) __P((const DBT
*, const DBT
*));
/* R: recno input function */
int (*bt_irec
) __P((struct BTREE
*, recno_t
));
recno_t bt_nrecs
; /* R: number of records in the tree */
caddr_t bt_smap
; /* R: start of mapped space */
caddr_t bt_emap
; /* R: end of mapped space */
size_t bt_reclen
; /* R: fixed record length */
u_char bt_bval
; /* R: delimiting byte/pad character */
#define BTF_DELCRSR 0x001 /* B: delete cursor when closes/moves */
#define BTF_FIXEDLEN 0x002 /* fixed length records */
#define BTF_INMEM 0x004 /* in-memory tree */
#define BTF_METADIRTY 0x008 /* B: need to write meta-data */
#define BTF_MODIFIED 0x010 /* tree modified */
#define BTF_NODUPS 0x020 /* no duplicate keys permitted */
#define BTF_RDONLY 0x040 /* read-only tree */
#define BTF_RECNO 0x080 /* record oriented tree */
#define BTF_SEQINIT 0x100 /* sequential scan initialized */
u_long bt_flags
; /* btree state */
#define ISSET(t, f) ((t)->bt_flags & (f))
#define NOTSET(t, f) (!((t)->bt_flags & (f)))
#define SET(t, f) ((t)->bt_flags |= (f))
#define UNSET(t, f) ((t)->bt_flags &= ~(f))