Commit | Line | Data |
---|---|---|
43dfd197 | 1 | /*- |
027027db KB |
2 | * Copyright (c) 1990, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
43dfd197 KB |
4 | * |
5 | * %sccs.include.redist.c% | |
6 | */ | |
7 | ||
8 | #if defined(LIBC_SCCS) && !defined(lint) | |
027027db | 9 | static char sccsid[] = "@(#)mpool.c 8.1 (Berkeley) %G%"; |
43dfd197 KB |
10 | #endif /* LIBC_SCCS and not lint */ |
11 | ||
12 | #include <sys/param.h> | |
13 | #include <sys/stat.h> | |
94ac72c5 | 14 | |
43dfd197 | 15 | #include <errno.h> |
43dfd197 KB |
16 | #include <stdio.h> |
17 | #include <stdlib.h> | |
18 | #include <string.h> | |
94ac72c5 KB |
19 | #include <unistd.h> |
20 | ||
21 | #include <db.h> | |
43dfd197 KB |
22 | #define __MPOOLINTERFACE_PRIVATE |
23 | #include "mpool.h" | |
24 | ||
25 | static BKT *mpool_bkt __P((MPOOL *)); | |
26 | static BKT *mpool_look __P((MPOOL *, pgno_t)); | |
27 | static int mpool_write __P((MPOOL *, BKT *)); | |
28 | #ifdef DEBUG | |
af46d2af | 29 | static void __mpoolerr __P((const char *fmt, ...)); |
43dfd197 KB |
30 | #endif |
31 | ||
32 | /* | |
33 | * MPOOL_OPEN -- initialize a memory pool. | |
34 | * | |
35 | * Parameters: | |
36 | * key: Shared buffer key. | |
37 | * fd: File descriptor. | |
38 | * pagesize: File page size. | |
39 | * maxcache: Max number of cached pages. | |
40 | * | |
41 | * Returns: | |
42 | * MPOOL pointer, NULL on error. | |
43 | */ | |
44 | MPOOL * | |
45 | mpool_open(key, fd, pagesize, maxcache) | |
46 | DBT *key; | |
47 | int fd; | |
48 | pgno_t pagesize, maxcache; | |
49 | { | |
50 | struct stat sb; | |
51 | MPOOL *mp; | |
52 | int entry; | |
53 | ||
54 | if (fstat(fd, &sb)) | |
55 | return (NULL); | |
56 | /* XXX | |
57 | * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so | |
58 | * that stat(2) returns true for ISSOCK on pipes. Until then, this is | |
59 | * fairly close. | |
60 | */ | |
61 | if (!S_ISREG(sb.st_mode)) { | |
62 | errno = ESPIPE; | |
63 | return (NULL); | |
64 | } | |
65 | ||
66 | if ((mp = malloc(sizeof(MPOOL))) == NULL) | |
67 | return (NULL); | |
68 | mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; | |
69 | mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; | |
70 | for (entry = 0; entry < HASHSIZE; ++entry) | |
71 | mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = | |
72 | mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = | |
73 | (BKT *)&mp->hashtable[entry]; | |
74 | mp->curcache = 0; | |
75 | mp->maxcache = maxcache; | |
76 | mp->pagesize = pagesize; | |
77 | mp->npages = sb.st_size / pagesize; | |
78 | mp->fd = fd; | |
3f71d69e KB |
79 | mp->pgcookie = NULL; |
80 | mp->pgin = mp->pgout = NULL; | |
43dfd197 KB |
81 | |
82 | #ifdef STATISTICS | |
83 | mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = | |
84 | mp->pageget = mp->pagenew = mp->pageput = mp->pageread = | |
85 | mp->pagewrite = 0; | |
86 | #endif | |
87 | return (mp); | |
88 | } | |
89 | ||
90 | /* | |
91 | * MPOOL_FILTER -- initialize input/output filters. | |
92 | * | |
93 | * Parameters: | |
94 | * pgin: Page in conversion routine. | |
95 | * pgout: Page out conversion routine. | |
96 | * pgcookie: Cookie for page in/out routines. | |
97 | */ | |
98 | void | |
99 | mpool_filter(mp, pgin, pgout, pgcookie) | |
100 | MPOOL *mp; | |
101 | void (*pgin) __P((void *, pgno_t, void *)); | |
102 | void (*pgout) __P((void *, pgno_t, void *)); | |
103 | void *pgcookie; | |
104 | { | |
105 | mp->pgin = pgin; | |
106 | mp->pgout = pgout; | |
107 | mp->pgcookie = pgcookie; | |
108 | } | |
109 | ||
110 | /* | |
111 | * MPOOL_NEW -- get a new page | |
112 | * | |
113 | * Parameters: | |
114 | * mp: mpool cookie | |
115 | * pgnoadddr: place to store new page number | |
116 | * Returns: | |
117 | * RET_ERROR, RET_SUCCESS | |
118 | */ | |
119 | void * | |
120 | mpool_new(mp, pgnoaddr) | |
121 | MPOOL *mp; | |
122 | pgno_t *pgnoaddr; | |
123 | { | |
124 | BKT *b; | |
125 | BKTHDR *hp; | |
126 | ||
127 | #ifdef STATISTICS | |
128 | ++mp->pagenew; | |
129 | #endif | |
130 | /* | |
131 | * Get a BKT from the cache. Assign a new page number, attach it to | |
132 | * the hash and lru chains and return. | |
133 | */ | |
134 | if ((b = mpool_bkt(mp)) == NULL) | |
135 | return (NULL); | |
136 | *pgnoaddr = b->pgno = mp->npages++; | |
137 | b->flags = MPOOL_PINNED; | |
138 | inshash(b, b->pgno); | |
139 | inschain(b, &mp->lru); | |
140 | return (b->page); | |
141 | } | |
142 | ||
143 | /* | |
144 | * MPOOL_GET -- get a page from the pool | |
145 | * | |
146 | * Parameters: | |
147 | * mp: mpool cookie | |
148 | * pgno: page number | |
149 | * flags: not used | |
150 | * | |
151 | * Returns: | |
152 | * RET_ERROR, RET_SUCCESS | |
153 | */ | |
154 | void * | |
155 | mpool_get(mp, pgno, flags) | |
156 | MPOOL *mp; | |
157 | pgno_t pgno; | |
158 | u_int flags; /* XXX not used? */ | |
159 | { | |
160 | BKT *b; | |
161 | BKTHDR *hp; | |
162 | off_t off; | |
163 | int nr; | |
164 | ||
165 | /* | |
166 | * If asking for a specific page that is already in the cache, find | |
167 | * it and return it. | |
168 | */ | |
169 | if (b = mpool_look(mp, pgno)) { | |
170 | #ifdef STATISTICS | |
171 | ++mp->pageget; | |
172 | #endif | |
173 | #ifdef DEBUG | |
174 | if (b->flags & MPOOL_PINNED) | |
af46d2af KB |
175 | __mpoolerr("mpool_get: page %d already pinned", |
176 | b->pgno); | |
43dfd197 KB |
177 | #endif |
178 | rmchain(b); | |
179 | inschain(b, &mp->lru); | |
180 | b->flags |= MPOOL_PINNED; | |
181 | return (b->page); | |
182 | } | |
183 | ||
184 | /* Not allowed to retrieve a non-existent page. */ | |
185 | if (pgno >= mp->npages) { | |
186 | errno = EINVAL; | |
187 | return (NULL); | |
188 | } | |
189 | ||
190 | /* Get a page from the cache. */ | |
191 | if ((b = mpool_bkt(mp)) == NULL) | |
192 | return (NULL); | |
193 | b->pgno = pgno; | |
194 | b->flags = MPOOL_PINNED; | |
195 | ||
196 | #ifdef STATISTICS | |
197 | ++mp->pageread; | |
198 | #endif | |
199 | /* Read in the contents. */ | |
200 | off = mp->pagesize * pgno; | |
201 | if (lseek(mp->fd, off, SEEK_SET) != off) | |
202 | return (NULL); | |
203 | if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { | |
204 | if (nr >= 0) | |
205 | errno = EFTYPE; | |
206 | return (NULL); | |
207 | } | |
208 | if (mp->pgin) | |
209 | (mp->pgin)(mp->pgcookie, b->pgno, b->page); | |
210 | ||
211 | inshash(b, b->pgno); | |
212 | inschain(b, &mp->lru); | |
213 | #ifdef STATISTICS | |
214 | ++mp->pageget; | |
215 | #endif | |
216 | return (b->page); | |
217 | } | |
218 | ||
219 | /* | |
220 | * MPOOL_PUT -- return a page to the pool | |
221 | * | |
222 | * Parameters: | |
223 | * mp: mpool cookie | |
224 | * page: page pointer | |
225 | * pgno: page number | |
226 | * | |
227 | * Returns: | |
228 | * RET_ERROR, RET_SUCCESS | |
229 | */ | |
230 | int | |
231 | mpool_put(mp, page, flags) | |
232 | MPOOL *mp; | |
233 | void *page; | |
234 | u_int flags; | |
235 | { | |
236 | BKT *baddr; | |
237 | #ifdef DEBUG | |
238 | BKT *b; | |
239 | #endif | |
240 | ||
241 | #ifdef STATISTICS | |
242 | ++mp->pageput; | |
243 | #endif | |
88e50704 | 244 | baddr = (BKT *)((char *)page - sizeof(BKT)); |
43dfd197 KB |
245 | #ifdef DEBUG |
246 | if (!(baddr->flags & MPOOL_PINNED)) | |
af46d2af | 247 | __mpoolerr("mpool_put: page %d not pinned", b->pgno); |
43dfd197 KB |
248 | for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { |
249 | if (b == (BKT *)&mp->lru) | |
af46d2af | 250 | __mpoolerr("mpool_put: %0x: bad address", baddr); |
43dfd197 KB |
251 | if (b == baddr) |
252 | break; | |
253 | } | |
254 | #endif | |
255 | baddr->flags &= ~MPOOL_PINNED; | |
256 | baddr->flags |= flags & MPOOL_DIRTY; | |
257 | return (RET_SUCCESS); | |
258 | } | |
259 | ||
260 | /* | |
261 | * MPOOL_CLOSE -- close the buffer pool | |
262 | * | |
263 | * Parameters: | |
264 | * mp: mpool cookie | |
265 | * | |
266 | * Returns: | |
267 | * RET_ERROR, RET_SUCCESS | |
268 | */ | |
269 | int | |
270 | mpool_close(mp) | |
271 | MPOOL *mp; | |
272 | { | |
273 | BKT *b, *next; | |
274 | ||
275 | /* Free up any space allocated to the lru pages. */ | |
276 | for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { | |
277 | next = b->cprev; | |
43dfd197 KB |
278 | free(b); |
279 | } | |
7b2cd66a | 280 | free(mp); |
43dfd197 KB |
281 | return (RET_SUCCESS); |
282 | } | |
283 | ||
284 | /* | |
285 | * MPOOL_SYNC -- sync the file to disk. | |
286 | * | |
287 | * Parameters: | |
288 | * mp: mpool cookie | |
289 | * | |
290 | * Returns: | |
291 | * RET_ERROR, RET_SUCCESS | |
292 | */ | |
293 | int | |
294 | mpool_sync(mp) | |
295 | MPOOL *mp; | |
296 | { | |
297 | BKT *b; | |
298 | ||
299 | for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) | |
300 | if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) | |
301 | return (RET_ERROR); | |
302 | return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); | |
303 | } | |
304 | ||
305 | /* | |
306 | * MPOOL_BKT -- get/create a BKT from the cache | |
307 | * | |
308 | * Parameters: | |
309 | * mp: mpool cookie | |
310 | * | |
311 | * Returns: | |
312 | * NULL on failure and a pointer to the BKT on success | |
313 | */ | |
314 | static BKT * | |
315 | mpool_bkt(mp) | |
316 | MPOOL *mp; | |
317 | { | |
318 | BKT *b; | |
319 | ||
320 | if (mp->curcache < mp->maxcache) | |
321 | goto new; | |
322 | ||
323 | /* | |
324 | * If the cache is maxxed out, search the lru list for a buffer we | |
325 | * can flush. If we find one, write it if necessary and take it off | |
326 | * any lists. If we don't find anything we grow the cache anyway. | |
327 | * The cache never shrinks. | |
328 | */ | |
329 | for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) | |
330 | if (!(b->flags & MPOOL_PINNED)) { | |
331 | if (b->flags & MPOOL_DIRTY && | |
332 | mpool_write(mp, b) == RET_ERROR) | |
333 | return (NULL); | |
334 | rmhash(b); | |
335 | rmchain(b); | |
336 | #ifdef STATISTICS | |
337 | ++mp->pageflush; | |
338 | #endif | |
339 | #ifdef DEBUG | |
340 | { | |
341 | void *spage; | |
342 | spage = b->page; | |
343 | memset(b, 0xff, sizeof(BKT) + mp->pagesize); | |
344 | b->page = spage; | |
345 | } | |
346 | #endif | |
347 | return (b); | |
348 | } | |
349 | ||
350 | new: if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL) | |
351 | return (NULL); | |
352 | #ifdef STATISTICS | |
353 | ++mp->pagealloc; | |
354 | #endif | |
355 | #ifdef DEBUG | |
356 | memset(b, 0xff, sizeof(BKT) + mp->pagesize); | |
357 | #endif | |
358 | b->page = (char *)b + sizeof(BKT); | |
359 | ++mp->curcache; | |
360 | return (b); | |
361 | } | |
362 | ||
363 | /* | |
364 | * MPOOL_WRITE -- sync a page to disk | |
365 | * | |
366 | * Parameters: | |
367 | * mp: mpool cookie | |
368 | * | |
369 | * Returns: | |
370 | * RET_ERROR, RET_SUCCESS | |
371 | */ | |
372 | static int | |
373 | mpool_write(mp, b) | |
374 | MPOOL *mp; | |
375 | BKT *b; | |
376 | { | |
377 | off_t off; | |
378 | ||
379 | if (mp->pgout) | |
380 | (mp->pgout)(mp->pgcookie, b->pgno, b->page); | |
381 | ||
382 | #ifdef STATISTICS | |
383 | ++mp->pagewrite; | |
384 | #endif | |
385 | off = mp->pagesize * b->pgno; | |
386 | if (lseek(mp->fd, off, SEEK_SET) != off) | |
387 | return (RET_ERROR); | |
388 | if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) | |
389 | return (RET_ERROR); | |
390 | b->flags &= ~MPOOL_DIRTY; | |
391 | return (RET_SUCCESS); | |
392 | } | |
393 | ||
394 | /* | |
395 | * MPOOL_LOOK -- lookup a page | |
396 | * | |
397 | * Parameters: | |
398 | * mp: mpool cookie | |
399 | * pgno: page number | |
400 | * | |
401 | * Returns: | |
402 | * NULL on failure and a pointer to the BKT on success | |
403 | */ | |
404 | static BKT * | |
405 | mpool_look(mp, pgno) | |
406 | MPOOL *mp; | |
407 | pgno_t pgno; | |
408 | { | |
409 | register BKT *b; | |
410 | register BKTHDR *tb; | |
411 | ||
412 | /* XXX | |
413 | * If find the buffer, put it first on the hash chain so can | |
414 | * find it again quickly. | |
415 | */ | |
416 | tb = &mp->hashtable[HASHKEY(pgno)]; | |
417 | for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) | |
418 | if (b->pgno == pgno) { | |
419 | #ifdef STATISTICS | |
420 | ++mp->cachehit; | |
421 | #endif | |
422 | return (b); | |
423 | } | |
424 | #ifdef STATISTICS | |
425 | ++mp->cachemiss; | |
426 | #endif | |
427 | return (NULL); | |
428 | } | |
429 | ||
#ifdef STATISTICS
/*
 * MPOOL_STAT -- cache statistics
 *
 * Parameters:
 *	mp:	mpool cookie
 *
 * Prints the pool counters to stderr, followed by the page number of each
 * bucket on the lru chain, ten to a line.  A trailing "d" marks a dirty
 * page and a trailing "P" a pinned one.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int col;
	char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    mp->pagesize, mp->curcache, mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);

	/* The hit rate is only meaningful once a lookup has happened. */
	if (mp->cachehit + mp->cachemiss) {
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	}
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	/* Dump the lru chain; the separator becomes a newline every 10th. */
	sep = "";
	col = 0;
	bp = mp->lru.cnext;
	while (bp != (BKT *)&mp->lru) {
		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		++col;
		if (col == 10) {
			sep = "\n";
			col = 0;
		} else {
			sep = ", ";
		}
		bp = bp->cnext;
	}
	(void)fprintf(stderr, "\n");
}
#endif
479 | ||
#ifdef DEBUG
#if __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif

/*
 * __MPOOLERR -- report a fatal consistency error (DEBUG builds only).
 *
 * Parameters:
 *	fmt:	printf(3)-style format, followed by its arguments
 *
 * The formatted message and a newline go to stderr, after which the
 * process is terminated with abort(3); this routine does not return.
 */
static void
#if __STDC__
__mpoolerr(const char *fmt, ...)
#else
__mpoolerr(fmt, va_alist)
	char *fmt;
	va_dcl
#endif
{
	va_list args;

#if __STDC__
	va_start(args, fmt);
#else
	va_start(args);
#endif
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
	(void)fputc('\n', stderr);
	abort();
	/* NOTREACHED */
}
#endif