386BSD 0.1 development
[unix-history] /usr/src/sys.386bsd/kern/vfs__bio.c
/*
 * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This software is a component of "386BSD" developed by
 *	William F. Jolitz, TeleMuse.
 * 4. Neither the name of the developer nor the name "386BSD"
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
 * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
 * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE THIS WORK.
 *
 * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
 * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
 * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
 * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
 * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
 * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
 * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
 * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
 *
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
static char rcsid[] = "$Header: /usr/bill/working/sys/kern/RCS/vfs__bio.c,v 1.2 92/01/21 21:30:08 william Exp $";

#include "param.h"
#include "proc.h"
#include "vnode.h"
#include "buf.h"
#include "specdev.h"
#include "mount.h"
#include "malloc.h"
#include "vm/vm.h"
#include "resourcevar.h"

static struct buf *getnewbuf(int);
extern vm_map_t buffer_map;

/*
 * Initialize buffer headers and related structures.
 */
void bufinit()
{
	struct bufhd *bh;
	struct buf *bp;

	/* first, make a null hash table */
	for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) {
		bh->b_flags = 0;
		bh->b_forw = (struct buf *)bh;
		bh->b_back = (struct buf *)bh;
	}

	/* next, make a null set of free lists */
	for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) {
		bp->b_flags = 0;
		bp->av_forw = bp;
		bp->av_back = bp;
		bp->b_forw = bp;
		bp->b_back = bp;
	}

	/* finally, initialize each buffer header and stick on empty q */
	for(bp = buf; bp < buf + nbuf ; bp++) {
		bp->b_flags = B_HEAD | B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = 0;
		binstailfree(bp, bfreelist + BQ_EMPTY);
		binshash(bp, bfreelist + BQ_EMPTY);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
int
bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred,
	struct buf **bpp)
{
	struct buf *bp;
	int rv = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		rv = biowait (bp);
	}
	*bpp = bp;

	return (rv);
}
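
/*
 * Usage sketch: a caller is expected to pair bread() with brelse() (or
 * with bwrite()/bdwrite() if it modifies the buffer).  The vnode "vp",
 * logical block "lbn" and size "bsize" below are hypothetical names used
 * for illustration only, not identifiers from this file:
 *
 *	struct buf *bp;
 *	int error;
 *
 *	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... inspect or copy data at bp->b_un.b_addr ...
 *	brelse(bp);
 */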

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block. [See page 55 of Bach's Book]
 */
int
breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize,
	struct ucred *cred, struct buf **bpp)
{
	struct buf *bp, *rabp;
	int rv = 0, needwait = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		needwait++;
	}

	rabp = getblk (vp, rablkno, rabsize);

	/* if not found in cache, do some I/O (overlapped with first) */
	if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) {
		rabp->b_flags |= B_READ | B_ASYNC;
		rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		rabp->b_rcred = cred;
		VOP_STRATEGY(rabp);
	} else
		brelse(rabp);

	/* wait for original I/O */
	if (needwait)
		rv = biowait (bp);

	*bpp = bp;
	return (rv);
}
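
/*
 * Usage sketch: a caller that detects sequential access may use breada()
 * to overlap the read of the next logical block with the current one.
 * The names "vp", "lbn" and "bsize" below are hypothetical:
 *
 *	error = breada(vp, lbn, bsize, lbn + 1, bsize, NOCRED, &bp);
 *
 * Only the original block is waited for; the read-ahead block is started
 * B_ASYNC and is expected to be found in the cache by a later bread().
 */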

/*
 * Synchronous write.
 * Release buffer on completion.
 */
int
bwrite(register struct buf *bp)
{
	int rv;

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	} else {
		int wasdelayed;

		if(!(bp->b_flags & B_BUSY))
			panic("bwrite: not busy");

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		bp->b_flags |= B_DIRTY;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
		rv = biowait(bp);
		brelse(bp);
		return (rv);
	}
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
void
bdwrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bdwrite: not busy");

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return;
	}
	if(bp->b_flags & B_TAPE) {
		bwrite(bp);
		return;
	}
	bp->b_flags &= ~(B_READ|B_DONE);
	bp->b_flags |= B_DIRTY|B_DELWRI;
	reassignbuf(bp, bp->b_vp);
	brelse(bp);
	return;
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
void
bawrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bawrite: not busy");

	if(bp->b_flags & B_INVAL)
		brelse(bp);
	else {
		int wasdelayed;

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		bp->b_flags |= B_DIRTY | B_ASYNC;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
}
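
/*
 * Usage sketch: a caller that has modified a busy buffer chooses among
 * the three write paths above.  bwrite() waits for completion, bawrite()
 * starts the I/O and returns, and bdwrite() only marks the buffer
 * B_DELWRI for a later flush.  The predicates below are hypothetical:
 *
 *	if (io_must_be_synchronous)
 *		error = bwrite(bp);
 *	else if (block_completely_filled)
 *		bawrite(bp);
 *	else
 *		bdwrite(bp);
 */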

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
void
brelse(register struct buf *bp)
{
	int x;

	/* anyone need a "free" block? */
	x = splbio();
	if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) {
		(bfreelist + BQ_AGE)->b_flags &= ~B_WANTED;
		wakeup(bfreelist);
	}
	/* anyone need this very block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}

	if (bp->b_flags & (B_INVAL|B_ERROR)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI|B_CACHE);
		if(bp->b_vp)
			brelvp(bp);
	}

	/* enqueue */
	/* just an empty buffer head ... */
	/*if(bp->b_flags & B_HEAD)
		binsheadfree(bp, bfreelist + BQ_EMPTY)*/
	/* buffers with junk contents */
	/*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE))
		binsheadfree(bp, bfreelist + BQ_AGE)
	/* buffers with stale but valid contents */
	else if(bp->b_flags & B_AGE)
		binstailfree(bp, bfreelist + BQ_AGE)
	/* buffers with valid and potentially reusable contents */
	else
		binstailfree(bp, bfreelist + BQ_LRU)

	/* unlock */
	bp->b_flags &= ~B_BUSY;
	splx(x);

}

int freebufspace;
int allocbufspace;

/*
 * Find a buffer which is available for use.
 * If there is free memory for buffer space and an empty header on the
 * empty list, use them. Otherwise, select a buffer from a free list,
 * preferring the AGE list, then the LRU list.
 */
static struct buf *
getnewbuf(int sz)
{
	struct buf *bp;
	int x;

	x = splbio();
start:
	/* can we constitute a new buffer? */
	if (freebufspace > sz
		&& bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) {
		caddr_t addr;

/*#define notyet*/
#ifndef notyet
		if ((addr = malloc (sz, M_TEMP, M_WAITOK)) == 0) goto tryfree;
#else /* notyet */
		/* get new memory buffer */
		if (round_page(sz) == sz)
			addr = (caddr_t) kmem_alloc_wired_wait(buffer_map, sz);
		else
			addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK);
		/*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/
		bzero(addr, sz);
#endif /* notyet */
		freebufspace -= sz;
		allocbufspace += sz;

		bp = bfreelist[BQ_EMPTY].av_forw;
		bp->b_flags = B_BUSY | B_INVAL;
		bremfree(bp);
		bp->b_un.b_addr = addr;
		goto fillin;
	}

tryfree:
	if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) {
		bp = bfreelist[BQ_AGE].av_forw;
		bremfree(bp);
	} else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) {
		bp = bfreelist[BQ_LRU].av_forw;
		bremfree(bp);
	} else {
		/* wait for a free buffer of any kind */
		(bfreelist + BQ_AGE)->b_flags |= B_WANTED;
		sleep(bfreelist, PRIBIO);
		splx(x);
		return (0);
	}

	/* if we are a delayed write, convert to an async write! */
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_BUSY;
		bawrite (bp);
		goto start;
	}

	if(bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	bp->b_flags = B_BUSY;
fillin:
	bremhash(bp);
	splx(x);
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	if (bp->b_bufsize != sz)
		allocbuf(bp, sz);
	bp->b_bcount = bp->b_bufsize = sz;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(struct vnode *vp, daddr_t blkno)
{
	struct buf *bh;
	struct buf *bp;

	bh = BUFHASH(vp, blkno);

	/* Search hash chain */
	bp = bh->b_forw;
	while (bp != (struct buf *) bh) {
		/* hit */
		if (bp->b_lblkno == blkno && bp->b_vp == vp
			&& (bp->b_flags & B_INVAL) == 0)
			return (bp);
		bp = bp->b_forw;
	}

	return(0);
}

/*
 * Get a block of the requested size that is associated with
 * a given vnode and block offset. If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it. Otherwise, return an empty block of the
 * correct size. It is up to the caller to ensure that the
 * cached blocks are of the correct size.
 */
struct buf *
getblk(register struct vnode *vp, daddr_t blkno, int size)
{
	struct buf *bp, *bh;
	int x;

	for (;;) {
		if (bp = incore(vp, blkno)) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep (bp, PRIBIO);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {

			if((bp = getnewbuf(size)) == 0) continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}
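
/*
 * Usage sketch: getblk() is the usual way to obtain a block that is about
 * to be completely overwritten, since no read from backing store is
 * needed.  The names "vp", "lbn" and "bsize" below are hypothetical:
 *
 *	bp = getblk(vp, lbn, bsize);
 *	bzero(bp->b_un.b_addr, bsize);
 *	... copy the new contents into bp->b_un.b_addr ...
 *	bdwrite(bp);
 */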

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;
	int x;

	while ((bp = getnewbuf(size)) == 0)
		;
	x = splbio();
	binshash(bp, bfreelist + BQ_AGE);
	splx(x);

	return (bp);
}
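
/*
 * Usage sketch: geteblk() hands out a buffer with no vnode or device
 * association, useful as scratch space for raw or temporary I/O; the
 * caller releases it with brelse() when done.  "bsize" is hypothetical:
 *
 *	bp = geteblk(bsize);
 *	... use bp->b_un.b_addr as a bsize-byte scratch area ...
 *	brelse(bp);
 */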

/*
 * Exchange a buffer's underlying buffer storage for one of a different
 * size, taking care to maintain contents appropriately. When the buffer
 * grows, the caller is responsible for filling out the additional
 * contents. When the buffer shrinks, data is lost, so the caller must
 * first return it to backing store before shrinking the buffer, as
 * no implied I/O will be done.
 *
 * The buffer header is updated in place to describe the new storage.
 */
void
allocbuf(register struct buf *bp, int size)
{
	caddr_t newcontents;

	/* get new memory buffer */
#ifndef notyet
	newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#else /* notyet */
	if (round_page(size) == size)
		newcontents = (caddr_t) kmem_alloc_wired_wait(buffer_map, size);
	else
		newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#endif /* notyet */

	/* copy the old into the new, up to the maximum that will fit */
	bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size));

	/* return old contents to free heap */
#ifndef notyet
	free (bp->b_un.b_addr, M_TEMP);
#else /* notyet */
	if (round_page(bp->b_bufsize) == bp->b_bufsize)
		kmem_free_wakeup(buffer_map, bp->b_un.b_addr, bp->b_bufsize);
	else
		free (bp->b_un.b_addr, M_TEMP);
#endif /* notyet */

	/* adjust buffer cache's idea of memory allocated to buffer contents */
	freebufspace -= size - bp->b_bufsize;
	allocbufspace += size - bp->b_bufsize;

	/* update buffer header */
	bp->b_un.b_addr = newcontents;
	bp->b_bcount = bp->b_bufsize = size;
}

/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract and return any error associated with the I/O.
 * If the I/O failed, mark the buffer invalid and force it off the
 * lookup hash chains.
 */
int
biowait(register struct buf *bp)
{
	int x;

	x = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	if((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bremhash(bp);
			binshash(bp, bfreelist + BQ_AGE);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(x);
		return (bp->b_error);
	} else {
		splx(x);
		return (0);
	}
}

/*
 * Finish up operations on a buffer, calling an optional
 * function (if requested), and releasing the buffer if
 * marked asynchronous. Then mark this buffer done so that
 * others biowait()'ing for it will notice when they are
 * woken up from sleep().
 */
int
biodone(register struct buf *bp)
{
	int x;

	x = splbio();
	if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp);
	bp->b_flags &= ~B_CALL;
	if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY) {
		bp->b_flags &= ~B_DIRTY;
		vwakeup(bp);
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	bp->b_flags &= ~B_ASYNC;
	bp->b_flags |= B_DONE;
	wakeup(bp);
	splx(x);
}
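
/*
 * Completion sketch: the strategy routine's completion side is expected to
 * call biodone() when the transfer finishes.  For a synchronous request the
 * initiator sleeps in biowait() until biodone()'s wakeup(); for a B_ASYNC
 * request biodone() itself releases the buffer via brelse().  A hypothetical
 * driver completion might look like:
 *
 *	if (transfer_failed) {
 *		bp->b_flags |= B_ERROR;
 *		bp->b_error = EIO;
 *	}
 *	biodone(bp);
 */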