/*
 * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This software is a component of "386BSD" developed by
 *	William F. Jolitz, TeleMuse.
 * 4. Neither the name of the developer nor the name "386BSD"
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
 * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
 * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE OF THIS WORK.
 *
 * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
 * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
 * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
 * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
 * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
 * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
 * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
 * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
 *
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: vfs__bio.c,v 1.8 1993/11/07 17:46:24 wollman Exp $
 */

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "vnode.h"
#include "buf.h"
#include "specdev.h"
#include "mount.h"
#include "malloc.h"
#include "vm/vm.h"
#include "resourcevar.h"

/* From sys/buf.h */
struct buf *buf;		/* the buffer pool itself */
char *buffers;
int nbuf;			/* number of buffer headers */
int bufpages;			/* number of memory pages in the buffer pool */
struct buf *swbuf;		/* swap I/O headers */
int nswbuf;
struct bufhd bufhash[BUFHSZ];	/* heads of hash lists */
struct buf bfreelist[BQUEUES];	/* heads of available lists */
struct buf bswlist;		/* head of free swap header list */
struct buf *bclnlist;		/* head of cleaned page list */

static struct buf *getnewbuf(int);
extern vm_map_t buffer_map;

/*
 * Initialize buffer headers and related structures.
 */
void bufinit()
{
	struct bufhd *bh;
	struct buf *bp;

	/* first, make a null hash table */
	for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) {
		bh->b_flags = 0;
		bh->b_forw = (struct buf *)bh;
		bh->b_back = (struct buf *)bh;
	}

	/* next, make a null set of free lists */
	for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) {
		bp->b_flags = 0;
		bp->av_forw = bp;
		bp->av_back = bp;
		bp->b_forw = bp;
		bp->b_back = bp;
	}

	/* finally, initialize each buffer header and stick on empty q */
	for(bp = buf; bp < buf + nbuf ; bp++) {
		bp->b_flags = B_HEAD | B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = 0;
		binstailfree(bp, bfreelist + BQ_EMPTY);
		binshash(bp, bfreelist + BQ_EMPTY);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
int
bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred,
	struct buf **bpp)
{
	struct buf *bp;
	int rv = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		rv = biowait (bp);
	}
	*bpp = bp;

	return (rv);
}
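
/*
 * Illustrative sketch (not part of the original file): a filesystem
 * read path would typically map a logical block and call bread(),
 * then brelse() the buffer once its contents have been copied out.
 * The names "lbn", "bsize", "base" and "n" below are hypothetical
 * local variables, not identifiers from this file.
 *
 *	struct buf *bp;
 *	int error;
 *
 *	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	bcopy(bp->b_un.b_addr, base, n);
 *	brelse(bp);
 */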

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block. [See page 55 of Bach's Book]
 */
int
breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize,
	struct ucred *cred, struct buf **bpp)
{
	struct buf *bp, *rabp;
	int rv = 0, needwait = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		needwait++;
	}

	rabp = getblk (vp, rablkno, rabsize);

	/* if not found in cache, do some I/O (overlapped with first) */
	if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		rabp->b_flags |= B_READ | B_ASYNC;
		rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
		rabp->b_rcred = cred;
		VOP_STRATEGY(rabp);
	} else
		brelse(rabp);

	/* wait for original I/O */
	if (needwait)
		rv = biowait (bp);

	*bpp = bp;
	return (rv);
}
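
/*
 * Illustrative sketch (not part of the original file): a caller that
 * expects sequential access can use breada() to start an overlapped,
 * asynchronous read of the next block while waiting only for the
 * block it needs now.  Assumes the same hypothetical declarations as
 * the bread() sketch above ("lbn", "bsize").
 *
 *	error = breada(vp, lbn, bsize, lbn + 1, bsize, NOCRED, &bp);
 */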

/*
 * Synchronous write.
 * Release buffer on completion.
 */
int
bwrite(register struct buf *bp)
{
	int rv;

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	} else {
		int wasdelayed;

		if(!(bp->b_flags & B_BUSY))
			panic("bwrite: not busy");

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_oublock++;
		bp->b_flags |= B_DIRTY;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
		rv = biowait(bp);
		brelse(bp);
		return (rv);
	}
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
void
bdwrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bdwrite: not busy");

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return;
	}
	if(bp->b_flags & B_TAPE) {
		bwrite(bp);
		return;
	}
	bp->b_flags &= ~(B_READ|B_DONE);
	bp->b_flags |= B_DIRTY|B_DELWRI;
	reassignbuf(bp, bp->b_vp);
	brelse(bp);
	return;
}
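
/*
 * Illustrative sketch (not part of the original file): after modifying
 * part of a cached block, a caller chooses between a synchronous
 * bwrite(), a delayed bdwrite() when more changes to the same block
 * are likely soon, or an asynchronous bawrite() (below) when the block
 * is complete but the caller need not wait.  "lbn", "bsize" and
 * "doingsync" are hypothetical.
 *
 *	bp = getblk(vp, lbn, bsize);
 *	... modify part of bp->b_un.b_addr ...
 *	if (doingsync)
 *		error = bwrite(bp);
 *	else
 *		bdwrite(bp);
 */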

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
void
bawrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bawrite: not busy");

	if(bp->b_flags & B_INVAL)
		brelse(bp);
	else {
		int wasdelayed;

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_oublock++;
		bp->b_flags |= B_DIRTY | B_ASYNC;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
void
brelse(register struct buf *bp)
{
	int x;

	/* anyone need a "free" block? */
	x = splbio();
	if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) {
		(bfreelist + BQ_AGE)->b_flags &= ~B_WANTED;
		wakeup(bfreelist);
	}
	/* anyone need this very block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}

	if (bp->b_flags & (B_INVAL|B_ERROR)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI|B_CACHE);
		if(bp->b_vp)
			brelvp(bp);
	}

	/* enqueue */
	/* just an empty buffer head ... */
	/*if(bp->b_flags & B_HEAD)
		binsheadfree(bp, bfreelist + BQ_EMPTY)*/
	/* buffers with junk contents */
	/*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE))
		binsheadfree(bp, bfreelist + BQ_AGE)
	/* buffers with stale but valid contents */
	else if(bp->b_flags & B_AGE)
		binstailfree(bp, bfreelist + BQ_AGE)
	/* buffers with valid and quite potentially reusable contents */
	else
		binstailfree(bp, bfreelist + BQ_LRU)

	/* unlock */
	bp->b_flags &= ~B_BUSY;
	splx(x);

}

int freebufspace;
int allocbufspace;

/*
 * Find a buffer which is available for use.
 * If free memory is available for buffer space and an empty header is
 * on the empty list, use that. Otherwise, select something from a
 * free list.  Preference is to AGE list, then LRU list.
 */
static struct buf *
getnewbuf(int sz)
{
	struct buf *bp;
	int x;

	x = splbio();
start:
	/* can we constitute a new buffer? */
	if (freebufspace > sz
	    && bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) {
		caddr_t addr;

/*#define notyet*/
#ifndef notyet
		if ((addr = malloc (sz, M_TEMP, M_WAITOK)) == 0) goto tryfree;
#else /* notyet */
		/* get new memory buffer */
		if (round_page(sz) == sz)
			addr = (caddr_t) kmem_alloc_wired_wait(buffer_map, sz);
		else
			addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK);
		/*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/
		bzero(addr, sz);
#endif /* notyet */
		freebufspace -= sz;
		allocbufspace += sz;

		bp = bfreelist[BQ_EMPTY].av_forw;
		bp->b_flags = B_BUSY | B_INVAL;
		bremfree(bp);
		bp->b_un.b_addr = addr;
		bp->b_bufsize = sz;	/* 20 Aug 92*/
		goto fillin;
	}

tryfree:
	if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) {
		bp = bfreelist[BQ_AGE].av_forw;
		bremfree(bp);
	} else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) {
		bp = bfreelist[BQ_LRU].av_forw;
		bremfree(bp);
	} else {
		/* wait for a free buffer of any kind */
		(bfreelist + BQ_AGE)->b_flags |= B_WANTED;
		tsleep(bfreelist, PRIBIO, "newbuf", 0);
		splx(x);
		return (0);
	}

	/* if we are a delayed write, convert to an async write! */
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_BUSY;
		bawrite (bp);
		goto start;
	}


	if(bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	if (bp->b_rcred != NOCRED) crfree(bp->b_rcred);	/* 25 Apr 92*/
	if (bp->b_wcred != NOCRED) crfree(bp->b_wcred);
	bp->b_flags = B_BUSY;
fillin:
	bremhash(bp);
	splx(x);
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	if (bp->b_bufsize != sz)
		allocbuf(bp, sz);
	bp->b_bcount = bp->b_bufsize = sz;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(struct vnode *vp, daddr_t blkno)
{
	struct buf *bh;
	struct buf *bp;

	bh = BUFHASH(vp, blkno);

	/* Search hash chain */
	bp = bh->b_forw;
	while (bp != (struct buf *) bh) {
		/* hit */
		if (bp->b_lblkno == blkno && bp->b_vp == vp
		    && (bp->b_flags & B_INVAL) == 0)
			return (bp);
		bp = bp->b_forw;
	}

	return(0);
}

/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset. If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it. Otherwise, return an empty block of the
 * correct size. It is up to the caller to ensure that
 * cached blocks are of the correct size.
 */
struct buf *
getblk(register struct vnode *vp, daddr_t blkno, int size)
{
	struct buf *bp, *bh;
	int x;

	for (;;) {
		if (bp = incore(vp, blkno)) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				tsleep (bp, PRIBIO, "getblk", 0);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {

			if((bp = getnewbuf(size)) == 0) continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}
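
/*
 * Illustrative sketch (not part of the original file): a write that
 * fills an entire block need not read it first; getblk() supplies a
 * buffer (cached or freshly allocated) which the caller fills and
 * then pushes back with one of the write routines.  "lbn", "bsize"
 * and "base" are hypothetical.
 *
 *	bp = getblk(vp, lbn, bsize);
 *	bcopy(base, bp->b_un.b_addr, bsize);
 *	bawrite(bp);
 */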

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;
	int x;

	while ((bp = getnewbuf(size)) == 0)
		;
	x = splbio();
	binshash(bp, bfreelist + BQ_AGE);
	splx(x);

	return (bp);
}

/*
 * Exchange a buffer's underlying buffer storage for one of different
 * size, taking care to maintain contents appropriately. When buffer
 * increases in size, caller is responsible for filling out additional
 * contents. When buffer shrinks in size, data is lost, so caller must
 * first return it to backing store before shrinking the buffer, as
 * no implied I/O will be done.
 *
 * The buffer header is updated in place to describe the new storage.
 */
void
allocbuf(register struct buf *bp, int size)
{
	caddr_t newcontents;

	/* get new memory buffer */
#ifndef notyet
	newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#else /* notyet */
	if (round_page(size) == size)
		newcontents = (caddr_t) kmem_alloc_wired_wait(buffer_map, size);
	else
		newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#endif /* notyet */

	/* copy the old into the new, up to the maximum that will fit */
	bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size));

	/* return old contents to free heap */
#ifndef notyet
	free (bp->b_un.b_addr, M_TEMP);
#else /* notyet */
	if (round_page(bp->b_bufsize) == bp->b_bufsize)
		kmem_free_wakeup(buffer_map, bp->b_un.b_addr, bp->b_bufsize);
	else
		free (bp->b_un.b_addr, M_TEMP);
#endif /* notyet */

	/* adjust buffer cache's idea of memory allocated to buffer contents */
	freebufspace -= size - bp->b_bufsize;
	allocbufspace += size - bp->b_bufsize;

	/* update buffer header */
	bp->b_un.b_addr = newcontents;
	bp->b_bcount = bp->b_bufsize = size;
}

/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract and return any errors associated with the I/O.
 * If an invalid block, force it off the lookup hash chains.
 */
int
biowait(register struct buf *bp)
{
	int x;

	x = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		tsleep((caddr_t)bp, PRIBIO, "biowait", 0);
	if((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bremhash(bp);
			binshash(bp, bfreelist + BQ_AGE);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(x);
		return (bp->b_error);
	} else {
		splx(x);
		return (0);
	}
}

/*
 * Finish up operations on a buffer, calling an optional
 * function (if requested), and releasing the buffer if
 * marked asynchronous. Then mark this buffer done so that
 * others biowait()'ing for it will notice when they are
 * woken up from tsleep().
 */
int
biodone(register struct buf *bp)
{
	int x;

	x = splbio();
	if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp);
	bp->b_flags &= ~B_CALL;
	if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY) {
		bp->b_flags &= ~B_DIRTY;
		vwakeup(bp);
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	bp->b_flags &= ~B_ASYNC;
	bp->b_flags |= B_DONE;
	wakeup(bp);
	splx(x);
}
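
/*
 * Illustrative sketch (not part of the original file): a device
 * driver's completion path finishes a transfer by recording any error
 * state on the buffer and calling biodone(), which wakes the thread
 * blocked in biowait() (or lets brelse() run for B_ASYNC writes).
 * "xx_intr" and "hwerror" are hypothetical driver names.
 *
 *	void
 *	xx_intr(struct buf *bp, int hwerror)
 *	{
 *		if (hwerror) {
 *			bp->b_flags |= B_ERROR;
 *			bp->b_error = EIO;
 *		}
 *		biodone(bp);
 *	}
 */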