/*	vfs_bio.c	4.30	82/05/12	*/

/* merged into kernel:	@(#)bio.c 2.3 4/8/82 */

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */

struct	buf bfreelist[BQUEUES];
struct	buf bswlist, *bclnlist;

#define	BUFHSZ	63
#define	RND	(MAXBSIZE/DEV_BSIZE)
struct	bufhd bufhash[BUFHSZ];
#define	BUFHASH(dev, dblkno)	\
	((struct buf *)&bufhash[((int)(dev)+(((int)(dblkno))/RND)) % BUFHSZ])
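
/*
 * Editorial note, not in the original source: assuming, say, a
 * MAXBSIZE of 8192 and a DEV_BSIZE of 512, RND is 16, so the device
 * blocks that can make up one maximal-size buffer share a bucket;
 * e.g. dev 1, block 35 hashes to bufhash[(1 + 35/16) % 63], bucket 3.
 */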

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;
	register struct bufhd *bp;

	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
		bp->b_forw = bp->b_back = (struct buf *)bp;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[64];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be on one of three
 * different lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf *swbuf;
short	*swsize;		/* CAN WE JUST USE B_BCOUNT? */
int	*swpf;

#ifndef	UNFAST
#define	notavail(bp) \
{ \
	int x = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(x); \
}
#endif

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
#ifdef	TRACE
		trace(TR_BREADHIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
	trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}
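
/*
 * Editorial sketch, not part of the original source: the typical
 * consumer pattern for the cache is bread followed by brelse, so
 * the buffer returns to the free lists for reuse.  The block number
 * and transfer size here are arbitrary.
 */
exampleread(dev)
	dev_t dev;
{
	register struct buf *bp;

	bp = bread(dev, (daddr_t)0, DEV_BSIZE);
	if ((bp->b_flags & B_ERROR) == 0) {
		/* data is at bp->b_un.b_addr for bp->b_bcount bytes */
		;
	}
	brelse(bp);
}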

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno, size)
	dev_t dev;
	daddr_t blkno, rablkno;
	int size;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
			trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;	/* pay for read */
		}
#ifdef	TRACE
		else
			trace(TR_BREADHIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, size);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	TRACE
			trace(TR_BREADHITRA, dev, blkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	TRACE
			trace(TR_BREADMISSRA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}
	if (bp == NULL)
		return(bread(dev, blkno, size));
	iowait(bp);
	return(bp);
}
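
/*
 * Editorial sketch, not part of the original source: a sequential
 * reader overlaps its I/O by naming the next block as read-ahead,
 * which breada starts asynchronously if it is not already in core.
 */
examplereadseq(dev, lbn)
	dev_t dev;
	daddr_t lbn;
{
	register struct buf *bp;

	bp = breada(dev, lbn, lbn + 1, DEV_BSIZE);
	/* ... consume the data ... */
	brelse(bp);
}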

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
#ifdef	TRACE
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
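
/*
 * Editorial sketch, not part of the original source, contrasting the
 * three write flavors: bwrite starts I/O and waits, bawrite starts
 * I/O and returns, and bdwrite does no I/O at all, leaving the dirty
 * block in the cache in the hope of absorbing further writes.
 */
examplewrite(bp, morecoming)
	register struct buf *bp;
	int morecoming;
{

	if (morecoming)
		bdwrite(bp);	/* partial block; another write follows */
	else
		bawrite(bp);	/* done with it; write asynchronously */
}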

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		flist->av_forw->av_back = bp;
		bp->av_forw = flist->av_forw;
		flist->av_forw = bp;
		bp->av_back = flist;
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		flist->av_back->av_forw = bp;
		bp->av_back = flist->av_back;
		flist->av_back = bp;
		bp->av_forw = flist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}
328 | /* | |
329 | * Assign a buffer for the given block. If the appropriate | |
330 | * block is already associated, return it; otherwise search | |
331 | * for the oldest non-busy buffer and reassign it. | |
23900030 BJ |
332 | * |
333 | * We use splx here because this routine may be called | |
334 | * on the interrupt stack during a dump, and we don't | |
335 | * want to lower the ipl back to 0. | |
663dbc72 BJ |
336 | */ |
337 | struct buf * | |
ad30fb67 KM |
338 | getblk(dev, blkno, size) |
339 | dev_t dev; | |
340 | daddr_t blkno; | |
341 | int size; | |
663dbc72 | 342 | { |
5603d07d | 343 | register struct buf *bp, *dp, *ep; |
5aa9d5ea RE |
344 | #ifdef DISKMON |
345 | register int i; | |
346 | #endif | |
23900030 | 347 | int s; |
663dbc72 | 348 | |
01659974 BJ |
349 | if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) |
350 | blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1); | |
ad30fb67 | 351 | dp = BUFHASH(dev, blkno); |
663dbc72 | 352 | loop: |
46387ee3 | 353 | for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { |
ad30fb67 | 354 | if (bp->b_blkno != blkno || bp->b_dev != dev || |
46387ee3 | 355 | bp->b_flags&B_INVAL) |
663dbc72 | 356 | continue; |
23900030 | 357 | s = spl6(); |
663dbc72 BJ |
358 | if (bp->b_flags&B_BUSY) { |
359 | bp->b_flags |= B_WANTED; | |
360 | sleep((caddr_t)bp, PRIBIO+1); | |
23900030 | 361 | splx(s); |
663dbc72 BJ |
362 | goto loop; |
363 | } | |
23900030 | 364 | splx(s); |
663dbc72 BJ |
365 | #ifdef DISKMON |
366 | i = 0; | |
367 | dp = bp->av_forw; | |
46387ee3 | 368 | while ((dp->b_flags & B_HEAD) == 0) { |
663dbc72 BJ |
369 | i++; |
370 | dp = dp->av_forw; | |
371 | } | |
4c05b581 | 372 | if (i<64) |
663dbc72 BJ |
373 | io_info.bufcount[i]++; |
374 | #endif | |
375 | notavail(bp); | |
ad30fb67 | 376 | brealloc(bp, size); |
663dbc72 BJ |
377 | bp->b_flags |= B_CACHE; |
378 | return(bp); | |
379 | } | |
5603d07d BJ |
380 | if (major(dev) >= nblkdev) |
381 | panic("blkdev"); | |
23900030 | 382 | s = spl6(); |
46387ee3 BJ |
383 | for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--) |
384 | if (ep->av_forw != ep) | |
385 | break; | |
386 | if (ep == bfreelist) { /* no free blocks at all */ | |
387 | ep->b_flags |= B_WANTED; | |
388 | sleep((caddr_t)ep, PRIBIO+1); | |
23900030 | 389 | splx(s); |
663dbc72 BJ |
390 | goto loop; |
391 | } | |
23900030 | 392 | splx(s); |
46387ee3 | 393 | bp = ep->av_forw; |
663dbc72 BJ |
394 | notavail(bp); |
395 | if (bp->b_flags & B_DELWRI) { | |
396 | bp->b_flags |= B_ASYNC; | |
397 | bwrite(bp); | |
398 | goto loop; | |
399 | } | |
15f77b9b | 400 | #ifdef TRACE |
53f9ca20 | 401 | trace(TR_BRELSE, bp->b_dev, bp->b_blkno); |
973ecc4f | 402 | #endif |
663dbc72 | 403 | bp->b_flags = B_BUSY; |
ad30fb67 | 404 | bfree(bp); |
663dbc72 BJ |
405 | bp->b_back->b_forw = bp->b_forw; |
406 | bp->b_forw->b_back = bp->b_back; | |
407 | bp->b_forw = dp->b_forw; | |
408 | bp->b_back = dp; | |
409 | dp->b_forw->b_back = bp; | |
410 | dp->b_forw = bp; | |
411 | bp->b_dev = dev; | |
ad30fb67 KM |
412 | bp->b_blkno = blkno; |
413 | brealloc(bp, size); | |
663dbc72 BJ |
414 | return(bp); |
415 | } | |
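
/*
 * Editorial sketch, not part of the original source: a newly
 * allocated disk block need never be read.  A file system would
 * grab the buffer with getblk, clear it, and let the write trickle
 * out later as a delayed write.
 */
examplenewblk(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, DEV_BSIZE);
	clrbuf(bp);
	bdwrite(bp);
}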

/*
 * Get an empty block, not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY|B_INVAL;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_bcount = size;
	return(bp);
}
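
/*
 * Editorial sketch, not part of the original source: geteblk hands
 * back an invalid (NODEV) buffer usable as scratch storage; it is
 * returned to the free lists with brelse like any other buffer.
 */
examplescratch(size)
	int size;
{
	register struct buf *bp;

	bp = geteblk(size);
	/* ... use bp->b_un.b_addr as size bytes of scratch ... */
	brelse(bp);
}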

/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First we need to make sure that any overlapping
	 * previous I/O is disposed of.
	 */
	if (size == bp->b_bcount)
		return;
	if (size < bp->b_bcount) {
		bp->b_bcount = size;
		return;
	}
	start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
	last = bp->b_blkno + (size / DEV_BSIZE) - 1;
	if (bp->b_bcount == 0) {
		start++;
		if (start == last)
			goto allocit;
	}
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	(void) spl0();
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_blkno < start || ep->b_blkno > last ||
		    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		(void) spl0();
		/*
		 * What we would really like to do is kill this
		 * I/O since it is now useless.  We cannot do that
		 * so we force it to complete, so that it cannot
		 * over-write our useful data later.
		 */
		if (ep->b_flags & B_DELWRI) {
			notavail(ep);
			ep->b_flags |= B_ASYNC;
			bwrite(ep);
			goto loop;
		}
	}
allocit:
	/*
	 * Here the buffer is already available, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = size;
}

/*
 * Release space associated with a buffer.
 */
bfree(bp)
	struct buf *bp;
{
	/*
	 * Here the buffer does not change, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	geterror(bp);
}

#ifdef	UNFAST
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
	register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif
575 | /* | |
576 | * Mark I/O complete on a buffer. If the header | |
577 | * indicates a dirty page push completion, the | |
578 | * header is inserted into the ``cleaned'' list | |
579 | * to be processed by the pageout daemon. Otherwise | |
580 | * release it if I/O is asynchronous, and wake | |
581 | * up anyone waiting for it. | |
582 | */ | |
583 | iodone(bp) | |
584 | register struct buf *bp; | |
585 | { | |
586 | register int s; | |
587 | ||
80e7c811 BJ |
588 | if (bp->b_flags & B_DONE) |
589 | panic("dup iodone"); | |
663dbc72 BJ |
590 | bp->b_flags |= B_DONE; |
591 | if (bp->b_flags & B_DIRTY) { | |
592 | if (bp->b_flags & B_ERROR) | |
593 | panic("IO err in push"); | |
594 | s = spl6(); | |
663dbc72 BJ |
595 | bp->av_forw = bclnlist; |
596 | bp->b_bcount = swsize[bp - swbuf]; | |
597 | bp->b_pfcent = swpf[bp - swbuf]; | |
796c66c0 BJ |
598 | cnt.v_pgout++; |
599 | cnt.v_pgpgout += bp->b_bcount / NBPG; | |
663dbc72 BJ |
600 | bclnlist = bp; |
601 | if (bswlist.b_flags & B_WANTED) | |
602 | wakeup((caddr_t)&proc[2]); | |
603 | splx(s); | |
a3ee1d55 | 604 | return; |
663dbc72 BJ |
605 | } |
606 | if (bp->b_flags&B_ASYNC) | |
607 | brelse(bp); | |
608 | else { | |
609 | bp->b_flags &= ~B_WANTED; | |
610 | wakeup((caddr_t)bp); | |
611 | } | |
612 | } | |

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
	struct buf *bp;
{
	register int *p;
	register int c;

	p = bp->b_un.b_words;
	c = bp->b_bcount/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion.  When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;
	int s;

	s = spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	splx(s);

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
#ifdef	TRACE
		trace(TR_SWAPIO, dev, bp->b_blkno);
#endif
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		s = spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PSWP);
		splx(s);
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	s = spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	splx(s);
}

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{
	char *mesg;

	printf("pid %d: ", p->p_pid);
	if (rout)
		printf(mesg = "killed due to no swap space\n");
	else
		printf(mesg = "killed on swap error\n");
	uprintf("sorry, pid %d was %s", p->p_pid, mesg);
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if (bp->b_flags&B_DELWRI &&
			    (dev == NODEV || dev == bp->b_dev)) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				goto loop;
			}
		}
	splx(s);
}
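
/*
 * Editorial note, not in the original source: per the comment above,
 * update() makes the periodic bflush(NODEV) call that pushes every
 * delayed write, while umount flushes only the device going away.
 */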

/*
 * Raw I/O.  The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked.  After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
	int (*strat)();
	register struct buf *bp;
	unsigned (*mincnt)();
{
	register int c;
	char *a;
	int s;

	if (useracc(u.u_base, u.u_count, rw==B_READ ? B_WRITE : B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	s = spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	splx(s);
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		splx(s);
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
		if (bp->b_flags&B_ERROR)
			break;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}
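
/*
 * Editorial sketch, not part of the original source: the classic
 * consumer of physio is a block driver's raw (character device)
 * read entry.  "xxread", "xxstrategy", and "rxxbuf" are hypothetical
 * names for such a driver's entries and its private raw buffer header.
 */
extern int xxstrategy();
extern unsigned minphys();
extern struct buf rxxbuf;

xxread(dev)
	dev_t dev;
{

	physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
}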

/*ARGSUSED*/
unsigned
minphys(bp)
	struct buf *bp;
{

	if (bp->b_bcount > 63 * 1024)
		bp->b_bcount = 63 * 1024;
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error) == 0)
			u.u_error = EIO;
}

/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty,
 * then properly flush the queues.  Until that happy day, this suffices
 * for correctness.					... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp	((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}