/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)nfs_bio.c	8.3 (Berkeley) %G%
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <vm/vm.h>

#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

struct buf *incore(), *nfs_getcacheblk();
extern struct queue_entry nfs_bufq;
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf, len, nra, error = 0, n, on, not_readin;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	p = uio->uio_procp;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time."
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			    vp->v_type != VREG) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
			}
			np->n_attrstamp = 0;
			np->n_direofoffset = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				np->n_direofoffset = 0;
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
	do {

		/*
		 * Get a valid lease. If cached data is stale, flush it.
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
				do {
					error = nqnfs_getlease(vp, NQL_READ, cred, p);
				} while (error == NQNFS_EXPIRED);
				if (error)
					return (error);
				if (np->n_lrev != np->n_brev ||
				    (np->n_flag & NQNFSNONCACHE) ||
				    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
					if (vp->v_type == VDIR) {
						np->n_direofoffset = 0;
						cache_purge(vp);
					}
					if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
						return (error);
					np->n_brev = np->n_lrev;
				}
			} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
				np->n_direofoffset = 0;
				cache_purge(vp);
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
			}
		}
		if (np->n_flag & NQNFSNONCACHE) {
			switch (vp->v_type) {
			case VREG:
				error = nfs_readrpc(vp, uio, cred);
				break;
			case VLNK:
				error = nfs_readlinkrpc(vp, uio, cred);
				break;
			case VDIR:
				error = nfs_readdirrpc(vp, uio, cred);
				break;
			}
			return (error);
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
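			/*
			 * Split the file offset into a logical cache block
			 * number (lbn) and an offset within that block (on);
			 * bn converts lbn into the DEV_BSIZE units used to
			 * index the buffer cache.
			 */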
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    lbn == vp->v_lastr + 1) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp, cred)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						}
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					if (error = nfs_doio(bp, cred, p)) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			diff = np->n_size - uio->uio_offset;
			if (diff < n)
				n = diff;
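			/*
			 * If the buffer was found in the cache without being
			 * read here, the requested range may lie outside its
			 * valid region (b_validoff .. b_validend). In that
			 * case throw the buffer away, first pushing any dirty
			 * data back to the server, and retry from "again".
			 */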
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAL;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			vp->v_lastr = lbn;
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
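		/*
		 * Symbolic links are cached as a single block of
		 * NFS_MAXPATHLEN bytes at logical block 0; b_resid then
		 * gives how much of that block the link text did not fill.
		 */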
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				if (error = nfs_doio(bp, cred, p)) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
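		/*
		 * For directories, uio_offset is not a byte offset but the
		 * NFS directory cookie of the block to read; nfs_doio()
		 * saves the cookie of the following block in b_blkno.
		 */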
		case VDIR:
			nfsstats.biocache_readdirs++;
			bn = (daddr_t)uio->uio_offset;
			bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				if (error = nfs_doio(bp, cred, p)) {
					brelse(bp);
					return (error);
				}
			}

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you have the
			 * directory offset cookie of the next block.)
			 */
			rabn = bp->b_blkno;
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    rabn != 0 && rabn != np->n_direofoffset &&
			    !incore(vp, rabn)) {
				rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
				if (rabp) {
					if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
						rabp->b_flags |= (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp, cred)) {
							rabp->b_flags |= B_INVAL;
							brelse(rabp);
						}
					}
				}
			}
			on = 0;
			n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
			got_buf = 1;
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			if (n + on == biosize || uio->uio_offset == np->n_size)
				bp->b_flags |= B_AGE;
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			uio->uio_offset = bp->b_blkno;
			break;
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(vp, uio, cred, ioflag));
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		if (error = uiomove((char *)bp->b_data + on, n, uio)) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
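		/*
		 * Extend the buffer's valid range to cover the newly
		 * dirtied bytes. ("#ifndef notdef" selects the first arm,
		 * since notdef is never defined; the #else arm would simply
		 * equate the valid range with the dirty range.)
		 */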
#ifndef notdef
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
#else
		bp->b_validoff = bp->b_dirtyoff;
		bp->b_validend = bp->b_dirtyend;
#endif
		if (ioflag & IO_APPEND)
			bp->b_flags |= B_APPENDWRITE;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			if (error = VOP_BWRITE(bp))
				return (error);
		} else if ((n + on) == biosize &&
		    (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else
			bdwrite(bp);
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

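	/*
	 * On an interruptible mount, sleep in getblk() with PCATCH so a
	 * signal can wake us; if one arrives, give up and return NULL,
	 * otherwise keep polling for the block every two seconds.
	 */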
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
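	/*
	 * If the flush failed (usually because a sleep inside vinvalbuf()
	 * was interrupted), bail out with EINTR when a signal is pending
	 * on an interruptible mount; otherwise retry with a timed,
	 * non-catching sleep until the flush succeeds.
	 */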
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	register int i;

	if (nfs_numasync == 0)
		return (EIO);
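	/*
	 * Hand the buffer to the first idle nfsiod: make sure it carries
	 * a credential for the rpc, queue it on nfs_bufq, and wake the
	 * daemon sleeping on its nfs_iodwant slot.
	 */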
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			if (bp->b_flags & B_READ) {
				if (bp->b_rcred == NOCRED && cred != NOCRED) {
					crhold(cred);
					bp->b_rcred = cred;
				}
			} else {
				if (bp->b_wcred == NOCRED && cred != NOCRED) {
					crhold(cred);
					bp->b_wcred = cred;
				}
			}

			queue_enter_tail(&nfs_bufq, bp, struct buf *, b_freelist);
			nfs_iodwant[i] = (struct proc *)0;
			wakeup((caddr_t)&nfs_iodwant[i]);
			return (0);
		}
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error, diff, len;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS)
		panic("doio phys");
	if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - (bp->b_blkno * DEV_BSIZE
						+ diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
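			/*
			 * If this vnode backs a running executable and its
			 * contents have changed on the server (lease revision
			 * mismatch for nqnfs, modify time change for plain
			 * nfs), kill the process, since its stale text pages
			 * can no longer be paged back in consistently;
			 * P_NOSWAP keeps it from being swapped while it exits.
			 */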
			if (p && (vp->v_flag & VTEXT) &&
			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
			      np->n_lrev != np->n_brev) ||
			     (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			      np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_flag |= P_NOSWAP;
			}
			break;
		case VLNK:
			uiop->uio_offset = 0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			break;
		case VDIR:
			uiop->uio_offset = bp->b_lblkno;
			nfsstats.readdir_bios++;
			if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
				error = nfs_readdirlookrpc(vp, uiop, cr);
			else
				error = nfs_readdirrpc(vp, uiop, cr);
			/*
			 * Save offset cookie in b_blkno.
			 */
			bp->b_blkno = uiop->uio_offset;
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
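		/*
		 * Write case: push only the dirty subrange of the buffer,
		 * b_dirtyoff through b_dirtyend, to the server.
		 */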
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
			- bp->b_dirtyoff;
		uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
			+ bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		if (bp->b_flags & B_APPENDWRITE)
			error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
		else
			error = nfs_writerpc(vp, uiop, cr, 0);
		bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR; report the interruption by setting B_EINTR instead.
		 * For the B_ASYNC case, B_EINTR is not relevant, so the rpc
		 * attempt is essentially a noop.
		 */
		if (error == EINTR) {
			bp->b_flags &= ~B_INVAL;
			bp->b_flags |= B_DELWRI;

			/*
			 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
			 * buffer to the clean list, we have to reassign it back to the
			 * dirty one. Ugh.
			 */
			if (bp->b_flags & B_ASYNC)
				reassignbuf(bp, vp);
			else
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	biodone(bp);
	return (error);
}