clean up includes; nfs_netaddr_match goes to vfs_addr.c
[unix-history] / usr / src / sys / nfs / nfs_bio.c
CommitLineData
39d108be
RM
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
dbf0c423 8 * %sccs.include.redist.c%
39d108be 9 *
e19a2ad1 10 * @(#)nfs_bio.c 7.28 (Berkeley) %G%
39d108be
RM
11 */
12
400a1380
KM
13#include <sys/param.h>
14#include <sys/resourcevar.h>
15#include <sys/proc.h>
16#include <sys/buf.h>
17#include <sys/vnode.h>
18#include <sys/trace.h>
19#include <sys/mount.h>
20#include <sys/kernel.h>
21#include <machine/endian.h>
22#include <vm/vm.h>
23#include <nfs/nfsnode.h>
24#include <nfs/rpcv2.h>
25#include <nfs/nfsv2.h>
26#include <nfs/nfs.h>
27#include <nfs/nfsmount.h>
28#include <nfs/nqnfs.h>
39d108be
RM
29
/*
 * Boolean constants.  TRUE is what this file passes as the second
 * argument of vinvalbuf().  The definitions are guarded so that a
 * header which already supplies TRUE/FALSE does not trigger a macro
 * redefinition.
 */
#ifndef TRUE
#define	TRUE	1
#endif
#ifndef FALSE
#define	FALSE	0
#endif
33
34/*
35 * Vnode op for read using bio
36 * Any similarity to readip() is purely coincidental
37 */
f0f1cbaa 38nfs_bioread(vp, uio, ioflag, cred)
39d108be 39 register struct vnode *vp;
170bfd05 40 register struct uio *uio;
39d108be
RM
41 int ioflag;
42 struct ucred *cred;
43{
9342689a 44 USES_VOP_GETATTR;
39d108be 45 register struct nfsnode *np = VTONFS(vp);
170bfd05 46 register int biosize;
39d108be
RM
47 struct buf *bp;
48 struct vattr vattr;
2c5b44a2
KM
49 struct nfsmount *nmp;
50 daddr_t lbn, bn, rablock[NFS_MAXRAHEAD];
51 int rasize[NFS_MAXRAHEAD], nra, diff, error = 0;
52 int n, on;
39d108be 53
d4e5799e
KM
54#ifdef lint
55 ioflag = ioflag;
56#endif /* lint */
b40809cc 57#ifdef DIAGNOSTIC
39d108be
RM
58 if (uio->uio_rw != UIO_READ)
59 panic("nfs_read mode");
b40809cc 60#endif
39d108be 61 if (uio->uio_resid == 0)
b71430cc 62 return (0);
f0f1cbaa 63 if (uio->uio_offset < 0 && vp->v_type != VDIR)
b71430cc 64 return (EINVAL);
2c5b44a2
KM
65 nmp = VFSTONFS(vp->v_mount);
66 biosize = nmp->nm_rsize;
39d108be 67 /*
2c5b44a2
KM
68 * For nfs, cache consistency can only be maintained approximately.
69 * Although RFC1094 does not specify the criteria, the following is
70 * believed to be compatible with the reference port.
71 * For nqnfs, full cache consistency is maintained within the loop.
72 * For nfs:
39d108be
RM
73 * If the file's modify time on the server has changed since the
74 * last read rpc or you have written to the file,
75 * you may have lost data cache consistency with the
76 * server, so flush all of the file's data out of the cache.
f0f1cbaa
KM
77 * Then force a getattr rpc to ensure that you have up to date
78 * attributes.
2c5b44a2
KM
79 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
80 * the ones changing the modify time.
39d108be
RM
81 * NB: This implies that cache data can be read when up to
82 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
83 * attributes this could be forced by setting n_attrstamp to 0 before
9342689a 84 * the VOP_GETATTR() call.
39d108be 85 */
2c5b44a2 86 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
f0f1cbaa
KM
87 if (np->n_flag & NMODIFIED) {
88 np->n_flag &= ~NMODIFIED;
2c5b44a2
KM
89 if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
90 vp->v_type != VREG)
e19a2ad1 91 vinvalbuf(vp, TRUE, cred, uio->uio_procp);
f0f1cbaa
KM
92 np->n_attrstamp = 0;
93 np->n_direofoffset = 0;
9342689a 94 if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
e8540f59 95 return (error);
7e11a0c9 96 np->n_mtime = vattr.va_mtime.ts_sec;
f0f1cbaa 97 } else {
9342689a 98 if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
f0f1cbaa 99 return (error);
7e11a0c9 100 if (np->n_mtime != vattr.va_mtime.ts_sec) {
f0f1cbaa 101 np->n_direofoffset = 0;
e19a2ad1 102 vinvalbuf(vp, TRUE, cred, uio->uio_procp);
7e11a0c9 103 np->n_mtime = vattr.va_mtime.ts_sec;
f0f1cbaa 104 }
39d108be
RM
105 }
106 }
107 do {
2c5b44a2
KM
108
109 /*
110 * Get a valid lease. If cached data is stale, flush it.
111 */
112 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
113 NQNFS_CKINVALID(vp, np, NQL_READ)) {
114 do {
115 error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
116 } while (error == NQNFS_EXPIRED);
117 if (error)
118 return (error);
e19a2ad1 119 if (np->n_lrev != np->n_brev ||
2c5b44a2
KM
120 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
121 if (vp->v_type == VDIR) {
122 np->n_direofoffset = 0;
123 cache_purge(vp);
124 }
125 np->n_flag &= ~NMODIFIED;
e19a2ad1 126 vinvalbuf(vp, TRUE, cred, uio->uio_procp);
2c5b44a2
KM
127 np->n_brev = np->n_lrev;
128 }
129 }
130 if (np->n_flag & NQNFSNONCACHE) {
131 switch (vp->v_type) {
132 case VREG:
133 error = nfs_readrpc(vp, uio, cred);
134 break;
135 case VLNK:
136 error = nfs_readlinkrpc(vp, uio, cred);
137 break;
138 case VDIR:
139 error = nfs_readdirrpc(vp, uio, cred);
140 break;
141 };
142 return (error);
143 }
f0f1cbaa
KM
144 switch (vp->v_type) {
145 case VREG:
e8540f59 146 nfsstats.biocache_reads++;
170bfd05
KM
147 lbn = uio->uio_offset / biosize;
148 on = uio->uio_offset & (biosize-1);
149 n = MIN((unsigned)(biosize - on), uio->uio_resid);
39d108be
RM
150 diff = np->n_size - uio->uio_offset;
151 if (diff <= 0)
b71430cc 152 return (error);
39d108be
RM
153 if (diff < n)
154 n = diff;
170bfd05 155 bn = lbn*(biosize/DEV_BSIZE);
2c5b44a2
KM
156 for (nra = 0; nra < nmp->nm_readahead &&
157 (lbn + 1 + nra) * biosize < np->n_size; nra++) {
158 rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
159 rasize[nra] = biosize;
160 }
161again:
162 if (nra > 0 && lbn >= vp->v_lastr)
163 error = breadn(vp, bn, biosize, rablock, rasize, nra,
39d108be
RM
164 cred, &bp);
165 else
170bfd05 166 error = bread(vp, bn, biosize, cred, &bp);
2c5b44a2
KM
167 if (bp->b_validend > 0) {
168 if (on < bp->b_validoff || (on+n) > bp->b_validend) {
169 bp->b_flags |= B_INVAL;
170 if (bp->b_dirtyend > 0) {
171 if ((bp->b_flags & B_DELWRI) == 0)
172 panic("nfsbioread");
173 (void) bwrite(bp);
174 } else
175 brelse(bp);
176 goto again;
177 }
178 } else {
179 bp->b_validoff = 0;
180 bp->b_validend = biosize - bp->b_resid;
181 }
d1a28114 182 vp->v_lastr = lbn;
39d108be 183 if (bp->b_resid) {
170bfd05
KM
184 diff = (on >= (biosize-bp->b_resid)) ? 0 :
185 (biosize-bp->b_resid-on);
f0f1cbaa 186 n = MIN(n, diff);
39d108be 187 }
f0f1cbaa
KM
188 break;
189 case VLNK:
190 nfsstats.biocache_readlinks++;
191 on = 0;
192 error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp);
193 n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
194 break;
195 case VDIR:
196 nfsstats.biocache_readdirs++;
197 on = 0;
b40809cc
KM
198 error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp);
199 n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
f0f1cbaa
KM
200 break;
201 };
202 if (error) {
203 brelse(bp);
204 return (error);
205 }
2c5b44a2
KM
206
207 /*
208 * For nqnfs:
209 * Must check for valid lease, since it may have expired while in
210 * bread(). If expired, get a lease.
211 * If data is stale, flush and try again.
212 * nb: If a read rpc is done by bread() or breada() and there is
213 * no valid lease, a get_lease request will be piggy backed.
214 */
215 if (nmp->nm_flag & NFSMNT_NQNFS) {
216 if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
217 do {
218 error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
219 } while (error == NQNFS_EXPIRED);
220 if (error) {
221 brelse(bp);
222 return (error);
223 }
224 if ((np->n_flag & NQNFSNONCACHE) ||
e19a2ad1 225 np->n_lrev != np->n_brev ||
2c5b44a2
KM
226 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
227 if (vp->v_type == VDIR) {
228 np->n_direofoffset = 0;
229 cache_purge(vp);
230 }
231 brelse(bp);
232 np->n_flag &= ~NMODIFIED;
e19a2ad1 233 vinvalbuf(vp, TRUE, cred, uio->uio_procp);
2c5b44a2
KM
234 np->n_brev = np->n_lrev;
235 continue;
236 }
237 } else if ((np->n_flag & NQNFSNONCACHE) ||
238 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
239 np->n_direofoffset = 0;
240 brelse(bp);
241 np->n_flag &= ~NMODIFIED;
e19a2ad1 242 vinvalbuf(vp, TRUE, cred, uio->uio_procp);
2c5b44a2
KM
243 np->n_brev = np->n_lrev;
244 continue;
245 }
246 }
f0f1cbaa
KM
247 if (n > 0)
248 error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
249 switch (vp->v_type) {
250 case VREG:
170bfd05 251 if (n+on == biosize || uio->uio_offset == np->n_size)
39d108be 252 bp->b_flags |= B_AGE;
f0f1cbaa
KM
253 break;
254 case VLNK:
255 n = 0;
256 break;
257 case VDIR:
258 uio->uio_offset = bp->b_blkno;
259 break;
260 };
261 brelse(bp);
39d108be 262 } while (error == 0 && uio->uio_resid > 0 && n != 0);
39d108be
RM
263 return (error);
264}
265
266/*
267 * Vnode op for write using bio
268 */
9342689a 269nfs_write (ap)
e19a2ad1
KM
270 struct vop_write_args /* {
271 struct vnode *a_vp;
272 struct uio *a_uio;
273 int a_ioflag;
274 struct ucred *a_cred;
275 } */ *ap;
39d108be 276{
9342689a 277 USES_VOP_GETATTR;
170bfd05 278 register int biosize;
e19a2ad1
KM
279 register struct uio *uio = ap->a_uio;
280 struct proc *p = uio->uio_procp;
281 register struct vnode *vp = ap->a_vp;
282 struct nfsnode *np = VTONFS(vp);
283 register struct ucred *cred = ap->a_cred;
284 int ioflag = ap->a_ioflag;
39d108be 285 struct buf *bp;
f0f1cbaa 286 struct vattr vattr;
2c5b44a2 287 struct nfsmount *nmp;
39d108be 288 daddr_t lbn, bn;
3c15394b 289 int n, on, error = 0;
39d108be 290
b40809cc 291#ifdef DIAGNOSTIC
e19a2ad1 292 if (uio->uio_rw != UIO_WRITE)
f0f1cbaa 293 panic("nfs_write mode");
e19a2ad1 294 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
b40809cc
KM
295 panic("nfs_write proc");
296#endif
e19a2ad1 297 if (vp->v_type != VREG)
f0f1cbaa 298 return (EIO);
9b61ab4a
KM
299 if (np->n_flag & NWRITEERR) {
300 np->n_flag &= ~NWRITEERR;
301 return (np->n_error);
302 }
e19a2ad1 303 if (ioflag & (IO_APPEND | IO_SYNC)) {
225498c1
KM
304 if (np->n_flag & NMODIFIED) {
305 np->n_flag &= ~NMODIFIED;
e19a2ad1 306 vinvalbuf(vp, TRUE, cred, p);
225498c1 307 }
e19a2ad1 308 if (ioflag & IO_APPEND) {
225498c1 309 np->n_attrstamp = 0;
e19a2ad1 310 if (error = VOP_GETATTR(vp, &vattr, cred, p))
225498c1 311 return (error);
e19a2ad1 312 uio->uio_offset = np->n_size;
225498c1
KM
313 }
314 }
e19a2ad1
KM
315 nmp = VFSTONFS(vp->v_mount);
316 if (uio->uio_offset < 0)
b71430cc 317 return (EINVAL);
e19a2ad1 318 if (uio->uio_resid == 0)
b71430cc 319 return (0);
39d108be
RM
320 /*
321 * Maybe this should be above the vnode op call, but so long as
322 * file servers have no limits, i don't think it matters
323 */
e19a2ad1 324 if (p && uio->uio_offset + uio->uio_resid >
4547a923
MK
325 p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
326 psignal(p, SIGXFSZ);
b71430cc 327 return (EFBIG);
39d108be 328 }
170bfd05
KM
329 /*
330 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
331 * will be the same size within a filesystem. nfs_writerpc will
332 * still use nm_wsize when sizing the rpc's.
333 */
2c5b44a2 334 biosize = nmp->nm_rsize;
f0f1cbaa 335 np->n_flag |= NMODIFIED;
39d108be 336 do {
2c5b44a2
KM
337
338 /*
339 * Check for a valid write lease.
340 * If non-cachable, just do the rpc
341 */
342 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
e19a2ad1 343 NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
2c5b44a2 344 do {
e19a2ad1 345 error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
2c5b44a2
KM
346 } while (error == NQNFS_EXPIRED);
347 if (error)
348 return (error);
e19a2ad1 349 if (np->n_lrev != np->n_brev ||
2c5b44a2 350 (np->n_flag & NQNFSNONCACHE)) {
e19a2ad1 351 vinvalbuf(vp, TRUE, cred, p);
2c5b44a2
KM
352 np->n_brev = np->n_lrev;
353 }
354 }
355 if (np->n_flag & NQNFSNONCACHE)
e19a2ad1 356 return (nfs_writerpc(vp, uio, cred));
e8540f59 357 nfsstats.biocache_writes++;
e19a2ad1
KM
358 lbn = uio->uio_offset / biosize;
359 on = uio->uio_offset & (biosize-1);
360 n = MIN((unsigned)(biosize - on), uio->uio_resid);
361 if (uio->uio_offset + n > np->n_size) {
362 np->n_size = uio->uio_offset + n;
363 vnode_pager_setsize(vp, (u_long)np->n_size);
8986c97c 364 }
2c5b44a2 365 bn = lbn * (biosize / DEV_BSIZE);
141671b8 366again:
e19a2ad1 367 bp = getblk(vp, bn, biosize);
39d108be 368 if (bp->b_wcred == NOCRED) {
e19a2ad1
KM
369 crhold(cred);
370 bp->b_wcred = cred;
39d108be 371 }
2c5b44a2
KM
372
373 /*
374 * If the new write will leave a contiguous dirty
375 * area, just update the b_dirtyoff and b_dirtyend,
376 * otherwise force a write rpc of the old dirty area.
377 */
378 if (bp->b_dirtyend > 0 &&
379 (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
380 bp->b_proc = p;
381 if (error = bwrite(bp))
382 return (error);
383 goto again;
384 }
385
386 /*
387 * Check for valid write lease and get one as required.
388 * In case getblk() and/or bwrite() delayed us.
389 */
390 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
e19a2ad1 391 NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
2c5b44a2 392 do {
e19a2ad1 393 error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
2c5b44a2
KM
394 } while (error == NQNFS_EXPIRED);
395 if (error) {
396 brelse(bp);
397 return (error);
398 }
e19a2ad1 399 if (np->n_lrev != np->n_brev ||
2c5b44a2 400 (np->n_flag & NQNFSNONCACHE)) {
e19a2ad1 401 vinvalbuf(vp, TRUE, cred, p);
2c5b44a2 402 np->n_brev = np->n_lrev;
39d108be 403 }
39d108be 404 }
e19a2ad1 405 if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
141671b8 406 brelse(bp);
b71430cc 407 return (error);
141671b8 408 }
2c5b44a2
KM
409 if (bp->b_dirtyend > 0) {
410 bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
411 bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
412 } else {
413 bp->b_dirtyoff = on;
414 bp->b_dirtyend = on+n;
415 }
416 if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
417 bp->b_validoff > bp->b_dirtyend) {
418 bp->b_validoff = bp->b_dirtyoff;
419 bp->b_validend = bp->b_dirtyend;
420 } else {
421 bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff);
422 bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend);
423 }
424
425 /*
426 * If the lease is non-cachable or IO_SYNC do bwrite().
427 */
e19a2ad1 428 if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
2c5b44a2
KM
429 bp->b_proc = p;
430 bwrite(bp);
431 } else if ((n+on) == biosize &&
432 (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
39d108be 433 bp->b_flags |= B_AGE;
f0f1cbaa 434 bp->b_proc = (struct proc *)0;
39d108be
RM
435 bawrite(bp);
436 } else {
f0f1cbaa 437 bp->b_proc = (struct proc *)0;
39d108be
RM
438 bdwrite(bp);
439 }
e19a2ad1 440 } while (error == 0 && uio->uio_resid > 0 && n != 0);
39d108be
RM
441 return (error);
442}