Commit | Line | Data |
---|---|---|
39d108be RM |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Rick Macklem at The University of Guelph. | |
7 | * | |
dbf0c423 | 8 | * %sccs.include.redist.c% |
39d108be | 9 | * |
ea67b335 | 10 | * @(#)nfs_bio.c 7.24 (Berkeley) %G% |
39d108be RM |
11 | */ |
12 | ||
400a1380 KM |
13 | #include <sys/param.h> |
14 | #include <sys/resourcevar.h> | |
15 | #include <sys/proc.h> | |
16 | #include <sys/buf.h> | |
17 | #include <sys/vnode.h> | |
18 | #include <sys/trace.h> | |
19 | #include <sys/mount.h> | |
20 | #include <sys/kernel.h> | |
21 | #include <machine/endian.h> | |
22 | #include <vm/vm.h> | |
23 | #include <nfs/nfsnode.h> | |
24 | #include <nfs/rpcv2.h> | |
25 | #include <nfs/nfsv2.h> | |
26 | #include <nfs/nfs.h> | |
27 | #include <nfs/nfsmount.h> | |
28 | #include <nfs/nqnfs.h> | |
39d108be RM |
29 | |
30 | /* True and false, how exciting */ | |
31 | #define TRUE 1 | |
32 | #define FALSE 0 | |
33 | ||
34 | /* | |
35 | * Vnode op for read using bio | |
36 | * Any similarity to readip() is purely coincidental | |
37 | */ | |
f0f1cbaa | 38 | nfs_bioread(vp, uio, ioflag, cred) |
39d108be | 39 | register struct vnode *vp; |
170bfd05 | 40 | register struct uio *uio; |
39d108be RM |
41 | int ioflag; |
42 | struct ucred *cred; | |
43 | { | |
9342689a | 44 | USES_VOP_GETATTR; |
39d108be | 45 | register struct nfsnode *np = VTONFS(vp); |
170bfd05 | 46 | register int biosize; |
39d108be RM |
47 | struct buf *bp; |
48 | struct vattr vattr; | |
2c5b44a2 KM |
49 | struct nfsmount *nmp; |
50 | daddr_t lbn, bn, rablock[NFS_MAXRAHEAD]; | |
51 | int rasize[NFS_MAXRAHEAD], nra, diff, error = 0; | |
52 | int n, on; | |
39d108be | 53 | |
d4e5799e KM |
54 | #ifdef lint |
55 | ioflag = ioflag; | |
56 | #endif /* lint */ | |
b40809cc | 57 | #ifdef DIAGNOSTIC |
39d108be RM |
58 | if (uio->uio_rw != UIO_READ) |
59 | panic("nfs_read mode"); | |
b40809cc | 60 | #endif |
39d108be | 61 | if (uio->uio_resid == 0) |
b71430cc | 62 | return (0); |
f0f1cbaa | 63 | if (uio->uio_offset < 0 && vp->v_type != VDIR) |
b71430cc | 64 | return (EINVAL); |
2c5b44a2 KM |
65 | nmp = VFSTONFS(vp->v_mount); |
66 | biosize = nmp->nm_rsize; | |
39d108be | 67 | /* |
2c5b44a2 KM |
68 | * For nfs, cache consistency can only be maintained approximately. |
69 | * Although RFC1094 does not specify the criteria, the following is | |
70 | * believed to be compatible with the reference port. | |
71 | * For nqnfs, full cache consistency is maintained within the loop. | |
72 | * For nfs: | |
39d108be RM |
73 | * If the file's modify time on the server has changed since the |
74 | * last read rpc or you have written to the file, | |
75 | * you may have lost data cache consistency with the | |
76 | * server, so flush all of the file's data out of the cache. | |
f0f1cbaa KM |
77 | * Then force a getattr rpc to ensure that you have up to date |
78 | * attributes. | |
2c5b44a2 KM |
79 | * The mount flag NFSMNT_MYWRITE says "Assume that my writes are |
80 | * the ones changing the modify time. | |
39d108be RM |
81 | * NB: This implies that cache data can be read when up to |
82 | * NFS_ATTRTIMEO seconds out of date. If you find that you need current | |
83 | * attributes this could be forced by setting n_attrstamp to 0 before | |
9342689a | 84 | * the VOP_GETATTR() call. |
39d108be | 85 | */ |
2c5b44a2 | 86 | if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { |
f0f1cbaa KM |
87 | if (np->n_flag & NMODIFIED) { |
88 | np->n_flag &= ~NMODIFIED; | |
2c5b44a2 KM |
89 | if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || |
90 | vp->v_type != VREG) | |
91 | vinvalbuf(vp, TRUE); | |
f0f1cbaa KM |
92 | np->n_attrstamp = 0; |
93 | np->n_direofoffset = 0; | |
9342689a | 94 | if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) |
e8540f59 KM |
95 | return (error); |
96 | np->n_mtime = vattr.va_mtime.tv_sec; | |
f0f1cbaa | 97 | } else { |
9342689a | 98 | if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) |
f0f1cbaa KM |
99 | return (error); |
100 | if (np->n_mtime != vattr.va_mtime.tv_sec) { | |
101 | np->n_direofoffset = 0; | |
102 | vinvalbuf(vp, TRUE); | |
103 | np->n_mtime = vattr.va_mtime.tv_sec; | |
104 | } | |
39d108be RM |
105 | } |
106 | } | |
107 | do { | |
2c5b44a2 KM |
108 | |
109 | /* | |
110 | * Get a valid lease. If cached data is stale, flush it. | |
111 | */ | |
112 | if ((nmp->nm_flag & NFSMNT_NQNFS) && | |
113 | NQNFS_CKINVALID(vp, np, NQL_READ)) { | |
114 | do { | |
115 | error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); | |
116 | } while (error == NQNFS_EXPIRED); | |
117 | if (error) | |
118 | return (error); | |
119 | if (QUADNE(np->n_lrev, np->n_brev) || | |
120 | ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { | |
121 | if (vp->v_type == VDIR) { | |
122 | np->n_direofoffset = 0; | |
123 | cache_purge(vp); | |
124 | } | |
125 | np->n_flag &= ~NMODIFIED; | |
126 | vinvalbuf(vp, TRUE); | |
127 | np->n_brev = np->n_lrev; | |
128 | } | |
129 | } | |
130 | if (np->n_flag & NQNFSNONCACHE) { | |
131 | switch (vp->v_type) { | |
132 | case VREG: | |
133 | error = nfs_readrpc(vp, uio, cred); | |
134 | break; | |
135 | case VLNK: | |
136 | error = nfs_readlinkrpc(vp, uio, cred); | |
137 | break; | |
138 | case VDIR: | |
139 | error = nfs_readdirrpc(vp, uio, cred); | |
140 | break; | |
141 | }; | |
142 | return (error); | |
143 | } | |
f0f1cbaa KM |
144 | switch (vp->v_type) { |
145 | case VREG: | |
e8540f59 | 146 | nfsstats.biocache_reads++; |
170bfd05 KM |
147 | lbn = uio->uio_offset / biosize; |
148 | on = uio->uio_offset & (biosize-1); | |
149 | n = MIN((unsigned)(biosize - on), uio->uio_resid); | |
39d108be RM |
150 | diff = np->n_size - uio->uio_offset; |
151 | if (diff <= 0) | |
b71430cc | 152 | return (error); |
39d108be RM |
153 | if (diff < n) |
154 | n = diff; | |
170bfd05 | 155 | bn = lbn*(biosize/DEV_BSIZE); |
2c5b44a2 KM |
156 | for (nra = 0; nra < nmp->nm_readahead && |
157 | (lbn + 1 + nra) * biosize < np->n_size; nra++) { | |
158 | rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE); | |
159 | rasize[nra] = biosize; | |
160 | } | |
161 | again: | |
162 | if (nra > 0 && lbn >= vp->v_lastr) | |
163 | error = breadn(vp, bn, biosize, rablock, rasize, nra, | |
39d108be RM |
164 | cred, &bp); |
165 | else | |
170bfd05 | 166 | error = bread(vp, bn, biosize, cred, &bp); |
2c5b44a2 KM |
167 | if (bp->b_validend > 0) { |
168 | if (on < bp->b_validoff || (on+n) > bp->b_validend) { | |
169 | bp->b_flags |= B_INVAL; | |
170 | if (bp->b_dirtyend > 0) { | |
171 | if ((bp->b_flags & B_DELWRI) == 0) | |
172 | panic("nfsbioread"); | |
173 | (void) bwrite(bp); | |
174 | } else | |
175 | brelse(bp); | |
176 | goto again; | |
177 | } | |
178 | } else { | |
179 | bp->b_validoff = 0; | |
180 | bp->b_validend = biosize - bp->b_resid; | |
181 | } | |
d1a28114 | 182 | vp->v_lastr = lbn; |
39d108be | 183 | if (bp->b_resid) { |
170bfd05 KM |
184 | diff = (on >= (biosize-bp->b_resid)) ? 0 : |
185 | (biosize-bp->b_resid-on); | |
f0f1cbaa | 186 | n = MIN(n, diff); |
39d108be | 187 | } |
f0f1cbaa KM |
188 | break; |
189 | case VLNK: | |
190 | nfsstats.biocache_readlinks++; | |
191 | on = 0; | |
192 | error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp); | |
193 | n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); | |
194 | break; | |
195 | case VDIR: | |
196 | nfsstats.biocache_readdirs++; | |
197 | on = 0; | |
b40809cc KM |
198 | error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp); |
199 | n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); | |
f0f1cbaa KM |
200 | break; |
201 | }; | |
202 | if (error) { | |
203 | brelse(bp); | |
204 | return (error); | |
205 | } | |
2c5b44a2 KM |
206 | |
207 | /* | |
208 | * For nqnfs: | |
209 | * Must check for valid lease, since it may have expired while in | |
210 | * bread(). If expired, get a lease. | |
211 | * If data is stale, flush and try again. | |
212 | * nb: If a read rpc is done by bread() or breada() and there is | |
213 | * no valid lease, a get_lease request will be piggy backed. | |
214 | */ | |
215 | if (nmp->nm_flag & NFSMNT_NQNFS) { | |
216 | if (NQNFS_CKINVALID(vp, np, NQL_READ)) { | |
217 | do { | |
218 | error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); | |
219 | } while (error == NQNFS_EXPIRED); | |
220 | if (error) { | |
221 | brelse(bp); | |
222 | return (error); | |
223 | } | |
224 | if ((np->n_flag & NQNFSNONCACHE) || | |
225 | QUADNE(np->n_lrev, np->n_brev) || | |
226 | ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { | |
227 | if (vp->v_type == VDIR) { | |
228 | np->n_direofoffset = 0; | |
229 | cache_purge(vp); | |
230 | } | |
231 | brelse(bp); | |
232 | np->n_flag &= ~NMODIFIED; | |
233 | vinvalbuf(vp, TRUE); | |
234 | np->n_brev = np->n_lrev; | |
235 | continue; | |
236 | } | |
237 | } else if ((np->n_flag & NQNFSNONCACHE) || | |
238 | ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { | |
239 | np->n_direofoffset = 0; | |
240 | brelse(bp); | |
241 | np->n_flag &= ~NMODIFIED; | |
242 | vinvalbuf(vp, TRUE); | |
243 | np->n_brev = np->n_lrev; | |
244 | continue; | |
245 | } | |
246 | } | |
f0f1cbaa KM |
247 | if (n > 0) |
248 | error = uiomove(bp->b_un.b_addr + on, (int)n, uio); | |
249 | switch (vp->v_type) { | |
250 | case VREG: | |
170bfd05 | 251 | if (n+on == biosize || uio->uio_offset == np->n_size) |
39d108be | 252 | bp->b_flags |= B_AGE; |
f0f1cbaa KM |
253 | break; |
254 | case VLNK: | |
255 | n = 0; | |
256 | break; | |
257 | case VDIR: | |
258 | uio->uio_offset = bp->b_blkno; | |
259 | break; | |
260 | }; | |
261 | brelse(bp); | |
39d108be | 262 | } while (error == 0 && uio->uio_resid > 0 && n != 0); |
39d108be RM |
263 | return (error); |
264 | } | |
265 | ||
266 | /* | |
267 | * Vnode op for write using bio | |
268 | */ | |
9342689a JH |
269 | nfs_write (ap) |
270 | struct vop_write_args *ap; | |
271 | #define vp (ap->a_vp) | |
272 | #define uio (ap->a_uio) | |
273 | #define ioflag (ap->a_ioflag) | |
274 | #define cred (ap->a_cred) | |
39d108be | 275 | { |
9342689a | 276 | USES_VOP_GETATTR; |
170bfd05 | 277 | register int biosize; |
2c5b44a2 | 278 | struct proc *p = uio->uio_procp; |
39d108be RM |
279 | struct buf *bp; |
280 | struct nfsnode *np = VTONFS(vp); | |
f0f1cbaa | 281 | struct vattr vattr; |
2c5b44a2 | 282 | struct nfsmount *nmp; |
39d108be | 283 | daddr_t lbn, bn; |
3c15394b | 284 | int n, on, error = 0; |
39d108be | 285 | |
b40809cc | 286 | #ifdef DIAGNOSTIC |
f0f1cbaa KM |
287 | if (uio->uio_rw != UIO_WRITE) |
288 | panic("nfs_write mode"); | |
b40809cc KM |
289 | if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) |
290 | panic("nfs_write proc"); | |
291 | #endif | |
f0f1cbaa KM |
292 | if (vp->v_type != VREG) |
293 | return (EIO); | |
225498c1 KM |
294 | if (ioflag & (IO_APPEND | IO_SYNC)) { |
295 | if (np->n_flag & NMODIFIED) { | |
296 | np->n_flag &= ~NMODIFIED; | |
297 | vinvalbuf(vp, TRUE); | |
298 | } | |
299 | if (ioflag & IO_APPEND) { | |
300 | np->n_attrstamp = 0; | |
9342689a | 301 | if (error = VOP_GETATTR(vp, &vattr, cred, p)) |
225498c1 KM |
302 | return (error); |
303 | uio->uio_offset = np->n_size; | |
304 | } | |
305 | } | |
2c5b44a2 | 306 | nmp = VFSTONFS(vp->v_mount); |
b71430cc KM |
307 | if (uio->uio_offset < 0) |
308 | return (EINVAL); | |
39d108be | 309 | if (uio->uio_resid == 0) |
b71430cc | 310 | return (0); |
39d108be RM |
311 | /* |
312 | * Maybe this should be above the vnode op call, but so long as | |
313 | * file servers have no limits, i don't think it matters | |
314 | */ | |
2c5b44a2 | 315 | if (p && uio->uio_offset + uio->uio_resid > |
4547a923 MK |
316 | p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { |
317 | psignal(p, SIGXFSZ); | |
b71430cc | 318 | return (EFBIG); |
39d108be | 319 | } |
170bfd05 KM |
320 | /* |
321 | * I use nm_rsize, not nm_wsize so that all buffer cache blocks | |
322 | * will be the same size within a filesystem. nfs_writerpc will | |
323 | * still use nm_wsize when sizing the rpc's. | |
324 | */ | |
2c5b44a2 | 325 | biosize = nmp->nm_rsize; |
f0f1cbaa | 326 | np->n_flag |= NMODIFIED; |
39d108be | 327 | do { |
2c5b44a2 KM |
328 | |
329 | /* | |
330 | * Check for a valid write lease. | |
331 | * If non-cachable, just do the rpc | |
332 | */ | |
333 | if ((nmp->nm_flag & NFSMNT_NQNFS) && | |
334 | NQNFS_CKINVALID(vp, np, NQL_WRITE)) { | |
335 | do { | |
336 | error = nqnfs_getlease(vp, NQL_WRITE, cred, p); | |
337 | } while (error == NQNFS_EXPIRED); | |
338 | if (error) | |
339 | return (error); | |
340 | if (QUADNE(np->n_lrev, np->n_brev) || | |
341 | (np->n_flag & NQNFSNONCACHE)) { | |
342 | vinvalbuf(vp, TRUE); | |
343 | np->n_brev = np->n_lrev; | |
344 | } | |
345 | } | |
346 | if (np->n_flag & NQNFSNONCACHE) | |
347 | return (nfs_writerpc(vp, uio, cred)); | |
e8540f59 | 348 | nfsstats.biocache_writes++; |
170bfd05 KM |
349 | lbn = uio->uio_offset / biosize; |
350 | on = uio->uio_offset & (biosize-1); | |
351 | n = MIN((unsigned)(biosize - on), uio->uio_resid); | |
2c5b44a2 KM |
352 | if (uio->uio_offset + n > np->n_size) { |
353 | np->n_size = uio->uio_offset + n; | |
400a1380 | 354 | vnode_pager_setsize(vp, (u_long)np->n_size); |
8986c97c | 355 | } |
2c5b44a2 | 356 | bn = lbn * (biosize / DEV_BSIZE); |
141671b8 | 357 | again: |
170bfd05 | 358 | bp = getblk(vp, bn, biosize); |
39d108be RM |
359 | if (bp->b_wcred == NOCRED) { |
360 | crhold(cred); | |
361 | bp->b_wcred = cred; | |
362 | } | |
2c5b44a2 KM |
363 | |
364 | /* | |
365 | * If the new write will leave a contiguous dirty | |
366 | * area, just update the b_dirtyoff and b_dirtyend, | |
367 | * otherwise force a write rpc of the old dirty area. | |
368 | */ | |
369 | if (bp->b_dirtyend > 0 && | |
370 | (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { | |
371 | bp->b_proc = p; | |
372 | if (error = bwrite(bp)) | |
373 | return (error); | |
374 | goto again; | |
375 | } | |
376 | ||
377 | /* | |
378 | * Check for valid write lease and get one as required. | |
379 | * In case getblk() and/or bwrite() delayed us. | |
380 | */ | |
381 | if ((nmp->nm_flag & NFSMNT_NQNFS) && | |
382 | NQNFS_CKINVALID(vp, np, NQL_WRITE)) { | |
383 | do { | |
384 | error = nqnfs_getlease(vp, NQL_WRITE, cred, p); | |
385 | } while (error == NQNFS_EXPIRED); | |
386 | if (error) { | |
387 | brelse(bp); | |
388 | return (error); | |
389 | } | |
390 | if (QUADNE(np->n_lrev, np->n_brev) || | |
391 | (np->n_flag & NQNFSNONCACHE)) { | |
392 | vinvalbuf(vp, TRUE); | |
393 | np->n_brev = np->n_lrev; | |
39d108be | 394 | } |
39d108be | 395 | } |
141671b8 KM |
396 | if (error = uiomove(bp->b_un.b_addr + on, n, uio)) { |
397 | brelse(bp); | |
b71430cc | 398 | return (error); |
141671b8 | 399 | } |
2c5b44a2 KM |
400 | if (bp->b_dirtyend > 0) { |
401 | bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); | |
402 | bp->b_dirtyend = MAX((on+n), bp->b_dirtyend); | |
403 | } else { | |
404 | bp->b_dirtyoff = on; | |
405 | bp->b_dirtyend = on+n; | |
406 | } | |
407 | if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || | |
408 | bp->b_validoff > bp->b_dirtyend) { | |
409 | bp->b_validoff = bp->b_dirtyoff; | |
410 | bp->b_validend = bp->b_dirtyend; | |
411 | } else { | |
412 | bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff); | |
413 | bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend); | |
414 | } | |
415 | ||
416 | /* | |
417 | * If the lease is non-cachable or IO_SYNC do bwrite(). | |
418 | */ | |
419 | if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { | |
420 | bp->b_proc = p; | |
421 | bwrite(bp); | |
422 | } else if ((n+on) == biosize && | |
423 | (nmp->nm_flag & NFSMNT_NQNFS) == 0) { | |
39d108be | 424 | bp->b_flags |= B_AGE; |
f0f1cbaa | 425 | bp->b_proc = (struct proc *)0; |
39d108be RM |
426 | bawrite(bp); |
427 | } else { | |
f0f1cbaa | 428 | bp->b_proc = (struct proc *)0; |
39d108be RM |
429 | bdwrite(bp); |
430 | } | |
431 | } while (error == 0 && uio->uio_resid > 0 && n != 0); | |
39d108be RM |
432 | return (error); |
433 | } | |
9342689a JH |
434 | #undef vp |
435 | #undef uio | |
436 | #undef ioflag | |
437 | #undef cred |