Commit | Line | Data |
---|---|---|
39d108be RM |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Rick Macklem at The University of Guelph. | |
7 | * | |
dbf0c423 | 8 | * %sccs.include.redist.c% |
39d108be | 9 | * |
7e11a0c9 | 10 | * @(#)nfs_bio.c 7.27 (Berkeley) %G% |
39d108be RM |
11 | */ |
12 | ||
400a1380 KM |
13 | #include <sys/param.h> |
14 | #include <sys/resourcevar.h> | |
15 | #include <sys/proc.h> | |
16 | #include <sys/buf.h> | |
17 | #include <sys/vnode.h> | |
18 | #include <sys/trace.h> | |
19 | #include <sys/mount.h> | |
20 | #include <sys/kernel.h> | |
21 | #include <machine/endian.h> | |
22 | #include <vm/vm.h> | |
23 | #include <nfs/nfsnode.h> | |
24 | #include <nfs/rpcv2.h> | |
25 | #include <nfs/nfsv2.h> | |
26 | #include <nfs/nfs.h> | |
27 | #include <nfs/nfsmount.h> | |
28 | #include <nfs/nqnfs.h> | |
39d108be RM |
29 | |
/*
 * True and false, how exciting.
 * Guarded so that a header which already supplies either name does not
 * produce a conflicting redefinition here.
 */
#ifndef TRUE
#define	TRUE	1
#endif
#ifndef FALSE
#define	FALSE	0
#endif
33 | ||
34 | /* | |
35 | * Vnode op for read using bio | |
36 | * Any similarity to readip() is purely coincidental | |
37 | */ | |
f0f1cbaa | 38 | nfs_bioread(vp, uio, ioflag, cred) |
39d108be | 39 | register struct vnode *vp; |
170bfd05 | 40 | register struct uio *uio; |
39d108be RM |
41 | int ioflag; |
42 | struct ucred *cred; | |
43 | { | |
9342689a | 44 | USES_VOP_GETATTR; |
39d108be | 45 | register struct nfsnode *np = VTONFS(vp); |
170bfd05 | 46 | register int biosize; |
39d108be RM |
47 | struct buf *bp; |
48 | struct vattr vattr; | |
2c5b44a2 KM |
49 | struct nfsmount *nmp; |
50 | daddr_t lbn, bn, rablock[NFS_MAXRAHEAD]; | |
51 | int rasize[NFS_MAXRAHEAD], nra, diff, error = 0; | |
52 | int n, on; | |
39d108be | 53 | |
d4e5799e KM |
54 | #ifdef lint |
55 | ioflag = ioflag; | |
56 | #endif /* lint */ | |
b40809cc | 57 | #ifdef DIAGNOSTIC |
39d108be RM |
58 | if (uio->uio_rw != UIO_READ) |
59 | panic("nfs_read mode"); | |
b40809cc | 60 | #endif |
39d108be | 61 | if (uio->uio_resid == 0) |
b71430cc | 62 | return (0); |
f0f1cbaa | 63 | if (uio->uio_offset < 0 && vp->v_type != VDIR) |
b71430cc | 64 | return (EINVAL); |
2c5b44a2 KM |
65 | nmp = VFSTONFS(vp->v_mount); |
66 | biosize = nmp->nm_rsize; | |
39d108be | 67 | /* |
2c5b44a2 KM |
68 | * For nfs, cache consistency can only be maintained approximately. |
69 | * Although RFC1094 does not specify the criteria, the following is | |
70 | * believed to be compatible with the reference port. | |
71 | * For nqnfs, full cache consistency is maintained within the loop. | |
72 | * For nfs: | |
39d108be RM |
73 | * If the file's modify time on the server has changed since the |
74 | * last read rpc or you have written to the file, | |
75 | * you may have lost data cache consistency with the | |
76 | * server, so flush all of the file's data out of the cache. | |
f0f1cbaa KM |
77 | * Then force a getattr rpc to ensure that you have up to date |
78 | * attributes. | |
2c5b44a2 KM |
79 | * The mount flag NFSMNT_MYWRITE says "Assume that my writes are |
80 | * the ones changing the modify time. | |
39d108be RM |
81 | * NB: This implies that cache data can be read when up to |
82 | * NFS_ATTRTIMEO seconds out of date. If you find that you need current | |
83 | * attributes this could be forced by setting n_attrstamp to 0 before | |
9342689a | 84 | * the VOP_GETATTR() call. |
39d108be | 85 | */ |
2c5b44a2 | 86 | if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { |
f0f1cbaa KM |
87 | if (np->n_flag & NMODIFIED) { |
88 | np->n_flag &= ~NMODIFIED; | |
2c5b44a2 KM |
89 | if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || |
90 | vp->v_type != VREG) | |
91 | vinvalbuf(vp, TRUE); | |
f0f1cbaa KM |
92 | np->n_attrstamp = 0; |
93 | np->n_direofoffset = 0; | |
9342689a | 94 | if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) |
e8540f59 | 95 | return (error); |
7e11a0c9 | 96 | np->n_mtime = vattr.va_mtime.ts_sec; |
f0f1cbaa | 97 | } else { |
9342689a | 98 | if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) |
f0f1cbaa | 99 | return (error); |
7e11a0c9 | 100 | if (np->n_mtime != vattr.va_mtime.ts_sec) { |
f0f1cbaa KM |
101 | np->n_direofoffset = 0; |
102 | vinvalbuf(vp, TRUE); | |
7e11a0c9 | 103 | np->n_mtime = vattr.va_mtime.ts_sec; |
f0f1cbaa | 104 | } |
39d108be RM |
105 | } |
106 | } | |
107 | do { | |
2c5b44a2 KM |
108 | |
109 | /* | |
110 | * Get a valid lease. If cached data is stale, flush it. | |
111 | */ | |
112 | if ((nmp->nm_flag & NFSMNT_NQNFS) && | |
113 | NQNFS_CKINVALID(vp, np, NQL_READ)) { | |
114 | do { | |
115 | error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); | |
116 | } while (error == NQNFS_EXPIRED); | |
117 | if (error) | |
118 | return (error); | |
119 | if (QUADNE(np->n_lrev, np->n_brev) || | |
120 | ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { | |
121 | if (vp->v_type == VDIR) { | |
122 | np->n_direofoffset = 0; | |
123 | cache_purge(vp); | |
124 | } | |
125 | np->n_flag &= ~NMODIFIED; | |
126 | vinvalbuf(vp, TRUE); | |
127 | np->n_brev = np->n_lrev; | |
128 | } | |
129 | } | |
130 | if (np->n_flag & NQNFSNONCACHE) { | |
131 | switch (vp->v_type) { | |
132 | case VREG: | |
133 | error = nfs_readrpc(vp, uio, cred); | |
134 | break; | |
135 | case VLNK: | |
136 | error = nfs_readlinkrpc(vp, uio, cred); | |
137 | break; | |
138 | case VDIR: | |
139 | error = nfs_readdirrpc(vp, uio, cred); | |
140 | break; | |
141 | }; | |
142 | return (error); | |
143 | } | |
f0f1cbaa KM |
144 | switch (vp->v_type) { |
145 | case VREG: | |
e8540f59 | 146 | nfsstats.biocache_reads++; |
170bfd05 KM |
147 | lbn = uio->uio_offset / biosize; |
148 | on = uio->uio_offset & (biosize-1); | |
149 | n = MIN((unsigned)(biosize - on), uio->uio_resid); | |
39d108be RM |
150 | diff = np->n_size - uio->uio_offset; |
151 | if (diff <= 0) | |
b71430cc | 152 | return (error); |
39d108be RM |
153 | if (diff < n) |
154 | n = diff; | |
170bfd05 | 155 | bn = lbn*(biosize/DEV_BSIZE); |
2c5b44a2 KM |
156 | for (nra = 0; nra < nmp->nm_readahead && |
157 | (lbn + 1 + nra) * biosize < np->n_size; nra++) { | |
158 | rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE); | |
159 | rasize[nra] = biosize; | |
160 | } | |
161 | again: | |
162 | if (nra > 0 && lbn >= vp->v_lastr) | |
163 | error = breadn(vp, bn, biosize, rablock, rasize, nra, | |
39d108be RM |
164 | cred, &bp); |
165 | else | |
170bfd05 | 166 | error = bread(vp, bn, biosize, cred, &bp); |
2c5b44a2 KM |
167 | if (bp->b_validend > 0) { |
168 | if (on < bp->b_validoff || (on+n) > bp->b_validend) { | |
169 | bp->b_flags |= B_INVAL; | |
170 | if (bp->b_dirtyend > 0) { | |
171 | if ((bp->b_flags & B_DELWRI) == 0) | |
172 | panic("nfsbioread"); | |
173 | (void) bwrite(bp); | |
174 | } else | |
175 | brelse(bp); | |
176 | goto again; | |
177 | } | |
178 | } else { | |
179 | bp->b_validoff = 0; | |
180 | bp->b_validend = biosize - bp->b_resid; | |
181 | } | |
d1a28114 | 182 | vp->v_lastr = lbn; |
39d108be | 183 | if (bp->b_resid) { |
170bfd05 KM |
184 | diff = (on >= (biosize-bp->b_resid)) ? 0 : |
185 | (biosize-bp->b_resid-on); | |
f0f1cbaa | 186 | n = MIN(n, diff); |
39d108be | 187 | } |
f0f1cbaa KM |
188 | break; |
189 | case VLNK: | |
190 | nfsstats.biocache_readlinks++; | |
191 | on = 0; | |
192 | error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp); | |
193 | n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); | |
194 | break; | |
195 | case VDIR: | |
196 | nfsstats.biocache_readdirs++; | |
197 | on = 0; | |
b40809cc KM |
198 | error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp); |
199 | n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); | |
f0f1cbaa KM |
200 | break; |
201 | }; | |
202 | if (error) { | |
203 | brelse(bp); | |
204 | return (error); | |
205 | } | |
2c5b44a2 KM |
206 | |
207 | /* | |
208 | * For nqnfs: | |
209 | * Must check for valid lease, since it may have expired while in | |
210 | * bread(). If expired, get a lease. | |
211 | * If data is stale, flush and try again. | |
212 | * nb: If a read rpc is done by bread() or breada() and there is | |
213 | * no valid lease, a get_lease request will be piggy backed. | |
214 | */ | |
215 | if (nmp->nm_flag & NFSMNT_NQNFS) { | |
216 | if (NQNFS_CKINVALID(vp, np, NQL_READ)) { | |
217 | do { | |
218 | error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); | |
219 | } while (error == NQNFS_EXPIRED); | |
220 | if (error) { | |
221 | brelse(bp); | |
222 | return (error); | |
223 | } | |
224 | if ((np->n_flag & NQNFSNONCACHE) || | |
225 | QUADNE(np->n_lrev, np->n_brev) || | |
226 | ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { | |
227 | if (vp->v_type == VDIR) { | |
228 | np->n_direofoffset = 0; | |
229 | cache_purge(vp); | |
230 | } | |
231 | brelse(bp); | |
232 | np->n_flag &= ~NMODIFIED; | |
233 | vinvalbuf(vp, TRUE); | |
234 | np->n_brev = np->n_lrev; | |
235 | continue; | |
236 | } | |
237 | } else if ((np->n_flag & NQNFSNONCACHE) || | |
238 | ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { | |
239 | np->n_direofoffset = 0; | |
240 | brelse(bp); | |
241 | np->n_flag &= ~NMODIFIED; | |
242 | vinvalbuf(vp, TRUE); | |
243 | np->n_brev = np->n_lrev; | |
244 | continue; | |
245 | } | |
246 | } | |
f0f1cbaa KM |
247 | if (n > 0) |
248 | error = uiomove(bp->b_un.b_addr + on, (int)n, uio); | |
249 | switch (vp->v_type) { | |
250 | case VREG: | |
170bfd05 | 251 | if (n+on == biosize || uio->uio_offset == np->n_size) |
39d108be | 252 | bp->b_flags |= B_AGE; |
f0f1cbaa KM |
253 | break; |
254 | case VLNK: | |
255 | n = 0; | |
256 | break; | |
257 | case VDIR: | |
258 | uio->uio_offset = bp->b_blkno; | |
259 | break; | |
260 | }; | |
261 | brelse(bp); | |
39d108be | 262 | } while (error == 0 && uio->uio_resid > 0 && n != 0); |
39d108be RM |
263 | return (error); |
264 | } | |
265 | ||
266 | /* | |
267 | * Vnode op for write using bio | |
268 | */ | |
9342689a JH |
269 | nfs_write (ap) |
270 | struct vop_write_args *ap; | |
39d108be | 271 | { |
9342689a | 272 | USES_VOP_GETATTR; |
170bfd05 | 273 | register int biosize; |
e1b76915 | 274 | struct proc *p = ap->a_uio->uio_procp; |
39d108be | 275 | struct buf *bp; |
e1b76915 | 276 | struct nfsnode *np = VTONFS(ap->a_vp); |
f0f1cbaa | 277 | struct vattr vattr; |
2c5b44a2 | 278 | struct nfsmount *nmp; |
39d108be | 279 | daddr_t lbn, bn; |
3c15394b | 280 | int n, on, error = 0; |
39d108be | 281 | |
b40809cc | 282 | #ifdef DIAGNOSTIC |
e1b76915 | 283 | if (ap->a_uio->uio_rw != UIO_WRITE) |
f0f1cbaa | 284 | panic("nfs_write mode"); |
e1b76915 | 285 | if (ap->a_uio->uio_segflg == UIO_USERSPACE && ap->a_uio->uio_procp != curproc) |
b40809cc KM |
286 | panic("nfs_write proc"); |
287 | #endif | |
e1b76915 | 288 | if (ap->a_vp->v_type != VREG) |
f0f1cbaa | 289 | return (EIO); |
9b61ab4a KM |
290 | if (np->n_flag & NWRITEERR) { |
291 | np->n_flag &= ~NWRITEERR; | |
292 | return (np->n_error); | |
293 | } | |
e1b76915 | 294 | if (ap->a_ioflag & (IO_APPEND | IO_SYNC)) { |
225498c1 KM |
295 | if (np->n_flag & NMODIFIED) { |
296 | np->n_flag &= ~NMODIFIED; | |
e1b76915 | 297 | vinvalbuf(ap->a_vp, TRUE); |
225498c1 | 298 | } |
e1b76915 | 299 | if (ap->a_ioflag & IO_APPEND) { |
225498c1 | 300 | np->n_attrstamp = 0; |
e1b76915 | 301 | if (error = VOP_GETATTR(ap->a_vp, &vattr, ap->a_cred, p)) |
225498c1 | 302 | return (error); |
e1b76915 | 303 | ap->a_uio->uio_offset = np->n_size; |
225498c1 KM |
304 | } |
305 | } | |
e1b76915 JH |
306 | nmp = VFSTONFS(ap->a_vp->v_mount); |
307 | if (ap->a_uio->uio_offset < 0) | |
b71430cc | 308 | return (EINVAL); |
e1b76915 | 309 | if (ap->a_uio->uio_resid == 0) |
b71430cc | 310 | return (0); |
39d108be RM |
311 | /* |
312 | * Maybe this should be above the vnode op call, but so long as | |
313 | * file servers have no limits, i don't think it matters | |
314 | */ | |
e1b76915 | 315 | if (p && ap->a_uio->uio_offset + ap->a_uio->uio_resid > |
4547a923 MK |
316 | p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { |
317 | psignal(p, SIGXFSZ); | |
b71430cc | 318 | return (EFBIG); |
39d108be | 319 | } |
170bfd05 KM |
320 | /* |
321 | * I use nm_rsize, not nm_wsize so that all buffer cache blocks | |
322 | * will be the same size within a filesystem. nfs_writerpc will | |
323 | * still use nm_wsize when sizing the rpc's. | |
324 | */ | |
2c5b44a2 | 325 | biosize = nmp->nm_rsize; |
f0f1cbaa | 326 | np->n_flag |= NMODIFIED; |
39d108be | 327 | do { |
2c5b44a2 KM |
328 | |
329 | /* | |
330 | * Check for a valid write lease. | |
331 | * If non-cachable, just do the rpc | |
332 | */ | |
333 | if ((nmp->nm_flag & NFSMNT_NQNFS) && | |
e1b76915 | 334 | NQNFS_CKINVALID(ap->a_vp, np, NQL_WRITE)) { |
2c5b44a2 | 335 | do { |
e1b76915 | 336 | error = nqnfs_getlease(ap->a_vp, NQL_WRITE, ap->a_cred, p); |
2c5b44a2 KM |
337 | } while (error == NQNFS_EXPIRED); |
338 | if (error) | |
339 | return (error); | |
340 | if (QUADNE(np->n_lrev, np->n_brev) || | |
341 | (np->n_flag & NQNFSNONCACHE)) { | |
e1b76915 | 342 | vinvalbuf(ap->a_vp, TRUE); |
2c5b44a2 KM |
343 | np->n_brev = np->n_lrev; |
344 | } | |
345 | } | |
346 | if (np->n_flag & NQNFSNONCACHE) | |
e1b76915 | 347 | return (nfs_writerpc(ap->a_vp, ap->a_uio, ap->a_cred)); |
e8540f59 | 348 | nfsstats.biocache_writes++; |
e1b76915 JH |
349 | lbn = ap->a_uio->uio_offset / biosize; |
350 | on = ap->a_uio->uio_offset & (biosize-1); | |
351 | n = MIN((unsigned)(biosize - on), ap->a_uio->uio_resid); | |
352 | if (ap->a_uio->uio_offset + n > np->n_size) { | |
353 | np->n_size = ap->a_uio->uio_offset + n; | |
354 | vnode_pager_setsize(ap->a_vp, (u_long)np->n_size); | |
8986c97c | 355 | } |
2c5b44a2 | 356 | bn = lbn * (biosize / DEV_BSIZE); |
141671b8 | 357 | again: |
e1b76915 | 358 | bp = getblk(ap->a_vp, bn, biosize); |
39d108be | 359 | if (bp->b_wcred == NOCRED) { |
e1b76915 JH |
360 | crhold(ap->a_cred); |
361 | bp->b_wcred = ap->a_cred; | |
39d108be | 362 | } |
2c5b44a2 KM |
363 | |
364 | /* | |
365 | * If the new write will leave a contiguous dirty | |
366 | * area, just update the b_dirtyoff and b_dirtyend, | |
367 | * otherwise force a write rpc of the old dirty area. | |
368 | */ | |
369 | if (bp->b_dirtyend > 0 && | |
370 | (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { | |
371 | bp->b_proc = p; | |
372 | if (error = bwrite(bp)) | |
373 | return (error); | |
374 | goto again; | |
375 | } | |
376 | ||
377 | /* | |
378 | * Check for valid write lease and get one as required. | |
379 | * In case getblk() and/or bwrite() delayed us. | |
380 | */ | |
381 | if ((nmp->nm_flag & NFSMNT_NQNFS) && | |
e1b76915 | 382 | NQNFS_CKINVALID(ap->a_vp, np, NQL_WRITE)) { |
2c5b44a2 | 383 | do { |
e1b76915 | 384 | error = nqnfs_getlease(ap->a_vp, NQL_WRITE, ap->a_cred, p); |
2c5b44a2 KM |
385 | } while (error == NQNFS_EXPIRED); |
386 | if (error) { | |
387 | brelse(bp); | |
388 | return (error); | |
389 | } | |
390 | if (QUADNE(np->n_lrev, np->n_brev) || | |
391 | (np->n_flag & NQNFSNONCACHE)) { | |
e1b76915 | 392 | vinvalbuf(ap->a_vp, TRUE); |
2c5b44a2 | 393 | np->n_brev = np->n_lrev; |
39d108be | 394 | } |
39d108be | 395 | } |
e1b76915 | 396 | if (error = uiomove(bp->b_un.b_addr + on, n, ap->a_uio)) { |
141671b8 | 397 | brelse(bp); |
b71430cc | 398 | return (error); |
141671b8 | 399 | } |
2c5b44a2 KM |
400 | if (bp->b_dirtyend > 0) { |
401 | bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); | |
402 | bp->b_dirtyend = MAX((on+n), bp->b_dirtyend); | |
403 | } else { | |
404 | bp->b_dirtyoff = on; | |
405 | bp->b_dirtyend = on+n; | |
406 | } | |
407 | if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || | |
408 | bp->b_validoff > bp->b_dirtyend) { | |
409 | bp->b_validoff = bp->b_dirtyoff; | |
410 | bp->b_validend = bp->b_dirtyend; | |
411 | } else { | |
412 | bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff); | |
413 | bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend); | |
414 | } | |
415 | ||
416 | /* | |
417 | * If the lease is non-cachable or IO_SYNC do bwrite(). | |
418 | */ | |
e1b76915 | 419 | if ((np->n_flag & NQNFSNONCACHE) || (ap->a_ioflag & IO_SYNC)) { |
2c5b44a2 KM |
420 | bp->b_proc = p; |
421 | bwrite(bp); | |
422 | } else if ((n+on) == biosize && | |
423 | (nmp->nm_flag & NFSMNT_NQNFS) == 0) { | |
39d108be | 424 | bp->b_flags |= B_AGE; |
f0f1cbaa | 425 | bp->b_proc = (struct proc *)0; |
39d108be RM |
426 | bawrite(bp); |
427 | } else { | |
f0f1cbaa | 428 | bp->b_proc = (struct proc *)0; |
39d108be RM |
429 | bdwrite(bp); |
430 | } | |
e1b76915 | 431 | } while (error == 0 && ap->a_uio->uio_resid > 0 && n != 0); |
39d108be RM |
432 | return (error); |
433 | } |