Commit | Line | Data |
---|---|---|
39d108be RM |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Rick Macklem at The University of Guelph. | |
7 | * | |
dbf0c423 | 8 | * %sccs.include.redist.c% |
39d108be | 9 | * |
e19a2ad1 | 10 | * @(#)nfs_bio.c 7.28 (Berkeley) %G% |
39d108be RM |
11 | */ |
12 | ||
400a1380 KM |
13 | #include <sys/param.h> |
14 | #include <sys/resourcevar.h> | |
15 | #include <sys/proc.h> | |
16 | #include <sys/buf.h> | |
17 | #include <sys/vnode.h> | |
18 | #include <sys/trace.h> | |
19 | #include <sys/mount.h> | |
20 | #include <sys/kernel.h> | |
21 | #include <machine/endian.h> | |
22 | #include <vm/vm.h> | |
23 | #include <nfs/nfsnode.h> | |
24 | #include <nfs/rpcv2.h> | |
25 | #include <nfs/nfsv2.h> | |
26 | #include <nfs/nfs.h> | |
27 | #include <nfs/nfsmount.h> | |
28 | #include <nfs/nqnfs.h> | |
39d108be RM |
29 | |
/*
 * Boolean constants.  In this file TRUE is passed as the second argument
 * of vinvalbuf().  The definitions are guarded so that a header which
 * already provides these names does not cause a redefinition conflict.
 */
#ifndef TRUE
#define	TRUE	1
#endif
#ifndef FALSE
#define	FALSE	0
#endif
33 | ||
34 | /* | |
35 | * Vnode op for read using bio | |
36 | * Any similarity to readip() is purely coincidental | |
37 | */ | |
f0f1cbaa | 38 | nfs_bioread(vp, uio, ioflag, cred) |
39d108be | 39 | register struct vnode *vp; |
170bfd05 | 40 | register struct uio *uio; |
39d108be RM |
41 | int ioflag; |
42 | struct ucred *cred; | |
43 | { | |
9342689a | 44 | USES_VOP_GETATTR; |
39d108be | 45 | register struct nfsnode *np = VTONFS(vp); |
170bfd05 | 46 | register int biosize; |
39d108be RM |
47 | struct buf *bp; |
48 | struct vattr vattr; | |
2c5b44a2 KM |
49 | struct nfsmount *nmp; |
50 | daddr_t lbn, bn, rablock[NFS_MAXRAHEAD]; | |
51 | int rasize[NFS_MAXRAHEAD], nra, diff, error = 0; | |
52 | int n, on; | |
39d108be | 53 | |
d4e5799e KM |
54 | #ifdef lint |
55 | ioflag = ioflag; | |
56 | #endif /* lint */ | |
b40809cc | 57 | #ifdef DIAGNOSTIC |
39d108be RM |
58 | if (uio->uio_rw != UIO_READ) |
59 | panic("nfs_read mode"); | |
b40809cc | 60 | #endif |
39d108be | 61 | if (uio->uio_resid == 0) |
b71430cc | 62 | return (0); |
f0f1cbaa | 63 | if (uio->uio_offset < 0 && vp->v_type != VDIR) |
b71430cc | 64 | return (EINVAL); |
2c5b44a2 KM |
65 | nmp = VFSTONFS(vp->v_mount); |
66 | biosize = nmp->nm_rsize; | |
39d108be | 67 | /* |
2c5b44a2 KM |
68 | * For nfs, cache consistency can only be maintained approximately. |
69 | * Although RFC1094 does not specify the criteria, the following is | |
70 | * believed to be compatible with the reference port. | |
71 | * For nqnfs, full cache consistency is maintained within the loop. | |
72 | * For nfs: | |
39d108be RM |
73 | * If the file's modify time on the server has changed since the |
74 | * last read rpc or you have written to the file, | |
75 | * you may have lost data cache consistency with the | |
76 | * server, so flush all of the file's data out of the cache. | |
f0f1cbaa KM |
77 | * Then force a getattr rpc to ensure that you have up to date |
78 | * attributes. | |
2c5b44a2 KM |
79 | * The mount flag NFSMNT_MYWRITE says "Assume that my writes are |
80 | * the ones changing the modify time. | |
39d108be RM |
81 | * NB: This implies that cache data can be read when up to |
82 | * NFS_ATTRTIMEO seconds out of date. If you find that you need current | |
83 | * attributes this could be forced by setting n_attrstamp to 0 before | |
9342689a | 84 | * the VOP_GETATTR() call. |
39d108be | 85 | */ |
2c5b44a2 | 86 | if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { |
f0f1cbaa KM |
87 | if (np->n_flag & NMODIFIED) { |
88 | np->n_flag &= ~NMODIFIED; | |
2c5b44a2 KM |
89 | if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || |
90 | vp->v_type != VREG) | |
e19a2ad1 | 91 | vinvalbuf(vp, TRUE, cred, uio->uio_procp); |
f0f1cbaa KM |
92 | np->n_attrstamp = 0; |
93 | np->n_direofoffset = 0; | |
9342689a | 94 | if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) |
e8540f59 | 95 | return (error); |
7e11a0c9 | 96 | np->n_mtime = vattr.va_mtime.ts_sec; |
f0f1cbaa | 97 | } else { |
9342689a | 98 | if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) |
f0f1cbaa | 99 | return (error); |
7e11a0c9 | 100 | if (np->n_mtime != vattr.va_mtime.ts_sec) { |
f0f1cbaa | 101 | np->n_direofoffset = 0; |
e19a2ad1 | 102 | vinvalbuf(vp, TRUE, cred, uio->uio_procp); |
7e11a0c9 | 103 | np->n_mtime = vattr.va_mtime.ts_sec; |
f0f1cbaa | 104 | } |
39d108be RM |
105 | } |
106 | } | |
107 | do { | |
2c5b44a2 KM |
108 | |
109 | /* | |
110 | * Get a valid lease. If cached data is stale, flush it. | |
111 | */ | |
112 | if ((nmp->nm_flag & NFSMNT_NQNFS) && | |
113 | NQNFS_CKINVALID(vp, np, NQL_READ)) { | |
114 | do { | |
115 | error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); | |
116 | } while (error == NQNFS_EXPIRED); | |
117 | if (error) | |
118 | return (error); | |
e19a2ad1 | 119 | if (np->n_lrev != np->n_brev || |
2c5b44a2 KM |
120 | ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { |
121 | if (vp->v_type == VDIR) { | |
122 | np->n_direofoffset = 0; | |
123 | cache_purge(vp); | |
124 | } | |
125 | np->n_flag &= ~NMODIFIED; | |
e19a2ad1 | 126 | vinvalbuf(vp, TRUE, cred, uio->uio_procp); |
2c5b44a2 KM |
127 | np->n_brev = np->n_lrev; |
128 | } | |
129 | } | |
130 | if (np->n_flag & NQNFSNONCACHE) { | |
131 | switch (vp->v_type) { | |
132 | case VREG: | |
133 | error = nfs_readrpc(vp, uio, cred); | |
134 | break; | |
135 | case VLNK: | |
136 | error = nfs_readlinkrpc(vp, uio, cred); | |
137 | break; | |
138 | case VDIR: | |
139 | error = nfs_readdirrpc(vp, uio, cred); | |
140 | break; | |
141 | }; | |
142 | return (error); | |
143 | } | |
f0f1cbaa KM |
144 | switch (vp->v_type) { |
145 | case VREG: | |
e8540f59 | 146 | nfsstats.biocache_reads++; |
170bfd05 KM |
147 | lbn = uio->uio_offset / biosize; |
148 | on = uio->uio_offset & (biosize-1); | |
149 | n = MIN((unsigned)(biosize - on), uio->uio_resid); | |
39d108be RM |
150 | diff = np->n_size - uio->uio_offset; |
151 | if (diff <= 0) | |
b71430cc | 152 | return (error); |
39d108be RM |
153 | if (diff < n) |
154 | n = diff; | |
170bfd05 | 155 | bn = lbn*(biosize/DEV_BSIZE); |
2c5b44a2 KM |
156 | for (nra = 0; nra < nmp->nm_readahead && |
157 | (lbn + 1 + nra) * biosize < np->n_size; nra++) { | |
158 | rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE); | |
159 | rasize[nra] = biosize; | |
160 | } | |
161 | again: | |
162 | if (nra > 0 && lbn >= vp->v_lastr) | |
163 | error = breadn(vp, bn, biosize, rablock, rasize, nra, | |
39d108be RM |
164 | cred, &bp); |
165 | else | |
170bfd05 | 166 | error = bread(vp, bn, biosize, cred, &bp); |
2c5b44a2 KM |
167 | if (bp->b_validend > 0) { |
168 | if (on < bp->b_validoff || (on+n) > bp->b_validend) { | |
169 | bp->b_flags |= B_INVAL; | |
170 | if (bp->b_dirtyend > 0) { | |
171 | if ((bp->b_flags & B_DELWRI) == 0) | |
172 | panic("nfsbioread"); | |
173 | (void) bwrite(bp); | |
174 | } else | |
175 | brelse(bp); | |
176 | goto again; | |
177 | } | |
178 | } else { | |
179 | bp->b_validoff = 0; | |
180 | bp->b_validend = biosize - bp->b_resid; | |
181 | } | |
d1a28114 | 182 | vp->v_lastr = lbn; |
39d108be | 183 | if (bp->b_resid) { |
170bfd05 KM |
184 | diff = (on >= (biosize-bp->b_resid)) ? 0 : |
185 | (biosize-bp->b_resid-on); | |
f0f1cbaa | 186 | n = MIN(n, diff); |
39d108be | 187 | } |
f0f1cbaa KM |
188 | break; |
189 | case VLNK: | |
190 | nfsstats.biocache_readlinks++; | |
191 | on = 0; | |
192 | error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp); | |
193 | n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); | |
194 | break; | |
195 | case VDIR: | |
196 | nfsstats.biocache_readdirs++; | |
197 | on = 0; | |
b40809cc KM |
198 | error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp); |
199 | n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); | |
f0f1cbaa KM |
200 | break; |
201 | }; | |
202 | if (error) { | |
203 | brelse(bp); | |
204 | return (error); | |
205 | } | |
2c5b44a2 KM |
206 | |
207 | /* | |
208 | * For nqnfs: | |
209 | * Must check for valid lease, since it may have expired while in | |
210 | * bread(). If expired, get a lease. | |
211 | * If data is stale, flush and try again. | |
212 | * nb: If a read rpc is done by bread() or breada() and there is | |
213 | * no valid lease, a get_lease request will be piggy backed. | |
214 | */ | |
215 | if (nmp->nm_flag & NFSMNT_NQNFS) { | |
216 | if (NQNFS_CKINVALID(vp, np, NQL_READ)) { | |
217 | do { | |
218 | error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); | |
219 | } while (error == NQNFS_EXPIRED); | |
220 | if (error) { | |
221 | brelse(bp); | |
222 | return (error); | |
223 | } | |
224 | if ((np->n_flag & NQNFSNONCACHE) || | |
e19a2ad1 | 225 | np->n_lrev != np->n_brev || |
2c5b44a2 KM |
226 | ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { |
227 | if (vp->v_type == VDIR) { | |
228 | np->n_direofoffset = 0; | |
229 | cache_purge(vp); | |
230 | } | |
231 | brelse(bp); | |
232 | np->n_flag &= ~NMODIFIED; | |
e19a2ad1 | 233 | vinvalbuf(vp, TRUE, cred, uio->uio_procp); |
2c5b44a2 KM |
234 | np->n_brev = np->n_lrev; |
235 | continue; | |
236 | } | |
237 | } else if ((np->n_flag & NQNFSNONCACHE) || | |
238 | ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { | |
239 | np->n_direofoffset = 0; | |
240 | brelse(bp); | |
241 | np->n_flag &= ~NMODIFIED; | |
e19a2ad1 | 242 | vinvalbuf(vp, TRUE, cred, uio->uio_procp); |
2c5b44a2 KM |
243 | np->n_brev = np->n_lrev; |
244 | continue; | |
245 | } | |
246 | } | |
f0f1cbaa KM |
247 | if (n > 0) |
248 | error = uiomove(bp->b_un.b_addr + on, (int)n, uio); | |
249 | switch (vp->v_type) { | |
250 | case VREG: | |
170bfd05 | 251 | if (n+on == biosize || uio->uio_offset == np->n_size) |
39d108be | 252 | bp->b_flags |= B_AGE; |
f0f1cbaa KM |
253 | break; |
254 | case VLNK: | |
255 | n = 0; | |
256 | break; | |
257 | case VDIR: | |
258 | uio->uio_offset = bp->b_blkno; | |
259 | break; | |
260 | }; | |
261 | brelse(bp); | |
39d108be | 262 | } while (error == 0 && uio->uio_resid > 0 && n != 0); |
39d108be RM |
263 | return (error); |
264 | } | |
265 | ||
266 | /* | |
267 | * Vnode op for write using bio | |
268 | */ | |
9342689a | 269 | nfs_write (ap) |
e19a2ad1 KM |
270 | struct vop_write_args /* { |
271 | struct vnode *a_vp; | |
272 | struct uio *a_uio; | |
273 | int a_ioflag; | |
274 | struct ucred *a_cred; | |
275 | } */ *ap; | |
39d108be | 276 | { |
9342689a | 277 | USES_VOP_GETATTR; |
170bfd05 | 278 | register int biosize; |
e19a2ad1 KM |
279 | register struct uio *uio = ap->a_uio; |
280 | struct proc *p = uio->uio_procp; | |
281 | register struct vnode *vp = ap->a_vp; | |
282 | struct nfsnode *np = VTONFS(vp); | |
283 | register struct ucred *cred = ap->a_cred; | |
284 | int ioflag = ap->a_ioflag; | |
39d108be | 285 | struct buf *bp; |
f0f1cbaa | 286 | struct vattr vattr; |
2c5b44a2 | 287 | struct nfsmount *nmp; |
39d108be | 288 | daddr_t lbn, bn; |
3c15394b | 289 | int n, on, error = 0; |
39d108be | 290 | |
b40809cc | 291 | #ifdef DIAGNOSTIC |
e19a2ad1 | 292 | if (uio->uio_rw != UIO_WRITE) |
f0f1cbaa | 293 | panic("nfs_write mode"); |
e19a2ad1 | 294 | if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) |
b40809cc KM |
295 | panic("nfs_write proc"); |
296 | #endif | |
e19a2ad1 | 297 | if (vp->v_type != VREG) |
f0f1cbaa | 298 | return (EIO); |
9b61ab4a KM |
299 | if (np->n_flag & NWRITEERR) { |
300 | np->n_flag &= ~NWRITEERR; | |
301 | return (np->n_error); | |
302 | } | |
e19a2ad1 | 303 | if (ioflag & (IO_APPEND | IO_SYNC)) { |
225498c1 KM |
304 | if (np->n_flag & NMODIFIED) { |
305 | np->n_flag &= ~NMODIFIED; | |
e19a2ad1 | 306 | vinvalbuf(vp, TRUE, cred, p); |
225498c1 | 307 | } |
e19a2ad1 | 308 | if (ioflag & IO_APPEND) { |
225498c1 | 309 | np->n_attrstamp = 0; |
e19a2ad1 | 310 | if (error = VOP_GETATTR(vp, &vattr, cred, p)) |
225498c1 | 311 | return (error); |
e19a2ad1 | 312 | uio->uio_offset = np->n_size; |
225498c1 KM |
313 | } |
314 | } | |
e19a2ad1 KM |
315 | nmp = VFSTONFS(vp->v_mount); |
316 | if (uio->uio_offset < 0) | |
b71430cc | 317 | return (EINVAL); |
e19a2ad1 | 318 | if (uio->uio_resid == 0) |
b71430cc | 319 | return (0); |
39d108be RM |
320 | /* |
321 | * Maybe this should be above the vnode op call, but so long as | |
322 | * file servers have no limits, i don't think it matters | |
323 | */ | |
e19a2ad1 | 324 | if (p && uio->uio_offset + uio->uio_resid > |
4547a923 MK |
325 | p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { |
326 | psignal(p, SIGXFSZ); | |
b71430cc | 327 | return (EFBIG); |
39d108be | 328 | } |
170bfd05 KM |
329 | /* |
330 | * I use nm_rsize, not nm_wsize so that all buffer cache blocks | |
331 | * will be the same size within a filesystem. nfs_writerpc will | |
332 | * still use nm_wsize when sizing the rpc's. | |
333 | */ | |
2c5b44a2 | 334 | biosize = nmp->nm_rsize; |
f0f1cbaa | 335 | np->n_flag |= NMODIFIED; |
39d108be | 336 | do { |
2c5b44a2 KM |
337 | |
338 | /* | |
339 | * Check for a valid write lease. | |
340 | * If non-cachable, just do the rpc | |
341 | */ | |
342 | if ((nmp->nm_flag & NFSMNT_NQNFS) && | |
e19a2ad1 | 343 | NQNFS_CKINVALID(vp, np, NQL_WRITE)) { |
2c5b44a2 | 344 | do { |
e19a2ad1 | 345 | error = nqnfs_getlease(vp, NQL_WRITE, cred, p); |
2c5b44a2 KM |
346 | } while (error == NQNFS_EXPIRED); |
347 | if (error) | |
348 | return (error); | |
e19a2ad1 | 349 | if (np->n_lrev != np->n_brev || |
2c5b44a2 | 350 | (np->n_flag & NQNFSNONCACHE)) { |
e19a2ad1 | 351 | vinvalbuf(vp, TRUE, cred, p); |
2c5b44a2 KM |
352 | np->n_brev = np->n_lrev; |
353 | } | |
354 | } | |
355 | if (np->n_flag & NQNFSNONCACHE) | |
e19a2ad1 | 356 | return (nfs_writerpc(vp, uio, cred)); |
e8540f59 | 357 | nfsstats.biocache_writes++; |
e19a2ad1 KM |
358 | lbn = uio->uio_offset / biosize; |
359 | on = uio->uio_offset & (biosize-1); | |
360 | n = MIN((unsigned)(biosize - on), uio->uio_resid); | |
361 | if (uio->uio_offset + n > np->n_size) { | |
362 | np->n_size = uio->uio_offset + n; | |
363 | vnode_pager_setsize(vp, (u_long)np->n_size); | |
8986c97c | 364 | } |
2c5b44a2 | 365 | bn = lbn * (biosize / DEV_BSIZE); |
141671b8 | 366 | again: |
e19a2ad1 | 367 | bp = getblk(vp, bn, biosize); |
39d108be | 368 | if (bp->b_wcred == NOCRED) { |
e19a2ad1 KM |
369 | crhold(cred); |
370 | bp->b_wcred = cred; | |
39d108be | 371 | } |
2c5b44a2 KM |
372 | |
373 | /* | |
374 | * If the new write will leave a contiguous dirty | |
375 | * area, just update the b_dirtyoff and b_dirtyend, | |
376 | * otherwise force a write rpc of the old dirty area. | |
377 | */ | |
378 | if (bp->b_dirtyend > 0 && | |
379 | (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { | |
380 | bp->b_proc = p; | |
381 | if (error = bwrite(bp)) | |
382 | return (error); | |
383 | goto again; | |
384 | } | |
385 | ||
386 | /* | |
387 | * Check for valid write lease and get one as required. | |
388 | * In case getblk() and/or bwrite() delayed us. | |
389 | */ | |
390 | if ((nmp->nm_flag & NFSMNT_NQNFS) && | |
e19a2ad1 | 391 | NQNFS_CKINVALID(vp, np, NQL_WRITE)) { |
2c5b44a2 | 392 | do { |
e19a2ad1 | 393 | error = nqnfs_getlease(vp, NQL_WRITE, cred, p); |
2c5b44a2 KM |
394 | } while (error == NQNFS_EXPIRED); |
395 | if (error) { | |
396 | brelse(bp); | |
397 | return (error); | |
398 | } | |
e19a2ad1 | 399 | if (np->n_lrev != np->n_brev || |
2c5b44a2 | 400 | (np->n_flag & NQNFSNONCACHE)) { |
e19a2ad1 | 401 | vinvalbuf(vp, TRUE, cred, p); |
2c5b44a2 | 402 | np->n_brev = np->n_lrev; |
39d108be | 403 | } |
39d108be | 404 | } |
e19a2ad1 | 405 | if (error = uiomove(bp->b_un.b_addr + on, n, uio)) { |
141671b8 | 406 | brelse(bp); |
b71430cc | 407 | return (error); |
141671b8 | 408 | } |
2c5b44a2 KM |
409 | if (bp->b_dirtyend > 0) { |
410 | bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); | |
411 | bp->b_dirtyend = MAX((on+n), bp->b_dirtyend); | |
412 | } else { | |
413 | bp->b_dirtyoff = on; | |
414 | bp->b_dirtyend = on+n; | |
415 | } | |
416 | if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || | |
417 | bp->b_validoff > bp->b_dirtyend) { | |
418 | bp->b_validoff = bp->b_dirtyoff; | |
419 | bp->b_validend = bp->b_dirtyend; | |
420 | } else { | |
421 | bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff); | |
422 | bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend); | |
423 | } | |
424 | ||
425 | /* | |
426 | * If the lease is non-cachable or IO_SYNC do bwrite(). | |
427 | */ | |
e19a2ad1 | 428 | if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { |
2c5b44a2 KM |
429 | bp->b_proc = p; |
430 | bwrite(bp); | |
431 | } else if ((n+on) == biosize && | |
432 | (nmp->nm_flag & NFSMNT_NQNFS) == 0) { | |
39d108be | 433 | bp->b_flags |= B_AGE; |
f0f1cbaa | 434 | bp->b_proc = (struct proc *)0; |
39d108be RM |
435 | bawrite(bp); |
436 | } else { | |
f0f1cbaa | 437 | bp->b_proc = (struct proc *)0; |
39d108be RM |
438 | bdwrite(bp); |
439 | } | |
e19a2ad1 | 440 | } while (error == 0 && uio->uio_resid > 0 && n != 0); |
39d108be RM |
441 | return (error); |
442 | } |