/*
* Copyright (c) 1989 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Rick Macklem at The University of Guelph.
*
* Redistribution and use in source and binary forms are permitted
* provided that the above copyright notice and this paragraph are
* duplicated in all such forms and that any documentation,
* advertising materials, and other materials related to such
* distribution and use acknowledge that the software was developed
* by the University of California, Berkeley. The name of the
* University may not be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* @(#)nfs_bio.c 7.5 (Berkeley) %G%
*/
#include "param.h"
#include "user.h"
#include "buf.h"
#include "vnode.h"
#include "trace.h"
#include "mount.h"
#include "nfsnode.h"
#include "nfsiom.h"
/* True and false, how exciting */
#define TRUE 1
#define FALSE 0
/*
* Vnode op for read using bio
* Any similarity to readip() is purely coincidental
*/
nfs_read(vp, uio, ioflag, cred)
register struct vnode *vp;
struct uio *uio;
int ioflag;
struct ucred *cred;
{
register struct nfsnode *np = VTONFS(vp);
struct buf *bp;
struct vattr vattr;
daddr_t lbn, bn, rablock;
int diff, error = 0;
long n, on;
/*
* Avoid caching directories. Once everything is using getdirentries()
* this will never happen anyhow.
*/
if (vp->v_type == VDIR)
return (nfs_readrpc(vp, uio, cred));
if (uio->uio_rw != UIO_READ)
panic("nfs_read mode");
if (vp->v_type != VREG)
panic("nfs_read type");
if (uio->uio_resid == 0)
return (0);
if (uio->uio_offset < 0)
return (EINVAL);
	/*
	 * If the file's modify time on the server has changed since the
	 * last read rpc, or if the file has been written to locally,
	 * the cached data may no longer be consistent with the server,
	 * so flush all of the file's data out of the cache.
	 * This also brings the modify time up to date, since up to date
	 * attributes are returned in the reply to any write rpc.
	 * NB: This implies that cached data can be read when it is up to
	 * NFS_ATTRTIMEO seconds out of date. If current attributes are
	 * needed, this could be forced by setting n_attrstamp to 0
	 * before the nfs_getattr() call.
	 */
if (np->n_flag & NMODIFIED) {
np->n_flag &= ~NMODIFIED;
if (error = nfs_blkflush(vp, (daddr_t)0, np->n_size, TRUE))
return (error);
if (error = nfs_getattr(vp, &vattr, cred))
return (error);
np->n_mtime = vattr.va_mtime.tv_sec;
} else {
if (error = nfs_getattr(vp, &vattr, cred))
return (error);
if (np->n_mtime != vattr.va_mtime.tv_sec) {
if (error = nfs_blkflush(vp, (daddr_t)0,
np->n_size, TRUE))
return (error);
np->n_mtime = vattr.va_mtime.tv_sec;
}
}
np->n_flag |= NBUFFERED;
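	/*
	 * Read loop: move the data one NFS_BIOSIZE buffer at a time
	 * through the buffer cache, clipping each transfer at the
	 * cached end of file.
	 */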
do {
lbn = uio->uio_offset >> NFS_BIOSHIFT;
on = uio->uio_offset & (NFS_BIOSIZE-1);
n = MIN((unsigned)(NFS_BIOSIZE - on), uio->uio_resid);
diff = np->n_size - uio->uio_offset;
if (diff <= 0)
return (error);
if (diff < n)
n = diff;
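		/*
		 * Buffer cache block numbers are in DEV_BSIZE units,
		 * so scale the logical block number; rablock is the
		 * following block, used for read-ahead.
		 */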
bn = lbn*(NFS_BIOSIZE/DEV_BSIZE);
rablock = (lbn+1)*(NFS_BIOSIZE/DEV_BSIZE);
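		/*
		 * If the last read was of the immediately preceding
		 * block, assume sequential access and read ahead,
		 * provided the next block lies within the file.
		 */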
if (np->n_lastr+1 == lbn && np->n_size > (rablock*DEV_BSIZE))
error = breada(vp, bn, NFS_BIOSIZE, rablock, NFS_BIOSIZE,
cred, &bp);
else
error = bread(vp, bn, NFS_BIOSIZE, cred, &bp);
np->n_lastr = lbn;
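		/*
		 * A non-zero b_resid means the server returned a short
		 * read, so clip the transfer to the valid portion of
		 * the buffer.
		 */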
if (bp->b_resid) {
diff = (on >= (NFS_BIOSIZE-bp->b_resid)) ? 0 :
(NFS_BIOSIZE-bp->b_resid-on);
n = MIN(n, diff);
}
if (error) {
brelse(bp);
return (error);
}
if (n > 0)
error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
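		/*
		 * A buffer that has been fully consumed, or read up to
		 * the end of file, is unlikely to be wanted again soon;
		 * age it so it is reused ahead of other buffers.
		 */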
if (n+on == NFS_BIOSIZE || uio->uio_offset == np->n_size)
bp->b_flags |= B_AGE;
brelse(bp);
} while (error == 0 && uio->uio_resid > 0 && n != 0);
return (error);
}
/*
* Vnode op for write using bio
*/
nfs_write(vp, uio, ioflag, cred)
register struct vnode *vp;
register struct uio *uio;
int ioflag;
struct ucred *cred;
{
struct buf *bp;
struct nfsnode *np = VTONFS(vp);
daddr_t lbn, bn;
int i, n, on, count, error = 0;
/* Should we try and do this ?? */
if (vp->v_type == VREG && (ioflag & IO_APPEND))
uio->uio_offset = np->n_size;
#ifdef notdef
cnt = uio->uio_resid;
osize = np->n_size;
#endif
if (uio->uio_rw != UIO_WRITE)
panic("nfs_write mode");
if (vp->v_type != VREG)
panic("nfs_write type");
if (uio->uio_offset < 0)
return (EINVAL);
if (uio->uio_resid == 0)
return (0);
	/*
	 * Maybe this check should be above the vnode op call, but as
	 * long as file servers have no limits, I don't think it matters.
	 */
if (vp->v_type == VREG &&
uio->uio_offset + uio->uio_resid >
u.u_rlimit[RLIMIT_FSIZE].rlim_cur) {
psignal(u.u_procp, SIGXFSZ);
return (EFBIG);
}
np->n_flag |= (NMODIFIED|NBUFFERED);
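	/*
	 * Write loop: gather the data into NFS_BIOSIZE buffers,
	 * tracking the dirty region of each buffer so that only the
	 * modified bytes need be sent to the server.
	 */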
do {
lbn = uio->uio_offset >> NFS_BIOSHIFT;
on = uio->uio_offset & (NFS_BIOSIZE-1);
n = MIN((unsigned)(NFS_BIOSIZE - on), uio->uio_resid);
if (uio->uio_offset+n > np->n_size)
np->n_size = uio->uio_offset+n;
bn = lbn*(NFS_BIOSIZE/DEV_BSIZE);
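		/*
		 * Remove any pages for this block range from the page
		 * hash, presumably so that stale pages cached by the
		 * (not yet merged) VM system are not used after the
		 * write.
		 */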
count = howmany(NFS_BIOSIZE, CLBYTES);
for (i = 0; i < count; i++)
munhash(vp, bn + i * CLBYTES / DEV_BSIZE);
bp = getblk(vp, bn, NFS_BIOSIZE);
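		/*
		 * Save a write credential in the buffer, so that a
		 * later delayed or asynchronous write can be sent to
		 * the server with the proper credentials.
		 */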
if (bp->b_wcred == NOCRED) {
crhold(cred);
bp->b_wcred = cred;
}
if (bp->b_dirtyend > 0) {
			/*
			 * If the new write will leave a contiguous
			 * dirty area, just update b_dirtyoff and
			 * b_dirtyend; otherwise force a write RPC of
			 * the old dirty area.
			 */
if (on <= bp->b_dirtyend && (on+n) >= bp->b_dirtyoff) {
bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
} else {
/*
* Like bwrite() but without the brelse
*/
bp->b_flags &= ~(B_READ | B_DONE |
B_ERROR | B_DELWRI | B_ASYNC);
u.u_ru.ru_oublock++;
VOP_STRATEGY(bp);
error = biowait(bp);
if (bp->b_flags & B_ERROR) {
brelse(bp);
if (bp->b_error)
error = bp->b_error;
else
error = EIO;
return (error);
}
bp->b_dirtyoff = on;
bp->b_dirtyend = on+n;
}
} else {
bp->b_dirtyoff = on;
bp->b_dirtyend = on+n;
}
		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
			/* release the buffer, or it stays busy forever */
			brelse(bp);
			return (error);
		}
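		/*
		 * A full buffer is pushed asynchronously right away;
		 * a partial one is marked delayed-write in the hope
		 * that the rest of it will be filled in shortly.
		 */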
if ((n+on) == NFS_BIOSIZE) {
bp->b_flags |= B_AGE;
bawrite(bp);
} else {
bdwrite(bp);
}
} while (error == 0 && uio->uio_resid > 0 && n != 0);
#ifdef notdef
/* Should we try and do this for nfs ?? */
if (error && (ioflag & IO_UNIT)) {
np->n_size = osize;
uio->uio_offset -= cnt - uio->uio_resid;
uio->uio_resid = cnt;
}
#endif
return (error);
}
/*
* Flush and invalidate all of the buffers associated with the blocks of vp
*/
nfs_blkflush(vp, blkno, size, invalidate)
struct vnode *vp;
daddr_t blkno;
long size;
int invalidate;
{
register struct buf *ep;
struct buf *dp;
daddr_t curblk, nextblk, ecurblk, lastblk;
int s, error, allerrors = 0;
/*
* Iterate through each possible hash chain.
*/
lastblk = blkno + btodb(size+DEV_BSIZE-1) - 1;
for (curblk = blkno; curblk <= lastblk; curblk = nextblk) {
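		/*
		 * All blocks in the same round hash to the same chain,
		 * so advance curblk one round at a time; RND is
		 * presumably the buffer hash rounding factor
		 * (MAXBSIZE/DEV_BSIZE), and the #if picks the cheaper
		 * mask form when RND is a power of two.
		 */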
#if RND & (RND-1)
nextblk = ((curblk / RND) + 1) * RND;
#else
nextblk = ((curblk & ~(RND-1)) + RND);
#endif
ecurblk = nextblk > lastblk ? lastblk : nextblk - 1;
dp = BUFHASH(vp, curblk);
loop:
for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
continue;
/* look for overlap */
if (ep->b_bcount == 0 || ep->b_blkno > ecurblk ||
ep->b_blkno + btodb(ep->b_bcount) <= curblk)
continue;
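			/*
			 * Found an overlapping buffer. Raise the
			 * priority level before checking B_BUSY so the
			 * flags cannot change underneath us; if it is
			 * busy, wait for it and rescan the chain from
			 * the top, since it may have moved.
			 */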
s = splbio();
if (ep->b_flags&B_BUSY) {
ep->b_flags |= B_WANTED;
sleep((caddr_t)ep, PRIBIO+1);
splx(s);
goto loop;
}
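			/*
			 * A delayed-write buffer holds data the server
			 * has not seen yet; push it out synchronously
			 * and rescan, since the chain may have changed
			 * while we slept in bwrite().
			 */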
if (ep->b_flags & B_DELWRI) {
splx(s);
notavail(ep);
if (error = bwrite(ep))
allerrors = error;
goto loop;
}
splx(s);
if (invalidate) {
notavail(ep);
ep->b_flags |= B_INVAL;
brelvp(ep);
brelse(ep);
}
}
}
return (allerrors);
}