enable clustered read/write by default as we now believe it works;
[unix-history] / usr / src / sys / ufs / ffs / ffs_vnops.c
CommitLineData
da7c5cc6 1/*
7188ac27
KM
2 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3 * All rights reserved.
da7c5cc6 4 *
b702c21d 5 * %sccs.include.redist.c%
7188ac27 6 *
c89a1368 7 * @(#)ffs_vnops.c 7.101 (Berkeley) %G%
a6673c09
KB
8 */
9
10#include <sys/param.h>
11#include <sys/systm.h>
a6673c09
KB
12#include <sys/resourcevar.h>
13#include <sys/kernel.h>
14#include <sys/file.h>
15#include <sys/stat.h>
16#include <sys/buf.h>
17#include <sys/proc.h>
18#include <sys/conf.h>
19#include <sys/mount.h>
20#include <sys/vnode.h>
a6673c09
KB
21#include <sys/malloc.h>
22
80e9de4e
KM
23#include <vm/vm.h>
24
41d6cb49
KM
25#include <miscfs/specfs/specdev.h>
26#include <miscfs/fifofs/fifo.h>
27
a6673c09
KB
28#include <ufs/ufs/lockf.h>
29#include <ufs/ufs/quota.h>
30#include <ufs/ufs/inode.h>
31#include <ufs/ufs/dir.h>
32#include <ufs/ufs/ufs_extern.h>
33
34#include <ufs/ffs/fs.h>
35#include <ufs/ffs/ffs_extern.h>
36
37/* Global vfs data structures for ufs. */
9342689a
JH
38int (**ffs_vnodeop_p)();
39struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
40 { &vop_default_desc, vn_default_error },
41 { &vop_lookup_desc, ufs_lookup }, /* lookup */
42 { &vop_create_desc, ufs_create }, /* create */
aef53d88
JH
43 { &vop_mknod_desc, ufs_mknod }, /* mknod */
44 { &vop_open_desc, ufs_open }, /* open */
45 { &vop_close_desc, ufs_close }, /* close */
9342689a
JH
46 { &vop_access_desc, ufs_access }, /* access */
47 { &vop_getattr_desc, ufs_getattr }, /* getattr */
48 { &vop_setattr_desc, ufs_setattr }, /* setattr */
aef53d88
JH
49 { &vop_read_desc, ffs_read }, /* read */
50 { &vop_write_desc, ffs_write }, /* write */
51 { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */
9342689a 52 { &vop_select_desc, ufs_select }, /* select */
aef53d88
JH
53 { &vop_mmap_desc, ufs_mmap }, /* mmap */
54 { &vop_fsync_desc, ffs_fsync }, /* fsync */
55 { &vop_seek_desc, ufs_seek }, /* seek */
9342689a 56 { &vop_remove_desc, ufs_remove }, /* remove */
aef53d88 57 { &vop_link_desc, ufs_link }, /* link */
9342689a 58 { &vop_rename_desc, ufs_rename }, /* rename */
aef53d88
JH
59 { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */
60 { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */
9342689a
JH
61 { &vop_symlink_desc, ufs_symlink }, /* symlink */
62 { &vop_readdir_desc, ufs_readdir }, /* readdir */
63 { &vop_readlink_desc, ufs_readlink }, /* readlink */
64 { &vop_abortop_desc, ufs_abortop }, /* abortop */
8ab48e83 65 { &vop_inactive_desc, ufs_inactive }, /* inactive */
9342689a 66 { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */
aef53d88 67 { &vop_lock_desc, ufs_lock }, /* lock */
9342689a 68 { &vop_unlock_desc, ufs_unlock }, /* unlock */
0ae79846 69 { &vop_bmap_desc, ufs_bmap }, /* bmap */
9342689a 70 { &vop_strategy_desc, ufs_strategy }, /* strategy */
aef53d88 71 { &vop_print_desc, ufs_print }, /* print */
9342689a
JH
72 { &vop_islocked_desc, ufs_islocked }, /* islocked */
73 { &vop_advlock_desc, ufs_advlock }, /* advlock */
74 { &vop_blkatoff_desc, ffs_blkatoff }, /* blkatoff */
9342689a 75 { &vop_valloc_desc, ffs_valloc }, /* valloc */
aef53d88 76 { &vop_vfree_desc, ffs_vfree }, /* vfree */
9342689a
JH
77 { &vop_truncate_desc, ffs_truncate }, /* truncate */
78 { &vop_update_desc, ffs_update }, /* update */
36c34e72 79 { &vop_bwrite_desc, vn_bwrite },
9342689a 80 { (struct vnodeop_desc*)NULL, (int(*)())NULL }
a6673c09 81};
9342689a
JH
82struct vnodeopv_desc ffs_vnodeop_opv_desc =
83 { &ffs_vnodeop_p, ffs_vnodeop_entries };
528f664c 84
9342689a
JH
85int (**ffs_specop_p)();
86struct vnodeopv_entry_desc ffs_specop_entries[] = {
87 { &vop_default_desc, vn_default_error },
88 { &vop_lookup_desc, spec_lookup }, /* lookup */
89 { &vop_create_desc, spec_create }, /* create */
90 { &vop_mknod_desc, spec_mknod }, /* mknod */
aef53d88 91 { &vop_open_desc, spec_open }, /* open */
9342689a
JH
92 { &vop_close_desc, ufsspec_close }, /* close */
93 { &vop_access_desc, ufs_access }, /* access */
94 { &vop_getattr_desc, ufs_getattr }, /* getattr */
95 { &vop_setattr_desc, ufs_setattr }, /* setattr */
96 { &vop_read_desc, ufsspec_read }, /* read */
97 { &vop_write_desc, ufsspec_write }, /* write */
98 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
99 { &vop_select_desc, spec_select }, /* select */
aef53d88 100 { &vop_mmap_desc, spec_mmap }, /* mmap */
06337661 101 { &vop_fsync_desc, ffs_fsync }, /* fsync */
aef53d88 102 { &vop_seek_desc, spec_seek }, /* seek */
9342689a 103 { &vop_remove_desc, spec_remove }, /* remove */
aef53d88 104 { &vop_link_desc, spec_link }, /* link */
9342689a
JH
105 { &vop_rename_desc, spec_rename }, /* rename */
106 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
107 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
108 { &vop_symlink_desc, spec_symlink }, /* symlink */
109 { &vop_readdir_desc, spec_readdir }, /* readdir */
110 { &vop_readlink_desc, spec_readlink }, /* readlink */
111 { &vop_abortop_desc, spec_abortop }, /* abortop */
8ab48e83 112 { &vop_inactive_desc, ufs_inactive }, /* inactive */
9342689a 113 { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */
aef53d88 114 { &vop_lock_desc, ufs_lock }, /* lock */
9342689a 115 { &vop_unlock_desc, ufs_unlock }, /* unlock */
aef53d88 116 { &vop_bmap_desc, spec_bmap }, /* bmap */
9342689a 117 { &vop_strategy_desc, spec_strategy }, /* strategy */
aef53d88 118 { &vop_print_desc, ufs_print }, /* print */
9342689a
JH
119 { &vop_islocked_desc, ufs_islocked }, /* islocked */
120 { &vop_advlock_desc, spec_advlock }, /* advlock */
121 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
9342689a 122 { &vop_valloc_desc, spec_valloc }, /* valloc */
bccabe5d 123 { &vop_vfree_desc, ffs_vfree }, /* vfree */
9342689a
JH
124 { &vop_truncate_desc, spec_truncate }, /* truncate */
125 { &vop_update_desc, ffs_update }, /* update */
36c34e72 126 { &vop_bwrite_desc, vn_bwrite },
9342689a 127 { (struct vnodeop_desc*)NULL, (int(*)())NULL }
9d5097aa 128};
9342689a
JH
129struct vnodeopv_desc ffs_specop_opv_desc =
130 { &ffs_specop_p, ffs_specop_entries };
9d5097aa
KM
131
#ifdef FIFO
/*
 * Vnode operations table for FIFO vnodes stored on an FFS file system;
 * compiled only when FIFO is defined.
 *
 * Most operations are supplied by fifo_* routines.  close, read and
 * write go to the ufsfifo_* routines; access, getattr, setattr,
 * inactive, reclaim, lock, unlock, print and islocked go to ufs_*
 * routines; fsync, vfree and update go to ffs_* routines; bwrite goes
 * to vn_bwrite.  The list ends with a NULL/NULL pair.
 * ffs_fifoop_opv_desc ties the table to the ffs_fifoop_p vector pointer.
 */
int (**ffs_fifoop_p)();

struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc,	vn_default_error },
	{ &vop_lookup_desc,	fifo_lookup },
	{ &vop_create_desc,	fifo_create },
	{ &vop_mknod_desc,	fifo_mknod },
	{ &vop_open_desc,	fifo_open },
	{ &vop_close_desc,	ufsfifo_close },
	{ &vop_access_desc,	ufs_access },
	{ &vop_getattr_desc,	ufs_getattr },
	{ &vop_setattr_desc,	ufs_setattr },
	{ &vop_read_desc,	ufsfifo_read },
	{ &vop_write_desc,	ufsfifo_write },
	{ &vop_ioctl_desc,	fifo_ioctl },
	{ &vop_select_desc,	fifo_select },
	{ &vop_mmap_desc,	fifo_mmap },
	{ &vop_fsync_desc,	ffs_fsync },
	{ &vop_seek_desc,	fifo_seek },
	{ &vop_remove_desc,	fifo_remove },
	{ &vop_link_desc,	fifo_link },
	{ &vop_rename_desc,	fifo_rename },
	{ &vop_mkdir_desc,	fifo_mkdir },
	{ &vop_rmdir_desc,	fifo_rmdir },
	{ &vop_symlink_desc,	fifo_symlink },
	{ &vop_readdir_desc,	fifo_readdir },
	{ &vop_readlink_desc,	fifo_readlink },
	{ &vop_abortop_desc,	fifo_abortop },
	{ &vop_inactive_desc,	ufs_inactive },
	{ &vop_reclaim_desc,	ufs_reclaim },
	{ &vop_lock_desc,	ufs_lock },
	{ &vop_unlock_desc,	ufs_unlock },
	{ &vop_bmap_desc,	fifo_bmap },
	{ &vop_strategy_desc,	fifo_strategy },
	{ &vop_print_desc,	ufs_print },
	{ &vop_islocked_desc,	ufs_islocked },
	{ &vop_advlock_desc,	fifo_advlock },
	{ &vop_blkatoff_desc,	fifo_blkatoff },
	{ &vop_valloc_desc,	fifo_valloc },
	{ &vop_vfree_desc,	ffs_vfree },
	{ &vop_truncate_desc,	fifo_truncate },
	{ &vop_update_desc,	ffs_update },
	{ &vop_bwrite_desc,	vn_bwrite },
	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* terminator */
};

struct vnodeopv_desc ffs_fifoop_opv_desc = {
	&ffs_fifoop_p, ffs_fifoop_entries
};
#endif /* FIFO */
180
02abfdb0
KM
181/*
182 * Enabling cluster read/write operations.
02abfdb0 183 */
71d600b8
KM
184#include <sys/sysctl.h>
185int doclusterread = 1;
186struct ctldebug debug11 = { "doclusterread", &doclusterread };
187int doclusterwrite = 1;
188struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite };
d67a03eb 189
b373e060
KM
190/*
191 * Vnode op for reading.
192 */
193/* ARGSUSED */
cc1475d2 194ffs_read(ap)
06337661
KM
195 struct vop_read_args /* {
196 struct vnode *a_vp;
197 struct uio *a_uio;
198 int a_ioflag;
199 struct ucred *a_cred;
200 } */ *ap;
b373e060 201{
cc1475d2
KM
202 register struct vnode *vp = ap->a_vp;
203 register struct inode *ip = VTOI(vp);
204 register struct uio *uio = ap->a_uio;
b373e060
KM
205 register struct fs *fs;
206 struct buf *bp;
207 daddr_t lbn, bn, rablock;
b7077901 208 off_t diff;
91bf7fb4 209 int type, rasize, error = 0;
1ca49178 210 long size, n, on;
b373e060 211
91bf7fb4 212 type = ip->i_mode & IFMT;
5b169cb7 213#ifdef DIAGNOSTIC
cc1475d2 214 if (uio->uio_rw != UIO_READ)
a6673c09 215 panic("ffs_read mode");
b373e060 216 if (type != IFDIR && type != IFREG && type != IFLNK)
a6673c09 217 panic("ffs_read type");
517ac0f1 218 if (type == IFLNK && (int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
1c985e53 219 panic("read short symlink");
5b169cb7 220#endif
cc1475d2 221 if (uio->uio_resid == 0)
b373e060 222 return (0);
cc1475d2 223 fs = ip->i_fs;
3d2d8c1a
KM
224 if ((u_quad_t)uio->uio_offset > fs->fs_maxfilesize)
225 return (EFBIG);
b373e060 226 ip->i_flag |= IACC;
b373e060 227 do {
cc1475d2
KM
228 lbn = lblkno(fs, uio->uio_offset);
229 on = blkoff(fs, uio->uio_offset);
ffbb1df8 230 n = min((unsigned)(fs->fs_bsize - on), uio->uio_resid);
cc1475d2 231 diff = ip->i_size - uio->uio_offset;
b373e060
KM
232 if (diff <= 0)
233 return (0);
234 if (diff < n)
235 n = diff;
b373e060 236 size = blksize(fs, ip, lbn);
02abfdb0
KM
237 rablock = lbn + 1;
238 if (doclusterread && lblktosize(fs, rablock) <= ip->i_size) {
52ee9502
MS
239 error = cluster_read(vp, ip->i_size, lbn, size,
240 NOCRED, &bp);
02abfdb0
KM
241 } else if (vp->v_lastr + 1 == lbn &&
242 lblktosize(fs, rablock) < ip->i_size) {
243 rasize = blksize(fs, ip, rablock);
244 error = breadn(vp, lbn, size, &rablock,
245 &rasize, 1, NOCRED, &bp);
246 } else
cc1475d2
KM
247 error = bread(vp, lbn, size, NOCRED, &bp);
248 vp->v_lastr = lbn;
ffbb1df8 249 n = min(n, size - bp->b_resid);
b373e060
KM
250 if (error) {
251 brelse(bp);
252 return (error);
253 }
cc1475d2 254 error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
91bf7fb4
KM
255 if (type == IFREG &&
256 (n + on == fs->fs_bsize || uio->uio_offset == ip->i_size))
b373e060
KM
257 bp->b_flags |= B_AGE;
258 brelse(bp);
cc1475d2 259 } while (error == 0 && uio->uio_resid > 0 && n != 0);
b373e060
KM
260 return (error);
261}
262
263/*
264 * Vnode op for writing.
265 */
cc1475d2 266ffs_write(ap)
06337661
KM
267 struct vop_write_args /* {
268 struct vnode *a_vp;
269 struct uio *a_uio;
270 int a_ioflag;
271 struct ucred *a_cred;
272 } */ *ap;
b373e060 273{
406c9a0d 274 register struct vnode *vp = ap->a_vp;
cc1475d2 275 register struct uio *uio = ap->a_uio;
406c9a0d 276 register struct inode *ip = VTOI(vp);
b373e060 277 register struct fs *fs;
cc1475d2
KM
278 struct proc *p = uio->uio_procp;
279 int ioflag = ap->a_ioflag;
d056f176 280 struct timeval tv;
b373e060
KM
281 struct buf *bp;
282 daddr_t lbn, bn;
0308fc84 283 off_t osize;
8986c97c
KM
284 int n, on, flags;
285 int size, resid, error = 0;
b373e060 286
5b169cb7 287#ifdef DIAGNOSTIC
cc1475d2 288 if (uio->uio_rw != UIO_WRITE)
a6673c09 289 panic("ffs_write mode");
5b169cb7 290#endif
406c9a0d 291 switch (vp->v_type) {
b373e060 292 case VREG:
cc1475d2
KM
293 if (ioflag & IO_APPEND)
294 uio->uio_offset = ip->i_size;
e23a5fff
KM
295 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
296 return (EPERM);
b373e060
KM
297 /* fall through */
298 case VLNK:
299 break;
300
301 case VDIR:
cc1475d2 302 if ((ioflag & IO_SYNC) == 0)
a6673c09 303 panic("ffs_write nonsync dir write");
b373e060
KM
304 break;
305
306 default:
a6673c09 307 panic("ffs_write type");
b373e060 308 }
cc1475d2 309 if (uio->uio_resid == 0)
b373e060 310 return (0);
c2dc2eb9 311 fs = ip->i_fs;
592a05b4
KM
312 if (uio->uio_offset < 0 ||
313 (u_quad_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
c2dc2eb9 314 return (EFBIG);
b373e060
KM
315 /*
316 * Maybe this should be above the vnode op call, but so long as
317 * file servers have no limits, i don't think it matters
318 */
406c9a0d 319 if (vp->v_type == VREG && p &&
cc1475d2 320 uio->uio_offset + uio->uio_resid >
c6f5111d
MK
321 p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
322 psignal(p, SIGXFSZ);
b373e060
KM
323 return (EFBIG);
324 }
cc1475d2 325 resid = uio->uio_resid;
b373e060 326 osize = ip->i_size;
e16fa59e 327 flags = 0;
cc1475d2 328 if (ioflag & IO_SYNC)
e16fa59e 329 flags = B_SYNC;
b373e060 330 do {
cc1475d2
KM
331 lbn = lblkno(fs, uio->uio_offset);
332 on = blkoff(fs, uio->uio_offset);
ffbb1df8 333 n = min((unsigned)(fs->fs_bsize - on), uio->uio_resid);
b373e060 334 if (n < fs->fs_bsize)
e16fa59e 335 flags |= B_CLRBUF;
b373e060 336 else
e16fa59e 337 flags &= ~B_CLRBUF;
e1b76915 338 if (error = ffs_balloc(ip, lbn, on + n, ap->a_cred, &bp, flags))
b373e060 339 break;
e16fa59e 340 bn = bp->b_blkno;
cc1475d2
KM
341 if (uio->uio_offset + n > ip->i_size) {
342 ip->i_size = uio->uio_offset + n;
406c9a0d 343 vnode_pager_setsize(vp, (u_long)ip->i_size);
8986c97c 344 }
b373e060 345 size = blksize(fs, ip, lbn);
406c9a0d 346 (void) vnode_pager_uncache(vp);
ffbb1df8 347 n = min(n, size - bp->b_resid);
cc1475d2
KM
348 error = uiomove(bp->b_un.b_addr + on, n, uio);
349 if (ioflag & IO_SYNC)
b373e060
KM
350 (void) bwrite(bp);
351 else if (n + on == fs->fs_bsize) {
02abfdb0
KM
352 if (doclusterwrite) {
353 cluster_write(bp, ip->i_size);
354 } else {
355 bp->b_flags |= B_AGE;
356 bawrite(bp);
357 }
b373e060
KM
358 } else
359 bdwrite(bp);
360 ip->i_flag |= IUPD|ICHG;
d82ca8c7 361 if (ap->a_cred && ap->a_cred->cr_uid != 0)
b373e060 362 ip->i_mode &= ~(ISUID|ISGID);
cc1475d2
KM
363 } while (error == 0 && uio->uio_resid > 0 && n != 0);
364 if (error && (ioflag & IO_UNIT)) {
06337661
KM
365 (void)VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred,
366 uio->uio_procp);
cc1475d2
KM
367 uio->uio_offset -= resid - uio->uio_resid;
368 uio->uio_resid = resid;
b373e060 369 }
d056f176
KM
370 if (!error && (ioflag & IO_SYNC)) {
371 tv = time;
372 error = VOP_UPDATE(vp, &tv, &tv, 1);
373 }
b373e060
KM
374 return (error);
375}
376
4f083fd7 377/*
7188ac27 378 * Synch an open file.
4f083fd7 379 */
7188ac27 380/* ARGSUSED */
a6673c09 381int
cc1475d2 382ffs_fsync(ap)
06337661
KM
383 struct vop_fsync_args /* {
384 struct vnode *a_vp;
385 struct ucred *a_cred;
386 int a_waitfor;
387 struct proc *a_p;
388 } */ *ap;
528f664c 389{
06337661
KM
390 register struct vnode *vp = ap->a_vp;
391 struct inode *ip = VTOI(vp);
392 register struct buf *bp;
d056f176 393 struct timeval tv;
06337661
KM
394 struct buf *nbp;
395 int s;
7188ac27 396
06337661
KM
397 /*
398 * Flush all dirty buffers associated with a vnode.
399 */
400loop:
401 s = splbio();
bd60d0f0
KM
402 for (bp = vp->v_dirtyblkhd.le_next; bp; bp = nbp) {
403 nbp = bp->b_vnbufs.qe_next;
06337661
KM
404 if ((bp->b_flags & B_BUSY))
405 continue;
406 if ((bp->b_flags & B_DELWRI) == 0)
407 panic("ffs_fsync: not dirty");
408 bremfree(bp);
409 bp->b_flags |= B_BUSY;
410 splx(s);
411 /*
412 * Wait for I/O associated with indirect blocks to complete,
413 * since there is no way to quickly wait for them below.
414 */
415 if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT)
416 (void) bawrite(bp);
417 else
418 (void) bwrite(bp);
419 goto loop;
420 }
421 if (ap->a_waitfor == MNT_WAIT) {
422 while (vp->v_numoutput) {
423 vp->v_flag |= VBWAIT;
424 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
425 }
426#ifdef DIAGNOSTIC
bd60d0f0 427 if (vp->v_dirtyblkhd.le_next) {
06337661
KM
428 vprint("ffs_fsync: dirty", vp);
429 goto loop;
430 }
431#endif
432 }
433 splx(s);
d056f176
KM
434 tv = time;
435 return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT));
9d5097aa 436}
52ee9502 437