add pathconf
[unix-history] / usr / src / sys / ufs / lfs / lfs_syscalls.c
CommitLineData
c66054e3
KB
/*-
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_syscalls.c	7.30 (Berkeley) %G%
 */
9
10#include <sys/param.h>
11#include <sys/proc.h>
12#include <sys/buf.h>
13#include <sys/mount.h>
14#include <sys/vnode.h>
15#include <sys/malloc.h>
16#include <sys/kernel.h>
17
18#include <ufs/ufs/quota.h>
19#include <ufs/ufs/inode.h>
20#include <ufs/ufs/ufsmount.h>
9f72b66d 21#include <ufs/ufs/ufs_extern.h>
c66054e3
KB
22
23#include <ufs/lfs/lfs.h>
24#include <ufs/lfs/lfs_extern.h>
58f30bab
MS
/*
 * Advance the segment's current FINFO pointer to the first byte past the
 * block list of the FINFO just completed, i.e. to &fi_blocks[fi_nblocks].
 * The whole expansion is parenthesized so it behaves as one expression.
 */
#define	BUMP_FIP(SP) \
	((SP)->fip = (FINFO *)(&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks]))

/*
 * Increment/decrement the FINFO count (ss_nfinfo) kept in the segment
 * summary that (SP)->segsum points at.  Each yields the updated count.
 */
#define	INC_FINFO(SP)	(++((SEGSUM *)((SP)->segsum))->ss_nfinfo)
#define	DEC_FINFO(SP)	(--((SEGSUM *)((SP)->segsum))->ss_nfinfo)

/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  S is the bytes added to the summary.  If the summary has
 * fewer than S bytes left, the segment is written out with lfs_writeseg().
 *
 * The macro uses the variables `fs' and `sp' of the calling function.  It is
 * wrapped in do { } while (0) so that "CHECK_SEG(x);" is a single statement
 * and can be used safely as the body of an if/else.
 */
#define	CHECK_SEG(s) \
	do { \
		if (sp->sum_bytes_left < (s)) \
			(void) lfs_writeseg(fs, sp); \
	} while (0)
9f72b66d
KB
/* Forward declaration; lfs_markv() uses lfs_fakebuf(), defined later in this file. */
struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
40
c66054e3
KB
41/*
42 * lfs_markv:
43 *
44 * This will mark inodes and blocks dirty, so they are written into the log.
45 * It will block until all the blocks have been written. The segment create
46 * time passed in the block_info and inode_info structures is used to decide
47 * if the data is valid for each block (in case some process dirtied a block
48 * or inode that is being cleaned between the determination that a block is
49 * live and the lfs_markv call).
50 *
51 * 0 on success
52 * -1/errno is return on error.
53 */
4529d4a7
MS
54struct lfs_markv_args {
55 fsid_t fsid; /* file system */
56 BLOCK_INFO *blkiov; /* block array */
57 int blkcnt; /* count of block array entries */
58};
c66054e3
KB
59int
60lfs_markv(p, uap, retval)
61 struct proc *p;
4529d4a7 62 struct lfs_markv_args *uap;
c66054e3
KB
63 int *retval;
64{
9f72b66d 65 struct segment *sp;
c66054e3
KB
66 BLOCK_INFO *blkp;
67 IFILE *ifp;
9f72b66d 68 struct buf *bp, **bpp;
b338862d 69 struct inode *ip;
c66054e3
KB
70 struct lfs *fs;
71 struct mount *mntp;
72 struct vnode *vp;
a5bd9f52 73 void *start;
b338862d 74 ino_t lastino;
9f72b66d 75 daddr_t b_daddr, v_daddr;
c66054e3
KB
76 u_long bsize;
77 int cnt, error;
78
79 if (error = suser(p->p_ucred, &p->p_acflag))
80 return (error);
c66054e3
KB
81 if ((mntp = getvfs(&uap->fsid)) == NULL)
82 return (EINVAL);
83
84 cnt = uap->blkcnt;
54c08a75 85 start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
9f72b66d
KB
86 if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
87 goto err1;
c66054e3 88
9f72b66d 89 /* Mark blocks/inodes dirty. */
b338862d 90 fs = VFSTOUFS(mntp)->um_lfs;
d8a61808 91 bsize = fs->lfs_bsize;
9f72b66d
KB
92 error = 0;
93
ee1bb292
MS
94 lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
95 sp = fs->lfs_sp;
9f72b66d
KB
96 for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
97 blkp = start; cnt--; ++blkp) {
b338862d
KB
98 /*
99 * Get the IFILE entry (only once) and see if the file still
100 * exists.
101 */
102 if (lastino != blkp->bi_inode) {
9f72b66d 103 if (lastino != LFS_UNUSED_INUM) {
58f30bab 104 /* Finish up last file */
87291b98 105 lfs_updatemeta(sp);
9f72b66d 106 lfs_writeinode(fs, sp, ip);
ee1bb292 107 lfs_vunref(vp);
58f30bab
MS
108 if (sp->fip->fi_nblocks)
109 BUMP_FIP(sp);
110 else {
111 DEC_FINFO(sp);
112 sp->sum_bytes_left +=
113 sizeof(FINFO) - sizeof(daddr_t);
114
da447738 115 }
9f72b66d 116 }
58f30bab
MS
117
118 /* Start a new file */
119 CHECK_SEG(sizeof(FINFO));
120 sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
121 INC_FINFO(sp);
122 sp->start_lbp = &sp->fip->fi_blocks[0];
123 sp->vp = NULL;
83318e24
MS
124 sp->fip->fi_version = blkp->bi_version;
125 sp->fip->fi_nblocks = 0;
126 sp->fip->fi_ino = blkp->bi_inode;
b338862d 127 lastino = blkp->bi_inode;
5be74b00
KB
128 if (blkp->bi_inode == LFS_IFILE_INUM)
129 v_daddr = fs->lfs_idaddr;
130 else {
131 LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
132 v_daddr = ifp->if_daddr;
133 brelse(bp);
134 }
9f72b66d 135 if (v_daddr == LFS_UNUSED_DADDR)
b338862d 136 continue;
58f30bab 137
9f72b66d
KB
138 /* Get the vnode/inode. */
139 if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
83318e24
MS
140 blkp->bi_lbn == LFS_UNUSED_LBN ?
141 blkp->bi_bp : NULL)) {
9f72b66d
KB
142#ifdef DIAGNOSTIC
143 printf("lfs_markv: VFS_VGET failed (%d)\n",
144 blkp->bi_inode);
145#endif
87291b98 146 lastino = LFS_UNUSED_INUM;
58f30bab 147 v_daddr = LFS_UNUSED_DADDR;
9f72b66d
KB
148 continue;
149 }
87291b98 150 sp->vp = vp;
9f72b66d
KB
151 ip = VTOI(vp);
152 } else if (v_daddr == LFS_UNUSED_DADDR)
c66054e3 153 continue;
a5bd9f52 154
9f72b66d
KB
155 /* If this BLOCK_INFO didn't contain a block, keep going. */
156 if (blkp->bi_lbn == LFS_UNUSED_LBN)
157 continue;
58f30bab 158 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
5be74b00 159 b_daddr != blkp->bi_daddr)
a5bd9f52 160 continue;
9f72b66d
KB
161 /*
162 * If we got to here, then we are keeping the block. If it
163 * is an indirect block, we want to actually put it in the
164 * buffer cache so that it can be updated in the finish_meta
165 * section. If it's not, we need to allocate a fake buffer
166 * so that writeseg can perform the copyin and write the buffer.
167 */
168 if (blkp->bi_lbn >= 0) /* Data Block */
169 bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
170 blkp->bi_bp);
171 else {
4d388002 172 bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
db675ac8 173 if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
9f72b66d
KB
174 (error = copyin(blkp->bi_bp, bp->b_un.b_addr,
175 bsize)))
176 goto err2;
177 if (error = VOP_BWRITE(bp))
178 goto err2;
b338862d 179 }
87291b98
KB
180 while (lfs_gatherblock(sp, bp, NULL));
181 }
182 if (sp->vp) {
183 lfs_updatemeta(sp);
184 lfs_writeinode(fs, sp, ip);
ee1bb292 185 lfs_vunref(vp);
58f30bab
MS
186 if (!sp->fip->fi_nblocks) {
187 DEC_FINFO(sp);
188 sp->sum_bytes_left += sizeof(FINFO) - sizeof(daddr_t);
189 }
c66054e3 190 }
9f72b66d
KB
191 (void) lfs_writeseg(fs, sp);
192 lfs_segunlock(fs);
a5bd9f52 193 free(start, M_SEGMENT);
9f72b66d
KB
194 return (error);
195/*
196 * XXX If we come in to error 2, we might have indirect blocks that were
197 * updated and now have bad block pointers. I don't know what to do
198 * about this.
199 */
c66054e3 200
ee1bb292 201err2: lfs_vunref(vp);
9f72b66d
KB
202 /* Free up fakebuffers */
203 for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
204 if ((*bpp)->b_flags & B_CALL) {
205 brelvp(*bpp);
206 free(*bpp, M_SEGMENT);
207 } else
208 brelse(*bpp);
209 lfs_segunlock(fs);
210err1:
a5bd9f52 211 free(start, M_SEGMENT);
9f72b66d 212 return(error);
c66054e3
KB
213}
214
215/*
216 * lfs_bmapv:
217 *
b338862d 218 * This will fill in the current disk address for arrays of blocks.
c66054e3
KB
219 *
220 * 0 on success
221 * -1/errno is return on error.
222 */
4529d4a7
MS
223struct lfs_bmapv_args {
224 fsid_t fsid; /* file system */
225 BLOCK_INFO *blkiov; /* block array */
226 int blkcnt; /* count of block array entries */
227};
c66054e3
KB
228int
229lfs_bmapv(p, uap, retval)
230 struct proc *p;
4529d4a7 231 struct lfs_bmapv_args *uap;
c66054e3
KB
232 int *retval;
233{
234 BLOCK_INFO *blkp;
235 struct mount *mntp;
236 struct vnode *vp;
a5bd9f52 237 void *start;
c66054e3 238 daddr_t daddr;
a5bd9f52 239 int cnt, error, step;
c66054e3
KB
240
241 if (error = suser(p->p_ucred, &p->p_acflag))
242 return (error);
c66054e3
KB
243 if ((mntp = getvfs(&uap->fsid)) == NULL)
244 return (EINVAL);
245
246 cnt = uap->blkcnt;
a5bd9f52 247 start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
c66054e3
KB
248 if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
249 free(blkp, M_SEGMENT);
250 return (error);
251 }
252
a5bd9f52 253 for (step = cnt; step--; ++blkp) {
5be74b00
KB
254 if (blkp->bi_lbn == LFS_UNUSED_LBN)
255 continue;
ee1bb292 256 /* Could be a deadlock ? */
ecc0e956 257 if (VFS_VGET(mntp, blkp->bi_inode, &vp))
a5bd9f52
KB
258 daddr = LFS_UNUSED_DADDR;
259 else {
58f30bab 260 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
a5bd9f52
KB
261 daddr = LFS_UNUSED_DADDR;
262 vput(vp);
263 }
264 blkp->bi_daddr = daddr;
265 }
266 copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
267 free(start, M_SEGMENT);
c66054e3
KB
268 return (0);
269}
270
271/*
272 * lfs_segclean:
273 *
274 * Mark the segment clean.
275 *
276 * 0 on success
277 * -1/errno is return on error.
278 */
4529d4a7
MS
279struct lfs_segclean_args {
280 fsid_t fsid; /* file system */
281 u_long segment; /* segment number */
282};
c66054e3
KB
283int
284lfs_segclean(p, uap, retval)
285 struct proc *p;
4529d4a7 286 struct lfs_segclean_args *uap;
c66054e3
KB
287 int *retval;
288{
386bc90a 289 CLEANERINFO *cip;
c66054e3
KB
290 SEGUSE *sup;
291 struct buf *bp;
292 struct mount *mntp;
293 struct lfs *fs;
294 int error;
295
296 if (error = suser(p->p_ucred, &p->p_acflag))
297 return (error);
c66054e3
KB
298 if ((mntp = getvfs(&uap->fsid)) == NULL)
299 return (EINVAL);
300
301 fs = VFSTOUFS(mntp)->um_lfs;
386bc90a 302
83318e24
MS
303 if (datosn(fs, fs->lfs_curseg) == uap->segment)
304 return (EBUSY);
305
c66054e3 306 LFS_SEGENTRY(sup, fs, uap->segment, bp);
83318e24
MS
307 if (sup->su_flags & SEGUSE_ACTIVE) {
308 brelse(bp);
309 return(EBUSY);
310 }
9f72b66d 311 fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
d96a172f
KB
312 fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
313 sup->su_ninos * btodb(fs->lfs_bsize);
c66054e3 314 sup->su_flags &= ~SEGUSE_DIRTY;
9f72b66d 315 (void) VOP_BWRITE(bp);
386bc90a
KB
316
317 LFS_CLEANERINFO(cip, fs, bp);
318 ++cip->clean;
319 --cip->dirty;
9f72b66d
KB
320 (void) VOP_BWRITE(bp);
321 wakeup(&fs->lfs_avail);
c66054e3
KB
322 return (0);
323}
324
325/*
326 * lfs_segwait:
327 *
328 * This will block until a segment in file system fsid is written. A timeout
329 * in milliseconds may be specified which will awake the cleaner automatically.
330 * An fsid of -1 means any file system, and a timeout of 0 means forever.
331 *
332 * 0 on success
333 * 1 on timeout
334 * -1/errno is return on error.
335 */
4529d4a7
MS
336struct lfs_segwait_args {
337 fsid_t fsid; /* file system */
338 struct timeval *tv; /* timeout */
339};
c66054e3
KB
340int
341lfs_segwait(p, uap, retval)
342 struct proc *p;
4529d4a7 343 struct lfs_segwait_args *uap;
c66054e3
KB
344 int *retval;
345{
346 extern int lfs_allclean_wakeup;
347 struct mount *mntp;
348 struct timeval atv;
349 void *addr;
350 u_long timeout;
351 int error, s;
352
9f72b66d 353 if (error = suser(p->p_ucred, &p->p_acflag)) {
c66054e3 354 return (error);
9f72b66d 355}
c66054e3
KB
356#ifdef WHEN_QUADS_WORK
357 if (uap->fsid == (fsid_t)-1)
358 addr = &lfs_allclean_wakeup;
359 else {
360 if ((mntp = getvfs(&uap->fsid)) == NULL)
361 return (EINVAL);
362 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
363 }
364#else
365 if ((mntp = getvfs(&uap->fsid)) == NULL)
366 addr = &lfs_allclean_wakeup;
367 else
368 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
369#endif
370
371 if (uap->tv) {
372 if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
373 return (error);
374 if (itimerfix(&atv))
375 return (EINVAL);
374f3b4e
KM
376 s = splclock();
377 timevaladd(&atv, (struct timeval *)&time);
c66054e3 378 timeout = hzto(&atv);
856f902f 379 splx(s);
c66054e3
KB
380 } else
381 timeout = 0;
382
383 error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
384 return (error == ERESTART ? EINTR : 0);
385}
9f72b66d
KB
386
387/*
388 * VFS_VGET call specialized for the cleaner. The cleaner already knows the
389 * daddr from the ifile, so don't look it up again. If the cleaner is
390 * processing IINFO structures, it may have the ondisk inode already, so
391 * don't go retrieving it again.
392 */
393int
394lfs_fastvget(mp, ino, daddr, vpp, dinp)
395 struct mount *mp;
396 ino_t ino;
397 daddr_t daddr;
398 struct vnode **vpp;
399 struct dinode *dinp;
400{
401 register struct inode *ip;
402 struct vnode *vp;
403 struct ufsmount *ump;
404 struct buf *bp;
405 dev_t dev;
406 int error;
407
408 ump = VFSTOUFS(mp);
409 dev = ump->um_dev;
ee1bb292
MS
410 /*
411 * This is playing fast and loose. Someone may have the inode
412 * locked, in which case they are going to be distinctly unhappy
413 * if we trash something.
414 */
415 if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
416 lfs_vref(*vpp);
417 if ((*vpp)->v_flag & VXLOCK)
418 printf ("Cleaned vnode VXLOCKED\n");
da447738 419 ip = VTOI(*vpp);
ee1bb292
MS
420 if (ip->i_flags & ILOCKED)
421 printf ("Cleaned vnode ILOCKED\n");
83318e24
MS
422 if (!(ip->i_flag & IMOD)) {
423 ++ump->um_lfs->lfs_uinodes;
424 ip->i_flag |= IMOD;
425 }
da447738 426 ip->i_flag |= IMOD;
9f72b66d 427 return (0);
da447738 428 }
9f72b66d
KB
429
430 /* Allocate new vnode/inode. */
431 if (error = lfs_vcreate(mp, ino, &vp)) {
432 *vpp = NULL;
433 return (error);
434 }
435
436 /*
437 * Put it onto its hash chain and lock it so that other requests for
438 * this inode will block if they arrive while we are sleeping waiting
439 * for old data structures to be purged or for the contents of the
440 * disk portion of this inode to be read.
441 */
442 ip = VTOI(vp);
443 ufs_ihashins(ip);
444
445 /*
446 * XXX
447 * This may not need to be here, logically it should go down with
448 * the i_devvp initialization.
449 * Ask Kirk.
450 */
451 ip->i_lfs = ump->um_lfs;
452
453 /* Read in the disk contents for the inode, copy into the inode. */
454 if (dinp)
455 if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
456 return (error);
457 else {
458 if (error = bread(ump->um_devvp, daddr,
459 (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
460 /*
461 * The inode does not contain anything useful, so it
462 * would be misleading to leave it on its hash chain.
463 * Iput() will return it to the free list.
464 */
465 ufs_ihashrem(ip);
466
467 /* Unlock and discard unneeded inode. */
ee1bb292 468 lfs_vunref(vp);
9f72b66d
KB
469 brelse(bp);
470 *vpp = NULL;
471 return (error);
472 }
473 ip->i_din = *lfs_ifind(ump->um_lfs, ino, bp->b_un.b_dino);
474 brelse(bp);
475 }
476
d243f10d
KB
477 /* Inode was just read from user space or disk, make sure it's locked */
478 ip->i_flag |= ILOCKED;
479
9f72b66d
KB
480 /*
481 * Initialize the vnode from the inode, check for aliases. In all
482 * cases re-init ip, the underlying vnode/inode may have changed.
483 */
484 if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
ee1bb292 485 lfs_vunref(vp);
9f72b66d
KB
486 *vpp = NULL;
487 return (error);
488 }
489 /*
490 * Finish inode initialization now that aliasing has been resolved.
491 */
492 ip->i_devvp = ump->um_devvp;
493 ip->i_flag |= IMOD;
494 ++ump->um_lfs->lfs_uinodes;
495 VREF(ip->i_devvp);
496 *vpp = vp;
497 return (0);
498}
499struct buf *
500lfs_fakebuf(vp, lbn, size, uaddr)
501 struct vnode *vp;
502 int lbn;
503 size_t size;
504 caddr_t uaddr;
505{
506 struct buf *bp;
507
508 bp = lfs_newbuf(vp, lbn, 0);
509 bp->b_saveaddr = uaddr;
510 bp->b_bufsize = size;
511 bp->b_bcount = size;
512 bp->b_flags |= B_INVAL;
513 return(bp);
514}