delete VERBOSE #ifdef's
[unix-history] / usr / src / sys / ufs / lfs / lfs_segment.c
CommitLineData
84c30241
KB
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_segment.c	7.28 (Berkeley) %G%
 */
9
34a084a9
KB
10#include <sys/param.h>
11#include <sys/systm.h>
12#include <sys/namei.h>
34a084a9 13#include <sys/kernel.h>
a1b8db53 14#include <sys/resourcevar.h>
34a084a9
KB
15#include <sys/file.h>
16#include <sys/stat.h>
17#include <sys/buf.h>
18#include <sys/proc.h>
19#include <sys/conf.h>
20#include <sys/vnode.h>
34a084a9
KB
21#include <sys/malloc.h>
22#include <sys/mount.h>
23
33d38333
KM
24#include <miscfs/specfs/specdev.h>
25#include <miscfs/fifofs/fifo.h>
26
0a011bb1
KB
27#include <ufs/ufs/quota.h>
28#include <ufs/ufs/inode.h>
29#include <ufs/ufs/dir.h>
30#include <ufs/ufs/ufsmount.h>
34a084a9 31
0a011bb1
KB
32#include <ufs/lfs/lfs.h>
33#include <ufs/lfs/lfs_extern.h>
84c30241 34
dc7e45d3 35/* In-memory description of a segment about to be written. */
dc7e45d3 36struct segment {
a1b8db53
KB
37 struct buf **bpp; /* pointer to buffer array */
38 struct buf **cbpp; /* pointer to next available bp */
39 struct buf *ibp; /* buffer pointer to inode page */
40 struct finfo *fip; /* current fileinfo pointer */
dc7e45d3
KB
41 void *segsum; /* segment summary info */
42 u_long ninodes; /* number of inodes in this segment */
43 u_long seg_bytes_left; /* bytes left in segment */
44 u_long sum_bytes_left; /* bytes left in summary block */
45 u_long seg_number; /* number of this segment */
46#define SEGM_CKP 0x01 /* doing a checkpoint */
47 u_long seg_flags; /* run-time flags for this segment */
dc7e45d3
KB
48};
49
84c30241 50/*
dc7e45d3
KB
51 * Determine if it's OK to start a partial in this segment, or if we need
52 * to go on to a new segment.
8954e52c 53 */
dc7e45d3
KB
54#define LFS_PARTIAL_FITS(fs) \
55 ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
56 1 << (fs)->lfs_fsbtodb)
57
80443139 58void lfs_callback __P((struct buf *));
a1b8db53
KB
59void lfs_gather __P((struct lfs *, struct segment *,
60 struct vnode *, int (*) __P((struct lfs *, struct buf *))));
61void lfs_initseg __P((struct lfs *, struct segment *));
62void lfs_iset __P((struct inode *, daddr_t, time_t));
63int lfs_match_data __P((struct lfs *, struct buf *));
64int lfs_match_dindir __P((struct lfs *, struct buf *));
65int lfs_match_indir __P((struct lfs *, struct buf *));
66int lfs_match_tindir __P((struct lfs *, struct buf *));
67struct buf *
4c60cc5b 68 lfs_newbuf __P((struct lfs *, daddr_t, size_t));
87804018 69void lfs_newseg __P((struct lfs *));
a1b8db53 70void lfs_shellsort __P((struct buf **, daddr_t *, register int));
87804018 71void lfs_updatemeta __P((struct lfs *,
a1b8db53
KB
72 struct segment *, struct vnode *, daddr_t *, struct buf **, int));
73void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
3ce71481
KB
74int lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
75int lfs_writeseg __P((struct lfs *, struct segment *));
a1b8db53 76void lfs_writesuper __P((struct lfs *, struct segment *));
3ce71481
KB
77void lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
78 struct segment *sp, int dirops));
84c30241 79
dc7e45d3
KB
80int lfs_allclean_wakeup; /* Cleaner wakeup address. */
81
44819c56
KB
82/*
83 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
84 * single threaded. Currently, there are two paths into lfs_segwrite, sync()
85 * and getnewbuf(). They both mark the file system busy. Lfs_vflush()
86 * explicitly marks the file system busy. So lfs_segwrite is safe. I think.
87 */
88
89int
90lfs_vflush(vp)
91 struct vnode *vp;
92{
93 struct inode *ip;
94 struct lfs *fs;
44819c56
KB
95 struct segment *sp;
96 int error, s;
97
200cb75d
KB
98 fs = VFSTOUFS(vp->v_mount)->um_lfs;
99 lfs_seglock(fs);
44819c56
KB
100
101 /*
102 * Allocate a segment structure and enough space to hold pointers to
103 * the maximum possible number of buffers which can be described in a
104 * single summary block.
105 */
106 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
107 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
108 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
109 sp->seg_flags = SEGM_CKP;
44819c56
KB
110
111 /*
112 * Keep a cumulative count of the outstanding I/O operations. If the
113 * disk drive catches up with us it could go to zero before we finish,
114 * so we artificially increment it by one until we've scheduled all of
115 * the writes we intend to do.
116 */
117 s = splbio();
4c60cc5b 118 ++fs->lfs_iocount;
44819c56
KB
119 splx(s);
120
44819c56 121 ip = VTOI(vp);
290d4594 122 do {
685d5160 123 lfs_initseg(fs, sp);
290d4594
KB
124 do {
125 if (vp->v_dirtyblkhd != NULL)
126 lfs_writefile(fs, sp, vp);
127 } while (lfs_writeinode(fs, sp, ip));
128 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
44819c56 129
290d4594 130 } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);
44819c56
KB
131
132 /*
133 * If the I/O count is non-zero, sleep until it reaches zero. At the
134 * moment, the user's process hangs around so we can sleep.
135 */
136 s = splbio();
137 if (--fs->lfs_iocount && (error =
14df3d9f
KB
138 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) {
139 free(sp->bpp, M_SEGMENT);
140 free(sp, M_SEGMENT);
44819c56 141 return (error);
14df3d9f 142 }
44819c56 143 splx(s);
200cb75d 144 lfs_segunlock(fs);
44819c56 145
14df3d9f
KB
146 /*
147 * XXX
148 * Should be writing a checkpoint?
149 */
44819c56
KB
150 free(sp->bpp, M_SEGMENT);
151 free(sp, M_SEGMENT);
152
153 return (0);
154}
155
3ce71481
KB
156void
157lfs_writevnodes(fs, mp, sp, dirops)
158 struct lfs *fs;
159 struct mount *mp;
160 struct segment *sp;
161 int dirops;
162{
163 struct inode *ip;
164 struct vnode *vp;
165 int error, s;
166
167loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
168 /*
169 * If the vnode that we are about to sync is no longer
170 * associated with this mount point, start over.
171 */
172 if (vp->v_mount != mp)
173 goto loop;
174
175 if (dirops && !(vp->v_flag & VDIROP) ||
176 !dirops && (vp->v_flag & VDIROP))
177 continue;
178 /*
179 * XXX
180 * Up the ref count so we don't get tossed out of
181 * memory.
182 */
183 VREF(vp);
184
185 /*
186 * Write the inode/file if dirty and it's not the
187 * the IFILE.
188 */
189 ip = VTOI(vp);
190 if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) ||
191 vp->v_dirtyblkhd != NULL) &&
192 ip->i_number != LFS_IFILE_INUM) {
193 if (vp->v_dirtyblkhd != NULL)
194 lfs_writefile(fs, sp, vp);
195 (void) lfs_writeinode(fs, sp, ip);
196 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
197 }
198 vp->v_flag &= ~VDIROP;
199 vrele(vp);
200 }
201}
202
84c30241 203int
275ca4f0 204lfs_segwrite(mp, do_ckp)
a1b8db53 205 struct mount *mp;
dc7e45d3 206 int do_ckp; /* Do a checkpoint. */
84c30241 207{
5b4e3ef5 208 struct buf *bp;
a1b8db53 209 struct inode *ip;
0a011bb1 210 struct lfs *fs;
a1b8db53
KB
211 struct segment *sp;
212 struct vnode *vp;
5b4e3ef5
KB
213 SEGUSE *segusep;
214 daddr_t ibno;
215 int error, i, s;
84c30241 216
44819c56 217 fs = VFSTOUFS(mp)->um_lfs;
200cb75d 218 lfs_seglock(fs);
44819c56
KB
219
220 /*
221 * Allocate a segment structure and enough space to hold pointers to
222 * the maximum possible number of buffers which can be described in a
223 * single summary block.
224 */
225 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
226 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
227 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
228 sp->seg_flags = do_ckp ? SEGM_CKP : 0;
229 lfs_initseg(fs, sp);
a1b8db53 230
8954e52c 231 /*
4c60cc5b
KB
232 * Keep a cumulative count of the outstanding I/O operations. If the
233 * disk drive catches up with us it could go to zero before we finish,
234 * so we artificially increment it by one until we've scheduled all of
235 * the writes we intend to do. If not a checkpoint, we never do the
236 * final decrement, avoiding the wakeup in the callback routine.
8954e52c 237 */
4c60cc5b 238 s = splbio();
290d4594 239 ++fs->lfs_iocount;
4c60cc5b 240 splx(s);
aa4dc149 241
3ce71481 242 lfs_writevnodes(fs, mp, sp, 0);
3ce71481
KB
243 fs->lfs_writer = 1;
244 if (fs->lfs_dirops && (error =
245 tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
246 free(sp->bpp, M_SEGMENT);
247 free(sp, M_SEGMENT);
248 fs->lfs_writer = 0;
290d4594 249 return (error);
3ce71481 250 }
dc7e45d3 251
3ce71481 252 lfs_writevnodes(fs, mp, sp, 1);
dc7e45d3 253
3ce71481 254 /*
5b4e3ef5
KB
255 * If we are doing a checkpoint, mark everything since the
256 * last checkpoint as no longer ACTIVE.
3ce71481 257 */
5b4e3ef5
KB
258 if (do_ckp)
259 for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
260 --ibno >= fs->lfs_cleansz; ) {
261 if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
262 NOCRED, &bp))
263
264 panic("lfs: ifile read");
265 segusep = (SEGUSE *)bp->b_un.b_addr;
266 for (i = fs->lfs_sepb; i--; segusep++)
267 segusep->su_flags &= ~SEGUSE_ACTIVE;
268
269 LFS_UBWRITE(bp);
270 }
271
3ce71481 272 if (do_ckp || fs->lfs_doifile) {
87804018
KB
273 vp = fs->lfs_ivnode;
274 while (vget(vp));
275 ip = VTOI(vp);
5b4e3ef5
KB
276 if (vp->v_dirtyblkhd != NULL)
277 lfs_writefile(fs, sp, vp);
278 (void)lfs_writeinode(fs, sp, ip);
87804018
KB
279 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
280 vput(vp);
5b4e3ef5
KB
281 /*
282 * This should never happen because we just guaranteed
283 * that all the segment usage table blocks are dirty, so
284 * no new ones should get written.
285 */
286 if (lfs_writeseg(fs, sp) && do_ckp)
287 panic("lfs_segwrite: created dirty blocks on ckp");
3ce71481
KB
288 } else
289 (void) lfs_writeseg(fs, sp);
aa4dc149 290
275ca4f0 291 /*
dc7e45d3
KB
292 * If the I/O count is non-zero, sleep until it reaches zero. At the
293 * moment, the user's process hangs around so we can sleep.
275ca4f0 294 */
3ce71481
KB
295 fs->lfs_writer = 0;
296 fs->lfs_doifile = 0;
297 wakeup(&fs->lfs_dirops);
298
290d4594
KB
299 s = splbio();
300 --fs->lfs_iocount;
dc7e45d3 301 if (do_ckp) {
4c60cc5b 302 if (fs->lfs_iocount && (error =
14df3d9f
KB
303 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) {
304 free(sp->bpp, M_SEGMENT);
305 free(sp, M_SEGMENT);
12304d41 306 return (error);
14df3d9f 307 }
dc7e45d3
KB
308 splx(s);
309 lfs_writesuper(fs, sp);
4c60cc5b
KB
310 } else
311 splx(s);
275ca4f0 312
200cb75d
KB
313 lfs_segunlock(fs);
314
c222b129
KB
315 free(sp->bpp, M_SEGMENT);
316 free(sp, M_SEGMENT);
275ca4f0 317
dc7e45d3 318 return (0);
84c30241
KB
319}
320
dc7e45d3
KB
321/*
322 * Write the dirty blocks associated with a vnode.
323 */
87804018 324void
dc7e45d3 325lfs_writefile(fs, sp, vp)
0a011bb1 326 struct lfs *fs;
a1b8db53
KB
327 struct segment *sp;
328 struct vnode *vp;
84c30241 329{
dc7e45d3 330 struct buf *bp;
a1b8db53 331 struct finfo *fip;
dc7e45d3 332 IFILE *ifp;
275ca4f0 333
a1b8db53
KB
334 if (sp->seg_bytes_left < fs->lfs_bsize ||
335 sp->sum_bytes_left < sizeof(struct finfo)) {
3ce71481 336 (void) lfs_writeseg(fs, sp);
a1b8db53
KB
337 lfs_initseg(fs, sp);
338 }
339 sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);
84c30241 340
a1b8db53
KB
341 fip = sp->fip;
342 fip->fi_nblocks = 0;
343 fip->fi_ino = VTOI(vp)->i_number;
344 LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
345 fip->fi_version = ifp->if_version;
346 brelse(bp);
347
348 /*
349 * It may not be necessary to write the meta-data blocks at this point,
350 * as the roll-forward recovery code should be able to reconstruct the
351 * list.
352 */
353 lfs_gather(fs, sp, vp, lfs_match_data);
354 lfs_gather(fs, sp, vp, lfs_match_indir);
355 lfs_gather(fs, sp, vp, lfs_match_dindir);
dc7e45d3 356#ifdef TRIPLE
a1b8db53 357 lfs_gather(fs, sp, vp, lfs_match_tindir);
dc7e45d3 358#endif
aa4dc149 359
a1b8db53 360 fip = sp->fip;
dc7e45d3 361#ifdef META
a1b8db53 362 printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
dc7e45d3 363#endif
a1b8db53
KB
364 if (fip->fi_nblocks != 0) {
365 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
366 sp->fip =
367 (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
368 sizeof(daddr_t) * (fip->fi_nblocks - 1));
9a46ddb2
CS
369 } else
370 sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
12304d41
KB
371}
372
3ce71481 373int
12304d41
KB
374lfs_writeinode(fs, sp, ip)
375 struct lfs *fs;
a1b8db53
KB
376 struct segment *sp;
377 struct inode *ip;
12304d41 378{
a1b8db53 379 struct buf *bp, *ibp;
87804018 380 IFILE *ifp;
9a46ddb2
CS
381 SEGUSE *sup;
382 daddr_t daddr;
87804018 383 ino_t ino;
12304d41 384 int ndx;
3ce71481 385 int redo_ifile = 0;
12304d41 386
12304d41
KB
387 /* Allocate a new inode block if necessary. */
388 if (sp->ibp == NULL) {
389 /* Allocate a new segment if necessary. */
390 if (sp->seg_bytes_left < fs->lfs_bsize ||
391 sp->sum_bytes_left < sizeof(daddr_t)) {
3ce71481 392 (void) lfs_writeseg(fs, sp);
12304d41
KB
393 lfs_initseg(fs, sp);
394 }
395
396 /* Get next inode block. */
9a46ddb2 397 daddr = fs->lfs_offset;
12304d41
KB
398 fs->lfs_offset += fsbtodb(fs, 1);
399 sp->ibp = *sp->cbpp++ =
4c60cc5b 400 lfs_newbuf(fs, daddr, fs->lfs_bsize);
12304d41 401
4c60cc5b 402 /* Set remaining space counters. */
12304d41
KB
403 sp->seg_bytes_left -= fs->lfs_bsize;
404 sp->sum_bytes_left -= sizeof(daddr_t);
87804018 405 ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
12304d41 406 sp->ninodes / INOPB(fs) - 1;
9a46ddb2 407 ((daddr_t *)(sp->segsum))[ndx] = daddr;
12304d41
KB
408 }
409
a1b8db53 410 /* Update the inode times and copy the inode onto the inode page. */
87804018 411 ITIMES(ip, &time, &time);
12304d41 412 bp = sp->ibp;
a1b8db53 413 bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din;
12304d41
KB
414
415 /* Increment inode count in segment summary block. */
416 ++((SEGSUM *)(sp->segsum))->ss_ninos;
417
418 /* If this page is full, set flag to allocate a new page. */
419 if (++sp->ninodes % INOPB(fs) == 0)
420 sp->ibp = NULL;
421
422 /*
87804018
KB
423 * If updating the ifile, update the super-block. Update the disk
424 * address and access times for this inode in the ifile.
12304d41 425 */
87804018 426 ino = ip->i_number;
4645c316
KB
427 if (ino == LFS_IFILE_INUM) {
428 daddr = fs->lfs_idaddr;
12304d41 429 fs->lfs_idaddr = bp->b_blkno;
4645c316
KB
430 } else {
431 LFS_IENTRY(ifp, fs, ino, ibp);
432 daddr = ifp->if_daddr;
433 ifp->if_daddr = bp->b_blkno;
434 LFS_UBWRITE(ibp);
435 }
9a46ddb2 436
3ce71481
KB
437 /*
438 * No need to update segment usage if there was no former inode address
439 * or if the last inode address is in the current partial segment.
440 */
441 if (daddr != LFS_UNUSED_DADDR &&
685d5160 442 !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
9a46ddb2
CS
443 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
444#ifdef DIAGNOSTIC
3ce71481 445 if (sup->su_nbytes < sizeof(struct dinode)) {
2b3ba73a
KB
446 /* XXX -- Change to a panic. */
447 printf("lfs: negative bytes (segment %d)\n",
9a46ddb2 448 datosn(fs, daddr));
3ce71481
KB
449 panic("negative bytes");
450 }
9a46ddb2
CS
451#endif
452 sup->su_nbytes -= sizeof(struct dinode);
453 LFS_UBWRITE(bp);
4645c316 454 redo_ifile = (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
9a46ddb2 455 }
290d4594 456 return (redo_ifile);
275ca4f0
KB
457}
458
87804018 459void
275ca4f0 460lfs_gather(fs, sp, vp, match)
0a011bb1 461 struct lfs *fs;
a1b8db53
KB
462 struct segment *sp;
463 struct vnode *vp;
464 int (*match) __P((struct lfs *, struct buf *));
275ca4f0 465{
3ce71481
KB
466 struct buf **bpp, *bp;
467struct buf *lastbp;
a1b8db53
KB
468 struct finfo *fip;
469 struct inode *ip;
275ca4f0 470 daddr_t *lbp, *start_lbp;
aa4dc149
KB
471 u_long version;
472 int s;
275ca4f0
KB
473
474 ip = VTOI(vp);
475 bpp = sp->cbpp;
476 fip = sp->fip;
275ca4f0 477 start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks];
275ca4f0 478
92f4ed04 479loop: s = splbio();
3ce71481
KB
480 lastbp = NULL;
481 for (bp = vp->v_dirtyblkhd; bp; lastbp = bp, bp = bp->b_blockf) {
482 if (bp->b_flags & B_BUSY || !match(fs, bp) ||
483 bp->b_flags & B_GATHERED)
275ca4f0 484 continue;
aa4dc149 485#ifdef DIAGNOSTIC
dc7e45d3 486 if (!(bp->b_flags & B_DELWRI))
12304d41 487 panic("lfs_gather: bp not B_DELWRI");
dc7e45d3 488 if (!(bp->b_flags & B_LOCKED))
12304d41 489 panic("lfs_gather: bp not B_LOCKED");
aa4dc149 490#endif
aa4dc149 491 /*
dc7e45d3
KB
492 * If full, finish this segment. We may be doing I/O, so
493 * release and reacquire the splbio().
aa4dc149
KB
494 */
495 if (sp->sum_bytes_left < sizeof(daddr_t) ||
275ca4f0 496 sp->seg_bytes_left < fs->lfs_bsize) {
275ca4f0 497 splx(s);
aa4dc149 498 lfs_updatemeta(fs,
dc7e45d3 499 sp, vp, start_lbp, bpp, lbp - start_lbp);
275ca4f0 500
aa4dc149
KB
501 /* Add the current file to the segment summary. */
502 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
275ca4f0 503
aa4dc149 504 version = fip->fi_version;
3ce71481 505 (void) lfs_writeseg(fs, sp);
12304d41 506 lfs_initseg(fs, sp);
aa4dc149 507
275ca4f0 508 fip = sp->fip;
275ca4f0 509 fip->fi_version = version;
aa4dc149 510 fip->fi_ino = ip->i_number;
275ca4f0 511 start_lbp = lbp = fip->fi_blocks;
aa4dc149 512
9a46ddb2
CS
513 sp->sum_bytes_left -=
514 sizeof(struct finfo) - sizeof(daddr_t);
515
aa4dc149 516 bpp = sp->cbpp;
92f4ed04 517 goto loop;
275ca4f0 518 }
9a46ddb2
CS
519
520 /* Insert into the buffer list, update the FINFO block. */
3ce71481 521 bp->b_flags |= B_GATHERED;
9a46ddb2
CS
522 *sp->cbpp++ = bp;
523 ++fip->fi_nblocks;
524 *lbp++ = bp->b_lblkno;
525
526 sp->sum_bytes_left -= sizeof(daddr_t);
527 sp->seg_bytes_left -= bp->b_bufsize;
84c30241 528 }
275ca4f0 529 splx(s);
dc7e45d3 530 lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp);
84c30241
KB
531}
532
84c30241 533/*
aa4dc149 534 * Update the metadata that points to the blocks listed in the FINFO
84c30241
KB
535 * array.
536 */
87804018 537void
dc7e45d3 538lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks)
0a011bb1 539 struct lfs *fs;
a1b8db53
KB
540 struct segment *sp;
541 struct vnode *vp;
275ca4f0 542 daddr_t *lbp;
a1b8db53 543 struct buf **bpp;
275ca4f0 544 int nblocks;
84c30241 545{
12304d41 546 SEGUSE *sup;
a1b8db53 547 struct buf *bp;
dc7e45d3 548 INDIR a[NIADDR], *ap;
a1b8db53 549 struct inode *ip;
12304d41 550 daddr_t daddr, lbn, off;
dc7e45d3 551 int db_per_fsb, error, i, num;
84c30241 552
aa4dc149 553 if (nblocks == 0)
275ca4f0
KB
554 return;
555
12304d41 556 /* Sort the blocks. */
87804018 557 lfs_shellsort(bpp, lbp, nblocks);
275ca4f0 558
12304d41
KB
559 /*
560 * Assign disk addresses, and update references to the logical
561 * block and the segment usage information.
562 */
dc7e45d3 563 db_per_fsb = fsbtodb(fs, 1);
12304d41
KB
564 for (i = nblocks; i--; ++bpp) {
565 lbn = *lbp++;
566 (*bpp)->b_blkno = off = fs->lfs_offset;
dc7e45d3 567 fs->lfs_offset += db_per_fsb;
275ca4f0 568
dc7e45d3 569 if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num))
a1b8db53 570 panic("lfs_updatemeta: lfs_bmaparray %d", error);
dc7e45d3
KB
571 ip = VTOI(vp);
572 switch (num) {
573 case 0:
12304d41 574 ip->i_db[lbn] = off;
dc7e45d3
KB
575 break;
576 case 1:
12304d41 577 ip->i_ib[a[0].in_off] = off;
dc7e45d3
KB
578 break;
579 default:
580 ap = &a[num - 1];
dc7e45d3
KB
581 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
582 panic("lfs_updatemeta: bread bno %d",
583 ap->in_lbn);
4256edb9
KB
584 /*
585 * Bread may create a new indirect block which needs
586 * to get counted for the inode.
587 */
5b4e3ef5 588 if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
4256edb9 589 ip->i_blocks += btodb(fs->lfs_bsize);
5b4e3ef5
KB
590 fs->lfs_bfree -= btodb(fs->lfs_bsize);
591 }
12304d41 592 bp->b_un.b_daddr[ap->in_off] = off;
9342689a 593 VOP_BWRITE(bp);
12304d41
KB
594 }
595
596 /* Update segment usage information. */
597 if (daddr != UNASSIGNED) {
598 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
12304d41 599#ifdef DIAGNOSTIC
3ce71481 600 if (sup->su_nbytes < fs->lfs_bsize) {
2b3ba73a
KB
601 /* XXX -- Change to a panic. */
602 printf("lfs: negative bytes (segment %d)\n",
12304d41 603 datosn(fs, daddr));
3ce71481
KB
604 panic ("Negative Bytes");
605 }
12304d41
KB
606#endif
607 sup->su_nbytes -= fs->lfs_bsize;
a1b8db53 608 LFS_UBWRITE(bp);
84c30241 609 }
84c30241 610 }
84c30241
KB
611}
612
12304d41
KB
613/*
614 * Start a new segment.
615 */
87804018 616void
12304d41 617lfs_initseg(fs, sp)
0a011bb1 618 struct lfs *fs;
a1b8db53 619 struct segment *sp;
84c30241 620{
12304d41
KB
621 SEGUSE *sup;
622 SEGSUM *ssp;
623 struct buf *bp;
624 daddr_t lbn, *lbnp;
275ca4f0 625
12304d41 626 /* Advance to the next segment. */
c222b129 627 if (!LFS_PARTIAL_FITS(fs)) {
9a46ddb2 628 /* Wake up any cleaning procs waiting on this file system. */
4c60cc5b
KB
629 wakeup(&fs->lfs_nextseg);
630 wakeup(&lfs_allclean_wakeup);
9a46ddb2 631
c222b129
KB
632 lfs_newseg(fs);
633 fs->lfs_offset = fs->lfs_curseg;
12304d41
KB
634 sp->seg_number = datosn(fs, fs->lfs_curseg);
635 sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
636
637 /*
c222b129
KB
638 * If the segment contains a superblock, update the offset
639 * and summary address to skip over it.
12304d41 640 */
87804018 641 LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
c222b129 642 if (sup->su_flags & SEGUSE_SUPERBLOCK) {
12304d41
KB
643 fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
644 sp->seg_bytes_left -= LFS_SBPAD;
275ca4f0 645 }
a1b8db53 646 brelse(bp);
12304d41
KB
647 } else {
648 sp->seg_number = datosn(fs, fs->lfs_curseg);
649 sp->seg_bytes_left = (fs->lfs_dbpseg -
650 (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
651 }
3ce71481 652 fs->lfs_lastpseg = fs->lfs_offset;
aa4dc149 653
12304d41
KB
654 sp->ibp = NULL;
655 sp->ninodes = 0;
aa4dc149 656
12304d41
KB
657 /* Get a new buffer for SEGSUM and enter it into the buffer list. */
658 sp->cbpp = sp->bpp;
4c60cc5b 659 *sp->cbpp = lfs_newbuf(fs, fs->lfs_offset, LFS_SUMMARY_SIZE);
12304d41
KB
660 sp->segsum = (*sp->cbpp)->b_un.b_addr;
661 ++sp->cbpp;
662 fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
aa4dc149 663
12304d41
KB
664 /* Set point to SEGSUM, initialize it. */
665 ssp = sp->segsum;
666 ssp->ss_next = fs->lfs_nextseg;
12304d41 667 ssp->ss_nfinfo = ssp->ss_ninos = 0;
aa4dc149 668
12304d41 669 /* Set pointer to first FINFO, initialize it. */
a1b8db53 670 sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
12304d41 671 sp->fip->fi_nblocks = 0;
aa4dc149 672
12304d41
KB
673 sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
674 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
675}
aa4dc149 676
12304d41
KB
677/*
678 * Return the next segment to write.
679 */
87804018 680void
12304d41
KB
681lfs_newseg(fs)
682 struct lfs *fs;
683{
c222b129 684 CLEANERINFO *cip;
12304d41
KB
685 SEGUSE *sup;
686 struct buf *bp;
c222b129 687 int curseg, isdirty, sn;
12304d41 688
5b4e3ef5
KB
689 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
690 sup->su_flags |= SEGUSE_DIRTY;
691 LFS_UBWRITE(bp);
c222b129
KB
692
693 LFS_CLEANERINFO(cip, fs, bp);
694 --cip->clean;
695 ++cip->dirty;
a1b8db53 696 LFS_UBWRITE(bp);
c222b129
KB
697
698 fs->lfs_lastseg = fs->lfs_curseg;
699 fs->lfs_curseg = fs->lfs_nextseg;
700 for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
12304d41 701 sn = (sn + 1) % fs->lfs_nseg;
c222b129 702 if (sn == curseg)
12304d41
KB
703 panic("lfs_nextseg: no clean segments");
704 LFS_SEGENTRY(sup, fs, sn, bp);
705 isdirty = sup->su_flags & SEGUSE_DIRTY;
a1b8db53 706 brelse(bp);
12304d41
KB
707 if (!isdirty)
708 break;
709 }
5b4e3ef5 710
c222b129 711 fs->lfs_nextseg = sntoda(fs, sn);
84c30241
KB
712}
713
3ce71481 714int
84c30241 715lfs_writeseg(fs, sp)
0a011bb1 716 struct lfs *fs;
a1b8db53 717 struct segment *sp;
84c30241 718{
4c60cc5b 719 struct buf **bpp, *bp, *cbp;
84c30241 720 SEGUSE *sup;
a1b8db53 721 SEGSUM *ssp;
dc7e45d3 722 dev_t i_dev;
4c60cc5b 723 size_t size;
3ce71481
KB
724 u_long *datap, *dp;
725 int ch_per_blk, do_again, i, nblocks, num, s;
726 int (*strategy)__P((struct vop_strategy_args *));
200cb75d 727 struct vop_strategy_args vop_strategy_a;
5b4e3ef5 728 u_short ninos;
4c60cc5b 729 char *p;
84c30241 730
3ce71481
KB
731 /* Checkpoint always writes superblock, even if no data blocks. */
732 if ((nblocks = sp->cbpp - sp->bpp) == 0 && !(sp->seg_flags & SEGM_CKP))
290d4594 733 return (0);
a1b8db53 734
84c30241 735 /*
a1b8db53
KB
736 * Compute checksum across data and then across summary; the first
737 * block (the summary block) is skipped. Set the create time here
738 * so that it's guaranteed to be later than the inode mod times.
dc7e45d3
KB
739 *
740 * XXX
741 * Fix this to do it inline, instead of malloc/copy.
84c30241 742 */
dc7e45d3 743 datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
12304d41
KB
744 for (bpp = sp->bpp, i = nblocks - 1; i--;)
745 *dp++ = (*++bpp)->b_un.b_words[0];
a1b8db53 746 ssp = (SEGSUM *)sp->segsum;
89bed312 747 ssp->ss_create = time.tv_sec;
685d5160 748 ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
a1b8db53
KB
749 ssp->ss_sumsum =
750 cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
c222b129 751 free(datap, M_SEGMENT);
8954e52c 752
3ce71481
KB
753 /* Update the segment usage information. */
754 LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
5b4e3ef5
KB
755 ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
756 sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift;
3ce71481 757 sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
685d5160 758 sup->su_nbytes += LFS_SUMMARY_SIZE;
3ce71481 759 sup->su_lastmod = time.tv_sec;
5b4e3ef5
KB
760 sup->su_flags |= SEGUSE_ACTIVE;
761 sup->su_ninos += ninos;
762 ++sup->su_nsums;
3ce71481 763 LFS_UBWRITE(bp);
5b4e3ef5 764 fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);
3ce71481
KB
765 do_again = !(bp->b_flags & B_GATHERED);
766
dc7e45d3 767 i_dev = VTOI(fs->lfs_ivnode)->i_dev;
8cfb9f42 768 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
275ca4f0 769
4c60cc5b
KB
770 /*
771 * When we simply write the blocks we lose a rotation for every block
772 * written. To avoid this problem, we allocate memory in chunks, copy
773 * the buffers into the chunk and write the chunk. 56K was chosen as
774 * some driver/controllers can't handle unsigned 16 bit transfers.
775 * When the data is copied to the chunk, turn off the the B_LOCKED bit
776 * and brelse the buffer (which will move them to the LRU list). Add
777 * the B_CALL flag to the buffer header so we can count I/O's for the
778 * checkpoints and so we can release the allocated memory.
779 *
780 * XXX
781 * This should be removed if the new virtual memory system allows us to
782 * easily make the buffers contiguous in kernel memory and if that's
783 * fast enough.
784 */
785#define LFS_CHUNKSIZE (56 * 1024)
786 ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize;
787 for (bpp = sp->bpp, i = nblocks; i;) {
788 num = ch_per_blk;
789 if (num > i)
790 num = i;
791 i -= num;
792 size = num * fs->lfs_bsize;
793
794 cbp = lfs_newbuf(fs, (*bpp)->b_blkno, 0);
795 cbp->b_dev = i_dev;
796 cbp->b_flags = B_ASYNC | B_BUSY | B_CALL;
797 cbp->b_iodone = lfs_callback;
798 cbp->b_saveaddr = cbp->b_un.b_addr;
799 cbp->b_un.b_addr = malloc(size, M_SEGMENT, M_WAITOK);
800
801 s = splbio();
802 ++fs->lfs_iocount;
803 for (p = cbp->b_un.b_addr; num--;) {
804 bp = *bpp++;
805 bcopy(bp->b_un.b_addr, p, bp->b_bcount);
806 p += bp->b_bcount;
3ce71481
KB
807 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI |
808 B_LOCKED | B_GATHERED);
809 if (!(bp->b_flags & (B_NOCACHE | B_INVAL))) {
4c60cc5b
KB
810 bremfree(bp);
811 reassignbuf(bp, bp->b_vp);
812 }
813 brelse(bp);
dc7e45d3 814 }
4c60cc5b
KB
815 splx(s);
816 cbp->b_bcount = p - cbp->b_un.b_addr;
8cfb9f42
JH
817 vop_strategy_a.a_desc = VDESC(vop_strategy);
818 vop_strategy_a.a_bp = cbp;
819 (strategy)(&vop_strategy_a);
8954e52c 820 }
290d4594 821 return (do_again);
275ca4f0
KB
822}
823
87804018 824void
dc7e45d3 825lfs_writesuper(fs, sp)
0a011bb1 826 struct lfs *fs;
a1b8db53 827 struct segment *sp;
275ca4f0 828{
a1b8db53 829 struct buf *bp;
dc7e45d3 830 dev_t i_dev;
8cfb9f42 831 int (*strategy) __P((struct vop_strategy_args *));
200cb75d 832 struct vop_strategy_args vop_strategy_a;
275ca4f0 833
dc7e45d3 834 i_dev = VTOI(fs->lfs_ivnode)->i_dev;
8cfb9f42 835 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
14712628 836
aa4dc149 837 /* Checksum the superblock and copy it into a buffer. */
0a011bb1 838 fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
4c60cc5b 839 bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD);
dc7e45d3 840 *bp->b_un.b_lfs = *fs;
275ca4f0 841
14712628 842 /* Write the first superblock (wait). */
dc7e45d3 843 bp->b_dev = i_dev;
dc7e45d3 844 bp->b_flags |= B_BUSY;
12304d41 845 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
8cfb9f42
JH
846 vop_strategy_a.a_desc = VDESC(vop_strategy);
847 vop_strategy_a.a_bp = bp;
848 (strategy)(&vop_strategy_a);
275ca4f0 849 biowait(bp);
aa4dc149 850
14712628 851 /* Write the second superblock (don't wait). */
275ca4f0 852 bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
12304d41 853 bp->b_flags |= B_ASYNC | B_BUSY;
dc7e45d3 854 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
8cfb9f42 855 (strategy)(&vop_strategy_a);
275ca4f0
KB
856}
857
aa4dc149
KB
858/*
859 * Logical block number match routines used when traversing the dirty block
860 * chain.
861 */
87804018
KB
862int
863lfs_match_data(fs, bp)
dc7e45d3 864 struct lfs *fs;
a1b8db53 865 struct buf *bp;
275ca4f0 866{
aa4dc149 867 return (bp->b_lblkno >= 0);
275ca4f0
KB
868}
869
87804018
KB
870int
871lfs_match_indir(fs, bp)
dc7e45d3 872 struct lfs *fs;
a1b8db53 873 struct buf *bp;
275ca4f0 874{
dc7e45d3
KB
875 int lbn;
876
877 lbn = bp->b_lblkno;
878 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
275ca4f0
KB
879}
880
87804018
KB
881int
882lfs_match_dindir(fs, bp)
dc7e45d3 883 struct lfs *fs;
a1b8db53 884 struct buf *bp;
275ca4f0 885{
dc7e45d3
KB
886 int lbn;
887
888 lbn = bp->b_lblkno;
889 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
aa4dc149
KB
890}
891
87804018
KB
892int
893lfs_match_tindir(fs, bp)
0a011bb1 894 struct lfs *fs;
a1b8db53 895 struct buf *bp;
aa4dc149 896{
dc7e45d3 897 int lbn;
aa4dc149 898
dc7e45d3
KB
899 lbn = bp->b_lblkno;
900 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
901}
aa4dc149 902
dc7e45d3
KB
903/*
904 * Allocate a new buffer header.
905 */
a1b8db53 906struct buf *
4c60cc5b 907lfs_newbuf(fs, daddr, size)
dc7e45d3 908 struct lfs *fs;
dc7e45d3
KB
909 daddr_t daddr;
910 size_t size;
911{
a1b8db53 912 struct buf *bp;
aa4dc149 913
dc7e45d3
KB
914 bp = getnewbuf();
915 bremhash(bp);
916 bgetvp(fs->lfs_ivnode, bp);
917 bp->b_bcount = 0;
918 bp->b_lblkno = daddr;
919 bp->b_blkno = daddr;
920 bp->b_error = 0;
921 bp->b_resid = 0;
4c60cc5b
KB
922 if (size)
923 allocbuf(bp, size);
dc7e45d3 924 bp->b_flags |= B_NOCACHE;
4c60cc5b 925 bp->b_saveaddr = NULL;
12304d41 926 binshash(bp, &bfreelist[BQ_AGE]);
dc7e45d3
KB
927 return (bp);
928}
aa4dc149 929
80443139 930void
dc7e45d3 931lfs_callback(bp)
a1b8db53 932 struct buf *bp;
dc7e45d3
KB
933{
934 struct lfs *fs;
aa4dc149 935
dc7e45d3
KB
936 fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
937#ifdef DIAGNOSTIC
938 if (fs->lfs_iocount == 0)
939 panic("lfs_callback: zero iocount\n");
940#endif
941 if (--fs->lfs_iocount == 0)
4c60cc5b 942 wakeup(&fs->lfs_iocount);
12304d41 943
4c60cc5b
KB
944 if (bp->b_saveaddr) {
945 free(bp->b_un.b_addr, M_SEGMENT);
946 bp->b_un.b_addr = bp->b_saveaddr;
2b3ba73a 947 bp->b_saveaddr = NULL;
4c60cc5b 948 }
dc7e45d3 949 brelse(bp);
84c30241
KB
950}
951
/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  The logical block numbers are
 * compared as unsigned values so that the negative block numbers (meta
 * data blocks) sort AFTER the data blocks.
 *
 * bp_array and lb_array both hold nmemb entries; entry i of one belongs
 * with entry i of the other, before and after the sort.
 */
void
lfs_shellsort(bp_array, lb_array, nmemb)
	struct buf **bp_array;
	daddr_t *lb_array;
	register int nmemb;
{
	static int increments[] = { 4, 1, 0 };
	register int incr, *incrp, t1, t2;
	struct buf *bp_temp;
	daddr_t lb_temp;

	for (incrp = increments; (incr = *incrp++) != 0;)
		for (t1 = incr; t1 < nmemb; ++t1)
			for (t2 = t1 - incr; t2 >= 0; t2 -= incr) {
				/* Unsigned compare: negatives rank highest. */
				if ((u_long)lb_array[t2] <=
				    (u_long)lb_array[t2 + incr])
					break;
				lb_temp = lb_array[t2];
				lb_array[t2] = lb_array[t2 + incr];
				lb_array[t2 + incr] = lb_temp;
				bp_temp = bp_array[t2];
				bp_array[t2] = bp_array[t2 + incr];
				bp_array[t2 + incr] = bp_temp;
			}
}
989}