CLEANINFO didn't need last_seg, last_time for now
[unix-history] / usr / src / sys / ufs / lfs / lfs_segment.c
CommitLineData
84c30241
KB
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 *
2742ac45 7 * @(#)lfs_segment.c 7.5 (Berkeley) %G%
84c30241
KB
8 */
9
34a084a9
KB
10#include <sys/param.h>
11#include <sys/systm.h>
12#include <sys/namei.h>
13#include <sys/resourcevar.h>
14#include <sys/kernel.h>
15#include <sys/file.h>
16#include <sys/stat.h>
17#include <sys/buf.h>
18#include <sys/proc.h>
19#include <sys/conf.h>
20#include <sys/vnode.h>
21#include <sys/specdev.h>
22#include <sys/fifo.h>
23#include <sys/malloc.h>
24#include <sys/mount.h>
12304d41 25#include <sys/kernel.h> /* XXX delete when time goes away */
34a084a9 26
0a011bb1
KB
27#include <ufs/ufs/quota.h>
28#include <ufs/ufs/inode.h>
29#include <ufs/ufs/dir.h>
30#include <ufs/ufs/ufsmount.h>
34a084a9 31
0a011bb1
KB
32#include <ufs/lfs/lfs.h>
33#include <ufs/lfs/lfs_extern.h>
84c30241 34
dc7e45d3
KB
35/* In-memory description of a segment about to be written. */
36typedef struct segment SEGMENT;
37struct segment {
38 BUF **bpp; /* pointer to buffer array */
39 BUF **cbpp; /* pointer to next available bp */
40 BUF *ibp; /* buffer pointer to inode page */
41 void *segsum; /* segment summary info */
42 u_long ninodes; /* number of inodes in this segment */
43 u_long seg_bytes_left; /* bytes left in segment */
44 u_long sum_bytes_left; /* bytes left in summary block */
45 u_long seg_number; /* number of this segment */
46#define SEGM_CKP 0x01 /* doing a checkpoint */
47 u_long seg_flags; /* run-time flags for this segment */
48 FINFO *fip; /* current fileinfo pointer */
49};
50
/*
 * LFS_PARTIAL_FITS --
 *	Determine if it's OK to start a partial in this segment, or if we
 *	need to go on to a new segment.  The test compares what is left of
 *	lfs_dbpseg past the current offset against 1 << lfs_fsbtodb.
 */
#define	LFS_PARTIAL_FITS(fs)						\
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) >	\
	    1 << (fs)->lfs_fsbtodb)

/*
 * datosn --
 *	Disk address to segment number.
 */
#define	datosn(fs, daddr)						\
	(((daddr) - (fs)->lfs_sboffs[0]) / fsbtodb((fs), (fs)->lfs_ssize))

/*
 * sntoda --
 *	Segment number to disk address.
 */
#define	sntoda(fs, sn)							\
	((daddr_t)((sn) * ((fs)->lfs_ssize << (fs)->lfs_fsbtodb) +	\
	    (fs)->lfs_sboffs[0]))
65
66static int lfs_callback __P((BUF *));
67static void lfs_gather __P((struct lfs *,
68 SEGMENT *, VNODE *, int (*) __P((struct lfs *, BUF *))));
12304d41 69static void lfs_initseg __P((struct lfs *, SEGMENT *));
dc7e45d3 70static BUF *lfs_newbuf __P((struct lfs *, SEGMENT *, daddr_t, size_t));
12304d41 71static daddr_t lfs_newseg __P((struct lfs *));
0a011bb1 72static void lfs_updatemeta __P((struct lfs *,
dc7e45d3
KB
73 SEGMENT *, VNODE *, daddr_t *, BUF **, int));
74static void lfs_writefile __P((struct lfs *, SEGMENT *, VNODE *));
75static void lfs_writeinode __P((struct lfs *, SEGMENT *, INODE *));
0a011bb1 76static void lfs_writeseg __P((struct lfs *, SEGMENT *));
dc7e45d3
KB
77static void lfs_writesuper __P((struct lfs *, SEGMENT *));
78static int match_data __P((struct lfs *, BUF *));
79static int match_dindir __P((struct lfs *, BUF *));
80static int match_indir __P((struct lfs *, BUF *));
81static int match_tindir __P((struct lfs *, BUF *));
275ca4f0 82static void shellsort __P((BUF **, daddr_t *, register int));
84c30241 83
dc7e45d3
KB
84int lfs_allclean_wakeup; /* Cleaner wakeup address. */
85
84c30241 86int
275ca4f0 87lfs_segwrite(mp, do_ckp)
84c30241 88 MOUNT *mp;
dc7e45d3 89 int do_ckp; /* Do a checkpoint. */
84c30241 90{
84c30241 91 INODE *ip;
0a011bb1 92 struct lfs *fs;
84c30241
KB
93 VNODE *vp;
94 SEGMENT *sp;
12304d41 95 int s, error;
84c30241 96
dc7e45d3
KB
97#ifdef VERBOSE
98 printf("lfs_segwrite\n");
aa4dc149 99#endif
8954e52c 100 /*
dc7e45d3
KB
101 * If doing a checkpoint, we keep a cumulative count of the outstanding
102 * I/O operations. If the disk drive catches up with us it could go to
103 * zero before we finish, so we artificially increment it by one until
104 * we've scheduled all of the writes we intend to do.
8954e52c 105 */
12304d41 106 fs = VFSTOUFS(mp)->um_lfs;
dc7e45d3
KB
107 if (do_ckp) {
108 s = splbio();
109 fs->lfs_iocount = 1;
110 splx(s);
111 }
aa4dc149 112
dc7e45d3
KB
113 /*
114 * Allocate a segment structure and enough space to hold pointers to
115 * the maximum possible number of buffers which can be described in a
116 * single summary block.
117 */
118 sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK);
119 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
120 sizeof(daddr_t) + 1) * sizeof(BUF *), M_SEGMENT, M_WAITOK);
121 sp->seg_flags = do_ckp ? SEGM_CKP : 0;
12304d41 122 lfs_initseg(fs, sp);
84c30241
KB
123loop:
124 for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
125 /*
126 * If the vnode that we are about to sync is no longer
127 * associated with this mount point, start over.
128 */
84c30241
KB
129 if (vp->v_mount != mp)
130 goto loop;
131 if (VOP_ISLOCKED(vp))
132 continue;
dc7e45d3 133
12304d41
KB
134 /*
135 * Write the inode/file if dirty and it's not the
136 * the IFILE.
137 */
84c30241 138 ip = VTOI(vp);
dc7e45d3
KB
139 if (ip->i_flag & (IMOD | IACC | IUPD | ICHG) == 0 &&
140 vp->v_dirtyblkhd == NULL ||
141 ip->i_number == LFS_IFILE_INUM)
84c30241 142 continue;
dc7e45d3 143
84c30241
KB
144 if (vget(vp))
145 goto loop;
dc7e45d3 146 lfs_writefile(fs, sp, vp);
12304d41 147 lfs_writeinode(fs, sp, ip);
84c30241
KB
148 vput(vp);
149 }
dc7e45d3
KB
150 if (do_ckp) {
151 lfs_writefile(fs, sp, fs->lfs_ivnode);
152 lfs_writeinode(fs, sp, VTOI(fs->lfs_ivnode));
153 }
aa4dc149
KB
154 lfs_writeseg(fs, sp);
155
275ca4f0 156 /*
dc7e45d3
KB
157 * If the I/O count is non-zero, sleep until it reaches zero. At the
158 * moment, the user's process hangs around so we can sleep.
275ca4f0 159 */
dc7e45d3
KB
160 if (do_ckp) {
161 s = splbio();
12304d41
KB
162 if (--fs->lfs_iocount &&
163 (error = tsleep(&fs->lfs_iocount, PRIBIO + 1, "sync", 0)))
164 return (error);
dc7e45d3
KB
165 splx(s);
166 lfs_writesuper(fs, sp);
167 }
275ca4f0 168
dc7e45d3
KB
169 (void)free(sp->bpp, M_SEGMENT);
170 (void)free(sp, M_SEGMENT);
275ca4f0 171
dc7e45d3
KB
172 /* Wake up any cleaning processes waiting on this file system. */
173 wakeup(&fs->lfs_nextseg);
174 wakeup(&lfs_allclean_wakeup);
12304d41 175printf("sync returned\n");
dc7e45d3 176 return (0);
84c30241
KB
177}
178
dc7e45d3
KB
179/*
180 * Write the dirty blocks associated with a vnode.
181 */
84c30241 182static void
dc7e45d3 183lfs_writefile(fs, sp, vp)
0a011bb1 184 struct lfs *fs;
84c30241 185 SEGMENT *sp;
dc7e45d3 186 VNODE *vp;
84c30241 187{
dc7e45d3
KB
188 struct buf *bp;
189 FINFO *fip;
190 IFILE *ifp;
191 ino_t inum;
275ca4f0 192
dc7e45d3
KB
193#ifdef VERBOSE
194 printf("lfs_writefile\n");
195#endif
196 inum = VTOI(vp)->i_number;
197 if (vp->v_dirtyblkhd != NULL) {
198 if (sp->seg_bytes_left < fs->lfs_bsize ||
199 sp->sum_bytes_left < sizeof(FINFO)) {
200 lfs_writeseg(fs, sp);
12304d41 201 lfs_initseg(fs, sp);
dc7e45d3
KB
202 }
203 sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
84c30241 204
dc7e45d3
KB
205 fip = sp->fip;
206 fip->fi_nblocks = 0;
207 if (inum == LFS_IFILE_INUM)
208 fip->fi_version = 1;
209 else {
210 LFS_IENTRY(ifp, fs, inum, bp);
211 fip->fi_version = ifp->if_version;
212 brelse(bp);
213 }
214 fip->fi_ino = inum;
84c30241 215
dc7e45d3
KB
216 /*
217 * It may not be necessary to write the meta-data blocks
218 * at this point, as the roll-forward recovery code should
219 * be able to reconstruct the list.
220 */
221 lfs_gather(fs, sp, vp, match_data);
222 lfs_gather(fs, sp, vp, match_indir);
223 lfs_gather(fs, sp, vp, match_dindir);
224#ifdef TRIPLE
225 lfs_gather(fs, sp, vp, match_tindir);
226#endif
aa4dc149 227
dc7e45d3
KB
228 fip = sp->fip;
229#ifdef META
230 printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
231#endif
232 if (fip->fi_nblocks != 0) {
233 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
234 sp->fip = (FINFO *)((caddr_t)fip + sizeof(FINFO) +
235 sizeof(daddr_t) * (fip->fi_nblocks - 1));
236 }
237 }
12304d41
KB
238}
239
240static void
241lfs_writeinode(fs, sp, ip)
242 struct lfs *fs;
243 SEGMENT *sp;
244 INODE *ip;
245{
246 BUF *bp;
247 daddr_t next_addr;
248 int ndx;
249
250#ifdef VERBOSE
251 printf("lfs_writeinode\n");
252#endif
253 /* Allocate a new inode block if necessary. */
254 if (sp->ibp == NULL) {
255 /* Allocate a new segment if necessary. */
256 if (sp->seg_bytes_left < fs->lfs_bsize ||
257 sp->sum_bytes_left < sizeof(daddr_t)) {
258 lfs_writeseg(fs, sp);
259 lfs_initseg(fs, sp);
260 }
261
262 /* Get next inode block. */
263 next_addr = fs->lfs_offset;
264 fs->lfs_offset += fsbtodb(fs, 1);
265 sp->ibp = *sp->cbpp++ =
266 lfs_newbuf(fs, sp, next_addr, fs->lfs_bsize);
267
268 /* Set remaining space counter. */
269 sp->seg_bytes_left -= fs->lfs_bsize;
270 sp->sum_bytes_left -= sizeof(daddr_t);
271 ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
272 sp->ninodes / INOPB(fs) - 1;
273 ((daddr_t *)(sp->segsum))[ndx] = next_addr;
274 }
275
276 /* Copy the new inode onto the inode page.
277 * XXX
278 * Do struct assignment.
279 */
280 bp = sp->ibp;
281 bcopy(&ip->i_din,
282 bp->b_un.b_dino + (sp->ninodes % INOPB(fs)), sizeof(DINODE));
283
284 /* Increment inode count in segment summary block. */
285 ++((SEGSUM *)(sp->segsum))->ss_ninos;
286
287 /* If this page is full, set flag to allocate a new page. */
288 if (++sp->ninodes % INOPB(fs) == 0)
289 sp->ibp = NULL;
290
291 /*
292 * If updating the ifile, update the super-block; otherwise, update
293 * the ifile itself. In either case, turn off inode update flags.
294 */
295 if (ip->i_number == LFS_IFILE_INUM)
296 fs->lfs_idaddr = bp->b_blkno;
297 else
298 lfs_iset(ip, bp->b_blkno, ip->i_atime);
299 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
275ca4f0
KB
300}
301
dc7e45d3 302static void
275ca4f0 303lfs_gather(fs, sp, vp, match)
0a011bb1 304 struct lfs *fs;
275ca4f0
KB
305 SEGMENT *sp;
306 VNODE *vp;
dc7e45d3 307 int (*match) __P((struct lfs *, BUF *));
275ca4f0
KB
308{
309 BUF **bpp, *bp, *nbp;
310 FINFO *fip;
311 INODE *ip;
275ca4f0 312 daddr_t *lbp, *start_lbp;
aa4dc149
KB
313 u_long version;
314 int s;
275ca4f0 315
dc7e45d3
KB
316#ifdef VERBOSE
317 printf("lfs_gather\n");
318#endif
275ca4f0
KB
319 ip = VTOI(vp);
320 bpp = sp->cbpp;
321 fip = sp->fip;
275ca4f0 322 start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks];
275ca4f0
KB
323
324 s = splbio();
325 for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
326 nbp = bp->b_blockf;
12304d41
KB
327 /*
328 * XXX
329 * Should probably sleep on any BUSY buffer if
330 * doing an fsync?
331 */
aa4dc149 332 if (bp->b_flags & B_BUSY)
275ca4f0 333 continue;
aa4dc149 334#ifdef DIAGNOSTIC
dc7e45d3 335 if (!(bp->b_flags & B_DELWRI))
12304d41 336 panic("lfs_gather: bp not B_DELWRI");
dc7e45d3 337 if (!(bp->b_flags & B_LOCKED))
12304d41 338 panic("lfs_gather: bp not B_LOCKED");
aa4dc149 339#endif
dc7e45d3 340 if (!match(fs, bp))
275ca4f0 341 continue;
aa4dc149 342
aa4dc149 343 /* Insert into the buffer list, update the FINFO block. */
275ca4f0 344 *sp->cbpp++ = bp;
aa4dc149
KB
345 ++fip->fi_nblocks;
346 *lbp++ = bp->b_lblkno;
347
aa4dc149 348 /*
dc7e45d3
KB
349 * If full, finish this segment. We may be doing I/O, so
350 * release and reacquire the splbio().
aa4dc149 351 */
dc7e45d3
KB
352 sp->sum_bytes_left -= sizeof(daddr_t);
353 sp->seg_bytes_left -= bp->b_bufsize;
aa4dc149 354 if (sp->sum_bytes_left < sizeof(daddr_t) ||
275ca4f0 355 sp->seg_bytes_left < fs->lfs_bsize) {
275ca4f0 356 splx(s);
aa4dc149 357 lfs_updatemeta(fs,
dc7e45d3 358 sp, vp, start_lbp, bpp, lbp - start_lbp);
275ca4f0 359
aa4dc149
KB
360 /* Add the current file to the segment summary. */
361 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
275ca4f0 362
aa4dc149 363 version = fip->fi_version;
dc7e45d3 364 lfs_writeseg(fs, sp);
12304d41 365 lfs_initseg(fs, sp);
aa4dc149 366
275ca4f0 367 fip = sp->fip;
275ca4f0 368 fip->fi_version = version;
aa4dc149 369 fip->fi_ino = ip->i_number;
275ca4f0 370 start_lbp = lbp = fip->fi_blocks;
aa4dc149
KB
371
372 bpp = sp->cbpp;
275ca4f0
KB
373 s = splbio();
374 }
84c30241 375 }
275ca4f0 376 splx(s);
dc7e45d3 377 lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp);
84c30241
KB
378}
379
84c30241 380/*
aa4dc149 381 * Update the metadata that points to the blocks listed in the FINFO
84c30241
KB
382 * array.
383 */
275ca4f0 384static void
dc7e45d3 385lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks)
0a011bb1 386 struct lfs *fs;
275ca4f0 387 SEGMENT *sp;
dc7e45d3 388 VNODE *vp;
275ca4f0 389 daddr_t *lbp;
84c30241 390 BUF **bpp;
275ca4f0 391 int nblocks;
84c30241 392{
12304d41
KB
393 SEGUSE *sup;
394 BUF *bp;
dc7e45d3
KB
395 INDIR a[NIADDR], *ap;
396 INODE *ip;
12304d41 397 daddr_t daddr, lbn, off;
dc7e45d3 398 int db_per_fsb, error, i, num;
84c30241 399
dc7e45d3
KB
400#ifdef VERBOSE
401 printf("lfs_updatemeta\n");
402#endif
aa4dc149 403 if (nblocks == 0)
275ca4f0
KB
404 return;
405
12304d41 406 /* Sort the blocks. */
275ca4f0
KB
407 shellsort(bpp, lbp, nblocks);
408
12304d41
KB
409 /*
410 * Assign disk addresses, and update references to the logical
411 * block and the segment usage information.
412 */
dc7e45d3 413 db_per_fsb = fsbtodb(fs, 1);
12304d41
KB
414 for (i = nblocks; i--; ++bpp) {
415 lbn = *lbp++;
416 (*bpp)->b_blkno = off = fs->lfs_offset;
dc7e45d3 417 fs->lfs_offset += db_per_fsb;
275ca4f0 418
dc7e45d3 419 if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num))
12304d41
KB
420 panic("lfs_updatemeta: lfs_bmaparray returned %d",
421 error);
dc7e45d3
KB
422#ifdef META
423 printf("daddr: %d num: %d\n", daddr, num);
424 if (num != 0) {
425 int x;
12304d41 426 printf("array from bmaparray:\n");
dc7e45d3
KB
427 for (x = 0; x < num; x++)
428 printf("\tlbn %d off %d\n", a[x].in_lbn, a[x].in_off);
429 }
430#endif
dc7e45d3
KB
431 ip = VTOI(vp);
432 switch (num) {
433 case 0:
14712628 434#ifdef META
dc7e45d3 435 printf("update inode for direct block %d\n", lbn);
14712628 436#endif
12304d41 437 ip->i_db[lbn] = off;
dc7e45d3
KB
438 break;
439 case 1:
12304d41 440 ip->i_ib[a[0].in_off] = off;
dc7e45d3
KB
441 break;
442 default:
443 ap = &a[num - 1];
14712628 444#ifdef META
dc7e45d3
KB
445 printf("update indirect block %d offset %d\n",
446 ap->in_lbn, ap->in_off);
14712628 447#endif
dc7e45d3
KB
448 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
449 panic("lfs_updatemeta: bread bno %d",
450 ap->in_lbn);
12304d41
KB
451 bp->b_un.b_daddr[ap->in_off] = off;
452 lfs_bwrite(bp);
453 }
454
455 /* Update segment usage information. */
456 if (daddr != UNASSIGNED) {
457 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
458 sup->su_lastmod = time.tv_sec;
459#ifdef DIAGNOSTIC
460 if (sup->su_nbytes < fs->lfs_bsize)
461 panic("lfs: negative bytes (segment %d)\n",
462 datosn(fs, daddr));
463#endif
464 sup->su_nbytes -= fs->lfs_bsize;
275ca4f0 465 lfs_bwrite(bp);
84c30241 466 }
84c30241 467 }
84c30241
KB
468}
469
12304d41
KB
470/*
471 * Start a new segment.
472 */
dc7e45d3 473static void
12304d41 474lfs_initseg(fs, sp)
0a011bb1 475 struct lfs *fs;
275ca4f0 476 SEGMENT *sp;
84c30241 477{
12304d41
KB
478 SEGUSE *sup;
479 SEGSUM *ssp;
480 struct buf *bp;
481 daddr_t lbn, *lbnp;
275ca4f0 482
dc7e45d3 483#ifdef VERBOSE
12304d41 484 printf("lfs_initseg\n");
dc7e45d3 485#endif
12304d41
KB
486 /* Advance to the next segment. */
487 if (1 || !LFS_PARTIAL_FITS(fs)) {
488 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp);
489 sup->su_flags &= ~SEGUSE_ACTIVE;
490 lfs_bwrite(bp);
491 fs->lfs_curseg = fs->lfs_offset = fs->lfs_nextseg;
492 fs->lfs_nextseg = lfs_newseg(fs);
493 sp->seg_number = datosn(fs, fs->lfs_curseg);
494 sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
495
496 /*
497 * If su_nbytes is non-zero after the segment was cleaned,
498 * the segment contains a super-block. Update offset and
499 * summary address to skip over the superblock.
500 */
501 LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
502 if (sup->su_nbytes != 0) {
503 fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
504 sp->seg_bytes_left -= LFS_SBPAD;
275ca4f0 505 }
12304d41
KB
506 brelse(bp);
507 } else {
508 sp->seg_number = datosn(fs, fs->lfs_curseg);
509 sp->seg_bytes_left = (fs->lfs_dbpseg -
510 (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
511 }
aa4dc149 512
12304d41
KB
513 sp->ibp = NULL;
514 sp->ninodes = 0;
aa4dc149 515
12304d41
KB
516 /* Get a new buffer for SEGSUM and enter it into the buffer list. */
517 sp->cbpp = sp->bpp;
518 *sp->cbpp = lfs_newbuf(fs, sp, fs->lfs_offset, LFS_SUMMARY_SIZE);
519 sp->segsum = (*sp->cbpp)->b_un.b_addr;
520 ++sp->cbpp;
521 fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
aa4dc149 522
12304d41
KB
523 /* Set point to SEGSUM, initialize it. */
524 ssp = sp->segsum;
525 ssp->ss_next = fs->lfs_nextseg;
526 ssp->ss_create = time.tv_sec;
527 ssp->ss_nfinfo = ssp->ss_ninos = 0;
aa4dc149 528
12304d41
KB
529 /* Set pointer to first FINFO, initialize it. */
530 sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM));
531 sp->fip->fi_nblocks = 0;
aa4dc149 532
12304d41
KB
533 sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
534 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
535}
aa4dc149 536
12304d41
KB
537/*
538 * Return the next segment to write.
539 */
540static daddr_t
541lfs_newseg(fs)
542 struct lfs *fs;
543{
544 SEGUSE *sup;
545 struct buf *bp;
546 int isdirty, segnum, sn;
547
548#ifdef VERBOSE
549 printf("lfs_newseg\n");
550#endif
551 segnum = datosn(fs, fs->lfs_nextseg);
552 LFS_SEGENTRY(sup, fs, segnum, bp);
553 sup->su_flags |= SEGUSE_ACTIVE;
554 lfs_bwrite(bp);
555 for (sn = segnum;;) {
556 sn = (sn + 1) % fs->lfs_nseg;
557 if (sn == segnum)
558 panic("lfs_nextseg: no clean segments");
559 LFS_SEGENTRY(sup, fs, sn, bp);
560 isdirty = sup->su_flags & SEGUSE_DIRTY;
561 brelse(bp);
562 if (!isdirty)
563 break;
564 }
565 return (sntoda(fs, sn));
84c30241
KB
566}
567
568static void
569lfs_writeseg(fs, sp)
0a011bb1 570 struct lfs *fs;
84c30241
KB
571 SEGMENT *sp;
572{
12304d41 573 BUF **bpp, *bp;
84c30241 574 SEGUSE *sup;
dc7e45d3
KB
575 SEGSUM *segp;
576 dev_t i_dev;
577 u_long *datap, *dp;
aa4dc149 578 void *pmeta;
dc7e45d3 579 int flags, i, nblocks, s, (*strategy) __P((BUF *));
84c30241 580
dc7e45d3
KB
581#ifdef VERBOSE
582 printf("lfs_writeseg\n");
583#endif
aa4dc149
KB
584 /* Update superblock segment address. */
585 fs->lfs_lastseg = sntoda(fs, sp->seg_number);
aa4dc149 586 nblocks = sp->cbpp - sp->bpp;
dc7e45d3
KB
587
588 LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
589 sup->su_nbytes += LFS_SUMMARY_SIZE + (nblocks - 1 << fs->lfs_bshift);
84c30241
KB
590 sup->su_lastmod = time.tv_sec;
591 sup->su_flags = SEGUSE_DIRTY;
dc7e45d3 592 lfs_bwrite(bp);
84c30241
KB
593
594 /*
12304d41
KB
595 * Compute checksum across data and then across summary;
596 * the first block (the summary block) is skipped.
dc7e45d3
KB
597 *
598 * XXX
599 * Fix this to do it inline, instead of malloc/copy.
84c30241 600 */
dc7e45d3 601 datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
12304d41
KB
602 for (bpp = sp->bpp, i = nblocks - 1; i--;)
603 *dp++ = (*++bpp)->b_un.b_words[0];
dc7e45d3
KB
604
605 segp = (SEGSUM *)sp->segsum;
606 segp->ss_datasum = cksum(datap, nblocks * sizeof(u_long));
607 segp->ss_sumsum = cksum(&segp->ss_datasum,
608 LFS_SUMMARY_SIZE - sizeof(segp->ss_sumsum));
dc7e45d3 609 (void)free(datap, M_SEGMENT);
8954e52c 610
dc7e45d3
KB
611 /*
612 * When we gathered the blocks for I/O we did not mark them busy or
613 * remove them from the freelist. As we do this, turn off the B_LOCKED
614 * bit so the future brelse will put them on the LRU list, and add the
615 * B_CALL flags if we're doing a checkpoint so we can count I/O's. LFS
616 * requires that the super blocks (on checkpoint) be written after all
617 * the segment data.
618 */
619 i_dev = VTOI(fs->lfs_ivnode)->i_dev;
620 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy;
275ca4f0 621
8954e52c 622 s = splbio();
dc7e45d3
KB
623 if (sp->seg_flags & SEGM_CKP) {
624 fs->lfs_iocount += nblocks;
12304d41 625 flags = B_ASYNC | B_BUSY | B_CALL;
dc7e45d3 626 } else
12304d41 627 flags = B_ASYNC | B_BUSY;
dc7e45d3
KB
628 for (bpp = sp->bpp, i = nblocks; i--;) {
629 bp = *bpp++;
630 bp->b_flags |= flags;
12304d41
KB
631 bp->b_flags &=
632 ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED);
dc7e45d3
KB
633 bp->b_dev = i_dev;
634 bp->b_iodone = lfs_callback;
dc7e45d3
KB
635 if (!(bp->b_flags & B_NOCACHE)) {
636 bremfree(bp);
637 reassignbuf(bp, bp->b_vp);
638 }
8954e52c 639 }
dc7e45d3
KB
640 splx(s);
641
642 for (bpp = sp->bpp, i = nblocks; i--;)
643 (strategy)(*bpp++);
275ca4f0
KB
644}
645
646static void
dc7e45d3 647lfs_writesuper(fs, sp)
0a011bb1 648 struct lfs *fs;
dc7e45d3 649 SEGMENT *sp;
275ca4f0
KB
650{
651 BUF *bp;
dc7e45d3 652 dev_t i_dev;
aa4dc149 653 int (*strategy) __P((BUF *));
275ca4f0 654
dc7e45d3
KB
655#ifdef VERBOSE
656 printf("lfs_writesuper\n");
657#endif
dc7e45d3
KB
658 i_dev = VTOI(fs->lfs_ivnode)->i_dev;
659 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy;
14712628 660
aa4dc149 661 /* Checksum the superblock and copy it into a buffer. */
0a011bb1 662 fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
dc7e45d3
KB
663 bp = lfs_newbuf(fs, sp, fs->lfs_sboffs[0], LFS_SBPAD);
664 *bp->b_un.b_lfs = *fs;
275ca4f0 665
14712628 666 /* Write the first superblock (wait). */
dc7e45d3 667 bp->b_dev = i_dev;
dc7e45d3 668 bp->b_flags |= B_BUSY;
12304d41 669 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
aa4dc149 670 (strategy)(bp);
275ca4f0 671 biowait(bp);
aa4dc149 672
14712628 673 /* Write the second superblock (don't wait). */
275ca4f0 674 bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
12304d41 675 bp->b_flags |= B_ASYNC | B_BUSY;
dc7e45d3 676 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
aa4dc149 677 (strategy)(bp);
275ca4f0
KB
678}
679
aa4dc149
KB
680/*
681 * Logical block number match routines used when traversing the dirty block
682 * chain.
683 */
34a084a9 684static int
dc7e45d3
KB
685match_data(fs, bp)
686 struct lfs *fs;
275ca4f0
KB
687 BUF *bp;
688{
aa4dc149 689 return (bp->b_lblkno >= 0);
275ca4f0
KB
690}
691
34a084a9 692static int
dc7e45d3
KB
693match_indir(fs, bp)
694 struct lfs *fs;
275ca4f0
KB
695 BUF *bp;
696{
dc7e45d3
KB
697 int lbn;
698
699 lbn = bp->b_lblkno;
700 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
275ca4f0
KB
701}
702
34a084a9 703static int
dc7e45d3
KB
704match_dindir(fs, bp)
705 struct lfs *fs;
275ca4f0
KB
706 BUF *bp;
707{
dc7e45d3
KB
708 int lbn;
709
710 lbn = bp->b_lblkno;
711 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
aa4dc149
KB
712}
713
dc7e45d3
KB
714static int
715match_tindir(fs, bp)
0a011bb1 716 struct lfs *fs;
dc7e45d3 717 BUF *bp;
aa4dc149 718{
dc7e45d3 719 int lbn;
aa4dc149 720
dc7e45d3
KB
721 lbn = bp->b_lblkno;
722 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
723}
aa4dc149 724
dc7e45d3
KB
725/*
726 * Allocate a new buffer header.
727 */
728static BUF *
729lfs_newbuf(fs, sp, daddr, size)
730 struct lfs *fs;
731 SEGMENT *sp;
732 daddr_t daddr;
733 size_t size;
734{
735 BUF *bp;
aa4dc149 736
dc7e45d3
KB
737#ifdef VERBOSE
738 printf("lfs_newbuf\n");
739#endif
740 bp = getnewbuf();
741 bremhash(bp);
742 bgetvp(fs->lfs_ivnode, bp);
743 bp->b_bcount = 0;
744 bp->b_lblkno = daddr;
745 bp->b_blkno = daddr;
746 bp->b_error = 0;
747 bp->b_resid = 0;
748 allocbuf(bp, size);
749 bp->b_flags |= B_NOCACHE;
12304d41 750 binshash(bp, &bfreelist[BQ_AGE]);
dc7e45d3
KB
751 return (bp);
752}
aa4dc149 753
dc7e45d3
KB
754/*
755 * The buffer cache callback routine.
756 */
757static int /* XXX should be void */
758lfs_callback(bp)
759 BUF *bp;
760{
761 struct lfs *fs;
aa4dc149 762
dc7e45d3
KB
763 fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
764#ifdef DIAGNOSTIC
765 if (fs->lfs_iocount == 0)
766 panic("lfs_callback: zero iocount\n");
767#endif
768 if (--fs->lfs_iocount == 0)
dc7e45d3 769 wakeup(&fs->lfs_iocount);
12304d41 770
dc7e45d3 771 brelse(bp);
84c30241
KB
772}
773
774/*
775 * Shellsort (diminishing increment sort) from Data Structures and
776 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
777 * see also Knuth Vol. 3, page 84. The increments are selected from
778 * formula (8), page 95. Roughly O(N^3/2).
779 */
780/*
781 * This is our own private copy of shellsort because we want to sort
782 * two parallel arrays (the array of buffer pointers and the array of
783 * logical block numbers) simultaneously. Note that we cast the array
784 * of logical block numbers to a unsigned in this routine so that the
785 * negative block numbers (meta data blocks) sort AFTER the data blocks.
786 */
787static void
788shellsort(bp_array, lb_array, nmemb)
789 BUF **bp_array;
275ca4f0 790 daddr_t *lb_array;
84c30241
KB
791 register int nmemb;
792{
793 static int __rsshell_increments[] = { 4, 1, 0 };
794 register int incr, *incrp, t1, t2;
795 BUF *bp_temp;
796 u_long lb_temp;
797
798 for (incrp = __rsshell_increments; incr = *incrp++;)
799 for (t1 = incr; t1 < nmemb; ++t1)
800 for (t2 = t1 - incr; t2 >= 0;)
801 if (lb_array[t2] > lb_array[t2 + incr]) {
802 lb_temp = lb_array[t2];
803 lb_array[t2] = lb_array[t2 + incr];
804 lb_array[t2 + incr] = lb_temp;
805 bp_temp = bp_array[t2];
806 bp_array[t2] = bp_array[t2 + incr];
807 bp_array[t2 + incr] = bp_temp;
808 t2 -= incr;
809 } else
810 break;
811}