checkpoint after Margo's changes to make sync work
[unix-history] / usr / src / sys / ufs / lfs / lfs_segment.c
CommitLineData
84c30241
KB
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 *
aa4dc149 7 * @(#)lfs_segment.c 5.4 (Berkeley) %G%
84c30241
KB
8 */
9
275ca4f0 10#ifdef LOGFS
84c30241
KB
11#include "param.h"
12#include "systm.h"
13#include "namei.h"
14#include "resourcevar.h"
15#include "kernel.h"
16#include "file.h"
17#include "stat.h"
18#include "buf.h"
19#include "proc.h"
20#include "conf.h"
21#include "vnode.h"
22#include "specdev.h"
23#include "fifo.h"
24#include "malloc.h"
25#include "mount.h"
26#include "../ufs/lockf.h"
27#include "../ufs/quota.h"
28#include "../ufs/inode.h"
29#include "../ufs/dir.h"
30#include "../ufs/ufsmount.h"
31#include "lfs.h"
32#include "lfs_extern.h"
33
34/*
8954e52c
KB
35 * Add a check so that if the segment is empty, you don't write it.
36 *
37 * Change lfs_ialloc to allocate a new page of inodes if you have to.
38 *
39 * Need to keep vnode v_numoutput up to date for pending writes? Could
40 * actually fire off the datablock writes before you finish. This would give
41 * them a chance to get started earlier.
42 */
84c30241
KB
43
44static int lfs_biocallback __P((BUF *));
45static void lfs_endsum __P((LFS *, SEGMENT *, int));
275ca4f0
KB
46static SEGMENT *lfs_gather
47 __P((LFS *, SEGMENT *, VNODE *, int (*) __P((BUF *))));
84c30241
KB
48static BUF *lfs_newbuf __P((LFS *, daddr_t, size_t));
49static SEGMENT *lfs_newseg __P((LFS *));
aa4dc149 50static SEGMENT *lfs_newsum __P((LFS *, SEGMENT *));
84c30241 51static daddr_t lfs_nextseg __P((LFS *));
aa4dc149 52static void lfs_updatemeta __P((LFS *, SEGMENT *, INODE *, daddr_t *,
275ca4f0 53 BUF **, int));
8954e52c 54static SEGMENT *lfs_writeckp __P((LFS *, SEGMENT *));
aa4dc149
KB
55static SEGMENT *lfs_writefile __P((LFS *, SEGMENT *, VNODE *, int));
56static SEGMENT *lfs_writeinode __P((LFS *, SEGMENT *, INODE *));
84c30241 57static void lfs_writeseg __P((LFS *, SEGMENT *));
8954e52c 58static void lfs_writesum __P((LFS *));
aa4dc149 59static void lfs_writesuper __P((LFS *));
275ca4f0
KB
60static int match_data __P((BUF *));
61static int match_dindir __P((BUF *));
62static int match_indir __P((BUF *));
aa4dc149 63static daddr_t next __P((LFS *, SEGMENT *, int *));
275ca4f0 64static void shellsort __P((BUF **, daddr_t *, register int));
84c30241
KB
65
66/*
67 * XXX -- when we add fragments in here, we will need to allocate a larger
68 * buffer pointer array (sp->bpp).
69 */
70int
275ca4f0 71lfs_segwrite(mp, do_ckp)
84c30241 72 MOUNT *mp;
275ca4f0 73 int do_ckp; /* do a checkpoint too */
84c30241 74{
84c30241
KB
75 INODE *ip;
76 LFS *fs;
77 VNODE *vp;
78 SEGMENT *sp;
8954e52c 79 int s;
84c30241 80
84c30241 81 fs = VFSTOUFS(mp)->um_lfs;
aa4dc149
KB
82
83#ifdef DIAGNOSTIC
84 if (fs->lfs_seglist != NULL)
85 panic("lfs_segwrite: seglist not NULL");
86#endif
87
8954e52c
KB
88 /*
89 * LFS requires that the summary blocks be written after the rest of
90 * the segment, and that the super blocks (on checkpoint) be written
91 * last of all. We keep a cumulative count of the outstanding blocks
92 * from all of the segments, and write these blocks when this count
93 * goes to zero. If the disk drive catches up with us it could go
94 * to zero before we finish, so we artificially increment it by one
95 * until we've scheduled all of the writes we intend to do. At the
96 * moment, the user's process hangs around so we can sleep; this should
97 * probably be redone using a kernel thread.
98 */
99 s = splbio();
100 fs->lfs_iocount = 1;
101 splx(s);
aa4dc149 102
84c30241
KB
103 sp = lfs_newseg(fs);
104loop:
105 for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
106 /*
107 * If the vnode that we are about to sync is no longer
108 * associated with this mount point, start over.
109 */
84c30241
KB
110 if (vp->v_mount != mp)
111 goto loop;
112 if (VOP_ISLOCKED(vp))
113 continue;
114 ip = VTOI(vp);
115 if (ip->i_number == LFS_IFILE_INUM)
116 continue;
aa4dc149 117 if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG)) == 0 &&
84c30241
KB
118 vp->v_dirtyblkhd == NULL)
119 continue;
120 if (vget(vp))
121 goto loop;
aa4dc149 122 sp = lfs_writefile(fs, sp, vp, do_ckp);
84c30241
KB
123 vput(vp);
124 }
275ca4f0 125 if (do_ckp)
8954e52c 126 sp = lfs_writeckp(fs, sp);
aa4dc149
KB
127 lfs_writeseg(fs, sp);
128
8954e52c
KB
129 s = splbio();
130 if (--fs->lfs_iocount)
131 sleep(&fs->lfs_iocount, PRIBIO + 1);
132 splx(s);
133 lfs_writesum(fs);
134 if (do_ckp)
aa4dc149
KB
135 lfs_writesuper(fs);
136printf("After writesuper returning\n");
137
84c30241
KB
138 return (0);
139}
140
aa4dc149 141static int /* XXX should be void */
84c30241
KB
142lfs_biocallback(bp)
143 BUF *bp;
144{
145 LFS *fs;
84c30241 146
275ca4f0 147 /*
8954e52c
KB
148 * XXX
149 * Reset the flags (probably wrong). If the contents of the buffer
150 * are valid, move back onto the clean list.
275ca4f0 151 */
8954e52c
KB
152 bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
153 fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
275ca4f0
KB
154 if (bp->b_flags & B_NOCACHE)
155 bp->b_vp = NULL;
8954e52c 156 else
275ca4f0 157 reassignbuf(bp, bp->b_vp);
8954e52c 158 brelse(bp);
275ca4f0 159
8954e52c
KB
160printf("callback: buffer: %x iocount %d\n", bp, fs->lfs_iocount);
161 if (fs->lfs_iocount == 0)
162 panic("lfs_biocallback: zero iocount\n");
275ca4f0 163
8954e52c
KB
164 if (--fs->lfs_iocount == 0)
165 wakeup(&fs->lfs_iocount);
84c30241
KB
166}
167
aa4dc149 168/* Finish up a summary block. */
84c30241
KB
169static void
170lfs_endsum(fs, sp, calc_next)
171 LFS *fs;
172 SEGMENT *sp;
aa4dc149 173 int calc_next;
84c30241 174{
84c30241 175 SEGSUM *ssp;
aa4dc149 176 int nsums_per_blk;
275ca4f0 177
275ca4f0
KB
178 if (sp->sbp == NULL)
179 return;
84c30241 180
84c30241 181 ssp = sp->segsum;
84c30241 182
8954e52c 183 /*
aa4dc149
KB
184 * Compute the address of the next summary block if calc_next is set,
185 * otherwise end the chain. If the summary block is full, close it
186 * by setting sp->sbp to NULL, so lfs_newsum will allocate a new one.
187 * Calculate the checksum last.
8954e52c
KB
188 */
189 nsums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE;
aa4dc149
KB
190 if (sp->nsums % nsums_per_blk == 0) {
191 ssp->ss_nextsum =
192 calc_next ? next(fs, sp, NULL) +
193 (nsums_per_blk - 1) * LFS_SUMMARY_SIZE / DEV_BSIZE :
194 (daddr_t)-1;
275ca4f0
KB
195 sp->sbp = NULL;
196 } else
aa4dc149
KB
197 ssp->ss_nextsum = calc_next ?
198 sp->sum_addr - LFS_SUMMARY_SIZE / DEV_BSIZE : (daddr_t)-1;
199
200 ssp->ss_cksum =
201 cksum(&ssp->ss_cksum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum));
275ca4f0
KB
202}
203
204static SEGMENT *
205lfs_gather(fs, sp, vp, match)
206 LFS *fs;
207 SEGMENT *sp;
208 VNODE *vp;
209 int (*match) __P((BUF *));
210{
211 BUF **bpp, *bp, *nbp;
212 FINFO *fip;
213 INODE *ip;
275ca4f0 214 daddr_t *lbp, *start_lbp;
aa4dc149
KB
215 u_long version;
216 int s;
275ca4f0
KB
217
218 ip = VTOI(vp);
219 bpp = sp->cbpp;
220 fip = sp->fip;
275ca4f0 221 start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks];
275ca4f0
KB
222
223 s = splbio();
224 for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
225 nbp = bp->b_blockf;
aa4dc149 226 if (bp->b_flags & B_BUSY)
275ca4f0 227 continue;
aa4dc149 228#ifdef DIAGNOSTIC
275ca4f0 229 if ((bp->b_flags & B_DELWRI) == 0)
aa4dc149
KB
230 panic("lfs_gather: not dirty");
231#endif
275ca4f0
KB
232 if (!match(bp))
233 continue;
aa4dc149
KB
234
235 /* Remove the buffer from the free lists, prepare it for I/O. */
275ca4f0
KB
236 bremfree(bp);
237 bp->b_flags |= B_BUSY | B_CALL;
275ca4f0 238 bp->b_iodone = lfs_biocallback;
8954e52c 239 bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev;
275ca4f0 240
aa4dc149 241 /* Insert into the buffer list, update the FINFO block. */
275ca4f0 242 *sp->cbpp++ = bp;
aa4dc149
KB
243 ++fip->fi_nblocks;
244 *lbp++ = bp->b_lblkno;
245
275ca4f0
KB
246 sp->sum_bytes_left -= sizeof(daddr_t);
247 sp->seg_bytes_left -= bp->b_bufsize;
aa4dc149
KB
248
249 /*
250 * Allocate a new summary block (and, possibly, a new segment)
251 * if necessary. In this case we sort the blocks we've done
252 * so far and assign disk addresses so we can start the new
253 * block correctly. We may be doing I/O, so we need to release
254 * the splbio() before anything else.
255 */
256 if (sp->sum_bytes_left < sizeof(daddr_t) ||
275ca4f0 257 sp->seg_bytes_left < fs->lfs_bsize) {
275ca4f0 258 splx(s);
aa4dc149
KB
259 lfs_updatemeta(fs,
260 sp, ip, start_lbp, bpp, lbp - start_lbp);
275ca4f0 261
aa4dc149
KB
262 /* Add the current file to the segment summary. */
263 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
275ca4f0 264
aa4dc149 265 version = fip->fi_version;
275ca4f0
KB
266 if (sp->seg_bytes_left < fs->lfs_bsize) {
267 lfs_writeseg(fs, sp);
268 sp = lfs_newseg(fs);
269 } else if (sp->sum_bytes_left < sizeof(daddr_t))
aa4dc149
KB
270 sp = lfs_newsum(fs, sp);
271
272 /* A new FINFO either way. */
275ca4f0 273 fip = sp->fip;
275ca4f0 274 fip->fi_version = version;
aa4dc149 275 fip->fi_ino = ip->i_number;
275ca4f0 276 start_lbp = lbp = fip->fi_blocks;
aa4dc149
KB
277
278 bpp = sp->cbpp;
275ca4f0
KB
279 s = splbio();
280 }
84c30241 281 }
275ca4f0
KB
282 splx(s);
283 lfs_updatemeta(fs, sp, ip, start_lbp, bpp, lbp - start_lbp);
aa4dc149 284 return (sp);
84c30241
KB
285}
286
aa4dc149
KB
287/*
288 * Allocate a new buffer header.
289 */
84c30241
KB
290static BUF *
291lfs_newbuf(fs, daddr, size)
292 LFS *fs;
293 daddr_t daddr;
294 size_t size;
295{
296 BUF *bp;
84c30241 297
84c30241
KB
298 bp = getnewbuf();
299 bremhash(bp);
275ca4f0 300 bp->b_vp = fs->lfs_ivnode;
84c30241 301 bp->b_bcount = 0;
275ca4f0 302 bp->b_blkno = bp->b_lblkno = daddr;
84c30241
KB
303 bp->b_error = 0;
304 bp->b_resid = 0;
8954e52c 305 bp->b_flags |= B_DELWRI | B_NOCACHE;
275ca4f0 306 bp->b_iodone = lfs_biocallback;
8954e52c 307 bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev;
84c30241
KB
308 allocbuf(bp, size);
309 return (bp);
310}
311
aa4dc149
KB
312/*
313 * Start a new segment.
314 */
84c30241
KB
315static SEGMENT *
316lfs_newseg(fs)
317 LFS *fs;
318{
aa4dc149 319 FINFO *fip;
84c30241
KB
320 SEGMENT *sp;
321 SEGUSE *sup;
aa4dc149
KB
322 SEGSUM *ssp;
323 daddr_t lbn, *lbnp;
84c30241 324
8954e52c
KB
325printf("lfs_newseg: new segment %x\n", fs->lfs_nextseg);
326
84c30241 327 sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK);
8954e52c 328 sp->nextp = NULL;
84c30241
KB
329 sp->cbpp = sp->bpp =
330 malloc(fs->lfs_ssize * sizeof(BUF *), M_SEGMENT, M_WAITOK);
8954e52c 331 sp->ibp = sp->sbp = NULL;
aa4dc149 332 sp->seg_bytes_left = (fs->lfs_segmask + 1);
84c30241
KB
333 sp->saddr = fs->lfs_nextseg;
334 sp->sum_addr = sp->saddr + sp->seg_bytes_left / DEV_BSIZE;
335 sp->ninodes = 0;
aa4dc149 336 sp->nsums = 0;
275ca4f0
KB
337 sp->seg_number =
338 (sp->saddr - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize);
84c30241 339
aa4dc149 340 /* Advance to the next segment. */
8954e52c 341 fs->lfs_nextseg = lfs_nextseg(fs);
8954e52c 342
aa4dc149
KB
343 /* Initialize the summary block. */
344 sp = lfs_newsum(fs, sp);
84c30241 345
aa4dc149
KB
346 /*
347 * If su_nbytes non-zero after the segment was cleaned, the segment
348 * contains a super-block. Add segment summary information to not
349 * allocate over it.
350 */
351 sup = fs->lfs_segtab + sp->seg_number;
84c30241 352 if (sup->su_nbytes != 0) {
275ca4f0 353 ssp = (SEGSUM *)sp->segsum;
aa4dc149 354 ++ssp->ss_nfinfo;
84c30241
KB
355 fip = sp->fip;
356 fip->fi_nblocks = LFS_SBPAD >> fs->lfs_bshift;
357 fip->fi_version = 1;
358 fip->fi_ino = LFS_UNUSED_INUM;
84c30241 359 lbnp = fip->fi_blocks;
aa4dc149 360 for (lbn = 0; lbn < fip->fi_nblocks; ++lbn)
84c30241 361 *lbnp++ = lbn;
aa4dc149 362 sp->saddr += fsbtodb(fs, fip->fi_nblocks);
84c30241 363 sp->seg_bytes_left -= sup->su_nbytes;
aa4dc149 364 sp->sum_bytes_left -=
84c30241
KB
365 sizeof(FINFO) + (fip->fi_nblocks - 1) * sizeof(daddr_t);
366 sp->fip = (FINFO *)lbnp;
367 }
aa4dc149 368 return (sp);
84c30241
KB
369}
370
aa4dc149 371static SEGMENT *
84c30241
KB
372lfs_newsum(fs, sp)
373 LFS *fs;
374 SEGMENT *sp;
375{
376 SEGSUM *ssp;
aa4dc149 377 int nblocks;
84c30241
KB
378
379printf("lfs_newsum\n");
aa4dc149 380
275ca4f0 381 lfs_endsum(fs, sp, 1);
aa4dc149
KB
382
383 /* Allocate a new buffer if necessary. */
275ca4f0 384 if (sp->sbp == NULL) {
aa4dc149 385 /* Allocate a new segment if necessary. */
275ca4f0
KB
386 if (sp->seg_bytes_left < fs->lfs_bsize) {
387 lfs_writeseg(fs, sp);
388 sp = lfs_newseg(fs);
389 }
aa4dc149
KB
390
391 /* Get the next summary block. */
392 sp->sum_addr = next(fs, sp, &nblocks);
393
394 /*
395 * Get a new buffer and enter into the buffer list from
396 * the top of the list.
397 */
398 sp->sbp = sp->bpp[fs->lfs_ssize - (nblocks + 1)] =
399 lfs_newbuf(fs, sp->sum_addr, fs->lfs_bsize);
400
275ca4f0 401 sp->seg_bytes_left -= fs->lfs_bsize;
aa4dc149
KB
402
403 /*
404 * Do a callback for all but the very last summary block in
405 * the segment, for which we wait.
406 */
407 if (sp->nsums != 0)
408 sp->sbp->b_flags |= B_CALL;
409 /*
410 * Fill in the block from the end. The summary block is filled
411 * in from the end to the beginning so that the last summary
412 * is the last thing written, verifying the entire block. This
413 * should go away when fragments are available.
414 */
8954e52c
KB
415 sp->segsum =
416 sp->sbp->b_un.b_addr + fs->lfs_bsize - LFS_SUMMARY_SIZE;
275ca4f0 417 sp->sum_addr += (fs->lfs_bsize - LFS_SUMMARY_SIZE) / DEV_BSIZE;
aa4dc149
KB
418
419 printf("alloc summary: bp %x, lblkno %x, bp index %d\n",
420 sp->sbp, sp->sbp->b_lblkno, fs->lfs_ssize - nblocks);
84c30241 421 } else {
275ca4f0
KB
422 sp->segsum -= LFS_SUMMARY_SIZE;
423 sp->sum_addr -= LFS_SUMMARY_SIZE / DEV_BSIZE;
84c30241 424 }
aa4dc149 425 ++sp->nsums;
84c30241 426
aa4dc149 427 /* Set point to SEGSUM, initialize it. */
275ca4f0 428 ssp = sp->segsum;
8954e52c 429 ssp->ss_next = fs->lfs_nextseg;
275ca4f0 430 ssp->ss_prev = fs->lfs_lastseg;
84c30241
KB
431 ssp->ss_nextsum = (daddr_t)-1;
432 ssp->ss_create = time.tv_sec;
aa4dc149 433 ssp->ss_nfinfo = ssp->ss_ninos = 0;
84c30241 434
aa4dc149
KB
435 /* Set pointer to first FINFO, initialize it. */
436 sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM));
437
438 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
439 return (sp);
84c30241
KB
440}
441
aa4dc149
KB
442#define seginc(fs, sn) /* increment segment number */ \
443 (((sn) + 1) % (fs)->lfs_nseg)
444/*
445 * Return the next segment to write.
446 */
84c30241
KB
447static daddr_t
448lfs_nextseg(fs)
449 LFS *fs;
450{
451 int segnum, sn;
84c30241 452
aa4dc149
KB
453 segnum = sn = datosn(fs, fs->lfs_nextseg);
454 while ((sn = seginc(fs, sn)) != segnum &&
455 fs->lfs_segtab[sn].su_flags & SEGUSE_DIRTY);
275ca4f0 456
84c30241
KB
457 if (sn == segnum)
458 panic("lfs_nextseg: file system full"); /* XXX */
aa4dc149 459 return (sntoda(fs, sn));
84c30241
KB
460}
461
462/*
aa4dc149 463 * Update the metadata that points to the blocks listed in the FINFO
84c30241
KB
464 * array.
465 */
275ca4f0
KB
466static void
467lfs_updatemeta(fs, sp, ip, lbp, bpp, nblocks)
84c30241 468 LFS *fs;
275ca4f0 469 SEGMENT *sp;
84c30241 470 INODE *ip;
275ca4f0 471 daddr_t *lbp;
84c30241 472 BUF **bpp;
275ca4f0 473 int nblocks;
84c30241
KB
474{
475 SEGUSE *segup;
aa4dc149
KB
476 BUF **lbpp, *bp;
477 daddr_t daddr, iblkno;
478 int db_per_fsb, error, i;
275ca4f0 479 long lbn;
84c30241 480
aa4dc149 481 if (nblocks == 0)
275ca4f0 482 return;
aa4dc149 483printf("lfs_updatemeta of %d blocks\n", nblocks);
275ca4f0 484
aa4dc149 485 /* Sort the blocks and add disk addresses */
275ca4f0
KB
486 shellsort(bpp, lbp, nblocks);
487
488 db_per_fsb = 1 << fs->lfs_fsbtodb;
aa4dc149 489 for (lbpp = bpp, i = 0; i < nblocks; ++i, ++lbpp) {
275ca4f0
KB
490 (*lbpp)->b_blkno = sp->saddr;
491 sp->saddr += db_per_fsb;
492 }
493
aa4dc149 494 for (lbpp = bpp, i = 0; i < nblocks; ++i, ++lbpp) {
275ca4f0
KB
495 lbn = lbp[i];
496printf("lfs_updatemeta: block %d\n", lbn);
aa4dc149
KB
497 if (error = lfs_bmap(ip, lbn, &daddr))
498 panic("lfs_updatemeta: lfs_bmap");
499
500 /* Update in-core copy of old segment usage information. */
501 if (daddr != UNASSIGNED) {
502 segup = fs->lfs_segtab + datosn(fs, daddr);
84c30241 503 segup->su_lastmod = time.tv_sec;
aa4dc149
KB
504#ifdef DIAGNOSTIC
505 if (segup->su_nbytes < fs->lfs_bsize) {
506 printf("lfs: negative bytes (segment %d)\n",
507 segup - fs->lfs_segtab);
508 panic("lfs: negative bytes in segment\n");
509 }
510#endif
511 segup->su_nbytes -= fs->lfs_bsize;
84c30241
KB
512 }
513
275ca4f0 514 /*
aa4dc149 515 * Now change whomever points to lbn. We could start with the
275ca4f0
KB
516 * smallest (most negative) block number in these if clauses,
517 * but we assume that indirect blocks are least common, and
aa4dc149
KB
518 * handle them separately. The test for < 0 is correct and
519 * minimizes the path in the common case.
275ca4f0 520 */
aa4dc149
KB
521#define BREAD(bno) \
522 if (error = bread(ITOV(ip), (bno), fs->lfs_bsize, NOCRED, &bp)) \
523 panic("lfs_updatemeta: bread");
275ca4f0 524
aa4dc149
KB
525 if (lbn < 0)
526 if (lbn < -NIADDR) {
527 printf("meta: update indirect block %d\n",
528 D_INDIR);
529 BREAD(D_INDIR);
530 bp->b_un.b_daddr[-lbn % NINDIR(fs)] =
275ca4f0 531 (*lbpp)->b_blkno;
aa4dc149
KB
532 lfs_bwrite(bp);
533 } else {
534 ip->i_ib[-lbn-1] = (*lbpp)->b_blkno;
535 } else if (lbn < NDADDR) {
536 ip->i_db[lbn] = (*lbpp)->b_blkno;
537 } else if ((lbn -= NDADDR) < NINDIR(fs)) {
538 printf("meta: update indirect block %d\n", S_INDIR);
539 BREAD(S_INDIR);
84c30241 540 bp->b_un.b_daddr[lbn] = (*lbpp)->b_blkno;
aa4dc149
KB
541 lfs_bwrite(bp);
542 } else if ((lbn =
543 (lbn - NINDIR(fs)) / NINDIR(fs)) < NINDIR(fs)) {
544 iblkno = -(lbn + NIADDR + 1);
545 printf("meta: update indirect block %d\n", iblkno);
546 BREAD(iblkno);
84c30241 547 bp->b_un.b_daddr[lbn % NINDIR(fs)] = (*lbpp)->b_blkno;
275ca4f0 548 lfs_bwrite(bp);
aa4dc149
KB
549 } else
550 panic("lfs_updatemeta: logical block number too large");
84c30241 551 }
275ca4f0
KB
552}
553
8954e52c 554static SEGMENT *
275ca4f0
KB
555lfs_writeckp(fs, sp)
556 LFS *fs;
557 SEGMENT *sp;
558{
559 BUF *bp;
560 FINFO *fip;
561 INODE *ip;
562 SEGUSE *sup;
8954e52c 563 void *xp;
275ca4f0
KB
564 daddr_t *lbp;
565 int bytes_needed, i;
275ca4f0
KB
566
567printf("lfs_writeckp\n");
568 /*
569 * This will write the dirty ifile blocks, but not the segusage
570 * table nor the ifile inode.
571 */
aa4dc149 572 sp = lfs_writefile(fs, sp, fs->lfs_ivnode, 1);
275ca4f0
KB
573
574 /*
aa4dc149
KB
575 * If the segment usage table and the ifile inode won't fit in this
576 * segment, put them in the next one.
275ca4f0
KB
577 */
578 bytes_needed = fs->lfs_segtabsz << fs->lfs_bshift;
579 if (sp->ninodes % INOPB(fs) == 0)
580 bytes_needed += fs->lfs_bsize;
581
582 if (sp->seg_bytes_left < bytes_needed) {
583 lfs_writeseg(fs, sp);
584 sp = lfs_newseg(fs);
aa4dc149
KB
585 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
586 } else if (sp->sum_bytes_left < fs->lfs_segtabsz * sizeof(daddr_t)) {
587 sp = lfs_newsum(fs, sp);
588 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
589 }
275ca4f0
KB
590
591#ifdef DEBUG
592 if (sp->seg_bytes_left < bytes_needed)
593 panic("lfs_writeckp: unable to write checkpoint");
594#endif
275ca4f0 595 /*
8954e52c
KB
596 * Update the segment usage information and the ifile inode
597 * and write it out.
275ca4f0 598 */
275ca4f0 599 sup = fs->lfs_segtab + sp->seg_number;
8954e52c
KB
600 sup->su_nbytes =
601 (fs->lfs_segmask + 1) - sp->seg_bytes_left + bytes_needed;
275ca4f0
KB
602 sup->su_lastmod = time.tv_sec;
603 sup->su_flags = SEGUSE_DIRTY;
604
aa4dc149
KB
605 /*
606 * Get buffers for the segusage table and write it out. Don't
607 * bother updating the FINFO pointer, it's not used after this.
608 */
275ca4f0
KB
609 ip = VTOI(fs->lfs_ivnode);
610 fip = sp->fip;
611 lbp = &fip->fi_blocks[fip->fi_nblocks];
aa4dc149
KB
612 for (xp = fs->lfs_segtab, i = 0; i < fs->lfs_segtabsz;
613 xp += fs->lfs_bsize, ++i, ++lbp) {
8954e52c
KB
614 *sp->cbpp++ = bp = lfs_newbuf(fs, sp->saddr, fs->lfs_bsize);
615 bp->b_flags |= B_CALL;
aa4dc149
KB
616 bcopy(xp, bp->b_un.b_addr, fs->lfs_bsize);
617 ip->i_db[i] = sp->saddr;
275ca4f0
KB
618 sp->saddr += (1 << fs->lfs_fsbtodb);
619 *lbp = i;
aa4dc149 620 ++fip->fi_nblocks;
275ca4f0 621 }
aa4dc149 622 return (lfs_writeinode(fs, sp, VTOI(fs->lfs_ivnode)));
84c30241
KB
623}
624
625/*
aa4dc149 626 * Write the dirty blocks associated with a vnode.
84c30241
KB
627 */
628static SEGMENT *
aa4dc149 629lfs_writefile(fs, sp, vp, do_ckp)
84c30241 630 LFS *fs;
aa4dc149 631 SEGMENT *sp;
84c30241 632 VNODE *vp;
275ca4f0 633 int do_ckp;
84c30241 634{
84c30241 635 FINFO *fip;
8954e52c 636 ino_t inum;
84c30241 637
8954e52c 638 inum = VTOI(vp)->i_number;
aa4dc149
KB
639 printf("lfs_writefile: node %d\n", inum);
640
641 if (vp->v_dirtyblkhd != NULL) {
642 if (sp->seg_bytes_left < fs->lfs_bsize) {
643 lfs_writeseg(fs, sp);
644 sp = lfs_newseg(fs);
645 } else if (sp->sum_bytes_left < sizeof(FINFO))
646 sp = lfs_newsum(fs, sp);
647 sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
648
649 fip = sp->fip;
650 fip->fi_nblocks = 0;
651 fip->fi_version =
652 inum == LFS_IFILE_INUM ? 1 : lfs_getversion(fs, inum);
653 fip->fi_ino = inum;
654
655 sp = lfs_gather(fs, sp, vp, match_data);
656 if (do_ckp) {
657 sp = lfs_gather(fs, sp, vp, match_indir);
658 sp = lfs_gather(fs, sp, vp, match_dindir);
659 }
84c30241 660
275ca4f0 661 fip = sp->fip;
aa4dc149
KB
662 printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
663
664 /*
665 * If this is the ifile, always update the file count as we'll
666 * be adding the segment usage information even if we didn't
667 * write any blocks. Also, don't update the FINFO pointer for
668 * the ifile as the segment usage information hasn't yet been
669 * added.
670 */
671 if (inum == LFS_IFILE_INUM)
672 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
673 else if (fip->fi_nblocks != 0) {
674 ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
675 sp->fip = (FINFO *)((caddr_t)fip + sizeof(FINFO) +
676 sizeof(daddr_t) * (fip->fi_nblocks - 1));
84c30241 677 }
84c30241 678 }
aa4dc149
KB
679
680 /* If this isn't the ifile, update the inode. */
681 if (inum != LFS_IFILE_INUM)
682 sp = lfs_writeinode(fs, sp, VTOI(vp));
683 return (sp);
84c30241
KB
684}
685
275ca4f0 686static SEGMENT *
aa4dc149 687lfs_writeinode(fs, sp, ip)
275ca4f0
KB
688 LFS *fs;
689 SEGMENT *sp;
aa4dc149 690 INODE *ip;
84c30241 691{
275ca4f0 692 BUF *bp;
aa4dc149
KB
693 daddr_t next_addr;
694 int nblocks;
275ca4f0
KB
695
696printf("lfs_writeinode\n");
aa4dc149 697 /* Allocate a new inode block if necessary. */
275ca4f0 698 if (sp->ibp == NULL) {
aa4dc149 699 /* Allocate a new segment if necessary. */
275ca4f0
KB
700 if (sp->seg_bytes_left < fs->lfs_bsize) {
701 lfs_writeseg(fs, sp);
702 sp = lfs_newseg(fs);
703 }
aa4dc149
KB
704
705 /* Get next inode block. */
706 next_addr = next(fs, sp, &nblocks);
707
708 /*
709 * Get a new buffer and enter into the buffer list from
710 * the top of the list.
711 */
712 sp->ibp = sp->bpp[fs->lfs_ssize - (nblocks + 1)] =
713 lfs_newbuf(fs, next_addr, fs->lfs_bsize);
8954e52c 714 sp->ibp->b_flags |= B_CALL;
aa4dc149
KB
715
716 /* Set remaining space counter. */
275ca4f0 717 sp->seg_bytes_left -= fs->lfs_bsize;
aa4dc149
KB
718
719 printf("alloc inode: bp %x, lblkno %x, bp index %d\n",
720 sp->sbp, sp->sbp->b_lblkno, fs->lfs_ssize - nblocks);
275ca4f0 721 }
aa4dc149
KB
722
723 /* Copy the new inode onto the inode page. */
275ca4f0 724 bp = sp->ibp;
aa4dc149
KB
725 bcopy(&ip->i_din,
726 bp->b_un.b_dino + (sp->ninodes % INOPB(fs)), sizeof(DINODE));
727
728 /* Increment inode count in segment summary block. */
729 ++((SEGSUM *)(sp->segsum))->ss_ninos;
730
731 /* If this page is full, set flag to allocate a new page. */
732 if (++sp->ninodes % INOPB(fs) == 0)
275ca4f0 733 sp->ibp = NULL;
aa4dc149
KB
734
735 /*
736 * If updating the ifile, update the super-block; otherwise, update
737 * the ifile itself. In either case, turn of inode update flags.
738 */
275ca4f0 739 if (ip->i_number == LFS_IFILE_INUM)
aa4dc149 740 fs->lfs_idaddr = bp->b_blkno;
275ca4f0 741 else
aa4dc149
KB
742 lfs_iset(ip, bp->b_blkno, ip->i_atime);
743 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
744 return (sp);
84c30241
KB
745}
746
747static void
748lfs_writeseg(fs, sp)
749 LFS *fs;
750 SEGMENT *sp;
751{
275ca4f0 752 BUF **bpp;
84c30241 753 SEGUSE *sup;
aa4dc149
KB
754 int i, nblocks, s, (*strategy) __P((BUF *));
755 void *pmeta;
84c30241
KB
756
757printf("lfs_writeseg\n");
aa4dc149
KB
758 /* Update superblock segment address. */
759 fs->lfs_lastseg = sntoda(fs, sp->seg_number);
760
761 /* Finish up any summary block. */
275ca4f0 762 lfs_endsum(fs, sp, 0);
84c30241 763
84c30241
KB
764 /*
765 * Copy inode and summary block buffer pointers down so they are
275ca4f0 766 * contiguous with the page buffer pointers.
84c30241 767 */
aa4dc149
KB
768 (void)next(fs, sp, &nblocks);
769 pmeta = (sp->bpp + fs->lfs_ssize) - nblocks;
770 if (pmeta != sp->cbpp)
771 bcopy(pmeta, sp->cbpp, sizeof(BUF *) * nblocks);
772 sp->cbpp += nblocks;
773 nblocks = sp->cbpp - sp->bpp;
84c30241 774
84c30241
KB
775 sup = fs->lfs_segtab + sp->seg_number;
776 sup->su_nbytes = nblocks << fs->lfs_bshift;
777 sup->su_lastmod = time.tv_sec;
778 sup->su_flags = SEGUSE_DIRTY;
779
780 /*
275ca4f0 781 * Since we need to guarantee that the summary block gets written last,
84c30241 782 * we issue the writes in two sets. The first n-1 buffers first, and
8954e52c 783 * then, after they've completed, the summary buffer. Only when that
275ca4f0 784 * final write completes is the segment valid.
84c30241 785 */
aa4dc149 786 --nblocks; /* Don't count last summary block. */
8954e52c 787
84c30241
KB
788 sp->nextp = fs->lfs_seglist;
789 fs->lfs_seglist = sp;
275ca4f0 790
8954e52c
KB
791 s = splbio();
792 fs->lfs_iocount += nblocks;
793 splx(s);
aa4dc149
KB
794
795 strategy =
796 VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp->v_op->vop_strategy;
8954e52c 797 for (bpp = sp->bpp, i = 0; i < nblocks; ++i, ++bpp)
aa4dc149 798 (strategy)(*bpp);
8954e52c
KB
799}
800
801static void
802lfs_writesum(fs)
803 LFS *fs;
804{
805 BUF *bp;
806 SEGMENT *next_sp, *sp;
aa4dc149 807 int (*strategy) __P((BUF *));
8954e52c
KB
808
809printf("lfs_writesum\n");
aa4dc149
KB
810 strategy =
811 VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp->v_op->vop_strategy;
8954e52c
KB
812 for (sp = fs->lfs_seglist; sp; sp = next_sp) {
813 bp = *(sp->cbpp - 1);
aa4dc149 814 (strategy)(bp);
8954e52c
KB
815 biowait(bp);
816 next_sp = sp->nextp;
817 free(sp->bpp, M_SEGMENT);
818 free(sp, M_SEGMENT);
819 }
aa4dc149
KB
820 /* Segment list is done. */
821 fs->lfs_seglist = NULL;
275ca4f0
KB
822}
823
824static void
aa4dc149 825lfs_writesuper(fs)
275ca4f0 826 LFS *fs;
275ca4f0
KB
827{
828 BUF *bp;
aa4dc149 829 int (*strategy) __P((BUF *));
275ca4f0
KB
830
831printf("lfs_writesuper\n");
aa4dc149 832 /* Checksum the superblock and copy it into a buffer. */
275ca4f0
KB
833 fs->lfs_cksum = cksum(fs, sizeof(LFS) - sizeof(fs->lfs_cksum));
834 bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD);
275ca4f0
KB
835 bcopy(fs, bp->b_un.b_lfs, sizeof(LFS));
836
aa4dc149
KB
837 /* Write the first superblock. */
838 strategy =
839 VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp->v_op->vop_strategy;
840 (strategy)(bp);
275ca4f0 841 biowait(bp);
aa4dc149
KB
842
843 /* Write the second superblock. */
275ca4f0
KB
844 bp->b_flags &= ~B_DONE;
845 bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
aa4dc149 846 (strategy)(bp);
8954e52c
KB
847
848 bp->b_vp = NULL; /* No associated vnode. */
aa4dc149 849 biowait(bp);
275ca4f0 850 brelse(bp);
aa4dc149 851printf("lfs_writesuper is returning\n");
275ca4f0
KB
852}
853
aa4dc149
KB
854/*
855 * Logical block number match routines used when traversing the dirty block
856 * chain.
857 */
858int
275ca4f0
KB
859match_data(bp)
860 BUF *bp;
861{
aa4dc149 862 return (bp->b_lblkno >= 0);
275ca4f0
KB
863}
864
aa4dc149 865int
275ca4f0
KB
866match_dindir(bp)
867 BUF *bp;
868{
aa4dc149 869 return (bp->b_lblkno == D_INDIR);
275ca4f0
KB
870}
871
872/*
873 * These are single indirect blocks. There are three types:
aa4dc149
KB
874 *
875 * the one in the inode (lblkno == S_INDIR, or -1).
876 * the ones that hang off of the double indirect in the inode (D_INDIR);
877 * these all have addresses in the range -2NINDIR to -(3NINDIR-1).
878 * the ones that hang off of the double indirect that hangs off of the
879 * triple indirect. These all have addresses < -(NINDIR^2).
880 *
881 * Since we currently don't support triple indirect blocks, this gets
882 * simpler, and we just look for block numbers less than -NIADDR.
275ca4f0 883 */
aa4dc149 884int
275ca4f0
KB
885match_indir(bp)
886 BUF *bp;
887{
aa4dc149
KB
888 return (bp->b_lblkno == S_INDIR || bp->b_lblkno < -NIADDR);
889}
890
891/* Get the next inode/summary block. */
892static daddr_t
893next(fs, sp, nbp)
894 LFS *fs;
895 SEGMENT *sp;
896 int *nbp;
897{
898 int nblocks, nino_blocks, nseg_blocks, sums_per_block;
899
900 /* Fs blocks allocated to summary blocks. */
901 sums_per_block = fs->lfs_bsize / LFS_SUMMARY_SIZE;
902 nseg_blocks = (sp->nsums + sums_per_block - 1) / sums_per_block;
903
904 /* Fs blocks allocated to inodes. */
905 nino_blocks = (sp->ninodes + INOPB(fs) - 1) / INOPB(fs);
906
907 /* Total number of fs blocks allocated. */
908 nblocks = nseg_blocks + nino_blocks;
909
910 if (nbp)
911 *nbp = nblocks;
912
913 /*
914 * The disk address of the new inode/summary block is the address of
915 * the start of the segment after this one minus the number of blocks
916 * that we've already used.
917 */
918 return (sntoda(fs, sp->seg_number + 1) - fsbtodb(fs, nblocks + 1));
84c30241
KB
919}
920
921/*
922 * Shellsort (diminishing increment sort) from Data Structures and
923 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
924 * see also Knuth Vol. 3, page 84. The increments are selected from
925 * formula (8), page 95. Roughly O(N^3/2).
926 */
927/*
928 * This is our own private copy of shellsort because we want to sort
929 * two parallel arrays (the array of buffer pointers and the array of
930 * logical block numbers) simultaneously. Note that we cast the array
931 * of logical block numbers to a unsigned in this routine so that the
932 * negative block numbers (meta data blocks) sort AFTER the data blocks.
933 */
934static void
935shellsort(bp_array, lb_array, nmemb)
936 BUF **bp_array;
275ca4f0 937 daddr_t *lb_array;
84c30241
KB
938 register int nmemb;
939{
940 static int __rsshell_increments[] = { 4, 1, 0 };
941 register int incr, *incrp, t1, t2;
942 BUF *bp_temp;
943 u_long lb_temp;
944
945 for (incrp = __rsshell_increments; incr = *incrp++;)
946 for (t1 = incr; t1 < nmemb; ++t1)
947 for (t2 = t1 - incr; t2 >= 0;)
948 if (lb_array[t2] > lb_array[t2 + incr]) {
949 lb_temp = lb_array[t2];
950 lb_array[t2] = lb_array[t2 + incr];
951 lb_array[t2 + incr] = lb_temp;
952 bp_temp = bp_array[t2];
953 bp_array[t2] = bp_array[t2 + incr];
954 bp_array[t2 + incr] = bp_temp;
955 t2 -= incr;
956 } else
957 break;
958}
275ca4f0 959#endif /* LOGFS */