move CS subdomain hack into hack directory
[unix-history] / usr / src / sys / ufs / lfs / lfs_segment.c
CommitLineData
84c30241
KB
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 *
275ca4f0 7 * @(#)lfs_segment.c 5.2 (Berkeley) %G%
84c30241
KB
8 */
9
275ca4f0 10#ifdef LOGFS
84c30241
KB
11#include "param.h"
12#include "systm.h"
13#include "namei.h"
14#include "resourcevar.h"
15#include "kernel.h"
16#include "file.h"
17#include "stat.h"
18#include "buf.h"
19#include "proc.h"
20#include "conf.h"
21#include "vnode.h"
22#include "specdev.h"
23#include "fifo.h"
24#include "malloc.h"
25#include "mount.h"
26#include "../ufs/lockf.h"
27#include "../ufs/quota.h"
28#include "../ufs/inode.h"
29#include "../ufs/dir.h"
30#include "../ufs/ufsmount.h"
31#include "lfs.h"
32#include "lfs_extern.h"
33
/*
 * TODO / design notes:
 *
 * - Add a check so that if the segment is empty, you don't write it.
 * - Write the code with lfs_ialloc to allocate a new page of inodes if
 *   you have to.
 * - Make an incoming sync wait until the previous one finishes.  Keith
 *   will write this.  When this happens, we no longer have to be able
 *   to chain superblocks together and handle multiple segments being
 *   written -- it seems like we can call biowait to wait for an I/O.
 *   However, I don't think we want to wait on the summary I/O
 *   necessarily, because if we've got lots of dirty buffers piling up,
 *   it would be nice to process them and get the segment all ready to
 *   write.  Perhaps we can just wait before firing up the next set of
 *   writes, rather than waiting to start doing anything.
 *   Also -- my lfs_writesuper should wait until all the segment writes
 *   are done (I added a biowait, but we need to make sure that the
 *   SEGMENT structure hasn't been freed before we get there).
 * - Need to keep vnode v_numoutput up to date for pending writes?
 * - ??? Could actually fire off the data block writes before you finish.
 *   This would give them a chance to get started earlier...
 */
53
54static int lfs_biocallback __P((BUF *));
55static void lfs_endsum __P((LFS *, SEGMENT *, int));
275ca4f0
KB
56static SEGMENT *lfs_gather
57 __P((LFS *, SEGMENT *, VNODE *, int (*) __P((BUF *))));
84c30241
KB
58static BUF *lfs_newbuf __P((LFS *, daddr_t, size_t));
59static SEGMENT *lfs_newseg __P((LFS *));
60static void lfs_newsum __P((LFS *, SEGMENT *));
61static daddr_t lfs_nextseg __P((LFS *));
275ca4f0
KB
62static void lfs_updatemeta __P((LFS *, SEGMENT *, INODE *, daddr_t *,
63 BUF **, int));
64static void lfs_writeckp __P((LFS *, SEGMENT *));
65static SEGMENT *lfs_writefile __P((SEGMENT *, LFS *, VNODE *, int));
66static SEGMENT *lfs_writeinode __P((LFS *, SEGMENT *, VNODE *));
84c30241 67static void lfs_writeseg __P((LFS *, SEGMENT *));
275ca4f0
KB
68static void lfs_writesuper __P((LFS *, SEGMENT *));
69static int match_data __P((BUF *));
70static int match_dindir __P((BUF *));
71static int match_indir __P((BUF *));
72static void shellsort __P((BUF **, daddr_t *, register int));
84c30241
KB
73
74/*
75 * XXX -- when we add fragments in here, we will need to allocate a larger
76 * buffer pointer array (sp->bpp).
77 */
78int
275ca4f0 79lfs_segwrite(mp, do_ckp)
84c30241 80 MOUNT *mp;
275ca4f0 81 int do_ckp; /* do a checkpoint too */
84c30241
KB
82{
83 FINFO *fip; /* current file info structure */
84 INODE *ip;
85 LFS *fs;
86 VNODE *vp;
87 SEGMENT *sp;
88
84c30241 89 fs = VFSTOUFS(mp)->um_lfs;
84c30241
KB
90 sp = lfs_newseg(fs);
91loop:
92 for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
93 /*
94 * If the vnode that we are about to sync is no longer
95 * associated with this mount point, start over.
96 */
84c30241
KB
97 if (vp->v_mount != mp)
98 goto loop;
99 if (VOP_ISLOCKED(vp))
100 continue;
101 ip = VTOI(vp);
102 if (ip->i_number == LFS_IFILE_INUM)
103 continue;
104 if ((ip->i_flag & (IMOD|IACC|IUPD|ICHG)) == 0 &&
105 vp->v_dirtyblkhd == NULL)
106 continue;
107 if (vget(vp))
108 goto loop;
275ca4f0 109 sp = lfs_writefile(sp, fs, vp, do_ckp);
84c30241
KB
110 vput(vp);
111 }
275ca4f0
KB
112 if (do_ckp)
113 lfs_writeckp(fs, sp);
114 else
115 lfs_writeseg(fs, sp);
116#ifdef NOTLFS
117 vflushbuf(ump->um_devvp, waitfor == MNT_WAIT ? B_SYNC : 0);
118#endif
84c30241
KB
119 return (0);
120}
121
122static int
123lfs_biocallback(bp)
124 BUF *bp;
125{
126 LFS *fs;
127 SEGMENT *sp, *next_sp;
128 UFSMOUNT *ump;
129 VNODE *devvp;
130
275ca4f0
KB
131 /*
132 * Grab the mount point for later (used to find the file system and
133 * block device) and, if the contents are valid, move the buffer back
134 * onto the clean list.
135 */
136printf("lfs_biocallback: buffer %x\n", bp, bp->b_lblkno);
84c30241 137 ump = VFSTOUFS(bp->b_vp->v_mount);
275ca4f0
KB
138 if (bp->b_flags & B_NOCACHE)
139 bp->b_vp = NULL;
140 else {
141 bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
142 reassignbuf(bp, bp->b_vp);
143 }
144
84c30241
KB
145 fs = ump->um_lfs;
146 devvp = ump->um_devvp;
275ca4f0
KB
147 brelse(bp); /* move up... XXX */
148
149printf("\nlfs_biocallback: iocount %d\n", fs->lfs_iocount);
150 if (fs->lfs_iocount == 0) {
151 /* Wake up any other syncs waiting on this file system. */
152 return;
153 }
154 --fs->lfs_iocount;
155 if (fs->lfs_iocount == 0) {
156printf("\nlfs_biocallback: doing summary write\n");
84c30241
KB
157 /* Fire off summary writes */
158 for (sp = fs->lfs_seglist; sp; sp = next_sp) {
159 next_sp = sp->nextp;
275ca4f0
KB
160#ifdef MOVETONEWBUF
161 (*(sp->cbpp - 1))->b_dev = bp->b_dev;
162#endif
163 (devvp->v_op->vop_strategy)(*(sp->cbpp - 1));
84c30241
KB
164 free(sp->bpp, M_SEGMENT);
165 free(sp, M_SEGMENT);
166 }
167 }
168}
169
84c30241
KB
170static void
171lfs_endsum(fs, sp, calc_next)
172 LFS *fs;
173 SEGMENT *sp;
174 int calc_next; /* if 1, calculate next, else -1 */
175{
176 BUF *bp;
177 SEGSUM *ssp;
178 daddr_t next_addr;
275ca4f0
KB
179 int npages, nseg_pages, nsums_per_blk;
180
181/* printf("lfs_endsum\n"); /**/
182 if (sp->sbp == NULL)
183 return;
84c30241 184
84c30241
KB
185 ssp = sp->segsum;
186 if (!calc_next)
187 ssp->ss_nextsum = (daddr_t) -1;
275ca4f0
KB
188 else
189 ssp->ss_nextsum = sp->sum_addr - LFS_SUMMARY_SIZE / DEV_BSIZE;
84c30241 190
275ca4f0 191 if ((sp->sum_num % (fs->lfs_bsize / LFS_SUMMARY_SIZE)) == (nsums_per_blk - 1)) {
84c30241 192 /*
275ca4f0
KB
193 * This buffer is now full. Compute the next address if appropriate
194 * and the checksum, and close the buffer by setting sp->sbp NULL.
84c30241 195 */
275ca4f0
KB
196 if (calc_next) {
197 nsums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE;
198 nseg_pages = 1 + sp->sum_num / nsums_per_blk;
199 npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs);
200 next_addr = fs->lfs_sboffs[0] +
201 (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize)
202 - fsbtodb(fs, (npages - 1)) - LFS_SUMMARY_SIZE / DEV_BSIZE;
84c30241 203 ssp->ss_nextsum = next_addr;
84c30241 204 }
275ca4f0
KB
205 ssp->ss_cksum = cksum(&ssp->ss_cksum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum));
206 sp->sbp = NULL;
207 } else
84c30241
KB
208 /* Calculate cksum on previous segment summary */
209 ssp->ss_cksum = cksum(&ssp->ss_cksum,
210 LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum));
275ca4f0
KB
211}
212
213static SEGMENT *
214lfs_gather(fs, sp, vp, match)
215 LFS *fs;
216 SEGMENT *sp;
217 VNODE *vp;
218 int (*match) __P((BUF *));
219{
220 BUF **bpp, *bp, *nbp;
221 FINFO *fip;
222 INODE *ip;
223 int count, s, version;
224 daddr_t *lbp, *start_lbp;
225
226 ip = VTOI(vp);
227 bpp = sp->cbpp;
228 fip = sp->fip;
229 version = fip->fi_version;
230 start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks];
231 count = 0;
232
233 s = splbio();
234 for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
235 nbp = bp->b_blockf;
236 if ((bp->b_flags & B_BUSY))
237 continue;
238 if ((bp->b_flags & B_DELWRI) == 0)
239 panic("lfs_write: not dirty");
240 if (!match(bp))
241 continue;
242 bremfree(bp);
243 bp->b_flags |= B_BUSY | B_CALL;
244 bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev;
245 bp->b_iodone = lfs_biocallback;
246
247 *lbp++ = bp->b_lblkno;
248 *sp->cbpp++ = bp;
249 fip->fi_nblocks++;
250 sp->sum_bytes_left -= sizeof(daddr_t);
251 sp->seg_bytes_left -= bp->b_bufsize;
252 if (sp->sum_bytes_left < sizeof(daddr_t) ||
253 sp->seg_bytes_left < fs->lfs_bsize) {
254 /*
255 * We are about to allocate a new summary block
256 * and possibly a new segment. So, we need to
257 * sort the blocks we've done so far, and assign
258 * the disk addresses, so we can start a new block
259 * correctly. We may be doing I/O so we need to
260 * release the s lock before doing anything.
261 */
262 splx(s);
263 lfs_updatemeta(fs, sp, ip, start_lbp, bpp,
264 lbp - start_lbp);
265
266 /* Put this file in the segment summary */
267 ((SEGSUM *)(sp->segsum))->ss_nfinfo++;
268
269 if (sp->seg_bytes_left < fs->lfs_bsize) {
270 lfs_writeseg(fs, sp);
271 sp = lfs_newseg(fs);
272 } else if (sp->sum_bytes_left < sizeof(daddr_t))
273 lfs_newsum(fs, sp);
274 fip = sp->fip;
275 fip->fi_ino = ip->i_number;
276 fip->fi_version = version;
277 bpp = sp->cbpp;
278 /* You know that you have a new FINFO either way */
279 start_lbp = lbp = fip->fi_blocks;
280 s = splbio();
281 }
84c30241 282 }
275ca4f0
KB
283 splx(s);
284 lfs_updatemeta(fs, sp, ip, start_lbp, bpp, lbp - start_lbp);
285
286 return(sp);
84c30241
KB
287}
288
275ca4f0 289
84c30241
KB
290static BUF *
291lfs_newbuf(fs, daddr, size)
292 LFS *fs;
293 daddr_t daddr;
294 size_t size;
295{
296 BUF *bp;
297 VNODE *devvp;
298
84c30241
KB
299 bp = getnewbuf();
300 bremhash(bp);
301
302 /*
303 * XXX
304 * Need a devvp, but this isn't a particularly clean way to get one.
275ca4f0 305 * devvp = VTOI(fs->lfs_ivnode)->i_devvp;
84c30241 306 */
275ca4f0
KB
307#ifdef NOTWORKING
308 devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp;
84c30241 309 bgetvp(devvp, bp);
275ca4f0
KB
310#endif
311 bp->b_vp = fs->lfs_ivnode;
312 bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev;
84c30241 313 bp->b_bcount = 0;
275ca4f0 314 bp->b_blkno = bp->b_lblkno = daddr;
84c30241
KB
315 bp->b_error = 0;
316 bp->b_resid = 0;
275ca4f0
KB
317 bp->b_flags |= B_CALL | B_DELWRI | B_NOCACHE | B_WRITE;
318 bp->b_iodone = lfs_biocallback;
319#ifdef PROBABLYWRONG
84c30241 320 binshash(bp, BUFHASH(devvp, daddr));
275ca4f0 321#endif
84c30241 322 allocbuf(bp, size);
275ca4f0
KB
323#ifdef PROBABLYWRONG
324 reassignbuf(bp, devvp);
325#endif
84c30241
KB
326 return (bp);
327}
328
329
330/*
331 * Start a new segment
332 */
333static SEGMENT *
334lfs_newseg(fs)
335 LFS *fs;
336{
337 SEGMENT *sp;
338 SEGUSE *sup;
339
340printf("lfs_newseg\n");
341 /* Get buffer space to write out a segment */
342 sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK);
275ca4f0
KB
343 sp->ibp = NULL;
344 sp->sbp = NULL;
84c30241
KB
345 sp->cbpp = sp->bpp =
346 malloc(fs->lfs_ssize * sizeof(BUF *), M_SEGMENT, M_WAITOK);
347 sp->nextp = NULL;
348 sp->sum_bytes_left = LFS_SUMMARY_SIZE;
349 sp->seg_bytes_left = (fs->lfs_segmask + 1) - LFS_SUMMARY_SIZE;
350 sp->saddr = fs->lfs_nextseg;
275ca4f0 351printf("lfs_newseg: About to write segment %lx\n", sp->saddr);
84c30241
KB
352 sp->sum_addr = sp->saddr + sp->seg_bytes_left / DEV_BSIZE;
353 sp->ninodes = 0;
354 sp->sum_num = -1;
275ca4f0
KB
355 sp->seg_number =
356 (sp->saddr - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize);
84c30241
KB
357
358 /* initialize segment summary info */
359 lfs_newsum(fs, sp);
360 sup = fs->lfs_segtab + sp->seg_number;
361
362 if (sup->su_nbytes != 0) {
363 /* This is a segment containing a super block */
364 FINFO *fip;
365 daddr_t lbn, *lbnp;
275ca4f0 366 SEGSUM *ssp;
84c30241 367
275ca4f0
KB
368 ssp = (SEGSUM *)sp->segsum;
369 ssp->ss_nfinfo++;
84c30241
KB
370 fip = sp->fip;
371 fip->fi_nblocks = LFS_SBPAD >> fs->lfs_bshift;
372 fip->fi_version = 1;
373 fip->fi_ino = LFS_UNUSED_INUM;
374 sp->saddr += fsbtodb(fs, fip->fi_nblocks);
375 lbnp = fip->fi_blocks;
376 for (lbn = 0; lbn < fip->fi_nblocks; lbn++)
377 *lbnp++ = lbn;
378 sp->seg_bytes_left -= sup->su_nbytes;
379 sp->sum_bytes_left -=
380 sizeof(FINFO) + (fip->fi_nblocks - 1) * sizeof(daddr_t);
381 sp->fip = (FINFO *)lbnp;
382 }
383 return(sp);
384}
385
386
387static void
388lfs_newsum(fs, sp)
389 LFS *fs;
390 SEGMENT *sp;
391{
392 SEGSUM *ssp;
275ca4f0 393 int npages, nseg_pages, sums_per_blk;
84c30241
KB
394
395printf("lfs_newsum\n");
275ca4f0
KB
396 lfs_endsum(fs, sp, 1);
397 ++sp->sum_num;
398 if (sp->sbp == NULL) {
399 /* Allocate a new buffer. */
400 if (sp->seg_bytes_left < fs->lfs_bsize) {
401 lfs_writeseg(fs, sp);
402 sp = lfs_newseg(fs);
403 }
404 sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE;
405 nseg_pages = 1 + sp->sum_num / sums_per_blk;
406 npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs);
407 sp->sum_addr = fs->lfs_sboffs[0] +
408 (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize)
409 - fsbtodb(fs, npages);
410 sp->sbp = lfs_newbuf(fs, sp->sum_addr, fs->lfs_bsize);
411 sp->bpp[fs->lfs_ssize - npages] = sp->sbp;
412printf("Inserting summary block, address %x at index %d\n",
413sp->sbp->b_lblkno, fs->lfs_ssize - npages);
414 sp->seg_bytes_left -= fs->lfs_bsize;
415 sp->segsum = sp->sbp->b_un.b_addr + fs->lfs_bsize - LFS_SUMMARY_SIZE;
416 sp->sum_addr += (fs->lfs_bsize - LFS_SUMMARY_SIZE) / DEV_BSIZE;
84c30241 417 } else {
275ca4f0
KB
418 sp->segsum -= LFS_SUMMARY_SIZE;
419 sp->sum_addr -= LFS_SUMMARY_SIZE / DEV_BSIZE;
84c30241
KB
420 }
421
275ca4f0
KB
422 ssp = sp->segsum;
423 ssp->ss_next = fs->lfs_nextseg = lfs_nextseg(fs);
424 ssp->ss_prev = fs->lfs_lastseg;
425
84c30241
KB
426 /* Initialize segment summary info. */
427 sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM));
275ca4f0 428 sp->fip->fi_nblocks = 0;
84c30241
KB
429 ssp->ss_nextsum = (daddr_t)-1;
430 ssp->ss_create = time.tv_sec;
431
432 ssp->ss_nfinfo = 0;
433 ssp->ss_ninos = 0;
434 sp->sum_bytes_left -= LFS_SUMMARY_SIZE;
435 sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
436}
437
438#define seginc(fs, sn) ((sn + 1) % fs->lfs_nseg)
439static daddr_t
440lfs_nextseg(fs)
441 LFS *fs;
442{
443 int segnum, sn;
444 SEGUSE *sup;
445
84c30241 446 segnum = satosn(fs, fs->lfs_nextseg);
275ca4f0
KB
447 for (sn = seginc(fs, segnum); sn != segnum; sn = seginc(fs, sn))
448 if (!(fs->lfs_segtab[sn].su_flags & SEGUSE_DIRTY))
84c30241 449 break;
275ca4f0 450
84c30241
KB
451 if (sn == segnum)
452 panic("lfs_nextseg: file system full"); /* XXX */
453 return(sntosa(fs, sn));
454}
455
456/*
457 * Update the metadata that points to the blocks listed in the FIP
458 * array.
459 */
275ca4f0
KB
460static void
461lfs_updatemeta(fs, sp, ip, lbp, bpp, nblocks)
84c30241 462 LFS *fs;
275ca4f0 463 SEGMENT *sp;
84c30241 464 INODE *ip;
275ca4f0 465 daddr_t *lbp;
84c30241 466 BUF **bpp;
275ca4f0 467 int nblocks;
84c30241
KB
468{
469 SEGUSE *segup;
275ca4f0 470 BUF **lbpp, *bp, *mbp;
84c30241 471 daddr_t da, iblkno;
275ca4f0
KB
472 int db_per_fsb, error, i, oldsegnum;
473 long lbn;
84c30241 474
275ca4f0
KB
475printf("lfs_updatemeta of %d blocks\n", nblocks);
476 if ((nblocks == 0) && (ip->i_flag & (IMOD|IACC|IUPD|ICHG)) == 0)
477 return;
478
479 /* First sort the blocks and add disk addresses */
480 shellsort(bpp, lbp, nblocks);
481
482 db_per_fsb = 1 << fs->lfs_fsbtodb;
483 for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) {
484 (*lbpp)->b_blkno = sp->saddr;
485 sp->saddr += db_per_fsb;
486 }
487
488 for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) {
489 lbn = lbp[i];
490printf("lfs_updatemeta: block %d\n", lbn);
84c30241 491 if (error = lfs_bmap(ip, lbn, &da))
275ca4f0 492 panic("lfs_updatemeta: lfs_bmap returned error");
84c30241
KB
493
494 if (da) {
275ca4f0 495 /* Update segment usage information */
84c30241
KB
496 oldsegnum = (da - fs->lfs_sboffs[0]) /
497 fsbtodb(fs, fs->lfs_ssize);
498 segup = fs->lfs_segtab+oldsegnum;
499 segup->su_lastmod = time.tv_sec;
500 if ((segup->su_nbytes -= fs->lfs_bsize) < 0)
501 printf("lfs_updatemeta: negative bytes %s %d\n",
502 "in segment", oldsegnum);
503 }
504
275ca4f0
KB
505 /*
506 * Now change whoever points to lbn. We could start with the
507 * smallest (most negative) block number in these if clauses,
508 * but we assume that indirect blocks are least common, and
509 * handle them separately.
510 */
511 bp = NULL;
512 if (lbn < 0) {
513 if (lbn < -NIADDR) {
514printf("lfs_updatemeta: changing indirect block %d\n", D_INDIR);
515 if (error = bread(ITOV(ip), D_INDIR,
516 fs->lfs_bsize, NOCRED, &bp))
517 panic("lfs_updatemeta: error on bread");
518
519 bp->b_un.b_daddr[-lbn % NINDIR(fs)] =
520 (*lbpp)->b_blkno;
521 } else
522 ip->i_din.di_ib[-lbn-1] = (*lbpp)->b_blkno;
523
524 } else if (lbn < NDADDR)
84c30241
KB
525 ip->i_din.di_db[lbn] = (*lbpp)->b_blkno;
526 else if ((lbn -= NDADDR) < NINDIR(fs)) {
527printf("lfs_updatemeta: changing indirect block %d\n", S_INDIR);
275ca4f0
KB
528 if (error = bread(ITOV(ip), S_INDIR, fs->lfs_bsize,
529 NOCRED, &bp))
530 panic("lfs_updatemeta: bread returned error");
531
84c30241 532 bp->b_un.b_daddr[lbn] = (*lbpp)->b_blkno;
84c30241
KB
533 } else if ( (lbn = (lbn - NINDIR(fs)) / NINDIR(fs)) <
534 NINDIR(fs)) {
535
536 iblkno = - (lbn + NIADDR + 1);
537printf("lfs_updatemeta: changing indirect block %d\n", iblkno);
275ca4f0
KB
538 if (error = bread(ITOV(ip), iblkno, fs->lfs_bsize,
539 NOCRED, &bp))
540 panic("lfs_updatemeta: bread returned error");
541
84c30241
KB
542 bp->b_un.b_daddr[lbn % NINDIR(fs)] = (*lbpp)->b_blkno;
543 }
544 else
275ca4f0
KB
545 panic("lfs_updatemeta: logical block number too large");
546 if (bp)
547 lfs_bwrite(bp);
84c30241 548 }
275ca4f0
KB
549}
550
551static void
552lfs_writeckp(fs, sp)
553 LFS *fs;
554 SEGMENT *sp;
555{
556 BUF *bp;
557 FINFO *fip;
558 INODE *ip;
559 SEGUSE *sup;
560 daddr_t *lbp;
561 int bytes_needed, i;
562 void *xp;
563
564printf("lfs_writeckp\n");
565 /*
566 * This will write the dirty ifile blocks, but not the segusage
567 * table nor the ifile inode.
568 */
569 sp = lfs_writefile(sp, fs, fs->lfs_ivnode, 1);
570
571 /*
572 * Make sure that the segment usage table and the ifile inode will
573 * fit in this segment. If they won't, put them in the next segment
574 */
575 bytes_needed = fs->lfs_segtabsz << fs->lfs_bshift;
576 if (sp->ninodes % INOPB(fs) == 0)
577 bytes_needed += fs->lfs_bsize;
578
579 if (sp->seg_bytes_left < bytes_needed) {
580 lfs_writeseg(fs, sp);
581 sp = lfs_newseg(fs);
582 } else if (sp->sum_bytes_left < (fs->lfs_segtabsz * sizeof(daddr_t)))
583 lfs_newsum(fs, sp);
584
585#ifdef DEBUG
586 if (sp->seg_bytes_left < bytes_needed)
587 panic("lfs_writeckp: unable to write checkpoint");
588#endif
589
590 /*
591 * Now, update the segment usage information and the ifile inode and
592 * and write it out
593 */
594
595 sup = fs->lfs_segtab + sp->seg_number;
596 sup->su_nbytes = (fs->lfs_segmask + 1) - sp->seg_bytes_left +
597 bytes_needed;
598 sup->su_lastmod = time.tv_sec;
599 sup->su_flags = SEGUSE_DIRTY;
600
601 /* Get buffers for the segusage table and write it out */
602 ip = VTOI(fs->lfs_ivnode);
603 fip = sp->fip;
604 lbp = &fip->fi_blocks[fip->fi_nblocks];
605 for (xp = fs->lfs_segtab, i = 0; i < fs->lfs_segtabsz;
606 i++, xp += fs->lfs_bsize, lbp++) {
607 bp = lfs_newbuf(fs, sp->saddr, fs->lfs_bsize);
608 *sp->cbpp++ = bp;
609 bcopy(xp, bp->b_un.b_words, fs->lfs_bsize);
610 ip->i_din.di_db[i] = sp->saddr;
611 sp->saddr += (1 << fs->lfs_fsbtodb);
612 *lbp = i;
613 fip->fi_nblocks++;
614 }
615 sp = lfs_writeinode(fs, sp, fs->lfs_ivnode);
616 lfs_writeseg(fs, sp);
617 lfs_writesuper(fs, sp);
84c30241
KB
618}
619
620/*
84c30241
KB
621 * XXX -- I think we need to figure out what to do if we write
622 * the segment and find more dirty blocks when we're done.
623 */
624static SEGMENT *
275ca4f0 625lfs_writefile(sp, fs, vp, do_ckp)
84c30241
KB
626 SEGMENT *sp;
627 LFS *fs;
628 VNODE *vp;
275ca4f0 629 int do_ckp;
84c30241 630{
84c30241
KB
631 FINFO *fip;
632 INODE *ip;
84c30241
KB
633
634 /* initialize the FINFO structure */
635 ip = VTOI(vp);
636printf("lfs_writefile: node %d\n", ip->i_number);
637loop:
275ca4f0
KB
638 sp->fip->fi_nblocks = 0;
639 sp->fip->fi_ino = ip->i_number;
640 if (ip->i_number != LFS_IFILE_INUM)
641 sp->fip->fi_version = lfs_getversion(fs, ip->i_number);
642 else
643 sp->fip->fi_version = 1;
644
645 sp = lfs_gather(fs, sp, vp, match_data);
646 if (do_ckp) {
647 sp = lfs_gather(fs, sp, vp, match_indir);
648 sp = lfs_gather(fs, sp, vp, match_dindir);
649 }
84c30241 650
275ca4f0
KB
651(void)printf("lfs_writefile: adding %d blocks to segment\n",
652sp->fip->fi_nblocks);
653 /*
654 * Update the inode for this file and reflect new inode
655 * address in the ifile. If this is the ifile, don't update
656 * the inode, because we're checkpointing and will update the
657 * inode with the segment usage information (so we musn't
658 * bump the finfo pointer either).
659 */
660 if (ip->i_number != LFS_IFILE_INUM) {
661 sp = lfs_writeinode(fs, sp, vp);
662 fip = sp->fip;
663 if (fip->fi_nblocks) {
84c30241 664 ((SEGSUM *)(sp->segsum))->ss_nfinfo++;
275ca4f0
KB
665 sp->fip = (FINFO *)((u_long)fip + sizeof(FINFO) +
666 sizeof(u_long) * fip->fi_nblocks - 1);
84c30241 667 }
84c30241
KB
668 }
669 return(sp);
670}
671
275ca4f0
KB
672static SEGMENT *
673lfs_writeinode(fs, sp, vp)
674 LFS *fs;
675 SEGMENT *sp;
676 VNODE *vp;
84c30241 677{
275ca4f0
KB
678 BUF *bp;
679 INODE *ip;
680 SEGSUM *ssp;
681 daddr_t iaddr, next_addr;
682 int npages, nseg_pages, sums_per_blk;
683 struct dinode *dip;
684
685printf("lfs_writeinode\n");
686 sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE;
687 if (sp->ibp == NULL) {
688 /* Allocate a new buffer. */
689 if (sp->seg_bytes_left < fs->lfs_bsize) {
690 lfs_writeseg(fs, sp);
691 sp = lfs_newseg(fs);
692 }
693 nseg_pages = (sp->sum_num + sums_per_blk) / sums_per_blk;
694 npages = nseg_pages + (sp->ninodes + INOPB(fs)) / INOPB(fs);
695 next_addr = fs->lfs_sboffs[0] +
696 (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize)
697 - fsbtodb(fs, npages);
698 sp->ibp = lfs_newbuf(fs, next_addr, fs->lfs_bsize);
699 sp->ibp->b_flags |= B_BUSY;
700 sp->bpp[fs->lfs_ssize - npages] = sp->ibp;
701 sp->seg_bytes_left -= fs->lfs_bsize;
702printf("alloc inode block @ daddr %x, bp = %x inserted at %d\n",
703next_addr, sp->ibp, fs->lfs_ssize - npages);
704 }
705 ip = VTOI(vp);
706 bp = sp->ibp;
707 dip = bp->b_un.b_dino + (sp->ninodes % INOPB(fs));
708 bcopy(&ip->i_din, dip, sizeof(struct dinode));
709 iaddr = bp->b_blkno;
710 ++sp->ninodes;
711 ssp = sp->segsum;
712 ++ssp->ss_ninos;
713 if (sp->ninodes % INOPB(fs) == 0)
714 sp->ibp = NULL;
715 if (ip->i_number == LFS_IFILE_INUM)
716 fs->lfs_idaddr = iaddr;
717 else
718 lfs_iset(ip, iaddr, ip->i_atime); /* Update ifile */
719 ip->i_flags &= ~(IMOD|IACC|IUPD|ICHG); /* make inode clean */
720 return(sp);
84c30241
KB
721}
722
723static void
724lfs_writeseg(fs, sp)
725 LFS *fs;
726 SEGMENT *sp;
727{
275ca4f0 728 BUF **bpp;
84c30241
KB
729 SEGSUM *ssp;
730 SEGUSE *sup;
731 VNODE *devvp;
732 int nblocks, nbuffers, ninode_blocks, nsegsums, nsum_pb;
733 int i, metaoff, nmeta;
275ca4f0 734struct buf **xbp; int xi;
84c30241
KB
735
736printf("lfs_writeseg\n");
275ca4f0
KB
737 fs->lfs_lastseg = sntosa(fs, sp->seg_number);
738 lfs_endsum(fs, sp, 0);
84c30241 739
275ca4f0 740#ifdef HELLNO
84c30241 741 /* Finish off any inodes */
275ca4f0
KB
742 if (sp->ibp)
743 brelse(sp->ibp);
744#endif
84c30241
KB
745
746 /*
747 * Copy inode and summary block buffer pointers down so they are
275ca4f0 748 * contiguous with the page buffer pointers.
84c30241 749 */
275ca4f0
KB
750 ssp = sp->segsum;
751 nsum_pb = fs->lfs_bsize / LFS_SUMMARY_SIZE;
752 nbuffers = sp->cbpp - sp->bpp;
753 nsegsums = 1 + sp->sum_num / nsum_pb;
754 ninode_blocks = (sp->ninodes + INOPB(fs) - 1) / INOPB(fs);
755 nmeta = ninode_blocks + nsegsums;
84c30241 756 metaoff = fs->lfs_ssize - nmeta;
275ca4f0 757 nblocks = nbuffers + nmeta;
84c30241 758 if (sp->bpp + metaoff != sp->cbpp)
275ca4f0
KB
759 bcopy(sp->bpp + metaoff, sp->cbpp, sizeof(BUF *) * nmeta);
760 sp->cbpp += nmeta;
84c30241 761
84c30241
KB
762 sup = fs->lfs_segtab + sp->seg_number;
763 sup->su_nbytes = nblocks << fs->lfs_bshift;
764 sup->su_lastmod = time.tv_sec;
765 sup->su_flags = SEGUSE_DIRTY;
766
767 /*
275ca4f0 768 * Since we need to guarantee that the summary block gets written last,
84c30241
KB
769 * we issue the writes in two sets. The first n-1 buffers first, and
770 * then, after they've completed, the last 1 buffer. Only when that
275ca4f0 771 * final write completes is the segment valid.
84c30241
KB
772 */
773 devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp;
275ca4f0
KB
774 /*
775 * Since no writes are yet scheduled, no need to block here; if we
776 * scheduled the writes at multiple points, we'd need an splbio()
777 * here.
778 */
779 fs->lfs_iocount = nblocks - 1;
84c30241
KB
780 sp->nextp = fs->lfs_seglist;
781 fs->lfs_seglist = sp;
275ca4f0
KB
782
783 for (bpp = sp->bpp, i = 0; i < (nblocks - 1); i++, ++bpp)
784 /* (*(devvp->v_op->vop_strategy)) */ sdstrategy(*bpp);
785}
786
787static void
788lfs_writesuper(fs, sp)
789 LFS *fs;
790 SEGMENT *sp;
791{
792 BUF *bp;
793 VNODE *devvp;
794
795printf("lfs_writesuper\n");
796 /* Wait for segment write to complete */
797 /* XXX probably should do this biowait(*(sp->cbpp - 1)); */
798
799 /* Get a buffer for the super block */
800 fs->lfs_cksum = cksum(fs, sizeof(LFS) - sizeof(fs->lfs_cksum));
801 bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD);
802 bp->b_flags &= ~B_CALL;
803 bp->b_vp = NULL;
804 bp->b_iodone = NULL;
805 bcopy(fs, bp->b_un.b_lfs, sizeof(LFS));
806
807 /* Write the first superblock; wait. */
808 devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp;
809#ifdef MOVETONEWBUF
810 bp->b_dev = devvp->v_rdev;
811#endif
812 (*devvp->v_op->vop_strategy)(bp);
813 biowait(bp);
814
815 /* Now, write the second one for which we don't have to wait */
816 bp->b_flags &= ~B_DONE;
817 bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
818 (*devvp->v_op->vop_strategy)(bp);
819 brelse(bp);
820}
821
822/* Block match routines used when traversing the dirty block chain. */
823match_data(bp)
824 BUF *bp;
825{
826 return(bp->b_lblkno >= 0);
827}
828
829
830match_dindir(bp)
831 BUF *bp;
832{
833 return(bp->b_lblkno == D_INDIR);
834}
835
836/*
837 * These are single indirect blocks. There are three types:
838 * the one in the inode (address S_INDIR = -1).
839 * the ones that hang off of D_INDIR the double indirect in the inode.
840 * these all have addresses in the range -2NINDIR to -(3NINDIR-1)
841 * the ones that hang off of double indirect that hang off of the
842 * triple indirect. These all have addresses < -(NINDIR^2).
843 * Since we currently don't support, triple indirect blocks, this gets simpler.
844 * We just need to look for block numbers less than -NIADDR.
845 */
846match_indir(bp)
847 BUF *bp;
848{
849 return(bp->b_lblkno == S_INDIR || bp->b_lblkno < -NIADDR);
84c30241
KB
850}
851
852/*
853 * Shellsort (diminishing increment sort) from Data Structures and
854 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
855 * see also Knuth Vol. 3, page 84. The increments are selected from
856 * formula (8), page 95. Roughly O(N^3/2).
857 */
858/*
859 * This is our own private copy of shellsort because we want to sort
860 * two parallel arrays (the array of buffer pointers and the array of
861 * logical block numbers) simultaneously. Note that we cast the array
862 * of logical block numbers to a unsigned in this routine so that the
863 * negative block numbers (meta data blocks) sort AFTER the data blocks.
864 */
865static void
866shellsort(bp_array, lb_array, nmemb)
867 BUF **bp_array;
275ca4f0 868 daddr_t *lb_array;
84c30241
KB
869 register int nmemb;
870{
871 static int __rsshell_increments[] = { 4, 1, 0 };
872 register int incr, *incrp, t1, t2;
873 BUF *bp_temp;
874 u_long lb_temp;
875
876 for (incrp = __rsshell_increments; incr = *incrp++;)
877 for (t1 = incr; t1 < nmemb; ++t1)
878 for (t2 = t1 - incr; t2 >= 0;)
879 if (lb_array[t2] > lb_array[t2 + incr]) {
880 lb_temp = lb_array[t2];
881 lb_array[t2] = lb_array[t2 + incr];
882 lb_array[t2 + incr] = lb_temp;
883 bp_temp = bp_array[t2];
884 bp_array[t2] = bp_array[t2 + incr];
885 bp_array[t2 + incr] = bp_temp;
886 t2 -= incr;
887 } else
888 break;
889}
275ca4f0 890#endif /* LOGFS */