[unix-history] usr/src/sys/ufs/lfs/lfs_segment.c
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_segment.c	5.2 (Berkeley) %G%
 */

#ifdef LOGFS
#include "param.h"
#include "systm.h"
#include "namei.h"
#include "resourcevar.h"
#include "kernel.h"
#include "file.h"
#include "stat.h"
#include "buf.h"
#include "proc.h"
#include "conf.h"
#include "vnode.h"
#include "specdev.h"
#include "fifo.h"
#include "malloc.h"
#include "mount.h"
#include "../ufs/lockf.h"
#include "../ufs/quota.h"
#include "../ufs/inode.h"
#include "../ufs/dir.h"
#include "../ufs/ufsmount.h"
#include "lfs.h"
#include "lfs_extern.h"

/*
Add a check so that if the segment is empty, you don't write it.
Write the code with lfs_ialloc to allocate a new page of inodes if you have to.
Make an incoming sync wait until the previous one finishes. Keith
	will write this. When this happens, we no longer have to be
	able to chain superblocks together and handle multiple segment
	writes -- seems like we can call biowait to wait for an I/O.
	However, I don't think we want to wait on the summary I/O
	necessarily, because if we've got lots of dirty buffers piling
	up, it would be nice to process them and get the segment all
	ready to write. Perhaps we can just wait before firing up the
	next set of writes, rather than waiting to start doing anything.
	Also -- my lfs_writesuper should wait until all the segment writes
	are done (I added a biowait, but we need to make sure that the SEGMENT
	structure hasn't been freed before we get there). A rough sketch of
	the iocount wait idea follows this comment.
Need to keep vnode v_numoutput up to date for pending writes?
??? Could actually fire off the datablock writes before you finish. This
would give them a chance to get started earlier...
*/
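
/*
 * Illustrative sketch only, not part of the original source: one way the
 * "wait for the previous sync" item above could be done, assuming the
 * classic BSD sleep()/wakeup() interface and the lfs_iocount field that
 * lfs_writeseg and lfs_biocallback already maintain.  The function name
 * lfs_segwait and its placement are assumptions.
 */
#ifdef notyet
static void
lfs_segwait(fs)
	LFS *fs;
{
	int s;

	s = splbio();
	/* Sleep until the previous segment write has drained. */
	while (fs->lfs_iocount != 0)
		sleep((caddr_t)&fs->lfs_iocount, PRIBIO + 1);
	splx(s);
}
/*
 * The matching wakeup would go at the end of lfs_biocallback, e.g.:
 *	if (--fs->lfs_iocount == 0)
 *		wakeup((caddr_t)&fs->lfs_iocount);
 */
#endif /* notyet */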

static int lfs_biocallback __P((BUF *));
static void lfs_endsum __P((LFS *, SEGMENT *, int));
static SEGMENT *lfs_gather
	    __P((LFS *, SEGMENT *, VNODE *, int (*) __P((BUF *))));
static BUF *lfs_newbuf __P((LFS *, daddr_t, size_t));
static SEGMENT *lfs_newseg __P((LFS *));
static void lfs_newsum __P((LFS *, SEGMENT *));
static daddr_t lfs_nextseg __P((LFS *));
static void lfs_updatemeta __P((LFS *, SEGMENT *, INODE *, daddr_t *,
	    BUF **, int));
static void lfs_writeckp __P((LFS *, SEGMENT *));
static SEGMENT *lfs_writefile __P((SEGMENT *, LFS *, VNODE *, int));
static SEGMENT *lfs_writeinode __P((LFS *, SEGMENT *, VNODE *));
static void lfs_writeseg __P((LFS *, SEGMENT *));
static void lfs_writesuper __P((LFS *, SEGMENT *));
static int match_data __P((BUF *));
static int match_dindir __P((BUF *));
static int match_indir __P((BUF *));
static void shellsort __P((BUF **, daddr_t *, register int));

/*
 * XXX -- when we add fragments in here, we will need to allocate a larger
 * buffer pointer array (sp->bpp).
 */
int
lfs_segwrite(mp, do_ckp)
	MOUNT *mp;
	int do_ckp;		/* do a checkpoint too */
{
	FINFO *fip;		/* current file info structure */
	INODE *ip;
	LFS *fs;
	VNODE *vp;
	SEGMENT *sp;

	fs = VFSTOUFS(mp)->um_lfs;
	sp = lfs_newseg(fs);
loop:
	for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;
		if (VOP_ISLOCKED(vp))
			continue;
		ip = VTOI(vp);
		if (ip->i_number == LFS_IFILE_INUM)
			continue;
		if ((ip->i_flag & (IMOD|IACC|IUPD|ICHG)) == 0 &&
		    vp->v_dirtyblkhd == NULL)
			continue;
		if (vget(vp))
			goto loop;
		sp = lfs_writefile(sp, fs, vp, do_ckp);
		vput(vp);
	}
	if (do_ckp)
		lfs_writeckp(fs, sp);
	else
		lfs_writeseg(fs, sp);
#ifdef NOTLFS
	vflushbuf(ump->um_devvp, waitfor == MNT_WAIT ? B_SYNC : 0);
#endif
	return (0);
}
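
/*
 * Hedged usage note (an assumption, not taken from this file): the VFS
 * sync path in the LFS vfsops code is expected to drive this entry point
 * with something like
 *
 *	error = lfs_segwrite(mp, waitfor == MNT_WAIT);
 *
 * i.e. a full checkpoint only when the caller asked for a synchronous
 * sync; the exact call site and policy live outside this file.
 */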

static int
lfs_biocallback(bp)
	BUF *bp;
{
	LFS *fs;
	SEGMENT *sp, *next_sp;
	UFSMOUNT *ump;
	VNODE *devvp;

	/*
	 * Grab the mount point for later (used to find the file system and
	 * block device) and, if the contents are valid, move the buffer back
	 * onto the clean list.
	 */
printf("lfs_biocallback: buffer %x lblkno %x\n", bp, bp->b_lblkno);
	ump = VFSTOUFS(bp->b_vp->v_mount);
	if (bp->b_flags & B_NOCACHE)
		bp->b_vp = NULL;
	else {
		bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
		reassignbuf(bp, bp->b_vp);
	}

	fs = ump->um_lfs;
	devvp = ump->um_devvp;
	brelse(bp);		/* move up... XXX */

printf("\nlfs_biocallback: iocount %d\n", fs->lfs_iocount);
	if (fs->lfs_iocount == 0) {
		/* Wake up any other syncs waiting on this file system. */
		return;
	}
	--fs->lfs_iocount;
	if (fs->lfs_iocount == 0) {
printf("\nlfs_biocallback: doing summary write\n");
		/* Fire off the summary writes. */
		for (sp = fs->lfs_seglist; sp; sp = next_sp) {
			next_sp = sp->nextp;
#ifdef MOVETONEWBUF
			(*(sp->cbpp - 1))->b_dev = bp->b_dev;
#endif
			(devvp->v_op->vop_strategy)(*(sp->cbpp - 1));
			free(sp->bpp, M_SEGMENT);
			free(sp, M_SEGMENT);
		}
	}
}

static void
lfs_endsum(fs, sp, calc_next)
	LFS *fs;
	SEGMENT *sp;
	int calc_next;		/* if 1, compute ss_nextsum; else set it to -1 */
{
	BUF *bp;
	SEGSUM *ssp;
	daddr_t next_addr;
	int npages, nseg_pages, nsums_per_blk;

/* printf("lfs_endsum\n"); /**/
	if (sp->sbp == NULL)
		return;

	ssp = sp->segsum;
	if (!calc_next)
		ssp->ss_nextsum = (daddr_t) -1;
	else
		ssp->ss_nextsum = sp->sum_addr - LFS_SUMMARY_SIZE / DEV_BSIZE;

	/* Compute this before it is used in the test below. */
	nsums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE;
	if ((sp->sum_num % nsums_per_blk) == (nsums_per_blk - 1)) {
		/*
		 * This buffer is now full.  Compute the next address if
		 * appropriate and the checksum, and close the buffer by
		 * setting sp->sbp to NULL.
		 */
		if (calc_next) {
			nseg_pages = 1 + sp->sum_num / nsums_per_blk;
			npages = nseg_pages +
			    (sp->ninodes + INOPB(fs) - 1) / INOPB(fs);
			next_addr = fs->lfs_sboffs[0] +
			    (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize)
			    - fsbtodb(fs, (npages - 1)) -
			    LFS_SUMMARY_SIZE / DEV_BSIZE;
			ssp->ss_nextsum = next_addr;
		}
		ssp->ss_cksum = cksum(&ssp->ss_cksum,
		    LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum));
		sp->sbp = NULL;
	} else
		/* Calculate cksum on the previous segment summary. */
		ssp->ss_cksum = cksum(&ssp->ss_cksum,
		    LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum));
}

static SEGMENT *
lfs_gather(fs, sp, vp, match)
	LFS *fs;
	SEGMENT *sp;
	VNODE *vp;
	int (*match) __P((BUF *));
{
	BUF **bpp, *bp, *nbp;
	FINFO *fip;
	INODE *ip;
	int count, s, version;
	daddr_t *lbp, *start_lbp;

	ip = VTOI(vp);
	bpp = sp->cbpp;
	fip = sp->fip;
	version = fip->fi_version;
	start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks];
	count = 0;

	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("lfs_write: not dirty");
		if (!match(bp))
			continue;
		bremfree(bp);
		bp->b_flags |= B_BUSY | B_CALL;
		bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev;
		bp->b_iodone = lfs_biocallback;

		*lbp++ = bp->b_lblkno;
		*sp->cbpp++ = bp;
		fip->fi_nblocks++;
		sp->sum_bytes_left -= sizeof(daddr_t);
		sp->seg_bytes_left -= bp->b_bufsize;
		if (sp->sum_bytes_left < sizeof(daddr_t) ||
		    sp->seg_bytes_left < fs->lfs_bsize) {
			/*
			 * We are about to allocate a new summary block
			 * and possibly a new segment.  So, we need to
			 * sort the blocks we've done so far, and assign
			 * the disk addresses, so we can start a new block
			 * correctly.  We may be doing I/O so we need to
			 * release the s lock before doing anything.
			 */
			splx(s);
			lfs_updatemeta(fs, sp, ip, start_lbp, bpp,
			    lbp - start_lbp);

			/* Put this file in the segment summary. */
			((SEGSUM *)(sp->segsum))->ss_nfinfo++;

			if (sp->seg_bytes_left < fs->lfs_bsize) {
				lfs_writeseg(fs, sp);
				sp = lfs_newseg(fs);
			} else if (sp->sum_bytes_left < sizeof(daddr_t))
				lfs_newsum(fs, sp);
			fip = sp->fip;
			fip->fi_ino = ip->i_number;
			fip->fi_version = version;
			bpp = sp->cbpp;
			/* You know that you have a new FINFO either way. */
			start_lbp = lbp = fip->fi_blocks;
			s = splbio();
		}
	}
	splx(s);
	lfs_updatemeta(fs, sp, ip, start_lbp, bpp, lbp - start_lbp);

	return (sp);
}

static BUF *
lfs_newbuf(fs, daddr, size)
	LFS *fs;
	daddr_t daddr;
	size_t size;
{
	BUF *bp;
	VNODE *devvp;

	bp = getnewbuf();
	bremhash(bp);

	/*
	 * XXX
	 * Need a devvp, but this isn't a particularly clean way to get one.
	 * devvp = VTOI(fs->lfs_ivnode)->i_devvp;
	 */
#ifdef NOTWORKING
	devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp;
	bgetvp(devvp, bp);
#endif
	bp->b_vp = fs->lfs_ivnode;
	bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev;
	bp->b_bcount = 0;
	bp->b_blkno = bp->b_lblkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_flags |= B_CALL | B_DELWRI | B_NOCACHE | B_WRITE;
	bp->b_iodone = lfs_biocallback;
#ifdef PROBABLYWRONG
	binshash(bp, BUFHASH(devvp, daddr));
#endif
	allocbuf(bp, size);
#ifdef PROBABLYWRONG
	reassignbuf(bp, devvp);
#endif
	return (bp);
}

/*
 * Start a new segment.
 */
static SEGMENT *
lfs_newseg(fs)
	LFS *fs;
{
	SEGMENT *sp;
	SEGUSE *sup;

printf("lfs_newseg\n");
	/* Get buffer space to write out a segment. */
	sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK);
	sp->ibp = NULL;
	sp->sbp = NULL;
	sp->cbpp = sp->bpp =
	    malloc(fs->lfs_ssize * sizeof(BUF *), M_SEGMENT, M_WAITOK);
	sp->nextp = NULL;
	sp->sum_bytes_left = LFS_SUMMARY_SIZE;
	sp->seg_bytes_left = (fs->lfs_segmask + 1) - LFS_SUMMARY_SIZE;
	sp->saddr = fs->lfs_nextseg;
printf("lfs_newseg: About to write segment %lx\n", sp->saddr);
	sp->sum_addr = sp->saddr + sp->seg_bytes_left / DEV_BSIZE;
	sp->ninodes = 0;
	sp->sum_num = -1;
	sp->seg_number =
	    (sp->saddr - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize);

	/* Initialize segment summary info. */
	lfs_newsum(fs, sp);
	sup = fs->lfs_segtab + sp->seg_number;

	if (sup->su_nbytes != 0) {
		/* This is a segment containing a super block. */
		FINFO *fip;
		daddr_t lbn, *lbnp;
		SEGSUM *ssp;

		ssp = (SEGSUM *)sp->segsum;
		ssp->ss_nfinfo++;
		fip = sp->fip;
		fip->fi_nblocks = LFS_SBPAD >> fs->lfs_bshift;
		fip->fi_version = 1;
		fip->fi_ino = LFS_UNUSED_INUM;
		sp->saddr += fsbtodb(fs, fip->fi_nblocks);
		lbnp = fip->fi_blocks;
		for (lbn = 0; lbn < fip->fi_nblocks; lbn++)
			*lbnp++ = lbn;
		sp->seg_bytes_left -= sup->su_nbytes;
		sp->sum_bytes_left -=
		    sizeof(FINFO) + (fip->fi_nblocks - 1) * sizeof(daddr_t);
		sp->fip = (FINFO *)lbnp;
	}
	return (sp);
}

static void
lfs_newsum(fs, sp)
	LFS *fs;
	SEGMENT *sp;
{
	SEGSUM *ssp;
	int npages, nseg_pages, sums_per_blk;

printf("lfs_newsum\n");
	lfs_endsum(fs, sp, 1);
	++sp->sum_num;
	if (sp->sbp == NULL) {
		/* Allocate a new buffer. */
		if (sp->seg_bytes_left < fs->lfs_bsize) {
			lfs_writeseg(fs, sp);
			sp = lfs_newseg(fs);
		}
		sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE;
		nseg_pages = 1 + sp->sum_num / sums_per_blk;
		npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs);
		sp->sum_addr = fs->lfs_sboffs[0] +
		    (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize)
		    - fsbtodb(fs, npages);
		sp->sbp = lfs_newbuf(fs, sp->sum_addr, fs->lfs_bsize);
		sp->bpp[fs->lfs_ssize - npages] = sp->sbp;
printf("Inserting summary block, address %x at index %d\n",
    sp->sbp->b_lblkno, fs->lfs_ssize - npages);
		sp->seg_bytes_left -= fs->lfs_bsize;
		sp->segsum =
		    sp->sbp->b_un.b_addr + fs->lfs_bsize - LFS_SUMMARY_SIZE;
		sp->sum_addr += (fs->lfs_bsize - LFS_SUMMARY_SIZE) / DEV_BSIZE;
	} else {
		sp->segsum -= LFS_SUMMARY_SIZE;
		sp->sum_addr -= LFS_SUMMARY_SIZE / DEV_BSIZE;
	}

	ssp = sp->segsum;
	ssp->ss_next = fs->lfs_nextseg = lfs_nextseg(fs);
	ssp->ss_prev = fs->lfs_lastseg;

	/* Initialize segment summary info. */
	sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM));
	sp->fip->fi_nblocks = 0;
	ssp->ss_nextsum = (daddr_t)-1;
	ssp->ss_create = time.tv_sec;

	ssp->ss_nfinfo = 0;
	ssp->ss_ninos = 0;
	sp->sum_bytes_left -= LFS_SUMMARY_SIZE;
	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
}

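/*
 * Worked example (illustrative numbers only, not taken from this source):
 * with an assumed lfs_bsize of 4096 and LFS_SUMMARY_SIZE of 512, a summary
 * block holds sums_per_blk = 8 summaries.  For the first summary of a
 * segment (sum_num == 0) with no inode blocks yet, nseg_pages == 1 and
 * npages == 1, so lfs_newsum (and the matching arithmetic in lfs_endsum)
 * places the summary block one file-system block below the start of the
 * next segment; the SEGSUM itself sits in the last 512 bytes of that
 * block, and each later summary moves down by LFS_SUMMARY_SIZE within it.
 */
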
#define seginc(fs, sn)	((sn + 1) % fs->lfs_nseg)
static daddr_t
lfs_nextseg(fs)
	LFS *fs;
{
	int segnum, sn;
	SEGUSE *sup;

	segnum = satosn(fs, fs->lfs_nextseg);
	for (sn = seginc(fs, segnum); sn != segnum; sn = seginc(fs, sn))
		if (!(fs->lfs_segtab[sn].su_flags & SEGUSE_DIRTY))
			break;

	if (sn == segnum)
		panic("lfs_nextseg: file system full");		/* XXX */
	return (sntosa(fs, sn));
}

/*
 * Update the metadata that points to the blocks listed in the FIP
 * array.
 */
static void
lfs_updatemeta(fs, sp, ip, lbp, bpp, nblocks)
	LFS *fs;
	SEGMENT *sp;
	INODE *ip;
	daddr_t *lbp;
	BUF **bpp;
	int nblocks;
{
	SEGUSE *segup;
	BUF **lbpp, *bp, *mbp;
	daddr_t da, iblkno;
	int db_per_fsb, error, i, oldsegnum;
	long lbn;

printf("lfs_updatemeta of %d blocks\n", nblocks);
	if ((nblocks == 0) && (ip->i_flag & (IMOD|IACC|IUPD|ICHG)) == 0)
		return;

	/* First sort the blocks and add disk addresses. */
	shellsort(bpp, lbp, nblocks);

	db_per_fsb = 1 << fs->lfs_fsbtodb;
	for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) {
		(*lbpp)->b_blkno = sp->saddr;
		sp->saddr += db_per_fsb;
	}

	for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) {
		lbn = lbp[i];
printf("lfs_updatemeta: block %d\n", lbn);
		if (error = lfs_bmap(ip, lbn, &da))
			panic("lfs_updatemeta: lfs_bmap returned error");

		if (da) {
			/* Update segment usage information. */
			oldsegnum = (da - fs->lfs_sboffs[0]) /
			    fsbtodb(fs, fs->lfs_ssize);
			segup = fs->lfs_segtab + oldsegnum;
			segup->su_lastmod = time.tv_sec;
			if ((segup->su_nbytes -= fs->lfs_bsize) < 0)
				printf("lfs_updatemeta: negative bytes %s %d\n",
				    "in segment", oldsegnum);
		}

		/*
		 * Now change whoever points to lbn.  We could start with the
		 * smallest (most negative) block number in these if clauses,
		 * but we assume that indirect blocks are least common, and
		 * handle them separately.
		 */
		bp = NULL;
		if (lbn < 0) {
			if (lbn < -NIADDR) {
printf("lfs_updatemeta: changing indirect block %d\n", D_INDIR);
				if (error = bread(ITOV(ip), D_INDIR,
				    fs->lfs_bsize, NOCRED, &bp))
					panic("lfs_updatemeta: error on bread");

				bp->b_un.b_daddr[-lbn % NINDIR(fs)] =
				    (*lbpp)->b_blkno;
			} else
				ip->i_din.di_ib[-lbn - 1] = (*lbpp)->b_blkno;

		} else if (lbn < NDADDR)
			ip->i_din.di_db[lbn] = (*lbpp)->b_blkno;
		else if ((lbn -= NDADDR) < NINDIR(fs)) {
printf("lfs_updatemeta: changing indirect block %d\n", S_INDIR);
			if (error = bread(ITOV(ip), S_INDIR, fs->lfs_bsize,
			    NOCRED, &bp))
				panic("lfs_updatemeta: bread returned error");

			bp->b_un.b_daddr[lbn] = (*lbpp)->b_blkno;
		} else if ((lbn = (lbn - NINDIR(fs)) / NINDIR(fs)) <
		    NINDIR(fs)) {

			iblkno = -(lbn + NIADDR + 1);
printf("lfs_updatemeta: changing indirect block %d\n", iblkno);
			if (error = bread(ITOV(ip), iblkno, fs->lfs_bsize,
			    NOCRED, &bp))
				panic("lfs_updatemeta: bread returned error");

			bp->b_un.b_daddr[lbn % NINDIR(fs)] = (*lbpp)->b_blkno;
		} else
			panic("lfs_updatemeta: logical block number too large");
		if (bp)
			lfs_bwrite(bp);
	}
}

static void
lfs_writeckp(fs, sp)
	LFS *fs;
	SEGMENT *sp;
{
	BUF *bp;
	FINFO *fip;
	INODE *ip;
	SEGUSE *sup;
	daddr_t *lbp;
	int bytes_needed, i;
	void *xp;

printf("lfs_writeckp\n");
	/*
	 * This will write the dirty ifile blocks, but not the segusage
	 * table nor the ifile inode.
	 */
	sp = lfs_writefile(sp, fs, fs->lfs_ivnode, 1);

	/*
	 * Make sure that the segment usage table and the ifile inode will
	 * fit in this segment.  If they won't, put them in the next segment.
	 */
	bytes_needed = fs->lfs_segtabsz << fs->lfs_bshift;
	if (sp->ninodes % INOPB(fs) == 0)
		bytes_needed += fs->lfs_bsize;

	if (sp->seg_bytes_left < bytes_needed) {
		lfs_writeseg(fs, sp);
		sp = lfs_newseg(fs);
	} else if (sp->sum_bytes_left < (fs->lfs_segtabsz * sizeof(daddr_t)))
		lfs_newsum(fs, sp);

#ifdef DEBUG
	if (sp->seg_bytes_left < bytes_needed)
		panic("lfs_writeckp: unable to write checkpoint");
#endif

	/*
	 * Now, update the segment usage information and the ifile inode,
	 * and write them out.
	 */
	sup = fs->lfs_segtab + sp->seg_number;
	sup->su_nbytes = (fs->lfs_segmask + 1) - sp->seg_bytes_left +
	    bytes_needed;
	sup->su_lastmod = time.tv_sec;
	sup->su_flags = SEGUSE_DIRTY;

	/* Get buffers for the segusage table and write it out. */
	ip = VTOI(fs->lfs_ivnode);
	fip = sp->fip;
	lbp = &fip->fi_blocks[fip->fi_nblocks];
	for (xp = fs->lfs_segtab, i = 0; i < fs->lfs_segtabsz;
	    i++, xp += fs->lfs_bsize, lbp++) {
		bp = lfs_newbuf(fs, sp->saddr, fs->lfs_bsize);
		*sp->cbpp++ = bp;
		bcopy(xp, bp->b_un.b_words, fs->lfs_bsize);
		ip->i_din.di_db[i] = sp->saddr;
		sp->saddr += (1 << fs->lfs_fsbtodb);
		*lbp = i;
		fip->fi_nblocks++;
	}
	sp = lfs_writeinode(fs, sp, fs->lfs_ivnode);
	lfs_writeseg(fs, sp);
	lfs_writesuper(fs, sp);
}
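
/*
 * Descriptive summary (added commentary, not original text): a checkpoint
 * as written above consists of (1) the dirty ifile data blocks, (2) an
 * in-segment copy of the segment usage table with the ifile inode's direct
 * blocks repointed at it, (3) the ifile inode itself, whose address is
 * recorded in lfs_idaddr by lfs_writeinode, and (4) both superblock copies
 * written by lfs_writesuper.
 */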

/*
 * XXX -- I think we need to figure out what to do if we write
 * the segment and find more dirty blocks when we're done.
 */
static SEGMENT *
lfs_writefile(sp, fs, vp, do_ckp)
	SEGMENT *sp;
	LFS *fs;
	VNODE *vp;
	int do_ckp;
{
	FINFO *fip;
	INODE *ip;

	/* Initialize the FINFO structure. */
	ip = VTOI(vp);
printf("lfs_writefile: node %d\n", ip->i_number);
loop:
	sp->fip->fi_nblocks = 0;
	sp->fip->fi_ino = ip->i_number;
	if (ip->i_number != LFS_IFILE_INUM)
		sp->fip->fi_version = lfs_getversion(fs, ip->i_number);
	else
		sp->fip->fi_version = 1;

	sp = lfs_gather(fs, sp, vp, match_data);
	if (do_ckp) {
		sp = lfs_gather(fs, sp, vp, match_indir);
		sp = lfs_gather(fs, sp, vp, match_dindir);
	}

(void)printf("lfs_writefile: adding %d blocks to segment\n",
    sp->fip->fi_nblocks);
	/*
	 * Update the inode for this file and reflect the new inode
	 * address in the ifile.  If this is the ifile, don't update
	 * the inode, because we're checkpointing and will update the
	 * inode with the segment usage information (so we mustn't
	 * bump the finfo pointer either).
	 */
	if (ip->i_number != LFS_IFILE_INUM) {
		sp = lfs_writeinode(fs, sp, vp);
		fip = sp->fip;
		if (fip->fi_nblocks) {
			((SEGSUM *)(sp->segsum))->ss_nfinfo++;
			sp->fip = (FINFO *)((u_long)fip + sizeof(FINFO) +
			    sizeof(u_long) * (fip->fi_nblocks - 1));
		}
	}
	return (sp);
}

static SEGMENT *
lfs_writeinode(fs, sp, vp)
	LFS *fs;
	SEGMENT *sp;
	VNODE *vp;
{
	BUF *bp;
	INODE *ip;
	SEGSUM *ssp;
	daddr_t iaddr, next_addr;
	int npages, nseg_pages, sums_per_blk;
	struct dinode *dip;

printf("lfs_writeinode\n");
	sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE;
	if (sp->ibp == NULL) {
		/* Allocate a new buffer. */
		if (sp->seg_bytes_left < fs->lfs_bsize) {
			lfs_writeseg(fs, sp);
			sp = lfs_newseg(fs);
		}
		nseg_pages = (sp->sum_num + sums_per_blk) / sums_per_blk;
		npages = nseg_pages + (sp->ninodes + INOPB(fs)) / INOPB(fs);
		next_addr = fs->lfs_sboffs[0] +
		    (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize)
		    - fsbtodb(fs, npages);
		sp->ibp = lfs_newbuf(fs, next_addr, fs->lfs_bsize);
		sp->ibp->b_flags |= B_BUSY;
		sp->bpp[fs->lfs_ssize - npages] = sp->ibp;
		sp->seg_bytes_left -= fs->lfs_bsize;
printf("alloc inode block @ daddr %x, bp = %x inserted at %d\n",
    next_addr, sp->ibp, fs->lfs_ssize - npages);
	}
	ip = VTOI(vp);
	bp = sp->ibp;
	dip = bp->b_un.b_dino + (sp->ninodes % INOPB(fs));
	bcopy(&ip->i_din, dip, sizeof(struct dinode));
	iaddr = bp->b_blkno;
	++sp->ninodes;
	ssp = sp->segsum;
	++ssp->ss_ninos;
	if (sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;
	if (ip->i_number == LFS_IFILE_INUM)
		fs->lfs_idaddr = iaddr;
	else
		lfs_iset(ip, iaddr, ip->i_atime);	/* Update ifile */
	ip->i_flag &= ~(IMOD|IACC|IUPD|ICHG);		/* make inode clean */
	return (sp);
}

static void
lfs_writeseg(fs, sp)
	LFS *fs;
	SEGMENT *sp;
{
	BUF **bpp;
	SEGSUM *ssp;
	SEGUSE *sup;
	VNODE *devvp;
	int nblocks, nbuffers, ninode_blocks, nsegsums, nsum_pb;
	int i, metaoff, nmeta;
struct buf **xbp; int xi;

printf("lfs_writeseg\n");
	fs->lfs_lastseg = sntosa(fs, sp->seg_number);
	lfs_endsum(fs, sp, 0);

#ifdef HELLNO
	/* Finish off any inodes. */
	if (sp->ibp)
		brelse(sp->ibp);
#endif

	/*
	 * Copy inode and summary block buffer pointers down so they are
	 * contiguous with the page buffer pointers.
	 */
	ssp = sp->segsum;
	nsum_pb = fs->lfs_bsize / LFS_SUMMARY_SIZE;
	nbuffers = sp->cbpp - sp->bpp;
	nsegsums = 1 + sp->sum_num / nsum_pb;
	ninode_blocks = (sp->ninodes + INOPB(fs) - 1) / INOPB(fs);
	nmeta = ninode_blocks + nsegsums;
	metaoff = fs->lfs_ssize - nmeta;
	nblocks = nbuffers + nmeta;
	if (sp->bpp + metaoff != sp->cbpp)
		bcopy(sp->bpp + metaoff, sp->cbpp, sizeof(BUF *) * nmeta);
	sp->cbpp += nmeta;

	sup = fs->lfs_segtab + sp->seg_number;
	sup->su_nbytes = nblocks << fs->lfs_bshift;
	sup->su_lastmod = time.tv_sec;
	sup->su_flags = SEGUSE_DIRTY;

	/*
	 * Since we need to guarantee that the summary block gets written
	 * last, we issue the writes in two sets: the first n-1 buffers now,
	 * and then, after they have completed, the final buffer.  Only when
	 * that final write completes is the segment valid.
	 */
	devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp;
	/*
	 * Since no writes are yet scheduled, no need to block here; if we
	 * scheduled the writes at multiple points, we'd need an splbio()
	 * here.
	 */
	fs->lfs_iocount = nblocks - 1;
	sp->nextp = fs->lfs_seglist;
	fs->lfs_seglist = sp;

	for (bpp = sp->bpp, i = 0; i < (nblocks - 1); i++, ++bpp)
		/* (*(devvp->v_op->vop_strategy)) */ sdstrategy(*bpp);
}
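
/*
 * Descriptive note (added commentary, not original text): the ordering
 * guarantee works as follows.  lfs_writeseg schedules only the first
 * nblocks - 1 buffers and sets lfs_iocount to that count; each completion
 * runs lfs_biocallback, and the callback that drops lfs_iocount to zero
 * then pushes the per-segment summary buffer (*(sp->cbpp - 1)) to the
 * device.  A crash before that last write leaves the segment without a
 * valid summary, so the partial segment is never treated as valid.
 */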

static void
lfs_writesuper(fs, sp)
	LFS *fs;
	SEGMENT *sp;
{
	BUF *bp;
	VNODE *devvp;

printf("lfs_writesuper\n");
	/* Wait for the segment write to complete. */
	/* XXX probably should do this: biowait(*(sp->cbpp - 1)); */

	/* Get a buffer for the super block. */
	fs->lfs_cksum = cksum(fs, sizeof(LFS) - sizeof(fs->lfs_cksum));
	bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD);
	bp->b_flags &= ~B_CALL;
	bp->b_vp = NULL;
	bp->b_iodone = NULL;
	bcopy(fs, bp->b_un.b_lfs, sizeof(LFS));

	/* Write the first superblock; wait. */
	devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp;
#ifdef MOVETONEWBUF
	bp->b_dev = devvp->v_rdev;
#endif
	(*devvp->v_op->vop_strategy)(bp);
	biowait(bp);

	/* Now, write the second one, for which we don't have to wait. */
	bp->b_flags &= ~B_DONE;
	bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
	(*devvp->v_op->vop_strategy)(bp);
	brelse(bp);
}

/* Block match routines used when traversing the dirty block chain. */
static int
match_data(bp)
	BUF *bp;
{
	return (bp->b_lblkno >= 0);
}

static int
match_dindir(bp)
	BUF *bp;
{
	return (bp->b_lblkno == D_INDIR);
}

/*
 * These are single indirect blocks.  There are three types:
 *	the one in the inode (address S_INDIR = -1);
 *	the ones that hang off of D_INDIR, the double indirect block in the
 *	    inode -- these all have addresses in the range -2NINDIR to
 *	    -(3NINDIR-1);
 *	the ones that hang off of a double indirect that hangs off of the
 *	    triple indirect -- these all have addresses < -(NINDIR^2).
 * Since we currently don't support triple indirect blocks, this gets
 * simpler: we just need to look for block numbers less than -NIADDR.
 */
static int
match_indir(bp)
	BUF *bp;
{
	return (bp->b_lblkno == S_INDIR || bp->b_lblkno < -NIADDR);
}
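
/*
 * Worked example of the negative-lbn encoding above (illustrative values,
 * assuming NIADDR == 3 and NINDIR(fs) == 2048, which are not taken from
 * this file): the in-inode indirect blocks are -1 (S_INDIR), -2, and -3,
 * which lfs_updatemeta maps to di_ib[-lbn - 1]; single indirect blocks
 * hanging off D_INDIR occupy -4096 down to -6143; and anything below
 * -NIADDR (i.e. < -3) is therefore a deeper indirect block, which is
 * exactly what match_indir tests for.
 */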

/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  Note that we cast the array
 * of logical block numbers to unsigned in this routine so that the
 * negative block numbers (meta data blocks) sort AFTER the data blocks.
 */
static void
shellsort(bp_array, lb_array, nmemb)
	BUF **bp_array;
	daddr_t *lb_array;
	register int nmemb;
{
	static int __rsshell_increments[] = { 4, 1, 0 };
	register int incr, *incrp, t1, t2;
	BUF *bp_temp;
	u_long lb_temp;

	for (incrp = __rsshell_increments; incr = *incrp++;)
		for (t1 = incr; t1 < nmemb; ++t1)
			for (t2 = t1 - incr; t2 >= 0;)
				/* Compare as unsigned, per the comment above,
				   so negative (meta data) lbns sort last. */
				if ((u_long)lb_array[t2] >
				    (u_long)lb_array[t2 + incr]) {
					lb_temp = lb_array[t2];
					lb_array[t2] = lb_array[t2 + incr];
					lb_array[t2 + incr] = lb_temp;
					bp_temp = bp_array[t2];
					bp_array[t2] = bp_array[t2 + incr];
					bp_array[t2 + incr] = bp_temp;
					t2 -= incr;
				} else
					break;
}
#endif /* LOGFS */