Commit | Line | Data |
---|---|---|
84c30241 KB |
1 | /* |
2 | * Copyright (c) 1991 Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * %sccs.include.redist.c% | |
6 | * | |
275ca4f0 | 7 | * @(#)lfs_segment.c 5.2 (Berkeley) %G% |
84c30241 KB |
8 | */ |
9 | ||
275ca4f0 | 10 | #ifdef LOGFS |
84c30241 KB |
11 | #include "param.h" |
12 | #include "systm.h" | |
13 | #include "namei.h" | |
14 | #include "resourcevar.h" | |
15 | #include "kernel.h" | |
16 | #include "file.h" | |
17 | #include "stat.h" | |
18 | #include "buf.h" | |
19 | #include "proc.h" | |
20 | #include "conf.h" | |
21 | #include "vnode.h" | |
22 | #include "specdev.h" | |
23 | #include "fifo.h" | |
24 | #include "malloc.h" | |
25 | #include "mount.h" | |
26 | #include "../ufs/lockf.h" | |
27 | #include "../ufs/quota.h" | |
28 | #include "../ufs/inode.h" | |
29 | #include "../ufs/dir.h" | |
30 | #include "../ufs/ufsmount.h" | |
31 | #include "lfs.h" | |
32 | #include "lfs_extern.h" | |
33 | ||
34 | /* | |
275ca4f0 KB |
35 | Add a check so that if the segment is empty, you don't write it. |
36 | Write the code with lfs_ialloc to allocate a new page of inodes if you have to. | |
37 | Make an incoming sync wait until the previous one finishes. Keith | |
38 | will write this. When this happens, we no longer have to be | |
39 | able to chain superblocks together and handle multiple segments | |
40 | writing -- Seems like we can call biowait to wait for an io. | |
41 | However, I don't think we want to wait on the summary I/O | |
42 | necessarily, because if we've got lots of dirty buffers piling | |
43 | up, it would be nice to process them and get the segment all | |
44 | ready to write. Perhaps we can just wait before firing up the | |
45 | next set of writes, rather than waiting to start doing anything. | |
46 | Also -- my lfs_writesuper should wait until all the segment writes | |
47 | are done (I added a biowait, but we need to make sure that the SEGMENT | |
48 | structure hasn't been freed before we get there). | |
49 | Need to keep vnode v_numoutput up to date for pending writes? | |
50 | ???Could actually fire off the datablock writes before you finish. This | |
51 | would give them a chance to get started earlier... | |
84c30241 KB |
52 | */ |
53 | ||
54 | static int lfs_biocallback __P((BUF *)); | |
55 | static void lfs_endsum __P((LFS *, SEGMENT *, int)); | |
275ca4f0 KB |
56 | static SEGMENT *lfs_gather |
57 | __P((LFS *, SEGMENT *, VNODE *, int (*) __P((BUF *)))); | |
84c30241 KB |
58 | static BUF *lfs_newbuf __P((LFS *, daddr_t, size_t)); |
59 | static SEGMENT *lfs_newseg __P((LFS *)); | |
60 | static void lfs_newsum __P((LFS *, SEGMENT *)); | |
61 | static daddr_t lfs_nextseg __P((LFS *)); | |
275ca4f0 KB |
62 | static void lfs_updatemeta __P((LFS *, SEGMENT *, INODE *, daddr_t *, |
63 | BUF **, int)); | |
64 | static void lfs_writeckp __P((LFS *, SEGMENT *)); | |
65 | static SEGMENT *lfs_writefile __P((SEGMENT *, LFS *, VNODE *, int)); | |
66 | static SEGMENT *lfs_writeinode __P((LFS *, SEGMENT *, VNODE *)); | |
84c30241 | 67 | static void lfs_writeseg __P((LFS *, SEGMENT *)); |
275ca4f0 KB |
68 | static void lfs_writesuper __P((LFS *, SEGMENT *)); |
69 | static int match_data __P((BUF *)); | |
70 | static int match_dindir __P((BUF *)); | |
71 | static int match_indir __P((BUF *)); | |
72 | static void shellsort __P((BUF **, daddr_t *, register int)); | |
84c30241 KB |
73 | |
74 | /* | |
75 | * XXX -- when we add fragments in here, we will need to allocate a larger | |
76 | * buffer pointer array (sp->bpp). | |
77 | */ | |
78 | int | |
275ca4f0 | 79 | lfs_segwrite(mp, do_ckp) |
84c30241 | 80 | MOUNT *mp; |
275ca4f0 | 81 | int do_ckp; /* do a checkpoint too */ |
84c30241 KB |
82 | { |
83 | FINFO *fip; /* current file info structure */ | |
84 | INODE *ip; | |
85 | LFS *fs; | |
86 | VNODE *vp; | |
87 | SEGMENT *sp; | |
88 | ||
84c30241 | 89 | fs = VFSTOUFS(mp)->um_lfs; |
84c30241 KB |
90 | sp = lfs_newseg(fs); |
91 | loop: | |
92 | for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { | |
93 | /* | |
94 | * If the vnode that we are about to sync is no longer | |
95 | * associated with this mount point, start over. | |
96 | */ | |
84c30241 KB |
97 | if (vp->v_mount != mp) |
98 | goto loop; | |
99 | if (VOP_ISLOCKED(vp)) | |
100 | continue; | |
101 | ip = VTOI(vp); | |
102 | if (ip->i_number == LFS_IFILE_INUM) | |
103 | continue; | |
104 | if ((ip->i_flag & (IMOD|IACC|IUPD|ICHG)) == 0 && | |
105 | vp->v_dirtyblkhd == NULL) | |
106 | continue; | |
107 | if (vget(vp)) | |
108 | goto loop; | |
275ca4f0 | 109 | sp = lfs_writefile(sp, fs, vp, do_ckp); |
84c30241 KB |
110 | vput(vp); |
111 | } | |
275ca4f0 KB |
112 | if (do_ckp) |
113 | lfs_writeckp(fs, sp); | |
114 | else | |
115 | lfs_writeseg(fs, sp); | |
116 | #ifdef NOTLFS | |
117 | vflushbuf(ump->um_devvp, waitfor == MNT_WAIT ? B_SYNC : 0); | |
118 | #endif | |
84c30241 KB |
119 | return (0); |
120 | } | |
121 | ||
122 | static int | |
123 | lfs_biocallback(bp) | |
124 | BUF *bp; | |
125 | { | |
126 | LFS *fs; | |
127 | SEGMENT *sp, *next_sp; | |
128 | UFSMOUNT *ump; | |
129 | VNODE *devvp; | |
130 | ||
275ca4f0 KB |
131 | /* |
132 | * Grab the mount point for later (used to find the file system and | |
133 | * block device) and, if the contents are valid, move the buffer back | |
134 | * onto the clean list. | |
135 | */ | |
136 | printf("lfs_biocallback: buffer %x\n", bp, bp->b_lblkno); | |
84c30241 | 137 | ump = VFSTOUFS(bp->b_vp->v_mount); |
275ca4f0 KB |
138 | if (bp->b_flags & B_NOCACHE) |
139 | bp->b_vp = NULL; | |
140 | else { | |
141 | bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); | |
142 | reassignbuf(bp, bp->b_vp); | |
143 | } | |
144 | ||
84c30241 KB |
145 | fs = ump->um_lfs; |
146 | devvp = ump->um_devvp; | |
275ca4f0 KB |
147 | brelse(bp); /* move up... XXX */ |
148 | ||
149 | printf("\nlfs_biocallback: iocount %d\n", fs->lfs_iocount); | |
150 | if (fs->lfs_iocount == 0) { | |
151 | /* Wake up any other syncs waiting on this file system. */ | |
152 | return; | |
153 | } | |
154 | --fs->lfs_iocount; | |
155 | if (fs->lfs_iocount == 0) { | |
156 | printf("\nlfs_biocallback: doing summary write\n"); | |
84c30241 KB |
157 | /* Fire off summary writes */ |
158 | for (sp = fs->lfs_seglist; sp; sp = next_sp) { | |
159 | next_sp = sp->nextp; | |
275ca4f0 KB |
160 | #ifdef MOVETONEWBUF |
161 | (*(sp->cbpp - 1))->b_dev = bp->b_dev; | |
162 | #endif | |
163 | (devvp->v_op->vop_strategy)(*(sp->cbpp - 1)); | |
84c30241 KB |
164 | free(sp->bpp, M_SEGMENT); |
165 | free(sp, M_SEGMENT); | |
166 | } | |
167 | } | |
168 | } | |
169 | ||
84c30241 KB |
170 | static void |
171 | lfs_endsum(fs, sp, calc_next) | |
172 | LFS *fs; | |
173 | SEGMENT *sp; | |
174 | int calc_next; /* if 1, calculate next, else -1 */ | |
175 | { | |
176 | BUF *bp; | |
177 | SEGSUM *ssp; | |
178 | daddr_t next_addr; | |
275ca4f0 KB |
179 | int npages, nseg_pages, nsums_per_blk; |
180 | ||
181 | /* printf("lfs_endsum\n"); /**/ | |
182 | if (sp->sbp == NULL) | |
183 | return; | |
84c30241 | 184 | |
84c30241 KB |
185 | ssp = sp->segsum; |
186 | if (!calc_next) | |
187 | ssp->ss_nextsum = (daddr_t) -1; | |
275ca4f0 KB |
188 | else |
189 | ssp->ss_nextsum = sp->sum_addr - LFS_SUMMARY_SIZE / DEV_BSIZE; | |
84c30241 | 190 | |
275ca4f0 | 191 | if ((sp->sum_num % (fs->lfs_bsize / LFS_SUMMARY_SIZE)) == (nsums_per_blk - 1)) { |
84c30241 | 192 | /* |
275ca4f0 KB |
193 | * This buffer is now full. Compute the next address if appropriate |
194 | * and the checksum, and close the buffer by setting sp->sbp NULL. | |
84c30241 | 195 | */ |
275ca4f0 KB |
196 | if (calc_next) { |
197 | nsums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; | |
198 | nseg_pages = 1 + sp->sum_num / nsums_per_blk; | |
199 | npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); | |
200 | next_addr = fs->lfs_sboffs[0] + | |
201 | (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) | |
202 | - fsbtodb(fs, (npages - 1)) - LFS_SUMMARY_SIZE / DEV_BSIZE; | |
84c30241 | 203 | ssp->ss_nextsum = next_addr; |
84c30241 | 204 | } |
275ca4f0 KB |
205 | ssp->ss_cksum = cksum(&ssp->ss_cksum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); |
206 | sp->sbp = NULL; | |
207 | } else | |
84c30241 KB |
208 | /* Calculate cksum on previous segment summary */ |
209 | ssp->ss_cksum = cksum(&ssp->ss_cksum, | |
210 | LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); | |
275ca4f0 KB |
211 | } |
212 | ||
213 | static SEGMENT * | |
214 | lfs_gather(fs, sp, vp, match) | |
215 | LFS *fs; | |
216 | SEGMENT *sp; | |
217 | VNODE *vp; | |
218 | int (*match) __P((BUF *)); | |
219 | { | |
220 | BUF **bpp, *bp, *nbp; | |
221 | FINFO *fip; | |
222 | INODE *ip; | |
223 | int count, s, version; | |
224 | daddr_t *lbp, *start_lbp; | |
225 | ||
226 | ip = VTOI(vp); | |
227 | bpp = sp->cbpp; | |
228 | fip = sp->fip; | |
229 | version = fip->fi_version; | |
230 | start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; | |
231 | count = 0; | |
232 | ||
233 | s = splbio(); | |
234 | for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { | |
235 | nbp = bp->b_blockf; | |
236 | if ((bp->b_flags & B_BUSY)) | |
237 | continue; | |
238 | if ((bp->b_flags & B_DELWRI) == 0) | |
239 | panic("lfs_write: not dirty"); | |
240 | if (!match(bp)) | |
241 | continue; | |
242 | bremfree(bp); | |
243 | bp->b_flags |= B_BUSY | B_CALL; | |
244 | bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev; | |
245 | bp->b_iodone = lfs_biocallback; | |
246 | ||
247 | *lbp++ = bp->b_lblkno; | |
248 | *sp->cbpp++ = bp; | |
249 | fip->fi_nblocks++; | |
250 | sp->sum_bytes_left -= sizeof(daddr_t); | |
251 | sp->seg_bytes_left -= bp->b_bufsize; | |
252 | if (sp->sum_bytes_left < sizeof(daddr_t) || | |
253 | sp->seg_bytes_left < fs->lfs_bsize) { | |
254 | /* | |
255 | * We are about to allocate a new summary block | |
256 | * and possibly a new segment. So, we need to | |
257 | * sort the blocks we've done so far, and assign | |
258 | * the disk addresses, so we can start a new block | |
259 | * correctly. We may be doing I/O so we need to | |
260 | * release the s lock before doing anything. | |
261 | */ | |
262 | splx(s); | |
263 | lfs_updatemeta(fs, sp, ip, start_lbp, bpp, | |
264 | lbp - start_lbp); | |
265 | ||
266 | /* Put this file in the segment summary */ | |
267 | ((SEGSUM *)(sp->segsum))->ss_nfinfo++; | |
268 | ||
269 | if (sp->seg_bytes_left < fs->lfs_bsize) { | |
270 | lfs_writeseg(fs, sp); | |
271 | sp = lfs_newseg(fs); | |
272 | } else if (sp->sum_bytes_left < sizeof(daddr_t)) | |
273 | lfs_newsum(fs, sp); | |
274 | fip = sp->fip; | |
275 | fip->fi_ino = ip->i_number; | |
276 | fip->fi_version = version; | |
277 | bpp = sp->cbpp; | |
278 | /* You know that you have a new FINFO either way */ | |
279 | start_lbp = lbp = fip->fi_blocks; | |
280 | s = splbio(); | |
281 | } | |
84c30241 | 282 | } |
275ca4f0 KB |
283 | splx(s); |
284 | lfs_updatemeta(fs, sp, ip, start_lbp, bpp, lbp - start_lbp); | |
285 | ||
286 | return(sp); | |
84c30241 KB |
287 | } |
288 | ||
275ca4f0 | 289 | |
84c30241 KB |
290 | static BUF * |
291 | lfs_newbuf(fs, daddr, size) | |
292 | LFS *fs; | |
293 | daddr_t daddr; | |
294 | size_t size; | |
295 | { | |
296 | BUF *bp; | |
297 | VNODE *devvp; | |
298 | ||
84c30241 KB |
299 | bp = getnewbuf(); |
300 | bremhash(bp); | |
301 | ||
302 | /* | |
303 | * XXX | |
304 | * Need a devvp, but this isn't a particularly clean way to get one. | |
275ca4f0 | 305 | * devvp = VTOI(fs->lfs_ivnode)->i_devvp; |
84c30241 | 306 | */ |
275ca4f0 KB |
307 | #ifdef NOTWORKING |
308 | devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp; | |
84c30241 | 309 | bgetvp(devvp, bp); |
275ca4f0 KB |
310 | #endif |
311 | bp->b_vp = fs->lfs_ivnode; | |
312 | bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev; | |
84c30241 | 313 | bp->b_bcount = 0; |
275ca4f0 | 314 | bp->b_blkno = bp->b_lblkno = daddr; |
84c30241 KB |
315 | bp->b_error = 0; |
316 | bp->b_resid = 0; | |
275ca4f0 KB |
317 | bp->b_flags |= B_CALL | B_DELWRI | B_NOCACHE | B_WRITE; |
318 | bp->b_iodone = lfs_biocallback; | |
319 | #ifdef PROBABLYWRONG | |
84c30241 | 320 | binshash(bp, BUFHASH(devvp, daddr)); |
275ca4f0 | 321 | #endif |
84c30241 | 322 | allocbuf(bp, size); |
275ca4f0 KB |
323 | #ifdef PROBABLYWRONG |
324 | reassignbuf(bp, devvp); | |
325 | #endif | |
84c30241 KB |
326 | return (bp); |
327 | } | |
328 | ||
329 | ||
330 | /* | |
331 | * Start a new segment | |
332 | */ | |
333 | static SEGMENT * | |
334 | lfs_newseg(fs) | |
335 | LFS *fs; | |
336 | { | |
337 | SEGMENT *sp; | |
338 | SEGUSE *sup; | |
339 | ||
340 | printf("lfs_newseg\n"); | |
341 | /* Get buffer space to write out a segment */ | |
342 | sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK); | |
275ca4f0 KB |
343 | sp->ibp = NULL; |
344 | sp->sbp = NULL; | |
84c30241 KB |
345 | sp->cbpp = sp->bpp = |
346 | malloc(fs->lfs_ssize * sizeof(BUF *), M_SEGMENT, M_WAITOK); | |
347 | sp->nextp = NULL; | |
348 | sp->sum_bytes_left = LFS_SUMMARY_SIZE; | |
349 | sp->seg_bytes_left = (fs->lfs_segmask + 1) - LFS_SUMMARY_SIZE; | |
350 | sp->saddr = fs->lfs_nextseg; | |
275ca4f0 | 351 | printf("lfs_newseg: About to write segment %lx\n", sp->saddr); |
84c30241 KB |
352 | sp->sum_addr = sp->saddr + sp->seg_bytes_left / DEV_BSIZE; |
353 | sp->ninodes = 0; | |
354 | sp->sum_num = -1; | |
275ca4f0 KB |
355 | sp->seg_number = |
356 | (sp->saddr - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize); | |
84c30241 KB |
357 | |
358 | /* initialize segment summary info */ | |
359 | lfs_newsum(fs, sp); | |
360 | sup = fs->lfs_segtab + sp->seg_number; | |
361 | ||
362 | if (sup->su_nbytes != 0) { | |
363 | /* This is a segment containing a super block */ | |
364 | FINFO *fip; | |
365 | daddr_t lbn, *lbnp; | |
275ca4f0 | 366 | SEGSUM *ssp; |
84c30241 | 367 | |
275ca4f0 KB |
368 | ssp = (SEGSUM *)sp->segsum; |
369 | ssp->ss_nfinfo++; | |
84c30241 KB |
370 | fip = sp->fip; |
371 | fip->fi_nblocks = LFS_SBPAD >> fs->lfs_bshift; | |
372 | fip->fi_version = 1; | |
373 | fip->fi_ino = LFS_UNUSED_INUM; | |
374 | sp->saddr += fsbtodb(fs, fip->fi_nblocks); | |
375 | lbnp = fip->fi_blocks; | |
376 | for (lbn = 0; lbn < fip->fi_nblocks; lbn++) | |
377 | *lbnp++ = lbn; | |
378 | sp->seg_bytes_left -= sup->su_nbytes; | |
379 | sp->sum_bytes_left -= | |
380 | sizeof(FINFO) + (fip->fi_nblocks - 1) * sizeof(daddr_t); | |
381 | sp->fip = (FINFO *)lbnp; | |
382 | } | |
383 | return(sp); | |
384 | } | |
385 | ||
386 | ||
387 | static void | |
388 | lfs_newsum(fs, sp) | |
389 | LFS *fs; | |
390 | SEGMENT *sp; | |
391 | { | |
392 | SEGSUM *ssp; | |
275ca4f0 | 393 | int npages, nseg_pages, sums_per_blk; |
84c30241 KB |
394 | |
395 | printf("lfs_newsum\n"); | |
275ca4f0 KB |
396 | lfs_endsum(fs, sp, 1); |
397 | ++sp->sum_num; | |
398 | if (sp->sbp == NULL) { | |
399 | /* Allocate a new buffer. */ | |
400 | if (sp->seg_bytes_left < fs->lfs_bsize) { | |
401 | lfs_writeseg(fs, sp); | |
402 | sp = lfs_newseg(fs); | |
403 | } | |
404 | sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; | |
405 | nseg_pages = 1 + sp->sum_num / sums_per_blk; | |
406 | npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); | |
407 | sp->sum_addr = fs->lfs_sboffs[0] + | |
408 | (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) | |
409 | - fsbtodb(fs, npages); | |
410 | sp->sbp = lfs_newbuf(fs, sp->sum_addr, fs->lfs_bsize); | |
411 | sp->bpp[fs->lfs_ssize - npages] = sp->sbp; | |
412 | printf("Inserting summary block, address %x at index %d\n", | |
413 | sp->sbp->b_lblkno, fs->lfs_ssize - npages); | |
414 | sp->seg_bytes_left -= fs->lfs_bsize; | |
415 | sp->segsum = sp->sbp->b_un.b_addr + fs->lfs_bsize - LFS_SUMMARY_SIZE; | |
416 | sp->sum_addr += (fs->lfs_bsize - LFS_SUMMARY_SIZE) / DEV_BSIZE; | |
84c30241 | 417 | } else { |
275ca4f0 KB |
418 | sp->segsum -= LFS_SUMMARY_SIZE; |
419 | sp->sum_addr -= LFS_SUMMARY_SIZE / DEV_BSIZE; | |
84c30241 KB |
420 | } |
421 | ||
275ca4f0 KB |
422 | ssp = sp->segsum; |
423 | ssp->ss_next = fs->lfs_nextseg = lfs_nextseg(fs); | |
424 | ssp->ss_prev = fs->lfs_lastseg; | |
425 | ||
84c30241 KB |
426 | /* Initialize segment summary info. */ |
427 | sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM)); | |
275ca4f0 | 428 | sp->fip->fi_nblocks = 0; |
84c30241 KB |
429 | ssp->ss_nextsum = (daddr_t)-1; |
430 | ssp->ss_create = time.tv_sec; | |
431 | ||
432 | ssp->ss_nfinfo = 0; | |
433 | ssp->ss_ninos = 0; | |
434 | sp->sum_bytes_left -= LFS_SUMMARY_SIZE; | |
435 | sp->seg_bytes_left -= LFS_SUMMARY_SIZE; | |
436 | } | |
437 | ||
438 | #define seginc(fs, sn) ((sn + 1) % fs->lfs_nseg) | |
439 | static daddr_t | |
440 | lfs_nextseg(fs) | |
441 | LFS *fs; | |
442 | { | |
443 | int segnum, sn; | |
444 | SEGUSE *sup; | |
445 | ||
84c30241 | 446 | segnum = satosn(fs, fs->lfs_nextseg); |
275ca4f0 KB |
447 | for (sn = seginc(fs, segnum); sn != segnum; sn = seginc(fs, sn)) |
448 | if (!(fs->lfs_segtab[sn].su_flags & SEGUSE_DIRTY)) | |
84c30241 | 449 | break; |
275ca4f0 | 450 | |
84c30241 KB |
451 | if (sn == segnum) |
452 | panic("lfs_nextseg: file system full"); /* XXX */ | |
453 | return(sntosa(fs, sn)); | |
454 | } | |
455 | ||
456 | /* | |
457 | * Update the metadata that points to the blocks listed in the FIP | |
458 | * array. | |
459 | */ | |
275ca4f0 KB |
460 | static void |
461 | lfs_updatemeta(fs, sp, ip, lbp, bpp, nblocks) | |
84c30241 | 462 | LFS *fs; |
275ca4f0 | 463 | SEGMENT *sp; |
84c30241 | 464 | INODE *ip; |
275ca4f0 | 465 | daddr_t *lbp; |
84c30241 | 466 | BUF **bpp; |
275ca4f0 | 467 | int nblocks; |
84c30241 KB |
468 | { |
469 | SEGUSE *segup; | |
275ca4f0 | 470 | BUF **lbpp, *bp, *mbp; |
84c30241 | 471 | daddr_t da, iblkno; |
275ca4f0 KB |
472 | int db_per_fsb, error, i, oldsegnum; |
473 | long lbn; | |
84c30241 | 474 | |
275ca4f0 KB |
475 | printf("lfs_updatemeta of %d blocks\n", nblocks); |
476 | if ((nblocks == 0) && (ip->i_flag & (IMOD|IACC|IUPD|ICHG)) == 0) | |
477 | return; | |
478 | ||
479 | /* First sort the blocks and add disk addresses */ | |
480 | shellsort(bpp, lbp, nblocks); | |
481 | ||
482 | db_per_fsb = 1 << fs->lfs_fsbtodb; | |
483 | for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) { | |
484 | (*lbpp)->b_blkno = sp->saddr; | |
485 | sp->saddr += db_per_fsb; | |
486 | } | |
487 | ||
488 | for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) { | |
489 | lbn = lbp[i]; | |
490 | printf("lfs_updatemeta: block %d\n", lbn); | |
84c30241 | 491 | if (error = lfs_bmap(ip, lbn, &da)) |
275ca4f0 | 492 | panic("lfs_updatemeta: lfs_bmap returned error"); |
84c30241 KB |
493 | |
494 | if (da) { | |
275ca4f0 | 495 | /* Update segment usage information */ |
84c30241 KB |
496 | oldsegnum = (da - fs->lfs_sboffs[0]) / |
497 | fsbtodb(fs, fs->lfs_ssize); | |
498 | segup = fs->lfs_segtab+oldsegnum; | |
499 | segup->su_lastmod = time.tv_sec; | |
500 | if ((segup->su_nbytes -= fs->lfs_bsize) < 0) | |
501 | printf("lfs_updatemeta: negative bytes %s %d\n", | |
502 | "in segment", oldsegnum); | |
503 | } | |
504 | ||
275ca4f0 KB |
505 | /* |
506 | * Now change whoever points to lbn. We could start with the | |
507 | * smallest (most negative) block number in these if clauses, | |
508 | * but we assume that indirect blocks are least common, and | |
509 | * handle them separately. | |
510 | */ | |
511 | bp = NULL; | |
512 | if (lbn < 0) { | |
513 | if (lbn < -NIADDR) { | |
514 | printf("lfs_updatemeta: changing indirect block %d\n", D_INDIR); | |
515 | if (error = bread(ITOV(ip), D_INDIR, | |
516 | fs->lfs_bsize, NOCRED, &bp)) | |
517 | panic("lfs_updatemeta: error on bread"); | |
518 | ||
519 | bp->b_un.b_daddr[-lbn % NINDIR(fs)] = | |
520 | (*lbpp)->b_blkno; | |
521 | } else | |
522 | ip->i_din.di_ib[-lbn-1] = (*lbpp)->b_blkno; | |
523 | ||
524 | } else if (lbn < NDADDR) | |
84c30241 KB |
525 | ip->i_din.di_db[lbn] = (*lbpp)->b_blkno; |
526 | else if ((lbn -= NDADDR) < NINDIR(fs)) { | |
527 | printf("lfs_updatemeta: changing indirect block %d\n", S_INDIR); | |
275ca4f0 KB |
528 | if (error = bread(ITOV(ip), S_INDIR, fs->lfs_bsize, |
529 | NOCRED, &bp)) | |
530 | panic("lfs_updatemeta: bread returned error"); | |
531 | ||
84c30241 | 532 | bp->b_un.b_daddr[lbn] = (*lbpp)->b_blkno; |
84c30241 KB |
533 | } else if ( (lbn = (lbn - NINDIR(fs)) / NINDIR(fs)) < |
534 | NINDIR(fs)) { | |
535 | ||
536 | iblkno = - (lbn + NIADDR + 1); | |
537 | printf("lfs_updatemeta: changing indirect block %d\n", iblkno); | |
275ca4f0 KB |
538 | if (error = bread(ITOV(ip), iblkno, fs->lfs_bsize, |
539 | NOCRED, &bp)) | |
540 | panic("lfs_updatemeta: bread returned error"); | |
541 | ||
84c30241 KB |
542 | bp->b_un.b_daddr[lbn % NINDIR(fs)] = (*lbpp)->b_blkno; |
543 | } | |
544 | else | |
275ca4f0 KB |
545 | panic("lfs_updatemeta: logical block number too large"); |
546 | if (bp) | |
547 | lfs_bwrite(bp); | |
84c30241 | 548 | } |
275ca4f0 KB |
549 | } |
550 | ||
551 | static void | |
552 | lfs_writeckp(fs, sp) | |
553 | LFS *fs; | |
554 | SEGMENT *sp; | |
555 | { | |
556 | BUF *bp; | |
557 | FINFO *fip; | |
558 | INODE *ip; | |
559 | SEGUSE *sup; | |
560 | daddr_t *lbp; | |
561 | int bytes_needed, i; | |
562 | void *xp; | |
563 | ||
564 | printf("lfs_writeckp\n"); | |
565 | /* | |
566 | * This will write the dirty ifile blocks, but not the segusage | |
567 | * table nor the ifile inode. | |
568 | */ | |
569 | sp = lfs_writefile(sp, fs, fs->lfs_ivnode, 1); | |
570 | ||
571 | /* | |
572 | * Make sure that the segment usage table and the ifile inode will | |
573 | * fit in this segment. If they won't, put them in the next segment | |
574 | */ | |
575 | bytes_needed = fs->lfs_segtabsz << fs->lfs_bshift; | |
576 | if (sp->ninodes % INOPB(fs) == 0) | |
577 | bytes_needed += fs->lfs_bsize; | |
578 | ||
579 | if (sp->seg_bytes_left < bytes_needed) { | |
580 | lfs_writeseg(fs, sp); | |
581 | sp = lfs_newseg(fs); | |
582 | } else if (sp->sum_bytes_left < (fs->lfs_segtabsz * sizeof(daddr_t))) | |
583 | lfs_newsum(fs, sp); | |
584 | ||
585 | #ifdef DEBUG | |
586 | if (sp->seg_bytes_left < bytes_needed) | |
587 | panic("lfs_writeckp: unable to write checkpoint"); | |
588 | #endif | |
589 | ||
590 | /* | |
591 | * Now, update the segment usage information and the ifile inode and | |
592 | * and write it out | |
593 | */ | |
594 | ||
595 | sup = fs->lfs_segtab + sp->seg_number; | |
596 | sup->su_nbytes = (fs->lfs_segmask + 1) - sp->seg_bytes_left + | |
597 | bytes_needed; | |
598 | sup->su_lastmod = time.tv_sec; | |
599 | sup->su_flags = SEGUSE_DIRTY; | |
600 | ||
601 | /* Get buffers for the segusage table and write it out */ | |
602 | ip = VTOI(fs->lfs_ivnode); | |
603 | fip = sp->fip; | |
604 | lbp = &fip->fi_blocks[fip->fi_nblocks]; | |
605 | for (xp = fs->lfs_segtab, i = 0; i < fs->lfs_segtabsz; | |
606 | i++, xp += fs->lfs_bsize, lbp++) { | |
607 | bp = lfs_newbuf(fs, sp->saddr, fs->lfs_bsize); | |
608 | *sp->cbpp++ = bp; | |
609 | bcopy(xp, bp->b_un.b_words, fs->lfs_bsize); | |
610 | ip->i_din.di_db[i] = sp->saddr; | |
611 | sp->saddr += (1 << fs->lfs_fsbtodb); | |
612 | *lbp = i; | |
613 | fip->fi_nblocks++; | |
614 | } | |
615 | sp = lfs_writeinode(fs, sp, fs->lfs_ivnode); | |
616 | lfs_writeseg(fs, sp); | |
617 | lfs_writesuper(fs, sp); | |
84c30241 KB |
618 | } |
619 | ||
620 | /* | |
84c30241 KB |
621 | * XXX -- I think we need to figure out what to do if we write |
622 | * the segment and find more dirty blocks when we're done. | |
623 | */ | |
624 | static SEGMENT * | |
275ca4f0 | 625 | lfs_writefile(sp, fs, vp, do_ckp) |
84c30241 KB |
626 | SEGMENT *sp; |
627 | LFS *fs; | |
628 | VNODE *vp; | |
275ca4f0 | 629 | int do_ckp; |
84c30241 | 630 | { |
84c30241 KB |
631 | FINFO *fip; |
632 | INODE *ip; | |
84c30241 KB |
633 | |
634 | /* initialize the FINFO structure */ | |
635 | ip = VTOI(vp); | |
636 | printf("lfs_writefile: node %d\n", ip->i_number); | |
637 | loop: | |
275ca4f0 KB |
638 | sp->fip->fi_nblocks = 0; |
639 | sp->fip->fi_ino = ip->i_number; | |
640 | if (ip->i_number != LFS_IFILE_INUM) | |
641 | sp->fip->fi_version = lfs_getversion(fs, ip->i_number); | |
642 | else | |
643 | sp->fip->fi_version = 1; | |
644 | ||
645 | sp = lfs_gather(fs, sp, vp, match_data); | |
646 | if (do_ckp) { | |
647 | sp = lfs_gather(fs, sp, vp, match_indir); | |
648 | sp = lfs_gather(fs, sp, vp, match_dindir); | |
649 | } | |
84c30241 | 650 | |
275ca4f0 KB |
651 | (void)printf("lfs_writefile: adding %d blocks to segment\n", |
652 | sp->fip->fi_nblocks); | |
653 | /* | |
654 | * Update the inode for this file and reflect new inode | |
655 | * address in the ifile. If this is the ifile, don't update | |
656 | * the inode, because we're checkpointing and will update the | |
657 | * inode with the segment usage information (so we musn't | |
658 | * bump the finfo pointer either). | |
659 | */ | |
660 | if (ip->i_number != LFS_IFILE_INUM) { | |
661 | sp = lfs_writeinode(fs, sp, vp); | |
662 | fip = sp->fip; | |
663 | if (fip->fi_nblocks) { | |
84c30241 | 664 | ((SEGSUM *)(sp->segsum))->ss_nfinfo++; |
275ca4f0 KB |
665 | sp->fip = (FINFO *)((u_long)fip + sizeof(FINFO) + |
666 | sizeof(u_long) * fip->fi_nblocks - 1); | |
84c30241 | 667 | } |
84c30241 KB |
668 | } |
669 | return(sp); | |
670 | } | |
671 | ||
275ca4f0 KB |
672 | static SEGMENT * |
673 | lfs_writeinode(fs, sp, vp) | |
674 | LFS *fs; | |
675 | SEGMENT *sp; | |
676 | VNODE *vp; | |
84c30241 | 677 | { |
275ca4f0 KB |
678 | BUF *bp; |
679 | INODE *ip; | |
680 | SEGSUM *ssp; | |
681 | daddr_t iaddr, next_addr; | |
682 | int npages, nseg_pages, sums_per_blk; | |
683 | struct dinode *dip; | |
684 | ||
685 | printf("lfs_writeinode\n"); | |
686 | sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; | |
687 | if (sp->ibp == NULL) { | |
688 | /* Allocate a new buffer. */ | |
689 | if (sp->seg_bytes_left < fs->lfs_bsize) { | |
690 | lfs_writeseg(fs, sp); | |
691 | sp = lfs_newseg(fs); | |
692 | } | |
693 | nseg_pages = (sp->sum_num + sums_per_blk) / sums_per_blk; | |
694 | npages = nseg_pages + (sp->ninodes + INOPB(fs)) / INOPB(fs); | |
695 | next_addr = fs->lfs_sboffs[0] + | |
696 | (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) | |
697 | - fsbtodb(fs, npages); | |
698 | sp->ibp = lfs_newbuf(fs, next_addr, fs->lfs_bsize); | |
699 | sp->ibp->b_flags |= B_BUSY; | |
700 | sp->bpp[fs->lfs_ssize - npages] = sp->ibp; | |
701 | sp->seg_bytes_left -= fs->lfs_bsize; | |
702 | printf("alloc inode block @ daddr %x, bp = %x inserted at %d\n", | |
703 | next_addr, sp->ibp, fs->lfs_ssize - npages); | |
704 | } | |
705 | ip = VTOI(vp); | |
706 | bp = sp->ibp; | |
707 | dip = bp->b_un.b_dino + (sp->ninodes % INOPB(fs)); | |
708 | bcopy(&ip->i_din, dip, sizeof(struct dinode)); | |
709 | iaddr = bp->b_blkno; | |
710 | ++sp->ninodes; | |
711 | ssp = sp->segsum; | |
712 | ++ssp->ss_ninos; | |
713 | if (sp->ninodes % INOPB(fs) == 0) | |
714 | sp->ibp = NULL; | |
715 | if (ip->i_number == LFS_IFILE_INUM) | |
716 | fs->lfs_idaddr = iaddr; | |
717 | else | |
718 | lfs_iset(ip, iaddr, ip->i_atime); /* Update ifile */ | |
719 | ip->i_flags &= ~(IMOD|IACC|IUPD|ICHG); /* make inode clean */ | |
720 | return(sp); | |
84c30241 KB |
721 | } |
722 | ||
723 | static void | |
724 | lfs_writeseg(fs, sp) | |
725 | LFS *fs; | |
726 | SEGMENT *sp; | |
727 | { | |
275ca4f0 | 728 | BUF **bpp; |
84c30241 KB |
729 | SEGSUM *ssp; |
730 | SEGUSE *sup; | |
731 | VNODE *devvp; | |
732 | int nblocks, nbuffers, ninode_blocks, nsegsums, nsum_pb; | |
733 | int i, metaoff, nmeta; | |
275ca4f0 | 734 | struct buf **xbp; int xi; |
84c30241 KB |
735 | |
736 | printf("lfs_writeseg\n"); | |
275ca4f0 KB |
737 | fs->lfs_lastseg = sntosa(fs, sp->seg_number); |
738 | lfs_endsum(fs, sp, 0); | |
84c30241 | 739 | |
275ca4f0 | 740 | #ifdef HELLNO |
84c30241 | 741 | /* Finish off any inodes */ |
275ca4f0 KB |
742 | if (sp->ibp) |
743 | brelse(sp->ibp); | |
744 | #endif | |
84c30241 KB |
745 | |
746 | /* | |
747 | * Copy inode and summary block buffer pointers down so they are | |
275ca4f0 | 748 | * contiguous with the page buffer pointers. |
84c30241 | 749 | */ |
275ca4f0 KB |
750 | ssp = sp->segsum; |
751 | nsum_pb = fs->lfs_bsize / LFS_SUMMARY_SIZE; | |
752 | nbuffers = sp->cbpp - sp->bpp; | |
753 | nsegsums = 1 + sp->sum_num / nsum_pb; | |
754 | ninode_blocks = (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); | |
755 | nmeta = ninode_blocks + nsegsums; | |
84c30241 | 756 | metaoff = fs->lfs_ssize - nmeta; |
275ca4f0 | 757 | nblocks = nbuffers + nmeta; |
84c30241 | 758 | if (sp->bpp + metaoff != sp->cbpp) |
275ca4f0 KB |
759 | bcopy(sp->bpp + metaoff, sp->cbpp, sizeof(BUF *) * nmeta); |
760 | sp->cbpp += nmeta; | |
84c30241 | 761 | |
84c30241 KB |
762 | sup = fs->lfs_segtab + sp->seg_number; |
763 | sup->su_nbytes = nblocks << fs->lfs_bshift; | |
764 | sup->su_lastmod = time.tv_sec; | |
765 | sup->su_flags = SEGUSE_DIRTY; | |
766 | ||
767 | /* | |
275ca4f0 | 768 | * Since we need to guarantee that the summary block gets written last, |
84c30241 KB |
769 | * we issue the writes in two sets. The first n-1 buffers first, and |
770 | * then, after they've completed, the last 1 buffer. Only when that | |
275ca4f0 | 771 | * final write completes is the segment valid. |
84c30241 KB |
772 | */ |
773 | devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp; | |
275ca4f0 KB |
774 | /* |
775 | * Since no writes are yet scheduled, no need to block here; if we | |
776 | * scheduled the writes at multiple points, we'd need an splbio() | |
777 | * here. | |
778 | */ | |
779 | fs->lfs_iocount = nblocks - 1; | |
84c30241 KB |
780 | sp->nextp = fs->lfs_seglist; |
781 | fs->lfs_seglist = sp; | |
275ca4f0 KB |
782 | |
783 | for (bpp = sp->bpp, i = 0; i < (nblocks - 1); i++, ++bpp) | |
784 | /* (*(devvp->v_op->vop_strategy)) */ sdstrategy(*bpp); | |
785 | } | |
786 | ||
787 | static void | |
788 | lfs_writesuper(fs, sp) | |
789 | LFS *fs; | |
790 | SEGMENT *sp; | |
791 | { | |
792 | BUF *bp; | |
793 | VNODE *devvp; | |
794 | ||
795 | printf("lfs_writesuper\n"); | |
796 | /* Wait for segment write to complete */ | |
797 | /* XXX probably should do this biowait(*(sp->cbpp - 1)); */ | |
798 | ||
799 | /* Get a buffer for the super block */ | |
800 | fs->lfs_cksum = cksum(fs, sizeof(LFS) - sizeof(fs->lfs_cksum)); | |
801 | bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD); | |
802 | bp->b_flags &= ~B_CALL; | |
803 | bp->b_vp = NULL; | |
804 | bp->b_iodone = NULL; | |
805 | bcopy(fs, bp->b_un.b_lfs, sizeof(LFS)); | |
806 | ||
807 | /* Write the first superblock; wait. */ | |
808 | devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp; | |
809 | #ifdef MOVETONEWBUF | |
810 | bp->b_dev = devvp->v_rdev; | |
811 | #endif | |
812 | (*devvp->v_op->vop_strategy)(bp); | |
813 | biowait(bp); | |
814 | ||
815 | /* Now, write the second one for which we don't have to wait */ | |
816 | bp->b_flags &= ~B_DONE; | |
817 | bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; | |
818 | (*devvp->v_op->vop_strategy)(bp); | |
819 | brelse(bp); | |
820 | } | |
821 | ||
822 | /* Block match routines used when traversing the dirty block chain. */ | |
823 | match_data(bp) | |
824 | BUF *bp; | |
825 | { | |
826 | return(bp->b_lblkno >= 0); | |
827 | } | |
828 | ||
829 | ||
830 | match_dindir(bp) | |
831 | BUF *bp; | |
832 | { | |
833 | return(bp->b_lblkno == D_INDIR); | |
834 | } | |
835 | ||
836 | /* | |
837 | * These are single indirect blocks. There are three types: | |
838 | * the one in the inode (address S_INDIR = -1). | |
839 | * the ones that hang off of D_INDIR the double indirect in the inode. | |
840 | * these all have addresses in the range -2NINDIR to -(3NINDIR-1) | |
841 | * the ones that hang off of double indirect that hang off of the | |
842 | * triple indirect. These all have addresses < -(NINDIR^2). | |
843 | * Since we currently don't support triple indirect blocks, this gets simpler. | |
844 | * We just need to look for block numbers less than -NIADDR. | |
845 | */ | |
846 | match_indir(bp) | |
847 | BUF *bp; | |
848 | { | |
849 | return(bp->b_lblkno == S_INDIR || bp->b_lblkno < -NIADDR); | |
84c30241 KB |
850 | } |
851 | ||
852 | /* | |
853 | * Shellsort (diminishing increment sort) from Data Structures and | |
854 | * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290; | |
855 | * see also Knuth Vol. 3, page 84. The increments are selected from | |
856 | * formula (8), page 95. Roughly O(N^3/2). | |
857 | */ | |
858 | /* | |
859 | * This is our own private copy of shellsort because we want to sort | |
860 | * two parallel arrays (the array of buffer pointers and the array of | |
861 | * logical block numbers) simultaneously. Note that we cast the array | |
862 | * of logical block numbers to a unsigned in this routine so that the | |
863 | * negative block numbers (meta data blocks) sort AFTER the data blocks. | |
864 | */ | |
865 | static void | |
866 | shellsort(bp_array, lb_array, nmemb) | |
867 | BUF **bp_array; | |
275ca4f0 | 868 | daddr_t *lb_array; |
84c30241 KB |
869 | register int nmemb; |
870 | { | |
871 | static int __rsshell_increments[] = { 4, 1, 0 }; | |
872 | register int incr, *incrp, t1, t2; | |
873 | BUF *bp_temp; | |
874 | u_long lb_temp; | |
875 | ||
876 | for (incrp = __rsshell_increments; incr = *incrp++;) | |
877 | for (t1 = incr; t1 < nmemb; ++t1) | |
878 | for (t2 = t1 - incr; t2 >= 0;) | |
879 | if (lb_array[t2] > lb_array[t2 + incr]) { | |
880 | lb_temp = lb_array[t2]; | |
881 | lb_array[t2] = lb_array[t2 + incr]; | |
882 | lb_array[t2 + incr] = lb_temp; | |
883 | bp_temp = bp_array[t2]; | |
884 | bp_array[t2] = bp_array[t2 + incr]; | |
885 | bp_array[t2 + incr] = bp_temp; | |
886 | t2 -= incr; | |
887 | } else | |
888 | break; | |
889 | } | |
275ca4f0 | 890 | #endif /* LOGFS */ |