Commit | Line | Data |
---|---|---|
84c30241 KB |
1 | /* |
2 | * Copyright (c) 1991 Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * %sccs.include.redist.c% | |
6 | * | |
2742ac45 | 7 | * @(#)lfs_segment.c 7.5 (Berkeley) %G% |
84c30241 KB |
8 | */ |
9 | ||
34a084a9 KB |
10 | #include <sys/param.h> |
11 | #include <sys/systm.h> | |
12 | #include <sys/namei.h> | |
13 | #include <sys/resourcevar.h> | |
14 | #include <sys/kernel.h> | |
15 | #include <sys/file.h> | |
16 | #include <sys/stat.h> | |
17 | #include <sys/buf.h> | |
18 | #include <sys/proc.h> | |
19 | #include <sys/conf.h> | |
20 | #include <sys/vnode.h> | |
21 | #include <sys/specdev.h> | |
22 | #include <sys/fifo.h> | |
23 | #include <sys/malloc.h> | |
24 | #include <sys/mount.h> | |
12304d41 | 25 | #include <sys/kernel.h> /* XXX delete when time goes away */ |
34a084a9 | 26 | |
0a011bb1 KB |
27 | #include <ufs/ufs/quota.h> |
28 | #include <ufs/ufs/inode.h> | |
29 | #include <ufs/ufs/dir.h> | |
30 | #include <ufs/ufs/ufsmount.h> | |
34a084a9 | 31 | |
0a011bb1 KB |
32 | #include <ufs/lfs/lfs.h> |
33 | #include <ufs/lfs/lfs_extern.h> | |
84c30241 | 34 | |
dc7e45d3 KB |
35 | /* In-memory description of a segment about to be written. */ |
36 | typedef struct segment SEGMENT; | |
37 | struct segment { | |
38 | BUF **bpp; /* pointer to buffer array */ | |
39 | BUF **cbpp; /* pointer to next available bp */ | |
40 | BUF *ibp; /* buffer pointer to inode page */ | |
41 | void *segsum; /* segment summary info */ | |
42 | u_long ninodes; /* number of inodes in this segment */ | |
43 | u_long seg_bytes_left; /* bytes left in segment */ | |
44 | u_long sum_bytes_left; /* bytes left in summary block */ | |
45 | u_long seg_number; /* number of this segment */ | |
46 | #define SEGM_CKP 0x01 /* doing a checkpoint */ | |
47 | u_long seg_flags; /* run-time flags for this segment */ | |
48 | FINFO *fip; /* current fileinfo pointer */ | |
49 | }; | |
50 | ||
/*
 * LFS_PARTIAL_FITS --
 *	Determine if it's OK to start a partial in this segment, or if we
 *	need to go on to a new segment.  Evaluates non-zero when lfs_dbpseg,
 *	less the distance from lfs_curseg to lfs_offset, is greater than
 *	1 << lfs_fsbtodb.
 */
#define	LFS_PARTIAL_FITS(fs)						\
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) >	\
	    (1 << (fs)->lfs_fsbtodb))

/* datosn -- disk address to segment number. */
#define	datosn(fs, daddr)						\
	(((daddr) - (fs)->lfs_sboffs[0]) / fsbtodb((fs), (fs)->lfs_ssize))

/* sntoda -- segment number to disk address. */
#define	sntoda(fs, sn)							\
	((daddr_t)((sn) * ((fs)->lfs_ssize << (fs)->lfs_fsbtodb) +	\
	    (fs)->lfs_sboffs[0]))
65 | ||
66 | static int lfs_callback __P((BUF *)); | |
67 | static void lfs_gather __P((struct lfs *, | |
68 | SEGMENT *, VNODE *, int (*) __P((struct lfs *, BUF *)))); | |
12304d41 | 69 | static void lfs_initseg __P((struct lfs *, SEGMENT *)); |
dc7e45d3 | 70 | static BUF *lfs_newbuf __P((struct lfs *, SEGMENT *, daddr_t, size_t)); |
12304d41 | 71 | static daddr_t lfs_newseg __P((struct lfs *)); |
0a011bb1 | 72 | static void lfs_updatemeta __P((struct lfs *, |
dc7e45d3 KB |
73 | SEGMENT *, VNODE *, daddr_t *, BUF **, int)); |
74 | static void lfs_writefile __P((struct lfs *, SEGMENT *, VNODE *)); | |
75 | static void lfs_writeinode __P((struct lfs *, SEGMENT *, INODE *)); | |
0a011bb1 | 76 | static void lfs_writeseg __P((struct lfs *, SEGMENT *)); |
dc7e45d3 KB |
77 | static void lfs_writesuper __P((struct lfs *, SEGMENT *)); |
78 | static int match_data __P((struct lfs *, BUF *)); | |
79 | static int match_dindir __P((struct lfs *, BUF *)); | |
80 | static int match_indir __P((struct lfs *, BUF *)); | |
81 | static int match_tindir __P((struct lfs *, BUF *)); | |
275ca4f0 | 82 | static void shellsort __P((BUF **, daddr_t *, register int)); |
84c30241 | 83 | |
dc7e45d3 KB |
84 | int lfs_allclean_wakeup; /* Cleaner wakeup address. */ |
85 | ||
84c30241 | 86 | int |
275ca4f0 | 87 | lfs_segwrite(mp, do_ckp) |
84c30241 | 88 | MOUNT *mp; |
dc7e45d3 | 89 | int do_ckp; /* Do a checkpoint. */ |
84c30241 | 90 | { |
84c30241 | 91 | INODE *ip; |
0a011bb1 | 92 | struct lfs *fs; |
84c30241 KB |
93 | VNODE *vp; |
94 | SEGMENT *sp; | |
12304d41 | 95 | int s, error; |
84c30241 | 96 | |
dc7e45d3 KB |
97 | #ifdef VERBOSE |
98 | printf("lfs_segwrite\n"); | |
aa4dc149 | 99 | #endif |
8954e52c | 100 | /* |
dc7e45d3 KB |
101 | * If doing a checkpoint, we keep a cumulative count of the outstanding |
102 | * I/O operations. If the disk drive catches up with us it could go to | |
103 | * zero before we finish, so we artificially increment it by one until | |
104 | * we've scheduled all of the writes we intend to do. | |
8954e52c | 105 | */ |
12304d41 | 106 | fs = VFSTOUFS(mp)->um_lfs; |
dc7e45d3 KB |
107 | if (do_ckp) { |
108 | s = splbio(); | |
109 | fs->lfs_iocount = 1; | |
110 | splx(s); | |
111 | } | |
aa4dc149 | 112 | |
dc7e45d3 KB |
113 | /* |
114 | * Allocate a segment structure and enough space to hold pointers to | |
115 | * the maximum possible number of buffers which can be described in a | |
116 | * single summary block. | |
117 | */ | |
118 | sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK); | |
119 | sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / | |
120 | sizeof(daddr_t) + 1) * sizeof(BUF *), M_SEGMENT, M_WAITOK); | |
121 | sp->seg_flags = do_ckp ? SEGM_CKP : 0; | |
12304d41 | 122 | lfs_initseg(fs, sp); |
84c30241 KB |
123 | loop: |
124 | for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { | |
125 | /* | |
126 | * If the vnode that we are about to sync is no longer | |
127 | * associated with this mount point, start over. | |
128 | */ | |
84c30241 KB |
129 | if (vp->v_mount != mp) |
130 | goto loop; | |
131 | if (VOP_ISLOCKED(vp)) | |
132 | continue; | |
dc7e45d3 | 133 | |
12304d41 KB |
134 | /* |
135 | * Write the inode/file if dirty and it's not the | |
136 | * the IFILE. | |
137 | */ | |
84c30241 | 138 | ip = VTOI(vp); |
dc7e45d3 KB |
139 | if (ip->i_flag & (IMOD | IACC | IUPD | ICHG) == 0 && |
140 | vp->v_dirtyblkhd == NULL || | |
141 | ip->i_number == LFS_IFILE_INUM) | |
84c30241 | 142 | continue; |
dc7e45d3 | 143 | |
84c30241 KB |
144 | if (vget(vp)) |
145 | goto loop; | |
dc7e45d3 | 146 | lfs_writefile(fs, sp, vp); |
12304d41 | 147 | lfs_writeinode(fs, sp, ip); |
84c30241 KB |
148 | vput(vp); |
149 | } | |
dc7e45d3 KB |
150 | if (do_ckp) { |
151 | lfs_writefile(fs, sp, fs->lfs_ivnode); | |
152 | lfs_writeinode(fs, sp, VTOI(fs->lfs_ivnode)); | |
153 | } | |
aa4dc149 KB |
154 | lfs_writeseg(fs, sp); |
155 | ||
275ca4f0 | 156 | /* |
dc7e45d3 KB |
157 | * If the I/O count is non-zero, sleep until it reaches zero. At the |
158 | * moment, the user's process hangs around so we can sleep. | |
275ca4f0 | 159 | */ |
dc7e45d3 KB |
160 | if (do_ckp) { |
161 | s = splbio(); | |
12304d41 KB |
162 | if (--fs->lfs_iocount && |
163 | (error = tsleep(&fs->lfs_iocount, PRIBIO + 1, "sync", 0))) | |
164 | return (error); | |
dc7e45d3 KB |
165 | splx(s); |
166 | lfs_writesuper(fs, sp); | |
167 | } | |
275ca4f0 | 168 | |
dc7e45d3 KB |
169 | (void)free(sp->bpp, M_SEGMENT); |
170 | (void)free(sp, M_SEGMENT); | |
275ca4f0 | 171 | |
dc7e45d3 KB |
172 | /* Wake up any cleaning processes waiting on this file system. */ |
173 | wakeup(&fs->lfs_nextseg); | |
174 | wakeup(&lfs_allclean_wakeup); | |
12304d41 | 175 | printf("sync returned\n"); |
dc7e45d3 | 176 | return (0); |
84c30241 KB |
177 | } |
178 | ||
dc7e45d3 KB |
179 | /* |
180 | * Write the dirty blocks associated with a vnode. | |
181 | */ | |
84c30241 | 182 | static void |
dc7e45d3 | 183 | lfs_writefile(fs, sp, vp) |
0a011bb1 | 184 | struct lfs *fs; |
84c30241 | 185 | SEGMENT *sp; |
dc7e45d3 | 186 | VNODE *vp; |
84c30241 | 187 | { |
dc7e45d3 KB |
188 | struct buf *bp; |
189 | FINFO *fip; | |
190 | IFILE *ifp; | |
191 | ino_t inum; | |
275ca4f0 | 192 | |
dc7e45d3 KB |
193 | #ifdef VERBOSE |
194 | printf("lfs_writefile\n"); | |
195 | #endif | |
196 | inum = VTOI(vp)->i_number; | |
197 | if (vp->v_dirtyblkhd != NULL) { | |
198 | if (sp->seg_bytes_left < fs->lfs_bsize || | |
199 | sp->sum_bytes_left < sizeof(FINFO)) { | |
200 | lfs_writeseg(fs, sp); | |
12304d41 | 201 | lfs_initseg(fs, sp); |
dc7e45d3 KB |
202 | } |
203 | sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t); | |
84c30241 | 204 | |
dc7e45d3 KB |
205 | fip = sp->fip; |
206 | fip->fi_nblocks = 0; | |
207 | if (inum == LFS_IFILE_INUM) | |
208 | fip->fi_version = 1; | |
209 | else { | |
210 | LFS_IENTRY(ifp, fs, inum, bp); | |
211 | fip->fi_version = ifp->if_version; | |
212 | brelse(bp); | |
213 | } | |
214 | fip->fi_ino = inum; | |
84c30241 | 215 | |
dc7e45d3 KB |
216 | /* |
217 | * It may not be necessary to write the meta-data blocks | |
218 | * at this point, as the roll-forward recovery code should | |
219 | * be able to reconstruct the list. | |
220 | */ | |
221 | lfs_gather(fs, sp, vp, match_data); | |
222 | lfs_gather(fs, sp, vp, match_indir); | |
223 | lfs_gather(fs, sp, vp, match_dindir); | |
224 | #ifdef TRIPLE | |
225 | lfs_gather(fs, sp, vp, match_tindir); | |
226 | #endif | |
aa4dc149 | 227 | |
dc7e45d3 KB |
228 | fip = sp->fip; |
229 | #ifdef META | |
230 | printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks); | |
231 | #endif | |
232 | if (fip->fi_nblocks != 0) { | |
233 | ++((SEGSUM *)(sp->segsum))->ss_nfinfo; | |
234 | sp->fip = (FINFO *)((caddr_t)fip + sizeof(FINFO) + | |
235 | sizeof(daddr_t) * (fip->fi_nblocks - 1)); | |
236 | } | |
237 | } | |
12304d41 KB |
238 | } |
239 | ||
240 | static void | |
241 | lfs_writeinode(fs, sp, ip) | |
242 | struct lfs *fs; | |
243 | SEGMENT *sp; | |
244 | INODE *ip; | |
245 | { | |
246 | BUF *bp; | |
247 | daddr_t next_addr; | |
248 | int ndx; | |
249 | ||
250 | #ifdef VERBOSE | |
251 | printf("lfs_writeinode\n"); | |
252 | #endif | |
253 | /* Allocate a new inode block if necessary. */ | |
254 | if (sp->ibp == NULL) { | |
255 | /* Allocate a new segment if necessary. */ | |
256 | if (sp->seg_bytes_left < fs->lfs_bsize || | |
257 | sp->sum_bytes_left < sizeof(daddr_t)) { | |
258 | lfs_writeseg(fs, sp); | |
259 | lfs_initseg(fs, sp); | |
260 | } | |
261 | ||
262 | /* Get next inode block. */ | |
263 | next_addr = fs->lfs_offset; | |
264 | fs->lfs_offset += fsbtodb(fs, 1); | |
265 | sp->ibp = *sp->cbpp++ = | |
266 | lfs_newbuf(fs, sp, next_addr, fs->lfs_bsize); | |
267 | ||
268 | /* Set remaining space counter. */ | |
269 | sp->seg_bytes_left -= fs->lfs_bsize; | |
270 | sp->sum_bytes_left -= sizeof(daddr_t); | |
271 | ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - | |
272 | sp->ninodes / INOPB(fs) - 1; | |
273 | ((daddr_t *)(sp->segsum))[ndx] = next_addr; | |
274 | } | |
275 | ||
276 | /* Copy the new inode onto the inode page. | |
277 | * XXX | |
278 | * Do struct assignment. | |
279 | */ | |
280 | bp = sp->ibp; | |
281 | bcopy(&ip->i_din, | |
282 | bp->b_un.b_dino + (sp->ninodes % INOPB(fs)), sizeof(DINODE)); | |
283 | ||
284 | /* Increment inode count in segment summary block. */ | |
285 | ++((SEGSUM *)(sp->segsum))->ss_ninos; | |
286 | ||
287 | /* If this page is full, set flag to allocate a new page. */ | |
288 | if (++sp->ninodes % INOPB(fs) == 0) | |
289 | sp->ibp = NULL; | |
290 | ||
291 | /* | |
292 | * If updating the ifile, update the super-block; otherwise, update | |
293 | * the ifile itself. In either case, turn off inode update flags. | |
294 | */ | |
295 | if (ip->i_number == LFS_IFILE_INUM) | |
296 | fs->lfs_idaddr = bp->b_blkno; | |
297 | else | |
298 | lfs_iset(ip, bp->b_blkno, ip->i_atime); | |
299 | ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); | |
275ca4f0 KB |
300 | } |
301 | ||
dc7e45d3 | 302 | static void |
275ca4f0 | 303 | lfs_gather(fs, sp, vp, match) |
0a011bb1 | 304 | struct lfs *fs; |
275ca4f0 KB |
305 | SEGMENT *sp; |
306 | VNODE *vp; | |
dc7e45d3 | 307 | int (*match) __P((struct lfs *, BUF *)); |
275ca4f0 KB |
308 | { |
309 | BUF **bpp, *bp, *nbp; | |
310 | FINFO *fip; | |
311 | INODE *ip; | |
275ca4f0 | 312 | daddr_t *lbp, *start_lbp; |
aa4dc149 KB |
313 | u_long version; |
314 | int s; | |
275ca4f0 | 315 | |
dc7e45d3 KB |
316 | #ifdef VERBOSE |
317 | printf("lfs_gather\n"); | |
318 | #endif | |
275ca4f0 KB |
319 | ip = VTOI(vp); |
320 | bpp = sp->cbpp; | |
321 | fip = sp->fip; | |
275ca4f0 | 322 | start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; |
275ca4f0 KB |
323 | |
324 | s = splbio(); | |
325 | for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { | |
326 | nbp = bp->b_blockf; | |
12304d41 KB |
327 | /* |
328 | * XXX | |
329 | * Should probably sleep on any BUSY buffer if | |
330 | * doing an fsync? | |
331 | */ | |
aa4dc149 | 332 | if (bp->b_flags & B_BUSY) |
275ca4f0 | 333 | continue; |
aa4dc149 | 334 | #ifdef DIAGNOSTIC |
dc7e45d3 | 335 | if (!(bp->b_flags & B_DELWRI)) |
12304d41 | 336 | panic("lfs_gather: bp not B_DELWRI"); |
dc7e45d3 | 337 | if (!(bp->b_flags & B_LOCKED)) |
12304d41 | 338 | panic("lfs_gather: bp not B_LOCKED"); |
aa4dc149 | 339 | #endif |
dc7e45d3 | 340 | if (!match(fs, bp)) |
275ca4f0 | 341 | continue; |
aa4dc149 | 342 | |
aa4dc149 | 343 | /* Insert into the buffer list, update the FINFO block. */ |
275ca4f0 | 344 | *sp->cbpp++ = bp; |
aa4dc149 KB |
345 | ++fip->fi_nblocks; |
346 | *lbp++ = bp->b_lblkno; | |
347 | ||
aa4dc149 | 348 | /* |
dc7e45d3 KB |
349 | * If full, finish this segment. We may be doing I/O, so |
350 | * release and reacquire the splbio(). | |
aa4dc149 | 351 | */ |
dc7e45d3 KB |
352 | sp->sum_bytes_left -= sizeof(daddr_t); |
353 | sp->seg_bytes_left -= bp->b_bufsize; | |
aa4dc149 | 354 | if (sp->sum_bytes_left < sizeof(daddr_t) || |
275ca4f0 | 355 | sp->seg_bytes_left < fs->lfs_bsize) { |
275ca4f0 | 356 | splx(s); |
aa4dc149 | 357 | lfs_updatemeta(fs, |
dc7e45d3 | 358 | sp, vp, start_lbp, bpp, lbp - start_lbp); |
275ca4f0 | 359 | |
aa4dc149 KB |
360 | /* Add the current file to the segment summary. */ |
361 | ++((SEGSUM *)(sp->segsum))->ss_nfinfo; | |
275ca4f0 | 362 | |
aa4dc149 | 363 | version = fip->fi_version; |
dc7e45d3 | 364 | lfs_writeseg(fs, sp); |
12304d41 | 365 | lfs_initseg(fs, sp); |
aa4dc149 | 366 | |
275ca4f0 | 367 | fip = sp->fip; |
275ca4f0 | 368 | fip->fi_version = version; |
aa4dc149 | 369 | fip->fi_ino = ip->i_number; |
275ca4f0 | 370 | start_lbp = lbp = fip->fi_blocks; |
aa4dc149 KB |
371 | |
372 | bpp = sp->cbpp; | |
275ca4f0 KB |
373 | s = splbio(); |
374 | } | |
84c30241 | 375 | } |
275ca4f0 | 376 | splx(s); |
dc7e45d3 | 377 | lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp); |
84c30241 KB |
378 | } |
379 | ||
84c30241 | 380 | /* |
aa4dc149 | 381 | * Update the metadata that points to the blocks listed in the FINFO |
84c30241 KB |
382 | * array. |
383 | */ | |
275ca4f0 | 384 | static void |
dc7e45d3 | 385 | lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks) |
0a011bb1 | 386 | struct lfs *fs; |
275ca4f0 | 387 | SEGMENT *sp; |
dc7e45d3 | 388 | VNODE *vp; |
275ca4f0 | 389 | daddr_t *lbp; |
84c30241 | 390 | BUF **bpp; |
275ca4f0 | 391 | int nblocks; |
84c30241 | 392 | { |
12304d41 KB |
393 | SEGUSE *sup; |
394 | BUF *bp; | |
dc7e45d3 KB |
395 | INDIR a[NIADDR], *ap; |
396 | INODE *ip; | |
12304d41 | 397 | daddr_t daddr, lbn, off; |
dc7e45d3 | 398 | int db_per_fsb, error, i, num; |
84c30241 | 399 | |
dc7e45d3 KB |
400 | #ifdef VERBOSE |
401 | printf("lfs_updatemeta\n"); | |
402 | #endif | |
aa4dc149 | 403 | if (nblocks == 0) |
275ca4f0 KB |
404 | return; |
405 | ||
12304d41 | 406 | /* Sort the blocks. */ |
275ca4f0 KB |
407 | shellsort(bpp, lbp, nblocks); |
408 | ||
12304d41 KB |
409 | /* |
410 | * Assign disk addresses, and update references to the logical | |
411 | * block and the segment usage information. | |
412 | */ | |
dc7e45d3 | 413 | db_per_fsb = fsbtodb(fs, 1); |
12304d41 KB |
414 | for (i = nblocks; i--; ++bpp) { |
415 | lbn = *lbp++; | |
416 | (*bpp)->b_blkno = off = fs->lfs_offset; | |
dc7e45d3 | 417 | fs->lfs_offset += db_per_fsb; |
275ca4f0 | 418 | |
dc7e45d3 | 419 | if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num)) |
12304d41 KB |
420 | panic("lfs_updatemeta: lfs_bmaparray returned %d", |
421 | error); | |
dc7e45d3 KB |
422 | #ifdef META |
423 | printf("daddr: %d num: %d\n", daddr, num); | |
424 | if (num != 0) { | |
425 | int x; | |
12304d41 | 426 | printf("array from bmaparray:\n"); |
dc7e45d3 KB |
427 | for (x = 0; x < num; x++) |
428 | printf("\tlbn %d off %d\n", a[x].in_lbn, a[x].in_off); | |
429 | } | |
430 | #endif | |
dc7e45d3 KB |
431 | ip = VTOI(vp); |
432 | switch (num) { | |
433 | case 0: | |
14712628 | 434 | #ifdef META |
dc7e45d3 | 435 | printf("update inode for direct block %d\n", lbn); |
14712628 | 436 | #endif |
12304d41 | 437 | ip->i_db[lbn] = off; |
dc7e45d3 KB |
438 | break; |
439 | case 1: | |
12304d41 | 440 | ip->i_ib[a[0].in_off] = off; |
dc7e45d3 KB |
441 | break; |
442 | default: | |
443 | ap = &a[num - 1]; | |
14712628 | 444 | #ifdef META |
dc7e45d3 KB |
445 | printf("update indirect block %d offset %d\n", |
446 | ap->in_lbn, ap->in_off); | |
14712628 | 447 | #endif |
dc7e45d3 KB |
448 | if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) |
449 | panic("lfs_updatemeta: bread bno %d", | |
450 | ap->in_lbn); | |
12304d41 KB |
451 | bp->b_un.b_daddr[ap->in_off] = off; |
452 | lfs_bwrite(bp); | |
453 | } | |
454 | ||
455 | /* Update segment usage information. */ | |
456 | if (daddr != UNASSIGNED) { | |
457 | LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); | |
458 | sup->su_lastmod = time.tv_sec; | |
459 | #ifdef DIAGNOSTIC | |
460 | if (sup->su_nbytes < fs->lfs_bsize) | |
461 | panic("lfs: negative bytes (segment %d)\n", | |
462 | datosn(fs, daddr)); | |
463 | #endif | |
464 | sup->su_nbytes -= fs->lfs_bsize; | |
275ca4f0 | 465 | lfs_bwrite(bp); |
84c30241 | 466 | } |
84c30241 | 467 | } |
84c30241 KB |
468 | } |
469 | ||
12304d41 KB |
470 | /* |
471 | * Start a new segment. | |
472 | */ | |
dc7e45d3 | 473 | static void |
12304d41 | 474 | lfs_initseg(fs, sp) |
0a011bb1 | 475 | struct lfs *fs; |
275ca4f0 | 476 | SEGMENT *sp; |
84c30241 | 477 | { |
12304d41 KB |
478 | SEGUSE *sup; |
479 | SEGSUM *ssp; | |
480 | struct buf *bp; | |
481 | daddr_t lbn, *lbnp; | |
275ca4f0 | 482 | |
dc7e45d3 | 483 | #ifdef VERBOSE |
12304d41 | 484 | printf("lfs_initseg\n"); |
dc7e45d3 | 485 | #endif |
12304d41 KB |
486 | /* Advance to the next segment. */ |
487 | if (1 || !LFS_PARTIAL_FITS(fs)) { | |
488 | LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp); | |
489 | sup->su_flags &= ~SEGUSE_ACTIVE; | |
490 | lfs_bwrite(bp); | |
491 | fs->lfs_curseg = fs->lfs_offset = fs->lfs_nextseg; | |
492 | fs->lfs_nextseg = lfs_newseg(fs); | |
493 | sp->seg_number = datosn(fs, fs->lfs_curseg); | |
494 | sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; | |
495 | ||
496 | /* | |
497 | * If su_nbytes is non-zero after the segment was cleaned, | |
498 | * the segment contains a super-block. Update offset and | |
499 | * summary address to skip over the superblock. | |
500 | */ | |
501 | LFS_SEGENTRY(sup, fs, sp->seg_number, bp); | |
502 | if (sup->su_nbytes != 0) { | |
503 | fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; | |
504 | sp->seg_bytes_left -= LFS_SBPAD; | |
275ca4f0 | 505 | } |
12304d41 KB |
506 | brelse(bp); |
507 | } else { | |
508 | sp->seg_number = datosn(fs, fs->lfs_curseg); | |
509 | sp->seg_bytes_left = (fs->lfs_dbpseg - | |
510 | (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; | |
511 | } | |
aa4dc149 | 512 | |
12304d41 KB |
513 | sp->ibp = NULL; |
514 | sp->ninodes = 0; | |
aa4dc149 | 515 | |
12304d41 KB |
516 | /* Get a new buffer for SEGSUM and enter it into the buffer list. */ |
517 | sp->cbpp = sp->bpp; | |
518 | *sp->cbpp = lfs_newbuf(fs, sp, fs->lfs_offset, LFS_SUMMARY_SIZE); | |
519 | sp->segsum = (*sp->cbpp)->b_un.b_addr; | |
520 | ++sp->cbpp; | |
521 | fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; | |
aa4dc149 | 522 | |
12304d41 KB |
523 | /* Set point to SEGSUM, initialize it. */ |
524 | ssp = sp->segsum; | |
525 | ssp->ss_next = fs->lfs_nextseg; | |
526 | ssp->ss_create = time.tv_sec; | |
527 | ssp->ss_nfinfo = ssp->ss_ninos = 0; | |
aa4dc149 | 528 | |
12304d41 KB |
529 | /* Set pointer to first FINFO, initialize it. */ |
530 | sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM)); | |
531 | sp->fip->fi_nblocks = 0; | |
aa4dc149 | 532 | |
12304d41 KB |
533 | sp->seg_bytes_left -= LFS_SUMMARY_SIZE; |
534 | sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); | |
535 | } | |
aa4dc149 | 536 | |
12304d41 KB |
537 | /* |
538 | * Return the next segment to write. | |
539 | */ | |
540 | static daddr_t | |
541 | lfs_newseg(fs) | |
542 | struct lfs *fs; | |
543 | { | |
544 | SEGUSE *sup; | |
545 | struct buf *bp; | |
546 | int isdirty, segnum, sn; | |
547 | ||
548 | #ifdef VERBOSE | |
549 | printf("lfs_newseg\n"); | |
550 | #endif | |
551 | segnum = datosn(fs, fs->lfs_nextseg); | |
552 | LFS_SEGENTRY(sup, fs, segnum, bp); | |
553 | sup->su_flags |= SEGUSE_ACTIVE; | |
554 | lfs_bwrite(bp); | |
555 | for (sn = segnum;;) { | |
556 | sn = (sn + 1) % fs->lfs_nseg; | |
557 | if (sn == segnum) | |
558 | panic("lfs_nextseg: no clean segments"); | |
559 | LFS_SEGENTRY(sup, fs, sn, bp); | |
560 | isdirty = sup->su_flags & SEGUSE_DIRTY; | |
561 | brelse(bp); | |
562 | if (!isdirty) | |
563 | break; | |
564 | } | |
565 | return (sntoda(fs, sn)); | |
84c30241 KB |
566 | } |
567 | ||
568 | static void | |
569 | lfs_writeseg(fs, sp) | |
0a011bb1 | 570 | struct lfs *fs; |
84c30241 KB |
571 | SEGMENT *sp; |
572 | { | |
12304d41 | 573 | BUF **bpp, *bp; |
84c30241 | 574 | SEGUSE *sup; |
dc7e45d3 KB |
575 | SEGSUM *segp; |
576 | dev_t i_dev; | |
577 | u_long *datap, *dp; | |
aa4dc149 | 578 | void *pmeta; |
dc7e45d3 | 579 | int flags, i, nblocks, s, (*strategy) __P((BUF *)); |
84c30241 | 580 | |
dc7e45d3 KB |
581 | #ifdef VERBOSE |
582 | printf("lfs_writeseg\n"); | |
583 | #endif | |
aa4dc149 KB |
584 | /* Update superblock segment address. */ |
585 | fs->lfs_lastseg = sntoda(fs, sp->seg_number); | |
aa4dc149 | 586 | nblocks = sp->cbpp - sp->bpp; |
dc7e45d3 KB |
587 | |
588 | LFS_SEGENTRY(sup, fs, sp->seg_number, bp); | |
589 | sup->su_nbytes += LFS_SUMMARY_SIZE + (nblocks - 1 << fs->lfs_bshift); | |
84c30241 KB |
590 | sup->su_lastmod = time.tv_sec; |
591 | sup->su_flags = SEGUSE_DIRTY; | |
dc7e45d3 | 592 | lfs_bwrite(bp); |
84c30241 KB |
593 | |
594 | /* | |
12304d41 KB |
595 | * Compute checksum across data and then across summary; |
596 | * the first block (the summary block) is skipped. | |
dc7e45d3 KB |
597 | * |
598 | * XXX | |
599 | * Fix this to do it inline, instead of malloc/copy. | |
84c30241 | 600 | */ |
dc7e45d3 | 601 | datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); |
12304d41 KB |
602 | for (bpp = sp->bpp, i = nblocks - 1; i--;) |
603 | *dp++ = (*++bpp)->b_un.b_words[0]; | |
dc7e45d3 KB |
604 | |
605 | segp = (SEGSUM *)sp->segsum; | |
606 | segp->ss_datasum = cksum(datap, nblocks * sizeof(u_long)); | |
607 | segp->ss_sumsum = cksum(&segp->ss_datasum, | |
608 | LFS_SUMMARY_SIZE - sizeof(segp->ss_sumsum)); | |
dc7e45d3 | 609 | (void)free(datap, M_SEGMENT); |
8954e52c | 610 | |
dc7e45d3 KB |
611 | /* |
612 | * When we gathered the blocks for I/O we did not mark them busy or | |
613 | * remove them from the freelist. As we do this, turn off the B_LOCKED | |
614 | * bit so the future brelse will put them on the LRU list, and add the | |
615 | * B_CALL flags if we're doing a checkpoint so we can count I/O's. LFS | |
616 | * requires that the super blocks (on checkpoint) be written after all | |
617 | * the segment data. | |
618 | */ | |
619 | i_dev = VTOI(fs->lfs_ivnode)->i_dev; | |
620 | strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; | |
275ca4f0 | 621 | |
8954e52c | 622 | s = splbio(); |
dc7e45d3 KB |
623 | if (sp->seg_flags & SEGM_CKP) { |
624 | fs->lfs_iocount += nblocks; | |
12304d41 | 625 | flags = B_ASYNC | B_BUSY | B_CALL; |
dc7e45d3 | 626 | } else |
12304d41 | 627 | flags = B_ASYNC | B_BUSY; |
dc7e45d3 KB |
628 | for (bpp = sp->bpp, i = nblocks; i--;) { |
629 | bp = *bpp++; | |
630 | bp->b_flags |= flags; | |
12304d41 KB |
631 | bp->b_flags &= |
632 | ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED); | |
dc7e45d3 KB |
633 | bp->b_dev = i_dev; |
634 | bp->b_iodone = lfs_callback; | |
dc7e45d3 KB |
635 | if (!(bp->b_flags & B_NOCACHE)) { |
636 | bremfree(bp); | |
637 | reassignbuf(bp, bp->b_vp); | |
638 | } | |
8954e52c | 639 | } |
dc7e45d3 KB |
640 | splx(s); |
641 | ||
642 | for (bpp = sp->bpp, i = nblocks; i--;) | |
643 | (strategy)(*bpp++); | |
275ca4f0 KB |
644 | } |
645 | ||
646 | static void | |
dc7e45d3 | 647 | lfs_writesuper(fs, sp) |
0a011bb1 | 648 | struct lfs *fs; |
dc7e45d3 | 649 | SEGMENT *sp; |
275ca4f0 KB |
650 | { |
651 | BUF *bp; | |
dc7e45d3 | 652 | dev_t i_dev; |
aa4dc149 | 653 | int (*strategy) __P((BUF *)); |
275ca4f0 | 654 | |
dc7e45d3 KB |
655 | #ifdef VERBOSE |
656 | printf("lfs_writesuper\n"); | |
657 | #endif | |
dc7e45d3 KB |
658 | i_dev = VTOI(fs->lfs_ivnode)->i_dev; |
659 | strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; | |
14712628 | 660 | |
aa4dc149 | 661 | /* Checksum the superblock and copy it into a buffer. */ |
0a011bb1 | 662 | fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); |
dc7e45d3 KB |
663 | bp = lfs_newbuf(fs, sp, fs->lfs_sboffs[0], LFS_SBPAD); |
664 | *bp->b_un.b_lfs = *fs; | |
275ca4f0 | 665 | |
14712628 | 666 | /* Write the first superblock (wait). */ |
dc7e45d3 | 667 | bp->b_dev = i_dev; |
dc7e45d3 | 668 | bp->b_flags |= B_BUSY; |
12304d41 | 669 | bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); |
aa4dc149 | 670 | (strategy)(bp); |
275ca4f0 | 671 | biowait(bp); |
aa4dc149 | 672 | |
14712628 | 673 | /* Write the second superblock (don't wait). */ |
275ca4f0 | 674 | bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; |
12304d41 | 675 | bp->b_flags |= B_ASYNC | B_BUSY; |
dc7e45d3 | 676 | bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); |
aa4dc149 | 677 | (strategy)(bp); |
275ca4f0 KB |
678 | } |
679 | ||
aa4dc149 KB |
680 | /* |
681 | * Logical block number match routines used when traversing the dirty block | |
682 | * chain. | |
683 | */ | |
34a084a9 | 684 | static int |
dc7e45d3 KB |
685 | match_data(fs, bp) |
686 | struct lfs *fs; | |
275ca4f0 KB |
687 | BUF *bp; |
688 | { | |
aa4dc149 | 689 | return (bp->b_lblkno >= 0); |
275ca4f0 KB |
690 | } |
691 | ||
34a084a9 | 692 | static int |
dc7e45d3 KB |
693 | match_indir(fs, bp) |
694 | struct lfs *fs; | |
275ca4f0 KB |
695 | BUF *bp; |
696 | { | |
dc7e45d3 KB |
697 | int lbn; |
698 | ||
699 | lbn = bp->b_lblkno; | |
700 | return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); | |
275ca4f0 KB |
701 | } |
702 | ||
34a084a9 | 703 | static int |
dc7e45d3 KB |
704 | match_dindir(fs, bp) |
705 | struct lfs *fs; | |
275ca4f0 KB |
706 | BUF *bp; |
707 | { | |
dc7e45d3 KB |
708 | int lbn; |
709 | ||
710 | lbn = bp->b_lblkno; | |
711 | return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); | |
aa4dc149 KB |
712 | } |
713 | ||
dc7e45d3 KB |
714 | static int |
715 | match_tindir(fs, bp) | |
0a011bb1 | 716 | struct lfs *fs; |
dc7e45d3 | 717 | BUF *bp; |
aa4dc149 | 718 | { |
dc7e45d3 | 719 | int lbn; |
aa4dc149 | 720 | |
dc7e45d3 KB |
721 | lbn = bp->b_lblkno; |
722 | return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); | |
723 | } | |
aa4dc149 | 724 | |
dc7e45d3 KB |
725 | /* |
726 | * Allocate a new buffer header. | |
727 | */ | |
728 | static BUF * | |
729 | lfs_newbuf(fs, sp, daddr, size) | |
730 | struct lfs *fs; | |
731 | SEGMENT *sp; | |
732 | daddr_t daddr; | |
733 | size_t size; | |
734 | { | |
735 | BUF *bp; | |
aa4dc149 | 736 | |
dc7e45d3 KB |
737 | #ifdef VERBOSE |
738 | printf("lfs_newbuf\n"); | |
739 | #endif | |
740 | bp = getnewbuf(); | |
741 | bremhash(bp); | |
742 | bgetvp(fs->lfs_ivnode, bp); | |
743 | bp->b_bcount = 0; | |
744 | bp->b_lblkno = daddr; | |
745 | bp->b_blkno = daddr; | |
746 | bp->b_error = 0; | |
747 | bp->b_resid = 0; | |
748 | allocbuf(bp, size); | |
749 | bp->b_flags |= B_NOCACHE; | |
12304d41 | 750 | binshash(bp, &bfreelist[BQ_AGE]); |
dc7e45d3 KB |
751 | return (bp); |
752 | } | |
aa4dc149 | 753 | |
dc7e45d3 KB |
754 | /* |
755 | * The buffer cache callback routine. | |
756 | */ | |
757 | static int /* XXX should be void */ | |
758 | lfs_callback(bp) | |
759 | BUF *bp; | |
760 | { | |
761 | struct lfs *fs; | |
aa4dc149 | 762 | |
dc7e45d3 KB |
763 | fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; |
764 | #ifdef DIAGNOSTIC | |
765 | if (fs->lfs_iocount == 0) | |
766 | panic("lfs_callback: zero iocount\n"); | |
767 | #endif | |
768 | if (--fs->lfs_iocount == 0) | |
dc7e45d3 | 769 | wakeup(&fs->lfs_iocount); |
12304d41 | 770 | |
dc7e45d3 | 771 | brelse(bp); |
84c30241 KB |
772 | } |
773 | ||
774 | /* | |
775 | * Shellsort (diminishing increment sort) from Data Structures and | |
776 | * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; | |
777 | * see also Knuth Vol. 3, page 84. The increments are selected from | |
778 | * formula (8), page 95. Roughly O(N^3/2). | |
779 | */ | |
780 | /* | |
781 | * This is our own private copy of shellsort because we want to sort | |
782 | * two parallel arrays (the array of buffer pointers and the array of | |
783 | * logical block numbers) simultaneously. Note that we cast the array | |
784 | * of logical block numbers to a unsigned in this routine so that the | |
785 | * negative block numbers (meta data blocks) sort AFTER the data blocks. | |
786 | */ | |
787 | static void | |
788 | shellsort(bp_array, lb_array, nmemb) | |
789 | BUF **bp_array; | |
275ca4f0 | 790 | daddr_t *lb_array; |
84c30241 KB |
791 | register int nmemb; |
792 | { | |
793 | static int __rsshell_increments[] = { 4, 1, 0 }; | |
794 | register int incr, *incrp, t1, t2; | |
795 | BUF *bp_temp; | |
796 | u_long lb_temp; | |
797 | ||
798 | for (incrp = __rsshell_increments; incr = *incrp++;) | |
799 | for (t1 = incr; t1 < nmemb; ++t1) | |
800 | for (t2 = t1 - incr; t2 >= 0;) | |
801 | if (lb_array[t2] > lb_array[t2 + incr]) { | |
802 | lb_temp = lb_array[t2]; | |
803 | lb_array[t2] = lb_array[t2 + incr]; | |
804 | lb_array[t2 + incr] = lb_temp; | |
805 | bp_temp = bp_array[t2]; | |
806 | bp_array[t2] = bp_array[t2 + incr]; | |
807 | bp_array[t2 + incr] = bp_temp; | |
808 | t2 -= incr; | |
809 | } else | |
810 | break; | |
811 | } |