Commit | Line | Data |
---|---|---|
461723e7 KM |
1 | /*- |
2 | * Copyright (c) 1980, 1991 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * %sccs.include.redist.c% | |
76797561 DF |
6 | */ |
7 | ||
7a65e725 | 8 | #ifndef lint |
2cc3129b | 9 | static char sccsid[] = "@(#)tape.c 5.23 (Berkeley) %G%"; |
332edf81 | 10 | #endif /* not lint */ |
7a65e725 | 11 | |
9e7586cd | 12 | #ifdef sunos |
7dd80c8d | 13 | #include <sys/param.h> |
9e7586cd KM |
14 | #include <stdio.h> |
15 | #include <ctype.h> | |
16 | #include <sys/stat.h> | |
17 | #include <sys/time.h> | |
18 | #include <sys/dir.h> | |
19 | #include <sys/vnode.h> | |
20 | #include <ufs/inode.h> | |
3d66b39b | 21 | #include <ufs/fs.h> |
9e7586cd | 22 | #else |
13298603 | 23 | #include <sys/param.h> |
332edf81 | 24 | #include <sys/wait.h> |
3d66b39b KB |
25 | #include <ufs/ufs/dinode.h> |
26 | #include <ufs/ffs/fs.h> | |
9e7586cd | 27 | #endif |
13298603 | 28 | #include <signal.h> |
1db7a225 | 29 | #include <fcntl.h> |
13298603 KB |
30 | #include <protocols/dumprestore.h> |
31 | #include <errno.h> | |
cdff0ca6 | 32 | #include <setjmp.h> |
13298603 KB |
33 | #ifdef __STDC__ |
34 | #include <unistd.h> | |
35 | #include <stdlib.h> | |
36 | #include <string.h> | |
37 | #endif | |
cdff0ca6 | 38 | #include <sys/socket.h> |
13298603 | 39 | #include "dump.h" |
e9a09562 | 40 | #include "pathnames.h" |
ae4b153c | 41 | |
/*
 * Tape-writing state shared by the routines in this file, plus forward
 * declarations for functions that are used before they are defined.
 */
int	writesize;		/* size in bytes of one tape write (ntrec * TP_BSIZE) */
long	lastspclrec = -1;	/* tape block number of last written header */
int	trecno = 0;		/* next record to write in current block */
extern	long blocksperfile;	/* number of blocks per output file */
long	blocksthisvol;		/* number of blocks on current output file */
extern	int ntrec;		/* blocking factor on tape */
extern	int cartridge;
extern	char *host;
char	*nexttape;		/* remaining comma-separated output names, if any */
#ifdef RDUMP
int	rmtopen(), rmtwrite();
void	rmtclose();
#endif /* RDUMP */
void	rollforward();
int	atomic();
void	doslave(), enslave(), flushtape(), killall();
332edf81 | 58 | |
1ddebffe | 59 | /* |
09e9de78 | 60 | * Concurrent dump mods (Caltech) - disk block reading and tape writing |
23b4aba9 KM |
61 | * are exported to several slave processes. While one slave writes the |
62 | * tape, the others read disk blocks; they pass control of the tape in | |
cdff0ca6 | 63 | * a ring via signals. The parent process traverses the filesystem and |
9548dea4 | 64 | * sends writeheader()'s and lists of daddr's to the slaves via pipes. |
cdff0ca6 | 65 | * The following structure defines the instruction packets sent to slaves. |
23b4aba9 | 66 | */ |
cdff0ca6 | 67 | struct req { |
23b4aba9 KM |
68 | daddr_t dblk; |
69 | int count; | |
cdff0ca6 | 70 | }; |
23b4aba9 KM |
71 | int reqsiz; |
72 | ||
09e9de78 | 73 | #define SLAVES 3 /* 1 slave writing, 1 reading, 1 for slack */ |
cdff0ca6 KM |
74 | struct slave { |
75 | int tapea; /* header number at start of this chunk */ | |
76 | int count; /* count to next header (used for TS_TAPE */ | |
77 | /* after EOT) */ | |
78 | int inode; /* inode that we are currently dealing with */ | |
79 | int fd; /* FD for this slave */ | |
80 | int pid; /* PID for this slave */ | |
81 | int sent; /* 1 == we've sent this slave requests */ | |
82 | int firstrec; /* record number of this block */ | |
83 | char (*tblock)[TP_BSIZE]; /* buffer for data blocks */ | |
84 | struct req *req; /* buffer for requests */ | |
85 | } slaves[SLAVES+1]; | |
86 | struct slave *slp; | |
87 | ||
88 | char (*nextblock)[TP_BSIZE]; | |
89 | ||
90 | int master; /* pid of master, for sending error signals */ | |
91 | int tenths; /* length of tape used per block written */ | |
92 | static int caught; /* have we caught the signal to proceed? */ | |
93 | static int ready; /* have we reached the lock point without having */ | |
94 | /* received the SIGUSR2 signal from the prev slave? */ | |
95 | static jmp_buf jmpbuf; /* where to jump to if we are ready when the */ | |
96 | /* SIGUSR2 arrives from the previous slave */ | |
23b4aba9 | 97 | |
332edf81 | 98 | int |
1ddebffe SL |
99 | alloctape() |
100 | { | |
09e9de78 | 101 | int pgoff = getpagesize() - 1; |
cdff0ca6 KM |
102 | char *buf; |
103 | int i; | |
a40f6134 | 104 | |
1ddebffe | 105 | writesize = ntrec * TP_BSIZE; |
cdff0ca6 | 106 | reqsiz = (ntrec + 1) * sizeof(struct req); |
09e9de78 | 107 | /* |
a40f6134 KM |
108 | * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode |
109 | * (see DEC TU80 User's Guide). The shorter gaps of 6250-bpi require | |
110 | * repositioning after stopping, i.e, streaming mode, where the gap is | |
111 | * variable, 0.30" to 0.45". The gap is maximal when the tape stops. | |
112 | */ | |
1ff6a29e MK |
113 | if (blocksperfile == 0) |
114 | tenths = writesize / density + | |
115 | (cartridge ? 16 : density == 625 ? 5 : 8); | |
a40f6134 KM |
116 | /* |
117 | * Allocate tape buffer contiguous with the array of instruction | |
9548dea4 | 118 | * packets, so flushtape() can write them together with one write(). |
a40f6134 | 119 | * Align tape buffer on page boundary to speed up tape write(). |
09e9de78 | 120 | */ |
cdff0ca6 KM |
121 | for (i = 0; i <= SLAVES; i++) { |
122 | buf = (char *) malloc(reqsiz + writesize + pgoff + TP_BSIZE); | |
123 | if (buf == NULL) | |
124 | return(0); | |
125 | slaves[i].tblock = (char (*)[TP_BSIZE]) | |
126 | (((long)&buf[ntrec + 1] + pgoff) &~ pgoff); | |
127 | slaves[i].req = (struct req *)slaves[i].tblock - ntrec - 1; | |
128 | } | |
129 | slp = &slaves[0]; | |
130 | slp->count = 1; | |
131 | slp->tapea = 0; | |
132 | slp->firstrec = 0; | |
133 | nextblock = slp->tblock; | |
09e9de78 | 134 | return(1); |
1ddebffe SL |
135 | } |
136 | ||
332edf81 | 137 | void |
9548dea4 | 138 | writerec(dp) |
b6407c9d | 139 | char *dp; |
ae4b153c | 140 | { |
cdff0ca6 KM |
141 | |
142 | slp->req[trecno].dblk = (daddr_t)0; | |
143 | slp->req[trecno].count = 1; | |
144 | *(union u_spcl *)(*(nextblock)++) = *(union u_spcl *)dp; | |
c4c501b5 | 145 | lastspclrec = spcl.c_tapea; |
09e9de78 | 146 | trecno++; |
ae4b153c | 147 | spcl.c_tapea++; |
332edf81 | 148 | if (trecno >= ntrec) |
9548dea4 | 149 | flushtape(); |
ae4b153c BJ |
150 | } |
151 | ||
332edf81 | 152 | void |
9548dea4 | 153 | dumpblock(blkno, size) |
f5bba473 KM |
154 | daddr_t blkno; |
155 | int size; | |
ae4b153c | 156 | { |
a40f6134 | 157 | int avail, tpblks, dblkno; |
f5bba473 | 158 | |
b6407c9d | 159 | dblkno = fsbtodb(sblock, blkno); |
332edf81 | 160 | tpblks = size >> tp_bshift; |
23b4aba9 | 161 | while ((avail = MIN(tpblks, ntrec - trecno)) > 0) { |
cdff0ca6 KM |
162 | slp->req[trecno].dblk = dblkno; |
163 | slp->req[trecno].count = avail; | |
a40f6134 | 164 | trecno += avail; |
f5bba473 | 165 | spcl.c_tapea += avail; |
a40f6134 | 166 | if (trecno >= ntrec) |
9548dea4 | 167 | flushtape(); |
332edf81 | 168 | dblkno += avail << (tp_bshift - dev_bshift); |
b6407c9d | 169 | tpblks -= avail; |
f5bba473 | 170 | } |
ae4b153c BJ |
171 | } |
172 | ||
/*
 * Set by tperror() before it calls close_rewind(), which then skips
 * its "Change Volumes" announcement.
 */
int	nogripe = 0;
174 | ||
332edf81 CT |
175 | void |
176 | tperror() | |
177 | { | |
cdff0ca6 | 178 | |
23b4aba9 | 179 | if (pipeout) { |
70c0f96a | 180 | msg("write error on %s\n", tape); |
332edf81 | 181 | quit("Cannot recover\n"); |
23b4aba9 KM |
182 | /* NOTREACHED */ |
183 | } | |
dd80a182 | 184 | msg("write error %d blocks into volume %d\n", blocksthisvol, tapeno); |
70c0f96a | 185 | broadcast("DUMP WRITE ERROR!\n"); |
23b4aba9 KM |
186 | if (!query("Do you want to restart?")) |
187 | dumpabort(); | |
70c0f96a | 188 | msg("Closing this volume. Prepare to restart with new media;\n"); |
23b4aba9 | 189 | msg("this dump volume will be rewritten.\n"); |
09e9de78 | 190 | killall(); |
23b4aba9 KM |
191 | nogripe = 1; |
192 | close_rewind(); | |
193 | Exit(X_REWRITE); | |
194 | } | |
195 | ||
/*
 * sigpipe - SIGPIPE handler (installed by enslave()); reports the
 * broken pipe through quit().
 */
void
sigpipe()
{
	quit("Broken pipe\n");
}
202 | ||
332edf81 | 203 | void |
9548dea4 | 204 | flushtape() |
ae4b153c | 205 | { |
cdff0ca6 KM |
206 | int i, blks, got; |
207 | long lastfirstrec; | |
13298603 | 208 | #ifndef __STDC__ |
cdff0ca6 | 209 | int write(), read(); |
13298603 KB |
210 | #endif |
211 | ||
cdff0ca6 | 212 | int siz = (char *)nextblock - (char *)slp->req; |
ae4b153c | 213 | |
cdff0ca6 KM |
214 | slp->req[trecno].count = 0; /* Sentinel */ |
215 | ||
216 | if (atomic(write, slp->fd, slp->req, siz) != siz) | |
332edf81 | 217 | quit("error writing command pipe: %s\n", strerror(errno)); |
cdff0ca6 KM |
218 | slp->sent = 1; /* we sent a request, read the response later */ |
219 | ||
220 | lastfirstrec = slp->firstrec; | |
221 | ||
222 | if (++slp >= &slaves[SLAVES]) | |
223 | slp = &slaves[0]; | |
224 | ||
225 | /* Read results back from next slave */ | |
226 | if (slp->sent) { | |
227 | if (atomic(read, slp->fd, &got, sizeof got) != sizeof got) { | |
228 | perror(" DUMP: error reading command pipe in master"); | |
229 | dumpabort(); | |
230 | } | |
231 | slp->sent = 0; | |
232 | ||
233 | /* Check for end of tape */ | |
234 | if (got < writesize) { | |
235 | msg("End of tape detected\n"); | |
236 | ||
237 | /* | |
238 | * Drain the results, don't care what the values were. | |
239 | * If we read them here then trewind won't... | |
240 | */ | |
241 | for (i = 0; i < SLAVES; i++) { | |
242 | if (slaves[i].sent) { | |
243 | if (atomic(read, slaves[i].fd, &got, | |
244 | sizeof got) != sizeof got) { | |
245 | perror(" DUMP: error reading command pipe in master"); | |
246 | dumpabort(); | |
247 | } | |
248 | slaves[i].sent = 0; | |
249 | } | |
250 | } | |
251 | ||
252 | close_rewind(); | |
253 | rollforward(); | |
254 | return; | |
255 | } | |
256 | } | |
257 | ||
258 | blks = 0; | |
259 | if (spcl.c_type != TS_END) { | |
260 | for (i = 0; i < spcl.c_count; i++) | |
261 | if (spcl.c_addr[i] != 0) | |
262 | blks++; | |
263 | } | |
264 | slp->count = lastspclrec + blks + 1 - spcl.c_tapea; | |
265 | slp->tapea = spcl.c_tapea; | |
266 | slp->firstrec = lastfirstrec + ntrec; | |
267 | slp->inode = curino; | |
268 | nextblock = slp->tblock; | |
ae4b153c | 269 | trecno = 0; |
09e9de78 | 270 | asize += tenths; |
1ddebffe | 271 | blockswritten += ntrec; |
dd80a182 | 272 | blocksthisvol += ntrec; |
70c0f96a | 273 | if (!pipeout && (blocksperfile ? |
dd80a182 | 274 | (blocksthisvol >= blocksperfile) : (asize > tsize))) { |
ae4b153c | 275 | close_rewind(); |
cdff0ca6 | 276 | startnewtape(0); |
ae4b153c BJ |
277 | } |
278 | timeest(); | |
279 | } | |
280 | ||
332edf81 | 281 | void |
1db7a225 | 282 | trewind() |
ae4b153c | 283 | { |
09e9de78 | 284 | int f; |
cdff0ca6 | 285 | int got; |
a47b7e40 KM |
286 | |
287 | if (pipeout) | |
288 | return; | |
cdff0ca6 KM |
289 | for (f = 0; f < SLAVES; f++) { |
290 | /* | |
291 | * Drain the results, but unlike EOT we DO (or should) care | |
292 | * what the return values were, since if we detect EOT after | |
293 | * we think we've written the last blocks to the tape anyway, | |
294 | * we have to replay those blocks with rollforward. | |
295 | * | |
296 | * fixme: punt for now. | |
297 | */ | |
298 | if (slaves[f].sent) { | |
299 | if (atomic(read, slaves[f].fd, &got, sizeof got) | |
300 | != sizeof got) { | |
301 | perror(" DUMP: error reading command pipe in master"); | |
302 | dumpabort(); | |
303 | } | |
304 | slaves[f].sent = 0; | |
305 | if (got != writesize) { | |
306 | msg("EOT detected in last 2 tape records!\n"); | |
307 | msg("Use a longer tape, decrease the size estimate\n"); | |
308 | quit("or use no size estimate at all.\n"); | |
309 | } | |
310 | } | |
311 | close(slaves[f].fd); | |
312 | } | |
332edf81 CT |
313 | while (wait((int *)NULL) >= 0) /* wait for any signals from slaves */ |
314 | /* void */; | |
dd80a182 | 315 | msg("Closing %s\n", tape); |
cdff0ca6 | 316 | |
23b4aba9 | 317 | #ifdef RDUMP |
a40f6134 KM |
318 | if (host) { |
319 | rmtclose(); | |
320 | while (rmtopen(tape, 0) < 0) | |
321 | sleep(10); | |
322 | rmtclose(); | |
323 | return; | |
324 | } | |
cdff0ca6 | 325 | #endif |
9548dea4 | 326 | close(tapefd); |
be3f486f BJ |
327 | while ((f = open(tape, 0)) < 0) |
328 | sleep (10); | |
329 | close(f); | |
ae4b153c BJ |
330 | } |
331 | ||
332edf81 | 332 | void |
ae4b153c BJ |
333 | close_rewind() |
334 | { | |
1db7a225 | 335 | trewind(); |
dd80a182 MK |
336 | if (nexttape) |
337 | return; | |
23b4aba9 | 338 | if (!nogripe) { |
70c0f96a KM |
339 | msg("Change Volumes: Mount volume #%d\n", tapeno+1); |
340 | broadcast("CHANGE DUMP VOLUMES!\7\7\n"); | |
ae4b153c | 341 | } |
dd80a182 | 342 | while (!query("Is the new volume mounted and ready to go?")) |
a40f6134 | 343 | if (query("Do you want to abort?")) { |
ae4b153c | 344 | dumpabort(); |
a40f6134 KM |
345 | /*NOTREACHED*/ |
346 | } | |
ae4b153c BJ |
347 | } |
348 | ||
#ifdef ROLLDEBUG
/*
 * do_sum - debugging aid: XOR together the TP_BSIZE bytes of a tape
 * record and return the result.
 */
int
do_sum(block)
	union u_spcl *block;
{
	char sum = 0;
	int i = 0;

	while (i < TP_BSIZE) {
		sum ^= block->dummy[i];
		i++;
	}
	return(sum);
}
#endif
363 | ||
364 | void | |
365 | rollforward() | |
366 | { | |
367 | register struct req *p, *q, *prev; | |
368 | register struct slave *tslp; | |
369 | int i, next, size, savedtapea, got; | |
370 | union u_spcl *ntb, *otb; | |
371 | #ifdef ROLLDEBUG | |
372 | int j; | |
373 | #endif | |
374 | tslp = &slaves[SLAVES]; | |
375 | ntb = (union u_spcl *)tslp->tblock[1]; | |
376 | ||
377 | /* | |
378 | * Each of the N slaves should have requests that need to | |
379 | * be replayed on the next tape. Use the extra slave buffers | |
380 | * (slaves[SLAVES]) to construct request lists to be sent to | |
381 | * each slave in turn. | |
382 | */ | |
383 | for (i = 0; i < SLAVES; i++) { | |
384 | q = &tslp->req[1]; | |
385 | otb = (union u_spcl *)slp->tblock; | |
386 | ||
387 | /* | |
388 | * For each request in the current slave, copy it to tslp. | |
389 | */ | |
390 | #ifdef ROLLDEBUG | |
391 | printf("replaying reqs to slave %d (%d)\n", slp - &slaves[0], | |
392 | slp->pid); | |
393 | j = 0; | |
394 | #endif | |
395 | ||
396 | for (p = slp->req; p->count > 0; p += p->count) { | |
397 | #ifdef ROLLDEBUG | |
398 | printf(" req %d count %d dblk %d\n", | |
399 | j++, p->count, p->dblk); | |
400 | if (p->dblk == 0) | |
401 | printf("\tsum %x\n", do_sum(otb)); | |
402 | #endif | |
403 | *q = *p; | |
404 | if (p->dblk == 0) | |
405 | *ntb++ = *otb++; /* copy the datablock also */ | |
406 | prev = q; | |
407 | q += q->count; | |
408 | } | |
409 | if (prev->dblk != 0) | |
410 | prev->count -= 1; | |
411 | else | |
412 | ntb--; | |
413 | q -= 1; | |
414 | q->count = 0; | |
415 | q = &tslp->req[0]; | |
416 | if (i == 0) { | |
417 | q->dblk = 0; | |
418 | q->count = 1; | |
419 | trecno = 0; | |
420 | nextblock = tslp->tblock; | |
421 | savedtapea = spcl.c_tapea; | |
422 | spcl.c_tapea = slp->tapea; | |
423 | startnewtape(0); | |
424 | spcl.c_tapea = savedtapea; | |
425 | lastspclrec = savedtapea - 1; | |
426 | } | |
427 | size = (char *)ntb - (char *)q; | |
428 | if (atomic(write, slp->fd, q, size) != size) { | |
429 | perror(" DUMP: error writing command pipe"); | |
430 | dumpabort(); | |
431 | } | |
432 | slp->sent = 1; | |
433 | #ifdef ROLLDEBUG | |
434 | printf("after the shift:\n"); | |
435 | j = 0; | |
436 | for (p = tslp->req; p->count > 0; p += p->count) { | |
437 | printf(" req %d count %d dblk %d\n", | |
438 | j++, p->count, p->dblk); | |
439 | if (p->dblk == 0) { | |
440 | /* dump block also */ | |
441 | } | |
442 | } | |
443 | #endif | |
444 | if (++slp >= &slaves[SLAVES]) | |
445 | slp = &slaves[0]; | |
446 | ||
447 | q->count = 1; | |
448 | ||
449 | if (prev->dblk != 0) { | |
450 | /* | |
451 | * If the last one was a disk block, make the | |
452 | * first of this one be the last bit of that disk | |
453 | * block... | |
454 | */ | |
455 | q->dblk = prev->dblk + | |
456 | prev->count * (TP_BSIZE / DEV_BSIZE); | |
457 | ntb = (union u_spcl *)tslp->tblock; | |
458 | } else { | |
459 | /* | |
460 | * It wasn't a disk block. Copy the data to its | |
461 | * new location in the buffer. | |
462 | */ | |
463 | q->dblk = 0; | |
464 | *((union u_spcl *)tslp->tblock) = *ntb; | |
465 | ntb = (union u_spcl *)tslp->tblock[1]; | |
466 | } | |
467 | } | |
468 | slp->req[0] = *q; | |
469 | nextblock = slp->tblock; | |
470 | if (q->dblk == 0) | |
471 | nextblock++; | |
472 | trecno = 1; | |
473 | ||
474 | /* | |
475 | * Clear the first slaves' response. One hopes that it | |
476 | * worked ok, otherwise the tape is much too short! | |
477 | */ | |
478 | if (slp->sent) { | |
479 | if (atomic(read, slp->fd, &got, sizeof got) != sizeof got) { | |
480 | perror(" DUMP: error reading command pipe in master"); | |
481 | dumpabort(); | |
482 | } | |
483 | slp->sent = 0; | |
484 | ||
485 | if (got != writesize) { | |
486 | quit("EOT detected at start of the tape!\n"); | |
487 | } | |
488 | } | |
489 | } | |
490 | ||
ae4b153c | 491 | /* |
cdff0ca6 KM |
492 | * We implement taking and restoring checkpoints on the tape level. |
493 | * When each tape is opened, a new process is created by forking; this | |
494 | * saves all of the necessary context in the parent. The child | |
495 | * continues the dump; the parent waits around, saving the context. | |
496 | * If the child returns X_REWRITE, then it had problems writing that tape; | |
497 | * this causes the parent to fork again, duplicating the context, and | |
498 | * everything continues as if nothing had happened. | |
ae4b153c | 499 | */ |
332edf81 | 500 | void |
cdff0ca6 KM |
501 | startnewtape(top) |
502 | int top; | |
ae4b153c BJ |
503 | { |
504 | int parentpid; | |
505 | int childpid; | |
506 | int status; | |
507 | int waitpid; | |
cdff0ca6 | 508 | int i; |
1ff6a29e | 509 | char *p; |
7dd80c8d KM |
510 | #ifdef sunos |
511 | void (*interrupt)(); | |
512 | char *index(); | |
513 | #else | |
514 | sig_t interrupt; | |
515 | #endif | |
ae4b153c | 516 | |
392fe950 | 517 | interrupt = signal(SIGINT, SIG_IGN); |
ae4b153c BJ |
518 | parentpid = getpid(); |
519 | ||
520 | restore_check_point: | |
392fe950 | 521 | (void)signal(SIGINT, interrupt); |
a40f6134 KM |
522 | /* |
523 | * All signals are inherited... | |
524 | */ | |
ae4b153c | 525 | childpid = fork(); |
23b4aba9 | 526 | if (childpid < 0) { |
ae4b153c BJ |
527 | msg("Context save fork fails in parent %d\n", parentpid); |
528 | Exit(X_ABORT); | |
529 | } | |
23b4aba9 | 530 | if (childpid != 0) { |
ae4b153c BJ |
531 | /* |
532 | * PARENT: | |
533 | * save the context by waiting | |
534 | * until the child doing all of the work returns. | |
23b4aba9 | 535 | * don't catch the interrupt |
ae4b153c | 536 | */ |
a40f6134 | 537 | signal(SIGINT, SIG_IGN); |
ae4b153c BJ |
538 | #ifdef TDEBUG |
539 | msg("Tape: %d; parent process: %d child process %d\n", | |
540 | tapeno+1, parentpid, childpid); | |
541 | #endif TDEBUG | |
23b4aba9 KM |
542 | while ((waitpid = wait(&status)) != childpid) |
543 | msg("Parent %d waiting for child %d has another child %d return\n", | |
544 | parentpid, childpid, waitpid); | |
545 | if (status & 0xFF) { | |
ae4b153c BJ |
546 | msg("Child %d returns LOB status %o\n", |
547 | childpid, status&0xFF); | |
548 | } | |
549 | status = (status >> 8) & 0xFF; | |
550 | #ifdef TDEBUG | |
23b4aba9 | 551 | switch(status) { |
ae4b153c BJ |
552 | case X_FINOK: |
553 | msg("Child %d finishes X_FINOK\n", childpid); | |
554 | break; | |
cdff0ca6 | 555 | case X_ABORT: |
ae4b153c BJ |
556 | msg("Child %d finishes X_ABORT\n", childpid); |
557 | break; | |
558 | case X_REWRITE: | |
559 | msg("Child %d finishes X_REWRITE\n", childpid); | |
560 | break; | |
561 | default: | |
23b4aba9 | 562 | msg("Child %d finishes unknown %d\n", |
a40f6134 | 563 | childpid, status); |
ae4b153c BJ |
564 | break; |
565 | } | |
566 | #endif TDEBUG | |
23b4aba9 | 567 | switch(status) { |
ae4b153c BJ |
568 | case X_FINOK: |
569 | Exit(X_FINOK); | |
570 | case X_ABORT: | |
571 | Exit(X_ABORT); | |
572 | case X_REWRITE: | |
573 | goto restore_check_point; | |
574 | default: | |
575 | msg("Bad return code from dump: %d\n", status); | |
576 | Exit(X_ABORT); | |
577 | } | |
578 | /*NOTREACHED*/ | |
579 | } else { /* we are the child; just continue */ | |
580 | #ifdef TDEBUG | |
581 | sleep(4); /* allow time for parent's message to get out */ | |
582 | msg("Child on Tape %d has parent %d, my pid = %d\n", | |
583 | tapeno+1, parentpid, getpid()); | |
a40f6134 | 584 | #endif TDEBUG |
1ff6a29e MK |
585 | /* |
586 | * If we have a name like "/dev/rmt0,/dev/rmt1", | |
587 | * use the name before the comma first, and save | |
dd80a182 | 588 | * the remaining names for subsequent volumes. |
1ff6a29e | 589 | */ |
cdff0ca6 | 590 | tapeno++; /* current tape sequence */ |
dd80a182 MK |
591 | if (nexttape || index(tape, ',')) { |
592 | if (nexttape && *nexttape) | |
593 | tape = nexttape; | |
594 | if (p = index(tape, ',')) { | |
595 | *p = '\0'; | |
596 | nexttape = p + 1; | |
597 | } else | |
598 | nexttape = NULL; | |
599 | msg("Dumping volume %d on %s\n", tapeno, tape); | |
600 | } | |
23b4aba9 | 601 | #ifdef RDUMP |
9548dea4 KM |
602 | while ((tapefd = (host ? rmtopen(tape, 2) : |
603 | pipeout ? 1 : open(tape, O_WRONLY|O_CREAT, 0666))) < 0) | |
cdff0ca6 KM |
604 | #else |
605 | while ((tapefd = (pipeout ? 1 : | |
606 | open(tape, O_WRONLY|O_CREAT, 0666))) < 0) | |
607 | #endif | |
e9a09562 | 608 | { |
70c0f96a | 609 | msg("Cannot open output \"%s\".\n", tape); |
e9a09562 | 610 | if (!query("Do you want to retry the open?")) |
23b4aba9 | 611 | dumpabort(); |
e9a09562 | 612 | } |
23b4aba9 KM |
613 | |
614 | enslave(); /* Share open tape file descriptor with slaves */ | |
ae4b153c BJ |
615 | |
616 | asize = 0; | |
dd80a182 | 617 | blocksthisvol = 0; |
cdff0ca6 KM |
618 | if (top) |
619 | newtape++; /* new tape signal */ | |
620 | spcl.c_count = slp->count; | |
621 | /* | |
622 | * measure firstrec in TP_BSIZE units since restore doesn't | |
623 | * know the correct ntrec value... | |
624 | */ | |
625 | spcl.c_firstrec = slp->firstrec; | |
ae4b153c BJ |
626 | spcl.c_volume++; |
627 | spcl.c_type = TS_TAPE; | |
022f1b15 | 628 | spcl.c_flags |= DR_NEWHEADER; |
cdff0ca6 | 629 | writeheader(slp->inode); |
022f1b15 | 630 | spcl.c_flags &=~ DR_NEWHEADER; |
ae4b153c | 631 | if (tapeno > 1) |
dd80a182 | 632 | msg("Volume %d begins with blocks from inode %d\n", |
cdff0ca6 | 633 | tapeno, slp->inode); |
ae4b153c BJ |
634 | } |
635 | } | |
636 | ||
332edf81 | 637 | void |
ae4b153c BJ |
638 | dumpabort() |
639 | { | |
cdff0ca6 | 640 | |
23b4aba9 | 641 | if (master != 0 && master != getpid()) |
a40f6134 | 642 | kill(master, SIGTERM); /* Signals master to call dumpabort */ |
09e9de78 KM |
643 | else { |
644 | killall(); | |
645 | msg("The ENTIRE dump is aborted.\n"); | |
646 | } | |
ae4b153c BJ |
647 | Exit(X_ABORT); |
648 | } | |
649 | ||
/*
 * Exit - terminate the process with the given status.  With TDEBUG
 * defined, the pid and status are logged first.
 */
void
Exit(status)
	int status;
{

#ifdef TDEBUG
	msg("pid = %d exits with status %d\n", getpid(), status);
#endif /* TDEBUG */
	exit(status);
}
23b4aba9 | 660 | |
09e9de78 | 661 | /* |
cdff0ca6 | 662 | * proceed - handler for SIGUSR2, used to synchronize IO between the slaves. |
09e9de78 | 663 | */ |
332edf81 | 664 | void |
cdff0ca6 | 665 | proceed() |
09e9de78 | 666 | { |
cdff0ca6 KM |
667 | |
668 | if (ready) | |
669 | longjmp(jmpbuf, 1); | |
670 | caught++; | |
09e9de78 | 671 | } |
23b4aba9 | 672 | |
332edf81 | 673 | void |
23b4aba9 KM |
674 | enslave() |
675 | { | |
cdff0ca6 | 676 | int cmd[2]; |
09e9de78 | 677 | register int i, j; |
23b4aba9 KM |
678 | |
679 | master = getpid(); | |
cdff0ca6 KM |
680 | |
681 | signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */ | |
a40f6134 KM |
682 | signal(SIGPIPE, sigpipe); |
683 | signal(SIGUSR1, tperror); /* Slave sends SIGUSR1 on tape errors */ | |
cdff0ca6 KM |
684 | signal(SIGUSR2, proceed); /* Slave sends SIGUSR2 to next slave */ |
685 | ||
09e9de78 | 686 | for (i = 0; i < SLAVES; i++) { |
cdff0ca6 KM |
687 | if (i == slp - &slaves[0]) { |
688 | caught = 1; | |
09e9de78 | 689 | } else { |
cdff0ca6 | 690 | caught = 0; |
09e9de78 | 691 | } |
cdff0ca6 KM |
692 | |
693 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, cmd) < 0 || | |
694 | (slaves[i].pid = fork()) < 0) | |
332edf81 CT |
695 | quit("too many slaves, %d (recompile smaller): %s\n", |
696 | i, strerror(errno)); | |
cdff0ca6 KM |
697 | |
698 | slaves[i].fd = cmd[1]; | |
699 | slaves[i].sent = 0; | |
700 | if (slaves[i].pid == 0) { /* Slave starts up here */ | |
23b4aba9 | 701 | for (j = 0; j <= i; j++) |
cdff0ca6 | 702 | close(slaves[j].fd); |
a40f6134 | 703 | signal(SIGINT, SIG_IGN); /* Master handles this */ |
cdff0ca6 | 704 | doslave(cmd[0], i); |
23b4aba9 KM |
705 | Exit(X_FINOK); |
706 | } | |
23b4aba9 | 707 | } |
cdff0ca6 KM |
708 | |
709 | for (i = 0; i < SLAVES; i++) | |
710 | atomic(write, slaves[i].fd, &slaves[(i + 1) % SLAVES].pid, | |
711 | sizeof slaves[0].pid); | |
712 | ||
713 | master = 0; | |
23b4aba9 KM |
714 | } |
715 | ||
332edf81 | 716 | void |
09e9de78 | 717 | killall() |
87801efd | 718 | { |
09e9de78 | 719 | register int i; |
87801efd | 720 | |
09e9de78 | 721 | for (i = 0; i < SLAVES; i++) |
cdff0ca6 KM |
722 | if (slaves[i].pid > 0) |
723 | kill(slaves[i].pid, SIGKILL); | |
87801efd KM |
724 | } |
725 | ||
09e9de78 KM |
726 | /* |
727 | * Synchronization - each process has a lockfile, and shares file | |
728 | * descriptors to the following process's lockfile. When our write | |
729 | * completes, we release our lock on the following process's lock- | |
730 | * file, allowing the following process to lock it and proceed. We | |
731 | * get the lock back for the next cycle by swapping descriptors. | |
732 | */ | |
332edf81 | 733 | void |
cdff0ca6 KM |
734 | doslave(cmd, slave_number) |
735 | register int cmd; | |
736 | int slave_number; | |
23b4aba9 | 737 | { |
cdff0ca6 KM |
738 | register int nread; |
739 | int nextslave, size, wrote, eot_count; | |
13298603 KB |
740 | #ifndef __STDC__ |
741 | int read(); | |
742 | #endif | |
cdff0ca6 KM |
743 | #ifdef ROLLDEBUG |
744 | int dodump = 2; | |
745 | FILE *out; | |
746 | char name[64]; | |
747 | #endif | |
87801efd | 748 | |
9548dea4 KM |
749 | /* |
750 | * Need our own seek pointer. | |
751 | */ | |
752 | close(diskfd); | |
753 | if ((diskfd = open(disk, O_RDONLY)) < 0) | |
332edf81 | 754 | quit("slave couldn't reopen disk: %s\n", strerror(errno)); |
cdff0ca6 KM |
755 | |
756 | /* | |
757 | * Need the pid of the next slave in the loop... | |
758 | */ | |
759 | if ((nread = atomic(read, cmd, &nextslave, sizeof nextslave)) | |
760 | != sizeof nextslave) { | |
761 | quit("master/slave protocol botched - didn't get pid of next slave.\n"); | |
762 | } | |
763 | ||
764 | #ifdef ROLLDEBUG | |
765 | sprintf(name, "slave.%d", slave_number); | |
766 | out = fopen(name, "w"); | |
767 | #endif | |
09e9de78 | 768 | /* |
a40f6134 | 769 | * Get list of blocks to dump, read the blocks into tape buffer |
09e9de78 | 770 | */ |
cdff0ca6 KM |
771 | while ((nread = atomic(read, cmd, slp->req, reqsiz)) == reqsiz) { |
772 | register struct req *p = slp->req; | |
773 | int j; | |
774 | struct req *rover; | |
775 | char (*orover)[TP_BSIZE]; | |
776 | ||
777 | j = 0; | |
778 | for (trecno = 0; trecno < ntrec; | |
779 | trecno += p->count, p += p->count) { | |
23b4aba9 | 780 | if (p->dblk) { |
cdff0ca6 | 781 | bread(p->dblk, slp->tblock[trecno], |
a40f6134 | 782 | p->count * TP_BSIZE); |
23b4aba9 | 783 | } else { |
a40f6134 | 784 | if (p->count != 1 || atomic(read, cmd, |
cdff0ca6 | 785 | slp->tblock[trecno], TP_BSIZE) != TP_BSIZE) |
332edf81 | 786 | quit("master/slave protocol botched.\n"); |
23b4aba9 | 787 | } |
cdff0ca6 KM |
788 | #ifdef ROLLDEBUG |
789 | if (dodump) { | |
790 | fprintf(out, " req %d count %d dblk %d\n", | |
791 | j++, p->count, p->dblk); | |
792 | if (p->dblk == 0) { | |
793 | fprintf(out, "\tsum %x\n", | |
794 | do_sum(slp->tblock[trecno])); | |
795 | } | |
796 | } | |
797 | #endif | |
798 | } | |
799 | #ifdef ROLLDEBUG | |
800 | if (dodump) { | |
801 | fprintf(out, "\n"); | |
23b4aba9 | 802 | } |
cdff0ca6 KM |
803 | if (--dodump == 0) { |
804 | fclose(out); | |
805 | } | |
806 | #endif | |
807 | if (setjmp(jmpbuf) == 0) { | |
808 | ready = 1; | |
809 | if (!caught) | |
810 | pause(); | |
811 | } | |
812 | ready = 0; | |
813 | caught = 0; | |
814 | ||
815 | /* Try to write the data... */ | |
816 | eot_count = 0; | |
817 | size = 0; | |
a40f6134 | 818 | |
cdff0ca6 | 819 | while (eot_count < 10 && size < writesize) { |
23b4aba9 | 820 | #ifdef RDUMP |
cdff0ca6 KM |
821 | if (host) |
822 | wrote = rmtwrite(slp->tblock[0]+size, | |
823 | writesize-size); | |
dd80a182 | 824 | else |
cdff0ca6 KM |
825 | #endif |
826 | wrote = write(tapefd, slp->tblock[0]+size, | |
827 | writesize-size); | |
828 | #ifdef WRITEDEBUG | |
829 | printf("slave %d wrote %d\n", slave_number, wrote); | |
830 | #endif | |
831 | if (wrote < 0) | |
832 | break; | |
833 | if (wrote == 0) | |
834 | eot_count++; | |
835 | size += wrote; | |
836 | } | |
837 | ||
838 | #ifdef WRITEDEBUG | |
839 | if (size != writesize) | |
840 | printf("slave %d only wrote %d out of %d bytes and gave up.\n", | |
841 | slave_number, size, writesize); | |
842 | #endif | |
843 | ||
844 | if (eot_count > 0) | |
845 | size = 0; | |
846 | ||
847 | /* | |
848 | * fixme: Pyramids running OSx return ENOSPC | |
849 | * at EOT on 1/2 inch drives. | |
850 | */ | |
851 | if (size < 0) { | |
a40f6134 KM |
852 | kill(master, SIGUSR1); |
853 | for (;;) | |
854 | sigpause(0); | |
cdff0ca6 KM |
855 | } else { |
856 | /* | |
857 | * pass size of write back to master | |
858 | * (for EOT handling) | |
859 | */ | |
860 | atomic(write, cmd, &size, sizeof size); | |
861 | } | |
862 | ||
863 | /* | |
864 | * If partial write, don't want next slave to go. | |
865 | * Also jolts him awake. | |
866 | */ | |
867 | kill(nextslave, SIGUSR2); | |
868 | } | |
332edf81 CT |
869 | if (nread != 0) |
870 | quit("error reading command pipe: %s\n", strerror(errno)); | |
23b4aba9 | 871 | } |
ca485693 KM |
872 | |
/*
 * atomic - repeat a read()- or write()-style call until `count' bytes
 * have been transferred.
 *
 * A single call on a pipe may move fewer bytes than requested, so keep
 * calling func on the remainder.  Returns func's negative result if a
 * call fails; otherwise the number of bytes transferred, which is less
 * than count only if func returned zero.
 */
int
atomic(func, fd, buf, count)
	int (*func)(), fd, count;
	char *buf;
{
	int left = count;
	int n;

	for (;;) {
		n = (*func)(fd, buf, left);
		if (n <= 0)
			break;
		left -= n;
		if (left <= 0)
			break;
		buf += n;
	}
	if (n < 0)
		return (n);
	return (count - left);
}