make lockfile generate a more sensible error message
[unix-history] / usr / src / sbin / dump / tape.c
CommitLineData
76797561
DF
1/*
2 * Copyright (c) 1980 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 */
6
#ifndef lint
/* SCCS identification string; omitted when lint is defined. */
static char sccsid[] = "@(#)tape.c 5.4 (Berkeley) %G%";
#endif /* not lint */
7a65e725 10
09e9de78 11#include <sys/file.h>
a40f6134 12#include "dump.h"
ae4b153c 13
1ddebffe
SL
14char (*tblock)[TP_BSIZE]; /* Pointer to malloc()ed buffer for tape */
15int writesize; /* Size of malloc()ed buffer for tape */
f5bba473 16int trecno = 0;
a40f6134
KM
17extern int ntrec; /* blocking factor on tape */
18extern int cartridge;
19extern int read(), write();
20#ifdef RDUMP
21extern char *host;
22#endif RDUMP
1ddebffe
SL
23
24/*
09e9de78 25 * Concurrent dump mods (Caltech) - disk block reading and tape writing
23b4aba9
KM
26 * are exported to several slave processes. While one slave writes the
27 * tape, the others read disk blocks; they pass control of the tape in
09e9de78 28 * a ring via flock(). The parent process traverses the filesystem and
a40f6134 29 * sends spclrec()'s and lists of daddr's to the slaves via pipes.
23b4aba9
KM
30 */
31struct req { /* instruction packets sent to slaves */
32 daddr_t dblk;
33 int count;
34} *req;
35int reqsiz;
36
09e9de78 37#define SLAVES 3 /* 1 slave writing, 1 reading, 1 for slack */
a40f6134
KM
38int slavefd[SLAVES]; /* pipes from master to each slave */
39int slavepid[SLAVES]; /* used by killall() */
40int rotor; /* next slave to be instructed */
41int master; /* pid of master, for sending error signals */
42int tenths; /* length of tape used per block written */
23b4aba9 43
1ddebffe
SL
44alloctape()
45{
09e9de78 46 int pgoff = getpagesize() - 1;
a40f6134 47
1ddebffe 48 writesize = ntrec * TP_BSIZE;
a40f6134 49 reqsiz = ntrec * sizeof(struct req);
09e9de78 50 /*
a40f6134
KM
51 * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode
52 * (see DEC TU80 User's Guide). The shorter gaps of 6250-bpi require
53 * repositioning after stopping, i.e, streaming mode, where the gap is
54 * variable, 0.30" to 0.45". The gap is maximal when the tape stops.
55 */
56 tenths = writesize/density + (cartridge ? 16 : density == 625 ? 5 : 8);
57 /*
58 * Allocate tape buffer contiguous with the array of instruction
59 * packets, so flusht() can write them together with one write().
60 * Align tape buffer on page boundary to speed up tape write().
09e9de78 61 */
09e9de78
KM
62 req = (struct req *)malloc(reqsiz + writesize + pgoff);
63 if (req == NULL)
64 return(0);
65 tblock = (char (*)[TP_BSIZE]) (((long)&req[ntrec] + pgoff) &~ pgoff);
a40f6134 66 req = (struct req *)tblock - ntrec;
09e9de78 67 return(1);
1ddebffe
SL
68}
69
a40f6134 70
ae4b153c 71taprec(dp)
b6407c9d 72 char *dp;
ae4b153c 73{
23b4aba9
KM
74 req[trecno].dblk = (daddr_t)0;
75 req[trecno].count = 1;
09e9de78
KM
76 *(union u_spcl *)(*tblock++) = *(union u_spcl *)dp; /* movc3 */
77 trecno++;
ae4b153c 78 spcl.c_tapea++;
09e9de78 79 if(trecno >= ntrec)
ae4b153c
BJ
80 flusht();
81}
82
f5bba473
KM
83dmpblk(blkno, size)
84 daddr_t blkno;
85 int size;
ae4b153c 86{
a40f6134 87 int avail, tpblks, dblkno;
f5bba473 88
b6407c9d 89 dblkno = fsbtodb(sblock, blkno);
23b4aba9
KM
90 tpblks = size / TP_BSIZE;
91 while ((avail = MIN(tpblks, ntrec - trecno)) > 0) {
23b4aba9
KM
92 req[trecno].dblk = dblkno;
93 req[trecno].count = avail;
a40f6134 94 trecno += avail;
f5bba473 95 spcl.c_tapea += avail;
a40f6134 96 if (trecno >= ntrec)
23b4aba9 97 flusht();
b6407c9d
KM
98 dblkno += avail * (TP_BSIZE / DEV_BSIZE);
99 tpblks -= avail;
f5bba473 100 }
ae4b153c
BJ
101}
102
103int nogripe = 0;
104
23b4aba9
KM
105tperror() {
106 if (pipeout) {
107 msg("Tape write error on %s\n", tape);
108 msg("Cannot recover\n");
109 dumpabort();
110 /* NOTREACHED */
111 }
a40f6134 112 msg("Tape write error %d feet into tape %d\n", asize/120L, tapeno);
23b4aba9
KM
113 broadcast("TAPE ERROR!\n");
114 if (!query("Do you want to restart?"))
115 dumpabort();
116 msg("This tape will rewind. After it is rewound,\n");
117 msg("replace the faulty tape with a new one;\n");
118 msg("this dump volume will be rewritten.\n");
09e9de78 119 killall();
23b4aba9
KM
120 nogripe = 1;
121 close_rewind();
122 Exit(X_REWRITE);
123}
124
/*
 * SIGPIPE handler (installed by enslave()): report the broken pipe and
 * abort the dump.
 */
sigpipe()
{
	msg("Broken pipe\n");
	dumpabort();
}
131
#ifdef RDUMP
/*
 * compatibility routine
 *
 * Calls spclrec() ntrec times.  The value passed in i is ignored; the
 * parameter is only reused as the loop counter.
 */
tflush(i)
	int i;
{
	i = 0;
	while (i < ntrec) {
		spclrec();
		i++;
	}
}
#endif /* RDUMP */
144
ae4b153c
BJ
145flusht()
146{
a40f6134 147 int siz = (char *)tblock - (char *)req;
ae4b153c 148
a40f6134
KM
149 if (atomic(write, slavefd[rotor], req, siz) != siz) {
150 perror(" DUMP: error writing command pipe");
09e9de78
KM
151 dumpabort();
152 }
23b4aba9
KM
153 if (++rotor >= SLAVES) rotor = 0;
154 tblock = (char (*)[TP_BSIZE]) &req[ntrec];
ae4b153c 155 trecno = 0;
09e9de78 156 asize += tenths;
1ddebffe 157 blockswritten += ntrec;
a47b7e40 158 if (!pipeout && asize > tsize) {
ae4b153c
BJ
159 close_rewind();
160 otape();
161 }
162 timeest();
163}
164
165rewind()
166{
09e9de78 167 int f;
a47b7e40
KM
168
169 if (pipeout)
170 return;
23b4aba9
KM
171 for (f = 0; f < SLAVES; f++)
172 close(slavefd[f]);
173 while (wait(NULL) >= 0) ; /* wait for any signals from slaves */
174 msg("Tape rewinding\n");
175#ifdef RDUMP
a40f6134
KM
176 if (host) {
177 rmtclose();
178 while (rmtopen(tape, 0) < 0)
179 sleep(10);
180 rmtclose();
181 return;
182 }
183#endif RDUMP
be3f486f
BJ
184 close(to);
185 while ((f = open(tape, 0)) < 0)
186 sleep (10);
187 close(f);
ae4b153c
BJ
188}
189
190close_rewind()
191{
23b4aba9
KM
192 rewind();
193 if (!nogripe) {
ae4b153c
BJ
194 msg("Change Tapes: Mount tape #%d\n", tapeno+1);
195 broadcast("CHANGE TAPES!\7\7\n");
196 }
23b4aba9 197 while (!query("Is the new tape mounted and ready to go?"))
a40f6134 198 if (query("Do you want to abort?")) {
ae4b153c 199 dumpabort();
a40f6134
KM
200 /*NOTREACHED*/
201 }
ae4b153c
BJ
202}
203
204/*
23b4aba9 205 * We implement taking and restoring checkpoints on the tape level.
ae4b153c
BJ
206 * When each tape is opened, a new process is created by forking; this
207 * saves all of the necessary context in the parent. The child
208 * continues the dump; the parent waits around, saving the context.
209 * If the child returns X_REWRITE, then it had problems writing that tape;
210 * this causes the parent to fork again, duplicating the context, and
211 * everything continues as if nothing had happened.
212 */
213
214otape()
215{
216 int parentpid;
217 int childpid;
218 int status;
219 int waitpid;
a40f6134 220 int (*interrupt)() = signal(SIGINT, SIG_IGN);
ae4b153c 221
ae4b153c
BJ
222 parentpid = getpid();
223
224 restore_check_point:
a40f6134
KM
225 signal(SIGINT, interrupt);
226 /*
227 * All signals are inherited...
228 */
ae4b153c 229 childpid = fork();
23b4aba9 230 if (childpid < 0) {
ae4b153c
BJ
231 msg("Context save fork fails in parent %d\n", parentpid);
232 Exit(X_ABORT);
233 }
23b4aba9 234 if (childpid != 0) {
ae4b153c
BJ
235 /*
236 * PARENT:
237 * save the context by waiting
238 * until the child doing all of the work returns.
23b4aba9 239 * don't catch the interrupt
ae4b153c 240 */
a40f6134 241 signal(SIGINT, SIG_IGN);
ae4b153c
BJ
242#ifdef TDEBUG
243 msg("Tape: %d; parent process: %d child process %d\n",
244 tapeno+1, parentpid, childpid);
245#endif TDEBUG
23b4aba9
KM
246 while ((waitpid = wait(&status)) != childpid)
247 msg("Parent %d waiting for child %d has another child %d return\n",
248 parentpid, childpid, waitpid);
249 if (status & 0xFF) {
ae4b153c
BJ
250 msg("Child %d returns LOB status %o\n",
251 childpid, status&0xFF);
252 }
253 status = (status >> 8) & 0xFF;
254#ifdef TDEBUG
23b4aba9 255 switch(status) {
ae4b153c
BJ
256 case X_FINOK:
257 msg("Child %d finishes X_FINOK\n", childpid);
258 break;
259 case X_ABORT:
260 msg("Child %d finishes X_ABORT\n", childpid);
261 break;
262 case X_REWRITE:
263 msg("Child %d finishes X_REWRITE\n", childpid);
264 break;
265 default:
23b4aba9 266 msg("Child %d finishes unknown %d\n",
a40f6134 267 childpid, status);
ae4b153c
BJ
268 break;
269 }
270#endif TDEBUG
23b4aba9 271 switch(status) {
ae4b153c
BJ
272 case X_FINOK:
273 Exit(X_FINOK);
274 case X_ABORT:
275 Exit(X_ABORT);
276 case X_REWRITE:
277 goto restore_check_point;
278 default:
279 msg("Bad return code from dump: %d\n", status);
280 Exit(X_ABORT);
281 }
282 /*NOTREACHED*/
283 } else { /* we are the child; just continue */
284#ifdef TDEBUG
285 sleep(4); /* allow time for parent's message to get out */
286 msg("Child on Tape %d has parent %d, my pid = %d\n",
287 tapeno+1, parentpid, getpid());
a40f6134 288#endif TDEBUG
23b4aba9 289#ifdef RDUMP
a40f6134
KM
290 while ((to = (host ? rmtopen(tape, 2) :
291 pipeout ? 1 : creat(tape, 0666))) < 0)
292#else RDUMP
23b4aba9 293 while ((to = pipeout ? 1 : creat(tape, 0666)) < 0)
09e9de78 294#endif RDUMP
23b4aba9
KM
295 if (!query("Cannot open tape. Do you want to retry the open?"))
296 dumpabort();
297
298 enslave(); /* Share open tape file descriptor with slaves */
ae4b153c
BJ
299
300 asize = 0;
301 tapeno++; /* current tape sequence */
302 newtape++; /* new tape signal */
303 spcl.c_volume++;
304 spcl.c_type = TS_TAPE;
305 spclrec();
306 if (tapeno > 1)
307 msg("Tape %d begins with blocks from ino %d\n",
308 tapeno, ino);
309 }
310}
311
ae4b153c
BJ
312dumpabort()
313{
23b4aba9 314 if (master != 0 && master != getpid())
a40f6134 315 kill(master, SIGTERM); /* Signals master to call dumpabort */
09e9de78
KM
316 else {
317 killall();
318 msg("The ENTIRE dump is aborted.\n");
319 }
ae4b153c
BJ
320 Exit(X_ABORT);
321}
322
/*
 * Terminate the process with the given exit status.  With TDEBUG
 * defined, the pid and status are logged first.
 */
Exit(status)
	int status;
{
#ifdef TDEBUG
	msg("pid = %d exits with status %d\n", getpid(), status);
#endif /* TDEBUG */
	exit(status);
}
23b4aba9 330
/*
 * Create a temporary lock file and return two independent descriptors
 * for it: fd[1] from creat() and fd[0] from a separate open().  The
 * name is unlinked before returning, so only the descriptors remain.
 * Aborts the dump if either call fails.
 *
 * could use pipe() for this if flock() worked on pipes
 *
 * NOTE(review): mktemp() followed by creat() leaves a window in which
 * the name can be taken by someone else - confirm whether that matters
 * here.  Also, msg() runs before perror(); verify that it cannot change
 * errno in between.
 */
lockfile(fd)
	int fd[2];
{
	char tmpname[sizeof "/tmp/dumplockXXXXXX"];

	strcpy(tmpname, "/tmp/dumplockXXXXXX");
	mktemp(tmpname);
	fd[1] = creat(tmpname, 0400);
	if (fd[1] < 0) {
		msg("Could not create lockfile ");
		perror(tmpname);
		dumpabort();
	}
	fd[0] = open(tmpname, 0);
	if (fd[0] < 0) {
		msg("Could not reopen lockfile ");
		perror(tmpname);
		dumpabort();
	}
	unlink(tmpname);
}
23b4aba9
KM
353
354enslave()
355{
09e9de78
KM
356 int first[2], prev[2], next[2], cmd[2]; /* file descriptors */
357 register int i, j;
23b4aba9
KM
358
359 master = getpid();
a40f6134
KM
360 signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */
361 signal(SIGPIPE, sigpipe);
362 signal(SIGUSR1, tperror); /* Slave sends SIGUSR1 on tape errors */
09e9de78
KM
363 lockfile(first);
364 for (i = 0; i < SLAVES; i++) {
365 if (i == 0) {
366 prev[0] = first[1];
367 prev[1] = first[0];
368 } else {
369 prev[0] = next[0];
370 prev[1] = next[1];
371 flock(prev[1], LOCK_EX);
372 }
c2b1998f
KM
373 if (i < SLAVES - 1) {
374 lockfile(next);
375 } else {
376 next[0] = first[0];
377 next[1] = first[1]; /* Last slave loops back */
378 }
379 if (pipe(cmd) < 0 || (slavepid[i] = fork()) < 0) {
380 msg("too many slaves, %d (recompile smaller) ", i);
381 perror("");
23b4aba9
KM
382 dumpabort();
383 }
23b4aba9 384 slavefd[i] = cmd[1];
a40f6134 385 if (slavepid[i] == 0) { /* Slave starts up here */
23b4aba9
KM
386 for (j = 0; j <= i; j++)
387 close(slavefd[j]);
a40f6134
KM
388 signal(SIGINT, SIG_IGN); /* Master handles this */
389 doslave(cmd[0], prev, next);
23b4aba9
KM
390 Exit(X_FINOK);
391 }
392 close(cmd[0]);
09e9de78
KM
393 if (i > 0) {
394 close(prev[0]);
395 close(prev[1]);
396 }
23b4aba9 397 }
09e9de78
KM
398 close(first[0]);
399 close(first[1]);
400 master = 0; rotor = 0;
23b4aba9
KM
401}
402
09e9de78 403killall()
87801efd 404{
09e9de78 405 register int i;
87801efd 406
09e9de78
KM
407 for (i = 0; i < SLAVES; i++)
408 if (slavepid[i] > 0)
409 kill(slavepid[i], SIGKILL);
87801efd
KM
410}
411
09e9de78
KM
412/*
413 * Synchronization - each process has a lockfile, and shares file
414 * descriptors to the following process's lockfile. When our write
415 * completes, we release our lock on the following process's lock-
416 * file, allowing the following process to lock it and proceed. We
417 * get the lock back for the next cycle by swapping descriptors.
418 */
a40f6134
KM
419doslave(cmd, prev, next)
420 register int cmd, prev[2], next[2];
23b4aba9 421{
a40f6134 422 register int nread, toggle = 0;
87801efd 423
23b4aba9 424 close(fi);
a40f6134 425 if ((fi = open(disk, 0)) < 0) { /* Need our own seek pointer */
09e9de78 426 perror(" DUMP: slave couldn't reopen disk");
a40f6134 427 dumpabort();
23b4aba9 428 }
09e9de78 429 /*
a40f6134 430 * Get list of blocks to dump, read the blocks into tape buffer
09e9de78 431 */
a40f6134 432 while ((nread = atomic(read, cmd, req, reqsiz)) == reqsiz) {
23b4aba9
KM
433 register struct req *p = req;
434 for (trecno = 0; trecno < ntrec; trecno += p->count, p += p->count) {
435 if (p->dblk) {
23b4aba9 436 bread(p->dblk, tblock[trecno],
a40f6134 437 p->count * TP_BSIZE);
23b4aba9 438 } else {
a40f6134
KM
439 if (p->count != 1 || atomic(read, cmd,
440 tblock[trecno], TP_BSIZE) != TP_BSIZE) {
09e9de78
KM
441 msg("Master/slave protocol botched");
442 dumpabort();
443 }
23b4aba9
KM
444 }
445 }
09e9de78 446 flock(prev[toggle], LOCK_EX); /* Wait our turn */
a40f6134 447
23b4aba9 448#ifdef RDUMP
a40f6134
KM
449 if ((host ? rmtwrite(tblock[0], writesize)
450 : write(to, tblock[0], writesize)) != writesize) {
451#else RDUMP
452 if (write(to, tblock[0], writesize) != writesize) {
09e9de78 453#endif RDUMP
a40f6134
KM
454 kill(master, SIGUSR1);
455 for (;;)
456 sigpause(0);
23b4aba9 457 }
09e9de78
KM
458 toggle ^= 1;
459 flock(next[toggle], LOCK_UN); /* Next slave's turn */
460 } /* Also jolts him awake */
a40f6134
KM
461 if (nread != 0) {
462 perror(" DUMP: error reading command pipe");
463 dumpabort();
23b4aba9 464 }
23b4aba9 465}
ca485693
KM
466
/*
 * Since a read from a pipe may not return all we asked for,
 * or a write may not write all we ask if we get a signal,
 * loop until the count is satisfied (or error).
 *
 * func is called as (*func)(fd, buf, n) and must return the number of
 * bytes transferred, 0 for no progress, or a negative value on error.
 * Returns that negative value if the last call failed, otherwise the
 * total number of bytes transferred (less than count if func returned
 * 0 before the request was satisfied).
 */
int
atomic(func, fd, buf, count)
	int (*func)(), fd, count;
	char *buf;
{
	int got;
	int need = count;

	for (;;) {
		got = (*func)(fd, buf, need);
		if (got <= 0)
			break;
		need -= got;
		if (need <= 0)
			break;
		buf += got;
	}
	if (got < 0)
		return (got);
	return (count - need);
}