/*
 * 4.4BSD snapshot (revision 8.1)
 * [unix-history] / usr / src / usr.bin / checknr / checknr.c
 */
/*
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 */

/*
 * SCCS identification strings.  They are compiled in unless the file is
 * being run through lint.
 */
#ifndef lint
static char copyright[] =
	"@(#) Copyright (c) 1980, 1993\n"
	" The Regents of the University of California. All rights reserved.\n";

static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) %G%";
#endif /* not lint */

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Capacities of the fixed-size tables used throughout the program. */
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 * Each entry describes one opener that has not been closed yet.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the top entry; process() starts it at -1 */

/*
 * The kinds of opening and closing brackets.
 * Each row pairs the macro that opens a construct with the macro that
 * closes it; the table ends at the first row whose opener is a null
 * pointer.  Unused rows are left for pairs added at run time.
 */
struct brstr {
	char *opbr;	/* opening macro */
	char *clbr;	/* closing macro */
};

/* Indices of the two rows that the \s and \f escapes share. */
#define SZ	0
#define FT	1

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The entries must stay in ascending byte order: binsrch() does a binary
 * search over this table and addmac() inserts new names at the position
 * binsrch() reports.  A null pointer marks the end of the initial list;
 * main() counts up to it to initialize ncmds.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

/*
 * Program-wide state.  All of it starts out zero.
 */
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

char	*malloc();

/*
 * Every function below is called before it is defined.  Declare them so
 * the calls do not depend on implicit declarations, which C99 and later
 * no longer allow.  The declarations are unprototyped to match the
 * old-style definitions.
 */
int	usage(), process(), complain(), prop(), chkcmd(), nomatch();
int	eq(), pe(), checkknown(), addcmd(), addmac(), binsrch();
void	exit();

150main(argc, argv)
151int argc;
152char **argv;
153{
154 FILE *f;
155 int i;
156 char *cp;
205b7e14 157 char b1[4];
b5da8a99 158
205b7e14
MH
159 /* Figure out how many known commands there are */
160 while (knowncmds[ncmds])
161 ncmds++;
b5da8a99
BJ
162 while (argc > 1 && argv[1][0] == '-') {
163 switch(argv[1][1]) {
205b7e14
MH
164
165 /* -a: add pairs of macros */
b5da8a99 166 case 'a':
b5da8a99 167 i = strlen(argv[1]) - 2;
205b7e14
MH
168 if (i % 6 != 0)
169 usage();
b5da8a99
BJ
170 /* look for empty macro slots */
171 for (i=0; br[i].opbr; i++)
172 ;
173 for (cp=argv[1]+3; cp[-1]; cp += 6) {
205b7e14
MH
174 br[i].opbr = malloc(3);
175 strncpy(br[i].opbr, cp, 2);
176 br[i].clbr = malloc(3);
177 strncpy(br[i].clbr, cp+3, 2);
178 addmac(br[i].opbr); /* knows pairs are also known cmds */
179 addmac(br[i].clbr);
b5da8a99
BJ
180 i++;
181 }
182 break;
205b7e14
MH
183
184 /* -c: add known commands */
185 case 'c':
186 i = strlen(argv[1]) - 2;
187 if (i % 3 != 0)
188 usage();
189 for (cp=argv[1]+3; cp[-1]; cp += 3) {
190 if (cp[2] && cp[2] != '.')
191 usage();
192 strncpy(b1, cp, 2);
193 addmac(b1);
194 }
195 break;
196
197 /* -f: ignore font changes */
b5da8a99
BJ
198 case 'f':
199 fflag = 1;
200 break;
205b7e14
MH
201
202 /* -s: ignore size changes */
b5da8a99
BJ
203 case 's':
204 sflag = 1;
205 break;
206 default:
205b7e14 207 usage();
b5da8a99
BJ
208 }
209 argc--; argv++;
210 }
211
212 nfiles = argc - 1;
213
214 if (nfiles > 0) {
215 for (i=1; i<argc; i++) {
216 cfilename = argv[i];
217 f = fopen(cfilename, "r");
218 if (f == NULL)
219 perror(cfilename);
220 else
221 process(f);
222 }
223 } else {
224 cfilename = "stdin";
225 process(stdin);
226 }
227 exit(0);
228}
229
205b7e14
MH
/*
 * usage: print the command synopsis and exit with status 1.
 * The message goes to the standard error stream so that it is not mixed
 * into the report written on the standard output.  Never returns.
 */
int
usage()
{
	fprintf(stderr, "Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

236process(f)
237FILE *f;
238{
239 register int i, n;
240 char mac[5]; /* The current macro or nroff command */
241 int pl;
242
243 stktop = -1;
244 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
245 if (line[0] == '.') {
246 /*
247 * find and isolate the macro/command name.
248 */
249 strncpy(mac, line+1, 4);
250 if (isspace(mac[0])) {
251 pe(lineno);
252 printf("Empty command\n");
253 } else if (isspace(mac[1])) {
254 mac[1] = 0;
255 } else if (isspace(mac[2])) {
256 mac[2] = 0;
e1062a05 257 } else if (mac[0] != '\\' || mac[1] != '\"') {
b5da8a99
BJ
258 pe(lineno);
259 printf("Command too long\n");
260 }
261
262 /*
263 * Is it a known command?
264 */
265 checkknown(mac);
266
267 /*
268 * Should we add it?
269 */
270 if (eq(mac, "de"))
271 addcmd(line);
272
273 chkcmd(line, mac);
274 }
275
276 /*
277 * At this point we process the line looking
278 * for \s and \f.
279 */
280 for (i=0; line[i]; i++)
281 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
282 if (!sflag && line[++i]=='s') {
283 pl = line[++i];
284 if (isdigit(pl)) {
285 n = pl - '0';
286 pl = ' ';
287 } else
288 n = 0;
289 while (isdigit(line[++i]))
290 n = 10 * n + line[i] - '0';
291 i--;
292 if (n == 0) {
293 if (stk[stktop].opno == SZ) {
294 stktop--;
295 } else {
296 pe(lineno);
297 printf("unmatched \\s0\n");
298 }
299 } else {
300 stk[++stktop].opno = SZ;
301 stk[stktop].pl = pl;
302 stk[stktop].parm = n;
303 stk[stktop].lno = lineno;
304 }
305 } else if (!fflag && line[i]=='f') {
306 n = line[++i];
307 if (n == 'P') {
308 if (stk[stktop].opno == FT) {
309 stktop--;
310 } else {
311 pe(lineno);
312 printf("unmatched \\fP\n");
313 }
314 } else {
315 stk[++stktop].opno = FT;
316 stk[stktop].pl = 1;
317 stk[stktop].parm = n;
318 stk[stktop].lno = lineno;
319 }
320 }
321 }
322 }
323 /*
324 * We've hit the end and look at all this stuff that hasn't been
325 * matched yet! Complain, complain.
326 */
327 for (i=stktop; i>=0; i--) {
328 complain(i);
329 }
330}
331
332complain(i)
333{
334 pe(stk[i].lno);
335 printf("Unmatched ");
336 prop(i);
337 printf("\n");
338}
339
340prop(i)
341{
342 if (stk[i].pl == 0)
343 printf(".%s", br[stk[i].opno].opbr);
344 else switch(stk[i].opno) {
345 case SZ:
346 printf("\\s%c%d", stk[i].pl, stk[i].parm);
347 break;
348 case FT:
349 printf("\\f%c", stk[i].parm);
350 break;
351 default:
352 printf("Bug: stk[%d].opno = %d = .%s, .%s",
353 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
354 }
355}
356
357chkcmd(line, mac)
358char *line;
359char *mac;
360{
361 register int i, n;
362
363 /*
364 * Check to see if it matches top of stack.
365 */
366 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
367 stktop--; /* OK. Pop & forget */
368 else {
369 /* No. Maybe it's an opener */
370 for (i=0; br[i].opbr; i++) {
371 if (eq(mac, br[i].opbr)) {
372 /* Found. Push it. */
373 stktop++;
374 stk[stktop].opno = i;
375 stk[stktop].pl = 0;
376 stk[stktop].parm = 0;
377 stk[stktop].lno = lineno;
378 break;
379 }
380 /*
381 * Maybe it's an unmatched closer.
382 * NOTE: this depends on the fact
383 * that none of the closers can be
384 * openers too.
385 */
386 if (eq(mac, br[i].clbr)) {
387 nomatch(mac);
388 break;
389 }
390 }
391 }
392}
393
394nomatch(mac)
395char *mac;
396{
397 register int i, j;
398
399 /*
400 * Look for a match further down on stack
401 * If we find one, it suggests that the stuff in
402 * between is supposed to match itself.
403 */
404 for (j=stktop; j>=0; j--)
405 if (eq(mac,br[stk[j].opno].clbr)) {
406 /* Found. Make a good diagnostic. */
407 if (j == stktop-2) {
408 /*
409 * Check for special case \fx..\fR and don't
410 * complain.
411 */
412 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
413 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
414 stktop = j -1;
415 return;
416 }
417 /*
418 * We have two unmatched frobs. Chances are
419 * they were intended to match, so we mention
420 * them together.
421 */
422 pe(stk[j+1].lno);
423 prop(j+1);
424 printf(" does not match %d: ", stk[j+2].lno);
425 prop(j+2);
426 printf("\n");
427 } else for (i=j+1; i <= stktop; i++) {
428 complain(i);
429 }
430 stktop = j-1;
431 return;
432 }
433 /* Didn't find one. Throw this away. */
434 pe(lineno);
435 printf("Unmatched .%s\n", mac);
436}
437
/*
 * eq: are two strings equal?
 * Returns 1 if s1 and s2 hold the same characters, 0 otherwise.
 */
int
eq(s1, s2)
char *s1, *s2;
{
	return (strcmp(s1, s2) == 0);
}

445/* print the first part of an error message, given the line number */
446pe(lineno)
447int lineno;
448{
449 if (nfiles > 1)
450 printf("%s: ", cfilename);
451 printf("%d: ", lineno);
452}
453
454checkknown(mac)
455char *mac;
456{
b5da8a99
BJ
457
458 if (eq(mac, "."))
459 return;
460 if (binsrch(mac) >= 0)
461 return;
e1062a05
BJ
462 if (mac[0] == '\\' && mac[1] == '"') /* comments */
463 return;
b5da8a99
BJ
464
465 pe(lineno);
466 printf("Unknown command: .%s\n", mac);
467}
468
469/*
470 * We have a .de xx line in "line". Add xx to the list of known commands.
471 */
472addcmd(line)
473char *line;
474{
475 char *mac;
b5da8a99
BJ
476
477 /* grab the macro being defined */
478 mac = line+4;
479 while (isspace(*mac))
480 mac++;
481 if (*mac == 0) {
482 pe(lineno);
483 printf("illegal define: %s\n", line);
484 return;
485 }
486 mac[2] = 0;
487 if (isspace(mac[1]) || mac[1] == '\\')
488 mac[1] = 0;
489 if (ncmds >= MAXCMDS) {
490 printf("Only %d known commands allowed\n", MAXCMDS);
491 exit(1);
492 }
205b7e14
MH
493 addmac(mac);
494}
495
496/*
497 * Add mac to the list. We should really have some kind of tree
498 * structure here but this is a quick-and-dirty job and I just don't
499 * have time to mess with it. (I wonder if this will come back to haunt
500 * me someday?) Anyway, I claim that .de is fairly rare in user
501 * nroff programs, and the register loop below is pretty fast.
502 */
503addmac(mac)
504char *mac;
505{
506 register char **src, **dest, **loc;
507
6f145a48
RH
508 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
509#ifdef DEBUG
510 printf("binsrch(%s) -> already in table\n", mac);
511#endif DEBUG
512 return;
513 }
b5da8a99 514 /* binsrch sets slot as a side effect */
205b7e14
MH
515#ifdef DEBUG
516printf("binsrch(%s) -> %d\n", mac, slot);
517#endif
b5da8a99
BJ
518 loc = &knowncmds[slot];
519 src = &knowncmds[ncmds-1];
520 dest = src+1;
521 while (dest > loc)
522 *dest-- = *src--;
523 *loc = malloc(3);
524 strcpy(*loc, mac);
525 ncmds++;
205b7e14
MH
526#ifdef DEBUG
527printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
528#endif
b5da8a99
BJ
529}
530
531/*
532 * Do a binary search in knowncmds for mac.
533 * If found, return the index. If not, return -1.
534 */
535binsrch(mac)
536char *mac;
537{
538 register char *p; /* pointer to current cmd in list */
539 register int d; /* difference if any */
540 register int mid; /* mid point in binary search */
541 register int top, bot; /* boundaries of bin search, inclusive */
542
543 top = ncmds-1;
544 bot = 0;
545 while (top >= bot) {
546 mid = (top+bot)/2;
547 p = knowncmds[mid];
548 d = p[0] - mac[0];
549 if (d == 0)
550 d = p[1] - mac[1];
551 if (d == 0)
552 return mid;
553 if (d < 0)
554 bot = mid + 1;
555 else
556 top = mid - 1;
557 }
558 slot = bot; /* place it would have gone */
559 return -1;
560}