added .DT macro which it didn't know about (-ms)
[unix-history] / usr / src / usr.bin / checknr / checknr.c
CommitLineData
/* SCCS identification string for this source file. */
static char *sccsid = "@(#)checknr.c 4.4 (Berkeley) %G%";
b5da8a99
BJ
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
9#include <stdio.h>
10#include <ctype.h>
11
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */
15
/*
 * The stack on which we remember what we've seen so far.
 * Each entry is an opening bracket (macro, \s or \f) that is still
 * waiting for its closer.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a command */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;		/* index of the top entry; -1 when the stack is empty */
26
/*
 * The kinds of opening and closing brackets.
 * The table ends at the first entry whose opbr is 0; the -a option
 * appends further pairs at that point.  SZ and FT name the two fixed
 * entries that the \s and \f escapes share with .sz and .ft.
 */
struct brstr {
	char *opbr;	/* name of the opening command */
	char *clbr;	/* name of the command that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz", "sz" },	/* also \s */
#define FT	1
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};
81
82/*
e1062a05 83 * All commands known to nroff, plus macro packages.
b5da8a99
BJ
84 * Used so we can complain about unrecognized commands.
85 */
86char *knowncmds[MAXCMDS] = {
e1062a05
BJ
87"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
88"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
89"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
90"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
91"AE", "AF", "AI", "AL", "AS", "AT", "AU", "AX", "B", "B1", "B2", "BD",
92"BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
7f56a5b5 93"DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ",
e1062a05
BJ
94"EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS",
95"FV", "FX", "H", "HC", "HM", "HO", "HU", "I", "ID", "IE", "IH", "IM",
96"IP", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG",
97"LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH",
98"NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", "PF", "PH", "PP", "PT",
99"PY", "QE", "QP", "QS", "R", "RA", "RC", "RE", "RL", "RP", "RQ", "RS",
100"RT", "S", "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY",
101"TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS",
102"TX", "UL", "US", "UX", "VL", "WC", "WH", "XD", "XF", "XK", "XP", "[", "[-",
103"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
104"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
105"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
106"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
107"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
108"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
109"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
110"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
111"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
112"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
113"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
114"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
115"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr",
b5da8a99
BJ
1160
117};
118
int lineno;		/* current line number in input file */
char line[256];		/* the current line */
char *cfilename;	/* name of current file */
int nfiles;		/* number of files to process */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */
int ncmds;		/* size of knowncmds */
int slot;		/* slot in knowncmds found by binsrch */
127
128char *malloc();
129
130main(argc, argv)
131int argc;
132char **argv;
133{
134 FILE *f;
135 int i;
136 char *cp;
205b7e14 137 char b1[4];
b5da8a99
BJ
138
139 if (argc <= 1)
205b7e14
MH
140 usage();
141 /* Figure out how many known commands there are */
142 while (knowncmds[ncmds])
143 ncmds++;
b5da8a99
BJ
144 while (argc > 1 && argv[1][0] == '-') {
145 switch(argv[1][1]) {
205b7e14
MH
146
147 /* -a: add pairs of macros */
b5da8a99 148 case 'a':
b5da8a99 149 i = strlen(argv[1]) - 2;
205b7e14
MH
150 if (i % 6 != 0)
151 usage();
b5da8a99
BJ
152 /* look for empty macro slots */
153 for (i=0; br[i].opbr; i++)
154 ;
155 for (cp=argv[1]+3; cp[-1]; cp += 6) {
205b7e14
MH
156 br[i].opbr = malloc(3);
157 strncpy(br[i].opbr, cp, 2);
158 br[i].clbr = malloc(3);
159 strncpy(br[i].clbr, cp+3, 2);
160 addmac(br[i].opbr); /* knows pairs are also known cmds */
161 addmac(br[i].clbr);
b5da8a99
BJ
162 i++;
163 }
164 break;
205b7e14
MH
165
166 /* -c: add known commands */
167 case 'c':
168 i = strlen(argv[1]) - 2;
169 if (i % 3 != 0)
170 usage();
171 for (cp=argv[1]+3; cp[-1]; cp += 3) {
172 if (cp[2] && cp[2] != '.')
173 usage();
174 strncpy(b1, cp, 2);
175 addmac(b1);
176 }
177 break;
178
179 /* -f: ignore font changes */
b5da8a99
BJ
180 case 'f':
181 fflag = 1;
182 break;
205b7e14
MH
183
184 /* -s: ignore size changes */
b5da8a99
BJ
185 case 's':
186 sflag = 1;
187 break;
188 default:
205b7e14 189 usage();
b5da8a99
BJ
190 }
191 argc--; argv++;
192 }
193
194 nfiles = argc - 1;
195
196 if (nfiles > 0) {
197 for (i=1; i<argc; i++) {
198 cfilename = argv[i];
199 f = fopen(cfilename, "r");
200 if (f == NULL)
201 perror(cfilename);
202 else
203 process(f);
204 }
205 } else {
206 cfilename = "stdin";
207 process(stdin);
208 }
209 exit(0);
210}
211
205b7e14
MH
/*
 * usage: print a one-line summary of the command syntax on the
 * standard output and exit with status 1.
 */
int
usage()
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}
217
b5da8a99
BJ
218process(f)
219FILE *f;
220{
221 register int i, n;
222 char mac[5]; /* The current macro or nroff command */
223 int pl;
224
225 stktop = -1;
226 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
227 if (line[0] == '.') {
228 /*
229 * find and isolate the macro/command name.
230 */
231 strncpy(mac, line+1, 4);
232 if (isspace(mac[0])) {
233 pe(lineno);
234 printf("Empty command\n");
235 } else if (isspace(mac[1])) {
236 mac[1] = 0;
237 } else if (isspace(mac[2])) {
238 mac[2] = 0;
e1062a05 239 } else if (mac[0] != '\\' || mac[1] != '\"') {
b5da8a99
BJ
240 pe(lineno);
241 printf("Command too long\n");
242 }
243
244 /*
245 * Is it a known command?
246 */
247 checkknown(mac);
248
249 /*
250 * Should we add it?
251 */
252 if (eq(mac, "de"))
253 addcmd(line);
254
255 chkcmd(line, mac);
256 }
257
258 /*
259 * At this point we process the line looking
260 * for \s and \f.
261 */
262 for (i=0; line[i]; i++)
263 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
264 if (!sflag && line[++i]=='s') {
265 pl = line[++i];
266 if (isdigit(pl)) {
267 n = pl - '0';
268 pl = ' ';
269 } else
270 n = 0;
271 while (isdigit(line[++i]))
272 n = 10 * n + line[i] - '0';
273 i--;
274 if (n == 0) {
275 if (stk[stktop].opno == SZ) {
276 stktop--;
277 } else {
278 pe(lineno);
279 printf("unmatched \\s0\n");
280 }
281 } else {
282 stk[++stktop].opno = SZ;
283 stk[stktop].pl = pl;
284 stk[stktop].parm = n;
285 stk[stktop].lno = lineno;
286 }
287 } else if (!fflag && line[i]=='f') {
288 n = line[++i];
289 if (n == 'P') {
290 if (stk[stktop].opno == FT) {
291 stktop--;
292 } else {
293 pe(lineno);
294 printf("unmatched \\fP\n");
295 }
296 } else {
297 stk[++stktop].opno = FT;
298 stk[stktop].pl = 1;
299 stk[stktop].parm = n;
300 stk[stktop].lno = lineno;
301 }
302 }
303 }
304 }
305 /*
306 * We've hit the end and look at all this stuff that hasn't been
307 * matched yet! Complain, complain.
308 */
309 for (i=stktop; i>=0; i--) {
310 complain(i);
311 }
312}
313
314complain(i)
315{
316 pe(stk[i].lno);
317 printf("Unmatched ");
318 prop(i);
319 printf("\n");
320}
321
322prop(i)
323{
324 if (stk[i].pl == 0)
325 printf(".%s", br[stk[i].opno].opbr);
326 else switch(stk[i].opno) {
327 case SZ:
328 printf("\\s%c%d", stk[i].pl, stk[i].parm);
329 break;
330 case FT:
331 printf("\\f%c", stk[i].parm);
332 break;
333 default:
334 printf("Bug: stk[%d].opno = %d = .%s, .%s",
335 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
336 }
337}
338
339chkcmd(line, mac)
340char *line;
341char *mac;
342{
343 register int i, n;
344
345 /*
346 * Check to see if it matches top of stack.
347 */
348 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
349 stktop--; /* OK. Pop & forget */
350 else {
351 /* No. Maybe it's an opener */
352 for (i=0; br[i].opbr; i++) {
353 if (eq(mac, br[i].opbr)) {
354 /* Found. Push it. */
355 stktop++;
356 stk[stktop].opno = i;
357 stk[stktop].pl = 0;
358 stk[stktop].parm = 0;
359 stk[stktop].lno = lineno;
360 break;
361 }
362 /*
363 * Maybe it's an unmatched closer.
364 * NOTE: this depends on the fact
365 * that none of the closers can be
366 * openers too.
367 */
368 if (eq(mac, br[i].clbr)) {
369 nomatch(mac);
370 break;
371 }
372 }
373 }
374}
375
376nomatch(mac)
377char *mac;
378{
379 register int i, j;
380
381 /*
382 * Look for a match further down on stack
383 * If we find one, it suggests that the stuff in
384 * between is supposed to match itself.
385 */
386 for (j=stktop; j>=0; j--)
387 if (eq(mac,br[stk[j].opno].clbr)) {
388 /* Found. Make a good diagnostic. */
389 if (j == stktop-2) {
390 /*
391 * Check for special case \fx..\fR and don't
392 * complain.
393 */
394 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
395 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
396 stktop = j -1;
397 return;
398 }
399 /*
400 * We have two unmatched frobs. Chances are
401 * they were intended to match, so we mention
402 * them together.
403 */
404 pe(stk[j+1].lno);
405 prop(j+1);
406 printf(" does not match %d: ", stk[j+2].lno);
407 prop(j+2);
408 printf("\n");
409 } else for (i=j+1; i <= stktop; i++) {
410 complain(i);
411 }
412 stktop = j-1;
413 return;
414 }
415 /* Didn't find one. Throw this away. */
416 pe(lineno);
417 printf("Unmatched .%s\n", mac);
418}
419
/* eq: are two strings equal?  Returns 1 if so, 0 if not. */
int
eq(s1, s2)
char *s1, *s2;
{
	return (strcmp(s1, s2) == 0);
}
426
427/* print the first part of an error message, given the line number */
428pe(lineno)
429int lineno;
430{
431 if (nfiles > 1)
432 printf("%s: ", cfilename);
433 printf("%d: ", lineno);
434}
435
436checkknown(mac)
437char *mac;
438{
b5da8a99
BJ
439
440 if (eq(mac, "."))
441 return;
442 if (binsrch(mac) >= 0)
443 return;
e1062a05
BJ
444 if (mac[0] == '\\' && mac[1] == '"') /* comments */
445 return;
b5da8a99
BJ
446
447 pe(lineno);
448 printf("Unknown command: .%s\n", mac);
449}
450
451/*
452 * We have a .de xx line in "line". Add xx to the list of known commands.
453 */
454addcmd(line)
455char *line;
456{
457 char *mac;
b5da8a99
BJ
458
459 /* grab the macro being defined */
460 mac = line+4;
461 while (isspace(*mac))
462 mac++;
463 if (*mac == 0) {
464 pe(lineno);
465 printf("illegal define: %s\n", line);
466 return;
467 }
468 mac[2] = 0;
469 if (isspace(mac[1]) || mac[1] == '\\')
470 mac[1] = 0;
471 if (ncmds >= MAXCMDS) {
472 printf("Only %d known commands allowed\n", MAXCMDS);
473 exit(1);
474 }
205b7e14
MH
475 addmac(mac);
476}
477
478/*
479 * Add mac to the list. We should really have some kind of tree
480 * structure here but this is a quick-and-dirty job and I just don't
481 * have time to mess with it. (I wonder if this will come back to haunt
482 * me someday?) Anyway, I claim that .de is fairly rare in user
483 * nroff programs, and the register loop below is pretty fast.
484 */
485addmac(mac)
486char *mac;
487{
488 register char **src, **dest, **loc;
489
b5da8a99
BJ
490 binsrch(mac); /* it's OK to redefine something */
491 /* binsrch sets slot as a side effect */
205b7e14
MH
492#ifdef DEBUG
493printf("binsrch(%s) -> %d\n", mac, slot);
494#endif
b5da8a99
BJ
495 loc = &knowncmds[slot];
496 src = &knowncmds[ncmds-1];
497 dest = src+1;
498 while (dest > loc)
499 *dest-- = *src--;
500 *loc = malloc(3);
501 strcpy(*loc, mac);
502 ncmds++;
205b7e14
MH
503#ifdef DEBUG
504printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
505#endif
b5da8a99
BJ
506}
507
508/*
509 * Do a binary search in knowncmds for mac.
510 * If found, return the index. If not, return -1.
511 */
512binsrch(mac)
513char *mac;
514{
515 register char *p; /* pointer to current cmd in list */
516 register int d; /* difference if any */
517 register int mid; /* mid point in binary search */
518 register int top, bot; /* boundaries of bin search, inclusive */
519
520 top = ncmds-1;
521 bot = 0;
522 while (top >= bot) {
523 mid = (top+bot)/2;
524 p = knowncmds[mid];
525 d = p[0] - mac[0];
526 if (d == 0)
527 d = p[1] - mac[1];
528 if (d == 0)
529 return mid;
530 if (d < 0)
531 bot = mid + 1;
532 else
533 top = mid - 1;
534 }
535 slot = bot; /* place it would have gone */
536 return -1;
537}
e1062a05
BJ
538
539