date and time created 83/02/11 15:44:54 by rrh
[unix-history] / usr / src / usr.bin / checknr / checknr.c
CommitLineData
/* SCCS identification string; not referenced by the code visible in this file. */
static char *sccsid = "@(#)checknr.c 4.5 (Berkeley) %G%";
b5da8a99
BJ
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
11
/* Fixed capacities of the three tables used by this program. */
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */
15
/*
 * The stack on which we remember what we've seen so far.
 * Each entry records one opener that has not been closed yet.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro such as .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;		/* index of the top entry; process() sets it to -1 for "empty" */
26
/*
 * The kinds of opening and closing brackets.
 *
 * The table is scanned from the front and ends at the first entry whose
 * opbr is a null pointer, so when a macro appears more than once only
 * the first entry is ever matched.  The slots after the initializers
 * are zero and are filled in by the -a option in main().
 */
struct brstr {
	char *opbr;	/* macro that opens the bracket */
	char *clbr;	/* macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz", "sz" },	/* also \s */
#define FT	1
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};
81
82/*
e1062a05 83 * All commands known to nroff, plus macro packages.
b5da8a99
BJ
84 * Used so we can complain about unrecognized commands.
85 */
86char *knowncmds[MAXCMDS] = {
e1062a05
BJ
87"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
88"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
89"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
90"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
91"AE", "AF", "AI", "AL", "AS", "AT", "AU", "AX", "B", "B1", "B2", "BD",
92"BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
7f56a5b5 93"DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ",
e1062a05
BJ
94"EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS",
95"FV", "FX", "H", "HC", "HM", "HO", "HU", "I", "ID", "IE", "IH", "IM",
96"IP", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG",
97"LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH",
98"NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", "PF", "PH", "PP", "PT",
99"PY", "QE", "QP", "QS", "R", "RA", "RC", "RE", "RL", "RP", "RQ", "RS",
100"RT", "S", "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY",
101"TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS",
102"TX", "UL", "US", "UX", "VL", "WC", "WH", "XD", "XF", "XK", "XP", "[", "[-",
103"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
104"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
105"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
106"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
107"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
108"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
109"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
110"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
111"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
112"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
113"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
114"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
115"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr",
b5da8a99
BJ
1160
117};
118
/* Program-wide state shared by the routines below. */
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

char	*malloc();
129
130main(argc, argv)
131int argc;
132char **argv;
133{
134 FILE *f;
135 int i;
136 char *cp;
205b7e14 137 char b1[4];
b5da8a99 138
205b7e14
MH
139 /* Figure out how many known commands there are */
140 while (knowncmds[ncmds])
141 ncmds++;
b5da8a99
BJ
142 while (argc > 1 && argv[1][0] == '-') {
143 switch(argv[1][1]) {
205b7e14
MH
144
145 /* -a: add pairs of macros */
b5da8a99 146 case 'a':
b5da8a99 147 i = strlen(argv[1]) - 2;
205b7e14
MH
148 if (i % 6 != 0)
149 usage();
b5da8a99
BJ
150 /* look for empty macro slots */
151 for (i=0; br[i].opbr; i++)
152 ;
153 for (cp=argv[1]+3; cp[-1]; cp += 6) {
205b7e14
MH
154 br[i].opbr = malloc(3);
155 strncpy(br[i].opbr, cp, 2);
156 br[i].clbr = malloc(3);
157 strncpy(br[i].clbr, cp+3, 2);
158 addmac(br[i].opbr); /* knows pairs are also known cmds */
159 addmac(br[i].clbr);
b5da8a99
BJ
160 i++;
161 }
162 break;
205b7e14
MH
163
164 /* -c: add known commands */
165 case 'c':
166 i = strlen(argv[1]) - 2;
167 if (i % 3 != 0)
168 usage();
169 for (cp=argv[1]+3; cp[-1]; cp += 3) {
170 if (cp[2] && cp[2] != '.')
171 usage();
172 strncpy(b1, cp, 2);
173 addmac(b1);
174 }
175 break;
176
177 /* -f: ignore font changes */
b5da8a99
BJ
178 case 'f':
179 fflag = 1;
180 break;
205b7e14
MH
181
182 /* -s: ignore size changes */
b5da8a99
BJ
183 case 's':
184 sflag = 1;
185 break;
186 default:
205b7e14 187 usage();
b5da8a99
BJ
188 }
189 argc--; argv++;
190 }
191
192 nfiles = argc - 1;
193
194 if (nfiles > 0) {
195 for (i=1; i<argc; i++) {
196 cfilename = argv[i];
197 f = fopen(cfilename, "r");
198 if (f == NULL)
199 perror(cfilename);
200 else
201 process(f);
202 }
203 } else {
204 cfilename = "stdin";
205 process(stdin);
206 }
207 exit(0);
208}
209
205b7e14
MH
/* usage: print the synopsis on the standard output and exit with status 1. */
int
usage()
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}
215
b5da8a99
BJ
216process(f)
217FILE *f;
218{
219 register int i, n;
220 char mac[5]; /* The current macro or nroff command */
221 int pl;
222
223 stktop = -1;
224 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
225 if (line[0] == '.') {
226 /*
227 * find and isolate the macro/command name.
228 */
229 strncpy(mac, line+1, 4);
230 if (isspace(mac[0])) {
231 pe(lineno);
232 printf("Empty command\n");
233 } else if (isspace(mac[1])) {
234 mac[1] = 0;
235 } else if (isspace(mac[2])) {
236 mac[2] = 0;
e1062a05 237 } else if (mac[0] != '\\' || mac[1] != '\"') {
b5da8a99
BJ
238 pe(lineno);
239 printf("Command too long\n");
240 }
241
242 /*
243 * Is it a known command?
244 */
245 checkknown(mac);
246
247 /*
248 * Should we add it?
249 */
250 if (eq(mac, "de"))
251 addcmd(line);
252
253 chkcmd(line, mac);
254 }
255
256 /*
257 * At this point we process the line looking
258 * for \s and \f.
259 */
260 for (i=0; line[i]; i++)
261 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
262 if (!sflag && line[++i]=='s') {
263 pl = line[++i];
264 if (isdigit(pl)) {
265 n = pl - '0';
266 pl = ' ';
267 } else
268 n = 0;
269 while (isdigit(line[++i]))
270 n = 10 * n + line[i] - '0';
271 i--;
272 if (n == 0) {
273 if (stk[stktop].opno == SZ) {
274 stktop--;
275 } else {
276 pe(lineno);
277 printf("unmatched \\s0\n");
278 }
279 } else {
280 stk[++stktop].opno = SZ;
281 stk[stktop].pl = pl;
282 stk[stktop].parm = n;
283 stk[stktop].lno = lineno;
284 }
285 } else if (!fflag && line[i]=='f') {
286 n = line[++i];
287 if (n == 'P') {
288 if (stk[stktop].opno == FT) {
289 stktop--;
290 } else {
291 pe(lineno);
292 printf("unmatched \\fP\n");
293 }
294 } else {
295 stk[++stktop].opno = FT;
296 stk[stktop].pl = 1;
297 stk[stktop].parm = n;
298 stk[stktop].lno = lineno;
299 }
300 }
301 }
302 }
303 /*
304 * We've hit the end and look at all this stuff that hasn't been
305 * matched yet! Complain, complain.
306 */
307 for (i=stktop; i>=0; i--) {
308 complain(i);
309 }
310}
311
312complain(i)
313{
314 pe(stk[i].lno);
315 printf("Unmatched ");
316 prop(i);
317 printf("\n");
318}
319
320prop(i)
321{
322 if (stk[i].pl == 0)
323 printf(".%s", br[stk[i].opno].opbr);
324 else switch(stk[i].opno) {
325 case SZ:
326 printf("\\s%c%d", stk[i].pl, stk[i].parm);
327 break;
328 case FT:
329 printf("\\f%c", stk[i].parm);
330 break;
331 default:
332 printf("Bug: stk[%d].opno = %d = .%s, .%s",
333 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
334 }
335}
336
337chkcmd(line, mac)
338char *line;
339char *mac;
340{
341 register int i, n;
342
343 /*
344 * Check to see if it matches top of stack.
345 */
346 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
347 stktop--; /* OK. Pop & forget */
348 else {
349 /* No. Maybe it's an opener */
350 for (i=0; br[i].opbr; i++) {
351 if (eq(mac, br[i].opbr)) {
352 /* Found. Push it. */
353 stktop++;
354 stk[stktop].opno = i;
355 stk[stktop].pl = 0;
356 stk[stktop].parm = 0;
357 stk[stktop].lno = lineno;
358 break;
359 }
360 /*
361 * Maybe it's an unmatched closer.
362 * NOTE: this depends on the fact
363 * that none of the closers can be
364 * openers too.
365 */
366 if (eq(mac, br[i].clbr)) {
367 nomatch(mac);
368 break;
369 }
370 }
371 }
372}
373
374nomatch(mac)
375char *mac;
376{
377 register int i, j;
378
379 /*
380 * Look for a match further down on stack
381 * If we find one, it suggests that the stuff in
382 * between is supposed to match itself.
383 */
384 for (j=stktop; j>=0; j--)
385 if (eq(mac,br[stk[j].opno].clbr)) {
386 /* Found. Make a good diagnostic. */
387 if (j == stktop-2) {
388 /*
389 * Check for special case \fx..\fR and don't
390 * complain.
391 */
392 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
393 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
394 stktop = j -1;
395 return;
396 }
397 /*
398 * We have two unmatched frobs. Chances are
399 * they were intended to match, so we mention
400 * them together.
401 */
402 pe(stk[j+1].lno);
403 prop(j+1);
404 printf(" does not match %d: ", stk[j+2].lno);
405 prop(j+2);
406 printf("\n");
407 } else for (i=j+1; i <= stktop; i++) {
408 complain(i);
409 }
410 stktop = j-1;
411 return;
412 }
413 /* Didn't find one. Throw this away. */
414 pe(lineno);
415 printf("Unmatched .%s\n", mac);
416}
417
/* eq: are two strings equal?  Returns 1 if so, 0 if not. */
int
eq(s1, s2)
char *s1, *s2;
{
	return (strcmp(s1, s2) == 0);
}
424
425/* print the first part of an error message, given the line number */
426pe(lineno)
427int lineno;
428{
429 if (nfiles > 1)
430 printf("%s: ", cfilename);
431 printf("%d: ", lineno);
432}
433
434checkknown(mac)
435char *mac;
436{
b5da8a99
BJ
437
438 if (eq(mac, "."))
439 return;
440 if (binsrch(mac) >= 0)
441 return;
e1062a05
BJ
442 if (mac[0] == '\\' && mac[1] == '"') /* comments */
443 return;
b5da8a99
BJ
444
445 pe(lineno);
446 printf("Unknown command: .%s\n", mac);
447}
448
449/*
450 * We have a .de xx line in "line". Add xx to the list of known commands.
451 */
452addcmd(line)
453char *line;
454{
455 char *mac;
b5da8a99
BJ
456
457 /* grab the macro being defined */
458 mac = line+4;
459 while (isspace(*mac))
460 mac++;
461 if (*mac == 0) {
462 pe(lineno);
463 printf("illegal define: %s\n", line);
464 return;
465 }
466 mac[2] = 0;
467 if (isspace(mac[1]) || mac[1] == '\\')
468 mac[1] = 0;
469 if (ncmds >= MAXCMDS) {
470 printf("Only %d known commands allowed\n", MAXCMDS);
471 exit(1);
472 }
205b7e14
MH
473 addmac(mac);
474}
475
476/*
477 * Add mac to the list. We should really have some kind of tree
478 * structure here but this is a quick-and-dirty job and I just don't
479 * have time to mess with it. (I wonder if this will come back to haunt
480 * me someday?) Anyway, I claim that .de is fairly rare in user
481 * nroff programs, and the register loop below is pretty fast.
482 */
483addmac(mac)
484char *mac;
485{
486 register char **src, **dest, **loc;
487
b5da8a99
BJ
488 binsrch(mac); /* it's OK to redefine something */
489 /* binsrch sets slot as a side effect */
205b7e14
MH
490#ifdef DEBUG
491printf("binsrch(%s) -> %d\n", mac, slot);
492#endif
b5da8a99
BJ
493 loc = &knowncmds[slot];
494 src = &knowncmds[ncmds-1];
495 dest = src+1;
496 while (dest > loc)
497 *dest-- = *src--;
498 *loc = malloc(3);
499 strcpy(*loc, mac);
500 ncmds++;
205b7e14
MH
501#ifdef DEBUG
502printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
503#endif
b5da8a99
BJ
504}
505
506/*
507 * Do a binary search in knowncmds for mac.
508 * If found, return the index. If not, return -1.
509 */
510binsrch(mac)
511char *mac;
512{
513 register char *p; /* pointer to current cmd in list */
514 register int d; /* difference if any */
515 register int mid; /* mid point in binary search */
516 register int top, bot; /* boundaries of bin search, inclusive */
517
518 top = ncmds-1;
519 bot = 0;
520 while (top >= bot) {
521 mid = (top+bot)/2;
522 p = knowncmds[mid];
523 d = p[0] - mac[0];
524 if (d == 0)
525 d = p[1] - mac[1];
526 if (d == 0)
527 return mid;
528 if (d < 0)
529 bot = mid + 1;
530 else
531 top = mid - 1;
532 }
533 slot = bot; /* place it would have gone */
534 return -1;
535}
e1062a05
BJ
536
537