added .DT macro which it didn't know about (-ms)
[unix-history] / usr / src / usr.bin / checknr / checknr.c
... / ...
CommitLineData
/* SCCS identification string; %G% is expanded by SCCS when the file is fetched. */
static char sccsid[] = "@(#)checknr.c 4.4 (Berkeley) %G%";
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
9#include <stdio.h>
10#include <ctype.h>
11
/* Fixed capacities of the three tables declared further down this file. */
#define MAXSTK	100	/* entries in stk[], the stack of open brackets */
#define MAXBR	100	/* entries in br[], the opener/closer pairs */
#define MAXCMDS	500	/* entries in knowncmds[], the known commands */
15
/*
 * One entry per opener that has been seen and not yet closed.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-' or ' ' for \s, 1 for \f, 0 for a macro */
	int parm;	/* argument given: the point size or the font letter */
	int lno;	/* input line on which the opener appeared */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the top entry; -1 when empty */
26
/*
 * The opener/closer pairs we know how to match.  The table is searched
 * from the top and ends at the first entry whose opbr is null; the
 * -a option appends pairs at that point.
 */
struct brstr {
	char *opbr;	/* name of the opening request or macro */
	char *clbr;	/* name of the request or macro that closes it */
};

/* Fixed positions of the two troff entries, which also stand for \s and \f. */
#define SZ	0
#define FT	1

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* also \s */
	{ "ft", "ft" },		/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};
81
82/*
83 * All commands known to nroff, plus macro packages.
84 * Used so we can complain about unrecognized commands.
85 */
86char *knowncmds[MAXCMDS] = {
87"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
88"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
89"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
90"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
91"AE", "AF", "AI", "AL", "AS", "AT", "AU", "AX", "B", "B1", "B2", "BD",
92"BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
93"DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ",
94"EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS",
95"FV", "FX", "H", "HC", "HM", "HO", "HU", "I", "ID", "IE", "IH", "IM",
96"IP", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG",
97"LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH",
98"NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", "PF", "PH", "PP", "PT",
99"PY", "QE", "QP", "QS", "R", "RA", "RC", "RE", "RL", "RP", "RQ", "RS",
100"RT", "S", "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY",
101"TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS",
102"TX", "UL", "US", "UX", "VL", "WC", "WH", "XD", "XF", "XK", "XP", "[", "[-",
103"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
104"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
105"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
106"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
107"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
108"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
109"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
110"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
111"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
112"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
113"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
114"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
115"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr",
1160
117};
118
/* Library routine used without a header; declared in the pre-ANSI manner. */
char *malloc();

/* Where we are in the input. */
char *cfilename;	/* name of the file being read */
int lineno;		/* number of the line being examined */
char line[256];		/* text of the line being examined */

/* Set up from the command line. */
int nfiles;		/* how many files were named */
int fflag;		/* -f given: leave \f alone */
int sflag;		/* -s given: leave \s alone */

/* Bookkeeping for the knowncmds table. */
int ncmds;		/* entries currently in knowncmds */
int slot;		/* position in knowncmds reported by binsrch */
129
130main(argc, argv)
131int argc;
132char **argv;
133{
134 FILE *f;
135 int i;
136 char *cp;
137 char b1[4];
138
139 if (argc <= 1)
140 usage();
141 /* Figure out how many known commands there are */
142 while (knowncmds[ncmds])
143 ncmds++;
144 while (argc > 1 && argv[1][0] == '-') {
145 switch(argv[1][1]) {
146
147 /* -a: add pairs of macros */
148 case 'a':
149 i = strlen(argv[1]) - 2;
150 if (i % 6 != 0)
151 usage();
152 /* look for empty macro slots */
153 for (i=0; br[i].opbr; i++)
154 ;
155 for (cp=argv[1]+3; cp[-1]; cp += 6) {
156 br[i].opbr = malloc(3);
157 strncpy(br[i].opbr, cp, 2);
158 br[i].clbr = malloc(3);
159 strncpy(br[i].clbr, cp+3, 2);
160 addmac(br[i].opbr); /* knows pairs are also known cmds */
161 addmac(br[i].clbr);
162 i++;
163 }
164 break;
165
166 /* -c: add known commands */
167 case 'c':
168 i = strlen(argv[1]) - 2;
169 if (i % 3 != 0)
170 usage();
171 for (cp=argv[1]+3; cp[-1]; cp += 3) {
172 if (cp[2] && cp[2] != '.')
173 usage();
174 strncpy(b1, cp, 2);
175 addmac(b1);
176 }
177 break;
178
179 /* -f: ignore font changes */
180 case 'f':
181 fflag = 1;
182 break;
183
184 /* -s: ignore size changes */
185 case 's':
186 sflag = 1;
187 break;
188 default:
189 usage();
190 }
191 argc--; argv++;
192 }
193
194 nfiles = argc - 1;
195
196 if (nfiles > 0) {
197 for (i=1; i<argc; i++) {
198 cfilename = argv[i];
199 f = fopen(cfilename, "r");
200 if (f == NULL)
201 perror(cfilename);
202 else
203 process(f);
204 }
205 } else {
206 cfilename = "stdin";
207 process(stdin);
208 }
209 exit(0);
210}
211
/* usage: print the one-line synopsis on the standard output and exit with status 1. */
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
217
218process(f)
219FILE *f;
220{
221 register int i, n;
222 char mac[5]; /* The current macro or nroff command */
223 int pl;
224
225 stktop = -1;
226 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
227 if (line[0] == '.') {
228 /*
229 * find and isolate the macro/command name.
230 */
231 strncpy(mac, line+1, 4);
232 if (isspace(mac[0])) {
233 pe(lineno);
234 printf("Empty command\n");
235 } else if (isspace(mac[1])) {
236 mac[1] = 0;
237 } else if (isspace(mac[2])) {
238 mac[2] = 0;
239 } else if (mac[0] != '\\' || mac[1] != '\"') {
240 pe(lineno);
241 printf("Command too long\n");
242 }
243
244 /*
245 * Is it a known command?
246 */
247 checkknown(mac);
248
249 /*
250 * Should we add it?
251 */
252 if (eq(mac, "de"))
253 addcmd(line);
254
255 chkcmd(line, mac);
256 }
257
258 /*
259 * At this point we process the line looking
260 * for \s and \f.
261 */
262 for (i=0; line[i]; i++)
263 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
264 if (!sflag && line[++i]=='s') {
265 pl = line[++i];
266 if (isdigit(pl)) {
267 n = pl - '0';
268 pl = ' ';
269 } else
270 n = 0;
271 while (isdigit(line[++i]))
272 n = 10 * n + line[i] - '0';
273 i--;
274 if (n == 0) {
275 if (stk[stktop].opno == SZ) {
276 stktop--;
277 } else {
278 pe(lineno);
279 printf("unmatched \\s0\n");
280 }
281 } else {
282 stk[++stktop].opno = SZ;
283 stk[stktop].pl = pl;
284 stk[stktop].parm = n;
285 stk[stktop].lno = lineno;
286 }
287 } else if (!fflag && line[i]=='f') {
288 n = line[++i];
289 if (n == 'P') {
290 if (stk[stktop].opno == FT) {
291 stktop--;
292 } else {
293 pe(lineno);
294 printf("unmatched \\fP\n");
295 }
296 } else {
297 stk[++stktop].opno = FT;
298 stk[stktop].pl = 1;
299 stk[stktop].parm = n;
300 stk[stktop].lno = lineno;
301 }
302 }
303 }
304 }
305 /*
306 * We've hit the end and look at all this stuff that hasn't been
307 * matched yet! Complain, complain.
308 */
309 for (i=stktop; i>=0; i--) {
310 complain(i);
311 }
312}
313
314complain(i)
315{
316 pe(stk[i].lno);
317 printf("Unmatched ");
318 prop(i);
319 printf("\n");
320}
321
322prop(i)
323{
324 if (stk[i].pl == 0)
325 printf(".%s", br[stk[i].opno].opbr);
326 else switch(stk[i].opno) {
327 case SZ:
328 printf("\\s%c%d", stk[i].pl, stk[i].parm);
329 break;
330 case FT:
331 printf("\\f%c", stk[i].parm);
332 break;
333 default:
334 printf("Bug: stk[%d].opno = %d = .%s, .%s",
335 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
336 }
337}
338
339chkcmd(line, mac)
340char *line;
341char *mac;
342{
343 register int i, n;
344
345 /*
346 * Check to see if it matches top of stack.
347 */
348 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
349 stktop--; /* OK. Pop & forget */
350 else {
351 /* No. Maybe it's an opener */
352 for (i=0; br[i].opbr; i++) {
353 if (eq(mac, br[i].opbr)) {
354 /* Found. Push it. */
355 stktop++;
356 stk[stktop].opno = i;
357 stk[stktop].pl = 0;
358 stk[stktop].parm = 0;
359 stk[stktop].lno = lineno;
360 break;
361 }
362 /*
363 * Maybe it's an unmatched closer.
364 * NOTE: this depends on the fact
365 * that none of the closers can be
366 * openers too.
367 */
368 if (eq(mac, br[i].clbr)) {
369 nomatch(mac);
370 break;
371 }
372 }
373 }
374}
375
376nomatch(mac)
377char *mac;
378{
379 register int i, j;
380
381 /*
382 * Look for a match further down on stack
383 * If we find one, it suggests that the stuff in
384 * between is supposed to match itself.
385 */
386 for (j=stktop; j>=0; j--)
387 if (eq(mac,br[stk[j].opno].clbr)) {
388 /* Found. Make a good diagnostic. */
389 if (j == stktop-2) {
390 /*
391 * Check for special case \fx..\fR and don't
392 * complain.
393 */
394 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
395 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
396 stktop = j -1;
397 return;
398 }
399 /*
400 * We have two unmatched frobs. Chances are
401 * they were intended to match, so we mention
402 * them together.
403 */
404 pe(stk[j+1].lno);
405 prop(j+1);
406 printf(" does not match %d: ", stk[j+2].lno);
407 prop(j+2);
408 printf("\n");
409 } else for (i=j+1; i <= stktop; i++) {
410 complain(i);
411 }
412 stktop = j-1;
413 return;
414 }
415 /* Didn't find one. Throw this away. */
416 pe(lineno);
417 printf("Unmatched .%s\n", mac);
418}
419
/* eq: compare two strings; yields 1 when they are identical, 0 otherwise */
int
eq(s1, s2)
char *s1, *s2;
{
	return (!strcmp(s1, s2));
}
426
427/* print the first part of an error message, given the line number */
428pe(lineno)
429int lineno;
430{
431 if (nfiles > 1)
432 printf("%s: ", cfilename);
433 printf("%d: ", lineno);
434}
435
436checkknown(mac)
437char *mac;
438{
439
440 if (eq(mac, "."))
441 return;
442 if (binsrch(mac) >= 0)
443 return;
444 if (mac[0] == '\\' && mac[1] == '"') /* comments */
445 return;
446
447 pe(lineno);
448 printf("Unknown command: .%s\n", mac);
449}
450
451/*
452 * We have a .de xx line in "line". Add xx to the list of known commands.
453 */
454addcmd(line)
455char *line;
456{
457 char *mac;
458
459 /* grab the macro being defined */
460 mac = line+4;
461 while (isspace(*mac))
462 mac++;
463 if (*mac == 0) {
464 pe(lineno);
465 printf("illegal define: %s\n", line);
466 return;
467 }
468 mac[2] = 0;
469 if (isspace(mac[1]) || mac[1] == '\\')
470 mac[1] = 0;
471 if (ncmds >= MAXCMDS) {
472 printf("Only %d known commands allowed\n", MAXCMDS);
473 exit(1);
474 }
475 addmac(mac);
476}
477
478/*
479 * Add mac to the list. We should really have some kind of tree
480 * structure here but this is a quick-and-dirty job and I just don't
481 * have time to mess with it. (I wonder if this will come back to haunt
482 * me someday?) Anyway, I claim that .de is fairly rare in user
483 * nroff programs, and the register loop below is pretty fast.
484 */
485addmac(mac)
486char *mac;
487{
488 register char **src, **dest, **loc;
489
490 binsrch(mac); /* it's OK to redefine something */
491 /* binsrch sets slot as a side effect */
492#ifdef DEBUG
493printf("binsrch(%s) -> %d\n", mac, slot);
494#endif
495 loc = &knowncmds[slot];
496 src = &knowncmds[ncmds-1];
497 dest = src+1;
498 while (dest > loc)
499 *dest-- = *src--;
500 *loc = malloc(3);
501 strcpy(*loc, mac);
502 ncmds++;
503#ifdef DEBUG
504printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
505#endif
506}
507
508/*
509 * Do a binary search in knowncmds for mac.
510 * If found, return the index. If not, return -1.
511 */
512binsrch(mac)
513char *mac;
514{
515 register char *p; /* pointer to current cmd in list */
516 register int d; /* difference if any */
517 register int mid; /* mid point in binary search */
518 register int top, bot; /* boundaries of bin search, inclusive */
519
520 top = ncmds-1;
521 bot = 0;
522 while (top >= bot) {
523 mid = (top+bot)/2;
524 p = knowncmds[mid];
525 d = p[0] - mac[0];
526 if (d == 0)
527 d = p[1] - mac[1];
528 if (d == 0)
529 return mid;
530 if (d < 0)
531 bot = mid + 1;
532 else
533 top = mid - 1;
534 }
535 slot = bot; /* place it would have gone */
536 return -1;
537}
538
539