date and time created 80/10/30 00:35:38 by mckusick
[unix-history] / usr / src / usr.bin / checknr / checknr.c
CommitLineData
/* SCCS identification string; %G% is the SCCS date keyword, expanded on checkout. */
static char *sccsid = "@(#)checknr.c 4.2 (Berkeley) %G%";
b5da8a99
BJ
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
11
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 * Each entry records one opener that has not been closed yet.
 * stktop is the index of the most recent entry; process() resets it
 * to -1 (empty stack) at the start of every input file.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;
26
/*
 * The kinds of opening and closing brackets.
 * Each entry pairs the name of an opening macro/command with the name
 * of the one that closes it.  The table ends at the first entry whose
 * opbr is null; the unused tail of the array is zero-filled, which is
 * where pairs given with -a are appended.
 * SZ and FT are the indices of the size and font entries, which are
 * shared with the embedded \s and \f escapes.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz", "sz" },	/* also \s */
#define FT	1
	{ "ft", "ft" },	/* also \f */
	/* the -ms package */
	{ "AB", "AE" },
	{ "RS", "RE" },
	{ "LG", "NL" },
	{ "SM", "NL" },
	{ "FS", "FE" },
	{ "DS", "DE" },
	{ "CD", "DE" },
	{ "LD", "DE" },
	{ "ID", "DE" },
	{ "KS", "KE" },
	{ "KF", "KE" },
	{ "QS", "QE" },
	/* Things needed by preprocessors */
	{ "TS", "TE" },
	{ "EQ", "EN" },
	/* The -me package */
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(b", ")b" },
	{ "(z", ")z" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(x", ")x" },
	{ 0, 0 }
};
66
67/*
68 * All commands known to nroff, plus ms and me.
69 * Used so we can complain about unrecognized commands.
70 */
71char *knowncmds[MAXCMDS] = {
72"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l",
73"(q", "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q",
74")t", ")x", ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(",
75"@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", "@h",
76"@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AB", "AE",
77"AE", "AI", "AI", "AT", "AU", "AU", "AX", "B", "B1", "B2",
78"BD", "BG", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
79"DA", "DE", "DF", "DS", "EG", "EM", "EN", "EQ", "EQ", "FA",
80"FE", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX",
81"HO", "I", "ID", "IE", "IH", "IM", "IP", "IZ", "KD", "KE",
82"KF", "KQ", "KS", "LB", "LD", "LG", "LP", "MC", "ME", "MF",
83"MH", "MR", "ND", "NH", "NL", "NP", "OK", "PP", "PT", "PY",
84"QE", "QP", "QS", "R", "RA", "RC", "RE", "RP", "RQ", "RS",
85"RT", "S0", "S2", "S3", "SG", "SH", "SM", "SY", "TA", "TC",
86"TD", "TE", "TH", "TL", "TL", "TM", "TQ", "TR", "TS", "TS",
87"TX", "UL", "US", "UX", "WH", "XD", "XF", "XK", "XP", "[-",
88"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]-",
89"]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as",
90"b", "ba", "bc", "bd", "bi", "bl", "bp", "bp", "br", "bx",
91"c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da",
92"de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef",
93"eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl",
94"fo", "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw",
95"hx", "hy", "i", "ie", "if", "ig", "in", "ip", "it", "ix",
96"lc", "lg", "li", "ll", "ll", "ln", "lo", "lp", "ls", "lt",
97"m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2", "na",
98"ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
99"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po",
100"po", "pp", "ps", "q", "r", "rb", "rd", "re", "re", "rm",
101"rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so",
102"sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl",
103"tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "yr",
1040
105};
106
/*
 * Global state shared by the routines in this file.
 */
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

char	*malloc();
117
118main(argc, argv)
119int argc;
120char **argv;
121{
122 FILE *f;
123 int i;
124 char *cp;
205b7e14 125 char b1[4];
b5da8a99
BJ
126
127 if (argc <= 1)
205b7e14
MH
128 usage();
129 /* Figure out how many known commands there are */
130 while (knowncmds[ncmds])
131 ncmds++;
b5da8a99
BJ
132 while (argc > 1 && argv[1][0] == '-') {
133 switch(argv[1][1]) {
205b7e14
MH
134
135 /* -a: add pairs of macros */
b5da8a99 136 case 'a':
b5da8a99 137 i = strlen(argv[1]) - 2;
205b7e14
MH
138 if (i % 6 != 0)
139 usage();
b5da8a99
BJ
140 /* look for empty macro slots */
141 for (i=0; br[i].opbr; i++)
142 ;
143 for (cp=argv[1]+3; cp[-1]; cp += 6) {
205b7e14
MH
144 br[i].opbr = malloc(3);
145 strncpy(br[i].opbr, cp, 2);
146 br[i].clbr = malloc(3);
147 strncpy(br[i].clbr, cp+3, 2);
148 addmac(br[i].opbr); /* knows pairs are also known cmds */
149 addmac(br[i].clbr);
b5da8a99
BJ
150 i++;
151 }
152 break;
205b7e14
MH
153
154 /* -c: add known commands */
155 case 'c':
156 i = strlen(argv[1]) - 2;
157 if (i % 3 != 0)
158 usage();
159 for (cp=argv[1]+3; cp[-1]; cp += 3) {
160 if (cp[2] && cp[2] != '.')
161 usage();
162 strncpy(b1, cp, 2);
163 addmac(b1);
164 }
165 break;
166
167 /* -f: ignore font changes */
b5da8a99
BJ
168 case 'f':
169 fflag = 1;
170 break;
205b7e14
MH
171
172 /* -s: ignore size changes */
b5da8a99
BJ
173 case 's':
174 sflag = 1;
175 break;
176 default:
205b7e14 177 usage();
b5da8a99
BJ
178 }
179 argc--; argv++;
180 }
181
182 nfiles = argc - 1;
183
184 if (nfiles > 0) {
185 for (i=1; i<argc; i++) {
186 cfilename = argv[i];
187 f = fopen(cfilename, "r");
188 if (f == NULL)
189 perror(cfilename);
190 else
191 process(f);
192 }
193 } else {
194 cfilename = "stdin";
195 process(stdin);
196 }
197 exit(0);
198}
199
205b7e14
MH
/*
 * usage: print a one-line summary of the command syntax on the
 * standard output and exit with status 1.  Does not return.
 */
int
usage()
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}
205
b5da8a99
BJ
206process(f)
207FILE *f;
208{
209 register int i, n;
210 char mac[5]; /* The current macro or nroff command */
211 int pl;
212
213 stktop = -1;
214 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
215 if (line[0] == '.') {
216 /*
217 * find and isolate the macro/command name.
218 */
219 strncpy(mac, line+1, 4);
220 if (isspace(mac[0])) {
221 pe(lineno);
222 printf("Empty command\n");
223 } else if (isspace(mac[1])) {
224 mac[1] = 0;
225 } else if (isspace(mac[2])) {
226 mac[2] = 0;
227 } else if (mac[2] != '\\' || mac[3] != '\"') {
228 pe(lineno);
229 printf("Command too long\n");
230 }
231
232 /*
233 * Is it a known command?
234 */
235 checkknown(mac);
236
237 /*
238 * Should we add it?
239 */
240 if (eq(mac, "de"))
241 addcmd(line);
242
243 chkcmd(line, mac);
244 }
245
246 /*
247 * At this point we process the line looking
248 * for \s and \f.
249 */
250 for (i=0; line[i]; i++)
251 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
252 if (!sflag && line[++i]=='s') {
253 pl = line[++i];
254 if (isdigit(pl)) {
255 n = pl - '0';
256 pl = ' ';
257 } else
258 n = 0;
259 while (isdigit(line[++i]))
260 n = 10 * n + line[i] - '0';
261 i--;
262 if (n == 0) {
263 if (stk[stktop].opno == SZ) {
264 stktop--;
265 } else {
266 pe(lineno);
267 printf("unmatched \\s0\n");
268 }
269 } else {
270 stk[++stktop].opno = SZ;
271 stk[stktop].pl = pl;
272 stk[stktop].parm = n;
273 stk[stktop].lno = lineno;
274 }
275 } else if (!fflag && line[i]=='f') {
276 n = line[++i];
277 if (n == 'P') {
278 if (stk[stktop].opno == FT) {
279 stktop--;
280 } else {
281 pe(lineno);
282 printf("unmatched \\fP\n");
283 }
284 } else {
285 stk[++stktop].opno = FT;
286 stk[stktop].pl = 1;
287 stk[stktop].parm = n;
288 stk[stktop].lno = lineno;
289 }
290 }
291 }
292 }
293 /*
294 * We've hit the end and look at all this stuff that hasn't been
295 * matched yet! Complain, complain.
296 */
297 for (i=stktop; i>=0; i--) {
298 complain(i);
299 }
300}
301
302complain(i)
303{
304 pe(stk[i].lno);
305 printf("Unmatched ");
306 prop(i);
307 printf("\n");
308}
309
310prop(i)
311{
312 if (stk[i].pl == 0)
313 printf(".%s", br[stk[i].opno].opbr);
314 else switch(stk[i].opno) {
315 case SZ:
316 printf("\\s%c%d", stk[i].pl, stk[i].parm);
317 break;
318 case FT:
319 printf("\\f%c", stk[i].parm);
320 break;
321 default:
322 printf("Bug: stk[%d].opno = %d = .%s, .%s",
323 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
324 }
325}
326
327chkcmd(line, mac)
328char *line;
329char *mac;
330{
331 register int i, n;
332
333 /*
334 * Check to see if it matches top of stack.
335 */
336 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
337 stktop--; /* OK. Pop & forget */
338 else {
339 /* No. Maybe it's an opener */
340 for (i=0; br[i].opbr; i++) {
341 if (eq(mac, br[i].opbr)) {
342 /* Found. Push it. */
343 stktop++;
344 stk[stktop].opno = i;
345 stk[stktop].pl = 0;
346 stk[stktop].parm = 0;
347 stk[stktop].lno = lineno;
348 break;
349 }
350 /*
351 * Maybe it's an unmatched closer.
352 * NOTE: this depends on the fact
353 * that none of the closers can be
354 * openers too.
355 */
356 if (eq(mac, br[i].clbr)) {
357 nomatch(mac);
358 break;
359 }
360 }
361 }
362}
363
364nomatch(mac)
365char *mac;
366{
367 register int i, j;
368
369 /*
370 * Look for a match further down on stack
371 * If we find one, it suggests that the stuff in
372 * between is supposed to match itself.
373 */
374 for (j=stktop; j>=0; j--)
375 if (eq(mac,br[stk[j].opno].clbr)) {
376 /* Found. Make a good diagnostic. */
377 if (j == stktop-2) {
378 /*
379 * Check for special case \fx..\fR and don't
380 * complain.
381 */
382 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
383 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
384 stktop = j -1;
385 return;
386 }
387 /*
388 * We have two unmatched frobs. Chances are
389 * they were intended to match, so we mention
390 * them together.
391 */
392 pe(stk[j+1].lno);
393 prop(j+1);
394 printf(" does not match %d: ", stk[j+2].lno);
395 prop(j+2);
396 printf("\n");
397 } else for (i=j+1; i <= stktop; i++) {
398 complain(i);
399 }
400 stktop = j-1;
401 return;
402 }
403 /* Didn't find one. Throw this away. */
404 pe(lineno);
405 printf("Unmatched .%s\n", mac);
406}
407
/*
 * eq: are two strings equal?
 * Returns 1 when s1 and s2 have identical contents, 0 otherwise.
 */
int
eq(s1, s2)
char *s1, *s2;
{
	return (strcmp(s1, s2) == 0);
}
414
415/* print the first part of an error message, given the line number */
416pe(lineno)
417int lineno;
418{
419 if (nfiles > 1)
420 printf("%s: ", cfilename);
421 printf("%d: ", lineno);
422}
423
424checkknown(mac)
425char *mac;
426{
b5da8a99
BJ
427
428 if (eq(mac, "."))
429 return;
430 if (binsrch(mac) >= 0)
431 return;
432
433 pe(lineno);
434 printf("Unknown command: .%s\n", mac);
435}
436
437/*
438 * We have a .de xx line in "line". Add xx to the list of known commands.
439 */
440addcmd(line)
441char *line;
442{
443 char *mac;
b5da8a99
BJ
444
445 /* grab the macro being defined */
446 mac = line+4;
447 while (isspace(*mac))
448 mac++;
449 if (*mac == 0) {
450 pe(lineno);
451 printf("illegal define: %s\n", line);
452 return;
453 }
454 mac[2] = 0;
455 if (isspace(mac[1]) || mac[1] == '\\')
456 mac[1] = 0;
457 if (ncmds >= MAXCMDS) {
458 printf("Only %d known commands allowed\n", MAXCMDS);
459 exit(1);
460 }
205b7e14
MH
461 addmac(mac);
462}
463
464/*
465 * Add mac to the list. We should really have some kind of tree
466 * structure here but this is a quick-and-dirty job and I just don't
467 * have time to mess with it. (I wonder if this will come back to haunt
468 * me someday?) Anyway, I claim that .de is fairly rare in user
469 * nroff programs, and the register loop below is pretty fast.
470 */
471addmac(mac)
472char *mac;
473{
474 register char **src, **dest, **loc;
475
b5da8a99
BJ
476 binsrch(mac); /* it's OK to redefine something */
477 /* binsrch sets slot as a side effect */
205b7e14
MH
478#ifdef DEBUG
479printf("binsrch(%s) -> %d\n", mac, slot);
480#endif
b5da8a99
BJ
481 loc = &knowncmds[slot];
482 src = &knowncmds[ncmds-1];
483 dest = src+1;
484 while (dest > loc)
485 *dest-- = *src--;
486 *loc = malloc(3);
487 strcpy(*loc, mac);
488 ncmds++;
205b7e14
MH
489#ifdef DEBUG
490printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
491#endif
b5da8a99
BJ
492}
493
494/*
495 * Do a binary search in knowncmds for mac.
496 * If found, return the index. If not, return -1.
497 */
498binsrch(mac)
499char *mac;
500{
501 register char *p; /* pointer to current cmd in list */
502 register int d; /* difference if any */
503 register int mid; /* mid point in binary search */
504 register int top, bot; /* boundaries of bin search, inclusive */
505
506 top = ncmds-1;
507 bot = 0;
508 while (top >= bot) {
509 mid = (top+bot)/2;
510 p = knowncmds[mid];
511 d = p[0] - mac[0];
512 if (d == 0)
513 d = p[1] - mac[1];
514 if (d == 0)
515 return mid;
516 if (d < 0)
517 bot = mid + 1;
518 else
519 top = mid - 1;
520 }
521 slot = bot; /* place it would have gone */
522 return -1;
523}