manual page distributed with 4.1BSD
[unix-history] / usr / src / usr.bin / checknr / checknr.c
CommitLineData
/* Version identification string; left out when `lint` is defined. */
#ifndef lint
static char sccsid[] = "@(#)checknr.c 4.7 (Berkeley) %G%";
#endif
b5da8a99
BJ
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
13
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 * stktop is the index of the top entry; process() resets it to -1
 * (empty) before reading each input stream.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a dot macro */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;
28
/*
 * The kinds of opening and closing brackets.
 *
 * The first two rows must stay at indices SZ and FT: process() pushes
 * those indices directly for \s and \f escapes.  The table ends at the
 * first row whose opbr is 0; the -a option in main() appends new pairs
 * at that position.  chkcmd() relies on no closer also being an opener.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz", "sz" },	/* also \s */
#define FT	1
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};
86
87/*
e1062a05 88 * All commands known to nroff, plus macro packages.
b5da8a99
BJ
89 * Used so we can complain about unrecognized commands.
90 */
91char *knowncmds[MAXCMDS] = {
e1062a05
BJ
92"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
93"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
94"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
95"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
94a7a178
SL
96"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
97"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
98"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
99"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
100"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
101"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
102"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
103"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
104"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
105"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
106"SG", "SH", "SK", "SM", "SP", "SY", "TA", "TB", "TC", "TD", "TE", "TH",
107"TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL", "WC",
108"WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0", "[1",
109"[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>", "][",
110"ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd", "bi",
111"bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct",
112"cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef",
113"eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp",
114"ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i", "ie",
115"if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", "lo",
116"lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2",
117"na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", "of",
118"oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", "q",
119"r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb", "sc",
120"sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti",
121"tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr",
b5da8a99
BJ
1220
123};
124
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* number of entries in use in knowncmds */
int	slot;		/* where binsrch's last failed lookup would insert */
133
134char *malloc();
135
136main(argc, argv)
137int argc;
138char **argv;
139{
140 FILE *f;
141 int i;
142 char *cp;
205b7e14 143 char b1[4];
b5da8a99 144
205b7e14
MH
145 /* Figure out how many known commands there are */
146 while (knowncmds[ncmds])
147 ncmds++;
b5da8a99
BJ
148 while (argc > 1 && argv[1][0] == '-') {
149 switch(argv[1][1]) {
205b7e14
MH
150
151 /* -a: add pairs of macros */
b5da8a99 152 case 'a':
b5da8a99 153 i = strlen(argv[1]) - 2;
205b7e14
MH
154 if (i % 6 != 0)
155 usage();
b5da8a99
BJ
156 /* look for empty macro slots */
157 for (i=0; br[i].opbr; i++)
158 ;
159 for (cp=argv[1]+3; cp[-1]; cp += 6) {
205b7e14
MH
160 br[i].opbr = malloc(3);
161 strncpy(br[i].opbr, cp, 2);
162 br[i].clbr = malloc(3);
163 strncpy(br[i].clbr, cp+3, 2);
164 addmac(br[i].opbr); /* knows pairs are also known cmds */
165 addmac(br[i].clbr);
b5da8a99
BJ
166 i++;
167 }
168 break;
205b7e14
MH
169
170 /* -c: add known commands */
171 case 'c':
172 i = strlen(argv[1]) - 2;
173 if (i % 3 != 0)
174 usage();
175 for (cp=argv[1]+3; cp[-1]; cp += 3) {
176 if (cp[2] && cp[2] != '.')
177 usage();
178 strncpy(b1, cp, 2);
179 addmac(b1);
180 }
181 break;
182
183 /* -f: ignore font changes */
b5da8a99
BJ
184 case 'f':
185 fflag = 1;
186 break;
205b7e14
MH
187
188 /* -s: ignore size changes */
b5da8a99
BJ
189 case 's':
190 sflag = 1;
191 break;
192 default:
205b7e14 193 usage();
b5da8a99
BJ
194 }
195 argc--; argv++;
196 }
197
198 nfiles = argc - 1;
199
200 if (nfiles > 0) {
201 for (i=1; i<argc; i++) {
202 cfilename = argv[i];
203 f = fopen(cfilename, "r");
204 if (f == NULL)
205 perror(cfilename);
206 else
207 process(f);
208 }
209 } else {
210 cfilename = "stdin";
211 process(stdin);
212 }
213 exit(0);
214}
215
205b7e14
MH
/*
 * usage: print the command synopsis on standard output and exit with
 * status 1.  Does not return.
 */
int
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}
221
b5da8a99
BJ
222process(f)
223FILE *f;
224{
225 register int i, n;
226 char mac[5]; /* The current macro or nroff command */
227 int pl;
228
229 stktop = -1;
230 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
231 if (line[0] == '.') {
232 /*
233 * find and isolate the macro/command name.
234 */
235 strncpy(mac, line+1, 4);
236 if (isspace(mac[0])) {
237 pe(lineno);
238 printf("Empty command\n");
239 } else if (isspace(mac[1])) {
240 mac[1] = 0;
241 } else if (isspace(mac[2])) {
242 mac[2] = 0;
e1062a05 243 } else if (mac[0] != '\\' || mac[1] != '\"') {
b5da8a99
BJ
244 pe(lineno);
245 printf("Command too long\n");
246 }
247
248 /*
249 * Is it a known command?
250 */
251 checkknown(mac);
252
253 /*
254 * Should we add it?
255 */
256 if (eq(mac, "de"))
257 addcmd(line);
258
259 chkcmd(line, mac);
260 }
261
262 /*
263 * At this point we process the line looking
264 * for \s and \f.
265 */
266 for (i=0; line[i]; i++)
267 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
268 if (!sflag && line[++i]=='s') {
269 pl = line[++i];
270 if (isdigit(pl)) {
271 n = pl - '0';
272 pl = ' ';
273 } else
274 n = 0;
275 while (isdigit(line[++i]))
276 n = 10 * n + line[i] - '0';
277 i--;
278 if (n == 0) {
279 if (stk[stktop].opno == SZ) {
280 stktop--;
281 } else {
282 pe(lineno);
283 printf("unmatched \\s0\n");
284 }
285 } else {
286 stk[++stktop].opno = SZ;
287 stk[stktop].pl = pl;
288 stk[stktop].parm = n;
289 stk[stktop].lno = lineno;
290 }
291 } else if (!fflag && line[i]=='f') {
292 n = line[++i];
293 if (n == 'P') {
294 if (stk[stktop].opno == FT) {
295 stktop--;
296 } else {
297 pe(lineno);
298 printf("unmatched \\fP\n");
299 }
300 } else {
301 stk[++stktop].opno = FT;
302 stk[stktop].pl = 1;
303 stk[stktop].parm = n;
304 stk[stktop].lno = lineno;
305 }
306 }
307 }
308 }
309 /*
310 * We've hit the end and look at all this stuff that hasn't been
311 * matched yet! Complain, complain.
312 */
313 for (i=stktop; i>=0; i--) {
314 complain(i);
315 }
316}
317
318complain(i)
319{
320 pe(stk[i].lno);
321 printf("Unmatched ");
322 prop(i);
323 printf("\n");
324}
325
326prop(i)
327{
328 if (stk[i].pl == 0)
329 printf(".%s", br[stk[i].opno].opbr);
330 else switch(stk[i].opno) {
331 case SZ:
332 printf("\\s%c%d", stk[i].pl, stk[i].parm);
333 break;
334 case FT:
335 printf("\\f%c", stk[i].parm);
336 break;
337 default:
338 printf("Bug: stk[%d].opno = %d = .%s, .%s",
339 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
340 }
341}
342
343chkcmd(line, mac)
344char *line;
345char *mac;
346{
347 register int i, n;
348
349 /*
350 * Check to see if it matches top of stack.
351 */
352 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
353 stktop--; /* OK. Pop & forget */
354 else {
355 /* No. Maybe it's an opener */
356 for (i=0; br[i].opbr; i++) {
357 if (eq(mac, br[i].opbr)) {
358 /* Found. Push it. */
359 stktop++;
360 stk[stktop].opno = i;
361 stk[stktop].pl = 0;
362 stk[stktop].parm = 0;
363 stk[stktop].lno = lineno;
364 break;
365 }
366 /*
367 * Maybe it's an unmatched closer.
368 * NOTE: this depends on the fact
369 * that none of the closers can be
370 * openers too.
371 */
372 if (eq(mac, br[i].clbr)) {
373 nomatch(mac);
374 break;
375 }
376 }
377 }
378}
379
380nomatch(mac)
381char *mac;
382{
383 register int i, j;
384
385 /*
386 * Look for a match further down on stack
387 * If we find one, it suggests that the stuff in
388 * between is supposed to match itself.
389 */
390 for (j=stktop; j>=0; j--)
391 if (eq(mac,br[stk[j].opno].clbr)) {
392 /* Found. Make a good diagnostic. */
393 if (j == stktop-2) {
394 /*
395 * Check for special case \fx..\fR and don't
396 * complain.
397 */
398 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
399 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
400 stktop = j -1;
401 return;
402 }
403 /*
404 * We have two unmatched frobs. Chances are
405 * they were intended to match, so we mention
406 * them together.
407 */
408 pe(stk[j+1].lno);
409 prop(j+1);
410 printf(" does not match %d: ", stk[j+2].lno);
411 prop(j+2);
412 printf("\n");
413 } else for (i=j+1; i <= stktop; i++) {
414 complain(i);
415 }
416 stktop = j-1;
417 return;
418 }
419 /* Didn't find one. Throw this away. */
420 pe(lineno);
421 printf("Unmatched .%s\n", mac);
422}
423
/* eq: are two strings equal?  Returns 1 if they are, 0 if not. */
int
eq(const char *s1, const char *s2)
{
	return strcmp(s1, s2) == 0;
}
430
431/* print the first part of an error message, given the line number */
432pe(lineno)
433int lineno;
434{
435 if (nfiles > 1)
436 printf("%s: ", cfilename);
437 printf("%d: ", lineno);
438}
439
440checkknown(mac)
441char *mac;
442{
b5da8a99
BJ
443
444 if (eq(mac, "."))
445 return;
446 if (binsrch(mac) >= 0)
447 return;
e1062a05
BJ
448 if (mac[0] == '\\' && mac[1] == '"') /* comments */
449 return;
b5da8a99
BJ
450
451 pe(lineno);
452 printf("Unknown command: .%s\n", mac);
453}
454
455/*
456 * We have a .de xx line in "line". Add xx to the list of known commands.
457 */
458addcmd(line)
459char *line;
460{
461 char *mac;
b5da8a99
BJ
462
463 /* grab the macro being defined */
464 mac = line+4;
465 while (isspace(*mac))
466 mac++;
467 if (*mac == 0) {
468 pe(lineno);
469 printf("illegal define: %s\n", line);
470 return;
471 }
472 mac[2] = 0;
473 if (isspace(mac[1]) || mac[1] == '\\')
474 mac[1] = 0;
475 if (ncmds >= MAXCMDS) {
476 printf("Only %d known commands allowed\n", MAXCMDS);
477 exit(1);
478 }
205b7e14
MH
479 addmac(mac);
480}
481
482/*
483 * Add mac to the list. We should really have some kind of tree
484 * structure here but this is a quick-and-dirty job and I just don't
485 * have time to mess with it. (I wonder if this will come back to haunt
486 * me someday?) Anyway, I claim that .de is fairly rare in user
487 * nroff programs, and the register loop below is pretty fast.
488 */
489addmac(mac)
490char *mac;
491{
492 register char **src, **dest, **loc;
493
6f145a48
RH
494 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
495#ifdef DEBUG
496 printf("binsrch(%s) -> already in table\n", mac);
497#endif DEBUG
498 return;
499 }
b5da8a99 500 /* binsrch sets slot as a side effect */
205b7e14
MH
501#ifdef DEBUG
502printf("binsrch(%s) -> %d\n", mac, slot);
503#endif
b5da8a99
BJ
504 loc = &knowncmds[slot];
505 src = &knowncmds[ncmds-1];
506 dest = src+1;
507 while (dest > loc)
508 *dest-- = *src--;
509 *loc = malloc(3);
510 strcpy(*loc, mac);
511 ncmds++;
205b7e14
MH
512#ifdef DEBUG
513printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
514#endif
b5da8a99
BJ
515}
516
517/*
518 * Do a binary search in knowncmds for mac.
519 * If found, return the index. If not, return -1.
520 */
521binsrch(mac)
522char *mac;
523{
524 register char *p; /* pointer to current cmd in list */
525 register int d; /* difference if any */
526 register int mid; /* mid point in binary search */
527 register int top, bot; /* boundaries of bin search, inclusive */
528
529 top = ncmds-1;
530 bot = 0;
531 while (top >= bot) {
532 mid = (top+bot)/2;
533 p = knowncmds[mid];
534 d = p[0] - mac[0];
535 if (d == 0)
536 d = p[1] - mac[1];
537 if (d == 0)
538 return mid;
539 if (d < 0)
540 bot = mid + 1;
541 else
542 top = mid - 1;
543 }
544 slot = bot; /* place it would have gone */
545 return -1;
546}