date and time created 80/10/01 17:27:12 by bill
[unix-history] / usr / src / usr.bin / checknr / checknr.c
CommitLineData
b5da8a99
BJ
/* SCCS version identification string for this source file. */
static char *sccsid =
	"@(#)checknr.c 4.1 (Berkeley) %G%";
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
9#include <stdio.h>
10#include <ctype.h>
11
/* Fixed capacities of the statically allocated tables. */
#define MAXSTK	100	/* entries in the stack of open constructs */
#define MAXBR	100	/* entries in the opener/closer pair table */
#define MAXCMDS	500	/* entries in the table of known commands */
15
/*
 * One entry of the stack on which we remember the opening constructs
 * seen so far that have not yet been closed.
 */
struct stkstr {
	int	opno;	/* index into br[] of the opening bracket */
	int	pl;	/* '+', '-', ' ' for \s, 1 for \f, 0 for a command */
	int	parm;	/* parameter of the size or font change */
	int	lno;	/* input line number on which it appeared */
};

struct stkstr	stk[MAXSTK];	/* the stack itself */
int		stktop;		/* index of the top entry; process() resets it to -1 */
26
/*
 * The kinds of opening and closing brackets: each entry pairs the name
 * of an opening command with the name of the command that closes it.
 * The list ends at the first entry whose opener is a null pointer; the
 * -a option appends further pairs at that position.
 */
struct brstr {
	char	*opbr;		/* name of the opening command */
	char	*clbr;		/* name of the matching closing command */
};

#define SZ	0		/* index of the size entry, shared with \s */
#define FT	1		/* index of the font entry, shared with \f */

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* also \s */
	{ "ft", "ft" },		/* also \f */
	/* The -ms package */
	{ "AB", "AE" },
	{ "RS", "RE" },
	{ "LG", "NL" },
	{ "SM", "NL" },
	{ "FS", "FE" },
	{ "DS", "DE" },
	{ "CD", "DE" },
	{ "LD", "DE" },
	{ "ID", "DE" },
	{ "KS", "KE" },
	{ "KF", "KE" },
	{ "QS", "QE" },
	/* Things needed by preprocessors */
	{ "TS", "TE" },
	{ "EQ", "EN" },
	/* The -me package */
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(b", ")b" },
	{ "(z", ")z" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(x", ")x" },
	{ 0, 0 }
};
66
67/*
68 * All commands known to nroff, plus ms and me.
69 * Used so we can complain about unrecognized commands.
70 */
71char *knowncmds[MAXCMDS] = {
72"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l",
73"(q", "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q",
74")t", ")x", ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(",
75"@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", "@h",
76"@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AB", "AE",
77"AE", "AI", "AI", "AT", "AU", "AU", "AX", "B", "B1", "B2",
78"BD", "BG", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
79"DA", "DE", "DF", "DS", "EG", "EM", "EN", "EQ", "EQ", "FA",
80"FE", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX",
81"HO", "I", "ID", "IE", "IH", "IM", "IP", "IZ", "KD", "KE",
82"KF", "KQ", "KS", "LB", "LD", "LG", "LP", "MC", "ME", "MF",
83"MH", "MR", "ND", "NH", "NL", "NP", "OK", "PP", "PT", "PY",
84"QE", "QP", "QS", "R", "RA", "RC", "RE", "RP", "RQ", "RS",
85"RT", "S0", "S2", "S3", "SG", "SH", "SM", "SY", "TA", "TC",
86"TD", "TE", "TH", "TL", "TL", "TM", "TQ", "TR", "TS", "TS",
87"TX", "UL", "US", "UX", "WH", "XD", "XF", "XK", "XP", "[-",
88"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]-",
89"]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as",
90"b", "ba", "bc", "bd", "bi", "bl", "bp", "bp", "br", "bx",
91"c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da",
92"de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef",
93"eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl",
94"fo", "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw",
95"hx", "hy", "i", "ie", "if", "ig", "in", "ip", "it", "ix",
96"lc", "lg", "li", "ll", "ll", "ln", "lo", "lp", "ls", "lt",
97"m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2", "na",
98"ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
99"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po",
100"po", "pp", "ps", "q", "r", "rb", "rd", "re", "re", "rm",
101"rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so",
102"sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl",
103"tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "yr",
1040
105};
106
/* The input currently being checked. */
int	lineno;		/* current line number in input file */
char	line[256];	/* text of the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */

/* Option flags set from the command line. */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */

/* State of the knowncmds table. */
int	ncmds;		/* number of entries in knowncmds */
int	slot;		/* slot in knowncmds reported by binsrch on a miss */

char	*malloc();
117
118main(argc, argv)
119int argc;
120char **argv;
121{
122 FILE *f;
123 int i;
124 char *cp;
125
126 if (argc <= 1)
127 goto usage;
128 while (argc > 1 && argv[1][0] == '-') {
129 switch(argv[1][1]) {
130 case 'a':
131 /* -a: add pairs of macros */
132 i = strlen(argv[1]) - 2;
133 if (i % 6 != 0) {
134usage:
135 printf("Usage: nrc -s -f -a.xx.yy.xx.yy... (.xx, .yy)\n");
136 break;
137 }
138 /* look for empty macro slots */
139 for (i=0; br[i].opbr; i++)
140 ;
141 for (cp=argv[1]+3; cp[-1]; cp += 6) {
142 br[i].opbr = cp;
143 br[i].clbr = cp+3;
144 cp[2] = cp[5] = 0;
145 i++;
146 }
147 break;
148 case 'f':
149 fflag = 1;
150 break;
151 case 's':
152 sflag = 1;
153 break;
154 default:
155 printf("Illegal flag: %s\n", argv[1]);
156 break;
157 }
158 argc--; argv++;
159 }
160
161 nfiles = argc - 1;
162
163 if (nfiles > 0) {
164 for (i=1; i<argc; i++) {
165 cfilename = argv[i];
166 f = fopen(cfilename, "r");
167 if (f == NULL)
168 perror(cfilename);
169 else
170 process(f);
171 }
172 } else {
173 cfilename = "stdin";
174 process(stdin);
175 }
176 exit(0);
177}
178
179process(f)
180FILE *f;
181{
182 register int i, n;
183 char mac[5]; /* The current macro or nroff command */
184 int pl;
185
186 stktop = -1;
187 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
188 if (line[0] == '.') {
189 /*
190 * find and isolate the macro/command name.
191 */
192 strncpy(mac, line+1, 4);
193 if (isspace(mac[0])) {
194 pe(lineno);
195 printf("Empty command\n");
196 } else if (isspace(mac[1])) {
197 mac[1] = 0;
198 } else if (isspace(mac[2])) {
199 mac[2] = 0;
200 } else if (mac[2] != '\\' || mac[3] != '\"') {
201 pe(lineno);
202 printf("Command too long\n");
203 }
204
205 /*
206 * Is it a known command?
207 */
208 checkknown(mac);
209
210 /*
211 * Should we add it?
212 */
213 if (eq(mac, "de"))
214 addcmd(line);
215
216 chkcmd(line, mac);
217 }
218
219 /*
220 * At this point we process the line looking
221 * for \s and \f.
222 */
223 for (i=0; line[i]; i++)
224 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
225 if (!sflag && line[++i]=='s') {
226 pl = line[++i];
227 if (isdigit(pl)) {
228 n = pl - '0';
229 pl = ' ';
230 } else
231 n = 0;
232 while (isdigit(line[++i]))
233 n = 10 * n + line[i] - '0';
234 i--;
235 if (n == 0) {
236 if (stk[stktop].opno == SZ) {
237 stktop--;
238 } else {
239 pe(lineno);
240 printf("unmatched \\s0\n");
241 }
242 } else {
243 stk[++stktop].opno = SZ;
244 stk[stktop].pl = pl;
245 stk[stktop].parm = n;
246 stk[stktop].lno = lineno;
247 }
248 } else if (!fflag && line[i]=='f') {
249 n = line[++i];
250 if (n == 'P') {
251 if (stk[stktop].opno == FT) {
252 stktop--;
253 } else {
254 pe(lineno);
255 printf("unmatched \\fP\n");
256 }
257 } else {
258 stk[++stktop].opno = FT;
259 stk[stktop].pl = 1;
260 stk[stktop].parm = n;
261 stk[stktop].lno = lineno;
262 }
263 }
264 }
265 }
266 /*
267 * We've hit the end and look at all this stuff that hasn't been
268 * matched yet! Complain, complain.
269 */
270 for (i=stktop; i>=0; i--) {
271 complain(i);
272 }
273}
274
275complain(i)
276{
277 pe(stk[i].lno);
278 printf("Unmatched ");
279 prop(i);
280 printf("\n");
281}
282
283prop(i)
284{
285 if (stk[i].pl == 0)
286 printf(".%s", br[stk[i].opno].opbr);
287 else switch(stk[i].opno) {
288 case SZ:
289 printf("\\s%c%d", stk[i].pl, stk[i].parm);
290 break;
291 case FT:
292 printf("\\f%c", stk[i].parm);
293 break;
294 default:
295 printf("Bug: stk[%d].opno = %d = .%s, .%s",
296 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
297 }
298}
299
300chkcmd(line, mac)
301char *line;
302char *mac;
303{
304 register int i, n;
305
306 /*
307 * Check to see if it matches top of stack.
308 */
309 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
310 stktop--; /* OK. Pop & forget */
311 else {
312 /* No. Maybe it's an opener */
313 for (i=0; br[i].opbr; i++) {
314 if (eq(mac, br[i].opbr)) {
315 /* Found. Push it. */
316 stktop++;
317 stk[stktop].opno = i;
318 stk[stktop].pl = 0;
319 stk[stktop].parm = 0;
320 stk[stktop].lno = lineno;
321 break;
322 }
323 /*
324 * Maybe it's an unmatched closer.
325 * NOTE: this depends on the fact
326 * that none of the closers can be
327 * openers too.
328 */
329 if (eq(mac, br[i].clbr)) {
330 nomatch(mac);
331 break;
332 }
333 }
334 }
335}
336
337nomatch(mac)
338char *mac;
339{
340 register int i, j;
341
342 /*
343 * Look for a match further down on stack
344 * If we find one, it suggests that the stuff in
345 * between is supposed to match itself.
346 */
347 for (j=stktop; j>=0; j--)
348 if (eq(mac,br[stk[j].opno].clbr)) {
349 /* Found. Make a good diagnostic. */
350 if (j == stktop-2) {
351 /*
352 * Check for special case \fx..\fR and don't
353 * complain.
354 */
355 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
356 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
357 stktop = j -1;
358 return;
359 }
360 /*
361 * We have two unmatched frobs. Chances are
362 * they were intended to match, so we mention
363 * them together.
364 */
365 pe(stk[j+1].lno);
366 prop(j+1);
367 printf(" does not match %d: ", stk[j+2].lno);
368 prop(j+2);
369 printf("\n");
370 } else for (i=j+1; i <= stktop; i++) {
371 complain(i);
372 }
373 stktop = j-1;
374 return;
375 }
376 /* Didn't find one. Throw this away. */
377 pe(lineno);
378 printf("Unmatched .%s\n", mac);
379}
380
/*
 * eq: compare two NUL-terminated strings.
 * Returns 1 when they are identical and 0 otherwise.
 */
int
eq(s1, s2)
char *s1, *s2;
{
	return (!strcmp(s1, s2));
}
387
388/* print the first part of an error message, given the line number */
389pe(lineno)
390int lineno;
391{
392 if (nfiles > 1)
393 printf("%s: ", cfilename);
394 printf("%d: ", lineno);
395}
396
397checkknown(mac)
398char *mac;
399{
400 /* First time figure out ncmds. */
401 if (ncmds == 0) {
402 while (knowncmds[ncmds])
403 ncmds++;
404 }
405
406 if (eq(mac, "."))
407 return;
408 if (binsrch(mac) >= 0)
409 return;
410
411 pe(lineno);
412 printf("Unknown command: .%s\n", mac);
413}
414
415/*
416 * We have a .de xx line in "line". Add xx to the list of known commands.
417 */
418addcmd(line)
419char *line;
420{
421 char *mac;
422 register char **src, **dest, **loc;
423
424 /* grab the macro being defined */
425 mac = line+4;
426 while (isspace(*mac))
427 mac++;
428 if (*mac == 0) {
429 pe(lineno);
430 printf("illegal define: %s\n", line);
431 return;
432 }
433 mac[2] = 0;
434 if (isspace(mac[1]) || mac[1] == '\\')
435 mac[1] = 0;
436 if (ncmds >= MAXCMDS) {
437 printf("Only %d known commands allowed\n", MAXCMDS);
438 exit(1);
439 }
440
441 /*
442 * Add mac to the list. We should really have some kind of tree
443 * structure here but this is a quick-and-dirty job and I just don't
444 * have time to mess with it. (I wonder if this will come back to haunt
445 * me someday?) Anyway, I claim that .de is fairly rare in user
446 * nroff programs, and the register loop below is pretty fast.
447 */
448 binsrch(mac); /* it's OK to redefine something */
449 /* binsrch sets slot as a side effect */
450 loc = &knowncmds[slot];
451 src = &knowncmds[ncmds-1];
452 dest = src+1;
453 while (dest > loc)
454 *dest-- = *src--;
455 *loc = malloc(3);
456 strcpy(*loc, mac);
457 ncmds++;
458}
459
460/*
461 * Do a binary search in knowncmds for mac.
462 * If found, return the index. If not, return -1.
463 */
464binsrch(mac)
465char *mac;
466{
467 register char *p; /* pointer to current cmd in list */
468 register int d; /* difference if any */
469 register int mid; /* mid point in binary search */
470 register int top, bot; /* boundaries of bin search, inclusive */
471
472 top = ncmds-1;
473 bot = 0;
474 while (top >= bot) {
475 mid = (top+bot)/2;
476 p = knowncmds[mid];
477 d = p[0] - mac[0];
478 if (d == 0)
479 d = p[1] - mac[1];
480 if (d == 0)
481 return mid;
482 if (d < 0)
483 bot = mid + 1;
484 else
485 top = mid - 1;
486 }
487 slot = bot; /* place it would have gone */
488 return -1;
489}