386BSD 0.1 development
[unix-history] / usr / src / usr.bin / checknr / checknr.c
CommitLineData
60b9c768
WJ
1/*
2 * Copyright (c) 1980 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35char copyright[] =
36"@(#) Copyright (c) 1980 The Regents of the University of California.\n\
37 All rights reserved.\n";
38#endif /* not lint */
39
40#ifndef lint
41static char sccsid[] = "@(#)checknr.c 5.4 (Berkeley) 6/1/90";
42#endif /* not lint */
43
44/*
45 * checknr: check an nroff/troff input file for matching macro calls.
46 * we also attempt to match size and font changes, but only the embedded
47 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
48 * later but for now think of these restrictions as contributions to
49 * structured typesetting.
50 */
51#include <stdio.h>
52#include <ctype.h>
53
#define MAXSTK	100	/* number of entries in the open-bracket stack stk[] */
#define MAXBR	100	/* number of entries in the bracket-pair table br[] */
#define MAXCMDS	500	/* number of entries in the command table knowncmds[] */
57
/*
 * Stack of openers that have been seen but not yet closed.
 * stktop is the index of the newest entry; process() sets it to -1
 * (empty stack) before reading a file.
 */
struct stkstr {
	int opno;	/* index into br[] of the opener (SZ for \s, FT for \f) */
	int pl;		/* \s: character after the s, or ' ' if a digit; \f: 1; macro: 0 */
	int parm;	/* point size for \s, font letter for \f, 0 for a macro */
	int lno;	/* input line number on which the opener was seen */
};

struct stkstr stk[MAXSTK];
int stktop;
68
/*
 * Table of opening/closing request pairs.  The entry with a null
 * opbr terminates the table; main() appends the pairs given with -a
 * at that position.
 */
struct brstr {
	char *opbr;	/* request that opens the bracket */
	char *clbr;	/* request that closes it */
};

#define SZ	0	/* br[] index used for size changes */
#define FT	1	/* br[] index used for font changes */

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* also \s */
	{ "ft", "ft" },		/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};
126
127/*
128 * All commands known to nroff, plus macro packages.
129 * Used so we can complain about unrecognized commands.
130 */
131char *knowncmds[MAXCMDS] = {
132"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
133"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
134"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
135"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
136"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
137"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
138"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
139"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
140"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
141"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
142"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
143"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
144"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
145"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
146"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
147"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
148"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
149"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
150"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
151"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
152"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
153"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
154"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
155"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
156"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
157"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
158"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
159"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
160"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
161"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
162"yr", 0
163};
164
/* State of the input currently being checked */
char *cfilename;	/* name of the file being read ("stdin" for standard input) */
char line[256];		/* text of the line under examination */
int lineno;		/* number of that line within the file */
int nfiles;		/* how many file operands were given */

/* Command line switches */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */

/* Bookkeeping for knowncmds[] */
int ncmds;		/* number of names currently in knowncmds */
int slot;		/* where binsrch says a missing name would belong */

char *malloc();
175
176main(argc, argv)
177int argc;
178char **argv;
179{
180 FILE *f;
181 int i;
182 char *cp;
183 char b1[4];
184
185 /* Figure out how many known commands there are */
186 while (knowncmds[ncmds])
187 ncmds++;
188 while (argc > 1 && argv[1][0] == '-') {
189 switch(argv[1][1]) {
190
191 /* -a: add pairs of macros */
192 case 'a':
193 i = strlen(argv[1]) - 2;
194 if (i % 6 != 0)
195 usage();
196 /* look for empty macro slots */
197 for (i=0; br[i].opbr; i++)
198 ;
199 for (cp=argv[1]+3; cp[-1]; cp += 6) {
200 br[i].opbr = malloc(3);
201 strncpy(br[i].opbr, cp, 2);
202 br[i].clbr = malloc(3);
203 strncpy(br[i].clbr, cp+3, 2);
204 addmac(br[i].opbr); /* knows pairs are also known cmds */
205 addmac(br[i].clbr);
206 i++;
207 }
208 break;
209
210 /* -c: add known commands */
211 case 'c':
212 i = strlen(argv[1]) - 2;
213 if (i % 3 != 0)
214 usage();
215 for (cp=argv[1]+3; cp[-1]; cp += 3) {
216 if (cp[2] && cp[2] != '.')
217 usage();
218 strncpy(b1, cp, 2);
219 addmac(b1);
220 }
221 break;
222
223 /* -f: ignore font changes */
224 case 'f':
225 fflag = 1;
226 break;
227
228 /* -s: ignore size changes */
229 case 's':
230 sflag = 1;
231 break;
232 default:
233 usage();
234 }
235 argc--; argv++;
236 }
237
238 nfiles = argc - 1;
239
240 if (nfiles > 0) {
241 for (i=1; i<argc; i++) {
242 cfilename = argv[i];
243 f = fopen(cfilename, "r");
244 if (f == NULL)
245 perror(cfilename);
246 else
247 process(f);
248 }
249 } else {
250 cfilename = "stdin";
251 process(stdin);
252 }
253 exit(0);
254}
255
/*
 * Write the command synopsis to standard output and terminate the
 * program with exit status 1.  Does not return.
 */
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
261
262process(f)
263FILE *f;
264{
265 register int i, n;
266 char mac[5]; /* The current macro or nroff command */
267 int pl;
268
269 stktop = -1;
270 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
271 if (line[0] == '.') {
272 /*
273 * find and isolate the macro/command name.
274 */
275 strncpy(mac, line+1, 4);
276 if (isspace(mac[0])) {
277 pe(lineno);
278 printf("Empty command\n");
279 } else if (isspace(mac[1])) {
280 mac[1] = 0;
281 } else if (isspace(mac[2])) {
282 mac[2] = 0;
283 } else if (mac[0] != '\\' || mac[1] != '\"') {
284 pe(lineno);
285 printf("Command too long\n");
286 }
287
288 /*
289 * Is it a known command?
290 */
291 checkknown(mac);
292
293 /*
294 * Should we add it?
295 */
296 if (eq(mac, "de"))
297 addcmd(line);
298
299 chkcmd(line, mac);
300 }
301
302 /*
303 * At this point we process the line looking
304 * for \s and \f.
305 */
306 for (i=0; line[i]; i++)
307 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
308 if (!sflag && line[++i]=='s') {
309 pl = line[++i];
310 if (isdigit(pl)) {
311 n = pl - '0';
312 pl = ' ';
313 } else
314 n = 0;
315 while (isdigit(line[++i]))
316 n = 10 * n + line[i] - '0';
317 i--;
318 if (n == 0) {
319 if (stk[stktop].opno == SZ) {
320 stktop--;
321 } else {
322 pe(lineno);
323 printf("unmatched \\s0\n");
324 }
325 } else {
326 stk[++stktop].opno = SZ;
327 stk[stktop].pl = pl;
328 stk[stktop].parm = n;
329 stk[stktop].lno = lineno;
330 }
331 } else if (!fflag && line[i]=='f') {
332 n = line[++i];
333 if (n == 'P') {
334 if (stk[stktop].opno == FT) {
335 stktop--;
336 } else {
337 pe(lineno);
338 printf("unmatched \\fP\n");
339 }
340 } else {
341 stk[++stktop].opno = FT;
342 stk[stktop].pl = 1;
343 stk[stktop].parm = n;
344 stk[stktop].lno = lineno;
345 }
346 }
347 }
348 }
349 /*
350 * We've hit the end and look at all this stuff that hasn't been
351 * matched yet! Complain, complain.
352 */
353 for (i=stktop; i>=0; i--) {
354 complain(i);
355 }
356}
357
358complain(i)
359{
360 pe(stk[i].lno);
361 printf("Unmatched ");
362 prop(i);
363 printf("\n");
364}
365
366prop(i)
367{
368 if (stk[i].pl == 0)
369 printf(".%s", br[stk[i].opno].opbr);
370 else switch(stk[i].opno) {
371 case SZ:
372 printf("\\s%c%d", stk[i].pl, stk[i].parm);
373 break;
374 case FT:
375 printf("\\f%c", stk[i].parm);
376 break;
377 default:
378 printf("Bug: stk[%d].opno = %d = .%s, .%s",
379 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
380 }
381}
382
383chkcmd(line, mac)
384char *line;
385char *mac;
386{
387 register int i, n;
388
389 /*
390 * Check to see if it matches top of stack.
391 */
392 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
393 stktop--; /* OK. Pop & forget */
394 else {
395 /* No. Maybe it's an opener */
396 for (i=0; br[i].opbr; i++) {
397 if (eq(mac, br[i].opbr)) {
398 /* Found. Push it. */
399 stktop++;
400 stk[stktop].opno = i;
401 stk[stktop].pl = 0;
402 stk[stktop].parm = 0;
403 stk[stktop].lno = lineno;
404 break;
405 }
406 /*
407 * Maybe it's an unmatched closer.
408 * NOTE: this depends on the fact
409 * that none of the closers can be
410 * openers too.
411 */
412 if (eq(mac, br[i].clbr)) {
413 nomatch(mac);
414 break;
415 }
416 }
417 }
418}
419
420nomatch(mac)
421char *mac;
422{
423 register int i, j;
424
425 /*
426 * Look for a match further down on stack
427 * If we find one, it suggests that the stuff in
428 * between is supposed to match itself.
429 */
430 for (j=stktop; j>=0; j--)
431 if (eq(mac,br[stk[j].opno].clbr)) {
432 /* Found. Make a good diagnostic. */
433 if (j == stktop-2) {
434 /*
435 * Check for special case \fx..\fR and don't
436 * complain.
437 */
438 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
439 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
440 stktop = j -1;
441 return;
442 }
443 /*
444 * We have two unmatched frobs. Chances are
445 * they were intended to match, so we mention
446 * them together.
447 */
448 pe(stk[j+1].lno);
449 prop(j+1);
450 printf(" does not match %d: ", stk[j+2].lno);
451 prop(j+2);
452 printf("\n");
453 } else for (i=j+1; i <= stktop; i++) {
454 complain(i);
455 }
456 stktop = j-1;
457 return;
458 }
459 /* Didn't find one. Throw this away. */
460 pe(lineno);
461 printf("Unmatched .%s\n", mac);
462}
463
/* eq: return 1 if the strings s1 and s2 are identical, 0 otherwise */
eq(s1, s2)
char *s1, *s2;
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
470
471/* print the first part of an error message, given the line number */
472pe(lineno)
473int lineno;
474{
475 if (nfiles > 1)
476 printf("%s: ", cfilename);
477 printf("%d: ", lineno);
478}
479
480checkknown(mac)
481char *mac;
482{
483
484 if (eq(mac, "."))
485 return;
486 if (binsrch(mac) >= 0)
487 return;
488 if (mac[0] == '\\' && mac[1] == '"') /* comments */
489 return;
490
491 pe(lineno);
492 printf("Unknown command: .%s\n", mac);
493}
494
495/*
496 * We have a .de xx line in "line". Add xx to the list of known commands.
497 */
498addcmd(line)
499char *line;
500{
501 char *mac;
502
503 /* grab the macro being defined */
504 mac = line+4;
505 while (isspace(*mac))
506 mac++;
507 if (*mac == 0) {
508 pe(lineno);
509 printf("illegal define: %s\n", line);
510 return;
511 }
512 mac[2] = 0;
513 if (isspace(mac[1]) || mac[1] == '\\')
514 mac[1] = 0;
515 if (ncmds >= MAXCMDS) {
516 printf("Only %d known commands allowed\n", MAXCMDS);
517 exit(1);
518 }
519 addmac(mac);
520}
521
522/*
523 * Add mac to the list. We should really have some kind of tree
524 * structure here but this is a quick-and-dirty job and I just don't
525 * have time to mess with it. (I wonder if this will come back to haunt
526 * me someday?) Anyway, I claim that .de is fairly rare in user
527 * nroff programs, and the register loop below is pretty fast.
528 */
529addmac(mac)
530char *mac;
531{
532 register char **src, **dest, **loc;
533
534 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
535#ifdef DEBUG
536 printf("binsrch(%s) -> already in table\n", mac);
537#endif DEBUG
538 return;
539 }
540 /* binsrch sets slot as a side effect */
541#ifdef DEBUG
542printf("binsrch(%s) -> %d\n", mac, slot);
543#endif
544 loc = &knowncmds[slot];
545 src = &knowncmds[ncmds-1];
546 dest = src+1;
547 while (dest > loc)
548 *dest-- = *src--;
549 *loc = malloc(3);
550 strcpy(*loc, mac);
551 ncmds++;
552#ifdef DEBUG
553printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
554#endif
555}
556
557/*
558 * Do a binary search in knowncmds for mac.
559 * If found, return the index. If not, return -1.
560 */
561binsrch(mac)
562char *mac;
563{
564 register char *p; /* pointer to current cmd in list */
565 register int d; /* difference if any */
566 register int mid; /* mid point in binary search */
567 register int top, bot; /* boundaries of bin search, inclusive */
568
569 top = ncmds-1;
570 bot = 0;
571 while (top >= bot) {
572 mid = (top+bot)/2;
573 p = knowncmds[mid];
574 d = p[0] - mac[0];
575 if (d == 0)
576 d = p[1] - mac[1];
577 if (d == 0)
578 return mid;
579 if (d < 0)
580 bot = mid + 1;
581 else
582 top = mid - 1;
583 }
584 slot = bot; /* place it would have gone */
585 return -1;
586}