port to tahoe by Nir peleg of CCI
[unix-history] / usr / src / usr.bin / checknr / checknr.c
CommitLineData
fcd2465c
DF
1/*
2 * Copyright (c) 1980 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 */
6
/*
 * Identification strings embedded in the object file.  Both are
 * compiled out when "lint" is defined.
 */
#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1980 Regents of the University of California.\n\
 All rights reserved.\n";
#endif /* not lint */

#ifndef lint
static char sccsid[] = "@(#)checknr.c 5.2 (Berkeley) %G%";
#endif /* not lint */
16
b5da8a99
BJ
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
26
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 * stktop is the index of the most recent entry; process() sets it
 * to -1 (empty) before reading each input file.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro pushed by chkcmd */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;
41
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs an opening macro with the macro that closes it.
 * The table ends at the first entry whose opbr is a null pointer;
 * the -a option in main() appends further pairs in the unused slots.
 * Lookups stop at the first match, so the pairs listed under more
 * than one macro package (DS/DE, FS/FE) are harmless.
 */
#define SZ	0	/* br[] index used for \s (size) escapes */
#define FT	1	/* br[] index used for \f (font) escapes */

struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};
99
100/*
e1062a05 101 * All commands known to nroff, plus macro packages.
b5da8a99
BJ
102 * Used so we can complain about unrecognized commands.
103 */
104char *knowncmds[MAXCMDS] = {
e1062a05
BJ
105"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
106"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
107"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
108"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
94a7a178
SL
109"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
110"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
111"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
112"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
113"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
114"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
115"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
116"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
117"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
118"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
420b269d
JB
119"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
120"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
121"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
122"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
123"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
124"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
125"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
126"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
127"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
128"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
129"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
130"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
131"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
132"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
133"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
134"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
135"yr", 0
b5da8a99
BJ
136};
137
/*
 * Global state shared by the routines below.
 */
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

char	*malloc();
148
149main(argc, argv)
150int argc;
151char **argv;
152{
153 FILE *f;
154 int i;
155 char *cp;
205b7e14 156 char b1[4];
b5da8a99 157
205b7e14
MH
158 /* Figure out how many known commands there are */
159 while (knowncmds[ncmds])
160 ncmds++;
b5da8a99
BJ
161 while (argc > 1 && argv[1][0] == '-') {
162 switch(argv[1][1]) {
205b7e14
MH
163
164 /* -a: add pairs of macros */
b5da8a99 165 case 'a':
b5da8a99 166 i = strlen(argv[1]) - 2;
205b7e14
MH
167 if (i % 6 != 0)
168 usage();
b5da8a99
BJ
169 /* look for empty macro slots */
170 for (i=0; br[i].opbr; i++)
171 ;
172 for (cp=argv[1]+3; cp[-1]; cp += 6) {
205b7e14
MH
173 br[i].opbr = malloc(3);
174 strncpy(br[i].opbr, cp, 2);
175 br[i].clbr = malloc(3);
176 strncpy(br[i].clbr, cp+3, 2);
177 addmac(br[i].opbr); /* knows pairs are also known cmds */
178 addmac(br[i].clbr);
b5da8a99
BJ
179 i++;
180 }
181 break;
205b7e14
MH
182
183 /* -c: add known commands */
184 case 'c':
185 i = strlen(argv[1]) - 2;
186 if (i % 3 != 0)
187 usage();
188 for (cp=argv[1]+3; cp[-1]; cp += 3) {
189 if (cp[2] && cp[2] != '.')
190 usage();
191 strncpy(b1, cp, 2);
192 addmac(b1);
193 }
194 break;
195
196 /* -f: ignore font changes */
b5da8a99
BJ
197 case 'f':
198 fflag = 1;
199 break;
205b7e14
MH
200
201 /* -s: ignore size changes */
b5da8a99
BJ
202 case 's':
203 sflag = 1;
204 break;
205 default:
205b7e14 206 usage();
b5da8a99
BJ
207 }
208 argc--; argv++;
209 }
210
211 nfiles = argc - 1;
212
213 if (nfiles > 0) {
214 for (i=1; i<argc; i++) {
215 cfilename = argv[i];
216 f = fopen(cfilename, "r");
217 if (f == NULL)
218 perror(cfilename);
219 else
220 process(f);
221 }
222 } else {
223 cfilename = "stdin";
224 process(stdin);
225 }
226 exit(0);
227}
228
205b7e14
MH
/*
 * usage: print a summary of the command line syntax on the standard
 * output and exit with status 1.  Does not return.
 */
int
usage()
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}
234
b5da8a99
BJ
235process(f)
236FILE *f;
237{
238 register int i, n;
239 char mac[5]; /* The current macro or nroff command */
240 int pl;
241
242 stktop = -1;
243 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
244 if (line[0] == '.') {
245 /*
246 * find and isolate the macro/command name.
247 */
248 strncpy(mac, line+1, 4);
249 if (isspace(mac[0])) {
250 pe(lineno);
251 printf("Empty command\n");
252 } else if (isspace(mac[1])) {
253 mac[1] = 0;
254 } else if (isspace(mac[2])) {
255 mac[2] = 0;
e1062a05 256 } else if (mac[0] != '\\' || mac[1] != '\"') {
b5da8a99
BJ
257 pe(lineno);
258 printf("Command too long\n");
259 }
260
261 /*
262 * Is it a known command?
263 */
264 checkknown(mac);
265
266 /*
267 * Should we add it?
268 */
269 if (eq(mac, "de"))
270 addcmd(line);
271
272 chkcmd(line, mac);
273 }
274
275 /*
276 * At this point we process the line looking
277 * for \s and \f.
278 */
279 for (i=0; line[i]; i++)
280 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
281 if (!sflag && line[++i]=='s') {
282 pl = line[++i];
283 if (isdigit(pl)) {
284 n = pl - '0';
285 pl = ' ';
286 } else
287 n = 0;
288 while (isdigit(line[++i]))
289 n = 10 * n + line[i] - '0';
290 i--;
291 if (n == 0) {
292 if (stk[stktop].opno == SZ) {
293 stktop--;
294 } else {
295 pe(lineno);
296 printf("unmatched \\s0\n");
297 }
298 } else {
299 stk[++stktop].opno = SZ;
300 stk[stktop].pl = pl;
301 stk[stktop].parm = n;
302 stk[stktop].lno = lineno;
303 }
304 } else if (!fflag && line[i]=='f') {
305 n = line[++i];
306 if (n == 'P') {
307 if (stk[stktop].opno == FT) {
308 stktop--;
309 } else {
310 pe(lineno);
311 printf("unmatched \\fP\n");
312 }
313 } else {
314 stk[++stktop].opno = FT;
315 stk[stktop].pl = 1;
316 stk[stktop].parm = n;
317 stk[stktop].lno = lineno;
318 }
319 }
320 }
321 }
322 /*
323 * We've hit the end and look at all this stuff that hasn't been
324 * matched yet! Complain, complain.
325 */
326 for (i=stktop; i>=0; i--) {
327 complain(i);
328 }
329}
330
331complain(i)
332{
333 pe(stk[i].lno);
334 printf("Unmatched ");
335 prop(i);
336 printf("\n");
337}
338
339prop(i)
340{
341 if (stk[i].pl == 0)
342 printf(".%s", br[stk[i].opno].opbr);
343 else switch(stk[i].opno) {
344 case SZ:
345 printf("\\s%c%d", stk[i].pl, stk[i].parm);
346 break;
347 case FT:
348 printf("\\f%c", stk[i].parm);
349 break;
350 default:
351 printf("Bug: stk[%d].opno = %d = .%s, .%s",
352 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
353 }
354}
355
356chkcmd(line, mac)
357char *line;
358char *mac;
359{
360 register int i, n;
361
362 /*
363 * Check to see if it matches top of stack.
364 */
365 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
366 stktop--; /* OK. Pop & forget */
367 else {
368 /* No. Maybe it's an opener */
369 for (i=0; br[i].opbr; i++) {
370 if (eq(mac, br[i].opbr)) {
371 /* Found. Push it. */
372 stktop++;
373 stk[stktop].opno = i;
374 stk[stktop].pl = 0;
375 stk[stktop].parm = 0;
376 stk[stktop].lno = lineno;
377 break;
378 }
379 /*
380 * Maybe it's an unmatched closer.
381 * NOTE: this depends on the fact
382 * that none of the closers can be
383 * openers too.
384 */
385 if (eq(mac, br[i].clbr)) {
386 nomatch(mac);
387 break;
388 }
389 }
390 }
391}
392
393nomatch(mac)
394char *mac;
395{
396 register int i, j;
397
398 /*
399 * Look for a match further down on stack
400 * If we find one, it suggests that the stuff in
401 * between is supposed to match itself.
402 */
403 for (j=stktop; j>=0; j--)
404 if (eq(mac,br[stk[j].opno].clbr)) {
405 /* Found. Make a good diagnostic. */
406 if (j == stktop-2) {
407 /*
408 * Check for special case \fx..\fR and don't
409 * complain.
410 */
411 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
412 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
413 stktop = j -1;
414 return;
415 }
416 /*
417 * We have two unmatched frobs. Chances are
418 * they were intended to match, so we mention
419 * them together.
420 */
421 pe(stk[j+1].lno);
422 prop(j+1);
423 printf(" does not match %d: ", stk[j+2].lno);
424 prop(j+2);
425 printf("\n");
426 } else for (i=j+1; i <= stktop; i++) {
427 complain(i);
428 }
429 stktop = j-1;
430 return;
431 }
432 /* Didn't find one. Throw this away. */
433 pe(lineno);
434 printf("Unmatched .%s\n", mac);
435}
436
/*
 * eq: are two strings equal?
 * Returns 1 when s1 and s2 compare equal with strcmp(), 0 otherwise.
 */
int
eq(s1, s2)
char *s1, *s2;
{
	return (strcmp(s1, s2) == 0);
}
443
444/* print the first part of an error message, given the line number */
445pe(lineno)
446int lineno;
447{
448 if (nfiles > 1)
449 printf("%s: ", cfilename);
450 printf("%d: ", lineno);
451}
452
453checkknown(mac)
454char *mac;
455{
b5da8a99
BJ
456
457 if (eq(mac, "."))
458 return;
459 if (binsrch(mac) >= 0)
460 return;
e1062a05
BJ
461 if (mac[0] == '\\' && mac[1] == '"') /* comments */
462 return;
b5da8a99
BJ
463
464 pe(lineno);
465 printf("Unknown command: .%s\n", mac);
466}
467
468/*
469 * We have a .de xx line in "line". Add xx to the list of known commands.
470 */
471addcmd(line)
472char *line;
473{
474 char *mac;
b5da8a99
BJ
475
476 /* grab the macro being defined */
477 mac = line+4;
478 while (isspace(*mac))
479 mac++;
480 if (*mac == 0) {
481 pe(lineno);
482 printf("illegal define: %s\n", line);
483 return;
484 }
485 mac[2] = 0;
486 if (isspace(mac[1]) || mac[1] == '\\')
487 mac[1] = 0;
488 if (ncmds >= MAXCMDS) {
489 printf("Only %d known commands allowed\n", MAXCMDS);
490 exit(1);
491 }
205b7e14
MH
492 addmac(mac);
493}
494
495/*
496 * Add mac to the list. We should really have some kind of tree
497 * structure here but this is a quick-and-dirty job and I just don't
498 * have time to mess with it. (I wonder if this will come back to haunt
499 * me someday?) Anyway, I claim that .de is fairly rare in user
500 * nroff programs, and the register loop below is pretty fast.
501 */
502addmac(mac)
503char *mac;
504{
505 register char **src, **dest, **loc;
506
6f145a48
RH
507 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
508#ifdef DEBUG
509 printf("binsrch(%s) -> already in table\n", mac);
510#endif DEBUG
511 return;
512 }
b5da8a99 513 /* binsrch sets slot as a side effect */
205b7e14
MH
514#ifdef DEBUG
515printf("binsrch(%s) -> %d\n", mac, slot);
516#endif
b5da8a99
BJ
517 loc = &knowncmds[slot];
518 src = &knowncmds[ncmds-1];
519 dest = src+1;
520 while (dest > loc)
521 *dest-- = *src--;
522 *loc = malloc(3);
523 strcpy(*loc, mac);
524 ncmds++;
205b7e14
MH
525#ifdef DEBUG
526printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
527#endif
b5da8a99
BJ
528}
529
530/*
531 * Do a binary search in knowncmds for mac.
532 * If found, return the index. If not, return -1.
533 */
534binsrch(mac)
535char *mac;
536{
537 register char *p; /* pointer to current cmd in list */
538 register int d; /* difference if any */
539 register int mid; /* mid point in binary search */
540 register int top, bot; /* boundaries of bin search, inclusive */
541
542 top = ncmds-1;
543 bot = 0;
544 while (top >= bot) {
545 mid = (top+bot)/2;
546 p = knowncmds[mid];
547 d = p[0] - mac[0];
548 if (d == 0)
549 d = p[1] - mac[1];
550 if (d == 0)
551 return mid;
552 if (d < 0)
553 bot = mid + 1;
554 else
555 top = mid - 1;
556 }
557 slot = bot; /* place it would have gone */
558 return -1;
559}