/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * Size and font changes are matched as well, but only the embedded
 * kind, and these must be closed with \s0 and \fP respectively.
 * More sophistication may come later; for now think of these
 * restrictions as contributions to structured typesetting.
 */
static char sccsid[] = "@(#)checknr.c 4.4 (Berkeley) %G%";
| 9 | #include <stdio.h> |
| 10 | #include <ctype.h> |
| 11 | |
/* Fixed capacities of the static tables in this file. */
enum {
	MAXSTK = 100,	/* Stack size */
	MAXBR = 100,	/* Max number of bracket pairs known */
	MAXCMDS = 500	/* Max number of commands known */
};
| 15 | |
/*
 * The stack of openers seen so far that have not been closed yet.
 * stktop is the index of the most recent entry; process() sets it
 * to -1 (empty) before reading a file.
 */
struct stkstr {
	int opno;	/* index in br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 when pushed by chkcmd */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line the opener was seen on */
};

struct stkstr stk[MAXSTK];
int stktop;
| 26 | |
/*
 * The kinds of opening and closing brackets.
 * Each entry names the request or macro that opens a construct and
 * the one that closes it.  The list ends at the first entry whose
 * opbr is null; main() appends the pairs given with -a there.
 */
struct brstr {
	char *opbr;	/* opener */
	char *clbr;	/* matching closer */
};

#define SZ	0	/* index of the "sz" entry, shared with \s */
#define FT	1	/* index of the "ft" entry, shared with \f */

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* also \s */
	{ "ft", "ft" },		/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end of list */
	{ 0, 0 }
};
| 81 | |
| 82 | /* |
| 83 | * All commands known to nroff, plus macro packages. |
| 84 | * Used so we can complain about unrecognized commands. |
| 85 | */ |
| 86 | char *knowncmds[MAXCMDS] = { |
| 87 | "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t", |
| 88 | "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", |
| 89 | "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", |
| 90 | "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", |
| 91 | "AE", "AF", "AI", "AL", "AS", "AT", "AU", "AX", "B", "B1", "B2", "BD", |
| 92 | "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D", |
| 93 | "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ", |
| 94 | "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", |
| 95 | "FV", "FX", "H", "HC", "HM", "HO", "HU", "I", "ID", "IE", "IH", "IM", |
| 96 | "IP", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG", |
| 97 | "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH", |
| 98 | "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", "PF", "PH", "PP", "PT", |
| 99 | "PY", "QE", "QP", "QS", "R", "RA", "RC", "RE", "RL", "RP", "RQ", "RS", |
| 100 | "RT", "S", "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY", |
| 101 | "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS", |
| 102 | "TX", "UL", "US", "UX", "VL", "WC", "WH", "XD", "XF", "XK", "XP", "[", "[-", |
| 103 | "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>", |
| 104 | "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd", |
| 105 | "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs", |
| 106 | "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", |
| 107 | "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", |
| 108 | "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i", |
| 109 | "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", |
| 110 | "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", |
| 111 | "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", |
| 112 | "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", |
| 113 | "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb", |
| 114 | "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", |
| 115 | "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr", |
| 116 | 0 |
| 117 | }; |
| 118 | |
/* The file being checked. */
char *cfilename;	/* name of current file */
int lineno;		/* current line number in input file */
char line[256];		/* the current line */

/* Settings taken from the command line. */
int nfiles;		/* number of files to process */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
int ncmds;		/* size of knowncmds */
int slot;		/* slot in knowncmds found by binsrch */

char *malloc();
| 129 | |
| 130 | main(argc, argv) |
| 131 | int argc; |
| 132 | char **argv; |
| 133 | { |
| 134 | FILE *f; |
| 135 | int i; |
| 136 | char *cp; |
| 137 | char b1[4]; |
| 138 | |
| 139 | if (argc <= 1) |
| 140 | usage(); |
| 141 | /* Figure out how many known commands there are */ |
| 142 | while (knowncmds[ncmds]) |
| 143 | ncmds++; |
| 144 | while (argc > 1 && argv[1][0] == '-') { |
| 145 | switch(argv[1][1]) { |
| 146 | |
| 147 | /* -a: add pairs of macros */ |
| 148 | case 'a': |
| 149 | i = strlen(argv[1]) - 2; |
| 150 | if (i % 6 != 0) |
| 151 | usage(); |
| 152 | /* look for empty macro slots */ |
| 153 | for (i=0; br[i].opbr; i++) |
| 154 | ; |
| 155 | for (cp=argv[1]+3; cp[-1]; cp += 6) { |
| 156 | br[i].opbr = malloc(3); |
| 157 | strncpy(br[i].opbr, cp, 2); |
| 158 | br[i].clbr = malloc(3); |
| 159 | strncpy(br[i].clbr, cp+3, 2); |
| 160 | addmac(br[i].opbr); /* knows pairs are also known cmds */ |
| 161 | addmac(br[i].clbr); |
| 162 | i++; |
| 163 | } |
| 164 | break; |
| 165 | |
| 166 | /* -c: add known commands */ |
| 167 | case 'c': |
| 168 | i = strlen(argv[1]) - 2; |
| 169 | if (i % 3 != 0) |
| 170 | usage(); |
| 171 | for (cp=argv[1]+3; cp[-1]; cp += 3) { |
| 172 | if (cp[2] && cp[2] != '.') |
| 173 | usage(); |
| 174 | strncpy(b1, cp, 2); |
| 175 | addmac(b1); |
| 176 | } |
| 177 | break; |
| 178 | |
| 179 | /* -f: ignore font changes */ |
| 180 | case 'f': |
| 181 | fflag = 1; |
| 182 | break; |
| 183 | |
| 184 | /* -s: ignore size changes */ |
| 185 | case 's': |
| 186 | sflag = 1; |
| 187 | break; |
| 188 | default: |
| 189 | usage(); |
| 190 | } |
| 191 | argc--; argv++; |
| 192 | } |
| 193 | |
| 194 | nfiles = argc - 1; |
| 195 | |
| 196 | if (nfiles > 0) { |
| 197 | for (i=1; i<argc; i++) { |
| 198 | cfilename = argv[i]; |
| 199 | f = fopen(cfilename, "r"); |
| 200 | if (f == NULL) |
| 201 | perror(cfilename); |
| 202 | else |
| 203 | process(f); |
| 204 | } |
| 205 | } else { |
| 206 | cfilename = "stdin"; |
| 207 | process(stdin); |
| 208 | } |
| 209 | exit(0); |
| 210 | } |
| 211 | |
/*
 * usage: print the one-line synopsis on standard output and exit
 * with status 1.  Does not return.
 */
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
| 217 | |
| 218 | process(f) |
| 219 | FILE *f; |
| 220 | { |
| 221 | register int i, n; |
| 222 | char mac[5]; /* The current macro or nroff command */ |
| 223 | int pl; |
| 224 | |
| 225 | stktop = -1; |
| 226 | for (lineno = 1; fgets(line, sizeof line, f); lineno++) { |
| 227 | if (line[0] == '.') { |
| 228 | /* |
| 229 | * find and isolate the macro/command name. |
| 230 | */ |
| 231 | strncpy(mac, line+1, 4); |
| 232 | if (isspace(mac[0])) { |
| 233 | pe(lineno); |
| 234 | printf("Empty command\n"); |
| 235 | } else if (isspace(mac[1])) { |
| 236 | mac[1] = 0; |
| 237 | } else if (isspace(mac[2])) { |
| 238 | mac[2] = 0; |
| 239 | } else if (mac[0] != '\\' || mac[1] != '\"') { |
| 240 | pe(lineno); |
| 241 | printf("Command too long\n"); |
| 242 | } |
| 243 | |
| 244 | /* |
| 245 | * Is it a known command? |
| 246 | */ |
| 247 | checkknown(mac); |
| 248 | |
| 249 | /* |
| 250 | * Should we add it? |
| 251 | */ |
| 252 | if (eq(mac, "de")) |
| 253 | addcmd(line); |
| 254 | |
| 255 | chkcmd(line, mac); |
| 256 | } |
| 257 | |
| 258 | /* |
| 259 | * At this point we process the line looking |
| 260 | * for \s and \f. |
| 261 | */ |
| 262 | for (i=0; line[i]; i++) |
| 263 | if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { |
| 264 | if (!sflag && line[++i]=='s') { |
| 265 | pl = line[++i]; |
| 266 | if (isdigit(pl)) { |
| 267 | n = pl - '0'; |
| 268 | pl = ' '; |
| 269 | } else |
| 270 | n = 0; |
| 271 | while (isdigit(line[++i])) |
| 272 | n = 10 * n + line[i] - '0'; |
| 273 | i--; |
| 274 | if (n == 0) { |
| 275 | if (stk[stktop].opno == SZ) { |
| 276 | stktop--; |
| 277 | } else { |
| 278 | pe(lineno); |
| 279 | printf("unmatched \\s0\n"); |
| 280 | } |
| 281 | } else { |
| 282 | stk[++stktop].opno = SZ; |
| 283 | stk[stktop].pl = pl; |
| 284 | stk[stktop].parm = n; |
| 285 | stk[stktop].lno = lineno; |
| 286 | } |
| 287 | } else if (!fflag && line[i]=='f') { |
| 288 | n = line[++i]; |
| 289 | if (n == 'P') { |
| 290 | if (stk[stktop].opno == FT) { |
| 291 | stktop--; |
| 292 | } else { |
| 293 | pe(lineno); |
| 294 | printf("unmatched \\fP\n"); |
| 295 | } |
| 296 | } else { |
| 297 | stk[++stktop].opno = FT; |
| 298 | stk[stktop].pl = 1; |
| 299 | stk[stktop].parm = n; |
| 300 | stk[stktop].lno = lineno; |
| 301 | } |
| 302 | } |
| 303 | } |
| 304 | } |
| 305 | /* |
| 306 | * We've hit the end and look at all this stuff that hasn't been |
| 307 | * matched yet! Complain, complain. |
| 308 | */ |
| 309 | for (i=stktop; i>=0; i--) { |
| 310 | complain(i); |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | complain(i) |
| 315 | { |
| 316 | pe(stk[i].lno); |
| 317 | printf("Unmatched "); |
| 318 | prop(i); |
| 319 | printf("\n"); |
| 320 | } |
| 321 | |
| 322 | prop(i) |
| 323 | { |
| 324 | if (stk[i].pl == 0) |
| 325 | printf(".%s", br[stk[i].opno].opbr); |
| 326 | else switch(stk[i].opno) { |
| 327 | case SZ: |
| 328 | printf("\\s%c%d", stk[i].pl, stk[i].parm); |
| 329 | break; |
| 330 | case FT: |
| 331 | printf("\\f%c", stk[i].parm); |
| 332 | break; |
| 333 | default: |
| 334 | printf("Bug: stk[%d].opno = %d = .%s, .%s", |
| 335 | i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr); |
| 336 | } |
| 337 | } |
| 338 | |
| 339 | chkcmd(line, mac) |
| 340 | char *line; |
| 341 | char *mac; |
| 342 | { |
| 343 | register int i, n; |
| 344 | |
| 345 | /* |
| 346 | * Check to see if it matches top of stack. |
| 347 | */ |
| 348 | if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) |
| 349 | stktop--; /* OK. Pop & forget */ |
| 350 | else { |
| 351 | /* No. Maybe it's an opener */ |
| 352 | for (i=0; br[i].opbr; i++) { |
| 353 | if (eq(mac, br[i].opbr)) { |
| 354 | /* Found. Push it. */ |
| 355 | stktop++; |
| 356 | stk[stktop].opno = i; |
| 357 | stk[stktop].pl = 0; |
| 358 | stk[stktop].parm = 0; |
| 359 | stk[stktop].lno = lineno; |
| 360 | break; |
| 361 | } |
| 362 | /* |
| 363 | * Maybe it's an unmatched closer. |
| 364 | * NOTE: this depends on the fact |
| 365 | * that none of the closers can be |
| 366 | * openers too. |
| 367 | */ |
| 368 | if (eq(mac, br[i].clbr)) { |
| 369 | nomatch(mac); |
| 370 | break; |
| 371 | } |
| 372 | } |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | nomatch(mac) |
| 377 | char *mac; |
| 378 | { |
| 379 | register int i, j; |
| 380 | |
| 381 | /* |
| 382 | * Look for a match further down on stack |
| 383 | * If we find one, it suggests that the stuff in |
| 384 | * between is supposed to match itself. |
| 385 | */ |
| 386 | for (j=stktop; j>=0; j--) |
| 387 | if (eq(mac,br[stk[j].opno].clbr)) { |
| 388 | /* Found. Make a good diagnostic. */ |
| 389 | if (j == stktop-2) { |
| 390 | /* |
| 391 | * Check for special case \fx..\fR and don't |
| 392 | * complain. |
| 393 | */ |
| 394 | if (stk[j+1].opno==FT && stk[j+1].parm!='R' |
| 395 | && stk[j+2].opno==FT && stk[j+2].parm=='R') { |
| 396 | stktop = j -1; |
| 397 | return; |
| 398 | } |
| 399 | /* |
| 400 | * We have two unmatched frobs. Chances are |
| 401 | * they were intended to match, so we mention |
| 402 | * them together. |
| 403 | */ |
| 404 | pe(stk[j+1].lno); |
| 405 | prop(j+1); |
| 406 | printf(" does not match %d: ", stk[j+2].lno); |
| 407 | prop(j+2); |
| 408 | printf("\n"); |
| 409 | } else for (i=j+1; i <= stktop; i++) { |
| 410 | complain(i); |
| 411 | } |
| 412 | stktop = j-1; |
| 413 | return; |
| 414 | } |
| 415 | /* Didn't find one. Throw this away. */ |
| 416 | pe(lineno); |
| 417 | printf("Unmatched .%s\n", mac); |
| 418 | } |
| 419 | |
/* eq: return 1 if the two strings are identical, 0 otherwise */
int
eq(s1, s2)
char *s1, *s2;
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
| 426 | |
| 427 | /* print the first part of an error message, given the line number */ |
| 428 | pe(lineno) |
| 429 | int lineno; |
| 430 | { |
| 431 | if (nfiles > 1) |
| 432 | printf("%s: ", cfilename); |
| 433 | printf("%d: ", lineno); |
| 434 | } |
| 435 | |
| 436 | checkknown(mac) |
| 437 | char *mac; |
| 438 | { |
| 439 | |
| 440 | if (eq(mac, ".")) |
| 441 | return; |
| 442 | if (binsrch(mac) >= 0) |
| 443 | return; |
| 444 | if (mac[0] == '\\' && mac[1] == '"') /* comments */ |
| 445 | return; |
| 446 | |
| 447 | pe(lineno); |
| 448 | printf("Unknown command: .%s\n", mac); |
| 449 | } |
| 450 | |
| 451 | /* |
| 452 | * We have a .de xx line in "line". Add xx to the list of known commands. |
| 453 | */ |
| 454 | addcmd(line) |
| 455 | char *line; |
| 456 | { |
| 457 | char *mac; |
| 458 | |
| 459 | /* grab the macro being defined */ |
| 460 | mac = line+4; |
| 461 | while (isspace(*mac)) |
| 462 | mac++; |
| 463 | if (*mac == 0) { |
| 464 | pe(lineno); |
| 465 | printf("illegal define: %s\n", line); |
| 466 | return; |
| 467 | } |
| 468 | mac[2] = 0; |
| 469 | if (isspace(mac[1]) || mac[1] == '\\') |
| 470 | mac[1] = 0; |
| 471 | if (ncmds >= MAXCMDS) { |
| 472 | printf("Only %d known commands allowed\n", MAXCMDS); |
| 473 | exit(1); |
| 474 | } |
| 475 | addmac(mac); |
| 476 | } |
| 477 | |
| 478 | /* |
| 479 | * Add mac to the list. We should really have some kind of tree |
| 480 | * structure here but this is a quick-and-dirty job and I just don't |
| 481 | * have time to mess with it. (I wonder if this will come back to haunt |
| 482 | * me someday?) Anyway, I claim that .de is fairly rare in user |
| 483 | * nroff programs, and the register loop below is pretty fast. |
| 484 | */ |
| 485 | addmac(mac) |
| 486 | char *mac; |
| 487 | { |
| 488 | register char **src, **dest, **loc; |
| 489 | |
| 490 | binsrch(mac); /* it's OK to redefine something */ |
| 491 | /* binsrch sets slot as a side effect */ |
| 492 | #ifdef DEBUG |
| 493 | printf("binsrch(%s) -> %d\n", mac, slot); |
| 494 | #endif |
| 495 | loc = &knowncmds[slot]; |
| 496 | src = &knowncmds[ncmds-1]; |
| 497 | dest = src+1; |
| 498 | while (dest > loc) |
| 499 | *dest-- = *src--; |
| 500 | *loc = malloc(3); |
| 501 | strcpy(*loc, mac); |
| 502 | ncmds++; |
| 503 | #ifdef DEBUG |
| 504 | printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds); |
| 505 | #endif |
| 506 | } |
| 507 | |
| 508 | /* |
| 509 | * Do a binary search in knowncmds for mac. |
| 510 | * If found, return the index. If not, return -1. |
| 511 | */ |
| 512 | binsrch(mac) |
| 513 | char *mac; |
| 514 | { |
| 515 | register char *p; /* pointer to current cmd in list */ |
| 516 | register int d; /* difference if any */ |
| 517 | register int mid; /* mid point in binary search */ |
| 518 | register int top, bot; /* boundaries of bin search, inclusive */ |
| 519 | |
| 520 | top = ncmds-1; |
| 521 | bot = 0; |
| 522 | while (top >= bot) { |
| 523 | mid = (top+bot)/2; |
| 524 | p = knowncmds[mid]; |
| 525 | d = p[0] - mac[0]; |
| 526 | if (d == 0) |
| 527 | d = p[1] - mac[1]; |
| 528 | if (d == 0) |
| 529 | return mid; |
| 530 | if (d < 0) |
| 531 | bot = mid + 1; |
| 532 | else |
| 533 | top = mid - 1; |
| 534 | } |
| 535 | slot = bot; /* place it would have gone */ |
| 536 | return -1; |
| 537 | } |
| 538 | |
| 539 | |