| 1 | /* flexdef - definitions file for flex */ |
| 2 | |
| 3 | /*- |
| 4 | * Copyright (c) 1990 The Regents of the University of California. |
| 5 | * All rights reserved. |
| 6 | * |
| 7 | * This code is derived from software contributed to Berkeley by |
| 8 | * Vern Paxson. |
| 9 | * |
| 10 | * The United States Government has rights in this work pursuant |
| 11 | * to contract no. DE-AC03-76SF00098 between the United States |
| 12 | * Department of Energy and the University of California. |
| 13 | * |
| 14 | * Redistribution and use in source and binary forms are permitted provided |
| 15 | * that: (1) source distributions retain this entire copyright notice and |
| 16 | * comment, and (2) distributions including binaries display the following |
| 17 | * acknowledgement: ``This product includes software developed by the |
| 18 | * University of California, Berkeley and its contributors'' in the |
| 19 | * documentation or other materials provided with the distribution and in |
| 20 | * all advertising materials mentioning features or use of this software. |
| 21 | * Neither the name of the University nor the names of its contributors may |
| 22 | * be used to endorse or promote products derived from this software without |
| 23 | * specific prior written permission. |
| 24 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED |
| 25 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF |
| 26 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
| 27 | */ |
| 28 | |
| 29 | /* @(#) $Header: /home/cvs/386BSD/src/usr.bin/lex/flexdef.h,v 1.2 1993/06/29 03:27:08 nate Exp $ (LBL) */ |
| 30 | |
| 31 | #ifndef FILE |
| 32 | #include <stdio.h> |
| 33 | #endif |
| 34 | |
| 35 | /* always be prepared to generate an 8-bit scanner */ |
| 36 | #define FLEX_8_BIT_CHARS |
| 37 | |
| 38 | #ifdef FLEX_8_BIT_CHARS |
| 39 | #define CSIZE 256 |
| 40 | #define Char unsigned char |
| 41 | #else |
| 42 | #define Char char |
| 43 | #define CSIZE 128 |
| 44 | #endif |
| 45 | |
| 46 | /* size of input alphabet - should be size of ASCII set */ |
| 47 | #ifndef DEFAULT_CSIZE |
| 48 | #define DEFAULT_CSIZE 128 |
| 49 | #endif |
| 50 | |
| 51 | #ifndef PROTO |
| 52 | #ifdef __STDC__ |
| 53 | #define PROTO(proto) proto |
| 54 | #else |
| 55 | #define PROTO(proto) () |
| 56 | #endif |
| 57 | #endif |
| 58 | |
| 59 | |
| 60 | #ifdef USG |
| 61 | #define SYS_V |
| 62 | #endif |
| 63 | |
| 64 | #ifdef SYS_V |
| 65 | #include <string.h> |
| 66 | #else |
| 67 | |
| 68 | #include <strings.h> |
| 69 | #ifdef lint |
| 70 | char *sprintf(); /* keep lint happy */ |
| 71 | #endif |
| 72 | #ifdef SCO_UNIX |
| 73 | void *memset(); |
| 74 | #else |
| 75 | #ifndef __386BSD__ |
| 76 | char *memset(); |
| 77 | #endif |
| 78 | #endif |
| 79 | #endif |
| 80 | |
| 81 | #ifdef AMIGA |
| 82 | #define bzero(s, n) setmem((char *)(s), n, '\0') |
| 83 | #ifndef abs |
| 84 | #define abs(x) ((x) < 0 ? -(x) : (x)) |
| 85 | #endif |
| 86 | #else |
| 87 | #define bzero(s, n) (void) memset((char *)(s), '\0', n) |
| 88 | #endif |
| 89 | |
| 90 | #ifdef VMS |
| 91 | #define unlink delete |
| 92 | #define SHORT_FILE_NAMES |
| 93 | #endif |
| 94 | |
/* Bring the allocator entry points (malloc, realloc, free) into scope.
 *
 * - ANSI compiler that is gcc: declare the functions by hand, using
 *   <stddef.h> only for size_t.  realloc() is declared here as well so
 *   that this branch provides the same names as the pre-ANSI branch
 *   below; without a declaration it would be implicitly typed as
 *   returning int, which is wrong for a pointer-returning function.
 * - any other ANSI compiler: rely on <stdlib.h>.
 * - pre-ANSI compiler: old-style declarations returning char *.
 */
#ifdef __STDC__

#ifdef __GNUC__
#include <stddef.h>
void *malloc( size_t );
void *realloc( void*, size_t );
void free( void* );
#else
#include <stdlib.h>
#endif

#else	/* ! __STDC__ */
char *malloc(), *realloc();
#endif
| 108 | |
| 109 | |
| 110 | /* maximum line length we'll have to deal with */ |
| 111 | #define MAXLINE BUFSIZ |
| 112 | |
| 113 | /* maximum size of file name */ |
| 114 | #define FILENAMESIZE 1024 |
| 115 | |
| 116 | #ifndef min |
| 117 | #define min(x,y) ((x) < (y) ? (x) : (y)) |
| 118 | #endif |
| 119 | #ifndef max |
| 120 | #define max(x,y) ((x) > (y) ? (x) : (y)) |
| 121 | #endif |
| 122 | |
| 123 | #ifdef MS_DOS |
| 124 | #ifndef abs |
| 125 | #define abs(x) ((x) < 0 ? -(x) : (x)) |
| 126 | #endif |
| 127 | #define SHORT_FILE_NAMES |
| 128 | #endif |
| 129 | |
| 130 | #define true 1 |
| 131 | #define false 0 |
| 132 | |
| 133 | |
| 134 | #ifndef DEFAULT_SKELETON_FILE |
| 135 | #define DEFAULT_SKELETON_FILE "/usr/share/misc/lex.skel" |
| 136 | #endif |
| 137 | |
| 138 | /* special chk[] values marking the slots taken by end-of-buffer and action |
| 139 | * numbers |
| 140 | */ |
| 141 | #define EOB_POSITION -1 |
| 142 | #define ACTION_POSITION -2 |
| 143 | |
| 144 | /* number of data items per line for -f output */ |
| 145 | #define NUMDATAITEMS 10 |
| 146 | |
| 147 | /* number of lines of data in -f output before inserting a blank line for |
| 148 | * readability. |
| 149 | */ |
| 150 | #define NUMDATALINES 10 |
| 151 | |
| 152 | /* transition_struct_out() definitions */ |
| 153 | #define TRANS_STRUCT_PRINT_LENGTH 15 |
| 154 | |
/* returns true if an nfa state has an epsilon out-transition slot
 * that can be used.  This definition is currently not used.
 *
 * The test is: the state's transition character is SYM_EPSILON, its
 * second transition slot (trans2) is still NO_TRANSITION, and the state
 * is not its own finalst[] entry.
 *
 * Every use of the argument is parenthesized so that an argument which is
 * itself an expression (e.g. "a ? b : c") cannot regroup the comparison.
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */
#define FREE_EPSILON(state) \
	(transchar[(state)] == SYM_EPSILON && \
	 trans2[(state)] == NO_TRANSITION && \
	 finalst[(state)] != (state))
| 162 | |
/* returns true if an nfa state has an epsilon out-transition character
 * and both slots are free
 *
 * Only trans1[] is examined here.  The argument is evaluated twice, so it
 * must not have side effects.  The definition deliberately ends without a
 * trailing line continuation, so the line that follows it can never be
 * spliced into the macro body.
 */
#define SUPER_FREE_EPSILON(state) \
	(transchar[(state)] == SYM_EPSILON && \
	 trans1[(state)] == NO_TRANSITION)
| 169 | |
| 170 | /* maximum number of NFA states that can comprise a DFA state. It's real |
| 171 | * big because if there's a lot of rules, the initial state will have a |
| 172 | * huge epsilon closure. |
| 173 | */ |
| 174 | #define INITIAL_MAX_DFA_SIZE 750 |
| 175 | #define MAX_DFA_SIZE_INCREMENT 750 |
| 176 | |
| 177 | |
| 178 | /* a note on the following masks. They are used to mark accepting numbers |
| 179 | * as being special. As such, they implicitly limit the number of accepting |
| 180 | * numbers (i.e., rules) because if there are too many rules the rule numbers |
| 181 | * will overload the mask bits. Fortunately, this limit is \large/ (0x2000 == |
| 182 | * 8192) so unlikely to actually cause any problems. A check is made in |
| 183 | * new_rule() to ensure that this limit is not reached. |
| 184 | */ |
| 185 | |
| 186 | /* mask to mark a trailing context accepting number */ |
| 187 | #define YY_TRAILING_MASK 0x2000 |
| 188 | |
| 189 | /* mask to mark the accepting number of the "head" of a trailing context rule */ |
| 190 | #define YY_TRAILING_HEAD_MASK 0x4000 |
| 191 | |
| 192 | /* maximum number of rules, as outlined in the above note */ |
| 193 | #define MAX_RULE (YY_TRAILING_MASK - 1) |
| 194 | |
| 195 | |
| 196 | /* NIL must be 0. If not, its special meaning when making equivalence classes |
| 197 | * (it marks the representative of a given e.c.) will be unidentifiable |
| 198 | */ |
| 199 | #define NIL 0 |
| 200 | |
| 201 | #define JAM -1 /* to mark a missing DFA transition */ |
| 202 | #define NO_TRANSITION NIL |
| 203 | #define UNIQUE -1 /* marks a symbol as an e.c. representative */ |
| 204 | #define INFINITY -1 /* for x{5,} constructions */ |
| 205 | |
| 206 | #define INITIAL_MAX_CCLS 100 /* max number of unique character classes */ |
| 207 | #define MAX_CCLS_INCREMENT 100 |
| 208 | |
| 209 | /* size of table holding members of character classes */ |
| 210 | #define INITIAL_MAX_CCL_TBL_SIZE 500 |
| 211 | #define MAX_CCL_TBL_SIZE_INCREMENT 250 |
| 212 | |
| 213 | #define INITIAL_MAX_RULES 100 /* default maximum number of rules */ |
| 214 | #define MAX_RULES_INCREMENT 100 |
| 215 | |
| 216 | #define INITIAL_MNS 2000 /* default maximum number of nfa states */ |
| 217 | #define MNS_INCREMENT 1000 /* amount to bump above by if it's not enough */ |
| 218 | |
| 219 | #define INITIAL_MAX_DFAS 1000 /* default maximum number of dfa states */ |
| 220 | #define MAX_DFAS_INCREMENT 1000 |
| 221 | |
| 222 | #define JAMSTATE -32766 /* marks a reference to the state that always jams */ |
| 223 | |
| 224 | /* enough so that if it's subtracted from an NFA state number, the result |
| 225 | * is guaranteed to be negative |
| 226 | */ |
| 227 | #define MARKER_DIFFERENCE 32000 |
| 228 | #define MAXIMUM_MNS 31999 |
| 229 | |
| 230 | /* maximum number of nxt/chk pairs for non-templates */ |
| 231 | #define INITIAL_MAX_XPAIRS 2000 |
| 232 | #define MAX_XPAIRS_INCREMENT 2000 |
| 233 | |
| 234 | /* maximum number of nxt/chk pairs needed for templates */ |
| 235 | #define INITIAL_MAX_TEMPLATE_XPAIRS 2500 |
| 236 | #define MAX_TEMPLATE_XPAIRS_INCREMENT 2500 |
| 237 | |
| 238 | #define SYM_EPSILON (CSIZE + 1) /* to mark transitions on the symbol epsilon */ |
| 239 | |
| 240 | #define INITIAL_MAX_SCS 40 /* maximum number of start conditions */ |
| 241 | #define MAX_SCS_INCREMENT 40 /* amount to bump by if it's not enough */ |
| 242 | |
| 243 | #define ONE_STACK_SIZE 500 /* stack of states with only one out-transition */ |
| 244 | #define SAME_TRANS -1 /* transition is the same as "default" entry for state */ |
| 245 | |
| 246 | /* the following percentages are used to tune table compression: |
| 247 | |
| 248 | * the percentage the number of out-transitions a state must be of the |
| 249 | * number of equivalence classes in order to be considered for table |
| 250 | * compaction by using protos |
| 251 | */ |
| 252 | #define PROTO_SIZE_PERCENTAGE 15 |
| 253 | |
| 254 | /* the percentage the number of homogeneous out-transitions of a state |
| 255 | * must be of the number of total out-transitions of the state in order |
| 256 | * that the state's transition table is first compared with a potential |
| 257 | * template of the most common out-transition instead of with the first |
| 258 | * proto in the proto queue |
| 259 | */ |
| 260 | #define CHECK_COM_PERCENTAGE 50 |
| 261 | |
| 262 | /* the percentage the number of differences between a state's transition |
| 263 | * table and the proto it was first compared with must be of the total |
| 264 | * number of out-transitions of the state in order to keep the first |
| 265 | * proto as a good match and not search any further |
| 266 | */ |
| 267 | #define FIRST_MATCH_DIFF_PERCENTAGE 10 |
| 268 | |
| 269 | /* the percentage the number of differences between a state's transition |
| 270 | * table and the most similar proto must be of the state's total number |
| 271 | * of out-transitions to use the proto as an acceptable close match |
| 272 | */ |
| 273 | #define ACCEPTABLE_DIFF_PERCENTAGE 50 |
| 274 | |
| 275 | /* the percentage the number of homogeneous out-transitions of a state |
| 276 | * must be of the number of total out-transitions of the state in order |
| 277 | * to consider making a template from the state |
| 278 | */ |
| 279 | #define TEMPLATE_SAME_PERCENTAGE 60 |
| 280 | |
| 281 | /* the percentage the number of differences between a state's transition |
| 282 | * table and the most similar proto must be of the state's total number |
| 283 | * of out-transitions to create a new proto from the state |
| 284 | */ |
| 285 | #define NEW_PROTO_DIFF_PERCENTAGE 20 |
| 286 | |
| 287 | /* the percentage the total number of out-transitions of a state must be |
| 288 | * of the number of equivalence classes in order to consider trying to |
| 289 | * fit the transition table into "holes" inside the nxt/chk table. |
| 290 | */ |
| 291 | #define INTERIOR_FIT_PERCENTAGE 15 |
| 292 | |
| 293 | /* size of region set aside to cache the complete transition table of |
| 294 | * protos on the proto queue to enable quick comparisons |
| 295 | */ |
| 296 | #define PROT_SAVE_SIZE 2000 |
| 297 | |
| 298 | #define MSP 50 /* maximum number of saved protos (protos on the proto queue) */ |
| 299 | |
| 300 | /* maximum number of out-transitions a state can have that we'll rummage |
| 301 | * around through the interior of the internal fast table looking for a |
| 302 | * spot for it |
| 303 | */ |
| 304 | #define MAX_XTIONS_FULL_INTERIOR_FIT 4 |
| 305 | |
| 306 | /* maximum number of rules which will be reported as being associated |
| 307 | * with a DFA state |
| 308 | */ |
| 309 | #define MAX_ASSOC_RULES 100 |
| 310 | |
| 311 | /* number that, if used to subscript an array, has a good chance of producing |
| 312 | * an error; should be small enough to fit into a short |
| 313 | */ |
| 314 | #define BAD_SUBSCRIPT -32767 |
| 315 | |
| 316 | /* absolute value of largest number that can be stored in a short, with a |
| 317 | * bit of slop thrown in for general paranoia. |
| 318 | */ |
| 319 | #define MAX_SHORT 32766 |
| 320 | |
| 321 | |
| 322 | /* Declarations for global variables. */ |
| 323 | |
| 324 | /* variables for symbol tables: |
| 325 | * sctbl - start-condition symbol table |
| 326 | * ndtbl - name-definition symbol table |
| 327 | * ccltab - character class text symbol table |
| 328 | */ |
| 329 | |
/* One entry in a symbol table; the tables themselves are arrays of
 * pointers to these entries.
 */
struct hash_entry
	{
	/* links to neighbouring entries -- presumably the chain for one
	 * hash bucket; confirm against the symbol-table code
	 */
	struct hash_entry *prev;
	struct hash_entry *next;

	char *name;	/* the symbol's name (presumably the lookup key) */
	char *str_val;	/* string value carried by the entry */
	int int_val;	/* integer value carried by the entry */
	};
| 337 | |
| 338 | typedef struct hash_entry *hash_table[]; |
| 339 | |
| 340 | #define NAME_TABLE_HASH_SIZE 101 |
| 341 | #define START_COND_HASH_SIZE 101 |
| 342 | #define CCL_HASH_SIZE 101 |
| 343 | |
| 344 | extern struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; |
| 345 | extern struct hash_entry *sctbl[START_COND_HASH_SIZE]; |
| 346 | extern struct hash_entry *ccltab[CCL_HASH_SIZE]; |
| 347 | |
| 348 | |
| 349 | /* variables for flags: |
| 350 | * printstats - if true (-v), dump statistics |
| 351 | * syntaxerror - true if a syntax error has been found |
| 352 | * eofseen - true if we've seen an eof in the input file |
| 353 | * ddebug - if true (-d), make a "debug" scanner |
| 354 | * trace - if true (-T), trace processing |
| 355 | * spprdflt - if true (-s), suppress the default rule |
| 356 | * interactive - if true (-I), generate an interactive scanner |
| 357 | * caseins - if true (-i), generate a case-insensitive scanner |
| 358 | * useecs - if true (-Ce flag), use equivalence classes |
| 359 | * fulltbl - if true (-Cf flag), don't compress the DFA state table |
| 360 | * usemecs - if true (-Cm flag), use meta-equivalence classes |
| 361 | * fullspd - if true (-F flag), use Jacobson method of table representation |
| 362 | * gen_line_dirs - if true (i.e., no -L flag), generate #line directives |
| 363 | * performance_report - if true (i.e., -p flag), generate a report relating |
| 364 | * to scanner performance |
| 365 | * backtrack_report - if true (i.e., -b flag), generate "lex.backtrack" file |
| 366 | * listing backtracking states |
| 367 | * csize - size of character set for the scanner we're generating; |
| 368 | * 128 for 7-bit chars and 256 for 8-bit |
| 369 | * yymore_used - if true, yymore() is used in input rules |
| 370 | * reject - if true, generate backtracking tables for REJECT macro |
| 371 | * real_reject - if true, scanner really uses REJECT (as opposed to just |
| 372 | * having "reject" set for variable trailing context) |
| 373 | * continued_action - true if this rule's action is to "fall through" to |
| 374 | * the next rule's action (i.e., the '|' action) |
| 375 | * yymore_really_used - has a REALLY_xxx value indicating whether a |
| 376 | * %used or %notused was used with yymore() |
| 377 | * reject_really_used - same for REJECT |
| 378 | */ |
| 379 | |
| 380 | extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; |
| 381 | extern int interactive, caseins, useecs, fulltbl, usemecs; |
| 382 | extern int fullspd, gen_line_dirs, performance_report, backtrack_report, csize; |
| 383 | extern int yymore_used, reject, real_reject, continued_action; |
| 384 | |
| 385 | #define REALLY_NOT_DETERMINED 0 |
| 386 | #define REALLY_USED 1 |
| 387 | #define REALLY_NOT_USED 2 |
| 388 | extern int yymore_really_used, reject_really_used; |
| 389 | |
| 390 | |
| 391 | /* variables used in the flex input routines: |
| 392 | * datapos - characters on current output line |
| 393 | * dataline - number of contiguous lines of data in current data |
| 394 | * statement. Used to generate readable -f output |
| 395 | * linenum - current input line number |
| 396 | * skelfile - the skeleton file |
| 397 | * yyin - input file |
| 398 | * temp_action_file - temporary file to hold actions |
| 399 | * backtrack_file - file to summarize backtracking states to |
| 400 | * infilename - name of input file |
| 401 | * action_file_name - name of the temporary file |
| 402 | * input_files - array holding names of input files |
| 403 | * num_input_files - size of input_files array |
| 404 | * program_name - name with which program was invoked |
| 405 | */ |
| 406 | |
| 407 | extern int datapos, dataline, linenum; |
| 408 | extern FILE *skelfile, *yyin, *temp_action_file, *backtrack_file; |
| 409 | extern char *infilename; |
| 410 | extern char *action_file_name; |
| 411 | extern char **input_files; |
| 412 | extern int num_input_files; |
| 413 | extern char *program_name; |
| 414 | |
| 415 | |
| 416 | /* variables for stack of states having only one out-transition: |
| 417 | * onestate - state number |
| 418 | * onesym - transition symbol |
| 419 | * onenext - target state |
| 420 | * onedef - default base entry |
| 421 | * onesp - stack pointer |
| 422 | */ |
| 423 | |
| 424 | extern int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; |
| 425 | extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; |
| 426 | |
| 427 | |
| 428 | /* variables for nfa machine data: |
| 429 | * current_mns - current maximum on number of NFA states |
| 430 | * num_rules - number of the last accepting state; also is number of |
| 431 | * rules created so far |
| 432 | * current_max_rules - current maximum number of rules |
| 433 | * lastnfa - last nfa state number created |
| 434 | * firstst - physically the first state of a fragment |
| 435 | * lastst - last physical state of fragment |
| 436 | * finalst - last logical state of fragment |
| 437 | * transchar - transition character |
| 438 | * trans1 - transition state |
| 439 | * trans2 - 2nd transition state for epsilons |
| 440 | * accptnum - accepting number |
| 441 | * assoc_rule - rule associated with this NFA state (or 0 if none) |
| 442 | * state_type - a STATE_xxx type identifying whether the state is part |
| 443 | * of a normal rule, the leading state in a trailing context |
| 444 | * rule (i.e., the state which marks the transition from |
| 445 | * recognizing the text-to-be-matched to the beginning of |
| 446 | * the trailing context), or a subsequent state in a trailing |
| 447 | * context rule |
| 448 | * rule_type - a RULE_xxx type identifying whether this is a ho-hum |
| 449 | * normal rule or one which has variable head & trailing |
| 450 | * context |
| 451 | * rule_linenum - line number associated with rule |
| 452 | */ |
| 453 | |
| 454 | extern int current_mns, num_rules, current_max_rules, lastnfa; |
| 455 | extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; |
| 456 | extern int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum; |
| 457 | |
| 458 | /* different types of states; values are useful as masks, as well, for |
| 459 | * routines like check_trailing_context() |
| 460 | */ |
| 461 | #define STATE_NORMAL 0x1 |
| 462 | #define STATE_TRAILING_CONTEXT 0x2 |
| 463 | |
| 464 | /* global holding current type of state we're making */ |
| 465 | |
| 466 | extern int current_state_type; |
| 467 | |
| 468 | /* different types of rules */ |
| 469 | #define RULE_NORMAL 0 |
| 470 | #define RULE_VARIABLE 1 |
| 471 | |
| 472 | /* true if the input rules include a rule with both variable-length head |
| 473 | * and trailing context, false otherwise |
| 474 | */ |
| 475 | extern int variable_trailing_context_rules; |
| 476 | |
| 477 | |
| 478 | /* variables for protos: |
| 479 | * numtemps - number of templates created |
| 480 | * numprots - number of protos created |
| 481 | * protprev - backlink to a more-recently used proto |
| 482 | * protnext - forward link to a less-recently used proto |
| 483 | * prottbl - base/def table entry for proto |
| 484 | * protcomst - common state of proto |
| 485 | * firstprot - number of the most recently used proto |
| 486 | * lastprot - number of the least recently used proto |
| 487 | * protsave contains the entire state array for protos |
| 488 | */ |
| 489 | |
| 490 | extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; |
| 491 | extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; |
| 492 | |
| 493 | |
| 494 | /* variables for managing equivalence classes: |
| 495 | * numecs - number of equivalence classes |
| 496 | * nextecm - forward link of Equivalence Class members |
| 497 | * ecgroup - class number or backward link of EC members |
| 498 | * nummecs - number of meta-equivalence classes (used to compress |
| 499 | * templates) |
| 500 | * tecfwd - forward link of meta-equivalence classes members |
| 501 | * tecbck - backward link of MEC's |
| 502 | * xlation - maps character codes to their translations, or nil if no %t table |
| 503 | * num_xlations - number of different xlation values |
| 504 | */ |
| 505 | |
| 506 | /* reserve enough room in the equivalence class arrays so that we |
| 507 | * can use the CSIZE'th element to hold equivalence class information |
| 508 | * for the NUL character. Later we'll move this information into |
| 509 | * the 0th element. |
| 510 | */ |
| 511 | extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; |
| 512 | |
| 513 | /* meta-equivalence classes are indexed starting at 1, so it's possible |
| 514 | * that they will require positions from 1 .. CSIZE, i.e., CSIZE + 1 |
| 515 | * slots total (since the arrays are 0-based). nextecm[] and ecgroup[] |
| 516 | * don't require the extra position since they're indexed from 1 .. CSIZE - 1. |
| 517 | */ |
| 518 | extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; |
| 519 | |
| 520 | extern int *xlation; |
| 521 | extern int num_xlations; |
| 522 | |
| 523 | |
| 524 | /* variables for start conditions: |
| 525 | * lastsc - last start condition created |
| 526 | * current_max_scs - current limit on number of start conditions |
| 527 | * scset - set of rules active in start condition |
| 528 | * scbol - set of rules active only at the beginning of line in a s.c. |
| 529 | * scxclu - true if start condition is exclusive |
| 530 | * sceof - true if start condition has EOF rule |
| 531 | * scname - start condition name |
| 532 | * actvsc - stack of active start conditions for the current rule |
| 533 | */ |
| 534 | |
| 535 | extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; |
| 536 | extern char **scname; |
| 537 | |
| 538 | |
| 539 | /* variables for dfa machine data: |
| 540 | * current_max_dfa_size - current maximum number of NFA states in DFA |
| 541 | * current_max_xpairs - current maximum number of non-template xtion pairs |
| 542 | * current_max_template_xpairs - current maximum number of template pairs |
| 543 | * current_max_dfas - current maximum number DFA states |
| 544 | * lastdfa - last dfa state number created |
| 545 | * nxt - state to enter upon reading character |
| 546 | * chk - check value to see if "nxt" applies |
| 547 | * tnxt - internal nxt table for templates |
| 548 | * base - offset into "nxt" for given state |
| 549 | * def - where to go if "chk" disallows "nxt" entry |
| 550 | * nultrans - NUL transition for each state |
| 551 | * NUL_ec - equivalence class of the NUL character |
| 552 | * tblend - last "nxt/chk" table entry being used |
| 553 | * firstfree - first empty entry in "nxt/chk" table |
| 554 | * dss - nfa state set for each dfa |
| 555 | * dfasiz - size of nfa state set for each dfa |
| 556 | * dfaacc - accepting set for each dfa state (or accepting number, if |
| 557 | * -r is not given) |
| 558 | * accsiz - size of accepting set for each dfa state |
| 559 | * dhash - dfa state hash value |
| 560 | * numas - number of DFA accepting states created; note that this |
| 561 | * is not necessarily the same value as num_rules, which is the analogous |
| 562 | * value for the NFA |
| 563 | * numsnpairs - number of state/nextstate transition pairs |
| 564 | * jambase - position in base/def where the default jam table starts |
| 565 | * jamstate - state number corresponding to "jam" state |
| 566 | * end_of_buffer_state - end-of-buffer dfa state number |
| 567 | */ |
| 568 | |
| 569 | extern int current_max_dfa_size, current_max_xpairs; |
| 570 | extern int current_max_template_xpairs, current_max_dfas; |
| 571 | extern int lastdfa, lasttemp, *nxt, *chk, *tnxt; |
| 572 | extern int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz; |
| 573 | extern union dfaacc_union |
| 574 | { |
| 575 | int *dfaacc_set; |
| 576 | int dfaacc_state; |
| 577 | } *dfaacc; |
| 578 | extern int *accsiz, *dhash, numas; |
| 579 | extern int numsnpairs, jambase, jamstate; |
| 580 | extern int end_of_buffer_state; |
| 581 | |
| 582 | /* variables for ccl information: |
| 583 | * lastccl - ccl index of the last created ccl |
| 584 | * current_maxccls - current limit on the maximum number of unique ccl's |
| 585 | * cclmap - maps a ccl index to its set pointer |
| 586 | * ccllen - gives the length of a ccl |
| 587 | * cclng - true for a given ccl if the ccl is negated |
| 588 | * cclreuse - counts how many times a ccl is re-used |
| 589 | * current_max_ccl_tbl_size - current limit on number of characters needed |
| 590 | * to represent the unique ccl's |
| 591 | * ccltbl - holds the characters in each ccl - indexed by cclmap |
| 592 | */ |
| 593 | |
| 594 | extern int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; |
| 595 | extern int current_max_ccl_tbl_size; |
| 596 | extern Char *ccltbl; |
| 597 | |
| 598 | |
| 599 | /* variables for miscellaneous information: |
| 600 | * starttime - real-time when we started |
| 601 | * endtime - real-time when we ended |
| 602 | * nmstr - last NAME scanned by the scanner |
| 603 | * sectnum - section number currently being parsed |
| 604 | * nummt - number of empty nxt/chk table entries |
| 605 | * hshcol - number of hash collisions detected by snstods |
| 606 | * dfaeql - number of times a newly created dfa was equal to an old one |
| 607 | * numeps - number of epsilon NFA states created |
| 608 | * eps2 - number of epsilon states which have 2 out-transitions |
| 609 | * num_reallocs - number of times it was necessary to realloc() a group |
| 610 | * of arrays |
| 611 | * tmpuses - number of DFA states that chain to templates |
| 612 | * totnst - total number of NFA states used to make DFA states |
| 613 | * peakpairs - peak number of transition pairs we had to store internally |
| 614 | * numuniq - number of unique transitions |
| 615 | * numdup - number of duplicate transitions |
| 616 | * hshsave - number of hash collisions saved by checking number of states |
| 617 | * num_backtracking - number of DFA states requiring back-tracking |
| 618 | * bol_needed - whether scanner needs beginning-of-line recognition |
| 619 | */ |
| 620 | |
| 621 | extern char *starttime, *endtime, nmstr[MAXLINE]; |
| 622 | extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; |
| 623 | extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; |
| 624 | extern int num_backtracking, bol_needed; |
| 625 | |
/* Generic array allocators, defined elsewhere.  As used by the macros
 * below, allocate_array() takes an element count and an element size, and
 * reallocate_array() additionally takes the existing array first.
 */
void *allocate_array(), *reallocate_array();

/* Typed wrappers around allocate_array() / reallocate_array().
 *
 * Each expansion is parenthesized as a whole, so that a postfix operator
 * applied to the result (e.g. "allocate_integer_array( n )[0]") acts on
 * the typed pointer rather than on the underlying void * call.  Each
 * argument is parenthesized too, so that an expression argument such as
 * "p + 1" is cast or passed as a unit.
 */

#define allocate_integer_array(size) \
	((int *) allocate_array( (size), sizeof( int ) ))

#define reallocate_integer_array(array,size) \
	((int *) reallocate_array( (void *) (array), (size), sizeof( int ) ))

#define allocate_int_ptr_array(size) \
	((int **) allocate_array( (size), sizeof( int * ) ))

#define allocate_char_ptr_array(size) \
	((char **) allocate_array( (size), sizeof( char * ) ))

#define allocate_dfaacc_union(size) \
	((union dfaacc_union *) \
		allocate_array( (size), sizeof( union dfaacc_union ) ))

#define reallocate_int_ptr_array(array,size) \
	((int **) reallocate_array( (void *) (array), (size), sizeof( int * ) ))

#define reallocate_char_ptr_array(array,size) \
	((char **) reallocate_array( (void *) (array), (size), sizeof( char * ) ))

#define reallocate_dfaacc_union(array, size) \
	((union dfaacc_union *) \
		reallocate_array( (void *) (array), (size), \
				  sizeof( union dfaacc_union ) ))

#define allocate_character_array(size) \
	((Char *) allocate_array( (size), sizeof( Char ) ))

#define reallocate_character_array(array,size) \
	((Char *) reallocate_array( (void *) (array), (size), sizeof( Char ) ))
| 659 | |
| 660 | |
| 661 | /* used to communicate between scanner and parser. The type should really |
| 662 | * be YYSTYPE, but we can't easily get our hands on it. |
| 663 | */ |
| 664 | extern int yylval; |
| 665 | |
| 666 | |
| 667 | /* external functions that are cross-referenced among the flex source files */ |
| 668 | |
| 669 | |
| 670 | /* from file ccl.c */ |
| 671 | |
| 672 | extern void ccladd PROTO((int, int)); /* Add a single character to a ccl */ |
| 673 | extern int cclinit PROTO(()); /* make an empty ccl */ |
| 674 | extern void cclnegate PROTO((int)); /* negate a ccl */ |
| 675 | |
| 676 | /* list the members of a set of characters in CCL form */ |
| 677 | extern void list_character_set PROTO((FILE*, int[])); |
| 678 | |
| 679 | |
| 680 | /* from file dfa.c */ |
| 681 | |
| 682 | /* increase the maximum number of dfas */ |
| 683 | extern void increase_max_dfas PROTO(()); |
| 684 | |
| 685 | extern void ntod PROTO(()); /* convert a ndfa to a dfa */ |
| 686 | |
| 687 | |
| 688 | /* from file ecs.c */ |
| 689 | |
| 690 | /* convert character classes to set of equivalence classes */ |
| 691 | extern void ccl2ecl PROTO(()); |
| 692 | |
| 693 | /* associate equivalence class numbers with class members */ |
| 694 | extern int cre8ecs PROTO((int[], int[], int)); |
| 695 | |
| 696 | /* associate equivalence class numbers using %t table */ |
| 697 | extern int ecs_from_xlation PROTO((int[])); |
| 698 | |
| 699 | /* update equivalence classes based on character class transitions */ |
| 700 | extern void mkeccl PROTO((Char[], int, int[], int[], int, int)); |
| 701 | |
| 702 | /* create equivalence class for single character */ |
| 703 | extern void mkechar PROTO((int, int[], int[])); |
| 704 | |
| 705 | |
| 706 | /* from file gen.c */ |
| 707 | |
| 708 | extern void make_tables PROTO(()); /* generate transition tables */ |
| 709 | |
| 710 | |
| 711 | /* from file main.c */ |
| 712 | |
| 713 | extern void flexend PROTO((int)); |
| 714 | |
| 715 | |
| 716 | /* from file misc.c */ |
| 717 | |
| 718 | /* write out the actions from the temporary file to lex.yy.c */ |
| 719 | extern void action_out PROTO(()); |
| 720 | |
| 721 | /* true if a string is all lower case; the result is returned as an int */ |
| 722 | extern int all_lower PROTO((register Char *)); |
| 723 | |
| 724 | /* true if a string is all upper case; the result is returned as an int */ |
| 725 | extern int all_upper PROTO((register Char *)); |
| 726 | |
| 727 | /* bubble sort an integer array (the int argument is presumably the element count - TODO confirm in misc.c) */ |
| 728 | extern void bubble PROTO((int [], int)); |
| 729 | |
| 730 | /* shell sort a character array (NOTE(review): the meaning of the two int arguments is not documented here - see misc.c) */ |
| 731 | extern void cshell PROTO((Char [], int, int)); |
| 732 | |
| 733 | extern void dataend PROTO(()); /* finish up a block of data declarations */ |
| 734 | |
| 735 | /* report an error message and terminate; the message is passed as a char array */ |
| 736 | extern void flexerror PROTO((char[])); |
| 737 | |
| 738 | /* report a fatal error message and terminate; same signature as flexerror */ |
| 739 | extern void flexfatal PROTO((char[])); |
| 740 | |
| 741 | /* report an error message formatted with one integer argument (char array message, then the int) */ |
| 742 | extern void lerrif PROTO((char[], int)); |
| 743 | |
| 744 | /* report an error message formatted with one string argument (char array message, then the char array argument) */ |
| 745 | extern void lerrsf PROTO((char[], char[])); |
| 746 | |
| 747 | /* spit out a "# line" statement; takes a FILE* (presumably the stream to write to - confirm in misc.c) */ |
| 748 | extern void line_directive_out PROTO((FILE*)); |
| 749 | |
| 750 | /* generate a data statement for a two-dimensional array; takes a single int */ |
| 751 | extern void mk2data PROTO((int)); |
| 752 | |
| 753 | extern void mkdata PROTO((int)); /* generate a data statement; takes a single int */ |
| 754 | |
| 755 | /* return the integer represented by a string of digits; the digits are passed as a Char array */ |
| 756 | extern int myctoi PROTO((Char [])); |
| 757 | |
| 758 | /* write out one section of the skeleton file */ |
| 759 | extern void skelout PROTO(()); |
| 760 | |
| 761 | /* output a yy_trans_info structure; takes two ints (presumably the structure's two fields - TODO confirm) */ |
| 762 | extern void transition_struct_out PROTO((int, int)); |
| 763 | |
| 764 | |
| 765 | /* from file nfa.c -- in these declarations machines and states are passed and returned as ints */ |
| 766 | |
| 767 | /* add an accepting state to a machine; takes two ints and returns nothing */ |
| 768 | extern void add_accept PROTO((int, int)); |
| 769 | |
| 770 | /* make a given number of copies of a singleton machine; returns an int (presumably the resulting machine - confirm in nfa.c) */ |
| 771 | extern int copysingl PROTO((int, int)); |
| 772 | |
| 773 | /* debugging routine to write out an nfa; takes a single int */ |
| 774 | extern void dumpnfa PROTO((int)); |
| 775 | |
| 776 | /* finish up the processing for a rule; takes four ints (NOTE(review): their meanings are not documented here - see nfa.c) */ |
| 777 | extern void finish_rule PROTO((int, int, int, int)); |
| 778 | |
| 779 | /* connect two machines together; returns an int (presumably the combined machine - confirm in nfa.c) */ |
| 780 | extern int link_machines PROTO((int, int)); |
| 781 | |
| 782 | /* mark each "beginning" state in a machine as being a "normal" (i.e., |
| 783 | * not trailing context associated) state |
| 784 | */ |
| 785 | extern void mark_beginning_as_normal PROTO((register int)); |
| 786 | |
| 787 | /* make a machine that branches to two machines; returns an int */ |
| 788 | extern int mkbranch PROTO((int, int)); |
| 789 | |
| 790 | extern int mkclos PROTO((int)); /* convert a machine into a closure */ |
| 791 | extern int mkopt PROTO((int)); /* make a machine optional */ |
| 792 | |
| 793 | /* make a machine that matches either one of two machines; returns an int */ |
| 794 | extern int mkor PROTO((int, int)); |
| 795 | |
| 796 | /* convert a machine into a positive closure; returns an int */ |
| 797 | extern int mkposcl PROTO((int)); |
| 798 | |
| 799 | extern int mkrep PROTO((int, int, int)); /* make a replicated machine (the last two ints are presumably the repeat bounds - TODO confirm) */ |
| 800 | |
| 801 | /* create a state with a transition on a given symbol; the symbol is passed as an int and an int is returned */ |
| 802 | extern int mkstate PROTO((int)); |
| 803 | |
| 804 | extern void new_rule PROTO(()); /* initialize for a new rule */ |
| 805 | |
| 806 | |
| 807 | /* from file parse.y */ |
| 808 | |
| 809 | /* write out a message formatted with one string, pinpointing its location (char array message, then the char array argument) */ |
| 810 | extern void format_pinpoint_message PROTO((char[], char[])); |
| 811 | |
| 812 | /* write out a message, pinpointing its location; the message is passed as a char array */ |
| 813 | extern void pinpoint_message PROTO((char[])); |
| 814 | |
| 815 | extern void synerr PROTO((char [])); /* report a syntax error; takes the message as a char array */ |
| 816 | extern int yyparse PROTO(()); /* the YACC parser; returns an int */ |
| 817 | |
| 818 | |
| 819 | /* from file scan.l */ |
| 820 | |
| 821 | extern int flexscan PROTO(()); /* the Flex-generated scanner for flex; returns an int */ |
| 822 | |
| 823 | /* open the given file (if NULL, stdin) for scanning; the file is passed as a char pointer */ |
| 824 | extern void set_input_file PROTO((char*)); |
| 825 | |
| 826 | extern int yywrap PROTO(()); /* wrapup a file in the lexical analyzer; returns an int */ |
| 827 | |
| 828 | |
| 829 | /* from file sym.c */ |
| 830 | |
| 831 | /* save the text of a character class; takes the text as a Char array plus an int (presumably the class number - confirm in sym.c) */ |
| 832 | extern void cclinstal PROTO ((Char [], int)); |
| 833 | |
| 834 | /* lookup the number associated with character class; takes a Char array and returns an int */ |
| 835 | extern int ccllookup PROTO((Char [])); |
| 836 | |
| 837 | extern void ndinstal PROTO((char[], Char[])); /* install a name definition; note the first argument is char[] and the second Char[] */ |
| 838 | extern void scinstal PROTO((char[], int)); /* make a start condition */ |
| 839 | |
| 840 | /* lookup the number associated with a start condition; takes a char array and returns an int */ |
| 841 | extern int sclookup PROTO((char[])); |
| 842 | |
| 843 | |
| 844 | /* from file tblcmp.c */ |
| 845 | |
| 846 | /* build table entries for dfa state; takes an int array and four ints (NOTE(review): their meanings are not documented here - see tblcmp.c) */ |
| 847 | extern void bldtbl PROTO((int[], int, int, int, int)); |
| 848 | |
| 849 | extern void cmptmps PROTO(()); /* compress template table entries */ |
| 850 | extern void inittbl PROTO(()); /* initialize transition tables */ |
| 851 | extern void mkdeftbl PROTO(()); /* make the default, "jam" table entries */ |
| 852 | |
| 853 | /* create table entries for a state (or state fragment) which has |
| 854 | * only one out-transition; takes four ints */ |
| 855 | extern void mk1tbl PROTO((int, int, int, int)); |
| 856 | |
| 857 | /* place a state into full speed transition table; takes an int pointer and two ints */ |
| 858 | extern void place_state PROTO((int*, int, int)); |
| 859 | |
| 860 | /* save states with only one out-transition to be processed later; takes four ints, the same shape as mk1tbl */ |
| 861 | extern void stack1 PROTO((int, int, int, int)); |
| 862 | |
| 863 | |
| 864 | /* from file yylex.c */ |
| 865 | |
| 866 | extern int yylex PROTO(()); /* NOTE(review): undocumented here; presumably the token routine called by yyparse - confirm in yylex.c */ |
| 867 | |
| 868 | |
| 869 | /* The Unix kernel calls used here, declared by hand in this file */ |
| 870 | |
| 871 | extern int read PROTO((int, char*, int)); /* NOTE(review): POSIX <unistd.h> declares ssize_t read(int, void *, size_t); this may conflict if that header is included - confirm */ |
| 872 | extern int unlink PROTO((char*)); /* NOTE(review): POSIX declares int unlink(const char *) - confirm compatibility */ |
| 873 | extern int write PROTO((int, char*, int)); /* NOTE(review): POSIX declares ssize_t write(int, const void *, size_t) - confirm compatibility */ |