Commit | Line | Data |
---|---|---|
a7e60862 WJ |
1 | |
2 | /******************************************** | |
3 | rexp.c | |
4 | copyright 1991, Michael D. Brennan | |
5 | ||
6 | This is a source file for mawk, an implementation of | |
7 | the AWK programming language. | |
8 | ||
9 | Mawk is distributed without warranty under the terms of | |
10 | the GNU General Public License, version 2, 1991. | |
11 | ********************************************/ | |
12 | ||
13 | /*$Log: rexp.c,v $ | |
14 | * Revision 3.4 91/08/13 09:09:59 brennan | |
15 | * VERSION .9994 | |
16 | * | |
17 | * Revision 3.3 91/08/04 15:45:03 brennan | |
18 | * no longer attempt to recover mem on failed REcompile | |
19 | * Its not worth the effort | |
20 | * | |
21 | * Revision 3.2 91/08/03 07:24:06 brennan | |
22 | * check for empty machine stack (missing operand) wasn't quite right | |
23 | * | |
24 | * Revision 3.1 91/06/07 10:33:16 brennan | |
25 | * VERSION 0.995 | |
26 | * | |
27 | * Revision 1.7 91/06/05 08:58:47 brennan | |
28 | * change RE_free to free | |
29 | * | |
30 | * Revision 1.6 91/06/03 07:07:17 brennan | |
31 | * moved parser stacks inside REcompile | |
32 | * removed unnecessary copying | |
33 | * | |
34 | */ | |
35 | ||
36 | /* op precedence parser for regular expressions */ | |
37 | ||
38 | #include "rexp.h" | |
39 | ||
40 | ||
41 | /* DATA */ | |
42 | int REerrno ; | |
43 | char *REerrlist[] = { (char *) 0 , | |
44 | /* 1 */ "missing '('", | |
45 | /* 2 */ "missing ')'", | |
46 | /* 3 */ "bad class -- [], [^] or [" , | |
47 | /* 4 */ "missing operand" , | |
48 | /* 5 */ "resource exhaustion -- regular expression too large" | |
49 | } ; | |
50 | /* E5 is very unlikely to occur */ | |
51 | ||
52 | /* This table drives the operator precedence parser */ | |
53 | static short table[8][8] = { | |
54 | ||
55 | /* 0 | CAT * + ? ( ) */ | |
56 | /* 0 */ 0, L, L, L, L, L, L, E1, | |
57 | /* | */ G, G, L, L, L, L, L, G, | |
58 | /* CAT*/ G, G, G, L, L, L, L, G, | |
59 | /* * */ G, G, G, G, G, G, E7, G, | |
60 | /* + */ G, G, G, G, G, G, E7, G, | |
61 | /* ? */ G, G, G, G, G, G, E7, G, | |
62 | /* ( */ E2, L, L, L, L, L, L, EQ, | |
63 | /* ) */ G , G, G, G, G, G, E7, G } ; | |
64 | ||
65 | ||
66 | #define STACKSZ 64 | |
67 | ||
68 | ||
69 | static jmp_buf err_buf ; /* used to trap on error */ | |
70 | ||
71 | void RE_error_trap(x) /* return is dummy to make macro OK */ | |
72 | int x ; | |
73 | { | |
74 | REerrno = x ; | |
75 | longjmp(err_buf, 1 ) ; | |
76 | } | |
77 | ||
78 | ||
79 | VOID *REcompile(re) | |
80 | char *re ; | |
81 | { | |
82 | MACHINE m_stack[STACKSZ] ; | |
83 | struct op { | |
84 | int token ; | |
85 | int prec ; | |
86 | } op_stack[STACKSZ] ; | |
87 | register MACHINE *m_ptr ; | |
88 | register struct op *op_ptr ; | |
89 | register int t ; | |
90 | ||
91 | /* do this first because it also checks if we have a | |
92 | run time stack */ | |
93 | RE_lex_init(re) ; | |
94 | ||
95 | if ( *re == 0 ) | |
96 | { STATE *p = (STATE *) RE_malloc( sizeof(STATE) ) ; | |
97 | p->type = M_ACCEPT ; | |
98 | return (VOID *) p ; | |
99 | } | |
100 | ||
101 | if ( setjmp(err_buf) ) return (VOID *) 0 ; | |
102 | /* we used to try to recover memory left on machine stack ; | |
103 | but now m_ptr is in a register so it won't be right unless | |
104 | we force it out of a register which isn't worth the trouble */ | |
105 | ||
106 | /* initialize the stacks */ | |
107 | m_ptr = m_stack - 1 ; | |
108 | op_ptr = op_stack ; | |
109 | op_ptr->token = 0 ; | |
110 | ||
111 | t = RE_lex(m_stack) ; | |
112 | ||
113 | while( 1 ) | |
114 | { switch( t ) | |
115 | { | |
116 | case T_STR : | |
117 | case T_ANY : | |
118 | case T_U : | |
119 | case T_START : | |
120 | case T_END : | |
121 | case T_CLASS : m_ptr++ ; break ; | |
122 | ||
123 | case 0 : /* end of reg expr */ | |
124 | if ( op_ptr -> token == 0 ) /* done */ | |
125 | if ( m_ptr == m_stack ) return (VOID *)m_ptr->start ; | |
126 | else | |
127 | /* machines still on the stack */ | |
128 | RE_panic("values still on machine stack") ; | |
129 | ||
130 | /* otherwise fall thru to default | |
131 | which is operator case */ | |
132 | ||
133 | default: | |
134 | ||
135 | if ( (op_ptr -> prec = table[op_ptr -> token][t]) == G ) | |
136 | { | |
137 | do | |
138 | { /* op_pop */ | |
139 | ||
140 | if ( op_ptr->token <= T_CAT ) /*binary op*/ m_ptr-- ; | |
141 | /* if not enough values on machine stack | |
142 | then we have a missing operand */ | |
143 | if ( m_ptr < m_stack ) RE_error_trap(-E4) ; | |
144 | ||
145 | switch( op_ptr->token ) | |
146 | { case T_CAT : RE_cat(m_ptr, m_ptr+1) ; break ; | |
147 | case T_OR : RE_or( m_ptr, m_ptr+1) ; break ; | |
148 | case T_STAR : RE_close( m_ptr) ; break ; | |
149 | case T_PLUS : RE_poscl( m_ptr ) ; break ; | |
150 | case T_Q : RE_01( m_ptr ) ; break ; | |
151 | default : break ; /*nothing on ( or ) */ | |
152 | } | |
153 | ||
154 | op_ptr-- ; | |
155 | } | |
156 | while ( op_ptr->prec != L ) ; | |
157 | ||
158 | continue ; /* back thru switch at top */ | |
159 | } | |
160 | ||
161 | if ( op_ptr -> prec < 0 ) | |
162 | if ( op_ptr->prec == E7 ) | |
163 | RE_panic("parser returns E7") ; | |
164 | else RE_error_trap(-op_ptr->prec) ; | |
165 | ||
166 | if ( ++op_ptr == op_stack + STACKSZ ) /* stack overflow */ | |
167 | RE_error_trap(-E5) ; | |
168 | op_ptr -> token = t ; | |
169 | } /* end of switch */ | |
170 | ||
171 | if ( m_ptr == m_stack+(STACKSZ-1) ) /*overflow*/ | |
172 | RE_error_trap(-E5) ; | |
173 | t = RE_lex(m_ptr+1) ; | |
174 | } | |
175 | } | |
176 | ||
177 | ||
178 | /* getting here means a logic flaw or unforeseen case */ | |
179 | void RE_panic( s ) | |
180 | char *s ; | |
181 | { fprintf( stderr, "REcompile() - panic: %s\n", s) ; | |
182 | exit(100) ; } | |
183 |