386BSD 0.1 development
[unix-history] / usr / src / usr.bin / awk / rexp / rexp.c
CommitLineData
a7e60862
WJ
1
2/********************************************
3rexp.c
4copyright 1991, Michael D. Brennan
5
6This is a source file for mawk, an implementation of
7the AWK programming language.
8
9Mawk is distributed without warranty under the terms of
10the GNU General Public License, version 2, 1991.
11********************************************/
12
13/*$Log: rexp.c,v $
14 * Revision 3.4 91/08/13 09:09:59 brennan
15 * VERSION .9994
16 *
17 * Revision 3.3 91/08/04 15:45:03 brennan
18 * no longer attempt to recover mem on failed REcompile
19 * Its not worth the effort
20 *
21 * Revision 3.2 91/08/03 07:24:06 brennan
22 * check for empty machine stack (missing operand) wasn't quite right
23 *
24 * Revision 3.1 91/06/07 10:33:16 brennan
25 * VERSION 0.995
26 *
27 * Revision 1.7 91/06/05 08:58:47 brennan
28 * change RE_free to free
29 *
30 * Revision 1.6 91/06/03 07:07:17 brennan
31 * moved parser stacks inside REcompile
32 * removed unnecessary copying
33 *
34*/
35
36/* op precedence parser for regular expressions */
37
38#include "rexp.h"
39
40
41/* DATA */
42int REerrno ;
43char *REerrlist[] = { (char *) 0 ,
44/* 1 */ "missing '('",
45/* 2 */ "missing ')'",
46/* 3 */ "bad class -- [], [^] or [" ,
47/* 4 */ "missing operand" ,
48/* 5 */ "resource exhaustion -- regular expression too large"
49} ;
50/* E5 is very unlikely to occur */
51
52/* This table drives the operator precedence parser */
53static short table[8][8] = {
54
55/* 0 | CAT * + ? ( ) */
56/* 0 */ 0, L, L, L, L, L, L, E1,
57/* | */ G, G, L, L, L, L, L, G,
58/* CAT*/ G, G, G, L, L, L, L, G,
59/* * */ G, G, G, G, G, G, E7, G,
60/* + */ G, G, G, G, G, G, E7, G,
61/* ? */ G, G, G, G, G, G, E7, G,
62/* ( */ E2, L, L, L, L, L, L, EQ,
63/* ) */ G , G, G, G, G, G, E7, G } ;
64
65
66#define STACKSZ 64
67
68
69static jmp_buf err_buf ; /* used to trap on error */
70
71void RE_error_trap(x) /* return is dummy to make macro OK */
72 int x ;
73{
74 REerrno = x ;
75 longjmp(err_buf, 1 ) ;
76}
77
78
79VOID *REcompile(re)
80 char *re ;
81{
82 MACHINE m_stack[STACKSZ] ;
83 struct op {
84 int token ;
85 int prec ;
86 } op_stack[STACKSZ] ;
87 register MACHINE *m_ptr ;
88 register struct op *op_ptr ;
89 register int t ;
90
91 /* do this first because it also checks if we have a
92 run time stack */
93 RE_lex_init(re) ;
94
95 if ( *re == 0 )
96 { STATE *p = (STATE *) RE_malloc( sizeof(STATE) ) ;
97 p->type = M_ACCEPT ;
98 return (VOID *) p ;
99 }
100
101 if ( setjmp(err_buf) ) return (VOID *) 0 ;
102 /* we used to try to recover memory left on machine stack ;
103 but now m_ptr is in a register so it won't be right unless
104 we force it out of a register which isn't worth the trouble */
105
106 /* initialize the stacks */
107 m_ptr = m_stack - 1 ;
108 op_ptr = op_stack ;
109 op_ptr->token = 0 ;
110
111 t = RE_lex(m_stack) ;
112
113 while( 1 )
114 { switch( t )
115 {
116 case T_STR :
117 case T_ANY :
118 case T_U :
119 case T_START :
120 case T_END :
121 case T_CLASS : m_ptr++ ; break ;
122
123 case 0 : /* end of reg expr */
124 if ( op_ptr -> token == 0 ) /* done */
125 if ( m_ptr == m_stack ) return (VOID *)m_ptr->start ;
126 else
127 /* machines still on the stack */
128 RE_panic("values still on machine stack") ;
129
130 /* otherwise fall thru to default
131 which is operator case */
132
133 default:
134
135 if ( (op_ptr -> prec = table[op_ptr -> token][t]) == G )
136 {
137 do
138 { /* op_pop */
139
140 if ( op_ptr->token <= T_CAT ) /*binary op*/ m_ptr-- ;
141 /* if not enough values on machine stack
142 then we have a missing operand */
143 if ( m_ptr < m_stack ) RE_error_trap(-E4) ;
144
145 switch( op_ptr->token )
146 { case T_CAT : RE_cat(m_ptr, m_ptr+1) ; break ;
147 case T_OR : RE_or( m_ptr, m_ptr+1) ; break ;
148 case T_STAR : RE_close( m_ptr) ; break ;
149 case T_PLUS : RE_poscl( m_ptr ) ; break ;
150 case T_Q : RE_01( m_ptr ) ; break ;
151 default : break ; /*nothing on ( or ) */
152 }
153
154 op_ptr-- ;
155 }
156 while ( op_ptr->prec != L ) ;
157
158 continue ; /* back thru switch at top */
159 }
160
161 if ( op_ptr -> prec < 0 )
162 if ( op_ptr->prec == E7 )
163 RE_panic("parser returns E7") ;
164 else RE_error_trap(-op_ptr->prec) ;
165
166 if ( ++op_ptr == op_stack + STACKSZ ) /* stack overflow */
167 RE_error_trap(-E5) ;
168 op_ptr -> token = t ;
169 } /* end of switch */
170
171 if ( m_ptr == m_stack+(STACKSZ-1) ) /*overflow*/
172 RE_error_trap(-E5) ;
173 t = RE_lex(m_ptr+1) ;
174 }
175}
176
177
178/* getting here means a logic flaw or unforeseen case */
179void RE_panic( s )
180 char *s ;
181{ fprintf( stderr, "REcompile() - panic: %s\n", s) ;
182 exit(100) ; }
183