Commit | Line | Data |
---|---|---|
46fcc854 KB |
1 | %Start A str sc reg comment |
2 | ||
3 | %{ | |
4 | /**************************************************************** | |
5 | Copyright (C) AT&T 1993 | |
6 | All Rights Reserved | |
7 | ||
8 | Permission to use, copy, modify, and distribute this software and | |
9 | its documentation for any purpose and without fee is hereby | |
10 | granted, provided that the above copyright notice appear in all | |
11 | copies and that both that the copyright notice and this | |
12 | permission notice and warranty disclaimer appear in supporting | |
13 | documentation, and that the name of AT&T or any of its entities | |
14 | not be used in advertising or publicity pertaining to | |
15 | distribution of the software without specific, written prior | |
16 | permission. | |
17 | ||
18 | AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
19 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. | |
20 | IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY | |
21 | SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
22 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER | |
23 | IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | |
24 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF | |
25 | THIS SOFTWARE. | |
26 | ****************************************************************/ | |
27 | ||
46fcc854 KB |
28 | #include <stdlib.h> |
29 | #include <string.h> | |
30 | #include "awk.h" | |
31 | #include "y.tab.h" | |
32 | ||
33 | extern YYSTYPE yylval; | |
34 | extern int infunc; | |
35 | ||
36 | int lineno = 1; /* current source line; bumped by every rule that consumes a newline */ | |
37 | int bracecnt = 0; /* open braces seen minus close braces seen; going negative is reported as an error */ | |
38 | int brackcnt = 0; /* same bookkeeping for square brackets */ | |
39 | int parencnt = 0; /* same bookkeeping for parentheses */ | |
40 | #define DEBUG | |
41 | #ifdef DEBUG | |
42 | # define RET(x) {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); } /* return token x, tracing it first when dbg is set */ | |
43 | #else | |
44 | # define RET(x) return(x) | |
45 | #endif | |
46 | ||
47 | #define CADD cbuf[clen++] = yytext[0]; \ | |
48 | if (clen >= CBUFLEN-1) { \ | |
49 | ERROR "string/reg expr %.30s... too long", cbuf SYNTAX; \ | |
50 | BEGIN A; \ | |
51 | } /* CADD: append the first matched character to cbuf; once cbuf is nearly full, report the error and go back to condition A */ | |
52 | ||
53 | uchar cbuf[CBUFLEN]; /* text of the string or regular expression currently being collected */ | |
54 | uchar *s; /* scratch pointer; the string rule stores the tostring() copy of cbuf here */ | |
55 | int clen, cflag; /* clen: number of bytes stored in cbuf so far; cflag is not referenced in this part of the file */ |
e707fb85 KB |
56 | |
57 | /* some of this depends on behavior of lex that | |
58 | may not be preserved in other implementations of lex. | |
59 | */ | |
60 | ||
61 | static int my_input( YY_CHAR *buf, int max_size ); | |
62 | ||
63 | #undef YY_INPUT | |
64 | #define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size); | |
65 | ||
66 | #undef YY_USER_INIT | |
67 | #define YY_USER_INIT init_input_source(); | |
46fcc854 KB |
68 | %} |
69 | ||
70 | A [a-zA-Z_] | |
71 | B [a-zA-Z0-9_] | |
72 | D [0-9] | |
73 | O [0-7] | |
74 | H [0-9a-fA-F] | |
75 | WS [ \t] | |
76 | ||
77 | %% | |
e707fb85 | 78 | switch ((yy_start - 1) / 2) { /* witchcraft: recover the current start condition from yy_start (assumes the flex encoding yy_start == 1 + 2*condition -- TODO confirm against the generated scanner) */
46fcc854 KB |
79 | case 0: /* condition 0, i.e. none selected yet: enter the main condition A */ |
80 | BEGIN A; | |
81 | break; | |
82 | case sc: /* the close-brace rule returned a semicolon and left the scanner in sc; now deliver the brace itself */ | |
83 | BEGIN A; | |
84 | RET('}'); | |
85 | } | |
86 | ||
87 | <A>\n { lineno++; RET(NL); } /* newline is a token: count it and return NL */ | |
88 | <A>#.* { ; } /* strip comments */ | |
89 | <A>{WS}+ { ; } /* blanks and tabs are skipped */ | |
90 | <A>; { RET(';'); } | |
91 | ||
92 | <A>"\\"\n { lineno++; } /* backslash-newline: count the line but return no token, so the statement continues */ | |
93 | <A>BEGIN { RET(XBEGIN); } | |
94 | <A>END { RET(XEND); } | |
95 | <A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); } /* func and function are both accepted; infunc is only read here, it is set elsewhere */ | |
96 | <A>return { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); } | |
97 | <A>"&&" { RET(AND); } | |
98 | <A>"||" { RET(BOR); } | |
99 | <A>"!" { RET(NOT); } | |
100 | <A>"!=" { yylval.i = NE; RET(NE); } /* comparison operators: yylval.i repeats the token code */ | |
101 | <A>"~" { yylval.i = MATCH; RET(MATCHOP); } /* both match operators return MATCHOP; yylval.i tells them apart */ | |
102 | <A>"!~" { yylval.i = NOTMATCH; RET(MATCHOP); } | |
103 | <A>"<" { yylval.i = LT; RET(LT); } | |
104 | <A>"<=" { yylval.i = LE; RET(LE); } | |
105 | <A>"==" { yylval.i = EQ; RET(EQ); } | |
106 | <A>">=" { yylval.i = GE; RET(GE); } | |
107 | <A>">" { yylval.i = GT; RET(GT); } | |
108 | <A>">>" { yylval.i = APPEND; RET(APPEND); } | |
109 | <A>"++" { yylval.i = INCR; RET(INCR); } | |
110 | <A>"--" { yylval.i = DECR; RET(DECR); } | |
111 | <A>"+=" { yylval.i = ADDEQ; RET(ASGNOP); } /* every assignment form returns ASGNOP; yylval.i says which one */ | |
112 | <A>"-=" { yylval.i = SUBEQ; RET(ASGNOP); } | |
113 | <A>"*=" { yylval.i = MULTEQ; RET(ASGNOP); } | |
114 | <A>"/=" { yylval.i = DIVEQ; RET(ASGNOP); } | |
115 | <A>"%=" { yylval.i = MODEQ; RET(ASGNOP); } | |
116 | <A>"^=" { yylval.i = POWEQ; RET(ASGNOP); } | |
117 | <A>"**=" { yylval.i = POWEQ; RET(ASGNOP); } /* same POWEQ code as the caret form above */ | |
118 | <A>"=" { yylval.i = ASSIGN; RET(ASGNOP); } | |
119 | <A>"**" { RET(POWER); } /* both spellings of exponentiation return POWER */ | |
120 | <A>"^" { RET(POWER); } | |
121 | ||
122 | <A>"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); } /* dollar followed by digits: skip the dollar, convert the digits and look the field up with fieldadr */ | |
123 | <A>"$NF" { unputstr("(NF)"); return(INDIRECT); } /* rewritten on the fly: return the dollar operator and push (NF) back to be scanned next */ | |
e707fb85 KB |
124 | <A>"$"{A}{B}* { /* dollar followed by a name */ |
125 | int c; | |
126 | char *yytext_copy = strdup(yytext); /* private copy of the token; NOTE(review): presumably needed because input()/unput() below may clobber yytext -- confirm. The strdup result is not checked for NULL */ | |
127 | c = input(); unput(c); /* look for '(' or '[' */ | |
128 | if (c == '(' || c == '[' || | |
129 | infunc && isarg(yytext_copy+1) >= 0) { /* a call or subscript follows, or we are inside a function and isarg accepts the name (presumably a parameter -- confirm) */ | |
130 | unputstr(yytext_copy+1); /* push the name, without the dollar, back so it is rescanned on its own */ | |
131 | free(yytext_copy); | |
46fcc854 KB |
132 | return(INDIRECT); |
133 | } else { | |
e707fb85 KB |
134 | yylval.cp = |
135 | setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab); /* otherwise enter the bare name in symtab and return it as IVAR */ | |
136 | free(yytext_copy); | |
46fcc854 KB |
137 | RET(IVAR); |
138 | } | |
139 | } | |
140 | <A>"$" { RET(INDIRECT); } /* a dollar followed by anything else is just the operator */ | |
141 | <A>NF { yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); } | |
142 | ||
143 | <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? { /* decimal literal: digits with an optional point, or a point followed by digits, then an optional exponent */ | |
144 | yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab); /* entered under its own spelling, with both the text and the atof value */ | |
145 | /* should this also have STR set? */ | |
146 | RET(NUMBER); } | |
147 | ||
148 | <A>while { RET(WHILE); } /* reserved words: each one returns its own token */ | |
149 | <A>for { RET(FOR); } | |
150 | <A>do { RET(DO); } | |
151 | <A>if { RET(IF); } | |
152 | <A>else { RET(ELSE); } | |
153 | <A>next { RET(NEXT); } | |
154 | <A>exit { RET(EXIT); } | |
155 | <A>break { RET(BREAK); } | |
156 | <A>continue { RET(CONTINUE); } | |
157 | <A>print { yylval.i = PRINT; RET(PRINT); } /* for these, yylval.i repeats the token code */ | |
158 | <A>printf { yylval.i = PRINTF; RET(PRINTF); } | |
159 | <A>sprintf { yylval.i = SPRINTF; RET(SPRINTF); } | |
160 | <A>split { yylval.i = SPLIT; RET(SPLIT); } | |
161 | <A>substr { RET(SUBSTR); } | |
162 | <A>sub { yylval.i = SUB; RET(SUB); } | |
163 | <A>gsub { yylval.i = GSUB; RET(GSUB); } | |
164 | <A>index { RET(INDEX); } | |
165 | <A>match { RET(MATCHFCN); } | |
166 | <A>in { RET(IN); } | |
167 | <A>getline { RET(GETLINE); } | |
168 | <A>close { RET(CLOSE); } | |
169 | <A>delete { RET(DELETE); } | |
170 | <A>length { yylval.i = FLENGTH; RET(BLTIN); } /* the remaining names all return BLTIN; yylval.i selects the function */ | |
171 | <A>log { yylval.i = FLOG; RET(BLTIN); } | |
172 | <A>int { yylval.i = FINT; RET(BLTIN); } | |
173 | <A>exp { yylval.i = FEXP; RET(BLTIN); } | |
174 | <A>sqrt { yylval.i = FSQRT; RET(BLTIN); } | |
175 | <A>sin { yylval.i = FSIN; RET(BLTIN); } | |
176 | <A>cos { yylval.i = FCOS; RET(BLTIN); } | |
177 | <A>atan2 { yylval.i = FATAN; RET(BLTIN); } | |
178 | <A>system { yylval.i = FSYSTEM; RET(BLTIN); } | |
179 | <A>rand { yylval.i = FRAND; RET(BLTIN); } | |
180 | <A>srand { yylval.i = FSRAND; RET(BLTIN); } | |
181 | <A>toupper { yylval.i = FTOUPPER; RET(BLTIN); } | |
182 | <A>tolower { yylval.i = FTOLOWER; RET(BLTIN); } | |
183 | <A>fflush { yylval.i = FFLUSH; RET(BLTIN); } | |
184 | ||
185 | <A>{A}{B}* { int n, c; /* any other name: classify it as ARG, CALL or VAR */ | |
e707fb85 | 186 | char *yytext_copy = strdup(yytext); /* private copy of the name; NOTE(review): presumably needed because input()/unput() may clobber yytext -- confirm. The strdup result is not checked for NULL */ |
46fcc854 | 187 | c = input(); unput(c); /* look for '(' */ |
e707fb85 | 188 | if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) { /* not a call, inside a function, and isarg returned a non-negative value for the name */ |
46fcc854 | 189 | yylval.i = n; /* the parser receives the value isarg returned */ |
e707fb85 | 190 | free(yytext_copy); |
46fcc854 KB |
191 | RET(ARG); |
192 | } else { | |
e707fb85 KB |
193 | yylval.cp = |
194 | setsymtab(yytext_copy,"",0.0,STR|NUM,symtab); /* every other name is entered in symtab */ | |
195 | free(yytext_copy); | |
46fcc854 KB |
196 | if (c == '(') { /* an open parenthesis directly after the name makes it a call */ |
197 | RET(CALL); | |
198 | } else { | |
199 | RET(VAR); | |
200 | } | |
201 | } | |
202 | } | |
e707fb85 | 203 | |
46fcc854 KB |
204 | <A>\" { BEGIN str; clen = 0; } |
205 | ||
206 | <A>"}" { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); } /* a close brace is delivered as a semicolon now; condition sc makes the switch at the top of the rules return the brace itself on the next call */ | |
207 | <A>"]" { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); } /* closers decrement their counter and complain when it goes negative */ | |
208 | <A>")" { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); } | |
209 | ||
210 | <A>. { if (yytext[0] == '{') bracecnt++; /* catch-all: openers bump their counter, and the character itself is the token */ | |
211 | else if (yytext[0] == '[') brackcnt++; | |
212 | else if (yytext[0] == '(') parencnt++; | |
213 | RET(yylval.i = yytext[0]); /* everything else */ } | |
214 | ||
<reg>\\.	{ /* Escaped character: keep the backslash and the character after it.
		   * The backslash always fits, because every successful append
		   * leaves clen below CBUFLEN-1.  The second byte goes through
		   * CADD, which appends yytext[0], so that a regular expression
		   * made of escape pairs is length-checked like any other and
		   * cannot run past the end of cbuf. */
		  cbuf[clen++] = '\\';
		  yytext[0] = yytext[1];
		  CADD; }
<reg>\n		{ ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ /* Closing slash: terminate the collected text and return it. */
		  BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');	/* the slash is pushed back and rescanned in condition A */
		  RET(REGEXPR); }
<reg>.		{ CADD; }
223 | ||
<str>\"		{ /* Closing quote: finish the literal and hand it to the parser. */
		  BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);	/* s is the copy passed as the string value */
		  cbuf[clen] = ' '; cbuf[++clen] = 0;	/* the key given to setsymtab is the text plus one blank;
							   NOTE(review): presumably keeps it distinct from a
							   variable or number with the same spelling -- confirm */
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\""	{ /* Every escape below stores its translated byte in yytext[0] and
		   * then uses CADD, which appends yytext[0] and checks the length.
		   * Appending straight into cbuf, as before, skipped that check, so
		   * a long run of escapes could write past the end of cbuf. */
		  yytext[0] = '"'; CADD; }
<str>"\\"n	{ yytext[0] = '\n'; CADD; }
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"f	{ yytext[0] = '\f'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"v	{ yytext[0] = '\v'; CADD; }	/* these ANSIisms may not be known by */
<str>"\\"a	{ yytext[0] = '\007'; CADD; }	/* your compiler.  hence 007 for bell */
<str>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{ unsigned int n = 0;	/* %o stores through an unsigned int pointer */
		  sscanf(yytext+1, "%o", &n); yytext[0] = n; CADD; }
<str>"\\"x({H}+)	{ unsigned int n = 0;	/* ANSI permits any number! */
		  sscanf(yytext+2, "%x", &n); yytext[0] = n; CADD; }
<str>"\\".	{ yytext[0] = yytext[1]; CADD; }	/* unknown escape: keep the character itself */
<str>.		{ CADD; }
245 | ||
246 | %% | |
247 | ||
248 | void startreg(void) /* start parsing a regular expression */ | |
249 | { | |
250 | BEGIN reg; | |
251 | clen = 0; | |
252 | } | |
253 | ||
e707fb85 | 254 | static int my_input( YY_CHAR *buf, int max_size ) |
46fcc854 | 255 | { |
46fcc854 KB |
256 | extern uchar *lexprog; |
257 | ||
e707fb85 KB |
258 | if ( lexprog ) { /* awk '...' */ |
259 | int num_chars = strlen( lexprog ); | |
260 | if ( num_chars > max_size ) | |
261 | { | |
262 | num_chars = max_size; | |
263 | strncpy( buf, lexprog, num_chars ); | |
264 | } | |
265 | else | |
266 | strcpy( buf, lexprog ); | |
267 | lexprog += num_chars; | |
268 | return num_chars; | |
269 | ||
270 | } else { /* awk -f ... */ | |
271 | int c = pgetc(); | |
272 | if (c == EOF) | |
273 | return 0; | |
274 | buf[0] = c; | |
275 | return 1; | |
46fcc854 | 276 | } |
46fcc854 | 277 | } |
46fcc854 KB |
278 | |
void unputstr(char *s)	/* push string s back onto the scanner input */
{
	char *p = s + strlen(s);	/* one past the last character */

	/*
	 * Walk backwards: the last character is pushed first so that the
	 * scanner reads the string back in its original order.
	 */
	while (p > s) {
		--p;
		unput(*p);
	}
}
e707fb85 KB |
286 | |
/*
 * lex_input - read one character through the scanner's own input() and
 * return whatever input() returned.
 *
 * NOTE(review): presumably exists so that code outside this file can
 * consume scanner input, input() itself not being visible there -- confirm
 * against the callers.
 *
 * Declared with (void) so the definition is a real prototype, matching
 * startreg() in this file.
 */
int lex_input(void)
{
	return input();
}