/* Start conditions: A = ordinary program text, str = inside a string literal,
   reg = inside a regular expression, sc = a '}' is still owed to the parser
   after a ';' was returned in its place.  No rule in the visible rules
   section is tagged with `comment`. */
%Start A str sc reg comment
| 2 | |
| 3 | %{ |
| 4 | /**************************************************************** |
| 5 | Copyright (C) AT&T 1993 |
| 6 | All Rights Reserved |
| 7 | |
| 8 | Permission to use, copy, modify, and distribute this software and |
| 9 | its documentation for any purpose and without fee is hereby |
| 10 | granted, provided that the above copyright notice appear in all |
| 11 | copies and that both that the copyright notice and this |
| 12 | permission notice and warranty disclaimer appear in supporting |
| 13 | documentation, and that the name of AT&T or any of its entities |
| 14 | not be used in advertising or publicity pertaining to |
| 15 | distribution of the software without specific, written prior |
| 16 | permission. |
| 17 | |
| 18 | AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, |
| 19 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. |
| 20 | IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY |
| 21 | SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 22 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER |
| 23 | IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
| 24 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF |
| 25 | THIS SOFTWARE. |
| 26 | ****************************************************************/ |
| 27 | |
| 28 | #include <stdlib.h> |
| 29 | #include <string.h> |
| 30 | #include "awk.h" |
| 31 | #include "y.tab.h" |
| 32 | |
extern YYSTYPE	yylval;		/* value attached to the token being returned */
extern int	infunc;		/* tested by the func/return rules to diagnose misuse */

int	lineno	= 1;		/* bumped by every rule that consumes a newline */
int	bracecnt = 0;		/* '{' seen so far minus '}' seen so far */
int	brackcnt  = 0;		/* same for '[' and ']' */
int	parencnt = 0;		/* same for '(' and ')' */
#define DEBUG
#ifdef	DEBUG
	/* When dbg (defined elsewhere) is nonzero, trace each token name and
	   its text before returning it. */
#	define	RET(x)	{if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
#else
#	define	RET(x)	return(x)
#endif

/* Append the first character of the current match to cbuf.  The bounds test
   runs after the store: once clen reaches CBUFLEN-1 a syntax error is
   reported and the scanner returns to state A.  This expands to two
   statements, so it must not be the unbraced body of an if/else. */
#define	CADD	cbuf[clen++] = yytext[0]; \
		if (clen >= CBUFLEN-1) { \
			ERROR "string/reg expr %.30s... too long", cbuf SYNTAX; \
			BEGIN A; \
		}

uchar	cbuf[CBUFLEN];	/* text of the string or regular expression being collected */
uchar	*s;		/* scratch pointer used when a string constant is completed */
int	clen, cflag;	/* clen: characters currently in cbuf; cflag is not referenced in the visible source */

/* some of this depends on behavior of lex that
   may not be preserved in other implementations of lex.
*/

static	int	my_input( YY_CHAR *buf, int max_size );

/* Route the scanner's buffer refills through my_input(). */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);

/* Call init_input_source() (defined elsewhere) via the scanner's
   user-initialisation hook. */
#undef YY_USER_INIT
#define YY_USER_INIT init_input_source();
| 68 | %} |
| 69 | |
/* Named character classes used by the rules:
   A = letter or underscore (first character of a name),
   B = letter, digit or underscore (rest of a name),
   D = decimal digit, O = octal digit, H = hexadecimal digit,
   WS = blank or tab.
   The class name A is distinct from the start condition A. */
A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]
| 76 | |
| 77 | %% |
	/*
	 * Code placed before the first rule is executed on entry to yylex().
	 * The expression presumably converts the scanner's internal yy_start
	 * value back to a start-condition number -- confirm against the
	 * scanner skeleton in use.
	 *   0  : no start condition chosen yet, so enter state A.
	 *   sc : the "}" rule returned ';' and left the scanner in sc; go
	 *        back to A and deliver the '}' itself now.
	 * Any other state falls through to normal scanning.
	 */
	switch ((yy_start - 1) / 2) {	/* witchcraft */
	case 0:
		BEGIN A;
		break;
	case sc:
		BEGIN A;
		RET('}');
	}
| 86 | |
<A>\n		{ lineno++; RET(NL); }	/* newline is itself a token; keep lineno current */
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }	/* blanks and tabs between tokens produce nothing */
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }	/* backslash-newline: count the line, return no token */
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }	/* matches "func" and "function" */
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }	/* comparisons also store their own code in yylval.i */
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }	/* ~ and !~ share MATCHOP; yylval.i tells them apart */
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }	/* every assignment form returns ASGNOP; yylval.i selects the operation */
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }	/* same code as ^= */
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }	/* ** and ^ both yield POWER */
<A>"^"		{ RET(POWER); }
| 121 | |
<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }	/* $digits: the number after '$' is passed to fieldadr() */
<A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }	/* listed before the general $name rule so it wins the tie; "(NF)" is pushed back for rescanning; plain return, so no RET trace */
<A>"$"{A}{B}*	{
		  /* $name.  Either the '$' is an indirection operator applied
		     to what follows, or "$name" as a whole names a variable. */
		  int c;
		  char *yytext_copy = strdup(yytext);	/* the code below uses this copy rather than yytext after input()/unput(); NOTE(review): the strdup result is not checked for NULL */
		  c = input(); unput(c);	/* look for '(' or '[' */
		  /* NOTE(review): whatever input() returns at end of input is
		     pushed back unchanged -- confirm that is harmless. */
		  if (c == '(' || c == '[' ||
		      infunc && isarg(yytext_copy+1) >= 0) {
			/* push the name (without '$') back so it is scanned
			   as its own token; return INDIRECT for the '$' */
			unputstr(yytext_copy+1);
			free(yytext_copy);
			return(INDIRECT);
		  } else {
			/* enter the name after '$' in symtab and return it as IVAR */
			yylval.cp =
				setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab);
			free(yytext_copy);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }	/* '$' followed by anything else */
<A>NF		{ yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }	/* NF gets its own token and a NUM-only symtab entry */
| 142 | |
<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  /* Numeric constant: digits with an optional fraction, or a
		     fraction alone, followed by an optional exponent.  The
		     symtab entry is keyed by the spelling and carries both a
		     string copy and the atof() value. */
		  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
		  /* should this also have STR set? */
		  RET(NUMBER); }
| 147 | |
<A>while	{ RET(WHILE); }	/* keywords precede the general name rule, so an exact keyword wins the equal-length tie; a longer name is still matched as a name */
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }	/* from here on all names share the BLTIN token; yylval.i says which function */
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
<A>fflush	{ yylval.i = FFLUSH; RET(BLTIN); }
| 184 | |
<A>{A}{B}*	{ int n, c;
		  /* Any other name.  One character of lookahead decides
		     between a function call and a variable; inside a function
		     a name found by isarg() becomes an ARG instead. */
		  char *yytext_copy = strdup(yytext);	/* the code below uses this copy rather than yytext after input()/unput(); NOTE(review): the strdup result is not checked for NULL */
		  c = input(); unput(c);	/* look for '(' */
		  /* NOTE(review): whatever input() returns at end of input is
		     pushed back unchanged -- confirm that is harmless. */
		  if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) {
			yylval.i = n;	/* the value isarg() returned for this name */
			free(yytext_copy);
			RET(ARG);
		  } else {
			yylval.cp =
				setsymtab(yytext_copy,"",0.0,STR|NUM,symtab);
			free(yytext_copy);
			if (c == '(') {
				RET(CALL);	/* name immediately followed by '(' */
			} else {
				RET(VAR);
			}
		  }
		}
| 203 | |
<A>\"		{ BEGIN str; clen = 0; }	/* opening quote: start collecting a string in an empty cbuf */

<A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }	/* return ';' first; state sc makes the next yylex() call return the '}' */
<A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
<A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.		{ if (yytext[0] == '{') bracecnt++;	/* count openers so the closers above can detect extras */
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  RET(yylval.i = yytext[0]); /* everything else */ }
| 214 | |
<reg>\\.	{ CADD;	/* keep the backslash itself ... */
		  if (clen < CBUFLEN-1) {	/* ... and, unless CADD just reported overflow, the escaped character */
			yytext[0] = yytext[1];
			CADD;
		  }
		}
<reg>\n		{ cbuf[clen] = 0;	/* terminate so the message shows only the current text */
		  ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;	/* closing slash: hand the collected text to the parser */
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');	/* push the '/' back so it is rescanned in state A */
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ BEGIN A;	/* closing quote: build the string constant */
		  cbuf[clen] = 0; s = tostring(cbuf);	/* s: copy of the string's value */
		  cbuf[clen] = ' '; cbuf[++clen] = 0;	/* the symtab name is the text plus one trailing blank */
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ cbuf[clen] = 0;	/* terminate so the message shows only the current text */
		  ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
	/*
	 * Escape sequences.  Each action overwrites yytext[0] with the
	 * translated character and stores it through CADD, so escapes get the
	 * same bounds check and "too long" diagnostic as ordinary characters
	 * instead of writing to cbuf unchecked.
	 */
<str>"\\\""	{ yytext[0] = '"'; CADD; }
<str>"\\"n	{ yytext[0] = '\n'; CADD; }
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"f	{ yytext[0] = '\f'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"v	{ yytext[0] = '\v'; CADD; }	/* these ANSIisms may not be known by */
<str>"\\"a	{ yytext[0] = '\007'; CADD; }	/* your compiler.  hence 007 for bell */
<str>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{ unsigned int n = 0;	/* %o stores through an unsigned int pointer */
		  sscanf(yytext+1, "%o", &n); yytext[0] = n; CADD; }
<str>"\\"x({H}+)	{ unsigned int n = 0;	/* ANSI permits any number! */
		  sscanf(yytext+2, "%x", &n); yytext[0] = n; CADD; }
<str>"\\".	{ yytext[0] = yytext[1]; CADD; }	/* unknown escape: keep the character, drop the backslash */
<str>.		{ CADD; }
| 245 | |
| 246 | %% |
| 247 | |
| 248 | void startreg(void) /* start parsing a regular expression */ |
| 249 | { |
| 250 | BEGIN reg; |
| 251 | clen = 0; |
| 252 | } |
| 253 | |
| 254 | static int my_input( YY_CHAR *buf, int max_size ) |
| 255 | { |
| 256 | extern uchar *lexprog; |
| 257 | |
| 258 | if ( lexprog ) { /* awk '...' */ |
| 259 | int num_chars = strlen( lexprog ); |
| 260 | if ( num_chars > max_size ) |
| 261 | { |
| 262 | num_chars = max_size; |
| 263 | strncpy( buf, lexprog, num_chars ); |
| 264 | } |
| 265 | else |
| 266 | strcpy( buf, lexprog ); |
| 267 | lexprog += num_chars; |
| 268 | return num_chars; |
| 269 | |
| 270 | } else { /* awk -f ... */ |
| 271 | int c = pgetc(); |
| 272 | if (c == EOF) |
| 273 | return 0; |
| 274 | buf[0] = c; |
| 275 | return 1; |
| 276 | } |
| 277 | } |
| 278 | |
/*
 * unputstr: push the string s back onto the scanner's input.
 * Characters are pushed last-to-first, so they will be read again in their
 * original order.  An empty string pushes nothing.
 */
void unputstr(char *s)
{
	char *p = s + strlen(s);

	while (p > s) {
		--p;
		unput(*p);
	}
}
| 286 | |
/*
 * lex_input: return whatever the scanner's input() returns for the next
 * character.  Presumably provided so that code outside this file can read
 * from the scanner's input stream -- confirm against the callers.
 */
int lex_input(void)
{
	return input();
}