| 1 | /* |
| 2 | * Copyright (c) 1982 Regents of the University of California |
| 3 | */ |
/*
 * SCCS identification string; left out when the file is being linted.
 */
#ifndef lint
static char sccsid[] = "@(#)asscan2.c 4.4 %G%";
#endif /* not lint */
| 7 | |
| 8 | #include "asscanl.h" |
| 9 | static inttoktype oval = NL; /* type of the last token seen by scan_dot_s (SP after white space); NL means a '#' is at the start of a line */ |
| 10 | |
#define NINBUFFERS	2
/*
 * Total size of the input area.  The expansion is parenthesized so the
 * macro can be used safely inside a larger expression.
 */
#define INBUFLG		(NINBUFFERS*BUFSIZ + 2)
/*
 * We have two input buffers; the first one is reserved
 * for catching the tail of a line split across a buffer
 * boundary; the other one is used for snarfing a buffer's
 * worth of .s source.
 *
 * The two extra bytes leave room for the newline that
 * fillinbuffer() may have to force after a full buffer and for the
 * end-of-buffer sentinel it plants after the last complete line.
 */
static char inbuffer[INBUFLG];
/*
 * Scan position saved between calls to scan_dot_s();
 * 0 means the input area must be filled before scanning.
 */
static char *InBufPtr = 0;
| 21 | |
| 22 | /* |
| 23 | * fill the inbuffer from the standard input. |
| 24 | * Assert: there are always n COMPLETE! lines in the buffer area. |
| 25 | * Assert: there is always a \n terminating the last line |
| 26 | * in the buffer area. |
| 27 | * Assert: after the \n, there is an EOFCHAR (hard end of file) |
| 28 | * or a NEEDCHAR (end of buffer) |
| 29 | * Assert: fgets always null pads the string it reads. |
| 30 | * Assert: no ungetc's are done at the end of a line or at the |
| 31 | * beginning of a line. |
| 32 | * |
| 33 | * We read a complete buffer of characters in one single read. |
| 34 | * We then back scan within this buffer to find the end of the |
| 35 | * last complete line, and force the assertions, and save a pointer |
| 36 | * to the incomplete line. |
| 37 | * The next call to fillinbuffer will move the unread characters |
| 38 | * to the end of the first buffer, and then read one more buffer's worth |
| 39 | * of input into the second buffer, completing the cycle. |
| 40 | */ |
| 41 | |
| 42 | static char p_swapped = '\0'; |
| 43 | static char *p_start = &inbuffer[NINBUFFERS * BUFSIZ]; |
| 44 | static char *p_stop = &inbuffer[NINBUFFERS * BUFSIZ]; |
| 45 | |
| 46 | char *fillinbuffer() |
| 47 | { |
| 48 | register char *to; |
| 49 | register char *from; |
| 50 | char *inbufptr; |
| 51 | int nread; |
| 52 | static int hadeof; |
| 53 | int goal; |
| 54 | int got; |
| 55 | |
| 56 | *p_start = p_swapped; |
| 57 | inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start); |
| 58 | |
| 59 | for (to = inbufptr, from = p_start; from < p_stop;) |
| 60 | *to++ = *from++; |
| 61 | /* |
| 62 | * Now, go read two full buffers (hopefully) |
| 63 | */ |
| 64 | if (hadeof){ |
| 65 | hadeof = 0; |
| 66 | return (0); |
| 67 | } |
| 68 | goal = (NINBUFFERS - 1)*BUFSIZ; |
| 69 | nread = 0; |
| 70 | do { |
| 71 | got = read(stdin->_file, &inbuffer[1*BUFSIZ + nread], goal); |
| 72 | if (got == 0) |
| 73 | hadeof = 1; |
| 74 | if (got <= 0) |
| 75 | break; |
| 76 | nread += got; |
| 77 | goal -= got; |
| 78 | } while (goal); |
| 79 | |
| 80 | if (nread == 0) |
| 81 | return(0); |
| 82 | p_stop = from = &inbuffer[1*BUFSIZ + nread]; |
| 83 | *from = '\0'; |
| 84 | |
| 85 | while (*--from != '\n'){ |
| 86 | /* |
| 87 | * back over the partial line |
| 88 | */ |
| 89 | if (from == &inbuffer[1*BUFSIZ]) { |
| 90 | from = p_stop; |
| 91 | *p_stop++ = '\n'; |
| 92 | break; |
| 93 | } else { |
| 94 | continue; |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | from++; /* first char of partial line */ |
| 99 | p_start = from; |
| 100 | p_swapped = *p_start; |
| 101 | *p_start = NEEDCHAR; /* force assertion */ |
| 102 | return(inbufptr); |
| 103 | } |
| 104 | |
| 105 | scan_dot_s(bufferbox) |
| 106 | struct tokbufdesc *bufferbox; |
| 107 | { |
| 108 | reg int ryylval; /* local copy of lexical value */ |
| 109 | extern int yylval; /* global copy of lexical value */ |
| 110 | reg int val; /* the value returned */ |
| 111 | int i; /* simple counter */ |
| 112 | reg char *rcp; |
| 113 | char *cp; /* can have address taken */ |
| 114 | reg int ch; /* treated as a character */ |
| 115 | int ch1; /* shadow value */ |
| 116 | reg char *inbufptr; |
| 117 | struct symtab *op; |
| 118 | |
| 119 | reg ptrall bufptr; /* where to stuff tokens */ |
| 120 | ptrall lgbackpatch; /* where to stuff a string length */ |
| 121 | ptrall bufub; /* where not to stuff tokens */ |
| 122 | int maxstrlg; /* how long a string can be */ |
| 123 | long intval; /* value of int */ |
| 124 | int linescrossed; /* when doing strings and comments */ |
| 125 | struct Opcode opstruct; |
| 126 | |
| 127 | (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); |
| 128 | (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); |
| 129 | |
| 130 | inbufptr = InBufPtr; |
| 131 | if (inbufptr == 0){ |
| 132 | inbufptr = fillinbuffer(); |
| 133 | if (inbufptr == 0){ /*end of file*/ |
| 134 | endoffile: |
| 135 | inbufptr = 0; |
| 136 | ptoken(bufptr, PARSEEOF); |
| 137 | goto done; |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | if (newfflag){ |
| 142 | ptoken(bufptr, IFILE); |
| 143 | ptoken(bufptr, STRING); |
| 144 | val = strlen(newfname) + 1; |
| 145 | movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val); |
| 146 | bstrlg(bufptr, val); |
| 147 | |
| 148 | ptoken(bufptr, ILINENO); |
| 149 | ptoken(bufptr, INT); |
| 150 | pint(bufptr, 1); |
| 151 | newfflag = 0; |
| 152 | } |
| 153 | |
| 154 | while (bufptr < bufub){ |
| 155 | loop: |
| 156 | switch(ryylval = (type+2)[ch = getchar()]) { |
| 157 | case SCANEOF: |
| 158 | inbufptr = 0; |
| 159 | goto endoffile; |
| 160 | |
| 161 | case NEEDSBUF: |
| 162 | inbufptr = fillinbuffer(); |
| 163 | if (inbufptr == 0) |
| 164 | goto endoffile; |
| 165 | goto loop; |
| 166 | |
| 167 | case DIV: /*process C style comments*/ |
| 168 | if ( (ch = getchar()) == '*') { /*comment prelude*/ |
| 169 | int incomment; |
| 170 | linescrossed = 0; |
| 171 | incomment = 1; |
| 172 | ch = getchar(); /*skip over the * */ |
| 173 | while(incomment){ |
| 174 | switch(ch){ |
| 175 | case '*': |
| 176 | ch = getchar(); |
| 177 | incomment = (ch != '/'); |
| 178 | break; |
| 179 | case '\n': |
| 180 | scanlineno++; |
| 181 | linescrossed++; |
| 182 | ch = getchar(); |
| 183 | break; |
| 184 | case EOFCHAR: |
| 185 | goto endoffile; |
| 186 | case NEEDCHAR: |
| 187 | inbufptr = fillinbuffer(); |
| 188 | if (inbufptr == 0) |
| 189 | goto endoffile; |
| 190 | lineno++; |
| 191 | ch = getchar(); |
| 192 | break; |
| 193 | default: |
| 194 | ch = getchar(); |
| 195 | break; |
| 196 | } |
| 197 | } |
| 198 | val = ILINESKIP; |
| 199 | ryylval = linescrossed; |
| 200 | goto ret; |
| 201 | } else { /*just an ordinary DIV*/ |
| 202 | ungetc(ch); |
| 203 | val = ryylval = DIV; |
| 204 | goto ret; |
| 205 | } |
| 206 | case SH: |
| 207 | if (oval == NL){ |
| 208 | /* |
| 209 | * Attempt to recognize a C preprocessor |
| 210 | * style comment '^#[ \t]*[0-9]*[ \t]*".*" |
| 211 | */ |
| 212 | ch = getchar(); /*bump the #*/ |
| 213 | while (INCHARSET(ch, SPACE)) |
| 214 | ch = getchar();/*bump white */ |
| 215 | if (INCHARSET(ch, DIGIT)){ |
| 216 | intval = 0; |
| 217 | while(INCHARSET(ch, DIGIT)){ |
| 218 | intval = intval*10 + ch - '0'; |
| 219 | ch = getchar(); |
| 220 | } |
| 221 | while (INCHARSET(ch, SPACE)) |
| 222 | ch = getchar(); |
| 223 | if (ch == '"'){ |
| 224 | ptoken(bufptr, ILINENO); |
| 225 | ptoken(bufptr, INT); |
| 226 | pint(bufptr, intval - 1); |
| 227 | ptoken(bufptr, IFILE); |
| 228 | /* |
| 229 | * The '"' has already been |
| 230 | * munched |
| 231 | * |
| 232 | * eatstr will not eat |
| 233 | * the trailing \n, so |
| 234 | * it is given to the parser |
| 235 | * and counted. |
| 236 | */ |
| 237 | goto eatstr; |
| 238 | } |
| 239 | } |
| 240 | } |
| 241 | /* |
| 242 | * Well, its just an ordinary decadent comment |
| 243 | */ |
| 244 | while ((ch != '\n') && (ch != EOFCHAR)) |
| 245 | ch = getchar(); |
| 246 | if (ch == EOFCHAR) |
| 247 | goto endoffile; |
| 248 | val = ryylval = oval = NL; |
| 249 | scanlineno++; |
| 250 | goto ret; |
| 251 | |
| 252 | case NL: |
| 253 | scanlineno++; |
| 254 | val = ryylval; |
| 255 | goto ret; |
| 256 | |
| 257 | case SP: |
| 258 | oval = SP; /*invalidate ^# meta comments*/ |
| 259 | goto loop; |
| 260 | |
| 261 | case REGOP: /* % , could be used as modulo, or register*/ |
| 262 | ch = getchar(); |
| 263 | if (INCHARSET(ch, DIGIT)){ |
| 264 | ryylval = ch-'0'; |
| 265 | if (ch=='1') { |
| 266 | if (INCHARSET( (ch = getchar()), REGDIGIT)) |
| 267 | ryylval = 10+ch-'0'; |
| 268 | else |
| 269 | ungetc(ch); |
| 270 | } |
| 271 | /* |
| 272 | * God only knows what the original author |
| 273 | * wanted this undocumented feature to |
| 274 | * do. |
| 275 | * %5++ is really r7 |
| 276 | */ |
| 277 | while(INCHARSET( (ch = getchar()), SIGN)) { |
| 278 | if (ch=='+') |
| 279 | ryylval++; |
| 280 | else |
| 281 | ryylval--; |
| 282 | } |
| 283 | ungetc(ch); |
| 284 | val = REG; |
| 285 | } else { |
| 286 | ungetc(ch); |
| 287 | val = REGOP; |
| 288 | } |
| 289 | goto ret; |
| 290 | |
| 291 | case ALPH: |
| 292 | ch1 = ch; |
| 293 | if (INCHARSET(ch, SZSPECBEGIN)){ |
| 294 | if( (ch = getchar()) == '`' || ch == '^'){ |
| 295 | ch1 |= 0100; /*convert to lower*/ |
| 296 | switch(ch1){ |
| 297 | case 'b': ryylval = 1; break; |
| 298 | case 'w': ryylval = 2; break; |
| 299 | case 'l': ryylval = 4; break; |
| 300 | default: ryylval = d124; break; |
| 301 | } |
| 302 | val = SIZESPEC; |
| 303 | goto ret; |
| 304 | } else { |
| 305 | ungetc(ch); |
| 306 | ch = ch1; /*restore first character*/ |
| 307 | } |
| 308 | } |
| 309 | rcp = yytext; |
| 310 | do { |
| 311 | if (rcp < &yytext[NCPS]) |
| 312 | *rcp++ = ch; |
| 313 | } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); |
| 314 | *rcp = '\0'; |
| 315 | while (INCHARSET(ch, SPACE)) |
| 316 | ch = getchar(); |
| 317 | ungetc(ch); |
| 318 | |
| 319 | switch((op = *lookup(1))->s_tag){ |
| 320 | case 0: |
| 321 | case LABELID: |
| 322 | /* |
| 323 | * Its a name... (Labels are subsets ofname) |
| 324 | */ |
| 325 | ryylval = (int)op; |
| 326 | val = NAME; |
| 327 | break; |
| 328 | case INST0: |
| 329 | case INSTn: |
| 330 | case IJXXX: |
| 331 | opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; |
| 332 | opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; |
| 333 | val = op->s_tag; |
| 334 | break; |
| 335 | default: |
| 336 | ryylval = ( (struct instab *)op)->i_popcode; |
| 337 | val = op->s_tag; |
| 338 | break; |
| 339 | } |
| 340 | goto ret; |
| 341 | |
| 342 | case DIG: |
| 343 | /* |
| 344 | * Implement call by reference on a reg variable |
| 345 | */ |
| 346 | cp = inbufptr; |
| 347 | val = number(ch, &cp); |
| 348 | /* |
| 349 | * yylval or yybignum has been stuffed as a side |
| 350 | * effect to number(); get the global yylval |
| 351 | * into our fast local copy in case it was an INT. |
| 352 | */ |
| 353 | ryylval = yylval; |
| 354 | inbufptr = cp; |
| 355 | goto ret; |
| 356 | |
| 357 | case LSH: |
| 358 | case RSH: |
| 359 | /* |
| 360 | * We allow the C style operators |
| 361 | * << and >>, as well as < and > |
| 362 | */ |
| 363 | if ( (ch1 = getchar()) != ch) |
| 364 | ungetc(ch1); |
| 365 | val = ryylval; |
| 366 | goto ret; |
| 367 | |
| 368 | case MINUS: |
| 369 | if ( (ch = getchar()) =='(') |
| 370 | ryylval=val=MP; |
| 371 | else { |
| 372 | ungetc(ch); |
| 373 | val=MINUS; |
| 374 | } |
| 375 | goto ret; |
| 376 | |
| 377 | case SQ: |
| 378 | if ((ryylval = getchar()) == '\n') |
| 379 | scanlineno++; /*not entirely correct*/ |
| 380 | val = INT; |
| 381 | goto ret; |
| 382 | |
| 383 | case DQ: |
| 384 | eatstr: |
| 385 | linescrossed = 0; |
| 386 | maxstrlg = (char *)bufub - (char *)bufptr; |
| 387 | |
| 388 | if (maxstrlg < MAXSTRLG) { |
| 389 | ungetc('"'); |
| 390 | *(bytetoktype *)bufptr = VOID ; |
| 391 | bufub = bufptr; |
| 392 | goto done; |
| 393 | } |
| 394 | if (maxstrlg > MAXSTRLG) |
| 395 | maxstrlg = MAXSTRLG; |
| 396 | |
| 397 | ptoken(bufptr, STRING); |
| 398 | lgbackpatch = bufptr; /*this is where the size goes*/ |
| 399 | bufptr += sizeof(lgtype); |
| 400 | /* |
| 401 | * bufptr is now set to |
| 402 | * be stuffed with characters from |
| 403 | * the input |
| 404 | */ |
| 405 | |
| 406 | while ( (maxstrlg > 0) |
| 407 | && !(INCHARSET( (ch = getchar()), STRESCAPE)) |
| 408 | ){ |
| 409 | stuff: |
| 410 | maxstrlg -= 1; |
| 411 | pchar(bufptr, ch); |
| 412 | } |
| 413 | if (maxstrlg <= 0){ /*enough characters to fill a string buffer*/ |
| 414 | ungetc('"'); /*will read it next*/ |
| 415 | } |
| 416 | else if (ch == '"') |
| 417 | /*VOID*/ ; /*done*/ |
| 418 | else if (ch == '\n'){ |
| 419 | yywarning("New line embedded in a string constant."); |
| 420 | scanlineno++; |
| 421 | linescrossed++; |
| 422 | ch = getchar(); |
| 423 | if (ch == EOFCHAR){ |
| 424 | do_eof: |
| 425 | pchar(bufptr, '\n'); |
| 426 | ungetc(EOFCHAR); |
| 427 | } else |
| 428 | if (ch == NEEDCHAR){ |
| 429 | if ( (inbufptr = fillinbuffer()) == 0) |
| 430 | goto do_eof; |
| 431 | ch = '\n'; |
| 432 | goto stuff; |
| 433 | } else { /* simple case */ |
| 434 | ungetc(ch); |
| 435 | ch = '\n'; |
| 436 | goto stuff; |
| 437 | } |
| 438 | } else { |
| 439 | ch = getchar(); /*skip the '\\'*/ |
| 440 | if ( INCHARSET(ch, BSESCAPE)){ |
| 441 | switch (ch){ |
| 442 | case 'b': ch = '\b'; goto stuff; |
| 443 | case 'f': ch = '\f'; goto stuff; |
| 444 | case 'n': ch = '\n'; goto stuff; |
| 445 | case 'r': ch = '\r'; goto stuff; |
| 446 | case 't': ch = '\t'; goto stuff; |
| 447 | } |
| 448 | } |
| 449 | if ( !(INCHARSET(ch,OCTDIGIT)) ) goto stuff; |
| 450 | i = 0; |
| 451 | intval = 0; |
| 452 | while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ |
| 453 | i++;intval <<= 3;intval += ch - '0'; |
| 454 | ch = getchar(); |
| 455 | } |
| 456 | ungetc(ch); |
| 457 | ch = (char)intval; |
| 458 | goto stuff; |
| 459 | } |
| 460 | /* |
| 461 | * bufptr now points at the next free slot |
| 462 | */ |
| 463 | bstrfromto(lgbackpatch, bufptr); |
| 464 | if (linescrossed){ |
| 465 | val = ILINESKIP; |
| 466 | ryylval = linescrossed; |
| 467 | goto ret; |
| 468 | } else |
| 469 | goto builtval; |
| 470 | |
| 471 | case BADCHAR: |
| 472 | linescrossed = lineno; |
| 473 | lineno = scanlineno; |
| 474 | yyerror("Illegal character mapped: %d, char read:(octal) %o", |
| 475 | ryylval, ch); |
| 476 | lineno = linescrossed; |
| 477 | val = BADCHAR; |
| 478 | goto ret; |
| 479 | |
| 480 | default: |
| 481 | val = ryylval; |
| 482 | goto ret; |
| 483 | } /*end of the switch*/ |
| 484 | /* |
| 485 | * here with one token, so stuff it |
| 486 | */ |
| 487 | ret: |
| 488 | oval = val; |
| 489 | ptoken(bufptr, val); |
| 490 | switch(val){ |
| 491 | case ILINESKIP: |
| 492 | pint(bufptr, ryylval); |
| 493 | break; |
| 494 | case SIZESPEC: |
| 495 | pchar(bufptr, ryylval); |
| 496 | break; |
| 497 | case BFINT: plong(bufptr, ryylval); |
| 498 | break; |
| 499 | case INT: plong(bufptr, ryylval); |
| 500 | break; |
| 501 | case BIGNUM: pnumber(bufptr, yybignum); |
| 502 | break; |
| 503 | case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); |
| 504 | break; |
| 505 | case REG: pchar(bufptr, ryylval); |
| 506 | break; |
| 507 | case INST0: |
| 508 | case INSTn: |
| 509 | popcode(bufptr, opstruct); |
| 510 | break; |
| 511 | case IJXXX: |
| 512 | popcode(bufptr, opstruct); |
| 513 | pptr(bufptr, (int)(struct symtab *)symalloc()); |
| 514 | break; |
| 515 | case ISTAB: |
| 516 | case ISTABSTR: |
| 517 | case ISTABNONE: |
| 518 | case ISTABDOT: |
| 519 | case IALIGN: |
| 520 | pptr(bufptr, (int)(struct symtab *)symalloc()); |
| 521 | break; |
| 522 | /* |
| 523 | * default: |
| 524 | */ |
| 525 | } |
| 526 | builtval: ; |
| 527 | } /*end of the while to stuff the buffer*/ |
| 528 | done: |
| 529 | bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); |
| 530 | |
| 531 | /* |
| 532 | * This is a real kludge: |
| 533 | * |
| 534 | * We put the last token in the buffer to be a MINUS |
| 535 | * symbol. This last token will never be picked up |
| 536 | * in the normal way, but can be looked at during |
| 537 | * a peekahead look that the short circuit expression |
| 538 | * evaluator uses to see if an expression is complicated. |
| 539 | * |
| 540 | * Consider the following situation: |
| 541 | * |
| 542 | * .word 45 + 47 |
| 543 | * buffer 1 | buffer 0 |
| 544 | * the peekahead would want to look across the buffer, |
| 545 | * but will look in the buffer end zone, see the minus, and |
| 546 | * fail. |
| 547 | */ |
| 548 | ptoken(bufptr, MINUS); |
| 549 | InBufPtr = inbufptr; /*copy this back*/ |
| 550 | } |