usr/src/old/as.vax/asscan2.c

/*
 *      Copyright (c) 1982 Regents of the University of California
 */
/*
 *      SCCS identification string; left out when lint is defined.
 */
#ifndef lint
static char sccsid[] = "@(#)asscan2.c 4.6 %G%";
#endif /* not lint */

#include "asscanl.h"
/*
 *      Type of the token most recently stuffed by scan_dot_s (or SP
 *      once white space has been skipped).  It starts out as NL, and
 *      scan_dot_s only tries to read a '#' as a C preprocessor style
 *      line directive while oval is still NL.
 */
static  inttoktype      oval = NL;

/*
 *      The scanner does its own buffering, so any BUFSIZ inherited
 *      from an included header is replaced by our own buffer size.
 */
#ifdef BUFSIZ
#undef BUFSIZ
#endif

#define BUFSIZ 4096

#define NINBUFFERS      2
/*
 *      Size of the whole input area: the buffers plus two spare
 *      characters.  The expansion is parenthesized so that the macro
 *      keeps its value when used inside a larger expression.
 */
#define INBUFLG         (NINBUFFERS*BUFSIZ + 2)
        /*
         *      We have two input buffers; the first one is reserved
         *      for catching the tail of a line split across a buffer
         *      boundary; the other one is used for snarfing a buffer's
         *      worth of .s source.  The two extra characters leave room
         *      past the end of the data for the forced newline and the
         *      NEEDCHAR marker that fillinbuffer may store there.
         */
static  char    inbuffer[INBUFLG];
        /*
         *      Scanning position saved by scan_dot_s between calls;
         *      0 means the buffer must be filled before scanning.
         */
static  char    *InBufPtr = 0;

/*
 *      Fill the inbuffer from the standard input.
 *      Assert: there are always n COMPLETE! lines in the buffer area.
 *      Assert: there is always a \n terminating the last line
 *              in the buffer area.
 *      Assert: after the \n, there is an EOFCHAR (hard end of file)
 *              or a NEEDCHAR (end of buffer)
 *      Assert: the characters read are always followed by a null
 *              (fillinbuffer stores it itself, since read, unlike
 *              fgets, does not null pad what it reads).
 *      Assert: no ungetc's are done at the end of a line or at the
 *              beginning of a line.
 *
 *      We read up to a complete buffer of characters, repeating the
 *      read until the buffer is full or the input is exhausted.
 *      We then back scan within this buffer to find the end of the
 *      last complete line, and force the assertions, and save a pointer
 *      to the incomplete line.
 *      The next call to fillinbuffer will move the unread characters
 *      to the end of the first buffer, and then read another buffer,
 *      completing the cycle.
 */

/*
 *      State carried from one call of fillinbuffer to the next:
 *
 *      p_start   points at the first character of the incomplete line
 *                left at the end of the data last read; fillinbuffer
 *                overwrites that character with NEEDCHAR.
 *      p_swapped holds the character that NEEDCHAR displaced, so that
 *                it can be put back by the next call.
 *      p_stop    points just past the last character of the data.
 *
 *      Both pointers start out at the end of the second buffer, so the
 *      first call has no left over characters to move.
 */
static  char    p_swapped = '\0';
static  char    *p_start = &inbuffer[NINBUFFERS * BUFSIZ];
static  char    *p_stop = &inbuffer[NINBUFFERS * BUFSIZ];

/*
 *      Refill the input buffer.  Returns a pointer to the first
 *      character to be scanned, or 0 when the input is exhausted.
 *
 *      The incomplete line left over from the previous call (p_start
 *      up to p_stop) is slid down so that it ends exactly where the
 *      newly read data begins, at inbuffer[BUFSIZ].  Up to
 *      (NINBUFFERS - 1)*BUFSIZ characters are then read, the last
 *      newline is found by scanning backwards, and the character after
 *      it is replaced by NEEDCHAR; the displaced character is kept in
 *      p_swapped and put back by the next call.
 *
 *      At the end of the input a left over incomplete line (a last
 *      line without a newline) is returned with a newline forced onto
 *      it rather than being thrown away, and the end of file is
 *      reported by the call after that.  hadeof is always clear again
 *      once 0 has been returned, and no left over characters remain,
 *      so the next input file starts with a clean slate.
 */
char *fillinbuffer()
{
        register        char    *to;
        register        char    *from;
                        char    *inbufptr;
        int             nread;
        static          int     hadeof; /* end of file seen, not yet reported */
        int             goal;
        int             got;

        /*
         *      Undo the NEEDCHAR planted by the previous call, then
         *      move the unscanned incomplete line to the end of the
         *      first buffer.
         */
        *p_start = p_swapped;
        inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start);

        for (to = inbufptr, from = p_start; from < p_stop;)
                *to++ = *from++;
        /*
         *      Now, go read one full buffer (hopefully).  Once end of
         *      file has been seen the read is not attempted again.
         */
        nread = 0;
        if (!hadeof){
                goal = (NINBUFFERS - 1)*BUFSIZ;
                do {
                        got = read(stdin->_file, &inbuffer[1*BUFSIZ + nread], goal);
                        if (got == 0)
                                hadeof = 1;
                        if (got <= 0)
                                break;
                        nread += got;
                        goal -= got;
                } while (goal);
        }

        if (nread == 0){
                if (to == inbufptr){
                        /*
                         *      Nothing read and nothing left over:
                         *      report the end of the input, and forget
                         *      about it so the next file can be read.
                         */
                        hadeof = 0;
                        return(0);
                }
                /*
                 *      Nothing read, but the last line of the input
                 *      had no newline.  Terminate it by force and hand
                 *      it out; the next call reports the end of file.
                 *      (to is now &inbuffer[BUFSIZ].)
                 */
                hadeof = 1;
                *to++ = '\n';
                p_start = p_stop = to;
                p_swapped = *p_start;
                *p_start = NEEDCHAR;            /* force assertion */
                return(inbufptr);
        }
        p_stop = from = &inbuffer[1*BUFSIZ + nread];
        *from = '\0';

        while (*--from != '\n'){
                /*
                 *      back over the partial line; if the data read
                 *      holds no newline at all, force one at its end
                 */
                if (from == &inbuffer[1*BUFSIZ]) {
                        from = p_stop;
                        *p_stop++ = '\n';
                        break;
                }
        }

        from++;                         /* first char of partial line */
        p_start = from;
        p_swapped = *p_start;
        *p_start = NEEDCHAR;            /* force assertion */
        return(inbufptr);
}

/*
 *      Scan the .s source held in the input buffer and fill the token
 *      buffer in bufferbox with as many tokens as will fit.
 *
 *      Scanning resumes at the position saved in InBufPtr by the
 *      previous call; if there is no saved position the input buffer
 *      is filled first.  When newfflag is set, the token buffer begins
 *      with IFILE/STRING tokens naming newfname and ILINENO/INT tokens
 *      giving line number 1.  Tokens are then stuffed until bufptr
 *      reaches bufub, until a string would no longer fit, or until the
 *      input runs out, in which case a PARSEEOF token is stuffed.
 *
 *      On return bufferbox->tok_count holds the number of bytetoktype
 *      slots used, a MINUS token has been stored just past them, and
 *      the scanning position has been copied back into InBufPtr.
 *
 *      getchar(), ungetc() and the p...() token stuffers are presumably
 *      the macros from asscanl.h that work directly on inbufptr and
 *      bufptr -- confirm there.
 */
scan_dot_s(bufferbox)
        struct tokbufdesc *bufferbox;
{
        reg     int     ryylval;        /* local copy of lexical value */
        extern  int     yylval;         /* global copy of lexical value */
        reg     int     val;            /* the value returned */
                int     i;              /* simple counter */
        reg     char    *rcp;
                char    *cp;            /* can have address taken */
        reg     int     ch;             /* treated as a character */
                int     ch1;            /* shadow value */
        reg     char    *inbufptr;
                struct  symtab  *op;

        reg     ptrall  bufptr;         /* where to stuff tokens */
                ptrall  lgbackpatch;    /* where to stuff a string length */
                ptrall  bufub;          /* where not to stuff tokens */
                int     maxstrlg;       /* how long a string can be */
                long    intval;         /* value of int */
                int     linescrossed;   /* when doing strings and comments */
                struct  Opcode          opstruct;

        (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
        (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);

        inbufptr = InBufPtr;
        if (inbufptr == 0){
                inbufptr = fillinbuffer();
                if (inbufptr == 0){     /*end of file*/
                  endoffile:
                        inbufptr = 0;
                        ptoken(bufptr, PARSEEOF);
                        goto done;
                }
        }

        /*
         *      A new source file: tell the parser its name and
         *      restart the line numbering.
         */
        if (newfflag){
                ptoken(bufptr, IFILE);
                ptoken(bufptr, STRING);
                val = strlen(newfname) + 1;
                movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val);
                bstrlg(bufptr, val);

                ptoken(bufptr, ILINENO);
                ptoken(bufptr, INT);
                pint(bufptr,  1);
                newfflag = 0;
        }

        while (bufptr < bufub){
   loop:
        switch(ryylval = (type+2)[ch = getchar()]) {
        case SCANEOF:
                inbufptr = 0;
                goto endoffile;

        case NEEDSBUF:
                inbufptr = fillinbuffer();
                if (inbufptr == 0)
                        goto endoffile;
                goto loop;

        case DIV:               /*process C style comments*/
                if ( (ch = getchar()) == '*') {  /*comment prelude*/
                        int     incomment;
                        linescrossed = 0;
                        incomment = 1;
                        ch = getchar(); /*skip over the * */
                        while(incomment){
                                switch(ch){
                                case '*':
                                        ch = getchar();
                                        incomment = (ch != '/');
                                        break;
                                case '\n':
                                        scanlineno++;
                                        linescrossed++;
                                        ch = getchar();
                                        break;
                                case EOFCHAR:
                                        goto endoffile;
                                case NEEDCHAR:
                                        inbufptr = fillinbuffer();
                                        if (inbufptr == 0)
                                                goto endoffile;
                                        /*
                                         *      NOTE(review): the '\n' case
                                         *      above counts in scanlineno
                                         *      and linescrossed, but this
                                         *      bumps lineno -- confirm
                                         *      that this is intended.
                                         */
                                        lineno++;
                                        ch = getchar();
                                        break;
                                default:
                                        ch = getchar();
                                        break;
                                }
                        }
                        /*
                         *      The comment itself yields no token; the
                         *      parser is only told how many lines went by.
                         */
                        val = ILINESKIP;
                        ryylval = linescrossed;
                        goto ret;
                } else {        /*just an ordinary DIV*/
                        ungetc(ch);
                        val = ryylval = DIV;
                        goto ret;
                }
        case SH:
                if (oval == NL){
                        /*
                         *      Attempt to recognize a C preprocessor
                         *      style comment '^#[ \t]*[0-9]*[ \t]*".*"
                         */
                        ch = getchar(); /*bump the #*/
                        while (INCHARSET(ch, SPACE))
                                ch = getchar();/*bump white */
                        if (INCHARSET(ch, DIGIT)){
                                intval = 0;
                                while(INCHARSET(ch, DIGIT)){
                                        intval = intval*10 + ch - '0';
                                        ch = getchar();
                                }
                                while (INCHARSET(ch, SPACE))
                                        ch = getchar();
                                if (ch == '"'){
                                        ptoken(bufptr, ILINENO);
                                        ptoken(bufptr, INT);
                                        pint(bufptr, intval - 1);
                                        ptoken(bufptr, IFILE);
                                        /*
                                         *      The '"' has already been
                                         *      munched
                                         *
                                         *      eatstr will not eat
                                         *      the trailing \n, so
                                         *      it is given to the parser
                                         *      and counted.
                                         */
                                        goto eatstr;
                                }
                        }
                }
                /*
                 *      Well, its just an ordinary decadent comment
                 */
                while ((ch != '\n') && (ch != EOFCHAR))
                        ch = getchar();
                if (ch == EOFCHAR)
                        goto endoffile;
                val = ryylval = oval = NL;
                scanlineno++;
                goto ret;

        case NL:
                scanlineno++;
                val = ryylval;
                goto ret;

        case SP:
                oval = SP;      /*invalidate ^# meta comments*/
                goto loop;

        case REGOP:             /* % , could be used as modulo, or register*/
                ch = getchar();
                if (INCHARSET(ch, DIGIT)){
                        ryylval = ch-'0';
                        if (ch=='1') {
                                if (INCHARSET( (ch = getchar()), REGDIGIT))
                                        ryylval = 10+ch-'0';
                                else
                                        ungetc(ch);
                        }
                        /*
                         *      God only knows what the original author
                         *      wanted this undocumented feature to
                         *      do.
                         *              %5++ is really  r7
                         */
                        while(INCHARSET( (ch = getchar()), SIGN)) {
                                if (ch=='+')
                                        ryylval++;
                                else
                                        ryylval--;
                        }
                        ungetc(ch);
                        val = REG;
                } else {
                        ungetc(ch);
                        val = REGOP;
                }
                goto ret;

        case ALPH:
                ch1 = ch;
                if (INCHARSET(ch, SZSPECBEGIN)){
                        if( (ch = getchar()) == '`' || ch == '^'){
                                /*
                                 *      In ASCII the lower case letters
                                 *      are the upper case ones with bit
                                 *      040 set.  (Bit 0100 is set in
                                 *      every letter already, so or'ing
                                 *      it in converts nothing.)
                                 */
                                ch1 |= 040;     /*convert to lower*/
                                switch(ch1){
                                case 'b':       ryylval = 1;    break;
                                case 'w':       ryylval = 2;    break;
                                case 'l':       ryylval = 4;    break;
                                default:        ryylval = d124; break;
                                }
                                val = SIZESPEC;
                                goto ret;
                        } else {
                                ungetc(ch);
                                ch = ch1;       /*restore first character*/
                        }
                }
                /*
                 *      Collect the name into yytext; characters that
                 *      do not fit are read but not stored.
                 */
                rcp = yytext;
                do {
                        if (rcp < &yytext[NCPS])
                                *rcp++ = ch;
                } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
                *rcp = '\0';
                while (INCHARSET(ch, SPACE))
                        ch = getchar();
                ungetc(ch);

                switch((op = *lookup(1))->s_tag){
                case 0:
                case LABELID:
                        /*
                         *      Its a name... (Labels are subsets ofname)
                         */
                        ryylval = (int)op;
                        val = NAME;
                        break;
                case INST0:
                case INSTn:
                case IJXXX:
                        opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
                        opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
                        val = op->s_tag;
                        break;
                default:
                        ryylval = ( (struct instab *)op)->i_popcode;
                        val = op->s_tag;
                        break;
                }
                goto ret;

        case DIG:
                /*
                 *      Implement call by reference on a reg variable
                 */
                cp = inbufptr;
                val = number(ch, &cp);
                /*
                 *      yylval or yybignum has been stuffed as a side
                 *      effect to number(); get the global yylval
                 *      into our fast local copy in case it was an INT.
                 */
                ryylval = yylval;
                inbufptr = cp;
                goto ret;

        case LSH:
        case RSH:
                /*
                 *      We allow the C style operators
                 *      << and >>, as well as < and >
                 */
                if ( (ch1 = getchar()) != ch)
                        ungetc(ch1);
                val = ryylval;
                goto ret;

        case MINUS:
                if ( (ch = getchar()) =='(')
                        ryylval=val=MP;
                else {
                        ungetc(ch);
                        val=MINUS;
                }
                goto ret;

        case SQ:
                if ((ryylval = getchar()) == '\n')
                        scanlineno++;           /*not entirely correct*/
                val = INT;
                goto ret;

        case DQ:
           eatstr:
                linescrossed = 0;
                maxstrlg = (char *)bufub - (char *)bufptr;

                /*
                 *      Not enough room left for a string: put the '"'
                 *      back, close this token buffer off early, and
                 *      let the next call scan the string.
                 */
                if (maxstrlg < MAXSTRLG) {
                        ungetc('"');
                        *(bytetoktype *)bufptr = VOID ;
                        bufub = bufptr;
                        goto done;
                }
                if (maxstrlg > MAXSTRLG)
                        maxstrlg = MAXSTRLG;

                ptoken(bufptr, STRING);
                lgbackpatch = bufptr;   /*this is where the size goes*/
                bufptr += sizeof(lgtype);
                /*
                 *      bufptr is now set to
                 *      be stuffed with characters from
                 *      the input
                 */

                /*
                 *      The escape handling below jumps back into this
                 *      loop at stuff: with the character to store in ch.
                 */
                while (   (maxstrlg > 0)
                       && !(INCHARSET( (ch = getchar()), STRESCAPE))
                      ){
                        stuff:
                                maxstrlg -= 1;
                                pchar(bufptr, ch);
                        }
                if (maxstrlg <= 0){     /*enough characters to fill a string buffer*/
                        ungetc('"');            /*will read it next*/
                }
                else if (ch == '"')
                        /*VOID*/ ;              /*done*/
                else if (ch == '\n'){
                        yywarning("New line embedded in a string constant.");
                        scanlineno++;
                        linescrossed++;
                        ch = getchar();
                        if (ch == EOFCHAR){
                          do_eof:
                                pchar(bufptr, '\n');
                                ungetc(EOFCHAR);
                        } else
                        if (ch == NEEDCHAR){
                                if ( (inbufptr = fillinbuffer()) == 0)
                                        goto do_eof;
                                ch = '\n';
                                goto stuff;
                        } else {        /* simple case */
                                ungetc(ch);
                                ch = '\n';
                                goto stuff;
                        }
                } else {
                        ch = getchar();         /*skip the '\\'*/
                        if ( INCHARSET(ch, BSESCAPE)){
                                switch (ch){
                                  case 'b':  ch = '\b'; goto stuff;
                                  case 'f':  ch = '\f'; goto stuff;
                                  case 'n':  ch = '\n'; goto stuff;
                                  case 'r':  ch = '\r'; goto stuff;
                                  case 't':  ch = '\t'; goto stuff;
                                }
                        }
                        if ( !(INCHARSET(ch,OCTDIGIT)) )  goto stuff;
                        /*
                         *      Up to three octal digits give the
                         *      value of the character.
                         */
                        i = 0;
                        intval = 0;
                        while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
                                i++;intval <<= 3;intval += ch - '0';
                                ch = getchar();
                        }
                        ungetc(ch);
                        ch = (char)intval;
                        goto stuff;
                }
                /*
                 *      bufptr now points at the next free slot
                 */
                bstrfromto(lgbackpatch, bufptr);
                if (linescrossed){
                        val = ILINESKIP;
                        ryylval = linescrossed;
                        goto ret;
                } else
                        goto builtval;

        case BADCHAR:
                /*
                 *      Report the error against the line being
                 *      scanned; linescrossed is borrowed to hold
                 *      lineno while it is overridden.
                 */
                linescrossed = lineno;
                lineno = scanlineno;
                yyerror("Illegal character mapped: %d, char read:(octal) %o",
                        ryylval, ch);
                lineno = linescrossed;
                val = BADCHAR;
                goto ret;

        default:
                val = ryylval;
                goto ret;
        }       /*end of the switch*/
        /*
         *      here with one token, so stuff it
         */
   ret:
        oval = val;
        ptoken(bufptr, val);
        switch(val){
                case    ILINESKIP:
                                pint(bufptr, ryylval);
                                break;
                case    SIZESPEC:
                                pchar(bufptr, ryylval);
                                break;
                case    BFINT:  plong(bufptr, ryylval);
                                break;
                case    INT:    plong(bufptr, ryylval);
                                break;
                case    BIGNUM: pnumber(bufptr, yybignum);
                                break;
                case    NAME:   pptr(bufptr, (int)(struct symtab *)ryylval);
                                break;
                case    REG:    pchar(bufptr, ryylval);
                                break;
                case    INST0:
                case    INSTn:
                                popcode(bufptr, opstruct);
                                break;
                case    IJXXX:
                                popcode(bufptr, opstruct);
                                pptr(bufptr, (int)(struct symtab *)symalloc());
                                break;
                case    ISTAB:
                case    ISTABSTR:
                case    ISTABNONE:
                case    ISTABDOT:
                case    IALIGN:
                                pptr(bufptr, (int)(struct symtab *)symalloc());
                                break;
        /*
         *      default:
         */
         }
         builtval: ;
   }                    /*end of the while to stuff the buffer*/
   done:
        bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);

        /*
         *      This is a real kludge:
         *
         *      We put the last token in the buffer to be  a MINUS
         *      symbol.  This last token will never be picked up
         *      in the normal way, but can be looked at during
         *      a peekahead look that the short circuit expression
         *      evaluator uses to see if an expression is complicated.
         *
         *      Consider the following situation:
         *
         *      .word   45              +       47
         *        buffer 1      |  buffer 0
         *      the peekahead would want to look across the buffer,
         *      but will look in the buffer end zone, see the minus, and
         *      fail.
         */
        ptoken(bufptr, MINUS);
        InBufPtr = inbufptr;            /*copy this back*/
}