From b1cc4d5abe1a19767593927cfa947c248789b1cd Mon Sep 17 00:00:00 2001 From: Bill Joy Date: Sat, 3 May 1980 02:54:24 -0800 Subject: [PATCH] BSD 4 development Work on file usr/src/cmd/cpp/README Work on file usr/src/cmd/cpp/cpy.y Work on file usr/src/cmd/cpp/yylex.c Synthesized-from: CSRG//cd1/4.0 --- usr/src/cmd/cpp/README | 153 ++++++++++++++++++++++++++++++++++++++++ usr/src/cmd/cpp/cpy.y | 80 +++++++++++++++++++++ usr/src/cmd/cpp/yylex.c | 86 ++++++++++++++++++++++ 3 files changed, 319 insertions(+) create mode 100644 usr/src/cmd/cpp/README create mode 100644 usr/src/cmd/cpp/cpy.y create mode 100644 usr/src/cmd/cpp/yylex.c diff --git a/usr/src/cmd/cpp/README b/usr/src/cmd/cpp/README new file mode 100644 index 0000000000..fcff9bbc93 --- /dev/null +++ b/usr/src/cmd/cpp/README @@ -0,0 +1,153 @@ +August 25, 1978 + +Files in this directory form the C preprocessor, which handles '#include' +files and macro definition and expansion for the C compiler. +This new version was written by John F. Reiser and is from 5 to 12 +times faster (on UNIX systems) than the old. + +To create the executable file 'cpp' in the current directory: + make + +To install the preprocessor 'cpp' so it will be used by the C compiler: + : safety first: backup the existing version + cp /lib/cpp /lib/ocpp + : install the new version + cp cpp /lib/cpp + +Invocation + cpp [-CEPR] [-Dname] ... [-Dname=def] ... [-Idirectory] ... + [-Uname] ... [<infile>] [<outfile>] + + If there are two non-flag arguments then the first is the name of the + input file and the second is the name of the output file. If there is + one non-flag argument then it is the name of the input file and the + output is written on the standard output. If there are no non-flag + arguments then the input is taken from the standard input and the output + is written on the standard output. 
Flag arguments are: + + -C retain comments in output + -Dname define name as "1" + -Dname=def define name as def + -E ignored + -Idirectory add directory to search list for #include files + -P don't insert lines "# 12 \"foo.c\"" into output + -R allow recursive macros + -Uname undefine name + +Documentation clarifications: + Symbols defined on the command line by "-Dfoo" are defined as "1", + i.e., as if they had been defined by "#define foo 1" or "-Dfoo=1". + The directory search order for #include files is + 1) the directory of the file which contains the #include request + (e.g. #include is relative to the file being scanned when + the request is made) + 2) the directories specified by -I, in left-to-right order + 3) the standard directory(s) (which for UNIX is /usr/include) + An unescaped linefeed (the single character "\n") terminates a + character constant or quoted string. + An escaped linefeed (the two-character sequence "\\\n") may be + used in the body of a '#define' statement to continue + the definition onto the next line. The escaped linefeed is + not included in the macro body. + Comments are uniformly removed (except if the argument -C is specified). + They are also ignored, except that a comment terminates a token. + Thus "foo/* la di da */bar" may expand 'foo' and 'bar' but + will never expand 'foobar'. If neither 'foo' nor 'bar' is a + macro then the output is "foobar", even if 'foobar' + is defined as something else. The file + #define foo(a,b)b/**/a + foo(1,2) + produces "21" because the comment causes a break which enables + the recognition of 'b' and 'a' as formals in the string "b/**/a". + Macro formal parameters are recognized in '#define' bodies even inside + character constants and quoted strings. The output from + #define foo(a) '\a' + foo(bar) + is the seven characters " '\\bar'". Macro names are not recognized + inside character constants or quoted strings during the regular scan. 
+ Thus + #define foo bar + printf("foo"); + does not expand 'foo' in the second line, because it is inside + a quoted string which is not part of a '#define' macro definition. + Macros are not expanded while processing a '#define' or '#undef'. + Thus + #define foo bletch + #define bar foo + #undef foo + bar + produces "foo". The token appearing immediately after a + '#ifdef' or '#ifndef' is not expanded (of course!). + Macros are not expanded during the scan which determines the actual + parameters to another macro call. Thus + #define foo(a,b)b a + #define bar hi + foo(bar, + #define bar bye + ) + produces " bye" (and warns about the redefinition of 'bar'). + +There are some differences between the new and the old preprocessor. +Bugs fixed: + "1.e4" is recognized as a floating-point number, rather than as an + opportunity to expand the possible macro name "e4". + Any kind and amount of white space (space, tab, linefeed, vertical tab, + formfeed, carriage return) is allowed between a macro name and + the left parenthesis which introduces its actual parameters. + The comma operator is legal in preprocessor '#if' statements. + Macros with parameters are legal in preprocessor '#if' statements. + Single-character character constants are legal in preprocessor '#if' statements. + Linefeeds are put out in the proper place when a multiline comment + is not passed through to the output. + The following example expands to "# # #" : + #define foo # + foo foo foo + If the -R flag is not specified then the invocation of some recursive + macros is trapped and the recursion forcibly terminated with an + error message. The recursions that are trapped are the ones + in which the nesting level is non-decreasing from some point on. + In particular, + #define a a + a + will be detected. (Use "#undef a" if that is what you want.) + The recursion + #define a c b + #define b c a + #define c foo + a + will not be detected because the nesting level decreases after + each expansion of "c". 
+ The -R flag specifically allows recursive macros and recursion will + be strictly obeyed (to the extent that space is available). + Assuming that -R is specified: + #define a a + a + causes an infinite loop with very little output. The tail recursion + #define a <>a + a + causes the string "<>" to be output infinitely many times. The + non-tail recursion + #define a b> + #define b a< + a + complains "too much pushback", dumps the pushback, and continues + (again, infinitely). + +Stylistic choice: + Nothing (not even linefeeds) is output while a false '#if', '#ifdef', + or '#ifndef' is in effect. Thus when all conditions become true + a line of the form "# 12345 \"foo.c\"" is output (unless -P). + Error and warning messages always appear on standard error (file + descriptor 2). + Mismatch between the number of formals and actuals in a macro call + produces only a warning, and not an error. Excess actuals + are ignored; missing actuals are turned into null strings. + +Incompatibility: + The virgule '/' in "a=/*b" is interpreted as the first character of + the pair "/*" which introduces a comment, rather than as the + second character of the divide-and-replace operator "=/". + This incompatibility reflects the recent change in the C language + which made "a/=*b" the legal way to write such a statement + if the meaning "a=a/ *b" is intended. diff --git a/usr/src/cmd/cpp/cpy.y b/usr/src/cmd/cpp/cpy.y new file mode 100644 index 0000000000..a93b80fa1a --- /dev/null +++ b/usr/src/cmd/cpp/cpy.y @@ -0,0 +1,80 @@ +%term number stop DEFINED +%term EQ NE LE GE LS RS +%term ANDAND OROR +%left ',' +%right '=' +%right '?' ':' +%left OROR +%left ANDAND +%left '|' '^' +%left '&' +%binary EQ NE +%binary '<' '>' LE GE +%left LS RS +%left '+' '-' +%left '*' '/' '%' +%right '!' '~' UMINUS +%left '(' '.' 
+%% +S: e stop ={return($1);} + + +e: e '*' e + ={$$ = $1 * $3;} + | e '/' e + ={$$ = $1 / $3;} + | e '%' e + ={$$ = $1 % $3;} + | e '+' e + ={$$ = $1 + $3;} + | e '-' e + ={$$ = $1 - $3;} + | e LS e + ={$$ = $1 << $3;} + | e RS e + ={$$ = $1 >> $3;} + | e '<' e + ={$$ = $1 < $3;} + | e '>' e + ={$$ = $1 > $3;} + | e LE e + ={$$ = $1 <= $3;} + | e GE e + ={$$ = $1 >= $3;} + | e EQ e + ={$$ = $1 == $3;} + | e NE e + ={$$ = $1 != $3;} + | e '&' e + ={$$ = $1 & $3;} + | e '^' e + ={$$ = $1 ^ $3;} + | e '|' e + ={$$ = $1 | $3;} + | e ANDAND e + ={$$ = $1 && $3;} + | e OROR e + ={$$ = $1 || $3;} + | e '?' e ':' e + ={$$ = $1 ? $3 : $5;} + | e ',' e + ={$$ = $3;} + | term + ={$$ = $1;} +term: + '-' term %prec UMINUS + ={$$ = -$2;} /* $1 is the '-' token itself; the operand is $2 */ + | '!' term + ={$$ = !$2;} + | '~' term + ={$$ = ~$2;} + | '(' e ')' + ={$$ = $2;} + | DEFINED '(' number ')' + ={$$= $3;} + | DEFINED number + ={$$ = $2;} + | number + ={$$= $1;} +%% +# include "yylex.c" diff --git a/usr/src/cmd/cpp/yylex.c b/usr/src/cmd/cpp/yylex.c new file mode 100644 index 0000000000..7a6ba83c47 --- /dev/null +++ b/usr/src/cmd/cpp/yylex.c @@ -0,0 +1,86 @@ +#define isid(a) ((fastab+COFF)[a]&IB) +#define IB 1 +/* #if '\377' < 0 it would be nice if this worked properly!!!!! 
*/ +#if pdp11 | vax +#define COFF 128 +#else +#define COFF 0 +#endif + +yylex() { /* lexer for '#if' expressions: returns stop at end of line, an operator token, DEFINED, or number with its value left in yylval */ + static int ifdef=0; + static char *op2[]={"||", "&&" , ">>", "<<", ">=", "<=", "!=", "=="}; + static int val2[]={OROR, ANDAND, RS, LS, GE, LE, NE, EQ}; + static char *opc="b\bt\tn\nf\fr\r\\\\"; + extern char fastab[]; + extern char *outp,*inp,*newp; extern int flslvl; + register char savc, *s; char *skipbl(); int val; + register char **p2; + struct symtab { + char *name; + char *value; + } *sp, *lookup(); + +for (;;) { + newp=skipbl(newp); + if (*inp=='\n') return(stop); /* end of #if */ + savc= *newp; *newp='\0'; + for (p2=op2+8; --p2>=op2; ) /* check 2-char ops */ + if (0==strcmp(*p2,inp)) {val=val2[p2-op2]; goto ret;} + s="+-*/%<>&^|?:!~(),"; /* check 1-char ops */ + while (*s) if (*s++== *inp) {val= *--s; goto ret;} + if (*inp<='9' && *inp>='0') {/* a number */ + if (*inp=='0') yylval= (inp[1]=='x' || inp[1]=='X') ? + tobinary(inp+2,16) : tobinary(inp+1,8); + else yylval=tobinary(inp,10); + val=number; + } else if (isid(*inp)) { + if (0==strcmp(inp,"defined")) {ifdef=1; ++flslvl; val=DEFINED;} + else { + sp=lookup(inp,-1); if (ifdef!=0) {ifdef=0; --flslvl;} + yylval= (sp->value==0) ? 
0 : 1; + val=number; + } + } else if (*inp=='\'') {/* character constant */ + val=number; + if (inp[1]=='\\') {/* escaped */ + char c; if (newp[-1]=='\'') newp[-1]='\0'; + s=opc; + while (*s) if (*s++!=inp[2]) ++s; else {yylval= *s; goto ret;} + if (inp[2]<='9' && inp[2]>='0') yylval=c=tobinary(inp+2,8); + else yylval=inp[2]; + } else yylval=inp[1]; + } else if (0==strcmp("\\\n",inp)) {*newp=savc; continue;} + else { + *newp=savc; pperror("Illegal character %c in preprocessor if", *inp); + continue; + } +ret: + *newp=savc; outp=inp=newp; return(val); +} +} + +tobinary(st, b) char *st; { /* value of the digit string st in base b; a final l or L is accepted, any other bad character is reported via pperror and ends the scan */ + int n, c, t; + char *s; + n=0; + s=st; + while (c = *s++) { + switch(c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + t = c-'0'; break; + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + t = c-'a'+10; if (b>10) break; /* hex digits a-f are worth 10..15; otherwise fall through to the error case */ + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + t = c - 'A'+10; if (b>10) break; /* hex digits A-F are worth 10..15; otherwise fall through to the error case */ + default: + t = -1; + if ( c=='l' || c=='L') if (*s=='\0') break; + pperror("Illegal number %s", st); + } + if (t<0) break; + n = n*b+t; + } +return(n); +} -- 2.20.1