386BSD 0.1 development
[unix-history] / usr / src / usr.bin / lex / scan.l
CommitLineData
8bf101bc
WJ
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Vern Paxson of Lawrence Berkeley Laboratory.
7 *
8 * The United States Government has rights in this work pursuant
9 * to contract no. DE-AC03-76SF00098 between the United States
10 * Department of Energy and the University of California.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)scan.l 5.3 (Berkeley) 4/12/91
41 */
42
43/* scan.l - scanner for flex input */
44
45%{
46#undef yywrap
47
48#include "flexdef.h"
49#include "parse.h"
50
51#ifndef lint
52static char copyright[] =
53 "@(#) Copyright (c) 1989 The Regents of the University of California.\n";
54static char CR_continuation[] = "@(#) All rights reserved.\n";
55
56static char rcsid[] =
57 "@(#) $Header: /usr/fsys/odin/a/vern/flex/RCS/scan.l,v 2.8 90/05/26 16:53:23 vern Exp $ (LBL)";
58#endif
59
/* Copy the text of the current token verbatim to the temporary action file. */
#define ACTION_ECHO fprintf( temp_action_file, "%s", yytext )

/* Write the line that marks the end of the section 2 prolog.  The macro
 * deliberately has no trailing semicolon: every use site supplies its own,
 * exactly as with ACTION_ECHO.
 */
#define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" )

/* The scanner entry point is named flexscan() rather than the default. */
#undef YY_DECL
#define YY_DECL \
	int flexscan()

/* The statement-like macros below are wrapped in do { } while ( 0 ) so that
 * each one expands to exactly one statement.  That keeps them safe when used
 * as the body of an unbraced if/else; all callers write "MACRO;".
 */

/* Hand the first character of the token to the parser as a CHAR. */
#define RETURNCHAR \
	do \
		{ \
		yylval = yytext[0]; \
		return ( CHAR ); \
		} \
	while ( 0 )

/* Save the token text in nmstr and hand a NAME to the parser. */
#define RETURNNAME \
	do \
		{ \
		(void) strcpy( nmstr, (char *) yytext ); \
		return ( NAME ); \
		} \
	while ( 0 )

/* Push the characters of str back onto the input, last character first,
 * stopping after index "start".  Uses the scanner's local variable i.
 */
#define PUT_BACK_STRING(str, start) \
	for ( i = (int) strlen( (char *) (str) ) - 1; i >= (start); --i ) \
		unput((str)[i])

/* Set the global "reject" flag when all_upper( str ) holds. */
#define CHECK_REJECT(str) \
	do \
		{ \
		if ( all_upper( str ) ) \
			reject = true; \
		} \
	while ( 0 )

/* Set the global "yymore_used" flag when all_lower( str ) holds. */
#define CHECK_YYMORE(str) \
	do \
		{ \
		if ( all_lower( str ) ) \
			yymore_used = true; \
		} \
	while ( 0 )
86%}
87
88%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
89%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT
90%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 XLATION
91
92WS [ \t\f]+
93OPTWS [ \t\f]*
94NOT_WS [^ \t\f\n]
95
96NAME [a-z_][a-z_0-9-]*
97NOT_NAME [^a-z_\n]+
98
99SCNAME {NAME}
100
101ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2})
102
103%%
104 static int bracelevel, didadef;
105 int i, indented_code, checking_used, new_xlation;
106 int doing_codeblock = false;
107 Char nmdef[MAXLINE], myesc();
108
109^{WS} indented_code = true; BEGIN(CODEBLOCK);
110^#.*\n ++linenum; /* treat as a comment */
111^"/*" ECHO; BEGIN(C_COMMENT);
112^"%s"{NAME}? return ( SCDECL );
113^"%x"{NAME}? return ( XSCDECL );
114^"%{".*\n {
115 ++linenum;
116 line_directive_out( stdout );
117 indented_code = false;
118 BEGIN(CODEBLOCK);
119 }
120
121{WS} return ( WHITESPACE );
122
123^"%%".* {
124 sectnum = 2;
125 line_directive_out( stdout );
126 BEGIN(SECT2PROLOG);
127 return ( SECTEND );
128 }
129
^"%used"		{
			/* deprecated directive: warn, then scan the rest of
			 * the line as a list of features marked "used"
			 */
			pinpoint_message( "warning - %%used/%%unused have been deprecated" );
			checking_used = REALLY_USED; BEGIN(USED_LIST);
			}
^"%unused"		{
			/* same as %used, but the listed features are marked
			 * "not used"; the state is set once, after the warning
			 */
			pinpoint_message( "warning - %%used/%%unused have been deprecated" );
			checking_used = REALLY_NOT_USED; BEGIN(USED_LIST);
			}
139
140
141^"%"[aeknopt]" ".*\n {
142#ifdef NOTDEF
143 fprintf( stderr,
144 "old-style lex command at line %d ignored:\n\t%s",
145 linenum, yytext );
146#endif
147 ++linenum;
148 }
149
150^"%"[cr]{OPTWS} /* ignore old lex directive */
151
^"%t"{OPTWS}\n		{
			/* start of a %t translation table; anchored to the
			 * beginning of the line like the other '%' directives
			 */
			++linenum;
			xlation =
			    (int *) malloc( sizeof( int ) * (unsigned) csize );

			if ( ! xlation )
				flexfatal(
				    "dynamic memory failure building %t table" );

			/* every entry starts out as 0 */
			for ( i = 0; i < csize; ++i )
				xlation[i] = 0;

			num_xlations = 0;

			BEGIN(XLATION);
			}
168
169^"%"[^sxanpekotcru{}]{OPTWS} synerr( "unrecognized '%' directive" );
170
171^{NAME} {
172 (void) strcpy( nmstr, (char *) yytext );
173 didadef = false;
174 BEGIN(PICKUPDEF);
175 }
176
177{SCNAME} RETURNNAME;
178^{OPTWS}\n ++linenum; /* allows blank lines in section 1 */
179{OPTWS}\n ++linenum; return ( '\n' );
180. synerr( "illegal character" ); BEGIN(RECOVER);
181
182
183<C_COMMENT>"*/" ECHO; BEGIN(INITIAL);
184<C_COMMENT>"*/".*\n ++linenum; ECHO; BEGIN(INITIAL);
185<C_COMMENT>[^*\n]+ ECHO;
186<C_COMMENT>"*" ECHO;
187<C_COMMENT>\n ++linenum; ECHO;
188
189
190<CODEBLOCK>^"%}".*\n ++linenum; BEGIN(INITIAL);
191<CODEBLOCK>"reject" ECHO; CHECK_REJECT(yytext);
192<CODEBLOCK>"yymore" ECHO; CHECK_YYMORE(yytext);
193<CODEBLOCK>{NAME}|{NOT_NAME}|. ECHO;
194<CODEBLOCK>\n {
195 ++linenum;
196 ECHO;
197 if ( indented_code )
198 BEGIN(INITIAL);
199 }
200
201
202<PICKUPDEF>{WS} /* separates name and definition */
203
<PICKUPDEF>{NOT_WS}.*	{
			/* nmdef holds MAXLINE bytes including the terminating
			 * NUL; reject a definition that would not fit rather
			 * than letting strcpy() run past the end of the array
			 */
			if ( yyleng >= MAXLINE )
				synerr( "definition value too long" );

			else
				{
				(void) strcpy( (char *) nmdef, (char *) yytext );

				/* strip trailing blanks and tabs.  The
				 * parentheses matter: without them && binds
				 * tighter than ||, and the i >= 0 test would
				 * not guard the '\t' comparison
				 */
				for ( i = (int) strlen( (char *) nmdef ) - 1;
				      i >= 0 &&
				      (nmdef[i] == ' ' || nmdef[i] == '\t');
				      --i )
					;

				nmdef[i + 1] = '\0';

				ndinstal( nmstr, nmdef );
				}

			/* a definition was present (even if it was rejected
			 * above), so the newline rule must not also report
			 * "incomplete name definition"
			 */
			didadef = true;
			}
218
219<PICKUPDEF>\n {
220 if ( ! didadef )
221 synerr( "incomplete name definition" );
222 BEGIN(INITIAL);
223 ++linenum;
224 }
225
226<RECOVER>.*\n ++linenum; BEGIN(INITIAL); RETURNNAME;
227
228
229<USED_LIST>\n ++linenum; BEGIN(INITIAL);
230<USED_LIST>{WS}
231<USED_LIST>"reject" {
232 if ( all_upper( yytext ) )
233 reject_really_used = checking_used;
234 else
235 synerr( "unrecognized %used/%unused construct" );
236 }
237<USED_LIST>"yymore" {
238 if ( all_lower( yytext ) )
239 yymore_really_used = checking_used;
240 else
241 synerr( "unrecognized %used/%unused construct" );
242 }
243<USED_LIST>{NOT_WS}+ synerr( "unrecognized %used/%unused construct" );
244
245
246<XLATION>"%t"{OPTWS}\n ++linenum; BEGIN(INITIAL);
247<XLATION>^{OPTWS}[0-9]+ ++num_xlations; new_xlation = true;
248<XLATION>^. synerr( "bad row in translation table" );
249<XLATION>{WS} /* ignore whitespace */
250
251<XLATION>{ESCSEQ} {
252 xlation[myesc( yytext )] =
253 (new_xlation ? num_xlations : -num_xlations);
254 new_xlation = false;
255 }
256<XLATION>. {
257 xlation[yytext[0]] =
258 (new_xlation ? num_xlations : -num_xlations);
259 new_xlation = false;
260 }
261
262<XLATION>\n ++linenum;
263
264
265<SECT2PROLOG>.*\n/{NOT_WS} {
266 ++linenum;
267 ACTION_ECHO;
268 MARK_END_OF_PROLOG;
269 BEGIN(SECT2);
270 }
271
272<SECT2PROLOG>.*\n ++linenum; ACTION_ECHO;
273
274<SECT2PROLOG><<EOF>> MARK_END_OF_PROLOG; yyterminate();
275
276<SECT2>^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */
277
278<SECT2>^({WS}|"%{") {
279 indented_code = (yytext[0] != '%');
280 doing_codeblock = true;
281 bracelevel = 1;
282
283 if ( indented_code )
284 ACTION_ECHO;
285
286 BEGIN(CODEBLOCK_2);
287 }
288
289<SECT2>"<" BEGIN(SC); return ( '<' );
290<SECT2>^"^" return ( '^' );
291<SECT2>\" BEGIN(QUOTE); return ( '"' );
292<SECT2>"{"/[0-9] BEGIN(NUM); return ( '{' );
293<SECT2>"{"[^0-9\n][^}\n]* BEGIN(BRACEERROR);
294<SECT2>"$"/[ \t\n] return ( '$' );
295
296<SECT2>{WS}"%{" {
297 bracelevel = 1;
298 BEGIN(PERCENT_BRACE_ACTION);
299 return ( '\n' );
300 }
301<SECT2>{WS}"|".*\n continued_action = true; ++linenum; return ( '\n' );
302
303<SECT2>{WS} {
304 /* this rule is separate from the one below because
305 * otherwise we get variable trailing context, so
306 * we can't build the scanner using -{f,F}
307 */
308 bracelevel = 0;
309 continued_action = false;
310 BEGIN(ACTION);
311 return ( '\n' );
312 }
313
314<SECT2>{OPTWS}/\n {
315 bracelevel = 0;
316 continued_action = false;
317 BEGIN(ACTION);
318 return ( '\n' );
319 }
320
321<SECT2>^{OPTWS}\n ++linenum; return ( '\n' );
322
323<SECT2>"<<EOF>>" return ( EOF_OP );
324
325<SECT2>^"%%".* {
326 sectnum = 3;
327 BEGIN(SECT3);
328 return ( EOF ); /* to stop the parser */
329 }
330
331<SECT2>"["([^\\\]\n]|{ESCSEQ})+"]" {
332 int cclval;
333
334 (void) strcpy( nmstr, (char *) yytext );
335
336 /* check to see if we've already encountered this ccl */
337 if ( (cclval = ccllookup( (Char *) nmstr )) )
338 {
339 yylval = cclval;
340 ++cclreuse;
341 return ( PREVCCL );
342 }
343 else
344 {
345 /* we fudge a bit. We know that this ccl will
346 * soon be numbered as lastccl + 1 by cclinit
347 */
348 cclinstal( (Char *) nmstr, lastccl + 1 );
349
350 /* push back everything but the leading bracket
351 * so the ccl can be rescanned
352 */
353 PUT_BACK_STRING((Char *) nmstr, 1);
354
355 BEGIN(FIRSTCCL);
356 return ( '[' );
357 }
358 }
359
360<SECT2>"{"{NAME}"}" {
361 register Char *nmdefptr;
362 Char *ndlookup();
363
364 (void) strcpy( nmstr, (char *) yytext );
365 nmstr[yyleng - 1] = '\0'; /* chop trailing brace */
366
367 /* lookup from "nmstr + 1" to chop leading brace */
368 if ( ! (nmdefptr = ndlookup( nmstr + 1 )) )
369 synerr( "undefined {name}" );
370
371 else
372 { /* push back name surrounded by ()'s */
373 unput(')');
374 PUT_BACK_STRING(nmdefptr, 0);
375 unput('(');
376 }
377 }
378
379<SECT2>[/|*+?.()] return ( yytext[0] );
380<SECT2>. RETURNCHAR;
381<SECT2>\n ++linenum; return ( '\n' );
382
383
384<SC>"," return ( ',' );
385<SC>">" BEGIN(SECT2); return ( '>' );
386<SC>">"/"^" BEGIN(CARETISBOL); return ( '>' );
387<SC>{SCNAME} RETURNNAME;
388<SC>. synerr( "bad start condition name" );
389
390<CARETISBOL>"^" BEGIN(SECT2); return ( '^' );
391
392
393<QUOTE>[^"\n] RETURNCHAR;
394<QUOTE>\" BEGIN(SECT2); return ( '"' );
395
396<QUOTE>\n {
397 synerr( "missing quote" );
398 BEGIN(SECT2);
399 ++linenum;
400 return ( '"' );
401 }
402
403
404<FIRSTCCL>"^"/[^-\n] BEGIN(CCL); return ( '^' );
405<FIRSTCCL>"^"/- return ( '^' );
406<FIRSTCCL>- BEGIN(CCL); yylval = '-'; return ( CHAR );
407<FIRSTCCL>. BEGIN(CCL); RETURNCHAR;
408
409<CCL>-/[^\]\n] return ( '-' );
410<CCL>[^\]\n] RETURNCHAR;
411<CCL>"]" BEGIN(SECT2); return ( ']' );
412
413
414<NUM>[0-9]+ {
415 yylval = myctoi( yytext );
416 return ( NUMBER );
417 }
418
419<NUM>"," return ( ',' );
420<NUM>"}" BEGIN(SECT2); return ( '}' );
421
422<NUM>. {
423 synerr( "bad character inside {}'s" );
424 BEGIN(SECT2);
425 return ( '}' );
426 }
427
428<NUM>\n {
429 synerr( "missing }" );
430 BEGIN(SECT2);
431 ++linenum;
432 return ( '}' );
433 }
434
435
436<BRACEERROR>"}" synerr( "bad name in {}'s" ); BEGIN(SECT2);
437<BRACEERROR>\n synerr( "missing }" ); ++linenum; BEGIN(SECT2);
438
439
440<PERCENT_BRACE_ACTION,CODEBLOCK_2>{OPTWS}"%}".* bracelevel = 0;
441<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"reject" {
442 ACTION_ECHO;
443 CHECK_REJECT(yytext);
444 }
445<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"yymore" {
446 ACTION_ECHO;
447 CHECK_YYMORE(yytext);
448 }
449<PERCENT_BRACE_ACTION,CODEBLOCK_2>{NAME}|{NOT_NAME}|. ACTION_ECHO;
450<PERCENT_BRACE_ACTION,CODEBLOCK_2>\n {
451 ++linenum;
452 ACTION_ECHO;
453 if ( bracelevel == 0 ||
454 (doing_codeblock && indented_code) )
455 {
456 if ( ! doing_codeblock )
457 fputs( "\tYY_BREAK\n", temp_action_file );
458
459 doing_codeblock = false;
460 BEGIN(SECT2);
461 }
462 }
463
464
465 /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
466<ACTION>"{" ACTION_ECHO; ++bracelevel;
467<ACTION>"}" ACTION_ECHO; --bracelevel;
468<ACTION>[^a-z_{}"'/\n]+ ACTION_ECHO;
469<ACTION>{NAME} ACTION_ECHO;
470<ACTION>"/*" ACTION_ECHO; BEGIN(ACTION_COMMENT);
471<ACTION>"'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */
472<ACTION>\" ACTION_ECHO; BEGIN(ACTION_STRING);
473<ACTION>\n {
474 ++linenum;
475 ACTION_ECHO;
476 if ( bracelevel == 0 )
477 {
478 fputs( "\tYY_BREAK\n", temp_action_file );
479 BEGIN(SECT2);
480 }
481 }
482<ACTION>. ACTION_ECHO;
483
484<ACTION_COMMENT>"*/" ACTION_ECHO; BEGIN(ACTION);
485<ACTION_COMMENT>[^*\n]+ ACTION_ECHO;
486<ACTION_COMMENT>"*" ACTION_ECHO;
487<ACTION_COMMENT>\n ++linenum; ACTION_ECHO;
488<ACTION_COMMENT>. ACTION_ECHO;
489
490<ACTION_STRING>[^"\\\n]+ ACTION_ECHO;
491<ACTION_STRING>\\. ACTION_ECHO;
492<ACTION_STRING>\n ++linenum; ACTION_ECHO;
493<ACTION_STRING>\" ACTION_ECHO; BEGIN(ACTION);
494<ACTION_STRING>. ACTION_ECHO;
495
496<ACTION,ACTION_COMMENT,ACTION_STRING><<EOF>> {
497 synerr( "EOF encountered inside an action" );
498 yyterminate();
499 }
500
501
502<SECT2,QUOTE,CCL>{ESCSEQ} {
503 yylval = myesc( yytext );
504 return ( CHAR );
505 }
506
507<FIRSTCCL>{ESCSEQ} {
508 yylval = myesc( yytext );
509 BEGIN(CCL);
510 return ( CHAR );
511 }
512
513
514<SECT3>.*(\n?) ECHO;
515%%
516
517
518int yywrap()
519
520 {
521 if ( --num_input_files > 0 )
522 {
523 set_input_file( *++input_files );
524 return ( 0 );
525 }
526
527 else
528 return ( 1 );
529 }
530
531
532/* set_input_file - open the given file (if NULL, stdin) for scanning */
533
534void set_input_file( file )
535char *file;
536
537 {
538 if ( file )
539 {
540 infilename = file;
541 yyin = fopen( infilename, "r" );
542
543 if ( yyin == NULL )
544 lerrsf( "can't open %s", file );
545 }
546
547 else
548 {
549 yyin = stdin;
550 infilename = "<stdin>";
551 }
552 }