/* $RCSfile: toke.c,v $$Revision: 4.0.1.9 $$Date: 1993/02/05 19:48:43 $
* Copyright (c) 1991, Larry Wall
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
* Revision 4.0.1.9 1993/02/05 19:48:43 lwall
* patch36: now detects ambiguous use of filetest operators as well as unary
* patch36: fixed ambiguity on - within tr///
* Revision 4.0.1.8 92/06/23 12:33:45 lwall
* patch35: bad interaction between backslash and hyphen in tr///
* Revision 4.0.1.7 92/06/11 21:16:30 lwall
* patch34: expectterm incorrectly set to indicate start of program or block
* Revision 4.0.1.6 92/06/08 16:03:49 lwall
* patch20: an EXPR may now start with a bareword
* patch20: print $fh EXPR can now expect term rather than operator in EXPR
* patch20: added ... as variant on ..
* patch20: new warning on spurious backslash
* patch20: new warning on missing $ for foreach variable
* patch20: "foo"x1024 now legal without space after x
* patch20: new warning on print accidentally used as function
* patch20: tr/stuff// wasn't working right
* patch20: 2. now eats the dot
* patch20: <@ARGV> now notices @ARGV
* patch20: tr/// now lets you say \-
* Revision 4.0.1.5 91/11/11 16:45:51 lwall
* patch19: default arg for shift was wrong after first subroutine definition
* Revision 4.0.1.4 91/11/05 19:02:48 lwall
* patch11: \x and \c were subject to double interpretation in regexps
* patch11: prepared for ctype implementations that don't define isascii()
* patch11: nested list operators could miscount parens
* patch11: once-thru blocks didn't display right in the debugger
* patch11: sort eval "whatever" didn't work
* patch11: underscore is now allowed within literal octal and hex numbers
* Revision 4.0.1.3 91/06/10 01:32:26 lwall
* patch10: m'$foo' now treats string as single quoted
* patch10: certain pattern optimizations were botched
* Revision 4.0.1.2 91/06/07 12:05:56 lwall
* patch4: new copyright notice
* patch4: debugger lost track of lines in eval
* patch4: //o and s///o now optimize themselves fully at runtime
* patch4: added global modifier for pattern matches
* Revision 4.0.1.1 91/04/12 09:18:18 lwall
* patch1: perl -de "print" wouldn't stop at the first statement
* Revision 4.0 91/03/20 01:42:14 lwall
/* which backslash sequences to keep in m// or s// */
static char *patleave
= "\\.^$@dDwWsSbB+*?|()-nrtfeaxc0123456789[{]}";
char *reparse
; /* if non-null, scanident found ${foo[$bar]} */
#define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))
#define PERL_META(c) ((c) | 128)
#define META(c) ((c) | 128)
#define RETURN(retval) return (bufptr = s,(int)retval)
#define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
#define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
#define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
#define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
#define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
#define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
#define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
#define FUN2x(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2x)
#define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
#define FUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC4)
#define FUN5(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC5)
#define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
#define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
#define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
#define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
#define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
#define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
#define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
#define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
#define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
#define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
#define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
#define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
#define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
#define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
#define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
/* This bit of chicanery makes a unary function followed by
* a parenthesis into a function with one argument, highest precedence.
#define UNI(f) return(yylval.ival = f, \
(*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
/* This does similarly for list operators, merely by pretending that the
* paren came before the listop rather than after.
#define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
(*s = (char) PERL_META('('), bufptr = oldbufptr, '(') : \
(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
#define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
(*s = (char) META('('), bufptr = oldbufptr, '(') : \
(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
/* grandfather return to old style */
#define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
while (s
< bufend
&& isSPACE(*s
))
if (oldoldbufptr
!= last_uni
)
while (isSPACE(*last_uni
))
for (s
= last_uni
; isALNUM(*s
) || *s
== '-'; s
++) ;
warn("Warning: Use of \"%s\" without parens is ambiguous", last_uni
);
#define UNI(f) return uni(f,s)
#define LOP(f) return lop(f,s)
register char *s
= bufptr
;
static bool in_format
= FALSE
;
static bool firstline
= TRUE
;
extern int yychar
; /* last token */
oldoldbufptr
= oldbufptr
;
fprintf(stderr
,"Tokener at %s",s
);
fprintf(stderr
,"Tokener at %s\n",s
);
else if ((*s
& 127) == '}') {
warn("Unrecognized character \\%03o ignored", *s
++ & 255);
else if ((*s
& 127) == '}') {
warn("Unrecognized character \\%03o ignored", *s
++ & 255);
goto fake_eof
; /* emulate EOF on ^D or ^Z */
goto retry
; /* ignore stray nulls */
if (minus_n
|| minus_p
|| perldb
) {
char *pdb
= getenv("PERLDB");
str_cat(linestr
, pdb
? pdb
: "require 'perldb.pl'");
if (minus_n
|| minus_p
) {
str_cat(linestr
,"line: while (<>) {");
str_cat(linestr
,"chop;");
str_cat(linestr
,"@F=split(' ');");
oldoldbufptr
= oldbufptr
= s
= str_get(linestr
);
bufend
= linestr
->str_ptr
+ linestr
->str_cur
;
yylval
.formval
= load_format();
oldoldbufptr
= oldbufptr
= s
= str_get(linestr
) + 1;
bufend
= linestr
->str_ptr
+ linestr
->str_cur
;
if ((s
= str_gets(linestr
, rsfp
, 0)) == Nullch
) {
else if ((FILE*)rsfp
== stdin
)
if (minus_n
|| minus_p
) {
str_set(linestr
,minus_p
? ";}continue{print" : "");
oldoldbufptr
= oldbufptr
= s
= str_get(linestr
);
bufend
= linestr
->str_ptr
+ linestr
->str_cur
;
oldoldbufptr
= oldbufptr
= s
= str_get(linestr
);
RETURN(';'); /* not infinite loop because rsfp is NULL now */
if (doextract
&& *linestr
->str_ptr
== '#')
oldoldbufptr
= oldbufptr
= bufptr
= s
;
STR
*str
= Str_new(85,0);
astore(stab_xarray(curcmd
->c_filestab
),(int)curcmd
->c_line
,str
);
bufend
= linestr
->str_ptr
+ linestr
->str_cur
;
if (curcmd
->c_line
== 1) {
if (*s
== '#' && s
[1] == '!') {
if (!in_eval
&& !instr(s
,"perl") && instr(origargv
[0],"perl")) {
while (s
< bufend
&& !isSPACE(*s
))
while (s
< bufend
&& isSPACE(*s
))
Newz(899,newargv
,origargc
+3,char*);
while (s
< bufend
&& !isSPACE(*s
))
Copy(origargv
+1, newargv
+2, origargc
+1, char*);
fatal("Can't exec %s", cmd
);
while (s
< bufend
&& isSPACE(*s
))
if (*s
== ':') /* for csh's that have to exec sh scripts */
case ' ': case '\t': case '\f': case '\r': case 013:
if (preprocess
&& s
== str_get(linestr
) &&
s
[1] == ' ' && (isDIGIT(s
[2]) || strnEQ(s
+2,"line ",5)) ) {
while (*s
&& !isDIGIT(*s
))
curcmd
->c_line
= atoi(s
)-1;
while (s
< d
&& isSPACE(*s
)) s
++;
s
[strlen(s
)-1] = '\0'; /* wipe out newline */
s
[strlen(s
)-1] = '\0'; /* wipe out trailing quote */
curcmd
->c_filestab
= fstab(s
);
curcmd
->c_filestab
= fstab(origfilename
);
oldoldbufptr
= oldbufptr
= s
= str_get(linestr
);
while (s
< d
&& *s
!= '\n')
yylval
.formval
= load_format();
oldoldbufptr
= oldbufptr
= s
= bufptr
+ 1;
if (s
[1] && isALPHA(s
[1]) && !isALPHA(s
[2])) {
case 'r': FTST(O_FTEREAD
);
case 'w': FTST(O_FTEWRITE
);
case 'x': FTST(O_FTEEXEC
);
case 'o': FTST(O_FTEOWNED
);
case 'R': FTST(O_FTRREAD
);
case 'W': FTST(O_FTRWRITE
);
case 'X': FTST(O_FTREXEC
);
case 'O': FTST(O_FTROWNED
);
case 'z': FTST(O_FTZERO
);
case 's': FTST(O_FTSIZE
);
case 'f': FTST(O_FTFILE
);
case 'l': FTST(O_FTLINK
);
case 'p': FTST(O_FTPIPE
);
case 'S': FTST(O_FTSOCK
);
case 'u': FTST(O_FTSUID
);
case 'g': FTST(O_FTSGID
);
case 'k': FTST(O_FTSVTX
);
case 'T': FTST(O_FTTEXT
);
case 'B': FTST(O_FTBINARY
);
case 'M': stabent("\024",TRUE
); FTST(O_FTMTIME
);
case 'A': stabent("\024",TRUE
); FTST(O_FTATIME
);
case 'C': stabent("\024",TRUE
); FTST(O_FTCTIME
);
if (isSPACE(*s
) || !isSPACE(*bufptr
))
if (isSPACE(*s
) || !isSPACE(*bufptr
))
s
= scanident(s
,bufend
,tokenbuf
);
yylval
.stabval
= stabent(tokenbuf
,TRUE
);
s
= scanident(s
,bufend
,tokenbuf
);
yylval
.stabval
= hadd(stabent(tokenbuf
,TRUE
));
yylval
.ival
= curcmd
->c_line
;
if (isSPACE(*s
) || *s
== '#')
cmdline
= NOLINE
; /* invalidate current command line number */
if (curcmd
->c_line
< cmdline
)
cmdline
= curcmd
->c_line
;
while (s
< d
&& isSPACE(*s
))
if (isALPHA(*s
) || *s
== '_' || *s
== '\'')
*(--s
) = '\\'; /* force next ident to WORD */
if (s
[1] != '<' && !index(s
,'>'))
s
= scanstr(s
, SCAN_DEF
);
while (isALNUM(*s) || *s == '\'') \
if (s
[1] == '#' && (isALPHA(s
[2]) || s
[2] == '_')) {
s
= scanident(s
,bufend
,tokenbuf
);
yylval
.stabval
= aadd(stabent(tokenbuf
,TRUE
));
s
= scanident(s
,bufend
,tokenbuf
);
if (reparse
) { /* turn ${foo[bar]} into ($foo[bar]) */
yylval
.stabval
= stabent(tokenbuf
,TRUE
);
if (isSPACE(*s
) && oldoldbufptr
&& oldoldbufptr
< bufptr
) {
while (isSPACE(*oldoldbufptr
))
if (*oldoldbufptr
== 'p' && strnEQ(oldoldbufptr
,"print",5)) {
if (index("&*<%", *s
) && isALPHA(s
[1]))
expectterm
= TRUE
; /* e.g. print $fh &sub */
else if (*s
== '.' && isDIGIT(s
[1]))
expectterm
= TRUE
; /* e.g. print $fh .3 */
else if (index("/?-+", *s
) && !isSPACE(s
[1]))
expectterm
= TRUE
; /* e.g. print $fh -1 */
s
= scanident(s
,bufend
,tokenbuf
);
yylval
.stabval
= aadd(stabent(tokenbuf
,TRUE
));
case '/': /* may either be division or pattern */
case '?': /* may either be conditional or pattern */
if (!expectterm
|| !isDIGIT(s
[1])) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '\'': case '"': case '`':
s
= scanstr(s
, SCAN_DEF
);
case '\\': /* some magic to force next word to be a WORD */
s
++; /* used by do and sub to force a separate namespace */
if (!isALPHA(*s
) && *s
!= '_' && *s
!= '\'') {
warn("Spurious backslash ignored");
if (strEQ(d
,"__LINE__") || strEQ(d
,"__FILE__")) {
(void)sprintf(tokenbuf
,"%ld",(long)curcmd
->c_line
);
strcpy(tokenbuf
, stab_val(curcmd
->c_filestab
)->str_ptr
);
arg
[1].arg_type
= A_SINGLE
;
arg
[1].arg_ptr
.arg_str
= str_make(tokenbuf
,strlen(tokenbuf
));
else if (strEQ(d
,"__END__")) {
if (!in_eval
&& (stab
= stabent("DATA",FALSE
))) {
stab
->str_pok
|= SP_MULTI
;
stab_io(stab
) = stio_new();
stab_io(stab
)->ifp
= rsfp
;
#if defined(HAS_FCNTL) && defined(F_SETFD)
fcntl(fd
,F_SETFD
,fd
>= 3);
stab_io(stab
)->type
= '|';
else if ((FILE*)rsfp
== stdin
)
stab_io(stab
)->type
= '-';
stab_io(stab
)->type
= '<';
(void)stabent("ENV",TRUE
); /* may use HOME */
static int cryptseen
= 0;
while (s
< d
&& isSPACE(*s
))
if (isALPHA(*s
) || *s
== '_')
*(--s
) = '\\'; /* force next ident to WORD */
yylval
.ival
= curcmd
->c_line
;
if (strEQ(d
,"eq") || strEQ(d
,"EQ"))
allstabs
= TRUE
; /* must initialize everything since */
UNI(O_EVAL
); /* we don't know what will be used */
if (strEQ(d
,"endhostent"))
if (strEQ(d
,"endnetent"))
if (strEQ(d
,"endservent"))
if (strEQ(d
,"endprotoent"))
if (strEQ(d
,"for") || strEQ(d
,"foreach")) {
yylval
.ival
= curcmd
->c_line
;
while (s
< bufend
&& isSPACE(*s
))
fatal("Missing $ on loop variable");
while (s
< d
&& isSPACE(*s
))
if (isALPHA(*s
) || *s
== '_')
*(--s
) = '\\'; /* force next ident to WORD */
allstabs
= TRUE
; /* must initialize everything since */
OPERATOR(FORMAT
); /* we don't know what will be used */
if (strEQ(d
,"gt") || strEQ(d
,"GT"))
if (strEQ(d
,"ge") || strEQ(d
,"GE"))
if (strEQ(d
,"protobyname"))
if (strEQ(d
,"protobynumber"))
if (strEQ(d
,"hostbyname"))
if (strEQ(d
,"hostbyaddr"))
if (strEQ(d
,"netbyname"))
if (strEQ(d
,"netbyaddr"))
if (strEQ(d
,"servbyname"))
if (strEQ(d
,"servbyport"))
yylval
.ival
= curcmd
->c_line
;
if (strEQ(d
,"lt") || strEQ(d
,"LT"))
if (strEQ(d
,"le") || strEQ(d
,"LE"))
if (strEQ(d
,"localtime"))
RETURN(1); /* force error */
if (strEQ(d
,"ne") || strEQ(d
,"NE"))
checkcomma(s
,d
,"filehandle");
checkcomma(s
,d
,"filehandle");
s
= scanstr(s
-1, SCAN_DEF
);
s
= scanstr(s
-2, SCAN_DEF
);
s
= scanstr(s
-2, SCAN_DEF
);
if (strEQ(d
,"require")) {
allstabs
= TRUE
; /* must initialize everything since */
UNI(O_REQUIRE
); /* we don't know what will be used */
if (strEQ(d
,"rewinddir"))
RETURN(1); /* force error */
if (strEQ(d
,"setpriority"))
if (strEQ(d
,"sethostent"))
if (strEQ(d
,"setnetent"))
if (strEQ(d
,"setservent"))
if (strEQ(d
,"setprotoent"))
if (strEQ(d
,"setsockopt"))
if (strEQ(d
,"socketpair"))
checkcomma(s
,d
,"subroutine name");
while (s
< d
&& isSPACE(*s
)) s
++;
if (*s
== ';' || *s
== ')') /* probably a close */
fatal("sort is now a reserved word");
if (isALPHA(*s
) || *s
== '_') {
for (d
= s
; isALNUM(*d
); d
++) ;
if (strNE(tokenbuf
,"keys") &&
strNE(tokenbuf
,"values") &&
strNE(tokenbuf
,"split") &&
strNE(tokenbuf
,"grep") &&
strNE(tokenbuf
,"readdir") &&
strNE(tokenbuf
,"unpack") &&
strNE(tokenbuf
,"eval") &&
(d
>= bufend
|| isSPACE(*d
)) )
*(--s
) = '\\'; /* force next ident to WORD */
yylval
.ival
= savestack
->ary_fill
; /* restore stuff on reduce */
subline
= curcmd
->c_line
;
while (s
< d
&& isSPACE(*s
))
if (isALPHA(*s
) || *s
== '_' || *s
== '\'') {
str_sset(subname
,curstname
);
for (d
= s
+1; isALNUM(*d
) || *d
== '\''; d
++)
*(--s
) = '\\'; /* force next ident to WORD */
RETURN(1); /* force error */
yylval
.ival
= curcmd
->c_line
;
yylval
.ival
= curcmd
->c_line
;
if (strEQ(d
,"unshift")) {
yylval
.ival
= curcmd
->c_line
;
if (strEQ(d
,"wantarray")) {
yylval
.arg
->arg_type
= O_ITEM
;
yylval
.arg
[1].arg_type
= A_WANTARRAY
;
if (*s
== 'x' && isDIGIT(s
[1]) && !expectterm
) {
yylval
.cval
= savestr(d
);
if (expectterm
== 2) { /* special case: start of statement */
if (oldoldbufptr
&& oldoldbufptr
< bufptr
) {
while (isSPACE(*oldoldbufptr
))
if (*oldoldbufptr
== 'p' && strnEQ(oldoldbufptr
,"print",5))
else if (*oldoldbufptr
== 's' && strnEQ(oldoldbufptr
,"sort",4))
return (CLINE
, bufptr
= s
, (int)WORD
);
if (dowarn
&& *s
== ' ' && s
[1] == '(') {
for (w
++; *w
&& isSPACE(*w
); w
++) ;
if (!w
|| !*w
|| !index(";|}", *w
)) /* an advisory hack only... */
warn("%s (...) interpreted as function",name
);
while (s
< bufend
&& isSPACE(*s
))
while (s
< bufend
&& isSPACE(*s
))
if (isALPHA(*s
) || *s
== '_') {
while (s
< bufend
&& isSPACE(*s
))
"tell eof times getlogin wait length shift umask getppid \
cos exp int log rand sin sqrt ord wantarray",
fatal("No comma allowed after %s", what
);
while (isALNUM(*s
) || *s
== '\'')
while (d
> dest
+1 && d
[-1] == '\'')
if (*d
== '{' /* } */ ) {
while (s
< send
&& brackets
) {
if (!reparse
&& (d
== dest
|| (*s
&& isALNUM(*s
) ))) {
if (reparse
&& reparse
== s
- 1)
if (*d
== '^' && (isUPPER(*s
) || index("[\\]^_?", *s
))) {
scanconst(spat
,string
,len
)
char *origstring
= string
;
if (ninstr(string
, string
+len
, vert
, vert
+1))
tmpstr
= Str_new(86,len
);
str_nset(tmpstr
,string
,len
);
tmpstr
->str_u
.str_useful
= 100;
case '.': case '[': case '$': case '(': case ')': case '|': case '+':
if (d
[1] && index("wWbB0123456789sSdDlLuUExc",d
[1])) {
if (d
[1] == '*' || (d
[1] == '{' && d
[2] == '0') || d
[1] == '?') {
spat
->spat_flags
|= SPAT_ALL
;
spat
->spat_flags
|= SPAT_SCANFIRST
;
spat
->spat_short
= tmpstr
;
STR
*str
= Str_new(93,0);
spat
->spat_next
= curstash
->tbl_spatroot
; /* link into spat list */
curstash
->tbl_spatroot
= spat
;
spat
->spat_flags
|= SPAT_ONCE
;
s
= str_append_till(str
,s
,bufend
,s
[-1],patleave
);
yyerror("Search pattern not terminated");
while (*s
== 'i' || *s
== 'o' || *s
== 'g') {
spat
->spat_flags
|= SPAT_FOLD
;
spat
->spat_flags
|= SPAT_KEEP
;
spat
->spat_flags
|= SPAT_GLOBAL
;
else if ((*d
== '$' && d
[1] && d
[1] != '|' && d
[1] != ')') ||
spat
->spat_runtime
= arg
= op_new(1);
arg
[1].arg_type
= A_DOUBLE
;
arg
[1].arg_ptr
.arg_str
= str_smake(str
);
d
= scanident(d
,bufend
,buf
);
(void)stabent(buf
,TRUE
); /* make sure it's created */
else if (*d
== '$' && d
[1] && d
[1] != '|' && d
[1] != ')') {
d
= scanident(d
,bufend
,buf
);
d
= scanident(d
,bufend
,buf
);
if (strEQ(buf
,"ARGV") || strEQ(buf
,"ENV") ||
strEQ(buf
,"SIG") || strEQ(buf
,"INC"))
goto got_pat
; /* skip compiling for now */
if (spat
->spat_flags
& SPAT_FOLD
)
StructCopy(spat
, &savespat
, SPAT
);
scanconst(spat
,str
->str_ptr
,len
);
if ((spat
->spat_flags
& SPAT_ALL
) && (spat
->spat_flags
& SPAT_SCANFIRST
)) {
fbmcompile(spat
->spat_short
, spat
->spat_flags
& SPAT_FOLD
);
spat
->spat_regexp
= regcomp(str
->str_ptr
,str
->str_ptr
+len
,
spat
->spat_flags
& SPAT_FOLD
);
/* Note that this regexp can still be used if someone says
* something like /a/ && s//b/; so we can't delete it.
if (spat
->spat_flags
& SPAT_FOLD
)
StructCopy(&savespat
, spat
, SPAT
);
fbmcompile(spat
->spat_short
, spat
->spat_flags
& SPAT_FOLD
);
spat
->spat_regexp
= regcomp(str
->str_ptr
,str
->str_ptr
+len
,
spat
->spat_flags
& SPAT_FOLD
);
yylval
.arg
= make_match(O_MATCH
,stab2arg(A_STAB
,defstab
),spat
);
register char *s
= start
;
STR
*str
= Str_new(93,0);
if (term
&& (d
= index("([{< )]}> )]}>",term
)))
spat
->spat_next
= curstash
->tbl_spatroot
; /* link into spat list */
curstash
->tbl_spatroot
= spat
;
s
= str_append_till(str
,s
+1,bufend
,term
,patleave
);
yyerror("Substitution pattern not terminated");
for (d
= str
->str_ptr
; d
< e
; d
++) {
else if ((*d
== '$' && d
[1] && d
[1] != '|' && /*(*/ d
[1] != ')') ||
spat
->spat_runtime
= arg
= op_new(1);
arg
[1].arg_type
= A_DOUBLE
;
arg
[1].arg_ptr
.arg_str
= str_smake(str
);
(void)stabent(buf
,TRUE
); /* make sure it's created */
if (*d
== '$' && d
[1] && d
[-1] != '\\' && d
[1] != '|') {
else if (*d
== '@' && d
[-1] != '\\') {
if (strEQ(buf
,"ARGV") || strEQ(buf
,"ENV") ||
strEQ(buf
,"SIG") || strEQ(buf
,"INC"))
goto get_repl
; /* skip compiling for now */
scanconst(spat
,str
->str_ptr
,len
);
s
= scanstr(s
, SCAN_REPL
);
yyerror("Substitution replacement not terminated");
spat
->spat_repl
= yylval
.arg
;
if ((spat
->spat_repl
[1].arg_type
& A_MASK
) == A_SINGLE
)
spat
->spat_flags
|= SPAT_CONST
;
else if ((spat
->spat_repl
[1].arg_type
& A_MASK
) == A_DOUBLE
) {
spat
->spat_flags
|= SPAT_CONST
;
tmpstr
= spat
->spat_repl
[1].arg_ptr
.arg_str
;
e
= tmpstr
->str_ptr
+ tmpstr
->str_cur
;
for (t
= tmpstr
->str_ptr
; t
< e
; t
++) {
if (*t
== '$' && t
[1] && (index("`'&+0123456789",t
[1]) ||
(t
[1] == '{' /*}*/ && isDIGIT(t
[2])) ))
spat
->spat_flags
&= ~SPAT_CONST
;
while (*s
== 'g' || *s
== 'i' || *s
== 'e' || *s
== 'o') {
if ((spat
->spat_repl
[1].arg_type
& A_MASK
) == A_DOUBLE
)
spat
->spat_repl
[1].arg_type
= A_SINGLE
;
spat
->spat_repl
= make_op(
(!es
&& spat
->spat_repl
[1].arg_type
== A_SINGLE
spat
->spat_flags
&= ~SPAT_CONST
;
spat
->spat_flags
|= SPAT_GLOBAL
;
spat
->spat_flags
|= SPAT_FOLD
;
if (!(spat
->spat_flags
& SPAT_SCANFIRST
)) {
str_free(spat
->spat_short
); /* anchored opt doesn't do */
spat
->spat_short
= Nullstr
; /* case insensitive match */
spat
->spat_flags
|= SPAT_KEEP
;
if (spat
->spat_short
&& (spat
->spat_flags
& SPAT_SCANFIRST
))
fbmcompile(spat
->spat_short
, spat
->spat_flags
& SPAT_FOLD
);
if (!spat
->spat_runtime
) {
spat
->spat_regexp
= regcomp(str
->str_ptr
,str
->str_ptr
+len
,
spat
->spat_flags
& SPAT_FOLD
);
yylval
.arg
= make_match(O_SUBST
,stab2arg(A_STAB
,defstab
),spat
);
if (!spat
->spat_short
&& spat
->spat_regexp
->regstart
&&
(!spat
->spat_regexp
->regmust
|| spat
->spat_regexp
->reganch
& ROPT_ANCH
)
if (!(spat
->spat_regexp
->reganch
& ROPT_ANCH
))
spat
->spat_flags
|= SPAT_SCANFIRST
;
else if (spat
->spat_flags
& SPAT_FOLD
)
spat
->spat_short
= str_smake(spat
->spat_regexp
->regstart
);
else if (spat
->spat_regexp
->regmust
) {/* is there a better short-circuit? */
str_eq(spat
->spat_short
,spat
->spat_regexp
->regmust
))
if (spat
->spat_flags
& SPAT_SCANFIRST
) {
str_free(spat
->spat_short
);
spat
->spat_short
= Nullstr
;
str_free(spat
->spat_regexp
->regmust
);
spat
->spat_regexp
->regmust
= Nullstr
;
if (!spat
->spat_short
|| /* promote the better string */
((spat
->spat_flags
& SPAT_SCANFIRST
) &&
(spat
->spat_short
->str_cur
< spat
->spat_regexp
->regmust
->str_cur
) )){
str_free(spat
->spat_short
); /* ok if null */
spat
->spat_short
= spat
->spat_regexp
->regmust
;
spat
->spat_regexp
->regmust
= Nullstr
;
spat
->spat_flags
|= SPAT_SCANFIRST
;
register char *s
= start
;
l(make_op(O_TRANS
,2,stab2arg(A_STAB
,defstab
),Nullarg
,Nullarg
));
arg
[2].arg_type
= A_NULL
;
arg
[2].arg_ptr
.arg_cval
= (char*) tbl
;
yyerror("Translation pattern not terminated");
tstr
= yylval
.arg
[1].arg_ptr
.arg_str
;
yylval
.arg
[1].arg_ptr
.arg_str
= Nullstr
;
s
= scanstr(s
, SCAN_TR
|SCAN_REPL
);
yyerror("Translation replacement not terminated");
rstr
= yylval
.arg
[1].arg_ptr
.arg_str
;
yylval
.arg
[1].arg_ptr
.arg_str
= Nullstr
;
complement
= delete = squash
= 0;
while (*s
== 'c' || *s
== 'd' || *s
== 's') {
arg
[2].arg_len
= delete|squash
;
for (i
= 0; i
< tlen
; i
++)
for (i
= 0, j
= 0; i
< 256; i
++) {
for (i
= 0; i
< 256; i
++)
for (i
= 0, j
= 0; i
< tlen
; i
++,j
++) {
if (tbl
[t
[i
] & 0377] == -1)
if (tbl
[t
[i
] & 0377] == -1)
tbl
[t
[i
] & 0377] = r
[j
] & 0377;
register char *s
= start
;
register bool makesingle
= FALSE
;
bool alwaysdollar
= FALSE
;
/* which backslash sequences to keep */
char *leave
= (in_what
& SCAN_TR
)
? "\\$@nrtfbeacx0123456789-"
: "\\$@nrtfbeacx0123456789[{]}lLuUE";
default: /* a substitution replacement */
arg
[1].arg_type
= A_DOUBLE
;
makesingle
= TRUE
; /* maybe disable runtime scanning */
arg
[1].arg_type
= A_SINGLE
;
yyerror("Illegal octal digit");
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
str_numset(str
,(double)i
);
str
->str_len
= str
->str_cur
= 0;
arg
[1].arg_ptr
.arg_str
= str
;
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9': case '.':
arg
[1].arg_type
= A_SINGLE
;
while (isDIGIT(*s
) || *s
== '_') {
if (*s
== '.' && s
[1] != '.') {
while (isDIGIT(*s
) || *s
== '_') {
if (*s
&& index("eE",*s
) && index("+-0123456789",s
[1])) {
if (*s
== '+' || *s
== '-')
str_numset(str
,atof(tokenbuf
));
str
->str_len
= str
->str_cur
= 0;
arg
[1].arg_ptr
.arg_str
= str
;
if (in_what
& (SCAN_REPL
|SCAN_TR
))
if (*++s
&& index("`'\"",*s
)) {
s
= cpytill(d
,s
,bufend
,term
,&len
);
} /* assuming tokenbuf won't clobber */
if (rsfp
|| !(d
=ninstr(s
,bufend
,d
,d
+1)))
herewas
= str_make(s
,bufend
-s
);
s
--, herewas
= str_make(s
,d
-s
);
s
= cpytill(d
,s
,bufend
,'>',&len
);
fatal("Unterminated <> operator");
while (*d
&& (isALNUM(*d
) || *d
== '\''))
if (d
- tokenbuf
!= len
) {
arg
[1].arg_type
= A_GLOB
;
alwaysdollar
= TRUE
; /* treat $) and $| as variables */
arg
[1].arg_type
= A_INDREAD
;
arg
[1].arg_ptr
.arg_stab
= stabent(d
+1,TRUE
);
arg
[1].arg_type
= A_READ
;
arg
[1].arg_ptr
.arg_stab
= stabent(d
,TRUE
);
if (!stab_io(arg
[1].arg_ptr
.arg_stab
))
stab_io(arg
[1].arg_ptr
.arg_stab
) = stio_new();
(void)aadd(arg
[1].arg_ptr
.arg_stab
);
stab_io(arg
[1].arg_ptr
.arg_stab
)->flags
|=
arg
[1].arg_type
= A_SINGLE
;
arg
[1].arg_type
= A_DOUBLE
;
makesingle
= TRUE
; /* maybe disable runtime scanning */
alwaysdollar
= TRUE
; /* treat $) and $| as variables */
arg
[1].arg_type
= A_BACKTICK
;
alwaysdollar
= TRUE
; /* treat $) and $| as variables */
multi_start
= curcmd
->c_line
;
multi_open
= multi_close
= '<';
if (term
&& (tmps
= index("([{< )]}> )]}>",term
)))
(*s
!= term
|| bcmp(s
,tokenbuf
,len
) != 0) ) {
curcmd
->c_line
= multi_start
;
str_nset(tmpstr
,d
+1,s
-d
);
str_ncat(herewas
,s
,bufend
-s
);
str_replace(linestr
,herewas
);
oldoldbufptr
= oldbufptr
= bufptr
= s
= str_get(linestr
);
bufend
= linestr
->str_ptr
+ linestr
->str_cur
;
str_nset(tmpstr
,"",0); /* avoid "uninitialized" warning */
s
= str_append_till(tmpstr
,s
+1,bufend
,term
,leave
);
while (s
>= bufend
) { /* multiple line string? */
!(oldoldbufptr
= oldbufptr
= s
= str_gets(linestr
, rsfp
, 0))) {
curcmd
->c_line
= multi_start
;
STR
*str
= Str_new(88,0);
astore(stab_xarray(curcmd
->c_filestab
),
(int)curcmd
->c_line
,str
);
bufend
= linestr
->str_ptr
+ linestr
->str_cur
;
if (*s
== term
&& bcmp(s
,tokenbuf
,len
) == 0) {
str_scat(linestr
,herewas
);
bufend
= linestr
->str_ptr
+ linestr
->str_cur
;
str_scat(tmpstr
,linestr
);
s
= str_append_till(tmpstr
,s
,bufend
,term
,leave
);
multi_end
= curcmd
->c_line
;
if (tmpstr
->str_cur
+ 5 < tmpstr
->str_len
) {
tmpstr
->str_len
= tmpstr
->str_cur
+ 1;
Renew(tmpstr
->str_ptr
, tmpstr
->str_len
, char);
if (arg
[1].arg_type
== A_SINGLE
) {
arg
[1].arg_ptr
.arg_str
= tmpstr
;
send
= s
+ tmpstr
->str_cur
;
while (s
< send
) { /* see if we can make SINGLE */
if (*s
== '\\' && s
[1] && isDIGIT(s
[1]) && !isDIGIT(s
[2]) &&
!alwaysdollar
&& s
[1] != '0')
*s
= '$'; /* grandfather \digit in subst */
if ((*s
== '$' || *s
== '@') && s
+1 < send
&&
(alwaysdollar
|| (s
[1] != ')' && s
[1] != '|'))) {
makesingle
= FALSE
; /* force interpretation */
else if (*s
== '\\' && s
+1 < send
) {
s
= d
= start
= tmpstr
->str_ptr
; /* assuming shrinkage only */
while (s
< send
|| dorange
) {
if (!tmpstr2
) { /* oops, have to grow */
tmpstr2
= str_smake(tmpstr
);
s
= tmpstr2
->str_ptr
+ (s
- tmpstr
->str_ptr
);
send
= tmpstr2
->str_ptr
+ (send
- tmpstr
->str_ptr
);
STR_GROW(tmpstr
, tmpstr
->str_len
+ 256);
for (i
= (*d
& 0377); i
<= max
; i
++)
else if (*s
== '-' && s
+1 < send
&& s
!= start
) {
if ((*s
== '$' && s
+1 < send
&&
(alwaysdollar
|| /*(*/(s
[1] != ')' && s
[1] != '|')) ) ||
(*s
== '@' && s
+1 < send
) ) {
if (s
[1] == '#' && (isALPHA(s
[2]) || s
[2] == '_'))
len
= scanident(s
,send
,tokenbuf
) - s
;
if (*s
== '$' || strEQ(tokenbuf
,"ARGV")
|| strEQ(tokenbuf
,"INC") )
(void)stabent(tokenbuf
,TRUE
); /* add symbol */
if (*s
== '\\' && s
+1 < send
) {
if (!makesingle
&& (!leave
|| (*s
&& index(leave
,*s
))))
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
*d
++ = scanoct(s
, 3, &len
);
*d
++ = scanhex(++s
, 2, &len
);
if (arg
[1].arg_type
== A_DOUBLE
&& makesingle
)
arg
[1].arg_type
= A_SINGLE
; /* now we can optimize on it */
tmpstr
->str_cur
= d
- tmpstr
->str_ptr
;
if (arg
[1].arg_type
== A_GLOB
) {
arg
[1].arg_ptr
.arg_stab
= stab
= genstab();
stab_io(stab
) = stio_new();
str_sset(stab_val(stab
), tmpstr
);
arg
[1].arg_ptr
.arg_str
= tmpstr
;
register FCMD
*fprev
= &froot
;
while (s
< bufend
|| (rsfp
&& (s
= str_gets(linestr
,rsfp
, 0)) != Nullch
)) {
eol
= bufend
= linestr
->str_ptr
+ linestr
->str_cur
;
STR
*tmpstr
= Str_new(89,0);
str_nset(tmpstr
, s
, eol
-s
);
astore(stab_xarray(curcmd
->c_filestab
), (int)curcmd
->c_line
,tmpstr
);
for (t
= s
+1; *t
== ' ' || *t
== '\t'; t
++) ;
for (t
=s
; t
< eol
&& *t
!= '@' && *t
!= '^'; t
++) {
fcmd
->f_pre
= nsavestr(s
, t
-s
);
fcmd
->f_flags
|= FC_NOBLANK
;
fcmd
->f_flags
|= FC_REPEAT
;
flinebeg
= fcmd
; /* start values here */
fcmd
->f_flags
|= FC_CHOP
; /* for doing text filling */
/* Catch the special case @... and handle it as a string
if (*s
== '.' && s
[1] == '.') {
fcmd
->f_type
= F_DECIMAL
;
/* Read a format in the form @####.####, where either group
of ### may be empty, or the final .### may be missing. */
if (fcmd
->f_flags
& FC_CHOP
&& *s
== '.') {
fcmd
->f_flags
|= FC_MORE
;
(!rsfp
|| (s
= str_gets(linestr
, rsfp
, 0)) == Nullch
) )
eol
= bufend
= linestr
->str_ptr
+ linestr
->str_cur
;
STR
*tmpstr
= Str_new(90,0);
str_nset(tmpstr
, s
, eol
-s
);
astore(stab_xarray(curcmd
->c_filestab
),
(int)curcmd
->c_line
,tmpstr
);
yyerror("Missing values line");
str
= flinebeg
->f_unparsed
= Str_new(91,eol
- s
);
str
->str_u
.str_hash
= curstash
;
flinebeg
->f_line
= curcmd
->c_line
;
if (!flinebeg
->f_next
->f_type
|| index(s
, ',')) {
str_ncat(str
, s
, eol
- s
- 1);
while (s
< eol
&& isSPACE(*s
))
case ' ': case '\t': case '\n': case ';':
while (s
< eol
&& (isSPACE(*s
) || *s
== ';'))
s
= scanident(s
,eol
,tokenbuf
);
if (s
< eol
&& *s
&& index("$'\"",*s
))
while (s
< eol
&& (*s
!= *t
|| s
[-1] == '\\'))
if (s
< eol
&& *s
&& index("$'\"",*s
))
yyerror("Please use commas to separate fields");
bufptr
= str_get(linestr
);
yyerror("Format not terminated");
cshlen
= strlen(cshname
);