/* $RCSfile: str.c,v $$Revision: 4.0.1.7 $$Date: 1993/02/05 19:43:47 $
* Copyright (c) 1991, Larry Wall
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
* Revision 4.0.1.7 1993/02/05 19:43:47 lwall
* patch36: the non-std stdio input code wasn't null-proof
* Revision 4.0.1.6 92/06/11 21:14:21 lwall
* patch34: quotes containing subscripts containing variables didn't parse right
* Revision 4.0.1.5 92/06/08 15:40:43 lwall
* patch20: removed implicit int declarations on functions
* patch20: Perl now distinguishes overlapped copies from non-overlapped
* patch20: paragraph mode now skips extra newlines automatically
* patch20: fixed memory leak in double-quote interpretation
* patch20: made /\$$foo/ look for literal '$foo'
* patch20: "$var{$foo'bar}" didn't scan subscript correctly
* patch20: a splice on non-existent array elements could dump core
* patch20: running taintperl explicitly now does checks even if $< == $>
* Revision 4.0.1.4 91/11/05 18:40:51 lwall
* patch11: $foo .= <BAR> could overrun malloced memory
* patch11: \$ didn't always make it through double-quoter to regexp routines
* patch11: prepared for ctype implementations that don't define isascii()
* Revision 4.0.1.3 91/06/10 01:27:54 lwall
* patch10: $) and $| incorrectly handled in run-time patterns
* Revision 4.0.1.2 91/06/07 11:58:13 lwall
* patch4: new copyright notice
* patch4: taint check on undefined string could cause core dump
* Revision 4.0.1.1 91/04/12 09:15:30 lwall
* patch1: fixed undefined environ problem
* patch1: substr($ENV{"PATH"},0,0) = "/foo:" didn't modify environment
* patch1: $foo .= <BAR> could cause core dump for certain lengths of $foo
* Revision 4.0 91/03/20 01:39:55 lwall
tainted
|= str
->str_tainted
;
return str
->str_pok
? str
->str_ptr
: str_2ptr(str
);
/* dlb ... guess we have a "crippled cc".
* dlb the following functions are usually macros.
if (*Str
->str_ptr
> '0' ||
(Str
->str_cur
&& *Str
->str_ptr
!= '0'))
return (Str
->str_u
.str_nval
!= 0.0);
tainted
|= Str
->str_tainted
;
return Str
->str_u
.str_nval
;
/* dlb ... end of crutch */
register char *s
= str
->str_ptr
;
fprintf(stderr
, "Allocation too large: %lx\n", newlen
);
if (str
->str_state
== SS_INCR
) { /* data before str_ptr? */
str
->str_len
+= str
->str_u
.str_useful
;
str
->str_ptr
-= str
->str_u
.str_useful
;
str
->str_u
.str_useful
= 0L;
Move(s
, str
->str_ptr
, str
->str_cur
+1, char);
str
->str_state
= SS_NORM
; /* normal again */
if (newlen
> str
->str_len
)
newlen
+= 10 * (newlen
- str
->str_cur
); /* avoid copy each time */
if (newlen
> str
->str_len
) { /* need more room? */
str
->str_pok
= 0; /* invalidate pointer */
if (str
->str_state
== SS_INCR
)
str
->str_u
.str_nval
= num
;
str
->str_state
= SS_NORM
;
str
->str_nok
= 1; /* validate number */
str
->str_tainted
= tainted
;
olderrno
= errno
; /* some Xenix systems wipe out errno here */
#if defined(scs) && defined(ns32000)
gcvt(str
->str_u
.str_nval
,20,s
);
if (str
->str_u
.str_nval
== 0.0)
(void)sprintf(s
,"%.20g",str
->str_u
.str_nval
);
warn("Use of uninitialized variable");
str
->str_cur
= s
- str
->str_ptr
;
fprintf(stderr
,"0x%lx ptr(%s)\n",str
,str
->str_ptr
);
if (str
->str_state
== SS_INCR
)
Str_Grow(str
,0); /* just force copy down */
str
->str_state
= SS_NORM
;
if (str
->str_len
&& str
->str_pok
)
str
->str_u
.str_nval
= atof(str
->str_ptr
);
warn("Use of uninitialized variable");
str
->str_u
.str_nval
= 0.0;
fprintf(stderr
,"0x%lx num(%g)\n",str
,str
->str_u
.str_nval
);
return str
->str_u
.str_nval
;
/* Note: str_sset() should not be called with a source string that needs
* to be reused, since it may destroy the source string if it is marked
tainted
|= sstr
->str_tainted
;
if (sstr
== dstr
|| dstr
== &str_undef
)
dstr
->str_pok
= dstr
->str_nok
= 0;
else if (sstr
->str_pok
) {
* Check to see if we can just swipe the string. If so, it's a
* possible small lose on short strings, but a big win on long ones.
* It might even be a win on short strings if dstr->str_ptr
* has to be allocated and sstr->str_ptr has to be freed.
if (sstr
->str_pok
& SP_TEMP
) { /* slated for free anyway? */
if (dstr
->str_state
== SS_INCR
)
dstr
->str_ptr
-= dstr
->str_u
.str_useful
;
dstr
->str_ptr
= sstr
->str_ptr
;
dstr
->str_len
= sstr
->str_len
;
dstr
->str_cur
= sstr
->str_cur
;
dstr
->str_state
= sstr
->str_state
;
dstr
->str_pok
= sstr
->str_pok
& ~SP_TEMP
;
dstr
->str_tainted
= sstr
->str_tainted
;
sstr
->str_pok
= 0; /* wipe out any weird flags */
sstr
->str_state
= 0; /* so sstr frees uneventfully */
else { /* have to copy actual string */
if (dstr
->str_state
== SS_INCR
) {
str_nset(dstr
,sstr
->str_ptr
,sstr
->str_cur
);
if (dstr
->str_nok
= sstr
->str_nok
)
dstr
->str_u
.str_nval
= sstr
->str_u
.str_nval
;
dstr
->str_u
= sstr
->str_u
;
dstr
->str_u
.str_nval
= sstr
->str_u
.str_nval
;
if (dstr
->str_cur
== sizeof(STBP
)) {
char *tmps
= dstr
->str_ptr
;
if (*tmps
== 'S' && bcmp(tmps
,"StB",4) == 0) {
if (dstr
->str_magic
&& dstr
->str_magic
->str_rare
== 'X') {
str_free(dstr
->str_magic
);
dstr
->str_magic
= Nullstr
;
dstr
->str_magic
= str_smake(sstr
->str_magic
);
dstr
->str_magic
->str_rare
= 'X';
str_numset(dstr
,sstr
->str_u
.str_nval
);
if (dstr
->str_state
== SS_INCR
)
Str_Grow(dstr
,0); /* just force copy down */
dstr
->str_u
= sstr
->str_u
;
dstr
->str_u
.str_nval
= sstr
->str_u
.str_nval
;
dstr
->str_pok
= dstr
->str_nok
= 0;
Move(ptr
,str
->str_ptr
,len
,char);
*(str
->str_ptr
+str
->str_cur
) = '\0';
str
->str_nok
= 0; /* invalidate number */
str
->str_pok
= 1; /* validate pointer */
str
->str_tainted
= tainted
;
Move(ptr
,str
->str_ptr
,len
+1,char);
str
->str_nok
= 0; /* invalidate number */
str
->str_pok
= 1; /* validate pointer */
str
->str_tainted
= tainted
;
str_chop(str
,ptr
) /* like set but assuming ptr is in str */
if (!ptr
|| !(str
->str_pok
))
delta
= ptr
- str
->str_ptr
;
if (str
->str_state
== SS_INCR
)
str
->str_u
.str_useful
+= delta
;
str
->str_u
.str_useful
= delta
;
str
->str_state
= SS_INCR
;
str
->str_nok
= 0; /* invalidate number */
str
->str_pok
= 1; /* validate pointer (and unstudy str) */
STR_GROW(str
, str
->str_cur
+ len
+ 1);
Move(ptr
,str
->str_ptr
+str
->str_cur
,len
,char);
*(str
->str_ptr
+str
->str_cur
) = '\0';
str
->str_nok
= 0; /* invalidate number */
str
->str_pok
= 1; /* validate pointer */
str
->str_tainted
|= tainted
;
tainted
|= sstr
->str_tainted
;
str_ncat(dstr
,sstr
->str_ptr
,sstr
->str_cur
);
STR_GROW(str
, str
->str_cur
+ len
+ 1);
Move(ptr
,str
->str_ptr
+str
->str_cur
,len
+1,char);
str
->str_nok
= 0; /* invalidate number */
str
->str_pok
= 1; /* validate pointer */
str
->str_tainted
|= tainted
;
str_append_till(str
,from
,fromend
,delim
,keeplist
)
STR_GROW(str
, str
->str_cur
+ len
+ 1);
str
->str_nok
= 0; /* invalidate number */
str
->str_pok
= 1; /* validate pointer */
to
= str
->str_ptr
+str
->str_cur
;
for (; from
< fromend
; from
++,to
++) {
if (*from
== '\\' && from
+1 < fromend
&& delim
!= '\\') {
if (from
[1] == delim
|| from
[1] == '\\')
else if (from
[1] && index(keeplist
,from
[1]))
str
->str_cur
= to
- str
->str_ptr
;
freestrroot
= str
->str_magic
;
str
->str_magic
= Nullstr
;
str
->str_state
= SS_NORM
;
str_magic(str
, stab
, how
, name
, namlen
)
if (str
== &str_undef
|| str
->str_magic
)
str
->str_magic
= Str_new(75,namlen
);
str
->str_u
.str_stab
= stab
;
str_nset(str
,name
,namlen
);
str_insert(bigstr
,offset
,len
,little
,littlelen
)
if (bigstr
== &str_undef
)
bigstr
->str_pok
= SP_VALID
; /* disable possible screamer */
if (i
> 0) { /* string might grow */
STR_GROW(bigstr
, bigstr
->str_cur
+ i
+ 1);
mid
= big
+ offset
+ len
;
midend
= bigend
= big
+ bigstr
->str_cur
;
while (midend
> mid
) /* shove everything down */
Move(little
,big
+offset
,littlelen
,char);
Move(little
,bigstr
->str_ptr
+offset
,len
,char);
bigend
= big
+ bigstr
->str_cur
;
fatal("panic: str_insert");
if (mid
- big
> bigend
- midend
) { /* faster to shorten from end */
Move(little
, mid
, littlelen
,char);
Move(midend
, mid
, i
,char);
bigstr
->str_cur
= mid
- big
;
else if (i
= mid
- big
) { /* faster from front */
str_chop(bigstr
,midend
-i
);
Move(little
, mid
, littlelen
,char);
Move(little
,midend
,littlelen
,char);
/* make str point to what nstr did */
if (str
->str_state
== SS_INCR
)
Str_Grow(str
,0); /* just force copy down */
if (nstr
->str_state
== SS_INCR
)
str
->str_ptr
= nstr
->str_ptr
;
str
->str_len
= nstr
->str_len
;
str
->str_cur
= nstr
->str_cur
;
str
->str_pok
= nstr
->str_pok
;
str
->str_nok
= nstr
->str_nok
;
str
->str_u
= nstr
->str_u
;
str
->str_u
.str_nval
= nstr
->str_u
.str_nval
;
str
->str_tainted
= nstr
->str_tainted
;
str_free(nstr
->str_magic
);
if (!str
|| str
== &str_undef
)
if (str
->str_state
== SS_FREE
) /* already freed */
if (str
->str_state
== SS_INCR
&& !(str
->str_pok
& 2)) {
str
->str_ptr
-= str
->str_u
.str_useful
;
str
->str_len
+= str
->str_u
.str_useful
;
str_free(str
->str_magic
);
str
->str_magic
= freestrroot
;
if ((str
->str_pok
& SP_INTRP
) && str
->str_u
.str_args
)
arg_free(str
->str_u
.str_args
);
if (str
->str_len
> 127) { /* next user not likely to want more */
Safefree(str
->str_ptr
); /* so give it back to malloc */
if ((str
->str_pok
& SP_INTRP
) && str
->str_u
.str_args
)
arg_free(str
->str_u
.str_args
);
str
->str_state
= SS_FREE
;
if (!str1
|| str1
== &str_undef
)
return (str2
== Nullstr
|| str2
== &str_undef
|| !str2
->str_cur
);
if (!str2
|| str2
== &str_undef
)
if (str1
->str_cur
!= str2
->str_cur
)
return !bcmp(str1
->str_ptr
, str2
->str_ptr
, str1
->str_cur
);
if (!str1
|| str1
== &str_undef
)
return (str2
== Nullstr
|| str2
== &str_undef
|| !str2
->str_cur
)?0:-1;
if (!str2
|| str2
== &str_undef
)
return str1
->str_cur
!= 0;
if (str1
->str_cur
< str2
->str_cur
) {
if (retval
= memcmp(str1
->str_ptr
, str2
->str_ptr
, str1
->str_cur
))
return retval
< 0 ? -1 : 1;
else if (retval
= memcmp(str1
->str_ptr
, str2
->str_ptr
, str2
->str_cur
))
return retval
< 0 ? -1 : 1;
else if (str1
->str_cur
== str2
->str_cur
)
register char *bp
; /* we're going to steal some values */
register int cnt
; /* from the stdio struct and put EVERYTHING */
register STDCHAR
*ptr
; /* in the innermost loop into registers */
register int newline
= rschar
;/* (assuming >= 6 registers) */
if (rspara
) { /* have to do this both before and after */
do { /* to make sure file boundaries work right */
#ifdef STDSTDIO /* Here is some breathtakingly efficient cheating */
cnt
= fp
->_cnt
; /* get count into register */
str
->str_nok
= 0; /* invalidate number */
str
->str_pok
= 1; /* validate pointer */
if (str
->str_len
- append
<= cnt
+ 1) { /* make sure we have the room */
if (cnt
> 80 && str
->str_len
> append
) {
shortbuffered
= cnt
- str
->str_len
+ append
+ 1;
STR_GROW(str
, append
+cnt
+2);/* (remembering cnt can be -1) */
bp
= str
->str_ptr
+ append
; /* move these two too to registers */
while (--cnt
>= 0) { /* this */ /* eat */
if ((*bp
++ = *ptr
++) == newline
) /* really */ /* dust */
goto thats_all_folks
; /* screams */ /* sed :-) */
if (shortbuffered
) { /* oh well, must extend */
bpx
= bp
- str
->str_ptr
; /* prepare for possible relocation */
STR_GROW(str
, str
->str_len
+ append
+ cnt
+ 2);
bp
= str
->str_ptr
+ bpx
; /* reconstitute our pointer */
fp
->_cnt
= cnt
; /* deregisterize cnt and ptr */
i
= _filbuf(fp
); /* get more characters */
ptr
= fp
->_ptr
; /* reregisterize cnt and ptr */
bpx
= bp
- str
->str_ptr
; /* prepare for possible relocation */
STR_GROW(str
, bpx
+ cnt
+ 2);
bp
= str
->str_ptr
+ bpx
; /* reconstitute our pointer */
if (i
== newline
) { /* all done for now? */
else if (i
== EOF
) /* all done for ever? */
goto thats_really_all_folks
;
*bp
++ = i
; /* now go back to screaming loop */
if (rslen
> 1 && (bp
- str
->str_ptr
< rslen
|| bcmp(bp
- rslen
, rs
, rslen
)))
goto screamer
; /* go back to the fray */
fp
->_cnt
= cnt
; /* put these back or we're in trouble */
str
->str_cur
= bp
- str
->str_ptr
; /* set length */
#else /* !STDSTDIO */ /* The big, slow, and stupid way */
char * bpe
= buf
+ sizeof(buf
) - 3;
while ((i
= getc(fp
)) != EOF
&& (*bp
++ = i
) != newline
&& bp
< bpe
) ;
str_ncat(str
, buf
, bp
- buf
);
str_nset(str
, buf
, bp
- buf
);
bcmp(str
->str_ptr
+ str
->str_cur
- rslen
, rs
, rslen
)
return str
->str_cur
- append
? str
->str_ptr
: Nullch
;
oldoldbufptr
= oldbufptr
= bufptr
= str_get(linestr
);
bufend
= bufptr
+ linestr
->str_cur
;
if (++loop_ptr
>= loop_max
) {
Renew(loop_stack
, loop_max
, struct loop
);
loop_stack
[loop_ptr
].loop_label
= "_EVAL_";
loop_stack
[loop_ptr
].loop_sp
= 0;
deb("(Pushing label #%d _EVAL_)\n", loop_ptr
);
if (setjmp(loop_stack
[loop_ptr
].loop_env
)) {
fatal("%s\n",stab_val(stabent("@",TRUE
))->str_ptr
);
char *tmps
= loop_stack
[loop_ptr
].loop_label
;
deb("(Popping label #%d %s)\n",loop_ptr
,
curcmd
->c_line
= oldcurcmd
->c_line
;
if (retval
|| error_count
)
fatal("Invalid component in string or format");
if (cmd
->c_type
!= C_EXPR
|| cmd
->c_next
|| arg
->arg_type
!= O_LIST
)
fatal("panic: error in parselist %d %x %d", cmd
->c_type
,
cmd
->c_next
, arg
? arg
->arg_type
: -1);
register char *s
= str_get(src
);
register char *send
= s
+ src
->str_cur
;
if (*s
== '\\' && s
[1] && index("$@[{\\]}lLuUE",s
[1])) {
if (*nointrp
) { /* in a regular expression */
if (*s
== '@') /* always strip \@ */ /*SUPPRESS 530*/
else /* don't strip \\, \[, \{ etc. */
else if (*s
== '$' && s
+1 < send
&& *nointrp
&& index(nointrp
,s
[1])) {
else if ((*s
== '@' || *s
== '$') && s
+1 < send
) {
if (*s
== '$' && s
[1] == '#' && (isALPHA(s
[2]) || s
[2] == '_'))
s
= scanident(s
,send
,tokenbuf
);
(!(stab
= stabent(tokenbuf
,FALSE
)) ||
(*s
== '{' ? !stab_xhash(stab
) : !stab_xarray(stab
)) )) {
continue; /* grandfather @ from old scripts */
if (t
[1] != '{' && (*s
== '[' || *s
== '{' /* }} */ ) &&
(stab
= stabent(tokenbuf
,FALSE
)) &&
((*s
== '[') ? (stab_xarray(stab
) != 0) : (stab_xhash(stab
) != 0)) ) {
s
= scanident(s
,send
,tokenbuf
);
s
= cpytill(tokenbuf
,s
+1,send
,*s
,&len
);
fatal("Unterminated string");
} while (brackets
> 0 && s
< send
);
fatal("Unmatched brackets in string");
if (*nointrp
) { /* we're in a regular expression */
if (*d
== '{' && s
[-1] == '}') { /* maybe {n,m} */
if (isDIGIT(*d
)) { /* matches /^{\d,?\d*}$/ */
s
= checkpoint
; /* Is {n,m}! Backoff! */
else if (*d
== '[' && s
[-1] == ']') { /* char class? */
int weight
= 2; /* let's weigh the evidence */
unsigned char un_char
= 0, last_un_char
;
if (isDIGIT(d
[2]) && !d
[3])
un_char
= (unsigned char)*d
;
weight
-= seen
[un_char
] * 10;
d
= scanident(d
,s
,tokenbuf
);
if (stabent(tokenbuf
,FALSE
))
else if (*d
== '$' && d
[1] &&
index("[#!%*<>()-=",d
[1])) {
if (!d
[2] || /*{*/ index("])} =",d
[2]))
else if (seen
['\''] || seen
['"'])
else if (index("rnftb",d
[1]))
else if (isDIGIT(d
[1])) {
while (d
[1] && isDIGIT(d
[1]))
if (last_un_char
< (unsigned char) d
[1]
if (index("aA01! ",last_un_char
))
if (isALPHA(*d
) && d
[1] && isALPHA(d
[1])) {
if (un_char
== last_un_char
+ 1)
fprintf(stderr
,"[%s] weight %d\n",
if (weight
>= 0) /* probably a character class */
str_ncat(toparse
, "join($\",", 8);
if (t
[1] == '{' && s
[-1] == '}') {
str_ncat(toparse
, t
+2, s
- t
- 3);
str_ncat(toparse
, t
, s
- t
);
str_ncat(toparse
, ")", 1);
if (toparse
->str_ptr
&& *toparse
->str_ptr
== ',') {
str_ncat(toparse
,",$$);",5);
str
->str_u
.str_args
= parselist(toparse
);
str
->str_u
.str_args
->arg_len
--; /* ignore $$ reference */
str
->str_u
.str_args
= Nullarg
;
str
->str_pok
|= SP_INTRP
;
if (!(src
->str_pok
& SP_INTRP
)) {
int oldsave
= savestack
->ary_fill
;
(void)savehptr(&curstash
);
curstash
= curcmd
->c_stash
; /* so stabent knows right package */
s
= src
->str_ptr
; /* assumed valid since str_pok set */
if (src
->str_u
.str_args
) {
(void)eval(src
->str_u
.str_args
,G_ARRAY
,sp
);
/* Assuming we have correct # of args */
elem
= stack
->ary_array
+ sp
;
if (*s
== '$' && s
+1 < send
) {
fatal("panic: unknown interp cookie\n");
if (docase
&& str
->str_cur
>= docase
) {
char *b
= str
->str_ptr
+ --docase
;
lcase(b
, str
->str_ptr
+ str
->str_cur
);
ucase(b
, str
->str_ptr
+ str
->str_cur
);
if (u
) /* note that l & u are independent of L & U */
docase
= str
->str_cur
+ 1;
docase
= L
= U
= l
= u
= 0;
if (!str
|| str
== &str_undef
)
str
->str_u
.str_nval
+= 1.0;
if (!str
->str_pok
|| !*str
->str_ptr
) {
str
->str_u
.str_nval
= 1.0;
str_numset(str
,atof(str
->str_ptr
) + 1.0); /* punt */
while (d
>= str
->str_ptr
) {
/* oh,oh, the number grew */
STR_GROW(str
, str
->str_cur
+ 2);
for (d
= str
->str_ptr
+ str
->str_cur
; d
> str
->str_ptr
; d
--)
if (!str
|| str
== &str_undef
)
str
->str_u
.str_nval
-= 1.0;
str
->str_u
.str_nval
= -1.0;
str_numset(str
,atof(str
->str_ptr
) - 1.0);
/* Make a string that will exist for the duration of the expression
* evaluation. Actually, it may have to last longer than that, but
* hopefully cmd_exec won't free it until it has been assigned to a
static long tmps_size
= -1;
register STR
*str
= Str_new(78,0);
if (++tmps_max
> tmps_size
) {
if (!(tmps_size
& 127)) {
Renew(tmps_list
, tmps_size
+ 128, STR
*);
New(702,tmps_list
, 128, STR
*);
tmps_list
[tmps_max
] = str
;
/* same thing without the copying */
if (!str
|| str
== &str_undef
)
if (++tmps_max
> tmps_size
) {
if (!(tmps_size
& 127)) {
Renew(tmps_list
, tmps_size
+ 128, STR
*);
New(704,tmps_list
, 128, STR
*);
tmps_list
[tmps_max
] = str
;
register STR
*str
= Str_new(79,0);
register STR
*str
= Str_new(80,0);
/* make an exact duplicate of old */
register STR
*new = Str_new(81,0);
if (old
->str_state
== SS_FREE
) {
warn("semi-panic: attempt to dup freed string");
if (old
->str_state
== SS_INCR
&& !(old
->str_pok
& 2))
new->str_ptr
= nsavestr(old
->str_ptr
,old
->str_len
);
new->str_pok
&= ~SP_TEMP
;
if (!*s
) { /* reset ?? searches */
for (spat
= stash
->tbl_spatroot
;
spat
= spat
->spat_next
) {
spat
->spat_flags
&= ~SPAT_USED
;
for (entry
= stash
->tbl_array
[i
];
entry
= entry
->hent_next
) {
stab
= (STAB
*)entry
->hent_val
;
str
->str_tainted
= tainted
;
if (str
->str_ptr
!= Nullch
)
aclear(stab_xarray(stab
));
hclear(stab_xhash(stab
), FALSE
);
fprintf(stderr
,"%s %d %d %d\n",s
,tainted
,uid
, euid
);
if (tainted
&& (!euid
|| euid
!= uid
|| egid
!= gid
|| taintanyway
)) {
envstr
= hfetch(stab_hash(envstab
),"PATH",4,FALSE
);
if (envstr
== &str_undef
|| envstr
->str_tainted
) {
if (envstr
->str_tainted
== 2)
taintproper("Insecure directory in PATH");
taintproper("Insecure PATH");
envstr
= hfetch(stab_hash(envstab
),"IFS",3,FALSE
);
if (envstr
!= &str_undef
&& envstr
->str_tainted
) {
taintproper("Insecure IFS");