#! /usr/local/bin/perl -w
sub T_BACKGROUND
() { 4; }
# ugly, ugly, but makes things faster
my %quotehash = qw
|' ' " " q
( ) qw( ) qq( ) ` `|;
my $def_tokenizer= '(\\s+|\\|\\||\\&\\&|\||=>|->|;;|;|\\&|>>|>|<<|<|\\(|\\)|\\{|\\}|\\[|\\])';
my $nevermatches = "(?!a)a";
$def_quoteexp = $nevermatches;
foreach my $opener (keys %quotehash) {
$def_quoteexp .= '|' . quotemeta($opener);
$quotedquotes{$opener} = quotemeta($quotehash{$opener});
my $stdallinall= "^((?:[^\\\\]|\\\\.)*?)(?:$def_tokenizer|($def_quoteexp))(.*)\$";
$stdallinall= qr{$stdallinall}s;
my ($delimexp,$line,$num,$keep,$unmatched) = @_;
if (!defined($delimexp)) { $delimexp = $def_tokenizer; }
elsif ($delimexp eq ' ') { $delimexp='(\s+)'; }
if (!defined($num)) { $num = -1; }
if (!defined($keep)) { $keep = 1; }
# Remember if delimexp came with any parenthesized subexpr, and
# arrange for it to have exactly one so we know what each piece in
@matches = ('x' =~ m/$delimexp|(.)/);
Carp
::carp
("Delimiter regexp '$delimexp' in decompose may " .
"contain at most 1 ().");
$delimexp = "($delimexp)";
return _decompose
($line, "^((?:[^\\\\]|\\\\.)*?)(?:$delimexp|($def_quoteexp))(.*)\$", $keep, $num, $unmatched, $saveDelimiters-1);
my ( $line, $regexp, $keep, $num, $unmatched, $saveDelimiters)= @_;
if (@pieces == $num) { last; }
# $delimexp is unparenthesized below because we have
# already arranged for it to contain exactly one backref ()
my ($prefix,$delimiter,$quote,$rest) =
if (!$keep and defined($prefix)) {
$prefix= remove_backslash
($prefix);
if (defined($delimiter)) {
$pieces[$#pieces] .= $prefix;
if (length($pieces[$#pieces]) or !$freshPiece) {
push @pieces, $delimiter;
$pieces[$#pieces] = $delimiter;
} elsif (@pieces > 1 or $pieces[0]) {
} elsif (defined($quote)) {
my ($restOfQuote,$remainder) =
($rest =~ m/^((?:[^\\]|\\.)*?)$quotedquotes{$quote}(.*)$/s);
if (defined($restOfQuote)) {
$quote ne "\'" and $quote ne 'q(') {
$restOfQuote= remove_backslash
($restOfQuote);
$pieces[$#pieces]= join('',$pieces[$#pieces],$prefix,
} else { # can't find matching quote, give up
} else { # nothing found, so remainder all one unquoted piece
if (!$keep and length($line)) {
$line= remove_backslash
($line);
if (length($line)) { $pieces[$#pieces] .= $line; }
if (defined($unmatched)) { ${$unmatched} = $uquote; }
return wantarray?
@pieces:\
@pieces;
return 0 unless $line=~/[\[{('"]/s;
my @words= @
{scalar(_decompose
($line,$stdallinall, 1, undef, \
$unmatch))};
if ($unmatch) { return 2; }
my @openstack = (':'); # : is used as a bottom marker here
my %open_of_close = qw
|) ( } { ] [ " '|;
foreach my $word (@words) {
next if length($word)!=1;
if ($word eq '[' or $word eq '{' or $word eq '(' or $word eq '"' or
} elsif ($word eq ')' or $word eq '}' or $word eq ']' or $word eq '"' or
my $open= $open_of_close{$word};
my $curopen = pop @openstack;
if (scalar(@openstack) > 1) { return 1; }
# If we're going to be a shell, let's act like a shell. The idea here
# is to provide expansion functions that individual evaluation
# strategies can use on the argument list to perform operations
# similar to the ones a shell argument list undergoes. Each of these
# functions should take a reference to an array of "words
" and return
# a solid (to be conservative, as opposed to modifying in place) array of
# Bash defines eight types of expansion in its manpage: brace
# expansion, tilde expansion, parameter and variable expansion,
# command substitution, arithmetic expansion, word splitting,
# pathname expansion, and process expansion.
# Of these, arithmetic expansion makes no sense in Perl. Word
# splitting should happen "on the fly
", i.e., the array returned by
# one of these functions might have more elements than the argument
# did. Since the perl builtin "glob" handles brace, tilde and pathname
# expansion, here's a glob_expansion function that covers all of
# those. Also a variable_expansion function that handles substituting
# in the values of Perl variables. That leaves only:
# TODO: command_expansion (i.e., backticks. For this,
# backticks would have to be added to decompose as a recognized quote
# character), process_expansion
# TODO: should some of these line-processing actions happen in a
# uniform way, or should things simply be left to each evaluation strategy
# as psh currently works?
# array glob_expansion (arrayref WORDS)
# For each element x of the array referred to by WORDS, such that x
# is not quoted, push glob(x) onto an array, and return the collected array.
for my $word (@{$arref}) {
if ($word =~ m/['"']/ # if it contains quotes
or ($word !~ m/{.*}|\[.*\]|[*?~]/)) { # or no globbing characters
push @retval, $word; # don't try to glob it
# Glob it. If anything happens, quote the
# results so they won't be clobbbered later.
my @results = Psh::OS::glob($word);
if (scalar(@results) == 0) {
} elsif (scalar(@results)>1 or $results[0] ne $word) {
foreach (@results) { $_ = "'$_'"; }
push @retval, join($join_char, @results);
if (substr($text,0,1) eq '\'' and
substr($text,-1,1) eq '\'') {
$text= substr($text,1,-1);
} elsif ( substr($text,0,1) eq "\"" and
substr($text,-1,1) eq "\"") {
$text= substr($text,1,-1);
} elsif (substr($text,0,1) eq "\\") {
$text=~ s/\\(0[0-7][0-7])/chr(oct($1))/ge;
$text=~ s/\\(x[0-9a-fA-F][0-9a-fA-F])/chr(oct($1))/ge;
if (substr($text,0,1) eq '(' and
substr($text,-1,1) eq ')') {
return substr($text,1,-1);
} elsif (substr($text,0,1) eq '{' and
substr($text,-1,1) eq '}') {
return substr($text,1,-1);
my @tmp= split('=', $tmp); # [out=in] - not supported fully yet
if (@tmp==2 && !$tmp[0]) {
if (ref *{"$Psh::PerlEval
::current_package\
:\
:$_"}{FILEHANDLE}) {
push @result, fileno(*{"$Psh::PerlEval
::current_package\
:\
:$_"});
my @tmpparts= @{scalar(_decompose($line,$stdallinall, 0))};
return @tmpparts if $splitonly;
# Walk through parts and combine parenthesized parts properly
if ($_ eq '[' or $_ eq '(' or $_ eq '{') {
} elsif ($_ eq '}' or $_ eq ')' or $_ eq ']') {
push @parts,join('',@tmp,$_);
while( defined($tmp= shift @parts)) {
if ($tmp eq '||' or $tmp eq '&&') {
push @t, [T_END],[$tmp eq '||'?T_OR:T_AND];
push @tokens, [T_WORD,';'];
if ($tmp=~/^\[(.+?)\]$/) {
if (lc($tmp2) eq 'all') {
push @tokens, [T_REDIRECT, '>&', 2, 1];
@fileno= parse_fileno($tmp2,1,0);
print STDERR "Illegal syntax
\n"; ## FIXME
push @t, [T_REDIRECT, '>&', $fileno[0], 'chainout']; # needs to come first
@tokens=( [T_REDIRECT, '<&', $fileno[1], 'chainin']);
} elsif( $tmp =~ /^(>>?)$/) {
if ($tmp=~/^\[(.+?)\]$/) {
if (lc($tmp2) eq 'all') {
@fileno= parse_fileno($tmp2,1,0);
print STDERR "Illegal syntax
\n"; ## FIXME
last if( $file !~ /^\s+$/);
if( !$file or substr($file,0,1) eq '&') {
Psh::Util::print_error_i18n('redirect_file_missing',
push @tokens, [T_REDIRECT,$tmp,$fileno[0],unquote($file)];
push @tokens, [T_REDIRECT, '>&', @fileno];
push @tokens, [T_REDIRECT, '>&', 2, 1];
if ($tmp=~/^\[(.+?)\]$/) {
@fileno= parse_fileno($1,0,0);
print STDERR "Illegal syntax
\n"; ## FIXME
last if( $file !~ /^\s+$/);
if( !$file or substr($file,0,1) eq '&') {
Psh::Util::print_error_i18n('redirect_file_missing',
push @tokens, [T_REDIRECT,'<',$fileno[1],unquote($file)];
push @tokens, [T_REDIRECT,'<&',$fileno[1],$fileno[0]];
push @t, [T_BACKGROUND],[T_END];
while ( (my $tmp2= shift @parts) ne '`' ) {
$tmp= Psh::OS::backtick($tmp);
push @tokens, [T_WORD, join('','"', $tmp,'"')];
} elsif( $tmp=~ /^\s+$/) {
push @tokens, [T_WORD,$tmp];
return () if substr($line,0,1) eq '#';
($lvl1,$lvl2,$lvl3)= Psh::Strategy::parser_return_objects(@use_strats);
} elsif (@Psh::temp_use_strats) {
($lvl1,$lvl2,$lvl3)= Psh::Strategy::parser_return_objects(@Psh::temp_use_strats);
($lvl1,$lvl2,$lvl3)= Psh::Strategy::parser_strategy_list();
foreach my $strategy (@$lvl1) {
$strategy->applies(\$line);
my $name= $strategy->name;
Psh::Util::print_debug_class('s',
"[Using strategy
$name: $how]\n");
return ([ T_EXECUTE, 1, [$strategy, $how, [], [$line], $line ]]);
die "Level
2 Strategies currently
not supported
!";
my @tokens= make_tokens( $line);
$element=parse_complex_command(\@tokens,$lvl3);
return undef if ! defined( $element); # TODO: Error handling
push @elements, $element;
if ($tokens[0][0] == T_END) {
if ($tokens[0][0] == T_AND) {
push @elements, [ T_AND ];
} elsif ($tokens[0][0] == T_OR) {
push @elements, [ T_OR ];
sub parse_complex_command {
return [ T_EXECUTE, $foreground, _subparse_complex_command($tokens,$strategies,\$piped,\$foreground,{})];
sub _subparse_complex_command {
my ($tokens,$use_strats,$piped,$foreground,$alias_disabled)=@_;
my @simplecommands= parse_simple_command($tokens,$use_strats, $piped,$alias_disabled,$foreground);
while (@$tokens > 0 && $tokens->[0][0] == T_PIPE) {
push @simplecommands, parse_simple_command($tokens,$use_strats,$piped,$alias_disabled,$foreground);
if (@$tokens > 0 && $tokens->[0][0] == T_BACKGROUND) {
sub parse_simple_command {
my ($tokens,$use_strats,$piped,$alias_disabled,$foreground)=@_;
my (@words,@options,@savetokens,@precom);
($tokens->[0][0] == T_WORD or
$tokens->[0][0] == T_REDIRECT)) {
my $token = shift @$tokens;
if ($token->[0] == T_WORD) {
($token->[1] eq 'noglob' or
$token->[1] eq 'noexpand' or
$token->[1] eq 'noalias')) {
push @words, $token->[1];
} elsif ($token->[0] == T_REDIRECT) {
if (%Psh::Support::Alias::aliases and
$Psh::Support::Alias::aliases{$words[0]} and
!$alias_disabled->{$words[0]}) {
my $alias= $Psh::Support::Alias::aliases{$words[0]};
$alias_disabled->{$words[0]}=1;
my @tmp= make_tokens($alias);
return _subparse_complex_command(\@tmp,$use_strats,$piped,$foreground,$alias_disabled);
} elsif (substr($words[0],0,1) eq "\\") {
$words[0]=substr($words[0],1);
my $line= join ' ', @words;
local $Psh::current_options= $opt;
foreach my $strat (@$use_strats) {
$strat->applies(\$line,\@words,$$piped);
Psh::Util::print_debug_class('s',
"[Using strategy
$name: $how]\n");
return [ $strat, $how, \@options, \@words, $line, $opt];
Psh::Util::print_error_i18n('clueless',$line,$Psh::bin);
# TODO: right now this is pretty much of a hack. Could it be improved?
# For example, 'print hello \n' on the command line gets double
# quotes around hello and \n, so that it ends up doing
# print("hello
","\n") which looks nice but is a surprise to
# bash users. Perhaps backslash escapes simply shouldn't be OK?
return if !defined($word) or !$word;
if ($word =~ m/[a-zA-Z]/ # if it has some letters
and $word =~ m!^(\\.|[$.:a-zA-Z0-9/.])*$!) { # and only these characters
return 1; # then double-quote it
Psh::Parser - Perl Shell Parser
array decompose(regexp DELIMITER, string LINE, int PIECES,
bool KEEP, hashref QUOTINGPAIRS,
scalarref UNMATCHED_QUOTE)
decompose is a cross between split() and
Text::ParseWords::parse_line: it breaks LINE into at most PIECES
pieces separated by DELIMITER, except that the hash given by the
reference QUOTINGPAIRS specifies pairs of quotes (each key is an
open quote which matches the corresponding value) which prevent
splitting on internal instances of DELIMITER, and negate the effect
of other quotes. The quoting characters are retained if KEEP is
true, discarded otherwise. Matches to the regexp METACHARACTERS
(outside quotes) are their own words, regardless of being delimited.
Backslashes escape the meanings of characters that might match
delimiters, quotes, or metacharacters. Initial unquoted empty
The regexp DELIMITER may contain a single back-reference parenthesis
construct, in which case the matches to the parenthesized
subexpression are also placed among the pieces, as with the
built-in split. METACHARACTERS may not contain any parenthesized
decompose returns the array of pieces. If UNMATCHED_QUOTE is
specified, 1 will be placed in the scalar referred to if LINE
contained an unmatched quote, 0 otherwise.
If PIECES is undefined, as many pieces as
necessary are used. KEEP defaults to 1. If QUOTINGPAIRS is
undefined, {"'" => "'", "\"" => "\""} is used, i.e. single and
double quotes are recognized. Supply a reference to an empty hash to
have no quoting characters. METACHARACTERS defaults to a regexp that
EXAMPLE: if $line is exactly
echo fred(joe, "Happy Days
", ' steve"jan
', "\"Oh, no!\"")
then decompose(' ', $line) should break it at the
following places marked by vertical bars:
echo|fred(joe,|"Happy Days",|' steve
"jan',|"\"Oh
, no!\"")
int incomplete_expr(string LINE)
Returns 2 if LINE has unmatched quotations. Returns -1 if LINE has
mismatched parens. Otherwise, returns 1 if LINE has an unmatched
open brace, parenthesis, or square bracket and 0 in all other
cases. Summing up, negative is a mismatch, 0 is all OK, and positive
is unfinished business. (Reasonably good, can be fooled with some
effort. I therefore have deliberately not taken comments into
account, which means you can use them to "unfool
" this function, but
also that unmatched stuff in comments WILL fool this function.)
string unquote( string word)
Removes quotes from a word and backslash escapes
bool needs_double_quotes (string WORD)
Returns true if WORD needs double quotes around it to be interpreted
in a "shell
-like
" manner when passed to eval. This covers barewords,
expressions that just have \-escapes and $variables in them, and