# -*- coding: iso-8859-1 -*-
"""A lexical analyzer class for simple shell-like syntaxes."""
# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.

# os and sys were missing from the import block but are used later in
# this file (sys.stdin in __init__, os.path in sourcehook).
import os
import sys
from collections import deque

# Prefer the C-accelerated StringIO on Python 2, fall back to the
# pure-Python module, then to io.StringIO for forward compatibility.
try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

__all__ = ["shlex", "split"]

# NOTE(review): the string below looks like the docstring of the shlex
# class, whose `class shlex:` header is missing from this chunk of the
# file -- confirm against the full source.
"A lexical analyzer class for simple shell-like syntaxes."
# NOTE(review): damaged fragment of shlex.__init__ -- statements below
# are split mid-expression across physical lines, and most of the stock
# initializer (commenters, quotes, escape characters, pushback deque,
# lineno/debug/state bookkeeping) is missing from this chunk.
# Code kept byte-identical; comments only.
def __init__(self
, instream
=None, infile
=None, posix
=False):
# A plain string argument is wrapped in a file-like StringIO object.
if isinstance(instream
, basestring
):
instream
= StringIO(instream
)
# Fallback input is stdin; the branch that keeps a caller-supplied
# `instream` is not visible in this fragment -- confirm.
self
.instream
= sys
.stdin
# Characters permitted in a bare (unquoted) word.  The 'f'/'e'
# transposition in the literal below is harmless: the string is only
# used for `in` membership tests, so order does not matter.
self
.wordchars
= ('abcdfeghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
# Latin-1 accented letters also count as word characters (the file is
# declared iso-8859-1).  Presumably this extension is conditional on
# posix mode in the undamaged source -- TODO confirm.
self
.wordchars
+= ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
# Characters that separate tokens.
self
.whitespace
= ' \t\r\n'
# When True, tokens are split only on whitespace.
self
.whitespace_split
= False
# Debug trace.  NOTE(review): self.lineno is read here but never
# assigned anywhere in this fragment -- confirm it is initialized in
# the missing portion.
print 'shlex: reading from %s, line %d' \
% (self
.instream
, self
.lineno
)
def push_token(self
, tok
):
"Push a token onto the stack popped by the get_token method"
print "shlex: pushing token " + repr(tok
)
self
.pushback
.appendleft(tok
)
def push_source(self
, newstream
, newfile
=None):
"Push an input source onto the lexer's input source stack."
if isinstance(newstream
, basestring
):
newstream
= StringIO(newstream
)
self
.filestack
.appendleft((self
.infile
, self
.instream
, self
.lineno
))
self
.instream
= newstream
print 'shlex: pushing to file %s' % (self
.infile
,)
print 'shlex: pushing to stream %s' % (self
.instream
,)
"Pop the input source stack."
(self
.infile
, self
.instream
, self
.lineno
) = self
.filestack
.popleft()
print 'shlex: popping to %s, line %d' \
% (self
.instream
, self
.lineno
)
# NOTE(review): remnant of shlex.get_token -- the `def get_token(self):`
# header, the pushback-empty check, and the EOF handling are missing
# from this chunk.  `raw` is read below but never assigned in this
# fragment (presumably `raw = self.read_token()` was lost).
# Code kept byte-identical; comments only.
"Get a token from the input stream (or from stack if it's nonempty)"
# Return a previously pushed-back token if one is queued.
tok
= self
.pushback
.popleft()
print "shlex: popping token " + repr(tok
)
# No pushback. Get a token.
# Source-inclusion support: while the token equals the source keyword,
# read the following token as a filename, resolve it via sourcehook()
# (which returns a (filename, stream) pair), and push that stream.
if self
.source
is not None:
while raw
== self
.source
:
spec
= self
.sourcehook(self
.read_token())
(newfile
, newstream
) = spec
self
.push_source(newstream
, newfile
)
# Maybe we got EOF instead?
# Neither inclusion nor EOF
print "shlex: token=" + repr(raw
)
# NOTE(review): remnant of the shlex.read_token state machine -- the
# `def read_token(self):` header, the surrounding read loop, and the
# bodies of several `elif` branches are missing from this chunk, and
# `quoted` / `result` are read but never assigned here.  The state
# names below are taken from the surviving debug prints.
# Code kept byte-identical; comments only.
nextchar
= self
.instream
.read(1)
# Track line numbers as newlines are consumed.
self
.lineno
= self
.lineno
+ 1
print "shlex: in state", repr(self
.state
), \
"I see character:", repr(nextchar
)
# EOF handling: clear the token and mark the lexer finished.
self
.token
= '' # past end of file
self
.state
= None # end of file
# --- whitespace (initial) state, per the debug print below ---
elif nextchar
in self
.whitespace
:
print "shlex: I see whitespace in whitespace state"
if self
.token
or (self
.posix
and quoted
):
break # emit current token
elif nextchar
in self
.commenters
:
self
.lineno
= self
.lineno
+ 1
# NOTE(review): the next three branches have no bodies in this
# fragment -- their handling code was lost.
elif self
.posix
and nextchar
in self
.escape
:
elif nextchar
in self
.wordchars
:
elif nextchar
in self
.quotes
:
elif self
.whitespace_split
:
if self
.token
or (self
.posix
and quoted
):
break # emit current token
# --- quoted-string state (state is the opening quote char) ---
elif self
.state
in self
.quotes
:
if not nextchar
: # end of file
print "shlex: I see EOF in quotes state"
# XXX what error should be raised here?
raise ValueError, "No closing quotation"
if nextchar
== self
.state
:
self
.token
= self
.token
+ nextchar
elif self
.posix
and nextchar
in self
.escape
and \
self
.state
in self
.escapedquotes
:
# Remember which quote state to return to after the escape.
escapedstate
= self
.state
self
.token
= self
.token
+ nextchar
# --- escape state ---
elif self
.state
in self
.escape
:
if not nextchar
: # end of file
print "shlex: I see EOF in escape state"
# XXX what error should be raised here?
raise ValueError, "No escaped character"
# In posix shells, only the quote itself or the escape
# character may be escaped within quotes.
if escapedstate
in self
.quotes
and \
nextchar
!= self
.state
and nextchar
!= escapedstate
:
self
.token
= self
.token
+ self
.state
self
.token
= self
.token
+ nextchar
self
.state
= escapedstate
self
.state
= None # end of file
# --- word state, per the debug prints below ---
elif nextchar
in self
.whitespace
:
print "shlex: I see whitespace in word state"
if self
.token
or (self
.posix
and quoted
):
break # emit current token
elif nextchar
in self
.commenters
:
self
.lineno
= self
.lineno
+ 1
if self
.token
or (self
.posix
and quoted
):
break # emit current token
# NOTE(review): the next two branches also lost their bodies.
elif self
.posix
and nextchar
in self
.quotes
:
elif self
.posix
and nextchar
in self
.escape
:
elif nextchar
in self
.wordchars
or nextchar
in self
.quotes \
or self
.whitespace_split
:
self
.token
= self
.token
+ nextchar
# Punctuation ends the word; push the char back for the next read.
self
.pushback
.appendleft(nextchar
)
print "shlex: I see punctuation in word state"
break # emit current token
# --- token finalization / debug trace ---
if self
.posix
and not quoted
and result
== '':
print "shlex: raw token=" + repr(result
)
print "shlex: raw token=EOF"
def sourcehook(self, newfile):
    "Hook called on a filename to be sourced."
    # This implements cpp-like semantics for relative-path inclusion:
    # a relative include is resolved against the directory of the file
    # currently being read (when that file name is known).
    resolve_relative = (isinstance(self.infile, basestring)
                        and not os.path.isabs(newfile))
    if resolve_relative:
        base = os.path.dirname(self.infile)
        newfile = os.path.join(base, newfile)
    # Return the (possibly rewritten) name together with an open stream.
    return (newfile, open(newfile, "r"))
def error_leader(self, infile=None, lineno=None):
    "Emit a C-compiler-like, Emacs-friendly error-message leader."
    # Bug fix: the None defaults were formatted straight into "%d", so
    # calling this with no arguments raised TypeError.  Fall back to
    # the lexer's current file name and line number, which is what the
    # optional parameters imply.
    if infile is None:
        infile = self.infile
    if lineno is None:
        lineno = self.lineno
    return "\"%s\", line %d: " % (infile, lineno)
def split(s, comments=False):
    """Split the string *s* using shell-like syntax.

    When *comments* is false (the default), comment stripping is
    disabled so '#' is treated as an ordinary character.
    Returns the list of tokens.
    """
    lex = shlex(s, posix=True)
    lex.whitespace_split = True
    # Bug fix: this fragment configured the lexer but never produced a
    # result.  Honor the `comments` flag (the lexer's `commenters`
    # attribute holds the comment-start characters) and exhaust the
    # lexer's iterator interface to collect all tokens.
    if not comments:
        lex.commenters = ''
    return list(lex)
# NOTE(review): damaged fragment of the module's self-test driver --
# the token loop that assigns `tt` (presumably repeated get_token()
# calls until EOF) is missing, and `file` is never assigned here, so
# open(file) would receive the builtin `file` type; the undamaged
# driver presumably takes the filename from sys.argv -- confirm.
# Code kept byte-identical; comments only.
if __name__
== '__main__':
lexer
= shlex(open(file), file)
print "Token: " + repr(tt
)