* Copyright (c) 1983 Regents of the University of California.
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
static char sccsid
[] = "@(#)symbols.c 5.5 (Berkeley) %G%";
static char rcsid
[] = "$Header: symbols.c,v 1.4 88/04/02 01:29:03 donn Exp $";
typedef struct Symbol
*Symbol
;
BADUSE
, CONST
, TYPE
, VAR
, ARRAY
, OPENARRAY
, DYNARRAY
, SUBARRAY
,
PROC
, FUNC
, FVAR
, REF
, PTR
, FILET
, SET
, RANGE
,
LABEL
, WITHPTR
, SCAL
, STR
, PROG
, IMPROPER
, VARNT
,
FPROC
, FFUNC
, MODULE
, TAG
, COMMON
, EXTREF
, TYPEREF
typedef enum { R_CONST
, R_TEMP
, R_ARG
, R_ADJUST
} Rangetype
;
typedef unsigned integer Storage
;
unsigned int level
: 6; /* for variables stored on stack only */
Node constval
; /* value of constant symbol */
int offset
; /* variable address */
long iconval
; /* integer constant value */
double fconval
; /* floating constant value */
int ndims
; /* no. of dimensions for dynamic/sub-arrays */
struct { /* field offset and size (both in bits) */
struct { /* common offset and chain; used to relocate */
int offset
; /* vars in global BSS */
struct { /* range bounds */
Rangetype lowertype
: 16;
Rangetype uppertype
: 16;
int offset
: 16; /* offset for of function value */
Boolean src
: 1; /* true if there is source line info */
Boolean
inline : 1; /* true if no separate act. rec. */
Boolean intern
: 1; /* internal calling sequence */
Address beginaddr
; /* address of function code */
struct { /* variant record info */
String typeref
; /* type defined by "<module>:<type>" */
Symbol extref
; /* indirect symbol for external reference */
Symbol block
; /* symbol containing this symbol */
Symbol next_sym
; /* hash chain */
#define symname(s) ident(s->name)
#define codeloc(f) ((f)->symvalue.funcv.beginaddr)
#define isblock(s) (Boolean) ( \
s->class == FUNC or s->class == PROC or \
s->class == MODULE or s->class == PROG \
#define isroutine(s) (Boolean) ( \
s->class == FUNC or s->class == PROC \
#define nosource(f) (not (f)->symvalue.funcv.src)
#define isinline(f) ((f)->symvalue.funcv.inline)
#define isreg(s) (s->storage == INREG)
* Some macros to make finding a symbol with certain attributes.
#define find(s, withname) \
while (s != nil and not (s->name == (withname) and
#define where /* qualification */
#define endfind(s) )) { \
* Symbol table structure currently does not support deletions.
* Hash table size is a power of two to make hashing faster.
* Using a non-prime is ok since we aren't doing rehashing.
#define HASHTABLESIZE 8192
private Symbol hashtab
[HASHTABLESIZE
];
#define hash(name) ((((unsigned) name) >> 2) & (HASHTABLESIZE - 1))
#define SYMBLOCKSIZE 1000
struct Symbol sym
[SYMBLOCKSIZE
];
struct Sympool
*prevpool
;
private Sympool sympool
= nil
;
private Integer nleft
= 0;
public Symbol
symbol_alloc()
register Sympool newpool
;
bzero(newpool
, sizeof(*newpool
));
newpool
->prevpool
= sympool
;
return &(sympool
->sym
[nleft
]);
public symbol_dump (func
)
printf(" symbols in %s \n",symname(func
));
for (i
= 0; i
< HASHTABLESIZE
; i
++) {
for (s
= hashtab
[i
]; s
!= nil
; s
= s
->next_sym
) {
* Free all the symbols currently allocated.
for (i
= 0; i
< HASHTABLESIZE
; i
++) {
* Create a new symbol with the given attributes.
public Symbol
newSymbol(name
, blevel
, class, type
, chain
)
* Insert a symbol into the hash table.
public Symbol
insert(name
)
s
->next_sym
= hashtab
[h
];
public Symbol
lookup(name
)
while (s
!= nil
and s
->name
!= name
) {
* Delete a symbol from the symbol table.
panic("delete of non-symbol '%s'", symname(s
));
hashtab
[h
] = s
->next_sym
;
while (t
->next_sym
!= s
) {
panic("delete of non-symbol '%s'", symname(s
));
t
->next_sym
= s
->next_sym
;
* Dump out all the variables associated with the given
* procedure, function, or program associated with the given stack frame.
* This is quite inefficient. We traverse the entire symbol table
* each time we're called. The assumption is that this routine
* won't be called frequently enough to merit improved performance.
public dumpvars(f
, frame
)
for (i
= 0; i
< HASHTABLESIZE
; i
++) {
for (s
= hashtab
[i
]; s
!= nil
; s
= s
->next_sym
) {
} else if (s
->class == MODULE
) {
* Builtin types are circular in that btype->type->type = btype.
private Symbol
maketype(name
, lower
, upper
)
n
= identname(name
, true);
s
->type
= newSymbol(nil
, 0, RANGE
, s
, nil
);
s
->type
->symvalue
.rangev
.lower
= lower
;
s
->type
->symvalue
.rangev
.upper
= upper
;
* Create the builtin symbols.
t_boolean
= maketype("$boolean", 0L, 1L);
t_int
= maketype("$integer", 0x80000000L
, 0x7fffffffL
);
t_char
= maketype("$char", 0L, 255L);
t_real
= maketype("$real", 8L, 0L);
t_nil
= maketype("$nil", 0L, 0L);
t_addr
= insert(identname("$address", true));
t_addr
->language
= primlang
;
t_addr
->type
= newSymbol(nil
, 1, PTR
, t_int
, nil
);
s
= insert(identname("true", true));
s
->symvalue
.constval
= build(O_LCON
, 1L);
s
->symvalue
.constval
->nodetype
= t_boolean
;
s
= insert(identname("false", true));
s
->symvalue
.constval
= build(O_LCON
, 0L);
s
->symvalue
.constval
->nodetype
= t_boolean
;
* Reduce type to avoid worrying about type names.
public Symbol
rtype(type
)
if (t
->class == VAR
or t
->class == CONST
or
t
->class == FIELD
or t
->class == REF
if (t
->class == TYPEREF
) {
while (t
->class == TYPE
or t
->class == TAG
) {
if (t
->class == TYPEREF
) {
* Find the end of a module name. Return nil if there is none
private String
findModuleMark (s
)
* Resolve a type reference by modifying to be the appropriate type.
* If the reference has a name, then it refers to an opaque type and
* the actual type is directly accessible. Otherwise, we must use
* the type reference string, which is of the form "module:{module:}name".
start
= t
->symvalue
.typeref
;
p
= findModuleMark(start
);
n
= identname(start
, true);
find(m
, n
) where m
->block
== outer
endfind(m
);
p
= findModuleMark(start
);
n
= identname(start
, true);
find(s
, n
) where s
->block
== outer
endfind(s
);
if (s
!= nil
and s
->type
!= nil
) {
t
->class = s
->type
->class;
t
->chain
= s
->type
->chain
;
t
->symvalue
= s
->type
->symvalue
;
t
->block
= s
->type
->block
;
public integer
regnum (s
)
if (s
->storage
== INREG
) {
public Symbol
container(s
)
error("[internal error: constval(non-CONST)]");
return s
->symvalue
.constval
;
* Return the object address of the given symbol.
* There are the following possibilities:
* globals - just take offset
* locals - take offset from locals base
* arguments - take offset from argument base
* register - offset is register number
#define isglobal(s) (s->storage == EXT)
#define islocaloff(s) (s->storage == STK and s->symvalue.offset < 0)
#define isparamoff(s) (s->storage == STK and s->symvalue.offset >= 0)
public Address
address (s
, frame
)
if (not isactive(s
->block
)) {
error("\"%s\" is not currently defined", symname(s
));
} else if (isglobal(s
)) {
addr
= s
->symvalue
.offset
;
while (cur
!= nil
and cur
->class == MODULE
) {
error("[internal error: unexpected nil frame for \"%s\"]",
addr
= locals_base(frp
) + s
->symvalue
.offset
;
} else if (isparamoff(s
)) {
addr
= args_base(frp
) + s
->symvalue
.offset
;
addr
= savereg(s
->symvalue
.offset
, frp
);
panic("address: bad symbol \"%s\"", symname(s
));
* Define a symbol used to access register values.
s
->language
= t_addr
->language
;
* Resolve an "abstract" type reference.
* It is possible in C to define a pointer to a type, but never define
* the type in a particular source file. Here we try to resolve
* the type definition. This is problematic, it is possible to
* have multiple, different definitions for the same name type.
register Symbol t
, u
, prev
;
while (u
!= nil
and u
->class != BADUSE
) {
error("couldn't find link to type reference");
t
!= prev
and t
->name
== prev
->name
and
t
->block
->class == MODULE
and t
->class == prev
->class and
t
->type
!= nil
and t
->type
->type
!= nil
and
t
->type
->type
->class != BADUSE
error("couldn't resolve reference");
* Find the size in bytes of the given type.
* This is probably the WRONG thing to do. The size should be kept
* as an attribute in the symbol information as is done for structures
* and fields. I haven't gotten around to cleaning this up yet.
public findbounds (u
, lower
, upper
)
lbt
= u
->symvalue
.rangev
.lowertype
;
ubt
= u
->symvalue
.rangev
.uppertype
;
lb
= u
->symvalue
.rangev
.lower
;
ub
= u
->symvalue
.rangev
.upper
;
if (lbt
== R_ARG
or lbt
== R_TEMP
) {
if (not getbound(u
, lb
, lbt
, lower
)) {
error("dynamic bounds not currently available");
if (ubt
== R_ARG
or ubt
== R_TEMP
) {
if (not getbound(u
, ub
, ubt
, upper
)) {
error("dynamic bounds not currently available");
} else if (u
->class == SCAL
) {
*upper
= u
->symvalue
.iconval
- 1;
error("[internal error: unexpected array bound type]");
register integer nel
, elsize
;
if (t
->class == TYPEREF
) {
lower
= t
->symvalue
.rangev
.lower
;
upper
= t
->symvalue
.rangev
.upper
;
if (upper
== 0 and lower
> 0) {
} else if (lower
> upper
) {
(lower
>= MINCHAR
and upper
<= MAXCHAR
) or
(lower
>= 0 and upper
<= MAXUCHAR
)
(lower
>= MINSHORT
and upper
<= MAXSHORT
) or
(lower
>= 0 and upper
<= MAXUSHORT
)
for (t
= t
->chain
; t
!= nil
; t
= t
->chain
) {
findbounds(u
, &lower
, &upper
);
r
= (t
->symvalue
.ndims
+ 1) * sizeof(Word
);
r
= (2 * t
->symvalue
.ndims
+ 1) * sizeof(Word
);
if (r < sizeof(Word) and isparam(t)) {
* This causes problems on the IRIS because of the compiler bug
* with stab offsets for parameters. Not sure it's really
if (t
->type
->class == PTR
and t
->type
->type
->class == BADUSE
) {
off
= t
->symvalue
.field
.offset
;
len
= t
->symvalue
.field
.length
;
r
= (off
+ len
+ 7) div
8 - (off div
8);
if (r
== 0 and t
->chain
!= nil
) {
panic("missing size information for record");
if (t->symvalue.iconval > 255) {
r
= u
->symvalue
.rangev
.upper
- u
->symvalue
.rangev
.lower
+ 1;
error("expected range for set base type");
r
= (r
+ BITSPERBYTE
- 1) div BITSPERBYTE
;
* These can happen in C (unfortunately) for unresolved type references
* Assume they are pointers.
if (ord(t
->class) > ord(TYPEREF
)) {
panic("size: bad class (%d)", ord(t
->class));
fprintf(stderr
, "can't compute size of a %s\n", classname(t
));
* Return the size associated with a symbol that takes into account
* reference parameters. This might be better as the normal size function, but
* too many places already depend on it working the way it does.
if (t
->class == OPENARRAY
) {
r
= (t
->symvalue
.ndims
+ 1) * sizeof(Word
);
} else if (t
->class == SUBARRAY
) {
r
= (2 * t
->symvalue
.ndims
+ 1) * sizeof(Word
);
* Test if a symbol is a parameter. This is true if there
* is a cycle from s->block to s via chain pointers.
public Boolean
isparam(s
)
while (t
!= nil
and t
!= s
) {
return (Boolean
) (t
!= nil
);
* Test if a type is an open array parameter type.
public boolean
isopenarray (type
)
return (boolean
) (t
->class == OPENARRAY
);
* Test if a symbol is a var parameter, i.e. has class REF.
public Boolean
isvarparam(s
)
return (Boolean
) (s
->class == REF
);
* Test if a symbol is a variable (actually any addressible quantity
public Boolean
isvariable(s
)
return (Boolean
) (s
->class == VAR
or s
->class == FVAR
or s
->class == REF
);
* Test if a symbol is a constant.
public Boolean
isconst(s
)
return (Boolean
) (s
->class == CONST
);
* Test if a symbol is a module.
public Boolean
ismodule(s
)
return (Boolean
) (s
->class == MODULE
);
* Mark a procedure or function as internal, meaning that it is called
* with a different calling sequence.
s
->symvalue
.funcv
.intern
= true;
public boolean
isinternal (s
)
return s
->symvalue
.funcv
.intern
;
* Decide if a field begins or ends on a bit rather than byte boundary.
public Boolean
isbitfield(s
)
register integer off
, len
;
off
= s
->symvalue
.field
.offset
;
len
= s
->symvalue
.field
.length
;
if ((off mod BITSPERBYTE
) != 0 or (len mod BITSPERBYTE
) != 0) {
(t
->class == SCAL
and len
!= (sizeof(int)*BITSPERBYTE
)) or
len
!= (size(t
)*BITSPERBYTE
)
private boolean
primlang_typematch (t1
, t2
)
t1
->class == RANGE
and t2
->class == RANGE
and
t1
->symvalue
.rangev
.lower
== t2
->symvalue
.rangev
.lower
and
t1
->symvalue
.rangev
.upper
== t2
->symvalue
.rangev
.upper
t1
->class == PTR
and t2
->class == RANGE
and
t2
->symvalue
.rangev
.upper
>= t2
->symvalue
.rangev
.lower
t2
->class == PTR
and t1
->class == RANGE
and
t1
->symvalue
.rangev
.upper
>= t1
->symvalue
.rangev
.lower
* Test if two types match.
* Equivalent names implies a match in any language.
* Special symbols must be handled with care.
public Boolean
compatible(t1
, t2
)
} else if (t1
== nil
or t2
== nil
) {
} else if (t1
== procsym
) {
} else if (t2
== procsym
) {
} else if (t1
->language
== nil
) {
if (t2
->language
== nil
) {
} else if (t2
->language
== primlang
) {
b
= (boolean
) primlang_typematch(rtype(t1
), rtype(t2
));
b
= (boolean
) (*language_op(t2
->language
, L_TYPEMATCH
))(t1
, t2
);
} else if (t1
->language
== primlang
) {
if (t2
->language
== primlang
or t2
->language
== nil
) {
b
= primlang_typematch(rtype(t1
), rtype(t2
));
b
= (boolean
) (*language_op(t2
->language
, L_TYPEMATCH
))(t1
, t2
);
b
= (boolean
) (*language_op(t1
->language
, L_TYPEMATCH
))(t1
, t2
);
* Check for a type of the given name.
public Boolean
istypename(type
, name
)
t
->class == TYPE
and streq(ident(t
->name
), name
)
* Determine if a (value) parameter should actually be passed by address.
public boolean
passaddr (p
, exprtype
)
def
= findlanguage(".c");
b
= (boolean
) (*language_op(def
, L_PASSADDR
))(p
, exprtype
);
} else if (p
->language
== nil
or p
->language
== primlang
) {
} else if (isopenarray(p
->type
)) {
b
= (boolean
) (*language_op(p
->language
, L_PASSADDR
))(p
, exprtype
);
* Test if the name of a symbol is uniquely defined or not.
public Boolean
isambiguous(s
)
find(t
, s
->name
) where t
!= s
endfind(t
);
return (Boolean
) (t
!= nil
);
#define nextarg(type) ((type *) (ap += sizeof(type)))[-1]
private Symbol
mkstring();
* Determine the type of a parse tree.
* Also make some symbol-dependent changes to the tree such as
* removing indirection for constant or register symbols.
p
->nodetype
= p
->value
.sym
;
p
->nodetype
= mkstring(p
->value
.scon
);
fprintf(stderr
, "\" is not a pointer");
p
->nodetype
= rtype(p1
->nodetype
)->type
;
p
->nodetype
= p
->value
.arg
[1]->value
.sym
;
p
->nodetype
= p1
->nodetype
;
if (p1
->nodetype
->class == PROC
or p
->nodetype
->class == FUNC
) {
p
->value
.sym
= p1
->value
.sym
;
p
->nodetype
= p1
->nodetype
;
} else if (p1
->value
.sym
->class == CONST
) {
p
->nodetype
= p1
->nodetype
;
} else if (isreg(p1
->value
.sym
)) {
p
->value
.sym
= p1
->value
.sym
;
} else if (p1
->op
== O_INDIR
and p1
->value
.arg
[0]->op
== O_SYM
) {
s
= p1
->value
.arg
[0]->value
.sym
;
dispose(p1
->value
.arg
[0]);
p
->nodetype
= p
->value
.arg
[0]->nodetype
;
p
->nodetype
= rtype(p1
->nodetype
)->type
;
p
->nodetype
= p
->value
.arg
[1]->nodetype
;
s
= p
->value
.arg
[0]->nodetype
;
if (not compatible(s
, t_int
)) {
if (not compatible(s
, t_real
)) {
prtree(stderr
, p
->value
.arg
[0]);
fprintf(stderr
, "\" is improper type");
convert(&(p
->value
.arg
[0]), t_real
, O_ITOF
);
convert(&(p
->value
.arg
[1]), t_real
, O_ITOF
);
convert(&(p
->value
.arg
[0]), t_int
, O_NOP
);
convert(&(p
->value
.arg
[1]), t_int
, O_NOP
);
chkboolean(p
->value
.arg
[0]);
chkboolean(p
->value
.arg
[1]);
* Process a binary arithmetic or relational operator.
* Convert from integer to real if necessary.
t1
= rtype(p1
->nodetype
);
t2
= rtype(p2
->nodetype
);
t1real
= compatible(t1
, t_real
);
t2real
= compatible(t2
, t_real
);
p
->op
= (Operator
) (ord(p
->op
) + 1);
p
->value
.arg
[0] = build(O_ITOF
, p1
);
p
->value
.arg
[1] = build(O_ITOF
, p2
);
if (size(p1
->nodetype
) > sizeof(integer
)) {
fprintf(stderr
, "operation not defined on \"");
} else if (size(p2
->nodetype
) > sizeof(integer
)) {
fprintf(stderr
, "operation not defined on \"");
* Convert a tree to a type via a conversion operator;
* if this isn't possible generate an error.
private convert(tp
, typeto
, op
)
s
= rtype(tree
->nodetype
);
if (compatible(t
, t_real
) and compatible(s
, t_int
)) {
/* we can convert int => floating but not the reverse */
} else if (not compatible(s
, t
)) {
fprintf(stderr
, ": illegal type in operation");
* Construct a node for the dot operator.
* If the left operand is not a record, but rather a procedure
* or function, then we interpret the "." as referencing an
* "invisible" variable; i.e. a variable within a dynamically
* active block but not within the static scope of the current procedure.
public Node
dot(record
, fieldname
)
if (isblock(rec
->nodetype
)) {
s
->block
== rec
->nodetype
and
fprintf(stderr
, "\"%s\" is not defined in ", ident(fieldname
));
printname(stderr
, rec
->nodetype
);
s
= findfield(fieldname
, t
->type
);
s
= findfield(fieldname
, t
);
fprintf(stderr
, "\"%s\" is not a field in ", ident(fieldname
));
if (t
->class != PTR
or isreg(rec
->nodetype
)) {
p
= build(O_DOT
, p
, build(O_SYM
, s
));
* Return a tree corresponding to an array reference and do the
public Node
subscript(a
, slist
)
if (t
->language
== nil
or t
->language
== primlang
) {
p
= (Node
) (*language_op(findlanguage(".s"), L_BUILDAREF
))(a
, slist
);
p
= (Node
) (*language_op(t
->language
, L_BUILDAREF
))(a
, slist
);
* Evaluate a subscript index.
public int evalindex(s
, base
, i
)
if (t
->language
== nil
or t
->language
== primlang
) {
r
= ((*language_op(findlanguage(".s"), L_EVALAREF
)) (s
, base
, i
));
r
= ((*language_op(t
->language
, L_EVALAREF
)) (s
, base
, i
));
* Check to see if a tree is boolean-valued, if not it's an error.
if (p
->nodetype
!= t_boolean
) {
fprintf(stderr
, "found ");
fprintf(stderr
, ", expected boolean expression");
* Construct a node for the type of a string.
private Symbol
mkstring(str
)
s
= newSymbol(nil
, 0, ARRAY
, t_char
, nil
);
s
->chain
= newSymbol(nil
, 0, RANGE
, t_int
, nil
);
s
->chain
->language
= s
->language
;
s
->chain
->symvalue
.rangev
.lower
= 1;
s
->chain
->symvalue
.rangev
.upper
= strlen(str
) + 1;
* Free up the space allocated for a string type.
* Figure out the "current" variable or function being referred to
private boolean
stwhich(), dynwhich();
error("\"%s\" is not defined", ident(n
));
} else if (not stwhich(&s
) and isambiguous(s
) and not dynwhich(&s
)) {
private boolean
stwhich (var_s
)
Name n
; /* name of desired symbol */
Symbol s
; /* iteration variable for symbols with name n */
Symbol f
; /* iteration variable for blocks containing s */
integer count
; /* number of levels from s->block to curfunc */
Symbol t
; /* current best answer for stwhich(n) */
integer mincount
; /* relative level for current best answer (t) */
boolean b
; /* return value, true if symbol found */
mincount
= 10000; /* force first match to set mincount */
if (s
->name
== n
and s
->class != FIELD
and s
->class != TAG
) {
while (f
!= nil
and f
!= s
->block
) {
if (f
!= nil
and count
< mincount
) {
private boolean
dynwhich (var_s
)
Name n
; /* name of desired symbol */
Symbol s
; /* iteration variable for possible symbols */
Symbol f
; /* iteration variable for active functions */
Frame frp
; /* frame associated with stack walk */
boolean b
; /* return value */
s
->name
!= n
or s
->block
!= f
or
s
->class == FIELD
or s
->class == TAG
* Find the symbol that has the same name and scope as the
* given symbol but is of the given field. Return nil if there is none.
public Symbol
findfield (fieldname
, record
)
t
= rtype(record
)->chain
;
while (t
!= nil
and t
->name
!= fieldname
) {
public Boolean
getbound(s
,off
,type
,valp
)
if (not isactive(s
->block
)) {
while (cur
!= nil
and cur
->class == MODULE
) { /* WHY*/
if(type
== R_TEMP
) addr
= locals_base(frp
) + off
;
else if (type
== R_ARG
) addr
= args_base(frp
) + off
;
dread(valp
,addr
,sizeof(long));