Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """Module symbol-table generator""" |
2 | ||
3 | from compiler import ast | |
4 | from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN | |
5 | from compiler.misc import mangle | |
6 | import types | |
7 | ||
8 | ||
9 | import sys | |
10 | ||
11 | MANGLE_LEN = 256 | |
12 | ||
13 | class Scope: | |
14 | # XXX how much information do I need about each name? | |
15 | def __init__(self, name, module, klass=None): | |
16 | self.name = name | |
17 | self.module = module | |
18 | self.defs = {} | |
19 | self.uses = {} | |
20 | self.globals = {} | |
21 | self.params = {} | |
22 | self.frees = {} | |
23 | self.cells = {} | |
24 | self.children = [] | |
25 | # nested is true if the class could contain free variables, | |
26 | # i.e. if it is nested within another function. | |
27 | self.nested = None | |
28 | self.generator = None | |
29 | self.klass = None | |
30 | if klass is not None: | |
31 | for i in range(len(klass)): | |
32 | if klass[i] != '_': | |
33 | self.klass = klass[i:] | |
34 | break | |
35 | ||
36 | def __repr__(self): | |
37 | return "<%s: %s>" % (self.__class__.__name__, self.name) | |
38 | ||
39 | def mangle(self, name): | |
40 | if self.klass is None: | |
41 | return name | |
42 | return mangle(name, self.klass) | |
43 | ||
44 | def add_def(self, name): | |
45 | self.defs[self.mangle(name)] = 1 | |
46 | ||
47 | def add_use(self, name): | |
48 | self.uses[self.mangle(name)] = 1 | |
49 | ||
50 | def add_global(self, name): | |
51 | name = self.mangle(name) | |
52 | if self.uses.has_key(name) or self.defs.has_key(name): | |
53 | pass # XXX warn about global following def/use | |
54 | if self.params.has_key(name): | |
55 | raise SyntaxError, "%s in %s is global and parameter" % \ | |
56 | (name, self.name) | |
57 | self.globals[name] = 1 | |
58 | self.module.add_def(name) | |
59 | ||
60 | def add_param(self, name): | |
61 | name = self.mangle(name) | |
62 | self.defs[name] = 1 | |
63 | self.params[name] = 1 | |
64 | ||
65 | def get_names(self): | |
66 | d = {} | |
67 | d.update(self.defs) | |
68 | d.update(self.uses) | |
69 | d.update(self.globals) | |
70 | return d.keys() | |
71 | ||
72 | def add_child(self, child): | |
73 | self.children.append(child) | |
74 | ||
75 | def get_children(self): | |
76 | return self.children | |
77 | ||
78 | def DEBUG(self): | |
79 | print >> sys.stderr, self.name, self.nested and "nested" or "" | |
80 | print >> sys.stderr, "\tglobals: ", self.globals | |
81 | print >> sys.stderr, "\tcells: ", self.cells | |
82 | print >> sys.stderr, "\tdefs: ", self.defs | |
83 | print >> sys.stderr, "\tuses: ", self.uses | |
84 | print >> sys.stderr, "\tfrees:", self.frees | |
85 | ||
86 | def check_name(self, name): | |
87 | """Return scope of name. | |
88 | ||
89 | The scope of a name could be LOCAL, GLOBAL, FREE, or CELL. | |
90 | """ | |
91 | if self.globals.has_key(name): | |
92 | return SC_GLOBAL | |
93 | if self.cells.has_key(name): | |
94 | return SC_CELL | |
95 | if self.defs.has_key(name): | |
96 | return SC_LOCAL | |
97 | if self.nested and (self.frees.has_key(name) or | |
98 | self.uses.has_key(name)): | |
99 | return SC_FREE | |
100 | if self.nested: | |
101 | return SC_UNKNOWN | |
102 | else: | |
103 | return SC_GLOBAL | |
104 | ||
105 | def get_free_vars(self): | |
106 | if not self.nested: | |
107 | return () | |
108 | free = {} | |
109 | free.update(self.frees) | |
110 | for name in self.uses.keys(): | |
111 | if not (self.defs.has_key(name) or | |
112 | self.globals.has_key(name)): | |
113 | free[name] = 1 | |
114 | return free.keys() | |
115 | ||
116 | def handle_children(self): | |
117 | for child in self.children: | |
118 | frees = child.get_free_vars() | |
119 | globals = self.add_frees(frees) | |
120 | for name in globals: | |
121 | child.force_global(name) | |
122 | ||
123 | def force_global(self, name): | |
124 | """Force name to be global in scope. | |
125 | ||
126 | Some child of the current node had a free reference to name. | |
127 | When the child was processed, it was labelled a free | |
128 | variable. Now that all its enclosing scope have been | |
129 | processed, the name is known to be a global or builtin. So | |
130 | walk back down the child chain and set the name to be global | |
131 | rather than free. | |
132 | ||
133 | Be careful to stop if a child does not think the name is | |
134 | free. | |
135 | """ | |
136 | self.globals[name] = 1 | |
137 | if self.frees.has_key(name): | |
138 | del self.frees[name] | |
139 | for child in self.children: | |
140 | if child.check_name(name) == SC_FREE: | |
141 | child.force_global(name) | |
142 | ||
143 | def add_frees(self, names): | |
144 | """Process list of free vars from nested scope. | |
145 | ||
146 | Returns a list of names that are either 1) declared global in the | |
147 | parent or 2) undefined in a top-level parent. In either case, | |
148 | the nested scope should treat them as globals. | |
149 | """ | |
150 | child_globals = [] | |
151 | for name in names: | |
152 | sc = self.check_name(name) | |
153 | if self.nested: | |
154 | if sc == SC_UNKNOWN or sc == SC_FREE \ | |
155 | or isinstance(self, ClassScope): | |
156 | self.frees[name] = 1 | |
157 | elif sc == SC_GLOBAL: | |
158 | child_globals.append(name) | |
159 | elif isinstance(self, FunctionScope) and sc == SC_LOCAL: | |
160 | self.cells[name] = 1 | |
161 | elif sc != SC_CELL: | |
162 | child_globals.append(name) | |
163 | else: | |
164 | if sc == SC_LOCAL: | |
165 | self.cells[name] = 1 | |
166 | elif sc != SC_CELL: | |
167 | child_globals.append(name) | |
168 | return child_globals | |
169 | ||
170 | def get_cell_vars(self): | |
171 | return self.cells.keys() | |
172 | ||
173 | class ModuleScope(Scope): | |
174 | __super_init = Scope.__init__ | |
175 | ||
176 | def __init__(self): | |
177 | self.__super_init("global", self) | |
178 | ||
179 | class FunctionScope(Scope): | |
180 | pass | |
181 | ||
182 | class GenExprScope(Scope): | |
183 | __super_init = Scope.__init__ | |
184 | ||
185 | __counter = 1 | |
186 | ||
187 | def __init__(self, module, klass=None): | |
188 | i = self.__counter | |
189 | self.__counter += 1 | |
190 | self.__super_init("generator expression<%d>"%i, module, klass) | |
191 | self.add_param('[outmost-iterable]') | |
192 | ||
193 | def get_names(self): | |
194 | keys = Scope.get_names() | |
195 | return keys | |
196 | ||
197 | class LambdaScope(FunctionScope): | |
198 | __super_init = Scope.__init__ | |
199 | ||
200 | __counter = 1 | |
201 | ||
202 | def __init__(self, module, klass=None): | |
203 | i = self.__counter | |
204 | self.__counter += 1 | |
205 | self.__super_init("lambda.%d" % i, module, klass) | |
206 | ||
207 | class ClassScope(Scope): | |
208 | __super_init = Scope.__init__ | |
209 | ||
210 | def __init__(self, name, module): | |
211 | self.__super_init(name, module, name) | |
212 | ||
213 | class SymbolVisitor: | |
214 | def __init__(self): | |
215 | self.scopes = {} | |
216 | self.klass = None | |
217 | ||
218 | # node that define new scopes | |
219 | ||
220 | def visitModule(self, node): | |
221 | scope = self.module = self.scopes[node] = ModuleScope() | |
222 | self.visit(node.node, scope) | |
223 | ||
224 | visitExpression = visitModule | |
225 | ||
226 | def visitFunction(self, node, parent): | |
227 | if node.decorators: | |
228 | self.visit(node.decorators, parent) | |
229 | parent.add_def(node.name) | |
230 | for n in node.defaults: | |
231 | self.visit(n, parent) | |
232 | scope = FunctionScope(node.name, self.module, self.klass) | |
233 | if parent.nested or isinstance(parent, FunctionScope): | |
234 | scope.nested = 1 | |
235 | self.scopes[node] = scope | |
236 | self._do_args(scope, node.argnames) | |
237 | self.visit(node.code, scope) | |
238 | self.handle_free_vars(scope, parent) | |
239 | ||
240 | def visitGenExpr(self, node, parent): | |
241 | scope = GenExprScope(self.module, self.klass); | |
242 | if parent.nested or isinstance(parent, FunctionScope) \ | |
243 | or isinstance(parent, GenExprScope): | |
244 | scope.nested = 1 | |
245 | ||
246 | self.scopes[node] = scope | |
247 | self.visit(node.code, scope) | |
248 | ||
249 | self.handle_free_vars(scope, parent) | |
250 | ||
251 | def visitGenExprInner(self, node, scope): | |
252 | for genfor in node.quals: | |
253 | self.visit(genfor, scope) | |
254 | ||
255 | self.visit(node.expr, scope) | |
256 | ||
257 | def visitGenExprFor(self, node, scope): | |
258 | self.visit(node.assign, scope, 1) | |
259 | self.visit(node.iter, scope) | |
260 | for if_ in node.ifs: | |
261 | self.visit(if_, scope) | |
262 | ||
263 | def visitGenExprIf(self, node, scope): | |
264 | self.visit(node.test, scope) | |
265 | ||
266 | def visitLambda(self, node, parent, assign=0): | |
267 | # Lambda is an expression, so it could appear in an expression | |
268 | # context where assign is passed. The transformer should catch | |
269 | # any code that has a lambda on the left-hand side. | |
270 | assert not assign | |
271 | ||
272 | for n in node.defaults: | |
273 | self.visit(n, parent) | |
274 | scope = LambdaScope(self.module, self.klass) | |
275 | if parent.nested or isinstance(parent, FunctionScope): | |
276 | scope.nested = 1 | |
277 | self.scopes[node] = scope | |
278 | self._do_args(scope, node.argnames) | |
279 | self.visit(node.code, scope) | |
280 | self.handle_free_vars(scope, parent) | |
281 | ||
282 | def _do_args(self, scope, args): | |
283 | for name in args: | |
284 | if type(name) == types.TupleType: | |
285 | self._do_args(scope, name) | |
286 | else: | |
287 | scope.add_param(name) | |
288 | ||
289 | def handle_free_vars(self, scope, parent): | |
290 | parent.add_child(scope) | |
291 | scope.handle_children() | |
292 | ||
293 | def visitClass(self, node, parent): | |
294 | parent.add_def(node.name) | |
295 | for n in node.bases: | |
296 | self.visit(n, parent) | |
297 | scope = ClassScope(node.name, self.module) | |
298 | if parent.nested or isinstance(parent, FunctionScope): | |
299 | scope.nested = 1 | |
300 | if node.doc is not None: | |
301 | scope.add_def('__doc__') | |
302 | scope.add_def('__module__') | |
303 | self.scopes[node] = scope | |
304 | prev = self.klass | |
305 | self.klass = node.name | |
306 | self.visit(node.code, scope) | |
307 | self.klass = prev | |
308 | self.handle_free_vars(scope, parent) | |
309 | ||
310 | # name can be a def or a use | |
311 | ||
312 | # XXX a few calls and nodes expect a third "assign" arg that is | |
313 | # true if the name is being used as an assignment. only | |
314 | # expressions contained within statements may have the assign arg. | |
315 | ||
316 | def visitName(self, node, scope, assign=0): | |
317 | if assign: | |
318 | scope.add_def(node.name) | |
319 | else: | |
320 | scope.add_use(node.name) | |
321 | ||
322 | # operations that bind new names | |
323 | ||
324 | def visitFor(self, node, scope): | |
325 | self.visit(node.assign, scope, 1) | |
326 | self.visit(node.list, scope) | |
327 | self.visit(node.body, scope) | |
328 | if node.else_: | |
329 | self.visit(node.else_, scope) | |
330 | ||
331 | def visitFrom(self, node, scope): | |
332 | for name, asname in node.names: | |
333 | if name == "*": | |
334 | continue | |
335 | scope.add_def(asname or name) | |
336 | ||
337 | def visitImport(self, node, scope): | |
338 | for name, asname in node.names: | |
339 | i = name.find(".") | |
340 | if i > -1: | |
341 | name = name[:i] | |
342 | scope.add_def(asname or name) | |
343 | ||
344 | def visitGlobal(self, node, scope): | |
345 | for name in node.names: | |
346 | scope.add_global(name) | |
347 | ||
348 | def visitAssign(self, node, scope): | |
349 | """Propagate assignment flag down to child nodes. | |
350 | ||
351 | The Assign node doesn't itself contains the variables being | |
352 | assigned to. Instead, the children in node.nodes are visited | |
353 | with the assign flag set to true. When the names occur in | |
354 | those nodes, they are marked as defs. | |
355 | ||
356 | Some names that occur in an assignment target are not bound by | |
357 | the assignment, e.g. a name occurring inside a slice. The | |
358 | visitor handles these nodes specially; they do not propagate | |
359 | the assign flag to their children. | |
360 | """ | |
361 | for n in node.nodes: | |
362 | self.visit(n, scope, 1) | |
363 | self.visit(node.expr, scope) | |
364 | ||
365 | def visitAssName(self, node, scope, assign=1): | |
366 | scope.add_def(node.name) | |
367 | ||
368 | def visitAssAttr(self, node, scope, assign=0): | |
369 | self.visit(node.expr, scope, 0) | |
370 | ||
371 | def visitSubscript(self, node, scope, assign=0): | |
372 | self.visit(node.expr, scope, 0) | |
373 | for n in node.subs: | |
374 | self.visit(n, scope, 0) | |
375 | ||
376 | def visitSlice(self, node, scope, assign=0): | |
377 | self.visit(node.expr, scope, 0) | |
378 | if node.lower: | |
379 | self.visit(node.lower, scope, 0) | |
380 | if node.upper: | |
381 | self.visit(node.upper, scope, 0) | |
382 | ||
383 | def visitAugAssign(self, node, scope): | |
384 | # If the LHS is a name, then this counts as assignment. | |
385 | # Otherwise, it's just use. | |
386 | self.visit(node.node, scope) | |
387 | if isinstance(node.node, ast.Name): | |
388 | self.visit(node.node, scope, 1) # XXX worry about this | |
389 | self.visit(node.expr, scope) | |
390 | ||
391 | # prune if statements if tests are false | |
392 | ||
393 | _const_types = types.StringType, types.IntType, types.FloatType | |
394 | ||
395 | def visitIf(self, node, scope): | |
396 | for test, body in node.tests: | |
397 | if isinstance(test, ast.Const): | |
398 | if type(test.value) in self._const_types: | |
399 | if not test.value: | |
400 | continue | |
401 | self.visit(test, scope) | |
402 | self.visit(body, scope) | |
403 | if node.else_: | |
404 | self.visit(node.else_, scope) | |
405 | ||
406 | # a yield statement signals a generator | |
407 | ||
408 | def visitYield(self, node, scope): | |
409 | scope.generator = 1 | |
410 | self.visit(node.value, scope) | |
411 | ||
412 | def sort(l): | |
413 | l = l[:] | |
414 | l.sort() | |
415 | return l | |
416 | ||
417 | def list_eq(l1, l2): | |
418 | return sort(l1) == sort(l2) | |
419 | ||
420 | if __name__ == "__main__": | |
421 | import sys | |
422 | from compiler import parseFile, walk | |
423 | import symtable | |
424 | ||
425 | def get_names(syms): | |
426 | return [s for s in [s.get_name() for s in syms.get_symbols()] | |
427 | if not (s.startswith('_[') or s.startswith('.'))] | |
428 | ||
429 | for file in sys.argv[1:]: | |
430 | print file | |
431 | f = open(file) | |
432 | buf = f.read() | |
433 | f.close() | |
434 | syms = symtable.symtable(buf, file, "exec") | |
435 | mod_names = get_names(syms) | |
436 | tree = parseFile(file) | |
437 | s = SymbolVisitor() | |
438 | walk(tree, s) | |
439 | ||
440 | # compare module-level symbols | |
441 | names2 = s.scopes[tree].get_names() | |
442 | ||
443 | if not list_eq(mod_names, names2): | |
444 | ||
445 | print "oops", file | |
446 | print sort(mod_names) | |
447 | print sort(names2) | |
448 | sys.exit(-1) | |
449 | ||
450 | d = {} | |
451 | d.update(s.scopes) | |
452 | del d[tree] | |
453 | scopes = d.values() | |
454 | del d | |
455 | ||
456 | for s in syms.get_symbols(): | |
457 | if s.is_namespace(): | |
458 | l = [sc for sc in scopes | |
459 | if sc.name == s.get_name()] | |
460 | if len(l) > 1: | |
461 | print "skipping", s.get_name() | |
462 | else: | |
463 | if not list_eq(get_names(s.get_namespace()), | |
464 | l[0].get_names()): | |
465 | print s.get_name() | |
466 | print sort(get_names(s.get_namespace())) | |
467 | print sort(l[0].get_names()) | |
468 | sys.exit(-1) |