Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """A flow graph representation for Python bytecode""" |
2 | ||
3 | import dis | |
4 | import new | |
5 | import sys | |
6 | import types | |
7 | ||
8 | from compiler import misc | |
9 | from compiler.consts \ | |
10 | import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS | |
11 | ||
12 | class FlowGraph: | |
13 | def __init__(self): | |
14 | self.current = self.entry = Block() | |
15 | self.exit = Block("exit") | |
16 | self.blocks = misc.Set() | |
17 | self.blocks.add(self.entry) | |
18 | self.blocks.add(self.exit) | |
19 | ||
20 | def startBlock(self, block): | |
21 | if self._debug: | |
22 | if self.current: | |
23 | print "end", repr(self.current) | |
24 | print " next", self.current.next | |
25 | print " ", self.current.get_children() | |
26 | print repr(block) | |
27 | self.current = block | |
28 | ||
29 | def nextBlock(self, block=None): | |
30 | # XXX think we need to specify when there is implicit transfer | |
31 | # from one block to the next. might be better to represent this | |
32 | # with explicit JUMP_ABSOLUTE instructions that are optimized | |
33 | # out when they are unnecessary. | |
34 | # | |
35 | # I think this strategy works: each block has a child | |
36 | # designated as "next" which is returned as the last of the | |
37 | # children. because the nodes in a graph are emitted in | |
38 | # reverse post order, the "next" block will always be emitted | |
39 | # immediately after its parent. | |
40 | # Worry: maintaining this invariant could be tricky | |
41 | if block is None: | |
42 | block = self.newBlock() | |
43 | ||
44 | # Note: If the current block ends with an unconditional | |
45 | # control transfer, then it is incorrect to add an implicit | |
46 | # transfer to the block graph. The current code requires | |
47 | # these edges to get the blocks emitted in the right order, | |
48 | # however. :-( If a client needs to remove these edges, call | |
49 | # pruneEdges(). | |
50 | ||
51 | self.current.addNext(block) | |
52 | self.startBlock(block) | |
53 | ||
54 | def newBlock(self): | |
55 | b = Block() | |
56 | self.blocks.add(b) | |
57 | return b | |
58 | ||
59 | def startExitBlock(self): | |
60 | self.startBlock(self.exit) | |
61 | ||
62 | _debug = 0 | |
63 | ||
64 | def _enable_debug(self): | |
65 | self._debug = 1 | |
66 | ||
67 | def _disable_debug(self): | |
68 | self._debug = 0 | |
69 | ||
70 | def emit(self, *inst): | |
71 | if self._debug: | |
72 | print "\t", inst | |
73 | if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']: | |
74 | self.current.addOutEdge(self.exit) | |
75 | if len(inst) == 2 and isinstance(inst[1], Block): | |
76 | self.current.addOutEdge(inst[1]) | |
77 | self.current.emit(inst) | |
78 | ||
79 | def getBlocksInOrder(self): | |
80 | """Return the blocks in reverse postorder | |
81 | ||
82 | i.e. each node appears before all of its successors | |
83 | """ | |
84 | # XXX make sure every node that doesn't have an explicit next | |
85 | # is set so that next points to exit | |
86 | for b in self.blocks.elements(): | |
87 | if b is self.exit: | |
88 | continue | |
89 | if not b.next: | |
90 | b.addNext(self.exit) | |
91 | order = dfs_postorder(self.entry, {}) | |
92 | order.reverse() | |
93 | self.fixupOrder(order, self.exit) | |
94 | # hack alert | |
95 | if not self.exit in order: | |
96 | order.append(self.exit) | |
97 | ||
98 | return order | |
99 | ||
100 | def fixupOrder(self, blocks, default_next): | |
101 | """Fixup bad order introduced by DFS.""" | |
102 | ||
103 | # XXX This is a total mess. There must be a better way to get | |
104 | # the code blocks in the right order. | |
105 | ||
106 | self.fixupOrderHonorNext(blocks, default_next) | |
107 | self.fixupOrderForward(blocks, default_next) | |
108 | ||
109 | def fixupOrderHonorNext(self, blocks, default_next): | |
110 | """Fix one problem with DFS. | |
111 | ||
112 | The DFS uses child block, but doesn't know about the special | |
113 | "next" block. As a result, the DFS can order blocks so that a | |
114 | block isn't next to the right block for implicit control | |
115 | transfers. | |
116 | """ | |
117 | index = {} | |
118 | for i in range(len(blocks)): | |
119 | index[blocks[i]] = i | |
120 | ||
121 | for i in range(0, len(blocks) - 1): | |
122 | b = blocks[i] | |
123 | n = blocks[i + 1] | |
124 | if not b.next or b.next[0] == default_next or b.next[0] == n: | |
125 | continue | |
126 | # The blocks are in the wrong order. Find the chain of | |
127 | # blocks to insert where they belong. | |
128 | cur = b | |
129 | chain = [] | |
130 | elt = cur | |
131 | while elt.next and elt.next[0] != default_next: | |
132 | chain.append(elt.next[0]) | |
133 | elt = elt.next[0] | |
134 | # Now remove the blocks in the chain from the current | |
135 | # block list, so that they can be re-inserted. | |
136 | l = [] | |
137 | for b in chain: | |
138 | assert index[b] > i | |
139 | l.append((index[b], b)) | |
140 | l.sort() | |
141 | l.reverse() | |
142 | for j, b in l: | |
143 | del blocks[index[b]] | |
144 | # Insert the chain in the proper location | |
145 | blocks[i:i + 1] = [cur] + chain | |
146 | # Finally, re-compute the block indexes | |
147 | for i in range(len(blocks)): | |
148 | index[blocks[i]] = i | |
149 | ||
150 | def fixupOrderForward(self, blocks, default_next): | |
151 | """Make sure all JUMP_FORWARDs jump forward""" | |
152 | index = {} | |
153 | chains = [] | |
154 | cur = [] | |
155 | for b in blocks: | |
156 | index[b] = len(chains) | |
157 | cur.append(b) | |
158 | if b.next and b.next[0] == default_next: | |
159 | chains.append(cur) | |
160 | cur = [] | |
161 | chains.append(cur) | |
162 | ||
163 | while 1: | |
164 | constraints = [] | |
165 | ||
166 | for i in range(len(chains)): | |
167 | l = chains[i] | |
168 | for b in l: | |
169 | for c in b.get_children(): | |
170 | if index[c] < i: | |
171 | forward_p = 0 | |
172 | for inst in b.insts: | |
173 | if inst[0] == 'JUMP_FORWARD': | |
174 | if inst[1] == c: | |
175 | forward_p = 1 | |
176 | if not forward_p: | |
177 | continue | |
178 | constraints.append((index[c], i)) | |
179 | ||
180 | if not constraints: | |
181 | break | |
182 | ||
183 | # XXX just do one for now | |
184 | # do swaps to get things in the right order | |
185 | goes_before, a_chain = constraints[0] | |
186 | assert a_chain > goes_before | |
187 | c = chains[a_chain] | |
188 | chains.remove(c) | |
189 | chains.insert(goes_before, c) | |
190 | ||
191 | del blocks[:] | |
192 | for c in chains: | |
193 | for b in c: | |
194 | blocks.append(b) | |
195 | ||
196 | def getBlocks(self): | |
197 | return self.blocks.elements() | |
198 | ||
199 | def getRoot(self): | |
200 | """Return nodes appropriate for use with dominator""" | |
201 | return self.entry | |
202 | ||
203 | def getContainedGraphs(self): | |
204 | l = [] | |
205 | for b in self.getBlocks(): | |
206 | l.extend(b.getContainedGraphs()) | |
207 | return l | |
208 | ||
209 | def dfs_postorder(b, seen): | |
210 | """Depth-first search of tree rooted at b, return in postorder""" | |
211 | order = [] | |
212 | seen[b] = b | |
213 | for c in b.get_children(): | |
214 | if seen.has_key(c): | |
215 | continue | |
216 | order = order + dfs_postorder(c, seen) | |
217 | order.append(b) | |
218 | return order | |
219 | ||
220 | class Block: | |
221 | _count = 0 | |
222 | ||
223 | def __init__(self, label=''): | |
224 | self.insts = [] | |
225 | self.inEdges = misc.Set() | |
226 | self.outEdges = misc.Set() | |
227 | self.label = label | |
228 | self.bid = Block._count | |
229 | self.next = [] | |
230 | Block._count = Block._count + 1 | |
231 | ||
232 | def __repr__(self): | |
233 | if self.label: | |
234 | return "<block %s id=%d>" % (self.label, self.bid) | |
235 | else: | |
236 | return "<block id=%d>" % (self.bid) | |
237 | ||
238 | def __str__(self): | |
239 | insts = map(str, self.insts) | |
240 | return "<block %s %d:\n%s>" % (self.label, self.bid, | |
241 | '\n'.join(insts)) | |
242 | ||
243 | def emit(self, inst): | |
244 | op = inst[0] | |
245 | if op[:4] == 'JUMP': | |
246 | self.outEdges.add(inst[1]) | |
247 | self.insts.append(inst) | |
248 | ||
249 | def getInstructions(self): | |
250 | return self.insts | |
251 | ||
252 | def addInEdge(self, block): | |
253 | self.inEdges.add(block) | |
254 | ||
255 | def addOutEdge(self, block): | |
256 | self.outEdges.add(block) | |
257 | ||
258 | def addNext(self, block): | |
259 | self.next.append(block) | |
260 | assert len(self.next) == 1, map(str, self.next) | |
261 | ||
262 | _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE', | |
263 | 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP') | |
264 | ||
265 | def pruneNext(self): | |
266 | """Remove bogus edge for unconditional transfers | |
267 | ||
268 | Each block has a next edge that accounts for implicit control | |
269 | transfers, e.g. from a JUMP_IF_FALSE to the block that will be | |
270 | executed if the test is true. | |
271 | ||
272 | These edges must remain for the current assembler code to | |
273 | work. If they are removed, the dfs_postorder gets things in | |
274 | weird orders. However, they shouldn't be there for other | |
275 | purposes, e.g. conversion to SSA form. This method will | |
276 | remove the next edge when it follows an unconditional control | |
277 | transfer. | |
278 | """ | |
279 | try: | |
280 | op, arg = self.insts[-1] | |
281 | except (IndexError, ValueError): | |
282 | return | |
283 | if op in self._uncond_transfer: | |
284 | self.next = [] | |
285 | ||
286 | def get_children(self): | |
287 | if self.next and self.next[0] in self.outEdges: | |
288 | self.outEdges.remove(self.next[0]) | |
289 | return self.outEdges.elements() + self.next | |
290 | ||
291 | def getContainedGraphs(self): | |
292 | """Return all graphs contained within this block. | |
293 | ||
294 | For example, a MAKE_FUNCTION block will contain a reference to | |
295 | the graph for the function body. | |
296 | """ | |
297 | contained = [] | |
298 | for inst in self.insts: | |
299 | if len(inst) == 1: | |
300 | continue | |
301 | op = inst[1] | |
302 | if hasattr(op, 'graph'): | |
303 | contained.append(op.graph) | |
304 | return contained | |
305 | ||
306 | # flags for code objects | |
307 | ||
308 | # the FlowGraph is transformed in place; it exists in one of these states | |
309 | RAW = "RAW" | |
310 | FLAT = "FLAT" | |
311 | CONV = "CONV" | |
312 | DONE = "DONE" | |
313 | ||
314 | class PyFlowGraph(FlowGraph): | |
315 | super_init = FlowGraph.__init__ | |
316 | ||
317 | def __init__(self, name, filename, args=(), optimized=0, klass=None): | |
318 | self.super_init() | |
319 | self.name = name | |
320 | self.filename = filename | |
321 | self.docstring = None | |
322 | self.args = args # XXX | |
323 | self.argcount = getArgCount(args) | |
324 | self.klass = klass | |
325 | if optimized: | |
326 | self.flags = CO_OPTIMIZED | CO_NEWLOCALS | |
327 | else: | |
328 | self.flags = 0 | |
329 | self.consts = [] | |
330 | self.names = [] | |
331 | # Free variables found by the symbol table scan, including | |
332 | # variables used only in nested scopes, are included here. | |
333 | self.freevars = [] | |
334 | self.cellvars = [] | |
335 | # The closure list is used to track the order of cell | |
336 | # variables and free variables in the resulting code object. | |
337 | # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both | |
338 | # kinds of variables. | |
339 | self.closure = [] | |
340 | self.varnames = list(args) or [] | |
341 | for i in range(len(self.varnames)): | |
342 | var = self.varnames[i] | |
343 | if isinstance(var, TupleArg): | |
344 | self.varnames[i] = var.getName() | |
345 | self.stage = RAW | |
346 | ||
347 | def setDocstring(self, doc): | |
348 | self.docstring = doc | |
349 | ||
350 | def setFlag(self, flag): | |
351 | self.flags = self.flags | flag | |
352 | if flag == CO_VARARGS: | |
353 | self.argcount = self.argcount - 1 | |
354 | ||
355 | def checkFlag(self, flag): | |
356 | if self.flags & flag: | |
357 | return 1 | |
358 | ||
359 | def setFreeVars(self, names): | |
360 | self.freevars = list(names) | |
361 | ||
362 | def setCellVars(self, names): | |
363 | self.cellvars = names | |
364 | ||
365 | def getCode(self): | |
366 | """Get a Python code object""" | |
367 | if self.stage == RAW: | |
368 | self.computeStackDepth() | |
369 | self.flattenGraph() | |
370 | if self.stage == FLAT: | |
371 | self.convertArgs() | |
372 | if self.stage == CONV: | |
373 | self.makeByteCode() | |
374 | if self.stage == DONE: | |
375 | return self.newCodeObject() | |
376 | raise RuntimeError, "inconsistent PyFlowGraph state" | |
377 | ||
378 | def dump(self, io=None): | |
379 | if io: | |
380 | save = sys.stdout | |
381 | sys.stdout = io | |
382 | pc = 0 | |
383 | for t in self.insts: | |
384 | opname = t[0] | |
385 | if opname == "SET_LINENO": | |
386 | ||
387 | if len(t) == 1: | |
388 | print "\t", "%3d" % pc, opname | |
389 | pc = pc + 1 | |
390 | else: | |
391 | print "\t", "%3d" % pc, opname, t[1] | |
392 | pc = pc + 3 | |
393 | if io: | |
394 | sys.stdout = save | |
395 | ||
396 | def computeStackDepth(self): | |
397 | """Compute the max stack depth. | |
398 | ||
399 | Approach is to compute the stack effect of each basic block. | |
400 | Then find the path through the code with the largest total | |
401 | effect. | |
402 | """ | |
403 | depth = {} | |
404 | exit = None | |
405 | for b in self.getBlocks(): | |
406 | depth[b] = findDepth(b.getInstructions()) | |
407 | ||
408 | seen = {} | |
409 | ||
410 | def max_depth(b, d): | |
411 | if seen.has_key(b): | |
412 | return d | |
413 | seen[b] = 1 | |
414 | d = d + depth[b] | |
415 | children = b.get_children() | |
416 | if children: | |
417 | return max([max_depth(c, d) for c in children]) | |
418 | else: | |
419 | if not b.label == "exit": | |
420 | return max_depth(self.exit, d) | |
421 | else: | |
422 | return d | |
423 | ||
424 | self.stacksize = max_depth(self.entry, 0) | |
425 | ||
426 | def flattenGraph(self): | |
427 | """Arrange the blocks in order and resolve jumps""" | |
428 | assert self.stage == RAW | |
429 | self.insts = insts = [] | |
430 | pc = 0 | |
431 | begin = {} | |
432 | end = {} | |
433 | for b in self.getBlocksInOrder(): | |
434 | begin[b] = pc | |
435 | for inst in b.getInstructions(): | |
436 | insts.append(inst) | |
437 | if len(inst) == 1: | |
438 | pc = pc + 1 | |
439 | elif inst[0] != "SET_LINENO": | |
440 | # arg takes 2 bytes | |
441 | pc = pc + 3 | |
442 | end[b] = pc | |
443 | pc = 0 | |
444 | for i in range(len(insts)): | |
445 | inst = insts[i] | |
446 | if len(inst) == 1: | |
447 | pc = pc + 1 | |
448 | elif inst[0] != "SET_LINENO": | |
449 | pc = pc + 3 | |
450 | opname = inst[0] | |
451 | if self.hasjrel.has_elt(opname): | |
452 | oparg = inst[1] | |
453 | offset = begin[oparg] - pc | |
454 | insts[i] = opname, offset | |
455 | elif self.hasjabs.has_elt(opname): | |
456 | insts[i] = opname, begin[inst[1]] | |
457 | self.stage = FLAT | |
458 | ||
459 | hasjrel = misc.Set() | |
460 | for i in dis.hasjrel: | |
461 | hasjrel.add(dis.opname[i]) | |
462 | hasjabs = misc.Set() | |
463 | for i in dis.hasjabs: | |
464 | hasjabs.add(dis.opname[i]) | |
465 | ||
466 | def convertArgs(self): | |
467 | """Convert arguments from symbolic to concrete form""" | |
468 | assert self.stage == FLAT | |
469 | self.consts.insert(0, self.docstring) | |
470 | self.sort_cellvars() | |
471 | for i in range(len(self.insts)): | |
472 | t = self.insts[i] | |
473 | if len(t) == 2: | |
474 | opname, oparg = t | |
475 | conv = self._converters.get(opname, None) | |
476 | if conv: | |
477 | self.insts[i] = opname, conv(self, oparg) | |
478 | self.stage = CONV | |
479 | ||
480 | def sort_cellvars(self): | |
481 | """Sort cellvars in the order of varnames and prune from freevars. | |
482 | """ | |
483 | cells = {} | |
484 | for name in self.cellvars: | |
485 | cells[name] = 1 | |
486 | self.cellvars = [name for name in self.varnames | |
487 | if cells.has_key(name)] | |
488 | for name in self.cellvars: | |
489 | del cells[name] | |
490 | self.cellvars = self.cellvars + cells.keys() | |
491 | self.closure = self.cellvars + self.freevars | |
492 | ||
493 | def _lookupName(self, name, list): | |
494 | """Return index of name in list, appending if necessary | |
495 | ||
496 | This routine uses a list instead of a dictionary, because a | |
497 | dictionary can't store two different keys if the keys have the | |
498 | same value but different types, e.g. 2 and 2L. The compiler | |
499 | must treat these two separately, so it does an explicit type | |
500 | comparison before comparing the values. | |
501 | """ | |
502 | t = type(name) | |
503 | for i in range(len(list)): | |
504 | if t == type(list[i]) and list[i] == name: | |
505 | return i | |
506 | end = len(list) | |
507 | list.append(name) | |
508 | return end | |
509 | ||
510 | _converters = {} | |
511 | def _convert_LOAD_CONST(self, arg): | |
512 | if hasattr(arg, 'getCode'): | |
513 | arg = arg.getCode() | |
514 | return self._lookupName(arg, self.consts) | |
515 | ||
516 | def _convert_LOAD_FAST(self, arg): | |
517 | self._lookupName(arg, self.names) | |
518 | return self._lookupName(arg, self.varnames) | |
519 | _convert_STORE_FAST = _convert_LOAD_FAST | |
520 | _convert_DELETE_FAST = _convert_LOAD_FAST | |
521 | ||
522 | def _convert_LOAD_NAME(self, arg): | |
523 | if self.klass is None: | |
524 | self._lookupName(arg, self.varnames) | |
525 | return self._lookupName(arg, self.names) | |
526 | ||
527 | def _convert_NAME(self, arg): | |
528 | if self.klass is None: | |
529 | self._lookupName(arg, self.varnames) | |
530 | return self._lookupName(arg, self.names) | |
531 | _convert_STORE_NAME = _convert_NAME | |
532 | _convert_DELETE_NAME = _convert_NAME | |
533 | _convert_IMPORT_NAME = _convert_NAME | |
534 | _convert_IMPORT_FROM = _convert_NAME | |
535 | _convert_STORE_ATTR = _convert_NAME | |
536 | _convert_LOAD_ATTR = _convert_NAME | |
537 | _convert_DELETE_ATTR = _convert_NAME | |
538 | _convert_LOAD_GLOBAL = _convert_NAME | |
539 | _convert_STORE_GLOBAL = _convert_NAME | |
540 | _convert_DELETE_GLOBAL = _convert_NAME | |
541 | ||
542 | def _convert_DEREF(self, arg): | |
543 | self._lookupName(arg, self.names) | |
544 | self._lookupName(arg, self.varnames) | |
545 | return self._lookupName(arg, self.closure) | |
546 | _convert_LOAD_DEREF = _convert_DEREF | |
547 | _convert_STORE_DEREF = _convert_DEREF | |
548 | ||
549 | def _convert_LOAD_CLOSURE(self, arg): | |
550 | self._lookupName(arg, self.varnames) | |
551 | return self._lookupName(arg, self.closure) | |
552 | ||
553 | _cmp = list(dis.cmp_op) | |
554 | def _convert_COMPARE_OP(self, arg): | |
555 | return self._cmp.index(arg) | |
556 | ||
557 | # similarly for other opcodes... | |
558 | ||
559 | for name, obj in locals().items(): | |
560 | if name[:9] == "_convert_": | |
561 | opname = name[9:] | |
562 | _converters[opname] = obj | |
563 | del name, obj, opname | |
564 | ||
565 | def makeByteCode(self): | |
566 | assert self.stage == CONV | |
567 | self.lnotab = lnotab = LineAddrTable() | |
568 | for t in self.insts: | |
569 | opname = t[0] | |
570 | if len(t) == 1: | |
571 | lnotab.addCode(self.opnum[opname]) | |
572 | else: | |
573 | oparg = t[1] | |
574 | if opname == "SET_LINENO": | |
575 | lnotab.nextLine(oparg) | |
576 | continue | |
577 | hi, lo = twobyte(oparg) | |
578 | try: | |
579 | lnotab.addCode(self.opnum[opname], lo, hi) | |
580 | except ValueError: | |
581 | print opname, oparg | |
582 | print self.opnum[opname], lo, hi | |
583 | raise | |
584 | self.stage = DONE | |
585 | ||
586 | opnum = {} | |
587 | for num in range(len(dis.opname)): | |
588 | opnum[dis.opname[num]] = num | |
589 | del num | |
590 | ||
591 | def newCodeObject(self): | |
592 | assert self.stage == DONE | |
593 | if (self.flags & CO_NEWLOCALS) == 0: | |
594 | nlocals = 0 | |
595 | else: | |
596 | nlocals = len(self.varnames) | |
597 | argcount = self.argcount | |
598 | if self.flags & CO_VARKEYWORDS: | |
599 | argcount = argcount - 1 | |
600 | return new.code(argcount, nlocals, self.stacksize, self.flags, | |
601 | self.lnotab.getCode(), self.getConsts(), | |
602 | tuple(self.names), tuple(self.varnames), | |
603 | self.filename, self.name, self.lnotab.firstline, | |
604 | self.lnotab.getTable(), tuple(self.freevars), | |
605 | tuple(self.cellvars)) | |
606 | ||
607 | def getConsts(self): | |
608 | """Return a tuple for the const slot of the code object | |
609 | ||
610 | Must convert references to code (MAKE_FUNCTION) to code | |
611 | objects recursively. | |
612 | """ | |
613 | l = [] | |
614 | for elt in self.consts: | |
615 | if isinstance(elt, PyFlowGraph): | |
616 | elt = elt.getCode() | |
617 | l.append(elt) | |
618 | return tuple(l) | |
619 | ||
620 | def isJump(opname): | |
621 | if opname[:4] == 'JUMP': | |
622 | return 1 | |
623 | ||
624 | class TupleArg: | |
625 | """Helper for marking func defs with nested tuples in arglist""" | |
626 | def __init__(self, count, names): | |
627 | self.count = count | |
628 | self.names = names | |
629 | def __repr__(self): | |
630 | return "TupleArg(%s, %s)" % (self.count, self.names) | |
631 | def getName(self): | |
632 | return ".%d" % self.count | |
633 | ||
634 | def getArgCount(args): | |
635 | argcount = len(args) | |
636 | if args: | |
637 | for arg in args: | |
638 | if isinstance(arg, TupleArg): | |
639 | numNames = len(misc.flatten(arg.names)) | |
640 | argcount = argcount - numNames | |
641 | return argcount | |
642 | ||
643 | def twobyte(val): | |
644 | """Convert an int argument into high and low bytes""" | |
645 | assert type(val) == types.IntType | |
646 | return divmod(val, 256) | |
647 | ||
648 | class LineAddrTable: | |
649 | """lnotab | |
650 | ||
651 | This class builds the lnotab, which is documented in compile.c. | |
652 | Here's a brief recap: | |
653 | ||
654 | For each SET_LINENO instruction after the first one, two bytes are | |
655 | added to lnotab. (In some cases, multiple two-byte entries are | |
656 | added.) The first byte is the distance in bytes between the | |
657 | instruction for the last SET_LINENO and the current SET_LINENO. | |
658 | The second byte is offset in line numbers. If either offset is | |
659 | greater than 255, multiple two-byte entries are added -- see | |
660 | compile.c for the delicate details. | |
661 | """ | |
662 | ||
663 | def __init__(self): | |
664 | self.code = [] | |
665 | self.codeOffset = 0 | |
666 | self.firstline = 0 | |
667 | self.lastline = 0 | |
668 | self.lastoff = 0 | |
669 | self.lnotab = [] | |
670 | ||
671 | def addCode(self, *args): | |
672 | for arg in args: | |
673 | self.code.append(chr(arg)) | |
674 | self.codeOffset = self.codeOffset + len(args) | |
675 | ||
676 | def nextLine(self, lineno): | |
677 | if self.firstline == 0: | |
678 | self.firstline = lineno | |
679 | self.lastline = lineno | |
680 | else: | |
681 | # compute deltas | |
682 | addr = self.codeOffset - self.lastoff | |
683 | line = lineno - self.lastline | |
684 | # Python assumes that lineno always increases with | |
685 | # increasing bytecode address (lnotab is unsigned char). | |
686 | # Depending on when SET_LINENO instructions are emitted | |
687 | # this is not always true. Consider the code: | |
688 | # a = (1, | |
689 | # b) | |
690 | # In the bytecode stream, the assignment to "a" occurs | |
691 | # after the loading of "b". This works with the C Python | |
692 | # compiler because it only generates a SET_LINENO instruction | |
693 | # for the assignment. | |
694 | if line >= 0: | |
695 | push = self.lnotab.append | |
696 | while addr > 255: | |
697 | push(255); push(0) | |
698 | addr -= 255 | |
699 | while line > 255: | |
700 | push(addr); push(255) | |
701 | line -= 255 | |
702 | addr = 0 | |
703 | if addr > 0 or line > 0: | |
704 | push(addr); push(line) | |
705 | self.lastline = lineno | |
706 | self.lastoff = self.codeOffset | |
707 | ||
708 | def getCode(self): | |
709 | return ''.join(self.code) | |
710 | ||
711 | def getTable(self): | |
712 | return ''.join(map(chr, self.lnotab)) | |
713 | ||
714 | class StackDepthTracker: | |
715 | # XXX 1. need to keep track of stack depth on jumps | |
716 | # XXX 2. at least partly as a result, this code is broken | |
717 | ||
718 | def findDepth(self, insts, debug=0): | |
719 | depth = 0 | |
720 | maxDepth = 0 | |
721 | for i in insts: | |
722 | opname = i[0] | |
723 | if debug: | |
724 | print i, | |
725 | delta = self.effect.get(opname, None) | |
726 | if delta is not None: | |
727 | depth = depth + delta | |
728 | else: | |
729 | # now check patterns | |
730 | for pat, pat_delta in self.patterns: | |
731 | if opname[:len(pat)] == pat: | |
732 | delta = pat_delta | |
733 | depth = depth + delta | |
734 | break | |
735 | # if we still haven't found a match | |
736 | if delta is None: | |
737 | meth = getattr(self, opname, None) | |
738 | if meth is not None: | |
739 | depth = depth + meth(i[1]) | |
740 | if depth > maxDepth: | |
741 | maxDepth = depth | |
742 | if debug: | |
743 | print depth, maxDepth | |
744 | return maxDepth | |
745 | ||
746 | effect = { | |
747 | 'POP_TOP': -1, | |
748 | 'DUP_TOP': 1, | |
749 | 'SLICE+1': -1, | |
750 | 'SLICE+2': -1, | |
751 | 'SLICE+3': -2, | |
752 | 'STORE_SLICE+0': -1, | |
753 | 'STORE_SLICE+1': -2, | |
754 | 'STORE_SLICE+2': -2, | |
755 | 'STORE_SLICE+3': -3, | |
756 | 'DELETE_SLICE+0': -1, | |
757 | 'DELETE_SLICE+1': -2, | |
758 | 'DELETE_SLICE+2': -2, | |
759 | 'DELETE_SLICE+3': -3, | |
760 | 'STORE_SUBSCR': -3, | |
761 | 'DELETE_SUBSCR': -2, | |
762 | # PRINT_EXPR? | |
763 | 'PRINT_ITEM': -1, | |
764 | 'RETURN_VALUE': -1, | |
765 | 'YIELD_VALUE': -1, | |
766 | 'EXEC_STMT': -3, | |
767 | 'BUILD_CLASS': -2, | |
768 | 'STORE_NAME': -1, | |
769 | 'STORE_ATTR': -2, | |
770 | 'DELETE_ATTR': -1, | |
771 | 'STORE_GLOBAL': -1, | |
772 | 'BUILD_MAP': 1, | |
773 | 'COMPARE_OP': -1, | |
774 | 'STORE_FAST': -1, | |
775 | 'IMPORT_STAR': -1, | |
776 | 'IMPORT_NAME': 0, | |
777 | 'IMPORT_FROM': 1, | |
778 | 'LOAD_ATTR': 0, # unlike other loads | |
779 | # close enough... | |
780 | 'SETUP_EXCEPT': 3, | |
781 | 'SETUP_FINALLY': 3, | |
782 | 'FOR_ITER': 1, | |
783 | } | |
784 | # use pattern match | |
785 | patterns = [ | |
786 | ('BINARY_', -1), | |
787 | ('LOAD_', 1), | |
788 | ] | |
789 | ||
790 | def UNPACK_SEQUENCE(self, count): | |
791 | return count-1 | |
792 | def BUILD_TUPLE(self, count): | |
793 | return -count+1 | |
794 | def BUILD_LIST(self, count): | |
795 | return -count+1 | |
796 | def CALL_FUNCTION(self, argc): | |
797 | hi, lo = divmod(argc, 256) | |
798 | return -(lo + hi * 2) | |
799 | def CALL_FUNCTION_VAR(self, argc): | |
800 | return self.CALL_FUNCTION(argc)-1 | |
801 | def CALL_FUNCTION_KW(self, argc): | |
802 | return self.CALL_FUNCTION(argc)-1 | |
803 | def CALL_FUNCTION_VAR_KW(self, argc): | |
804 | return self.CALL_FUNCTION(argc)-2 | |
805 | def MAKE_FUNCTION(self, argc): | |
806 | return -argc | |
807 | def MAKE_CLOSURE(self, argc): | |
808 | # XXX need to account for free variables too! | |
809 | return -argc | |
810 | def BUILD_SLICE(self, argc): | |
811 | if argc == 2: | |
812 | return -1 | |
813 | elif argc == 3: | |
814 | return -2 | |
815 | def DUP_TOPX(self, argc): | |
816 | return argc | |
817 | ||
818 | findDepth = StackDepthTracker().findDepth |