Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | """Find modules used by a script, using introspection.""" |
2 | ||
3 | # This module should be kept compatible with Python 2.2, see PEP 291. | |
4 | ||
5 | import dis | |
6 | import imp | |
7 | import marshal | |
8 | import os | |
9 | import sys | |
10 | import new | |
11 | ||
# Mode used to open source files: universal line endings when the running
# interpreter's file objects expose a ``newlines`` attribute, plain text
# mode otherwise (this keeps the module usable on Python < 2.3).
READ_MODE = "r"
if hasattr(sys.__stdout__, "newlines"):
    READ_MODE = "U"

# Numeric values of the opcodes that ModuleFinder.scan_code() looks for.
LOAD_CONST, IMPORT_NAME, STORE_NAME, STORE_GLOBAL = map(
    dis.opname.index,
    ('LOAD_CONST', 'IMPORT_NAME', 'STORE_NAME', 'STORE_GLOBAL'))
# Opcodes that assign to a module-level name.
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
23 | ||
# ModuleFinder does a good job of simulating Python's import behaviour, but
# it cannot handle the __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note: this is a mapping from package names to lists of paths.
packagePathMap = {}

# A public interface
def AddPackagePath(packagename, path):
    """Register ``path`` as an extra search directory for ``packagename``.

    Paths accumulate: calling this repeatedly for the same package appends
    to the list already stored in ``packagePathMap``.
    """
    packagePathMap.setdefault(packagename, []).append(path)
37 | ||
# Maps the on-disk name of a package to the name it should be recorded
# under.  This mechanism allows modulefinder to work around the way the
# _xmlplus package injects itself under the name "xml" into sys.modules at
# runtime: call ReplacePackage("_xmlplus", "xml") before running
# ModuleFinder.
replacePackageMap = {}


def ReplacePackage(oldname, newname):
    """Record that package ``oldname`` must be treated as ``newname``.

    A later registration for the same ``oldname`` overwrites the earlier one.
    """
    replacePackageMap.update({oldname: newname})
47 | ||
48 | ||
class Module:
    """Lightweight record describing one module seen by the finder.

    Attributes:
        __name__: the module name passed to the constructor.
        __file__: the file the module was loaded from, or None.
        __path__: the package search path (a list), or None.
        __code__: the module's code object; None until one is assigned.
        globalnames: dict used as a set of global names assigned in the
            module, including names imported through star-imports of
            Python modules.
        starimports: dict used as a set of star-imports that could not be
            resolved, i.e. a star-import from a non-Python module.
    """

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        self.globalnames = {}
        self.starimports = {}

    def __repr__(self):
        # The file and the path are shown only when they are set.
        shown = [repr(self.__name__)]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(repr(optional))
        return "Module(" + ", ".join(shown) + ")"
72 | ||
73 | class ModuleFinder: | |
74 | ||
def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
    """Set up an empty module finder.

    path          -- list of directories to search; defaults to sys.path.
    debug         -- verbosity threshold compared against message levels.
    excludes      -- list of module names that must not be looked up;
                     defaults to a fresh empty list.
    replace_paths -- list of (old_prefix, new_prefix) pairs applied to
                     co_filename of loaded code; defaults to a fresh
                     empty list.
    """
    if path is None:
        path = sys.path
    # The defaults are created per instance.  Using ``[]`` directly as the
    # default value would make every finder share (and mutate) one list.
    if excludes is None:
        excludes = []
    if replace_paths is None:
        replace_paths = []
    self.path = path
    self.modules = {}
    self.badmodules = {}
    self.debug = debug
    self.indent = 0
    self.excludes = excludes
    self.replace_paths = replace_paths
    self.processed_paths = []   # Used in debugging only
86 | ||
87 | def msg(self, level, str, *args): | |
88 | if level <= self.debug: | |
89 | for i in range(self.indent): | |
90 | print " ", | |
91 | print str, | |
92 | for arg in args: | |
93 | print repr(arg), | |
94 | ||
95 | ||
def msgin(self, *args):
    """Emit a debug message one indent level deeper.

    ``args`` is forwarded unchanged to msg(); its first element is the
    message level.  Nothing happens when that level exceeds ``self.debug``.
    """
    if args[0] <= self.debug:
        self.indent += 1
        self.msg(*args)
101 | ||
def msgout(self, *args):
    """Step back one indent level, then emit a debug message.

    ``args`` is forwarded unchanged to msg(); its first element is the
    message level.  Nothing happens when that level exceeds ``self.debug``.
    """
    if args[0] <= self.debug:
        self.indent -= 1
        self.msg(*args)
107 | ||
def run_script(self, pathname):
    """Load the script at ``pathname`` as module ``__main__`` and scan it."""
    self.msg(2, "run_script", pathname)
    fp = open(pathname, READ_MODE)
    # try/finally (rather than ``with``) keeps the module usable on old
    # interpreters while still releasing the file on every exit path.
    try:
        stuff = ("", "r", imp.PY_SOURCE)
        self.load_module('__main__', fp, pathname, stuff)
    finally:
        fp.close()
113 | ||
def load_file(self, pathname):
    """Load the source file at ``pathname`` as a module and scan it.

    The module name is the file's base name with its extension removed.
    """
    name, ext = os.path.splitext(os.path.basename(pathname))
    fp = open(pathname, READ_MODE)
    # Make sure the file is released even if loading raises.
    try:
        stuff = (ext, "r", imp.PY_SOURCE)
        self.load_module(name, fp, pathname, stuff)
    finally:
        fp.close()
120 | ||
def import_hook(self, name, caller=None, fromlist=None):
    """Simulate ``import name`` / ``from name import ...`` done by ``caller``.

    Without a fromlist the head package of ``name`` is returned.  With a
    fromlist, the listed names are resolved against the last module of the
    dotted path (only when that module has a ``__path__``) and None is
    returned.  Failures propagate as ImportError from the helpers.
    """
    self.msg(3, "import_hook", name, caller, fromlist)
    head, tail = self.find_head_package(self.determine_parent(caller), name)
    leaf = self.load_tail(head, tail)
    if fromlist:
        if leaf.__path__:
            self.ensure_fromlist(leaf, fromlist)
        return None
    return head
131 | ||
def determine_parent(self, caller):
    """Return the package relative to which ``caller``'s imports resolve.

    A caller that is itself a package (non-empty ``__path__``) is its own
    parent.  A caller with a dotted name gets the module registered under
    everything before the last dot.  Anything else, including a missing
    caller, yields None.
    """
    self.msgin(4, "determine_parent", caller)
    if caller:
        modname = caller.__name__
        if caller.__path__:
            # The caller is a package: imports are relative to itself.
            package = self.modules[modname]
            assert caller is package
            self.msgout(4, "determine_parent ->", package)
            return package
        dot = modname.rfind('.')
        if dot >= 0:
            # A submodule: its parent is the enclosing package.
            pkgname = modname[:dot]
            package = self.modules[pkgname]
            assert package.__name__ == pkgname
            self.msgout(4, "determine_parent ->", package)
            return package
    self.msgout(4, "determine_parent -> None")
    return None
152 | ||
def find_head_package(self, parent, name):
    """Import the first component of dotted ``name``.

    The component is first looked up relative to ``parent`` (when there is
    one) and then as a top-level module.  Returns ``(module, tail)`` where
    ``tail`` is the part of ``name`` after the first dot ("" if none).
    Raises ImportError when neither lookup succeeds.
    """
    self.msgin(4, "find_head_package", parent, name)
    dot = name.find('.')
    if dot < 0:
        head, tail = name, ""
    else:
        head, tail = name[:dot], name[dot+1:]

    # Candidate (qualified name, package) pairs, in the order to try them.
    if parent:
        attempts = [("%s.%s" % (parent.__name__, head), parent),
                    (head, None)]
    else:
        attempts = [(head, parent)]

    for qname, package in attempts:
        q = self.import_module(head, qname, package)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
    self.msgout(4, "raise ImportError: No module named", qname)
    raise ImportError("No module named " + qname)
179 | ||
def load_tail(self, q, tail):
    """Import each remaining component of a dotted name below ``q``.

    ``tail`` is the dotted remainder after the head package.  Returns the
    module for the last component (``q`` itself when ``tail`` is empty).
    Raises ImportError as soon as one component cannot be imported.
    """
    self.msgin(4, "load_tail", q, tail)
    current = q
    while tail:
        # Peel off the next component; the appended "" supplies the empty
        # remainder when no dot is left.
        pieces = tail.split('.', 1) + [""]
        part, tail = pieces[0], pieces[1]
        fqname = "%s.%s" % (current.__name__, part)
        current = self.import_module(part, fqname, current)
        if not current:
            self.msgout(4, "raise ImportError: No module named", fqname)
            raise ImportError("No module named " + fqname)
    self.msgout(4, "load_tail ->", current)
    return current
194 | ||
def ensure_fromlist(self, m, fromlist, recursive=0):
    """Import every name of ``fromlist`` that ``m`` does not already have.

    A "*" entry expands to all submodules found for ``m``; the expansion is
    done only once (``recursive`` is set on the nested call).  Raises
    ImportError for a name that is neither an attribute of ``m`` nor an
    importable submodule.
    """
    self.msg(4, "ensure_fromlist", m, fromlist, recursive)
    for sub in fromlist:
        if sub == "*":
            if recursive:
                continue
            submodules = self.find_all_submodules(m)
            if submodules:
                self.ensure_fromlist(m, submodules, 1)
            continue
        if hasattr(m, sub):
            continue
        subname = "%s.%s" % (m.__name__, sub)
        if not self.import_module(sub, subname, m):
            raise ImportError("No module named " + subname)
208 | ||
def find_all_submodules(self, m):
    """List the candidate submodule names found in the directories of ``m``.

    Returns None when ``m`` has no ``__path__``.  Otherwise returns the
    names (without suffix, ``__init__`` excluded) of all directory entries
    ending in one of the suffixes reported by imp.get_suffixes().
    Directories that cannot be listed are skipped.
    """
    if not m.__path__:
        return
    # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
    # But we must also collect Python extension modules - although
    # we cannot separate normal dlls from Python extensions.
    suffixes = [info[0] for info in imp.get_suffixes()]
    found = {}
    for directory in m.__path__:
        try:
            entries = os.listdir(directory)
        except os.error:
            self.msg(2, "can't list directory", directory)
            continue
        for entry in entries:
            for suffix in suffixes:
                width = len(suffix)
                if entry[-width:] == suffix:
                    # Only the first matching suffix is considered.
                    stem = entry[:-width]
                    if stem and stem != "__init__":
                        found[stem] = stem
                    break
    return found.keys()
235 | ||
def import_module(self, partname, fqname, parent):
    """Return the module ``fqname``, loading it on first use.

    ``partname`` is the last component of ``fqname`` and ``parent`` the
    enclosing package (or None).  Returns None when the module is known to
    be bad, when ``parent`` is not a package, or when it cannot be found.
    On success the module is also set as attribute ``partname`` of
    ``parent``.
    """
    self.msgin(3, "import_module", partname, fqname, parent)
    if fqname in self.modules:
        # Already loaded.
        m = self.modules[fqname]
        self.msgout(3, "import_module ->", m)
        return m
    if fqname in self.badmodules or (parent and parent.__path__ is None):
        self.msgout(3, "import_module -> None")
        return None
    search_path = parent and parent.__path__
    try:
        fp, pathname, stuff = self.find_module(partname, search_path, parent)
    except ImportError:
        self.msgout(3, "import_module ->", None)
        return None
    try:
        m = self.load_module(fqname, fp, pathname, stuff)
    finally:
        # find_module() may return no file object at all.
        if fp:
            fp.close()
    if parent:
        setattr(parent, partname, m)
    self.msgout(3, "import_module ->", m)
    return m
265 | ||
def load_module(self, fqname, fp, pathname, file_info):
    """Register module ``fqname`` and scan its code for imports.

    fqname    -- fully qualified name to register the module under.
    fp        -- open file to read source or compiled code from; not
                 used for package directories.
    pathname  -- location of the module; stored as its ``__file__``.
    file_info -- the ``(suffix, mode, type)`` triple describing the file,
                 as returned by find_module().

    Returns the module record.  Raises ImportError when a compiled file
    does not start with the current magic number.
    """
    # Unpacked explicitly rather than in the signature: tuple parameters
    # hide the argument's name from callers and were removed from the
    # language by PEP 3113.
    suffix, mode, kind = file_info
    self.msgin(2, "load_module", fqname, fp and "fp", pathname)
    if kind == imp.PKG_DIRECTORY:
        m = self.load_package(fqname, pathname)
        self.msgout(2, "load_module ->", m)
        return m
    if kind == imp.PY_SOURCE:
        co = compile(fp.read()+'\n', pathname, 'exec')
    elif kind == imp.PY_COMPILED:
        if fp.read(4) != imp.get_magic():
            self.msgout(2, "raise ImportError: Bad magic number", pathname)
            raise ImportError("Bad magic number in %s" % pathname)
        # Skip the four header bytes that follow the magic number
        # (presumably the timestamp field -- confirm against the .pyc
        # format of the targeted interpreter).
        fp.read(4)
        co = marshal.load(fp)
    else:
        # Anything else has no Python code to scan.
        co = None
    m = self.add_module(fqname)
    m.__file__ = pathname
    if co:
        if self.replace_paths:
            co = self.replace_paths_in_code(co)
        m.__code__ = co
        self.scan_code(co, m)
    self.msgout(2, "load_module ->", m)
    return m
291 | ||
def _add_badmodule(self, name, caller):
    """Record that ``caller`` failed to import the module called ``name``.

    ``self.badmodules`` maps each missing name to a dict used as a set of
    the names of the modules that tried to import it.
    """
    self.badmodules.setdefault(name, {})[caller.__name__] = 1
296 | ||
def _safe_import_hook(self, name, caller, fromlist):
    """Wrapper for import_hook() that records failures instead of raising.

    A failed import of ``name`` is recorded in ``self.badmodules`` under
    ``name``; a failed import of an entry ``sub`` of ``fromlist`` is
    recorded under ``name.sub``.  Names already known to be bad are only
    credited to ``caller``, without another import attempt.
    """
    if name in self.badmodules:
        self._add_badmodule(name, caller)
        return
    try:
        self.import_hook(name, caller)
    except ImportError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the same spelling works on every interpreter
        # version.
        self.msg(2, "ImportError:", str(sys.exc_info()[1]))
        self._add_badmodule(name, caller)
        return
    for sub in fromlist or ():
        # Failures are stored under the fully qualified name, so the
        # "already known to be bad" check must use that name as well.
        # Checking the bare ``sub`` would confuse ``name.sub`` with an
        # unrelated top-level module that happens to be called ``sub``.
        fullname = name + "." + sub
        if fullname in self.badmodules:
            self._add_badmodule(fullname, caller)
            continue
        try:
            self.import_hook(name, caller, [sub])
        except ImportError:
            self.msg(2, "ImportError:", str(sys.exc_info()[1]))
            self._add_badmodule(fullname, caller)
319 | ||
def scan_code(self, co, m):
    """Walk the bytecode of ``co`` and record what module ``m`` imports.

    Every IMPORT_NAME is passed to _safe_import_hook(); every global store
    is recorded in ``m.globalnames``; star-imports either copy the global
    names of the imported module or are noted in ``m.starimports``.  Code
    objects found among the constants of ``co`` are scanned recursively.
    """
    code = co.co_code
    end = len(code)
    pos = 0
    fromlist = None
    while pos < end:
        op = ord(code[pos])
        pos += 1
        if op >= dis.HAVE_ARGUMENT:
            # Two-byte argument, low byte first.
            oparg = ord(code[pos]) + ord(code[pos+1])*256
            pos += 2
        if op == LOAD_CONST:
            # An IMPORT_NAME is always preceded by a LOAD_CONST, it's
            # a tuple of "from" names, or None for a regular import.
            # The tuple may contain "*" for "from <mod> import *"
            fromlist = co.co_consts[oparg]
        elif op == IMPORT_NAME:
            assert fromlist is None or type(fromlist) is tuple
            name = co.co_names[oparg]
            have_star = fromlist is not None and "*" in fromlist
            if fromlist is not None:
                fromlist = [f for f in fromlist if f != "*"]
            self._safe_import_hook(name, m, fromlist)
            if not have_star:
                continue
            # We've encountered an "import *". If it is a Python module,
            # the code has already been parsed and we can take over its
            # global names.
            source = None
            if m.__path__:
                # At this point we don't know whether 'name' is a
                # submodule of 'm' or a global module. Let's just try
                # the full name first.
                source = self.modules.get(m.__name__ + "." + name)
            if source is None:
                source = self.modules.get(name)
            if source is None:
                m.starimports[name] = 1
                continue
            m.globalnames.update(source.globalnames)
            m.starimports.update(source.starimports)
            if source.__code__ is None:
                # No code was recorded for the module, so its names are
                # unknown.
                m.starimports[name] = 1
        elif op in STORE_OPS:
            # keep track of all global names that are assigned to
            m.globalnames[co.co_names[oparg]] = 1
    for const in co.co_consts:
        if isinstance(const, type(co)):
            self.scan_code(const, m)
372 | ||
def load_package(self, fqname, pathname):
    """Register the package directory ``pathname`` and load its __init__.

    The package is recorded under the replacement name registered with
    ReplacePackage(), if any, and its ``__path__`` is extended with the
    directories registered through AddPackagePath().  Returns the module
    record of the package.
    """
    self.msgin(2, "load_package", fqname, pathname)
    newname = replacePackageMap.get(fqname)
    if newname:
        fqname = newname
    m = self.add_module(fqname)
    m.__file__ = pathname
    m.__path__ = [pathname]

    # As per comment at top of file, simulate runtime __path__ additions.
    m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

    fp, buf, stuff = self.find_module("__init__", m.__path__)
    # The file handed out by find_module() belongs to us; release it even
    # when loading the package's __init__ raises.
    try:
        self.load_module(fqname, fp, buf, stuff)
    finally:
        if fp:
            fp.close()
    self.msgout(2, "load_package ->", m)
    return m
389 | ||
def add_module(self, fqname):
    """Return the record stored for ``fqname``, creating it if necessary."""
    try:
        return self.modules[fqname]
    except KeyError:
        created = self.modules[fqname] = Module(fqname)
        return created
395 | ||
def find_module(self, name, path, parent=None):
    """Locate module ``name`` and return ``(file, pathname, description)``.

    ``path`` is the list of directories to search; None means "search
    ``self.path``", after checking the interpreter's built-in modules.
    Raises ImportError when the qualified name (``parent`` name plus
    ``name``) is listed in ``self.excludes``; lookup failures are raised
    by imp.find_module().
    """
    fullname = name
    if parent is not None:
        # assert path is not None
        fullname = parent.__name__ + '.' + name
    if fullname in self.excludes:
        self.msgout(3, "find_module -> Excluded", fullname)
        raise ImportError(name)

    if path is not None:
        return imp.find_module(name, path)
    if name in sys.builtin_module_names:
        # Built-in modules have neither a file nor a pathname.
        return (None, None, ("", "", imp.C_BUILTIN))
    return imp.find_module(name, self.path)
412 | ||
413 | def report(self): | |
414 | """Print a report to stdout, listing the found modules with their | |
415 | paths, as well as modules that are missing, or seem to be missing. | |
416 | """ | |
417 | ||
418 | print " %-25s %s" % ("Name", "File") | |
419 | print " %-25s %s" % ("----", "----") | |
420 | # Print modules found | |
421 | keys = self.modules.keys() | |
422 | keys.sort() | |
423 | for key in keys: | |
424 | m = self.modules[key] | |
425 | if m.__path__: | |
426 | print "P", | |
427 | else: | |
428 | print "m", | |
429 | print "%-25s" % key, m.__file__ or "" | |
430 | ||
431 | # Print missing modules | |
432 | missing, maybe = self.any_missing_maybe() | |
433 | if missing: | |
434 | ||
435 | print "Missing modules:" | |
436 | for name in missing: | |
437 | mods = self.badmodules[name].keys() | |
438 | mods.sort() | |
439 | print "?", name, "imported from", ', '.join(mods) | |
440 | # Print modules that may be missing, but then again, maybe not... | |
441 | if maybe: | |
442 | ||
443 | print "Submodules thay appear to be missing, but could also be", | |
444 | print "global names in the parent package:" | |
445 | for name in maybe: | |
446 | mods = self.badmodules[name].keys() | |
447 | mods.sort() | |
448 | print "?", name, "imported from", ', '.join(mods) | |
449 | ||
def any_missing(self):
    """Return a list of modules that appear to be missing.

    This is the concatenation of both lists returned by
    any_missing_maybe(); call that method directly to tell modules that
    are certainly missing from those that only *may* be missing.
    """
    certain, uncertain = self.any_missing_maybe()
    return certain + uncertain
457 | ||
def any_missing_maybe(self):
    """Return two sorted lists, one with modules that are certainly missing
    and one with modules that *may* be missing. The latter names could
    either be submodules *or* just global names in the package.

    The reason it can't always be determined is that it's impossible to
    tell which names are imported when "from module import *" is done
    with an extension module, short of actually importing it.

    Names listed in ``self.excludes`` are left out of both lists.
    """
    missing = []
    maybe = []
    for name in self.badmodules:
        if name in self.excludes:
            continue
        dot = name.rfind(".")
        pkg = None
        if dot >= 0:
            pkgname = name[:dot]
            pkg = self.modules.get(pkgname)
        if pkg is None or pkgname in self.badmodules[name]:
            # Either a top-level name, or the enclosing package is
            # unknown, or the package tried to import this module itself
            # and failed: it is definitely missing.
            missing.append(name)
        elif name[dot+1:] in pkg.globalnames:
            # It's a global in the package: definitely not missing.
            continue
        elif pkg.starimports:
            # It could be missing, but the package did an "import *"
            # from a non-Python module, so we simply can't be sure.
            maybe.append(name)
        else:
            # It's not a global in the package, the package didn't
            # do funny star imports, it's very likely to be missing.
            # The symbol could be inserted into the package from the
            # outside, but since that's not good style we simply list
            # it missing.
            missing.append(name)
    missing.sort()
    maybe.sort()
    return missing, maybe
503 | ||
504 | def replace_paths_in_code(self, co): | |
505 | new_filename = original_filename = os.path.normpath(co.co_filename) | |
506 | for f, r in self.replace_paths: | |
507 | if original_filename.startswith(f): | |
508 | new_filename = r + original_filename[len(f):] | |
509 | break | |
510 | ||
511 | if self.debug and original_filename not in self.processed_paths: | |
512 | if new_filename != original_filename: | |
513 | self.msgout(2, "co_filename %r changed to %r" \ | |
514 | % (original_filename,new_filename,)) | |
515 | else: | |
516 | self.msgout(2, "co_filename %r remains unchanged" \ | |
517 | % (original_filename,)) | |
518 | self.processed_paths.append(original_filename) | |
519 | ||
520 | consts = list(co.co_consts) | |
521 | for i in range(len(consts)): | |
522 | if isinstance(consts[i], type(co)): | |
523 | consts[i] = self.replace_paths_in_code(consts[i]) | |
524 | ||
525 | return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize, | |
526 | co.co_flags, co.co_code, tuple(consts), co.co_names, | |
527 | co.co_varnames, new_filename, co.co_name, | |
528 | co.co_firstlineno, co.co_lnotab, | |
529 | co.co_freevars, co.co_cellvars) | |
530 | ||
531 | ||
532 | def test(): | |
533 | # Parse command line | |
534 | import getopt | |
535 | try: | |
536 | opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") | |
537 | except getopt.error, msg: | |
538 | print msg | |
539 | return | |
540 | ||
541 | # Process options | |
542 | debug = 1 | |
543 | domods = 0 | |
544 | addpath = [] | |
545 | exclude = [] | |
546 | for o, a in opts: | |
547 | if o == '-d': | |
548 | debug = debug + 1 | |
549 | if o == '-m': | |
550 | domods = 1 | |
551 | if o == '-p': | |
552 | addpath = addpath + a.split(os.pathsep) | |
553 | if o == '-q': | |
554 | debug = 0 | |
555 | if o == '-x': | |
556 | exclude.append(a) | |
557 | ||
558 | # Provide default arguments | |
559 | if not args: | |
560 | script = "hello.py" | |
561 | else: | |
562 | script = args[0] | |
563 | ||
564 | # Set the path based on sys.path and the script directory | |
565 | path = sys.path[:] | |
566 | path[0] = os.path.dirname(script) | |
567 | path = addpath + path | |
568 | if debug > 1: | |
569 | print "path:" | |
570 | for item in path: | |
571 | print " ", repr(item) | |
572 | ||
573 | # Create the module finder and turn its crank | |
574 | mf = ModuleFinder(path, debug, exclude) | |
575 | for arg in args[1:]: | |
576 | if arg == '-m': | |
577 | domods = 1 | |
578 | continue | |
579 | if domods: | |
580 | if arg[-2:] == '.*': | |
581 | mf.import_hook(arg[:-2], None, ["*"]) | |
582 | else: | |
583 | mf.import_hook(arg) | |
584 | else: | |
585 | mf.load_file(arg) | |
586 | mf.run_script(script) | |
587 | mf.report() | |
588 | return mf # for -i debugging | |
589 | ||
590 | ||
591 | if __name__ == '__main__': | |
592 | try: | |
593 | mf = test() | |
594 | except KeyboardInterrupt: | |
595 | print "\n[interrupt]" |