Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / amd64 / lib / python2.4 / encodings / __init__.py
CommitLineData
920dae64
AT
""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> (encoder, decoder, stream_reader, stream_writer)
    The getregentry() API must return callable objects which adhere to
    the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
29
30import codecs, exceptions, types, aliases
31
# Cache of codec registry entries, keyed by the encoding name exactly as
# it was passed to search_function().  Failed lookups are cached as None.
_cache = {}

# Sentinel that lets search_function() tell "never looked up" apart from
# a cached None (a remembered failed lookup).
_unknown = '--unknown--'

# Non-empty fromlist handed to __import__() by search_function(), so that
# the named module itself is returned rather than its top-level package.
_import_tail = ['*']


def _build_norm_encoding_map():
    """ Build the 256-character table used by normalize_encoding().

        Position i of the table holds the replacement for the 8-bit
        character chr(i): ASCII letters, digits and the dot map to
        themselves, every other character maps to a space.

        The table is generated instead of being spelled out as a string
        literal because the literal depends on exact runs of padding
        spaces, which are easy to damage without noticing.

    """
    keep = ('.'
            '0123456789'
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            'abcdefghijklmnopqrstuvwxyz')
    table = []
    for code in range(256):
        char = chr(code)
        if char in keep:
            table.append(char)
        else:
            table.append(' ')
    return ''.join(table)


# Translation table for str.translate(); see _build_norm_encoding_map().
_norm_encoding_map = _build_norm_encoding_map()

# Alias table: maps normalized encoding names to codec module names.
# search_function() reads it and adds the aliases codec modules report.
_aliases = aliases.aliases
42
class CodecRegistryError(LookupError, SystemError):

    """ Raised by search_function() when a codec module's getregentry()
        does not return exactly four callable objects.

        Derives from both LookupError and SystemError, so handlers for
        either of them also catch this exception.

    """
46
def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.

        Note that encoding names should be ASCII only; if they do use
        non-ASCII characters, these must be Latin-1 compatible.

        encoding may be an 8-bit string or a Unicode string; the
        result is always an 8-bit string.

    """
    # Make sure we have an 8-bit string, because .translate() works
    # differently for Unicode strings.  isinstance() is used instead of
    # an exact type() comparison so that unicode subclasses are
    # converted as well.
    if isinstance(encoding, types.UnicodeType):
        # Note that .encode('latin-1') does *not* use the codec
        # registry, so this call doesn't recurse. (See unicodeobject.c
        # PyUnicode_AsEncodedString() for details)
        encoding = encoding.encode('latin-1')

    # The table turns every separator character into a space; split()
    # then collapses runs of them and drops leading/trailing ones.
    return '_'.join(encoding.translate(_norm_encoding_map).split())
68
def search_function(encoding):

    """ Codec search function for the modules of this package.

        Looks up the codec module for the given encoding name and
        returns its registry entry as a 4-tuple
        (encoder, decoder, stream_reader, stream_writer), or None if
        no codec module could be found.

        Results, including failed lookups, are cached under the
        encoding name exactly as it was passed in.  Aliases reported
        by the module's optional getaliases() are added to the alias
        table without overwriting existing entries.

        Raises CodecRegistryError if the module's getregentry() does
        not return exactly four callable objects.

    """
    # Cache lookup; _unknown distinguishes "never looked up" from a
    # cached None (a remembered failed lookup).
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        if not modname:
            continue
        try:
            mod = __import__(modname,
                             globals(), locals(), _import_tail)
        except ImportError:
            pass
        else:
            # modname stays bound to the name that imported
            # successfully; it is used for alias registration below.
            break
    else:
        # No candidate name could be imported.
        mod = None

    # mod may be None at this point; None has no getregentry attribute,
    # so the handler below covers that case as well.
    try:
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = tuple(getregentry())
    if len(entry) != 4:
        raise CodecRegistryError(
            'module "%s" (%s) failed to register' %
            (mod.__name__, mod.__file__))
    for obj in entry:
        if not callable(obj):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)' %
                (mod.__name__, mod.__file__))

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry
143
# Register the search_function in the Python codec registry.  This runs
# at import time, so importing this package is what installs it.
codecs.register(search_function)