[OpenSPARC-T2-SAM] / sam-t2 / devtools / amd64 / lib / python2.4 / encodings / __init__.py

""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> (encoder, decoder, stream_reader, stream_writer)
    The getregentry() API must return callable objects which adhere to
    the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"

import codecs, exceptions, types, aliases

# Cache of search_function() results, keyed by the encoding name exactly
# as it was passed in. Values are either a codec registry entry or None
# (a remembered miss).
_cache = {}
# Sentinel used as the default for _cache.get() so that a cached None
# can be told apart from "nothing cached yet".
_unknown = '--unknown--'
# Passed as the fromlist argument of __import__() in search_function();
# a non-empty fromlist makes __import__() return the named module itself
# rather than its top-level package.
_import_tail = ['*']
# 256-character table for str.translate(), used by normalize_encoding():
# ASCII digits, ASCII letters and the dot map to themselves, every other
# character maps to a space.
_norm_encoding_map = ('                                              . '
                      '0123456789       ABCDEFGHIJKLMNOPQRSTUVWXYZ     '
                      ' abcdefghijklmnopqrstuvwxyz                     '
                      '                                                '
                      '                                                '
                      '                ')
# Alias table shared with the aliases module (same dict object, so
# entries added by search_function() are visible there too).
_aliases = aliases.aliases

class CodecRegistryError(exceptions.LookupError,
                         exceptions.SystemError):
    """ Raised by search_function() when a codec module's
        getregentry() result is not usable as a registry entry, i.e.
        it does not consist of exactly four callable objects.

        Derives from both LookupError and SystemError, so handlers for
        either of them will catch it.

    """
    pass

def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.

        Note that encoding names should be ASCII only; if they do use
        non-ASCII characters, these must be Latin-1 compatible.

        encoding may be an 8-bit string or a Unicode string, including
        instances of unicode subclasses. Unicode input is converted to
        an 8-bit string first, so the encode step will fail for
        characters outside Latin-1.

        Returns the normalized name as an 8-bit string.

    """
    # Make sure we have an 8-bit string, because .translate() works
    # differently for Unicode strings.
    #
    # isinstance() is used instead of an exact type comparison so that
    # instances of unicode subclasses are converted as well; they would
    # otherwise reach .translate() below with an 8-bit table, which
    # Unicode strings do not accept.
    if isinstance(encoding, types.UnicodeType):
        # Note that .encode('latin-1') does *not* use the codec
        # registry, so this call doesn't recurse. (See unicodeobject.c
        # PyUnicode_AsEncodedString() for details)
        encoding = encoding.encode('latin-1')
    # The table turns every separator character into a space; split()
    # then drops leading/trailing runs and collapses inner runs, and
    # the pieces are glued back together with single underscores.
    return '_'.join(encoding.translate(_norm_encoding_map).split())

def search_function(encoding):

    """ Codec search function of the "encodings" package.

        encoding is the encoding name to look up. It is used verbatim
        as the cache key and is normalized with normalize_encoding()
        to derive the name of the codec module to import.

        Returns the registry entry of the codec module, i.e. the
        4-tuple (encoder, decoder, stream_reader, stream_writer)
        obtained from its getregentry() function, or None if no
        suitable codec module could be imported. Both outcomes are
        cached, so a later call with the same name returns the cached
        value without importing anything.

        Raises CodecRegistryError if getregentry() does not return
        exactly four callable objects. Nothing is cached in that case.

    """
    # Cache lookup. The _unknown sentinel is needed because None is a
    # legitimate cached value (a remembered miss).
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        if not modname:
            # An empty name cannot be imported; skip it.
            continue
        try:
            mod = __import__(modname,
                             globals(), locals(), _import_tail)
        except ImportError:
            # Try the next candidate name, if any.
            pass
        else:
            # Import succeeded; modname keeps the name that worked and
            # is used again below when registering aliases.
            break
    else:
        # No candidate could be imported.
        mod = None

    try:
        # This also covers mod being None: the attribute access fails
        # with AttributeError in that case as well.
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = tuple(getregentry())
    if len(entry) != 4:
        raise CodecRegistryError(
            'module "%s" (%s) failed to register' %
            (mod.__name__, mod.__file__))
    for obj in entry:
        if not callable(obj):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)' %
                (mod.__name__, mod.__file__))

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        # getaliases() is optional for codec modules.
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry

# Register the search_function in the Python codec registry. This runs
# as a side effect of importing this package, so it happens once per
# import of the module rather than on demand.
codecs.register(search_function)
Commit	Line	Data
920dae64 AT	1	""" Standard "encodings" Package
	2
	3	Standard Python encoding modules are stored in this package
	4	directory.
	5
	6	Codec modules must have names corresponding to normalized encoding
	7	names as defined in the normalize_encoding() function below, e.g.
	8	'utf-8' must be implemented by the module 'utf_8.py'.
	9
	10	Each codec module must export the following interface:
	11
	12	* getregentry() -> (encoder, decoder, stream_reader, stream_writer)
	13	The getregentry() API must return callable objects which adhere to
	14	the Python Codec Interface Standard.
	15
	16	In addition, a module may optionally also define the following
	17	APIs which are then used by the package's codec search function:
	18
	19	* getaliases() -> sequence of encoding name strings to use as aliases
	20
	21	Alias names returned by getaliases() must be normalized encoding
	22	names as defined by normalize_encoding().
	23
	24	Written by Marc-Andre Lemburg (mal@lemburg.com).
	25
	26	(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
	27
	28	"""#"
	29
	30	import codecs, exceptions, types, aliases
	31
	32	_cache = {}
	33	_unknown = '--unknown--'
	34	_import_tail = ['*']
	35	_norm_encoding_map = (' . '
	36	'0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
	37	' abcdefghijklmnopqrstuvwxyz '
	38	' '
	39	' '
	40	' ')
	41	_aliases = aliases.aliases
	42
	43	class CodecRegistryError(exceptions.LookupError,
	44	exceptions.SystemError):
	45	pass
	46
	47	def normalize_encoding(encoding):
	48
	49	""" Normalize an encoding name.
	50
	51	Normalization works as follows: all non-alphanumeric
	52	characters except the dot used for Python package names are
	53	collapsed and replaced with a single underscore, e.g. ' -;#'
	54	becomes '_'. Leading and trailing underscores are removed.
	55
	56	Note that encoding names should be ASCII only; if they do use
	57	non-ASCII characters, these must be Latin-1 compatible.
	58
	59	"""
	60	# Make sure we have an 8-bit string, because .translate() works
	61	# differently for Unicode strings.
	62	if type(encoding) is types.UnicodeType:
	63	# Note that .encode('latin-1') does not use the codec
	64	# registry, so this call doesn't recurse. (See unicodeobject.c
65	# PyUnicode_AsEncodedString() for details)
66	encoding = encoding.encode('latin-1')
67	return '_'.join(encoding.translate(_norm_encoding_map).split())
68
69	def search_function(encoding):
70
71	# Cache lookup
72	entry = _cache.get(encoding, _unknown)
73	if entry is not _unknown:
74	return entry
75
76	# Import the module:
77	#
78	# First try to find an alias for the normalized encoding
79	# name and lookup the module using the aliased name, then try to
80	# lookup the module using the standard import scheme, i.e. first
81	# try in the encodings package, then at top-level.
82	#
83	norm_encoding = normalize_encoding(encoding)
84	aliased_encoding = _aliases.get(norm_encoding) or \
85	_aliases.get(norm_encoding.replace('.', '_'))
86	if aliased_encoding is not None:
87	modnames = [aliased_encoding,
88	norm_encoding]
89	else:
90	modnames = [norm_encoding]
91	for modname in modnames:
92	if not modname:
93	continue
94	try:
95	mod = __import__(modname,
96	globals(), locals(), _import_tail)
97	except ImportError:
98	pass
99	else:
100	break
101	else:
102	mod = None
103
104	try:
105	getregentry = mod.getregentry
106	except AttributeError:
107	# Not a codec module
108	mod = None
109
110	if mod is None:
111	# Cache misses
112	_cache[encoding] = None
113	return None
114
115	# Now ask the module for the registry entry
116	entry = tuple(getregentry())
117	if len(entry) != 4:
118	raise CodecRegistryError,\
119	'module "%s" (%s) failed to register' % \
120	(mod.__name__, mod.__file__)
121	for obj in entry:
122	if not callable(obj):
123	raise CodecRegistryError,\
124	'incompatible codecs in module "%s" (%s)' % \
125	(mod.__name__, mod.__file__)
126
127	# Cache the codec registry entry
128	_cache[encoding] = entry
129
130	# Register its aliases (without overwriting previously registered
131	# aliases)
132	try:
133	codecaliases = mod.getaliases()
134	except AttributeError:
135	pass
136	else:
137	for alias in codecaliases:
138	if not _aliases.has_key(alias):
139	_aliases[alias] = modname
140
141	# Return the registry entry
142	return entry
143
144	# Register the search_function in the Python codec registry
145	codecs.register(search_function)