[OpenSPARC-T2-SAM] / sam-t2 / devtools / v8plus / lib / python2.4 / gettext.py

"""Internationalization and localization support.

This module provides internationalization (I18N) and localization (L10N)
support for your Python programs by providing an interface to the GNU gettext
message catalog library.

I18N refers to the operation by which a program is made aware of multiple
languages.  L10N refers to the adaptation of your program, once
internationalized, to the local language and cultural habits.

"""

# This module represents the integration of work, contributions, feedback, and
# suggestions from the following people:
#
# Martin von Loewis, who wrote the initial implementation of the underlying
# C-based libintlmodule (later renamed _gettext), along with a skeletal
# gettext.py implementation.
#
# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
# which also included a pure-Python implementation to read .mo files if
# intlmodule wasn't available.
#
# James Henstridge, who also wrote a gettext.py module, which has some
# interesting, but currently unsupported experimental features: the notion of
# a Catalog class and instances, and the ability to add to a catalog file via
# a Python API.
#
# Barry Warsaw integrated these modules, wrote the .install() API and code,
# and conformed all C and Python code to Python's coding standards.
#
# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
# module.
#
# J. David Ibanez implemented plural forms. Bruno Haible fixed some bugs.
#
# TODO:
# - Lazy loading of .mo files.  Currently the entire catalog is loaded into
#   memory, but that's probably bad for large translated programs.  Instead,
#   the lexical sort of original strings in GNU .mo files should be exploited
#   to do binary searches and lazy initializations.  Or you might want to use
#   the undocumented double-hash algorithm for .mo files with hash tables, but
#   you'll need to study the GNU gettext code to do this.
#
# - Support Solaris .mo file formats.  Unfortunately, we've been unable to
#   find this format documented anywhere.


import locale, copy, os, re, struct, sys
from errno import ENOENT


# Names exported by ``from gettext import *``.
# NOTE(review): lgettext, ldgettext, lngettext, ldngettext and
# bind_textdomain_codeset are defined in this module but are not listed
# here -- confirm whether that omission is intentional.
__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
           'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
           'dgettext', 'dngettext', 'gettext', 'ngettext',
           ]

# Catalog root used when no locale directory is given:
# <sys.prefix>/share/locale
_default_localedir = os.path.join(sys.prefix, 'share', 'locale')


def test(condition, true, false):
    """
    Implements the C expression:

      condition ? true : false

    Required to correctly interpret plural forms.
    """
    if condition:
        return true
    else:
        return false


def c2py(plural):
    """Gets a C expression as used in PO files for plural forms and returns a
    Python lambda function that implements an equivalent expression.

    The returned function takes the count ``n`` and returns the result of
    the expression converted with int().

    Raises ValueError if the expression cannot be tokenized or if it
    contains any identifier other than ``n``.
    """
    # Security check, allow only the "n" identifier
    # NOTE(review): the check below only rejects identifiers; operators and
    # numeric literals are passed on unchanged to eval() at the end of this
    # function.  Confirm that catalogs are trusted input before relying on
    # this check alone.
    from StringIO import StringIO
    import token, tokenize
    tokens = tokenize.generate_tokens(StringIO(plural).readline)
    try:
        # generate_tokens() is lazy, so tokenizing errors surface here, while
        # the list is being built.
        danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
    except tokenize.TokenError:
        raise ValueError, \
              'plural forms expression error, maybe unbalanced parenthesis'
    else:
        if danger:
            raise ValueError, 'plural forms expression could be dangerous'

    # Replace some C operators by their Python equivalents
    plural = plural.replace('&&', ' and ')
    plural = plural.replace('||', ' or ')

    # Turn "!x" into " not x"; the [^=] keeps the "!=" operator untouched.
    expr = re.compile(r'\!([^=])')
    plural = expr.sub(' not \\1', plural)

    # Regular expression and replacement function used to transform
    # "a?b:c" to "test(a,b,c)".
    expr = re.compile(r'(.*?)\?(.*?):(.*)')
    def repl(x):
        # The "else" part (group 3) may itself contain a ternary, so it is
        # rewritten recursively: a?b:c?d:e -> test(a, b, test(c, d, e)).
        return "test(%s, %s, %s)" % (x.group(1), x.group(2),
                                     expr.sub(repl, x.group(3)))

    # Code to transform the plural expression, taking care of parentheses
    # Each open parenthesis starts a new buffer on the stack; when it is
    # closed, the buffer's ternaries are rewritten and the result is appended
    # (re-parenthesized) to the enclosing buffer.
    stack = ['']
    for c in plural:
        if c == '(':
            stack.append('')
        elif c == ')':
            if len(stack) == 1:
                # Actually, we never reach this code, because unbalanced
                # parentheses get caught in the security check at the
                # beginning.
                raise ValueError, 'unbalanced parenthesis in plural form'
            s = expr.sub(repl, stack.pop())
            stack[-1] += '(%s)' % s
        else:
            stack[-1] += c
    # Rewrite whatever ternaries remain at the outermost level.
    plural = expr.sub(repl, stack.pop())

    # The generated source refers to the test() helper by name.
    return eval('lambda n: int(%s)' % plural)


def _expand_lang(locale):
    from locale import normalize
    locale = normalize(locale)
    COMPONENT_CODESET   = 1 << 0
    COMPONENT_TERRITORY = 1 << 1
    COMPONENT_MODIFIER  = 1 << 2
    # split up the locale into its base components
    mask = 0
    pos = locale.find('@')
    if pos >= 0:
        modifier = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_MODIFIER
    else:
        modifier = ''
    pos = locale.find('.')
    if pos >= 0:
        codeset = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_CODESET
    else:
        codeset = ''
    pos = locale.find('_')
    if pos >= 0:
        territory = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_TERRITORY
    else:
        territory = ''
    language = locale
    ret = []
    for i in range(mask+1):
        if not (i & ~mask):  # if all components for this combo exist ...
            val = language
            if i & COMPONENT_TERRITORY: val += territory
            if i & COMPONENT_CODESET:   val += codeset
            if i & COMPONENT_MODIFIER:  val += modifier
            ret.append(val)
    ret.reverse()
    return ret


class NullTranslations:
    """Base translations class that has no catalog of its own.

    Every lookup is delegated to the fallback object when one has been
    registered with add_fallback(); otherwise the untranslated message is
    returned.  Subclasses override _parse() to load a catalog from the file
    object given to the constructor.
    """

    def __init__(self, fp=None):
        """Initialize the instance and, if ``fp`` is given, parse it."""
        self._fallback = None
        self._output_charset = None
        self._charset = None
        self._info = {}
        if fp is not None:
            self._parse(fp)

    def _parse(self, fp):
        """Hook for subclasses; the base class reads nothing from ``fp``."""
        pass

    def add_fallback(self, fallback):
        """Append ``fallback`` to the end of the fallback chain."""
        current = self._fallback
        if not current:
            self._fallback = fallback
        else:
            current.add_fallback(fallback)

    def gettext(self, message):
        """Return the fallback's gettext() result, or ``message`` itself."""
        fallback = self._fallback
        if not fallback:
            return message
        return fallback.gettext(message)

    def lgettext(self, message):
        """Return the fallback's lgettext() result, or ``message`` itself."""
        fallback = self._fallback
        if not fallback:
            return message
        return fallback.lgettext(message)

    def ngettext(self, msgid1, msgid2, n):
        """Return the fallback's ngettext() result; without a fallback,
        ``msgid1`` when ``n == 1`` and ``msgid2`` otherwise.
        """
        fallback = self._fallback
        if not fallback:
            if n == 1:
                return msgid1
            return msgid2
        return fallback.ngettext(msgid1, msgid2, n)

    def lngettext(self, msgid1, msgid2, n):
        """Return the fallback's lngettext() result; without a fallback,
        ``msgid1`` when ``n == 1`` and ``msgid2`` otherwise.
        """
        fallback = self._fallback
        if not fallback:
            if n == 1:
                return msgid1
            return msgid2
        return fallback.lngettext(msgid1, msgid2, n)

    def ugettext(self, message):
        """Return the fallback's ugettext() result, or ``message`` converted
        with unicode().
        """
        fallback = self._fallback
        if not fallback:
            return unicode(message)
        return fallback.ugettext(message)

    def ungettext(self, msgid1, msgid2, n):
        """Return the fallback's ungettext() result; without a fallback, the
        form selected by ``n == 1`` converted with unicode().
        """
        fallback = self._fallback
        if fallback:
            return fallback.ungettext(msgid1, msgid2, n)
        if n == 1:
            chosen = msgid1
        else:
            chosen = msgid2
        return unicode(chosen)

    def info(self):
        """Return the catalog metadata dictionary."""
        return self._info

    def charset(self):
        """Return the catalog's charset, or None if none was recorded."""
        return self._charset

    def output_charset(self):
        """Return the charset set with set_output_charset(), or None."""
        return self._output_charset

    def set_output_charset(self, charset):
        """Record ``charset`` as the output charset."""
        self._output_charset = charset

    def install(self, unicode=False):
        """Bind ``_`` in the builtin namespace to ugettext() when ``unicode``
        is true, and to gettext() otherwise.
        """
        import __builtin__
        if unicode:
            translate = self.ugettext
        else:
            translate = self.gettext
        __builtin__.__dict__['_'] = translate


class GNUTranslations(NullTranslations):
    # Magic number of .mo files
    LE_MAGIC = 0x950412deL
    BE_MAGIC = 0xde120495L

    def _parse(self, fp):
        """Override this method to support alternative .mo formats."""
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        # Parse the .mo file header, which consists of 5 little endian 32
        # bit words.
        self._catalog = catalog = {}
        self.plural = lambda n: int(n != 1) # germanic plural by default
        buf = fp.read()
        buflen = len(buf)
        # Are we big endian or little endian?
        magic = unpack('<I', buf[:4])[0]
        if magic == self.LE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
            ii = '<II'
        elif magic == self.BE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
            ii = '>II'
        else:
            raise IOError(0, 'Bad magic number', filename)
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        for i in xrange(0, msgcount):
            mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
            mend = moff + mlen
            tlen, toff = unpack(ii, buf[transidx:transidx+8])
            tend = toff + tlen
            if mend < buflen and tend < buflen:
                msg = buf[moff:mend]
                tmsg = buf[toff:tend]
            else:
                raise IOError(0, 'File is corrupt', filename)
            # See if we're looking at GNU .mo conventions for metadata
            if mlen == 0:
                # Catalog description
                lastk = k = None
                for item in tmsg.splitlines():
                    item = item.strip()
                    if not item:
                        continue
                    if ':' in item:
                        k, v = item.split(':', 1)
                        k = k.strip().lower()
                        v = v.strip()
                        self._info[k] = v
                        lastk = k
                    elif lastk:
                        self._info[lastk] += '\n' + item
                    if k == 'content-type':
                        self._charset = v.split('charset=')[1]
                    elif k == 'plural-forms':
                        v = v.split(';')
                        plural = v[1].split('plural=')[1]
                        self.plural = c2py(plural)
            # Note: we unconditionally convert both msgids and msgstrs to
            # Unicode using the character encoding specified in the charset
            # parameter of the Content-Type header.  The gettext documentation
            # strongly encourages msgids to be us-ascii, but some appliations
            # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
            # traditional gettext applications, the msgid conversion will
            # cause no problems since us-ascii should always be a subset of
            # the charset encoding.  We may want to fall back to 8-bit msgids
            # if the Unicode conversion fails.
            if '\x00' in msg:
                # Plural forms
                msgid1, msgid2 = msg.split('\x00')
                tmsg = tmsg.split('\x00')
                if self._charset:
                    msgid1 = unicode(msgid1, self._charset)
                    tmsg = [unicode(x, self._charset) for x in tmsg]
                for i in range(len(tmsg)):
                    catalog[(msgid1, i)] = tmsg[i]
            else:
                if self._charset:
                    msg = unicode(msg, self._charset)
                    tmsg = unicode(tmsg, self._charset)
                catalog[msg] = tmsg
            # advance to next entry in the seek tables
            masteridx += 8
            transidx += 8

    def gettext(self, message):
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.gettext(message)
            return message
        # Encode the Unicode tmsg back to an 8-bit string, if possible
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        elif self._charset:
            return tmsg.encode(self._charset)
        return tmsg

    def lgettext(self, message):
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.lgettext(message)
            return message
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        return tmsg.encode(locale.getpreferredencoding())

    def ngettext(self, msgid1, msgid2, n):
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
            if self._output_charset:
                return tmsg.encode(self._output_charset)
            elif self._charset:
                return tmsg.encode(self._charset)
            return tmsg
        except KeyError:
            if self._fallback:
                return self._fallback.ngettext(msgid1, msgid2, n)
            if n == 1:
                return msgid1
            else:
                return msgid2

    def lngettext(self, msgid1, msgid2, n):
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
            if self._output_charset:
                return tmsg.encode(self._output_charset)
            return tmsg.encode(locale.getpreferredencoding())
        except KeyError:
            if self._fallback:
                return self._fallback.lngettext(msgid1, msgid2, n)
            if n == 1:
                return msgid1
            else:
                return msgid2

    def ugettext(self, message):
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.ugettext(message)
            return unicode(message)
        return tmsg

    def ungettext(self, msgid1, msgid2, n):
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.ungettext(msgid1, msgid2, n)
            if n == 1:
                tmsg = unicode(msgid1)
            else:
                tmsg = unicode(msgid2)
        return tmsg


# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=0):
    """Search ``localedir`` for the .mo file(s) of ``domain``.

    ``localedir`` defaults to the module's default locale directory.  When
    ``languages`` is None, the first non-empty variable among LANGUAGE,
    LC_ALL, LC_MESSAGES and LANG supplies a colon-separated list, and 'C' is
    appended if absent.  Each language is expanded with _expand_lang() and
    the candidates are probed in order as
    <localedir>/<lang>/LC_MESSAGES/<domain>.mo; probing stops at 'C'.

    Returns the first existing path (None if there is none) when ``all`` is
    false, or the list of every existing path when ``all`` is true.
    """
    # Get some reasonable defaults for arguments that were not supplied
    if localedir is None:
        localedir = _default_localedir
    if languages is None:
        languages = []
        for name in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            setting = os.environ.get(name)
            if setting:
                languages = setting.split(':')
                break
        if 'C' not in languages:
            languages.append('C')

    # normalize and expand the languages, dropping duplicates but keeping
    # the order of first appearance
    candidates = []
    for requested in languages:
        for variant in _expand_lang(requested):
            if variant not in candidates:
                candidates.append(variant)

    # probe the candidates in order of preference
    found = []
    for variant in candidates:
        if variant == 'C':
            break
        path = os.path.join(localedir, variant, 'LC_MESSAGES', '%s.mo' % domain)
        if not os.path.exists(path):
            continue
        if not all:
            return path
        found.append(path)
    if all:
        return found
    return None


# Cache of parsed catalogs, keyed by (translation class, absolute .mo file
# path) so that the same file parsed by two different classes is kept apart.
_translations = {}

def translation(domain, localedir=None, languages=None,
                class_=None, fallback=False, codeset=None):
    """Return a translations object for ``domain``.

    find() is asked for every matching .mo file.  Each file is parsed once
    per ``class_`` (GNUTranslations by default) and cached; the object handed
    back is a shallow copy of the cached one, so setting its output charset
    or fallbacks does not touch the cache.  The first file found becomes the
    result and the remaining ones are chained to it as fallbacks.

    ``class_`` is called with the open file and must finish reading from it
    before returning, because the file is closed right afterwards.

    When no file is found, a NullTranslations instance is returned if
    ``fallback`` is true; otherwise IOError(ENOENT) is raised.
    """
    if class_ is None:
        class_ = GNUTranslations
    mofiles = find(domain, localedir, languages, all=1)
    if not mofiles:
        if fallback:
            return NullTranslations()
        raise IOError(ENOENT, 'No translation file found for domain', domain)
    # Avoid opening, reading, and parsing the .mo file after it's been done
    # once.
    result = None
    for mofile in mofiles:
        # The class is part of the key: keying on the path alone would hand
        # a caller asking for class B an object cached for class A.
        key = (class_, os.path.abspath(mofile))
        t = _translations.get(key)
        if t is None:
            fp = open(mofile, 'rb')
            try:
                t = _translations.setdefault(key, class_(fp))
            finally:
                # Close the file even when parsing raises.
                fp.close()
        # Copy the translation object to allow setting fallbacks and
        # output charset. All other instance data is shared with the
        # cached object.
        t = copy.copy(t)
        if codeset:
            t.set_output_charset(codeset)
        if result is None:
            result = t
        else:
            result.add_fallback(t)
    return result


def install(domain, localedir=None, unicode=False, codeset=None):
    """Look up the translations for ``domain`` with translation() (using
    fallback=True) and call install() on the result, passing ``unicode``
    through.
    """
    catalog = translation(domain, localedir, fallback=True, codeset=codeset)
    catalog.install(unicode)


# a mapping between domains and locale directories, filled in by
# bindtextdomain()
_localedirs = {}
# a mapping between domains and codesets, filled in by
# bind_textdomain_codeset()
_localecodesets = {}
# current global domain, changed by textdomain(); `messages' used for
# compatibility with GNU gettext
_current_domain = 'messages'


def textdomain(domain=None):
    """Return the current global domain, first changing it to ``domain``
    when ``domain`` is not None.
    """
    global _current_domain
    if domain is None:
        return _current_domain
    _current_domain = domain
    return _current_domain


def bindtextdomain(domain, localedir=None):
    """Return the locale directory bound to ``domain``, first binding it to
    ``localedir`` when ``localedir`` is not None.

    A domain that has never been bound yields the default locale directory.
    """
    bindings = _localedirs
    if localedir is not None:
        bindings[domain] = localedir
    return bindings.get(domain, _default_localedir)


def bind_textdomain_codeset(domain, codeset=None):
    """Return the codeset bound to ``domain``, first binding it to
    ``codeset`` when ``codeset`` is not None.

    A domain that has never been bound yields None.
    """
    bindings = _localecodesets
    if codeset is not None:
        bindings[domain] = codeset
    return bindings.get(domain)


def dgettext(domain, message):
    """Translate ``message`` with the catalog of ``domain``, using the
    locale directory and codeset bound to that domain.

    ``message`` is returned unchanged when translation() raises IOError.
    """
    localedir = _localedirs.get(domain)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        return message
    else:
        return catalog.gettext(message)

def ldgettext(domain, message):
    """Like dgettext(), but the lookup goes through the catalog's
    lgettext() method.

    ``message`` is returned unchanged when translation() raises IOError.
    """
    localedir = _localedirs.get(domain)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        return message
    else:
        return catalog.lgettext(message)

def dngettext(domain, msgid1, msgid2, n):
    """Plural-aware lookup in the catalog of ``domain``, using the locale
    directory and codeset bound to that domain.

    When translation() raises IOError, ``msgid1`` is returned for ``n == 1``
    and ``msgid2`` otherwise.
    """
    localedir = _localedirs.get(domain)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        if n == 1:
            return msgid1
        return msgid2
    return catalog.ngettext(msgid1, msgid2, n)

def ldngettext(domain, msgid1, msgid2, n):
    """Like dngettext(), but the lookup goes through the catalog's
    lngettext() method.

    When translation() raises IOError, ``msgid1`` is returned for ``n == 1``
    and ``msgid2`` otherwise.
    """
    localedir = _localedirs.get(domain)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        if n == 1:
            return msgid1
        return msgid2
    return catalog.lngettext(msgid1, msgid2, n)

def gettext(message):
    """Translate ``message`` in the current global domain via dgettext()."""
    domain = _current_domain
    return dgettext(domain, message)

def lgettext(message):
    """Translate ``message`` in the current global domain via ldgettext()."""
    domain = _current_domain
    return ldgettext(domain, message)

def ngettext(msgid1, msgid2, n):
    """Plural-aware lookup in the current global domain via dngettext()."""
    domain = _current_domain
    return dngettext(domain, msgid1, msgid2, n)

def lngettext(msgid1, msgid2, n):
    """Plural-aware lookup in the current global domain via ldngettext()."""
    domain = _current_domain
    return ldngettext(domain, msgid1, msgid2, n)

# dcgettext() has been deemed unnecessary and is not implemented.

# James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
# was:
#
#    import gettext
#    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
#    _ = cat.gettext
#    print _('Hello World')

# The resulting catalog object currently doesn't support access through a
# dictionary API, which was supported (but apparently unused) in GNOME
# gettext.

Catalog = translation
Commit	Line	Data
920dae64 AT	1	"""Internationalization and localization support.
	2
	3	This module provides internationalization (I18N) and localization (L10N)
	4	support for your Python programs by providing an interface to the GNU gettext
	5	message catalog library.
	6
	7	I18N refers to the operation by which a program is made aware of multiple
	8	languages. L10N refers to the adaptation of your program, once
	9	internationalized, to the local language and cultural habits.
	10
	11	"""
	12
	13	# This module represents the integration of work, contributions, feedback, and
	14	# suggestions from the following people:
	15	#
	16	# Martin von Loewis, who wrote the initial implementation of the underlying
	17	# C-based libintlmodule (later renamed _gettext), along with a skeletal
	18	# gettext.py implementation.
	19	#
	20	# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
	21	# which also included a pure-Python implementation to read .mo files if
	22	# intlmodule wasn't available.
	23	#
	24	# James Henstridge, who also wrote a gettext.py module, which has some
	25	# interesting, but currently unsupported experimental features: the notion of
	26	# a Catalog class and instances, and the ability to add to a catalog file via
	27	# a Python API.
	28	#
	29	# Barry Warsaw integrated these modules, wrote the .install() API and code,
	30	# and conformed all C and Python code to Python's coding standards.
	31	#
	32	# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
	33	# module.
	34	#
	35	# J. David Ibanez implemented plural forms. Bruno Haible fixed some bugs.
	36	#
	37	# TODO:
	38	# - Lazy loading of .mo files. Currently the entire catalog is loaded into
	39	# memory, but that's probably bad for large translated programs. Instead,
	40	# the lexical sort of original strings in GNU .mo files should be exploited
	41	# to do binary searches and lazy initializations. Or you might want to use
	42	# the undocumented double-hash algorithm for .mo files with hash tables, but
	43	# you'll need to study the GNU gettext code to do this.
	44	#
	45	# - Support Solaris .mo file formats. Unfortunately, we've been unable to
	46	# find this format documented anywhere.
	47
	48
	49	import locale, copy, os, re, struct, sys
	50	from errno import ENOENT
	51
	52
	53	__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
	54	'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
	55	'dgettext', 'dngettext', 'gettext', 'ngettext',
	56	]
	57
	58	_default_localedir = os.path.join(sys.prefix, 'share', 'locale')
	59
	60
	61	def test(condition, true, false):
	62	"""
	63	Implements the C expression:
	64
65	condition ? true : false
66
67	Required to correctly interpret plural forms.
68	"""
69	if condition:
70	return true
71	else:
72	return false
73
74
75	def c2py(plural):
76	"""Gets a C expression as used in PO files for plural forms and returns a
77	Python lambda function that implements an equivalent expression.
78	"""
79	# Security check, allow only the "n" identifier
80	from StringIO import StringIO
81	import token, tokenize
82	tokens = tokenize.generate_tokens(StringIO(plural).readline)
83	try:
84	danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
85	except tokenize.TokenError:
86	raise ValueError, \
87	'plural forms expression error, maybe unbalanced parenthesis'
88	else:
89	if danger:
90	raise ValueError, 'plural forms expression could be dangerous'
91
92	# Replace some C operators by their Python equivalents
93	plural = plural.replace('&&', ' and ')
94	plural = plural.replace('\|\|', ' or ')
95
96	expr = re.compile(r'\!([^=])')
97	plural = expr.sub(' not \\1', plural)
98
99	# Regular expression and replacement function used to transform
100	# "a?b:c" to "test(a,b,c)".
101	expr = re.compile(r'(.?)\?(.?):(.*)')
102	def repl(x):
103	return "test(%s, %s, %s)" % (x.group(1), x.group(2),
104	expr.sub(repl, x.group(3)))
105
106	# Code to transform the plural expression, taking care of parentheses
107	stack = ['']
108	for c in plural:
109	if c == '(':
110	stack.append('')
111	elif c == ')':
112	if len(stack) == 1:
113	# Actually, we never reach this code, because unbalanced
114	# parentheses get caught in the security check at the
115	# beginning.
116	raise ValueError, 'unbalanced parenthesis in plural form'
117	s = expr.sub(repl, stack.pop())
118	stack[-1] += '(%s)' % s
119	else:
120	stack[-1] += c
121	plural = expr.sub(repl, stack.pop())
122
123	return eval('lambda n: int(%s)' % plural)
124
125
126
127	def _expand_lang(locale):
128	from locale import normalize
129	locale = normalize(locale)
130	COMPONENT_CODESET = 1 << 0
131	COMPONENT_TERRITORY = 1 << 1
132	COMPONENT_MODIFIER = 1 << 2
133	# split up the locale into its base components
134	mask = 0
135	pos = locale.find('@')
136	if pos >= 0:
137	modifier = locale[pos:]
138	locale = locale[:pos]
139	mask \|= COMPONENT_MODIFIER
140	else:
141	modifier = ''
142	pos = locale.find('.')
143	if pos >= 0:
144	codeset = locale[pos:]
145	locale = locale[:pos]
146	mask \|= COMPONENT_CODESET
147	else:
148	codeset = ''
149	pos = locale.find('_')
150	if pos >= 0:
151	territory = locale[pos:]
152	locale = locale[:pos]
153	mask \|= COMPONENT_TERRITORY
154	else:
155	territory = ''
156	language = locale
157	ret = []
158	for i in range(mask+1):
159	if not (i & ~mask): # if all components for this combo exist ...
160	val = language
161	if i & COMPONENT_TERRITORY: val += territory
162	if i & COMPONENT_CODESET: val += codeset
163	if i & COMPONENT_MODIFIER: val += modifier
164	ret.append(val)
165	ret.reverse()
166	return ret
167
168
169
170	class NullTranslations:
171	def __init__(self, fp=None):
172	self._info = {}
173	self._charset = None
174	self._output_charset = None
175	self._fallback = None
176	if fp is not None:
177	self._parse(fp)
178
179	def _parse(self, fp):
180	pass
181
182	def add_fallback(self, fallback):
183	if self._fallback:
184	self._fallback.add_fallback(fallback)
185	else:
186	self._fallback = fallback
187
188	def gettext(self, message):
189	if self._fallback:
190	return self._fallback.gettext(message)
191	return message
192
193	def lgettext(self, message):
194	if self._fallback:
195	return self._fallback.lgettext(message)
196	return message
197
198	def ngettext(self, msgid1, msgid2, n):
199	if self._fallback:
200	return self._fallback.ngettext(msgid1, msgid2, n)
201	if n == 1:
202	return msgid1
203	else:
204	return msgid2
205
206	def lngettext(self, msgid1, msgid2, n):
207	if self._fallback:
208	return self._fallback.lngettext(msgid1, msgid2, n)
209	if n == 1:
210	return msgid1
211	else:
212	return msgid2
213
214	def ugettext(self, message):
215	if self._fallback:
216	return self._fallback.ugettext(message)
217	return unicode(message)
218
219	def ungettext(self, msgid1, msgid2, n):
220	if self._fallback:
221	return self._fallback.ungettext(msgid1, msgid2, n)
222	if n == 1:
223	return unicode(msgid1)
224	else:
225	return unicode(msgid2)
226
227	def info(self):
228	return self._info
229
230	def charset(self):
231	return self._charset
232
233	def output_charset(self):
234	return self._output_charset
235
236	def set_output_charset(self, charset):
237	self._output_charset = charset
238
239	def install(self, unicode=False):
240	import __builtin__
241	__builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
242
243
244	class GNUTranslations(NullTranslations):
245	# Magic number of .mo files
246	LE_MAGIC = 0x950412deL
247	BE_MAGIC = 0xde120495L
248
249	def _parse(self, fp):
250	"""Override this method to support alternative .mo formats."""
251	unpack = struct.unpack
252	filename = getattr(fp, 'name', '')
253	# Parse the .mo file header, which consists of 5 little endian 32
254	# bit words.
255	self._catalog = catalog = {}
256	self.plural = lambda n: int(n != 1) # germanic plural by default
257	buf = fp.read()
258	buflen = len(buf)
259	# Are we big endian or little endian?
260	magic = unpack('<I', buf[:4])[0]
261	if magic == self.LE_MAGIC:
262	version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
263	ii = '<II'
264	elif magic == self.BE_MAGIC:
265	version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
266	ii = '>II'
267	else:
268	raise IOError(0, 'Bad magic number', filename)
269	# Now put all messages from the .mo file buffer into the catalog
270	# dictionary.
271	for i in xrange(0, msgcount):
272	mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
273	mend = moff + mlen
274	tlen, toff = unpack(ii, buf[transidx:transidx+8])
275	tend = toff + tlen
276	if mend < buflen and tend < buflen:
277	msg = buf[moff:mend]
278	tmsg = buf[toff:tend]
279	else:
280	raise IOError(0, 'File is corrupt', filename)
281	# See if we're looking at GNU .mo conventions for metadata
282	if mlen == 0:
283	# Catalog description
284	lastk = k = None
285	for item in tmsg.splitlines():
286	item = item.strip()
287	if not item:
288	continue
289	if ':' in item:
290	k, v = item.split(':', 1)
291	k = k.strip().lower()
292	v = v.strip()
293	self._info[k] = v
294	lastk = k
295	elif lastk:
296	self._info[lastk] += '\n' + item
297	if k == 'content-type':
298	self._charset = v.split('charset=')[1]
299	elif k == 'plural-forms':
300	v = v.split(';')
301	plural = v[1].split('plural=')[1]
302	self.plural = c2py(plural)
303	# Note: we unconditionally convert both msgids and msgstrs to
304	# Unicode using the character encoding specified in the charset
305	# parameter of the Content-Type header. The gettext documentation
306	# strongly encourages msgids to be us-ascii, but some appliations
307	# require alternative encodings (e.g. Zope's ZCML and ZPT). For
308	# traditional gettext applications, the msgid conversion will
309	# cause no problems since us-ascii should always be a subset of
310	# the charset encoding. We may want to fall back to 8-bit msgids
311	# if the Unicode conversion fails.
312	if '\x00' in msg:
313	# Plural forms
314	msgid1, msgid2 = msg.split('\x00')
315	tmsg = tmsg.split('\x00')
316	if self._charset:
317	msgid1 = unicode(msgid1, self._charset)
318	tmsg = [unicode(x, self._charset) for x in tmsg]
319	for i in range(len(tmsg)):
320	catalog[(msgid1, i)] = tmsg[i]
321	else:
322	if self._charset:
323	msg = unicode(msg, self._charset)
324	tmsg = unicode(tmsg, self._charset)
325	catalog[msg] = tmsg
326	# advance to next entry in the seek tables
327	masteridx += 8
328	transidx += 8
329
330	def gettext(self, message):
331	missing = object()
332	tmsg = self._catalog.get(message, missing)
333	if tmsg is missing:
334	if self._fallback:
335	return self._fallback.gettext(message)
336	return message
337	# Encode the Unicode tmsg back to an 8-bit string, if possible
338	if self._output_charset:
339	return tmsg.encode(self._output_charset)
340	elif self._charset:
341	return tmsg.encode(self._charset)
342	return tmsg
343
344	def lgettext(self, message):
345	missing = object()
346	tmsg = self._catalog.get(message, missing)
347	if tmsg is missing:
348	if self._fallback:
349	return self._fallback.lgettext(message)
350	return message
351	if self._output_charset:
352	return tmsg.encode(self._output_charset)
353	return tmsg.encode(locale.getpreferredencoding())
354
355	def ngettext(self, msgid1, msgid2, n):
356	try:
357	tmsg = self._catalog[(msgid1, self.plural(n))]
358	if self._output_charset:
359	return tmsg.encode(self._output_charset)
360	elif self._charset:
361	return tmsg.encode(self._charset)
362	return tmsg
363	except KeyError:
364	if self._fallback:
365	return self._fallback.ngettext(msgid1, msgid2, n)
366	if n == 1:
367	return msgid1
368	else:
369	return msgid2
370
371	def lngettext(self, msgid1, msgid2, n):
372	try:
373	tmsg = self._catalog[(msgid1, self.plural(n))]
374	if self._output_charset:
375	return tmsg.encode(self._output_charset)
376	return tmsg.encode(locale.getpreferredencoding())
377	except KeyError:
378	if self._fallback:
379	return self._fallback.lngettext(msgid1, msgid2, n)
380	if n == 1:
381	return msgid1
382	else:
383	return msgid2
384
385	def ugettext(self, message):
386	missing = object()
387	tmsg = self._catalog.get(message, missing)
388	if tmsg is missing:
389	if self._fallback:
390	return self._fallback.ugettext(message)
391	return unicode(message)
392	return tmsg
393
394	def ungettext(self, msgid1, msgid2, n):
395	try:
396	tmsg = self._catalog[(msgid1, self.plural(n))]
397	except KeyError:
398	if self._fallback:
399	return self._fallback.ungettext(msgid1, msgid2, n)
400	if n == 1:
401	tmsg = unicode(msgid1)
402	else:
403	tmsg = unicode(msgid2)
404	return tmsg
405
406
407	# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=0):
    """Search localedir for the compiled catalog(s) of domain.

    localedir defaults to _default_localedir.  When languages is not given
    it is taken from the first non-empty variable among LANGUAGE, LC_ALL,
    LC_MESSAGES and LANG (split on ':'), with 'C' appended if absent.

    Each language is expanded through _expand_lang() and the candidates
    are probed in order at <localedir>/<lang>/LC_MESSAGES/<domain>.mo.
    Probing stops as soon as the 'C' language is reached.

    Returns the first existing path, or None when nothing exists.  If all
    is true, returns a list of every existing path instead (possibly
    empty).
    """
    # Get some reasonable defaults for arguments that were not supplied
    if localedir is None:
        localedir = _default_localedir
    if languages is None:
        languages = []
        for variable in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            value = os.environ.get(variable)
            if value:
                languages = value.split(':')
                break
        if 'C' not in languages:
            languages.append('C')

    # Normalize and expand the languages, keeping first-seen order and
    # dropping duplicates.
    candidates = []
    for language in languages:
        for variant in _expand_lang(language):
            if variant not in candidates:
                candidates.append(variant)

    # Probe the candidates in priority order.
    found = []
    for variant in candidates:
        if variant == 'C':
            break
        mofile = os.path.join(localedir, variant, 'LC_MESSAGES',
                              '%s.mo' % domain)
        if not os.path.exists(mofile):
            continue
        if not all:
            return mofile
        found.append(mofile)
    if all:
        return found
    return None
442
443
444
# a mapping between (translation class, absolute .mo file path) and the
# parsed Translation object
_translations = {}

def translation(domain, localedir=None, languages=None,
                class_=None, fallback=False, codeset=None):
    """Return a translation object for domain, chaining fallbacks.

    domain, localedir and languages are passed to find(), which is asked
    for every matching .mo file.  The first file becomes the returned
    object and each later one is attached to it with add_fallback().

    class_ is the class instantiated with the open .mo file; it defaults
    to GNUTranslations.  If codeset is true, set_output_charset(codeset)
    is called on every object in the chain.

    When no .mo file is found, a NullTranslations instance is returned if
    fallback is true; otherwise IOError(ENOENT, ...) is raised.
    """
    if class_ is None:
        class_ = GNUTranslations
    mofiles = find(domain, localedir, languages, all=1)
    if not mofiles:
        if fallback:
            return NullTranslations()
        raise IOError(ENOENT, 'No translation file found for domain', domain)
    # Avoid opening, reading, and parsing the .mo file after it's been done
    # once.
    result = None
    for mofile in mofiles:
        # The class is part of the key so that a request for a different
        # class_ is never answered with a cached instance of another class.
        key = (class_, os.path.abspath(mofile))
        t = _translations.get(key)
        if t is None:
            fp = open(mofile, 'rb')
            try:
                # class_ is expected to read the catalog inside its
                # constructor; the file is closed right afterwards so the
                # descriptor is not left to the garbage collector.
                t = _translations.setdefault(key, class_(fp))
            finally:
                fp.close()
        # Copy the translation object to allow setting fallbacks and
        # output charset. All other instance data is shared with the
        # cached object.
        t = copy.copy(t)
        if codeset:
            t.set_output_charset(codeset)
        if result is None:
            result = t
        else:
            result.add_fallback(t)
    return result
477
478
def install(domain, localedir=None, unicode=False, codeset=None):
    """Obtain the translation for domain and call its install() method.

    The translation is requested with fallback=True and the given codeset;
    the unicode flag is passed straight through to its install() method.
    """
    translation(domain, localedir, fallback=True,
                codeset=codeset).install(unicode)
482
483
484
485	# a mapping between domains and locale directories
486	_localedirs = {}
487	# a mapping between domains and codesets
488	_localecodesets = {}
489	# current global domain, `messages' used for compatibility with GNU gettext
490	_current_domain = 'messages'
491
492
def textdomain(domain=None):
    """Return the current global domain, first changing it if domain is given.

    Passing None (the default) only queries the current value.
    """
    global _current_domain
    if domain is None:
        return _current_domain
    _current_domain = domain
    return domain
498
499
def bindtextdomain(domain, localedir=None):
    """Return the locale directory bound to domain, optionally rebinding it.

    When localedir is given it is recorded in _localedirs for domain first.
    A domain with no binding yields _default_localedir.
    """
    if localedir is None:
        return _localedirs.get(domain, _default_localedir)
    _localedirs[domain] = localedir
    return localedir
505
506
def bind_textdomain_codeset(domain, codeset=None):
    """Return the codeset bound to domain, optionally rebinding it.

    When codeset is given it is recorded in _localecodesets for domain
    first.  A domain with no binding yields None.
    """
    if codeset is None:
        return _localecodesets.get(domain)
    _localecodesets[domain] = codeset
    return codeset
512
513
def dgettext(domain, message):
    """Translate message using the catalog of domain.

    The catalog is located through the directory and codeset bound to
    domain.  If translation() raises IOError, message is returned
    untranslated.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain),
                              codeset=_localecodesets.get(domain))
    except IOError:
        return message
    else:
        return catalog.gettext(message)
521
def ldgettext(domain, message):
    """Translate message using domain's catalog via its lgettext() method.

    The catalog is located through the directory and codeset bound to
    domain.  If translation() raises IOError, message is returned
    untranslated.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain),
                              codeset=_localecodesets.get(domain))
    except IOError:
        return message
    else:
        return catalog.lgettext(message)
529
def dngettext(domain, msgid1, msgid2, n):
    """Plural-aware translation of msgid1 using the catalog of domain.

    The catalog is located through the directory and codeset bound to
    domain.  If translation() raises IOError, msgid1 is returned when
    n == 1 and msgid2 for any other n.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain),
                              codeset=_localecodesets.get(domain))
    except IOError:
        if n == 1:
            return msgid1
        return msgid2
    else:
        return catalog.ngettext(msgid1, msgid2, n)
540
def ldngettext(domain, msgid1, msgid2, n):
    """Plural-aware translation via the lngettext() method of domain's catalog.

    The catalog is located through the directory and codeset bound to
    domain.  If translation() raises IOError, msgid1 is returned when
    n == 1 and msgid2 for any other n.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain),
                              codeset=_localecodesets.get(domain))
    except IOError:
        if n == 1:
            return msgid1
        return msgid2
    else:
        return catalog.lngettext(msgid1, msgid2, n)
551
def gettext(message):
    """Translate message in the current global domain (see dgettext)."""
    active_domain = _current_domain
    return dgettext(active_domain, message)
554
def lgettext(message):
    """Translate message in the current global domain (see ldgettext)."""
    active_domain = _current_domain
    return ldgettext(active_domain, message)
557
def ngettext(msgid1, msgid2, n):
    """Plural-aware translation in the current global domain (see dngettext)."""
    active_domain = _current_domain
    return dngettext(active_domain, msgid1, msgid2, n)
560
def lngettext(msgid1, msgid2, n):
    """Plural-aware translation in the current global domain (see ldngettext)."""
    active_domain = _current_domain
    return ldngettext(active_domain, msgid1, msgid2, n)
563
564	# dcgettext() has been deemed unnecessary and is not implemented.
565
566	# James Henstridge's Catalog constructor from GNOME gettext. Documented usage
567	# was:
568	#
569	# import gettext
570	# cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
571	# _ = cat.gettext
572	# print _('Hello World')
573
574	# The resulting catalog object currently doesn't support access through a
575	# dictionary API, which was supported (but apparently unused) in GNOME
576	# gettext.
577
578	Catalog = translation