git.subgeniuskitty.com - OpenSPARC-T2-SAM/.git/blame - sam-t2/devtools/amd64/lib/python2.4/gettext.py

Commit	Line	Data
920dae64 AT	1	"""Internationalization and localization support.
	2
	3	This module provides internationalization (I18N) and localization (L10N)
	4	support for your Python programs by providing an interface to the GNU gettext
	5	message catalog library.
	6
	7	I18N refers to the operation by which a program is made aware of multiple
	8	languages. L10N refers to the adaptation of your program, once
	9	internationalized, to the local language and cultural habits.
	10
	11	"""
	12
	13	# This module represents the integration of work, contributions, feedback, and
	14	# suggestions from the following people:
	15	#
	16	# Martin von Loewis, who wrote the initial implementation of the underlying
	17	# C-based libintlmodule (later renamed _gettext), along with a skeletal
	18	# gettext.py implementation.
	19	#
	20	# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
	21	# which also included a pure-Python implementation to read .mo files if
	22	# intlmodule wasn't available.
	23	#
	24	# James Henstridge, who also wrote a gettext.py module, which has some
	25	# interesting, but currently unsupported experimental features: the notion of
	26	# a Catalog class and instances, and the ability to add to a catalog file via
	27	# a Python API.
	28	#
	29	# Barry Warsaw integrated these modules, wrote the .install() API and code,
	30	# and conformed all C and Python code to Python's coding standards.
	31	#
	32	# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
	33	# module.
	34	#
	35	# J. David Ibanez implemented plural forms. Bruno Haible fixed some bugs.
	36	#
	37	# TODO:
	38	# - Lazy loading of .mo files. Currently the entire catalog is loaded into
	39	# memory, but that's probably bad for large translated programs. Instead,
	40	# the lexical sort of original strings in GNU .mo files should be exploited
	41	# to do binary searches and lazy initializations. Or you might want to use
	42	# the undocumented double-hash algorithm for .mo files with hash tables, but
	43	# you'll need to study the GNU gettext code to do this.
	44	#
	45	# - Support Solaris .mo file formats. Unfortunately, we've been unable to
	46	# find this format documented anywhere.
	47
	48
	49	import locale, copy, os, re, struct, sys
	50	from errno import ENOENT
	51
	52
	53	__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
	54	'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
	55	'dgettext', 'dngettext', 'gettext', 'ngettext',
	56	]
	57
	58	_default_localedir = os.path.join(sys.prefix, 'share', 'locale')
	59
	60
	61	def test(condition, true, false):
	62	"""
	63	Implements the C expression:
	64
65	condition ? true : false
66
67	Required to correctly interpret plural forms.
68	"""
69	if condition:
70	return true
71	else:
72	return false
73
74
def c2py(plural):
    """Compile a C plural-forms expression into a Python function.

    plural is a C expression as used in PO files for plural forms, for
    example "n != 1" or "n==1 ? 0 : 1".  The operators &&, || and ! and the
    ternary ?: are rewritten into Python equivalents before compilation.

    Returns a function of one argument, n, which evaluates the expression
    and converts the result to an int.

    Raises ValueError if the expression uses any identifier other than "n"
    or cannot be tokenized (e.g. unbalanced parentheses).
    """
    # Security check, allow only the "n" identifier.  The expression is
    # handed to eval() below, so anything that could name another object
    # is rejected up front.
    try:
        from StringIO import StringIO
    except ImportError:
        # No StringIO module on this interpreter; io provides the class.
        from io import StringIO
    import token, tokenize
    tokens = tokenize.generate_tokens(StringIO(plural).readline)
    try:
        danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
    except tokenize.TokenError:
        raise ValueError(
            'plural forms expression error, maybe unbalanced parenthesis')
    if danger:
        raise ValueError('plural forms expression could be dangerous')

    # Replace some C operators by their Python equivalents
    plural = plural.replace('&&', ' and ')
    plural = plural.replace('||', ' or ')

    # "!x" becomes " not x"; the character class leaves "!=" untouched.
    expr = re.compile(r'\!([^=])')
    plural = expr.sub(' not \\1', plural)

    # Regular expression and replacement function used to transform
    # "a?b:c" to "test(a,b,c)".  The first two groups are non-greedy so
    # that they stop at the first "?" and ":"; the remainder is processed
    # recursively to handle chained conditionals.
    expr = re.compile(r'(.*?)\?(.*?):(.*)')
    def repl(x):
        return "test(%s, %s, %s)" % (x.group(1), x.group(2),
                                     expr.sub(repl, x.group(3)))

    # Code to transform the plural expression, taking care of parentheses:
    # each parenthesized group is rewritten on its own before being spliced
    # back into the enclosing text.
    stack = ['']
    for c in plural:
        if c == '(':
            stack.append('')
        elif c == ')':
            if len(stack) == 1:
                # Actually, we never reach this code, because unbalanced
                # parentheses get caught in the security check at the
                # beginning.
                raise ValueError('unbalanced parenthesis in plural form')
            s = expr.sub(repl, stack.pop())
            stack[-1] += '(%s)' % s
        else:
            stack[-1] += c
    plural = expr.sub(repl, stack.pop())

    # The generated code refers to the module-level test() helper.
    return eval('lambda n: int(%s)' % plural)
124
125
126
def _expand_lang(locale):
    """Return the lookup candidates for a locale name, most specific first.

    locale is passed through locale.normalize() and split into a language
    plus optional territory ("_..."), codeset ("....") and modifier
    ("@...") parts.  The result lists the language combined with every
    subset of the parts that are present, ending with the bare language.
    """
    from locale import normalize
    locale = normalize(locale)
    COMPONENT_CODESET = 1 << 0
    COMPONENT_TERRITORY = 1 << 1
    COMPONENT_MODIFIER = 1 << 2
    # split up the locale into its base components
    mask = 0
    pos = locale.find('@')
    if pos >= 0:
        modifier = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_MODIFIER
    else:
        modifier = ''
    pos = locale.find('.')
    if pos >= 0:
        codeset = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_CODESET
    else:
        codeset = ''
    pos = locale.find('_')
    if pos >= 0:
        territory = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_TERRITORY
    else:
        territory = ''
    language = locale
    ret = []
    # Each i is a bit set of components; larger values carry more parts.
    for i in range(mask+1):
        if not (i & ~mask):  # if all components for this combo exist ...
            val = language
            if i & COMPONENT_TERRITORY: val += territory
            if i & COMPONENT_CODESET:   val += codeset
            if i & COMPONENT_MODIFIER:  val += modifier
            ret.append(val)
    # Built from least to most specific; callers want the opposite order.
    ret.reverse()
    return ret
167
168
169
class NullTranslations:
    """Translation object that returns messages untranslated.

    Every lookup is handed to the fallback translation when one has been
    added with add_fallback(); otherwise the original message is returned
    (msgid1 when n == 1, msgid2 otherwise, for the plural variants).
    Subclasses override _parse() to load a catalog from the file object
    given to the constructor.
    """

    def __init__(self, fp=None):
        self._fallback = None
        self._output_charset = None
        self._charset = None
        self._info = {}
        if fp is None:
            return
        self._parse(fp)

    def _parse(self, fp):
        """Load a catalog from fp; this base implementation does nothing."""
        pass

    def add_fallback(self, fallback):
        """Append fallback to the end of the chain of fallbacks."""
        if not self._fallback:
            self._fallback = fallback
            return
        self._fallback.add_fallback(fallback)

    def gettext(self, message):
        """Return the fallback's translation of message, or message."""
        chained = self._fallback
        if chained:
            return chained.gettext(message)
        return message

    def lgettext(self, message):
        """Like gettext(), but delegating to the fallback's lgettext()."""
        chained = self._fallback
        if chained:
            return chained.lgettext(message)
        return message

    def ngettext(self, msgid1, msgid2, n):
        """Return the fallback's plural translation, or msgid1/msgid2."""
        chained = self._fallback
        if chained:
            return chained.ngettext(msgid1, msgid2, n)
        if n == 1:
            return msgid1
        return msgid2

    def lngettext(self, msgid1, msgid2, n):
        """Like ngettext(), but delegating to the fallback's lngettext()."""
        chained = self._fallback
        if chained:
            return chained.lngettext(msgid1, msgid2, n)
        if n == 1:
            return msgid1
        return msgid2

    def ugettext(self, message):
        """Like gettext(), but an untranslated message is made unicode."""
        chained = self._fallback
        if chained:
            return chained.ugettext(message)
        return unicode(message)

    def ungettext(self, msgid1, msgid2, n):
        """Like ngettext(), but an untranslated message is made unicode."""
        chained = self._fallback
        if chained:
            return chained.ungettext(msgid1, msgid2, n)
        if n == 1:
            return unicode(msgid1)
        return unicode(msgid2)

    def info(self):
        """Return the dictionary of catalog metadata."""
        return self._info

    def charset(self):
        """Return the charset recorded for the catalog, or None."""
        return self._charset

    def output_charset(self):
        """Return the charset set with set_output_charset(), or None."""
        return self._output_charset

    def set_output_charset(self, charset):
        """Record the charset that translated messages are encoded in."""
        self._output_charset = charset

    def install(self, unicode=False):
        """Bind _() in the builtin namespace to this object's lookup.

        ugettext() is installed when unicode is true, gettext() otherwise.
        """
        import __builtin__
        if unicode:
            lookup = self.ugettext
        else:
            lookup = self.gettext
        __builtin__.__dict__['_'] = lookup
242
243
class GNUTranslations(NullTranslations):
    """Translations loaded from a GNU gettext .mo catalog.

    _parse() reads the whole file into the _catalog dictionary.  Singular
    messages are keyed by msgid; plural messages are keyed by
    (msgid1, index), where index is what the catalog's plural expression
    yields for a given n.  Lookups that miss are passed to the fallback, if
    any, and otherwise return the original message.
    """

    # Magic number of .mo files
    LE_MAGIC = 0x950412de
    BE_MAGIC = 0xde120495

    def _parse(self, fp):
        """Override this method to support alternative .mo formats.

        Reads all of fp, fills self._catalog and self._info, and sets
        self._charset and self.plural from the catalog's metadata entry.

        Raises IOError if the magic number is not recognized or if an entry
        extends to or past the end of the file.
        """
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        # Parse the .mo file header: the magic number followed by four more
        # 32 bit words, stored in the byte order the magic number reveals.
        self._catalog = catalog = {}
        self.plural = lambda n: int(n != 1) # germanic plural by default
        buf = fp.read()
        buflen = len(buf)
        # Are we big endian or little endian?
        magic = unpack('<I', buf[:4])[0]
        if magic == self.LE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
            ii = '<II'
        elif magic == self.BE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
            ii = '>II'
        else:
            raise IOError(0, 'Bad magic number', filename)
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        for i in xrange(0, msgcount):
            mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
            mend = moff + mlen
            tlen, toff = unpack(ii, buf[transidx:transidx+8])
            tend = toff + tlen
            if mend < buflen and tend < buflen:
                msg = buf[moff:mend]
                tmsg = buf[toff:tend]
            else:
                raise IOError(0, 'File is corrupt', filename)
            # See if we're looking at GNU .mo conventions for metadata
            if mlen == 0:
                # Catalog description
                lastk = None
                for item in tmsg.splitlines():
                    item = item.strip()
                    if not item:
                        continue
                    # Start each line with no key/value.  A continuation
                    # line is then only appended to the previous header
                    # instead of re-running the charset / plural-forms
                    # handling below with values left over from that header
                    # (which failed once v had been split into a list).
                    k = v = None
                    if ':' in item:
                        k, v = item.split(':', 1)
                        k = k.strip().lower()
                        v = v.strip()
                        self._info[k] = v
                        lastk = k
                    elif lastk:
                        self._info[lastk] += '\n' + item
                    if k == 'content-type':
                        self._charset = v.split('charset=')[1]
                    elif k == 'plural-forms':
                        v = v.split(';')
                        plural = v[1].split('plural=')[1]
                        self.plural = c2py(plural)
            # Note: we unconditionally convert both msgids and msgstrs to
            # Unicode using the character encoding specified in the charset
            # parameter of the Content-Type header.  The gettext documentation
            # strongly encourages msgids to be us-ascii, but some applications
            # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
            # traditional gettext applications, the msgid conversion will
            # cause no problems since us-ascii should always be a subset of
            # the charset encoding.  We may want to fall back to 8-bit msgids
            # if the Unicode conversion fails.
            if '\x00' in msg:
                # Plural forms
                msgid1, msgid2 = msg.split('\x00')
                tmsg = tmsg.split('\x00')
                if self._charset:
                    msgid1 = unicode(msgid1, self._charset)
                    tmsg = [unicode(x, self._charset) for x in tmsg]
                # One catalog entry per plural form, keyed by its index.
                for index, form in enumerate(tmsg):
                    catalog[(msgid1, index)] = form
            else:
                if self._charset:
                    msg = unicode(msg, self._charset)
                    tmsg = unicode(tmsg, self._charset)
                catalog[msg] = tmsg
            # advance to next entry in the seek tables
            masteridx += 8
            transidx += 8

    def gettext(self, message):
        """Return the translation of message as an encoded string.

        The result is encoded in the output charset if one was set, else in
        the catalog's charset, else returned as stored.  A message missing
        from the catalog goes to the fallback or is returned unchanged.
        """
        # A unique sentinel distinguishes "not in the catalog" from any
        # value that could be stored in it.
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.gettext(message)
            return message
        # Encode the Unicode tmsg back to an 8-bit string, if possible
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        elif self._charset:
            return tmsg.encode(self._charset)
        return tmsg

    def lgettext(self, message):
        """Like gettext(), but encode in the preferred locale encoding.

        The output charset, when set, still takes precedence over
        locale.getpreferredencoding().
        """
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.lgettext(message)
            return message
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        return tmsg.encode(locale.getpreferredencoding())

    def ngettext(self, msgid1, msgid2, n):
        """Return the plural form of msgid1 selected by n, encoded.

        Encoding follows the same rules as gettext().  If the catalog has
        no entry for the selected form, the fallback is used, or msgid1 is
        returned when n == 1 and msgid2 otherwise.
        """
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
            if self._output_charset:
                return tmsg.encode(self._output_charset)
            elif self._charset:
                return tmsg.encode(self._charset)
            return tmsg
        except KeyError:
            if self._fallback:
                return self._fallback.ngettext(msgid1, msgid2, n)
            if n == 1:
                return msgid1
            else:
                return msgid2

    def lngettext(self, msgid1, msgid2, n):
        """Like ngettext(), but encode in the preferred locale encoding.

        The output charset, when set, still takes precedence over
        locale.getpreferredencoding().
        """
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
            if self._output_charset:
                return tmsg.encode(self._output_charset)
            return tmsg.encode(locale.getpreferredencoding())
        except KeyError:
            if self._fallback:
                return self._fallback.lngettext(msgid1, msgid2, n)
            if n == 1:
                return msgid1
            else:
                return msgid2

    def ugettext(self, message):
        """Return the translation of message exactly as stored.

        A message missing from the catalog goes to the fallback or is
        returned converted with unicode().
        """
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.ugettext(message)
            return unicode(message)
        return tmsg

    def ungettext(self, msgid1, msgid2, n):
        """Return the plural form of msgid1 selected by n, as stored.

        If the catalog has no entry for the selected form, the fallback is
        used, or msgid1 (n == 1) / msgid2 is returned through unicode().
        """
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.ungettext(msgid1, msgid2, n)
            if n == 1:
                tmsg = unicode(msgid1)
            else:
                tmsg = unicode(msgid2)
        return tmsg
405
406
407	# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=0):
    """Locate the .mo file(s) for domain using the gettext strategy.

    localedir defaults to _default_localedir.  When languages is None it is
    taken from the first non-empty variable among LANGUAGE, LC_ALL,
    LC_MESSAGES and LANG (split on ':'), with 'C' appended if absent.  Each
    language is expanded with _expand_lang() and the candidates are tried
    in order; the search stops when 'C' is reached.

    Returns the path of the first catalog that exists, or None if there is
    none.  If all is true, returns the list of every existing catalog
    instead (possibly empty).
    """
    # Get some reasonable defaults for arguments that were not supplied
    if localedir is None:
        localedir = _default_localedir
    if languages is None:
        languages = []
        for name in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            value = os.environ.get(name)
            if value:
                languages = value.split(':')
                break
        if 'C' not in languages:
            languages.append('C')
    # now normalize and expand the languages, keeping first-seen order
    candidates = []
    for lang in languages:
        for variant in _expand_lang(lang):
            if variant not in candidates:
                candidates.append(variant)
    # select a language
    found = []
    for lang in candidates:
        if lang == 'C':
            break
        mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
        if not os.path.exists(mofile):
            continue
        if not all:
            return mofile
        found.append(mofile)
    if all:
        return found
    return None
442
443
444
# a mapping between absolute .mo file path and Translation object
_translations = {}

def translation(domain, localedir=None, languages=None,
                class_=None, fallback=False, codeset=None):
    """Return a translation object for domain.

    The catalogs are located with find(domain, localedir, languages,
    all=1).  The first one found becomes the result and the remaining ones
    are chained onto it as fallbacks.  class_ (GNUTranslations by default)
    is called with the open catalog file to build each object.  If codeset
    is given it is set as the output charset of every object returned.

    Raises IOError with errno ENOENT if no catalog is found, unless
    fallback is true, in which case a NullTranslations instance is
    returned.
    """
    if class_ is None:
        class_ = GNUTranslations
    mofiles = find(domain, localedir, languages, all=1)
    if not mofiles:
        if fallback:
            return NullTranslations()
        raise IOError(ENOENT, 'No translation file found for domain', domain)
    # Avoid opening, reading, and parsing the .mo file after it's been done
    # once.
    result = None
    for mofile in mofiles:
        key = os.path.abspath(mofile)
        t = _translations.get(key)
        if t is None:
            fp = open(mofile, 'rb')
            try:
                t = _translations.setdefault(key, class_(fp))
            finally:
                # Close the file explicitly rather than leaving it to be
                # collected.
                fp.close()
        # Copy the translation object to allow setting fallbacks and
        # output charset. All other instance data is shared with the
        # cached object.
        t = copy.copy(t)
        if codeset:
            t.set_output_charset(codeset)
        if result is None:
            result = t
        else:
            result.add_fallback(t)
    return result
477
478
def install(domain, localedir=None, unicode=False, codeset=None):
    """Install _() in the builtin namespace for domain.

    The translation object is obtained with fallback=True, so a missing
    catalog does not raise; unicode is passed on to the object's install().
    """
    catalog = translation(domain, localedir, fallback=True, codeset=codeset)
    catalog.install(unicode)
482
483
484
# a mapping b/w domains and locale directories
_localedirs = {}
# a mapping b/w domains and codesets
_localecodesets = {}
# current global domain, `messages' used for compatibility w/ GNU gettext
_current_domain = 'messages'


def textdomain(domain=None):
    """Return the current global domain, changing it first if domain is given."""
    global _current_domain
    if domain is None:
        return _current_domain
    _current_domain = domain
    return _current_domain
498
499
def bindtextdomain(domain, localedir=None):
    """Return the locale directory bound to domain, binding it first if given.

    When localedir is not None it is recorded for domain.  The directory
    recorded for domain is returned, or _default_localedir if there is none.
    """
    if localedir is not None:
        _localedirs[domain] = localedir
    try:
        return _localedirs[domain]
    except KeyError:
        return _default_localedir
505
506
def bind_textdomain_codeset(domain, codeset=None):
    """Return the codeset bound to domain, binding it first if given.

    When codeset is not None it is recorded for domain.  The codeset
    recorded for domain is returned, or None if there is none.
    """
    if codeset is not None:
        _localecodesets[domain] = codeset
    try:
        return _localecodesets[domain]
    except KeyError:
        return None
512
513
def dgettext(domain, message):
    """Return the translation of message in domain.

    The directory and codeset bound to domain are used to find the
    catalog.  If translation() raises IOError, message is returned
    unchanged.
    """
    localedir = _localedirs.get(domain, None)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        return message
    return catalog.gettext(message)
521
def ldgettext(domain, message):
    """Like dgettext(), but look the message up with lgettext().

    If translation() raises IOError, message is returned unchanged.
    """
    localedir = _localedirs.get(domain, None)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        return message
    return catalog.lgettext(message)
529
def dngettext(domain, msgid1, msgid2, n):
    """Return the plural translation for n in domain.

    If translation() raises IOError, msgid1 is returned when n == 1 and
    msgid2 otherwise.
    """
    localedir = _localedirs.get(domain, None)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        if n == 1:
            return msgid1
        return msgid2
    return catalog.ngettext(msgid1, msgid2, n)
540
def ldngettext(domain, msgid1, msgid2, n):
    """Like dngettext(), but look the message up with lngettext().

    If translation() raises IOError, msgid1 is returned when n == 1 and
    msgid2 otherwise.
    """
    localedir = _localedirs.get(domain, None)
    codeset = _localecodesets.get(domain)
    try:
        catalog = translation(domain, localedir, codeset=codeset)
    except IOError:
        if n == 1:
            return msgid1
        return msgid2
    return catalog.lngettext(msgid1, msgid2, n)
551
def gettext(message):
    """Return dgettext() of message in the current global domain."""
    domain = _current_domain
    return dgettext(domain, message)
554
def lgettext(message):
    """Return ldgettext() of message in the current global domain."""
    domain = _current_domain
    return ldgettext(domain, message)
557
def ngettext(msgid1, msgid2, n):
    """Return dngettext() of the message pair in the current global domain."""
    domain = _current_domain
    return dngettext(domain, msgid1, msgid2, n)
560
def lngettext(msgid1, msgid2, n):
    """Return ldngettext() of the message pair in the current global domain."""
    domain = _current_domain
    return ldngettext(domain, msgid1, msgid2, n)
563
564	# dcgettext() has been deemed unnecessary and is not implemented.
565
566	# James Henstridge's Catalog constructor from GNOME gettext. Documented usage
567	# was:
568	#
569	# import gettext
570	# cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
571	# _ = cat.gettext
572	# print _('Hello World')
573
574	# The resulting catalog object currently don't support access through a
575	# dictionary API, which was supported (but apparently unused) in GNOME
576	# gettext.
577
578	Catalog = translation