Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | .\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "Locale::Maketext 3" | |
132 | .TH Locale::Maketext 3 "2002-06-01" "perl v5.8.0" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | Locale::Maketext \-\- framework for localization | |
135 | .SH "SYNOPSIS" | |
136 | .IX Header "SYNOPSIS" | |
137 | .Vb 9 | |
138 | \& package MyProgram; | |
139 | \& use strict; | |
140 | \& use MyProgram::L10N; | |
141 | \& # ...which inherits from Locale::Maketext | |
142 | \& my $lh = MyProgram::L10N->get_handle() || die "What language?"; | |
143 | \& ... | |
144 | \& # And then any messages your program emits, like: | |
145 | \& warn $lh->maketext( "Can't open file [_1]: [_2]\en", $f, $! ); | |
146 | \& ... | |
147 | .Ve | |
148 | .SH "DESCRIPTION" | |
149 | .IX Header "DESCRIPTION" | |
150 | It is a common feature of applications (whether run directly, | |
151 | or via the Web) for them to be \*(L"localized\*(R" \*(-- i.e., for them | |
152 | to present an English interface to an English\-speaker, a German | |
153 | interface to a German\-speaker, and so on for all languages it's | |
154 | programmed with. Locale::Maketext | |
155 | is a framework for software localization; it provides you with the | |
156 | tools for organizing and accessing the bits of text and text-processing | |
157 | code that you need for producing localized applications. | |
158 | .PP | |
159 | In order to make sense of Maketext and how all its | |
160 | components fit together, you should probably | |
161 | go read Locale::Maketext::TPJ13, and | |
162 | \&\fIthen\fR read the following documentation. | |
163 | .PP | |
164 | You may also want to read over the source for \f(CW\*(C`File::Findgrep\*(C'\fR | |
165 | and its constituent modules \*(-- they are a complete (if small) | |
166 | example application that uses Maketext. | |
167 | .SH "QUICK OVERVIEW" | |
168 | .IX Header "QUICK OVERVIEW" | |
169 | The basic design of Locale::Maketext is object\-oriented, and | |
170 | Locale::Maketext is an abstract base class, from which you | |
171 | derive a \*(L"project class\*(R". | |
172 | The project class (with a name like \*(L"TkBocciBall::Localize\*(R", | |
173 | which you then use in your module) is in turn the base class | |
174 | for all the \*(L"language classes\*(R" for your project | |
175 | (with names \*(L"TkBocciBall::Localize::it\*(R", | |
176 | \&\*(L"TkBocciBall::Localize::en\*(R", | |
177 | \&\*(L"TkBocciBall::Localize::fr\*(R", etc.). | |
178 | .PP | |
179 | A language class is | |
180 | a class containing a lexicon of phrases as class data, | |
181 | and possibly also some methods that are of use in interpreting | |
182 | phrases in the lexicon, or otherwise dealing with text in that | |
183 | language. | |
184 | .PP | |
185 | An object belonging to a language class is called a \*(L"language | |
186 | handle\*(R"; it's typically a flyweight object. | |
187 | .PP | |
188 | The normal course of action is to call: | |
189 | .PP | |
190 | .Vb 6 | |
191 | \& use TkBocciBall::Localize; # the localization project class | |
192 | \& $lh = TkBocciBall::Localize->get_handle(); | |
193 | \& # Depending on the user's locale, etc., this will | |
194 | \& # make a language handle from among the classes available, | |
195 | \& # and any defaults that you declare. | |
196 | \& die "Couldn't make a language handle??" unless $lh; | |
197 | .Ve | |
198 | .PP | |
199 | From then on, you use the \f(CW\*(C`maketext\*(C'\fR function to access | |
200 | entries in whatever lexicon(s) belong to the language handle | |
201 | you got. So, this: | |
202 | .PP | |
203 | .Vb 1 | |
204 | \& print $lh->maketext("You won!"), "\en"; | |
205 | .Ve | |
206 | .PP | |
207 | \&...emits the right text for this language. If the object | |
208 | in \f(CW$lh\fR belongs to class \*(L"TkBocciBall::Localize::fr\*(R" and | |
209 | \&\f(CW%TkBocciBall::Localize::fr::Lexicon\fR contains \f(CW\*(C`("You won!" | |
210 | => "Tu as gagne\*'!")\*(C'\fR, then the above | |
211 | code happily tells the user "Tu as gagne\*'!". | |
212 | .SH "METHODS" | |
213 | .IX Header "METHODS" | |
214 | Locale::Maketext offers a variety of methods, which fall | |
215 | into three categories: | |
216 | .IP "\(bu" 4 | |
217 | Methods to do with constructing language handles. | |
218 | .IP "\(bu" 4 | |
219 | \&\f(CW\*(C`maketext\*(C'\fR and other methods to do with accessing \f(CW%Lexicon\fR data | |
220 | for a given language handle. | |
221 | .IP "\(bu" 4 | |
222 | Methods that you may find it handy to use, from routines of | |
223 | yours that you put in \f(CW%Lexicon\fR entries. | |
224 | .PP | |
225 | These are covered in the following section. | |
226 | .Sh "Construction Methods" | |
227 | .IX Subsection "Construction Methods" | |
228 | These are to do with constructing a language handle: | |
229 | .IP "\(bu" 4 | |
230 | $lh = YourProjClass\->get_handle( ...langtags... ) || die \*(L"lg\-handle?\*(R"; | |
231 | .Sp | |
232 | This tries loading classes based on the language-tags you give (like | |
233 | \&\f(CW\*(C`("en\-US", "sk", "kon", "es\-MX", "ja", "i\-klingon")\*(C'\fR), and for the first class | |
234 | that succeeds, returns YourProjClass::\fIlanguage\fR\->\fInew()\fR. | |
235 | .Sp | |
236 | If it runs thru the entire given list of language\-tags, and finds no classes | |
237 | for those exact terms, it then tries \*(L"superordinate\*(R" language classes. | |
238 | So if no \*(L"en\-US\*(R" class (i.e., YourProjClass::en_us) | |
239 | was found, nor classes for anything else in that list, we then try | |
240 | its superordinate, \*(L"en\*(R" (i.e., YourProjClass::en), and so on thru | |
241 | the other language-tags in the given list: \*(L"es\*(R". | |
242 | (The other language-tags in our example list | |
243 | happen to have no superordinates.) | |
244 | .Sp | |
245 | If none of those language-tags leads to loadable classes, we then | |
246 | try classes derived from YourProjClass\->\fIfallback_languages()\fR and | |
247 | then if nothing comes of that, we use classes named by | |
248 | YourProjClass\->\fIfallback_language_classes()\fR. Then in the (probably | |
249 | quite unlikely) event that that fails, we just return undef. | |
250 | .IP "\(bu" 4 | |
251 | $lh = YourProjClass\->get_handle\fB()\fR || die \*(L"lg\-handle?\*(R"; | |
252 | .Sp | |
253 | When \f(CW\*(C`get_handle\*(C'\fR is called with an empty parameter list, magic happens: | |
254 | .Sp | |
255 | If \f(CW\*(C`get_handle\*(C'\fR senses that it's running in a program that was | |
256 | invoked as a \s-1CGI\s0, then it tries to get language-tags out of the | |
257 | environment variable \*(L"\s-1HTTP_ACCEPT_LANGUAGE\s0\*(R", and it pretends that | |
258 | those were the languages passed as parameters to \f(CW\*(C`get_handle\*(C'\fR. | |
259 | .Sp | |
260 | Otherwise (i.e., if not a \s-1CGI\s0), this tries various OS-specific ways | |
261 | to get the language-tags for the current locale/language, and then | |
262 | pretends that those were the value(s) passed to \f(CW\*(C`get_handle\*(C'\fR. | |
263 | .Sp | |
264 | Currently this OS-specific stuff consists of looking in the environment | |
265 | variables \*(L"\s-1LANG\s0\*(R" and \*(L"\s-1LANGUAGE\s0\*(R"; and on MSWin machines (where those | |
266 | variables are typically unused), this also tries using | |
267 | the module Win32::Locale to get a language-tag for whatever language/locale | |
268 | is currently selected in the \*(L"Regional Settings\*(R" (or \*(L"International\*(R"?) | |
269 | Control Panel. I welcome further | |
270 | suggestions for making this do the Right Thing under other operating | |
271 | systems that support localization. | |
272 | .Sp | |
273 | If you're using localization in an application that keeps a configuration | |
274 | file, you might consider something like this in your project class: | |
275 | .Sp | |
276 | .Vb 14 | |
277 | \& sub get_handle_via_config { | |
278 | \& my $class = $_[0]; | |
279 | \& my $preferred_language = $Config_settings{'language'}; | |
280 | \& my $lh; | |
281 | \& if($preferred_language) { | |
282 | \& $lh = $class->get_handle($preferred_language) | |
283 | \& || die "No language handle for \e"$preferred_language\e" or the like"; | |
284 | \& } else { | |
285 | \& # Config file missing, maybe? | |
286 | \& $lh = $class->get_handle() | |
287 | \& || die "Can't get a language handle"; | |
288 | \& } | |
289 | \& return $lh; | |
290 | \& } | |
291 | .Ve | |
292 | .IP "\(bu" 4 | |
293 | $lh = YourProjClass::langname\->\fInew()\fR; | |
294 | .Sp | |
295 | This constructs a language handle. You usually \fBdon't\fR call this | |
296 | directly, but instead let \f(CW\*(C`get_handle\*(C'\fR find a language class to \f(CW\*(C`use\*(C'\fR | |
297 | and to then call \->new on. | |
298 | .IP "\(bu" 4 | |
299 | $lh\->\fIinit()\fR; | |
300 | .Sp | |
301 | This is called by \->new to initialize newly-constructed language handles. | |
302 | If you define an init method in your class, remember that it's usually | |
303 | considered a good idea to call \f(CW$lh\fR\->SUPER::init in it (presumably at the | |
304 | beginning), so that all classes get a chance to initialize a new object | |
305 | however they see fit. | |
306 | .IP "\(bu" 4 | |
307 | YourProjClass\->\fIfallback_languages()\fR | |
308 | .Sp | |
309 | \&\f(CW\*(C`get_handle\*(C'\fR appends the return value of this to the end of | |
310 | whatever list of languages you pass \f(CW\*(C`get_handle\*(C'\fR. Unless | |
311 | you override this method, your project class | |
312 | will inherit Locale::Maketext's \f(CW\*(C`fallback_languages\*(C'\fR, which | |
313 | currently returns \f(CW\*(C`('i\-default', 'en', 'en\-US')\*(C'\fR. | |
314 | (\*(L"i\-default\*(R" is defined in \s-1RFC\s0 2277). | |
315 | .Sp | |
316 | This method (by having it return the name | |
317 | of a language-tag that has an existing language class) | |
318 | can be used for making sure that | |
319 | \&\f(CW\*(C`get_handle\*(C'\fR will always manage to construct a language | |
320 | handle (assuming your language classes are in an appropriate | |
321 | \&\f(CW@INC\fR directory). Or you can use the next method: | |
322 | .IP "\(bu" 4 | |
323 | YourProjClass\->\fIfallback_language_classes()\fR | |
324 | .Sp | |
325 | \&\f(CW\*(C`get_handle\*(C'\fR appends the return value of this to the end | |
326 | of the list of classes it will try using. Unless | |
327 | you override this method, your project class | |
328 | will inherit Locale::Maketext's \f(CW\*(C`fallback_language_classes\*(C'\fR, | |
329 | which currently returns an empty list, \f(CW\*(C`()\*(C'\fR. | |
330 | By setting this to some value (namely, the name of a loadable | |
331 | language class), you can be sure that | |
332 | \&\f(CW\*(C`get_handle\*(C'\fR will always manage to construct a language | |
333 | handle. | |
334 | .ie n .Sh "The ""maketext"" Method" | |
335 | .el .Sh "The ``maketext'' Method" | |
336 | .IX Subsection "The maketext Method" | |
337 | This is the most important method in Locale::Maketext: | |
338 | .PP | |
339 | $text = \f(CW$lh\fR\->maketext(\fIkey\fR, ...parameters for this phrase...); | |
340 | .PP | |
341 | This looks in the \f(CW%Lexicon\fR of the language handle | |
342 | \&\f(CW$lh\fR and all its superclasses, looking | |
343 | for an entry whose key is the string \fIkey\fR. Assuming such | |
344 | an entry is found, various things then happen, depending on the | |
345 | value found: | |
346 | .PP | |
347 | If the value is a scalarref, the scalar is dereferenced and returned | |
348 | (and any parameters are ignored). | |
349 | If the value is a coderef, we return &$value($lh, ...parameters...). | |
350 | If the value is a string that \fIdoesn't\fR look like it's in Bracket Notation, | |
351 | we return it (after replacing it with a scalarref, in its \f(CW%Lexicon\fR). | |
352 | If the value \fIdoes\fR look like it's in Bracket Notation, then we compile | |
353 | it into a sub, replace the string in the \f(CW%Lexicon\fR with the new coderef, | |
354 | and then we return &$new_sub($lh, ...parameters...). | |
355 | .PP | |
356 | Bracket Notation is discussed in a later section. Note | |
357 | that trying to compile a string into Bracket Notation can throw | |
358 | an exception if the string is not syntactically valid (say, by not | |
359 | balancing brackets right.) | |
360 | .PP | |
361 | Also, calling &$coderef($lh, ...parameters...) can throw any sort of | |
362 | exception (if, say, code in that sub tries to divide by zero). But | |
363 | a very common exception occurs when you have Bracket | |
364 | Notation text that says to call a method \*(L"foo\*(R", but there is no such | |
365 | method. (E.g., "You have [qua\fBtn\fR,_1,ball]." will throw an exception | |
366 | on trying to call \f(CW$lh\fR\->qua\fBtn\fR($_[1],'ball') \*(-- you presumably meant | |
367 | \&\*(L"quant\*(R".) \f(CW\*(C`maketext\*(C'\fR catches these exceptions, but only to make the | |
368 | error message more readable, at which point it rethrows the exception. | |
369 | .PP | |
370 | An exception \fImay\fR be thrown if \fIkey\fR is not found in any | |
371 | of \f(CW$lh\fR's \f(CW%Lexicon\fR hashes. What happens if a key is not found, | |
372 | is discussed in a later section, \*(L"Controlling Lookup Failure\*(R". | |
373 | .PP | |
374 | Note that you might find it useful in some cases to override | |
375 | the \f(CW\*(C`maketext\*(C'\fR method with an \*(L"after method\*(R", if you want to | |
376 | translate encodings, or even scripts: | |
377 | .PP | |
378 | .Vb 7 | |
379 | \& package YrProj::zh_cn; # Chinese with PRC-style glyphs | |
380 | \& use base ('YrProj::zh_tw'); # Taiwan-style | |
381 | \& sub maketext { | |
382 | \& my $self = shift(@_); | |
383 | \& my $value = $self->SUPER::maketext(@_); | |
384 | \& return Chineeze::taiwan2mainland($value); | |
385 | \& } | |
386 | .Ve | |
387 | .PP | |
388 | Or you may want to override it with something that traps | |
389 | any exceptions, if that's critical to your program: | |
390 | .PP | |
391 | .Vb 7 | |
392 | \& sub maketext { | |
393 | \& my($lh, @stuff) = @_; | |
394 | \& my $out; | |
395 | \& eval { $out = $lh->SUPER::maketext(@stuff) }; | |
396 | \& return $out unless $@; | |
397 | \& ...otherwise deal with the exception... | |
398 | \& } | |
399 | .Ve | |
400 | .PP | |
401 | Other than those two situations, I don't imagine that | |
402 | it's useful to override the \f(CW\*(C`maketext\*(C'\fR method. (If | |
403 | you run into a situation where it is useful, I'd be | |
404 | interested in hearing about it.) | |
405 | .ie n .IP "$lh\->fail_with \fIor\fR $lh\fR\->fail_with(\fI\s-1PARAM\s0)" 4 | |
406 | .el .IP "$lh\->fail_with \fIor\fR \f(CW$lh\fR\->fail_with(\fI\s-1PARAM\s0\fR)" 4 | |
407 | .IX Item "$lh->fail_with or $lh->fail_with(PARAM)" | |
408 | .PD 0 | |
409 | .IP "$lh\->failure_handler_auto" 4 | |
410 | .IX Item "$lh->failure_handler_auto" | |
411 | .PD | |
412 | These two methods are discussed in the section \*(L"Controlling | |
413 | Lookup Failure\*(R". | |
414 | .Sh "Utility Methods" | |
415 | .IX Subsection "Utility Methods" | |
416 | These are methods that you may find it handy to use, generally | |
417 | from \f(CW%Lexicon\fR routines of yours (whether expressed as | |
418 | Bracket Notation or not). | |
419 | .ie n .IP "$language\->quant($number, $singular)" 4 | |
420 | .el .IP "$language\->quant($number, \f(CW$singular\fR)" 4 | |
421 | .IX Item "$language->quant($number, $singular)" | |
422 | .PD 0 | |
423 | .ie n .IP "$language\->quant($number, $singular\fR, \f(CW$plural)" 4 | |
424 | .el .IP "$language\->quant($number, \f(CW$singular\fR, \f(CW$plural\fR)" 4 | |
425 | .IX Item "$language->quant($number, $singular, $plural)" | |
426 | .ie n .IP "$language\->quant($number, $singular\fR, \f(CW$plural\fR, \f(CW$negative)" 4 | |
427 | .el .IP "$language\->quant($number, \f(CW$singular\fR, \f(CW$plural\fR, \f(CW$negative\fR)" 4 | |
428 | .IX Item "$language->quant($number, $singular, $plural, $negative)" | |
429 | .PD | |
430 | This is generally meant to be called from inside Bracket Notation | |
431 | (which is discussed later), as in | |
432 | .Sp | |
433 | .Vb 1 | |
434 | \& "Your search matched [quant,_1,document]!" | |
435 | .Ve | |
436 | .Sp | |
437 | It's for \fIquantifying\fR a noun (i.e., saying how much of it there is, | |
438 | while giving the correct form of it). The behavior of this method is | |
439 | handy for English and a few other Western European languages, and you | |
440 | should override it for languages where it's not suitable. You can feel | |
441 | free to read the source, but the current implementation is basically | |
442 | as this pseudocode describes: | |
443 | .Sp | |
444 | .Vb 11 | |
445 | \& if $number is 0 and there's a $negative, | |
446 | \& return $negative; | |
447 | \& elsif $number is 1, | |
448 | \& return "1 $singular"; | |
449 | \& elsif there's a $plural, | |
450 | \& return "$number $plural"; | |
451 | \& else | |
452 | \& return "$number " . $singular . "s"; | |
453 | \& # | |
454 | \& # ...except that we actually call numf to | |
455 | \& # stringify $number before returning it. | |
456 | .Ve | |
457 | .Sp | |
458 | So for English (with Bracket Notation) | |
459 | \&\f(CW"...[quant,_1,file]..."\fR is fine (for 0 it returns \*(L"0 files\*(R", | |
460 | for 1 it returns \*(L"1 file\*(R", and for more it returns \*(L"2 files\*(R", etc.) | |
461 | .Sp | |
462 | But for \*(L"directory\*(R", you'd want \f(CW"[quant,_1,directory,directories]"\fR | |
463 | so that our elementary \f(CW\*(C`quant\*(C'\fR method doesn't think that the | |
464 | plural of \*(L"directory\*(R" is \*(L"directorys\*(R". And you might find that the | |
465 | output may sound better if you specify a negative form, as in: | |
466 | .Sp | |
467 | .Vb 1 | |
468 | \& "[quant,_1,file,files,No files] matched your query.\en" | |
469 | .Ve | |
470 | .Sp | |
471 | Remember to keep in mind verb agreement (or adjectives too, in | |
472 | other languages), as in: | |
473 | .Sp | |
474 | .Vb 1 | |
475 | \& "[quant,_1,document] were matched.\en" | |
476 | .Ve | |
477 | .Sp | |
478 | Because if _1 is one, you get "1 document \fBwere\fR matched". | |
479 | An acceptable hack here is to do something like this: | |
480 | .Sp | |
481 | .Vb 1 | |
482 | \& "[quant,_1,document was, documents were] matched.\en" | |
483 | .Ve | |
484 | .IP "$language\->numf($number)" 4 | |
485 | .IX Item "$language->numf($number)" | |
486 | This returns the given number formatted nicely according to | |
487 | this language's conventions. Maketext's default method is | |
488 | mostly to just take the normal string form of the number | |
489 | (applying sprintf \*(L"%G\*(R" for only very large numbers), and then | |
490 | to add commas as necessary. (Except that | |
491 | we apply \f(CW\*(C`tr/,./.,/\*(C'\fR if \f(CW$language\fR\->{'numf_comma'} is true; | |
492 | that's a bit of a hack that's useful for languages that express | |
493 | two million as \*(L"2.000.000\*(R" and not as \*(L"2,000,000\*(R"). | |
494 | .Sp | |
495 | If you want anything fancier, consider overriding this with something | |
496 | that uses Number::Format, or does something else | |
497 | entirely. | |
498 | .Sp | |
499 | Note that numf is called by quant for stringifying all quantifying | |
500 | numbers. | |
501 | .ie n .IP "$language\->sprintf($format, @items)" 4 | |
502 | .el .IP "$language\->sprintf($format, \f(CW@items\fR)" 4 | |
503 | .IX Item "$language->sprintf($format, @items)" | |
504 | This is just a wrapper around Perl's normal \f(CW\*(C`sprintf\*(C'\fR function. | |
505 | It's provided so that you can use \*(L"sprintf\*(R" in Bracket Notation: | |
506 | .Sp | |
507 | .Vb 1 | |
508 | \& "Couldn't access datanode [sprintf,%10x=~[%s~],_1,_2]!\en" | |
509 | .Ve | |
510 | .Sp | |
511 | returning... | |
512 | .Sp | |
513 | .Vb 1 | |
514 | \& Couldn't access datanode Stuff=[thangamabob]! | |
515 | .Ve | |
516 | .IP "$language\->\fIlanguage_tag()\fR" 4 | |
517 | .IX Item "$language->language_tag()" | |
518 | Currently this just takes the last bit of \f(CW\*(C`ref($language)\*(C'\fR, turns | |
519 | underscores to dashes, and returns it. So if \f(CW$language\fR is | |
520 | an object of class Hee::HOO::Haw::en_us, \f(CW$language\fR\->\fIlanguage_tag()\fR | |
521 | returns \*(L"en\-us\*(R". (Yes, the usual representation for that language | |
522 | tag is \*(L"en\-US\*(R", but case is \fInever\fR considered meaningful in | |
523 | language-tag comparison.) | |
524 | .Sp | |
525 | You may override this as you like; Maketext doesn't use it for | |
526 | anything. | |
527 | .IP "$language\->\fIencoding()\fR" 4 | |
528 | .IX Item "$language->encoding()" | |
529 | Currently this isn't used for anything, but it's provided | |
530 | (with default value of | |
531 | \&\f(CW\*(C`(ref($language) && $language\->{'encoding'})) or "iso\-8859\-1"\*(C'\fR | |
532 | ) as a sort of suggestion that it may be useful/necessary to | |
533 | associate encodings with your language handles (whether on a | |
534 | per-class or even per-handle basis.) | |
535 | .Sh "Language Handle Attributes and Internals" | |
536 | .IX Subsection "Language Handle Attributes and Internals" | |
537 | A language handle is a flyweight object \*(-- i.e., it doesn't (necessarily) | |
538 | carry any data of interest, other than just being a member of | |
539 | whatever class it belongs to. | |
540 | .PP | |
541 | A language handle is implemented as a blessed hash. Subclasses of yours | |
542 | can store whatever data you want in the hash. Currently the only hash | |
543 | entry used by any crucial Maketext method is \*(L"fail\*(R", so feel free to | |
544 | use anything else as you like. | |
545 | .PP | |
546 | \&\fBRemember: Don't be afraid to read the Maketext source if there's | |
547 | any point on which this documentation is unclear.\fR This documentation | |
548 | is vastly longer than the module source itself. | |
549 | .SH "LANGUAGE CLASS HIERARCHIES" | |
550 | .IX Header "LANGUAGE CLASS HIERARCHIES" | |
551 | These are Locale::Maketext's assumptions about the class | |
552 | hierarchy formed by all your language classes: | |
553 | .IP "\(bu" 4 | |
554 | You must have a project base class, which you load, and | |
555 | which you then use as the first argument in | |
556 | the call to YourProjClass\->get_handle(...). It should derive | |
557 | (whether directly or indirectly) from Locale::Maketext. | |
558 | It \fBdoesn't matter\fR how you name this class, altho assuming this | |
559 | is the localization component of your Super Mega Program, | |
560 | good names for your project class might be | |
561 | SuperMegaProgram::Localization, SuperMegaProgram::L10N, | |
562 | SuperMegaProgram::I18N, SuperMegaProgram::International, | |
563 | or even SuperMegaProgram::Languages or SuperMegaProgram::Messages. | |
564 | .IP "\(bu" 4 | |
565 | Language classes are what YourProjClass\->get_handle will try to load. | |
566 | It will look for them by taking each language-tag (\fBskipping\fR it | |
567 | if it doesn't look like a language-tag or locale\-tag!), turning it to | |
568 | all lowercase, turning any dashes to underscores, and appending it | |
569 | to YourProjClass . \*(L"::\*(R". So this: | |
570 | .Sp | |
571 | .Vb 3 | |
572 | \& $lh = YourProjClass->get_handle( | |
573 | \& 'en-US', 'fr', 'kon', 'i-klingon', 'i-klingon-romanized' | |
574 | \& ); | |
575 | .Ve | |
576 | .Sp | |
577 | will try loading the classes | |
578 | YourProjClass::en_us (note lowercase!), YourProjClass::fr, | |
579 | YourProjClass::kon, | |
580 | YourProjClass::i_klingon | |
581 | and YourProjClass::i_klingon_romanized. (And it'll stop at the | |
582 | first one that actually loads.) | |
583 | .IP "\(bu" 4 | |
584 | I assume that each language class derives (directly or indirectly) | |
585 | from your project class, and also defines its \f(CW@ISA\fR, its \f(CW%Lexicon\fR, | |
586 | or both. But I anticipate no dire consequences if these assumptions | |
587 | do not hold. | |
588 | .IP "\(bu" 4 | |
589 | Language classes may derive from other language classes (altho they | |
590 | should have "use \fIThatclassname\fR" or "use base qw(\fI...classes...\fR)"). | |
591 | They may derive from the project | |
592 | class. They may derive from some other class altogether. Or via | |
593 | multiple inheritance, they may derive from any mixture of these. | |
594 | .IP "\(bu" 4 | |
595 | I foresee no problems with having multiple inheritance in | |
596 | your hierarchy of language classes. (As usual, however, Perl will | |
597 | complain bitterly if you have a cycle in the hierarchy: i.e., if | |
598 | any class is its own ancestor.) | |
599 | .SH "ENTRIES IN EACH LEXICON" | |
600 | .IX Header "ENTRIES IN EACH LEXICON" | |
601 | A typical \f(CW%Lexicon\fR entry is meant to signify a phrase, | |
602 | taking some number (0 or more) of parameters. An entry | |
603 | is meant to be accessed via | |
604 | a string \fIkey\fR in \f(CW$lh\fR\->maketext(\fIkey\fR, ...parameters...), | |
605 | which should return a string that is generally meant to | |
606 | be used for \*(L"output\*(R" to the user \*(-- regardless of whether | |
607 | this actually means printing to \s-1STDOUT\s0, writing to a file, | |
608 | or putting into a \s-1GUI\s0 widget. | |
609 | .PP | |
610 | While the key must be a string value (since that's a basic | |
611 | restriction that Perl places on hash keys), the value in | |
612 | the lexicon can currently be of several types: | |
613 | a defined scalar, scalarref, or coderef. The use of these is | |
614 | explained above, in the section 'The \*(L"maketext\*(R" Method', and | |
615 | Bracket Notation for strings is discussed in the next section. | |
616 | .PP | |
617 | While you can use arbitrary unique IDs for lexicon keys | |
618 | (like \*(L"_min_larger_max_error\*(R"), it is often | |
619 | useful if an entry's key is itself a valid value, like | |
620 | this example error message: | |
621 | .PP | |
622 | .Vb 1 | |
623 | \& "Minimum ([_1]) is larger than maximum ([_2])!\en", | |
624 | .Ve | |
625 | .PP | |
626 | Compare this code that uses an arbitrary \s-1ID\s0... | |
627 | .PP | |
628 | .Vb 2 | |
629 | \& die $lh->maketext( "_min_larger_max_error", $min, $max ) | |
630 | \& if $min > $max; | |
631 | .Ve | |
632 | .PP | |
633 | \&...to this code that uses a key\-as\-value: | |
634 | .PP | |
635 | .Vb 4 | |
636 | \& die $lh->maketext( | |
637 | \& "Minimum ([_1]) is larger than maximum ([_2])!\en", | |
638 | \& $min, $max | |
639 | \& ) if $min > $max; | |
640 | .Ve | |
641 | .PP | |
642 | The second is, in short, more readable. In particular, it's obvious | |
643 | that the number of parameters you're feeding to that phrase (two) is | |
644 | the number of parameters that it \fIwants\fR to be fed. (Since you see | |
645 | _1 and a _2 being used in the key there.) | |
646 | .PP | |
647 | Also, once a project is otherwise | |
648 | complete and you start to localize it, you can scrape together | |
649 | all the various keys you use, and pass them to a translator; and then | |
650 | the translator's work will go faster if what he's presented is this: | |
651 | .PP | |
652 | .Vb 2 | |
653 | \& "Minimum ([_1]) is larger than maximum ([_2])!\en", | |
654 | \& => "", # fill in something here, Jacques! | |
655 | .Ve | |
656 | .PP | |
657 | rather than this more cryptic mess: | |
658 | .PP | |
659 | .Vb 2 | |
660 | \& "_min_larger_max_error" | |
661 | \& => "", # fill in something here, Jacques | |
662 | .Ve | |
663 | .PP | |
664 | I think that keys as lexicon values makes the completed lexicon | |
665 | entries more readable: | |
666 | .PP | |
667 | .Vb 2 | |
668 | \& "Minimum ([_1]) is larger than maximum ([_2])!\en", | |
669 | \& => "Le minimum ([_1]) est plus grand que le maximum ([_2])!\en", | |
670 | .Ve | |
671 | .PP | |
672 | Also, having valid values as keys becomes very useful if you set | |
673 | up an _AUTO lexicon. _AUTO lexicons are discussed in a later | |
674 | section. | |
675 | .PP | |
676 | I almost always use keys that are themselves | |
677 | valid lexicon values. One notable exception is when the value is | |
678 | quite long. For example, to get the screenful of data that | |
679 | a command-line program might return when given an unknown switch, | |
680 | I often just use a key \*(L"_USAGE_MESSAGE\*(R". At that point I then go | |
681 | and immediately define that lexicon entry in the | |
682 | ProjectClass::L10N::en lexicon (since English is always my \*(L"project | |
683 | language\*(R"): | |
684 | .PP | |
685 | .Vb 3 | |
686 | \& '_USAGE_MESSAGE' => <<'EOSTUFF', | |
687 | \& ...long long message... | |
688 | \& EOSTUFF | |
689 | .Ve | |
690 | .PP | |
691 | and then I can use it as: | |
692 | .PP | |
693 | .Vb 1 | |
694 | \& getopt('oDI', \e%opts) or die $lh->maketext('_USAGE_MESSAGE'); | |
695 | .Ve | |
696 | .PP | |
697 | Incidentally, | |
698 | note that each class's \f(CW%Lexicon\fR inherits-and-extends | |
699 | the lexicons in its superclasses. This is not because these are | |
700 | special hashes \fIper se\fR, but because you access them via the | |
701 | \&\f(CW\*(C`maketext\*(C'\fR method, which looks for entries across all the | |
702 | \&\f(CW%Lexicon\fR's in a language class \fIand\fR all its ancestor classes. | |
703 | (This is because the idea of \*(L"class data\*(R" isn't directly implemented | |
704 | in Perl, but is instead left to individual class-systems to implement | |
705 | as they see fit.) | |
706 | .PP | |
707 | Note that you may have things stored in a lexicon | |
708 | besides just phrases for output: for example, if your program | |
709 | takes input from the keyboard, asking a \*(L"(Y/N)\*(R" question, | |
710 | you probably need to know what equivalent of \*(L"Y[es]/N[o]\*(R" is | |
711 | in whatever language. You probably also need to know what | |
712 | the equivalents of the answers \*(L"y\*(R" and \*(L"n\*(R" are. You can | |
713 | store that information in the lexicon (say, under the keys | |
714 | \&\*(L"~answer_y\*(R" and \*(L"~answer_n\*(R", and the long forms as | |
715 | \&\*(L"~answer_yes\*(R" and \*(L"~answer_no\*(R", where \*(L"~\*(R" is just an ad-hoc | |
716 | character meant to indicate to programmers/translators that | |
717 | these are not phrases for output). | |
718 | .PP | |
719 | Or instead of storing this in the language class's lexicon, | |
720 | you can (and, in some cases, really should) represent the same bit | |
721 | of knowledge as code in a method in the language class. (That | |
722 | leaves a tidy distinction between the lexicon as the things we | |
723 | know how to \fIsay\fR, and the rest of the things in the lexicon class | |
724 | as things that we know how to \fIdo\fR.) Consider | |
725 | this example of a processor for responses to French \*(L"oui/non\*(R" | |
726 | questions: | |
727 | .PP | |
728 | .Vb 7 | |
729 | \& sub y_or_n { | |
730 | \& return undef unless defined $_[1] and length $_[1]; | |
731 | \& my $answer = lc $_[1]; # smash case | |
732 | \& return 1 if $answer eq 'o' or $answer eq 'oui'; | |
733 | \& return 0 if $answer eq 'n' or $answer eq 'non'; | |
734 | \& return undef; | |
735 | \& } | |
736 | .Ve | |
737 | .PP | |
738 | \&...which you'd then call in a construct like this: | |
739 | .PP | |
740 | .Vb 7 | |
741 | \& my $response; | |
742 | \& until(defined $response) { | |
743 | \& print $lh->maketext("Open the pod bay door (y/n)? "); | |
744 | \& $response = $lh->y_or_n( get_input_from_keyboard_somehow() ); | |
745 | \& } | |
746 | \& if($response) { $pod_bay_door->open() } | |
747 | \& else { $pod_bay_door->leave_closed() } | |
748 | .Ve | |
749 | .PP | |
750 | Other data worth storing in a lexicon might be things like | |
751 | filenames for language-targeted resources: | |
752 | .PP | |
753 | .Vb 16 | |
754 | \& ... | |
755 | \& "_main_splash_png" | |
756 | \& => "/styles/en_us/main_splash.png", | |
757 | \& "_main_splash_imagemap" | |
758 | \& => "/styles/en_us/main_splash.incl", | |
759 | \& "_general_graphics_path" | |
760 | \& => "/styles/en_us/", | |
761 | \& "_alert_sound" | |
762 | \& => "/styles/en_us/hey_there.wav", | |
763 | \& "_forward_icon" | |
764 | \& => "left_arrow.png", | |
765 | \& "_backward_icon" | |
766 | \& => "right_arrow.png", | |
767 | \& # In some other languages, left equals | |
768 | \& # BACKwards, and right is FOREwards. | |
769 | \& ... | |
770 | .Ve | |
771 | .PP | |
772 | You might want to do the same thing for expressing key bindings | |
773 | or the like (since hardwiring \*(L"q\*(R" as the binding for the function | |
774 | that quits a screen/menu/program is useful only if your language | |
775 | happens to associate \*(L"q\*(R" with \*(L"quit\*(R"!) | |
776 | .SH "BRACKET NOTATION" | |
777 | .IX Header "BRACKET NOTATION" | |
778 | Bracket Notation is a crucial feature of Locale::Maketext. I mean | |
779 | Bracket Notation to provide a replacement for sprintf formatting. | |
780 | Everything you do with Bracket Notation could be done with a sub block, | |
781 | but bracket notation is meant to be much more concise. | |
782 | .PP | |
783 | Bracket Notation is like a miniature \*(L"template\*(R" system (in the sense | |
784 | of Text::Template, not in the sense of \*(C+ templates), | |
785 | where normal text is passed thru basically as is, but text in special | |
786 | regions is specially interpreted. In Bracket Notation, you use brackets | |
787 | (\*(L"[...]\*(R" \*(-- not \*(L"{...}\*(R"!) to note sections that are specially interpreted. | |
788 | .PP | |
789 | For example, here all the areas that are taken literally are underlined with | |
790 | a \*(L"^\*(R", and all the in-bracket special regions are underlined with an X: | |
791 | .PP | |
792 | .Vb 2 | |
793 | \& "Minimum ([_1]) is larger than maximum ([_2])!\en", | |
794 | \& ^^^^^^^^^ XX ^^^^^^^^^^^^^^^^^^^^^^^^^^ XX ^^^^ | |
795 | .Ve | |
796 | .PP | |
797 | When that string is compiled from bracket notation into a real Perl sub, | |
798 | it's basically turned into: | |
799 | .PP | |
800 | .Vb 11 | |
801 | \& sub { | |
802 | \& my $lh = $_[0]; | |
803 | \& my @params = @_; | |
804 | \& return join '', | |
805 | \& "Minimum (", | |
806 | \& ...some code here... | |
807 | \& ") is larger than maximum (", | |
808 | \& ...some code here... | |
809 | \& ")!\en", | |
810 | \& } | |
811 | \& # to be called by $lh->maketext(KEY, params...) | |
812 | .Ve | |
813 | .PP | |
814 | In other words, text outside bracket groups is turned into string | |
815 | literals. Text in brackets is rather more complex, and currently follows | |
816 | these rules: | |
817 | .IP "\(bu" 4 | |
818 | Bracket groups that are empty, or which consist only of whitespace, | |
819 | are ignored. (Examples: \*(L"[]\*(R", \*(L"[ ]\*(R", or a [ and a ] with returns | |
820 | and/or tabs and/or spaces between them.) | |
821 | .Sp | |
822 | Otherwise, each group is taken to be a comma-separated group of items, | |
823 | and each item is interpreted as follows: | |
824 | .IP "\(bu" 4 | |
825 | An item that is "_\fIdigits\fR\*(L" or \*(R"_\-\fIdigits\fR" is interpreted as | |
826 | \&\f(CW$_\fR[\fIvalue\fR]. I.e., \*(L"_1\*(R" is replaced with \f(CW$_\fR[1], and \*(L"_\-3\*(R" is interpreted | |
827 | as \f(CW$_\fR[\-3] (in which case \f(CW@_\fR should have at least three elements in it). | |
828 | Note that \f(CW$_\fR[0] is the language handle, and is typically not named | |
829 | directly. | |
830 | .IP "\(bu" 4 | |
831 | An item \*(L"_*\*(R" is interpreted to mean \*(L"all of \f(CW@_\fR except \f(CW$_\fR[0]\*(R". | |
832 | I.e., \f(CW@_[1..$#_]\fR. Note that this is an empty list in the case | |
833 | of calls like \f(CW$lh\fR\->maketext(\fIkey\fR) where there are no | |
834 | parameters (except \f(CW$_\fR[0], the language handle). | |
835 | .IP "\(bu" 4 | |
836 | Otherwise, each item is interpreted as a string literal. | |
837 | .PP | |
838 | The group as a whole is interpreted as follows: | |
839 | .IP "\(bu" 4 | |
840 | If the first item in a bracket group looks like a method name, | |
841 | then that group is interpreted like this: | |
842 | .Sp | |
843 | .Vb 3 | |
844 | \& $lh->that_method_name( | |
845 | \& ...rest of items in this group... | |
846 | \& ), | |
847 | .Ve | |
848 | .IP "\(bu" 4 | |
849 | If the first item in a bracket group is \*(L"*\*(R", it's taken as shorthand | |
850 | for the so commonly called \*(L"quant\*(R" method. Similarly, if the first | |
851 | item in a bracket group is \*(L"#\*(R", it's taken to be shorthand for | |
852 | \&\*(L"numf\*(R". | |
853 | .IP "\(bu" 4 | |
854 | If the first item in a bracket group is empty\-string, or \*(L"_*\*(R" | |
855 | or "_\fIdigits\fR\*(L" or \*(R"_\-\fIdigits\fR", then that group is interpreted | |
856 | as just the interpolation of all its items: | |
857 | .Sp | |
858 | .Vb 3 | |
859 | \& join('', | |
860 | \& ...rest of items in this group... | |
861 | \& ), | |
862 | .Ve | |
863 | .Sp | |
864 | Examples: \*(L"[_1]\*(R" and \*(L"[,_1]\*(R", which are synonymous; and | |
865 | \&\*(L"[,ID\-(,_4,\-,_2,)]\*(R", which compiles as | |
866 | \&\f(CW\*(C`join "", "ID\-(", $_[4], "\-", $_[2], ")"\*(C'\fR. | |
867 | .IP "\(bu" 4 | |
868 | Otherwise this bracket group is invalid. For example, in the group | |
869 | \&\*(L"[!@#,whatever]\*(R", the first item \f(CW"!@#"\fR is neither empty\-string, | |
870 | "_\fInumber\fR\*(L", \*(R"_\-\fInumber\fR\*(L", \*(R"_*", nor a valid method name; and so | |
871 | Locale::Maketext will throw an exception if you try compiling an | |
872 | expression containing this bracket group. | |
873 | .PP | |
874 | Note, incidentally, that items in each group are comma\-separated, | |
875 | not \f(CW\*(C`/\es*,\es*/\*(C'\fR\-separated. That is, you might expect that this | |
876 | bracket group: | |
877 | .PP | |
878 | .Vb 1 | |
879 | \& "Hoohah [foo, _1 , bar ,baz]!" | |
880 | .Ve | |
881 | .PP | |
882 | would compile to this: | |
883 | .PP | |
884 | .Vb 7 | |
885 | \& sub { | |
886 | \& my $lh = $_[0]; | |
887 | \& return join '', | |
888 | \& "Hoohah ", | |
889 | \& $lh->foo( $_[1], "bar", "baz"), | |
890 | \& "!", | |
891 | \& } | |
892 | .Ve | |
893 | .PP | |
894 | But it actually compiles as this: | |
895 | .PP | |
896 | .Vb 7 | |
897 | \& sub { | |
898 | \& my $lh = $_[0]; | |
899 | \& return join '', | |
900 | \& "Hoohah ", | |
901 | \& $lh->foo(" _1 ", " bar ", "baz"), #!!! | |
902 | \& "!", | |
903 | \& } | |
904 | .Ve | |
905 | .PP | |
906 | In the notation discussed so far, the characters \*(L"[\*(R" and \*(L"]\*(R" are given | |
907 | special meaning, for opening and closing bracket groups, and \*(L",\*(R" has | |
908 | a special meaning inside bracket groups, where it separates items in the | |
909 | group. This begs the question of how you'd express a literal \*(L"[\*(R" or | |
910 | \&\*(L"]\*(R" in a Bracket Notation string, and how you'd express a literal | |
911 | comma inside a bracket group. For this purpose I've adopted \*(L"~\*(R" (tilde) | |
912 | as an escape character: \*(L"~[\*(R" means a literal '[' character anywhere | |
913 | in Bracket Notation (i.e., regardless of whether you're in a bracket | |
914 | group or not), and ditto for \*(L"~]\*(R" meaning a literal ']', and \*(L"~,\*(R" meaning | |
915 | a literal comma. (Altho \*(L",\*(R" means a literal comma outside of | |
916 | bracket groups \*(-- it's only inside bracket groups that commas are special.) | |
917 | .PP | |
918 | And on the off chance you need a literal tilde in a bracket expression, | |
919 | you get it with \*(L"~~\*(R". | |
920 | .PP | |
921 | Currently, an unescaped \*(L"~\*(R" before a character | |
922 | other than a bracket or a comma is taken to mean just a \*(L"~\*(R" and that | |
923 | character. I.e., \*(L"~X\*(R" means the same as \*(L"~~X\*(R" \*(-- i.e., one literal tilde, | |
924 | and then one literal \*(L"X\*(R". However, by using \*(L"~X\*(R", you are assuming that | |
925 | no future version of Maketext will use \*(L"~X\*(R" as a magic escape sequence. | |
926 | In practice this is not a great problem, since first off you can just | |
927 | write \*(L"~~X\*(R" and not worry about it; second off, I doubt I'll add lots | |
928 | of new magic characters to bracket notation; and third off, you | |
929 | aren't likely to want literal \*(L"~\*(R" characters in your messages anyway, | |
930 | since it's not a character with wide use in natural language text. | |
931 | .PP | |
932 | Brackets must be balanced \*(-- every openbracket must have | |
933 | one matching closebracket, and vice versa. So these are all \fBinvalid\fR: | |
934 | .PP | |
935 | .Vb 4 | |
936 | \& "I ate [quant,_1,rhubarb pie." | |
937 | \& "I ate [quant,_1,rhubarb pie[." | |
938 | \& "I ate quant,_1,rhubarb pie]." | |
939 | \& "I ate quant,_1,rhubarb pie[." | |
940 | .Ve | |
941 | .PP | |
942 | Currently, bracket groups do not nest. That is, you \fBcannot\fR say: | |
943 | .PP | |
944 | .Vb 1 | |
945 | \& "Foo [bar,baz,[quux,quuux]]\en"; | |
946 | .Ve | |
947 | .PP | |
948 | If you need a notation that's that powerful, use normal Perl: | |
949 | .PP | |
950 | .Vb 11 | |
951 | \& %Lexicon = ( | |
952 | \& ... | |
953 | \& "some_key" => sub { | |
954 | \& my $lh = $_[0]; | |
955 | \& join '', | |
956 | \& "Foo ", | |
957 | \& $lh->bar('baz', $lh->quux('quuux')), | |
958 | \& "\en", | |
959 | \& }, | |
960 | \& ... | |
961 | \& ); | |
962 | .Ve | |
963 | .PP | |
964 | Or write the \*(L"bar\*(R" method so you don't need to pass it the | |
965 | output from calling quux. | |
966 | .PP | |
967 | I do not anticipate that you will need (or particularly want) | |
968 | to nest bracket groups, but you are welcome to email me with | |
969 | convincing (real\-life) arguments to the contrary. | |
970 | .SH "AUTO LEXICONS" | |
971 | .IX Header "AUTO LEXICONS" | |
972 | If maketext goes to look in an individual \f(CW%Lexicon\fR for an entry | |
973 | for \fIkey\fR (where \fIkey\fR does not start with an underscore), and | |
974 | sees none, \fBbut does see\fR an entry of \*(L"_AUTO\*(R" => \fIsome_true_value\fR, | |
975 | then we actually define \f(CW$Lexicon\fR{\fIkey\fR} = \fIkey\fR right then and there, | |
976 | and then use that value as if it had been there all | |
977 | along. This happens before we even look in any superclass \f(CW%Lexicons\fR! | |
978 | .PP | |
979 | (This is meant to be somewhat like the \s-1AUTOLOAD\s0 mechanism in | |
980 | Perl's function call system \*(-- or, looked at another way, | |
981 | like the AutoLoader module.) | |
982 | .PP | |
983 | I can picture all sorts of circumstances where you just | |
984 | do not want lookup to be able to fail (since failing | |
985 | normally means that maketext throws a \f(CW\*(C`die\*(C'\fR, altho | |
986 | see the next section for greater control over that). But | |
987 | here's one circumstance where _AUTO lexicons are meant to | |
988 | be \fIespecially\fR useful: | |
989 | .PP | |
990 | As you're writing an application, you decide as you go what messages | |
991 | you need to emit. Normally you'd go to write this: | |
992 | .PP | |
993 | .Vb 5 | |
994 | \& if(-e $filename) { | |
995 | \& go_process_file($filename) | |
996 | \& } else { | |
997 | \& print "Couldn't find file \e"$filename\e"!\en"; | |
998 | \& } | |
999 | .Ve | |
1000 | .PP | |
1001 | but since you anticipate localizing this, you write: | |
1002 | .PP | |
1003 | .Vb 13 | |
1004 | \& use ThisProject::I18N; | |
1005 | \& my $lh = ThisProject::I18N->get_handle(); | |
1006 | \& # For the moment, assume that things are set up so | |
1007 | \& # that we load class ThisProject::I18N::en | |
1008 | \& # and that's the class that $lh belongs to. | |
1009 | \& ... | |
1010 | \& if(-e $filename) { | |
1011 | \& go_process_file($filename) | |
1012 | \& } else { | |
1013 | \& print $lh->maketext( | |
1014 | \& "Couldn't find file \e"[_1]\e"!\en", $filename | |
1015 | \& ); | |
1016 | \& } | |
1017 | .Ve | |
1018 | .PP | |
1019 | Now, right after you've just written the above lines, you'd | |
1020 | normally have to go open the file | |
1021 | ThisProject/I18N/en.pm, and immediately add an entry: | |
1022 | .PP | |
1023 | .Vb 2 | |
1024 | \& "Couldn't find file \e"[_1]\e"!\en" | |
1025 | \& => "Couldn't find file \e"[_1]\e"!\en", | |
1026 | .Ve | |
1027 | .PP | |
1028 | But I consider that somewhat of a distraction from the work | |
1029 | of getting the main code working \*(-- to say nothing of the fact | |
1030 | that I often have to play with the program a few times before | |
1031 | I can decide exactly what wording I want in the messages (which | |
1032 | in this case would require me to go changing three lines of code: | |
1033 | the call to maketext with that key, and then the two lines in | |
1034 | ThisProject/I18N/en.pm). | |
1035 | .PP | |
1036 | However, if you set \*(L"_AUTO => 1\*(R" in the \f(CW%Lexicon\fR in | |
1037 | ThisProject/I18N/en.pm (assuming that English (en) is | |
1038 | the language that all your programmers will be using for this | |
1039 | project's internal message keys), then you don't ever have to | |
1040 | go adding lines like this | |
1041 | .PP | |
1042 | .Vb 2 | |
1043 | \& "Couldn't find file \e"[_1]\e"!\en" | |
1044 | \& => "Couldn't find file \e"[_1]\e"!\en", | |
1045 | .Ve | |
1046 | .PP | |
1047 | to ThisProject/I18N/en.pm, because if _AUTO is true there, | |
1048 | then just looking for an entry with the key \*(L"Couldn't find | |
1049 | file \e\*(R"[_1]\e\*(L"!\en\*(R" in that lexicon will cause it to be added, | |
1050 | with that value! | |
1051 | .PP | |
1052 | Note that the reason that keys that start with \*(L"_\*(R" | |
1053 | are immune to _AUTO isn't anything generally magical about | |
1054 | the underscore character \*(-- I just wanted a way to have most | |
1055 | lexicon keys be autoable, except for possibly a few, and I | |
1056 | arbitrarily decided to use a leading underscore as a signal | |
1057 | to distinguish those few. | |
1058 | .SH "CONTROLLING LOOKUP FAILURE" | |
1059 | .IX Header "CONTROLLING LOOKUP FAILURE" | |
1060 | If you call \f(CW$lh\fR\->maketext(\fIkey\fR, ...parameters...), | |
1061 | and there's no entry \fIkey\fR in \f(CW$lh\fR's class's \f(CW%Lexicon\fR, nor | |
1062 | in the superclass \f(CW%Lexicon\fR hash, \fIand\fR if we can't auto-make | |
1063 | \&\fIkey\fR (because either it starts with a \*(L"_\*(R", or because none | |
1064 | of its lexicons have \f(CW\*(C`_AUTO => 1,\*(C'\fR), then we have | |
1065 | failed to find a normal way to maketext \fIkey\fR. What then | |
1066 | happens in these failure conditions, depends on the \f(CW$lh\fR object's | |
1067 | \&\*(L"fail\*(R" attribute. | |
1068 | .PP | |
1069 | If the language handle has no \*(L"fail\*(R" attribute, maketext | |
1070 | will simply throw an exception (i.e., it calls \f(CW\*(C`die\*(C'\fR, mentioning | |
1071 | the \fIkey\fR whose lookup failed, and naming the line number where | |
1072 | the calling \f(CW$lh\fR\->maketext(\fIkey\fR,...) was). | |
1073 | .PP | |
1074 | If the language handle has a \*(L"fail\*(R" attribute whose value is a | |
1075 | coderef, then \f(CW$lh\fR\->maketext(\fIkey\fR,...params...) gives up and calls: | |
1076 | .PP | |
1077 | .Vb 1 | |
1078 | \& return &{$that_subref}($lh, $key, @params); | |
1079 | .Ve | |
1080 | .PP | |
1081 | Otherwise, the \*(L"fail\*(R" attribute's value should be a string denoting | |
1082 | a method name, so that \f(CW$lh\fR\->maketext(\fIkey\fR,...params...) can | |
1083 | give up with: | |
1084 | .PP | |
1085 | .Vb 1 | |
1086 | \& return $lh->$that_method_name($phrase, @params); | |
1087 | .Ve | |
1088 | .PP | |
1089 | The \*(L"fail\*(R" attribute can be accessed with the \f(CW\*(C`fail_with\*(C'\fR method: | |
1090 | .PP | |
1091 | .Vb 2 | |
1092 | \& # Set to a coderef: | |
1093 | \& $lh->fail_with( \e&failure_handler ); | |
1094 | .Ve | |
1095 | .PP | |
1096 | .Vb 2 | |
1097 | \& # Set to a method name: | |
1098 | \& $lh->fail_with( 'failure_method' ); | |
1099 | .Ve | |
1100 | .PP | |
1101 | .Vb 2 | |
1102 | \& # Set to nothing (i.e., so failure throws a plain exception) | |
1103 | \& $lh->fail_with( undef ); | |
1104 | .Ve | |
1105 | .PP | |
1106 | .Vb 2 | |
1107 | \& # Simply read: | |
1108 | \& $handler = $lh->fail_with(); | |
1109 | .Ve | |
1110 | .PP | |
1111 | Now, as to what you may want to do with these handlers: Maybe you'd | |
1112 | want to log what key failed for what class, and then die. Maybe | |
1113 | you don't like \f(CW\*(C`die\*(C'\fR and instead you want to send the error message | |
1114 | to \s-1STDOUT\s0 (or wherever) and then merely \f(CW\*(C`exit()\*(C'\fR. | |
1115 | .PP | |
1116 | Or maybe you don't want to \f(CW\*(C`die\*(C'\fR at all! Maybe you could use a | |
1117 | handler like this: | |
1118 | .PP | |
1119 | .Vb 10 | |
1120 | \& # Make all lookups fall back onto an English value, | |
1121 | \& # but after we log it for later fingerpointing. | |
1122 | \& my $lh_backup = ThisProject->get_handle('en'); | |
1123 | \& open(LEX_FAIL_LOG, ">>wherever/lex.log") || die "GNAARGH $!"; | |
1124 | \& sub lex_fail { | |
1125 | \& my($failing_lh, $key, @params) = @_; | |
1126 | \& print LEX_FAIL_LOG scalar(localtime), "\et", | |
1127 | \& ref($failing_lh), "\et", $key, "\en"; | |
1128 | \& return $lh_backup->maketext($key,@params); | |
1129 | \& } | |
1130 | .Ve | |
1131 | .PP | |
1132 | Some users have expressed that they think this whole mechanism of | |
1133 | having a \*(L"fail\*(R" attribute at all, seems a rather pointless complication. | |
1134 | But I want Locale::Maketext to be usable for software projects of \fIany\fR | |
1135 | scale and type; and different software projects have different ideas | |
1136 | of what the right thing is to do in failure conditions. I could simply | |
1137 | say that failure always throws an exception, and that if you want to be | |
1138 | careful, you'll just have to wrap every call to \f(CW$lh\fR\->maketext in an | |
1139 | eval\ {\ }. However, I want programmers to reserve the right (via | |
1140 | the \*(L"fail\*(R" attribute) to treat lookup failure as something other than | |
1141 | an exception of the same level of severity as a config file being | |
1142 | unreadable, or some essential resource being inaccessible. | |
1143 | .PP | |
1144 | One possibly useful value for the \*(L"fail\*(R" attribute is the method name | |
1145 | \&\*(L"failure_handler_auto\*(R". This is a method defined in class | |
1146 | Locale::Maketext itself. You set it with: | |
1147 | .PP | |
1148 | .Vb 1 | |
1149 | \& $lh->fail_with('failure_handler_auto'); | |
1150 | .Ve | |
1151 | .PP | |
1152 | Then when you call \f(CW$lh\fR\->maketext(\fIkey\fR, ...parameters...) and | |
1153 | there's no \fIkey\fR in any of those lexicons, maketext gives up with | |
1154 | .PP | |
1155 | .Vb 1 | |
1156 | \& return $lh->failure_handler_auto($key, @params); | |
1157 | .Ve | |
1158 | .PP | |
1159 | But failure_handler_auto, instead of dying or anything, compiles | |
1160 | \&\f(CW$key\fR, caching it in \f(CW$lh\fR\->{'failure_lex'}{$key} = \f(CW$compiled\fR, | |
1161 | and then calls the compiled value, and returns that. (I.e., if | |
1162 | \&\f(CW$key\fR looks like bracket notation, \f(CW$compiled\fR is a sub, and we return | |
1163 | &{$compiled}(@params); but if \f(CW$key\fR is just a plain string, we just | |
1164 | return that.) | |
1165 | .PP | |
1166 | The effect of using \*(L"failure_handler_auto\*(R" | |
1167 | is like an \s-1AUTO\s0 lexicon, except that it 1) compiles \f(CW$key\fR even if | |
1168 | it starts with \*(L"_\*(R", and 2) you have a record in the new hashref | |
1169 | \&\f(CW$lh\fR\->{'failure_lex'} of all the keys that have failed for | |
1170 | this object. This should avoid your program dying \*(-- as long | |
1171 | as your keys aren't actually invalid as bracket code, and as | |
1172 | long as they don't try calling methods that don't exist. | |
1173 | .PP | |
1174 | \&\*(L"failure_handler_auto\*(R" may not be exactly what you want, but I | |
1175 | hope it at least shows you that maketext failure can be mitigated | |
1176 | in any number of very flexible ways. If you can formalize exactly | |
1177 | what you want, you should be able to express that as a failure | |
1178 | handler. You can even make it default for every object of a given | |
1179 | class, by setting it in that class's init: | |
1180 | .PP | |
1181 | .Vb 9 | |
1182 | \& sub init { | |
1183 | \& my $lh = $_[0]; # a newborn handle | |
1184 | \& $lh->SUPER::init(); | |
1185 | \& $lh->fail_with('my_clever_failure_handler'); | |
1186 | \& return; | |
1187 | \& } | |
1188 | \& sub my_clever_failure_handler { | |
1189 | \& ...your clever things here... | |
1190 | \& } | |
1191 | .Ve | |
1192 | .SH "HOW TO USE MAKETEXT" | |
1193 | .IX Header "HOW TO USE MAKETEXT" | |
1194 | Here is a brief checklist on how to use Maketext to localize | |
1195 | applications: | |
1196 | .IP "\(bu" 4 | |
1197 | Decide what system you'll use for lexicon keys. If you insist, | |
1198 | you can use opaque IDs (if you're nostalgic for \f(CW\*(C`catgets\*(C'\fR), | |
1199 | but I have better suggestions in the | |
1200 | section \*(L"Entries in Each Lexicon\*(R", above. Assuming you opt for | |
1201 | meaningful keys that double as values (like \*(L"Minimum ([_1]) is | |
1202 | larger than maximum ([_2])!\en\*(R"), you'll have to settle on what | |
1203 | language those should be in. For the sake of argument, I'll | |
1204 | call this English, specifically American English, \*(L"en\-US\*(R". | |
1205 | .IP "\(bu" 4 | |
1206 | Create a class for your localization project. This is | |
1207 | the name of the class that you'll use in the idiom: | |
1208 | .Sp | |
1209 | .Vb 2 | |
1210 | \& use Projname::L10N; | |
1211 | \& my $lh = Projname::L10N->get_handle(...) || die "Language?"; | |
1212 | .Ve | |
1213 | .Sp | |
1214 | Assuming you call your class Projname::L10N, create a class | |
1215 | consisting minimally of: | |
1216 | .Sp | |
1217 | .Vb 3 | |
1218 | \& package Projname::L10N; | |
1219 | \& use base qw(Locale::Maketext); | |
1220 | \& ...any methods you might want all your languages to share... | |
1221 | .Ve | |
1222 | .Sp | |
1223 | .Vb 2 | |
1224 | \& # And, assuming you want the base class to be an _AUTO lexicon, | |
1225 | \& # as is discussed a few sections up: | |
1226 | .Ve | |
1227 | .Sp | |
1228 | .Vb 1 | |
1229 | \& 1; | |
1230 | .Ve | |
1231 | .IP "\(bu" 4 | |
1232 | Create a class for the language your internal keys are in. Name | |
1233 | the class after the language-tag for that language, in lowercase, | |
1234 | with dashes changed to underscores. Assuming your project's first | |
1235 | language is \s-1US\s0 English, you should call this Projname::L10N::en_us. | |
1236 | It should consist minimally of: | |
1237 | .Sp | |
1238 | .Vb 6 | |
1239 | \& package Projname::L10N::en_us; | |
1240 | \& use base qw(Projname::L10N); | |
1241 | \& %Lexicon = ( | |
1242 | \& '_AUTO' => 1, | |
1243 | \& ); | |
1244 | \& 1; | |
1245 | .Ve | |
1246 | .Sp | |
1247 | (For the rest of this section, I'll assume that this \*(L"first | |
1248 | language class\*(R" of Projname::L10N::en_us has | |
1249 | an _AUTO lexicon.) | |
1250 | .IP "\(bu" 4 | |
1251 | Go and write your program. Everywhere in your program where | |
1252 | you would say: | |
1253 | .Sp | |
1254 | .Vb 1 | |
1255 | \& print "Foobar $thing stuff\en"; | |
1256 | .Ve | |
1257 | .Sp | |
1258 | instead do it thru maketext, using no variable interpolation in | |
1259 | the key: | |
1260 | .Sp | |
1261 | .Vb 1 | |
1262 | \& print $lh->maketext("Foobar [_1] stuff\en", $thing); | |
1263 | .Ve | |
1264 | .Sp | |
1265 | If you get tired of constantly saying \f(CW\*(C`print $lh\->maketext\*(C'\fR, | |
1266 | consider making a functional wrapper for it, like so: | |
1267 | .Sp | |
1268 | .Vb 7 | |
1269 | \& use Projname::L10N; | |
1270 | \& use vars qw($lh); | |
1271 | \& $lh = Projname::L10N->get_handle(...) || die "Language?"; | |
1272 | \& sub pmt (@) { print( $lh->maketext(@_)) } | |
1273 | \& # "pmt" is short for "Print MakeText" | |
1274 | \& $Carp::Verbose = 1; | |
1275 | \& # so if maketext fails, we see what made the call to pmt | |
1276 | .Ve | |
1277 | .Sp | |
1278 | Besides whole phrases meant for output, anything language-dependent | |
1279 | should be put into the class Projname::L10N::en_us, | |
1280 | whether as methods, or as lexicon entries \*(-- this is discussed | |
1281 | in the section \*(L"Entries in Each Lexicon\*(R", above. | |
1282 | .IP "\(bu" 4 | |
1283 | Once the program is otherwise done, and once its localization for | |
1284 | the first language works right (via the data and methods in | |
1285 | Projname::L10N::en_us), you can get together the data for translation. | |
1286 | If your first language lexicon isn't an _AUTO lexicon, then you already | |
1287 | have all the messages explicitly in the lexicon (or else you'd be | |
1288 | getting exceptions thrown when you call \f(CW$lh\fR\->maketext to get | |
1289 | messages that aren't in there). But if you were (advisedly) lazy and are | |
1290 | using an _AUTO lexicon, then you've got to make a list of all the phrases | |
1291 | that you've so far been letting _AUTO generate for you. There are very | |
1292 | many ways to assemble such a list. The most straightforward is to simply | |
1293 | grep the source for every occurrence of \*(L"maketext\*(R" (or calls | |
1294 | to wrappers around it, like the above \f(CW\*(C`pmt\*(C'\fR function), and to log the | |
1295 | following phrase. | |
1296 | .IP "\(bu" 4 | |
1297 | You may at this point want to consider whether your base class | |
1298 | (Projname::L10N) that all lexicons inherit from (Projname::L10N::en, | |
1299 | Projname::L10N::es, etc.) should be an _AUTO lexicon. It may be true | |
1300 | that in theory, all needed messages will be in each language class; | |
1301 | but in the presumably unlikely or \*(L"impossible\*(R" case of lookup failure, | |
1302 | you should consider whether your program should throw an exception, | |
1303 | emit text in English (or whatever your project's first language is), | |
1304 | or some more complex solution as described in the section | |
1305 | \&\*(L"Controlling Lookup Failure\*(R", above. | |
1306 | .IP "\(bu" 4 | |
1307 | Submit all messages/phrases/etc. to translators. | |
1308 | .Sp | |
1309 | (You may, in fact, want to start with localizing to \fIone\fR other language | |
1310 | at first, if you're not sure that you've properly abstracted the | |
1311 | language-dependent parts of your code.) | |
1312 | .Sp | |
1313 | Translators may request clarification of the situation in which a | |
1314 | particular phrase is found. For example, in English we are entirely happy | |
1315 | saying "\fIn\fR files found\*(L", regardless of whether we mean \*(R"I looked for files, | |
1316 | and found \fIn\fR of them\*(L" or the rather distinct situation of \*(R"I looked for | |
1317 | something else (like lines in files), and along the way I saw \fIn\fR | |
1318 | files.\*(L" This may involve rethinking things that you thought quite clear: | |
1319 | should \*(R"Edit\*(L" on a toolbar be a noun (\*(R"editing\*(L") or a verb (\*(R"to edit\*(L")? Is | |
1320 | there already a conventionalized way to express that menu option, separate | |
1321 | from the target language's normal word for \*(R"to edit"? | |
1322 | .Sp | |
1323 | In all cases where the very common phenomenon of quantification | |
1324 | (saying "\fIN\fR files", for \fBany\fR value of N) | |
1325 | is involved, each translator should make clear what dependencies the | |
1326 | number causes in the sentence. In many cases, dependency is | |
1327 | limited to words adjacent to the number, in places where you might | |
1328 | expect them (\*(L"I found the\-?PLURAL \fIN\fR | |
1329 | empty\-?PLURAL directory\-?PLURAL\*(R"), but in some cases there are | |
1330 | unexpected dependencies (\*(L"I found\-?PLURAL ...\*(R"!) as well as long-distance | |
1331 | dependencies (\*(L"The \fIN\fR directory\-?PLURAL could not be deleted\-?PLURAL\*(R"!). | |
1332 | .Sp | |
1333 | Remind the translators to consider the case where N is 0: | |
1334 | \&\*(L"0 files found\*(R" isn't exactly natural-sounding in any language, but it | |
1335 | may be unacceptable in many \*(-- or it may condition special | |
1336 | kinds of agreement (similar to English \*(L"I didN'T find \s-1ANY\s0 files\*(R"). | |
1337 | .Sp | |
1338 | Remember to ask your translators about numeral formatting in their | |
1339 | language, so that you can override the \f(CW\*(C`numf\*(C'\fR method as | |
1340 | appropriate. Typical variables in number formatting are: what to | |
1341 | use as a decimal point (comma? period?); what to use as a thousands | |
1342 | separator (space? nonbreaking space? comma? period? small | |
1343 | middot? prime? apostrophe?); and even whether the so-called \*(L"thousands | |
1344 | separator\*(R" is actually for every third digit \*(-- I've heard reports of | |
1345 | two hundred thousand being expressible as \*(L"2,00,000\*(R" for some Indian | |
1346 | (Subcontinental) languages, besides the less surprising \*(L"200\ 000\*(R", | |
1347 | \&\*(L"200.000\*(R", \*(L"200,000\*(R", and \*(L"200'000\*(R". Also, using a set of numeral | |
1348 | glyphs other than the usual \s-1ASCII\s0 \*(L"0\*(R"\-\*(L"9\*(R" might be appreciated, as via | |
1349 | \&\f(CW\*(C`tr/0\-9/\ex{0966}\-\ex{096F}/\*(C'\fR for getting digits in Devanagari script | |
1350 | (for Hindi, Konkani, others). | |
1351 | .Sp | |
1352 | The basic \f(CW\*(C`quant\*(C'\fR method that Locale::Maketext provides should be | |
1353 | good for many languages. For some languages, it might be useful | |
1354 | to modify it (or its constituent \f(CW\*(C`numerate\*(C'\fR method) | |
1355 | to take a plural form in the two-argument call to \f(CW\*(C`quant\*(C'\fR | |
1356 | (as in \*(L"[quant,_1,files]\*(R") if | |
1357 | it's all-around easier to infer the singular form from the plural, than | |
1358 | to infer the plural form from the singular. | |
1359 | .Sp | |
1360 | But for other languages (as is discussed at length | |
1361 | in Locale::Maketext::TPJ13), simple | |
1362 | \&\f(CW\*(C`quant\*(C'\fR/\f(CW\*(C`numerify\*(C'\fR is not enough. For the particularly problematic | |
1363 | Slavic languages, what you may need is a method which you provide | |
1364 | with the number, the citation form of the noun to quantify, and | |
1365 | the case and gender that the sentence's syntax projects onto that | |
1366 | noun slot. The method would then be responsible for determining | |
1367 | what grammatical number that numeral projects onto its noun phrase, | |
1368 | and what case and gender it may override the normal case and gender | |
1369 | with; and then it would look up the noun in a lexicon providing | |
1370 | all needed inflected forms. | |
1371 | .IP "\(bu" 4 | |
1372 | You may also wish to discuss with the translators the question of | |
1373 | how to relate different subforms of the same language tag, | |
1374 | considering how this reacts with \f(CW\*(C`get_handle\*(C'\fR's treatment of | |
1375 | these. For example, if a user accepts interfaces in \*(L"en, fr\*(R", and | |
1376 | you have interfaces available in \*(L"en\-US\*(R" and \*(L"fr\*(R", what should | |
1377 | they get? You may wish to resolve this by establishing that \*(L"en\*(R" | |
1378 | and \*(L"en\-US\*(R" are effectively synonymous, by having one class | |
1379 | zero-derive from the other. | |
1380 | .Sp | |
1381 | For some languages this issue may never come up (Danish is rarely | |
1382 | expressed as \*(L"da\-DK\*(R", but instead is just \*(L"da\*(R"). And for other | |
1383 | languages, the whole concept of a \*(L"generic\*(R" form may verge on | |
1384 | being uselessly vague, particularly for interfaces involving voice | |
1385 | media in forms of Arabic or Chinese. | |
1386 | .IP "\(bu" 4 | |
1387 | Once you've localized your program/site/etc. for all desired | |
1388 | languages, be sure to show the result (whether live, or via | |
1389 | screenshots) to the translators. Once they approve, make every | |
1390 | effort to have it then checked by at least one other speaker of | |
1391 | that language. This holds true even when (or especially when) the | |
1392 | translation is done by one of your own programmers. Some | |
1393 | kinds of systems may be harder to find testers for than others, | |
1394 | depending on the amount of domain-specific jargon and concepts | |
1395 | involved \*(-- it's easier to find people who can tell you whether | |
1396 | they approve of your translation for \*(L"delete this message\*(R" in an | |
1397 | email-via-Web interface, than to find people who can give you | |
1398 | an informed opinion on your translation for \*(L"attribute value\*(R" | |
1399 | in an \s-1XML\s0 query tool's interface. | |
1400 | .SH "SEE ALSO" | |
1401 | .IX Header "SEE ALSO" | |
1402 | I recommend reading all of these: | |
1403 | .PP | |
1404 | Locale::Maketext::TPJ13 \*(-- my \fIThe Perl | |
1405 | Journal\fR article about Maketext. It explains many important concepts | |
1406 | underlying Locale::Maketext's design, and offers some insight into why | |
1407 | Maketext is better than the plain old approach of just having | |
1408 | message catalogs that are just databases of sprintf formats. | |
1409 | .PP | |
1410 | File::Findgrep is a sample application/module | |
1411 | that uses Locale::Maketext to localize its messages. | |
1412 | .PP | |
1413 | I18N::LangTags. | |
1414 | .PP | |
1415 | Win32::Locale. | |
1416 | .PP | |
1417 | \&\s-1RFC\s0 3066, \fITags for the Identification of Languages\fR, | |
1418 | as at http://sunsite.dk/RFC/rfc/rfc3066.html | |
1419 | .PP | |
1420 | \&\s-1RFC\s0 2277, \fI\s-1IETF\s0 Policy on Character Sets and Languages\fR | |
1421 | is at http://sunsite.dk/RFC/rfc/rfc2277.html \*(-- much of it is | |
1422 | just things of interest to protocol designers, but it explains | |
1423 | some basic concepts, like the distinction between locales and | |
1424 | language\-tags. | |
1425 | .PP | |
1426 | The manual for \s-1GNU\s0 \f(CW\*(C`gettext\*(C'\fR. The gettext dist is available in | |
1427 | \&\f(CW\*(C`ftp://prep.ai.mit.edu/pub/gnu/\*(C'\fR \*(-- get | |
1428 | a recent gettext tarball and look in its \*(L"doc/\*(R" directory, there's | |
1429 | an easily browsable \s-1HTML\s0 version in there. The | |
1430 | gettext documentation asks lots of questions worth thinking | |
1431 | about, even if some of their answers are sometimes wonky, | |
1432 | particularly where they start talking about pluralization. | |
1433 | .PP | |
1434 | The Locale/Maketext.pm source. Observe that the module is much | |
1435 | shorter than its documentation! | |
1436 | .SH "COPYRIGHT AND DISCLAIMER" | |
1437 | .IX Header "COPYRIGHT AND DISCLAIMER" | |
1438 | Copyright (c) 1999\-2001 Sean M. Burke. All rights reserved. | |
1439 | .PP | |
1440 | This library is free software; you can redistribute it and/or modify | |
1441 | it under the same terms as Perl itself. | |
1442 | .PP | |
1443 | This program is distributed in the hope that it will be useful, but | |
1444 | without any warranty; without even the implied warranty of | |
1445 | merchantability or fitness for a particular purpose. | |
1446 | .SH "AUTHOR" | |
1447 | .IX Header "AUTHOR" | |
1448 | Sean M. Burke \f(CW\*(C`sburke@cpan.org\*(C'\fR |