Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / amd64 / man / man1 / perllocale.1
CommitLineData
920dae64
AT
1.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sh \" Subsection heading
6.br
7.if t .Sp
8.ne 5
9.PP
10\fB\\$1\fR
11.PP
12..
13.de Sp \" Vertical space (when we can't use .PP)
14.if t .sp .5v
15.if n .sp
16..
17.de Vb \" Begin verbatim text
18.ft CW
19.nf
20.ne \\$1
21..
22.de Ve \" End verbatim text
23.ft R
24.fi
25..
26.\" Set up some character translations and predefined strings. \*(-- will
27.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28.\" double quote, and \*(R" will give a right double quote. | will give a
29.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31.\" expand to `' in nroff, nothing in troff, for use with C<>.
32.tr \(*W-|\(bv\*(Tr
33.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
34.ie n \{\
35. ds -- \(*W-
36. ds PI pi
37. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
39. ds L" ""
40. ds R" ""
41. ds C` ""
42. ds C' ""
43'br\}
44.el\{\
45. ds -- \|\(em\|
46. ds PI \(*p
47. ds L" ``
48. ds R" ''
49'br\}
50.\"
51.\" If the F register is turned on, we'll generate index entries on stderr for
52.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53.\" entries marked with X<> in POD. Of course, you'll have to process the
54.\" output yourself in some meaningful fashion.
55.if \nF \{\
56. de IX
57. tm Index:\\$1\t\\n%\t"\\$2"
58..
59. nr % 0
60. rr F
61.\}
62.\"
63.\" For nroff, turn off justification. Always turn off hyphenation; it makes
64.\" way too many mistakes in technical documents.
65.hy 0
66.if n .na
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "PERLLOCALE 1"
132.TH PERLLOCALE 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide"
133.SH "NAME"
134perllocale \- Perl locale handling (internationalization and localization)
135.SH "DESCRIPTION"
136.IX Header "DESCRIPTION"
137Perl supports language-specific notions of data such as \*(L"is this
138a letter\*(R", \*(L"what is the uppercase equivalent of this letter\*(R", and
139\&\*(L"which of these letters comes first\*(R". These are important issues,
140especially for languages other than English\*(--but also for English: it
141would be nai\*:ve to imagine that \f(CW\*(C`A\-Za\-z\*(C'\fR defines all the \*(L"letters\*(R"
142needed to write in English. Perl is also aware that some character other
143than '.' may be preferred as a decimal point, and that output date
144representations may be language\-specific. The process of making an
145application take account of its users' preferences in such matters is
146called \fBinternationalization\fR (often abbreviated as \fBi18n\fR); telling
147such an application about a particular set of preferences is known as
148\&\fBlocalization\fR (\fBl10n\fR).
149.PP
150Perl can understand language-specific data via the standardized (\s-1ISO\s0 C,
151\&\s-1XPG4\s0, \s-1POSIX\s0 1.c) method called \*(L"the locale system\*(R". The locale system is
152controlled per application using one pragma, one function call, and
153several environment variables.
154.PP
155\&\fB\s-1NOTE\s0\fR: This feature is new in Perl 5.004, and does not apply unless an
156application specifically requests it\*(--see \*(L"Backward compatibility\*(R".
157The one exception is that \fIwrite()\fR now \fBalways\fR uses the current locale
158\&\- see \*(L"\s-1NOTES\s0\*(R".
159.SH "PREPARING TO USE LOCALES"
160.IX Header "PREPARING TO USE LOCALES"
161If Perl applications are to understand and present your data
162correctly according to a locale of your choice, \fBall\fR of the following
163must be true:
164.IP "\(bu" 4
165\&\fBYour operating system must support the locale system\fR. If it does,
166you should find that the \fIsetlocale()\fR function is a documented part of
167its C library.
168.IP "\(bu" 4
169\&\fBDefinitions for locales that you use must be installed\fR. You, or
170your system administrator, must make sure that this is the case. The
171available locales, the location in which they are kept, and the manner
172in which they are installed all vary from system to system. Some systems
173provide only a few, hard-wired locales and do not allow more to be
174added. Others allow you to add \*(L"canned\*(R" locales provided by the system
175supplier. Still others allow you or the system administrator to define
176and add arbitrary locales. (You may have to ask your supplier to
177provide canned locales that are not delivered with your operating
178system.) Read your system documentation for further illumination.
179.IP "\(bu" 4
180\&\fBPerl must believe that the locale system is supported\fR. If it does,
181\&\f(CW\*(C`perl \-V:d_setlocale\*(C'\fR will say that the value for \f(CW\*(C`d_setlocale\*(C'\fR is
182\&\f(CW\*(C`define\*(C'\fR.
183.PP
184If you want a Perl application to process and present your data
185according to a particular locale, the application code should include
186the \f(CW\*(C`use\ locale\*(C'\fR pragma (see \*(L"The use locale pragma\*(R") where
187appropriate, and \fBat least one\fR of the following must be true:
188.IP "\(bu" 4
189\&\fBThe locale-determining environment variables (see \*(L"\s-1ENVIRONMENT\s0\*(R")
190must be correctly set up\fR at the time the application is started, either
191by yourself or by whoever set up your system account.
192.IP "\(bu" 4
193\&\fBThe application must set its own locale\fR using the method described in
194\&\*(L"The setlocale function\*(R".
195.SH "USING LOCALES"
196.IX Header "USING LOCALES"
197.Sh "The use locale pragma"
198.IX Subsection "The use locale pragma"
199By default, Perl ignores the current locale. The \f(CW\*(C`use\ locale\*(C'\fR
200pragma tells Perl to use the current locale for some operations:
201.IP "\(bu" 4
202\&\fBThe comparison operators\fR (\f(CW\*(C`lt\*(C'\fR, \f(CW\*(C`le\*(C'\fR, \f(CW\*(C`cmp\*(C'\fR, \f(CW\*(C`ge\*(C'\fR, and \f(CW\*(C`gt\*(C'\fR) and
203the \s-1POSIX\s0 string collation functions \fIstrcoll()\fR and \fIstrxfrm()\fR use
204\&\f(CW\*(C`LC_COLLATE\*(C'\fR. \fIsort()\fR is also affected if used without an
205explicit comparison function, because it uses \f(CW\*(C`cmp\*(C'\fR by default.
206.Sp
207\&\fBNote:\fR \f(CW\*(C`eq\*(C'\fR and \f(CW\*(C`ne\*(C'\fR are unaffected by locale: they always
208perform a char-by-char comparison of their scalar operands. What's
209more, if \f(CW\*(C`cmp\*(C'\fR finds that its operands are equal according to the
210collation sequence specified by the current locale, it goes on to
211perform a char-by-char comparison, and only returns \fI0\fR (equal) if the
212operands are char-for-char identical. If you really want to know whether
213two strings\*(--which \f(CW\*(C`eq\*(C'\fR and \f(CW\*(C`cmp\*(C'\fR may consider different\*(--are equal
214as far as collation in the locale is concerned, see the discussion in
215\&\*(L"Category \s-1LC_COLLATE:\s0 Collation\*(R".
216.IP "\(bu" 4
217\&\fBRegular expressions and case-modification functions\fR (\fIuc()\fR, \fIlc()\fR,
218\&\fIucfirst()\fR, and \fIlcfirst()\fR) use \f(CW\*(C`LC_CTYPE\*(C'\fR.
219.IP "\(bu" 4
220\&\fBThe formatting functions\fR (\fIprintf()\fR, \fIsprintf()\fR and \fIwrite()\fR) use
221\&\f(CW\*(C`LC_NUMERIC\*(C'\fR.
222.IP "\(bu" 4
223\&\fBThe \s-1POSIX\s0 date formatting function\fR (\fIstrftime()\fR) uses \f(CW\*(C`LC_TIME\*(C'\fR.
224.PP
225\&\f(CW\*(C`LC_COLLATE\*(C'\fR, \f(CW\*(C`LC_CTYPE\*(C'\fR, and so on, are discussed further in
226\&\*(L"\s-1LOCALE\s0 \s-1CATEGORIES\s0\*(R".
227.PP
228The default behavior is restored with the \f(CW\*(C`no\ locale\*(C'\fR pragma, or
229upon reaching the end of the block enclosing \f(CW\*(C`use locale\*(C'\fR.
230.PP
231The string result of any operation that uses locale
232information is tainted, as it is possible for a locale to be
233untrustworthy. See \*(L"\s-1SECURITY\s0\*(R".
234.Sh "The setlocale function"
235.IX Subsection "The setlocale function"
236You can switch locales as often as you wish at run time with the
237\&\fIPOSIX::setlocale()\fR function:
238.PP
239.Vb 2
240\& # This functionality not usable prior to Perl 5.004
241\& require 5.004;
242.Ve
243.PP
244.Vb 4
245\& # Import locale-handling tool set from POSIX module.
246\& # This example uses: setlocale -- the function call
247\& # LC_CTYPE -- explained below
248\& use POSIX qw(locale_h);
249.Ve
250.PP
251.Vb 2
252\& # query and save the old locale
253\& $old_locale = setlocale(LC_CTYPE);
254.Ve
255.PP
256.Vb 2
257\& setlocale(LC_CTYPE, "fr_CA.ISO8859-1");
258\& # LC_CTYPE now in locale "French, Canada, codeset ISO 8859-1"
259.Ve
260.PP
261.Vb 3
262\& setlocale(LC_CTYPE, "");
263\& # LC_CTYPE now reset to default defined by LC_ALL/LC_CTYPE/LANG
264\& # environment variables. See below for documentation.
265.Ve
266.PP
267.Vb 2
268\& # restore the old locale
269\& setlocale(LC_CTYPE, $old_locale);
270.Ve
271.PP
272The first argument of \fIsetlocale()\fR gives the \fBcategory\fR, the second the
273\&\fBlocale\fR. The category tells in what aspect of data processing you
274want to apply locale-specific rules. Category names are discussed in
275\&\*(L"\s-1LOCALE\s0 \s-1CATEGORIES\s0\*(R" and \*(L"\s-1ENVIRONMENT\s0\*(R". The locale is the name of a
276collection of customization information corresponding to a particular
277combination of language, country or territory, and codeset. Read on for
278hints on the naming of locales: not all systems name locales as in the
279example.
280.PP
281If no second argument is provided and the category is something other
282than \s-1LC_ALL\s0, the function returns a string naming the current locale
283for the category. You can use this value as the second argument in a
284subsequent call to \fIsetlocale()\fR.
285.PP
286If no second argument is provided and the category is \s-1LC_ALL\s0, the
287result is implementation\-dependent. It may be a string of
288concatenated locale names (separator also implementation\-dependent)
289or a single locale name. Please consult your \fIsetlocale\fR\|(3) for
290details.
291.PP
292If a second argument is given and it corresponds to a valid locale,
293the locale for the category is set to that value, and the function
294returns the now-current locale value. You can then use this in yet
295another call to \fIsetlocale()\fR. (In some implementations, the return
296value may sometimes differ from the value you gave as the second
297argument\*(--think of it as an alias for the value you gave.)
298.PP
299As the example shows, if the second argument is an empty string, the
300category's locale is returned to the default specified by the
301corresponding environment variables. Generally, this results in a
302return to the default that was in force when Perl started up: changes
303to the environment made by the application after startup may or may not
304be noticed, depending on your system's C library.
305.PP
306If the second argument does not correspond to a valid locale, the locale
307for the category is not changed, and the function returns \fIundef\fR.
308.PP
309For further information about the categories, consult \fIsetlocale\fR\|(3).
310.Sh "Finding locales"
311.IX Subsection "Finding locales"
312For locales available in your system, consult also \fIsetlocale\fR\|(3) to
313see whether it leads to the list of available locales (search for the
314\&\fI\s-1SEE\s0 \s-1ALSO\s0\fR section). If that fails, try the following command lines:
315.PP
316.Vb 1
317\& locale -a
318.Ve
319.PP
320.Vb 1
321\& nlsinfo
322.Ve
323.PP
324.Vb 1
325\& ls /usr/lib/nls/loc
326.Ve
327.PP
328.Vb 1
329\& ls /usr/lib/locale
330.Ve
331.PP
332.Vb 1
333\& ls /usr/lib/nls
334.Ve
335.PP
336.Vb 1
337\& ls /usr/share/locale
338.Ve
339.PP
340and see whether they list something resembling these
341.PP
342.Vb 7
343\& en_US.ISO8859-1 de_DE.ISO8859-1 ru_RU.ISO8859-5
344\& en_US.iso88591 de_DE.iso88591 ru_RU.iso88595
345\& en_US de_DE ru_RU
346\& en de ru
347\& english german russian
348\& english.iso88591 german.iso88591 russian.iso88595
349\& english.roman8 russian.koi8r
350.Ve
351.PP
352Sadly, even though the calling interface for \fIsetlocale()\fR has been
353standardized, names of locales and the directories where the
354configuration resides have not been. The basic form of the name is
355\&\fIlanguage_territory\fR\fB.\fR\fIcodeset\fR, but the latter parts after
356\&\fIlanguage\fR are not always present. The \fIlanguage\fR and \fIcountry\fR
357are usually from the standards \fB\s-1ISO\s0 639\fR and \fB\s-1ISO\s0 3166\fR, the
358two-letter abbreviations for the languages and the countries of the
359world, respectively. The \fIcodeset\fR part often mentions some \fB\s-1ISO\s0
3608859\fR character set, the Latin codesets. For example, \f(CW\*(C`ISO 8859\-1\*(C'\fR
361is the so-called \*(L"Western European codeset\*(R" that can be used to encode
362most Western European languages adequately. Again, there are several
363ways to write even the name of that one standard. Lamentably.
364.PP
365Two special locales are worth particular mention: \*(L"C\*(R" and \*(L"\s-1POSIX\s0\*(R".
366Currently these are effectively the same locale: the difference is
367mainly that the first one is defined by the C standard, the second by
368the \s-1POSIX\s0 standard. They define the \fBdefault locale\fR in which
369every program starts in the absence of locale information in its
370environment. (The \fIdefault\fR default locale, if you will.) Its language
371is (American) English and its character codeset \s-1ASCII\s0.
372.PP
373\&\fB\s-1NOTE\s0\fR: Not all systems have the \*(L"\s-1POSIX\s0\*(R" locale (not all systems are
374POSIX\-conformant), so use \*(L"C\*(R" when you need explicitly to specify this
375default locale.
376.Sh "\s-1LOCALE\s0 \s-1PROBLEMS\s0"
377.IX Subsection "LOCALE PROBLEMS"
378You may encounter the following warning message at Perl startup:
379.PP
380.Vb 6
381\& perl: warning: Setting locale failed.
382\& perl: warning: Please check that your locale settings:
383\& LC_ALL = "En_US",
384\& LANG = (unset)
385\& are supported and installed on your system.
386\& perl: warning: Falling back to the standard locale ("C").
387.Ve
388.PP
389This means that your locale settings had \s-1LC_ALL\s0 set to \*(L"En_US\*(R" and
390\&\s-1LANG\s0 exists but has no value. Perl tried to believe you but could not.
391Instead, Perl gave up and fell back to the \*(L"C\*(R" locale, the default locale
392that is supposed to work no matter what. This usually means your locale
393settings were wrong, they mention locales your system has never heard
394of, or the locale installation in your system has problems (for example,
395some system files are broken or missing). There are quick and temporary
396fixes to these problems, as well as more thorough and lasting fixes.
397.Sh "Temporarily fixing locale problems"
398.IX Subsection "Temporarily fixing locale problems"
399The two quickest fixes are either to render Perl silent about any
400locale inconsistencies or to run Perl under the default locale \*(L"C\*(R".
401.PP
402Perl's moaning about locale problems can be silenced by setting the
403environment variable \s-1PERL_BADLANG\s0 to a zero value, for example \*(L"0\*(R".
404This method really just sweeps the problem under the carpet: you tell
405Perl to shut up even when Perl sees that something is wrong. Do not
406be surprised if later something locale-dependent misbehaves.
407.PP
408Perl can be run under the \*(L"C\*(R" locale by setting the environment
409variable \s-1LC_ALL\s0 to \*(L"C\*(R". This method is perhaps a bit more civilized
410than the \s-1PERL_BADLANG\s0 approach, but setting \s-1LC_ALL\s0 (or
411other locale variables) may affect other programs as well, not just
412Perl. In particular, external programs run from within Perl will see
413these changes. If you make the new settings permanent (read on), all
414programs you run see the changes. See \*(L"\s-1ENVIRONMENT\s0\*(R" for
415the full list of relevant environment variables and \*(L"\s-1USING\s0 \s-1LOCALES\s0\*(R"
416for their effects in Perl. Effects in other programs are
417easily deducible. For example, the variable \s-1LC_COLLATE\s0 may well affect
418your \fBsort\fR program (or whatever the program that arranges \*(L"records\*(R"
419alphabetically in your system is called).
420.PP
421You can test out changing these variables temporarily, and if the
422new settings seem to help, put those settings into your shell startup
423files. Consult your local documentation for the exact details. For example, in
424Bourne-like shells (\fBsh\fR, \fBksh\fR, \fBbash\fR, \fBzsh\fR):
425.PP
426.Vb 2
427\& LC_ALL=en_US.ISO8859-1
428\& export LC_ALL
429.Ve
430.PP
431This assumes that we saw the locale \*(L"en_US.ISO8859\-1\*(R" using the commands
432discussed above. We decided to try that instead of the above faulty
433locale \*(L"En_US\*(R"\-\-and in Cshish shells (\fBcsh\fR, \fBtcsh\fR)
434.PP
435.Vb 1
436\& setenv LC_ALL en_US.ISO8859-1
437.Ve
438.PP
439or if you have the \*(L"env\*(R" application you can do in any shell
440.PP
441.Vb 1
442\& env LC_ALL=en_US.ISO8859-1 perl ...
443.Ve
444.PP
445If you do not know what shell you have, consult your local
446helpdesk or the equivalent.
447.Sh "Permanently fixing locale problems"
448.IX Subsection "Permanently fixing locale problems"
449The slower but superior fix is to correct the misconfiguration of your
450own environment variables yourself, if you are able to. The
451mis(sing)configuration of the whole system's locales usually requires
452the help of your friendly system administrator.
453.PP
454First, see earlier in this document about \*(L"Finding locales\*(R". That tells
455how to find which locales are really supported\*(--and more importantly,
456installed\*(--on your system. In our example error message, environment
457variables affecting the locale are listed in the order of decreasing
458importance (and unset variables do not matter). Therefore, having
459\&\s-1LC_ALL\s0 set to \*(L"En_US\*(R" must have been the bad choice, as shown by the
460error message. First try fixing locale settings listed first.
461.PP
462Second, if using the listed commands you see something \fBexactly\fR
463(prefix matches do not count and case usually counts) like \*(L"En_US\*(R"
464without the quotes, then you should be okay because you are using a
465locale name that should be installed and available in your system.
466In this case, see \*(L"Permanently fixing your system's locale configuration\*(R".
467.Sh "Permanently fixing your system's locale configuration"
468.IX Subsection "Permanently fixing your system's locale configuration"
469This is when you see something like:
470.PP
471.Vb 4
472\& perl: warning: Please check that your locale settings:
473\& LC_ALL = "En_US",
474\& LANG = (unset)
475\& are supported and installed on your system.
476.Ve
477.PP
478but then cannot see that \*(L"En_US\*(R" listed by the above-mentioned
479commands. You may see things like \*(L"en_US.ISO8859\-1\*(R", but that isn't
480the same. In this case, try running under a locale
481that you can list and which somehow matches what you tried. The
482rules for matching locale names are a bit vague because
483standardization is weak in this area. See again
484\&\*(L"Finding locales\*(R" for general rules.
485.Sh "Fixing system locale configuration"
486.IX Subsection "Fixing system locale configuration"
487Contact a system administrator (preferably your own) and report the exact
488error message you get, and ask them to read this same documentation you
489are now reading. They should be able to check whether there is something
490wrong with the locale configuration of the system. The \*(L"Finding locales\*(R"
491section is unfortunately a bit vague about the exact commands and places
492because these things are not that standardized.
493.Sh "The localeconv function"
494.IX Subsection "The localeconv function"
495The \fIPOSIX::localeconv()\fR function allows you to get particulars of the
496locale-dependent numeric formatting information specified by the current
497\&\f(CW\*(C`LC_NUMERIC\*(C'\fR and \f(CW\*(C`LC_MONETARY\*(C'\fR locales. (If you just want the name of
498the current locale for a particular category, use \fIPOSIX::setlocale()\fR
499with a single parameter\*(--see \*(L"The setlocale function\*(R".)
500.PP
501.Vb 1
502\& use POSIX qw(locale_h);
503.Ve
504.PP
505.Vb 2
506\& # Get a reference to a hash of locale-dependent info
507\& $locale_values = localeconv();
508.Ve
509.PP
510.Vb 4
511\& # Output sorted list of the values
512\& for (sort keys %$locale_values) {
513\& printf "%-20s = %s\en", $_, $locale_values->{$_}
514\& }
515.Ve
516.PP
517\&\fIlocaleconv()\fR takes no arguments, and returns \fBa reference to\fR a hash.
518The keys of this hash are variable names for formatting, such as
519\&\f(CW\*(C`decimal_point\*(C'\fR and \f(CW\*(C`thousands_sep\*(C'\fR. The values are the
520corresponding, er, values. See \*(L"localeconv\*(R" in \s-1POSIX\s0 for a longer
521example listing the categories an implementation might be expected to
522provide; some provide more and others fewer. You don't need an
523explicit \f(CW\*(C`use locale\*(C'\fR, because \fIlocaleconv()\fR always observes the
524current locale.
525.PP
526Here's a simple-minded example program that rewrites its command-line
527parameters as integers correctly formatted in the current locale:
528.PP
529.Vb 3
530\& # See comments in previous example
531\& require 5.004;
532\& use POSIX qw(locale_h);
533.Ve
534.PP
535.Vb 3
536\& # Get some of locale's numeric formatting parameters
537\& my ($thousands_sep, $grouping) =
538\& @{localeconv()}{'thousands_sep', 'grouping'};
539.Ve
540.PP
541.Vb 2
542\& # Apply defaults if values are missing
543\& $thousands_sep = ',' unless $thousands_sep;
544.Ve
545.PP
546.Vb 16
547\& # grouping and mon_grouping are packed lists
548\& # of small integers (characters) telling the
549\& # grouping (thousand_seps and mon_thousand_seps
550\& # being the group dividers) of numbers and
551\& # monetary quantities. The integers' meanings:
552\& # 255 means no more grouping, 0 means repeat
553\& # the previous grouping, 1-254 means use that
554\& # as the current grouping. Grouping goes from
555\& # right to left (low to high digits). In the
556\& # below we cheat slightly by never using anything
557\& # else than the first grouping (whatever that is).
558\& if ($grouping) {
559\& @grouping = unpack("C*", $grouping);
560\& } else {
561\& @grouping = (3);
562\& }
563.Ve
564.PP
565.Vb 8
566\& # Format command line params for current locale
567\& for (@ARGV) {
568\& $_ = int; # Chop non-integer part
569\& 1 while
570\& s/(\ed)(\ed{$grouping[0]}($|$thousands_sep))/$1$thousands_sep$2/;
571\& print "$_";
572\& }
573\& print "\en";
574.Ve
575.Sh "I18N::Langinfo"
576.IX Subsection "I18N::Langinfo"
577Another interface for querying locale-dependent information is the
578\&\fII18N::Langinfo::langinfo()\fR function, available at least in UNIX-like
579systems and \s-1VMS\s0.
580.PP
581The following example will import the \fIlanginfo()\fR function itself and
582three constants to be used as arguments to \fIlanginfo()\fR: a constant for
583the abbreviated first day of the week (the numbering starts from
584Sunday = 1) and two more constants for the affirmative and negative
585answers for a yes/no question in the current locale.
586.PP
587.Vb 1
588\& use I18N::Langinfo qw(langinfo ABDAY_1 YESSTR NOSTR);
589.Ve
590.PP
591.Vb 1
592\& my ($abday_1, $yesstr, $nostr) = map { langinfo($_) } (ABDAY_1, YESSTR, NOSTR);
593.Ve
594.PP
595.Vb 1
596\& print "$abday_1? [$yesstr/$nostr] ";
597.Ve
598.PP
599In other words, in the \*(L"C\*(R" (or English) locale the above will probably
600print something like:
601.PP
602.Vb 1
603\& Sun? [yes/no]
604.Ve
605.PP
606See I18N::Langinfo for more information.
607.SH "LOCALE CATEGORIES"
608.IX Header "LOCALE CATEGORIES"
609The following subsections describe basic locale categories. Beyond these,
610some combination categories allow manipulation of more than one
611basic category at a time. See \*(L"\s-1ENVIRONMENT\s0\*(R" for a discussion of these.
612.Sh "Category \s-1LC_COLLATE:\s0 Collation"
613.IX Subsection "Category LC_COLLATE: Collation"
614In the scope of \f(CW\*(C`use\ locale\*(C'\fR, Perl looks to the \f(CW\*(C`LC_COLLATE\*(C'\fR
615environment variable to determine the application's notions on collation
616(ordering) of characters. For example, 'b' follows 'a' in Latin
617alphabets, but where do 'a\*'' and 'a\*o' belong? And while
618\&'color' follows 'chocolate' in English, what about in Spanish?
619.PP
620The following collations all make sense and you may meet any of them
621if you \*(L"use locale\*(R".
622.PP
623.Vb 4
624\& A B C D E a b c d e
625\& A a B b C c D d E e
626\& a A b B c C d D e E
627\& a b c d e A B C D E
628.Ve
629.PP
630Here is a code snippet to tell what \*(L"word\*(R"
631characters are in the current locale, in that locale's order:
632.PP
633.Vb 2
634\& use locale;
635\& print +(sort grep /\ew/, map { chr } 0..255), "\en";
636.Ve
637.PP
638Compare this with the characters that you see and their order if you
639state explicitly that the locale should be ignored:
640.PP
641.Vb 2
642\& no locale;
643\& print +(sort grep /\ew/, map { chr } 0..255), "\en";
644.Ve
645.PP
646This machine-native collation (which is what you get unless \f(CW\*(C`use\ locale\*(C'\fR has appeared earlier in the same block) must be used for
647sorting raw binary data, whereas the locale-dependent collation of the
648first example is useful for natural text.
649.PP
650As noted in \*(L"\s-1USING\s0 \s-1LOCALES\s0\*(R", \f(CW\*(C`cmp\*(C'\fR compares according to the current
651collation locale when \f(CW\*(C`use locale\*(C'\fR is in effect, but falls back to a
652char-by-char comparison for strings that the locale says are equal. You
653can use \fIPOSIX::strcoll()\fR if you don't want this fall\-back:
654.PP
655.Vb 3
656\& use POSIX qw(strcoll);
657\& $equal_in_locale =
658\& !strcoll("space and case ignored", "SpaceAndCaseIgnored");
659.Ve
660.PP
661$equal_in_locale will be true if the collation locale specifies a
662dictionary-like ordering that ignores space characters completely and
663which folds case.
664.PP
665If you have a single string that you want to check for \*(L"equality in
666locale\*(R" against several others, you might think you could gain a little
667efficiency by using \fIPOSIX::strxfrm()\fR in conjunction with \f(CW\*(C`eq\*(C'\fR:
668.PP
669.Vb 8
670\& use POSIX qw(strxfrm);
671\& $xfrm_string = strxfrm("Mixed-case string");
672\& print "locale collation ignores spaces\en"
673\& if $xfrm_string eq strxfrm("Mixed-casestring");
674\& print "locale collation ignores hyphens\en"
675\& if $xfrm_string eq strxfrm("Mixedcase string");
676\& print "locale collation ignores case\en"
677\& if $xfrm_string eq strxfrm("mixed-case string");
678.Ve
679.PP
680\&\fIstrxfrm()\fR takes a string and maps it into a transformed string for use
681in char-by-char comparisons against other transformed strings during
682collation. \*(L"Under the hood\*(R", locale-affected Perl comparison operators
683call \fIstrxfrm()\fR for both operands, then do a char-by-char
684comparison of the transformed strings. By calling \fIstrxfrm()\fR explicitly
685and using a non locale-affected comparison, the example attempts to save
686a couple of transformations. But in fact, it doesn't save anything: Perl
687magic (see \*(L"Magic Variables\*(R" in perlguts) creates the transformed version of a
688string the first time it's needed in a comparison, then keeps this version around
689in case it's needed again. An example rewritten the easy way with
690\&\f(CW\*(C`cmp\*(C'\fR runs just about as fast. It also copes with null characters
691embedded in strings; if you call \fIstrxfrm()\fR directly, it treats the first
692null it finds as a terminator. Don't expect the transformed strings
693it produces to be portable across systems\*(--or even from one revision
694of your operating system to the next. In short, don't call \fIstrxfrm()\fR
695directly: let Perl do it for you.
696.PP
697Note: \f(CW\*(C`use locale\*(C'\fR isn't shown in some of these examples because it isn't
698needed: \fIstrcoll()\fR and \fIstrxfrm()\fR exist only to generate locale-dependent
699results, and so always obey the current \f(CW\*(C`LC_COLLATE\*(C'\fR locale.
700.Sh "Category \s-1LC_CTYPE:\s0 Character Types"
701.IX Subsection "Category LC_CTYPE: Character Types"
702In the scope of \f(CW\*(C`use\ locale\*(C'\fR, Perl obeys the \f(CW\*(C`LC_CTYPE\*(C'\fR locale
703setting. This controls the application's notion of which characters are
704alphabetic. This affects Perl's \f(CW\*(C`\ew\*(C'\fR regular expression metanotation,
705which stands for alphanumeric characters\*(--that is, alphabetic,
706numeric, and the underscore character. (Consult perlre for more
707information about
708regular expressions.) Thanks to \f(CW\*(C`LC_CTYPE\*(C'\fR, depending on your locale
709setting, characters like '\*(ae', '\*(d-', '\*8', and
710\&'o\*/' may be understood as \f(CW\*(C`\ew\*(C'\fR characters.
711.PP
712The \f(CW\*(C`LC_CTYPE\*(C'\fR locale also provides the map used in transliterating
713characters between lower and uppercase. This affects the case-mapping
714functions\*(--\fIlc()\fR, \fIlcfirst()\fR, \fIuc()\fR, and \fIucfirst()\fR; case-mapping
715interpolation with \f(CW\*(C`\el\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, \f(CW\*(C`\eu\*(C'\fR, or \f(CW\*(C`\eU\*(C'\fR in double-quoted strings
716and \f(CW\*(C`s///\*(C'\fR substitutions; and case-independent regular expression
717pattern matching using the \f(CW\*(C`i\*(C'\fR modifier.
718.PP
719Finally, \f(CW\*(C`LC_CTYPE\*(C'\fR affects the \s-1POSIX\s0 character-class test
720functions\*(--\fIisalpha()\fR, \fIislower()\fR, and so on. For example, if you move
721from the \*(L"C\*(R" locale to a 7\-bit Scandinavian one, you may find\*(--possibly
722to your surprise\*(--that \*(L"|\*(R" moves from the \fIispunct()\fR class to \fIisalpha()\fR.
723.PP
724\&\fBNote:\fR A broken or malicious \f(CW\*(C`LC_CTYPE\*(C'\fR locale definition may result
725in clearly ineligible characters being considered to be alphanumeric by
726your application. For strict matching of (mundane) letters and
727digits\*(--for example, in command strings\*(--locale\-aware applications
728should use \f(CW\*(C`\ew\*(C'\fR inside a \f(CW\*(C`no locale\*(C'\fR block. See \*(L"\s-1SECURITY\s0\*(R".
729.Sh "Category \s-1LC_NUMERIC:\s0 Numeric Formatting"
730.IX Subsection "Category LC_NUMERIC: Numeric Formatting"
731In the scope of \f(CW\*(C`use\ locale\*(C'\fR, Perl obeys the \f(CW\*(C`LC_NUMERIC\*(C'\fR locale
732information, which controls an application's idea of how numbers should
733be formatted for human readability by the \fIprintf()\fR, \fIsprintf()\fR, and
734\&\fIwrite()\fR functions. String-to-numeric conversion by the \fIPOSIX::strtod()\fR
735function is also affected. In most implementations the only effect is to
736change the character used for the decimal point\*(--perhaps from '.' to ','.
737These functions aren't aware of such niceties as thousands separation and
738so on. (See \*(L"The localeconv function\*(R" if you care about these things.)
739.PP
740Output produced by \fIprint()\fR is also affected by the current locale: it
741depends on whether \f(CW\*(C`use locale\*(C'\fR or \f(CW\*(C`no locale\*(C'\fR is in effect, and
742corresponds to what you'd get from \fIprintf()\fR in the \*(L"C\*(R" locale. The
743same is true for Perl's internal conversions between numeric and
744string formats:
745.PP
746.Vb 2
747\& use POSIX qw(strtod);
748\& use locale;
749.Ve
750.PP
751.Vb 1
752\& $n = 5/2; # Assign numeric 2.5 to $n
753.Ve
754.PP
755.Vb 1
756\& $a = " $n"; # Locale-dependent conversion to string
757.Ve
758.PP
759.Vb 1
760\& print "half five is $n\en"; # Locale-dependent output
761.Ve
762.PP
763.Vb 1
764\& printf "half five is %g\en", $n; # Locale-dependent output
765.Ve
766.PP
767.Vb 2
768\& print "DECIMAL POINT IS COMMA\en"
769\& if $n == (strtod("2,5"))[0]; # Locale-dependent conversion
770.Ve
771.PP
772See also I18N::Langinfo and \f(CW\*(C`RADIXCHAR\*(C'\fR.
773.Sh "Category \s-1LC_MONETARY:\s0 Formatting of monetary amounts"
774.IX Subsection "Category LC_MONETARY: Formatting of monetary amounts"
775The C standard defines the \f(CW\*(C`LC_MONETARY\*(C'\fR category, but no function
776that is affected by its contents. (Those with experience of standards
777committees will recognize that the working group decided to punt on the
778issue.) Consequently, Perl takes no notice of it. If you really want
779to use \f(CW\*(C`LC_MONETARY\*(C'\fR, you can query its contents\*(--see
780\&\*(L"The localeconv function\*(R"\*(--and use the information that it returns in your
781application's own formatting of currency amounts. However, you may well
782find that the information, voluminous and complex though it may be, still
783does not quite meet your requirements: currency formatting is a hard nut
784to crack.
785.PP
786See also I18N::Langinfo and \f(CW\*(C`CRNCYSTR\*(C'\fR.
787.Sh "\s-1LC_TIME\s0"
788.IX Subsection "LC_TIME"
789Output produced by \fIPOSIX::strftime()\fR, which builds a formatted
790human-readable date/time string, is affected by the current \f(CW\*(C`LC_TIME\*(C'\fR
791locale. Thus, in a French locale, the output produced by the \f(CW%B\fR
792format element (full month name) for the first month of the year would
793be \*(L"janvier\*(R". Here's how to get a list of long month names in the
794current locale:
795.PP
796.Vb 5
797\& use POSIX qw(strftime);
798\& for (0..11) {
799\& $long_month_name[$_] =
800\& strftime("%B", 0, 0, 0, 1, $_, 96);
801\& }
802.Ve
803.PP
804Note: \f(CW\*(C`use locale\*(C'\fR isn't needed in this example: as a function that
805exists only to generate locale-dependent results, \fIstrftime()\fR always
806obeys the current \f(CW\*(C`LC_TIME\*(C'\fR locale.
807.PP
808See also I18N::Langinfo and \f(CW\*(C`ABDAY_1\*(C'\fR..\f(CW\*(C`ABDAY_7\*(C'\fR, \f(CW\*(C`DAY_1\*(C'\fR..\f(CW\*(C`DAY_7\*(C'\fR,
809\&\f(CW\*(C`ABMON_1\*(C'\fR..\f(CW\*(C`ABMON_12\*(C'\fR, and \f(CW\*(C`MON_1\*(C'\fR..\f(CW\*(C`MON_12\*(C'\fR.
810.Sh "Other categories"
811.IX Subsection "Other categories"
812The remaining locale category, \f(CW\*(C`LC_MESSAGES\*(C'\fR (possibly supplemented
813by others in particular implementations) is not currently used by
814Perl\*(--except possibly to affect the behavior of library functions
815called by extensions outside the standard Perl distribution and by the
816operating system and its utilities. Note especially that the string
817value of \f(CW$!\fR and the error messages given by external utilities may
818be changed by \f(CW\*(C`LC_MESSAGES\*(C'\fR. If you want to have portable error
819codes, use \f(CW\*(C`%!\*(C'\fR. See Errno.
820.SH "SECURITY"
821.IX Header "SECURITY"
822Although the main discussion of Perl security issues can be found in
823perlsec, a discussion of Perl's locale handling would be incomplete
824if it did not draw your attention to locale-dependent security issues.
825Locales\*(--particularly on systems that allow unprivileged users to
826build their own locales\*(--are untrustworthy. A malicious (or just plain
827broken) locale can make a locale-aware application give unexpected
828results. Here are a few possibilities:
829.IP "\(bu" 4
830Regular expression checks for safe file names or mail addresses using
831\&\f(CW\*(C`\ew\*(C'\fR may be spoofed by an \f(CW\*(C`LC_CTYPE\*(C'\fR locale that claims that
832characters such as \*(L">\*(R" and \*(L"|\*(R" are alphanumeric.
833.IP "\(bu" 4
834String interpolation with case\-mapping, as in, say, \f(CW\*(C`$dest =
835"C:\eU$name.$ext"\*(C'\fR, may produce dangerous results if a bogus \s-1LC_CTYPE\s0
836case-mapping table is in effect.
837.IP "\(bu" 4
838A sneaky \f(CW\*(C`LC_COLLATE\*(C'\fR locale could result in the names of students with
839\&\*(L"D\*(R" grades appearing ahead of those with \*(L"A\*(R"s.
840.IP "\(bu" 4
841An application that takes the trouble to use information in
842\&\f(CW\*(C`LC_MONETARY\*(C'\fR may format debits as if they were credits and vice versa
843if that locale has been subverted. Or it might make payments in \s-1US\s0
844dollars instead of Hong Kong dollars.
845.IP "\(bu" 4
846The date and day names in dates formatted by \fIstrftime()\fR could be
847manipulated to advantage by a malicious user able to subvert the
848\&\f(CW\*(C`LC_TIME\*(C'\fR locale. (\*(L"Look\*(--it says I wasn't in the building on
849Sunday.\*(R")
850.PP
851Such dangers are not peculiar to the locale system: any aspect of an
852application's environment which may be modified maliciously presents
853similar challenges. Similarly, they are not specific to Perl: any
854programming language that allows you to write programs that take
855account of their environment exposes you to these issues.
856.PP
857Perl cannot protect you from all possibilities shown in the
858examples\*(--there is no substitute for your own vigilance\*(--but, when
859\&\f(CW\*(C`use locale\*(C'\fR is in effect, Perl uses the tainting mechanism (see
860perlsec) to mark string results that become locale\-dependent, and
861which may be untrustworthy in consequence. Here is a summary of the
862tainting behavior of operators and functions that may be affected by
863the locale:
864.IP "\(bu" 4
865\&\fBComparison operators\fR (\f(CW\*(C`lt\*(C'\fR, \f(CW\*(C`le\*(C'\fR, \f(CW\*(C`ge\*(C'\fR, \f(CW\*(C`gt\*(C'\fR and \f(CW\*(C`cmp\*(C'\fR):
866.Sp
867Scalar true/false (or less/equal/greater) result is never tainted.
868.IP "\(bu" 4
869\&\fBCase-mapping interpolation\fR (with \f(CW\*(C`\el\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, \f(CW\*(C`\eu\*(C'\fR or \f(CW\*(C`\eU\*(C'\fR)
870.Sp
871Result string containing interpolated material is tainted if
872\&\f(CW\*(C`use locale\*(C'\fR is in effect.
873.IP "\(bu" 4
874\&\fBMatching operator\fR (\f(CW\*(C`m//\*(C'\fR):
875.Sp
876Scalar true/false result is never tainted.
877.Sp
878Subpatterns, either delivered as a list-context result or as \f(CW$1\fR etc.
879are tainted if \f(CW\*(C`use locale\*(C'\fR is in effect, and the subpattern regular
880expression contains \f(CW\*(C`\ew\*(C'\fR (to match an alphanumeric character), \f(CW\*(C`\eW\*(C'\fR
881(non\-alphanumeric character), \f(CW\*(C`\es\*(C'\fR (whitespace character), or \f(CW\*(C`\eS\*(C'\fR
882(non\-whitespace character). The matched-pattern variables, $&, $`
883(pre\-match), $' (post\-match), and $+ (last match) are also tainted if
884\&\f(CW\*(C`use locale\*(C'\fR is in effect and the regular expression contains \f(CW\*(C`\ew\*(C'\fR,
885\&\f(CW\*(C`\eW\*(C'\fR, \f(CW\*(C`\es\*(C'\fR, or \f(CW\*(C`\eS\*(C'\fR.
886.IP "\(bu" 4
887\&\fBSubstitution operator\fR (\f(CW\*(C`s///\*(C'\fR):
888.Sp
889Has the same behavior as the match operator. Also, the left
890operand of \f(CW\*(C`=~\*(C'\fR becomes tainted when \f(CW\*(C`use locale\*(C'\fR is in effect
891if modified as a result of a substitution based on a regular
892expression match involving \f(CW\*(C`\ew\*(C'\fR, \f(CW\*(C`\eW\*(C'\fR, \f(CW\*(C`\es\*(C'\fR, or \f(CW\*(C`\eS\*(C'\fR; or of
893case-mapping with \f(CW\*(C`\el\*(C'\fR, \f(CW\*(C`\eL\*(C'\fR, \f(CW\*(C`\eu\*(C'\fR or \f(CW\*(C`\eU\*(C'\fR.
894.IP "\(bu" 4
895\&\fBOutput formatting functions\fR (\fIprintf()\fR and \fIwrite()\fR):
896.Sp
897Results are never tainted because otherwise even output from print,
898for example \f(CW\*(C`print(1/7)\*(C'\fR, should be tainted if \f(CW\*(C`use locale\*(C'\fR is in
899effect.
900.IP "\(bu" 4
901\&\fBCase-mapping functions\fR (\fIlc()\fR, \fIlcfirst()\fR, \fIuc()\fR, \fIucfirst()\fR):
902.Sp
903Results are tainted if \f(CW\*(C`use locale\*(C'\fR is in effect.
904.IP "\(bu" 4
905\&\fB\s-1POSIX\s0 locale-dependent functions\fR (\fIlocaleconv()\fR, \fIstrcoll()\fR,
906\&\fIstrftime()\fR, \fIstrxfrm()\fR):
907.Sp
908Results are never tainted.
909.IP "\(bu" 4
910\&\fB\s-1POSIX\s0 character class tests\fR (\fIisalnum()\fR, \fIisalpha()\fR, \fIisdigit()\fR,
911\&\fIisgraph()\fR, \fIislower()\fR, \fIisprint()\fR, \fIispunct()\fR, \fIisspace()\fR, \fIisupper()\fR,
912\&\fIisxdigit()\fR):
913.Sp
914True/false results are never tainted.
915.PP
916Three examples illustrate locale-dependent tainting.
917The first program, which ignores its locale, won't run: a value taken
918directly from the command line may not be used to name an output file
919when taint checks are enabled.
920.PP
921.Vb 2
922\& #!/usr/local/bin/perl -T
923\& # Run with taint checking
924.Ve
925.PP
926.Vb 2
927\& # Command line sanity check omitted...
928\& $tainted_output_file = shift;
929.Ve
930.PP
931.Vb 2
932\& open(F, ">$tainted_output_file")
933\& or warn "Open of $tainted_output_file failed: $!\en";
934.Ve
935.PP
936The program can be made to run by \*(L"laundering\*(R" the tainted value through
937a regular expression: the second example\*(--which still ignores locale
938information\*(--runs, creating the file named on its command line
939if it can.
940.PP
941.Vb 1
942\& #!/usr/local/bin/perl -T
943.Ve
944.PP
945.Vb 3
946\& $tainted_output_file = shift;
947\& $tainted_output_file =~ m%[\ew/]+%;
948\& $untainted_output_file = $&;
949.Ve
950.PP
951.Vb 2
952\& open(F, ">$untainted_output_file")
953\& or warn "Open of $untainted_output_file failed: $!\en";
954.Ve
955.PP
956Compare this with a similar but locale-aware program:
957.PP
958.Vb 1
959\& #!/usr/local/bin/perl -T
960.Ve
961.PP
962.Vb 4
963\& $tainted_output_file = shift;
964\& use locale;
965\& $tainted_output_file =~ m%[\ew/]+%;
966\& $localized_output_file = $&;
967.Ve
968.PP
969.Vb 2
970\& open(F, ">$localized_output_file")
971\& or warn "Open of $localized_output_file failed: $!\en";
972.Ve
973.PP
974This third program fails to run because $& is tainted: it is the result
975of a match involving \f(CW\*(C`\ew\*(C'\fR while \f(CW\*(C`use locale\*(C'\fR is in effect.
976.SH "ENVIRONMENT"
977.IX Header "ENVIRONMENT"
978.IP "\s-1PERL_BADLANG\s0" 12
979.IX Item "PERL_BADLANG"
980A string that can suppress Perl's warning about failed locale settings
981at startup. Failure can occur if the locale support in the operating
982system is lacking (broken) in some way\*(--or if you mistyped the name of
983a locale when you set up your environment. If this environment
984variable is absent, or has a value that does not evaluate to integer
985zero\*(--that is, \*(L"0\*(R" or \*(L"\*(R"\*(--Perl will complain about locale setting
986failures.
987.Sp
988\&\fB\s-1NOTE\s0\fR: \s-1PERL_BADLANG\s0 only gives you a way to hide the warning message.
989The message tells about some problem in your system's locale support,
990and you should investigate what the problem is.
991.PP
992The following environment variables are not specific to Perl: They are
993part of the standardized (\s-1ISO\s0 C, \s-1XPG4\s0, \s-1POSIX\s0 1.c) \fIsetlocale()\fR method
994for controlling an application's opinion on data.
995.IP "\s-1LC_ALL\s0" 12
996.IX Item "LC_ALL"
997\&\f(CW\*(C`LC_ALL\*(C'\fR is the \*(L"override\-all\*(R" locale environment variable. If
998set, it overrides all the rest of the locale environment variables.
999.IP "\s-1LANGUAGE\s0" 12
1000.IX Item "LANGUAGE"
1001\&\fB\s-1NOTE\s0\fR: \f(CW\*(C`LANGUAGE\*(C'\fR is a \s-1GNU\s0 extension; it affects you only if you
1002are using the \s-1GNU\s0 libc. This is the case if you are using e.g. Linux.
1003If you are using \*(L"commercial\*(R" UNIXes you are most probably \fInot\fR
1004using \s-1GNU\s0 libc and you can ignore \f(CW\*(C`LANGUAGE\*(C'\fR.
1005.Sp
1006However, in the case you are using \f(CW\*(C`LANGUAGE\*(C'\fR: it affects the
1007language of informational, warning, and error messages output by
1008commands (in other words, it's like \f(CW\*(C`LC_MESSAGES\*(C'\fR) but it has higher
1009priority than \s-1LC_ALL\s0. Moreover, it's not a single value but
1010instead a \*(L"path\*(R" (\*(L":\*(R"\-separated list) of \fIlanguages\fR (not locales).
1011See the \s-1GNU\s0 \f(CW\*(C`gettext\*(C'\fR library documentation for more information.
1012.IP "\s-1LC_CTYPE\s0" 12
1013.IX Item "LC_CTYPE"
1014In the absence of \f(CW\*(C`LC_ALL\*(C'\fR, \f(CW\*(C`LC_CTYPE\*(C'\fR chooses the character type
1015locale. In the absence of both \f(CW\*(C`LC_ALL\*(C'\fR and \f(CW\*(C`LC_CTYPE\*(C'\fR, \f(CW\*(C`LANG\*(C'\fR
1016chooses the character type locale.
1017.IP "\s-1LC_COLLATE\s0" 12
1018.IX Item "LC_COLLATE"
1019In the absence of \f(CW\*(C`LC_ALL\*(C'\fR, \f(CW\*(C`LC_COLLATE\*(C'\fR chooses the collation
1020(sorting) locale. In the absence of both \f(CW\*(C`LC_ALL\*(C'\fR and \f(CW\*(C`LC_COLLATE\*(C'\fR,
1021\&\f(CW\*(C`LANG\*(C'\fR chooses the collation locale.
1022.IP "\s-1LC_MONETARY\s0" 12
1023.IX Item "LC_MONETARY"
1024In the absence of \f(CW\*(C`LC_ALL\*(C'\fR, \f(CW\*(C`LC_MONETARY\*(C'\fR chooses the monetary
1025formatting locale. In the absence of both \f(CW\*(C`LC_ALL\*(C'\fR and \f(CW\*(C`LC_MONETARY\*(C'\fR,
1026\&\f(CW\*(C`LANG\*(C'\fR chooses the monetary formatting locale.
1027.IP "\s-1LC_NUMERIC\s0" 12
1028.IX Item "LC_NUMERIC"
1029In the absence of \f(CW\*(C`LC_ALL\*(C'\fR, \f(CW\*(C`LC_NUMERIC\*(C'\fR chooses the numeric format
1030locale. In the absence of both \f(CW\*(C`LC_ALL\*(C'\fR and \f(CW\*(C`LC_NUMERIC\*(C'\fR, \f(CW\*(C`LANG\*(C'\fR
1031chooses the numeric format locale.
1032.IP "\s-1LC_TIME\s0" 12
1033.IX Item "LC_TIME"
1034In the absence of \f(CW\*(C`LC_ALL\*(C'\fR, \f(CW\*(C`LC_TIME\*(C'\fR chooses the date and time
1035formatting locale. In the absence of both \f(CW\*(C`LC_ALL\*(C'\fR and \f(CW\*(C`LC_TIME\*(C'\fR,
1036\&\f(CW\*(C`LANG\*(C'\fR chooses the date and time formatting locale.
1037.IP "\s-1LANG\s0" 12
1038.IX Item "LANG"
1039\&\f(CW\*(C`LANG\*(C'\fR is the \*(L"catch\-all\*(R" locale environment variable. If it is set, it
1040is used as the last resort after the overall \f(CW\*(C`LC_ALL\*(C'\fR and the
1041category-specific \f(CW\*(C`LC_...\*(C'\fR.
1042.SH "NOTES"
1043.IX Header "NOTES"
1044.Sh "Backward compatibility"
1045.IX Subsection "Backward compatibility"
1046Versions of Perl prior to 5.004 \fBmostly\fR ignored locale information,
1047generally behaving as if something similar to the \f(CW"C"\fR locale were
1048always in force, even if the program environment suggested otherwise
1049(see \*(L"The setlocale function\*(R"). By default, Perl still behaves this
1050way for backward compatibility. If you want a Perl application to pay
1051attention to locale information, you \fBmust\fR use the \f(CW\*(C`use\ locale\*(C'\fR
1052pragma (see \*(L"The use locale pragma\*(R") to instruct it to do so.
1053.PP
1054Versions of Perl from 5.002 to 5.003 did use the \f(CW\*(C`LC_CTYPE\*(C'\fR
1055information if available; that is, \f(CW\*(C`\ew\*(C'\fR did understand what
1056were the letters according to the locale environment variables.
1057The problem was that the user had no control over the feature:
1058if the C library supported locales, Perl used them.
1059.Sh "I18N::Collate obsolete"
1060.IX Subsection "I18N::Collate obsolete"
1061In versions of Perl prior to 5.004, per-locale collation was possible
1062using the \f(CW\*(C`I18N::Collate\*(C'\fR library module. This module is now mildly
1063obsolete and should be avoided in new applications. The \f(CW\*(C`LC_COLLATE\*(C'\fR
1064functionality is now integrated into the Perl core language: One can
1065use locale-specific scalar data completely normally with \f(CW\*(C`use locale\*(C'\fR,
1066so there is no longer any need to juggle with the scalar references of
1067\&\f(CW\*(C`I18N::Collate\*(C'\fR.
1068.Sh "Sort speed and memory use impacts"
1069.IX Subsection "Sort speed and memory use impacts"
1070Comparing and sorting by locale is usually slower than the default
1071sorting; slow-downs of two to four times have been observed. It will
1072also consume more memory: once a Perl scalar variable has participated
1073in any string comparison or sorting operation obeying the locale
1074collation rules, it will take 3\-15 times more memory than before. (The
1075exact multiplier depends on the string's contents, the operating system
1076and the locale.) These downsides are dictated more by the operating
1077system's implementation of the locale system than by Perl.
1078.Sh "\fIwrite()\fP and \s-1LC_NUMERIC\s0"
1079.IX Subsection "write() and LC_NUMERIC"
1080Formats are the only part of Perl that unconditionally uses information
1081from a program's locale; if a program's environment specifies an
1082\&\s-1LC_NUMERIC\s0 locale, it is always used to specify the decimal point
1083character in formatted output. Formatted output cannot be controlled by
1084\&\f(CW\*(C`use locale\*(C'\fR because the pragma is tied to the block structure of the
1085program, and, for historical reasons, formats exist outside that block
1086structure.
1087.Sh "Freely available locale definitions"
1088.IX Subsection "Freely available locale definitions"
1089There is a large collection of locale definitions at
1090ftp://dkuug.dk/i18n/WG15\-collection . You should be aware that it is
1091unsupported, and is not claimed to be fit for any purpose. If your
1092system allows installation of arbitrary locales, you may find the
1093definitions useful as they are, or as a basis for the development of
1094your own locales.
1095.Sh "I18n and l10n"
1096.IX Subsection "I18n and l10n"
1097\&\*(L"Internationalization\*(R" is often abbreviated as \fBi18n\fR because its first
1098and last letters are separated by eighteen others. (You may guess why
1099the internalin ... internaliti ... i18n tends to get abbreviated.) In
1100the same way, \*(L"localization\*(R" is often abbreviated to \fBl10n\fR.
1101.Sh "An imperfect standard"
1102.IX Subsection "An imperfect standard"
1103Internationalization, as defined in the C and \s-1POSIX\s0 standards, can be
1104criticized as incomplete, ungainly, and having too large a granularity.
1105(Locales apply to a whole process, when it would arguably be more useful
1106to have them apply to a single thread, window group, or whatever.) They
1107also have a tendency, like standards groups, to divide the world into
1108nations, when we all know that the world can equally well be divided
1109into bankers, bikers, gamers, and so on. But, for now, it's the only
1110standard we've got. This may be construed as a bug.
1111.SH "Unicode and UTF\-8"
1112.IX Header "Unicode and UTF-8"
1113The support of Unicode is new starting from Perl version 5.6, and
1114more fully implemented in version 5.8. See perluniintro and
1115perlunicode for more details.
1116.PP
1117Usually locale settings and Unicode do not affect each other, but
1118there are exceptions, see \*(L"Locales\*(R" in perlunicode for examples.
1119.SH "BUGS"
1120.IX Header "BUGS"
1121.Sh "Broken systems"
1122.IX Subsection "Broken systems"
1123In certain systems, the operating system's locale support
1124is broken and cannot be fixed or used by Perl. Such deficiencies can
1125and will result in mysterious hangs and/or Perl core dumps when
1126\&\f(CW\*(C`use locale\*(C'\fR is in effect. When confronted with such a system,
1127please report in excruciating detail to <\fIperlbug@perl.org\fR>, and
1128complain to your vendor: bug fixes may exist for these problems
1129in your operating system. Sometimes such bug fixes are called an
1130operating system upgrade.
1131.SH "SEE ALSO"
1132.IX Header "SEE ALSO"
1133I18N::Langinfo, perluniintro, perlunicode, open,
1134\&\*(L"isalnum\*(R" in \s-1POSIX\s0, \*(L"isalpha\*(R" in \s-1POSIX\s0,
1135\&\*(L"isdigit\*(R" in \s-1POSIX\s0, \*(L"isgraph\*(R" in \s-1POSIX\s0, \*(L"islower\*(R" in \s-1POSIX\s0,
1136\&\*(L"isprint\*(R" in \s-1POSIX\s0, \*(L"ispunct\*(R" in \s-1POSIX\s0, \*(L"isspace\*(R" in \s-1POSIX\s0,
1137\&\*(L"isupper\*(R" in \s-1POSIX\s0, \*(L"isxdigit\*(R" in \s-1POSIX\s0, \*(L"localeconv\*(R" in \s-1POSIX\s0,
1138\&\*(L"setlocale\*(R" in \s-1POSIX\s0, \*(L"strcoll\*(R" in \s-1POSIX\s0, \*(L"strftime\*(R" in \s-1POSIX\s0,
1139\&\*(L"strtod\*(R" in \s-1POSIX\s0, \*(L"strxfrm\*(R" in \s-1POSIX\s0.
1140.SH "HISTORY"
1141.IX Header "HISTORY"
1142Jarkko Hietaniemi's original \fIperli18n.pod\fR heavily hacked by Dominic
1143Dunlop, assisted by the perl5\-porters. Prose worked over a bit by
1144Tom Christiansen.
1145.PP
1146Last update: Thu Jun 11 08:44:13 \s-1MDT\s0 1998