Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | .\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "Unicode::UCD 3" | |
132 | .TH Unicode::UCD 3 "2002-06-01" "perl v5.8.0" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | Unicode::UCD \- Unicode character database | |
135 | .SH "SYNOPSIS" | |
136 | .IX Header "SYNOPSIS" | |
137 | .Vb 2 | |
138 | \& use Unicode::UCD 'charinfo'; | |
139 | \& my $charinfo = charinfo($codepoint); | |
140 | .Ve | |
141 | .PP | |
142 | .Vb 2 | |
143 | \& use Unicode::UCD 'charblock'; | |
144 | \& my $charblock = charblock($codepoint); | |
145 | .Ve | |
146 | .PP | |
147 | .Vb 2 | |
148 | \& use Unicode::UCD 'charscript'; | |
149 | \& my $charscript = charscript($codepoint); | |
150 | .Ve | |
151 | .PP | |
152 | .Vb 2 | |
153 | \& use Unicode::UCD 'charblocks'; | |
154 | \& my $charblocks = charblocks(); | |
155 | .Ve | |
156 | .PP | |
157 | .Vb 2 | |
158 | \& use Unicode::UCD 'charscripts'; | |
159 | \& my %charscripts = charscripts(); | |
160 | .Ve | |
161 | .PP | |
162 | .Vb 3 | |
163 | \& use Unicode::UCD qw(charscript charinrange); | |
164 | \& my $range = charscript($script); | |
165 | \& print "looks like $script\en" if charinrange($range, $codepoint); | |
166 | .Ve | |
167 | .PP | |
168 | .Vb 2 | |
169 | \& use Unicode::UCD 'compexcl'; | |
170 | \& my $compexcl = compexcl($codepoint); | |
171 | .Ve | |
172 | .PP | |
173 | .Vb 1 | |
174 | \& my $unicode_version = Unicode::UCD::UnicodeVersion(); | |
175 | .Ve | |
176 | .SH "DESCRIPTION" | |
177 | .IX Header "DESCRIPTION" | |
178 | The Unicode::UCD module offers a simple interface to the Unicode | |
179 | Character Database. | |
180 | .Sh "charinfo" | |
181 | .IX Subsection "charinfo" | |
182 | .Vb 1 | |
183 | \& use Unicode::UCD 'charinfo'; | |
184 | .Ve | |
185 | .PP | |
186 | .Vb 1 | |
187 | \& my $charinfo = charinfo(0x41); | |
188 | .Ve | |
189 | .PP | |
190 | \&\fIcharinfo()\fR returns a reference to a hash that has the following fields | |
191 | as defined by the Unicode standard: | |
192 | .PP | |
193 | .Vb 1 | |
194 | \& key | |
195 | .Ve | |
196 | .PP | |
197 | .Vb 15 | |
198 | \& code code point with at least four hexdigits | |
199 | \& name name of the character IN UPPER CASE | |
200 | \& category general category of the character | |
201 | \& combining classes used in the Canonical Ordering Algorithm | |
202 | \& bidi bidirectional category | |
203 | \& decomposition character decomposition mapping | |
204 | \& decimal if decimal digit this is the integer numeric value | |
205 | \& digit if digit this is the numeric value | |
206 | \& numeric if numeric this is the integer or rational numeric value | |
207 | \& mirrored if mirrored in bidirectional text | |
208 | \& unicode10 Unicode 1.0 name if existed and different | |
209 | \& comment ISO 10646 comment field | |
210 | \& upper uppercase equivalent mapping | |
211 | \& lower lowercase equivalent mapping | |
212 | \& title titlecase equivalent mapping | |
213 | .Ve | |
214 | .PP | |
215 | .Vb 2 | |
216 | \& block block the character belongs to (used in \ep{In...}) | |
217 | \& script script the character belongs to | |
218 | .Ve | |
219 | .PP | |
220 | If no match is found, a reference to an empty hash is returned. | |
221 | .PP | |
222 | The \f(CW\*(C`block\*(C'\fR property is the same as returned by \fIcharblock()\fR. It is | |
223 | not defined in the Unicode Character Database proper (Chapter 4 of the | |
224 | Unicode 3.0 Standard, aka \s-1TUS3\s0) but instead in an auxiliary database | |
225 | (Chapter 14 of \s-1TUS3\s0). Similarly for the \f(CW\*(C`script\*(C'\fR property. | |
226 | .PP | |
227 | Note that you cannot do (de)composition and casing based solely on the | |
228 | above \f(CW\*(C`decomposition\*(C'\fR and \f(CW\*(C`lower\*(C'\fR, \f(CW\*(C`upper\*(C'\fR, \f(CW\*(C`title\*(C'\fR properties; | |
229 | you will also need the \fIcompexcl()\fR, \fIcasefold()\fR, and \fIcasespec()\fR functions. | |
230 | .Sh "charblock" | |
231 | .IX Subsection "charblock" | |
232 | .Vb 1 | |
233 | \& use Unicode::UCD 'charblock'; | |
234 | .Ve | |
235 | .PP | |
236 | .Vb 4 | |
237 | \& my $charblock = charblock(0x41); | |
238 | \& my $charblock = charblock(1234); | |
239 | \& my $charblock = charblock("0x263a"); | |
240 | \& my $charblock = charblock("U+263a"); | |
241 | .Ve | |
242 | .PP | |
243 | .Vb 1 | |
244 | \& my $range = charblock('Armenian'); | |
245 | .Ve | |
246 | .PP | |
247 | With a \fBcode point argument\fR \fIcharblock()\fR returns the \fIblock\fR the character | |
248 | belongs to, e.g. \f(CW\*(C`Basic Latin\*(C'\fR. Note that not all the character | |
249 | positions within all blocks are defined. | |
250 | .PP | |
251 | See also \*(L"Blocks versus Scripts\*(R". | |
252 | .PP | |
253 | If supplied with an argument that can't be a code point, \fIcharblock()\fR tries | |
254 | to do the opposite and interpret the argument as a character block. The | |
255 | return value is a \fIrange\fR: an anonymous list of lists that contain | |
256 | \&\fIstart-of-range\fR, \fIend-of-range\fR code point pairs. You can test whether a | |
257 | code point is in a range using the \*(L"charinrange\*(R" function. If the | |
258 | argument is not a known character block, \f(CW\*(C`undef\*(C'\fR is returned. | |
259 | .Sh "charscript" | |
260 | .IX Subsection "charscript" | |
261 | .Vb 1 | |
262 | \& use Unicode::UCD 'charscript'; | |
263 | .Ve | |
264 | .PP | |
265 | .Vb 3 | |
266 | \& my $charscript = charscript(0x41); | |
267 | \& my $charscript = charscript(1234); | |
268 | \& my $charscript = charscript("U+263a"); | |
269 | .Ve | |
270 | .PP | |
271 | .Vb 1 | |
272 | \& my $range = charscript('Thai'); | |
273 | .Ve | |
274 | .PP | |
275 | With a \fBcode point argument\fR \fIcharscript()\fR returns the \fIscript\fR the | |
276 | character belongs to, e.g. \f(CW\*(C`Latin\*(C'\fR, \f(CW\*(C`Greek\*(C'\fR, \f(CW\*(C`Han\*(C'\fR. | |
277 | .PP | |
278 | See also \*(L"Blocks versus Scripts\*(R". | |
279 | .PP | |
280 | If supplied with an argument that can't be a code point, \fIcharscript()\fR tries | |
281 | to do the opposite and interpret the argument as a character script. The | |
282 | return value is a \fIrange\fR: an anonymous list of lists that contain | |
283 | \&\fIstart-of-range\fR, \fIend-of-range\fR code point pairs. You can test whether a | |
284 | code point is in a range using the \*(L"charinrange\*(R" function. If the | |
285 | argument is not a known character script, \f(CW\*(C`undef\*(C'\fR is returned. | |
286 | .Sh "charblocks" | |
287 | .IX Subsection "charblocks" | |
288 | .Vb 1 | |
289 | \& use Unicode::UCD 'charblocks'; | |
290 | .Ve | |
291 | .PP | |
292 | .Vb 1 | |
293 | \& my $charblocks = charblocks(); | |
294 | .Ve | |
295 | .PP | |
296 | \&\fIcharblocks()\fR returns a reference to a hash with the known block names | |
297 | as the keys, and the code point ranges (see \*(L"charblock\*(R") as the values. | |
298 | .PP | |
299 | See also \*(L"Blocks versus Scripts\*(R". | |
300 | .Sh "charscripts" | |
301 | .IX Subsection "charscripts" | |
302 | .Vb 1 | |
303 | \& use Unicode::UCD 'charscripts'; | |
304 | .Ve | |
305 | .PP | |
306 | .Vb 1 | |
307 | \& my %charscripts = charscripts(); | |
308 | .Ve | |
309 | .PP | |
310 | \&\fIcharscripts()\fR returns a hash with the known script names as the keys, | |
311 | and the code point ranges (see \*(L"charscript\*(R") as the values. | |
312 | .PP | |
313 | See also \*(L"Blocks versus Scripts\*(R". | |
314 | .Sh "Blocks versus Scripts" | |
315 | .IX Subsection "Blocks versus Scripts" | |
316 | The difference between a block and a script is that scripts are closer | |
317 | to the linguistic notion of a set of characters required to present | |
318 | languages, while block is more of an artifact of the Unicode character | |
319 | numbering and separation into blocks of (mostly) 256 characters. | |
320 | .PP | |
321 | For example the Latin \fBscript\fR is spread over several \fBblocks\fR, such | |
322 | as \f(CW\*(C`Basic Latin\*(C'\fR, \f(CW\*(C`Latin 1 Supplement\*(C'\fR, \f(CW\*(C`Latin Extended\-A\*(C'\fR, and | |
323 | \&\f(CW\*(C`Latin Extended\-B\*(C'\fR. On the other hand, the Latin script does not | |
324 | contain all the characters of the \f(CW\*(C`Basic Latin\*(C'\fR block (also known as | |
325 | \s-1ASCII\s0): it includes only the letters, and not, for example, the digits | |
326 | or the punctuation. | |
327 | .PP | |
328 | For blocks see http://www.unicode.org/Public/UNIDATA/Blocks.txt | |
329 | .PP | |
330 | For scripts see \s-1UTR\s0 #24: http://www.unicode.org/unicode/reports/tr24/ | |
331 | .Sh "Matching Scripts and Blocks" | |
332 | .IX Subsection "Matching Scripts and Blocks" | |
333 | Scripts are matched with the regular-expression construct | |
334 | \&\f(CW\*(C`\ep{...}\*(C'\fR (e.g. \f(CW\*(C`\ep{Tibetan}\*(C'\fR matches characters of the Tibetan script), | |
335 | while \f(CW\*(C`\ep{In...}\*(C'\fR is used for blocks (e.g. \f(CW\*(C`\ep{InTibetan}\*(C'\fR matches | |
336 | any of the 256 code points in the Tibetan block). | |
337 | .Sh "Code Point Arguments" | |
338 | .IX Subsection "Code Point Arguments" | |
339 | A \fIcode point argument\fR is either a decimal or a hexadecimal scalar | |
340 | designating a Unicode character, or \f(CW\*(C`U+\*(C'\fR followed by hexadecimals | |
341 | designating a Unicode character. Note that Unicode is \fBnot\fR limited | |
342 | to 16 bits (the number of Unicode characters is open\-ended, in theory | |
343 | unlimited): you may have more than 4 hexdigits. | |
344 | .Sh "charinrange" | |
345 | .IX Subsection "charinrange" | |
346 | In addition to using the \f(CW\*(C`\ep{In...}\*(C'\fR and \f(CW\*(C`\eP{In...}\*(C'\fR constructs, you | |
347 | can also test whether a code point is in the \fIrange\fR as returned by | |
348 | \&\*(L"charblock\*(R" and \*(L"charscript\*(R" or as the values of the hash returned | |
349 | by \*(L"charblocks\*(R" and \*(L"charscripts\*(R" by using \fIcharinrange()\fR: | |
350 | .PP | |
351 | .Vb 1 | |
352 | \& use Unicode::UCD qw(charscript charinrange); | |
353 | .Ve | |
354 | .PP | |
355 | .Vb 2 | |
356 | \& $range = charscript('Hiragana'); | |
357 | \& print "looks like hiragana\en" if charinrange($range, $codepoint); | |
358 | .Ve | |
359 | .Sh "compexcl" | |
360 | .IX Subsection "compexcl" | |
361 | .Vb 1 | |
362 | \& use Unicode::UCD 'compexcl'; | |
363 | .Ve | |
364 | .PP | |
365 | .Vb 1 | |
366 | \& my $compexcl = compexcl("09dc"); | |
367 | .Ve | |
368 | .PP | |
369 | The \fIcompexcl()\fR returns the composition exclusion (that is, if the | |
370 | character should not be produced during a precomposition) of the | |
371 | character specified by a \fBcode point argument\fR. | |
372 | .PP | |
373 | If there is a composition exclusion for the character, true is | |
374 | returned. Otherwise, false is returned. | |
375 | .Sh "casefold" | |
376 | .IX Subsection "casefold" | |
377 | .Vb 1 | |
378 | \& use Unicode::UCD 'casefold'; | |
379 | .Ve | |
380 | .PP | |
381 | .Vb 1 | |
382 | \& my $casefold = casefold("09dc"); | |
383 | .Ve | |
384 | .PP | |
385 | The \fIcasefold()\fR returns the locale-independent case folding of the | |
386 | character specified by a \fBcode point argument\fR. | |
387 | .PP | |
388 | If there is a case folding for that character, a reference to a hash | |
389 | with the following fields is returned: | |
390 | .PP | |
391 | .Vb 1 | |
392 | \& key | |
393 | .Ve | |
394 | .PP | |
395 | .Vb 3 | |
396 | \& code code point with at least four hexdigits | |
397 | \& status "C", "F", "S", or "I" | |
398 | \& mapping one or more codes separated by spaces | |
399 | .Ve | |
400 | .PP | |
401 | The meaning of the \fIstatus\fR is as follows: | |
402 | .PP | |
403 | .Vb 15 | |
404 | \& C common case folding, common mappings shared | |
405 | \& by both simple and full mappings | |
406 | \& F full case folding, mappings that cause strings | |
407 | \& to grow in length. Multiple characters are separated | |
408 | \& by spaces | |
409 | \& S simple case folding, mappings to single characters | |
410 | \& where different from F | |
411 | \& I special case for dotted uppercase I and | |
412 | \& dotless lowercase i | |
413 | \& - If this mapping is included, the result is | |
414 | \& case-insensitive, but dotless and dotted I's | |
415 | \& are not distinguished | |
416 | \& - If this mapping is excluded, the result is not | |
417 | \& fully case-insensitive, but dotless and dotted | |
418 | \& I's are distinguished | |
419 | .Ve | |
420 | .PP | |
421 | If there is no case folding for that character, \f(CW\*(C`undef\*(C'\fR is returned. | |
422 | .PP | |
423 | For more information about case mappings see | |
424 | http://www.unicode.org/unicode/reports/tr21/ | |
425 | .Sh "casespec" | |
426 | .IX Subsection "casespec" | |
427 | .Vb 1 | |
428 | \& use Unicode::UCD 'casespec'; | |
429 | .Ve | |
430 | .PP | |
431 | .Vb 1 | |
432 | \& my $casespec = casespec("09dc"); | |
433 | .Ve | |
434 | .PP | |
435 | The \fIcasespec()\fR returns the potentially locale-dependent case mapping | |
436 | of the character specified by a \fBcode point argument\fR. The mapping | |
437 | may change the length of the string (which the basic Unicode case | |
438 | mappings as returned by \fIcharinfo()\fR never do). | |
439 | .PP | |
440 | If there is a case mapping for that character, a reference to a hash | |
441 | with the following fields is returned: | |
442 | .PP | |
443 | .Vb 1 | |
444 | \& key | |
445 | .Ve | |
446 | .PP | |
447 | .Vb 5 | |
448 | \& code code point with at least four hexdigits | |
449 | \& lower lowercase | |
450 | \& title titlecase | |
451 | \& upper uppercase | |
452 | \& condition condition list (may be undef) | |
453 | .Ve | |
454 | .PP | |
455 | The \f(CW\*(C`condition\*(C'\fR is optional. Where present, it consists of one or | |
456 | more \fIlocales\fR or \fIcontexts\fR, separated by spaces (other than as | |
457 | used to separate elements, spaces are to be ignored). A condition | |
458 | list overrides the normal behavior if all of the listed conditions are | |
459 | true. Case distinctions in the condition list are not significant. | |
460 | Conditions preceded by \*(L"\s-1NON_\s0\*(R" represent the negation of the condition. | |
461 | .PP | |
462 | Note that when there are multiple case mapping definitions for a | |
463 | single code point because of different locales, the value returned by | |
464 | \&\fIcasespec()\fR is a hash reference which has the locales as the keys and | |
465 | hash references as described above as the values. | |
466 | .PP | |
467 | A \fIlocale\fR is defined as a 2\-letter \s-1ISO\s0 3166 country code, possibly | |
468 | followed by a \*(L"_\*(R" and a 2\-letter \s-1ISO\s0 language code (possibly followed | |
469 | by a \*(L"_\*(R" and a variant code). For the lists of those codes, | |
470 | see Locale::Country and Locale::Language. | |
471 | .PP | |
472 | A \fIcontext\fR is one of the following choices: | |
473 | .PP | |
474 | .Vb 4 | |
475 | \& FINAL The letter is not followed by a letter of | |
476 | \& general category L (e.g. Ll, Lt, Lu, Lm, or Lo) | |
477 | \& MODERN The mapping is only used for modern text | |
478 | \& AFTER_i The last base character was "i" (U+0069) | |
479 | .Ve | |
480 | .PP | |
481 | For more information about case mappings see | |
482 | http://www.unicode.org/unicode/reports/tr21/ | |
483 | .Sh "Unicode::UCD::UnicodeVersion" | |
484 | .IX Subsection "Unicode::UCD::UnicodeVersion" | |
485 | \&\fIUnicode::UCD::UnicodeVersion()\fR returns the version of the Unicode | |
486 | Character Database, in other words, the version of the Unicode | |
487 | standard the database implements. The version is a string | |
488 | of numbers delimited by dots (\f(CW'.'\fR). | |
489 | .Sh "Implementation Note" | |
490 | .IX Subsection "Implementation Note" | |
491 | The first use of \fIcharinfo()\fR opens a read-only filehandle to the Unicode | |
492 | Character Database (the database is included in the Perl distribution). | |
493 | The filehandle is then kept open for further queries. In other words, | |
494 | if you are wondering where one of your filehandles went, that's where. | |
495 | .SH "BUGS" | |
496 | .IX Header "BUGS" | |
497 | Does not yet support \s-1EBCDIC\s0 platforms. | |
498 | .SH "AUTHOR" | |
499 | .IX Header "AUTHOR" | |
500 | Jarkko Hietaniemi |