Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / v8plus / man / man3 / Unicode::Normalize.3
CommitLineData
920dae64
AT
1.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sh \" Subsection heading
6.br
7.if t .Sp
8.ne 5
9.PP
10\fB\\$1\fR
11.PP
12..
13.de Sp \" Vertical space (when we can't use .PP)
14.if t .sp .5v
15.if n .sp
16..
17.de Vb \" Begin verbatim text
18.ft CW
19.nf
20.ne \\$1
21..
22.de Ve \" End verbatim text
23.ft R
24.fi
25..
26.\" Set up some character translations and predefined strings. \*(-- will
27.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28.\" double quote, and \*(R" will give a right double quote. | will give a
29.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31.\" expand to `' in nroff, nothing in troff, for use with C<>.
32.tr \(*W-|\(bv\*(Tr
33.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
34.ie n \{\
35. ds -- \(*W-
36. ds PI pi
37. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
39. ds L" ""
40. ds R" ""
41. ds C` ""
42. ds C' ""
43'br\}
44.el\{\
45. ds -- \|\(em\|
46. ds PI \(*p
47. ds L" ``
48. ds R" ''
49'br\}
50.\"
51.\" If the F register is turned on, we'll generate index entries on stderr for
52.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53.\" entries marked with X<> in POD. Of course, you'll have to process the
54.\" output yourself in some meaningful fashion.
55.if \nF \{\
56. de IX
57. tm Index:\\$1\t\\n%\t"\\$2"
58..
59. nr % 0
60. rr F
61.\}
62.\"
63.\" For nroff, turn off justification. Always turn off hyphenation; it makes
64.\" way too many mistakes in technical documents.
65.hy 0
66.if n .na
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "Unicode::Normalize 3"
132.TH Unicode::Normalize 3 "2001-09-21" "perl v5.8.8" "Perl Programmers Reference Guide"
133.SH "NAME"
134Unicode::Normalize \- Unicode Normalization Forms
135.SH "SYNOPSIS"
136.IX Header "SYNOPSIS"
137(1) using function names exported by default:
138.PP
139.Vb 1
140\& use Unicode::Normalize;
141.Ve
142.PP
143.Vb 4
144\& $NFD_string = NFD($string); # Normalization Form D
145\& $NFC_string = NFC($string); # Normalization Form C
146\& $NFKD_string = NFKD($string); # Normalization Form KD
147\& $NFKC_string = NFKC($string); # Normalization Form KC
148.Ve
149.PP
150(2) using function names exported on request:
151.PP
152.Vb 1
153\& use Unicode::Normalize 'normalize';
154.Ve
155.PP
156.Vb 4
157\& $NFD_string = normalize('D', $string); # Normalization Form D
158\& $NFC_string = normalize('C', $string); # Normalization Form C
159\& $NFKD_string = normalize('KD', $string); # Normalization Form KD
160\& $NFKC_string = normalize('KC', $string); # Normalization Form KC
161.Ve
162.SH "DESCRIPTION"
163.IX Header "DESCRIPTION"
164Parameters:
165.PP
166\&\f(CW$string\fR is used as a string under character semantics
167(see \fIperlunicode\fR).
168.PP
169\&\f(CW$codepoint\fR should be an unsigned integer
170representing a Unicode code point.
171.PP
172Note: Between \s-1XSUB\s0 and pure Perl, there is an incompatibility
173about the interpretation of \f(CW$codepoint\fR as a decimal number.
174\&\s-1XSUB\s0 converts \f(CW$codepoint\fR to an unsigned integer, but pure Perl does not.
175Do not use a floating point number or a negative sign in \f(CW$codepoint\fR.
176.Sh "Normalization Forms"
177.IX Subsection "Normalization Forms"
178.ie n .IP """$NFD_string = NFD($string)""" 4
179.el .IP "\f(CW$NFD_string = NFD($string)\fR" 4
180.IX Item "$NFD_string = NFD($string)"
181returns the Normalization Form D (formed by canonical decomposition).
182.ie n .IP """$NFC_string = NFC($string)""" 4
183.el .IP "\f(CW$NFC_string = NFC($string)\fR" 4
184.IX Item "$NFC_string = NFC($string)"
185returns the Normalization Form C (formed by canonical decomposition
186followed by canonical composition).
187.ie n .IP """$NFKD_string = NFKD($string)""" 4
188.el .IP "\f(CW$NFKD_string = NFKD($string)\fR" 4
189.IX Item "$NFKD_string = NFKD($string)"
190returns the Normalization Form \s-1KD\s0 (formed by compatibility decomposition).
191.ie n .IP """$NFKC_string = NFKC($string)""" 4
192.el .IP "\f(CW$NFKC_string = NFKC($string)\fR" 4
193.IX Item "$NFKC_string = NFKC($string)"
194returns the Normalization Form \s-1KC\s0 (formed by compatibility decomposition
195followed by \fBcanonical\fR composition).
196.ie n .IP """$FCD_string = FCD($string)""" 4
197.el .IP "\f(CW$FCD_string = FCD($string)\fR" 4
198.IX Item "$FCD_string = FCD($string)"
199If the given string is in \s-1FCD\s0 (\*(L"Fast C or D\*(R" form; cf. \s-1UTN\s0 #5),
200returns it without modification; otherwise returns an \s-1FCD\s0 string.
201.Sp
202Note: \s-1FCD\s0 is not always unique, so multiple forms may be equivalent
203to each other. \f(CW\*(C`FCD()\*(C'\fR will return one of these equivalent forms.
204.ie n .IP """$FCC_string = FCC($string)""" 4
205.el .IP "\f(CW$FCC_string = FCC($string)\fR" 4
206.IX Item "$FCC_string = FCC($string)"
207returns the \s-1FCC\s0 form (\*(L"Fast C Contiguous\*(R"; cf. \s-1UTN\s0 #5).
208.Sp
209Note: \s-1FCC\s0 is unique, as are the four normalization forms (NF*).
210.ie n .IP """$normalized_string = normalize($form_name, $string)""" 4
211.el .IP "\f(CW$normalized_string = normalize($form_name, $string)\fR" 4
212.IX Item "$normalized_string = normalize($form_name, $string)"
213As \f(CW$form_name\fR, one of the following names must be given.
214.Sp
215.Vb 4
216\& 'C' or 'NFC' for Normalization Form C (UAX #15)
217\& 'D' or 'NFD' for Normalization Form D (UAX #15)
218\& 'KC' or 'NFKC' for Normalization Form KC (UAX #15)
219\& 'KD' or 'NFKD' for Normalization Form KD (UAX #15)
220.Ve
221.Sp
222.Vb 2
223\& 'FCD' for "Fast C or D" Form (UTN #5)
224\& 'FCC' for "Fast C Contiguous" (UTN #5)
225.Ve
226.Sh "Decomposition and Composition"
227.IX Subsection "Decomposition and Composition"
228.ie n .IP """$decomposed_string = decompose($string)""" 4
229.el .IP "\f(CW$decomposed_string = decompose($string)\fR" 4
230.IX Item "$decomposed_string = decompose($string)"
231.PD 0
232.ie n .IP """$decomposed_string = decompose($string, $useCompatMapping)""" 4
233.el .IP "\f(CW$decomposed_string = decompose($string, $useCompatMapping)\fR" 4
234.IX Item "$decomposed_string = decompose($string, $useCompatMapping)"
235.PD
236Decomposes the specified string and returns the result.
237.Sp
238If the second parameter (a boolean) is omitted or false, decomposes it
239using the Canonical Decomposition Mapping.
240If true, decomposes it using the Compatibility Decomposition Mapping.
241.Sp
242The string returned is not always in \s-1NFD/NFKD\s0.
243Reordering may be required.
244.Sp
245.Vb 2
246\& $NFD_string = reorder(decompose($string)); # eq. to NFD()
247\& $NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD()
248.Ve
249.ie n .IP """$reordered_string = reorder($string)""" 4
250.el .IP "\f(CW$reordered_string = reorder($string)\fR" 4
251.IX Item "$reordered_string = reorder($string)"
252Reorders the combining characters and the like in the canonical ordering
253and returns the result.
254.Sp
255E.g., when you have a list of \s-1NFD/NFKD\s0 strings,
256you can get the concatenated \s-1NFD/NFKD\s0 string from them by saying
257.Sp
258.Vb 2
259\& $concat_NFD = reorder(join '', @NFD_strings);
260\& $concat_NFKD = reorder(join '', @NFKD_strings);
261.Ve
262.ie n .IP """$composed_string = compose($string)""" 4
263.el .IP "\f(CW$composed_string = compose($string)\fR" 4
264.IX Item "$composed_string = compose($string)"
265Returns the string where composable pairs are composed.
266.Sp
267E.g., when you have an \s-1NFD/NFKD\s0 string,
268you can get its \s-1NFC/NFKC\s0 string by saying
269.Sp
270.Vb 2
271\& $NFC_string = compose($NFD_string);
272\& $NFKC_string = compose($NFKD_string);
273.Ve
274.Sh "Quick Check"
275.IX Subsection "Quick Check"
276(see Annex 8, \s-1UAX\s0 #15; and \fIDerivedNormalizationProps.txt\fR)
277.PP
278The following functions check whether the string is in that normalization form.
279.PP
280The result returned will be:
281.PP
282.Vb 3
283\& YES The string is in that normalization form.
284\& NO The string is not in that normalization form.
285\& MAYBE Dubious. Maybe yes, maybe no.
286.Ve
287.ie n .IP """$result = checkNFD($string)""" 4
288.el .IP "\f(CW$result = checkNFD($string)\fR" 4
289.IX Item "$result = checkNFD($string)"
290returns true (\f(CW1\fR) if \f(CW\*(C`YES\*(C'\fR; false (\f(CW\*(C`empty string\*(C'\fR) if \f(CW\*(C`NO\*(C'\fR.
291.ie n .IP """$result = checkNFC($string)""" 4
292.el .IP "\f(CW$result = checkNFC($string)\fR" 4
293.IX Item "$result = checkNFC($string)"
294returns true (\f(CW1\fR) if \f(CW\*(C`YES\*(C'\fR; false (\f(CW\*(C`empty string\*(C'\fR) if \f(CW\*(C`NO\*(C'\fR;
295\&\f(CW\*(C`undef\*(C'\fR if \f(CW\*(C`MAYBE\*(C'\fR.
296.ie n .IP """$result = checkNFKD($string)""" 4
297.el .IP "\f(CW$result = checkNFKD($string)\fR" 4
298.IX Item "$result = checkNFKD($string)"
299returns true (\f(CW1\fR) if \f(CW\*(C`YES\*(C'\fR; false (\f(CW\*(C`empty string\*(C'\fR) if \f(CW\*(C`NO\*(C'\fR.
300.ie n .IP """$result = checkNFKC($string)""" 4
301.el .IP "\f(CW$result = checkNFKC($string)\fR" 4
302.IX Item "$result = checkNFKC($string)"
303returns true (\f(CW1\fR) if \f(CW\*(C`YES\*(C'\fR; false (\f(CW\*(C`empty string\*(C'\fR) if \f(CW\*(C`NO\*(C'\fR;
304\&\f(CW\*(C`undef\*(C'\fR if \f(CW\*(C`MAYBE\*(C'\fR.
305.ie n .IP """$result = checkFCD($string)""" 4
306.el .IP "\f(CW$result = checkFCD($string)\fR" 4
307.IX Item "$result = checkFCD($string)"
308returns true (\f(CW1\fR) if \f(CW\*(C`YES\*(C'\fR; false (\f(CW\*(C`empty string\*(C'\fR) if \f(CW\*(C`NO\*(C'\fR.
309.ie n .IP """$result = checkFCC($string)""" 4
310.el .IP "\f(CW$result = checkFCC($string)\fR" 4
311.IX Item "$result = checkFCC($string)"
312returns true (\f(CW1\fR) if \f(CW\*(C`YES\*(C'\fR; false (\f(CW\*(C`empty string\*(C'\fR) if \f(CW\*(C`NO\*(C'\fR;
313\&\f(CW\*(C`undef\*(C'\fR if \f(CW\*(C`MAYBE\*(C'\fR.
314.Sp
315If a string is not in \s-1FCD\s0, it cannot be in \s-1FCC\s0.
316So \f(CW\*(C`checkFCC($not_FCD_string)\*(C'\fR should return \f(CW\*(C`NO\*(C'\fR.
317.ie n .IP """$result = check($form_name, $string)""" 4
318.el .IP "\f(CW$result = check($form_name, $string)\fR" 4
319.IX Item "$result = check($form_name, $string)"
320returns true (\f(CW1\fR) if \f(CW\*(C`YES\*(C'\fR; false (\f(CW\*(C`empty string\*(C'\fR) if \f(CW\*(C`NO\*(C'\fR;
321\&\f(CW\*(C`undef\*(C'\fR if \f(CW\*(C`MAYBE\*(C'\fR.
322.Sp
323As \f(CW$form_name\fR, one of the following names must be given.
324.Sp
325.Vb 4
326\& 'C' or 'NFC' for Normalization Form C (UAX #15)
327\& 'D' or 'NFD' for Normalization Form D (UAX #15)
328\& 'KC' or 'NFKC' for Normalization Form KC (UAX #15)
329\& 'KD' or 'NFKD' for Normalization Form KD (UAX #15)
330.Ve
331.Sp
332.Vb 2
333\& 'FCD' for "Fast C or D" Form (UTN #5)
334\& 'FCC' for "Fast C Contiguous" (UTN #5)
335.Ve
336.PP
337\&\fBNote\fR
338.PP
339In the cases of \s-1NFD\s0, \s-1NFKD\s0, and \s-1FCD\s0, the answer must be
340either \f(CW\*(C`YES\*(C'\fR or \f(CW\*(C`NO\*(C'\fR. The answer \f(CW\*(C`MAYBE\*(C'\fR may be returned
341in the cases of \s-1NFC\s0, \s-1NFKC\s0, and \s-1FCC\s0.
342.PP
343A \f(CW\*(C`MAYBE\*(C'\fR string should contain at least one combining character
344or the like. For example, \f(CW\*(C`COMBINING ACUTE ACCENT\*(C'\fR has
345the \s-1MAYBE_NFC/MAYBE_NFKC\s0 property.
346.PP
347Both \f(CW\*(C`checkNFC("A\eN{COMBINING ACUTE ACCENT}")\*(C'\fR
348and \f(CW\*(C`checkNFC("B\eN{COMBINING ACUTE ACCENT}")\*(C'\fR will return \f(CW\*(C`MAYBE\*(C'\fR.
349\&\f(CW"A\eN{COMBINING ACUTE ACCENT}"\fR is not in \s-1NFC\s0
350(its \s-1NFC\s0 is \f(CW"\eN{LATIN CAPITAL LETTER A WITH ACUTE}"\fR),
351while \f(CW"B\eN{COMBINING ACUTE ACCENT}"\fR is in \s-1NFC\s0.
352.PP
353If you want to check exactly, compare the string with its \s-1NFC/NFKC/FCC\s0.
354.PP
355.Vb 5
356\& if ($string eq NFC($string)) {
357\& # $string is exactly normalized in NFC;
358\& } else {
359\& # $string is not normalized in NFC;
360\& }
361.Ve
362.PP
363.Vb 5
364\& if ($string eq NFKC($string)) {
365\& # $string is exactly normalized in NFKC;
366\& } else {
367\& # $string is not normalized in NFKC;
368\& }
369.Ve
370.Sh "Character Data"
371.IX Subsection "Character Data"
372These functions are an interface to the character data used internally.
373If you only want to get Unicode normalization forms, you don't need to
374call them yourself.
375.ie n .IP """$canonical_decomposed = getCanon($codepoint)""" 4
376.el .IP "\f(CW$canonical_decomposed = getCanon($codepoint)\fR" 4
377.IX Item "$canonical_decomposed = getCanon($codepoint)"
378If the character of the specified codepoint is canonically
379decomposable (including Hangul Syllables),
380returns the \fBcompletely decomposed\fR string canonically equivalent to it.
381.Sp
382If it is not decomposable, returns \f(CW\*(C`undef\*(C'\fR.
383.ie n .IP """$compatibility_decomposed = getCompat($codepoint)""" 4
384.el .IP "\f(CW$compatibility_decomposed = getCompat($codepoint)\fR" 4
385.IX Item "$compatibility_decomposed = getCompat($codepoint)"
386If the character of the specified codepoint is compatibility
387decomposable (including Hangul Syllables),
388returns the \fBcompletely decomposed\fR string compatibility equivalent to it.
389.Sp
390If it is not decomposable, returns \f(CW\*(C`undef\*(C'\fR.
391.ie n .IP """$codepoint_composite = getComposite($codepoint_here, $codepoint_next)""" 4
392.el .IP "\f(CW$codepoint_composite = getComposite($codepoint_here, $codepoint_next)\fR" 4
393.IX Item "$codepoint_composite = getComposite($codepoint_here, $codepoint_next)"
394If two characters here and next (as codepoints) are composable
395(including Hangul Jamo/Syllables and Composition Exclusions),
396returns the codepoint of the composite.
397.Sp
398If they are not composable, returns \f(CW\*(C`undef\*(C'\fR.
399.ie n .IP """$combining_class = getCombinClass($codepoint)""" 4
400.el .IP "\f(CW$combining_class = getCombinClass($codepoint)\fR" 4
401.IX Item "$combining_class = getCombinClass($codepoint)"
402Returns the combining class of the character as an integer.
403.ie n .IP """$is_exclusion = isExclusion($codepoint)""" 4
404.el .IP "\f(CW$is_exclusion = isExclusion($codepoint)\fR" 4
405.IX Item "$is_exclusion = isExclusion($codepoint)"
406Returns a boolean indicating whether the character of the specified codepoint
407is a composition exclusion.
408.ie n .IP """$is_singleton = isSingleton($codepoint)""" 4
409.el .IP "\f(CW$is_singleton = isSingleton($codepoint)\fR" 4
410.IX Item "$is_singleton = isSingleton($codepoint)"
411Returns a boolean indicating whether the character of the specified codepoint is
412a singleton.
413.ie n .IP """$is_non_starter_decomposition = isNonStDecomp($codepoint)""" 4
414.el .IP "\f(CW$is_non_starter_decomposition = isNonStDecomp($codepoint)\fR" 4
415.IX Item "$is_non_starter_decomposition = isNonStDecomp($codepoint)"
416Returns a boolean indicating whether the canonical decomposition
417of the character of the specified codepoint
418is a Non-Starter Decomposition.
419.ie n .IP """$may_be_composed_with_prev_char = isComp2nd($codepoint)""" 4
420.el .IP "\f(CW$may_be_composed_with_prev_char = isComp2nd($codepoint)\fR" 4
421.IX Item "$may_be_composed_with_prev_char = isComp2nd($codepoint)"
422Returns a boolean indicating whether the character of the specified codepoint
423may be composed with the previous one in a certain composition
424(including Hangul Compositions, but excluding
425Composition Exclusions and Non-Starter Decompositions).
426.SH "EXPORT"
427.IX Header "EXPORT"
428\&\f(CW\*(C`NFC\*(C'\fR, \f(CW\*(C`NFD\*(C'\fR, \f(CW\*(C`NFKC\*(C'\fR, \f(CW\*(C`NFKD\*(C'\fR: by default.
429.PP
430\&\f(CW\*(C`normalize\*(C'\fR and some other functions: on request.
431.SH "CAVEATS"
432.IX Header "CAVEATS"
433.IP "Perl's version vs. Unicode version" 4
434.IX Item "Perl's version vs. Unicode version"
435Since this module refers to perl core's Unicode database in the directory
436\&\fI/lib/unicore\fR (or formerly \fI/lib/unicode\fR), the Unicode version of
437normalization implemented by this module depends on your perl's version.
438.Sp
439.Vb 7
440\& perl's version implemented Unicode version
441\& 5.6.1 3.0.1
442\& 5.7.2 3.1.0
443\& 5.7.3 3.1.1 (same normalized form as that of 3.1.0)
444\& 5.8.0 3.2.0
445\& 5.8.1-5.8.3 4.0.0
446\& 5.8.4-5.8.6 (latest) 4.0.1 (same normalized form as that of 4.0.0)
447.Ve
448.IP "Correction of decomposition mapping" 4
449.IX Item "Correction of decomposition mapping"
450In older Unicode versions, a small number of characters (all of which are
451\&\s-1CJK\s0 compatibility ideographs as far as they have been found) may have
452an erroneous decomposition mapping (see \fINormalizationCorrections.txt\fR).
453Anyhow, this module will neither refer to \fINormalizationCorrections.txt\fR
454nor provide any specific version of normalization. Therefore this module
455running on an older perl with an older Unicode database may use
456the erroneous decomposition mapping blindly conforming to the Unicode database.
457.IP "Revised definition of canonical composition" 4
458.IX Item "Revised definition of canonical composition"
459In Unicode 4.1.0, the definition D2 of canonical composition (which
460affects \s-1NFC\s0 and \s-1NFKC\s0) has been changed (see Public Review Issue #29
461and recent \s-1UAX\s0 #15). This module has used the newer definition
462since the version 0.07 (Oct 31, 2001).
463This module does not support normalization according to the older
464definition, even if the Unicode version implemented by perl is
465lower than 4.1.0.
466.SH "AUTHOR"
467.IX Header "AUTHOR"
468\&\s-1SADAHIRO\s0 Tomoyuki <SADAHIRO@cpan.org>
469.PP
470Copyright(C) 2001\-2005, \s-1SADAHIRO\s0 Tomoyuki. Japan. All rights reserved.
471.PP
472This module is free software; you can redistribute it
473and/or modify it under the same terms as Perl itself.
474.SH "SEE ALSO"
475.IX Header "SEE ALSO"
476.IP "http://www.unicode.org/reports/tr15/" 4
477.IX Item "http://www.unicode.org/reports/tr15/"
478Unicode Normalization Forms \- \s-1UAX\s0 #15
479.IP "http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt" 4
480.IX Item "http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt"
481Derived Normalization Properties
482.IP "http://www.unicode.org/Public/UNIDATA/NormalizationCorrections.txt" 4
483.IX Item "http://www.unicode.org/Public/UNIDATA/NormalizationCorrections.txt"
484Normalization Corrections
485.IP "http://www.unicode.org/review/pr\-29.html" 4
486.IX Item "http://www.unicode.org/review/pr-29.html"
487Public Review Issue #29: Normalization Issue
488.IP "http://www.unicode.org/notes/tn5/" 4
489.IX Item "http://www.unicode.org/notes/tn5/"
490Canonical Equivalence in Applications \- \s-1UTN\s0 #5