Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / iso8859.pl
CommitLineData
86530b38
AT
1##---------------------------------------------------------------------------##
2## File:
3## $Id: iso8859.pl,v 2.5 2001/09/17 16:08:49 ehood Exp $
4## Author:
5## Earl Hood mhonarc@mhonarc.org
6## Description:
7## Routines to process data encoded in iso8859 character sets.
8##---------------------------------------------------------------------------##
9## Copyright (C) 1996-1999 Earl Hood, mhonarc@mhonarc.org
10##
11## This program is free software; you can redistribute it and/or modify
12## it under the terms of the GNU General Public License as published by
13## the Free Software Foundation; either version 2 of the License, or
14## (at your option) any later version.
15##
16## This program is distributed in the hope that it will be useful,
17## but WITHOUT ANY WARRANTY; without even the implied warranty of
18## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19## GNU General Public License for more details.
20##
21## You should have received a copy of the GNU General Public License
22## along with this program; if not, write to the Free Software
23## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24## 02111-1307, USA
25##---------------------------------------------------------------------------##
26
27package iso_8859;
28
29###############################################################################
30## Mapping arrays for characters to entity references
31###############################################################################
32
33##---------------------------------------------------------------------------
34## US-ASCII/Common characters
35##---------------------------------------------------------------------------
36
## %US_ASCII_To_Ent maps a character code (numeric hash key) to the
## entity reference (string value) that must replace it.  These are the
## characters that are special to markup (&, <, >) plus NO-BREAK SPACE;
## the values must be real entity references -- mapping a character to
## itself would leave the data unescaped.
%US_ASCII_To_Ent = (
  #--------------------------------------------------------------------------
  # Hex Code Entity Ref   # ISO external entity and description
  #--------------------------------------------------------------------------
    0x26, "&amp;",        # ISOnum : Ampersand
    0x3C, "&lt;",         # ISOnum : Less-than sign
    0x3E, "&gt;",         # ISOnum : Greater-than sign

    0xA0, "&nbsp;",       # ISOnum : NO-BREAK SPACE
);
47
48##---------------------------------------------------------------------------
49## ISO-8859-1: Latin-1
50##---------------------------------------------------------------------------
51
## %ISO_8859_1_To_Ent maps each ISO-8859-1 (Latin-1) character code in
## the range 0xA1-0xFF (numeric hash key) to its entity reference
## (string value, always of the form "&name;").  The trailing comment on
## each row names the ISO entity set that defines the entity and gives
## the character's description.  %LATIN1_To_Ent is an alias of the same
## hash.
%ISO_8859_1_To_Ent = (
  #--------------------------------------------------------------------------
  # Hex Code Entity Ref   # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1, "&iexcl;",  # ISOnum : INVERTED EXCLAMATION MARK
    0xA2, "&cent;",   # ISOnum : CENT SIGN
    0xA3, "&pound;",  # ISOnum : POUND SIGN
    0xA4, "&curren;", # ISOnum : CURRENCY SIGN
    0xA5, "&yen;",    # ISOnum : YEN SIGN
    0xA6, "&brvbar;", # ISOnum : BROKEN BAR
    0xA7, "&sect;",   # ISOnum : SECTION SIGN
    0xA8, "&uml;",    # ISOdia : DIAERESIS
    0xA9, "&copy;",   # ISOnum : COPYRIGHT SIGN
    0xAA, "&ordf;",   # ISOnum : FEMININE ORDINAL INDICATOR
    0xAB, "&laquo;",  # ISOnum : LEFT-POINTING DOUBLE ANGLE
                      #          QUOTATION MARK
    0xAC, "&not;",    # ISOnum : NOT SIGN
    0xAD, "&shy;",    # ISOnum : SOFT HYPHEN
    0xAE, "&reg;",    # ISOnum : REGISTERED SIGN
    0xAF, "&macr;",   # ISOdia : OVERLINE (MACRON)
    0xB0, "&deg;",    # ISOnum : DEGREE SIGN
    0xB1, "&plusmn;", # ISOnum : PLUS-MINUS SIGN
    0xB2, "&sup2;",   # ISOnum : SUPERSCRIPT TWO
    0xB3, "&sup3;",   # ISOnum : SUPERSCRIPT THREE
    0xB4, "&acute;",  # ISOdia : ACUTE ACCENT
    0xB5, "&micro;",  # ISOnum : MICRO SIGN
    0xB6, "&para;",   # ISOnum : PILCROW SIGN
    0xB7, "&middot;", # ISOnum : MIDDLE DOT
    0xB8, "&cedil;",  # ISOdia : CEDILLA
    0xB9, "&sup1;",   # ISOnum : SUPERSCRIPT ONE
    0xBA, "&ordm;",   # ISOnum : MASCULINE ORDINAL INDICATOR
    0xBB, "&raquo;",  # ISOnum : RIGHT-POINTING DOUBLE ANGLE
                      #          QUOTATION MARK
    0xBC, "&frac14;", # ISOnum : VULGAR FRACTION ONE QUARTER
    0xBD, "&frac12;", # ISOnum : VULGAR FRACTION ONE HALF
    0xBE, "&frac34;", # ISOnum : VULGAR FRACTION THREE QUARTERS
    0xBF, "&iquest;", # ISOnum : INVERTED QUESTION MARK
    0xC0, "&Agrave;", # ISOlat1: LATIN CAPITAL LETTER A WITH GRAVE
    0xC1, "&Aacute;", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2, "&Acirc;",  # ISOlat1: LATIN CAPITAL LETTER A WITH
                      #          CIRCUMFLEX
    0xC3, "&Atilde;", # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE
    0xC4, "&Auml;",   # ISOlat1: LATIN CAPITAL LETTER A WITH
                      #          DIAERESIS
    0xC5, "&Aring;",  # ISOlat1: LATIN CAPITAL LETTER A WITH RING
                      #          ABOVE
    0xC6, "&AElig;",  # ISOlat1: LATIN CAPITAL LETTER AE
    0xC7, "&Ccedil;", # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA
    0xC8, "&Egrave;", # ISOlat1: LATIN CAPITAL LETTER E WITH GRAVE
    0xC9, "&Eacute;", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA, "&Ecirc;",  # ISOlat1: LATIN CAPITAL LETTER E WITH
                      #          CIRCUMFLEX
    0xCB, "&Euml;",   # ISOlat1: LATIN CAPITAL LETTER E WITH
                      #          DIAERESIS
    0xCC, "&Igrave;", # ISOlat1: LATIN CAPITAL LETTER I WITH GRAVE
    0xCD, "&Iacute;", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE, "&Icirc;",  # ISOlat1: LATIN CAPITAL LETTER I WITH
                      #          CIRCUMFLEX
    0xCF, "&Iuml;",   # ISOlat1: LATIN CAPITAL LETTER I WITH
                      #          DIAERESIS
    0xD0, "&ETH;",    # ISOlat1: LATIN CAPITAL LETTER ETH (Icelandic)
    0xD1, "&Ntilde;", # ISOlat1: LATIN CAPITAL LETTER N WITH TILDE
    0xD2, "&Ograve;", # ISOlat1: LATIN CAPITAL LETTER O WITH GRAVE
    0xD3, "&Oacute;", # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4, "&Ocirc;",  # ISOlat1: LATIN CAPITAL LETTER O WITH
                      #          CIRCUMFLEX
    0xD5, "&Otilde;", # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE
    0xD6, "&Ouml;",   # ISOlat1: LATIN CAPITAL LETTER O WITH
                      #          DIAERESIS
    0xD7, "&times;",  # ISOnum : MULTIPLICATION SIGN
    0xD8, "&Oslash;", # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE
    0xD9, "&Ugrave;", # ISOlat1: LATIN CAPITAL LETTER U WITH GRAVE
    0xDA, "&Uacute;", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB, "&Ucirc;",  # ISOlat1: LATIN CAPITAL LETTER U WITH
                      #          CIRCUMFLEX
    0xDC, "&Uuml;",   # ISOlat1: LATIN CAPITAL LETTER U WITH
                      #          DIAERESIS
    0xDD, "&Yacute;", # ISOlat1: LATIN CAPITAL LETTER Y WITH ACUTE
    0xDE, "&THORN;",  # ISOlat1: LATIN CAPITAL LETTER THORN
                      #          (Icelandic)
    0xDF, "&szlig;",  # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0, "&agrave;", # ISOlat1: LATIN SMALL LETTER A WITH GRAVE
    0xE1, "&aacute;", # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2, "&acirc;",  # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE3, "&atilde;", # ISOlat1: LATIN SMALL LETTER A WITH TILDE
    0xE4, "&auml;",   # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5, "&aring;",  # ISOlat1: LATIN SMALL LETTER A WITH RING ABOVE
    0xE6, "&aelig;",  # ISOlat1: LATIN SMALL LETTER AE
    0xE7, "&ccedil;", # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA
    0xE8, "&egrave;", # ISOlat1: LATIN SMALL LETTER E WITH GRAVE
    0xE9, "&eacute;", # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA, "&ecirc;",  # ISOlat1: LATIN SMALL LETTER E WITH CIRCUMFLEX
    0xEB, "&euml;",   # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC, "&igrave;", # ISOlat1: LATIN SMALL LETTER I WITH GRAVE
    0xED, "&iacute;", # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE, "&icirc;",  # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF, "&iuml;",   # ISOlat1: LATIN SMALL LETTER I WITH DIAERESIS
    0xF0, "&eth;",    # ISOlat1: LATIN SMALL LETTER ETH (Icelandic)
    0xF1, "&ntilde;", # ISOlat1: LATIN SMALL LETTER N WITH TILDE
    0xF2, "&ograve;", # ISOlat1: LATIN SMALL LETTER O WITH GRAVE
    0xF3, "&oacute;", # ISOlat1: LATIN SMALL LETTER O WITH ACUTE
    0xF4, "&ocirc;",  # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5, "&otilde;", # ISOlat1: LATIN SMALL LETTER O WITH TILDE
    0xF6, "&ouml;",   # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7, "&divide;", # ISOnum : DIVISION SIGN
    0xF8, "&oslash;", # ISOlat1: LATIN SMALL LETTER O WITH STROKE
    0xF9, "&ugrave;", # ISOlat1: LATIN SMALL LETTER U WITH GRAVE
    0xFA, "&uacute;", # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB, "&ucirc;",  # ISOlat1: LATIN SMALL LETTER U WITH CIRCUMFLEX
    0xFC, "&uuml;",   # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD, "&yacute;", # ISOlat1: LATIN SMALL LETTER Y WITH ACUTE
    0xFE, "&thorn;",  # ISOlat1: LATIN SMALL LETTER THORN
                      #          (Icelandic)
    0xFF, "&yuml;",   # ISOlat1: LATIN SMALL LETTER Y WITH DIAERESIS
);
## Alias: the LATIN1 name refers to the very same hash.
*LATIN1_To_Ent = *ISO_8859_1_To_Ent;
168
169##---------------------------------------------------------------------------
170## ISO-8859-2: Latin-2
171##---------------------------------------------------------------------------
172
## %ISO_8859_2_To_Ent maps each ISO-8859-2 (Latin-2) character code in
## the range 0xA1-0xFF (numeric hash key) to its entity reference
## (string value, always of the form "&name;").  The trailing comment on
## each row names the ISO entity set that defines the entity and gives
## the character's description.  %LATIN2_To_Ent is an alias of the same
## hash.
%ISO_8859_2_To_Ent = (
  #--------------------------------------------------------------------------
  # Hex Code Entity Ref   # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1, "&Aogon;",  # ISOlat2: LATIN CAPITAL LETTER A WITH OGONEK
    0xA2, "&breve;",  # ISOdia : BREVE
    0xA3, "&Lstrok;", # ISOlat2: LATIN CAPITAL LETTER L WITH STROKE
    0xA4, "&curren;", # ISOnum : CURRENCY SIGN
    0xA5, "&Lcaron;", # ISOlat2: LATIN CAPITAL LETTER L WITH CARON
    0xA6, "&Sacute;", # ISOlat2: LATIN CAPITAL LETTER S WITH ACUTE
    0xA7, "&sect;",   # ISOnum : SECTION SIGN
    0xA8, "&uml;",    # ISOdia : DIAERESIS
    0xA9, "&Scaron;", # ISOlat2: LATIN CAPITAL LETTER S WITH CARON
    0xAA, "&Scedil;", # ISOlat2: LATIN CAPITAL LETTER S WITH CEDILLA
    0xAB, "&Tcaron;", # ISOlat2: LATIN CAPITAL LETTER T WITH CARON
    0xAC, "&Zacute;", # ISOlat2: LATIN CAPITAL LETTER Z WITH ACUTE
    0xAD, "&shy;",    # ISOnum : SOFT HYPHEN
    0xAE, "&Zcaron;", # ISOlat2: LATIN CAPITAL LETTER Z WITH CARON
    0xAF, "&Zdot;",   # ISOlat2: LATIN CAPITAL LETTER Z WITH DOT
                      #          ABOVE
    0xB0, "&deg;",    # ISOnum : DEGREE SIGN
    0xB1, "&aogon;",  # ISOlat2: LATIN SMALL LETTER A WITH OGONEK
    0xB2, "&ogon;",   # ISOdia : OGONEK
    0xB3, "&lstrok;", # ISOlat2: LATIN SMALL LETTER L WITH STROKE
    0xB4, "&acute;",  # ISOdia : ACUTE ACCENT
    0xB5, "&lcaron;", # ISOlat2: LATIN SMALL LETTER L WITH CARON
    0xB6, "&sacute;", # ISOlat2: LATIN SMALL LETTER S WITH ACUTE
    0xB7, "&caron;",  # ISOdia : CARON
    0xB8, "&cedil;",  # ISOdia : CEDILLA
    0xB9, "&scaron;", # ISOlat2: LATIN SMALL LETTER S WITH CARON
    0xBA, "&scedil;", # ISOlat2: LATIN SMALL LETTER S WITH CEDILLA
    0xBB, "&tcaron;", # ISOlat2: LATIN SMALL LETTER T WITH CARON
    0xBC, "&zacute;", # ISOlat2: LATIN SMALL LETTER Z WITH ACUTE
    0xBD, "&dblac;",  # ISOdia : DOUBLE ACUTE ACCENT
    0xBE, "&zcaron;", # ISOlat2: LATIN SMALL LETTER Z WITH CARON
    0xBF, "&zdot;",   # ISOlat2: LATIN SMALL LETTER Z WITH DOT ABOVE
    0xC0, "&Racute;", # ISOlat2: LATIN CAPITAL LETTER R WITH ACUTE
    0xC1, "&Aacute;", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2, "&Acirc;",  # ISOlat1: LATIN CAPITAL LETTER A WITH
                      #          CIRCUMFLEX
    0xC3, "&Abreve;", # ISOlat2: LATIN CAPITAL LETTER A WITH BREVE
    0xC4, "&Auml;",   # ISOlat1: LATIN CAPITAL LETTER A WITH
                      #          DIAERESIS
    0xC5, "&Lacute;", # ISOlat2: LATIN CAPITAL LETTER L WITH ACUTE
    0xC6, "&Cacute;", # ISOlat2: LATIN CAPITAL LETTER C WITH ACUTE
    0xC7, "&Ccedil;", # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA
    0xC8, "&Ccaron;", # ISOlat2: LATIN CAPITAL LETTER C WITH CARON
    0xC9, "&Eacute;", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA, "&Eogon;",  # ISOlat2: LATIN CAPITAL LETTER E WITH OGONEK
    0xCB, "&Euml;",   # ISOlat1: LATIN CAPITAL LETTER E WITH
                      #          DIAERESIS
    0xCC, "&Ecaron;", # ISOlat2: LATIN CAPITAL LETTER E WITH CARON
    0xCD, "&Iacute;", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE, "&Icirc;",  # ISOlat1: LATIN CAPITAL LETTER I WITH
                      #          CIRCUMFLEX
    0xCF, "&Dcaron;", # ISOlat2: LATIN CAPITAL LETTER D WITH CARON
    0xD0, "&Dstrok;", # ISOlat2: LATIN CAPITAL LETTER D WITH STROKE
    0xD1, "&Nacute;", # ISOlat2: LATIN CAPITAL LETTER N WITH ACUTE
    0xD2, "&Ncaron;", # ISOlat2: LATIN CAPITAL LETTER N WITH CARON
    0xD3, "&Oacute;", # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4, "&Ocirc;",  # ISOlat1: LATIN CAPITAL LETTER O WITH
                      #          CIRCUMFLEX
    0xD5, "&Odblac;", # ISOlat2: LATIN CAPITAL LETTER O WITH DOUBLE
                      #          ACUTE
    0xD6, "&Ouml;",   # ISOlat1: LATIN CAPITAL LETTER O WITH
                      #          DIAERESIS
    0xD7, "&times;",  # ISOnum : MULTIPLICATION SIGN
    0xD8, "&Rcaron;", # ISOlat2: LATIN CAPITAL LETTER R WITH CARON
    0xD9, "&Uring;",  # ISOlat2: LATIN CAPITAL LETTER U WITH RING
                      #          ABOVE
    0xDA, "&Uacute;", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB, "&Udblac;", # ISOlat2: LATIN CAPITAL LETTER U WITH DOUBLE
                      #          ACUTE
    0xDC, "&Uuml;",   # ISOlat1: LATIN CAPITAL LETTER U WITH
                      #          DIAERESIS
    0xDD, "&Yacute;", # ISOlat1: LATIN CAPITAL LETTER Y WITH ACUTE
    0xDE, "&Tcedil;", # ISOlat2: LATIN CAPITAL LETTER T WITH CEDILLA
    0xDF, "&szlig;",  # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0, "&racute;", # ISOlat2: LATIN SMALL LETTER R WITH ACUTE
    0xE1, "&aacute;", # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2, "&acirc;",  # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE3, "&abreve;", # ISOlat2: LATIN SMALL LETTER A WITH BREVE
    0xE4, "&auml;",   # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5, "&lacute;", # ISOlat2: LATIN SMALL LETTER L WITH ACUTE
    0xE6, "&cacute;", # ISOlat2: LATIN SMALL LETTER C WITH ACUTE
    0xE7, "&ccedil;", # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA
    0xE8, "&ccaron;", # ISOlat2: LATIN SMALL LETTER C WITH CARON
    0xE9, "&eacute;", # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA, "&eogon;",  # ISOlat2: LATIN SMALL LETTER E WITH OGONEK
    0xEB, "&euml;",   # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC, "&ecaron;", # ISOlat2: LATIN SMALL LETTER E WITH CARON
    0xED, "&iacute;", # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE, "&icirc;",  # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF, "&dcaron;", # ISOlat2: LATIN SMALL LETTER D WITH CARON
    0xF0, "&dstrok;", # ISOlat2: LATIN SMALL LETTER D WITH STROKE
    0xF1, "&nacute;", # ISOlat2: LATIN SMALL LETTER N WITH ACUTE
    0xF2, "&ncaron;", # ISOlat2: LATIN SMALL LETTER N WITH CARON
    0xF3, "&oacute;", # ISOlat1: LATIN SMALL LETTER O WITH ACUTE
    0xF4, "&ocirc;",  # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5, "&odblac;", # ISOlat2: LATIN SMALL LETTER O WITH DOUBLE
                      #          ACUTE
    0xF6, "&ouml;",   # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7, "&divide;", # ISOnum : DIVISION SIGN
    0xF8, "&rcaron;", # ISOlat2: LATIN SMALL LETTER R WITH CARON
    0xF9, "&uring;",  # ISOlat2: LATIN SMALL LETTER U WITH RING ABOVE
    0xFA, "&uacute;", # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB, "&udblac;", # ISOlat2: LATIN SMALL LETTER U WITH DOUBLE
                      #          ACUTE
    0xFC, "&uuml;",   # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD, "&yacute;", # ISOlat1: LATIN SMALL LETTER Y WITH ACUTE
    0xFE, "&tcedil;", # ISOlat2: LATIN SMALL LETTER T WITH CEDILLA
    0xFF, "&dot;",    # ISOdia : DOT ABOVE
);
## Alias: the LATIN2 name refers to the very same hash.
*LATIN2_To_Ent = *ISO_8859_2_To_Ent;
287
288##---------------------------------------------------------------------------
289## ISO-8859-3: Latin-3
290##---------------------------------------------------------------------------
291
## %ISO_8859_3_To_Ent maps ISO-8859-3 (Latin-3) character codes (numeric
## hash key) to entity references (string value, always of the form
## "&name;").  The codes 0xA5, 0xAE, 0xBE, 0xC3, 0xD0, 0xE3 and 0xF0 have
## no entry in this table.  The trailing comment on each row names the
## ISO entity set that defines the entity and gives the character's
## description.  %LATIN3_To_Ent is an alias of the same hash.
%ISO_8859_3_To_Ent = (
  #--------------------------------------------------------------------------
  # Hex Code Entity Ref   # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1, "&Hstrok;", # ISOlat2: LATIN CAPITAL LETTER H WITH STROKE
    0xA2, "&breve;",  # ISOdia : BREVE
    0xA3, "&pound;",  # ISOnum : POUND SIGN
    0xA4, "&curren;", # ISOnum : CURRENCY SIGN
    0xA6, "&Hcirc;",  # ISOlat2: LATIN CAPITAL LETTER H WITH
                      #          CIRCUMFLEX
    0xA7, "&sect;",   # ISOnum : SECTION SIGN
    0xA8, "&uml;",    # ISOdia : DIAERESIS
    0xA9, "&Idot;",   # ISOlat2: LATIN CAPITAL LETTER I WITH DOT
                      #          ABOVE
    0xAA, "&Scedil;", # ISOlat2: LATIN CAPITAL LETTER S WITH CEDILLA
    0xAB, "&Gbreve;", # ISOlat2: LATIN CAPITAL LETTER G WITH BREVE
    0xAC, "&Jcirc;",  # ISOlat2: LATIN CAPITAL LETTER J WITH
                      #          CIRCUMFLEX
    0xAD, "&shy;",    # ISOnum : SOFT HYPHEN
    0xAF, "&Zdot;",   # ISOlat2: LATIN CAPITAL LETTER Z WITH DOT
                      #          ABOVE
    0xB0, "&deg;",    # ISOnum : DEGREE SIGN
    0xB1, "&hstrok;", # ISOlat2: LATIN SMALL LETTER H WITH STROKE
    0xB2, "&sup2;",   # ISOnum : SUPERSCRIPT TWO
    0xB3, "&sup3;",   # ISOnum : SUPERSCRIPT THREE
    0xB4, "&acute;",  # ISOdia : ACUTE ACCENT
    0xB5, "&micro;",  # ISOnum : MICRO SIGN
    0xB6, "&hcirc;",  # ISOlat2: LATIN SMALL LETTER H WITH
                      #          CIRCUMFLEX
    0xB7, "&middot;", # ISOnum : MIDDLE DOT
    0xB8, "&cedil;",  # ISOdia : CEDILLA
    0xB9, "&inodot;", # ISOlat2: LATIN SMALL LETTER I DOTLESS
    0xBA, "&scedil;", # ISOlat2: LATIN SMALL LETTER S WITH CEDILLA
    0xBB, "&gbreve;", # ISOlat2: LATIN SMALL LETTER G WITH BREVE
    0xBC, "&jcirc;",  # ISOlat2: LATIN SMALL LETTER J WITH CIRCUMFLEX
    0xBD, "&frac12;", # ISOnum : VULGAR FRACTION ONE HALF
    0xBF, "&zdot;",   # ISOlat2: LATIN SMALL LETTER Z WITH DOT ABOVE
    0xC0, "&Agrave;", # ISOlat1: LATIN CAPITAL LETTER A WITH GRAVE
    0xC1, "&Aacute;", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2, "&Acirc;",  # ISOlat1: LATIN CAPITAL LETTER A WITH
                      #          CIRCUMFLEX
    0xC4, "&Auml;",   # ISOlat1: LATIN CAPITAL LETTER A WITH
                      #          DIAERESIS
    0xC5, "&Cdot;",   # ISOlat2: LATIN CAPITAL LETTER C WITH DOT
                      #          ABOVE
    0xC6, "&Ccirc;",  # ISOlat2: LATIN CAPITAL LETTER C WITH
                      #          CIRCUMFLEX
    0xC7, "&Ccedil;", # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA
    0xC8, "&Egrave;", # ISOlat1: LATIN CAPITAL LETTER E WITH GRAVE
    0xC9, "&Eacute;", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA, "&Ecirc;",  # ISOlat1: LATIN CAPITAL LETTER E WITH
                      #          CIRCUMFLEX
    0xCB, "&Euml;",   # ISOlat1: LATIN CAPITAL LETTER E WITH
                      #          DIAERESIS
    0xCC, "&Igrave;", # ISOlat1: LATIN CAPITAL LETTER I WITH GRAVE
    0xCD, "&Iacute;", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE, "&Icirc;",  # ISOlat1: LATIN CAPITAL LETTER I WITH
                      #          CIRCUMFLEX
    0xCF, "&Iuml;",   # ISOlat1: LATIN CAPITAL LETTER I WITH
                      #          DIAERESIS
    0xD1, "&Ntilde;", # ISOlat1: LATIN CAPITAL LETTER N WITH TILDE
    0xD2, "&Ograve;", # ISOlat1: LATIN CAPITAL LETTER O WITH GRAVE
    0xD3, "&Oacute;", # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4, "&Ocirc;",  # ISOlat1: LATIN CAPITAL LETTER O WITH
                      #          CIRCUMFLEX
    0xD5, "&Gdot;",   # ISOlat2: LATIN CAPITAL LETTER G WITH DOT
                      #          ABOVE
    0xD6, "&Ouml;",   # ISOlat1: LATIN CAPITAL LETTER O WITH
                      #          DIAERESIS
    0xD7, "&times;",  # ISOnum : MULTIPLICATION SIGN
    0xD8, "&Gcirc;",  # ISOlat2: LATIN CAPITAL LETTER G WITH
                      #          CIRCUMFLEX
    0xD9, "&Ugrave;", # ISOlat1: LATIN CAPITAL LETTER U WITH GRAVE
    0xDA, "&Uacute;", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB, "&Ucirc;",  # ISOlat1: LATIN CAPITAL LETTER U WITH
                      #          CIRCUMFLEX
    0xDC, "&Uuml;",   # ISOlat1: LATIN CAPITAL LETTER U WITH
                      #          DIAERESIS
    0xDD, "&Ubreve;", # ISOlat2: LATIN CAPITAL LETTER U WITH BREVE
    0xDE, "&Scirc;",  # ISOlat2: LATIN CAPITAL LETTER S WITH
                      #          CIRCUMFLEX
    0xDF, "&szlig;",  # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0, "&agrave;", # ISOlat1: LATIN SMALL LETTER A WITH GRAVE
    0xE1, "&aacute;", # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2, "&acirc;",  # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE4, "&auml;",   # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5, "&cdot;",   # ISOlat2: LATIN SMALL LETTER C WITH DOT ABOVE
    0xE6, "&ccirc;",  # ISOlat2: LATIN SMALL LETTER C WITH
                      #          CIRCUMFLEX
    0xE7, "&ccedil;", # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA
    0xE8, "&egrave;", # ISOlat1: LATIN SMALL LETTER E WITH GRAVE
    0xE9, "&eacute;", # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA, "&ecirc;",  # ISOlat1: LATIN SMALL LETTER E WITH
                      #          CIRCUMFLEX
    0xEB, "&euml;",   # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC, "&igrave;", # ISOlat1: LATIN SMALL LETTER I WITH GRAVE
    0xED, "&iacute;", # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE, "&icirc;",  # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF, "&iuml;",   # ISOlat1: LATIN SMALL LETTER I WITH DIAERESIS
    0xF1, "&ntilde;", # ISOlat1: LATIN SMALL LETTER N WITH TILDE
    0xF2, "&ograve;", # ISOlat1: LATIN SMALL LETTER O WITH GRAVE
    0xF3, "&oacute;", # ISOlat1: LATIN SMALL LETTER O WITH ACUTE
    0xF4, "&ocirc;",  # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5, "&gdot;",   # ISOlat2: LATIN SMALL LETTER G WITH DOT ABOVE
    0xF6, "&ouml;",   # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7, "&divide;", # ISOnum : DIVISION SIGN
    0xF8, "&gcirc;",  # ISOlat2: LATIN SMALL LETTER G WITH
                      #          CIRCUMFLEX
    0xF9, "&ugrave;", # ISOlat1: LATIN SMALL LETTER U WITH GRAVE
    0xFA, "&uacute;", # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB, "&ucirc;",  # ISOlat1: LATIN SMALL LETTER U WITH
                      #          CIRCUMFLEX
    0xFC, "&uuml;",   # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD, "&ubreve;", # ISOlat2: LATIN SMALL LETTER U WITH BREVE
    0xFE, "&scirc;",  # ISOlat2: LATIN SMALL LETTER S WITH
                      #          CIRCUMFLEX
    0xFF, "&dot;",    # ISOdia : DOT ABOVE
);
## Alias: the LATIN3 name refers to the very same hash.
*LATIN3_To_Ent = *ISO_8859_3_To_Ent;
412
413##---------------------------------------------------------------------------
414## ISO-8859-4: Latin-4
415##---------------------------------------------------------------------------
416
## %ISO_8859_4_To_Ent maps each ISO-8859-4 (Latin-4) character code in
## the range 0xA1-0xFF (numeric hash key) to its entity reference
## (string value).  Every value must be a complete reference including
## the terminating ";" -- an unterminated reference would run into the
## text that follows it.  The trailing comment on each row names the ISO
## entity set that defines the entity and gives the character's
## description.  %LATIN4_To_Ent is an alias of the same hash.
%ISO_8859_4_To_Ent = (
  #--------------------------------------------------------------------------
  # Hex Code Entity Ref   # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1, "&Aogon;",  # ISOlat2: LATIN CAPITAL LETTER A WITH OGONEK
    0xA2, "&kgreen;", # ISOlat2: LATIN SMALL LETTER KRA (Greenlandic)
    0xA3, "&Rcedil;", # ISOlat2: LATIN CAPITAL LETTER R WITH CEDILLA
    0xA4, "&curren;", # ISOnum : CURRENCY SIGN
    0xA5, "&Itilde;", # ISOlat2: LATIN CAPITAL LETTER I WITH TILDE
    0xA6, "&Lcedil;", # ISOlat2: LATIN CAPITAL LETTER L WITH CEDILLA
    0xA7, "&sect;",   # ISOnum : SECTION SIGN
    0xA8, "&uml;",    # ISOdia : DIAERESIS
    0xA9, "&Scaron;", # ISOlat2: LATIN CAPITAL LETTER S WITH CARON
    0xAA, "&Emacr;",  # ISOlat2: LATIN CAPITAL LETTER E WITH MACRON
    0xAB, "&Gcedil;", # ISOlat2: LATIN CAPITAL LETTER G WITH CEDILLA
    0xAC, "&Tstrok;", # ISOlat2: LATIN CAPITAL LETTER T WITH STROKE
    0xAD, "&shy;",    # ISOnum : SOFT HYPHEN
    0xAE, "&Zcaron;", # ISOlat2: LATIN CAPITAL LETTER Z WITH CARON
    0xAF, "&macr;",   # ISOdia : MACRON
    0xB0, "&deg;",    # ISOnum : DEGREE SIGN
    0xB1, "&aogon;",  # ISOlat2: LATIN SMALL LETTER A WITH OGONEK
    0xB2, "&ogon;",   # ISOdia : OGONEK
    0xB3, "&rcedil;", # ISOlat2: LATIN SMALL LETTER R WITH CEDILLA
    0xB4, "&acute;",  # ISOdia : ACUTE ACCENT
    0xB5, "&itilde;", # ISOlat2: LATIN SMALL LETTER I WITH TILDE
    0xB6, "&lcedil;", # ISOlat2: LATIN SMALL LETTER L WITH CEDILLA
    0xB7, "&caron;",  # ISOdia : CARON
    0xB8, "&cedil;",  # ISOdia : CEDILLA
    0xB9, "&scaron;", # ISOlat2: LATIN SMALL LETTER S WITH CARON
    0xBA, "&emacr;",  # ISOlat2: LATIN SMALL LETTER E WITH MACRON
    0xBB, "&gcedil;", # ISOlat2: LATIN SMALL LETTER G WITH CEDILLA
    0xBC, "&tstrok;", # ISOlat2: LATIN SMALL LETTER T WITH STROKE
    0xBD, "&ENG;",    # ISOlat2: LATIN CAPITAL LETTER ENG (Lappish)
    0xBE, "&zcaron;", # ISOlat2: LATIN SMALL LETTER Z WITH CARON
    0xBF, "&eng;",    # ISOlat2: LATIN SMALL LETTER ENG (Lappish)
    0xC0, "&Amacr;",  # ISOlat2: LATIN CAPITAL LETTER A WITH MACRON
    0xC1, "&Aacute;", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2, "&Acirc;",  # ISOlat1: LATIN CAPITAL LETTER A WITH
                      #          CIRCUMFLEX
    0xC3, "&Atilde;", # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE
    0xC4, "&Auml;",   # ISOlat1: LATIN CAPITAL LETTER A WITH
                      #          DIAERESIS
    0xC5, "&Aring;",  # ISOlat1: LATIN CAPITAL LETTER A WITH RING
                      #          ABOVE
    0xC6, "&AElig;",  # ISOlat1: LATIN CAPITAL LETTER AE
    0xC7, "&Iogon;",  # ISOlat2: LATIN CAPITAL LETTER I WITH OGONEK
    0xC8, "&Ccaron;", # ISOlat2: LATIN CAPITAL LETTER C WITH CARON
    0xC9, "&Eacute;", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA, "&Eogon;",  # ISOlat2: LATIN CAPITAL LETTER E WITH OGONEK
    0xCB, "&Euml;",   # ISOlat1: LATIN CAPITAL LETTER E WITH
                      #          DIAERESIS
    0xCC, "&Edot;",   # ISOlat2: LATIN CAPITAL LETTER E WITH DOT
                      #          ABOVE
    0xCD, "&Iacute;", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE, "&Icirc;",  # ISOlat1: LATIN CAPITAL LETTER I WITH
                      #          CIRCUMFLEX
    0xCF, "&Imacr;",  # ISOlat2: LATIN CAPITAL LETTER I WITH MACRON
    0xD0, "&Dstrok;", # ISOlat2: LATIN CAPITAL LETTER D WITH STROKE
    0xD1, "&Ncedil;", # ISOlat2: LATIN CAPITAL LETTER N WITH CEDILLA
    0xD2, "&Omacr;",  # ISOlat2: LATIN CAPITAL LETTER O WITH MACRON
    0xD3, "&Kcedil;", # ISOlat2: LATIN CAPITAL LETTER K WITH CEDILLA
    0xD4, "&Ocirc;",  # ISOlat1: LATIN CAPITAL LETTER O WITH
                      #          CIRCUMFLEX
    0xD5, "&Otilde;", # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE
    0xD6, "&Ouml;",   # ISOlat1: LATIN CAPITAL LETTER O WITH
                      #          DIAERESIS
    0xD7, "&times;",  # ISOnum : MULTIPLICATION SIGN
    0xD8, "&Oslash;", # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE
    0xD9, "&Uogon;",  # ISOlat2: LATIN CAPITAL LETTER U WITH OGONEK
    0xDA, "&Uacute;", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB, "&Ucirc;",  # ISOlat1: LATIN CAPITAL LETTER U WITH
                      #          CIRCUMFLEX
    0xDC, "&Uuml;",   # ISOlat1: LATIN CAPITAL LETTER U WITH
                      #          DIAERESIS
    0xDD, "&Utilde;", # ISOlat2: LATIN CAPITAL LETTER U WITH TILDE
    0xDE, "&Umacr;",  # ISOlat2: LATIN CAPITAL LETTER U WITH MACRON
    0xDF, "&szlig;",  # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0, "&amacr;",  # ISOlat2: LATIN SMALL LETTER A WITH MACRON
    0xE1, "&aacute;", # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2, "&acirc;",  # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE3, "&atilde;", # ISOlat1: LATIN SMALL LETTER A WITH TILDE
    0xE4, "&auml;",   # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5, "&aring;",  # ISOlat1: LATIN SMALL LETTER A WITH RING ABOVE
    0xE6, "&aelig;",  # ISOlat1: LATIN SMALL LETTER AE
    0xE7, "&iogon;",  # ISOlat2: LATIN SMALL LETTER I WITH OGONEK
    0xE8, "&ccaron;", # ISOlat2: LATIN SMALL LETTER C WITH CARON
    0xE9, "&eacute;", # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA, "&eogon;",  # ISOlat2: LATIN SMALL LETTER E WITH OGONEK
    0xEB, "&euml;",   # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC, "&edot;",   # ISOlat2: LATIN SMALL LETTER E WITH DOT ABOVE
    0xED, "&iacute;", # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE, "&icirc;",  # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF, "&imacr;",  # ISOlat2: LATIN SMALL LETTER I WITH MACRON
    0xF0, "&dstrok;", # ISOlat2: LATIN SMALL LETTER D WITH STROKE
    0xF1, "&ncedil;", # ISOlat2: LATIN SMALL LETTER N WITH CEDILLA
    0xF2, "&omacr;",  # ISOlat2: LATIN SMALL LETTER O WITH MACRON
    0xF3, "&kcedil;", # ISOlat2: LATIN SMALL LETTER K WITH CEDILLA
    0xF4, "&ocirc;",  # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5, "&otilde;", # ISOlat1: LATIN SMALL LETTER O WITH TILDE
    0xF6, "&ouml;",   # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7, "&divide;", # ISOnum : DIVISION SIGN
    0xF8, "&oslash;", # ISOlat1: LATIN SMALL LETTER O WITH STROKE
    0xF9, "&uogon;",  # ISOlat2: LATIN SMALL LETTER U WITH OGONEK
    0xFA, "&uacute;", # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB, "&ucirc;",  # ISOlat1: LATIN SMALL LETTER U WITH
                      #          CIRCUMFLEX
    0xFC, "&uuml;",   # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD, "&utilde;", # ISOlat2: LATIN SMALL LETTER U WITH TILDE
    0xFE, "&umacr;",  # ISOlat2: LATIN SMALL LETTER U WITH MACRON
    0xFF, "&dot;",    # ISOdia : DOT ABOVE
);
## Alias: the LATIN4 name refers to the very same hash.
*LATIN4_To_Ent = *ISO_8859_4_To_Ent;
529
530##---------------------------------------------------------------------------
531## ISO-8859-5: Cyrillic
532##---------------------------------------------------------------------------
533
## %ISO_8859_5_To_Ent maps each ISO-8859-5 (Cyrillic) character code in
## the range 0xA1-0xFF (numeric hash key) to its entity reference
## (string value, always of the form "&name;").  The trailing comment on
## each row names the ISO entity set that defines the entity and gives
## the character's description.
##
## Rows that are easy to confuse: 0xA4/0xF4 are UKRAINIAN IE
## (Jukcy/jukcy) and must differ from plain IE at 0xB5/0xD5
## (IEcy/iecy); 0xA8/0xF8 are JE (Jsercy/jsercy).
%ISO_8859_5_To_Ent = (
  #--------------------------------------------------------------------------
  # Hex Code Entity Ref   # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1, "&IOcy;",   # ISOcyr1: CYRILLIC CAPITAL LETTER IO
    0xA2, "&DJcy;",   # ISOcyr2: CYRILLIC CAPITAL LETTER DJE
                      #          (Serbocroatian)
    0xA3, "&GJcy;",   # ISOcyr2: CYRILLIC CAPITAL LETTER GJE
                      #          (Macedonian)
    0xA4, "&Jukcy;",  # ISOcyr2: CYRILLIC CAPITAL LETTER UKRAINIAN IE
    0xA5, "&DScy;",   # ISOcyr2: CYRILLIC CAPITAL LETTER DZE
                      #          (Macedonian)
    0xA6, "&Iukcy;",  # ISOcyr2: CYRILLIC CAPITAL LETTER
                      #          BYELORUSSIAN-UKRAINIAN I
    0xA7, "&YIcy;",   # ISOcyr2: CYRILLIC CAPITAL LETTER YI
                      #          (Ukrainian)
    0xA8, "&Jsercy;", # ISOcyr2: CYRILLIC CAPITAL LETTER JE
    0xA9, "&LJcy;",   # ISOcyr2: CYRILLIC CAPITAL LETTER LJE
    0xAA, "&NJcy;",   # ISOcyr2: CYRILLIC CAPITAL LETTER NJE
    0xAB, "&TSHcy;",  # ISOcyr2: CYRILLIC CAPITAL LETTER TSHE
                      #          (Serbocroatian)
    0xAC, "&KJcy;",   # ISOcyr2: CYRILLIC CAPITAL LETTER KJE
                      #          (Macedonian)
    0xAD, "&shy;",    # ISOnum : SOFT HYPHEN
    0xAE, "&Ubrcy;",  # ISOcyr2: CYRILLIC CAPITAL LETTER SHORT U
                      #          (Byelorussian)
    0xAF, "&DZcy;",   # ISOcyr2: CYRILLIC CAPITAL LETTER DZHE
    0xB0, "&Acy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER A
    0xB1, "&Bcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER BE
    0xB2, "&Vcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER VE
    0xB3, "&Gcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER GHE
    0xB4, "&Dcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER DE
    0xB5, "&IEcy;",   # ISOcyr1: CYRILLIC CAPITAL LETTER IE
    0xB6, "&ZHcy;",   # ISOcyr1: CYRILLIC CAPITAL LETTER ZHE
    0xB7, "&Zcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER ZE
    0xB8, "&Icy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER I
    0xB9, "&Jcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER SHORT I
    0xBA, "&Kcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER KA
    0xBB, "&Lcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER EL
    0xBC, "&Mcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER EM
    0xBD, "&Ncy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER EN
    0xBE, "&Ocy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER O
    0xBF, "&Pcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER PE
    0xC0, "&Rcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER ER
    0xC1, "&Scy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER ES
    0xC2, "&Tcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER TE
    0xC3, "&Ucy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER U
    0xC4, "&Fcy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER EF
    0xC5, "&KHcy;",   # ISOcyr1: CYRILLIC CAPITAL LETTER HA
    0xC6, "&TScy;",   # ISOcyr1: CYRILLIC CAPITAL LETTER TSE
    0xC7, "&CHcy;",   # ISOcyr1: CYRILLIC CAPITAL LETTER CHE
    0xC8, "&SHcy;",   # ISOcyr1: CYRILLIC CAPITAL LETTER SHA
    0xC9, "&SHCHcy;", # ISOcyr1: CYRILLIC CAPITAL LETTER SHCHA
    0xCA, "&HARDcy;", # ISOcyr1: CYRILLIC CAPITAL LETTER HARD SIGN
    0xCB, "&Ycy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER YERU
    0xCC, "&SOFTcy;", # ISOcyr1: CYRILLIC CAPITAL LETTER SOFT SIGN
    0xCD, "&Ecy;",    # ISOcyr1: CYRILLIC CAPITAL LETTER E
    0xCE, "&YUcy;",   # ISOcyr1: CYRILLIC CAPITAL LETTER YU
    0xCF, "&YAcy;",   # ISOcyr1: CYRILLIC CAPITAL LETTER YA
    0xD0, "&acy;",    # ISOcyr1: CYRILLIC SMALL LETTER A
    0xD1, "&bcy;",    # ISOcyr1: CYRILLIC SMALL LETTER BE
    0xD2, "&vcy;",    # ISOcyr1: CYRILLIC SMALL LETTER VE
    0xD3, "&gcy;",    # ISOcyr1: CYRILLIC SMALL LETTER GHE
    0xD4, "&dcy;",    # ISOcyr1: CYRILLIC SMALL LETTER DE
    0xD5, "&iecy;",   # ISOcyr1: CYRILLIC SMALL LETTER IE
    0xD6, "&zhcy;",   # ISOcyr1: CYRILLIC SMALL LETTER ZHE
    0xD7, "&zcy;",    # ISOcyr1: CYRILLIC SMALL LETTER ZE
    0xD8, "&icy;",    # ISOcyr1: CYRILLIC SMALL LETTER I
    0xD9, "&jcy;",    # ISOcyr1: CYRILLIC SMALL LETTER SHORT I
    0xDA, "&kcy;",    # ISOcyr1: CYRILLIC SMALL LETTER KA
    0xDB, "&lcy;",    # ISOcyr1: CYRILLIC SMALL LETTER EL
    0xDC, "&mcy;",    # ISOcyr1: CYRILLIC SMALL LETTER EM
    0xDD, "&ncy;",    # ISOcyr1: CYRILLIC SMALL LETTER EN
    0xDE, "&ocy;",    # ISOcyr1: CYRILLIC SMALL LETTER O
    0xDF, "&pcy;",    # ISOcyr1: CYRILLIC SMALL LETTER PE
    0xE0, "&rcy;",    # ISOcyr1: CYRILLIC SMALL LETTER ER
    0xE1, "&scy;",    # ISOcyr1: CYRILLIC SMALL LETTER ES
    0xE2, "&tcy;",    # ISOcyr1: CYRILLIC SMALL LETTER TE
    0xE3, "&ucy;",    # ISOcyr1: CYRILLIC SMALL LETTER U
    0xE4, "&fcy;",    # ISOcyr1: CYRILLIC SMALL LETTER EF
    0xE5, "&khcy;",   # ISOcyr1: CYRILLIC SMALL LETTER HA
    0xE6, "&tscy;",   # ISOcyr1: CYRILLIC SMALL LETTER TSE
    0xE7, "&chcy;",   # ISOcyr1: CYRILLIC SMALL LETTER CHE
    0xE8, "&shcy;",   # ISOcyr1: CYRILLIC SMALL LETTER SHA
    0xE9, "&shchcy;", # ISOcyr1: CYRILLIC SMALL LETTER SHCHA
    0xEA, "&hardcy;", # ISOcyr1: CYRILLIC SMALL LETTER HARD SIGN
    0xEB, "&ycy;",    # ISOcyr1: CYRILLIC SMALL LETTER YERU
    0xEC, "&softcy;", # ISOcyr1: CYRILLIC SMALL LETTER SOFT SIGN
    0xED, "&ecy;",    # ISOcyr1: CYRILLIC SMALL LETTER E
    0xEE, "&yucy;",   # ISOcyr1: CYRILLIC SMALL LETTER YU
    0xEF, "&yacy;",   # ISOcyr1: CYRILLIC SMALL LETTER YA
    0xF0, "&numero;", # ISOcyr1: NUMERO SIGN
    0xF1, "&iocy;",   # ISOcyr1: CYRILLIC SMALL LETTER IO
    0xF2, "&djcy;",   # ISOcyr2: CYRILLIC SMALL LETTER DJE
                      #          (Serbocroatian)
    0xF3, "&gjcy;",   # ISOcyr2: CYRILLIC SMALL LETTER GJE
                      #          (Macedonian)
    0xF4, "&jukcy;",  # ISOcyr2: CYRILLIC SMALL LETTER UKRAINIAN IE
    0xF5, "&dscy;",   # ISOcyr2: CYRILLIC SMALL LETTER DZE
                      #          (Macedonian)
    0xF6, "&iukcy;",  # ISOcyr2: CYRILLIC SMALL LETTER
                      #          BYELORUSSIAN-UKRAINIAN I
    0xF7, "&yicy;",   # ISOcyr2: CYRILLIC SMALL LETTER YI
                      #          (Ukrainian)
    0xF8, "&jsercy;", # ISOcyr2: CYRILLIC SMALL LETTER JE
    0xF9, "&ljcy;",   # ISOcyr2: CYRILLIC SMALL LETTER LJE
    0xFA, "&njcy;",   # ISOcyr2: CYRILLIC SMALL LETTER NJE
    0xFB, "&tshcy;",  # ISOcyr2: CYRILLIC SMALL LETTER TSHE
                      #          (Serbocroatian)
    0xFC, "&kjcy;",   # ISOcyr2: CYRILLIC SMALL LETTER KJE
                      #          (Macedonian)
    0xFD, "&sect;",   # ISOnum : SECTION SIGN
    0xFE, "&ubrcy;",  # ISOcyr2: CYRILLIC SMALL LETTER SHORT U
                      #          (Byelorussian)
    0xFF, "&dzcy;",   # ISOcyr2: CYRILLIC SMALL LETTER DZHE
);
650
651##---------------------------------------------------------------------------
652## ISO-8859-6: Arabic
653##---------------------------------------------------------------------------
## Note: There are no ISO entities for Arabic characters.  Some of
## the following are non-standard entity references.  "ISOarbc"
## is used as the name of the entity set defining the Arabic entities,
## in anticipation that ISO will define such a set.
658
## Maps ISO-8859-6 character codes to SGML entity references.  Every value
## is a plain-ASCII "&name;" reference; codes that are not listed have no
## entry in this table.
%ISO_8859_6_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code  Entity Ref  # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA4, "&curren;",   # ISOnum : CURRENCY SIGN
    0xAC, "&arcomma;",  # ISOarbc: ARABIC COMMA
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xBB, "&arsemi;",   # ISOarbc: ARABIC SEMICOLON
    0xBF, "&arquest;",  # ISOarbc: ARABIC QUESTION MARK
    0xC1, "&hamz;",     # ISOarbc: ARABIC LETTER HAMZA
    0xC2, "&alefmadd;", # ISOarbc: ARABIC LETTER ALEF WITH MADDA ABOVE
    0xC3, "&alefhamz;", # ISOarbc: ARABIC LETTER ALEF WITH HAMZA ABOVE
    0xC4, "&wawhamz;",  # ISOarbc: ARABIC LETTER WAW WITH HAMZA ABOVE
    # NOTE(review): 0xC5 (hamza BELOW) reuses the entity name of 0xC3
    # (hamza ABOVE), so the two letters cannot be told apart in the
    # output.  Left unchanged because these names are non-standard and no
    # ISO entity set supplies a distinct one -- confirm the intended name.
    0xC5, "&alefhamz;", # ISOarbc: ARABIC LETTER ALEF WITH HAMZA BELOW
    0xC6, "&yehhamz;",  # ISOarbc: ARABIC LETTER YEH WITH HAMZA ABOVE
    0xC7, "&alef;",     # ISOarbc: ARABIC LETTER ALEF
    0xC8, "&beh;",      # ISOarbc: ARABIC LETTER BEH
    0xC9, "&tehmarb;",  # ISOarbc: ARABIC LETTER TEH MARBUTA
    0xCA, "&teh;",      # ISOarbc: ARABIC LETTER TEH
    0xCB, "&theh;",     # ISOarbc: ARABIC LETTER THEH
    0xCC, "&jeem;",     # ISOarbc: ARABIC LETTER JEEM
    0xCD, "&hah;",      # ISOarbc: ARABIC LETTER HAH
    0xCE, "&khah;",     # ISOarbc: ARABIC LETTER KHAH
    0xCF, "&dal;",      # ISOarbc: ARABIC LETTER DAL
    0xD0, "&thal;",     # ISOarbc: ARABIC LETTER THAL
    0xD1, "&reh;",      # ISOarbc: ARABIC LETTER REH
    0xD2, "&zain;",     # ISOarbc: ARABIC LETTER ZAIN
    0xD3, "&seen;",     # ISOarbc: ARABIC LETTER SEEN
    0xD4, "&sheen;",    # ISOarbc: ARABIC LETTER SHEEN
    0xD5, "&sad;",      # ISOarbc: ARABIC LETTER SAD
    0xD6, "&dad;",      # ISOarbc: ARABIC LETTER DAD
    0xD7, "&tah;",      # ISOarbc: ARABIC LETTER TAH
    0xD8, "&zah;",      # ISOarbc: ARABIC LETTER ZAH
    0xD9, "&ain;",      # ISOarbc: ARABIC LETTER AIN
    0xDA, "&ghain;",    # ISOarbc: ARABIC LETTER GHAIN
    0xE0, "&tatweel;",  # ISOarbc: ARABIC TATWEEL
    0xE1, "&feh;",      # ISOarbc: ARABIC LETTER FEH
    0xE2, "&qaf;",      # ISOarbc: ARABIC LETTER QAF
    0xE3, "&kaf;",      # ISOarbc: ARABIC LETTER KAF
    0xE4, "&lam;",      # ISOarbc: ARABIC LETTER LAM
    0xE5, "&meem;",     # ISOarbc: ARABIC LETTER MEEM
    0xE6, "&noon;",     # ISOarbc: ARABIC LETTER NOON
    0xE7, "&heh;",      # ISOarbc: ARABIC LETTER HEH
    0xE8, "&waw;",      # ISOarbc: ARABIC LETTER WAW
    0xE9, "&alefmaks;", # ISOarbc: ARABIC LETTER ALEF MAKSURA
    0xEA, "&yeh;",      # ISOarbc: ARABIC LETTER YEH
    0xEB, "&fathatan;", # ISOarbc: ARABIC FATHATAN
    0xEC, "&dammatan;", # ISOarbc: ARABIC DAMMATAN
    0xED, "&kasratan;", # ISOarbc: ARABIC KASRATAN
    0xEE, "&fatha;",    # ISOarbc: ARABIC FATHA
    0xEF, "&damma;",    # ISOarbc: ARABIC DAMMA
    0xF0, "&kasra;",    # ISOarbc: ARABIC KASRA
    0xF1, "&shadda;",   # ISOarbc: ARABIC SHADDA
    0xF2, "&sukun;",    # ISOarbc: ARABIC SUKUN
);
714
715##---------------------------------------------------------------------------
716## ISO-8859-7: Greek
717##---------------------------------------------------------------------------
718
## Maps ISO-8859-7 character codes to SGML entity references.  Every value
## is a plain-ASCII "&name;" reference; codes that are not listed have no
## entry in this table.
%ISO_8859_7_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code  Entity Ref  # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA1, "&lsquo;",    # ISOnum : SINGLE HIGH-REVERSED-9 QUOTATION
                        #          MARK
    0xA2, "&rsquo;",    # ISOnum : RIGHT SINGLE QUOTATION MARK
    0xA3, "&pound;",    # ISOnum : POUND SIGN
    0xA6, "&brvbar;",   # ISOnum : BROKEN BAR
    0xA7, "&sect;",     # ISOnum : SECTION SIGN
    0xA8, "&uml;",      # ISOdia : DIAERESIS
    0xA9, "&copy;",     # ISOnum : COPYRIGHT SIGN
    0xAB, "&laquo;",    # ISOnum : LEFT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xAC, "&not;",      # ISOnum : NOT SIGN
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xAF, "&mdash;",    # ISOpub : EM DASH
    0xB0, "&deg;",      # ISOnum : DEGREE SIGN
    0xB1, "&plusmn;",   # ISOnum : PLUS-MINUS SIGN
    0xB2, "&sup2;",     # ISOnum : SUPERSCRIPT TWO
    0xB3, "&sup3;",     # ISOnum : SUPERSCRIPT THREE
    0xB4, "&acute;",    # ISOdia : ACUTE ACCENT
    0xB5, "&diagr;",    # ISOgrk?: ACUTE ACCENT AND DIAERESIS
                        #          (Tonos and Dialytika)
    0xB6, "&Aacgr;",    # ISOgrk2: GREEK CAPITAL LETTER ALPHA WITH
                        #          ACUTE
    0xB7, "&middot;",   # ISOnum : MIDDLE DOT
    0xB8, "&Eacgr;",    # ISOgrk2: GREEK CAPITAL LETTER EPSILON WITH
                        #          ACUTE
    0xB9, "&EEacgr;",   # ISOgrk2: GREEK CAPITAL LETTER ETA WITH ACUTE
    0xBA, "&Iacgr;",    # ISOgrk2: GREEK CAPITAL LETTER IOTA WITH ACUTE
    0xBB, "&raquo;",    # ISOnum : RIGHT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xBC, "&Oacgr;",    # ISOgrk2: GREEK CAPITAL LETTER OMICRON WITH
                        #          ACUTE
    0xBD, "&frac12;",   # ISOnum : VULGAR FRACTION ONE HALF
    0xBE, "&Uacgr;",    # ISOgrk2: GREEK CAPITAL LETTER UPSILON WITH
                        #          ACUTE
    0xBF, "&OHacgr;",   # ISOgrk2: GREEK CAPITAL LETTER OMEGA WITH
                        #          ACUTE
    0xC0, "&idiagr;",   # ISOgrk2: GREEK SMALL LETTER IOTA WITH ACUTE
                        #          AND DIAERESIS
    0xC1, "&Agr;",      # ISOgrk1: GREEK CAPITAL LETTER ALPHA
    0xC2, "&Bgr;",      # ISOgrk1: GREEK CAPITAL LETTER BETA
    0xC3, "&Ggr;",      # ISOgrk1: GREEK CAPITAL LETTER GAMMA
    0xC4, "&Dgr;",      # ISOgrk1: GREEK CAPITAL LETTER DELTA
    0xC5, "&Egr;",      # ISOgrk1: GREEK CAPITAL LETTER EPSILON
    0xC6, "&Zgr;",      # ISOgrk1: GREEK CAPITAL LETTER ZETA
    0xC7, "&EEgr;",     # ISOgrk1: GREEK CAPITAL LETTER ETA
    0xC8, "&THgr;",     # ISOgrk1: GREEK CAPITAL LETTER THETA
    0xC9, "&Igr;",      # ISOgrk1: GREEK CAPITAL LETTER IOTA
    0xCA, "&Kgr;",      # ISOgrk1: GREEK CAPITAL LETTER KAPPA
    0xCB, "&Lgr;",      # ISOgrk1: GREEK CAPITAL LETTER LAMDA
    0xCC, "&Mgr;",      # ISOgrk1: GREEK CAPITAL LETTER MU
    0xCD, "&Ngr;",      # ISOgrk1: GREEK CAPITAL LETTER NU
    0xCE, "&Xgr;",      # ISOgrk1: GREEK CAPITAL LETTER XI
    0xCF, "&Ogr;",      # ISOgrk1: GREEK CAPITAL LETTER OMICRON
    0xD0, "&Pgr;",      # ISOgrk1: GREEK CAPITAL LETTER PI
    0xD1, "&Rgr;",      # ISOgrk1: GREEK CAPITAL LETTER RHO
    0xD3, "&Sgr;",      # ISOgrk1: GREEK CAPITAL LETTER SIGMA
    0xD4, "&Tgr;",      # ISOgrk1: GREEK CAPITAL LETTER TAU
    0xD5, "&Ugr;",      # ISOgrk1: GREEK CAPITAL LETTER UPSILON
    0xD6, "&PHgr;",     # ISOgrk1: GREEK CAPITAL LETTER PHI
    0xD7, "&KHgr;",     # ISOgrk1: GREEK CAPITAL LETTER CHI
    0xD8, "&PSgr;",     # ISOgrk1: GREEK CAPITAL LETTER PSI
    0xD9, "&OHgr;",     # ISOgrk1: GREEK CAPITAL LETTER OMEGA
    0xDA, "&Idigr;",    # ISOgrk2: GREEK CAPITAL LETTER IOTA WITH
                        #          DIAERESIS
    0xDB, "&Udigr;",    # ISOgrk2: GREEK CAPITAL LETTER UPSILON WITH
                        #          DIAERESIS
    0xDC, "&aacgr;",    # ISOgrk2: GREEK SMALL LETTER ALPHA WITH ACUTE
    0xDD, "&eacgr;",    # ISOgrk2: GREEK SMALL LETTER EPSILON WITH
                        #          ACUTE
    0xDE, "&eeacgr;",   # ISOgrk2: GREEK SMALL LETTER ETA WITH ACUTE
    0xDF, "&iacgr;",    # ISOgrk2: GREEK SMALL LETTER IOTA WITH ACUTE
    0xE0, "&udiagr;",   # ISOgrk2: GREEK SMALL LETTER UPSILON WITH
                        #          ACUTE AND DIAERESIS
    0xE1, "&agr;",      # ISOgrk1: GREEK SMALL LETTER ALPHA
    0xE2, "&bgr;",      # ISOgrk1: GREEK SMALL LETTER BETA
    0xE3, "&ggr;",      # ISOgrk1: GREEK SMALL LETTER GAMMA
    0xE4, "&dgr;",      # ISOgrk1: GREEK SMALL LETTER DELTA
    0xE5, "&egr;",      # ISOgrk1: GREEK SMALL LETTER EPSILON
    0xE6, "&zgr;",      # ISOgrk1: GREEK SMALL LETTER ZETA
    0xE7, "&eegr;",     # ISOgrk1: GREEK SMALL LETTER ETA
    0xE8, "&thgr;",     # ISOgrk1: GREEK SMALL LETTER THETA
    0xE9, "&igr;",      # ISOgrk1: GREEK SMALL LETTER IOTA
    0xEA, "&kgr;",      # ISOgrk1: GREEK SMALL LETTER KAPPA
    0xEB, "&lgr;",      # ISOgrk1: GREEK SMALL LETTER LAMDA
    0xEC, "&mgr;",      # ISOgrk1: GREEK SMALL LETTER MU
    0xED, "&ngr;",      # ISOgrk1: GREEK SMALL LETTER NU
    0xEE, "&xgr;",      # ISOgrk1: GREEK SMALL LETTER XI
    0xEF, "&ogr;",      # ISOgrk1: GREEK SMALL LETTER OMICRON
    0xF0, "&pgr;",      # ISOgrk1: GREEK SMALL LETTER PI
    0xF1, "&rgr;",      # ISOgrk1: GREEK SMALL LETTER RHO
    0xF2, "&sfgr;",     # ISOgrk1: GREEK SMALL LETTER FINAL SIGMA
    0xF3, "&sgr;",      # ISOgrk1: GREEK SMALL LETTER SIGMA
    0xF4, "&tgr;",      # ISOgrk1: GREEK SMALL LETTER TAU
    0xF5, "&ugr;",      # ISOgrk1: GREEK SMALL LETTER UPSILON
    0xF6, "&phgr;",     # ISOgrk1: GREEK SMALL LETTER PHI
    0xF7, "&khgr;",     # ISOgrk1: GREEK SMALL LETTER CHI
    0xF8, "&psgr;",     # ISOgrk1: GREEK SMALL LETTER PSI
    0xF9, "&ohgr;",     # ISOgrk1: GREEK SMALL LETTER OMEGA
    0xFA, "&idigr;",    # ISOgrk2: GREEK SMALL LETTER IOTA WITH
                        #          DIAERESIS
    0xFB, "&udigr;",    # ISOgrk2: GREEK SMALL LETTER UPSILON WITH
                        #          DIAERESIS
    0xFC, "&oacgr;",    # ISOgrk2: GREEK SMALL LETTER OMICRON WITH
                        #          ACUTE
    0xFD, "&uacgr;",    # ISOgrk2: GREEK SMALL LETTER UPSILON WITH
                        #          ACUTE
    0xFE, "&ohacgr;",   # ISOgrk2: GREEK SMALL LETTER OMEGA WITH ACUTE
);
831
832##---------------------------------------------------------------------------
833## ISO-8859-8: Hebrew
834##---------------------------------------------------------------------------
## Note: There are no ISO entities for Hebrew characters.  ISOamso
##       defines a few characters, but they are for math purposes.
##       Some of the following are non-standard entity references.
##       "ISOhbrw" is used as the entity defining the Hebrew entities
##       in anticipation that ISO will define such an entity.
840
## Maps ISO-8859-8 character codes to SGML entity references.  Every value
## is a plain-ASCII "&name;" reference; codes that are not listed have no
## entry in this table.
%ISO_8859_8_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code  Entity Ref  # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA2, "&cent;",     # ISOnum : CENT SIGN
    0xA3, "&pound;",    # ISOnum : POUND SIGN
    0xA4, "&curren;",   # ISOnum : CURRENCY SIGN
    0xA5, "&yen;",      # ISOnum : YEN SIGN
    0xA6, "&brvbar;",   # ISOnum : BROKEN BAR
    0xA7, "&sect;",     # ISOnum : SECTION SIGN
    0xA8, "&uml;",      # ISOdia : DIAERESIS
    0xA9, "&copy;",     # ISOnum : COPYRIGHT SIGN
    0xAA, "&times;",    # ISOnum : MULTIPLICATION SIGN
    0xAB, "&laquo;",    # ISOnum : LEFT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xAC, "&not;",      # ISOnum : NOT SIGN
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xAE, "&reg;",      # ISOnum : REGISTERED SIGN
    0xAF, "&macr;",     # ISOdia : OVERLINE (MACRON)
    0xB0, "&deg;",      # ISOnum : DEGREE SIGN
    0xB1, "&plusmn;",   # ISOnum : PLUS-MINUS SIGN
    0xB2, "&sup2;",     # ISOnum : SUPERSCRIPT TWO
    0xB3, "&sup3;",     # ISOnum : SUPERSCRIPT THREE
    0xB4, "&acute;",    # ISOdia : ACUTE ACCENT
    0xB5, "&micro;",    # ISOnum : MICRO SIGN
    0xB6, "&para;",     # ISOnum : PILCROW SIGN
    0xB7, "&middot;",   # ISOnum : MIDDLE DOT
    0xB8, "&cedil;",    # ISOdia : CEDILLA
    0xB9, "&sup1;",     # ISOnum : SUPERSCRIPT ONE
    0xBA, "&divide;",   # ISOnum : DIVISION SIGN
    0xBB, "&raquo;",    # ISOnum : RIGHT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xBC, "&frac14;",   # ISOnum : VULGAR FRACTION ONE QUARTER
    0xBD, "&frac12;",   # ISOnum : VULGAR FRACTION ONE HALF
    0xBE, "&frac34;",   # ISOnum : VULGAR FRACTION THREE QUARTERS
    0xDF, "&dlowbar;",  # ISOnum?: DOUBLE LOW LINE
    0xE0, "&alef;",     # ISOhbrw: HEBREW LETTER ALEF
    0xE1, "&bet;",      # ISOhbrw: HEBREW LETTER BET
    0xE2, "&gimel;",    # ISOhbrw: HEBREW LETTER GIMEL
    0xE3, "&dalet;",    # ISOhbrw: HEBREW LETTER DALET
    0xE4, "&he;",       # ISOhbrw: HEBREW LETTER HE
    0xE5, "&vav;",      # ISOhbrw: HEBREW LETTER VAV
    0xE6, "&zayin;",    # ISOhbrw: HEBREW LETTER ZAYIN
    0xE7, "&het;",      # ISOhbrw: HEBREW LETTER HET
    0xE8, "&tet;",      # ISOhbrw: HEBREW LETTER TET
    0xE9, "&yod;",      # ISOhbrw: HEBREW LETTER YOD
    0xEA, "&fkaf;",     # ISOhbrw: HEBREW LETTER FINAL KAF
    0xEB, "&kaf;",      # ISOhbrw: HEBREW LETTER KAF
    0xEC, "&lamed;",    # ISOhbrw: HEBREW LETTER LAMED
    0xED, "&fmem;",     # ISOhbrw: HEBREW LETTER FINAL MEM
    0xEE, "&mem;",      # ISOhbrw: HEBREW LETTER MEM
    0xEF, "&fnun;",     # ISOhbrw: HEBREW LETTER FINAL NUN
    0xF0, "&nun;",      # ISOhbrw: HEBREW LETTER NUN
    0xF1, "&samekh;",   # ISOhbrw: HEBREW LETTER SAMEKH
    0xF2, "&ayin;",     # ISOhbrw: HEBREW LETTER AYIN
    0xF3, "&fpe;",      # ISOhbrw: HEBREW LETTER FINAL PE
    0xF4, "&pe;",       # ISOhbrw: HEBREW LETTER PE
    0xF5, "&ftsadi;",   # ISOhbrw: HEBREW LETTER FINAL TSADI
    0xF6, "&tsadi;",    # ISOhbrw: HEBREW LETTER TSADI
    0xF7, "&qof;",      # ISOhbrw: HEBREW LETTER QOF
    0xF8, "&resh;",     # ISOhbrw: HEBREW LETTER RESH
    0xF9, "&shin;",     # ISOhbrw: HEBREW LETTER SHIN
    0xFA, "&tav;",      # ISOhbrw: HEBREW LETTER TAV
);
905
906##---------------------------------------------------------------------------
907## ISO-8859-9: Latin-5
908##---------------------------------------------------------------------------
909
## Maps ISO-8859-9 (Latin-5) character codes 0xA1-0xFF to SGML entity
## references.  Every value is a plain-ASCII "&name;" reference.
##
## ISO-8859-9 is Latin-1 with six Turkish letters substituted at 0xD0,
## 0xDD, 0xDE, 0xF0, 0xFD and 0xFE.  Positions 0xEA, 0xEC and 0xEF are
## therefore the Latin-1 letters e-circumflex, i-grave and i-diaeresis.
%ISO_8859_9_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code  Entity Ref  # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA1, "&iexcl;",    # ISOnum : INVERTED EXCLAMATION MARK
    0xA2, "&cent;",     # ISOnum : CENT SIGN
    0xA3, "&pound;",    # ISOnum : POUND SIGN
    0xA4, "&curren;",   # ISOnum : CURRENCY SIGN
    0xA5, "&yen;",      # ISOnum : YEN SIGN
    0xA6, "&brvbar;",   # ISOnum : BROKEN BAR
    0xA7, "&sect;",     # ISOnum : SECTION SIGN
    0xA8, "&uml;",      # ISOdia : DIAERESIS
    0xA9, "&copy;",     # ISOnum : COPYRIGHT SIGN
    0xAA, "&ordf;",     # ISOnum : FEMININE ORDINAL INDICATOR
    0xAB, "&laquo;",    # ISOnum : LEFT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xAC, "&not;",      # ISOnum : NOT SIGN
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xAE, "&reg;",      # ISOnum : REGISTERED SIGN
    0xAF, "&macr;",     # ISOdia : OVERLINE (MACRON)
    0xB0, "&deg;",      # ISOnum : DEGREE SIGN
    0xB1, "&plusmn;",   # ISOnum : PLUS-MINUS SIGN
    0xB2, "&sup2;",     # ISOnum : SUPERSCRIPT TWO
    0xB3, "&sup3;",     # ISOnum : SUPERSCRIPT THREE
    0xB4, "&acute;",    # ISOdia : ACUTE ACCENT
    0xB5, "&micro;",    # ISOnum : MICRO SIGN
    0xB6, "&para;",     # ISOnum : PILCROW SIGN
    0xB7, "&middot;",   # ISOnum : MIDDLE DOT
    0xB8, "&cedil;",    # ISOdia : CEDILLA
    0xB9, "&sup1;",     # ISOnum : SUPERSCRIPT ONE
    0xBA, "&ordm;",     # ISOnum : MASCULINE ORDINAL INDICATOR
    0xBB, "&raquo;",    # ISOnum : RIGHT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xBC, "&frac14;",   # ISOnum : VULGAR FRACTION ONE QUARTER
    0xBD, "&frac12;",   # ISOnum : VULGAR FRACTION ONE HALF
    0xBE, "&frac34;",   # ISOnum : VULGAR FRACTION THREE QUARTERS
    0xBF, "&iquest;",   # ISOnum : INVERTED QUESTION MARK
    0xC0, "&Agrave;",   # ISOlat1: LATIN CAPITAL LETTER A WITH GRAVE
    0xC1, "&Aacute;",   # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2, "&Acirc;",    # ISOlat1: LATIN CAPITAL LETTER A WITH
                        #          CIRCUMFLEX
    0xC3, "&Atilde;",   # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE
    0xC4, "&Auml;",     # ISOlat1: LATIN CAPITAL LETTER A WITH
                        #          DIAERESIS
    0xC5, "&Aring;",    # ISOlat1: LATIN CAPITAL LETTER A WITH RING
                        #          ABOVE
    0xC6, "&AElig;",    # ISOlat1: LATIN CAPITAL LETTER AE
    0xC7, "&Ccedil;",   # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA
    0xC8, "&Egrave;",   # ISOlat1: LATIN CAPITAL LETTER E WITH GRAVE
    0xC9, "&Eacute;",   # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA, "&Ecirc;",    # ISOlat1: LATIN CAPITAL LETTER E WITH
                        #          CIRCUMFLEX
    0xCB, "&Euml;",     # ISOlat1: LATIN CAPITAL LETTER E WITH
                        #          DIAERESIS
    0xCC, "&Igrave;",   # ISOlat1: LATIN CAPITAL LETTER I WITH GRAVE
    0xCD, "&Iacute;",   # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE, "&Icirc;",    # ISOlat1: LATIN CAPITAL LETTER I WITH
                        #          CIRCUMFLEX
    0xCF, "&Iuml;",     # ISOlat1: LATIN CAPITAL LETTER I WITH
                        #          DIAERESIS
    0xD0, "&Gbreve;",   # ISOlat2: LATIN CAPITAL LETTER G WITH BREVE
    0xD1, "&Ntilde;",   # ISOlat1: LATIN CAPITAL LETTER N WITH TILDE
    0xD2, "&Ograve;",   # ISOlat1: LATIN CAPITAL LETTER O WITH GRAVE
    0xD3, "&Oacute;",   # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4, "&Ocirc;",    # ISOlat1: LATIN CAPITAL LETTER O WITH
                        #          CIRCUMFLEX
    0xD5, "&Otilde;",   # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE
    0xD6, "&Ouml;",     # ISOlat1: LATIN CAPITAL LETTER O WITH
                        #          DIAERESIS
    0xD7, "&times;",    # ISOnum : MULTIPLICATION SIGN
    0xD8, "&Oslash;",   # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE
    0xD9, "&Ugrave;",   # ISOlat1: LATIN CAPITAL LETTER U WITH GRAVE
    0xDA, "&Uacute;",   # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB, "&Ucirc;",    # ISOlat1: LATIN CAPITAL LETTER U WITH
                        #          CIRCUMFLEX
    0xDC, "&Uuml;",     # ISOlat1: LATIN CAPITAL LETTER U WITH
                        #          DIAERESIS
    0xDD, "&Idot;",     # ISOlat2: LATIN CAPITAL LETTER I WITH DOT
                        #          ABOVE
    0xDE, "&Scedil;",   # ISOlat2: LATIN CAPITAL LETTER S WITH CEDILLA
    0xDF, "&szlig;",    # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0, "&agrave;",   # ISOlat1: LATIN SMALL LETTER A WITH GRAVE
    0xE1, "&aacute;",   # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2, "&acirc;",    # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE3, "&atilde;",   # ISOlat1: LATIN SMALL LETTER A WITH TILDE
    0xE4, "&auml;",     # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5, "&aring;",    # ISOlat1: LATIN SMALL LETTER A WITH RING ABOVE
    0xE6, "&aelig;",    # ISOlat1: LATIN SMALL LETTER AE
    0xE7, "&ccedil;",   # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA
    0xE8, "&egrave;",   # ISOlat1: LATIN SMALL LETTER E WITH GRAVE
    0xE9, "&eacute;",   # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA, "&ecirc;",    # ISOlat1: LATIN SMALL LETTER E WITH CIRCUMFLEX
    0xEB, "&euml;",     # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC, "&igrave;",   # ISOlat1: LATIN SMALL LETTER I WITH GRAVE
    0xED, "&iacute;",   # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE, "&icirc;",    # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF, "&iuml;",     # ISOlat1: LATIN SMALL LETTER I WITH DIAERESIS
    0xF0, "&gbreve;",   # ISOlat2: LATIN SMALL LETTER G WITH BREVE
    0xF1, "&ntilde;",   # ISOlat1: LATIN SMALL LETTER N WITH TILDE
    0xF2, "&ograve;",   # ISOlat1: LATIN SMALL LETTER O WITH GRAVE
    0xF3, "&oacute;",   # ISOlat1: LATIN SMALL LETTER O WITH ACUTE
    0xF4, "&ocirc;",    # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5, "&otilde;",   # ISOlat1: LATIN SMALL LETTER O WITH TILDE
    0xF6, "&ouml;",     # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7, "&divide;",   # ISOnum : DIVISION SIGN
    0xF8, "&oslash;",   # ISOlat1: LATIN SMALL LETTER O WITH STROKE
    0xF9, "&ugrave;",   # ISOlat1: LATIN SMALL LETTER U WITH GRAVE
    0xFA, "&uacute;",   # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB, "&ucirc;",    # ISOlat1: LATIN SMALL LETTER U WITH CIRCUMFLEX
    0xFC, "&uuml;",     # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD, "&inodot;",   # ISOlat2: LATIN SMALL LETTER I DOTLESS
    0xFE, "&scedil;",   # ISOlat2: LATIN SMALL LETTER S WITH CEDILLA
    0xFF, "&yuml;",     # ISOlat1: LATIN SMALL LETTER Y WITH DIAERESIS
);
# "LATIN5" is an alias for the same table.
*LATIN5_To_Ent = *ISO_8859_9_To_Ent;
1025
1026##---------------------------------------------------------------------------
1027## ISO-8859-10: Latin-6
1028##---------------------------------------------------------------------------
1029
## Maps ISO-8859-10 (Latin-6) character codes 0xA1-0xFF to SGML entity
## references.  Every value is a plain-ASCII "&name;" reference.
##
## The code positions follow the published ISO-8859-10 standard.
## NOTE(review): "&gcedil;" (0xB3) is kept from the earlier table; confirm
## it is defined by the entity sets in use.
%ISO_8859_10_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code  Entity Ref  # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA1, "&Aogon;",    # ISOlat2: LATIN CAPITAL LETTER A WITH OGONEK
    0xA2, "&Emacr;",    # ISOlat2: LATIN CAPITAL LETTER E WITH MACRON
    0xA3, "&Gcedil;",   # ISOlat2: LATIN CAPITAL LETTER G WITH CEDILLA
    0xA4, "&Imacr;",    # ISOlat2: LATIN CAPITAL LETTER I WITH MACRON
    0xA5, "&Itilde;",   # ISOlat2: LATIN CAPITAL LETTER I WITH TILDE
    0xA6, "&Kcedil;",   # ISOlat2: LATIN CAPITAL LETTER K WITH CEDILLA
    0xA7, "&sect;",     # ISOnum : SECTION SIGN
    0xA8, "&Lcedil;",   # ISOlat2: LATIN CAPITAL LETTER L WITH CEDILLA
    0xA9, "&Dstrok;",   # ISOlat2: LATIN CAPITAL LETTER D WITH STROKE
    0xAA, "&Scaron;",   # ISOlat2: LATIN CAPITAL LETTER S WITH CARON
    0xAB, "&Tstrok;",   # ISOlat2: LATIN CAPITAL LETTER T WITH STROKE
    0xAC, "&Zcaron;",   # ISOlat2: LATIN CAPITAL LETTER Z WITH CARON
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xAE, "&Umacr;",    # ISOlat2: LATIN CAPITAL LETTER U WITH MACRON
    0xAF, "&ENG;",      # ISOlat2: LATIN CAPITAL LETTER ENG (Lappish)
    0xB0, "&deg;",      # ISOnum : DEGREE SIGN
    0xB1, "&aogon;",    # ISOlat2: LATIN SMALL LETTER a WITH OGONEK
    0xB2, "&emacr;",    # ISOlat2: LATIN SMALL LETTER e WITH MACRON
    0xB3, "&gcedil;",   # ISOlat2: LATIN SMALL LETTER g WITH CEDILLA
    0xB4, "&imacr;",    # ISOlat2: LATIN SMALL LETTER i WITH MACRON
    0xB5, "&itilde;",   # ISOlat2: LATIN SMALL LETTER i WITH TILDE
    0xB6, "&kcedil;",   # ISOlat2: LATIN SMALL LETTER k WITH CEDILLA
    0xB7, "&middot;",   # ISOnum : MIDDLE DOT
    0xB8, "&lcedil;",   # ISOlat2: LATIN SMALL LETTER l WITH CEDILLA
    0xB9, "&dstrok;",   # ISOlat2: LATIN SMALL LETTER d WITH STROKE
    0xBA, "&scaron;",   # ISOlat2: LATIN SMALL LETTER s WITH CARON
    0xBB, "&tstrok;",   # ISOlat2: LATIN SMALL LETTER t WITH STROKE
    0xBC, "&zcaron;",   # ISOlat2: LATIN SMALL LETTER z WITH CARON
    0xBD, "&horbar;",   # ISOnum : HORIZONTAL BAR
    0xBE, "&umacr;",    # ISOlat2: LATIN SMALL LETTER u WITH MACRON
    0xBF, "&eng;",      # ISOlat2: LATIN SMALL LETTER ENG (Lappish)
    0xC0, "&Amacr;",    # ISOlat2: LATIN CAPITAL LETTER A WITH MACRON
    0xC1, "&Aacute;",   # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2, "&Acirc;",    # ISOlat1: LATIN CAPITAL LETTER A WITH
                        #          CIRCUMFLEX
    0xC3, "&Atilde;",   # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE
    0xC4, "&Auml;",     # ISOlat1: LATIN CAPITAL LETTER A WITH
                        #          DIAERESIS
    0xC5, "&Aring;",    # ISOlat1: LATIN CAPITAL LETTER A WITH RING
                        #          ABOVE
    0xC6, "&AElig;",    # ISOlat1: LATIN CAPITAL LETTER AE
    0xC7, "&Iogon;",    # ISOlat2: LATIN CAPITAL LETTER I WITH OGONEK
    0xC8, "&Ccaron;",   # ISOlat2: LATIN CAPITAL LETTER C WITH CARON
    0xC9, "&Eacute;",   # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA, "&Eogon;",    # ISOlat2: LATIN CAPITAL LETTER E WITH OGONEK
    0xCB, "&Euml;",     # ISOlat1: LATIN CAPITAL LETTER E WITH
                        #          DIAERESIS
    0xCC, "&Edot;",     # ISOlat2: LATIN CAPITAL LETTER E WITH
                        #          DOT ABOVE
    0xCD, "&Iacute;",   # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE, "&Icirc;",    # ISOlat1: LATIN CAPITAL LETTER I WITH
                        #          CIRCUMFLEX
    0xCF, "&Iuml;",     # ISOlat1: LATIN CAPITAL LETTER I WITH
                        #          DIAERESIS
    0xD0, "&ETH;",      # ISOlat1: LATIN CAPITAL LETTER ETH (Icelandic)
    0xD1, "&Ncedil;",   # ISOlat2: LATIN CAPITAL LETTER N WITH CEDILLA
    0xD2, "&Omacr;",    # ISOlat2: LATIN CAPITAL LETTER O WITH MACRON
    0xD3, "&Oacute;",   # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4, "&Ocirc;",    # ISOlat1: LATIN CAPITAL LETTER O WITH
                        #          CIRCUMFLEX
    0xD5, "&Otilde;",   # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE
    0xD6, "&Ouml;",     # ISOlat1: LATIN CAPITAL LETTER O WITH
                        #          DIAERESIS
    0xD7, "&Utilde;",   # ISOlat2: LATIN CAPITAL LETTER U WITH TILDE
    0xD8, "&Oslash;",   # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE
    0xD9, "&Uogon;",    # ISOlat2: LATIN CAPITAL LETTER U WITH OGONEK
    0xDA, "&Uacute;",   # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB, "&Ucirc;",    # ISOlat1: LATIN CAPITAL LETTER U WITH
                        #          CIRCUMFLEX
    0xDC, "&Uuml;",     # ISOlat1: LATIN CAPITAL LETTER U WITH
                        #          DIAERESIS
    0xDD, "&Yacute;",   # ISOlat1: LATIN CAPITAL LETTER Y WITH ACUTE
    0xDE, "&THORN;",    # ISOlat1: LATIN CAPITAL LETTER THORN
                        #          (Icelandic)
    0xDF, "&szlig;",    # ISOlat1: LATIN SMALL LETTER SHARP s (German)
    0xE0, "&amacr;",    # ISOlat2: LATIN SMALL LETTER a WITH MACRON
    0xE1, "&aacute;",   # ISOlat1: LATIN SMALL LETTER a WITH ACUTE
    0xE2, "&acirc;",    # ISOlat1: LATIN SMALL LETTER a WITH CIRCUMFLEX
    0xE3, "&atilde;",   # ISOlat1: LATIN SMALL LETTER a WITH TILDE
    0xE4, "&auml;",     # ISOlat1: LATIN SMALL LETTER a WITH DIAERESIS
    0xE5, "&aring;",    # ISOlat1: LATIN SMALL LETTER a WITH RING ABOVE
    0xE6, "&aelig;",    # ISOlat1: LATIN SMALL LETTER ae
    0xE7, "&iogon;",    # ISOlat2: LATIN SMALL LETTER i WITH OGONEK
    0xE8, "&ccaron;",   # ISOlat2: LATIN SMALL LETTER c WITH CARON
    0xE9, "&eacute;",   # ISOlat1: LATIN SMALL LETTER e WITH ACUTE
    0xEA, "&eogon;",    # ISOlat2: LATIN SMALL LETTER e WITH OGONEK
    0xEB, "&euml;",     # ISOlat1: LATIN SMALL LETTER e WITH DIAERESIS
    0xEC, "&edot;",     # ISOlat2: LATIN SMALL LETTER e WITH DOT ABOVE
    0xED, "&iacute;",   # ISOlat1: LATIN SMALL LETTER i WITH ACUTE
    0xEE, "&icirc;",    # ISOlat1: LATIN SMALL LETTER i WITH CIRCUMFLEX
    0xEF, "&iuml;",     # ISOlat1: LATIN SMALL LETTER i WITH DIAERESIS
    0xF0, "&eth;",      # ISOlat1: LATIN SMALL LETTER ETH (Icelandic)
    0xF1, "&ncedil;",   # ISOlat2: LATIN SMALL LETTER n WITH CEDILLA
    0xF2, "&omacr;",    # ISOlat2: LATIN SMALL LETTER o WITH MACRON
    0xF3, "&oacute;",   # ISOlat1: LATIN SMALL LETTER o WITH ACUTE
    0xF4, "&ocirc;",    # ISOlat1: LATIN SMALL LETTER o WITH CIRCUMFLEX
    0xF5, "&otilde;",   # ISOlat1: LATIN SMALL LETTER o WITH TILDE
    0xF6, "&ouml;",     # ISOlat1: LATIN SMALL LETTER o WITH DIAERESIS
    0xF7, "&utilde;",   # ISOlat2: LATIN SMALL LETTER u WITH TILDE
    0xF8, "&oslash;",   # ISOlat1: LATIN SMALL LETTER o WITH STROKE
    0xF9, "&uogon;",    # ISOlat2: LATIN SMALL LETTER u WITH OGONEK
    0xFA, "&uacute;",   # ISOlat1: LATIN SMALL LETTER u WITH ACUTE
    0xFB, "&ucirc;",    # ISOlat1: LATIN SMALL LETTER u WITH CIRCUMFLEX
    0xFC, "&uuml;",     # ISOlat1: LATIN SMALL LETTER u WITH DIAERESIS
    0xFD, "&yacute;",   # ISOlat1: LATIN SMALL LETTER y WITH ACUTE
    0xFE, "&thorn;",    # ISOlat1: LATIN SMALL LETTER THORN (Icelandic)
    0xFF, "&kgreen;",   # ISOlat2: LATIN SMALL LETTER KRA (Greenlandic)
);
# "LATIN6" is an alias for the same table.
*LATIN6_To_Ent = *ISO_8859_10_To_Ent;
1143
1144###############################################################################
1145## Routines
1146###############################################################################
1147
##---------------------------------------------------------------------------##
##	str2sgml converts a string encoded in $charset to an SGML
##	string where special characters are converted to entity
##	references.
##
##	    $return_data = iso_8859::str2sgml($data, $charset, $only8bit);
##
##	Arguments:
##	    $data	String to convert; it is processed one character
##			at a time via unpack("C").
##	    $charset	Character set name, e.g. "iso-8859-7".  The name
##			is upper-cased and "-" is changed to "_" to form
##			the name of the package hash "<CHARSET>_To_Ent"
##			that supplies the mapping.
##	    $only8bit	If true, then characters with a code below 0xA0
##			are copied to the result untranslated.
##
##	Returns the converted string.  A character found neither in the
##	charset hash nor in %US_ASCII_To_Ent is copied through unchanged.
##
sub str2sgml {
    my($data, $charset, $only8bit) = @_;
    my $ret = '';

    # Normalize the charset name and look up its mapping hash by name.
    # An unknown charset yields an empty hash, so only the US-ASCII
    # fallback applies.
    $charset =~ tr/a-z/A-Z/;
    $charset =~ tr/-/_/;
    my $map = do { no strict 'refs'; \%{"${charset}_To_Ent"} };

    # Convert string
    my $len = length($data);
    for my $offset (0 .. $len - 1) {
	# $char is lexical so that repeated calls do not clobber a
	# package-level $char.
	my $char = unpack("C", substr($data, $offset, 1));
	if ($only8bit && $char < 0xA0) {
	    $ret .= pack("C", $char);
	} else {
	    $ret .= ($map->{$char} || $US_ASCII_To_Ent{$char} ||
		     pack("C", $char));
	}
    }
    return $ret;
}
1180
1181##---------------------------------------------------------------------------##
1;	# True value as the file's last statement, as `require` expects.