Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | ##---------------------------------------------------------------------------## |
2 | ## File: | |
3 | ## $Id: iso8859.pl,v 2.5 2001/09/17 16:08:49 ehood Exp $ | |
4 | ## Author: | |
5 | ## Earl Hood mhonarc@mhonarc.org | |
6 | ## Description: | |
7 | ## Routines to process data encoded in iso8859 character sets. | |
8 | ##---------------------------------------------------------------------------## | |
9 | ## Copyright (C) 1996-1999 Earl Hood, mhonarc@mhonarc.org | |
10 | ## | |
11 | ## This program is free software; you can redistribute it and/or modify | |
12 | ## it under the terms of the GNU General Public License as published by | |
13 | ## the Free Software Foundation; either version 2 of the License, or | |
14 | ## (at your option) any later version. | |
15 | ## | |
16 | ## This program is distributed in the hope that it will be useful, | |
17 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ## GNU General Public License for more details. | |
20 | ## | |
21 | ## You should have received a copy of the GNU General Public License | |
22 | ## along with this program; if not, write to the Free Software | |
23 | ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |
24 | ## 02111-1307, USA | |
25 | ##---------------------------------------------------------------------------## | |
26 | ||
27 | package iso_8859; | |
28 | ||
29 | ############################################################################### | |
30 | ## Mapping arrays for characters to entity references | |
31 | ############################################################################### | |
32 | ||
33 | ##--------------------------------------------------------------------------- | |
34 | ## US-ASCII/Common characters | |
35 | ##--------------------------------------------------------------------------- | |
36 | ||
37 | %US_ASCII_To_Ent = ( | |
38 | #-------------------------------------------------------------------------- | |
39 | # Hex Code Entity Ref # ISO external entity and description | |
40 | #-------------------------------------------------------------------------- | |
41 | 0x26, "&amp;", # ISOnum : Ampersand | |
42 | 0x3C, "&lt;", # ISOnum : Less-than sign | |
43 | 0x3E, "&gt;", # ISOnum : Greater-than sign | |
44 | ||
45 | 0xA0, "&nbsp;", # ISOnum : NO-BREAK SPACE | |
46 | ); | |
47 | ||
48 | ##--------------------------------------------------------------------------- | |
49 | ## ISO-8859-1: Latin-1 | |
50 | ##--------------------------------------------------------------------------- | |
51 | ||
52 | %ISO_8859_1_To_Ent = ( | |
53 | #-------------------------------------------------------------------------- | |
54 | # Hex Code Entity Ref # ISO external entity and description | |
55 | #-------------------------------------------------------------------------- | |
56 | 0xA1, "¡", # ISOnum : INVERTED EXCLAMATION MARK | |
57 | 0xA2, "¢", # ISOnum : CENT SIGN | |
58 | 0xA3, "£", # ISOnum : POUND SIGN | |
59 | 0xA4, "¤", # ISOnum : CURRENCY SIGN | |
60 | 0xA5, "¥", # ISOnum : YEN SIGN | |
61 | 0xA6, "¦", # ISOnum : BROKEN BAR | |
62 | 0xA7, "§", # ISOnum : SECTION SIGN | |
63 | 0xA8, "¨", # ISOdia : DIAERESIS | |
64 | 0xA9, "©", # ISOnum : COPYRIGHT SIGN | |
65 | 0xAA, "ª", # ISOnum : FEMININE ORDINAL INDICATOR | |
66 | 0xAB, "«", # ISOnum : LEFT-POINTING DOUBLE ANGLE | |
67 | # QUOTATION MARK | |
68 | 0xAC, "¬", # ISOnum : NOT SIGN | |
69 | 0xAD, "­", # ISOnum : SOFT HYPHEN | |
70 | 0xAE, "®", # ISOnum : REGISTERED SIGN | |
71 | 0xAF, "¯", # ISOdia : OVERLINE (MACRON) | |
72 | 0xB0, "°", # ISOnum : DEGREE SIGN | |
73 | 0xB1, "±", # ISOnum : PLUS-MINUS SIGN | |
74 | 0xB2, "²", # ISOnum : SUPERSCRIPT TWO | |
75 | 0xB3, "³", # ISOnum : SUPERSCRIPT THREE | |
76 | 0xB4, "´", # ISOdia : ACUTE ACCENT | |
77 | 0xB5, "µ", # ISOnum : MICRO SIGN | |
78 | 0xB6, "¶", # ISOnum : PILCROW SIGN | |
79 | 0xB7, "·", # ISOnum : MIDDLE DOT | |
80 | 0xB8, "¸", # ISOdia : CEDILLA | |
81 | 0xB9, "¹", # ISOnum : SUPERSCRIPT ONE | |
82 | 0xBA, "º", # ISOnum : MASCULINE ORDINAL INDICATOR | |
83 | 0xBB, "»", # ISOnum : RIGHT-POINTING DOUBLE ANGLE | |
84 | # QUOTATION MARK | |
85 | 0xBC, "¼", # ISOnum : VULGAR FRACTION ONE QUARTER | |
86 | 0xBD, "½", # ISOnum : VULGAR FRACTION ONE HALF | |
87 | 0xBE, "¾", # ISOnum : VULGAR FRACTION THREE QUARTERS | |
88 | 0xBF, "¿", # ISOnum : INVERTED QUESTION MARK | |
89 | 0xC0, "À", # ISOlat1: LATIN CAPITAL LETTER A WITH GRAVE | |
90 | 0xC1, "Á", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE | |
91 | 0xC2, "Â", # ISOlat1: LATIN CAPITAL LETTER A WITH | |
92 | # CIRCUMFLEX | |
93 | 0xC3, "Ã", # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE | |
94 | 0xC4, "Ä", # ISOlat1: LATIN CAPITAL LETTER A WITH | |
95 | # DIAERESIS | |
96 | 0xC5, "Å", # ISOlat1: LATIN CAPITAL LETTER A WITH RING | |
97 | # ABOVE | |
98 | 0xC6, "Æ", # ISOlat1: LATIN CAPITAL LETTER AE | |
99 | 0xC7, "Ç", # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA | |
100 | 0xC8, "È", # ISOlat1: LATIN CAPITAL LETTER E WITH GRAVE | |
101 | 0xC9, "É", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE | |
102 | 0xCA, "Ê", # ISOlat1: LATIN CAPITAL LETTER E WITH | |
103 | # CIRCUMFLEX | |
104 | 0xCB, "Ë", # ISOlat1: LATIN CAPITAL LETTER E WITH | |
105 | # DIAERESIS | |
106 | 0xCC, "Ì", # ISOlat1: LATIN CAPITAL LETTER I WITH GRAVE | |
107 | 0xCD, "Í", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE | |
108 | 0xCE, "Î", # ISOlat1: LATIN CAPITAL LETTER I WITH | |
109 | # CIRCUMFLEX | |
110 | 0xCF, "Ï", # ISOlat1: LATIN CAPITAL LETTER I WITH | |
111 | # DIAERESIS | |
112 | 0xD0, "Ð", # ISOlat1: LATIN CAPITAL LETTER ETH (Icelandic) | |
113 | 0xD1, "Ñ", # ISOlat1: LATIN CAPITAL LETTER N WITH TILDE | |
114 | 0xD2, "Ò", # ISOlat1: LATIN CAPITAL LETTER O WITH GRAVE | |
115 | 0xD3, "Ó", # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE | |
116 | 0xD4, "Ô", # ISOlat1: LATIN CAPITAL LETTER O WITH | |
117 | # CIRCUMFLEX | |
118 | 0xD5, "Õ", # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE | |
119 | 0xD6, "Ö", # ISOlat1: LATIN CAPITAL LETTER O WITH | |
120 | # DIAERESIS | |
121 | 0xD7, "×", # ISOnum : MULTIPLICATION SIGN | |
122 | 0xD8, "Ø", # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE | |
123 | 0xD9, "Ù", # ISOlat1: LATIN CAPITAL LETTER U WITH GRAVE | |
124 | 0xDA, "Ú", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE | |
125 | 0xDB, "Û", # ISOlat1: LATIN CAPITAL LETTER U WITH | |
126 | # CIRCUMFLEX | |
127 | 0xDC, "Ü", # ISOlat1: LATIN CAPITAL LETTER U WITH | |
128 | # DIAERESIS | |
129 | 0xDD, "Ý", # ISOlat1: LATIN CAPITAL LETTER Y WITH ACUTE | |
130 | 0xDE, "Þ", # ISOlat1: LATIN CAPITAL LETTER THORN | |
131 | # (Icelandic) | |
132 | 0xDF, "ß", # ISOlat1: LATIN SMALL LETTER SHARP S (German) | |
133 | 0xE0, "à", # ISOlat1: LATIN SMALL LETTER A WITH GRAVE | |
134 | 0xE1, "á", # ISOlat1: LATIN SMALL LETTER A WITH ACUTE | |
135 | 0xE2, "â", # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX | |
136 | 0xE3, "ã", # ISOlat1: LATIN SMALL LETTER A WITH TILDE | |
137 | 0xE4, "ä", # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS | |
138 | 0xE5, "å", # ISOlat1: LATIN SMALL LETTER A WITH RING ABOVE | |
139 | 0xE6, "æ", # ISOlat1: LATIN SMALL LETTER AE | |
140 | 0xE7, "ç", # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA | |
141 | 0xE8, "è", # ISOlat1: LATIN SMALL LETTER E WITH GRAVE | |
142 | 0xE9, "é", # ISOlat1: LATIN SMALL LETTER E WITH ACUTE | |
143 | 0xEA, "ê", # ISOlat1: LATIN SMALL LETTER E WITH CIRCUMFLEX | |
144 | 0xEB, "ë", # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS | |
145 | 0xEC, "ì", # ISOlat1: LATIN SMALL LETTER I WITH GRAVE | |
146 | 0xED, "í", # ISOlat1: LATIN SMALL LETTER I WITH ACUTE | |
147 | 0xEE, "î", # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX | |
148 | 0xEF, "ï", # ISOlat1: LATIN SMALL LETTER I WITH DIAERESIS | |
149 | 0xF0, "ð", # ISOlat1: LATIN SMALL LETTER ETH (Icelandic) | |
150 | 0xF1, "ñ", # ISOlat1: LATIN SMALL LETTER N WITH TILDE | |
151 | 0xF2, "ò", # ISOlat1: LATIN SMALL LETTER O WITH GRAVE | |
152 | 0xF3, "ó", # ISOlat1: LATIN SMALL LETTER O WITH ACUTE | |
153 | 0xF4, "ô", # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX | |
154 | 0xF5, "õ", # ISOlat1: LATIN SMALL LETTER O WITH TILDE | |
155 | 0xF6, "ö", # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS | |
156 | 0xF7, "÷", # ISOnum : DIVISION SIGN | |
157 | 0xF8, "ø", # ISOlat1: LATIN SMALL LETTER O WITH STROKE | |
158 | 0xF9, "ù", # ISOlat1: LATIN SMALL LETTER U WITH GRAVE | |
159 | 0xFA, "ú", # ISOlat1: LATIN SMALL LETTER U WITH ACUTE | |
160 | 0xFB, "û", # ISOlat1: LATIN SMALL LETTER U WITH CIRCUMFLEX | |
161 | 0xFC, "ü", # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS | |
162 | 0xFD, "ý", # ISOlat1: LATIN SMALL LETTER Y WITH ACUTE | |
163 | 0xFE, "þ", # ISOlat1: LATIN SMALL LETTER THORN | |
164 | # (Icelandic) | |
165 | 0xFF, "ÿ", # ISOlat1: LATIN SMALL LETTER Y WITH DIAERESIS | |
166 | ); | |
167 | *LATIN1_To_Ent = *ISO_8859_1_To_Ent; | |
168 | ||
169 | ##--------------------------------------------------------------------------- | |
170 | ## ISO-8859-2: Latin-2 | |
171 | ##--------------------------------------------------------------------------- | |
172 | ||
173 | %ISO_8859_2_To_Ent = ( | |
174 | #-------------------------------------------------------------------------- | |
175 | # Hex Code Entity Ref # ISO external entity and description | |
176 | #-------------------------------------------------------------------------- | |
177 | 0xA1, "Ą", # ISOlat2: LATIN CAPITAL LETTER A WITH OGONEK | |
178 | 0xA2, "˘", # ISOdia : BREVE | |
179 | 0xA3, "Ł", # ISOlat2: LATIN CAPITAL LETTER L WITH STROKE | |
180 | 0xA4, "¤", # ISOnum : CURRENCY SIGN | |
181 | 0xA5, "Ľ", # ISOlat2: LATIN CAPITAL LETTER L WITH CARON | |
182 | 0xA6, "Ś", # ISOlat2: LATIN CAPITAL LETTER S WITH ACUTE | |
183 | 0xA7, "§", # ISOnum : SECTION SIGN | |
184 | 0xA8, "¨", # ISOdia : DIAERESIS | |
185 | 0xA9, "Š", # ISOlat2: LATIN CAPITAL LETTER S WITH CARON | |
186 | 0xAA, "Ş", # ISOlat2: LATIN CAPITAL LETTER S WITH CEDILLA | |
187 | 0xAB, "Ť", # ISOlat2: LATIN CAPITAL LETTER T WITH CARON | |
188 | 0xAC, "Ź", # ISOlat2: LATIN CAPITAL LETTER Z WITH ACUTE | |
189 | 0xAD, "­", # ISOnum : SOFT HYPHEN | |
190 | 0xAE, "Ž", # ISOlat2: LATIN CAPITAL LETTER Z WITH CARON | |
191 | 0xAF, "Ż", # ISOlat2: LATIN CAPITAL LETTER Z WITH DOT | |
192 | # ABOVE | |
193 | 0xB0, "°", # ISOnum : DEGREE SIGN | |
194 | 0xB1, "ą", # ISOlat2: LATIN SMALL LETTER A WITH OGONEK | |
195 | 0xB2, "˛", # ISOdia : OGONEK | |
196 | 0xB3, "ł", # ISOlat2: LATIN SMALL LETTER L WITH STROKE | |
197 | 0xB4, "´", # ISOdia : ACUTE ACCENT | |
198 | 0xB5, "ľ", # ISOlat2: LATIN SMALL LETTER L WITH CARON | |
199 | 0xB6, "ś", # ISOlat2: LATIN SMALL LETTER S WITH ACUTE | |
200 | 0xB7, "ˇ", # ISOdia : CARON | |
201 | 0xB8, "¸", # ISOdia : CEDILLA | |
202 | 0xB9, "š", # ISOlat2: LATIN SMALL LETTER S WITH CARON | |
203 | 0xBA, "ş", # ISOlat2: LATIN SMALL LETTER S WITH CEDILLA | |
204 | 0xBB, "ť", # ISOlat2: LATIN SMALL LETTER T WITH CARON | |
205 | 0xBC, "ź", # ISOlat2: LATIN SMALL LETTER Z WITH ACUTE | |
206 | 0xBD, "˝", # ISOdia : DOUBLE ACUTE ACCENT | |
207 | 0xBE, "ž", # ISOlat2: LATIN SMALL LETTER Z WITH CARON | |
208 | 0xBF, "ż", # ISOlat2: LATIN SMALL LETTER Z WITH DOT ABOVE | |
209 | 0xC0, "Ŕ", # ISOlat2: LATIN CAPITAL LETTER R WITH ACUTE | |
210 | 0xC1, "Á", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE | |
211 | 0xC2, "Â", # ISOlat1: LATIN CAPITAL LETTER A WITH | |
212 | # CIRCUMFLEX | |
213 | 0xC3, "Ă", # ISOlat2: LATIN CAPITAL LETTER A WITH BREVE | |
214 | 0xC4, "Ä", # ISOlat1: LATIN CAPITAL LETTER A WITH | |
215 | # DIAERESIS | |
216 | 0xC5, "Ĺ", # ISOlat2: LATIN CAPITAL LETTER L WITH ACUTE | |
217 | 0xC6, "Ć", # ISOlat2: LATIN CAPITAL LETTER C WITH ACUTE | |
218 | 0xC7, "Ç", # ISOlat2: LATIN CAPITAL LETTER C WITH CEDILLA | |
219 | 0xC8, "Č", # ISOlat2: LATIN CAPITAL LETTER C WITH CARON | |
220 | 0xC9, "É", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE | |
221 | 0xCA, "Ę", # ISOlat2: LATIN CAPITAL LETTER E WITH OGONEK | |
222 | 0xCB, "Ë", # ISOlat1: LATIN CAPITAL LETTER E WITH | |
223 | # DIAERESIS | |
224 | 0xCC, "Ě", # ISOlat2: LATIN CAPITAL LETTER E WITH CARON | |
225 | 0xCD, "Í", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE | |
226 | 0xCE, "Î", # ISOlat1: LATIN CAPITAL LETTER I WITH | |
227 | # CIRCUMFLEX | |
228 | 0xCF, "Ď", # ISOlat2: LATIN CAPITAL LETTER D WITH CARON | |
229 | 0xD0, "Đ", # ISOlat2: LATIN CAPITAL LETTER D WITH STROKE | |
230 | 0xD1, "Ń", # ISOlat2: LATIN CAPITAL LETTER N WITH ACUTE | |
231 | 0xD2, "Ň", # ISOlat2: LATIN CAPITAL LETTER N WITH CARON | |
232 | 0xD3, "Ó", # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE | |
233 | 0xD4, "Ô", # ISOlat1: LATIN CAPITAL LETTER O WITH | |
234 | # CIRCUMFLEX | |
235 | 0xD5, "Ő", # ISOlat2: LATIN CAPITAL LETTER O WITH DOUBLE | |
236 | # ACUTE | |
237 | 0xD6, "Ö", # ISOlat1: LATIN CAPITAL LETTER O WITH | |
238 | # DIAERESIS | |
239 | 0xD7, "×", # ISOnum : MULTIPLICATION SIGN | |
240 | 0xD8, "Ř", # ISOlat2: LATIN CAPITAL LETTER R WITH CARON | |
241 | 0xD9, "Ů", # ISOlat2: LATIN CAPITAL LETTER U WITH RING | |
242 | # ABOVE | |
243 | 0xDA, "Ú", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE | |
244 | 0xDB, "Ű", # ISOlat2: LATIN CAPITAL LETTER U WITH DOUBLE | |
245 | # ACUTE | |
246 | 0xDC, "Ü", # ISOlat1: LATIN CAPITAL LETTER U WITH | |
247 | # DIAERESIS | |
248 | 0xDD, "Ý", # ISOlat2: LATIN CAPITAL LETTER Y WITH ACUTE | |
249 | 0xDE, "Ţ", # ISOlat2: LATIN CAPITAL LETTER T WITH CEDILLA | |
250 | 0xDF, "ß", # ISOlat1: LATIN SMALL LETTER SHARP S (German) | |
251 | 0xE0, "ŕ", # ISOlat2: LATIN SMALL LETTER R WITH ACUTE | |
252 | 0xE1, "á", # ISOlat1: LATIN SMALL LETTER A WITH ACUTE | |
253 | 0xE2, "â", # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX | |
254 | 0xE3, "ă", # ISOlat2: LATIN SMALL LETTER A WITH BREVE | |
255 | 0xE4, "ä", # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS | |
256 | 0xE5, "ĺ", # ISOlat2: LATIN SMALL LETTER L WITH ACUTE | |
257 | 0xE6, "ć", # ISOlat2: LATIN SMALL LETTER C WITH ACUTE | |
258 | 0xE7, "ç", # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA | |
259 | 0xE8, "č", # ISOlat2: LATIN SMALL LETTER C WITH CARON | |
260 | 0xE9, "é", # ISOlat1: LATIN SMALL LETTER E WITH ACUTE | |
261 | 0xEA, "ę", # ISOlat2: LATIN SMALL LETTER E WITH OGONEK | |
262 | 0xEB, "ë", # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS | |
263 | 0xEC, "ě", # ISOlat2: LATIN SMALL LETTER E WITH CARON | |
264 | 0xED, "í", # ISOlat1: LATIN SMALL LETTER I WITH ACUTE | |
265 | 0xEE, "î", # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX | |
266 | 0xEF, "ď", # ISOlat2: LATIN SMALL LETTER D WITH CARON | |
267 | 0xF0, "đ", # ISOlat2: LATIN SMALL LETTER D WITH STROKE | |
268 | 0xF1, "ń", # ISOlat2: LATIN SMALL LETTER N WITH ACUTE | |
269 | 0xF2, "ň", # ISOlat2: LATIN SMALL LETTER N WITH CARON | |
270 | 0xF3, "ó", # ISOlat1: LATIN SMALL LETTER O WITH ACUTE | |
271 | 0xF4, "ô", # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX | |
272 | 0xF5, "ő", # ISOlat2: LATIN SMALL LETTER O WITH DOUBLE | |
273 | # ACUTE | |
274 | 0xF6, "ö", # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS | |
275 | 0xF7, "÷", # ISOnum : DIVISION SIGN | |
276 | 0xF8, "ř", # ISOlat2: LATIN SMALL LETTER R WITH CARON | |
277 | 0xF9, "ů", # ISOlat2: LATIN SMALL LETTER U WITH RING ABOVE | |
278 | 0xFA, "ú", # ISOlat1: LATIN SMALL LETTER U WITH ACUTE | |
279 | 0xFB, "ű", # ISOlat2: LATIN SMALL LETTER U WITH DOUBLE | |
280 | # ACUTE | |
281 | 0xFC, "ü", # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS | |
282 | 0xFD, "ý", # ISOlat1: LATIN SMALL LETTER Y WITH ACUTE | |
283 | 0xFE, "ţ", # ISOlat2: LATIN SMALL LETTER T WITH CEDILLA | |
284 | 0xFF, "˙", # ISOdia : DOT ABOVE | |
285 | ); | |
286 | *LATIN2_To_Ent = *ISO_8859_2_To_Ent; | |
287 | ||
288 | ##--------------------------------------------------------------------------- | |
289 | ## ISO-8859-3: Latin-3 | |
290 | ##--------------------------------------------------------------------------- | |
291 | ||
292 | %ISO_8859_3_To_Ent = ( | |
293 | #-------------------------------------------------------------------------- | |
294 | # Hex Code Entity Ref # ISO external entity and description | |
295 | #-------------------------------------------------------------------------- | |
296 | 0xA1, "Ħ", # ISOlat2: LATIN CAPITAL LETTER H WITH STROKE | |
297 | 0xA2, "˘", # ISOdia : BREVE | |
298 | 0xA3, "£", # ISOnum : POUND SIGN | |
299 | 0xA4, "¤", # ISOnum : CURRENCY SIGN | |
300 | 0xA6, "Ĥ", # ISOlat2: LATIN CAPITAL LETTER H WITH | |
301 | # CIRCUMFLEX | |
302 | 0xA7, "§", # ISOnum : SECTION SIGN | |
303 | 0xA8, "¨", # ISOdia : DIAERESIS | |
304 | 0xA9, "İ", # ISOlat2: LATIN CAPITAL LETTER I WITH DOT | |
305 | # ABOVE | |
306 | 0xAA, "Ş", # ISOlat2: LATIN CAPITAL LETTER S WITH CEDILLA | |
307 | 0xAB, "Ğ", # ISOlat2: LATIN CAPITAL LETTER G WITH BREVE | |
308 | 0xAC, "Ĵ", # ISOlat2: LATIN CAPITAL LETTER J WITH | |
309 | # CIRCUMFLEX | |
310 | 0xAD, "­", # ISOnum : SOFT HYPHEN | |
311 | 0xAF, "Ż", # ISOlat2: LATIN CAPITAL LETTER Z WITH DOT | |
312 | # ABOVE | |
313 | 0xB0, "°", # ISOnum : DEGREE SIGN | |
314 | 0xB1, "ħ", # ISOlat2: LATIN SMALL LETTER H WITH STROKE | |
315 | 0xB2, "²", # ISOnum : SUPERSCRIPT TWO | |
316 | 0xB3, "³", # ISOnum : SUPERSCRIPT THREE | |
317 | 0xB4, "´", # ISOdia : ACUTE ACCENT | |
318 | 0xB5, "µ", # ISOnum : MICRO SIGN | |
319 | 0xB6, "ĥ", # ISOlat2: LATIN SMALL LETTER H WITH | |
320 | # CIRCUMFLEX | |
321 | 0xB7, "·", # ISOnum : MIDDLE DOT | |
322 | 0xB8, "¸", # ISOdia : CEDILLA | |
323 | 0xB9, "ı", # ISOlat2: LATIN SMALL LETTER I DOTLESS | |
324 | 0xBA, "ş", # ISOlat2: LATIN SMALL LETTER S WITH CEDILLA | |
325 | 0xBB, "ğ", # ISOlat2: LATIN SMALL LETTER G WITH BREVE | |
326 | 0xBC, "ĵ", # ISOlat2: LATIN SMALL LETTER J WITH CIRCUMFLEX | |
327 | 0xBD, "½", # ISOnum : VULGAR FRACTION ONE HALF | |
328 | 0xBF, "ż", # ISOlat2: LATIN SMALL LETTER Z WITH DOT ABOVE | |
329 | 0xC0, "À", # ISOlat1: LATIN CAPITAL LETTER A WITH GRAVE | |
330 | 0xC1, "Á", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE | |
331 | 0xC2, "Â", # ISOlat1: LATIN CAPITAL LETTER A WITH | |
332 | # CIRCUMFLEX | |
333 | 0xC4, "Ä", # ISOlat1: LATIN CAPITAL LETTER A WITH | |
334 | # DIAERESIS | |
335 | 0xC5, "Ċ", # ISOlat2: LATIN CAPITAL LETTER C WITH DOT | |
336 | # ABOVE | |
337 | 0xC6, "Ĉ", # ISOlat2: LATIN CAPITAL LETTER C WITH | |
338 | # CIRCUMFLEX | |
339 | 0xC7, "Ç", # ISOlat2: LATIN CAPITAL LETTER C WITH CEDILLA | |
340 | 0xC8, "È", # ISOlat1: LATIN CAPITAL LETTER E WITH GRAVE | |
341 | 0xC9, "É", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE | |
342 | 0xCA, "Ê", # ISOlat2: LATIN CAPITAL LETTER E WITH | |
343 | # CIRCUMFLEX | |
344 | 0xCB, "Ë", # ISOlat1: LATIN CAPITAL LETTER E WITH | |
345 | # DIAERESIS | |
346 | 0xCC, "Ì", # ISOlat1: LATIN CAPITAL LETTER I WITH GRAVE | |
347 | 0xCD, "Í", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE | |
348 | 0xCE, "Î", # ISOlat1: LATIN CAPITAL LETTER I WITH | |
349 | # CIRCUMFLEX | |
350 | 0xCF, "Ï", # ISOlat1: LATIN CAPITAL LETTER I WITH | |
351 | # DIAERESIS | |
352 | 0xD1, "Ñ", # ISOlat1: LATIN CAPITAL LETTER N WITH TILDE | |
353 | 0xD2, "Ò", # ISOlat1: LATIN CAPITAL LETTER O WITH GRAVE | |
354 | 0xD3, "Ó", # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE | |
355 | 0xD4, "Ô", # ISOlat1: LATIN CAPITAL LETTER O WITH | |
356 | # CIRCUMFLEX | |
357 | 0xD5, "Ġ", # ISOlat2: LATIN CAPITAL LETTER G WITH DOT | |
358 | # ABOVE | |
359 | 0xD6, "Ö", # ISOlat1: LATIN CAPITAL LETTER O WITH | |
360 | # DIAERESIS | |
361 | 0xD7, "×", # ISOnum : MULTIPLICATION SIGN | |
362 | 0xD8, "Ĝ", # ISOlat2: LATIN CAPITAL LETTER G WITH | |
363 | # CIRCUMFLEX | |
364 | 0xD9, "Ù", # ISOlat1: LATIN CAPITAL LETTER U WITH GRAVE | |
365 | ||
366 | 0xDA, "Ú", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE | |
367 | 0xDB, "Û", # ISOlat1: LATIN CAPITAL LETTER U WITH | |
368 | # CIRCUMFLEX | |
369 | 0xDC, "Ü", # ISOlat1: LATIN CAPITAL LETTER U WITH | |
370 | # DIAERESIS | |
371 | 0xDD, "Ŭ", # ISOlat2: LATIN CAPITAL LETTER U WITH BREVE | |
372 | 0xDE, "Ŝ", # ISOlat2: LATIN CAPITAL LETTER S WITH | |
373 | # CIRCUMFLEX | |
374 | 0xDF, "ß", # ISOlat1: LATIN SMALL LETTER SHARP S (German) | |
375 | 0xE0, "à", # ISOlat1: LATIN SMALL LETTER A WITH GRAVE | |
376 | 0xE1, "á", # ISOlat1: LATIN SMALL LETTER A WITH ACUTE | |
377 | 0xE2, "â", # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX | |
378 | 0xE4, "ä", # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS | |
379 | 0xE5, "ċ", # ISOlat2: LATIN SMALL LETTER C WITH DOT ABOVE | |
380 | 0xE6, "&ccirc;", # ISOlat2: LATIN SMALL LETTER C WITH | |
381 | # CIRCUMFLEX | |
382 | 0xE7, "ç", # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA | |
383 | 0xE8, "è", # ISOlat1: LATIN SMALL LETTER E WITH GRAVE | |
384 | 0xE9, "é", # ISOlat2: LATIN SMALL LETTER E WITH ACUTE | |
385 | 0xEA, "ê", # ISOlat2: LATIN SMALL LETTER E WITH | |
386 | # CIRCUMFLEX | |
387 | 0xEB, "ë", # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS | |
388 | 0xEC, "ì", # ISOlat1: LATIN SMALL LETTER I WITH GRAVE | |
389 | 0xED, "í", # ISOlat1: LATIN SMALL LETTER I WITH ACUTE | |
390 | 0xEE, "î", # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX | |
391 | 0xEF, "ï", # ISOlat1: LATIN SMALL LETTER I WITH DIAERESIS | |
392 | 0xF1, "ñ", # ISOlat1: LATIN SMALL LETTER N WITH TILDE | |
393 | 0xF2, "ò", # ISOlat1: LATIN SMALL LETTER O WITH GRAVE | |
394 | 0xF3, "ó", # ISOlat1: LATIN SMALL LETTER O WITH ACUTE | |
395 | 0xF4, "ô", # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX | |
396 | 0xF5, "ġ", # ISOlat2: LATIN SMALL LETTER G WITH DOT ABOVE | |
397 | 0xF6, "ö", # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS | |
398 | 0xF7, "÷", # ISOnum : DIVISION SIGN | |
399 | 0xF8, "ĝ", # ISOlat2: LATIN SMALL LETTER G WITH | |
400 | # CIRCUMFLEX | |
401 | 0xF9, "ù", # ISOlat1: LATIN SMALL LETTER U WITH GRAVE | |
402 | 0xFA, "ú", # ISOlat1: LATIN SMALL LETTER U WITH ACUTE | |
403 | 0xFB, "û", # ISOlat1: LATIN SMALL LETTER U WITH | |
404 | # CIRCUMFLEX | |
405 | 0xFC, "ü", # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS | |
406 | 0xFD, "ŭ", # ISOlat2: LATIN SMALL LETTER U WITH BREVE | |
407 | 0xFE, "ŝ", # ISOlat2: LATIN SMALL LETTER S WITH | |
408 | # CIRCUMFLEX | |
409 | 0xFF, "˙", # ISOdia : DOT ABOVE | |
410 | ); | |
411 | *LATIN3_To_Ent = *ISO_8859_3_To_Ent; | |
412 | ||
413 | ##--------------------------------------------------------------------------- | |
414 | ## ISO-8859-4: Latin-4 | |
415 | ##--------------------------------------------------------------------------- | |
416 | ||
417 | %ISO_8859_4_To_Ent = ( | |
418 | #-------------------------------------------------------------------------- | |
419 | # Hex Code Entity Ref # ISO external entity and description | |
420 | #-------------------------------------------------------------------------- | |
421 | 0xA1, "Ą", # ISOlat2: LATIN CAPITAL LETTER A WITH OGONEK | |
422 | 0xA2, "ĸ", # ISOlat2: LATIN SMALL LETTER KRA (Greenlandic) | |
423 | 0xA3, "Ŗ", # ISOlat2: LATIN CAPITAL LETTER R WITH CEDILLA | |
424 | 0xA4, "¤", # ISOnum : CURRENCY SIGN | |
425 | 0xA5, "&Itilde;", # ISOlat2: LATIN CAPITAL LETTER I WITH TILDE | |
426 | 0xA6, "Ļ", # ISOlat2: LATIN CAPITAL LETTER L WITH CEDILLA | |
427 | 0xA7, "§", # ISOnum : SECTION SIGN | |
428 | 0xA8, "¨", # ISOdia : DIAERESIS | |
429 | 0xA9, "Š", # ISOlat2: LATIN CAPITAL LETTER S WITH CARON | |
430 | 0xAA, "Ē", # ISOlat2: LATIN CAPITAL LETTER E WITH MACRON | |
431 | 0xAB, "Ģ", # ISOlat2: LATIN CAPITAL LETTER G WITH CEDILLA | |
432 | 0xAC, "Ŧ", # ISOlat2: LATIN CAPITAL LETTER T WITH STROKE | |
433 | 0xAD, "­", # ISOnum : SOFT HYPHEN | |
434 | 0xAE, "&Zcaron;", # ISOlat2: LATIN CAPITAL LETTER Z WITH CARON | |
435 | 0xAF, "¯", # ISOdia : MACRON | |
436 | 0xB0, "°", # ISOnum : DEGREE SIGN | |
437 | 0xB1, "ą", # ISOlat2: LATIN SMALL LETTER A WITH OGONEK | |
438 | 0xB2, "˛", # ISOdia : OGONEK | |
439 | 0xB3, "ŗ", # ISOlat2: LATIN SMALL LETTER R WITH CEDILLA | |
440 | 0xB4, "´", # ISOdia : ACUTE ACCENT | |
441 | 0xB5, "ĩ", # ISOlat2: LATIN SMALL LETTER I WITH TILDE | |
442 | 0xB6, "ļ", # ISOlat2: LATIN SMALL LETTER L WITH CEDILLA | |
443 | 0xB7, "ˇ", # ISOdia : CARON | |
444 | 0xB8, "¸", # ISOdia : CEDILLA | |
445 | 0xB9, "š", # ISOlat2: LATIN SMALL LETTER S WITH CARON | |
446 | 0xBA, "ē", # ISOlat2: LATIN SMALL LETTER E WITH MACRON | |
447 | 0xBB, "&#291;", # (no named entity; numeric reference for U+0123): LATIN SMALL LETTER G WITH CEDILLA | |
448 | 0xBC, "ŧ", # ISOlat2: LATIN SMALL LETTER T WITH STROKE | |
449 | 0xBD, "Ŋ", # ISOlat2: LATIN CAPITAL LETTER ENG (Lappish) | |
450 | 0xBE, "ž", # ISOlat2: LATIN SMALL LETTER Z WITH CARON | |
451 | 0xBF, "ŋ", # ISOlat2: LATIN SMALL LETTER ENG (Lappish) | |
452 | 0xC0, "Ā", # ISOlat1: LATIN CAPITAL LETTER A WITH MACRON | |
453 | 0xC1, "Á", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE | |
454 | 0xC2, "Â", # ISOlat1: LATIN CAPITAL LETTER A WITH | |
455 | # CIRCUMFLEX | |
456 | 0xC3, "Ã", # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE | |
457 | 0xC4, "Ä", # ISOlat1: LATIN CAPITAL LETTER A WITH | |
458 | # DIAERESIS | |
459 | 0xC5, "Å", # ISOlat1: LATIN CAPITAL LETTER A WITH RING | |
460 | # ABOVE | |
461 | 0xC6, "Æ", # ISOlat1: LATIN CAPITAL LETTER AE | |
462 | 0xC7, "Į", # ISOlat2: LATIN CAPITAL LETTER I WITH OGONEK | |
463 | 0xC8, "Č", # ISOlat2: LATIN CAPITAL LETTER C WITH CARON | |
464 | 0xC9, "É", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE | |
465 | 0xCA, "Ę", # ISOlat2: LATIN CAPITAL LETTER E WITH OGONEK | |
466 | 0xCB, "Ë", # ISOlat1: LATIN CAPITAL LETTER E WITH | |
467 | # DIAERESIS | |
468 | 0xCC, "Ė", # ISOlat1: LATIN CAPITAL LETTER E WITH DOT | |
469 | # ABOVE | |
470 | 0xCD, "Í", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE | |
471 | 0xCE, "Î", # ISOlat1: LATIN CAPITAL LETTER I WITH | |
472 | # CIRCUMFLEX | |
473 | 0xCF, "Ī", # ISOlat2: LATIN CAPITAL LETTER I WITH MACRON | |
474 | 0xD0, "&Dstrok;", # ISOlat2: LATIN CAPITAL LETTER D WITH STROKE | |
475 | 0xD1, "Ņ", # ISOlat2: LATIN CAPITAL LETTER N WITH CEDILLA | |
476 | 0xD2, "Ō", # ISOlat2: LATIN CAPITAL LETTER O WITH MACRON | |
477 | 0xD3, "Ķ", # ISOlat2: LATIN CAPITAL LETTER K WITH CEDILLA | |
478 | 0xD4, "Ô", # ISOlat1: LATIN CAPITAL LETTER O WITH | |
479 | # CIRCUMFLEX | |
480 | 0xD5, "Õ", # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE | |
481 | 0xD6, "Ö", # ISOlat1: LATIN CAPITAL LETTER O WITH | |
482 | # DIAERESIS | |
483 | 0xD7, "×", # ISOnum : MULTIPLICATION SIGN | |
484 | 0xD8, "Ø", # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE | |
485 | 0xD9, "Ų", # ISOlat2: LATIN CAPITAL LETTER U WITH OGONEK | |
486 | 0xDA, "Ú", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE | |
487 | 0xDB, "Û", # ISOlat1: LATIN CAPITAL LETTER U WITH | |
488 | # CIRCUMFLEX | |
489 | 0xDC, "Ü", # ISOlat1: LATIN CAPITAL LETTER U WITH | |
490 | # DIAERESIS | |
491 | 0xDD, "Ũ", # ISOlat2: LATIN CAPITAL LETTER U WITH TILDE | |
492 | 0xDE, "Ū", # ISOlat2: LATIN CAPITAL LETTER U WITH MACRON | |
493 | 0xDF, "ß", # ISOlat1: LATIN SMALL LETTER SHARP S (German) | |
494 | 0xE0, "ā", # ISOlat1: LATIN SMALL LETTER A WITH MACRON | |
495 | 0xE1, "á", # ISOlat1: LATIN SMALL LETTER A WITH ACUTE | |
496 | 0xE2, "â", # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX | |
497 | 0xE3, "ã", # ISOlat1: LATIN SMALL LETTER A WITH TILDE | |
498 | 0xE4, "ä", # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS | |
499 | 0xE5, "å", # ISOlat1: LATIN SMALL LETTER A WITH RING ABOVE | |
500 | 0xE6, "æ", # ISOlat1: LATIN SMALL LETTER AE | |
501 | 0xE7, "į", # ISOlat2: LATIN SMALL LETTER I WITH OGONEK | |
502 | 0xE8, "č", # ISOlat2: LATIN SMALL LETTER C WITH CARON | |
503 | 0xE9, "é", # ISOlat2: LATIN SMALL LETTER E WITH ACUTE | |
504 | 0xEA, "ę", # ISOlat2: LATIN SMALL LETTER E WITH OGONEK | |
505 | 0xEB, "ë", # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS | |
506 | 0xEC, "ė", # ISOlat2: LATIN SMALL LETTER E WITH DOT ABOVE | |
507 | 0xED, "í", # ISOlat1: LATIN SMALL LETTER I WITH ACUTE | |
508 | 0xEE, "î", # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX | |
509 | 0xEF, "ī", # ISOlat2: LATIN SMALL LETTER I WITH MACRON | |
510 | 0xF0, "đ", # ISOlat2: LATIN SMALL LETTER D WITH STROKE | |
511 | 0xF1, "ņ", # ISOlat2: LATIN SMALL LETTER N WITH CEDILLA | |
512 | 0xF2, "ō", # ISOlat2: LATIN SMALL LETTER O WITH MACRON | |
513 | 0xF3, "ķ", # ISOlat2: LATIN SMALL LETTER K WITH CEDILLA | |
514 | 0xF4, "ô", # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX | |
515 | 0xF5, "õ", # ISOlat1: LATIN SMALL LETTER O WITH TILDE | |
516 | 0xF6, "ö", # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS | |
517 | 0xF7, "÷", # ISOnum : DIVISION SIGN | |
518 | 0xF8, "ø", # ISOlat1: LATIN SMALL LETTER O WITH STROKE | |
519 | 0xF9, "ų", # ISOlat2: LATIN SMALL LETTER U WITH OGONEK | |
520 | 0xFA, "ú", # ISOlat1: LATIN SMALL LETTER U WITH ACUTE | |
521 | 0xFB, "û", # ISOlat1: LATIN SMALL LETTER U WITH | |
522 | # CIRCUMFLEX | |
523 | 0xFC, "ü", # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS | |
524 | 0xFD, "ũ", # ISOlat2: LATIN SMALL LETTER U WITH TILDE | |
525 | 0xFE, "ū", # ISOlat2: LATIN SMALL LETTER U WITH MACRON | |
526 | 0xFF, "˙", # ISOdia : DOT ABOVE | |
527 | ); | |
528 | *LATIN4_To_Ent = *ISO_8859_4_To_Ent; | |
529 | ||
530 | ##--------------------------------------------------------------------------- | |
531 | ## ISO-8859-5: Cyrillic | |
532 | ##--------------------------------------------------------------------------- | |
533 | ||
534 | %ISO_8859_5_To_Ent = ( | |
535 | #-------------------------------------------------------------------------- | |
536 | # Hex Code Entity Ref # ISO external entity and description | |
537 | #-------------------------------------------------------------------------- | |
538 | 0xA1, "Ё", # ISOcyr1: CYRILLIC CAPITAL LETTER IO | |
539 | 0xA2, "Ђ", # ISOcyr2: CYRILLIC CAPITAL LETTER DJE | |
540 | # (Serbocroatian) | |
541 | 0xA3, "Ѓ", # ISOcyr2: CYRILLIC CAPITAL LETTER GJE | |
542 | # (Macedonian) | |
543 | 0xA4, "&Jukcy;", # ISOcyr2: CYRILLIC CAPITAL LETTER UKRAINIAN IE | |
544 | 0xA5, "Ѕ", # ISOcyr2: CYRILLIC CAPITAL LETTER DZE | |
545 | # (Macedonian) | |
546 | 0xA6, "І", # ISOcyr2: CYRILLIC CAPITAL LETTER | |
547 | # BYELORUSSIAN-UKRAINIAN I | |
548 | 0xA7, "Ї", # ISOcyr2: CYRILLIC CAPITAL LETTER YI | |
549 | # (Ukrainian) | |
550 | 0xA8, "&Jsercy;", # ISOcyr2: CYRILLIC CAPITAL LETTER JE | |
551 | 0xA9, "Љ", # ISOcyr2: CYRILLIC CAPITAL LETTER LJE | |
552 | 0xAA, "Њ", # ISOcyr2: CYRILLIC CAPITAL LETTER NJE | |
553 | 0xAB, "Ћ", # ISOcyr2: CYRILLIC CAPITAL LETTER TSHE | |
554 | # (Serbocroatian) | |
555 | 0xAC, "Ќ", # ISOcyr2: CYRILLIC CAPITAL LETTER KJE | |
556 | # (Macedonian) | |
557 | 0xAD, "­", # ISOnum : SOFT HYPHEN | |
558 | 0xAE, "Ў", # ISOcyr2: CYRILLIC CAPITAL LETTER SHORT U | |
559 | # (Byelorussian) | |
560 | 0xAF, "Џ", # ISOcyr2: CYRILLIC CAPITAL LETTER DZHE | |
561 | 0xB0, "А", # ISOcyr1: CYRILLIC CAPITAL LETTER A | |
562 | 0xB1, "Б", # ISOcyr1: CYRILLIC CAPITAL LETTER BE | |
563 | 0xB2, "В", # ISOcyr1: CYRILLIC CAPITAL LETTER VE | |
564 | 0xB3, "Г", # ISOcyr1: CYRILLIC CAPITAL LETTER GHE | |
565 | 0xB4, "Д", # ISOcyr1: CYRILLIC CAPITAL LETTER DE | |
566 | 0xB5, "Е", # ISOcyr1: CYRILLIC CAPITAL LETTER IE | |
567 | 0xB6, "Ж", # ISOcyr1: CYRILLIC CAPITAL LETTER ZHE | |
568 | 0xB7, "З", # ISOcyr1: CYRILLIC CAPITAL LETTER ZE | |
569 | 0xB8, "И", # ISOcyr1: CYRILLIC CAPITAL LETTER I | |
570 | 0xB9, "Й", # ISOcyr1: CYRILLIC CAPITAL LETTER SHORT I | |
571 | 0xBA, "К", # ISOcyr1: CYRILLIC CAPITAL LETTER KA | |
572 | 0xBB, "Л", # ISOcyr1: CYRILLIC CAPITAL LETTER EL | |
573 | 0xBC, "М", # ISOcyr1: CYRILLIC CAPITAL LETTER EM | |
574 | 0xBD, "Н", # ISOcyr1: CYRILLIC CAPITAL LETTER EN | |
575 | 0xBE, "О", # ISOcyr1: CYRILLIC CAPITAL LETTER O | |
576 | 0xBF, "П", # ISOcyr1: CYRILLIC CAPITAL LETTER PE | |
577 | 0xC0, "Р", # ISOcyr1: CYRILLIC CAPITAL LETTER ER | |
578 | 0xC1, "С", # ISOcyr1: CYRILLIC CAPITAL LETTER ES | |
579 | 0xC2, "Т", # ISOcyr1: CYRILLIC CAPITAL LETTER TE | |
580 | 0xC3, "У", # ISOcyr1: CYRILLIC CAPITAL LETTER U | |
581 | 0xC4, "Ф", # ISOcyr1: CYRILLIC CAPITAL LETTER EF | |
582 | 0xC5, "Х", # ISOcyr1: CYRILLIC CAPITAL LETTER HA | |
583 | 0xC6, "Ц", # ISOcyr1: CYRILLIC CAPITAL LETTER TSE | |
584 | 0xC7, "Ч", # ISOcyr1: CYRILLIC CAPITAL LETTER CHE | |
585 | 0xC8, "Ш", # ISOcyr1: CYRILLIC CAPITAL LETTER SHA | |
586 | 0xC9, "Щ", # ISOcyr1: CYRILLIC CAPITAL LETTER SHCHA | |
587 | 0xCA, "Ъ", # ISOcyr1: CYRILLIC CAPITAL LETTER HARD SIGN | |
588 | 0xCB, "Ы", # ISOcyr1: CYRILLIC CAPITAL LETTER YERU | |
589 | 0xCC, "Ь", # ISOcyr1: CYRILLIC CAPITAL LETTER SOFT SIGN | |
590 | 0xCD, "Э", # ISOcyr1: CYRILLIC CAPITAL LETTER E | |
591 | 0xCE, "Ю", # ISOcyr1: CYRILLIC CAPITAL LETTER YU | |
592 | 0xCF, "Я", # ISOcyr1: CYRILLIC CAPITAL LETTER YA | |
593 | 0xD0, "а", # ISOcyr1: CYRILLIC SMALL LETTER A | |
594 | 0xD1, "б", # ISOcyr1: CYRILLIC SMALL LETTER BE | |
595 | 0xD2, "в", # ISOcyr1: CYRILLIC SMALL LETTER VE | |
596 | 0xD3, "г", # ISOcyr1: CYRILLIC SMALL LETTER GHE | |
597 | 0xD4, "д", # ISOcyr1: CYRILLIC SMALL LETTER DE | |
598 | 0xD5, "е", # ISOcyr1: CYRILLIC SMALL LETTER IE | |
599 | 0xD6, "ж", # ISOcyr1: CYRILLIC SMALL LETTER ZHE | |
600 | 0xD7, "з", # ISOcyr1: CYRILLIC SMALL LETTER ZE | |
601 | 0xD8, "и", # ISOcyr1: CYRILLIC SMALL LETTER I | |
602 | 0xD9, "й", # ISOcyr1: CYRILLIC SMALL LETTER SHORT I | |
603 | 0xDA, "к", # ISOcyr1: CYRILLIC SMALL LETTER KA | |
604 | 0xDB, "л", # ISOcyr1: CYRILLIC SMALL LETTER EL | |
605 | 0xDC, "м", # ISOcyr1: CYRILLIC SMALL LETTER EM | |
606 | 0xDD, "н", # ISOcyr1: CYRILLIC SMALL LETTER EN | |
607 | 0xDE, "о", # ISOcyr1: CYRILLIC SMALL LETTER O | |
608 | 0xDF, "п", # ISOcyr1: CYRILLIC SMALL LETTER PE | |
609 | 0xE0, "р", # ISOcyr1: CYRILLIC SMALL LETTER ER | |
610 | 0xE1, "с", # ISOcyr1: CYRILLIC SMALL LETTER ES | |
611 | 0xE2, "т", # ISOcyr1: CYRILLIC SMALL LETTER TE | |
612 | 0xE3, "у", # ISOcyr1: CYRILLIC SMALL LETTER U | |
613 | 0xE4, "ф", # ISOcyr1: CYRILLIC SMALL LETTER EF | |
614 | 0xE5, "х", # ISOcyr1: CYRILLIC SMALL LETTER HA | |
615 | 0xE6, "ц", # ISOcyr1: CYRILLIC SMALL LETTER TSE | |
616 | 0xE7, "ч", # ISOcyr1: CYRILLIC SMALL LETTER CHE | |
617 | 0xE8, "ш", # ISOcyr1: CYRILLIC SMALL LETTER SHA | |
618 | 0xE9, "щ", # ISOcyr1: CYRILLIC SMALL LETTER SHCHA | |
619 | 0xEA, "ъ", # ISOcyr1: CYRILLIC SMALL LETTER HARD SIGN | |
620 | 0xEB, "ы", # ISOcyr1: CYRILLIC SMALL LETTER YERU | |
621 | 0xEC, "ь", # ISOcyr1: CYRILLIC SMALL LETTER SOFT SIGN | |
622 | 0xED, "э", # ISOcyr1: CYRILLIC SMALL LETTER E | |
623 | 0xEE, "ю", # ISOcyr1: CYRILLIC SMALL LETTER YU | |
624 | 0xEF, "я", # ISOcyr1: CYRILLIC SMALL LETTER YA | |
625 | 0xF0, "№", # ISOcyr1: NUMERO SIGN | |
626 | 0xF1, "ё", # ISOcyr1: CYRILLIC SMALL LETTER IO | |
627 | 0xF2, "ђ", # ISOcyr2: CYRILLIC SMALL LETTER DJE | |
628 | # (Serbocroatian) | |
629 | 0xF3, "ѓ", # ISOcyr2: CYRILLIC SMALL LETTER GJE | |
630 | # (Macedonian) | |
631 | 0xF4, "е", # ISOcyr1: CYRILLIC SMALL LETTER UKRAINIAN IE | |
632 | 0xF5, "ѕ", # ISOcyr2: CYRILLIC SMALL LETTER DZE | |
633 | # (Macedonian) | |
634 | 0xF6, "і", # ISOcyr2: CYRILLIC SMALL LETTER | |
635 | # BYELORUSSIAN-UKRAINIAN I | |
636 | 0xF7, "ї", # ISOcyr2: CYRILLIC SMALL LETTER YI | |
637 | # (Ukrainian) | |
638 | 0xF8, "ј", # ISOcyr2: CYRILLIC SMALL LETTER JE | |
639 | 0xF9, "љ", # ISOcyr2: CYRILLIC SMALL LETTER LJE | |
640 | 0xFA, "њ", # ISOcyr2: CYRILLIC SMALL LETTER NJE | |
641 | 0xFB, "ћ", # ISOcyr2: CYRILLIC SMALL LETTER TSHE | |
642 | # (Serbocroatian) | |
643 | 0xFC, "ќ", # ISOcyr2: CYRILLIC SMALL LETTER KJE | |
644 | # (Macedonian) | |
645 | 0xFD, "§", # ISOnum : SECTION SIGN | |
646 | 0xFE, "ў", # ISOcyr2: CYRILLIC SMALL LETTER SHORT U | |
647 | # (Byelorussian) | |
648 | 0xFF, "џ", # ISOcyr2: CYRILLIC SMALL LETTER DZHE | |
649 | ); | |
650 | ||
651 | ##--------------------------------------------------------------------------- | |
652 | ## ISO-8859-6: Arabic | |
653 | ##--------------------------------------------------------------------------- | |
654 | ## Note: There are no ISO entities for Arabic characters. Some of | |
655 | ## the following are non-standard entity references. "ISOarbc" | |
656 | ## is used as the name of the entity set defining the Arabic | |
657 | ## entities, in anticipation that ISO will define such a set. | |
658 | ||
%ISO_8859_6_To_Ent = (
  # Maps ISO-8859-6 (Arabic) code values to SGML entity references.
  # Every value is spelled as a plain-ASCII "&name;" reference so the
  # table never emits a raw non-ASCII character.
  #--------------------------------------------------------------------------
  # Hex Code  Entity Ref    # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA4, "&curren;",   # ISOnum : CURRENCY SIGN
    0xAC, "&arcomma;",  # ISOarbc: ARABIC COMMA
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xBB, "&arsemi;",   # ISOarbc: ARABIC SEMICOLON
    0xBF, "&arquest;",  # ISOarbc: ARABIC QUESTION MARK
    0xC1, "&hamz;",     # ISOarbc: ARABIC LETTER HAMZA
    0xC2, "&alefmadd;", # ISOarbc: ARABIC LETTER ALEF WITH MADDA ABOVE
    0xC3, "&alefhamz;", # ISOarbc: ARABIC LETTER ALEF WITH HAMZA ABOVE
    0xC4, "&wawhamz;",  # ISOarbc: ARABIC LETTER WAW WITH HAMZA ABOVE
    0xC5, "&alefhamz;", # ISOarbc: ARABIC LETTER ALEF WITH HAMZA BELOW
                        #          NOTE(review): same entity name as 0xC3
                        #          (HAMZA ABOVE), so the two letters are
                        #          indistinguishable in the output.
    0xC6, "&yehhamz;",  # ISOarbc: ARABIC LETTER YEH WITH HAMZA ABOVE
    0xC7, "&alef;",     # ISOarbc: ARABIC LETTER ALEF
    0xC8, "&beh;",      # ISOarbc: ARABIC LETTER BEH
    0xC9, "&tehmarb;",  # ISOarbc: ARABIC LETTER TEH MARBUTA
    0xCA, "&teh;",      # ISOarbc: ARABIC LETTER TEH
    0xCB, "&theh;",     # ISOarbc: ARABIC LETTER THEH
    0xCC, "&jeem;",     # ISOarbc: ARABIC LETTER JEEM
    0xCD, "&hah;",      # ISOarbc: ARABIC LETTER HAH
    0xCE, "&khah;",     # ISOarbc: ARABIC LETTER KHAH
    0xCF, "&dal;",      # ISOarbc: ARABIC LETTER DAL
    0xD0, "&thal;",     # ISOarbc: ARABIC LETTER THAL
    0xD1, "&reh;",      # ISOarbc: ARABIC LETTER REH
    0xD2, "&zain;",     # ISOarbc: ARABIC LETTER ZAIN
    0xD3, "&seen;",     # ISOarbc: ARABIC LETTER SEEN
    0xD4, "&sheen;",    # ISOarbc: ARABIC LETTER SHEEN
    0xD5, "&sad;",      # ISOarbc: ARABIC LETTER SAD
    0xD6, "&dad;",      # ISOarbc: ARABIC LETTER DAD
    0xD7, "&tah;",      # ISOarbc: ARABIC LETTER TAH
    0xD8, "&zah;",      # ISOarbc: ARABIC LETTER ZAH
    0xD9, "&ain;",      # ISOarbc: ARABIC LETTER AIN
    0xDA, "&ghain;",    # ISOarbc: ARABIC LETTER GHAIN
    0xE0, "&tatweel;",  # ISOarbc: ARABIC TATWEEL
    0xE1, "&feh;",      # ISOarbc: ARABIC LETTER FEH
    0xE2, "&qaf;",      # ISOarbc: ARABIC LETTER QAF
    0xE3, "&kaf;",      # ISOarbc: ARABIC LETTER KAF
    0xE4, "&lam;",      # ISOarbc: ARABIC LETTER LAM
    0xE5, "&meem;",     # ISOarbc: ARABIC LETTER MEEM
    0xE6, "&noon;",     # ISOarbc: ARABIC LETTER NOON
    0xE7, "&heh;",      # ISOarbc: ARABIC LETTER HEH
    0xE8, "&waw;",      # ISOarbc: ARABIC LETTER WAW
    0xE9, "&alefmaks;", # ISOarbc: ARABIC LETTER ALEF MAKSURA
    0xEA, "&yeh;",      # ISOarbc: ARABIC LETTER YEH
    0xEB, "&fathatan;", # ISOarbc: ARABIC FATHATAN
    0xEC, "&dammatan;", # ISOarbc: ARABIC DAMMATAN
    0xED, "&kasratan;", # ISOarbc: ARABIC KASRATAN
    0xEE, "&fatha;",    # ISOarbc: ARABIC FATHA
    0xEF, "&damma;",    # ISOarbc: ARABIC DAMMA
    0xF0, "&kasra;",    # ISOarbc: ARABIC KASRA
    0xF1, "&shadda;",   # ISOarbc: ARABIC SHADDA
    0xF2, "&sukun;",    # ISOarbc: ARABIC SUKUN
);
714 | ||
715 | ##--------------------------------------------------------------------------- | |
716 | ## ISO-8859-7: Greek | |
717 | ##--------------------------------------------------------------------------- | |
718 | ||
%ISO_8859_7_To_Ent = (
  # Maps ISO-8859-7 (Greek) code values to SGML entity references.
  # Every value is spelled as a plain-ASCII "&name;" reference so the
  # table never emits a raw non-ASCII character.
  #--------------------------------------------------------------------------
  # Hex Code  Entity Ref    # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1, "&lsquo;",    # ISOnum : SINGLE HIGH-REVERSED-9 QUOTATION
                        #          MARK
    0xA2, "&rsquo;",    # ISOnum : RIGHT SINGLE QUOTATION MARK
    0xA3, "&pound;",    # ISOnum : POUND SIGN
    0xA6, "&brvbar;",   # ISOnum : BROKEN BAR
    0xA7, "&sect;",     # ISOnum : SECTION SIGN
    0xA8, "&uml;",      # ISOdia : DIAERESIS
    0xA9, "&copy;",     # ISOnum : COPYRIGHT SIGN
    0xAB, "&laquo;",    # ISOnum : LEFT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xAC, "&not;",      # ISOnum : NOT SIGN
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xAF, "&mdash;",    # ISOpub : EM DASH
    0xB0, "&deg;",      # ISOnum : DEGREE SIGN
    0xB1, "&plusmn;",   # ISOnum : PLUS-MINUS SIGN
    0xB2, "&sup2;",     # ISOnum : SUPERSCRIPT TWO
    0xB3, "&sup3;",     # ISOnum : SUPERSCRIPT THREE
    0xB4, "&acute;",    # ISOdia : ACUTE ACCENT
    0xB5, "&diagr;",    # ISOgrk?: ACUTE ACCENT AND DIAERESIS
                        #          (Tonos and Dialytika)
    0xB6, "&Aacgr;",    # ISOgrk2: GREEK CAPITAL LETTER ALPHA WITH
                        #          ACUTE
    0xB7, "&middot;",   # ISOnum : MIDDLE DOT
    0xB8, "&Eacgr;",    # ISOgrk2: GREEK CAPITAL LETTER EPSILON WITH
                        #          ACUTE
    0xB9, "&EEacgr;",   # ISOgrk2: GREEK CAPITAL LETTER ETA WITH ACUTE
    0xBA, "&Iacgr;",    # ISOgrk2: GREEK CAPITAL LETTER IOTA WITH ACUTE
    0xBB, "&raquo;",    # ISOnum : RIGHT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xBC, "&Oacgr;",    # ISOgrk2: GREEK CAPITAL LETTER OMICRON WITH
                        #          ACUTE
    0xBD, "&frac12;",   # ISOnum : VULGAR FRACTION ONE HALF
    0xBE, "&Uacgr;",    # ISOgrk2: GREEK CAPITAL LETTER UPSILON WITH
                        #          ACUTE
    0xBF, "&OHacgr;",   # ISOgrk2: GREEK CAPITAL LETTER OMEGA WITH
                        #          ACUTE
    0xC0, "&idiagr;",   # ISOgrk2: GREEK SMALL LETTER IOTA WITH ACUTE
                        #          AND DIAERESIS
    0xC1, "&Agr;",      # ISOgrk1: GREEK CAPITAL LETTER ALPHA
    0xC2, "&Bgr;",      # ISOgrk1: GREEK CAPITAL LETTER BETA
    0xC3, "&Ggr;",      # ISOgrk1: GREEK CAPITAL LETTER GAMMA
    0xC4, "&Dgr;",      # ISOgrk1: GREEK CAPITAL LETTER DELTA
    0xC5, "&Egr;",      # ISOgrk1: GREEK CAPITAL LETTER EPSILON
    0xC6, "&Zgr;",      # ISOgrk1: GREEK CAPITAL LETTER ZETA
    0xC7, "&EEgr;",     # ISOgrk1: GREEK CAPITAL LETTER ETA
    0xC8, "&THgr;",     # ISOgrk1: GREEK CAPITAL LETTER THETA
    0xC9, "&Igr;",      # ISOgrk1: GREEK CAPITAL LETTER IOTA
    0xCA, "&Kgr;",      # ISOgrk1: GREEK CAPITAL LETTER KAPPA
    0xCB, "&Lgr;",      # ISOgrk1: GREEK CAPITAL LETTER LAMDA
    0xCC, "&Mgr;",      # ISOgrk1: GREEK CAPITAL LETTER MU
    0xCD, "&Ngr;",      # ISOgrk1: GREEK CAPITAL LETTER NU
    0xCE, "&Xgr;",      # ISOgrk1: GREEK CAPITAL LETTER XI
    0xCF, "&Ogr;",      # ISOgrk1: GREEK CAPITAL LETTER OMICRON
    0xD0, "&Pgr;",      # ISOgrk1: GREEK CAPITAL LETTER PI
    0xD1, "&Rgr;",      # ISOgrk1: GREEK CAPITAL LETTER RHO
    0xD3, "&Sgr;",      # ISOgrk1: GREEK CAPITAL LETTER SIGMA
    0xD4, "&Tgr;",      # ISOgrk1: GREEK CAPITAL LETTER TAU
    0xD5, "&Ugr;",      # ISOgrk1: GREEK CAPITAL LETTER UPSILON
    0xD6, "&PHgr;",     # ISOgrk1: GREEK CAPITAL LETTER PHI
    0xD7, "&KHgr;",     # ISOgrk1: GREEK CAPITAL LETTER CHI
    0xD8, "&PSgr;",     # ISOgrk1: GREEK CAPITAL LETTER PSI
    0xD9, "&OHgr;",     # ISOgrk1: GREEK CAPITAL LETTER OMEGA
    0xDA, "&Idigr;",    # ISOgrk2: GREEK CAPITAL LETTER IOTA WITH
                        #          DIAERESIS
    0xDB, "&Udigr;",    # ISOgrk2: GREEK CAPITAL LETTER UPSILON WITH
                        #          DIAERESIS
    0xDC, "&aacgr;",    # ISOgrk2: GREEK SMALL LETTER ALPHA WITH ACUTE
    0xDD, "&eacgr;",    # ISOgrk2: GREEK SMALL LETTER EPSILON WITH
                        #          ACUTE
    0xDE, "&eeacgr;",   # ISOgrk2: GREEK SMALL LETTER ETA WITH ACUTE
    0xDF, "&iacgr;",    # ISOgrk2: GREEK SMALL LETTER IOTA WITH ACUTE
    0xE0, "&udiagr;",   # ISOgrk2: GREEK SMALL LETTER UPSILON WITH
                        #          ACUTE AND DIAERESIS
    0xE1, "&agr;",      # ISOgrk1: GREEK SMALL LETTER ALPHA
    0xE2, "&bgr;",      # ISOgrk1: GREEK SMALL LETTER BETA
    0xE3, "&ggr;",      # ISOgrk1: GREEK SMALL LETTER GAMMA
    0xE4, "&dgr;",      # ISOgrk1: GREEK SMALL LETTER DELTA
    0xE5, "&egr;",      # ISOgrk1: GREEK SMALL LETTER EPSILON
    0xE6, "&zgr;",      # ISOgrk1: GREEK SMALL LETTER ZETA
    0xE7, "&eegr;",     # ISOgrk1: GREEK SMALL LETTER ETA
    0xE8, "&thgr;",     # ISOgrk1: GREEK SMALL LETTER THETA
    0xE9, "&igr;",      # ISOgrk1: GREEK SMALL LETTER IOTA
    0xEA, "&kgr;",      # ISOgrk1: GREEK SMALL LETTER KAPPA
    0xEB, "&lgr;",      # ISOgrk1: GREEK SMALL LETTER LAMDA
    0xEC, "&mgr;",      # ISOgrk1: GREEK SMALL LETTER MU
    0xED, "&ngr;",      # ISOgrk1: GREEK SMALL LETTER NU
    0xEE, "&xgr;",      # ISOgrk1: GREEK SMALL LETTER XI
    0xEF, "&ogr;",      # ISOgrk1: GREEK SMALL LETTER OMICRON
    0xF0, "&pgr;",      # ISOgrk1: GREEK SMALL LETTER PI
    0xF1, "&rgr;",      # ISOgrk1: GREEK SMALL LETTER RHO
    0xF2, "&sfgr;",     # ISOgrk1: GREEK SMALL LETTER FINAL SIGMA
    0xF3, "&sgr;",      # ISOgrk1: GREEK SMALL LETTER SIGMA
    0xF4, "&tgr;",      # ISOgrk1: GREEK SMALL LETTER TAU
    0xF5, "&ugr;",      # ISOgrk1: GREEK SMALL LETTER UPSILON
    0xF6, "&phgr;",     # ISOgrk1: GREEK SMALL LETTER PHI
    0xF7, "&khgr;",     # ISOgrk1: GREEK SMALL LETTER CHI
    0xF8, "&psgr;",     # ISOgrk1: GREEK SMALL LETTER PSI
    0xF9, "&ohgr;",     # ISOgrk1: GREEK SMALL LETTER OMEGA
    0xFA, "&idigr;",    # ISOgrk2: GREEK SMALL LETTER IOTA WITH
                        #          DIAERESIS
    0xFB, "&udigr;",    # ISOgrk2: GREEK SMALL LETTER UPSILON WITH
                        #          DIAERESIS
    0xFC, "&oacgr;",    # ISOgrk2: GREEK SMALL LETTER OMICRON WITH
                        #          ACUTE
    0xFD, "&uacgr;",    # ISOgrk2: GREEK SMALL LETTER UPSILON WITH
                        #          ACUTE
    0xFE, "&ohacgr;",   # ISOgrk2: GREEK SMALL LETTER OMEGA WITH ACUTE
);
831 | ||
832 | ##--------------------------------------------------------------------------- | |
833 | ## ISO-8859-8: Hebrew | |
834 | ##--------------------------------------------------------------------------- | |
835 | ## Note: There are no ISO entities for Hebrew characters. ISOamso | |
836 | ## defines a few characters, but they are for math purposes. | |
837 | ## Some of the following are non-standard entity references. | |
838 | ## "ISOhbrw" is used as the name of the entity set defining the | |
839 | ## Hebrew entities, in anticipation that ISO will define such a set. | |
840 | ||
%ISO_8859_8_To_Ent = (
  # Maps ISO-8859-8 (Hebrew) code values to SGML entity references.
  # Every value is spelled as a plain-ASCII "&name;" reference so the
  # table never emits a raw non-ASCII character.
  #--------------------------------------------------------------------------
  # Hex Code  Entity Ref    # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA2, "&cent;",     # ISOnum : CENT SIGN
    0xA3, "&pound;",    # ISOnum : POUND SIGN
    0xA4, "&curren;",   # ISOnum : CURRENCY SIGN
    0xA5, "&yen;",      # ISOnum : YEN SIGN
    0xA6, "&brvbar;",   # ISOnum : BROKEN BAR
    0xA7, "&sect;",     # ISOnum : SECTION SIGN
    0xA8, "&uml;",      # ISOdia : DIAERESIS
    0xA9, "&copy;",     # ISOnum : COPYRIGHT SIGN
    0xAA, "&times;",    # ISOnum : MULTIPLICATION SIGN
    0xAB, "&laquo;",    # ISOnum : LEFT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xAC, "&not;",      # ISOnum : NOT SIGN
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xAE, "&reg;",      # ISOnum : REGISTERED SIGN
    0xAF, "&macr;",     # ISOdia : OVERLINE (MACRON)
    0xB0, "&deg;",      # ISOnum : DEGREE SIGN
    0xB1, "&plusmn;",   # ISOnum : PLUS-MINUS SIGN
    0xB2, "&sup2;",     # ISOnum : SUPERSCRIPT TWO
    0xB3, "&sup3;",     # ISOnum : SUPERSCRIPT THREE
    0xB4, "&acute;",    # ISOdia : ACUTE ACCENT
    0xB5, "&micro;",    # ISOnum : MICRO SIGN
    0xB6, "&para;",     # ISOnum : PILCROW SIGN
    0xB7, "&middot;",   # ISOnum : MIDDLE DOT
    0xB8, "&cedil;",    # ISOdia : CEDILLA
    0xB9, "&sup1;",     # ISOnum : SUPERSCRIPT ONE
    0xBA, "&divide;",   # ISOlat1: DIVISION SIGN
    0xBB, "&raquo;",    # ISOnum : RIGHT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xBC, "&frac14;",   # ISOnum : VULGAR FRACTION ONE QUARTER
    0xBD, "&frac12;",   # ISOnum : VULGAR FRACTION ONE HALF
    0xBE, "&frac34;",   # ISOnum : VULGAR FRACTION THREE QUARTERS
    0xDF, "&dlowbar;",  # ISOnum?: DOUBLE LOW LINE
    0xE0, "&alef;",     # ISOhbrw: HEBREW LETTER ALEF
    0xE1, "&bet;",      # ISOhbrw: HEBREW LETTER BET
    0xE2, "&gimel;",    # ISOhbrw: HEBREW LETTER GIMEL
    0xE3, "&dalet;",    # ISOhbrw: HEBREW LETTER DALET
    0xE4, "&he;",       # ISOhbrw: HEBREW LETTER HE
    0xE5, "&vav;",      # ISOhbrw: HEBREW LETTER VAV
    0xE6, "&zayin;",    # ISOhbrw: HEBREW LETTER ZAYIN
    0xE7, "&het;",      # ISOhbrw: HEBREW LETTER HET
    0xE8, "&tet;",      # ISOhbrw: HEBREW LETTER TET
    0xE9, "&yod;",      # ISOhbrw: HEBREW LETTER YOD
    0xEA, "&fkaf;",     # ISOhbrw: HEBREW LETTER FINAL KAF
    0xEB, "&kaf;",      # ISOhbrw: HEBREW LETTER KAF
    0xEC, "&lamed;",    # ISOhbrw: HEBREW LETTER LAMED
    0xED, "&fmem;",     # ISOhbrw: HEBREW LETTER FINAL MEM
    0xEE, "&mem;",      # ISOhbrw: HEBREW LETTER MEM
    0xEF, "&fnun;",     # ISOhbrw: HEBREW LETTER FINAL NUN
    0xF0, "&nun;",      # ISOhbrw: HEBREW LETTER NUN
    0xF1, "&samekh;",   # ISOhbrw: HEBREW LETTER SAMEKH
    0xF2, "&ayin;",     # ISOhbrw: HEBREW LETTER AYIN
    0xF3, "&fpe;",      # ISOhbrw: HEBREW LETTER FINAL PE
    0xF4, "&pe;",       # ISOhbrw: HEBREW LETTER PE
    0xF5, "&ftsadi;",   # ISOhbrw: HEBREW LETTER FINAL TSADI
    0xF6, "&tsadi;",    # ISOhbrw: HEBREW LETTER TSADI
    0xF7, "&qof;",      # ISOhbrw: HEBREW LETTER QOF
    0xF8, "&resh;",     # ISOhbrw: HEBREW LETTER RESH
    0xF9, "&shin;",     # ISOhbrw: HEBREW LETTER SHIN
    0xFA, "&tav;",      # ISOhbrw: HEBREW LETTER TAV
);
905 | ||
906 | ##--------------------------------------------------------------------------- | |
907 | ## ISO-8859-9: Latin-5 | |
908 | ##--------------------------------------------------------------------------- | |
909 | ||
%ISO_8859_9_To_Ent = (
  # Maps ISO-8859-9 (Latin-5, Turkish) code values to SGML entity
  # references.  Every value is spelled as a plain-ASCII "&name;"
  # reference so the table never emits a raw non-ASCII character.
  #
  # ISO-8859-9 is ISO-8859-1 with only six positions replaced by Turkish
  # letters (0xD0, 0xDD, 0xDE, 0xF0, 0xFD, 0xFE); 0xEA, 0xEC and 0xEF
  # therefore carry the same letters as in Latin-1.
  #--------------------------------------------------------------------------
  # Hex Code  Entity Ref    # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1, "&iexcl;",    # ISOnum : INVERTED EXCLAMATION MARK
    0xA2, "&cent;",     # ISOnum : CENT SIGN
    0xA3, "&pound;",    # ISOnum : POUND SIGN
    0xA4, "&curren;",   # ISOnum : CURRENCY SIGN
    0xA5, "&yen;",      # ISOnum : YEN SIGN
    0xA6, "&brvbar;",   # ISOnum : BROKEN BAR
    0xA7, "&sect;",     # ISOnum : SECTION SIGN
    0xA8, "&uml;",      # ISOdia : DIAERESIS
    0xA9, "&copy;",     # ISOnum : COPYRIGHT SIGN
    0xAA, "&ordf;",     # ISOnum : FEMININE ORDINAL INDICATOR
    0xAB, "&laquo;",    # ISOnum : LEFT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xAC, "&not;",      # ISOnum : NOT SIGN
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xAE, "&reg;",      # ISOnum : REGISTERED SIGN
    0xAF, "&macr;",     # ISOdia : OVERLINE (MACRON)
    0xB0, "&deg;",      # ISOnum : DEGREE SIGN
    0xB1, "&plusmn;",   # ISOnum : PLUS-MINUS SIGN
    0xB2, "&sup2;",     # ISOnum : SUPERSCRIPT TWO
    0xB3, "&sup3;",     # ISOnum : SUPERSCRIPT THREE
    0xB4, "&acute;",    # ISOdia : ACUTE ACCENT
    0xB5, "&micro;",    # ISOnum : MICRO SIGN
    0xB6, "&para;",     # ISOnum : PILCROW SIGN
    0xB7, "&middot;",   # ISOnum : MIDDLE DOT
    0xB8, "&cedil;",    # ISOdia : CEDILLA
    0xB9, "&sup1;",     # ISOnum : SUPERSCRIPT ONE
    0xBA, "&ordm;",     # ISOnum : MASCULINE ORDINAL INDICATOR
    0xBB, "&raquo;",    # ISOnum : RIGHT-POINTING DOUBLE ANGLE
                        #          QUOTATION MARK
    0xBC, "&frac14;",   # ISOnum : VULGAR FRACTION ONE QUARTER
    0xBD, "&frac12;",   # ISOnum : VULGAR FRACTION ONE HALF
    0xBE, "&frac34;",   # ISOnum : VULGAR FRACTION THREE QUARTERS
    0xBF, "&iquest;",   # ISOnum : INVERTED QUESTION MARK
    0xC0, "&Agrave;",   # ISOlat1: LATIN CAPITAL LETTER A WITH GRAVE
    0xC1, "&Aacute;",   # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2, "&Acirc;",    # ISOlat1: LATIN CAPITAL LETTER A WITH
                        #          CIRCUMFLEX
    0xC3, "&Atilde;",   # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE
    0xC4, "&Auml;",     # ISOlat1: LATIN CAPITAL LETTER A WITH
                        #          DIAERESIS
    0xC5, "&Aring;",    # ISOlat1: LATIN CAPITAL LETTER A WITH RING
                        #          ABOVE
    0xC6, "&AElig;",    # ISOlat1: LATIN CAPITAL LETTER AE
    0xC7, "&Ccedil;",   # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA
    0xC8, "&Egrave;",   # ISOlat1: LATIN CAPITAL LETTER E WITH GRAVE
    0xC9, "&Eacute;",   # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA, "&Ecirc;",    # ISOlat1: LATIN CAPITAL LETTER E WITH
                        #          CIRCUMFLEX
    0xCB, "&Euml;",     # ISOlat1: LATIN CAPITAL LETTER E WITH
                        #          DIAERESIS
    0xCC, "&Igrave;",   # ISOlat1: LATIN CAPITAL LETTER I WITH GRAVE
    0xCD, "&Iacute;",   # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE, "&Icirc;",    # ISOlat1: LATIN CAPITAL LETTER I WITH
                        #          CIRCUMFLEX
    0xCF, "&Iuml;",     # ISOlat1: LATIN CAPITAL LETTER I WITH
                        #          DIAERESIS
    0xD0, "&Gbreve;",   # ISOlat2: LATIN CAPITAL LETTER G WITH BREVE
    0xD1, "&Ntilde;",   # ISOlat1: LATIN CAPITAL LETTER N WITH TILDE
    0xD2, "&Ograve;",   # ISOlat1: LATIN CAPITAL LETTER O WITH GRAVE
    0xD3, "&Oacute;",   # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4, "&Ocirc;",    # ISOlat1: LATIN CAPITAL LETTER O WITH
                        #          CIRCUMFLEX
    0xD5, "&Otilde;",   # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE
    0xD6, "&Ouml;",     # ISOlat1: LATIN CAPITAL LETTER O WITH
                        #          DIAERESIS
    0xD7, "&times;",    # ISOnum : MULTIPLICATION SIGN
    0xD8, "&Oslash;",   # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE
    0xD9, "&Ugrave;",   # ISOlat1: LATIN CAPITAL LETTER U WITH GRAVE
    0xDA, "&Uacute;",   # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB, "&Ucirc;",    # ISOlat1: LATIN CAPITAL LETTER U WITH
                        #          CIRCUMFLEX
    0xDC, "&Uuml;",     # ISOlat1: LATIN CAPITAL LETTER U WITH
                        #          DIAERESIS
    0xDD, "&Idot;",     # ISOlat2: LATIN CAPITAL LETTER I WITH DOT
                        #          ABOVE
    0xDE, "&Scedil;",   # ISOlat2: LATIN CAPITAL LETTER S WITH CEDILLA
    0xDF, "&szlig;",    # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0, "&agrave;",   # ISOlat1: LATIN SMALL LETTER A WITH GRAVE
    0xE1, "&aacute;",   # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2, "&acirc;",    # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE3, "&atilde;",   # ISOlat1: LATIN SMALL LETTER A WITH TILDE
    0xE4, "&auml;",     # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5, "&aring;",    # ISOlat1: LATIN SMALL LETTER A WITH RING ABOVE
    0xE6, "&aelig;",    # ISOlat1: LATIN SMALL LETTER AE
    0xE7, "&ccedil;",   # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA
    0xE8, "&egrave;",   # ISOlat1: LATIN SMALL LETTER E WITH GRAVE
    0xE9, "&eacute;",   # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA, "&ecirc;",    # ISOlat1: LATIN SMALL LETTER E WITH CIRCUMFLEX
    0xEB, "&euml;",     # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC, "&igrave;",   # ISOlat1: LATIN SMALL LETTER I WITH GRAVE
    0xED, "&iacute;",   # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE, "&icirc;",    # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF, "&iuml;",     # ISOlat1: LATIN SMALL LETTER I WITH DIAERESIS
    0xF0, "&gbreve;",   # ISOlat2: LATIN SMALL LETTER G WITH BREVE
    0xF1, "&ntilde;",   # ISOlat1: LATIN SMALL LETTER N WITH TILDE
    0xF2, "&ograve;",   # ISOlat1: LATIN SMALL LETTER O WITH GRAVE
    0xF3, "&oacute;",   # ISOlat1: LATIN SMALL LETTER O WITH ACUTE
    0xF4, "&ocirc;",    # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5, "&otilde;",   # ISOlat1: LATIN SMALL LETTER O WITH TILDE
    0xF6, "&ouml;",     # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7, "&divide;",   # ISOlat1: DIVISION SIGN
    0xF8, "&oslash;",   # ISOlat1: LATIN SMALL LETTER O WITH STROKE
    0xF9, "&ugrave;",   # ISOlat1: LATIN SMALL LETTER U WITH GRAVE
    0xFA, "&uacute;",   # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB, "&ucirc;",    # ISOlat1: LATIN SMALL LETTER U WITH CIRCUMFLEX
    0xFC, "&uuml;",     # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD, "&inodot;",   # ISOlat2: LATIN SMALL LETTER I DOTLESS
    0xFE, "&scedil;",   # ISOlat2: LATIN SMALL LETTER S WITH CEDILLA
    0xFF, "&yuml;",     # ISOlat1: LATIN SMALL LETTER Y WITH DIAERESIS
);
# "latin5" is accepted as an alias for the same mapping.
*LATIN5_To_Ent = *ISO_8859_9_To_Ent;
1025 | ||
1026 | ##--------------------------------------------------------------------------- | |
1027 | ## ISO-8859-10: Latin-6 | |
1028 | ##--------------------------------------------------------------------------- | |
1029 | ||
%ISO_8859_10_To_Ent = (
  # Maps ISO-8859-10 (Latin-6, Nordic) code values to SGML entity
  # references.  Every value is spelled as a plain-ASCII "&name;"
  # reference so the table never emits a raw non-ASCII character.
  #
  # Code positions follow the published ISO-8859-10 code table.
  #--------------------------------------------------------------------------
  # Hex Code  Entity Ref    # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1, "&Aogon;",    # ISOlat2: LATIN CAPITAL LETTER A WITH OGONEK
    0xA2, "&Emacr;",    # ISOlat2: LATIN CAPITAL LETTER E WITH MACRON
    0xA3, "&Gcedil;",   # ISOlat2: LATIN CAPITAL LETTER G WITH CEDILLA
    0xA4, "&Imacr;",    # ISOlat2: LATIN CAPITAL LETTER I WITH MACRON
    0xA5, "&Itilde;",   # ISOlat2: LATIN CAPITAL LETTER I WITH TILDE
    0xA6, "&Kcedil;",   # ISOlat2: LATIN CAPITAL LETTER K WITH CEDILLA
    0xA7, "&sect;",     # ISOnum : SECTION SIGN
    0xA8, "&Lcedil;",   # ISOlat2: LATIN CAPITAL LETTER L WITH CEDILLA
    0xA9, "&Dstrok;",   # ISOlat2: LATIN CAPITAL LETTER D WITH STROKE
    0xAA, "&Scaron;",   # ISOlat2: LATIN CAPITAL LETTER S WITH CARON
    0xAB, "&Tstrok;",   # ISOlat2: LATIN CAPITAL LETTER T WITH STROKE
    0xAC, "&Zcaron;",   # ISOlat2: LATIN CAPITAL LETTER Z WITH CARON
    0xAD, "&shy;",      # ISOnum : SOFT HYPHEN
    0xAE, "&Umacr;",    # ISOlat2: LATIN CAPITAL LETTER U WITH MACRON
    0xAF, "&ENG;",      # ISOlat2: LATIN CAPITAL LETTER ENG (Lappish)
    0xB0, "&deg;",      # ISOnum : DEGREE SIGN
    0xB1, "&aogon;",    # ISOlat2: LATIN SMALL LETTER a WITH OGONEK
    0xB2, "&emacr;",    # ISOlat2: LATIN SMALL LETTER e WITH MACRON
    0xB3, "&gcedil;",   # ISOlat2: LATIN SMALL LETTER g WITH CEDILLA
    0xB4, "&imacr;",    # ISOlat2: LATIN SMALL LETTER i WITH MACRON
    0xB5, "&itilde;",   # ISOlat2: LATIN SMALL LETTER i WITH TILDE
    0xB6, "&kcedil;",   # ISOlat2: LATIN SMALL LETTER k WITH CEDILLA
    0xB7, "&middot;",   # ISOnum : MIDDLE DOT
    0xB8, "&lcedil;",   # ISOlat2: LATIN SMALL LETTER l WITH CEDILLA
    0xB9, "&dstrok;",   # ISOlat2: LATIN SMALL LETTER d WITH STROKE
    0xBA, "&scaron;",   # ISOlat2: LATIN SMALL LETTER s WITH CARON
    0xBB, "&tstrok;",   # ISOlat2: LATIN SMALL LETTER t WITH STROKE
    0xBC, "&zcaron;",   # ISOlat2: LATIN SMALL LETTER z WITH CARON
    0xBD, "&horbar;",   # ISOnum : HORIZONTAL BAR
    0xBE, "&umacr;",    # ISOlat2: LATIN SMALL LETTER u WITH MACRON
    0xBF, "&eng;",      # ISOlat2: LATIN SMALL LETTER ENG (Lappish)
    0xC0, "&Amacr;",    # ISOlat2: LATIN CAPITAL LETTER A WITH MACRON
    0xC1, "&Aacute;",   # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2, "&Acirc;",    # ISOlat1: LATIN CAPITAL LETTER A WITH
                        #          CIRCUMFLEX
    0xC3, "&Atilde;",   # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE
    0xC4, "&Auml;",     # ISOlat1: LATIN CAPITAL LETTER A WITH
                        #          DIAERESIS
    0xC5, "&Aring;",    # ISOlat1: LATIN CAPITAL LETTER A WITH RING
                        #          ABOVE
    0xC6, "&AElig;",    # ISOlat1: LATIN CAPITAL LETTER AE
    0xC7, "&Iogon;",    # ISOlat2: LATIN CAPITAL LETTER I WITH OGONEK
    0xC8, "&Ccaron;",   # ISOlat2: LATIN CAPITAL LETTER C WITH CARON
    0xC9, "&Eacute;",   # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA, "&Eogon;",    # ISOlat2: LATIN CAPITAL LETTER E WITH OGONEK
    0xCB, "&Euml;",     # ISOlat1: LATIN CAPITAL LETTER E WITH
                        #          DIAERESIS
    0xCC, "&Edot;",     # ISOlat2: LATIN CAPITAL LETTER E WITH
                        #          DOT ABOVE
    0xCD, "&Iacute;",   # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE, "&Icirc;",    # ISOlat1: LATIN CAPITAL LETTER I WITH
                        #          CIRCUMFLEX
    0xCF, "&Iuml;",     # ISOlat1: LATIN CAPITAL LETTER I WITH
                        #          DIAERESIS
    0xD0, "&ETH;",      # ISOlat1: LATIN CAPITAL LETTER ETH (Icelandic)
    0xD1, "&Ncedil;",   # ISOlat2: LATIN CAPITAL LETTER N WITH CEDILLA
    0xD2, "&Omacr;",    # ISOlat2: LATIN CAPITAL LETTER O WITH MACRON
    0xD3, "&Oacute;",   # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4, "&Ocirc;",    # ISOlat1: LATIN CAPITAL LETTER O WITH
                        #          CIRCUMFLEX
    0xD5, "&Otilde;",   # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE
    0xD6, "&Ouml;",     # ISOlat1: LATIN CAPITAL LETTER O WITH
                        #          DIAERESIS
    0xD7, "&Utilde;",   # ISOlat2: LATIN CAPITAL LETTER U WITH TILDE
    0xD8, "&Oslash;",   # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE
    0xD9, "&Uogon;",    # ISOlat2: LATIN CAPITAL LETTER U WITH OGONEK
    0xDA, "&Uacute;",   # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB, "&Ucirc;",    # ISOlat1: LATIN CAPITAL LETTER U WITH
                        #          CIRCUMFLEX
    0xDC, "&Uuml;",     # ISOlat1: LATIN CAPITAL LETTER U WITH
                        #          DIAERESIS
    0xDD, "&Yacute;",   # ISOlat1: LATIN CAPITAL LETTER Y WITH ACUTE
    0xDE, "&THORN;",    # ISOlat1: LATIN CAPITAL LETTER THORN
                        #          (Icelandic)
    0xDF, "&szlig;",    # ISOlat1: LATIN SMALL LETTER SHARP s (German)
    0xE0, "&amacr;",    # ISOlat2: LATIN SMALL LETTER a WITH MACRON
    0xE1, "&aacute;",   # ISOlat1: LATIN SMALL LETTER a WITH ACUTE
    0xE2, "&acirc;",    # ISOlat1: LATIN SMALL LETTER a WITH CIRCUMFLEX
    0xE3, "&atilde;",   # ISOlat1: LATIN SMALL LETTER a WITH TILDE
    0xE4, "&auml;",     # ISOlat1: LATIN SMALL LETTER a WITH DIAERESIS
    0xE5, "&aring;",    # ISOlat1: LATIN SMALL LETTER a WITH RING ABOVE
    0xE6, "&aelig;",    # ISOlat1: LATIN SMALL LETTER ae
    0xE7, "&iogon;",    # ISOlat2: LATIN SMALL LETTER i WITH OGONEK
    0xE8, "&ccaron;",   # ISOlat2: LATIN SMALL LETTER c WITH CARON
    0xE9, "&eacute;",   # ISOlat1: LATIN SMALL LETTER e WITH ACUTE
    0xEA, "&eogon;",    # ISOlat2: LATIN SMALL LETTER e WITH OGONEK
    0xEB, "&euml;",     # ISOlat1: LATIN SMALL LETTER e WITH DIAERESIS
    0xEC, "&edot;",     # ISOlat2: LATIN SMALL LETTER e WITH DOT ABOVE
    0xED, "&iacute;",   # ISOlat1: LATIN SMALL LETTER i WITH ACUTE
    0xEE, "&icirc;",    # ISOlat1: LATIN SMALL LETTER i WITH CIRCUMFLEX
    0xEF, "&iuml;",     # ISOlat1: LATIN SMALL LETTER i WITH DIAERESIS
    0xF0, "&eth;",      # ISOlat1: LATIN SMALL LETTER ETH (Icelandic)
    0xF1, "&ncedil;",   # ISOlat2: LATIN SMALL LETTER n WITH CEDILLA
    0xF2, "&omacr;",    # ISOlat2: LATIN SMALL LETTER o WITH MACRON
    0xF3, "&oacute;",   # ISOlat1: LATIN SMALL LETTER o WITH ACUTE
    0xF4, "&ocirc;",    # ISOlat1: LATIN SMALL LETTER o WITH CIRCUMFLEX
    0xF5, "&otilde;",   # ISOlat1: LATIN SMALL LETTER o WITH TILDE
    0xF6, "&ouml;",     # ISOlat1: LATIN SMALL LETTER o WITH DIAERESIS
    0xF7, "&utilde;",   # ISOlat2: LATIN SMALL LETTER u WITH TILDE
    0xF8, "&oslash;",   # ISOlat1: LATIN SMALL LETTER o WITH STROKE
    0xF9, "&uogon;",    # ISOlat2: LATIN SMALL LETTER u WITH OGONEK
    0xFA, "&uacute;",   # ISOlat1: LATIN SMALL LETTER u WITH ACUTE
    0xFB, "&ucirc;",    # ISOlat1: LATIN SMALL LETTER u WITH CIRCUMFLEX
    0xFC, "&uuml;",     # ISOlat1: LATIN SMALL LETTER u WITH DIAERESIS
    0xFD, "&yacute;",   # ISOlat1: LATIN SMALL LETTER y WITH ACUTE
    0xFE, "&thorn;",    # ISOlat1: LATIN SMALL LETTER THORN (Icelandic)
    0xFF, "&kgreen;",   # ISOlat2: LATIN SMALL LETTER KRA (Greenlandic)
);
# "latin6" is accepted as an alias for the same mapping.
*LATIN6_To_Ent = *ISO_8859_10_To_Ent;
1143 | ||
1144 | ############################################################################### | |
1145 | ## Routines | |
1146 | ############################################################################### | |
1147 | ||
1148 | ##---------------------------------------------------------------------------## | |
1149 | ## str2sgml converts a string encoded by $charset to an sgml | |
1150 | ## string where special characters are converted to entity | |
1151 | ## references. | |
1152 | ## | |
1153 | ## $return_data = iso_8859'str2sgml($data, $charset, $only8bit); | |
1154 | ## | |
1155 | ## If $only8bit is non-zero, then only 8-bit characters are | |
1156 | ## translated. | |
1157 | ## | |
sub str2sgml {
    # Arguments: the raw string, the name of its character set (for
    # example "iso-8859-7"), and a flag that, when true, leaves every
    # byte below 0xA0 untouched.
    # Returns the converted string.
    local($data, $charset, $only8bit) = ($_[0], $_[1], $_[2]);
    # $char is localized together with the other work variables so that
    # a call does not overwrite a package variable of the same name.
    local($ret, $offset, $len, $char) = ('', 0, 0, 0);

    # Get mapping (this method works for Perl 4 and 5).  The charset
    # name is normalized to the naming scheme of the mapping hashes,
    # e.g. "iso-8859-7" selects %ISO_8859_7_To_Ent.  A name with no
    # matching hash aliases an empty map, so only the US-ASCII table
    # applies.
    $charset =~ tr/a-z/A-Z/;
    $charset =~ tr/-/_/;
    local(*map) = ("${charset}_To_Ent");

    # Convert string, one byte at a time
    $len = length($data);
    while ($offset < $len) {
        $char = unpack("C", substr($data, $offset++, 1));
        if ($only8bit && $char < 0xA0) {
            # 8-bit-only mode: low bytes are copied through verbatim
            $ret .= pack("C", $char);
        } else {
            # Charset-specific entity first, then the common US-ASCII
            # entity, otherwise the byte itself.
            $ret .= ($map{$char} || $US_ASCII_To_Ent{$char} ||
                     pack("C", $char));
        }
    }
    $ret;
}
1180 | ||
1181 | ##---------------------------------------------------------------------------## | |
1182 | 1; |