Commit | Line | Data |
---|---|---|
920dae64 AT |
""" Encoding Aliases Support

    This module is used by the encodings package search function to
    map encoding names to module names.

    Note that the search function normalizes the encoding names before
    doing the lookup, so the mapping will have to map normalized
    encoding names to module names.  Every key below must therefore be
    written in its normalized form (lowercase, with hyphens and spaces
    spelled as underscores); a key in any other form can never be
    matched by the lookup.

    Contents:

        The following aliases dictionary contains mappings of all IANA
        character set names for which the Python core library provides
        codecs. In addition to these, a few Python specific codec
        aliases have also been added.

"""
aliases = {

    # Please keep this list sorted alphabetically by value !

    # ascii codec
    '646' : 'ascii',
    'ansi_x3.4_1968' : 'ascii',
    'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name
    'ansi_x3.4_1986' : 'ascii',
    'cp367' : 'ascii',
    'csascii' : 'ascii',
    'ibm367' : 'ascii',
    'iso646_us' : 'ascii',
    'iso_646.irv_1991' : 'ascii',
    'iso_ir_6' : 'ascii',
    'us' : 'ascii',
    'us_ascii' : 'ascii',

    # base64_codec codec
    'base64' : 'base64_codec',
    'base_64' : 'base64_codec',

    # big5 codec
    'big5_tw' : 'big5',
    'csbig5' : 'big5',

    # big5hkscs codec
    'big5_hkscs' : 'big5hkscs',
    'hkscs' : 'big5hkscs',

    # bz2_codec codec
    'bz2' : 'bz2_codec',

    # cp037 codec
    '037' : 'cp037',
    'csibm037' : 'cp037',
    'ebcdic_cp_ca' : 'cp037',
    'ebcdic_cp_nl' : 'cp037',
    'ebcdic_cp_us' : 'cp037',
    'ebcdic_cp_wt' : 'cp037',
    'ibm037' : 'cp037',
    'ibm039' : 'cp037',

    # cp1026 codec
    '1026' : 'cp1026',
    'csibm1026' : 'cp1026',
    'ibm1026' : 'cp1026',

    # cp1140 codec
    '1140' : 'cp1140',
    'ibm1140' : 'cp1140',

    # cp1250 codec
    '1250' : 'cp1250',
    'windows_1250' : 'cp1250',

    # cp1251 codec
    '1251' : 'cp1251',
    'windows_1251' : 'cp1251',

    # cp1252 codec
    '1252' : 'cp1252',
    'windows_1252' : 'cp1252',

    # cp1253 codec
    '1253' : 'cp1253',
    'windows_1253' : 'cp1253',

    # cp1254 codec
    '1254' : 'cp1254',
    'windows_1254' : 'cp1254',

    # cp1255 codec
    '1255' : 'cp1255',
    'windows_1255' : 'cp1255',

    # cp1256 codec
    '1256' : 'cp1256',
    'windows_1256' : 'cp1256',

    # cp1257 codec
    '1257' : 'cp1257',
    'windows_1257' : 'cp1257',

    # cp1258 codec
    '1258' : 'cp1258',
    'windows_1258' : 'cp1258',

    # cp424 codec
    '424' : 'cp424',
    'csibm424' : 'cp424',
    'ebcdic_cp_he' : 'cp424',
    'ibm424' : 'cp424',

    # cp437 codec
    '437' : 'cp437',
    'cspc8codepage437' : 'cp437',
    'ibm437' : 'cp437',

    # cp500 codec
    '500' : 'cp500',
    'csibm500' : 'cp500',
    'ebcdic_cp_be' : 'cp500',
    'ebcdic_cp_ch' : 'cp500',
    'ibm500' : 'cp500',

    # cp775 codec
    '775' : 'cp775',
    'cspc775baltic' : 'cp775',
    'ibm775' : 'cp775',

    # cp850 codec
    '850' : 'cp850',
    'cspc850multilingual' : 'cp850',
    'ibm850' : 'cp850',

    # cp852 codec
    '852' : 'cp852',
    'cspcp852' : 'cp852',
    'ibm852' : 'cp852',

    # cp855 codec
    '855' : 'cp855',
    'csibm855' : 'cp855',
    'ibm855' : 'cp855',

    # cp857 codec
    '857' : 'cp857',
    'csibm857' : 'cp857',
    'ibm857' : 'cp857',

    # cp860 codec
    '860' : 'cp860',
    'csibm860' : 'cp860',
    'ibm860' : 'cp860',

    # cp861 codec
    '861' : 'cp861',
    'cp_is' : 'cp861',
    'csibm861' : 'cp861',
    'ibm861' : 'cp861',

    # cp862 codec
    '862' : 'cp862',
    'cspc862latinhebrew' : 'cp862',
    'ibm862' : 'cp862',

    # cp863 codec
    '863' : 'cp863',
    'csibm863' : 'cp863',
    'ibm863' : 'cp863',

    # cp864 codec
    '864' : 'cp864',
    'csibm864' : 'cp864',
    'ibm864' : 'cp864',

    # cp865 codec
    '865' : 'cp865',
    'csibm865' : 'cp865',
    'ibm865' : 'cp865',

    # cp866 codec
    '866' : 'cp866',
    'csibm866' : 'cp866',
    'ibm866' : 'cp866',

    # cp869 codec
    '869' : 'cp869',
    'cp_gr' : 'cp869',
    'csibm869' : 'cp869',
    'ibm869' : 'cp869',

    # cp932 codec
    '932' : 'cp932',
    'ms932' : 'cp932',
    'mskanji' : 'cp932',
    'ms_kanji' : 'cp932',

    # cp949 codec
    '949' : 'cp949',
    'ms949' : 'cp949',
    'uhc' : 'cp949',

    # cp950 codec
    '950' : 'cp950',
    'ms950' : 'cp950',

    # euc_jis_2004 codec
    'jisx0213' : 'euc_jis_2004',
    'eucjis2004' : 'euc_jis_2004',
    'euc_jis2004' : 'euc_jis_2004',

    # euc_jisx0213 codec
    'eucjisx0213' : 'euc_jisx0213',

    # euc_jp codec
    'eucjp' : 'euc_jp',
    'ujis' : 'euc_jp',
    'u_jis' : 'euc_jp',

    # euc_kr codec
    'euckr' : 'euc_kr',
    'korean' : 'euc_kr',
    'ksc5601' : 'euc_kr',
    'ks_c_5601' : 'euc_kr',
    'ks_c_5601_1987' : 'euc_kr',
    'ksx1001' : 'euc_kr',
    'ks_x_1001' : 'euc_kr',

    # gb18030 codec
    'gb18030_2000' : 'gb18030',

    # gb2312 codec
    'chinese' : 'gb2312',
    'csiso58gb231280' : 'gb2312',
    'euc_cn' : 'gb2312',
    'euccn' : 'gb2312',
    'eucgb2312_cn' : 'gb2312',
    'gb2312_1980' : 'gb2312',
    'gb2312_80' : 'gb2312',
    'iso_ir_58' : 'gb2312',

    # gbk codec
    '936' : 'gbk',
    'cp936' : 'gbk',
    'ms936' : 'gbk',

    # hex_codec codec
    'hex' : 'hex_codec',

    # hp_roman8 codec
    'roman8' : 'hp_roman8',
    'r8' : 'hp_roman8',
    # IANA spells this alias "csHPRoman8"; it is stored lowercased because
    # the lookup only ever sees normalized (lowercase) names.
    'cshproman8' : 'hp_roman8',

    # hz codec
    'hzgb' : 'hz',
    'hz_gb' : 'hz',
    'hz_gb_2312' : 'hz',

    # iso2022_jp codec
    'csiso2022jp' : 'iso2022_jp',
    'iso2022jp' : 'iso2022_jp',
    'iso_2022_jp' : 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1' : 'iso2022_jp_1',
    'iso_2022_jp_1' : 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2' : 'iso2022_jp_2',
    'iso_2022_jp_2' : 'iso2022_jp_2',

    # iso2022_jp_2004 codec
    'iso_2022_jp_2004' : 'iso2022_jp_2004',
    'iso2022jp_2004' : 'iso2022_jp_2004',

    # iso2022_jp_3 codec
    'iso2022jp_3' : 'iso2022_jp_3',
    'iso_2022_jp_3' : 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext' : 'iso2022_jp_ext',
    'iso_2022_jp_ext' : 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr' : 'iso2022_kr',
    'iso2022kr' : 'iso2022_kr',
    'iso_2022_kr' : 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6' : 'iso8859_10',
    'iso_8859_10' : 'iso8859_10',
    'iso_8859_10_1992' : 'iso8859_10',
    'iso_ir_157' : 'iso8859_10',
    'l6' : 'iso8859_10',
    'latin6' : 'iso8859_10',

    # iso8859_13 codec
    'iso_8859_13' : 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14' : 'iso8859_14',
    'iso_8859_14_1998' : 'iso8859_14',
    'iso_celtic' : 'iso8859_14',
    'iso_ir_199' : 'iso8859_14',
    'l8' : 'iso8859_14',
    'latin8' : 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15' : 'iso8859_15',

    # iso8859_2 codec
    'csisolatin2' : 'iso8859_2',
    'iso_8859_2' : 'iso8859_2',
    'iso_8859_2_1987' : 'iso8859_2',
    'iso_ir_101' : 'iso8859_2',
    'l2' : 'iso8859_2',
    'latin2' : 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3' : 'iso8859_3',
    'iso_8859_3' : 'iso8859_3',
    'iso_8859_3_1988' : 'iso8859_3',
    'iso_ir_109' : 'iso8859_3',
    'l3' : 'iso8859_3',
    'latin3' : 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4' : 'iso8859_4',
    'iso_8859_4' : 'iso8859_4',
    'iso_8859_4_1988' : 'iso8859_4',
    'iso_ir_110' : 'iso8859_4',
    'l4' : 'iso8859_4',
    'latin4' : 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic' : 'iso8859_5',
    'cyrillic' : 'iso8859_5',
    'iso_8859_5' : 'iso8859_5',
    'iso_8859_5_1988' : 'iso8859_5',
    'iso_ir_144' : 'iso8859_5',

    # iso8859_6 codec
    'arabic' : 'iso8859_6',
    'asmo_708' : 'iso8859_6',
    'csisolatinarabic' : 'iso8859_6',
    'ecma_114' : 'iso8859_6',
    'iso_8859_6' : 'iso8859_6',
    'iso_8859_6_1987' : 'iso8859_6',
    'iso_ir_127' : 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek' : 'iso8859_7',
    'ecma_118' : 'iso8859_7',
    'elot_928' : 'iso8859_7',
    'greek' : 'iso8859_7',
    'greek8' : 'iso8859_7',
    'iso_8859_7' : 'iso8859_7',
    'iso_8859_7_1987' : 'iso8859_7',
    'iso_ir_126' : 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew' : 'iso8859_8',
    'hebrew' : 'iso8859_8',
    'iso_8859_8' : 'iso8859_8',
    'iso_8859_8_1988' : 'iso8859_8',
    'iso_ir_138' : 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5' : 'iso8859_9',
    'iso_8859_9' : 'iso8859_9',
    'iso_8859_9_1989' : 'iso8859_9',
    'iso_ir_148' : 'iso8859_9',
    'l5' : 'iso8859_9',
    'latin5' : 'iso8859_9',

    # iso8859_11 codec
    'thai' : 'iso8859_11',
    'iso_8859_11' : 'iso8859_11',
    'iso_8859_11_2001' : 'iso8859_11',

    # iso8859_16 codec
    'iso_8859_16' : 'iso8859_16',
    'iso_8859_16_2001' : 'iso8859_16',
    'iso_ir_226' : 'iso8859_16',
    'l10' : 'iso8859_16',
    'latin10' : 'iso8859_16',

    # johab codec
    'cp1361' : 'johab',
    'ms1361' : 'johab',

    # koi8_r codec
    'cskoi8r' : 'koi8_r',

    # latin_1 codec
    '8859' : 'latin_1',
    'cp819' : 'latin_1',
    'csisolatin1' : 'latin_1',
    'ibm819' : 'latin_1',
    'iso8859' : 'latin_1',
    'iso_8859_1' : 'latin_1',
    'iso_8859_1_1987' : 'latin_1',
    'iso_ir_100' : 'latin_1',
    'l1' : 'latin_1',
    'latin' : 'latin_1',
    'latin1' : 'latin_1',

    # mac_cyrillic codec
    'maccyrillic' : 'mac_cyrillic',

    # mac_greek codec
    'macgreek' : 'mac_greek',

    # mac_iceland codec
    'maciceland' : 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope' : 'mac_latin2',
    'maclatin2' : 'mac_latin2',

    # mac_roman codec
    'macroman' : 'mac_roman',

    # mac_turkish codec
    'macturkish' : 'mac_turkish',

    # mbcs codec
    'dbcs' : 'mbcs',

    # ptcp154 codec
    'csptcp154' : 'ptcp154',
    'pt154' : 'ptcp154',
    'cp154' : 'ptcp154',
    # IANA spells this alias "Cyrillic-Asian"; the hyphen is written as an
    # underscore because the lookup only ever sees normalized names.
    'cyrillic_asian' : 'ptcp154',

    # quopri_codec codec
    'quopri' : 'quopri_codec',
    'quoted_printable' : 'quopri_codec',
    'quotedprintable' : 'quopri_codec',

    # rot_13 codec
    'rot13' : 'rot_13',

    # shift_jis codec
    'csshiftjis' : 'shift_jis',
    'shiftjis' : 'shift_jis',
    'sjis' : 'shift_jis',
    's_jis' : 'shift_jis',

    # shift_jis_2004 codec
    'shiftjis2004' : 'shift_jis_2004',
    'sjis_2004' : 'shift_jis_2004',
    's_jis_2004' : 'shift_jis_2004',

    # shift_jisx0213 codec
    'shiftjisx0213' : 'shift_jisx0213',
    'sjisx0213' : 'shift_jisx0213',
    's_jisx0213' : 'shift_jisx0213',

    # tactis codec
    # NOTE(review): confirm that a 'tactis' codec module actually ships with
    # the encodings package; if not, this alias resolves to nothing.
    'tis260' : 'tactis',

    # tis_620 codec
    'tis620' : 'tis_620',
    'tis_620_0' : 'tis_620',
    'tis_620_2529_0' : 'tis_620',
    'tis_620_2529_1' : 'tis_620',
    'iso_ir_166' : 'tis_620',

    # utf_16 codec
    'u16' : 'utf_16',
    'utf16' : 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked' : 'utf_16_be',
    'utf_16be' : 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked' : 'utf_16_le',
    'utf_16le' : 'utf_16_le',

    # utf_7 codec
    'u7' : 'utf_7',
    'utf7' : 'utf_7',

    # utf_8 codec
    'u8' : 'utf_8',
    'utf' : 'utf_8',
    'utf8' : 'utf_8',
    'utf8_ucs2' : 'utf_8',
    'utf8_ucs4' : 'utf_8',

    # uu_codec codec
    'uu' : 'uu_codec',

    # zlib_codec codec
    'zip' : 'zlib_codec',
    'zlib' : 'zlib_codec',

}