Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | #ifndef ENCODE_H |
2 | #define ENCODE_H | |
3 | ||
4 | #ifndef U8 | |
5 | /* | |
6 | A tad devious this: | |
7 | perl normally has a #define for U8 - if that isn't present then we | |
8 | typedef it instead - leaving U8 un-#defined, so the "#ifdef U8" test | |
9 | further down is false and we can do data parts without getting extern references to the code parts | |
10 | */ | |
11 | typedef unsigned char U8; | |
12 | #endif | |
13 | ||
14 | typedef struct encpage_s encpage_t; | |
15 | ||
16 | struct encpage_s | |
17 | { | |
18 | /* One slot of a translation page; fields ordered to pack nicely on 32-bit machines */ | |
19 | const U8 *seq; /* Packed output sequences we generate if we match; | |
20 | the one for an octet starts at seq + dlen*(octet-min) */ | |
21 | encpage_t *next; /* Page to continue with if we match */ | |
22 | U8 min; /* Min value of octet to match this entry */ | |
23 | U8 max; /* Max value of octet to match this entry */ | |
24 | U8 dlen; /* destination length - size of entries in seq | |
25 | (may be zero if we don't know yet) */ | |
26 | U8 slen; /* source length - number of source octets needed; 0 means no | |
27 | representation, MSB set marks an approximate "FALLBACK" entry */ | |
28 | }; | |
29 | ||
30 | /* | |
31 | At any point in a translation there is a page pointer which points | |
32 | at an array of the above structures. | |
33 | ||
34 | Basic operation : | |
35 | get octet from source stream. | |
36 | if (octet >= min && octet <= max) { | |
37 | if slen is 0 then we cannot represent this character. | |
38 | if we have less than slen octets (including this one) then | |
39 | we have a partial character. | |
40 | otherwise | |
41 | copy dlen octets from seq + dlen*(octet-min) to output | |
42 | (dlen may be zero if we don't know yet.) | |
43 | load page pointer with next to continue. | |
44 | (if slen is one this is the end of a character) | |
45 | get next octet. | |
46 | } | |
47 | else { | |
48 | increment the page pointer to look at next slot in the array | |
49 | } | |
50 | ||
51 | arrays SHALL be constructed so there is an entry which matches | |
52 | ..0xFF at the end, and either maps it or indicates no | |
53 | representation. | |
54 | ||
55 | if MSB of slen is set then mapping is an approximate "FALLBACK" entry. | |
56 | ||
57 | */ | |
58 | ||
59 | ||
60 | typedef struct encode_s encode_t; | |
61 | struct encode_s | |
62 | { | |
63 | encpage_t *t_utf8; /* Starting page for translation from | |
64 | this encoding to UTF-8 form */ | |
65 | encpage_t *f_utf8; /* Starting page for translation | |
66 | from UTF-8 to this encoding */ | |
67 | const U8 *rep; /* Replacement character expressed in this encoding, | |
68 | e.g. "?" */ | |
69 | int replen; /* Length of rep in octets */ | |
70 | U8 min_el; /* Minimum octets to represent a character */ | |
71 | U8 max_el; /* Maximum octets to represent a character */ | |
72 | const char *name[2]; /* name(s) of this encoding */ | |
73 | }; | |
74 | ||
75 | #ifdef U8 | |
76 | /* Code parts: only declared when perl's #define for U8 is present - see comment at top of file for deviousness */ | |
77 | ||
78 | extern int do_encode(encpage_t *enc, const U8 *src, STRLEN *slen, | |
79 | U8 *dst, STRLEN dlen, STRLEN *dout, int approx, | |
80 | const U8 *term, STRLEN tlen); | |
81 | ||
82 | extern void Encode_DefineEncoding(encode_t *enc); | |
83 | ||
84 | #endif /* U8 */ | |
85 | ||
86 | #define ENCODE_NOSPACE 1 | |
87 | #define ENCODE_PARTIAL 2 | |
88 | #define ENCODE_NOREP 3 | |
89 | #define ENCODE_FALLBACK 4 | |
90 | #define ENCODE_FOUND_TERM 5 | |
91 | ||
92 | #define FBCHAR_UTF8 "\xEF\xBF\xBD" /* U+FFFD REPLACEMENT CHARACTER as UTF-8 octets */ | |
93 | ||
94 | #define ENCODE_DIE_ON_ERR 0x0001 /* croaks immediately */ | |
95 | #define ENCODE_WARN_ON_ERR 0x0002 /* warn on error; may proceed */ | |
96 | #define ENCODE_RETURN_ON_ERR 0x0004 /* immediately returns on NOREP */ | |
97 | #define ENCODE_LEAVE_SRC 0x0008 /* $src is updated unless this bit is set */ | |
98 | #define ENCODE_PERLQQ 0x0100 /* perlqq fallback string */ | |
99 | #define ENCODE_HTMLCREF 0x0200 /* HTML character ref. fb mode */ | |
100 | #define ENCODE_XMLCREF 0x0400 /* XML character ref. fb mode */ | |
101 | #define ENCODE_STOP_AT_PARTIAL 0x0800 /* stop at partial explicitly */ | |
102 | ||
103 | #define ENCODE_FB_DEFAULT 0x0000 /* no flag bits set */ | |
104 | #define ENCODE_FB_CROAK 0x0001 /* same bit as ENCODE_DIE_ON_ERR */ | |
105 | #define ENCODE_FB_QUIET ENCODE_RETURN_ON_ERR | |
106 | #define ENCODE_FB_WARN (ENCODE_RETURN_ON_ERR|ENCODE_WARN_ON_ERR) | |
107 | #define ENCODE_FB_PERLQQ (ENCODE_PERLQQ|ENCODE_LEAVE_SRC) | |
108 | #define ENCODE_FB_HTMLCREF (ENCODE_HTMLCREF|ENCODE_LEAVE_SRC) | |
109 | #define ENCODE_FB_XMLCREF (ENCODE_XMLCREF|ENCODE_LEAVE_SRC) | |
110 | ||
111 | #endif /* ENCODE_H */ |