| 1 | #ifndef ENCODE_H |
| 2 | #define ENCODE_H |
| 3 | |
| 4 | #ifndef U8 |
| 5 | /* |
| 6 | Deliberate trick: perl normally supplies U8 as a #define. When it is |
| 7 | absent we only typedef U8, so the macro stays undefined and the |
| 8 | "#ifdef U8" section later in this header is skipped - data-only users |
| 9 | get the table types without extern references to the code parts. |
| 10 | */ |
| 11 | typedef unsigned char U8; |
| 12 | #endif |
| 13 | |
| 14 | typedef struct encpage_s encpage_t; |
| 15 | |
| 16 | struct encpage_s |
| 17 | { |
| 18 | /* One entry of a translation page (a page is an array of these); fields are ordered to pack nicely on 32-bit machines */ |
| 19 | const U8 *seq; /* Packed output sequences emitted on a match; |
| 20 | the entry for an octet starts at seq + dlen*(octet-min) */ |
| 21 | encpage_t *next; /* Page to continue with after a match */ |
| 22 | U8 min; /* Lowest octet value this entry matches (inclusive) */ |
| 23 | U8 max; /* Highest octet value this entry matches (inclusive - the last entry of a page must reach 0xFF) */ |
| 24 | U8 dlen; /* Destination length - size in octets of each entry in seq; |
| 25 | may be zero when the output is not known yet */ |
| 26 | U8 slen; /* Source length - number of source octets needed; |
| 27 | 0 means no representation, MSB set marks an approximate "FALLBACK" entry */ |
| 28 | }; |
| 29 | |
| 30 | /* |
| 31 | At any point in a translation there is a page pointer which points |
| 32 | at an array of the above structures. |
| 33 | |
| 34 | Basic operation : |
| 35 | get octet from source stream. |
| 36 | if (octet >= min && octet <= max) { |
| 37 | if slen is 0 then we cannot represent this character. |
| 38 | if we have less than slen octets (including this one) then |
| 39 | we have a partial character. |
| 40 | otherwise |
| 41 | copy dlen octets from seq + dlen*(octet-min) to output |
| 42 | (dlen may be zero if we don't know yet.) |
| 43 | load page pointer with next to continue. |
| 44 | (if slen is one this is the end of a character) |
| 45 | get next octet. |
| 46 | } |
| 47 | else { |
| 48 | increment the page pointer to look at next slot in the array |
| 49 | } |
| 50 | |
| 51 | arrays SHALL be constructed so there is an entry which matches |
| 52 | ..0xFF at the end, and either maps it or indicates no |
| 53 | representation. |
| 54 | |
| 55 | if MSB of slen is set then mapping is an approximate "FALLBACK" entry. |
| 56 | |
| 57 | */ |
| 58 | |
| 59 | |
| 60 | typedef struct encode_s encode_t; |
| 61 | struct encode_s /* Describes one encoding: both translation tables, its replacement sequence and its name(s) */ |
| 62 | { |
| 63 | encpage_t *t_utf8; /* Starting page of the table translating |
| 64 | from this encoding to UTF-8 form */ |
| 65 | encpage_t *f_utf8; /* Starting page of the table translating |
| 66 | from UTF-8 to this encoding */ |
| 67 | const U8 *rep; /* Replacement character expressed in this encoding, |
| 68 | e.g. "?"; its length is held in replen */ |
| 69 | int replen; /* Length of rep in octets */ |
| 70 | U8 min_el; /* Fewest octets used to represent one character */ |
| 71 | U8 max_el; /* Most octets used to represent one character */ |
| 72 | const char *name[2]; /* Name(s) of this encoding; the array has room for two entries */ |
| 73 | }; |
| 74 | |
| 75 | #ifdef U8 |
| 76 | /* Prototypes are visible only when U8 is a macro (see the comment at the top of the file); with the typedef fallback this section is skipped */ |
| 77 | /* NOTE(review): do_encode presumably returns 0 or one of the ENCODE_* status codes defined later in this header - confirm against the implementation */ |
| 78 | extern int do_encode(encpage_t *enc, const U8 *src, STRLEN *slen, |
| 79 | U8 *dst, STRLEN dlen, STRLEN *dout, int approx, |
| 80 | const U8 *term, STRLEN tlen); |
| 81 | |
| 82 | extern void Encode_DefineEncoding(encode_t *enc); |
| 83 | |
| 84 | #endif /* U8 */ |
| 85 | |
| 86 | #define ENCODE_NOSPACE 1 /* NOTE(review): these five look like non-zero status codes from do_encode - confirm */ |
| 87 | #define ENCODE_PARTIAL 2 |
| 88 | #define ENCODE_NOREP 3 |
| 89 | #define ENCODE_FALLBACK 4 |
| 90 | #define ENCODE_FOUND_TERM 5 |
| 91 | |
| 92 | #define FBCHAR_UTF8 "\xEF\xBF\xBD" /* U+FFFD REPLACEMENT CHARACTER encoded as UTF-8 */ |
| 93 | |
| 94 | #define ENCODE_DIE_ON_ERR 0x0001 /* croaks immediately */ |
| 95 | #define ENCODE_WARN_ON_ERR 0x0002 /* warn on error; may proceed */ |
| 96 | #define ENCODE_RETURN_ON_ERR 0x0004 /* immediately returns on NOREP */ |
| 97 | #define ENCODE_LEAVE_SRC 0x0008 /* $src is updated unless this bit is set */ |
| 98 | #define ENCODE_PERLQQ 0x0100 /* perlqq fallback string */ |
| 99 | #define ENCODE_HTMLCREF 0x0200 /* HTML character reference fallback mode */ |
| 100 | #define ENCODE_XMLCREF 0x0400 /* XML character reference fallback mode */ |
| 101 | #define ENCODE_STOP_AT_PARTIAL 0x0800 /* stop at partial explicitly */ |
| 102 | |
| 103 | #define ENCODE_FB_DEFAULT 0x0000 /* preset modes: no flag bits set here, combinations of the flag bits above in the rest */ |
| 104 | #define ENCODE_FB_CROAK 0x0001 /* same bit as ENCODE_DIE_ON_ERR */ |
| 105 | #define ENCODE_FB_QUIET ENCODE_RETURN_ON_ERR |
| 106 | #define ENCODE_FB_WARN (ENCODE_RETURN_ON_ERR|ENCODE_WARN_ON_ERR) |
| 107 | #define ENCODE_FB_PERLQQ (ENCODE_PERLQQ|ENCODE_LEAVE_SRC) |
| 108 | #define ENCODE_FB_HTMLCREF (ENCODE_HTMLCREF|ENCODE_LEAVE_SRC) |
| 109 | #define ENCODE_FB_XMLCREF (ENCODE_XMLCREF|ENCODE_LEAVE_SRC) |
| 110 | |
| 111 | #endif /* ENCODE_H */ |