Commit | Line | Data |
---|---|---|
3144ee8a AT |
1 | /* xscreensaver, Copyright (c) 2014-2016 Jamie Zawinski <jwz@jwz.org> |
2 | * | |
3 | * Permission to use, copy, modify, distribute, and sell this software and its | |
4 | * documentation for any purpose is hereby granted without fee, provided that | |
5 | * the above copyright notice appear in all copies and that both that | |
6 | * copyright notice and this permission notice appear in supporting | |
7 | * documentation. No representations are made about the suitability of this | |
8 | * software for any purpose. It is provided "as is" without express or | |
9 | * implied warranty. | |
10 | */ | |
11 | ||
12 | #ifdef HAVE_CONFIG_H | |
13 | # include "config.h" | |
14 | #endif | |
15 | ||
16 | #include <stdlib.h> | |
17 | #include <stdio.h> | |
18 | #include <string.h> | |
19 | ||
20 | #ifdef HAVE_JWXYZ | |
21 | # include "jwxyz.h" | |
22 | #else /* !HAVE_JWXYZ */ | |
23 | # include <X11/Xlib.h> | |
24 | #endif | |
25 | ||
26 | #include "utf8wc.h" | |
27 | ||
28 | ||
29 | /* "Unicode Replacement Character", displayed in lieu of invalid characters. */ | |
30 | # define INVALID 0xFFFD | |
31 | ||
32 | ||
33 | /* Mask the number to be within the valid range of unicode characters. | |
34 | */ | |
35 | static unsigned long | |
36 | uc_truncate (unsigned long uc) | |
37 | { | |
38 | uc &= 0x7FFFFFFFL; /* Unicode is 31 bits */ | |
39 | if (uc > 0x10FFFF) uc = INVALID; /* But UTF-8 is 4 bytes */ | |
40 | if (uc == 0) uc = INVALID; /* no nulls */ | |
41 | ||
42 | if (uc >= 0xD800 && uc <= 0xDFFF) | |
43 | /* Reserved for use with UTF-16: not a real character. */ | |
44 | uc = INVALID; | |
45 | ||
46 | return uc; | |
47 | } | |
48 | ||
49 | ||
50 | /* Parse the first UTF8 character at the front of the string. | |
51 | Return the Unicode character, and the number of bytes read. | |
52 | */ | |
53 | long | |
54 | utf8_decode (const unsigned char *in, long length, unsigned long *unicode_ret) | |
55 | { | |
56 | const unsigned char *start = in; | |
57 | const unsigned char *end = in + length; | |
58 | unsigned long uc = INVALID; | |
59 | unsigned long min = 0; | |
60 | unsigned char c; | |
61 | ||
62 | if (length <= 0) goto DONE; | |
63 | ||
64 | c = *in++; | |
65 | ||
66 | # define PREMATURE_EOF { in = end; goto DONE; } | |
67 | ||
68 | if ((c & 0xC0) == 0x80) { /* 10xxxxxx - lonely continuation byte */ | |
69 | uc = INVALID; | |
70 | ||
71 | } else if ((c & 0x80) == 0) { /* 0xxxxxxx - 7 bits in 1 byte */ | |
72 | uc = (c & 0x7F); /* 01111111 */ | |
73 | ||
74 | } else if ((c & 0xE0) == 0xC0) { /* 110xxxxx - 11 bits in 2 bytes */ | |
75 | if (in+1 > end) PREMATURE_EOF; | |
76 | min = 1 << 7; | |
77 | uc = (((c & 0x1F) << 6) | /* 00011111------ */ | |
78 | (in[0] & 0x3F)); /* 00111111 */ | |
79 | in += 1; | |
80 | ||
81 | } else if ((c & 0xF0) == 0xE0) { /* 1110xxxx - 16 bits in 3 bytes */ | |
82 | if (in+2 > end) PREMATURE_EOF; | |
83 | min = 1 << 11; | |
84 | uc = (((c & 0x0F) << 12) | /* 00001111----+------- */ | |
85 | ((in[0] & 0x3F) << 6) | /* 00111111------ */ | |
86 | ((in[1] & 0x3F))); /* 00111111 */ | |
87 | in += 2; | |
88 | ||
89 | } else if ((c & 0xF8) == 0xF0) { /* 11110xxx - 21 bits in 4 bytes */ | |
90 | if (in+3 > end) PREMATURE_EOF; | |
91 | min = 1 << 16; | |
92 | uc = (((c & 0x07) << 18) | /* 00000111--+-------+------- */ | |
93 | ((in[0] & 0x3F) << 12) | /* 01111111----+------- */ | |
94 | ((in[1] & 0x3F) << 6) | /* 00111111------ */ | |
95 | ((in[2] & 0x3F))); /* 00111111 */ | |
96 | in += 3; | |
97 | ||
98 | } else if ((c & 0xFC) == 0xF8) { /* 111110xx - 26 bits in 5 bytes */ | |
99 | if (in+4 > end) PREMATURE_EOF; | |
100 | min = 1 << 21; | |
101 | uc = (((c & 0x03) << 24) | /* 00000011--------+-------+------- */ | |
102 | ((in[0] & 0x3F) << 18) | /* 00111111--+-------+------- */ | |
103 | ((in[1] & 0x3F) << 12) | /* 00111111----+------- */ | |
104 | ((in[2] & 0x3F) << 6) | /* 00111111------ */ | |
105 | ((in[3] & 0x3F))); /* 00111111 */ | |
106 | in += 4; | |
107 | ||
108 | } else if ((c & 0xFE) == 0xFC) { /* 1111110x - 31 bits in 6 bytes */ | |
109 | if (in+5 > end) PREMATURE_EOF; | |
110 | min = 1 << 26; | |
111 | uc = (((c & 0x01) << 30) | /* 00000001------+-------+-------+------- */ | |
112 | ((in[0] & 0x3F) << 24) | /* 00111111+-------+-------+------- */ | |
113 | ((in[1] & 0x3F) << 18) | /* 00111111--+-------+------- */ | |
114 | ((in[2] & 0x3F) << 12) | /* 00111111----+------- */ | |
115 | ((in[3] & 0x3F) << 6) | /* 00111111------ */ | |
116 | ((in[4] & 0x3F))); /* 00111111 */ | |
117 | in += 5; | |
118 | } else { | |
119 | uc = INVALID; /* Unparsable sequence. */ | |
120 | } | |
121 | ||
122 | DONE: | |
123 | ||
124 | length = in - start; | |
125 | ||
126 | /* If any of the continuation bytes didn't begin with the continuation tag, | |
127 | the sequence is invalid; stop at the bad byte, not consuming later ones. | |
128 | (It's easier to check this after the fact than up above.) */ | |
129 | { | |
130 | int i; | |
131 | for (i = 1; i < length; i++) | |
132 | if ((start[i] & 0xC0) != 0x80) { | |
133 | uc = INVALID; | |
134 | length = i+1; | |
135 | break; | |
136 | } | |
137 | } | |
138 | ||
139 | if (uc < min) | |
140 | /* A multi-byte sequence encoded a character that could have been | |
141 | encoded with a shorter sequence, e.g., hiding ASCII inside a | |
142 | multi-byte sequence. Something hinky's going on. Reject it. */ | |
143 | uc = INVALID; | |
144 | ||
145 | uc = uc_truncate (uc); | |
146 | ||
147 | if (unicode_ret) | |
148 | *unicode_ret = uc; | |
149 | ||
150 | return length; | |
151 | } | |
152 | ||
153 | ||
/* Converts a Unicode character to a multi-byte UTF8 sequence.

   uc:      the character; it is first passed through uc_truncate().
   out:     where to write the bytes.  No null terminator is written.
   length:  how many bytes may be written at `out'.

   Returns the number of bytes written, which is 0 if `length' is too
   small to hold the whole sequence.
 */
int
utf8_encode (unsigned long uc, char *out, long length)
{
  char *p = out;
  int nbytes;
  int shift;

  uc = uc_truncate (uc);

  /* How many bytes does this value need? */
  if      (uc < 0x80)        nbytes = 1;    /*  7 bits */
  else if (uc < 0x800)       nbytes = 2;    /* 11 bits */
  else if (uc < 0x10000L)    nbytes = 3;    /* 16 bits */
  else if (uc < 0x200000L)   nbytes = 4;    /* 21 bits */
  else if (uc < 0x4000000L)  nbytes = 5;    /* 26 bits */
  else                       nbytes = 6;    /* 31 bits */

  if (length < nbytes)
    return 0;

  /* The lead byte carries the length tag plus the topmost payload bits. */
  shift = 6 * (nbytes - 1);
  switch (nbytes)
    {
    case 1:  *p++ = uc;                              break; /* 0xxxxxxx */
    case 2:  *p++ = (0xC0 | ((uc >> shift) & 0x1F)); break; /* 110xxxxx */
    case 3:  *p++ = (0xE0 | ((uc >> shift) & 0x0F)); break; /* 1110xxxx */
    case 4:  *p++ = (0xF0 | ((uc >> shift) & 0x07)); break; /* 11110xxx */
    case 5:  *p++ = (0xF8 | ((uc >> shift) & 0x03)); break; /* 111110xx */
    default: *p++ = (0xFC | ((uc >> shift) & 0x01)); break; /* 1111110x */
    }

  /* Every following byte carries six more bits, high bits first. */
  while (shift > 0)
    {
      shift -= 6;
      *p++ = (0x80 | ((uc >> shift) & 0x3F));               /* 10xxxxxx */
    }

  return (int) (p - out);
}
206 | ||
207 | ||
208 | /* Converts a null-terminated UTF8 string to a null-terminated XChar2b array. | |
209 | This only handles characters that can be represented in 16 bits, the | |
210 | Basic Multilingual Plane. (No hieroglyphics, Elvish, Klingon or Emoji.) | |
211 | */ | |
212 | XChar2b * | |
213 | utf8_to_XChar2b (const char *string, int *length_ret) | |
214 | { | |
215 | long in_len = strlen(string); | |
216 | const unsigned char *in = (const unsigned char *) string; | |
217 | const unsigned char *in_end = in + in_len; | |
218 | XChar2b *c2b = (XChar2b *) malloc ((in_len + 1) * sizeof(*c2b)); | |
219 | XChar2b *out = c2b; | |
220 | if (! out) return 0; | |
221 | ||
222 | while (in < in_end) | |
223 | { | |
224 | unsigned long uc = 0; | |
225 | long L = utf8_decode (in, in_end - in, &uc); | |
226 | in += L; | |
227 | ||
228 | /* If it can't be represented in a 16-bit XChar2b, | |
229 | use "Unicode Replacement Character". */ | |
230 | if (uc > 0xFFFF) uc = INVALID; | |
231 | ||
232 | out->byte1 = (uc >> 8) & 0xFF; | |
233 | out->byte2 = uc & 0xFF; | |
234 | out++; | |
235 | } | |
236 | ||
237 | out->byte1 = 0; | |
238 | out->byte2 = 0; | |
239 | ||
240 | if (length_ret) | |
241 | *length_ret = (int) (out - c2b); | |
242 | ||
243 | /* shrink */ | |
244 | c2b = (XChar2b *) realloc (c2b, (out - c2b + 1) * sizeof(*c2b)); | |
245 | ||
246 | return c2b; | |
247 | } | |
248 | ||
249 | ||
/* Split a UTF8 string into an array of strings, one per character.
   The sub-strings will be null terminated and may be multiple bytes.
   Combining marks, emoji skin tone modifiers and Zero Width Joiner
   sequences are appended to the preceding sub-string instead of getting
   an entry of their own.

   string:      the UTF8 input; must be non-null.
   length_ret:  if non-null, receives the number of sub-strings.

   Returns a malloc'd, null-terminated array of malloc'd strings; the
   caller frees each string and then the array.  Returns 0 if any
   allocation fails, in which case nothing is leaked and *length_ret is
   left untouched.
 */
char **
utf8_split (const char *string, int *length_ret)
{
  const unsigned char *in = (const unsigned char *) string;
  long len = strlen (string);
  const unsigned char *end = in + len;
  /* One sub-string per input byte is the worst case, plus terminator. */
  char **ret = (char **) malloc ((len+1) * sizeof(*ret));
  char **shrunk;
  int i = 0;
  int zwjp = 0;
  if (!ret) return 0;

  while (in < end)
    {
      unsigned long uc;
      /* Only offer the decoder the bytes that are actually left.  Passing
         the full string length here would let it read past the null. */
      long len2 = utf8_decode (in, end - in, &uc);
      char *piece = (char *) malloc (len2 + 1);
      if (!piece) goto FAIL;
      memcpy (piece, (const char *) in, len2);
      piece[len2] = 0;
      ret[i++] = piece;
      in += len2;

      /* If this is a Combining Diacritical, append it to the previous
         character.  E.g., "y\314\206\314\206" is one string, not three.

         If this is ZWJ, Zero Width Joiner, then we append both this character
         and the following character, e.g. "X ZWJ Y" is one string not three.

         #### Hmmm, should this also include every character in the
         "Symbol, Modifier" category, or does ZWJ get used for those?
         https://www.fileformat.info/info/unicode/category/Sk/list.htm

         Is it intended that "Latin small letter C, 0063" + "Cedilla, 00B8"
         should be a single glyph?  Or is that what "Combining Cedilla, 0327"
         is for?  I'm confused by the fact that the skin tones (1F3FB-1F3FF)
         do not seem to be in a readily-identifiable block the way the various
         combining diacriticals are.
       */
      if (i > 1 &&
          ((uc >= 0x300 && uc <= 0x36F) ||     /* Combining Diacritical */
           (uc >= 0x1AB0 && uc <= 0x1AFF) ||   /* Combining Diacritical Ext. */
           (uc >= 0x1DC0 && uc <= 0x1DFF) ||   /* Combining Diacritical Supp. */
           (uc >= 0x20D0 && uc <= 0x20FF) ||   /* Combining Diacritical Sym. */
           (uc >= 0xFE20 && uc <= 0xFE2F) ||   /* Combining Half Marks */
           (uc >= 0x1F3FB && uc <= 0x1F3FF) || /* Emoji skin tone modifiers */
           zwjp || uc == 0x200D))              /* Zero Width Joiner */
        {
          /* Glue the entry just added onto the one before it. */
          long L1 = strlen(ret[i-2]);
          long L2 = strlen(ret[i-1]);
          char *s2 = (char *) malloc (L1 + L2 + 1);
          if (!s2) goto FAIL;
          memcpy (s2,      ret[i-2], L1);
          memcpy (s2 + L1, ret[i-1], L2);
          s2[L1 + L2] = 0;
          free (ret[i-2]);
          free (ret[i-1]);
          ret[i-2] = s2;
          i--;
          zwjp = (uc == 0x200D);  /* Swallow the next character as well */
        }
    }
  ret[i] = 0;

  if (length_ret)
    *length_ret = i;

  /* Shrink to fit.  If realloc fails the original block is still valid,
     so keep using it rather than leaking it. */
  shrunk = (char **) realloc (ret, (i+1) * sizeof(*ret));
  if (shrunk)
    ret = shrunk;

  return ret;

 FAIL:
  /* Entries 0 .. i-1 are the only ones that have been filled in. */
  while (i > 0)
    free (ret[--i]);
  free (ret);
  return 0;
}
321 | ||
322 | ||
323 | /* Converts a null-terminated XChar2b array to a null-terminated UTF8 string. | |
324 | */ | |
325 | char * | |
326 | XChar2b_to_utf8 (const XChar2b *in, int *length_ret) | |
327 | { | |
328 | int in_len = 0; | |
329 | const XChar2b *in_end; | |
330 | int out_len; | |
331 | char *utf8, *out; | |
332 | const char *out_end; | |
333 | ||
334 | /* Find the null termination on the XChar2b. */ | |
335 | for (in_end = in; in_end->byte1 || in_end->byte2; in_end++, in_len++) | |
336 | ; | |
337 | ||
338 | out_len = (in_len + 1) * 3; /* 16 bit chars = 3 bytes max */ | |
339 | utf8 = out = (char *) malloc (out_len + 1); | |
340 | if (! out) return 0; | |
341 | out_end = out + out_len; | |
342 | ||
343 | while (in < in_end) | |
344 | { | |
345 | unsigned long uc = (in->byte1 << 8) | in->byte2; | |
346 | int wrote = utf8_encode (uc, out, out_end - out); | |
347 | if (wrote > 3) abort(); /* Can't happen with 16 bit input */ | |
348 | out += wrote; | |
349 | in++; | |
350 | } | |
351 | *out = 0; | |
352 | ||
353 | out_len = (int) (out - utf8 + 1); | |
354 | ||
355 | if (length_ret) | |
356 | *length_ret = out_len; | |
357 | ||
358 | /* shrink */ | |
359 | utf8 = (char *) realloc (utf8, out_len); | |
360 | ||
361 | return utf8; | |
362 | } | |
363 | ||
364 | ||
365 | /* Converts a UTF8 string to the closest Latin1 or ASCII equivalent. | |
366 | */ | |
367 | char * | |
368 | utf8_to_latin1 (const char *string, Bool ascii_p) | |
369 | { | |
370 | long in_len = strlen(string); | |
371 | const unsigned char *in = (const unsigned char *) string; | |
372 | const unsigned char *in_end = in + in_len; | |
373 | unsigned char *ret = (unsigned char *) malloc (in_len + 1); | |
374 | unsigned char *out = ret; | |
375 | ||
376 | if (! ret) return 0; | |
377 | ||
378 | while (in < in_end) | |
379 | { | |
380 | unsigned long uc = 0; | |
381 | long len2 = utf8_decode (in, in_end - in, &uc); | |
382 | in += len2; | |
383 | ||
384 | if (uc == '\240') /* */ | |
385 | uc = ' '; | |
386 | else if (uc >= 0x300 && uc <= 0x36F) | |
387 | uc = 0; /* Discard "Combining Diacritical Marks" */ | |
388 | else if (uc >= 0x1AB0 && uc <= 0x1AFF) | |
389 | uc = 0; /* Discard "Combining Diacritical Marks Extended" */ | |
390 | else if (uc >= 0x1DC0 && uc <= 0x1DFF) | |
391 | uc = 0; /* Discard "Combining Diacritical Marks Supplement" */ | |
392 | else if (uc >= 0x20D0 && uc <= 0x20FF) | |
393 | uc = 0; /* Discard "Combining Diacritical Marks for Symbols" */ | |
394 | else if (uc >= 0xFE20 && uc <= 0xFE2F) | |
395 | uc = 0; /* Discard "Combining Half Marks" */ | |
396 | ||
397 | else if (uc > 0xFF) | |
398 | switch (uc) { | |
399 | ||
400 | /* Map "Unicode General Punctuation Block" to Latin1 equivalents. */ | |
401 | ||
402 | case 0x2000: /* EN QUAD */ | |
403 | case 0x2001: /* EM QUAD */ | |
404 | case 0x2002: /* EN SPACE */ | |
405 | case 0x2003: /* EM SPACE */ | |
406 | case 0x2004: /* THREE-PER-EM SPACE */ | |
407 | case 0x2005: /* FOUR-PER-EM SPACE */ | |
408 | case 0x2006: /* SIX-PER-EM SPACE */ | |
409 | case 0x2007: /* FIGURE SPACE */ | |
410 | case 0x2008: /* PUNCTUATION SPACE */ | |
411 | case 0x2009: /* THIN SPACE */ | |
412 | case 0x200A: /* HAIR SPACE */ | |
413 | uc = ' '; | |
414 | break; | |
415 | ||
416 | case 0x2010: /* HYPHEN */ | |
417 | case 0x2011: /* NON-BREAKING HYPHEN */ | |
418 | case 0x2012: /* FIGURE DASH */ | |
419 | case 0x2013: /* EN DASH */ | |
420 | case 0x2014: /* EM DASH */ | |
421 | case 0x2015: /* HORIZONTAL BAR */ | |
422 | uc = '-'; | |
423 | break; | |
424 | ||
425 | case 0x2018: /* LEFT SINGLE QUOTATION MARK */ | |
426 | case 0x2019: /* SINGLE LOW-9 QUOTATION MARK */ | |
427 | case 0x201A: /* SINGLE LOW-9 QUOTATION MARK */ | |
428 | case 0x201B: /* SINGLE HIGH-REVERSED-9 QUOTATION MARK */ | |
429 | uc = '\''; | |
430 | break; | |
431 | ||
432 | case 0x201C: /* LEFT DOUBLE QUOTATION MARK */ | |
433 | case 0x201D: /* RIGHT DOUBLE QUOTATION MARK */ | |
434 | case 0x201E: /* DOUBLE LOW-9 QUOTATION MARK */ | |
435 | case 0x201F: /* DOUBLE HIGH-REVERSED-9 QUOTATION MARK */ | |
436 | uc = '"'; | |
437 | break; | |
438 | ||
439 | case 0x2022: uc = '\267'; break; /* BULLET */ | |
440 | case 0x2023: uc = '\273'; break; /* TRIANGULAR BULLET */ | |
441 | case 0x2027: uc = '\267'; break; /* HYPHENATION POINT */ | |
442 | case 0x202F: uc = ' '; break; /* NARROW NO-BREAK SPACE */ | |
443 | case 0x2038: uc = '^'; break; /* CARET */ | |
444 | case 0x2039: uc = '\253'; break; /* SINGLE LEFT ANGLE QUOTATION MARK */ | |
445 | case 0x203A: uc = '\273'; break; /* SINGLE RIGHT ANGLE QUOTATION MARK*/ | |
446 | case 0x2041: uc = '^'; break; /* CARET INSERTION POINT */ | |
447 | case 0x2042: uc = '*'; break; /* ASTERISM */ | |
448 | case 0x2043: uc = '='; break; /* HYPHEN BULLET */ | |
449 | case 0x2044: uc = '/'; break; /* FRACTION SLASH */ | |
450 | case 0x204B: uc = '\266'; break; /* REVERSED PILCROW SIGN */ | |
451 | case 0x204C: uc = '\267'; break; /* BLACK LEFTWARDS BULLET */ | |
452 | case 0x204D: uc = '\267'; break; /* BLACK RIGHTWARDS BULLET */ | |
453 | case 0x204E: uc = '*'; break; /* LOW ASTERISK */ | |
454 | case 0x204F: uc = ';'; break; /* REVERSED SEMICOLON */ | |
455 | default: | |
456 | break; | |
457 | } | |
458 | ||
459 | if (uc > 0xFF) | |
460 | /* "Inverted question mark" looks enough like 0xFFFD, | |
461 | the "Unicode Replacement Character". */ | |
462 | uc = (ascii_p ? '#' : '\277'); | |
463 | ||
464 | if (ascii_p) /* Map Latin1 to the closest ASCII versions. */ | |
465 | { | |
466 | const unsigned char latin1_to_ascii[96] = | |
467 | " !C##Y|S_C#<=-R_##23'uP.,1o>###?" | |
468 | "AAAAAAECEEEEIIIIDNOOOOOx0UUUUYpS" | |
469 | "aaaaaaeceeeeiiiionooooo/ouuuuypy"; | |
470 | if (uc >= 0xA0) | |
471 | uc = latin1_to_ascii[uc - 0xA0]; | |
472 | } | |
473 | ||
474 | if (uc > 0) | |
475 | *out++ = (unsigned char) uc; | |
476 | } | |
477 | *out = 0; | |
478 | ||
479 | /* shrink */ | |
480 | ret = (unsigned char *) realloc (ret, (out - ret + 1) * sizeof(*ret)); | |
481 | ||
482 | return (char *) ret; | |
483 | } | |
484 | ||
485 | ||
486 | /************************************************************************* | |
487 | ||
488 | cd ../hacks ; make test-utf8wc | |
489 | ||
490 | *************************************************************************/ | |
491 | ||
492 | #ifdef SELFTEST | |
493 | ||
/* Convert a UTF8 string to Unicode and back again.
   Decodes `string' one character at a time with utf8_decode(), then
   re-encodes the resulting code points with utf8_encode().
   Returns a malloc'd, null-terminated string that the caller must free.
   Aborts if memory cannot be allocated.
 */
static char *
split_and_join (const char *string)
{
  const unsigned char *in = (const unsigned char *) string;
  int len = strlen (string);
  const unsigned char *end = in + len;
  /* At most one code point per input byte, plus a zero terminator. */
  unsigned long *unicode = (unsigned long *)
    malloc((len + 1) * sizeof(*unicode));
  int i = 0;
  char *ret, *out, *out_end;

  if (!unicode) abort();

  while (in < end)
    {
      /* Only offer the decoder the bytes that are actually left.  Passing
         the full string length here would let it read past the null. */
      long len2 = utf8_decode (in, end - in, &unicode[i]);
      i++;
      in += len2;
    }
  unicode[i] = 0;   /* Safe as a terminator: the decoder never yields 0. */

  i = i*6 + 1;      /* Up to 6 bytes per character, plus the null. */
  out = ret = (char *) malloc(i);
  if (!ret) abort();
  out_end = out + i;
  i = 0;
  while (unicode[i])
    {
      int len2 = utf8_encode (unicode[i], out, out_end - out);
      out += len2;
      i++;
    }
  *out = 0;
  free (unicode);

  return ret;
}
530 | ||
531 | ||
/* Print `s' to `out' as a double-quoted C-style string literal, preceded
   by `prefix' right-justified in a six-column field and a colon.  Double
   quotes and backslashes are backslash-escaped; bytes below 32 or at or
   above 127 are written as three-digit octal escapes.  A newline follows
   the closing quote.
 */
static void
LOG (FILE *out, const char *prefix, const char *s)
{
  const unsigned char *p;

  fprintf (out, "%6s: \"", prefix);

  for (p = (const unsigned char *) s; *p != 0; p++)
    {
      const unsigned char ch = *p;
      const int needs_backslash = (ch == '"' || ch == '\\');
      const int printable = (ch >= 32 && ch < 127);

      if (needs_backslash)
        fprintf (out, "\\%c", ch);
      else if (!printable)
        fprintf (out, "\\%03o", ch);
      else
        fprintf (out, "%c", ch);
    }

  fprintf (out, "\"\n");
}
546 | ||
547 | ||
548 | int | |
549 | main (int argc, char **argv) | |
550 | { | |
551 | /* Adapted from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt | |
552 | */ | |
553 | ||
554 | # define URC "\357\277\275" /* 0xFFFD, "Unicode Replacement Character" */ | |
555 | ||
556 | static const struct { const char *name, *in, *target, *target2; } tests[] = { | |
557 | /* 1 Some correct UTF-8 text */ | |
558 | ||
559 | /* The Greek word 'kosme': */ | |
560 | { "1", "\316\272\341\275\271\317\203\316\274\316\265" }, | |
561 | ||
562 | ||
563 | /* 2 Boundary condition test cases */ | |
564 | ||
565 | /* 2.1 First possible sequence of a certain length */ | |
566 | ||
567 | { "2.1.1", /* 1 byte (U-00000000): */ "\000" }, | |
568 | { "2.1.2", /* 2 bytes (U-00000080): */ "\302\200" }, | |
569 | { "2.1.3", /* 3 bytes (U-00000800): */ "\340\240\200" }, | |
570 | { "2.1.4", /* 4 bytes (U-00010000): */ "\360\220\200\200", 0, URC }, | |
571 | { "2.1.5", /* 5 bytes (U-00200000): */ "\370\210\200\200\200", URC }, | |
572 | { "2.1.6", /* 6 bytes (U-04000000): */ "\374\204\200\200\200\200", URC }, | |
573 | ||
574 | /* 2.2 Last possible sequence of a certain length */ | |
575 | ||
576 | { "2.2.1", /* 1 byte (U-0000007F): */ "\177" }, | |
577 | { "2.2.2", /* 2 bytes (U-000007FF): */ "\337\277" }, | |
578 | { "2.2.3", /* 3 bytes (U-0000FFFF): */ "\357\277\277" }, | |
579 | { "2.2.4", /* 4 bytes (U-001FFFFF): */ "\367\277\277\277", URC }, | |
580 | { "2.2.5", /* 5 bytes (U-03FFFFFF): */ "\373\277\277\277\277", URC }, | |
581 | { "2.2.6", /* 6 bytes (U-7FFFFFFF): */ "\375\277\277\277\277\277", URC }, | |
582 | ||
583 | /* 2.3 Other boundary conditions */ | |
584 | ||
585 | { "2.3.1", /* U-0000D7FF = ed 9f bf = */ "\355\237\277" }, | |
586 | { "2.3.2", /* U-0000E000 = ee 80 80 = */ "\356\200\200" }, | |
587 | { "2.3.3", /* U-0000FFFD = ef bf bd = */ URC }, | |
588 | { "2.3.4", /* U-0010FFFF = f4 8f bf bf = */ "\364\217\277\277", 0, URC }, | |
589 | { "2.3.5", /* U-00110000 = f4 90 80 80 = */ "\364\220\200\200", URC }, | |
590 | ||
591 | ||
592 | /* 3 Malformed sequences */ | |
593 | ||
594 | /* 3.1 Unexpected continuation bytes */ | |
595 | ||
596 | /* Each unexpected continuation byte should be separately signalled as a | |
597 | malformed sequence of its own. */ | |
598 | ||
599 | { "3.1.1", /* First continuation byte 0x80: */ "\200", URC }, | |
600 | { "3.1.2", /* Last continuation byte 0xbf: */ "\277", URC }, | |
601 | { "3.1.3", /* 2 continuation bytes: */ "\200\277", URC URC }, | |
602 | { "3.1.4", /* 3 continuation bytes: */ "\200\277\200", URC URC URC }, | |
603 | { "3.1.5", /* 4 continuation bytes: */ "\200\277\200\277", | |
604 | URC URC URC URC }, | |
605 | { "3.1.6", /* 5 continuation bytes: */ "\200\277\200\277\200", | |
606 | URC URC URC URC URC }, | |
607 | { "3.1.7", /* 6 continuation bytes: */ "\200\277\200\277\200\277", | |
608 | URC URC URC URC URC URC }, | |
609 | { "3.1.8", /* 7 continuation bytes: */ "\200\277\200\277\200\277\200", | |
610 | URC URC URC URC URC URC URC }, | |
611 | ||
612 | { "3.1.9", /* Sequence of all 64 possible continuation bytes (0x80-0xbf):*/ | |
613 | ||
614 | "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217" | |
615 | "\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237" | |
616 | "\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257" | |
617 | "\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277", | |
618 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC | |
619 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC | |
620 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC | |
621 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC }, | |
622 | ||
623 | /* 3.2 Lonely start characters */ | |
624 | ||
625 | { "3.2.1", /* All 32 first bytes of 2-byte sequences (0xc0-0xdf), | |
626 | each followed by a space character: */ | |
627 | ||
628 | "\300 \301 \302 \303 \304 \305 \306 \307 \310 \311 \312 \313 \314 " | |
629 | "\315 \316 \317 \320 \321 \322 \323 \324 \325 \326 \327 \330 \331 " | |
630 | "\332 \333 \334 \335 \336 \337 ", | |
631 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC | |
632 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC }, | |
633 | ||
634 | { "3.2.2", /* All 16 first bytes of 3-byte sequences (0xe0-0xef), | |
635 | each followed by a space character: */ | |
636 | "\340 \341 \342 \343 \344 \345 \346 \347 " | |
637 | "\350 \351 \352 \353 \354 \355 \356 \357 ", | |
638 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC }, | |
639 | ||
640 | { "3.2.3", /* All 8 first bytes of 4-byte sequences (0xf0-0xf7), | |
641 | each followed by a space character: */ | |
642 | URC URC URC URC URC URC URC URC }, | |
643 | ||
644 | { "3.2.4", /* All 4 first bytes of 5-byte sequences (0xf8-0xfb), | |
645 | each followed by a space character: */ | |
646 | "\370 \371 \372 \373 ", | |
647 | URC URC URC URC }, | |
648 | ||
649 | { "3.2.5", /* All 2 first bytes of 6-byte sequences (0xfc-0xfd), | |
650 | each followed by a space character: */ | |
651 | "\374 \375 ", URC URC }, | |
652 | ||
653 | /* 3.3 Sequences with last continuation byte missing */ | |
654 | ||
655 | /* All bytes of an incomplete sequence should be signalled as a single | |
656 | malformed sequence, i.e., you should see only a single replacement | |
657 | character in each of the next 10 tests. (Characters as in section 2) */ | |
658 | ||
659 | { "3.3.1", /* 2-byte sequence with last byte missing (U+0000): */ | |
660 | "\300", URC }, | |
661 | { "3.3.2", /* 3-byte sequence with last byte missing (U+0000): */ | |
662 | "\340\200", URC }, | |
663 | { "3.3.3", /* 4-byte sequence with last byte missing (U+0000): */ | |
664 | "\360\200\200", URC }, | |
665 | { "3.3.4", /* 5-byte sequence with last byte missing (U+0000): */ | |
666 | "\370\200\200\200", URC }, | |
667 | { "3.3.5", /* 6-byte sequence with last byte missing (U+0000): */ | |
668 | "\374\200\200\200\200", URC }, | |
669 | { "3.3.6", /* 2-byte sequence with last byte missing (U-000007FF): */ | |
670 | "\337", URC }, | |
671 | { "3.3.7", /* 3-byte sequence with last byte missing (U-0000FFFF): */ | |
672 | "\357\277", URC }, | |
673 | { "3.3.8", /* 4-byte sequence with last byte missing (U-001FFFFF): */ | |
674 | "\367\277\277", URC }, | |
675 | { "3.3.9", /* 5-byte sequence with last byte missing (U-03FFFFFF): */ | |
676 | "\373\277\277\277", URC }, | |
677 | { "3.3.10", /* 6-byte sequence with last byte missing (U-7FFFFFFF): */ | |
678 | "\375\277\277\277\277", URC }, | |
679 | ||
680 | /* 3.4 Concatenation of incomplete sequences */ | |
681 | ||
682 | /* All the 10 sequences of 3.3 concatenated, you should see 10 malformed | |
683 | sequences being signalled: */ | |
684 | ||
685 | { "3.4", "\300\340\200\360\200\200\370\200\200\200\374\200\200\200\200" | |
686 | "\337\357\277\367\277\277\373\277\277\277\375\277\277\277\277", | |
687 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC }, | |
688 | ||
689 | /* 3.5 Impossible bytes */ | |
690 | ||
691 | /* The following two bytes cannot appear in a correct UTF-8 string */ | |
692 | ||
693 | { "3.5.1", /* fe = */ "\376", URC }, | |
694 | { "3.5.2", /* ff = */ "\377", URC }, | |
695 | { "3.5.3", /* fe fe ff ff = */ "\376\376\377\377", URC URC URC URC }, | |
696 | ||
697 | ||
698 | /* 4 Overlong sequences */ | |
699 | ||
700 | /* 4.1 Examples of an overlong ASCII character */ | |
701 | ||
702 | { "4.1.1", /* U+002F = c0 af = */ "\300\257", URC }, | |
703 | { "4.1.2", /* U+002F = e0 80 af = */ "\340\200\257", URC }, | |
704 | { "4.1.3", /* U+002F = f0 80 80 af = */ "\360\200\200\257", URC }, | |
705 | { "4.1.4", /* U+002F = f8 80 80 80 af = */ "\370\200\200\200\257", | |
706 | URC }, | |
707 | { "4.1.5", /* U+002F = fc 80 80 80 80 af = */ "\374\200\200\200\200\257", | |
708 | URC }, | |
709 | ||
710 | /* 4.2 Maximum overlong sequences */ | |
711 | ||
712 | { "4.2.1", /* U-0000007F = c1 bf = */ "\301\277", URC }, | |
713 | { "4.2.2", /* U-000007FF = e0 9f bf = */ "\340\237\277", URC }, | |
714 | { "4.2.3", /* U-0000FFFF = f0 8f bf bf = */ "\360\217\277\277", | |
715 | URC }, | |
716 | { "4.2.4", /* U-001FFFFF = f8 87 bf bf bf = */ "\370\207\277\277\277", | |
717 | URC }, | |
718 | { "4.2.5", /* U-03FFFFFF = fc 83 bf bf bf bf = */ URC }, | |
719 | ||
720 | /* 4.3 Overlong representation of the NUL character */ | |
721 | ||
722 | { "4.3.1", /* U+0000 = c0 80 = */ "\300\200", URC }, | |
723 | { "4.3.2", /* U+0000 = e0 80 80 = */ "\340\200\200", URC }, | |
724 | { "4.3.3", /* U+0000 = f0 80 80 80 = */ "\360\200\200\200", URC }, | |
725 | { "4.3.4", /* U+0000 = f8 80 80 80 80 = */ "\370\200\200\200\200", | |
726 | URC }, | |
727 | { "4.3.5", /* U+0000 = fc 80 80 80 80 80 = */ "\374\200\200\200\200\200", | |
728 | URC }, | |
729 | ||
730 | ||
731 | /* 5 Illegal code positions */ | |
732 | ||
733 | /* 5.1 Single UTF-16 surrogates */ | |
734 | ||
735 | { "5.1.1", /* U+D800 = ed a0 80 = */ "\355\240\200", URC }, | |
736 | { "5.1.2", /* U+DB7F = ed ad bf = */ "\355\255\277", URC }, | |
737 | { "5.1.3", /* U+DB80 = ed ae 80 = */ "\355\256\200", URC }, | |
738 | { "5.1.4", /* U+DBFF = ed af bf = */ "\355\257\277", URC }, | |
739 | { "5.1.5", /* U+DC00 = ed b0 80 = */ "\355\260\200", URC }, | |
740 | { "5.1.6", /* U+DF80 = ed be 80 = */ "\355\276\200", URC }, | |
741 | { "5.1.7", /* U+DFFF = ed bf bf = */ "\355\277\277", URC }, | |
742 | ||
743 | /* 5.2 Paired UTF-16 surrogates */ | |
744 | ||
745 | { "5.2.1", /* U+D800 U+DC00 = ed a0 80 ed b0 80 = */ URC URC }, | |
746 | { "5.2.2", /* U+D800 U+DFFF = ed a0 80 ed bf bf = */ URC URC }, | |
747 | { "5.2.3", /* U+DB7F U+DC00 = ed ad bf ed b0 80 = */ URC URC }, | |
748 | { "5.2.4", /* U+DB7F U+DFFF = ed ad bf ed bf bf = */ URC URC }, | |
749 | { "5.2.5", /* U+DB80 U+DC00 = ed ae 80 ed b0 80 = */ URC URC }, | |
750 | { "5.2.6", /* U+DB80 U+DFFF = ed ae 80 ed bf bf = */ URC URC }, | |
751 | { "5.2.7", /* U+DBFF U+DC00 = ed af bf ed b0 80 = */ URC URC }, | |
752 | { "5.2.8", /* U+DBFF U+DFFF = ed af bf ed bf bf = */ URC URC }, | |
753 | ||
754 | /* 5.3 Other illegal code positions */ | |
755 | ||
756 | { "5.3.1", /* U+FFFE = ef bf be = */ "\357\277\276" }, | |
757 | { "5.3.2", /* U+FFFF = ef bf bf = */ "\357\277\277" }, | |
758 | ||
759 | ||
760 | /* 6 Some other junk */ | |
761 | ||
762 | { "6.0", "" }, | |
763 | { "6.1", "\001\002\003\004\005 ABC" }, | |
764 | { "6.2", /* every non-ASCII Latin1 character */ | |
765 | "\302\241\302\242\302\243\302\244\302\245\302\246\302\247\302\250" | |
766 | "\302\251\302\252\302\253\302\254\302\255\302\256\302\257\302\260" | |
767 | "\302\261\302\262\302\263\302\264\302\265\302\266\302\267\302\270" | |
768 | "\302\271\302\272\302\273\302\274\302\275\302\276\302\277\303\200" | |
769 | "\303\201\303\202\303\203\303\204\303\205\303\206\303\207\303\210" | |
770 | "\303\211\303\212\303\213\303\214\303\215\303\216\303\217\303\220" | |
771 | "\303\221\303\222\303\223\303\224\303\225\303\226\303\227\303\230" | |
772 | "\303\231\303\232\303\233\303\234\303\235\303\236\303\237\303\240" | |
773 | "\303\241\303\242\303\243\303\244\303\245\303\246\303\247\303\250" | |
774 | "\303\251\303\252\303\253\303\254\303\255\303\256\303\257\303\260" | |
775 | "\303\261\303\262\303\263\303\264\303\265\303\266\303\267\303\270" | |
776 | "\303\271\303\272\303\273\303\274\303\275\303\276\303\277" }, | |
777 | ||
778 | { "6.3", /* Christmas tree */ | |
779 | "\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020" | |
780 | "\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\040" | |
781 | "\041\042\043\044\045\046\047\050\051\052\053\054\055\056\057\060" | |
782 | "\061\062\063\064\065\066\067\070\071\072\073\074\075\076\077\100" | |
783 | "\101\102\103\104\105\106\107\110\111\112\113\114\115\116\117\120" | |
784 | "\121\122\123\124\125\126\127\130\131\132\133\134\135\136\137\140" | |
785 | "\141\142\143\144\145\146\147\150\151\152\153\154\155\156\157\160" | |
786 | "\161\162\163\164\165\166\167\170\171\172\173\174\175\176\177\200" | |
787 | "\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220" | |
788 | "\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240" | |
789 | "\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260" | |
790 | "\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300" | |
791 | "\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320" | |
792 | "\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340" | |
793 | "\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360" | |
794 | "\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377", | |
795 | ||
796 | "\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020" | |
797 | "\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" | |
798 | " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
799 | "[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177" | |
800 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC | |
801 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC | |
802 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC | |
803 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC | |
804 | URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC URC | |
805 | URC URC URC URC URC URC URC URC URC URC URC URC }, | |
806 | }; | |
807 | ||
808 | int i; | |
809 | int ok = 1; | |
810 | for (i = 0; i < sizeof(tests)/sizeof(*tests); i++) | |
811 | { | |
812 | const char *name = tests[i].name; | |
813 | const char *in = tests[i].in; | |
814 | const char *target = (tests[i].target ? tests[i].target : in); | |
815 | const char *target2 = (tests[i].target2 ? tests[i].target2 : target); | |
816 | char *out = split_and_join (in); | |
817 | XChar2b *out16 = utf8_to_XChar2b (in, 0); | |
818 | char *out2 = XChar2b_to_utf8 (out16, 0); | |
819 | if (strcmp (out, target)) | |
820 | { | |
821 | LOG (stderr, name, target); | |
822 | LOG (stderr, "FAIL", out); | |
823 | fprintf (stderr, "\n"); | |
824 | ok = 0; | |
825 | } | |
826 | if (strcmp (out2, target2)) | |
827 | { | |
828 | LOG (stderr, name, target2); | |
829 | LOG (stderr, "FAIL2", out2); | |
830 | fprintf (stderr, "\n"); | |
831 | ok = 0; | |
832 | } | |
833 | free (out); | |
834 | free (out2); | |
835 | free (out16); | |
836 | } | |
837 | ||
838 | /* Check conversion from UTF8 to Latin1 and ASCII. */ | |
839 | { | |
840 | const char *utf8 = ("son \303\256le int\303\251rieure, \303\240 " | |
841 | "c\303\264t\303\251 de l'alc\303\264ve " | |
842 | "ovo\303\257de, o\303\271 les b\303\273ches " | |
843 | "se consument dans l'\303\242tre"); | |
844 | const char *latin1 = ("son \356le int\351rieure, \340 " | |
845 | "c\364t\351 de l'alc\364ve ovo\357de, " | |
846 | "o\371 les b\373ches se consument dans " | |
847 | "l'\342tre"); | |
848 | const char *ascii = ("son ile interieure, a cote de l'alcove " | |
849 | "ovoide, ou les buches se consument dans " | |
850 | "l'atre"); | |
851 | char *latin1b = utf8_to_latin1 (utf8, False); | |
852 | char *ascii2 = utf8_to_latin1 (utf8, True); | |
853 | if (strcmp (latin1, latin1b)) | |
854 | { | |
855 | LOG (stderr, "LATIN1", utf8); | |
856 | LOG (stderr, "FAIL3", latin1b); | |
857 | fprintf (stderr, "\n"); | |
858 | ok = 0; | |
859 | } | |
860 | if (strcmp (ascii, ascii2)) | |
861 | { | |
862 | LOG (stderr, "ASCII", utf8); | |
863 | LOG (stderr, "FAIL4", ascii2); | |
864 | fprintf (stderr, "\n"); | |
865 | ok = 0; | |
866 | } | |
867 | free (latin1b); | |
868 | free (ascii2); | |
869 | } | |
870 | ||
871 | /* Check de-composition of emoji that should all be treated as a unit | |
872 | for measurement and display purposes. */ | |
873 | { | |
874 | static const char * const tests[] = { | |
875 | ||
876 | /* 0: "Man" */ | |
877 | " \360\237\221\250 ", | |
878 | ||
879 | /* 1: "Blackula" = "Vampire, dark skin tone" = 1F9DB 1F3FF */ | |
880 | " \360\237\247\233\360\237\217\277 ", | |
881 | ||
882 | /* 2: "Black male teacher" = "Man, dark skin tone, ZWJ, school" = | |
883 | 1F468 1F3FF 200D 1F3EB | |
884 | */ | |
885 | " \360\237\221\250\360\237\217\277\342\200\215\360\237\217\253 ", | |
886 | ||
887 | /* 3: "Female runner" = "Runner, ZWJ, female sign" = 1F3C3 200D 2640 */ | |
888 | " \360\237\217\203\342\200\215\342\231\200 ", | |
889 | ||
890 | /* 4: "Runner, ZWJ, rocket ship" = 1F3C3 200D 1F680 (not a true "Woman astronaut", which would start with Woman = 1F469) */ | |
891 | " \360\237\217\203\342\200\215\360\237\232\200 ", | |
892 | ||
893 | /* 5: | |
894 | Group of people displayed as a single glyph: | |
895 | Woman, dark skin tone, ZWJ, 1F469 1F3FF 200D | |
896 | Man, light skin tone, ZWJ, 1F468 1F3FB 200D | |
897 | Boy, medium skin tone, ZWJ, 1F466 1F3FD 200D | |
898 | Girl, dark skin tone. 1F467 1F3FF | |
899 | */ | |
900 | " \360\237\221\251\360\237\217\277\342\200\215" | |
901 | "\360\237\221\250\360\237\217\273\342\200\215" | |
902 | "\360\237\221\246\360\237\217\275\342\200\215" | |
903 | "\360\237\221\247\360\237\217\277 ", | |
904 | }; | |
905 | int i; | |
906 | for (i = 0; i < sizeof(tests)/sizeof(*tests); i++) | |
907 | { | |
908 | int L = 0; | |
909 | char **out = utf8_split (tests[i], &L); | |
910 | char name[100]; | |
911 | int j; | |
912 | sprintf (name, "SPLIT %d: %d glyphs", i, L-2); | |
913 | if (L != 3) | |
914 | { | |
915 | LOG (stderr, name, tests[i]); | |
916 | ok = 0; | |
917 | } | |
918 | for (j = 0; j < L; j++) | |
919 | free (out[j]); | |
920 | free (out); | |
921 | } | |
922 | } | |
923 | ||
924 | if (ok) fprintf (stderr, "OK\n"); | |
925 | return (ok == 0); | |
926 | } | |
927 | ||
928 | #endif /* SELFTEST */ |