| 1 | '\" |
| 2 | '\" Copyright (c) 1997 Sun Microsystems, Inc. |
| 3 | '\" |
| 4 | '\" See the file "license.terms" for information on usage and redistribution |
| 5 | '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
| 6 | '\" |
| 7 | '\" RCS: @(#) $Id: Utf.3,v 1.13.2.2 2003/07/18 22:15:45 dkf Exp $ |
| 8 | '\" |
| 9 | '\" The definitions below are for supplemental macros used in Tcl/Tk |
| 10 | '\" manual entries. |
| 11 | '\" |
| 12 | '\" .AP type name in/out ?indent? |
| 13 | '\" Start paragraph describing an argument to a library procedure. |
| 14 | '\" type is type of argument (int, etc.), in/out is either "in", "out", |
| 15 | '\" or "in/out" to describe whether procedure reads or modifies arg, |
| 16 | '\" and indent is equivalent to second arg of .IP (shouldn't ever be |
| 17 | '\" needed; use .AS below instead) |
| 18 | '\" |
| 19 | '\" .AS ?type? ?name? |
| 20 | '\" Give maximum sizes of arguments for setting tab stops. Type and |
| 21 | '\" name are examples of largest possible arguments that will be passed |
| 22 | '\" to .AP later. If args are omitted, default tab stops are used. |
| 23 | '\" |
| 24 | '\" .BS |
| 25 | '\" Start box enclosure. From here until next .BE, everything will be |
| 26 | '\" enclosed in one large box. |
| 27 | '\" |
| 28 | '\" .BE |
| 29 | '\" End of box enclosure. |
| 30 | '\" |
| 31 | '\" .CS |
| 32 | '\" Begin code excerpt. |
| 33 | '\" |
| 34 | '\" .CE |
| 35 | '\" End code excerpt. |
| 36 | '\" |
| 37 | '\" .VS ?version? ?br? |
| 38 | '\" Begin vertical sidebar, for use in marking newly-changed parts |
| 39 | '\" of man pages. The first argument is ignored and used for recording |
| 40 | '\" the version when the .VS was added, so that the sidebars can be |
| 41 | '\" found and removed when they reach a certain age. If another argument |
| 42 | '\" is present, then a line break is forced before starting the sidebar. |
| 43 | '\" |
| 44 | '\" .VE |
| 45 | '\" End of vertical sidebar. |
| 46 | '\" |
| 47 | '\" .DS |
| 48 | '\" Begin an indented unfilled display. |
| 49 | '\" |
| 50 | '\" .DE |
| 51 | '\" End of indented unfilled display. |
| 52 | '\" |
| 53 | '\" .SO |
| 54 | '\" Start of list of standard options for a Tk widget. The |
| 55 | '\" options follow on successive lines, in four columns separated |
| 56 | '\" by tabs. |
| 57 | '\" |
| 58 | '\" .SE |
| 59 | '\" End of list of standard options for a Tk widget. |
| 60 | '\" |
| 61 | '\" .OP cmdName dbName dbClass |
| 62 | '\" Start of description of a specific option. cmdName gives the |
| 63 | '\" option's name as specified in the class command, dbName gives |
| 64 | '\" the option's name in the option database, and dbClass gives |
| 65 | '\" the option's class in the option database. |
| 66 | '\" |
| 67 | '\" .UL arg1 arg2 |
| 68 | '\" Print arg1 underlined, then print arg2 normally. |
| 69 | '\" |
| 70 | '\" RCS: @(#) $Id: man.macros,v 1.4 2000/08/25 06:18:32 ericm Exp $ |
| 71 | '\" |
| 72 | '\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages. |
| 73 | .if t .wh -1.3i ^B |
| 74 | .nr ^l \n(.l |
| 75 | .ad b |
| 76 | '\" # AP - start an argument description: open a .TP tag (indent from arg 4, else register )C when a name is given, else 15), set the tab stops from registers )A and )B, then print the type, the italic name and, if given, the (in/out) direction; a lone first argument is printed in italics |
| 77 | .de AP |
| 78 | .ie !"\\$4"" .TP \\$4 |
| 79 | .el \{\ |
| 80 | . ie !"\\$2"" .TP \\n()Cu |
| 81 | . el .TP 15 |
| 82 | .\} |
| 83 | .ta \\n()Au \\n()Bu |
| 84 | .ie !"\\$3"" \{\ |
| 85 | \&\\$1 \\fI\\$2\\fP (\\$3) |
| 86 | .\".b |
| 87 | .\} |
| 88 | .el \{\ |
| 89 | .br |
| 90 | .ie !"\\$2"" \{\ |
| 91 | \&\\$1 \\fI\\$2\\fP |
| 92 | .\} |
| 93 | .el \{\ |
| 94 | \&\\fI\\$1\\fP |
| 95 | .\} |
| 96 | .\} |
| 97 | .. |
| 98 | '\" # AS - define tabbing values for .AP: )A = width of arg 1 + 3n (default 10n), )B = )A + width of arg 2 + 3n (default )A + 15n), )C = )B + width of "(in/out)" + 2n |
| 99 | .de AS |
| 100 | .nr )A 10n |
| 101 | .if !"\\$1"" .nr )A \\w'\\$1'u+3n |
| 102 | .nr )B \\n()Au+15n |
| 103 | .\" |
| 104 | .if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n |
| 105 | .nr )C \\n()Bu+\\w'(in/out)'u+2n |
| 106 | .. |
| 107 | .AS Tcl_Interp Tcl_CreateInterp in/out |
| 108 | '\" # BS - start boxed text: break, mark the starting position and, in nroff only, draw a rule across the current line length |
| 109 | '\" # ^y = starting y location |
| 110 | '\" # ^b = 1 |
| 111 | .de BS |
| 112 | .br |
| 113 | .mk ^y |
| 114 | .nr ^b 1u |
| 115 | .if n .nf |
| 116 | .if n .ti 0 |
| 117 | .if n \l'\\n(.lu\(ul' |
| 118 | .if n .fi |
| 119 | .. |
| 120 | '\" # BE - end boxed text (draw box now): nroff draws a closing rule of length ^l; troff draws the box from the mark ^y to the current position, then ^b is reset to 0 |
| 121 | .de BE |
| 122 | .nf |
| 123 | .ti 0 |
| 124 | .mk ^t |
| 125 | .ie n \l'\\n(^lu\(ul' |
| 126 | .el \{\ |
| 127 | .\" Draw four-sided box normally, but don't draw top of |
| 128 | .\" box if the box started on an earlier page. |
| 129 | .ie !\\n(^b-1 \{\ |
| 130 | \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
| 131 | .\} |
| 132 | .el \{\ |
| 133 | \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
| 134 | .\} |
| 135 | .\} |
| 136 | .fi |
| 137 | .br |
| 138 | .nr ^b 0 |
| 139 | .. |
| 140 | '\" # VS - start vertical sidebar: break first if a second argument is given, mark the starting position, then turn on a margin character (nroff) or set ^v (troff) |
| 141 | '\" # ^Y = starting y location |
| 142 | '\" # ^v = 1 (for troff; for nroff this doesn't matter) |
| 143 | .de VS |
| 144 | .if !"\\$2"" .br |
| 145 | .mk ^Y |
| 146 | .ie n 'mc \s12\(br\s0 |
| 147 | .el .nr ^v 1u |
| 148 | .. |
| 149 | '\" # VE - end of vertical sidebar: nroff turns the margin character off; troff switches to environment 2 and draws a vertical line at ^l + 3n back up to the mark ^Y; ^v is then reset to 0 |
| 150 | .de VE |
| 151 | .ie n 'mc |
| 152 | .el \{\ |
| 153 | .ev 2 |
| 154 | .nf |
| 155 | .ti 0 |
| 156 | .mk ^t |
| 157 | \h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n' |
| 158 | .sp -1 |
| 159 | .fi |
| 160 | .ev |
| 161 | .\} |
| 162 | .nr ^v 0 |
| 163 | .. |
| 164 | '\" # ^B - special macro to handle page bottom: finish off the current |
| 165 | '\" # box/sidebar if in box/sidebar mode (^b / ^v nonzero), then break the page |
| 166 | '\" # with .bp and re-mark the box/sidebar start (^y / ^Y) for the next page. |
| 167 | .de ^B |
| 168 | .ev 2 |
| 169 | 'ti 0 |
| 170 | 'nf |
| 171 | .mk ^t |
| 172 | .if \\n(^b \{\ |
| 173 | .\" Draw three-sided box if this is the box's first page, |
| 174 | .\" draw two sides but no top otherwise. |
| 175 | .ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
| 176 | .el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
| 177 | .\} |
| 178 | .if \\n(^v \{\ |
| 179 | .nr ^x \\n(^tu+1v-\\n(^Yu |
| 180 | \kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c |
| 181 | .\} |
| 182 | .bp |
| 183 | 'fi |
| 184 | .ev |
| 185 | .if \\n(^b \{\ |
| 186 | .mk ^y |
| 187 | .nr ^b 2 |
| 188 | .\} |
| 189 | .if \\n(^v \{\ |
| 190 | .mk ^Y |
| 191 | .\} |
| 192 | .. |
| 193 | '\" # DS - begin display: start a relative indent (.RS), switch to no-fill mode and space down one line |
| 194 | .de DS |
| 195 | .RS |
| 196 | .nf |
| 197 | .sp |
| 198 | .. |
| 199 | '\" # DE - end display: restore fill mode, end the relative indent (.RE) and space down one line |
| 200 | .de DE |
| 201 | .fi |
| 202 | .RE |
| 203 | .sp |
| 204 | .. |
| 205 | '\" # SO - start of list of standard options: emit the "STANDARD OPTIONS" heading, then switch to no-fill mode with tab stops at 5.5c and 11c and the bold font |
| 206 | .de SO |
| 207 | .SH "STANDARD OPTIONS" |
| 208 | .LP |
| 209 | .nf |
| 210 | .ta 5.5c 11c |
| 211 | .ft B |
| 212 | .. |
| 213 | '\" # SE - end of list of standard options: restore fill mode and the roman font, then print a pointer to the options manual entry |
| 214 | .de SE |
| 215 | .fi |
| 216 | .ft R |
| 217 | .LP |
| 218 | See the \\fBoptions\\fR manual entry for details on the standard options. |
| 219 | .. |
| 220 | '\" # OP - start of full description for a single option: print args 1-3 in bold as the command-line name, database name and database class on separate no-fill lines (tab stop at 4c), then open an indented paragraph with .IP |
| 221 | .de OP |
| 222 | .LP |
| 223 | .nf |
| 224 | .ta 4c |
| 225 | Command-Line Name: \\fB\\$1\\fR |
| 226 | Database Name: \\fB\\$2\\fR |
| 227 | Database Class: \\fB\\$3\\fR |
| 228 | .fi |
| 229 | .IP |
| 230 | .. |
| 231 | '\" # CS - begin code excerpt: start a relative indent (.RS), switch to no-fill mode and set tab stops every quarter inch up to 1i |
| 232 | .de CS |
| 233 | .RS |
| 234 | .nf |
| 235 | .ta .25i .5i .75i 1i |
| 236 | .. |
| 237 | '\" # CE - end code excerpt: restore fill mode and end the relative indent (.RE) |
| 238 | .de CE |
| 239 | .fi |
| 240 | .RE |
| 241 | .. |
| | '\" # UL - print arg1 followed by an underrule drawn back to the left margin, then print arg2 |
| 242 | .de UL |
| 243 | \\$1\l'|0\(ul'\\$2 |
| 244 | .. |
| 245 | .TH Utf 3 "8.1" Tcl "Tcl Library Procedures" |
| 246 | .BS |
| 247 | .SH NAME |
| 248 | Tcl_UniChar, Tcl_UniCharCaseMatch, Tcl_UniCharNcasecmp, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings. |
| 249 | .SH SYNOPSIS |
| 250 | .nf |
| 251 | \fB#include <tcl.h>\fR |
| 252 | .sp |
| 253 | typedef ... Tcl_UniChar; |
| 254 | .sp |
| 255 | int |
| 256 | \fBTcl_UniCharToUtf\fR(\fIch, buf\fR) |
| 257 | .sp |
| 258 | int |
| 259 | \fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR) |
| 260 | .VS 8.4 |
| 261 | .sp |
| 262 | char * |
| 263 | \fBTcl_UniCharToUtfDString\fR(\fIuniStr, numChars, dstPtr\fR) |
| 264 | .sp |
| 265 | Tcl_UniChar * |
| 266 | \fBTcl_UtfToUniCharDString\fR(\fIsrc, len, dstPtr\fR) |
| 267 | .VE 8.4 |
| 268 | .sp |
| 269 | int |
| 270 | \fBTcl_UniCharLen\fR(\fIuniStr\fR) |
| 271 | .sp |
| 272 | int |
| 273 | \fBTcl_UniCharNcmp\fR(\fIuniStr, uniStr, num\fR) |
| 274 | .VS 8.4 |
| 275 | .sp |
| 276 | int |
| 277 | \fBTcl_UniCharNcasecmp\fR(\fIuniStr, uniStr, num\fR) |
| 278 | .sp |
| 279 | int |
| 280 | \fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR) |
| 281 | .VE 8.4 |
| 282 | .sp |
| 283 | int |
| 284 | \fBTcl_UtfNcmp\fR(\fIsrc, src, num\fR) |
| 285 | .sp |
| 286 | int |
| 287 | \fBTcl_UtfNcasecmp\fR(\fIsrc, src, num\fR) |
| 288 | .sp |
| 289 | int |
| 290 | \fBTcl_UtfCharComplete\fR(\fIsrc, len\fR) |
| 291 | .sp |
| 292 | int |
| 293 | \fBTcl_NumUtfChars\fR(\fIsrc, len\fR) |
| 294 | .VS 8.4 |
| 295 | .sp |
| 296 | CONST char * |
| 297 | \fBTcl_UtfFindFirst\fR(\fIsrc, ch\fR) |
| 298 | .sp |
| 299 | CONST char * |
| 300 | \fBTcl_UtfFindLast\fR(\fIsrc, ch\fR) |
| 301 | .sp |
| 302 | CONST char * |
| 303 | \fBTcl_UtfNext\fR(\fIsrc\fR) |
| 304 | .sp |
| 305 | CONST char * |
| 306 | \fBTcl_UtfPrev\fR(\fIsrc, start\fR) |
| 307 | .VE 8.4 |
| 308 | .sp |
| 309 | Tcl_UniChar |
| 310 | \fBTcl_UniCharAtIndex\fR(\fIsrc, index\fR) |
| 311 | .VS 8.4 |
| 312 | .sp |
| 313 | CONST char * |
| 314 | \fBTcl_UtfAtIndex\fR(\fIsrc, index\fR) |
| 315 | .VE 8.4 |
| 316 | .sp |
| 317 | int |
| 318 | \fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR) |
| 319 | .SH ARGUMENTS |
| 320 | .AS "CONST Tcl_UniChar" numChars in/out |
| 321 | .AP char *buf out |
| 322 | Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most |
| 323 | TCL_UTF_MAX bytes are stored in the buffer. |
| 324 | .AP int ch in |
| 325 | The Tcl_UniChar to be converted or examined. |
| 326 | .AP Tcl_UniChar *chPtr out |
| 327 | Filled with the Tcl_UniChar represented by the head of the UTF-8 string. |
| 328 | .AP "CONST char" *src in |
| 329 | Pointer to a UTF-8 string. |
| 330 | .AP "CONST Tcl_UniChar" *uniStr in |
| 331 | A null-terminated Unicode string. |
| 332 | .AP "CONST Tcl_UniChar" *uniPattern in |
| 333 | A null-terminated Unicode string. |
| 334 | .AP int len in |
| 335 | The length of the UTF-8 string in bytes (not UTF-8 characters). If |
| 336 | negative, all bytes up to the first null byte are used. |
| 337 | .AP int numChars in |
| 338 | The length of the Unicode string in characters. Must be greater than or |
| 339 | equal to 0. |
| 340 | .AP "Tcl_DString" *dstPtr in/out |
| 341 | A pointer to a previously-initialized \fBTcl_DString\fR. |
| 342 | .AP "unsigned long" num in |
| 343 | The number of characters to compare. |
| 344 | .AP "CONST char" *start in |
| 345 | Pointer to the beginning of a UTF-8 string. |
| 346 | .AP int index in |
| 347 | The index of a character (not byte) in the UTF-8 string. |
| 348 | .AP int *readPtr out |
| 349 | If non-NULL, filled with the number of bytes in the backslash sequence, |
| 350 | including the backslash character. |
| 351 | .AP char *dst out |
| 352 | Buffer in which the bytes represented by the backslash sequence are stored. |
| 353 | At most TCL_UTF_MAX bytes are stored in the buffer. |
| 354 | .VS 8.4 |
| 355 | .AP int nocase in |
| 356 | Specifies whether the match should be done case-sensitive (0) or |
| 357 | case-insensitive (1). |
| 358 | .VE 8.4 |
| 359 | .BE |
| 360 | |
| 361 | .SH DESCRIPTION |
| 362 | .PP |
| 363 | These routines convert between UTF-8 strings and Tcl_UniChars. A |
| 364 | Tcl_UniChar is a Unicode character represented as an unsigned, fixed-size |
| 365 | quantity. A UTF-8 character is a Unicode character represented as |
| 366 | a varying-length sequence of up to TCL_UTF_MAX bytes. A multibyte UTF-8 |
| 367 | sequence consists of a lead byte followed by some number of trail bytes. |
| 368 | .PP |
| 369 | \fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to |
| 370 | represent one Unicode character in the UTF-8 representation. |
| 371 | .PP |
| 372 | \fBTcl_UniCharToUtf\fR stores the Tcl_UniChar \fIch\fR as a UTF-8 string |
| 373 | starting at \fIbuf\fR. The return value is the number of bytes stored |
| 374 | in \fIbuf\fR. |
| 375 | .PP |
| 376 | \fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR |
| 377 | and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the |
| 378 | number of bytes read from \fIsrc\fR. The caller must ensure that the |
| 379 | source buffer is long enough such that this routine does not run off the |
| 380 | end and dereference non-existent or random memory; if the source buffer |
| 381 | is known to be null-terminated, this will not happen. If the input is |
| 382 | not in proper UTF-8 format, \fBTcl_UtfToUniChar\fR will store the first |
| 383 | byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and |
| 384 | 0x00ff and return 1. |
| 385 | .PP |
| 386 | \fBTcl_UniCharToUtfDString\fR converts the given Unicode string |
| 387 | to UTF-8, storing the result in a previously-initialized \fBTcl_DString\fR. |
| 388 | You must specify the length of the given Unicode string. |
| 389 | The return value is a pointer to the UTF-8 representation of the |
| 390 | Unicode string. Storage for the return value is appended to the |
| 391 | end of the \fBTcl_DString\fR. |
| 392 | .PP |
| 393 | \fBTcl_UtfToUniCharDString\fR converts the given UTF-8 string to Unicode, |
| 394 | storing the result in the previously-initialized \fBTcl_DString\fR. |
| 395 | You may either specify the length of the given UTF-8 string or "-1", |
| 396 | in which case \fBTcl_UtfToUniCharDString\fR uses \fBstrlen\fR to |
| 397 | calculate the length. The return value is a pointer to the Unicode |
| 398 | representation of the UTF-8 string. Storage for the return value |
| 399 | is appended to the end of the \fBTcl_DString\fR. The Unicode string |
| 400 | is terminated with a Unicode null character. |
| 401 | .PP |
| 402 | \fBTcl_UniCharLen\fR corresponds to \fBstrlen\fR for Unicode |
| 403 | characters. It accepts a null-terminated Unicode string and returns |
| 404 | the number of Unicode characters (not bytes) in that string. |
| 405 | .PP |
| 406 | \fBTcl_UniCharNcmp\fR and \fBTcl_UniCharNcasecmp\fR correspond to |
| 407 | \fBstrncmp\fR and \fBstrncasecmp\fR, respectively, for Unicode characters. |
| 408 | They accept two null-terminated Unicode strings and the number of characters |
| 409 | to compare. Both strings are assumed to be at least \fInum\fR characters |
| 410 | long. \fBTcl_UniCharNcmp\fR compares the two strings character-by-character |
| 411 | according to the Unicode character ordering. It returns an integer greater |
| 412 | than, equal to, or less than 0 if the first string is greater than, equal |
| 413 | to, or less than the second string respectively. \fBTcl_UniCharNcasecmp\fR |
| 414 | is the Unicode case insensitive version. |
| 415 | .PP |
| 416 | .VS 8.4 |
| 417 | \fBTcl_UniCharCaseMatch\fR is the Unicode equivalent to |
| 418 | \fBTcl_StringCaseMatch\fR. It accepts a null-terminated Unicode string, |
| 419 | a Unicode pattern, and a boolean value specifying whether the match should |
| 420 | be case insensitive, and returns whether the string matches the pattern. |
| 421 | .VE 8.4 |
| 422 | .PP |
| 423 | \fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It |
| 424 | accepts two null-terminated UTF-8 strings and the number of characters |
| 425 | to compare. (Both strings are assumed to be at least \fInum\fR |
| 426 | characters long.) \fBTcl_UtfNcmp\fR compares the two strings |
| 427 | character-by-character according to the Unicode character ordering. |
| 428 | It returns an integer greater than, equal to, or less than 0 if the |
| 429 | first string is greater than, equal to, or less than the second string |
| 430 | respectively. |
| 431 | .PP |
| 432 | \fBTcl_UtfNcasecmp\fR corresponds to \fBstrncasecmp\fR for UTF-8 |
| 433 | strings. It is similar to \fBTcl_UtfNcmp\fR except comparisons ignore |
| 434 | differences in case when comparing upper, lower or title case |
| 435 | characters. |
| 436 | .PP |
| 437 | \fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR |
| 438 | of length \fIlen\fR bytes is long enough to be decoded by |
| 439 | \fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee |
| 440 | that the UTF-8 string is properly formed. This routine is used by |
| 441 | procedures that are operating on a byte at a time and need to know if a |
| 442 | full Tcl_UniChar has been seen. |
| 443 | .PP |
| 444 | \fBTcl_NumUtfChars\fR corresponds to \fBstrlen\fR for UTF-8 strings. It |
| 445 | returns the number of Tcl_UniChars that are represented by the UTF-8 string |
| 446 | \fIsrc\fR. The length of the source string is \fIlen\fR bytes. If the |
| 447 | length is negative, all bytes up to the first null byte are used. |
| 448 | .PP |
| 449 | \fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings. It |
| 450 | returns a pointer to the first occurrence of the Tcl_UniChar \fIch\fR |
| 451 | in the null-terminated UTF-8 string \fIsrc\fR. The null terminator is |
| 452 | considered part of the UTF-8 string. |
| 453 | .PP |
| 454 | \fBTcl_UtfFindLast\fR corresponds to \fBstrrchr\fR for UTF-8 strings. It |
| 455 | returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR |
| 456 | in the null-terminated UTF-8 string \fIsrc\fR. The null terminator is |
| 457 | considered part of the UTF-8 string. |
| 458 | .PP |
| 459 | Given \fIsrc\fR, a pointer to some location in a UTF-8 string, |
| 460 | \fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the |
| 461 | string. The caller must not ask for the next character after the last |
| 462 | character in the string if the string is not terminated by a null |
| 463 | character. |
| 464 | .PP |
| 465 | Given \fIsrc\fR, a pointer to some location in a UTF-8 string (or to a |
| 466 | null byte immediately following such a string), \fBTcl_UtfPrev\fR |
| 467 | returns a pointer to the closest preceding byte that starts a UTF-8 |
| 468 | character. |
| 469 | This function will not back up to a position before \fIstart\fR, |
| 470 | the start of the UTF-8 string. If \fIsrc\fR was already at \fIstart\fR, the |
| 471 | return value will be \fIstart\fR. |
| 472 | .PP |
| 473 | \fBTcl_UniCharAtIndex\fR corresponds to a C string array dereference or the |
| 474 | Pascal Ord() function. It returns the Tcl_UniChar represented at the |
| 475 | specified character (not byte) \fIindex\fR in the UTF-8 string |
| 476 | \fIsrc\fR. The source string must contain at least \fIindex\fR |
| 477 | characters. Behavior is undefined if a negative \fIindex\fR is given. |
| 478 | .PP |
| 479 | \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not |
| 480 | byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must |
| 481 | contain at least \fIindex\fR characters. This is equivalent to calling |
| 482 | \fBTcl_UtfNext\fR \fIindex\fR times. If a negative \fIindex\fR is given, |
| 483 | the return pointer points to the first character in the source string. |
| 484 | .PP |
| 485 | \fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl |
| 486 | commands. It parses a backslash sequence and stores the properly formed |
| 487 | UTF-8 character represented by the backslash sequence in the output |
| 488 | buffer \fIdst\fR. At most TCL_UTF_MAX bytes are stored in the buffer. |
| 489 | \fBTcl_UtfBackslash\fR modifies \fI*readPtr\fR to contain the number |
| 490 | of bytes in the backslash sequence, including the backslash character. |
| 491 | The return value is the number of bytes stored in the output buffer. |
| 492 | .PP |
| 493 | See the \fBTcl\fR manual entry for information on the valid backslash |
| 494 | sequences. All of the sequences described in the Tcl manual entry are |
| 495 | supported by \fBTcl_UtfBackslash\fR. |
| 496 | |
| 497 | .SH KEYWORDS |
| 498 | utf, unicode, backslash |