| 1 | '\" |
| 2 | '\" Copyright (c) 1994 The Regents of the University of California. |
| 3 | '\" Copyright (c) 1994-1996 Sun Microsystems, Inc. |
| 4 | '\" Copyright (c) 1998-1999 Scriptics Corporation |
| 5 | '\" |
| 6 | '\" See the file "license.terms" for information on usage and redistribution |
| 7 | '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
| 8 | '\" |
| 9 | '\" RCS: @(#) $Id: RegExp.3,v 1.13 2002/11/13 22:11:40 vincentdarley Exp $ |
| 10 | '\" |
| 11 | '\" The definitions below are for supplemental macros used in Tcl/Tk |
| 12 | '\" manual entries. |
| 13 | '\" |
| 14 | '\" .AP type name in/out ?indent? |
| 15 | '\" Start paragraph describing an argument to a library procedure. |
| 16 | '\" type is type of argument (int, etc.), in/out is either "in", "out", |
| 17 | '\" or "in/out" to describe whether procedure reads or modifies arg, |
| 18 | '\" and indent is equivalent to second arg of .IP (shouldn't ever be |
| 19 | '\" needed; use .AS below instead) |
| 20 | '\" |
| 21 | '\" .AS ?type? ?name? |
| 22 | '\" Give maximum sizes of arguments for setting tab stops. Type and |
| 23 | '\" name are examples of largest possible arguments that will be passed |
| 24 | '\" to .AP later. If args are omitted, default tab stops are used. |
| 25 | '\" |
| 26 | '\" .BS |
| 27 | '\" Start box enclosure. From here until next .BE, everything will be |
| 28 | '\" enclosed in one large box. |
| 29 | '\" |
| 30 | '\" .BE |
| 31 | '\" End of box enclosure. |
| 32 | '\" |
| 33 | '\" .CS |
| 34 | '\" Begin code excerpt. |
| 35 | '\" |
| 36 | '\" .CE |
| 37 | '\" End code excerpt. |
| 38 | '\" |
| 39 | '\" .VS ?version? ?br? |
| 40 | '\" Begin vertical sidebar, for use in marking newly-changed parts |
| 41 | '\" of man pages. The first argument is ignored and used for recording |
| 42 | '\" the version when the .VS was added, so that the sidebars can be |
| 43 | '\" found and removed when they reach a certain age. If another argument |
| 44 | '\" is present, then a line break is forced before starting the sidebar. |
| 45 | '\" |
| 46 | '\" .VE |
| 47 | '\" End of vertical sidebar. |
| 48 | '\" |
| 49 | '\" .DS |
| 50 | '\" Begin an indented unfilled display. |
| 51 | '\" |
| 52 | '\" .DE |
| 53 | '\" End of indented unfilled display. |
| 54 | '\" |
| 55 | '\" .SO |
| 56 | '\" Start of list of standard options for a Tk widget. The |
| 57 | '\" options follow on successive lines, in three columns separated |
| 58 | '\" by tabs. |
| 59 | '\" |
| 60 | '\" .SE |
| 61 | '\" End of list of standard options for a Tk widget. |
| 62 | '\" |
| 63 | '\" .OP cmdName dbName dbClass |
| 64 | '\" Start of description of a specific option. cmdName gives the |
| 65 | '\" option's name as specified in the class command, dbName gives |
| 66 | '\" the option's name in the option database, and dbClass gives |
| 67 | '\" the option's class in the option database. |
| 68 | '\" |
| 69 | '\" .UL arg1 arg2 |
| 70 | '\" Print arg1 underlined, then print arg2 normally. |
| 71 | '\" |
| 72 | '\" RCS: @(#) $Id: man.macros,v 1.4 2000/08/25 06:18:32 ericm Exp $ |
| 73 | '\" |
| 74 | '\" # Page setup for Tcl/Tk man pages: in troff, plant ^B as a trap 1.3i above the page bottom; save the current line length in ^l; adjust both margins. |
| 75 | .if t .wh -1.3i ^B |
| 76 | .nr ^l \n(.l |
| 77 | .ad b |
| 78 | '\" # AP type name in/out ?indent? - start an argument description: open a .TP paragraph (indent = 4th arg, else register )C, else 15) and print the type, italic name and "(in/out)" using the tab stops in registers )A and )B |
| 79 | .de AP |
| 80 | .ie !"\\$4"" .TP \\$4 |
| 81 | .el \{\ |
| 82 | . ie !"\\$2"" .TP \\n()Cu |
| 83 | . el .TP 15 |
| 84 | .\} |
| 85 | .ta \\n()Au \\n()Bu |
| 86 | .ie !"\\$3"" \{\ |
| 87 | \&\\$1 \\fI\\$2\\fP (\\$3) |
| 88 | .\".b |
| 89 | .\} |
| 90 | .el \{\ |
| 91 | .br |
| 92 | .ie !"\\$2"" \{\ |
| 93 | \&\\$1 \\fI\\$2\\fP |
| 94 | .\} |
| 95 | .el \{\ |
| 96 | \&\\fI\\$1\\fP |
| 97 | .\} |
| 98 | .\} |
| 99 | .. |
| 100 | '\" # AS ?type? ?name? - define tabbing values for .AP: )A = width of type + 3n (default 10n), )B = )A + width of name + 3n (default )A + 15n), )C = )B + width of "(in/out)" + 2n; invoked once below to set initial values |
| 101 | .de AS |
| 102 | .nr )A 10n |
| 103 | .if !"\\$1"" .nr )A \\w'\\$1'u+3n |
| 104 | .nr )B \\n()Au+15n |
| 105 | .\" |
| 106 | .if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n |
| 107 | .nr )C \\n()Bu+\\w'(in/out)'u+2n |
| 108 | .. |
| 109 | .AS Tcl_Interp Tcl_CreateInterp in/out |
| 110 | '\" # BS - start boxed text: break the line, record the current vertical position and flag the box as open; in nroff also draw a rule across the current line length |
| 111 | '\" # ^y = starting y location of the box (set with .mk) |
| 112 | '\" # ^b = 1 while the box is open on its first page (the ^B macro changes it to 2 after a page break, .BE resets it to 0) |
| 113 | .de BS |
| 114 | .br |
| 115 | .mk ^y |
| 116 | .nr ^b 1u |
| 117 | .if n .nf |
| 118 | .if n .ti 0 |
| 119 | .if n \l'\\n(.lu\(ul' |
| 120 | .if n .fi |
| 121 | .. |
| 122 | '\" # BE - end boxed text (draw box now): nroff draws a rule of length ^l; troff draws the box from ^y down to the current position (marked in ^t), leaving out the top edge when ^b is not 1; finally ^b is reset to 0 |
| 123 | .de BE |
| 124 | .nf |
| 125 | .ti 0 |
| 126 | .mk ^t |
| 127 | .ie n \l'\\n(^lu\(ul' |
| 128 | .el \{\ |
| 129 | .\" Draw four-sided box normally, but don't draw top of |
| 130 | .\" box if the box started on an earlier page. |
| 131 | .ie !\\n(^b-1 \{\ |
| 132 | \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
| 133 | .\} |
| 134 | .el \{\ |
| 135 | \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
| 136 | .\} |
| 137 | .\} |
| 138 | .fi |
| 139 | .br |
| 140 | .nr ^b 0 |
| 141 | .. |
| 142 | '\" # VS ?version? ?br? - start vertical sidebar: break first if a second argument is given, record the starting position, then turn on a margin character (nroff) or set ^v (troff) |
| 143 | '\" # ^Y = starting y location of the sidebar (set with .mk) |
| 144 | '\" # ^v = 1 while a sidebar is open (for troff; for nroff this doesn't matter) |
| 145 | .de VS |
| 146 | .if !"\\$2"" .br |
| 147 | .mk ^Y |
| 148 | .ie n 'mc \s12\(br\s0 |
| 149 | .el .nr ^v 1u |
| 150 | .. |
| 151 | '\" # VE - end of vertical sidebar: nroff turns the margin character off; troff switches to environment 2 and draws the bar at ^l+3n between ^Y and the current position (marked in ^t); ^v is reset to 0 in both cases |
| 152 | .de VE |
| 153 | .ie n 'mc |
| 154 | .el \{\ |
| 155 | .ev 2 |
| 156 | .nf |
| 157 | .ti 0 |
| 158 | .mk ^t |
| 159 | \h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n' |
| 160 | .sp -1 |
| 161 | .fi |
| 162 | .ev |
| 163 | .\} |
| 164 | .nr ^v 0 |
| 165 | .. |
| 166 | '\" # Special macro to handle page bottom: finish off the current |
| 167 | '\" # box/sidebar if in box/sidebar mode (^b / ^v set), then start a new page |
| 168 | '\" # with .bp and re-mark ^y / ^Y there; an open box gets ^b = 2. |
| 169 | .de ^B |
| 170 | .ev 2 |
| 171 | 'ti 0 |
| 172 | 'nf |
| 173 | .mk ^t |
| 174 | .if \\n(^b \{\ |
| 175 | .\" Draw three-sided box if this is the box's first page, |
| 176 | .\" draw two sides but no top otherwise. |
| 177 | .ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
| 178 | .el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
| 179 | .\} |
| 180 | .if \\n(^v \{\ |
| 181 | .nr ^x \\n(^tu+1v-\\n(^Yu |
| 182 | \kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c |
| 183 | .\} |
| 184 | .bp |
| 185 | 'fi |
| 186 | .ev |
| 187 | .if \\n(^b \{\ |
| 188 | .mk ^y |
| 189 | .nr ^b 2 |
| 190 | .\} |
| 191 | .if \\n(^v \{\ |
| 192 | .mk ^Y |
| 193 | .\} |
| 194 | .. |
| 195 | '\" # DS - begin display: indent with .RS, switch to no-fill mode and emit one blank line |
| 196 | .de DS |
| 197 | .RS |
| 198 | .nf |
| 199 | .sp |
| 200 | .. |
| 201 | '\" # DE - end display: restore fill mode, undo the .RS indent and emit one blank line |
| 202 | .de DE |
| 203 | .fi |
| 204 | .RE |
| 205 | .sp |
| 206 | .. |
| 207 | '\" # SO - start of list of standard options: emit the "STANDARD OPTIONS" heading, then switch to no-fill bold text with tab stops at 5.5c and 11c |
| 208 | .de SO |
| 209 | .SH "STANDARD OPTIONS" |
| 210 | .LP |
| 211 | .nf |
| 212 | .ta 5.5c 11c |
| 213 | .ft B |
| 214 | .. |
| 215 | '\" # SE - end of list of standard options: restore fill mode and the roman font, then print a pointer to the "options" manual entry |
| 216 | .de SE |
| 217 | .fi |
| 218 | .ft R |
| 219 | .LP |
| 220 | See the \\fBoptions\\fR manual entry for details on the standard options. |
| 221 | .. |
| 222 | '\" # OP cmdName dbName dbClass - start of full description for a single option: print the three names in bold, one per line in no-fill mode (tab stop at 4c), then open an indented paragraph with .IP |
| 223 | .de OP |
| 224 | .LP |
| 225 | .nf |
| 226 | .ta 4c |
| 227 | Command-Line Name: \\fB\\$1\\fR |
| 228 | Database Name: \\fB\\$2\\fR |
| 229 | Database Class: \\fB\\$3\\fR |
| 230 | .fi |
| 231 | .IP |
| 232 | .. |
| 233 | '\" # CS - begin code excerpt: indent with .RS, switch to no-fill mode and set tab stops every quarter inch up to 1i |
| 234 | .de CS |
| 235 | .RS |
| 236 | .nf |
| 237 | .ta .25i .5i .75i 1i |
| 238 | .. |
| 239 | '\" # CE - end code excerpt: restore fill mode and undo the .RS indent |
| 240 | .de CE |
| 241 | .fi |
| 242 | .RE |
| 243 | .. |
| 244 | .de UL \" UL arg1 arg2 - print arg1, underline it with a rule drawn back to horizontal position 0, then print arg2 normally |
| 245 | \\$1\l'|0\(ul'\\$2 |
| 246 | .. |
| 247 | .TH Tcl_RegExpMatch 3 8.1 Tcl "Tcl Library Procedures" |
| 248 | .BS |
| 249 | .SH NAME |
| 250 | Tcl_RegExpMatch, Tcl_RegExpCompile, Tcl_RegExpExec, Tcl_RegExpRange, Tcl_GetRegExpFromObj, Tcl_RegExpMatchObj, Tcl_RegExpExecObj, Tcl_RegExpGetInfo \- Pattern matching with regular expressions |
| 251 | .SH SYNOPSIS |
| 252 | .nf |
| 253 | \fB#include <tcl.h>\fR |
| 254 | .sp |
| 255 | int |
| 256 | \fBTcl_RegExpMatchObj\fR(\fIinterp\fR, \fIstrObj\fR, \fIpatObj\fR) |
| 257 | .sp |
| 258 | int |
| 259 | \fBTcl_RegExpMatch\fR(\fIinterp\fR, \fIstring\fR, \fIpattern\fR) |
| 260 | .sp |
| 261 | Tcl_RegExp |
| 262 | \fBTcl_RegExpCompile\fR(\fIinterp\fR, \fIpattern\fR) |
| 263 | .sp |
| 264 | int |
| 265 | \fBTcl_RegExpExec\fR(\fIinterp\fR, \fIregexp\fR, \fIstring\fR, \fIstart\fR) |
| 266 | .sp |
| 267 | \fBTcl_RegExpRange\fR(\fIregexp\fR, \fIindex\fR, \fIstartPtr\fR, \fIendPtr\fR) |
| 268 | .VS 8.1 |
| 269 | .sp |
| 270 | Tcl_RegExp |
| 271 | \fBTcl_GetRegExpFromObj\fR(\fIinterp\fR, \fIpatObj\fR, \fIcflags\fR) |
| 272 | .sp |
| 273 | int |
| 274 | \fBTcl_RegExpExecObj\fR(\fIinterp\fR, \fIregexp\fR, \fIobjPtr\fR, \fIoffset\fR, \fInmatches\fR, \fIeflags\fR) |
| 275 | .sp |
| 276 | \fBTcl_RegExpGetInfo\fR(\fIregexp\fR, \fIinfoPtr\fR) |
| 277 | .VE 8.1 |
| 278 | |
| 279 | .SH ARGUMENTS |
| 280 | .AS Tcl_Interp *interp |
| 281 | .AP Tcl_Interp *interp in |
| 282 | Tcl interpreter to use for error reporting. The interpreter may be |
| 283 | NULL if no error reporting is desired. |
| 284 | .VS 8.1 |
| 285 | .AP Tcl_Obj *strObj in/out |
| 286 | Refers to the object from which to get the string to search. The |
| 287 | internal representation of the object may be converted to a form that |
| 288 | can be efficiently searched. |
| 289 | .AP Tcl_Obj *patObj in/out |
| 290 | Refers to the object from which to get a regular expression. The |
| 291 | compiled regular expression is cached in the object. |
| 292 | .VE 8.1 |
| 293 | .AP char *string in |
| 294 | String to check for a match with a regular expression. |
| 295 | .AP "CONST char" *pattern in |
| 296 | String in the form of a regular expression pattern. |
| 297 | .AP Tcl_RegExp regexp in |
| 298 | Compiled regular expression. Must have been returned previously |
| 299 | by \fBTcl_GetRegExpFromObj\fR or \fBTcl_RegExpCompile\fR. |
| 300 | .AP char *start in |
| 301 | If \fIstring\fR is just a portion of some other string, this argument |
| 302 | identifies the beginning of the larger string. |
| 303 | If it isn't the same as \fIstring\fR, then no \fB^\fR matches |
| 304 | will be allowed. |
| 305 | .AP int index in |
| 306 | Specifies which range is desired: 0 means the range of the entire |
| 307 | match, 1 or greater means the range that matched a parenthesized |
| 308 | sub-expression. |
| 309 | .VS 8.4 |
| 310 | .AP "CONST char" **startPtr out |
| 311 | The address of the first character in the range is stored here, or |
| 312 | NULL if there is no such range. |
| 313 | .AP "CONST char" **endPtr out |
| 314 | The address of the character just after the last one in the range |
| 315 | is stored here, or NULL if there is no such range. |
| 316 | .VE 8.4 |
| 317 | .VS 8.1 |
| 318 | .AP int cflags in |
| 319 | OR-ed combination of compilation flags. See below for more information. |
| 320 | .AP Tcl_Obj *objPtr in/out |
| 321 | An object which contains the string to check for a match with a |
| 322 | regular expression. |
| 323 | .AP int offset in |
| 324 | The character offset into the string where matching should begin. |
| 325 | The value of the offset has no impact on \fB^\fR matches. This |
| 326 | behavior is controlled by \fIeflags\fR. |
| 327 | .AP int nmatches in |
| 328 | The number of matching subexpressions that should be remembered for |
| 329 | later use. If this value is 0, then no subexpression match |
| 330 | information will be computed. If the value is -1, then |
| 331 | all of the matching subexpressions will be remembered. Any other |
| 332 | value will be taken as the maximum number of subexpressions to |
| 333 | remember. |
| 334 | .AP int eflags in |
| 335 | OR-ed combination of the values TCL_REG_NOTBOL and TCL_REG_NOTEOL. |
| 336 | See below for more information. |
| 337 | .AP Tcl_RegExpInfo *infoPtr out |
| 338 | The address of the location where information about a previous match |
| 339 | should be stored by \fBTcl_RegExpGetInfo\fR. |
| 340 | .VE 8.1 |
| 341 | .BE |
| 342 | |
| 343 | .SH DESCRIPTION |
| 344 | .PP |
| 345 | \fBTcl_RegExpMatch\fR determines whether its \fIstring\fR argument |
| 346 | matches \fIpattern\fR, where \fIpattern\fR is interpreted |
| 347 | as a regular expression using the rules in the \fBre_syntax\fR |
| 348 | reference page. |
| 349 | If there is a match then \fBTcl_RegExpMatch\fR returns 1. |
| 350 | If there is no match then \fBTcl_RegExpMatch\fR returns 0. |
| 351 | If an error occurs in the matching process (e.g. \fIpattern\fR |
| 352 | is not a valid regular expression) then \fBTcl_RegExpMatch\fR |
| 353 | returns \-1 and leaves an error message in the interpreter result. |
| 354 | .VS 8.1.2 |
| 355 | \fBTcl_RegExpMatchObj\fR is similar to \fBTcl_RegExpMatch\fR except it |
| 356 | operates on the Tcl objects \fIstrObj\fR and \fIpatObj\fR instead of |
| 357 | UTF strings. |
| 358 | \fBTcl_RegExpMatchObj\fR is generally more efficient than |
| 359 | \fBTcl_RegExpMatch\fR, so it is the preferred interface. |
| 360 | .VE 8.1.2 |
| 361 | .PP |
| 362 | \fBTcl_RegExpCompile\fR, \fBTcl_RegExpExec\fR, and \fBTcl_RegExpRange\fR |
| 363 | provide lower-level access to the regular expression pattern matcher. |
| 364 | \fBTcl_RegExpCompile\fR compiles a regular expression string into |
| 365 | the internal form used for efficient pattern matching. |
| 366 | The return value is a token for this compiled form, which can be |
| 367 | used in subsequent calls to \fBTcl_RegExpExec\fR or \fBTcl_RegExpRange\fR. |
| 368 | If an error occurs while compiling the regular expression then |
| 369 | \fBTcl_RegExpCompile\fR returns NULL and leaves an error message |
| 370 | in the interpreter result. |
| 371 | Note: the return value from \fBTcl_RegExpCompile\fR is only valid |
| 372 | up to the next call to \fBTcl_RegExpCompile\fR; it is not safe to |
| 373 | retain these values for long periods of time. |
| 374 | .PP |
| 375 | \fBTcl_RegExpExec\fR executes the regular expression pattern matcher. |
| 376 | It returns 1 if \fIstring\fR contains a range of characters that |
| 377 | match \fIregexp\fR, 0 if no match is found, and |
| 378 | \-1 if an error occurs. |
| 379 | In the case of an error, \fBTcl_RegExpExec\fR leaves an error |
| 380 | message in the interpreter result. |
| 381 | When searching a string for multiple matches of a pattern, |
| 382 | it is important to distinguish between the start of the original |
| 383 | string and the start of the current search. |
| 384 | For example, when searching for the second occurrence of a |
| 385 | match, the \fIstring\fR argument might point to the character |
| 386 | just after the first match; however, it is important for the |
| 387 | pattern matcher to know that this is not the start of the entire string, |
| 388 | so that it doesn't allow \fB^\fR atoms in the pattern to match. |
| 389 | The \fIstart\fR argument provides this information by pointing |
| 390 | to the start of the overall string containing \fIstring\fR. |
| 391 | \fIStart\fR will be less than or equal to \fIstring\fR; if it |
| 392 | is less than \fIstring\fR then no \fB^\fR matches will be allowed. |
| 393 | .PP |
| 394 | \fBTcl_RegExpRange\fR may be invoked after \fBTcl_RegExpExec\fR |
| 395 | returns; it provides detailed information about what ranges of |
| 396 | the string matched what parts of the pattern. |
| 397 | \fBTcl_RegExpRange\fR returns a pair of pointers in \fI*startPtr\fR |
| 398 | and \fI*endPtr\fR that identify a range of characters in |
| 399 | the source string for the most recent call to \fBTcl_RegExpExec\fR. |
| 400 | \fIIndex\fR indicates which of several ranges is desired: |
| 401 | if \fIindex\fR is 0, information is returned about the overall range |
| 402 | of characters that matched the entire pattern; otherwise, |
| 403 | information is returned about the range of characters that matched the |
| 404 | \fIindex\fR'th parenthesized subexpression within the pattern. |
| 405 | If there is no range corresponding to \fIindex\fR then NULL |
| 406 | is stored in \fI*startPtr\fR and \fI*endPtr\fR. |
| 407 | .PP |
| 408 | .VS 8.1 |
| 409 | \fBTcl_GetRegExpFromObj\fR, \fBTcl_RegExpExecObj\fR, and |
| 410 | \fBTcl_RegExpGetInfo\fR are object interfaces that provide the most |
| 411 | direct control of Henry Spencer's regular expression library. For |
| 412 | users that need to modify compilation and execution options directly, |
| 413 | it is recommended that you use these interfaces instead of calling the |
| 414 | internal regexp functions. These interfaces handle the details of UTF |
| 415 | to Unicode translations as well as providing improved performance |
| 416 | through caching in the pattern and string objects. |
| 417 | .PP |
| 418 | \fBTcl_GetRegExpFromObj\fR attempts to return a compiled regular |
| 419 | expression from the \fIpatObj\fR. If the object does not already |
| 420 | contain a compiled regular expression it will attempt to create one |
| 421 | from the string in the object and assign it to the internal |
| 422 | representation of the \fIpatObj\fR. The return value of this function |
| 423 | is of type \fBTcl_RegExp\fR. The return value is a token for this |
| 424 | compiled form, which can be used in subsequent calls to |
| 425 | \fBTcl_RegExpExecObj\fR or \fBTcl_RegExpGetInfo\fR. If an error |
| 426 | occurs while compiling the regular expression then |
| 427 | \fBTcl_GetRegExpFromObj\fR returns NULL and leaves an error message in |
| 428 | the interpreter result. The regular expression token can be used as |
| 429 | long as the internal representation of \fIpatObj\fR refers to the |
| 430 | compiled form. The \fIcflags\fR argument is a bitwise OR of |
| 431 | zero or more of the following flags that control the compilation of |
| 432 | \fIpatObj\fR: |
| 433 | .RS 2 |
| 434 | .TP |
| 435 | \fBTCL_REG_ADVANCED\fR |
| 436 | Compile advanced regular expressions (`AREs'). This mode corresponds to |
| 437 | the normal regular expression syntax accepted by the Tcl regexp and |
| 438 | regsub commands. |
| 439 | .TP |
| 440 | \fBTCL_REG_EXTENDED\fR |
| 441 | Compile extended regular expressions (`EREs'). This mode corresponds |
| 442 | to the regular expression syntax recognized by Tcl 8.0 and earlier |
| 443 | versions. |
| 444 | .TP |
| 445 | \fBTCL_REG_BASIC\fR |
| 446 | Compile basic regular expressions (`BREs'). This mode corresponds |
| 447 | to the regular expression syntax recognized by common Unix utilities |
| 448 | like \fBsed\fR and \fBgrep\fR. This is the default if no flags are |
| 449 | specified. |
| 450 | .TP |
| 451 | \fBTCL_REG_EXPANDED\fR |
| 452 | Compile the regular expression (basic, extended, or advanced) using an |
| 453 | expanded syntax that allows comments and whitespace. This mode causes |
| 454 | non-backslashed non-bracket-expression white |
| 455 | space and #-to-end-of-line comments to be ignored. |
| 456 | .TP |
| 457 | \fBTCL_REG_QUOTE\fR |
| 458 | Compile a literal string, with all characters treated as ordinary characters. |
| 459 | .TP |
| 460 | \fBTCL_REG_NOCASE\fR |
| 461 | Compile for matching that ignores upper/lower case distinctions. |
| 462 | .TP |
| 463 | \fBTCL_REG_NEWLINE\fR |
| 464 | Compile for newline-sensitive matching. By default, newline is a |
| 465 | completely ordinary character with no special meaning in either |
| 466 | regular expressions or strings. With this flag, `[^' bracket |
| 467 | expressions and `.' never match newline, `^' matches an empty string |
| 468 | after any newline in addition to its normal function, and `$' matches |
| 469 | an empty string before any newline in addition to its normal function. |
| 470 | \fBTCL_REG_NEWLINE\fR is the bitwise OR of \fBTCL_REG_NLSTOP\fR and |
| 471 | \fBTCL_REG_NLANCH\fR. |
| 472 | .TP |
| 473 | \fBTCL_REG_NLSTOP\fR |
| 474 | Compile for partial newline-sensitive matching, |
| 475 | with the behavior of |
| 476 | `[^' bracket expressions and `.' affected, |
| 477 | but not the behavior of `^' and `$'. In this mode, `[^' bracket |
| 478 | expressions and `.' never match newline. |
| 479 | .TP |
| 480 | \fBTCL_REG_NLANCH\fR |
| 481 | Compile for inverse partial newline-sensitive matching, |
| 482 | with the behavior |
| 483 | of `^' and `$' (the ``anchors'') affected, but not the behavior of |
| 484 | `[^' bracket expressions and `.'. In this mode `^' matches an empty string |
| 485 | after any newline in addition to its normal function, and `$' matches |
| 486 | an empty string before any newline in addition to its normal function. |
| 487 | .TP |
| 488 | \fBTCL_REG_NOSUB\fR |
| 489 | Compile for matching that reports only success or failure, |
| 490 | not what was matched. This reduces compile overhead and may improve |
| 491 | performance. Subsequent calls to \fBTcl_RegExpGetInfo\fR or |
| 492 | \fBTcl_RegExpRange\fR will not report any match information. |
| 493 | .TP |
| 494 | \fBTCL_REG_CANMATCH\fR |
| 495 | Compile for matching that reports the potential to complete a partial |
| 496 | match given more text (see below). |
| 497 | .RE |
| 498 | .PP |
| 499 | Only one of |
| 500 | \fBTCL_REG_EXTENDED\fR, |
| 501 | \fBTCL_REG_ADVANCED\fR, |
| 502 | \fBTCL_REG_BASIC\fR, and |
| 503 | \fBTCL_REG_QUOTE\fR may be specified. |
| 504 | .PP |
| 505 | \fBTcl_RegExpExecObj\fR executes the regular expression pattern |
| 506 | matcher. It returns 1 if \fIobjPtr\fR contains a range of characters |
| 507 | that match \fIregexp\fR, 0 if no match is found, and \-1 if an error |
| 508 | occurs. In the case of an error, \fBTcl_RegExpExecObj\fR leaves an |
| 509 | error message in the interpreter result. The \fInmatches\fR value |
| 510 | indicates to the matcher how many subexpressions are of interest. If |
| 511 | \fInmatches\fR is 0, then no subexpression match information is |
| 512 | recorded, which may allow the matcher to make various optimizations. |
| 513 | If the value is -1, then all of the subexpressions in the pattern are |
| 514 | remembered. If the value is a positive integer, then only that number |
| 515 | of subexpressions will be remembered. Matching begins at the |
| 516 | specified Unicode character index given by \fIoffset\fR. Unlike |
| 517 | \fBTcl_RegExpExec\fR, the behavior of anchors is not affected by the |
| 518 | offset value. Instead the behavior of the anchors is explicitly |
| 519 | controlled by the \fIeflags\fR argument, which is a bitwise OR of |
| 520 | zero or more of the following flags: |
| 521 | .RS 2 |
| 522 | .TP |
| 523 | \fBTCL_REG_NOTBOL\fR |
| 524 | The starting character will not be treated as the beginning of a |
| 525 | line or the beginning of the string, so `^' will not match there. |
| 526 | Note that this flag has no effect on how `\fB\eA\fR' matches. |
| 527 | .TP |
| 528 | \fBTCL_REG_NOTEOL\fR |
| 529 | The last character in the string will not be treated as the end of a |
| 530 | line or the end of the string, so `$' will not match there. |
| 531 | Note that this flag has no effect on how `\fB\eZ\fR' matches. |
| 532 | .RE |
| 533 | .PP |
| 534 | \fBTcl_RegExpGetInfo\fR retrieves information about the last match |
| 535 | performed with a given regular expression \fIregexp\fR. The |
| 536 | \fIinfoPtr\fR argument contains a pointer to a structure that is |
| 537 | defined as follows: |
| 538 | .PP |
| 539 | .CS |
| 540 | typedef struct Tcl_RegExpInfo { |
| 541 | int \fInsubs\fR; |
| 542 | Tcl_RegExpIndices *\fImatches\fR; |
| 543 | long \fIextendStart\fR; |
| 544 | } Tcl_RegExpInfo; |
| 545 | .CE |
| 546 | .PP |
| 547 | The \fInsubs\fR field contains a count of the number of parenthesized |
| 548 | subexpressions within the regular expression. If the \fBTCL_REG_NOSUB\fR |
| 549 | flag was used, then this value will be zero. The \fImatches\fR field |
| 550 | points to an array of \fInsubs\fR+1 values that indicate the bounds of each |
| 551 | subexpression matched. The first element in the array refers to the |
| 552 | range matched by the entire regular expression, and subsequent elements |
| 553 | refer to the parenthesized subexpressions in the order that they |
| 554 | appear in the pattern. Each element is a structure that is defined as |
| 555 | follows: |
| 556 | .PP |
| 557 | .CS |
| 558 | typedef struct Tcl_RegExpIndices { |
| 559 | long \fIstart\fR; |
| 560 | long \fIend\fR; |
| 561 | } Tcl_RegExpIndices; |
| 562 | .CE |
| 563 | .PP |
| 564 | The \fIstart\fR and \fIend\fR values are Unicode character indices |
| 565 | relative to the offset location within \fIobjPtr\fR where matching began. |
| 566 | The \fIstart\fR index identifies the first character of the matched |
| 567 | subexpression. The \fIend\fR index identifies the first character |
| 568 | after the matched subexpression. If the subexpression matched the |
| 569 | empty string, then \fIstart\fR and \fIend\fR will be equal. If the |
| 570 | subexpression did not participate in the match, then \fIstart\fR and |
| 571 | \fIend\fR will be set to -1. |
| 572 | .PP |
| 573 | The \fIextendStart\fR field in \fBTcl_RegExpInfo\fR is only set if the |
| 574 | \fBTCL_REG_CANMATCH\fR flag was used. It indicates the first |
| 575 | character in the string where a match could occur. If a match was |
| 576 | found, this will be the same as the beginning of the current match. |
| 577 | If no match was found, then it indicates the earliest point at which a |
| 578 | match might occur if additional text is appended to the string. If |
| 579 | no match is possible even with further text, this field will be set |
| 580 | to -1. |
| 581 | .VE 8.1 |
| 582 | .SH "SEE ALSO" |
| 583 | re_syntax(n) |
| 584 | .SH KEYWORDS |
| 585 | match, pattern, regular expression, string, subexpression, Tcl_RegExpIndices, Tcl_RegExpInfo |