Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | '\" |
2 | '\" Copyright (c) 1994 The Regents of the University of California. | |
3 | '\" Copyright (c) 1994-1996 Sun Microsystems, Inc. | |
4 | '\" Copyright (c) 1998-1999 Scriptics Corporation | |
5 | '\" | |
6 | '\" See the file "license.terms" for information on usage and redistribution | |
7 | '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. | |
8 | '\" | |
9 | '\" RCS: @(#) $Id: RegExp.3,v 1.13 2002/11/13 22:11:40 vincentdarley Exp $ | |
10 | '\" | |
11 | '\" The definitions below are for supplemental macros used in Tcl/Tk | |
12 | '\" manual entries. | |
13 | '\" | |
14 | '\" .AP type name in/out ?indent? | |
15 | '\" Start paragraph describing an argument to a library procedure. | |
16 | '\" type is type of argument (int, etc.), in/out is either "in", "out", | |
17 | '\" or "in/out" to describe whether procedure reads or modifies arg, | |
18 | '\" and indent is equivalent to second arg of .IP (shouldn't ever be | |
19 | '\" needed; use .AS below instead) | |
20 | '\" | |
21 | '\" .AS ?type? ?name? | |
22 | '\" Give maximum sizes of arguments for setting tab stops. Type and | |
23 | '\" name are examples of largest possible arguments that will be passed | |
24 | '\" to .AP later. If args are omitted, default tab stops are used. | |
25 | '\" | |
26 | '\" .BS | |
27 | '\" Start box enclosure. From here until next .BE, everything will be | |
28 | '\" enclosed in one large box. | |
29 | '\" | |
30 | '\" .BE | |
31 | '\" End of box enclosure. | |
32 | '\" | |
33 | '\" .CS | |
34 | '\" Begin code excerpt. | |
35 | '\" | |
36 | '\" .CE | |
37 | '\" End code excerpt. | |
38 | '\" | |
39 | '\" .VS ?version? ?br? | |
40 | '\" Begin vertical sidebar, for use in marking newly-changed parts | |
41 | '\" of man pages. The first argument is ignored and used for recording | |
42 | '\" the version when the .VS was added, so that the sidebars can be | |
43 | '\" found and removed when they reach a certain age. If another argument | |
44 | '\" is present, then a line break is forced before starting the sidebar. | |
45 | '\" | |
46 | '\" .VE | |
47 | '\" End of vertical sidebar. | |
48 | '\" | |
49 | '\" .DS | |
50 | '\" Begin an indented unfilled display. | |
51 | '\" | |
52 | '\" .DE | |
53 | '\" End of indented unfilled display. | |
54 | '\" | |
55 | '\" .SO | |
56 | '\" Start of list of standard options for a Tk widget. The | |
57 | '\" options follow on successive lines, in four columns separated | |
58 | '\" by tabs. | |
59 | '\" | |
60 | '\" .SE | |
61 | '\" End of list of standard options for a Tk widget. | |
62 | '\" | |
63 | '\" .OP cmdName dbName dbClass | |
64 | '\" Start of description of a specific option. cmdName gives the | |
65 | '\" option's name as specified in the class command, dbName gives | |
66 | '\" the option's name in the option database, and dbClass gives | |
67 | '\" the option's class in the option database. | |
68 | '\" | |
69 | '\" .UL arg1 arg2 | |
70 | '\" Print arg1 underlined, then print arg2 normally. | |
71 | '\" | |
72 | '\" RCS: @(#) $Id: man.macros,v 1.4 2000/08/25 06:18:32 ericm Exp $ | |
73 | '\" | |
74 | '\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages. | |
75 | .if t .wh -1.3i ^B | |
76 | .nr ^l \n(.l | |
77 | .ad b | |
78 | '\" # Start an argument description | |
79 | .de AP | |
80 | .ie !"\\$4"" .TP \\$4 | |
81 | .el \{\ | |
82 | . ie !"\\$2"" .TP \\n()Cu | |
83 | . el .TP 15 | |
84 | .\} | |
85 | .ta \\n()Au \\n()Bu | |
86 | .ie !"\\$3"" \{\ | |
87 | \&\\$1 \\fI\\$2\\fP (\\$3) | |
88 | .\".b | |
89 | .\} | |
90 | .el \{\ | |
91 | .br | |
92 | .ie !"\\$2"" \{\ | |
93 | \&\\$1 \\fI\\$2\\fP | |
94 | .\} | |
95 | .el \{\ | |
96 | \&\\fI\\$1\\fP | |
97 | .\} | |
98 | .\} | |
99 | .. | |
100 | '\" # define tabbing values for .AP | |
101 | .de AS | |
102 | .nr )A 10n | |
103 | .if !"\\$1"" .nr )A \\w'\\$1'u+3n | |
104 | .nr )B \\n()Au+15n | |
105 | .\" | |
106 | .if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n | |
107 | .nr )C \\n()Bu+\\w'(in/out)'u+2n | |
108 | .. | |
109 | .AS Tcl_Interp Tcl_CreateInterp in/out | |
110 | '\" # BS - start boxed text | |
111 | '\" # ^y = starting y location | |
112 | '\" # ^b = 1 | |
113 | .de BS | |
114 | .br | |
115 | .mk ^y | |
116 | .nr ^b 1u | |
117 | .if n .nf | |
118 | .if n .ti 0 | |
119 | .if n \l'\\n(.lu\(ul' | |
120 | .if n .fi | |
121 | .. | |
122 | '\" # BE - end boxed text (draw box now) | |
123 | .de BE | |
124 | .nf | |
125 | .ti 0 | |
126 | .mk ^t | |
127 | .ie n \l'\\n(^lu\(ul' | |
128 | .el \{\ | |
129 | .\" Draw four-sided box normally, but don't draw top of | |
130 | .\" box if the box started on an earlier page. | |
131 | .ie !\\n(^b-1 \{\ | |
132 | \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' | |
133 | .\} | |
134 | .el \}\ | |
135 | \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' | |
136 | .\} | |
137 | .\} | |
138 | .fi | |
139 | .br | |
140 | .nr ^b 0 | |
141 | .. | |
142 | '\" # VS - start vertical sidebar | |
143 | '\" # ^Y = starting y location | |
144 | '\" # ^v = 1 (for troff; for nroff this doesn't matter) | |
145 | .de VS | |
146 | .if !"\\$2"" .br | |
147 | .mk ^Y | |
148 | .ie n 'mc \s12\(br\s0 | |
149 | .el .nr ^v 1u | |
150 | .. | |
151 | '\" # VE - end of vertical sidebar | |
152 | .de VE | |
153 | .ie n 'mc | |
154 | .el \{\ | |
155 | .ev 2 | |
156 | .nf | |
157 | .ti 0 | |
158 | .mk ^t | |
159 | \h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n' | |
160 | .sp -1 | |
161 | .fi | |
162 | .ev | |
163 | .\} | |
164 | .nr ^v 0 | |
165 | .. | |
166 | '\" # Special macro to handle page bottom: finish off current | |
167 | '\" # box/sidebar if in box/sidebar mode, then invoke the standard | |
168 | '\" # page bottom macro. | |
169 | .de ^B | |
170 | .ev 2 | |
171 | 'ti 0 | |
172 | 'nf | |
173 | .mk ^t | |
174 | .if \\n(^b \{\ | |
175 | .\" Draw three-sided box if this is the box's first page, | |
176 | .\" draw two sides but no top otherwise. | |
177 | .ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c | |
178 | .el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c | |
179 | .\} | |
180 | .if \\n(^v \{\ | |
181 | .nr ^x \\n(^tu+1v-\\n(^Yu | |
182 | \kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c | |
183 | .\} | |
184 | .bp | |
185 | 'fi | |
186 | .ev | |
187 | .if \\n(^b \{\ | |
188 | .mk ^y | |
189 | .nr ^b 2 | |
190 | .\} | |
191 | .if \\n(^v \{\ | |
192 | .mk ^Y | |
193 | .\} | |
194 | .. | |
195 | '\" # DS - begin display | |
196 | .de DS | |
197 | .RS | |
198 | .nf | |
199 | .sp | |
200 | .. | |
201 | '\" # DE - end display | |
202 | .de DE | |
203 | .fi | |
204 | .RE | |
205 | .sp | |
206 | .. | |
207 | '\" # SO - start of list of standard options | |
208 | .de SO | |
209 | .SH "STANDARD OPTIONS" | |
210 | .LP | |
211 | .nf | |
212 | .ta 5.5c 11c | |
213 | .ft B | |
214 | .. | |
215 | '\" # SE - end of list of standard options | |
216 | .de SE | |
217 | .fi | |
218 | .ft R | |
219 | .LP | |
220 | See the \\fBoptions\\fR manual entry for details on the standard options. | |
221 | .. | |
222 | '\" # OP - start of full description for a single option | |
223 | .de OP | |
224 | .LP | |
225 | .nf | |
226 | .ta 4c | |
227 | Command-Line Name: \\fB\\$1\\fR | |
228 | Database Name: \\fB\\$2\\fR | |
229 | Database Class: \\fB\\$3\\fR | |
230 | .fi | |
231 | .IP | |
232 | .. | |
233 | '\" # CS - begin code excerpt | |
234 | .de CS | |
235 | .RS | |
236 | .nf | |
237 | .ta .25i .5i .75i 1i | |
238 | .. | |
239 | '\" # CE - end code excerpt | |
240 | .de CE | |
241 | .fi | |
242 | .RE | |
243 | .. | |
244 | .de UL | |
245 | \\$1\l'|0\(ul'\\$2 | |
246 | .. | |
247 | .TH Tcl_RegExpMatch 3 8.1 Tcl "Tcl Library Procedures" | |
248 | .BS | |
249 | .SH NAME | |
250 | Tcl_RegExpMatch, Tcl_RegExpCompile, Tcl_RegExpExec, Tcl_RegExpRange, Tcl_GetRegExpFromObj, Tcl_RegExpMatchObj, Tcl_RegExpExecObj, Tcl_RegExpGetInfo \- Pattern matching with regular expressions | |
251 | .SH SYNOPSIS | |
252 | .nf | |
253 | \fB#include <tcl.h>\fR | |
254 | .sp | |
255 | int | |
256 | \fBTcl_RegExpMatchObj\fR(\fIinterp\fR, \fIstrObj\fR, \fIpatObj\fR) | |
257 | .sp | |
258 | int | |
259 | \fBTcl_RegExpMatch\fR(\fIinterp\fR, \fIstring\fR, \fIpattern\fR) | |
260 | .sp | |
261 | Tcl_RegExp | |
262 | \fBTcl_RegExpCompile\fR(\fIinterp\fR, \fIpattern\fR) | |
263 | .sp | |
264 | int | |
265 | \fBTcl_RegExpExec\fR(\fIinterp\fR, \fIregexp\fR, \fIstring\fR, \fIstart\fR) | |
266 | .sp | |
267 | \fBTcl_RegExpRange\fR(\fIregexp\fR, \fIindex\fR, \fIstartPtr\fR, \fIendPtr\fR) | |
268 | .VS 8.1 | |
269 | .sp | |
270 | Tcl_RegExp | |
271 | \fBTcl_GetRegExpFromObj\fR(\fIinterp\fR, \fIpatObj\fR, \fIcflags\fR) | |
272 | .sp | |
273 | int | |
274 | \fBTcl_RegExpExecObj\fR(\fIinterp\fR, \fIregexp\fR, \fIobjPtr\fR, \fIoffset\fR, \fInmatches\fR, \fIeflags\fR) | |
275 | .sp | |
276 | \fBTcl_RegExpGetInfo\fR(\fIregexp\fR, \fIinfoPtr\fR) | |
277 | .VE 8.1 | |
278 | ||
279 | .SH ARGUMENTS | |
280 | .AS Tcl_Interp *interp | |
281 | .AP Tcl_Interp *interp in | |
282 | Tcl interpreter to use for error reporting. The interpreter may be | |
283 | NULL if no error reporting is desired. | |
284 | .VS 8.1 | |
285 | .AP Tcl_Obj *strObj in/out | |
286 | Refers to the object from which to get the string to search. The | |
287 | internal representation of the object may be converted to a form that | |
288 | can be efficiently searched. | |
289 | .AP Tcl_Obj *patObj in/out | |
290 | Refers to the object from which to get a regular expression. The | |
291 | compiled regular expression is cached in the object. | |
292 | .VE 8.1 | |
293 | .AP char *string in | |
294 | String to check for a match with a regular expression. | |
295 | .AP "CONST char" *pattern in | |
296 | String in the form of a regular expression pattern. | |
297 | .AP Tcl_RegExp regexp in | |
298 | Compiled regular expression. Must have been returned previously | |
299 | by \fBTcl_GetRegExpFromObj\fR or \fBTcl_RegExpCompile\fR. | |
300 | .AP char *start in | |
301 | If \fIstring\fR is just a portion of some other string, this argument | |
302 | identifies the beginning of the larger string. | |
303 | If it isn't the same as \fIstring\fR, then no \fB^\fR matches | |
304 | will be allowed. | |
305 | .AP int index in | |
306 | Specifies which range is desired: 0 means the range of the entire | |
307 | match, 1 or greater means the range that matched a parenthesized | |
308 | sub-expression. | |
309 | .VS 8.4 | |
310 | .AP "CONST char" **startPtr out | |
311 | The address of the first character in the range is stored here, or | |
312 | NULL if there is no such range. | |
313 | .AP "CONST char" **endPtr out | |
314 | The address of the character just after the last one in the range | |
315 | is stored here, or NULL if there is no such range. | |
316 | .VE 8.4 | |
317 | .VS 8.1 | |
318 | .AP int cflags in | |
319 | OR-ed combination of compilation flags. See below for more information. | |
320 | .AP Tcl_Obj *objPtr in/out | |
321 | An object which contains the string to check for a match with a | |
322 | regular expression. | |
323 | .AP int offset in | |
324 | The character offset into the string where matching should begin. | |
325 | The value of the offset has no impact on \fB^\fR matches. This | |
326 | behavior is controlled by \fIeflags\fR. | |
327 | .AP int nmatches in | |
328 | The number of matching subexpressions that should be remembered for | |
329 | later use. If this value is 0, then no subexpression match | |
330 | information will be computed. If the value is -1, then | |
331 | all of the matching subexpressions will be remembered. Any other | |
332 | value will be taken as the maximum number of subexpressions to | |
333 | remember. | |
334 | .AP int eflags in | |
335 | OR-ed combination of the values TCL_REG_NOTBOL and TCL_REG_NOTEOL. | |
336 | See below for more information. | |
337 | .AP Tcl_RegExpInfo *infoPtr out | |
338 | The address of the location where information about a previous match | |
339 | should be stored by \fBTcl_RegExpGetInfo\fR. | |
340 | .VE 8.1 | |
341 | .BE | |
342 | ||
343 | .SH DESCRIPTION | |
344 | .PP | |
345 | \fBTcl_RegExpMatch\fR determines whether its \fIstring\fR argument | |
346 | matches \fIpattern\fR, where \fIpattern\fR is interpreted | |
347 | as a regular expression using the rules in the \fBre_syntax\fR | |
348 | reference page. | |
349 | If there is a match then \fBTcl_RegExpMatch\fR returns 1. | |
350 | If there is no match then \fBTcl_RegExpMatch\fR returns 0. | |
351 | If an error occurs in the matching process (e.g. \fIpattern\fR | |
352 | is not a valid regular expression) then \fBTcl_RegExpMatch\fR | |
353 | returns \-1 and leaves an error message in the interpreter result. | |
354 | .VS 8.1.2 | |
355 | \fBTcl_RegExpMatchObj\fR is similar to \fBTcl_RegExpMatch\fR except it | |
356 | operates on the Tcl objects \fIstrObj\fR and \fIpatObj\fR instead of | |
357 | UTF strings. | |
358 | \fBTcl_RegExpMatchObj\fR is generally more efficient than | |
359 | \fBTcl_RegExpMatch\fR, so it is the preferred interface. | |
360 | .VE 8.1.2 | |
361 | .PP | |
362 | \fBTcl_RegExpCompile\fR, \fBTcl_RegExpExec\fR, and \fBTcl_RegExpRange\fR | |
363 | provide lower-level access to the regular expression pattern matcher. | |
364 | \fBTcl_RegExpCompile\fR compiles a regular expression string into | |
365 | the internal form used for efficient pattern matching. | |
366 | The return value is a token for this compiled form, which can be | |
367 | used in subsequent calls to \fBTcl_RegExpExec\fR or \fBTcl_RegExpRange\fR. | |
368 | If an error occurs while compiling the regular expression then | |
369 | \fBTcl_RegExpCompile\fR returns NULL and leaves an error message | |
370 | in the interpreter result. | |
371 | Note: the return value from \fBTcl_RegExpCompile\fR is only valid | |
372 | up to the next call to \fBTcl_RegExpCompile\fR; it is not safe to | |
373 | retain these values for long periods of time. | |
374 | .PP | |
375 | \fBTcl_RegExpExec\fR executes the regular expression pattern matcher. | |
376 | It returns 1 if \fIstring\fR contains a range of characters that | |
377 | match \fIregexp\fR, 0 if no match is found, and | |
378 | \-1 if an error occurs. | |
379 | In the case of an error, \fBTcl_RegExpExec\fR leaves an error | |
380 | message in the interpreter result. | |
381 | When searching a string for multiple matches of a pattern, | |
382 | it is important to distinguish between the start of the original | |
383 | string and the start of the current search. | |
384 | For example, when searching for the second occurrence of a | |
385 | match, the \fIstring\fR argument might point to the character | |
386 | just after the first match; however, it is important for the | |
387 | pattern matcher to know that this is not the start of the entire string, | |
388 | so that it doesn't allow \fB^\fR atoms in the pattern to match. | |
389 | The \fIstart\fR argument provides this information by pointing | |
390 | to the start of the overall string containing \fIstring\fR. | |
391 | \fIStart\fR will be less than or equal to \fIstring\fR; if it | |
392 | is less than \fIstring\fR then no \fB^\fR matches will be allowed. | |
393 | .PP | |
394 | \fBTcl_RegExpRange\fR may be invoked after \fBTcl_RegExpExec\fR | |
395 | returns; it provides detailed information about what ranges of | |
396 | the string matched what parts of the pattern. | |
397 | \fBTcl_RegExpRange\fR returns a pair of pointers in \fI*startPtr\fR | |
398 | and \fI*endPtr\fR that identify a range of characters in | |
399 | the source string for the most recent call to \fBTcl_RegExpExec\fR. | |
400 | \fIIndex\fR indicates which of several ranges is desired: | |
401 | if \fIindex\fR is 0, information is returned about the overall range | |
402 | of characters that matched the entire pattern; otherwise, | |
403 | information is returned about the range of characters that matched the | |
404 | \fIindex\fR'th parenthesized subexpression within the pattern. | |
405 | If there is no range corresponding to \fIindex\fR then NULL | |
406 | is stored in \fI*startPtr\fR and \fI*endPtr\fR. | |
407 | .PP | |
408 | .VS 8.1 | |
409 | \fBTcl_GetRegExpFromObj\fR, \fBTcl_RegExpExecObj\fR, and | |
410 | \fBTcl_RegExpGetInfo\fR are object interfaces that provide the most | |
411 | direct control of Henry Spencer's regular expression library. For | |
412 | users that need to modify compilation and execution options directly, | |
413 | it is recommended that you use these interfaces instead of calling the | |
414 | internal regexp functions. These interfaces handle the details of UTF | |
415 | to Unicode translations as well as providing improved performance | |
416 | through caching in the pattern and string objects. | |
417 | .PP | |
418 | \fBTcl_GetRegExpFromObj\fR attempts to return a compiled regular | |
419 | expression from the \fIpatObj\fR. If the object does not already | |
420 | contain a compiled regular expression it will attempt to create one | |
421 | from the string in the object and assign it to the internal | |
422 | representation of the \fIpatObj\fR. The return value of this function | |
423 | is of type \fBTcl_RegExp\fR. The return value is a token for this | |
424 | compiled form, which can be used in subsequent calls to | |
425 | \fBTcl_RegExpExecObj\fR or \fBTcl_RegExpGetInfo\fR. If an error | |
426 | occurs while compiling the regular expression then | |
427 | \fBTcl_GetRegExpFromObj\fR returns NULL and leaves an error message in | |
428 | the interpreter result. The regular expression token can be used as | |
429 | long as the internal representation of \fIpatObj\fR refers to the | |
430 | compiled form. The \fIcflags\fR argument is a bitwise OR of | |
431 | zero or more of the following flags that control the compilation of | |
432 | \fIpatObj\fR: | |
433 | .RS 2 | |
434 | .TP | |
435 | \fBTCL_REG_ADVANCED\fR | |
436 | Compile advanced regular expressions (`AREs'). This mode corresponds to | |
437 | the normal regular expression syntax accepted by the Tcl regexp and | |
438 | regsub commands. | |
439 | .TP | |
440 | \fBTCL_REG_EXTENDED\fR | |
441 | Compile extended regular expressions (`EREs'). This mode corresponds | |
442 | to the regular expression syntax recognized by Tcl 8.0 and earlier | |
443 | versions. | |
444 | .TP | |
445 | \fBTCL_REG_BASIC\fR | |
446 | Compile basic regular expressions (`BREs'). This mode corresponds | |
447 | to the regular expression syntax recognized by common Unix utilities | |
448 | like \fBsed\fR and \fBgrep\fR. This is the default if no flags are | |
449 | specified. | |
450 | .TP | |
451 | \fBTCL_REG_EXPANDED\fR | |
452 | Compile the regular expression (basic, extended, or advanced) using an | |
453 | expanded syntax that allows comments and whitespace. This mode causes | |
454 | non-backslashed non-bracket-expression white | |
455 | space and #-to-end-of-line comments to be ignored. | |
456 | .TP | |
457 | \fBTCL_REG_QUOTE\fR | |
458 | Compile a literal string, with all characters treated as ordinary characters. | |
459 | .TP | |
460 | \fBTCL_REG_NOCASE\fR | |
461 | Compile for matching that ignores upper/lower case distinctions. | |
462 | .TP | |
463 | \fBTCL_REG_NEWLINE\fR | |
464 | Compile for newline-sensitive matching. By default, newline is a | |
465 | completely ordinary character with no special meaning in either | |
466 | regular expressions or strings. With this flag, `[^' bracket | |
467 | expressions and `.' never match newline, `^' matches an empty string | |
468 | after any newline in addition to its normal function, and `$' matches | |
469 | an empty string before any newline in addition to its normal function. | |
470 | \fBTCL_REG_NEWLINE\fR is the bitwise OR of \fBTCL_REG_NLSTOP\fR and | |
471 | \fBTCL_REG_NLANCH\fR. | |
472 | .TP | |
473 | \fBTCL_REG_NLSTOP\fR | |
474 | Compile for partial newline-sensitive matching, | |
475 | with the behavior of | |
476 | `[^' bracket expressions and `.' affected, | |
477 | but not the behavior of `^' and `$'. In this mode, `[^' bracket | |
478 | expressions and `.' never match newline. | |
479 | .TP | |
480 | \fBTCL_REG_NLANCH\fR | |
481 | Compile for inverse partial newline-sensitive matching, | |
482 | with the behavior of | |
483 | `^' and `$' (the ``anchors'') affected, but not the behavior of | |
484 | `[^' bracket expressions and `.'. In this mode `^' matches an empty string | |
485 | after any newline in addition to its normal function, and `$' matches | |
486 | an empty string before any newline in addition to its normal function. | |
487 | .TP | |
488 | \fBTCL_REG_NOSUB\fR | |
489 | Compile for matching that reports only success or failure, | |
490 | not what was matched. This reduces compile overhead and may improve | |
491 | performance. Subsequent calls to \fBTcl_RegExpGetInfo\fR or | |
492 | \fBTcl_RegExpRange\fR will not report any match information. | |
493 | .TP | |
494 | \fBTCL_REG_CANMATCH\fR | |
495 | Compile for matching that reports the potential to complete a partial | |
496 | match given more text (see below). | |
497 | .RE | |
498 | .PP | |
499 | Only one of | |
500 | \fBTCL_REG_EXTENDED\fR, | |
501 | \fBTCL_REG_ADVANCED\fR, | |
502 | \fBTCL_REG_BASIC\fR, and | |
503 | \fBTCL_REG_QUOTE\fR may be specified. | |
504 | .PP | |
505 | \fBTcl_RegExpExecObj\fR executes the regular expression pattern | |
506 | matcher. It returns 1 if \fIobjPtr\fR contains a range of characters | |
507 | that match \fIregexp\fR, 0 if no match is found, and \-1 if an error | |
508 | occurs. In the case of an error, \fBTcl_RegExpExecObj\fR leaves an | |
509 | error message in the interpreter result. The \fInmatches\fR value | |
510 | indicates to the matcher how many subexpressions are of interest. If | |
511 | \fInmatches\fR is 0, then no subexpression match information is | |
512 | recorded, which may allow the matcher to make various optimizations. | |
513 | If the value is -1, then all of the subexpressions in the pattern are | |
514 | remembered. If the value is a positive integer, then only that number | |
515 | of subexpressions will be remembered. Matching begins at the | |
516 | specified Unicode character index given by \fIoffset\fR. Unlike | |
517 | \fBTcl_RegExpExec\fR, the behavior of anchors is not affected by the | |
518 | offset value. Instead the behavior of the anchors is explicitly | |
519 | controlled by the \fIeflags\fR argument, which is a bitwise OR of | |
520 | zero or more of the following flags: | |
521 | .RS 2 | |
522 | .TP | |
523 | \fBTCL_REG_NOTBOL\fR | |
524 | The starting character will not be treated as the beginning of a | |
525 | line or the beginning of the string, so `^' will not match there. | |
526 | Note that this flag has no effect on how `\fB\eA\fR' matches. | |
527 | .TP | |
528 | \fBTCL_REG_NOTEOL\fR | |
529 | The last character in the string will not be treated as the end of a | |
530 | line or the end of the string, so `$' will not match there. | |
531 | Note that this flag has no effect on how `\fB\eZ\fR' matches. | |
532 | .RE | |
533 | .PP | |
534 | \fBTcl_RegExpGetInfo\fR retrieves information about the last match | |
535 | performed with a given regular expression \fIregexp\fR. The | |
536 | \fIinfoPtr\fR argument contains a pointer to a structure that is | |
537 | defined as follows: | |
538 | .PP | |
539 | .CS | |
540 | typedef struct Tcl_RegExpInfo { | |
541 | int \fInsubs\fR; | |
542 | Tcl_RegExpIndices *\fImatches\fR; | |
543 | long \fIextendStart\fR; | |
544 | } Tcl_RegExpInfo; | |
545 | .CE | |
546 | .PP | |
547 | The \fInsubs\fR field contains a count of the number of parenthesized | |
548 | subexpressions within the regular expression. If the \fBTCL_REG_NOSUB\fR | |
549 | flag was used, then this value will be zero. The \fImatches\fR field | |
550 | points to an array of \fInsubs\fR+1 values that indicate the bounds of each | |
551 | subexpression matched. The first element in the array refers to the | |
552 | range matched by the entire regular expression, and subsequent elements | |
553 | refer to the parenthesized subexpressions in the order that they | |
554 | appear in the pattern. Each element is a structure that is defined as | |
555 | follows: | |
556 | .PP | |
557 | .CS | |
558 | typedef struct Tcl_RegExpIndices { | |
559 | long \fIstart\fR; | |
560 | long \fIend\fR; | |
561 | } Tcl_RegExpIndices; | |
562 | .CE | |
563 | .PP | |
564 | The \fIstart\fR and \fIend\fR values are Unicode character indices | |
565 | relative to the offset location within \fIobjPtr\fR where matching began. | |
566 | The \fIstart\fR index identifies the first character of the matched | |
567 | subexpression. The \fIend\fR index identifies the first character | |
568 | after the matched subexpression. If the subexpression matched the | |
569 | empty string, then \fIstart\fR and \fIend\fR will be equal. If the | |
570 | subexpression did not participate in the match, then \fIstart\fR and | |
571 | \fIend\fR will be set to -1. | |
572 | .PP | |
573 | The \fIextendStart\fR field in \fBTcl_RegExpInfo\fR is only set if the | |
574 | \fBTCL_REG_CANMATCH\fR flag was used. It indicates the first | |
575 | character in the string where a match could occur. If a match was | |
576 | found, this will be the same as the beginning of the current match. | |
577 | If no match was found, then it indicates the earliest point at which a | |
578 | match might occur if additional text is appended to the string. If | |
579 | no match is possible even with further text, this field will be set | |
580 | to -1. | |
581 | .VE 8.1 | |
582 | .SH "SEE ALSO" | |
583 | re_syntax(n) | |
584 | .SH KEYWORDS | |
585 | match, pattern, regular expression, string, subexpression, Tcl_RegExpIndices, Tcl_RegExpInfo |