Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / amd64 / man / man3 / Tcl_RegExpExec.3
CommitLineData
920dae64
AT
1'\"
2'\" Copyright (c) 1994 The Regents of the University of California.
3'\" Copyright (c) 1994-1996 Sun Microsystems, Inc.
4'\" Copyright (c) 1998-1999 Scriptics Corporation
5'\"
6'\" See the file "license.terms" for information on usage and redistribution
7'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
8'\"
9'\" RCS: @(#) $Id: RegExp.3,v 1.13 2002/11/13 22:11:40 vincentdarley Exp $
10'\"
11'\" The definitions below are for supplemental macros used in Tcl/Tk
12'\" manual entries.
13'\"
14'\" .AP type name in/out ?indent?
15'\" Start paragraph describing an argument to a library procedure.
16'\" type is type of argument (int, etc.), in/out is either "in", "out",
17'\" or "in/out" to describe whether procedure reads or modifies arg,
18'\" and indent is equivalent to second arg of .IP (shouldn't ever be
19'\" needed; use .AS below instead)
20'\"
21'\" .AS ?type? ?name?
22'\" Give maximum sizes of arguments for setting tab stops. Type and
23'\" name are examples of largest possible arguments that will be passed
24'\" to .AP later. If args are omitted, default tab stops are used.
25'\"
26'\" .BS
27'\" Start box enclosure. From here until next .BE, everything will be
28'\" enclosed in one large box.
29'\"
30'\" .BE
31'\" End of box enclosure.
32'\"
33'\" .CS
34'\" Begin code excerpt.
35'\"
36'\" .CE
37'\" End code excerpt.
38'\"
39'\" .VS ?version? ?br?
40'\" Begin vertical sidebar, for use in marking newly-changed parts
41'\" of man pages. The first argument is ignored and used for recording
42'\" the version when the .VS was added, so that the sidebars can be
43'\" found and removed when they reach a certain age. If another argument
44'\" is present, then a line break is forced before starting the sidebar.
45'\"
46'\" .VE
47'\" End of vertical sidebar.
48'\"
49'\" .DS
50'\" Begin an indented unfilled display.
51'\"
52'\" .DE
53'\" End of indented unfilled display.
54'\"
55'\" .SO
56'\" Start of list of standard options for a Tk widget. The
57'\" options follow on successive lines, in four columns separated
58'\" by tabs.
59'\"
60'\" .SE
61'\" End of list of standard options for a Tk widget.
62'\"
63'\" .OP cmdName dbName dbClass
64'\" Start of description of a specific option. cmdName gives the
65'\" option's name as specified in the class command, dbName gives
66'\" the option's name in the option database, and dbClass gives
67'\" the option's class in the option database.
68'\"
69'\" .UL arg1 arg2
70'\" Print arg1 underlined, then print arg2 normally.
71'\"
72'\" RCS: @(#) $Id: man.macros,v 1.4 2000/08/25 06:18:32 ericm Exp $
73'\"
74'\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages.
75.if t .wh -1.3i ^B
76.nr ^l \n(.l
77.ad b
78'\" # Start an argument description
79.de AP
80.ie !"\\$4"" .TP \\$4
81.el \{\
82. ie !"\\$2"" .TP \\n()Cu
83. el .TP 15
84.\}
85.ta \\n()Au \\n()Bu
86.ie !"\\$3"" \{\
87\&\\$1 \\fI\\$2\\fP (\\$3)
88.\".b
89.\}
90.el \{\
91.br
92.ie !"\\$2"" \{\
93\&\\$1 \\fI\\$2\\fP
94.\}
95.el \{\
96\&\\fI\\$1\\fP
97.\}
98.\}
99..
100'\" # define tabbing values for .AP
101.de AS
102.nr )A 10n
103.if !"\\$1"" .nr )A \\w'\\$1'u+3n
104.nr )B \\n()Au+15n
105.\"
106.if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n
107.nr )C \\n()Bu+\\w'(in/out)'u+2n
108..
109.AS Tcl_Interp Tcl_CreateInterp in/out
110'\" # BS - start boxed text
111'\" # ^y = starting y location
112'\" # ^b = 1
113.de BS
114.br
115.mk ^y
116.nr ^b 1u
117.if n .nf
118.if n .ti 0
119.if n \l'\\n(.lu\(ul'
120.if n .fi
121..
122'\" # BE - end boxed text (draw box now)
123.de BE
124.nf
125.ti 0
126.mk ^t
127.ie n \l'\\n(^lu\(ul'
128.el \{\
129.\" Draw four-sided box normally, but don't draw top of
130.\" box if the box started on an earlier page.
131.ie !\\n(^b-1 \{\
132\h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul'
133.\}
134.el \}\
135\h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul'
136.\}
137.\}
138.fi
139.br
140.nr ^b 0
141..
142'\" # VS - start vertical sidebar
143'\" # ^Y = starting y location
144'\" # ^v = 1 (for troff; for nroff this doesn't matter)
145.de VS
146.if !"\\$2"" .br
147.mk ^Y
148.ie n 'mc \s12\(br\s0
149.el .nr ^v 1u
150..
151'\" # VE - end of vertical sidebar
152.de VE
153.ie n 'mc
154.el \{\
155.ev 2
156.nf
157.ti 0
158.mk ^t
159\h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n'
160.sp -1
161.fi
162.ev
163.\}
164.nr ^v 0
165..
166'\" # Special macro to handle page bottom: finish off current
167'\" # box/sidebar if in box/sidebar mode, then invoke the standard
168'\" # page bottom macro.
169.de ^B
170.ev 2
171'ti 0
172'nf
173.mk ^t
174.if \\n(^b \{\
175.\" Draw three-sided box if this is the box's first page,
176.\" draw two sides but no top otherwise.
177.ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c
178.el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c
179.\}
180.if \\n(^v \{\
181.nr ^x \\n(^tu+1v-\\n(^Yu
182\kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c
183.\}
184.bp
185'fi
186.ev
187.if \\n(^b \{\
188.mk ^y
189.nr ^b 2
190.\}
191.if \\n(^v \{\
192.mk ^Y
193.\}
194..
195'\" # DS - begin display
196.de DS
197.RS
198.nf
199.sp
200..
201'\" # DE - end display
202.de DE
203.fi
204.RE
205.sp
206..
207'\" # SO - start of list of standard options
208.de SO
209.SH "STANDARD OPTIONS"
210.LP
211.nf
212.ta 5.5c 11c
213.ft B
214..
215'\" # SE - end of list of standard options
216.de SE
217.fi
218.ft R
219.LP
220See the \\fBoptions\\fR manual entry for details on the standard options.
221..
222'\" # OP - start of full description for a single option
223.de OP
224.LP
225.nf
226.ta 4c
227Command-Line Name: \\fB\\$1\\fR
228Database Name: \\fB\\$2\\fR
229Database Class: \\fB\\$3\\fR
230.fi
231.IP
232..
233'\" # CS - begin code excerpt
234.de CS
235.RS
236.nf
237.ta .25i .5i .75i 1i
238..
239'\" # CE - end code excerpt
240.de CE
241.fi
242.RE
243..
244.de UL
245\\$1\l'|0\(ul'\\$2
246..
247.TH Tcl_RegExpMatch 3 8.1 Tcl "Tcl Library Procedures"
248.BS
249.SH NAME
250Tcl_RegExpMatch, Tcl_RegExpCompile, Tcl_RegExpExec, Tcl_RegExpRange, Tcl_GetRegExpFromObj, Tcl_RegExpMatchObj, Tcl_RegExpExecObj, Tcl_RegExpGetInfo \- Pattern matching with regular expressions
251.SH SYNOPSIS
252.nf
253\fB#include <tcl.h>\fR
254.sp
255int
256\fBTcl_RegExpMatchObj\fR(\fIinterp\fR, \fIstrObj\fR, \fIpatObj\fR)
257.sp
258int
259\fBTcl_RegExpMatch\fR(\fIinterp\fR, \fIstring\fR, \fIpattern\fR)
260.sp
261Tcl_RegExp
262\fBTcl_RegExpCompile\fR(\fIinterp\fR, \fIpattern\fR)
263.sp
264int
265\fBTcl_RegExpExec\fR(\fIinterp\fR, \fIregexp\fR, \fIstring\fR, \fIstart\fR)
266.sp
267\fBTcl_RegExpRange\fR(\fIregexp\fR, \fIindex\fR, \fIstartPtr\fR, \fIendPtr\fR)
268.VS 8.1
269.sp
270Tcl_RegExp
271\fBTcl_GetRegExpFromObj\fR(\fIinterp\fR, \fIpatObj\fR, \fIcflags\fR)
272.sp
273int
274\fBTcl_RegExpExecObj\fR(\fIinterp\fR, \fIregexp\fR, \fIobjPtr\fR, \fIoffset\fR, \fInmatches\fR, \fIeflags\fR)
275.sp
276\fBTcl_RegExpGetInfo\fR(\fIregexp\fR, \fIinfoPtr\fR)
277.VE 8.1
278
279.SH ARGUMENTS
280.AS Tcl_Interp *interp
281.AP Tcl_Interp *interp in
282Tcl interpreter to use for error reporting. The interpreter may be
283NULL if no error reporting is desired.
284.VS 8.1
285.AP Tcl_Obj *strObj in/out
286Refers to the object from which to get the string to search. The
287internal representation of the object may be converted to a form that
288can be efficiently searched.
289.AP Tcl_Obj *patObj in/out
290Refers to the object from which to get a regular expression. The
291compiled regular expression is cached in the object.
292.VE 8.1
293.AP char *string in
294String to check for a match with a regular expression.
295.AP "CONST char" *pattern in
296String in the form of a regular expression pattern.
297.AP Tcl_RegExp regexp in
298Compiled regular expression. Must have been returned previously
299by \fBTcl_GetRegExpFromObj\fR or \fBTcl_RegExpCompile\fR.
300.AP char *start in
301If \fIstring\fR is just a portion of some other string, this argument
302identifies the beginning of the larger string.
303If it isn't the same as \fIstring\fR, then no \fB^\fR matches
304will be allowed.
305.AP int index in
306Specifies which range is desired: 0 means the range of the entire
307match, 1 or greater means the range that matched a parenthesized
308sub-expression.
309.VS 8.4
310.AP "CONST char" **startPtr out
311The address of the first character in the range is stored here, or
312NULL if there is no such range.
313.AP "CONST char" **endPtr out
314The address of the character just after the last one in the range
315is stored here, or NULL if there is no such range.
316.VE 8.4
317.VS 8.1
318.AP int cflags in
319OR-ed combination of compilation flags. See below for more information.
320.AP Tcl_Obj *objPtr in/out
321An object which contains the string to check for a match with a
322regular expression.
323.AP int offset in
324The character offset into the string where matching should begin.
325The value of the offset has no impact on \fB^\fR matches. This
326behavior is controlled by \fIeflags\fR.
327.AP int nmatches in
328The number of matching subexpressions that should be remembered for
329later use. If this value is 0, then no subexpression match
330information will be computed. If the value is -1, then
331all of the matching subexpressions will be remembered. Any other
332value will be taken as the maximum number of subexpressions to
333remember.
334.AP int eflags in
335OR-ed combination of the values TCL_REG_NOTBOL and TCL_REG_NOTEOL.
336See below for more information.
337.AP Tcl_RegExpInfo *infoPtr out
338The address of the location where information about a previous match
339should be stored by \fBTcl_RegExpGetInfo\fR.
340.VE 8.1
341.BE
342
343.SH DESCRIPTION
344.PP
345\fBTcl_RegExpMatch\fR determines whether its \fIstring\fR argument
346matches \fIpattern\fR, where \fIpattern\fR is interpreted
347as a regular expression using the rules in the \fBre_syntax\fR
348reference page.
349If there is a match then \fBTcl_RegExpMatch\fR returns 1.
350If there is no match then \fBTcl_RegExpMatch\fR returns 0.
351If an error occurs in the matching process (e.g. \fIpattern\fR
352is not a valid regular expression) then \fBTcl_RegExpMatch\fR
353returns \-1 and leaves an error message in the interpreter result.
354.VS 8.1.2
355\fBTcl_RegExpMatchObj\fR is similar to \fBTcl_RegExpMatch\fR except it
356operates on the Tcl objects \fIstrObj\fR and \fIpatObj\fR instead of
357UTF strings.
358\fBTcl_RegExpMatchObj\fR is generally more efficient than
359\fBTcl_RegExpMatch\fR, so it is the preferred interface.
360.VE 8.1.2
361.PP
362\fBTcl_RegExpCompile\fR, \fBTcl_RegExpExec\fR, and \fBTcl_RegExpRange\fR
363provide lower-level access to the regular expression pattern matcher.
364\fBTcl_RegExpCompile\fR compiles a regular expression string into
365the internal form used for efficient pattern matching.
366The return value is a token for this compiled form, which can be
367used in subsequent calls to \fBTcl_RegExpExec\fR or \fBTcl_RegExpRange\fR.
368If an error occurs while compiling the regular expression then
369\fBTcl_RegExpCompile\fR returns NULL and leaves an error message
370in the interpreter result.
371Note: the return value from \fBTcl_RegExpCompile\fR is only valid
372up to the next call to \fBTcl_RegExpCompile\fR; it is not safe to
373retain these values for long periods of time.
374.PP
375\fBTcl_RegExpExec\fR executes the regular expression pattern matcher.
376It returns 1 if \fIstring\fR contains a range of characters that
377match \fIregexp\fR, 0 if no match is found, and
378\-1 if an error occurs.
379In the case of an error, \fBTcl_RegExpExec\fR leaves an error
380message in the interpreter result.
381When searching a string for multiple matches of a pattern,
382it is important to distinguish between the start of the original
383string and the start of the current search.
384For example, when searching for the second occurrence of a
385match, the \fIstring\fR argument might point to the character
386just after the first match; however, it is important for the
387pattern matcher to know that this is not the start of the entire string,
388so that it doesn't allow \fB^\fR atoms in the pattern to match.
389The \fIstart\fR argument provides this information by pointing
390to the start of the overall string containing \fIstring\fR.
391\fIStart\fR will be less than or equal to \fIstring\fR; if it
392is less than \fIstring\fR then no \fB^\fR matches will be allowed.
393.PP
394\fBTcl_RegExpRange\fR may be invoked after \fBTcl_RegExpExec\fR
395returns; it provides detailed information about what ranges of
396the string matched what parts of the pattern.
397\fBTcl_RegExpRange\fR returns a pair of pointers in \fI*startPtr\fR
398and \fI*endPtr\fR that identify a range of characters in
399the source string for the most recent call to \fBTcl_RegExpExec\fR.
400\fIIndex\fR indicates which of several ranges is desired:
401if \fIindex\fR is 0, information is returned about the overall range
402of characters that matched the entire pattern; otherwise,
403information is returned about the range of characters that matched the
404\fIindex\fR'th parenthesized subexpression within the pattern.
405If there is no range corresponding to \fIindex\fR then NULL
406is stored in \fI*startPtr\fR and \fI*endPtr\fR.
407.PP
408.VS 8.1
409\fBTcl_GetRegExpFromObj\fR, \fBTcl_RegExpExecObj\fR, and
410\fBTcl_RegExpGetInfo\fR are object interfaces that provide the most
411direct control of Henry Spencer's regular expression library. For
412users that need to modify compilation and execution options directly,
413it is recommended that they use these interfaces instead of calling the
414internal regexp functions. These interfaces handle the details of UTF
415to Unicode translations as well as providing improved performance
416through caching in the pattern and string objects.
417.PP
418\fBTcl_GetRegExpFromObj\fR attempts to return a compiled regular
419expression from the \fIpatObj\fR. If the object does not already
420contain a compiled regular expression it will attempt to create one
421from the string in the object and assign it to the internal
422representation of the \fIpatObj\fR. The return value of this function
423is of type \fBTcl_RegExp\fR. The return value is a token for this
424compiled form, which can be used in subsequent calls to
425\fBTcl_RegExpExecObj\fR or \fBTcl_RegExpGetInfo\fR. If an error
426occurs while compiling the regular expression then
427\fBTcl_GetRegExpFromObj\fR returns NULL and leaves an error message in
428the interpreter result. The regular expression token can be used as
429long as the internal representation of \fIpatObj\fR refers to the
430compiled form. The \fIcflags\fR argument is a bitwise OR of
431zero or more of the following flags that control the compilation of
432\fIpatObj\fR:
433.RS 2
434.TP
435\fBTCL_REG_ADVANCED\fR
436Compile advanced regular expressions (`AREs'). This mode corresponds to
437the normal regular expression syntax accepted by the Tcl regexp and
438regsub commands.
439.TP
440\fBTCL_REG_EXTENDED\fR
441Compile extended regular expressions (`EREs'). This mode corresponds
442to the regular expression syntax recognized by Tcl 8.0 and earlier
443versions.
444.TP
445\fBTCL_REG_BASIC\fR
446Compile basic regular expressions (`BREs'). This mode corresponds
447to the regular expression syntax recognized by common Unix utilities
448like \fBsed\fR and \fBgrep\fR. This is the default if no flags are
449specified.
450.TP
451\fBTCL_REG_EXPANDED\fR
452Compile the regular expression (basic, extended, or advanced) using an
453expanded syntax that allows comments and whitespace. This mode causes
454non-backslashed non-bracket-expression white
455space and #-to-end-of-line comments to be ignored.
456.TP
457\fBTCL_REG_QUOTE\fR
458Compile a literal string, with all characters treated as ordinary characters.
459.TP
460\fBTCL_REG_NOCASE\fR
461Compile for matching that ignores upper/lower case distinctions.
462.TP
463\fBTCL_REG_NEWLINE\fR
464Compile for newline-sensitive matching. By default, newline is a
465completely ordinary character with no special meaning in either
466regular expressions or strings. With this flag, `[^' bracket
467expressions and `.' never match newline, `^' matches an empty string
468after any newline in addition to its normal function, and `$' matches
469an empty string before any newline in addition to its normal function.
470\fBTCL_REG_NEWLINE\fR is the bitwise OR of \fBTCL_REG_NLSTOP\fR and
471\fBTCL_REG_NLANCH\fR.
472.TP
473\fBTCL_REG_NLSTOP\fR
474Compile for partial newline-sensitive matching,
475with the behavior of
476`[^' bracket expressions and `.' affected,
477but not the behavior of `^' and `$'. In this mode, `[^' bracket
478expressions and `.' never match newline.
479.TP
480\fBTCL_REG_NLANCH\fR
481Compile for inverse partial newline-sensitive matching,
482with the behavior of
483`^' and `$' (the ``anchors'') affected, but not the behavior of
484`[^' bracket expressions and `.'. In this mode `^' matches an empty string
485after any newline in addition to its normal function, and `$' matches
486an empty string before any newline in addition to its normal function.
487.TP
488\fBTCL_REG_NOSUB\fR
489Compile for matching that reports only success or failure,
490not what was matched. This reduces compile overhead and may improve
491performance. Subsequent calls to \fBTcl_RegExpGetInfo\fR or
492\fBTcl_RegExpRange\fR will not report any match information.
493.TP
494\fBTCL_REG_CANMATCH\fR
495Compile for matching that reports the potential to complete a partial
496match given more text (see below).
497.RE
498.PP
499Only one of
500\fBTCL_REG_EXTENDED\fR,
501\fBTCL_REG_ADVANCED\fR,
502\fBTCL_REG_BASIC\fR, and
503\fBTCL_REG_QUOTE\fR may be specified.
504.PP
505\fBTcl_RegExpExecObj\fR executes the regular expression pattern
506matcher. It returns 1 if \fIobjPtr\fR contains a range of characters
507that match \fIregexp\fR, 0 if no match is found, and \-1 if an error
508occurs. In the case of an error, \fBTcl_RegExpExecObj\fR leaves an
509error message in the interpreter result. The \fInmatches\fR value
510indicates to the matcher how many subexpressions are of interest. If
511\fInmatches\fR is 0, then no subexpression match information is
512recorded, which may allow the matcher to make various optimizations.
513If the value is -1, then all of the subexpressions in the pattern are
514remembered. If the value is a positive integer, then only that number
515of subexpressions will be remembered. Matching begins at the
516specified Unicode character index given by \fIoffset\fR. Unlike
517\fBTcl_RegExpExec\fR, the behavior of anchors is not affected by the
518offset value. Instead the behavior of the anchors is explicitly
519controlled by the \fIeflags\fR argument, which is a bitwise OR of
520zero or more of the following flags:
521.RS 2
522.TP
523\fBTCL_REG_NOTBOL\fR
524The starting character will not be treated as the beginning of a
525line or the beginning of the string, so `^' will not match there.
526Note that this flag has no effect on how `\fB\eA\fR' matches.
527.TP
528\fBTCL_REG_NOTEOL\fR
529The last character in the string will not be treated as the end of a
530line or the end of the string, so `$' will not match there.
531Note that this flag has no effect on how `\fB\eZ\fR' matches.
532.RE
533.PP
534\fBTcl_RegExpGetInfo\fR retrieves information about the last match
535performed with a given regular expression \fIregexp\fR. The
536\fIinfoPtr\fR argument contains a pointer to a structure that is
537defined as follows:
538.PP
539.CS
540typedef struct Tcl_RegExpInfo {
541 int \fInsubs\fR;
542 Tcl_RegExpIndices *\fImatches\fR;
543 long \fIextendStart\fR;
544} Tcl_RegExpInfo;
545.CE
546.PP
547The \fInsubs\fR field contains a count of the number of parenthesized
548subexpressions within the regular expression. If the \fBTCL_REG_NOSUB\fR
549flag was used, then this value will be zero. The \fImatches\fR field
550points to an array of \fInsubs\fR+1 values that indicate the bounds of each
551subexpression matched. The first element in the array refers to the
552range matched by the entire regular expression, and subsequent elements
553refer to the parenthesized subexpressions in the order that they
554appear in the pattern. Each element is a structure that is defined as
555follows:
556.PP
557.CS
558typedef struct Tcl_RegExpIndices {
559 long \fIstart\fR;
560 long \fIend\fR;
561} Tcl_RegExpIndices;
562.CE
563.PP
564The \fIstart\fR and \fIend\fR values are Unicode character indices
565relative to the offset location within \fIobjPtr\fR where matching began.
566The \fIstart\fR index identifies the first character of the matched
567subexpression. The \fIend\fR index identifies the first character
568after the matched subexpression. If the subexpression matched the
569empty string, then \fIstart\fR and \fIend\fR will be equal. If the
570subexpression did not participate in the match, then \fIstart\fR and
571\fIend\fR will be set to -1.
572.PP
573The \fIextendStart\fR field in \fBTcl_RegExpInfo\fR is only set if the
574\fBTCL_REG_CANMATCH\fR flag was used. It indicates the first
575character in the string where a match could occur. If a match was
576found, this will be the same as the beginning of the current match.
577If no match was found, then it indicates the earliest point at which a
578match might occur if additional text is appended to the string. If
579no match is possible even with further text, this field will be set
580to -1.
581.VE 8.1
582.SH "SEE ALSO"
583re_syntax(n)
584.SH KEYWORDS
585match, pattern, regular expression, string, subexpression, Tcl_RegExpIndices, Tcl_RegExpInfo