git.subgeniuskitty.com - OpenSPARC-T2-SAM/.git/blame_incremental

... / ...

Commit	Line	Data
	1	'\"
	2	'\" Copyright (c) 1998 Sun Microsystems, Inc.
	3	'\" Copyright (c) 1999 Scriptics Corporation
	4	'\"
	5	'\" See the file "license.terms" for information on usage and redistribution
	6	'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
	7	'\"
	8	'\" RCS: @(#) $Id: re_syntax.n,v 1.3 1999/07/14 19:09:36 jpeek Exp $
	9	'\"
	10	'\" The definitions below are for supplemental macros used in Tcl/Tk
	11	'\" manual entries.
	12	'\"
	13	'\" .AP type name in/out ?indent?
	14	'\" Start paragraph describing an argument to a library procedure.
	15	'\" type is type of argument (int, etc.), in/out is either "in", "out",
	16	'\" or "in/out" to describe whether procedure reads or modifies arg,
	17	'\" and indent is equivalent to second arg of .IP (shouldn't ever be
	18	'\" needed; use .AS below instead)
	19	'\"
	20	'\" .AS ?type? ?name?
	21	'\" Give maximum sizes of arguments for setting tab stops. Type and
	22	'\" name are examples of largest possible arguments that will be passed
	23	'\" to .AP later. If args are omitted, default tab stops are used.
	24	'\"
	25	'\" .BS
	26	'\" Start box enclosure. From here until next .BE, everything will be
	27	'\" enclosed in one large box.
	28	'\"
	29	'\" .BE
	30	'\" End of box enclosure.
	31	'\"
	32	'\" .CS
	33	'\" Begin code excerpt.
	34	'\"
	35	'\" .CE
	36	'\" End code excerpt.
	37	'\"
	38	'\" .VS ?version? ?br?
	39	'\" Begin vertical sidebar, for use in marking newly-changed parts
	40	'\" of man pages. The first argument is ignored and used for recording
	41	'\" the version when the .VS was added, so that the sidebars can be
	42	'\" found and removed when they reach a certain age. If another argument
	43	'\" is present, then a line break is forced before starting the sidebar.
	44	'\"
	45	'\" .VE
	46	'\" End of vertical sidebar.
	47	'\"
	48	'\" .DS
	49	'\" Begin an indented unfilled display.
	50	'\"
	51	'\" .DE
	52	'\" End of indented unfilled display.
	53	'\"
	54	'\" .SO
	55	'\" Start of list of standard options for a Tk widget. The
	56	'\" options follow on successive lines, in four columns separated
	57	'\" by tabs.
	58	'\"
	59	'\" .SE
	60	'\" End of list of standard options for a Tk widget.
	61	'\"
	62	'\" .OP cmdName dbName dbClass
	63	'\" Start of description of a specific option. cmdName gives the
	64	'\" option's name as specified in the class command, dbName gives
	65	'\" the option's name in the option database, and dbClass gives
	66	'\" the option's class in the option database.
	67	'\"
	68	'\" .UL arg1 arg2
	69	'\" Print arg1 underlined, then print arg2 normally.
	70	'\"
	71	'\" RCS: @(#) $Id: man.macros,v 1.4 2000/08/25 06:18:32 ericm Exp $
	72	'\"
	73	'\" # Global setup for Tcl/Tk man pages: in troff mode set the ^B trap 1.3i above the page bottom, save the current line length in ^l, and adjust both margins.
	74	.if t .wh -1.3i ^B
	75	.nr ^l \n(.l
	76	.ad b
	77	'\" # AP type name in/out ?indent? - start an argument description; the fields are tab-separated so they line up on the )A and )B stops set by .ta
	78	.de AP
	79	.ie !"\\$4"" .TP \\$4
	80	.el \{\
	81	. ie !"\\$2"" .TP \\n()Cu
	82	. el .TP 15
	83	.\}
	84	.ta \\n()Au \\n()Bu
	85	.ie !"\\$3"" \{\
	86	\&\\$1	\\fI\\$2\\fP	(\\$3)
	87	.\".b
	88	.\}
	89	.el \{\
	90	.br
	91	.ie !"\\$2"" \{\
	92	\&\\$1	\\fI\\$2\\fP
	93	.\}
	94	.el \{\
	95	\&\\fI\\$1\\fP
	96	.\}
	97	.\}
	98	..
	99	'\" # AS ?type? ?name? - compute the tab-stop registers )A, )B and )C that .AP reads; omitted arguments leave the defaults (10n, +15n)
	100	.de AS
	101	.nr )A 10n
	102	.if !"\\$1"" .nr )A \\w'\\$1'u+3n
	103	.nr )B \\n()Au+15n
	104	.\" A non-empty second argument replaces the default )B set on the previous line.
	105	.if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n
	106	.nr )C \\n()Bu+\\w'(in/out)'u+2n
	107	..
	108	.AS Tcl_Interp Tcl_CreateInterp in/out
	109	'\" # BS - start boxed text: break, mark the top of the box, and (nroff only) draw a rule across the full line length
	110	'\" # ^y = starting y location of the box (recorded with .mk)
	111	'\" # ^b = 1 marks a box started on the current page (^B raises it to 2 after a page break, BE clears it)
	112	.de BS
	113	.br
	114	.mk ^y
	115	.nr ^b 1u
	116	.if n .nf
	117	.if n .ti 0
	118	.if n \l'\\n(.lu\(ul'
	119	.if n .fi
	120	..
	121	'\" # BE - end boxed text (draw box now); ^t = y location of the box bottom, and | in the motions below is troff's absolute-position operator
	122	.de BE
	123	.nf
	124	.ti 0
	125	.mk ^t
	126	.ie n \l'\\n(^lu\(ul'
	127	.el \{\
	128	.\" Draw four-sided box normally, but don't draw top of
	129	.\" box if the box started on an earlier page.
	130	.ie !\\n(^b-1 \{\
	131	\h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul'
	132	.\}
	133	.el \{\
	134	\h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul'
	135	.\}
	136	.\}
	137	.fi
	138	.br
	139	.nr ^b 0
	140	..
	141	'\" # VS ?version? ?br? - start vertical sidebar; a non-empty second argument forces a line break first
	142	'\" # ^Y = starting y location of the sidebar (recorded with .mk)
	143	'\" # ^v = 1 while a sidebar is open in troff (nroff uses the 'mc margin character instead, so ^v doesn't matter there)
	144	.de VS
	145	.if !"\\$2"" .br
	146	.mk ^Y
	147	.ie n 'mc \s12\(br\s0
	148	.el .nr ^v 1u
	149	..
	150	'\" # VE - end of vertical sidebar: nroff turns the margin character off, troff draws the bar from ^Y down to the current position in environment 2
	151	.de VE
	152	.ie n 'mc
	153	.el \{\
	154	.ev 2
	155	.nf
	156	.ti 0
	157	.mk ^t
	158	\h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n'
	159	.sp -1
	160	.fi
	161	.ev
	162	.\}
	163	.nr ^v 0
	164	..
	165	'\" # Special macro to handle page bottom: finish off the current
	166	'\" # box/sidebar if in box/sidebar mode, then begin a new page (.bp)
	167	'\" # and re-mark the box/sidebar start (^y, ^Y) on that page.
	168	.de ^B
	169	.ev 2
	170	'ti 0
	171	'nf
	172	.mk ^t
	173	.if \\n(^b \{\
	174	.\" Draw three-sided box if this is the box's first page,
	175	.\" draw two sides but no top otherwise.
	176	.ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c
	177	.el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c
	178	.\}
	179	.if \\n(^v \{\
	180	.nr ^x \\n(^tu+1v-\\n(^Yu
	181	\kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c
	182	.\}
	183	.bp
	184	'fi
	185	.ev
	186	.if \\n(^b \{\
	187	.mk ^y
	188	.nr ^b 2
	189	.\}
	190	.if \\n(^v \{\
	191	.mk ^Y
	192	.\}
	193	..
	194	'\" # DS - begin display: indent one level, switch to no-fill mode, and leave a blank line
	195	.de DS
	196	.RS
	197	.nf
	198	.sp
	199	..
	200	'\" # DE - end display: restore fill mode and the previous indent, then leave a blank line
	201	.de DE
	202	.fi
	203	.RE
	204	.sp
	205	..
	206	'\" # SO - start of list of standard options: emit the STANDARD OPTIONS heading, then switch to no-fill bold text with tab stops at 5.5c and 11c
	207	.de SO
	208	.SH "STANDARD OPTIONS"
	209	.LP
	210	.nf
	211	.ta 5.5c 11c
	212	.ft B
	213	..
	214	'\" # SE - end of list of standard options: restore fill mode and roman font, then point the reader at the options manual entry
	215	.de SE
	216	.fi
	217	.ft R
	218	.LP
	219	See the \\fBoptions\\fR manual entry for details on the standard options.
	220	..
	221	'\" # OP cmdName dbName dbClass - start of full description for a single option; each label is followed by a tab so the values align on the 4c stop
	222	.de OP
	223	.LP
	224	.nf
	225	.ta 4c
	226	Command-Line Name:	\\fB\\$1\\fR
	227	Database Name:	\\fB\\$2\\fR
	228	Database Class:	\\fB\\$3\\fR
	229	.fi
	230	.IP
	231	..
	232	'\" # CS - begin code excerpt: indent one level, switch to no-fill mode, and set tab stops every quarter inch up to 1i
	233	.de CS
	234	.RS
	235	.nf
	236	.ta .25i .5i .75i 1i
	237	..
	238	'\" # CE - end code excerpt: restore fill mode and the previous indent
	239	.de CE
	240	.fi
	241	.RE
	242	..
	243	.de UL
	244	\\$1\l'|0\(ul'\\$2
	245	..
	246	.TH re_syntax n "8.1" Tcl "Tcl Built-In Commands"
	247	.BS
	248	.SH NAME
	249	re_syntax \- Syntax of Tcl regular expressions.
	250	.BE
	251
	252	.SH DESCRIPTION
	253	.PP
	254	A \fIregular expression\fR describes strings of characters.
	255	It's a pattern that matches certain strings and doesn't match others.
	256
	257	.SH "DIFFERENT FLAVORS OF REs"
	258	Regular expressions (``RE''s), as defined by POSIX, come in two
	259	flavors: \fIextended\fR REs (``EREs'') and \fIbasic\fR REs (``BREs'').
	260	EREs are roughly those of the traditional \fIegrep\fR, while BREs are
	261	roughly those of the traditional \fIed\fR. This implementation adds
	262	a third flavor, \fIadvanced\fR REs (``AREs''), basically EREs with
	263	some significant extensions.
	264	.PP
	265	This manual page primarily describes AREs. BREs mostly exist for
	266	backward compatibility in some old programs; they will be discussed at
	267	the end. POSIX EREs are almost an exact subset of AREs. Features of
	268	AREs that are not present in EREs will be indicated.
	269
	270	.SH "REGULAR EXPRESSION SYNTAX"
	271	.PP
	272	Tcl regular expressions are implemented using the package written by
	273	Henry Spencer, based on the 1003.2 spec and some (not quite all) of
	274	the Perl5 extensions (thanks, Henry!). Much of the description of
	275	regular expressions below is copied verbatim from his manual entry.
	276	.PP
	277	An ARE is one or more \fIbranches\fR,
	278	separated by `\fB|\fR',
	279	matching anything that matches any of the branches.
	280	.PP
	281	A branch is zero or more \fIconstraints\fR or \fIquantified atoms\fR,
	282	concatenated.
	283	It matches a match for the first, followed by a match for the second, etc;
	284	an empty branch matches the empty string.
	285	.PP
	286	A quantified atom is an \fIatom\fR possibly followed
	287	by a single \fIquantifier\fR.
	288	Without a quantifier, it matches a match for the atom.
	289	The quantifiers,
	290	and what a so-quantified atom matches, are:
	291	.RS 2
	292	.TP 6
	293	\fB*\fR
	294	a sequence of 0 or more matches of the atom
	295	.TP
	296	\fB+\fR
	297	a sequence of 1 or more matches of the atom
	298	.TP
	299	\fB?\fR
	300	a sequence of 0 or 1 matches of the atom
	301	.TP
	302	\fB{\fIm\fB}\fR
	303	a sequence of exactly \fIm\fR matches of the atom
	304	.TP
	305	\fB{\fIm\fB,}\fR
	306	a sequence of \fIm\fR or more matches of the atom
	307	.TP
	308	\fB{\fIm\fB,\fIn\fB}\fR
	309	a sequence of \fIm\fR through \fIn\fR (inclusive) matches of the atom;
	310	\fIm\fR may not exceed \fIn\fR
	311	.TP
	312	\fB*? +? ?? {\fIm\fB}? {\fIm\fB,}? {\fIm\fB,\fIn\fB}?\fR
	313	\fInon-greedy\fR quantifiers,
	314	which match the same possibilities,
	315	but prefer the smallest number rather than the largest number
	316	of matches (see MATCHING)
	317	.RE
	318	.PP
	319	The forms using
	320	\fB{\fR and \fB}\fR
	321	are known as \fIbound\fRs.
	322	The numbers
	323	\fIm\fR and \fIn\fR are unsigned decimal integers
	324	with permissible values from 0 to 255 inclusive.
	325	.PP
	326	An atom is one of:
	327	.RS 2
	328	.TP 6
	329	\fB(\fIre\fB)\fR
	330	(where \fIre\fR is any regular expression)
	331	matches a match for
	332	\fIre\fR, with the match noted for possible reporting
	333	.TP
	334	\fB(?:\fIre\fB)\fR
	335	as previous,
	336	but does no reporting
	337	(a ``non-capturing'' set of parentheses)
	338	.TP
	339	\fB()\fR
	340	matches an empty string,
	341	noted for possible reporting
	342	.TP
	343	\fB(?:)\fR
	344	matches an empty string,
	345	without reporting
	346	.TP
	347	\fB[\fIchars\fB]\fR
	348	a \fIbracket expression\fR,
	349	matching any one of the \fIchars\fR (see BRACKET EXPRESSIONS for more detail)
	350	.TP
	351	\fB.\fR
	352	matches any single character
	353	.TP
	354	\fB\e\fIk\fR
	355	(where \fIk\fR is a non-alphanumeric character)
	356	matches that character taken as an ordinary character,
	357	e.g. \e\e matches a backslash character
	358	.TP
	359	\fB\e\fIc\fR
	360	where \fIc\fR is alphanumeric
	361	(possibly followed by other characters),
	362	an \fIescape\fR (AREs only),
	363	see ESCAPES below
	364	.TP
	365	\fB{\fR
	366	when followed by a character other than a digit,
	367	matches the left-brace character `\fB{\fR';
	368	when followed by a digit, it is the beginning of a
	369	\fIbound\fR (see above)
	370	.TP
	371	\fIx\fR
	372	where \fIx\fR is
	373	a single character with no other significance, matches that character.
	374	.RE
	375	.PP
	376	A \fIconstraint\fR matches an empty string when specific conditions
	377	are met.
	378	A constraint may not be followed by a quantifier.
	379	The simple constraints are as follows; some more constraints are
	380	described later, under ESCAPES.
	381	.RS 2
	382	.TP 8
	383	\fB^\fR
	384	matches at the beginning of a line
	385	.TP
	386	\fB$\fR
	387	matches at the end of a line
	388	.TP
	389	\fB(?=\fIre\fB)\fR
	390	\fIpositive lookahead\fR (AREs only), matches at any point
	391	where a substring matching \fIre\fR begins
	392	.TP
	393	\fB(?!\fIre\fB)\fR
	394	\fInegative lookahead\fR (AREs only), matches at any point
	395	where no substring matching \fIre\fR begins
	396	.RE
	397	.PP
	398	The lookahead constraints may not contain back references (see later),
	399	and all parentheses within them are considered non-capturing.
	400	.PP
	401	An RE may not end with `\fB\e\fR'.
	402
	403	.SH "BRACKET EXPRESSIONS"
	404	A \fIbracket expression\fR is a list of characters enclosed in `\fB[\|]\fR'.
	405	It normally matches any single character from the list (but see below).
	406	If the list begins with `\fB^\fR',
	407	it matches any single character
	408	(but see below) \fInot\fR from the rest of the list.
	409	.PP
	410	If two characters in the list are separated by `\fB\-\fR',
	411	this is shorthand
	412	for the full \fIrange\fR of characters between those two (inclusive) in the
	413	collating sequence,
	414	e.g.
	415	\fB[0\-9]\fR
	416	in ASCII matches any decimal digit.
	417	Two ranges may not share an
	418	endpoint, so e.g.
	419	\fBa\-c\-e\fR
	420	is illegal.
	421	Ranges are very collating-sequence-dependent,
	422	and portable programs should avoid relying on them.
	423	.PP
	424	To include a literal
	425	\fB]\fR
	426	or
	427	\fB\-\fR
	428	in the list,
	429	the simplest method is to
	430	enclose it in
	431	\fB[.\fR and \fB.]\fR
	432	to make it a collating element (see below).
	433	Alternatively,
	434	make it the first character
	435	(following a possible `\fB^\fR'),
	436	or (AREs only) precede it with `\fB\e\fR'.
	437	Alternatively, for `\fB\-\fR',
	438	make it the last character,
	439	or the second endpoint of a range.
	440	To use a literal
	441	\fB\-\fR
	442	as the first endpoint of a range,
	443	make it a collating element
	444	or (AREs only) precede it with `\fB\e\fR'.
	445	With the exception of these, some combinations using
	446	\fB[\fR
	447	(see next
	448	paragraphs), and escapes,
	449	all other special characters lose their
	450	special significance within a bracket expression.
	451	.PP
	452	Within a bracket expression, a collating element (a character,
	453	a multi-character sequence that collates as if it were a single character,
	454	or a collating-sequence name for either)
	455	enclosed in
	456	\fB[.\fR and \fB.]\fR
	457	stands for the
	458	sequence of characters of that collating element.
	459	The sequence is a single element of the bracket expression's list.
	460	A bracket expression in a locale that has
	461	multi-character collating elements
	462	can thus match more than one character.
	463	.VS 8.2
	464	So (insidiously), a bracket expression that starts with \fB^\fR
	465	can match multi-character collating elements even if none of them
	466	appear in the bracket expression!
	467	(\fINote:\fR Tcl currently has no multi-character collating elements.
	468	This information is only for illustration.)
	469	.PP
	470	For example, assume the collating sequence includes a \fBch\fR
	471	multi-character collating element.
	472	Then the RE \fB[[.ch.]]*c\fR (zero or more \fBch\fP's followed by \fBc\fP)
	473	matches the first five characters of `\fBchchcc\fR'.
	474	Also, the RE \fB[^c]b\fR matches all of `\fBchb\fR'
	475	(because \fB[^c]\fR matches the multi-character \fBch\fR).
	476	.VE 8.2
	477	.PP
	478	Within a bracket expression, a collating element enclosed in
	479	\fB[=\fR
	480	and
	481	\fB=]\fR
	482	is an equivalence class, standing for the sequences of characters
	483	of all collating elements equivalent to that one, including itself.
	484	(If there are no other equivalent collating elements,
	485	the treatment is as if the enclosing delimiters were `\fB[.\fR'\&
	486	and `\fB.]\fR'.)
	487	For example, if
	488	\fBo\fR
	489	and
	490	\fB\o'o^'\fR
	491	are the members of an equivalence class,
	492	then `\fB[[=o=]]\fR', `\fB[[=\o'o^'=]]\fR',
	493	and `\fB[o\o'o^']\fR'\&
	494	are all synonymous.
	495	An equivalence class may not be an endpoint
	496	of a range.
	497	.VS 8.2
	498	(\fINote:\fR
	499	Tcl currently implements only the Unicode locale.
	500	It doesn't define any equivalence classes.
	501	The examples above are just illustrations.)
	502	.VE 8.2
	503	.PP
	504	Within a bracket expression, the name of a \fIcharacter class\fR enclosed
	505	in
	506	\fB[:\fR
	507	and
	508	\fB:]\fR
	509	stands for the list of all characters
	510	(not all collating elements!)
	511	belonging to that
	512	class.
	513	Standard character classes are:
	514	.PP
	515	.RS
	516	.ne 5
	517	.nf
	518	.ta 3c
	519	\fBalpha\fR	A letter.
	520	\fBupper\fR	An upper-case letter.
	521	\fBlower\fR	A lower-case letter.
	522	\fBdigit\fR	A decimal digit.
	523	\fBxdigit\fR	A hexadecimal digit.
	524	\fBalnum\fR	An alphanumeric (letter or digit).
	525	\fBprint\fR	An alphanumeric (same as alnum).
	526	\fBblank\fR	A space or tab character.
	527	\fBspace\fR	A character producing white space in displayed text.
	528	\fBpunct\fR	A punctuation character.
	529	\fBgraph\fR	A character with a visible representation.
	530	\fBcntrl\fR	A control character.
	531	.fi
	532	.RE
	533	.PP
	534	A locale may provide others.
	535	.VS 8.2
	536	(Note that the current Tcl implementation has only one locale:
	537	the Unicode locale.)
	538	.VE 8.2
	539	A character class may not be used as an endpoint of a range.
	540	.PP
	541	There are two special cases of bracket expressions:
	542	the bracket expressions
	543	\fB[[:<:]]\fR
	544	and
	545	\fB[[:>:]]\fR
	546	are constraints, matching empty strings at
	547	the beginning and end of a word respectively.
	548	'\" note, discussion of escapes below references this definition of word
	549	A word is defined as a sequence of
	550	word characters
	551	that is neither preceded nor followed by
	552	word characters.
	553	A word character is an
	554	\fIalnum\fR
	555	character
	556	or an underscore
	557	(\fB_\fR).
	558	These special bracket expressions are deprecated;
	559	users of AREs should use constraint escapes instead (see below).
	560	.SH ESCAPES
	561	Escapes (AREs only), which begin with a
	562	\fB\e\fR
	563	followed by an alphanumeric character,
	564	come in several varieties:
	565	character entry, class shorthands, constraint escapes, and back references.
	566	A
	567	\fB\e\fR
	568	followed by an alphanumeric character but not constituting
	569	a valid escape is illegal in AREs.
	570	In EREs, there are no escapes:
	571	outside a bracket expression,
	572	a
	573	\fB\e\fR
	574	followed by an alphanumeric character merely stands for that
	575	character as an ordinary character,
	576	and inside a bracket expression,
	577	\fB\e\fR
	578	is an ordinary character.
	579	(The latter is the one actual incompatibility between EREs and AREs.)
	580	.PP
	581	Character-entry escapes (AREs only) exist to make it easier to specify
	582	non-printing and otherwise inconvenient characters in REs:
	583	.RS 2
	584	.TP 5
	585	\fB\ea\fR
	586	alert (bell) character, as in C
	587	.TP
	588	\fB\eb\fR
	589	backspace, as in C
	590	.TP
	591	\fB\eB\fR
	592	synonym for
	593	\fB\e\fR
	594	to help reduce backslash doubling in some
	595	applications where there are multiple levels of backslash processing
	596	.TP
	597	\fB\ec\fIX\fR
	598	(where X is any character) the character whose
	599	low-order 5 bits are the same as those of
	600	\fIX\fR,
	601	and whose other bits are all zero
	602	.TP
	603	\fB\ee\fR
	604	the character whose collating-sequence name
	605	is `\fBESC\fR',
	606	or failing that, the character with octal value 033
	607	.TP
	608	\fB\ef\fR
	609	formfeed, as in C
	610	.TP
	611	\fB\en\fR
	612	newline, as in C
	613	.TP
	614	\fB\er\fR
	615	carriage return, as in C
	616	.TP
	617	\fB\et\fR
	618	horizontal tab, as in C
	619	.TP
	620	\fB\eu\fIwxyz\fR
	621	(where
	622	\fIwxyz\fR
	623	is exactly four hexadecimal digits)
	624	the Unicode character
	625	\fBU+\fIwxyz\fR
	626	in the local byte ordering
	627	.TP
	628	\fB\eU\fIstuvwxyz\fR
	629	(where
	630	\fIstuvwxyz\fR
	631	is exactly eight hexadecimal digits)
	632	reserved for a somewhat-hypothetical Unicode extension to 32 bits
	633	.TP
	634	\fB\ev\fR
	635	vertical tab, as in C
	637	.TP
	638	\fB\ex\fIhhh\fR
	639	(where
	640	\fIhhh\fR
	641	is any sequence of hexadecimal digits)
	642	the character whose hexadecimal value is
	643	\fB0x\fIhhh\fR
	644	(a single character no matter how many hexadecimal digits are used).
	645	.TP
	646	\fB\e0\fR
	647	the character whose value is
	648	\fB0\fR
	649	.TP
	650	\fB\e\fIxy\fR
	651	(where
	652	\fIxy\fR
	653	is exactly two octal digits,
	654	and is not a
	655	\fIback reference\fR (see below))
	656	the character whose octal value is
	657	\fB0\fIxy\fR
	658	.TP
	659	\fB\e\fIxyz\fR
	660	(where
	661	\fIxyz\fR
	662	is exactly three octal digits,
	663	and is not a
	664	back reference (see below))
	665	the character whose octal value is
	666	\fB0\fIxyz\fR
	667	.RE
	668	.PP
	669	Hexadecimal digits are `\fB0\fR'-`\fB9\fR', `\fBa\fR'-`\fBf\fR',
	670	and `\fBA\fR'-`\fBF\fR'.
	671	Octal digits are `\fB0\fR'-`\fB7\fR'.
	672	.PP
	673	The character-entry escapes are always taken as ordinary characters.
	674	For example,
	675	\fB\e135\fR
	676	is
	677	\fB]\fR
	678	in ASCII,
	679	but
	680	\fB\e135\fR
	681	does not terminate a bracket expression.
	682	Beware, however, that some applications (e.g., C compilers) interpret
	683	such sequences themselves before the regular-expression package
	684	gets to see them, which may require doubling (quadrupling, etc.) the `\fB\e\fR'.
	685	.PP
	686	Class-shorthand escapes (AREs only) provide shorthands for certain commonly-used
	687	character classes:
	688	.RS 2
	689	.TP 10
	690	\fB\ed\fR
	691	\fB[[:digit:]]\fR
	692	.TP
	693	\fB\es\fR
	694	\fB[[:space:]]\fR
	695	.TP
	696	\fB\ew\fR
	697	\fB[[:alnum:]_]\fR
	698	(note underscore)
	699	.TP
	700	\fB\eD\fR
	701	\fB[^[:digit:]]\fR
	702	.TP
	703	\fB\eS\fR
	704	\fB[^[:space:]]\fR
	705	.TP
	706	\fB\eW\fR
	707	\fB[^[:alnum:]_]\fR
	708	(note underscore)
	709	.RE
	710	.PP
	711	Within bracket expressions, `\fB\ed\fR', `\fB\es\fR',
	712	and `\fB\ew\fR'\&
	713	lose their outer brackets,
	714	and `\fB\eD\fR', `\fB\eS\fR',
	715	and `\fB\eW\fR'\&
	716	are illegal.
	717	.VS 8.2
	718	(So, for example, \fB[a\-c\ed]\fR is equivalent to \fB[a\-c[:digit:]]\fR.
	719	Also, \fB[a\-c\eD]\fR, which is equivalent to \fB[a\-c^[:digit:]]\fR, is illegal.)
	720	.VE 8.2
	721	.PP
	722	A constraint escape (AREs only) is a constraint,
	723	matching the empty string if specific conditions are met,
	724	written as an escape:
	725	.RS 2
	726	.TP 6
	727	\fB\eA\fR
	728	matches only at the beginning of the string
	729	(see MATCHING, below, for how this differs from `\fB^\fR')
	730	.TP
	731	\fB\em\fR
	732	matches only at the beginning of a word
	733	.TP
	734	\fB\eM\fR
	735	matches only at the end of a word
	736	.TP
	737	\fB\ey\fR
	738	matches only at the beginning or end of a word
	739	.TP
	740	\fB\eY\fR
	741	matches only at a point that is not the beginning or end of a word
	742	.TP
	743	\fB\eZ\fR
	744	matches only at the end of the string
	745	(see MATCHING, below, for how this differs from `\fB$\fR')
	746	.TP
	747	\fB\e\fIm\fR
	748	(where
	749	\fIm\fR
	750	is a nonzero digit) a \fIback reference\fR, see below
	751	.TP
	752	\fB\e\fImnn\fR
	753	(where
	754	\fIm\fR
	755	is a nonzero digit, and
	756	\fInn\fR
	757	is some more digits,
	758	and the decimal value
	759	\fImnn\fR
	760	is not greater than the number of closing capturing parentheses seen so far)
	761	a \fIback reference\fR, see below
	762	.RE
	763	.PP
	764	A word is defined as in the specification of
	765	\fB[[:<:]]\fR
	766	and
	767	\fB[[:>:]]\fR
	768	above.
	769	Constraint escapes are illegal within bracket expressions.
	770	.PP
	771	A back reference (AREs only) matches the same string matched by the parenthesized
	772	subexpression specified by the number,
	773	so that (e.g.)
	774	\fB([bc])\e1\fR
	775	matches
	776	\fBbb\fR
	777	or
	778	\fBcc\fR
	779	but not `\fBbc\fR'.
	780	The subexpression must entirely precede the back reference in the RE.
	781	Subexpressions are numbered in the order of their leading parentheses.
	782	Non-capturing parentheses do not define subexpressions.
	783	.PP
	784	There is an inherent historical ambiguity between octal character-entry
	785	escapes and back references, which is resolved by heuristics,
	786	as hinted at above.
	787	A leading zero always indicates an octal escape.
	788	A single non-zero digit, not followed by another digit,
	789	is always taken as a back reference.
	790	A multi-digit sequence not starting with a zero is taken as a back
	791	reference if it comes after a suitable subexpression
	792	(i.e. the number is in the legal range for a back reference),
	793	and otherwise is taken as octal.
	794	.SH "METASYNTAX"
	795	In addition to the main syntax described above, there are some special
	796	forms and miscellaneous syntactic facilities available.
	797	.PP
	798	Normally the flavor of RE being used is specified by
	799	application-dependent means.
	800	However, this can be overridden by a \fIdirector\fR.
	801	If an RE of any flavor begins with `\fB***:\fR',
	802	the rest of the RE is an ARE.
	803	If an RE of any flavor begins with `\fB***=\fR',
	804	the rest of the RE is taken to be a literal string,
	805	with all characters considered ordinary characters.
	806	.PP
	807	An ARE may begin with \fIembedded options\fR:
	808	a sequence
	809	\fB(?\fIxyz\fB)\fR
	810	(where
	811	\fIxyz\fR
	812	is one or more alphabetic characters)
	813	specifies options affecting the rest of the RE.
	814	These supplement, and can override,
	815	any options specified by the application.
	816	The available option letters are:
	817	.RS 2
	818	.TP 3
	819	\fBb\fR
	820	rest of RE is a BRE
	821	.TP 3
	822	\fBc\fR
	823	case-sensitive matching (usual default)
	824	.TP 3
	825	\fBe\fR
	826	rest of RE is an ERE
	827	.TP 3
	828	\fBi\fR
	829	case-insensitive matching (see MATCHING, below)
	830	.TP 3
	831	\fBm\fR
	832	historical synonym for
	833	\fBn\fR
	834	.TP 3
	835	\fBn\fR
	836	newline-sensitive matching (see MATCHING, below)
	837	.TP 3
	838	\fBp\fR
	839	partial newline-sensitive matching (see MATCHING, below)
	840	.TP 3
	841	\fBq\fR
	842	rest of RE is a literal (``quoted'') string, all ordinary characters
	843	.TP 3
	844	\fBs\fR
	845	non-newline-sensitive matching (usual default)
	846	.TP 3
	847	\fBt\fR
	848	tight syntax (usual default; see below)
	849	.TP 3
	850	\fBw\fR
	851	inverse partial newline-sensitive (``weird'') matching (see MATCHING, below)
	852	.TP 3
	853	\fBx\fR
	854	expanded syntax (see below)
	855	.RE
	856	.PP
	857	Embedded options take effect at the
	858	\fB)\fR
	859	terminating the sequence.
	860	They are available only at the start of an ARE,
	861	and may not be used later within it.
	862	.PP
	863	In addition to the usual (\fItight\fR) RE syntax, in which all characters are
	864	significant, there is an \fIexpanded\fR syntax,
	865	available in all flavors of RE
	866	with the \fB\-expanded\fR switch, or in AREs with the embedded x option.
	867	In the expanded syntax,
	868	white-space characters are ignored
	869	and all characters between a
	870	\fB#\fR
	871	and the following newline (or the end of the RE) are ignored,
	872	permitting paragraphing and commenting a complex RE.
	873	There are three exceptions to that basic rule:
	874	.RS 2
	875	.PP
	876	a white-space character or `\fB#\fR' preceded by `\fB\e\fR' is retained
	877	.PP
	878	white space or `\fB#\fR' within a bracket expression is retained
	879	.PP
	880	white space and comments are illegal within multi-character symbols
	881	like the ARE `\fB(?:\fR' or the BRE `\fB\e(\fR'
	882	.RE
	883	.PP
	884	Expanded-syntax white-space characters are blank, tab, newline, and
	885	.VS 8.2
	886	any character that belongs to the \fIspace\fR character class.
	887	.VE 8.2
	888	.PP
	889	Finally, in an ARE,
	890	outside bracket expressions, the sequence `\fB(?#\fIttt\fB)\fR'
	891	(where
	892	\fIttt\fR
	893	is any text not containing a `\fB)\fR')
	894	is a comment,
	895	completely ignored.
	896	Again, this is not allowed between the characters of
	897	multi-character symbols like `\fB(?:\fR'.
	898	Such comments are more a historical artifact than a useful facility,
	899	and their use is deprecated;
	900	use the expanded syntax instead.
	901	.PP
	902	\fINone\fR of these metasyntax extensions is available if the application
	903	(or an initial
	904	\fB***=\fR
	905	director)
	906	has specified that the user's input be treated as a literal string
	907	rather than as an RE.
	908	.SH MATCHING
	909	In the event that an RE could match more than one substring of a given
	910	string,
	911	the RE matches the one starting earliest in the string.
	912	If the RE could match more than one substring starting at that point,
	913	its choice is determined by its \fIpreference\fR:
	914	either the longest substring, or the shortest.
	915	.PP
	916	Most atoms, and all constraints, have no preference.
	917	A parenthesized RE has the same preference (possibly none) as the RE.
	918	A quantified atom with quantifier
	919	\fB{\fIm\fB}\fR
	920	or
	921	\fB{\fIm\fB}?\fR
	922	has the same preference (possibly none) as the atom itself.
	923	A quantified atom with other normal quantifiers (including
	924	\fB{\fIm\fB,\fIn\fB}\fR
	925	with
	926	\fIm\fR
	927	equal to
	928	\fIn\fR)
	929	prefers longest match.
	930	A quantified atom with other non-greedy quantifiers (including
	931	\fB{\fIm\fB,\fIn\fB}?\fR
	932	with
	933	\fIm\fR
	934	equal to
	935	\fIn\fR)
	936	prefers shortest match.
	937	A branch has the same preference as the first quantified atom in it
	938	which has a preference.
	939	An RE consisting of two or more branches connected by the
	940	\fB|\fR
	941	operator prefers longest match.
	942	.PP
	943	Subject to the constraints imposed by the rules for matching the whole RE,
	944	subexpressions also match the longest or shortest possible substrings,
	945	based on their preferences,
	946	with subexpressions starting earlier in the RE taking priority over
	947	ones starting later.
	948	Note that outer subexpressions thus take priority over
	949	their component subexpressions.
	950	.PP
	951	Note that the quantifiers
	952	\fB{1,1}\fR
	953	and
	954	\fB{1,1}?\fR
	955	can be used to force longest and shortest preference, respectively,
	956	on a subexpression or a whole RE.
	957	.PP
	958	Match lengths are measured in characters, not collating elements.
	959	An empty string is considered longer than no match at all.
	960	For example,
	961	\fBbb*\fR
	962	matches the three middle characters of `\fBabbbc\fR',
	963	\fB(week|wee)(night|knights)\fR
	964	matches all ten characters of `\fBweeknights\fR',
	965	when
	966	\fB(.*).*\fR
	967	is matched against
	968	\fBabc\fR
	969	the parenthesized subexpression
	970	matches all three characters, and
	971	when
	972	\fB(a*)*\fR
	973	is matched against
	974	\fBbc\fR
	975	both the whole RE and the parenthesized
	976	subexpression match an empty string.
	977	.PP
	978	If case-independent matching is specified,
	979	the effect is much as if all case distinctions had vanished from the
	980	alphabet.
	981	When an alphabetic that exists in multiple cases appears as an
	982	ordinary character outside a bracket expression, it is effectively
	983	transformed into a bracket expression containing both cases,
	984	so that
	985	\fBx\fR
	986	becomes `\fB[xX]\fR'.
	987	When it appears inside a bracket expression, all case counterparts
	988	of it are added to the bracket expression, so that
	989	\fB[x]\fR
	990	becomes
	991	\fB[xX]\fR
	992	and
	993	\fB[^x]\fR
	994	becomes `\fB[^xX]\fR'.
	995	.PP
	996	If newline-sensitive matching is specified, \fB.\fR
	997	and bracket expressions using
	998	\fB^\fR
	999	will never match the newline character
	1000	(so that matches will never cross newlines unless the RE
	1001	explicitly arranges it)
	1002	and
	1003	\fB^\fR
	1004	and
	1005	\fB$\fR
	1006	will match the empty string after and before a newline
	1007	respectively, in addition to matching at beginning and end of string
	1008	respectively.
	1009	ARE
	1010	\fB\eA\fR
	1011	and
	1012	\fB\eZ\fR
	1013	continue to match beginning or end of string \fIonly\fR.
	1014	.PP
	1015	If partial newline-sensitive matching is specified,
	1016	this affects \fB.\fR
	1017	and bracket expressions
	1018	as with newline-sensitive matching, but not
	1019	\fB^\fR
	1020	and `\fB$\fR'.
	1021	.PP
	1022	If inverse partial newline-sensitive matching is specified,
	1023	this affects
	1024	\fB^\fR
	1025	and
	1026	\fB$\fR
	1027	as with
	1028	newline-sensitive matching,
	1029	but not \fB.\fR
	1030	and bracket expressions.
	1031	This isn't very useful but is provided for symmetry.
	1032	.SH "LIMITS AND COMPATIBILITY"
	1033	No particular limit is imposed on the length of REs.
	1034	Programs intended to be highly portable should not employ REs longer
	1035	than 256 bytes,
	1036	as a POSIX-compliant implementation can refuse to accept such REs.
	1037	.PP
	1038	The only feature of AREs that is actually incompatible with
	1039	POSIX EREs is that
	1040	\fB\e\fR
	1041	does not lose its special
	1042	significance inside bracket expressions.
	1043	All other ARE features use syntax which is illegal or has
	1044	undefined or unspecified effects in POSIX EREs;
	1045	the
	1046	\fB***\fR
	1047	syntax of directors likewise is outside the POSIX
	1048	syntax for both BREs and EREs.
	1049	.PP
	1050	Many of the ARE extensions are borrowed from Perl, but some have
	1051	been changed to clean them up, and a few Perl extensions are not present.
	1052	Incompatibilities of note include `\fB\eb\fR', `\fB\eB\fR',
	1053	the lack of special treatment for a trailing newline,
	1054	the addition of complemented bracket expressions to the things
	1055	affected by newline-sensitive matching,
	1056	the restrictions on parentheses and back references in lookahead constraints,
	1057	and the longest/shortest-match (rather than first-match) matching semantics.
	1058	.PP
	1059	The matching rules for REs containing both normal and non-greedy quantifiers
	1060	have changed since early beta-test versions of this package.
	1061	(The new rules are much simpler and cleaner,
	1062	but don't work as hard at guessing the user's real intentions.)
	1063	.PP
	1064	Henry Spencer's original 1986 \fIregexp\fR package,
	1065	still in widespread use (e.g., in pre-8.1 releases of Tcl),
	1066	implemented an early version of today's EREs.
	1067	There are four incompatibilities between \fIregexp\fR's near-EREs
	1068	(`RREs' for short) and AREs.
	1069	In roughly increasing order of significance:
	1070	.PP
	1071	.RS
	1072	In AREs,
	1073	\fB\e\fR
	1074	followed by an alphanumeric character is either an
	1075	escape or an error,
	1076	while in RREs, it was just another way of writing the
	1077	alphanumeric.
	1078	This should not be a problem because there was no reason to write
	1079	such a sequence in RREs.
	1080	.PP
	1081	\fB{\fR
	1082	followed by a digit in an ARE is the beginning of a bound,
	1083	while in RREs,
	1084	\fB{\fR
	1085	was always an ordinary character.
	1086	Such sequences should be rare,
	1087	and will often result in an error because following characters
	1088	will not look like a valid bound.
	1089	.PP
	1090	In AREs,
	1091	\fB\e\fR
	1092	remains a special character within `\fB[\|]\fR',
	1093	so a literal
	1094	\fB\e\fR
	1095	within
	1096	\fB[\|]\fR
	1097	must be written `\fB\e\e\fR'.
	1098	\fB\e\e\fR
	1099	also gives a literal
	1100	\fB\e\fR
	1101	within
	1102	\fB[\|]\fR
	1103	in RREs,
	1104	but only truly paranoid programmers routinely doubled the backslash.
	1105	.PP
	1106	AREs report the longest/shortest match for the RE,
	1107	rather than the first found in a specified search order.
	1108	This may affect some RREs which were written in the expectation that
	1109	the first match would be reported.
	1110	(The careful crafting of RREs to optimize the search order for fast
	1111	matching is obsolete (AREs examine all possible matches
	1112	in parallel, and their performance is largely insensitive to their
	1113	complexity) but cases where the search order was exploited to deliberately
	1114	find a match which was \fInot\fR the longest/shortest will need rewriting.)
	1115	.RE
	1116
	1117	.SH "BASIC REGULAR EXPRESSIONS"
	1118	BREs differ from EREs in several respects. `\fB|\fR', `\fB+\fR',
	1119	and
	1120	\fB?\fR
	1121	are ordinary characters and there is no equivalent
	1122	for their functionality.
	1123	The delimiters for bounds are
	1124	\fB\e{\fR
	1125	and `\fB\e}\fR',
	1126	with
	1127	\fB{\fR
	1128	and
	1129	\fB}\fR
	1130	by themselves ordinary characters.
	1131	The parentheses for nested subexpressions are
	1132	\fB\e(\fR
	1133	and `\fB\e)\fR',
	1134	with
	1135	\fB(\fR
	1136	and
	1137	\fB)\fR
	1138	by themselves ordinary characters.
	1139	\fB^\fR
	1140	is an ordinary character except at the beginning of the
	1141	RE or the beginning of a parenthesized subexpression,
	1142	\fB$\fR
	1143	is an ordinary character except at the end of the
	1144	RE or the end of a parenthesized subexpression,
	1145	and
	1146	\fB*\fR
	1147	is an ordinary character if it appears at the beginning of the
	1148	RE or the beginning of a parenthesized subexpression
	1149	(after a possible leading `\fB^\fR').
	1150	Finally,
	1151	single-digit back references are available,
	1152	and
	1153	\fB\e<\fR
	1154	and
	1155	\fB\e>\fR
	1156	are synonyms for
	1157	\fB[[:<:]]\fR
	1158	and
	1159	\fB[[:>:]]\fR
	1160	respectively;
	1161	no other escapes are available.
	1162
	1163	.SH "SEE ALSO"
	1164	RegExp(3), regexp(n), regsub(n), lsearch(n), switch(n), text(n)
	1165
	1166	.SH KEYWORDS
	1167	match, regular expression, string