git.subgeniuskitty.com - OpenSPARC-T2-DV/.git/blame_incremental

... / ...

Commit	Line	Data
	1	.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
	2	.\"
	3	.\" Standard preamble:
	4	.\" ========================================================================
	5	.de Sh \" Subsection heading
	6	.br
	7	.if t .Sp
	8	.ne 5
	9	.PP
	10	\fB\\$1\fR
	11	.PP
	12	..
	13	.de Sp \" Vertical space (when we can't use .PP)
	14	.if t .sp .5v
	15	.if n .sp
	16	..
	17	.de Vb \" Begin verbatim text
	18	.ft CW
	19	.nf
	20	.ne \\$1
	21	..
	22	.de Ve \" End verbatim text
	23	.ft R
	24	.fi
	25	..
	26	.\" Set up some character translations and predefined strings. \*(-- will
	27	.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
	28	.\" double quote, and \*(R" will give a right double quote. | will give a
	29	.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
	30	.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
	31	.\" expand to `' in nroff, nothing in troff, for use with C<>.
	32	.tr \(*W-|\(bv\*(Tr
	33	.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
	34	.ie n \{\
	35	. ds -- \(*W-
	36	. ds PI pi
	37	. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
	38	. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
	39	. ds L" ""
	40	. ds R" ""
	41	. ds C` ""
	42	. ds C' ""
	43	'br\}
	44	.el\{\
	45	. ds -- \|\(em\|
	46	. ds PI \(*p
	47	. ds L" ``
	48	. ds R" ''
	49	'br\}
	50	.\"
	51	.\" If the F register is turned on, we'll generate index entries on stderr for
	52	.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
	53	.\" entries marked with X<> in POD. Of course, you'll have to process the
	54	.\" output yourself in some meaningful fashion.
	55	.if \nF \{\
	56	. de IX
	57	. tm Index:\\$1\t\\n%\t"\\$2"
	58	..
	59	. nr % 0
	60	. rr F
	61	.\}
	62	.\"
	63	.\" For nroff, turn off justification. Always turn off hyphenation; it makes
	64	.\" way too many mistakes in technical documents.
	65	.hy 0
	66	.if n .na
	67	.\"
	68	.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
	69	.\" Fear. Run. Save yourself. No user-serviceable parts.
	70	. \" fudge factors for nroff and troff
	71	.if n \{\
	72	. ds #H 0
	73	. ds #V .8m
	74	. ds #F .3m
	75	. ds #[ \f1
	76	. ds #] \fP
	77	.\}
	78	.if t \{\
	79	. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
	80	. ds #V .6m
	81	. ds #F 0
	82	. ds #[ \&
	83	. ds #] \&
	84	.\}
	85	. \" simple accents for nroff and troff
	86	.if n \{\
	87	. ds ' \&
	88	. ds ` \&
	89	. ds ^ \&
	90	. ds , \&
	91	. ds ~ ~
	92	. ds /
	93	.\}
	94	.if t \{\
	95	. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
	96	. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
	97	. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
	98	. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
	99	. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
	100	. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
	101	.\}
	102	. \" troff and (daisy-wheel) nroff accents
	103	.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
	104	.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
	105	.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
	106	.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
	107	.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
	108	.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
	109	.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
	110	.ds ae a\h'-(\w'a'u*4/10)'e
	111	.ds Ae A\h'-(\w'A'u*4/10)'E
	112	. \" corrections for vroff
	113	.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
	114	.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
	115	. \" for low resolution devices (crt and lpr)
	116	.if \n(.H>23 .if \n(.V>19 \
	117	\{\
	118	. ds : e
	119	. ds 8 ss
	120	. ds o a
	121	. ds d- d\h'-1'\(ga
	122	. ds D- D\h'-1'\(hy
	123	. ds th \o'bp'
	124	. ds Th \o'LP'
	125	. ds ae ae
	126	. ds Ae AE
	127	.\}
	128	.rm #[ #] #H #V #F C
	129	.\" ========================================================================
	130	.\"
	131	.IX Title "PERLREQUICK 1"
	132	.TH PERLREQUICK 1 "2002-06-08" "perl v5.8.0" "Perl Programmers Reference Guide"
	133	.SH "NAME"
	134	perlrequick \- Perl regular expressions quick start
	135	.SH "DESCRIPTION"
	136	.IX Header "DESCRIPTION"
	137	This page covers the very basics of understanding, creating and
	138	using regular expressions ('regexes') in Perl.
	139	.SH "The Guide"
	140	.IX Header "The Guide"
	141	.Sh "Simple word matching"
	142	.IX Subsection "Simple word matching"
	143	The simplest regex is simply a word, or more generally, a string of
	144	characters. A regex consisting of a word matches any string that
	145	contains that word:
	146	.PP
	147	.Vb 1
	148	\& "Hello World" =~ /World/; # matches
	149	.Ve
	150	.PP
	151	In this statement, \f(CW\(C`World\(C'\fR is a regex and the \f(CW\(C`//\(C'\fR enclosing
	152	\&\f(CW\(C`/World/\(C'\fR tells perl to search a string for a match. The operator
	153	\&\f(CW\(C`=~\(C'\fR associates the string with the regex match and produces a true
	154	value if the regex matched, or false if the regex did not match. In
	155	our case, \f(CW\(C`World\(C'\fR matches the second word in \f(CW"Hello World"\fR, so the
	156	expression is true. This idea has several variations.
	157	.PP
	158	Expressions like this are useful in conditionals:
	159	.PP
	160	.Vb 1
	161	\& print "It matches\en" if "Hello World" =~ /World/;
	162	.Ve
	163	.PP
	164	The sense of the match can be reversed by using the \f(CW\*(C`!~\*(C'\fR operator:
	165	.PP
	166	.Vb 1
	167	\& print "It doesn't match\en" if "Hello World" !~ /World/;
	168	.Ve
	169	.PP
	170	The literal string in the regex can be replaced by a variable:
	171	.PP
	172	.Vb 2
	173	\& $greeting = "World";
	174	\& print "It matches\en" if "Hello World" =~ /$greeting/;
	175	.Ve
	176	.PP
	177	If you're matching against \f(CW$_\fR, the \f(CW\(C`$_ =~\(C'\fR part can be omitted:
	178	.PP
	179	.Vb 2
	180	\& $_ = "Hello World";
	181	\& print "It matches\en" if /World/;
	182	.Ve
	183	.PP
	184	Finally, the \f(CW\(C`//\(C'\fR default delimiters for a match can be changed to
	185	arbitrary delimiters by putting an \f(CW'm'\fR out front:
	186	.PP
	187	.Vb 4
	188	\& "Hello World" =~ m!World!; # matches, delimited by '!'
	189	\& "Hello World" =~ m{World}; # matches, note the matching '{}'
	190	\& "/usr/bin/perl" =~ m"/perl"; # matches after '/usr/bin',
	191	\& # '/' becomes an ordinary char
	192	.Ve
	193	.PP
	194	Regexes must match a part of the string \fIexactly\fR in order for the
	195	statement to be true:
	196	.PP
	197	.Vb 3
	198	\& "Hello World" =~ /world/; # doesn't match, case sensitive
	199	\& "Hello World" =~ /o W/; # matches, ' ' is an ordinary char
	200	\& "Hello World" =~ /World /; # doesn't match, no ' ' at end
	201	.Ve
	202	.PP
	203	perl will always match at the earliest possible point in the string:
	204	.PP
	205	.Vb 2
	206	\& "Hello World" =~ /o/; # matches 'o' in 'Hello'
	207	\& "That hat is red" =~ /hat/; # matches 'hat' in 'That'
	208	.Ve
	209	.PP
	210	Not all characters can be used 'as is' in a match. Some characters,
	211	called \fBmetacharacters\fR, are reserved for use in regex notation.
	212	The metacharacters are
	213	.PP
	214	.Vb 1
	215	\& {}[]()^$.|*+?\e
	216	.Ve
	217	.PP
	218	A metacharacter can be matched by putting a backslash before it:
	219	.PP
	220	.Vb 4
	221	\& "2+2=4" =~ /2+2/; # doesn't match, + is a metacharacter
	222	\& "2+2=4" =~ /2\e+2/; # matches, \e+ is treated like an ordinary +
	223	\& 'C:\eWIN32' =~ /C:\e\eWIN/; # matches
	224	\& "/usr/bin/perl" =~ /\e/usr\e/local\e/bin\e/perl/; # matches
	225	.Ve
	226	.PP
	227	In the last regex, the forward slash \f(CW'/'\fR is also backslashed,
	228	because it is used to delimit the regex.
	229	.PP
	230	Non-printable \s-1ASCII\s0 characters are represented by \fBescape sequences\fR.
	231	Common examples are \f(CW\(C`\et\(C'\fR for a tab, \f(CW\(C`\en\(C'\fR for a newline, and \f(CW\(C`\er\(C'\fR
	232	for a carriage return. Arbitrary bytes are represented by octal
	233	escape sequences, e.g., \f(CW\(C`\e033\(C'\fR, or hexadecimal escape sequences,
	234	e.g., \f(CW\(C`\ex1B\(C'\fR:
	235	.PP
	236	.Vb 2
	237	\& "1000\et2000" =~ m(0\et2) # matches
	238	\& "cat" =~ /\e143\ex61\ex74/ # matches, but a weird way to spell cat
	239	.Ve
	240	.PP
	241	Regexes are treated mostly as double quoted strings, so variable
	242	substitution works:
	243	.PP
	244	.Vb 3
	245	\& $foo = 'house';
	246	\& 'cathouse' =~ /cat$foo/; # matches
	247	\& 'housecat' =~ /${foo}cat/; # matches
	248	.Ve
	249	.PP
	250	With all of the regexes above, if the regex matched anywhere in the
	251	string, it was considered a match. To specify \fIwhere\fR it should
	252	match, we would use the \fBanchor\fR metacharacters \f(CW\(C`^\(C'\fR and \f(CW\(C`$\(C'\fR. The
	253	anchor \f(CW\(C`^\(C'\fR means match at the beginning of the string and the anchor
	254	\&\f(CW\(C`$\(C'\fR means match at the end of the string, or before a newline at the
	255	end of the string. Some examples:
	256	.PP
	257	.Vb 5
	258	\& "housekeeper" =~ /keeper/; # matches
	259	\& "housekeeper" =~ /^keeper/; # doesn't match
	260	\& "housekeeper" =~ /keeper$/; # matches
	261	\& "housekeeper\en" =~ /keeper$/; # matches
	262	\& "housekeeper" =~ /^housekeeper$/; # matches
	263	.Ve
	264	.Sh "Using character classes"
	265	.IX Subsection "Using character classes"
	266	A \fBcharacter class\fR allows a set of possible characters, rather than
	267	just a single character, to match at a particular point in a regex.
	268	Character classes are denoted by brackets \f(CW\(C`[...]\(C'\fR, with the set of
	269	characters to be possibly matched inside. Here are some examples:
	270	.PP
	271	.Vb 3
	272	\& /cat/; # matches 'cat'
	273	\& /[bcr]at/; # matches 'bat', 'cat', or 'rat'
	274	\& "abc" =~ /[cab]/; # matches 'a'
	275	.Ve
	276	.PP
	277	In the last statement, even though \f(CW'c'\fR is the first character in
	278	the class, the earliest point at which the regex can match is \f(CW'a'\fR.
	279	.PP
	280	.Vb 3
	281	\& /[yY][eE][sS]/; # match 'yes' in a case-insensitive way
	282	\& # 'yes', 'Yes', 'YES', etc.
	283	\& /yes/i; # also match 'yes' in a case-insensitive way
	284	.Ve
	285	.PP
	286	The last example shows a match with an \f(CW'i'\fR \fBmodifier\fR, which makes
	287	the match case\-insensitive.
	288	.PP
	289	Character classes also have ordinary and special characters, but the
	290	sets of ordinary and special characters inside a character class are
	291	different than those outside a character class. The special
	292	characters for a character class are \f(CW\(C`\-]\e^$\(C'\fR and are matched using an
	293	escape:
	294	.PP
	295	.Vb 5
	296	\& /[\e]c]def/; # matches ']def' or 'cdef'
	297	\& $x = 'bcr';
	298	\& /[$x]at/; # matches 'bat', 'cat', or 'rat'
	299	\& /[\e$x]at/; # matches '$at' or 'xat'
	300	\& /[\e\e$x]at/; # matches '\eat', 'bat', 'cat', or 'rat'
	301	.Ve
	302	.PP
	303	The special character \f(CW'\-'\fR acts as a range operator within character
	304	classes, so that the unwieldy \f(CW\(C`[0123456789]\(C'\fR and \f(CW\(C`[abc...xyz]\(C'\fR
	305	become the svelte \f(CW\(C`[0\-9]\(C'\fR and \f(CW\(C`[a\-z]\(C'\fR:
	306	.PP
	307	.Vb 2
	308	\& /item[0-9]/; # matches 'item0' or ... or 'item9'
	309	\& /[0-9a-fA-F]/; # matches a hexadecimal digit
	310	.Ve
	311	.PP
	312	If \f(CW'\-'\fR is the first or last character in a character class, it is
	313	treated as an ordinary character.
	314	.PP
	315	The special character \f(CW\(C`^\(C'\fR in the first position of a character class
	316	denotes a \fBnegated character class\fR, which matches any character but
	317	those in the brackets. Both \f(CW\(C`[...]\(C'\fR and \f(CW\(C`[^...]\(C'\fR must match a
	318	character, or the match fails. Then
	319	.PP
	320	.Vb 4
	321	\& /[^a]at/; # doesn't match 'aat' or 'at', but matches
	322	\& # all other 'bat', 'cat', '0at', '%at', etc.
	323	\& /[^0-9]/; # matches a non-numeric character
	324	\& /[a^]at/; # matches 'aat' or '^at'; here '^' is ordinary
	325	.Ve
	326	.PP
	327	Perl has several abbreviations for common character classes:
	328	.IP "\(bu" 4
	329	\&\ed is a digit and represents [0\-9]
	330	.IP "\(bu" 4
	331	\&\es is a whitespace character and represents [\e \et\er\en\ef]
	332	.IP "\(bu" 4
	333	\&\ew is a word character (alphanumeric or _) and represents [0\-9a\-zA\-Z_]
	334	.IP "\(bu" 4
	335	\&\eD is a negated \ed; it represents any character but a digit [^0\-9]
	336	.IP "\(bu" 4
	337	\&\eS is a negated \es; it represents any non-whitespace character [^\es]
	338	.IP "\(bu" 4
	339	\&\eW is a negated \ew; it represents any non-word character [^\ew]
	340	.IP "\(bu" 4
	341	The period '.' matches any character but \*(L"\en\*(R"
	342	.PP
	343	The \f(CW\(C`\ed\es\ew\eD\eS\eW\(C'\fR abbreviations can be used both inside and outside
	344	of character classes. Here are some in use:
	345	.PP
	346	.Vb 7
	347	\& /\ed\ed:\ed\ed:\ed\ed/; # matches a hh:mm:ss time format
	348	\& /[\ed\es]/; # matches any digit or whitespace character
	349	\& /\ew\eW\ew/; # matches a word char, followed by a
	350	\& # non-word char, followed by a word char
	351	\& /..rt/; # matches any two chars, followed by 'rt'
	352	\& /end\e./; # matches 'end.'
	353	\& /end[.]/; # same thing, matches 'end.'
	354	.Ve
	355	.PP
	356	The \fBword\ anchor\fR\ \f(CW\(C`\eb\(C'\fR matches a boundary between a word
	357	character and a non-word character \f(CW\(C`\ew\eW\(C'\fR or \f(CW\(C`\eW\ew\(C'\fR:
	358	.PP
	359	.Vb 4
	360	\& $x = "Housecat catenates house and cat";
	361	\& $x =~ /\ebcat/; # matches cat in 'catenates'
	362	\& $x =~ /cat\eb/; # matches cat in 'housecat'
	363	\& $x =~ /\ebcat\eb/; # matches 'cat' at end of string
	364	.Ve
	365	.PP
	366	In the last example, the end of the string is considered a word
	367	boundary.
	368	.Sh "Matching this or that"
	369	.IX Subsection "Matching this or that"
	370	We can match different character strings with the \fBalternation\fR
	371	metacharacter \f(CW'|'\fR. To match \f(CW\*(C`dog\*(C'\fR or \f(CW\*(C`cat\*(C'\fR, we form the regex
	372	\&\f(CW\*(C`dog|cat\*(C'\fR. As before, perl will try to match the regex at the
	373	earliest possible point in the string. At each character position,
	374	perl will first try to match the first alternative, \f(CW\*(C`dog\*(C'\fR. If
	375	\&\f(CW\*(C`dog\*(C'\fR doesn't match, perl will then try the next alternative, \f(CW\*(C`cat\*(C'\fR.
	376	If \f(CW\*(C`cat\*(C'\fR doesn't match either, then the match fails and perl moves to
	377	the next position in the string. Some examples:
	378	.PP
	379	.Vb 2
	380	\& "cats and dogs" =~ /cat|dog|bird/; # matches "cat"
	381	\& "cats and dogs" =~ /dog|cat|bird/; # matches "cat"
	382	.Ve
	383	.PP
	384	Even though \f(CW\(C`dog\(C'\fR is the first alternative in the second regex,
	385	\&\f(CW\(C`cat\(C'\fR is able to match earlier in the string.
	386	.PP
	387	.Vb 2
	388	\& "cats" =~ /c|ca|cat|cats/; # matches "c"
	389	\& "cats" =~ /cats|cat|ca|c/; # matches "cats"
	390	.Ve
	391	.PP
	392	At a given character position, the first alternative that allows the
	393	regex match to succeed will be the one that matches. Here, all the
	394	alternatives match at the first string position, so the first matches.
	395	.Sh "Grouping things and hierarchical matching"
	396	.IX Subsection "Grouping things and hierarchical matching"
	397	The \fBgrouping\fR metacharacters \f(CW\*(C`()\*(C'\fR allow a part of a regex to be
	398	treated as a single unit. Parts of a regex are grouped by enclosing
	399	them in parentheses. The regex \f(CW\*(C`house(cat|keeper)\*(C'\fR means match
	400	\&\f(CW\*(C`house\*(C'\fR followed by either \f(CW\*(C`cat\*(C'\fR or \f(CW\*(C`keeper\*(C'\fR. Some more examples
	401	are
	402	.PP
	403	.Vb 2
	404	\& /(a|b)b/; # matches 'ab' or 'bb'
	405	\& /(^a|b)c/; # matches 'ac' at start of string or 'bc' anywhere
	406	.Ve
	407	.PP
	408	.Vb 3
	409	\& /house(cat|)/; # matches either 'housecat' or 'house'
	410	\& /house(cat(s|)|)/; # matches either 'housecats' or 'housecat' or
	411	\& # 'house'. Note groups can be nested.
	412	.Ve
	413	.PP
	414	.Vb 2
	415	\& "20" =~ /(19|20|)\ed\ed/; # matches the null alternative '()\ed\ed',
	416	\& # because '20\ed\ed' can't match
	417	.Ve
	418	.Sh "Extracting matches"
	419	.IX Subsection "Extracting matches"
	420	The grouping metacharacters \f(CW\(C`()\(C'\fR also allow the extraction of the
	421	parts of a string that matched. For each grouping, the part that
	422	matched inside goes into the special variables \f(CW$1\fR, \f(CW$2\fR, etc.
	423	They can be used just as ordinary variables:
	424	.PP
	425	.Vb 5
	426	\& # extract hours, minutes, seconds
	427	\& $time =~ /(\ed\ed):(\ed\ed):(\ed\ed)/; # match hh:mm:ss format
	428	\& $hours = $1;
	429	\& $minutes = $2;
	430	\& $seconds = $3;
	431	.Ve
	432	.PP
	433	In list context, a match \f(CW\(C`/regex/\(C'\fR with groupings will return the
	434	list of matched values \f(CW\(C`($1,$2,...)\(C'\fR. So we could rewrite it as
	435	.PP
	436	.Vb 1
	437	\& ($hours, $minutes, $seconds) = ($time =~ /(\ed\ed):(\ed\ed):(\ed\ed)/);
	438	.Ve
	439	.PP
	440	If the groupings in a regex are nested, \f(CW$1\fR gets the group with the
	441	leftmost opening parenthesis, \f(CW$2\fR the next opening parenthesis,
	442	etc. For example, here is a complex regex and the matching variables
	443	indicated below it:
	444	.PP
	445	.Vb 2
	446	\&    /(ab(cd|ef)((gi)|j))/;
	447	\&     1  2      34
	448	.Ve
	449	.PP
	450	Associated with the matching variables \f(CW$1\fR, \f(CW$2\fR, ... are
	451	the \fBbackreferences\fR \f(CW\(C`\e1\(C'\fR, \f(CW\(C`\e2\(C'\fR, ... Backreferences are
	452	matching variables that can be used \fIinside\fR a regex:
	453	.PP
	454	.Vb 1
	455	\& /(\ew\ew\ew)\es\e1/; # find sequences like 'the the' in string
	456	.Ve
	457	.PP
	458	\&\f(CW$1\fR, \f(CW$2\fR, ... should only be used outside of a regex, and \f(CW\(C`\e1\(C'\fR,
	459	\&\f(CW\(C`\e2\(C'\fR, ... only inside a regex.
	460	.Sh "Matching repetitions"
	461	.IX Subsection "Matching repetitions"
	462	The \fBquantifier\fR metacharacters \f(CW\*(C`?\*(C'\fR, \f(CW\*(C`*\*(C'\fR, \f(CW\*(C`+\*(C'\fR, and \f(CW\*(C`{}\*(C'\fR allow us
	463	to determine the number of repeats of a portion of a regex we
	464	consider to be a match. Quantifiers are put immediately after the
	465	character, character class, or grouping that we want to specify. They
	466	have the following meanings:
	467	.IP "\(bu" 4
	468	\&\f(CW\(C`a?\(C'\fR = match 'a' 1 or 0 times
	469	.IP "\(bu" 4
	470	\&\f(CW\*(C`a*\*(C'\fR = match 'a' 0 or more times, i.e., any number of times
	471	.IP "\(bu" 4
	472	\&\f(CW\(C`a+\(C'\fR = match 'a' 1 or more times, i.e., at least once
	473	.IP "\(bu" 4
	474	\&\f(CW\(C`a{n,m}\(C'\fR = match at least \f(CW\(C`n\(C'\fR times, but not more than \f(CW\(C`m\(C'\fR
	475	times.
	476	.IP "\(bu" 4
	477	\&\f(CW\*(C`a{n,}\*(C'\fR = match at least \f(CW\*(C`n\*(C'\fR times
	478	.IP "\(bu" 4
	479	\&\f(CW\(C`a{n}\(C'\fR = match exactly \f(CW\(C`n\(C'\fR times
	480	.PP
	481	Here are some examples:
	482	.PP
	483	.Vb 6
	484	\& /[a-z]+\es+\ed*/; # match a lowercase word, at least some space, and
	485	\& # any number of digits
	486	\& /(\ew+)\es+\e1/; # match doubled words of arbitrary length
	487	\& $year =~ /\ed{2,4}/; # make sure year is at least 2 but not more
	488	\& # than 4 digits
	489	\& $year =~ /\ed{4}|\ed{2}/; # better match; throw out 3 digit dates
	490	.Ve
	491	.PP
	492	These quantifiers will try to match as much of the string as possible,
	493	while still allowing the regex to match. So we have
	494	.PP
	495	.Vb 5
	496	\& $x = 'the cat in the hat';
	497	\& $x =~ /^(.*)(at)(.*)$/; # matches,
	498	\& # $1 = 'the cat in the h'
	499	\& # $2 = 'at'
	500	\& # $3 = '' (0 matches)
	501	.Ve
	502	.PP
	503	The first quantifier \f(CW\*(C`.*\*(C'\fR grabs as much of the string as possible
	504	while still having the regex match. The second quantifier \f(CW\*(C`.*\*(C'\fR has
	505	no string left to it, so it matches 0 times.
	506	.Sh "More matching"
	507	.IX Subsection "More matching"
	508	There are a few more things you might want to know about matching
	509	operators. In the code
	510	.PP
	511	.Vb 4
	512	\& $pattern = 'Seuss';
	513	\& while (<>) {
	514	\& print if /$pattern/;
	515	\& }
	516	.Ve
	517	.PP
	518	perl has to re-evaluate \f(CW$pattern\fR each time through the loop. If
	519	\&\f(CW$pattern\fR won't be changing, use the \f(CW\(C`//o\(C'\fR modifier, to only
	520	perform variable substitutions once. If you don't want any
	521	substitutions at all, use the special delimiter \f(CW\(C`m''\(C'\fR:
	522	.PP
	523	.Vb 2
	524	\& $pattern = 'Seuss';
	525	\& m'$pattern'; # matches '$pattern', not 'Seuss'
	526	.Ve
	527	.PP
	528	The global modifier \f(CW\(C`//g\(C'\fR allows the matching operator to match
	529	within a string as many times as possible. In scalar context,
	530	successive matches against a string will have \f(CW\(C`//g\(C'\fR jump from match
	531	to match, keeping track of position in the string as it goes along.
	532	You can get or set the position with the \f(CW\(C`pos()\(C'\fR function.
	533	For example,
	534	.PP
	535	.Vb 4
	536	\& $x = "cat dog house"; # 3 words
	537	\& while ($x =~ /(\ew+)/g) {
	538	\& print "Word is $1, ends at position ", pos $x, "\en";
	539	\& }
	540	.Ve
	541	.PP
	542	prints
	543	.PP
	544	.Vb 3
	545	\& Word is cat, ends at position 3
	546	\& Word is dog, ends at position 7
	547	\& Word is house, ends at position 13
	548	.Ve
	549	.PP
	550	A failed match or changing the target string resets the position. If
	551	you don't want the position reset after failure to match, add the
	552	\&\f(CW\*(C`//c\*(C'\fR modifier, as in \f(CW\*(C`/regex/gc\*(C'\fR.
	553	.PP
	554	In list context, \f(CW\(C`//g\(C'\fR returns a list of matched groupings, or if
	555	there are no groupings, a list of matches to the whole regex. So
	556	.PP
	557	.Vb 4
	558	\& @words = ($x =~ /(\ew+)/g); # matches,
	559	\& # $words[0] = 'cat'
	560	\& # $words[1] = 'dog'
	561	\& # $words[2] = 'house'
	562	.Ve
	563	.Sh "Search and replace"
	564	.IX Subsection "Search and replace"
	565	Search and replace is performed using \f(CW\(C`s/regex/replacement/modifiers\(C'\fR.
	566	The \f(CW\(C`replacement\(C'\fR is a Perl double quoted string that replaces in the
	567	string whatever is matched with the \f(CW\(C`regex\(C'\fR. The operator \f(CW\(C`=~\(C'\fR is
	568	also used here to associate a string with \f(CW\(C`s///\(C'\fR. If matching
	569	against \f(CW$_\fR, the \f(CW\(C`$_\ =~\(C'\fR\ can be dropped. If there is a match,
	570	\&\f(CW\(C`s///\(C'\fR returns the number of substitutions made, otherwise it returns
	571	false. Here are a few examples:
	572	.PP
	573	.Vb 5
	574	\& $x = "Time to feed the cat!";
	575	\& $x =~ s/cat/hacker/; # $x contains "Time to feed the hacker!"
	576	\& $y = "'quoted words'";
	577	\& $y =~ s/^'(.*)'$/$1/; # strip single quotes,
	578	\& # $y contains "quoted words"
	579	.Ve
	580	.PP
	581	With the \f(CW\(C`s///\(C'\fR operator, the matched variables \f(CW$1\fR, \f(CW$2\fR, etc.
	582	are immediately available for use in the replacement expression. With
	583	the global modifier, \f(CW\(C`s///g\(C'\fR will search and replace all occurrences
	584	of the regex in the string:
	585	.PP
	586	.Vb 4
	587	\& $x = "I batted 4 for 4";
	588	\& $x =~ s/4/four/; # $x contains "I batted four for 4"
	589	\& $x = "I batted 4 for 4";
	590	\& $x =~ s/4/four/g; # $x contains "I batted four for four"
	591	.Ve
	592	.PP
	593	The evaluation modifier \f(CW\(C`s///e\(C'\fR wraps an \f(CW\(C`eval{...}\(C'\fR around the
	594	replacement string and the evaluated result is substituted for the
	595	matched substring. Some examples:
	596	.PP
	597	.Vb 3
	598	\& # reverse all the words in a string
	599	\& $x = "the cat in the hat";
	600	\& $x =~ s/(\ew+)/reverse $1/ge; # $x contains "eht tac ni eht tah"
	601	.Ve
	602	.PP
	603	.Vb 3
	604	\& # convert percentage to decimal
	605	\& $x = "A 39% hit rate";
	606	\& $x =~ s!(\ed+)%!$1/100!e; # $x contains "A 0.39 hit rate"
	607	.Ve
	608	.PP
	609	The last example shows that \f(CW\(C`s///\(C'\fR can use other delimiters, such as
	610	\&\f(CW\(C`s!!!\(C'\fR and \f(CW\(C`s{}{}\(C'\fR, and even \f(CW\(C`s{}//\(C'\fR. If single quotes are used
	611	\&\f(CW\(C`s'''\(C'\fR, then the regex and replacement are treated as single quoted
	612	strings.
	613	.Sh "The split operator"
	614	.IX Subsection "The split operator"
	615	\&\f(CW\(C`split /regex/, string\(C'\fR splits \f(CW\(C`string\(C'\fR into a list of substrings
	616	and returns that list. The regex determines the character sequence
	617	that \f(CW\(C`string\(C'\fR is split with respect to. For example, to split a
	618	string into words, use
	619	.PP
	620	.Vb 4
	621	\& $x = "Calvin and Hobbes";
	622	\& @word = split /\es+/, $x; # $word[0] = 'Calvin'
	623	\& # $word[1] = 'and'
	624	\& # $word[2] = 'Hobbes'
	625	.Ve
	626	.PP
	627	To extract a comma-delimited list of numbers, use
	628	.PP
	629	.Vb 4
	630	\& $x = "1.618,2.718, 3.142";
	631	\& @const = split /,\es*/, $x; # $const[0] = '1.618'
	632	\& # $const[1] = '2.718'
	633	\& # $const[2] = '3.142'
	634	.Ve
	635	.PP
	636	If the empty regex \f(CW\*(C`//\*(C'\fR is used, the string is split into individual
	637	characters. If the regex has groupings, then the list produced contains
	638	the matched substrings from the groupings as well:
	639	.PP
	640	.Vb 6
	641	\& $x = "/usr/bin";
	642	\& @parts = split m!(/)!, $x; # $parts[0] = ''
	643	\& # $parts[1] = '/'
	644	\& # $parts[2] = 'usr'
	645	\& # $parts[3] = '/'
	646	\& # $parts[4] = 'bin'
	647	.Ve
	648	.PP
	649	Since the first character of \f(CW$x\fR matched the regex, \f(CW\(C`split\(C'\fR prepended
	650	an empty initial element to the list.
	651	.SH "BUGS"
	652	.IX Header "BUGS"
	653	None.
	654	.SH "SEE ALSO"
	655	.IX Header "SEE ALSO"
	656	This is just a quick start guide. For a more in-depth tutorial on
	657	regexes, see perlretut and for the reference page, see perlre.
	658	.SH "AUTHOR AND COPYRIGHT"
	659	.IX Header "AUTHOR AND COPYRIGHT"
	660	Copyright (c) 2000 Mark Kvale
	661	All rights reserved.
	662	.PP
	663	This document may be distributed under the same terms as Perl itself.
	664	.Sh "Acknowledgments"
	665	.IX Subsection "Acknowledgments"
	666	The author would like to thank Mark-Jason Dominus, Tom Christiansen,
	667	Ilya Zakharevich, Brad Hughes, and Mike Giroux for all their helpful
	668	comments.