Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / man / man1 / perlrequick.1
CommitLineData
86530b38
AT
1.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sh \" Subsection heading
6.br
7.if t .Sp
8.ne 5
9.PP
10\fB\\$1\fR
11.PP
12..
13.de Sp \" Vertical space (when we can't use .PP)
14.if t .sp .5v
15.if n .sp
16..
17.de Vb \" Begin verbatim text
18.ft CW
19.nf
20.ne \\$1
21..
22.de Ve \" End verbatim text
23.ft R
24.fi
25..
26.\" Set up some character translations and predefined strings. \*(-- will
27.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28.\" double quote, and \*(R" will give a right double quote. | will give a
29.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31.\" expand to `' in nroff, nothing in troff, for use with C<>.
32.tr \(*W-|\(bv\*(Tr
33.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
34.ie n \{\
35. ds -- \(*W-
36. ds PI pi
37. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
39. ds L" ""
40. ds R" ""
41. ds C` ""
42. ds C' ""
43'br\}
44.el\{\
45. ds -- \|\(em\|
46. ds PI \(*p
47. ds L" ``
48. ds R" ''
49'br\}
50.\"
51.\" If the F register is turned on, we'll generate index entries on stderr for
52.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53.\" entries marked with X<> in POD. Of course, you'll have to process the
54.\" output yourself in some meaningful fashion.
55.if \nF \{\
56. de IX
57. tm Index:\\$1\t\\n%\t"\\$2"
58..
59. nr % 0
60. rr F
61.\}
62.\"
63.\" For nroff, turn off justification. Always turn off hyphenation; it makes
64.\" way too many mistakes in technical documents.
65.hy 0
66.if n .na
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "PERLREQUICK 1"
132.TH PERLREQUICK 1 "2002-06-08" "perl v5.8.0" "Perl Programmers Reference Guide"
133.SH "NAME"
134perlrequick \- Perl regular expressions quick start
135.SH "DESCRIPTION"
136.IX Header "DESCRIPTION"
137This page covers the very basics of understanding, creating and
138using regular expressions ('regexes') in Perl.
139.SH "The Guide"
140.IX Header "The Guide"
141.Sh "Simple word matching"
142.IX Subsection "Simple word matching"
143The simplest regex is simply a word, or more generally, a string of
144characters. A regex consisting of a word matches any string that
145contains that word:
146.PP
147.Vb 1
148\& "Hello World" =~ /World/; # matches
149.Ve
150.PP
151In this statement, \f(CW\*(C`World\*(C'\fR is a regex and the \f(CW\*(C`//\*(C'\fR enclosing
152\&\f(CW\*(C`/World/\*(C'\fR tells perl to search a string for a match. The operator
153\&\f(CW\*(C`=~\*(C'\fR associates the string with the regex match and produces a true
154value if the regex matched, or false if the regex did not match. In
155our case, \f(CW\*(C`World\*(C'\fR matches the second word in \f(CW"Hello World"\fR, so the
156expression is true. This idea has several variations.
157.PP
158Expressions like this are useful in conditionals:
159.PP
160.Vb 1
161\& print "It matches\en" if "Hello World" =~ /World/;
162.Ve
163.PP
164The sense of the match can be reversed by using the \f(CW\*(C`!~\*(C'\fR operator:
165.PP
166.Vb 1
167\& print "It doesn't match\en" if "Hello World" !~ /World/;
168.Ve
169.PP
170The literal string in the regex can be replaced by a variable:
171.PP
172.Vb 2
173\& $greeting = "World";
174\& print "It matches\en" if "Hello World" =~ /$greeting/;
175.Ve
176.PP
177If you're matching against \f(CW$_\fR, the \f(CW\*(C`$_ =~\*(C'\fR part can be omitted:
178.PP
179.Vb 2
180\& $_ = "Hello World";
181\& print "It matches\en" if /World/;
182.Ve
183.PP
184Finally, the \f(CW\*(C`//\*(C'\fR default delimiters for a match can be changed to
185arbitrary delimiters by putting an \f(CW'm'\fR out front:
186.PP
187.Vb 4
188\& "Hello World" =~ m!World!; # matches, delimited by '!'
189\& "Hello World" =~ m{World}; # matches, note the matching '{}'
190\& "/usr/bin/perl" =~ m"/perl"; # matches after '/usr/bin',
191\& # '/' becomes an ordinary char
192.Ve
193.PP
194Regexes must match a part of the string \fIexactly\fR in order for the
195statement to be true:
196.PP
197.Vb 3
198\& "Hello World" =~ /world/; # doesn't match, case sensitive
199\& "Hello World" =~ /o W/; # matches, ' ' is an ordinary char
200\& "Hello World" =~ /World /; # doesn't match, no ' ' at end
201.Ve
202.PP
203perl will always match at the earliest possible point in the string:
204.PP
205.Vb 2
206\& "Hello World" =~ /o/; # matches 'o' in 'Hello'
207\& "That hat is red" =~ /hat/; # matches 'hat' in 'That'
208.Ve
209.PP
210Not all characters can be used 'as is' in a match. Some characters,
211called \fBmetacharacters\fR, are reserved for use in regex notation.
212The metacharacters are
213.PP
214.Vb 1
215\& {}[]()^$.|*+?\e
216.Ve
217.PP
218A metacharacter can be matched by putting a backslash before it:
219.PP
220.Vb 4
221\& "2+2=4" =~ /2+2/; # doesn't match, + is a metacharacter
222\& "2+2=4" =~ /2\e+2/; # matches, \e+ is treated like an ordinary +
223\& 'C:\eWIN32' =~ /C:\e\eWIN/; # matches
224\& "/usr/bin/perl" =~ /\e/usr\e/bin\e/perl/; # matches
225.Ve
226.PP
227In the last regex, the forward slash \f(CW'/'\fR is also backslashed,
228because it is used to delimit the regex.
229.PP
230Non-printable \s-1ASCII\s0 characters are represented by \fBescape sequences\fR.
231Common examples are \f(CW\*(C`\et\*(C'\fR for a tab, \f(CW\*(C`\en\*(C'\fR for a newline, and \f(CW\*(C`\er\*(C'\fR
232for a carriage return. Arbitrary bytes are represented by octal
233escape sequences, e.g., \f(CW\*(C`\e033\*(C'\fR, or hexadecimal escape sequences,
234e.g., \f(CW\*(C`\ex1B\*(C'\fR:
235.PP
236.Vb 2
237\& "1000\et2000" =~ m(0\et2) # matches
238\& "cat" =~ /\e143\ex61\ex74/ # matches, but a weird way to spell cat
239.Ve
240.PP
241Regexes are treated mostly as double quoted strings, so variable
242substitution works:
243.PP
244.Vb 3
245\& $foo = 'house';
246\& 'cathouse' =~ /cat$foo/; # matches
247\& 'housecat' =~ /${foo}cat/; # matches
248.Ve
249.PP
250With all of the regexes above, if the regex matched anywhere in the
251string, it was considered a match. To specify \fIwhere\fR it should
252match, we would use the \fBanchor\fR metacharacters \f(CW\*(C`^\*(C'\fR and \f(CW\*(C`$\*(C'\fR. The
253anchor \f(CW\*(C`^\*(C'\fR means match at the beginning of the string and the anchor
254\&\f(CW\*(C`$\*(C'\fR means match at the end of the string, or before a newline at the
255end of the string. Some examples:
256.PP
257.Vb 5
258\& "housekeeper" =~ /keeper/; # matches
259\& "housekeeper" =~ /^keeper/; # doesn't match
260\& "housekeeper" =~ /keeper$/; # matches
261\& "housekeeper\en" =~ /keeper$/; # matches
262\& "housekeeper" =~ /^housekeeper$/; # matches
263.Ve
264.Sh "Using character classes"
265.IX Subsection "Using character classes"
266A \fBcharacter class\fR allows a set of possible characters, rather than
267just a single character, to match at a particular point in a regex.
268Character classes are denoted by brackets \f(CW\*(C`[...]\*(C'\fR, with the set of
269characters to be possibly matched inside. Here are some examples:
270.PP
271.Vb 3
272\& /cat/; # matches 'cat'
273\& /[bcr]at/; # matches 'bat', 'cat', or 'rat'
274\& "abc" =~ /[cab]/; # matches 'a'
275.Ve
276.PP
277In the last statement, even though \f(CW'c'\fR is the first character in
278the class, the earliest point at which the regex can match is \f(CW'a'\fR.
279.PP
280.Vb 3
281\& /[yY][eE][sS]/; # match 'yes' in a case-insensitive way
282\& # 'yes', 'Yes', 'YES', etc.
283\& /yes/i; # also match 'yes' in a case-insensitive way
284.Ve
285.PP
286The last example shows a match with an \f(CW'i'\fR \fBmodifier\fR, which makes
287the match case\-insensitive.
288.PP
289Character classes also have ordinary and special characters, but the
290sets of ordinary and special characters inside a character class are
291different than those outside a character class. The special
292characters for a character class are \f(CW\*(C`\-]\e^$\*(C'\fR and are matched using an
293escape:
294.PP
295.Vb 5
296\& /[\e]c]def/; # matches ']def' or 'cdef'
297\& $x = 'bcr';
298\& /[$x]at/; # matches 'bat', 'cat', or 'rat'
299\& /[\e$x]at/; # matches '$at' or 'xat'
300\& /[\e\e$x]at/; # matches '\eat', 'bat', 'cat', or 'rat'
301.Ve
302.PP
303The special character \f(CW'\-'\fR acts as a range operator within character
304classes, so that the unwieldy \f(CW\*(C`[0123456789]\*(C'\fR and \f(CW\*(C`[abc...xyz]\*(C'\fR
305become the svelte \f(CW\*(C`[0\-9]\*(C'\fR and \f(CW\*(C`[a\-z]\*(C'\fR:
306.PP
307.Vb 2
308\& /item[0-9]/; # matches 'item0' or ... or 'item9'
309\& /[0-9a-fA-F]/; # matches a hexadecimal digit
310.Ve
311.PP
312If \f(CW'\-'\fR is the first or last character in a character class, it is
313treated as an ordinary character.
314.PP
315The special character \f(CW\*(C`^\*(C'\fR in the first position of a character class
316denotes a \fBnegated character class\fR, which matches any character but
317those in the brackets. Both \f(CW\*(C`[...]\*(C'\fR and \f(CW\*(C`[^...]\*(C'\fR must match a
318character, or the match fails. Then
319.PP
320.Vb 4
321\& /[^a]at/; # doesn't match 'aat' or 'at', but matches
322\& # all other 'bat', 'cat', '0at', '%at', etc.
323\& /[^0-9]/; # matches a non-numeric character
324\& /[a^]at/; # matches 'aat' or '^at'; here '^' is ordinary
325.Ve
326.PP
327Perl has several abbreviations for common character classes:
328.IP "\(bu" 4
329\&\ed is a digit and represents [0\-9]
330.IP "\(bu" 4
331\&\es is a whitespace character and represents [\e \et\er\en\ef]
332.IP "\(bu" 4
333\&\ew is a word character (alphanumeric or _) and represents [0\-9a\-zA\-Z_]
334.IP "\(bu" 4
335\&\eD is a negated \ed; it represents any character but a digit [^0\-9]
336.IP "\(bu" 4
337\&\eS is a negated \es; it represents any non-whitespace character [^\es]
338.IP "\(bu" 4
339\&\eW is a negated \ew; it represents any non-word character [^\ew]
340.IP "\(bu" 4
341The period '.' matches any character but \*(L"\en\*(R"
342.PP
343The \f(CW\*(C`\ed\es\ew\eD\eS\eW\*(C'\fR abbreviations can be used both inside and outside
344of character classes. Here are some in use:
345.PP
346.Vb 7
347\& /\ed\ed:\ed\ed:\ed\ed/; # matches a hh:mm:ss time format
348\& /[\ed\es]/; # matches any digit or whitespace character
349\& /\ew\eW\ew/; # matches a word char, followed by a
350\& # non-word char, followed by a word char
351\& /..rt/; # matches any two chars, followed by 'rt'
352\& /end\e./; # matches 'end.'
353\& /end[.]/; # same thing, matches 'end.'
354.Ve
355.PP
356The \fBword\ anchor\fR\ \f(CW\*(C`\eb\*(C'\fR matches a boundary between a word
357character and a non-word character \f(CW\*(C`\ew\eW\*(C'\fR or \f(CW\*(C`\eW\ew\*(C'\fR:
358.PP
359.Vb 4
360\& $x = "Housecat catenates house and cat";
361\& $x =~ /\ebcat/; # matches cat in 'catenates'
362\& $x =~ /cat\eb/; # matches cat in 'Housecat'
363\& $x =~ /\ebcat\eb/; # matches 'cat' at end of string
364.Ve
365.PP
366In the last example, the end of the string is considered a word
367boundary.
368.Sh "Matching this or that"
369.IX Subsection "Matching this or that"
370We can match different character strings with the \fBalternation\fR
371metacharacter \f(CW'|'\fR. To match \f(CW\*(C`dog\*(C'\fR or \f(CW\*(C`cat\*(C'\fR, we form the regex
372\&\f(CW\*(C`dog|cat\*(C'\fR. As before, perl will try to match the regex at the
373earliest possible point in the string. At each character position,
374perl will first try to match the first alternative, \f(CW\*(C`dog\*(C'\fR. If
375\&\f(CW\*(C`dog\*(C'\fR doesn't match, perl will then try the next alternative, \f(CW\*(C`cat\*(C'\fR.
376If \f(CW\*(C`cat\*(C'\fR doesn't match either, then the match fails and perl moves to
377the next position in the string. Some examples:
378.PP
379.Vb 2
380\& "cats and dogs" =~ /cat|dog|bird/; # matches "cat"
381\& "cats and dogs" =~ /dog|cat|bird/; # matches "cat"
382.Ve
383.PP
384Even though \f(CW\*(C`dog\*(C'\fR is the first alternative in the second regex,
385\&\f(CW\*(C`cat\*(C'\fR is able to match earlier in the string.
386.PP
387.Vb 2
388\& "cats" =~ /c|ca|cat|cats/; # matches "c"
389\& "cats" =~ /cats|cat|ca|c/; # matches "cats"
390.Ve
391.PP
392At a given character position, the first alternative that allows the
393regex match to succeed will be the one that matches. Here, all the
394alternatives match at the first string position, so the first matches.
395.Sh "Grouping things and hierarchical matching"
396.IX Subsection "Grouping things and hierarchical matching"
397The \fBgrouping\fR metacharacters \f(CW\*(C`()\*(C'\fR allow a part of a regex to be
398treated as a single unit. Parts of a regex are grouped by enclosing
399them in parentheses. The regex \f(CW\*(C`house(cat|keeper)\*(C'\fR means match
400\&\f(CW\*(C`house\*(C'\fR followed by either \f(CW\*(C`cat\*(C'\fR or \f(CW\*(C`keeper\*(C'\fR. Some more examples
401are
402.PP
403.Vb 2
404\& /(a|b)b/; # matches 'ab' or 'bb'
405\& /(^a|b)c/; # matches 'ac' at start of string or 'bc' anywhere
406.Ve
407.PP
408.Vb 3
409\& /house(cat|)/; # matches either 'housecat' or 'house'
410\& /house(cat(s|)|)/; # matches either 'housecats' or 'housecat' or
411\& # 'house'. Note groups can be nested.
412.Ve
413.PP
414.Vb 2
415\& "20" =~ /(19|20|)\ed\ed/; # matches the null alternative '()\ed\ed',
416\& # because '20\ed\ed' can't match
417.Ve
418.Sh "Extracting matches"
419.IX Subsection "Extracting matches"
420The grouping metacharacters \f(CW\*(C`()\*(C'\fR also allow the extraction of the
421parts of a string that matched. For each grouping, the part that
422matched inside goes into the special variables \f(CW$1\fR, \f(CW$2\fR, etc.
423They can be used just as ordinary variables:
424.PP
425.Vb 5
426\& # extract hours, minutes, seconds
427\& $time =~ /(\ed\ed):(\ed\ed):(\ed\ed)/; # match hh:mm:ss format
428\& $hours = $1;
429\& $minutes = $2;
430\& $seconds = $3;
431.Ve
432.PP
433In list context, a match \f(CW\*(C`/regex/\*(C'\fR with groupings will return the
434list of matched values \f(CW\*(C`($1,$2,...)\*(C'\fR. So we could rewrite it as
435.PP
436.Vb 1
437\& ($hours, $minutes, $seconds) = ($time =~ /(\ed\ed):(\ed\ed):(\ed\ed)/);
438.Ve
439.PP
440If the groupings in a regex are nested, \f(CW$1\fR gets the group with the
441leftmost opening parenthesis, \f(CW$2\fR the next opening parenthesis,
442etc. For example, here is a complex regex and the matching variables
443indicated below it:
444.PP
445.Vb 2
446\& /(ab(cd|ef)((gi)|j))/;
447\& 1 2 34
448.Ve
449.PP
450Associated with the matching variables \f(CW$1\fR, \f(CW$2\fR, ... are
451the \fBbackreferences\fR \f(CW\*(C`\e1\*(C'\fR, \f(CW\*(C`\e2\*(C'\fR, ... Backreferences are
452matching variables that can be used \fIinside\fR a regex:
453.PP
454.Vb 1
455\& /(\ew\ew\ew)\es\e1/; # find sequences like 'the the' in string
456.Ve
457.PP
458\&\f(CW$1\fR, \f(CW$2\fR, ... should only be used outside of a regex, and \f(CW\*(C`\e1\*(C'\fR,
459\&\f(CW\*(C`\e2\*(C'\fR, ... only inside a regex.
460.Sh "Matching repetitions"
461.IX Subsection "Matching repetitions"
462The \fBquantifier\fR metacharacters \f(CW\*(C`?\*(C'\fR, \f(CW\*(C`*\*(C'\fR, \f(CW\*(C`+\*(C'\fR, and \f(CW\*(C`{}\*(C'\fR allow us
463to determine the number of repeats of a portion of a regex we
464consider to be a match. Quantifiers are put immediately after the
465character, character class, or grouping that we want to specify. They
466have the following meanings:
467.IP "\(bu" 4
468\&\f(CW\*(C`a?\*(C'\fR = match 'a' 1 or 0 times
469.IP "\(bu" 4
470\&\f(CW\*(C`a*\*(C'\fR = match 'a' 0 or more times, i.e., any number of times
471.IP "\(bu" 4
472\&\f(CW\*(C`a+\*(C'\fR = match 'a' 1 or more times, i.e., at least once
473.IP "\(bu" 4
474\&\f(CW\*(C`a{n,m}\*(C'\fR = match at least \f(CW\*(C`n\*(C'\fR times, but not more than \f(CW\*(C`m\*(C'\fR
475times.
476.IP "\(bu" 4
477\&\f(CW\*(C`a{n,}\*(C'\fR = match at least \f(CW\*(C`n\*(C'\fR times
478.IP "\(bu" 4
479\&\f(CW\*(C`a{n}\*(C'\fR = match exactly \f(CW\*(C`n\*(C'\fR times
480.PP
481Here are some examples:
482.PP
483.Vb 6
484\& /[a-z]+\es+\ed*/; # match a lowercase word, at least some space, and
485\& # any number of digits
486\& /(\ew+)\es+\e1/; # match doubled words of arbitrary length
487\& $year =~ /\ed{2,4}/; # make sure year is at least 2 but not more
488\& # than 4 digits
489\& $year =~ /\ed{4}|\ed{2}/; # better match; throw out 3 digit dates
490.Ve
491.PP
492These quantifiers will try to match as much of the string as possible,
493while still allowing the regex to match. So we have
494.PP
495.Vb 5
496\& $x = 'the cat in the hat';
497\& $x =~ /^(.*)(at)(.*)$/; # matches,
498\& # $1 = 'the cat in the h'
499\& # $2 = 'at'
500\& # $3 = '' (0 matches)
501.Ve
502.PP
503The first quantifier \f(CW\*(C`.*\*(C'\fR grabs as much of the string as possible
504while still having the regex match. The second quantifier \f(CW\*(C`.*\*(C'\fR has
505no string left to it, so it matches 0 times.
506.Sh "More matching"
507.IX Subsection "More matching"
508There are a few more things you might want to know about matching
509operators. In the code
510.PP
511.Vb 4
512\& $pattern = 'Seuss';
513\& while (<>) {
514\& print if /$pattern/;
515\& }
516.Ve
517.PP
518perl has to re-evaluate \f(CW$pattern\fR each time through the loop. If
519\&\f(CW$pattern\fR won't be changing, use the \f(CW\*(C`//o\*(C'\fR modifier, to only
520perform variable substitutions once. If you don't want any
521substitutions at all, use the special delimiter \f(CW\*(C`m''\*(C'\fR:
522.PP
523.Vb 2
524\& $pattern = 'Seuss';
525\& m'$pattern'; # matches '$pattern', not 'Seuss'
526.Ve
527.PP
528The global modifier \f(CW\*(C`//g\*(C'\fR allows the matching operator to match
529within a string as many times as possible. In scalar context,
530successive matches against a string will have \f(CW\*(C`//g\*(C'\fR jump from match
531to match, keeping track of position in the string as it goes along.
532You can get or set the position with the \f(CW\*(C`pos()\*(C'\fR function.
533For example,
534.PP
535.Vb 4
536\& $x = "cat dog house"; # 3 words
537\& while ($x =~ /(\ew+)/g) {
538\& print "Word is $1, ends at position ", pos $x, "\en";
539\& }
540.Ve
541.PP
542prints
543.PP
544.Vb 3
545\& Word is cat, ends at position 3
546\& Word is dog, ends at position 7
547\& Word is house, ends at position 13
548.Ve
549.PP
550A failed match or changing the target string resets the position. If
551you don't want the position reset after failure to match, add the
552\&\f(CW\*(C`//c\*(C'\fR modifier, as in \f(CW\*(C`/regex/gc\*(C'\fR.
553.PP
554In list context, \f(CW\*(C`//g\*(C'\fR returns a list of matched groupings, or if
555there are no groupings, a list of matches to the whole regex. So
556.PP
557.Vb 4
558\& @words = ($x =~ /(\ew+)/g); # matches,
559\& # $words[0] = 'cat'
560\& # $words[1] = 'dog'
561\& # $words[2] = 'house'
562.Ve
563.Sh "Search and replace"
564.IX Subsection "Search and replace"
565Search and replace is performed using \f(CW\*(C`s/regex/replacement/modifiers\*(C'\fR.
566The \f(CW\*(C`replacement\*(C'\fR is a Perl double quoted string that replaces in the
567string whatever is matched with the \f(CW\*(C`regex\*(C'\fR. The operator \f(CW\*(C`=~\*(C'\fR is
568also used here to associate a string with \f(CW\*(C`s///\*(C'\fR. If matching
569against \f(CW$_\fR, the \f(CW\*(C`$_\ =~\*(C'\fR\ can be dropped. If there is a match,
570\&\f(CW\*(C`s///\*(C'\fR returns the number of substitutions made, otherwise it returns
571false. Here are a few examples:
572.PP
573.Vb 5
574\& $x = "Time to feed the cat!";
575\& $x =~ s/cat/hacker/; # $x contains "Time to feed the hacker!"
576\& $y = "'quoted words'";
577\& $y =~ s/^'(.*)'$/$1/; # strip single quotes,
578\& # $y contains "quoted words"
579.Ve
580.PP
581With the \f(CW\*(C`s///\*(C'\fR operator, the matched variables \f(CW$1\fR, \f(CW$2\fR, etc.
582are immediately available for use in the replacement expression. With
583the global modifier, \f(CW\*(C`s///g\*(C'\fR will search and replace all occurrences
584of the regex in the string:
585.PP
586.Vb 4
587\& $x = "I batted 4 for 4";
588\& $x =~ s/4/four/; # $x contains "I batted four for 4"
589\& $x = "I batted 4 for 4";
590\& $x =~ s/4/four/g; # $x contains "I batted four for four"
591.Ve
592.PP
593The evaluation modifier \f(CW\*(C`s///e\*(C'\fR wraps an \f(CW\*(C`eval{...}\*(C'\fR around the
594replacement string and the evaluated result is substituted for the
595matched substring. Some examples:
596.PP
597.Vb 3
598\& # reverse all the words in a string
599\& $x = "the cat in the hat";
600\& $x =~ s/(\ew+)/reverse $1/ge; # $x contains "eht tac ni eht tah"
601.Ve
602.PP
603.Vb 3
604\& # convert percentage to decimal
605\& $x = "A 39% hit rate";
606\& $x =~ s!(\ed+)%!$1/100!e; # $x contains "A 0.39 hit rate"
607.Ve
608.PP
609The last example shows that \f(CW\*(C`s///\*(C'\fR can use other delimiters, such as
610\&\f(CW\*(C`s!!!\*(C'\fR and \f(CW\*(C`s{}{}\*(C'\fR, and even \f(CW\*(C`s{}//\*(C'\fR. If single quotes are used
611\&\f(CW\*(C`s'''\*(C'\fR, then the regex and replacement are treated as single quoted
612strings.
613.Sh "The split operator"
614.IX Subsection "The split operator"
615\&\f(CW\*(C`split /regex/, string\*(C'\fR splits \f(CW\*(C`string\*(C'\fR into a list of substrings
616and returns that list. The regex determines the character sequence
617that \f(CW\*(C`string\*(C'\fR is split with respect to. For example, to split a
618string into words, use
619.PP
620.Vb 4
621\& $x = "Calvin and Hobbes";
622\& @word = split /\es+/, $x; # $word[0] = 'Calvin'
623\& # $word[1] = 'and'
624\& # $word[2] = 'Hobbes'
625.Ve
626.PP
627To extract a comma-delimited list of numbers, use
628.PP
629.Vb 4
630\& $x = "1.618,2.718, 3.142";
631\& @const = split /,\es*/, $x; # $const[0] = '1.618'
632\& # $const[1] = '2.718'
633\& # $const[2] = '3.142'
634.Ve
635.PP
636If the empty regex \f(CW\*(C`//\*(C'\fR is used, the string is split into individual
637characters. If the regex has groupings, then the list produced contains
638the matched substrings from the groupings as well:
639.PP
640.Vb 6
641\& $x = "/usr/bin";
642\& @parts = split m!(/)!, $x; # $parts[0] = ''
643\& # $parts[1] = '/'
644\& # $parts[2] = 'usr'
645\& # $parts[3] = '/'
646\& # $parts[4] = 'bin'
647.Ve
648.PP
649Since the first character of \f(CW$x\fR matched the regex, \f(CW\*(C`split\*(C'\fR prepended
650an empty initial element to the list.
651.SH "BUGS"
652.IX Header "BUGS"
653None.
654.SH "SEE ALSO"
655.IX Header "SEE ALSO"
656This is just a quick start guide. For a more in-depth tutorial on
657regexes, see perlretut and for the reference page, see perlre.
658.SH "AUTHOR AND COPYRIGHT"
659.IX Header "AUTHOR AND COPYRIGHT"
660Copyright (c) 2000 Mark Kvale
661All rights reserved.
662.PP
663This document may be distributed under the same terms as Perl itself.
664.Sh "Acknowledgments"
665.IX Subsection "Acknowledgments"
666The author would like to thank Mark-Jason Dominus, Tom Christiansen,
667Ilya Zakharevich, Brad Hughes, and Mike Giroux for all their helpful
668comments.