Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / man / man3 / Text::Balanced.3
CommitLineData
86530b38
AT
1.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sh \" Subsection heading
6.br
7.if t .Sp
8.ne 5
9.PP
10\fB\\$1\fR
11.PP
12..
13.de Sp \" Vertical space (when we can't use .PP)
14.if t .sp .5v
15.if n .sp
16..
17.de Vb \" Begin verbatim text
18.ft CW
19.nf
20.ne \\$1
21..
22.de Ve \" End verbatim text
23.ft R
24.fi
25..
26.\" Set up some character translations and predefined strings. \*(-- will
27.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28.\" double quote, and \*(R" will give a right double quote. | will give a
29.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31.\" expand to `' in nroff, nothing in troff, for use with C<>.
32.tr \(*W-|\(bv\*(Tr
33.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
34.ie n \{\
35. ds -- \(*W-
36. ds PI pi
37. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
39. ds L" ""
40. ds R" ""
41. ds C` ""
42. ds C' ""
43'br\}
44.el\{\
45. ds -- \|\(em\|
46. ds PI \(*p
47. ds L" ``
48. ds R" ''
49'br\}
50.\"
51.\" If the F register is turned on, we'll generate index entries on stderr for
52.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53.\" entries marked with X<> in POD. Of course, you'll have to process the
54.\" output yourself in some meaningful fashion.
55.if \nF \{\
56. de IX
57. tm Index:\\$1\t\\n%\t"\\$2"
58..
59. nr % 0
60. rr F
61.\}
62.\"
63.\" For nroff, turn off justification. Always turn off hyphenation; it makes
64.\" way too many mistakes in technical documents.
65.hy 0
66.if n .na
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "BALANCED 1"
132.TH BALANCED 1 "2000-08-20" "perl v5.8.0" "User Contributed Perl Documentation"
133.SH "NAME"
134Text::Balanced \- Extract delimited text sequences from strings.
135.SH "SYNOPSIS"
136.IX Header "SYNOPSIS"
137.Vb 8
138\& use Text::Balanced qw (
139\& extract_delimited
140\& extract_bracketed
141\& extract_quotelike
142\& extract_codeblock
143\& extract_variable
144\& extract_tagged
145\& extract_multiple
146.Ve
147.PP
148.Vb 3
149\& gen_delimited_pat
150\& gen_extract_tagged
151\& );
152.Ve
153.PP
154.Vb 2
155\& # Extract the initial substring of $text that is delimited by
156\& # two (unescaped) instances of the first character in $delim.
157.Ve
158.PP
159.Vb 1
160\& ($extracted, $remainder) = extract_delimited($text,$delim);
161.Ve
162.PP
163.Vb 3
164\& # Extract the initial substring of $text that is bracketed
165\& # with a delimiter(s) specified by $delim (where the string
166\& # in $delim contains one or more of '(){}[]<>').
167.Ve
168.PP
169.Vb 1
170\& ($extracted, $remainder) = extract_bracketed($text,$delim);
171.Ve
172.PP
173.Vb 2
174\& # Extract the initial substring of $text that is bounded by
175\& # an HTML/XML tag.
176.Ve
177.PP
178.Vb 1
179\& ($extracted, $remainder) = extract_tagged($text);
180.Ve
181.PP
182.Vb 2
183\& # Extract the initial substring of $text that is bounded by
184\& # a BEGIN...END pair. Don't allow nested BEGIN tags
185.Ve
186.PP
187.Vb 2
188\& ($extracted, $remainder) =
189\& extract_tagged($text,"BEGIN","END",undef,{reject=>["BEGIN"]});
190.Ve
191.PP
192.Vb 2
193\& # Extract the initial substring of $text that represents a
194\& # Perl "quote or quote-like operation"
195.Ve
196.PP
197.Vb 1
198\& ($extracted, $remainder) = extract_quotelike($text);
199.Ve
200.PP
201.Vb 3
202\& # Extract the initial substring of $text that represents a block
203\& # of Perl code, bracketed by any of character(s) specified by $delim
204\& # (where the string $delim contains one or more of '(){}[]<>').
205.Ve
206.PP
207.Vb 1
208\& ($extracted, $remainder) = extract_codeblock($text,$delim);
209.Ve
210.PP
211.Vb 3
212\& # Extract the initial substrings of $text that would be extracted by
213\& # one or more sequential applications of the specified functions
214\& # or regular expressions
215.Ve
216.PP
217.Vb 7
218\& @extracted = extract_multiple($text,
219\& [ \e&extract_bracketed,
220\& \e&extract_quotelike,
221\& \e&some_other_extractor_sub,
222\& qr/[xyz]*/,
223\& 'literal',
224\& ]);
225.Ve
226.PP
227# Create a string representing an optimized pattern (a la Friedl)
228# that matches a substring delimited by any of the specified characters
229# (in this case: any type of quote or a slash)
230.PP
231.Vb 1
232\& $patstring = gen_delimited_pat(q{'"`/});
233.Ve
234.PP
235# Generate a reference to an anonymous sub that is just like extract_tagged
236# but pre-compiled and optimized for a specific pair of tags, and consequently
237# much faster (i.e. 3 times faster). It uses qr// for better performance on
238# repeated calls, so it only works under Perl 5.005 or later.
239.PP
240.Vb 1
241\& $extract_head = gen_extract_tagged('<HEAD>','</HEAD>');
242.Ve
243.PP
244.Vb 1
245\& ($extracted, $remainder) = $extract_head->($text);
246.Ve
247.SH "DESCRIPTION"
248.IX Header "DESCRIPTION"
249The various \f(CW\*(C`extract_...\*(C'\fR subroutines may be used to extract a
250delimited string (possibly after skipping a specified prefix string).
251The search for the string always begins at the current \f(CW\*(C`pos\*(C'\fR
252location of the string's variable (or at index zero, if no \f(CW\*(C`pos\*(C'\fR
253position is defined).
254.Sh "General behaviour in list contexts"
255.IX Subsection "General behaviour in list contexts"
256In a list context, all the subroutines return a list, the first three
257elements of which are always:
258.IP "[0]" 4
259.IX Item "[0]"
260The extracted string, including the specified delimiters.
261If the extraction fails an empty string is returned.
262.IP "[1]" 4
263.IX Item "[1]"
264The remainder of the input string (i.e. the characters after the
265extracted string). On failure, the entire string is returned.
266.IP "[2]" 4
267.IX Item "[2]"
268The skipped prefix (i.e. the characters before the extracted string).
269On failure, the empty string is returned.
270.PP
271Note that in a list context, the contents of the original input text (the first
272argument) are not modified in any way.
273.PP
274However, if the input text was passed in a variable, that variable's
275\&\f(CW\*(C`pos\*(C'\fR value is updated to point at the first character after the
276extracted text. That means that in a list context the various
277subroutines can be used much like regular expressions. For example:
278.PP
279.Vb 4
280\& while ( $next = (extract_quotelike($text))[0] )
281\& {
282\& # process next quote-like (in $next)
283\& }
284.Ve
285.Sh "General behaviour in scalar and void contexts"
286.IX Subsection "General behaviour in scalar and void contexts"
287In a scalar context, the extracted string is returned, having first been
288removed from the input text. Thus, the following code also processes
289each quote-like operation, but actually removes them from \f(CW$text:\fR
290.PP
291.Vb 4
292\& while ( $next = extract_quotelike($text) )
293\& {
294\& # process next quote-like (in $next)
295\& }
296.Ve
297.PP
298Note that if the input text is a read-only string (i.e. a literal),
299no attempt is made to remove the extracted text.
300.PP
301In a void context the behaviour of the extraction subroutines is
302exactly the same as in a scalar context, except (of course) that the
303extracted substring is not returned.
304.Sh "A note about prefixes"
305.IX Subsection "A note about prefixes"
306Prefix patterns are matched without any trailing modifiers (\f(CW\*(C`/gimsox\*(C'\fR etc.).
307This can bite you if you're expecting a prefix specification like
308\&'.*?(?=<H1>)' to skip everything up to the first <H1> tag. Such a prefix
309pattern will only succeed if the <H1> tag is on the current line, since
310\&. normally doesn't match newlines.
311.PP
312To overcome this limitation, you need to turn on /s matching within
313the prefix pattern, using the \f(CW\*(C`(?s)\*(C'\fR directive: '(?s).*?(?=<H1>)'
314.ie n .Sh """extract_delimited"""
315.el .Sh "\f(CWextract_delimited\fP"
316.IX Subsection "extract_delimited"
317The \f(CW\*(C`extract_delimited\*(C'\fR function formalizes the common idiom
318of extracting a single-character-delimited substring from the start of
319a string. For example, to extract a single-quote delimited string, the
320following code is typically used:
321.PP
322.Vb 2
323\& ($remainder = $text) =~ s/\eA('(\e\e.|[^'])*')//s;
324\& $extracted = $1;
325.Ve
326.PP
327but with \f(CW\*(C`extract_delimited\*(C'\fR it can be simplified to:
328.PP
329.Vb 1
330\& ($extracted,$remainder) = extract_delimited($text, "'");
331.Ve
332.PP
333\&\f(CW\*(C`extract_delimited\*(C'\fR takes up to four scalars (the input text, the
334delimiters, a prefix pattern to be skipped, and any escape characters)
335and extracts the initial substring of the text that
336is appropriately delimited. If the delimiter string has multiple
337characters, the first one encountered in the text is taken to delimit
338the substring.
339The third argument specifies a prefix pattern that is to be skipped
340(but must be present!) before the substring is extracted.
341The final argument specifies the escape character to be used for each
342delimiter.
343.PP
344All arguments are optional. If the escape characters are not specified,
345every delimiter is escaped with a backslash (\f(CW\*(C`\e\*(C'\fR).
346If the prefix is not specified, the
347pattern \f(CW'\es*'\fR \- optional whitespace \- is used. If the delimiter set
348is also not specified, the set \f(CW\*(C`/["'`]/\*(C'\fR is used. If the text to be processed
349is not specified either, \f(CW$_\fR is used.
350.PP
351In list context, \f(CW\*(C`extract_delimited\*(C'\fR returns an array of three
352elements, the extracted substring (\fIincluding the surrounding
353delimiters\fR), the remainder of the text, and the skipped prefix (if
354any). If a suitable delimited substring is not found, the first
355element of the array is the empty string, the second is the complete
356original text, and the prefix returned in the third element is an
357empty string.
358.PP
359In a scalar context, just the extracted substring is returned. In
360a void context, the extracted substring (and any prefix) are simply
361removed from the beginning of the first argument.
362.PP
363Examples:
364.PP
365.Vb 1
366\& # Remove a single-quoted substring from the very beginning of $text:
367.Ve
368.PP
369.Vb 1
370\& $substring = extract_delimited($text, "'", '');
371.Ve
372.PP
373.Vb 3
374\& # Remove a single-quoted Pascalish substring (i.e. one in which
375\& # doubling the quote character escapes it) from the very
376\& # beginning of $text:
377.Ve
378.PP
379.Vb 1
380\& $substring = extract_delimited($text, "'", '', "'");
381.Ve
382.PP
383.Vb 3
384\& # Extract a single- or double- quoted substring from the
385\& # beginning of $text, optionally after some whitespace
386\& # (note the list context to protect $text from modification):
387.Ve
388.PP
389.Vb 1
390\& ($substring) = extract_delimited $text, q{"'};
391.Ve
392.PP
393.Vb 1
394\& # Delete the substring delimited by the first '/' in $text:
395.Ve
396.PP
397.Vb 1
398\& $text = join '', (extract_delimited($text,'/','[^/]*'))[2,1];
399.Ve
400.PP
401Note that this last example is \fInot\fR the same as deleting the first
402quote-like pattern. For instance, if \f(CW$text\fR contained the string:
403.PP
404.Vb 1
405\& "if ('./cmd' =~ m/$UNIXCMD/s) { $cmd = $1; }"
406.Ve
407.PP
408then after the deletion it would contain:
409.PP
410.Vb 1
411\& "if ('.$UNIXCMD/s) { $cmd = $1; }"
412.Ve
413.PP
414not:
415.PP
416.Vb 1
417\& "if ('./cmd' =~ ms) { $cmd = $1; }"
418.Ve
419.PP
420See \*(L"extract_quotelike\*(R" for a (partial) solution to this problem.
421.ie n .Sh """extract_bracketed"""
422.el .Sh "\f(CWextract_bracketed\fP"
423.IX Subsection "extract_bracketed"
424Like \f(CW"extract_delimited"\fR, the \f(CW\*(C`extract_bracketed\*(C'\fR function takes
425up to three optional scalar arguments: a string to extract from, a delimiter
426specifier, and a prefix pattern. As before, a missing prefix defaults to
427optional whitespace and a missing text defaults to \f(CW$_\fR. However, a missing
428delimiter specifier defaults to \f(CW'{}()[]<>'\fR (see below).
429.PP
430\&\f(CW\*(C`extract_bracketed\*(C'\fR extracts a balanced-bracket-delimited
431substring (using any one (or more) of the user-specified delimiter
432brackets: '(..)', '{..}', '[..]', or '<..>'). Optionally it will also
433respect quoted unbalanced brackets (see below).
434.PP
435A \*(L"delimiter bracket\*(R" is a bracket in the list of delimiters passed as
436\&\f(CW\*(C`extract_bracketed\*(C'\fR's second argument. Delimiter brackets are
437specified by giving either the left or right (or both!) versions
438of the required bracket(s). Note that the order in which
439two or more delimiter brackets are specified is not significant.
440.PP
441A \*(L"balanced\-bracket\-delimited substring\*(R" is a substring bounded by
442matched brackets, such that any other (left or right) delimiter
443bracket \fIwithin\fR the substring is also matched by an opposite
444(right or left) delimiter bracket \fIat the same level of nesting\fR. Any
445type of bracket not in the delimiter list is treated as an ordinary
446character.
447.PP
448In other words, each type of bracket specified as a delimiter must be
449balanced and correctly nested within the substring, and any other kind of
450(\*(L"non\-delimiter\*(R") bracket in the substring is ignored.
451.PP
452For example, given the string:
453.PP
454.Vb 1
455\& $text = "{ an '[irregularly :-(] {} parenthesized >:-)' string }";
456.Ve
457.PP
458then a call to \f(CW\*(C`extract_bracketed\*(C'\fR in a list context:
459.PP
460.Vb 1
461\& @result = extract_bracketed( $text, '{}' );
462.Ve
463.PP
464would return:
465.PP
466.Vb 1
467\& ( "{ an '[irregularly :-(] {} parenthesized >:-)' string }" , "" , "" )
468.Ve
469.PP
470since both sets of \f(CW'{..}'\fR brackets are properly nested and evenly balanced.
471(In a scalar context just the first element of the array would be returned. In
472a void context, \f(CW$text\fR would be replaced by an empty string.)
473.PP
474Likewise the call in:
475.PP
476.Vb 1
477\& @result = extract_bracketed( $text, '{[' );
478.Ve
479.PP
480would return the same result, since all sets of both types of specified
481delimiter brackets are correctly nested and balanced.
482.PP
483However, the call in:
484.PP
485.Vb 1
486\& @result = extract_bracketed( $text, '{([<' );
487.Ve
488.PP
489would fail, returning:
490.PP
491.Vb 1
492\& ( undef , "{ an '[irregularly :-(] {} parenthesized >:-)' string }" );
493.Ve
494.PP
495because the embedded pairs of \f(CW'(..)'\fRs and \f(CW'[..]'\fRs are \*(L"cross\-nested\*(R" and
496the embedded \f(CW'>'\fR is unbalanced. (In a scalar context, this call would
497return an empty string. In a void context, \f(CW$text\fR would be unchanged.)
498.PP
499Note that the embedded single-quotes in the string don't help in this
500case, since they have not been specified as acceptable delimiters and are
501therefore treated as non-delimiter characters (and ignored).
502.PP
503However, if a particular species of quote character is included in the
504delimiter specification, then that type of quote will be correctly handled.
505For example, if \f(CW$text\fR is:
506.PP
507.Vb 1
508\& $text = '<A HREF=">>>>">link</A>';
509.Ve
510.PP
511then
512.PP
513.Vb 1
514\& @result = extract_bracketed( $text, '<">' );
515.Ve
516.PP
517returns:
518.PP
519.Vb 1
520\& ( '<A HREF=">>>>">', 'link</A>', "" )
521.Ve
522.PP
523as expected. Without the specification of \f(CW\*(C`"\*(C'\fR as an embedded quoter:
524.PP
525.Vb 1
526\& @result = extract_bracketed( $text, '<>' );
527.Ve
528.PP
529the result would be:
530.PP
531.Vb 1
532\& ( '<A HREF=">', '>>>">link</A>', "" )
533.Ve
534.PP
535In addition to the quote delimiters \f(CW\*(C`'\*(C'\fR, \f(CW\*(C`"\*(C'\fR, and \f(CW\*(C``\*(C'\fR, full Perl quote-like
536quoting (i.e. q{string}, qq{string}, etc) can be specified by including the
537letter 'q' as a delimiter. Hence:
538.PP
539.Vb 1
540\& @result = extract_bracketed( $text, '<q>' );
541.Ve
542.PP
543would correctly match something like this:
544.PP
545.Vb 1
546\& $text = '<leftop: conj /and/ conj>';
547.Ve
548.PP
549See also: \f(CW"extract_quotelike"\fR and \f(CW"extract_codeblock"\fR.
550.ie n .Sh """extract_tagged"""
551.el .Sh "\f(CWextract_tagged\fP"
552.IX Subsection "extract_tagged"
553\&\f(CW\*(C`extract_tagged\*(C'\fR extracts and segments text between (balanced)
554specified tags.
555.PP
556The subroutine takes up to five optional arguments:
557.IP "1." 4
558A string to be processed (\f(CW$_\fR if the string is omitted or \f(CW\*(C`undef\*(C'\fR)
559.IP "2." 4
560A string specifying a pattern to be matched as the opening tag.
561If the pattern string is omitted (or \f(CW\*(C`undef\*(C'\fR) then a pattern
562that matches any standard \s-1HTML/XML\s0 tag is used.
563.IP "3." 4
564A string specifying a pattern to be matched at the closing tag.
565If the pattern string is omitted (or \f(CW\*(C`undef\*(C'\fR) then the closing
566tag is constructed by inserting a \f(CW\*(C`/\*(C'\fR after any leading bracket
567characters in the actual opening tag that was matched (\fInot\fR the pattern
568that matched the tag). For example, if the opening tag pattern
569is specified as \f(CW'{{\ew+}}'\fR and actually matched the opening tag
570\&\f(CW"{{DATA}}"\fR, then the constructed closing tag would be \f(CW"{{/DATA}}"\fR.
571.IP "4." 4
572A string specifying a pattern to be matched as a prefix (which is to be
573skipped). If omitted, optional whitespace is skipped.
574.IP "5." 4
575A hash reference containing various parsing options (see below)
576.PP
577The various options that can be specified are:
578.ie n .IP """reject => $listref""" 4
579.el .IP "\f(CWreject => $listref\fR" 4
580.IX Item "reject => $listref"
581The list reference contains one or more strings specifying patterns
582that must \fInot\fR appear within the tagged text.
583.Sp
584For example, to extract
585an \s-1HTML\s0 link (which should not contain nested links) use:
586.Sp
587.Vb 1
588\& extract_tagged($text, '<A>', '</A>', undef, {reject => ['<A>']} );
589.Ve
590.ie n .IP """ignore => $listref""" 4
591.el .IP "\f(CWignore => $listref\fR" 4
592.IX Item "ignore => $listref"
593The list reference contains one or more strings specifying patterns
594that are \fInot\fR to be treated as nested tags within the tagged text
595(even if they would match the start tag pattern).
596.Sp
597For example, to extract an arbitrary \s-1XML\s0 tag, but ignore \*(L"empty\*(R" elements:
598.Sp
599.Vb 1
600\& extract_tagged($text, undef, undef, undef, {ignore => ['<[^>]*/>']} );
601.Ve
602.Sp
603(also see \*(L"gen_delimited_pat\*(R" below).
604.ie n .IP """fail => $str""" 4
605.el .IP "\f(CWfail => $str\fR" 4
606.IX Item "fail => $str"
607The \f(CW\*(C`fail\*(C'\fR option indicates the action to be taken if a matching end
608tag is not encountered (i.e. before the end of the string or some
609\&\f(CW\*(C`reject\*(C'\fR pattern matches). By default, a failure to match a closing
610tag causes \f(CW\*(C`extract_tagged\*(C'\fR to immediately fail.
611.Sp
612However, if the string value associated with \f(CW\*(C`fail\*(C'\fR is \*(L"\s-1MAX\s0\*(R", then
613\&\f(CW\*(C`extract_tagged\*(C'\fR returns the complete text up to the point of failure.
614If the string is \*(L"\s-1PARA\s0\*(R", \f(CW\*(C`extract_tagged\*(C'\fR returns only the first paragraph
615after the tag (up to the first line that is either empty or contains
616only whitespace characters).
617If the string is "", the default behaviour (i.e. failure) is reinstated.
618.Sp
619For example, suppose the start tag \*(L"/para\*(R" introduces a paragraph, which then
620continues until the next \*(L"/endpara\*(R" tag or until another \*(L"/para\*(R" tag is
621encountered:
622.Sp
623.Vb 1
624\& $text = "/para line 1\en\enline 3\en/para line 4";
625.Ve
626.Sp
627.Vb 2
628\& extract_tagged($text, '/para', '/endpara', undef,
629\& {reject => '/para', fail => MAX} );
630.Ve
631.Sp
632.Vb 1
633\& # EXTRACTED: "/para line 1\en\enline 3\en"
634.Ve
635.Sp
636Suppose instead, that if no matching \*(L"/endpara\*(R" tag is found, the \*(L"/para\*(R"
637tag refers only to the immediately following paragraph:
638.Sp
639.Vb 1
640\& $text = "/para line 1\en\enline 3\en/para line 4";
641.Ve
642.Sp
643.Vb 2
644\& extract_tagged($text, '/para', '/endpara', undef,
645\& {reject => '/para', fail => PARA} );
646.Ve
647.Sp
648.Vb 1
649\& # EXTRACTED: "/para line 1\en"
650.Ve
651.Sp
652Note that the specified \f(CW\*(C`fail\*(C'\fR behaviour applies to nested tags as well.
653.PP
654On success in a list context, an array of 6 elements is returned. The elements are:
655.IP "[0]" 4
656.IX Item "[0]"
657the extracted tagged substring (including the outermost tags),
658.IP "[1]" 4
659.IX Item "[1]"
660the remainder of the input text,
661.IP "[2]" 4
662.IX Item "[2]"
663the prefix substring (if any),
664.IP "[3]" 4
665.IX Item "[3]"
666the opening tag
667.IP "[4]" 4
668.IX Item "[4]"
669the text between the opening and closing tags
670.IP "[5]" 4
671.IX Item "[5]"
672the closing tag (or "" if no closing tag was found)
673.PP
674On failure, all of these values (except the remaining text) are \f(CW\*(C`undef\*(C'\fR.
675.PP
676In a scalar context, \f(CW\*(C`extract_tagged\*(C'\fR returns just the complete
677substring that matched a tagged text (including the start and end
678tags). \f(CW\*(C`undef\*(C'\fR is returned on failure. In addition, the original input
679text has the returned substring (and any prefix) removed from it.
680.PP
681In a void context, the input text just has the matched substring (and
682any specified prefix) removed.
683.ie n .Sh """gen_extract_tagged"""
684.el .Sh "\f(CWgen_extract_tagged\fP"
685.IX Subsection "gen_extract_tagged"
686(Note: This subroutine is only available under Perl 5.005 or later)
687.PP
688\&\f(CW\*(C`gen_extract_tagged\*(C'\fR generates a new anonymous subroutine which
689extracts text between (balanced) specified tags. In other words,
690it generates a function identical in function to \f(CW\*(C`extract_tagged\*(C'\fR.
691.PP
692The difference between \f(CW\*(C`extract_tagged\*(C'\fR and the anonymous
693subroutines generated by
694\&\f(CW\*(C`gen_extract_tagged\*(C'\fR, is that those generated subroutines:
695.IP "\(bu" 4
696do not have to reparse tag specification or parsing options every time
697they are called (whereas \f(CW\*(C`extract_tagged\*(C'\fR has to effectively rebuild
698its tag parser on every call);
699.IP "\(bu" 4
700make use of the new qr// construct to pre-compile the regexes they use
701(whereas \f(CW\*(C`extract_tagged\*(C'\fR uses standard string variable interpolation
702to create tag-matching patterns).
703.PP
704The subroutine takes up to four optional arguments (the same set as
705\&\f(CW\*(C`extract_tagged\*(C'\fR except for the string to be processed). It returns
706a reference to a subroutine which in turn takes a single argument (the text to
707be extracted from).
708.PP
709In other words, the implementation of \f(CW\*(C`extract_tagged\*(C'\fR is exactly
710equivalent to:
711.PP
712.Vb 6
713\& sub extract_tagged
714\& {
715\& my $text = shift;
716\& $extractor = gen_extract_tagged(@_);
717\& return $extractor->($text);
718\& }
719.Ve
720.PP
721(although \f(CW\*(C`extract_tagged\*(C'\fR is not currently implemented that way, in order
722to preserve pre\-5.005 compatibility).
723.PP
724Using \f(CW\*(C`gen_extract_tagged\*(C'\fR to create extraction functions for specific tags
725is a good idea if those functions are going to be called more than once, since
726their performance is typically twice as good as the more general-purpose
727\&\f(CW\*(C`extract_tagged\*(C'\fR.
728.ie n .Sh """extract_quotelike"""
729.el .Sh "\f(CWextract_quotelike\fP"
730.IX Subsection "extract_quotelike"
731\&\f(CW\*(C`extract_quotelike\*(C'\fR attempts to recognize, extract, and segment any
732one of the various Perl quotes and quotelike operators (see
733\&\fIperlop\fR\|(3)). Nested backslashed delimiters, embedded balanced bracket
734delimiters (for the quotelike operators), and trailing modifiers are
735all caught. For example, in:
736.PP
737.Vb 1
738\& extract_quotelike 'q # an octothorpe: \e# (not the end of the q!) #'
739.Ve
740.PP
741.Vb 1
742\& extract_quotelike ' "You said, \e"Use sed\e"." '
743.Ve
744.PP
745.Vb 1
746\& extract_quotelike ' s{([A-Z]{1,8}\e.[A-Z]{3})} /\eL$1\eE/; '
747.Ve
748.PP
749.Vb 1
750\& extract_quotelike ' tr/\e\e\e/\e\e\e\e/\e\e\e//ds; '
751.Ve
752.PP
753the full Perl quotelike operations are all extracted correctly.
754.PP
755Note too that, when using the /x modifier on a regex, any comment
756containing the current pattern delimiter will cause the regex to be
757immediately terminated. In other words:
758.PP
759.Vb 5
760\& 'm /
761\& (?i) # CASE INSENSITIVE
762\& [a-z_] # LEADING ALPHABETIC/UNDERSCORE
763\& [a-z0-9]* # FOLLOWED BY ANY NUMBER OF ALPHANUMERICS
764\& /x'
765.Ve
766.PP
767will be extracted as if it were:
768.PP
769.Vb 3
770\& 'm /
771\& (?i) # CASE INSENSITIVE
772\& [a-z_] # LEADING ALPHABETIC/'
773.Ve
774.PP
775This behaviour is identical to that of the Perl 5.004 interpreter.
776.PP
777\&\f(CW\*(C`extract_quotelike\*(C'\fR takes two arguments: the text to be processed and
778a prefix to be matched at the very beginning of the text. If no prefix
779is specified, optional whitespace is the default. If no text is given,
780\&\f(CW$_\fR is used.
781.PP
782In a list context, an array of 11 elements is returned. The elements are:
783.IP "[0]" 4
784.IX Item "[0]"
785the extracted quotelike substring (including trailing modifiers),
786.IP "[1]" 4
787.IX Item "[1]"
788the remainder of the input text,
789.IP "[2]" 4
790.IX Item "[2]"
791the prefix substring (if any),
792.IP "[3]" 4
793.IX Item "[3]"
794the name of the quotelike operator (if any),
795.IP "[4]" 4
796.IX Item "[4]"
797the left delimiter of the first block of the operation,
798.IP "[5]" 4
799.IX Item "[5]"
800the text of the first block of the operation
801(that is, the contents of
802a quote, the regex of a match or substitution or the target list of a
803translation),
804.IP "[6]" 4
805.IX Item "[6]"
806the right delimiter of the first block of the operation,
807.IP "[7]" 4
808.IX Item "[7]"
809the left delimiter of the second block of the operation
810(that is, if it is a \f(CW\*(C`s\*(C'\fR, \f(CW\*(C`tr\*(C'\fR, or \f(CW\*(C`y\*(C'\fR),
811.IP "[8]" 4
812.IX Item "[8]"
813the text of the second block of the operation
814(that is, the replacement of a substitution or the translation list
815of a translation),
816.IP "[9]" 4
817.IX Item "[9]"
818the right delimiter of the second block of the operation (if any),
819.IP "[10]" 4
820.IX Item "[10]"
821the trailing modifiers on the operation (if any).
822.PP
823For each of the fields marked \*(L"(if any)\*(R" the default value on success is
824an empty string.
825On failure, all of these values (except the remaining text) are \f(CW\*(C`undef\*(C'\fR.
826.PP
827In a scalar context, \f(CW\*(C`extract_quotelike\*(C'\fR returns just the complete substring
828that matched a quotelike operation (or \f(CW\*(C`undef\*(C'\fR on failure). In a scalar or
829void context, the input text has the same substring (and any specified
830prefix) removed.
831.PP
832Examples:
833.PP
834.Vb 1
835\& # Remove the first quotelike literal that appears in text
836.Ve
837.PP
838.Vb 1
839\& $quotelike = extract_quotelike($text,'.*?');
840.Ve
841.PP
842.Vb 2
843\& # Replace one or more leading whitespace-separated quotelike
844\& # literals in $_ with "<QLL>"
845.Ve
846.PP
847.Vb 1
848\& do { $_ = join '<QLL>', (extract_quotelike)[2,1] } until $@;
849.Ve
850.PP
851.Vb 1
852\& # Isolate the search pattern in a quotelike operation from $text
853.Ve
854.PP
855.Vb 9
856\& ($op,$pat) = (extract_quotelike $text)[3,5];
857\& if ($op =~ /[ms]/)
858\& {
859\& print "search pattern: $pat\en";
860\& }
861\& else
862\& {
863\& print "$op is not a pattern matching operation\en";
864\& }
865.Ve
866.ie n .Sh """extract_codeblock"""
867.el .Sh "\f(CWextract_codeblock\fP"
868.IX Subsection "extract_codeblock"
869\&\f(CW\*(C`extract_codeblock\*(C'\fR attempts to recognize and extract a balanced
870bracket delimited substring that may contain unbalanced brackets
871inside Perl quotes or quotelike operations. That is, \f(CW\*(C`extract_codeblock\*(C'\fR
872is like a combination of \f(CW"extract_bracketed"\fR and
873\&\f(CW"extract_quotelike"\fR.
874.PP
875\&\f(CW\*(C`extract_codeblock\*(C'\fR takes the same initial three parameters as \f(CW\*(C`extract_bracketed\*(C'\fR:
876a text to process, a set of delimiter brackets to look for, and a prefix to
877match first. It also takes an optional fourth parameter, which allows the
878outermost delimiter brackets to be specified separately (see below).
879.PP
880Omitting the first argument (input text) means process \f(CW$_\fR instead.
881Omitting the second argument (delimiter brackets) indicates that only \f(CW'{'\fR is to be used.
882Omitting the third argument (prefix argument) implies optional whitespace at the start.
883Omitting the fourth argument (outermost delimiter brackets) indicates that the
884value of the second argument is to be used for the outermost delimiters.
885.PP
886Once the prefix and the outermost opening delimiter bracket have been
887recognized, code blocks are extracted by stepping through the input text and
888trying the following alternatives in sequence:
889.IP "1." 4
890Try and match a closing delimiter bracket. If the bracket was the same
891species as the last opening bracket, return the substring to that
892point. If the bracket was mismatched, return an error.
893.IP "2." 4
894Try to match a quote or quotelike operator. If found, call
895\&\f(CW\*(C`extract_quotelike\*(C'\fR to eat it. If \f(CW\*(C`extract_quotelike\*(C'\fR fails, return
896the error it returned. Otherwise go back to step 1.
897.IP "3." 4
898Try to match an opening delimiter bracket. If found, call
899\&\f(CW\*(C`extract_codeblock\*(C'\fR recursively to eat the embedded block. If the
900recursive call fails, return an error. Otherwise, go back to step 1.
901.IP "4." 4
902Unconditionally match a bareword or any other single character, and
903then go back to step 1.
904.PP
905Examples:
906.PP
907.Vb 1
908\& # Find a while loop in the text
909.Ve
910.PP
911.Vb 4
912\& if ($text =~ s/.*?while\es*\e{/{/)
913\& {
914\& $loop = "while " . extract_codeblock($text);
915\& }
916.Ve
917.PP
918.Vb 2
919\& # Remove the first round-bracketed list (which may include
920\& # round- or curly-bracketed code blocks or quotelike operators)
921.Ve
922.PP
923.Vb 1
924\& extract_codeblock $text, "(){}", '[^(]*';
925.Ve
926.PP
927The ability to specify a different outermost delimiter bracket is useful
928in some circumstances. For example, in the Parse::RecDescent module,
929parser actions which are to be performed only on a successful parse
930are specified using a \f(CW\*(C`<defer:...>\*(C'\fR directive. For example:
931.PP
932.Vb 2
933\& sentence: subject verb object
934\& <defer: {$::theVerb = $item{verb}} >
935.Ve
936.PP
937Parse::RecDescent uses \f(CW\*(C`extract_codeblock($text, '{}<>')\*(C'\fR to extract the code
938within the \f(CW\*(C`<defer:...>\*(C'\fR directive, but there's a problem.
939.PP
940A deferred action like this:
941.PP
942.Vb 1
943\& <defer: {if ($count>10) {$count--}} >
944.Ve
945.PP
946will be incorrectly parsed as:
947.PP
948.Vb 1
949\& <defer: {if ($count>
950.Ve
951.PP
952because the \*(L"less than\*(R" operator is interpreted as a closing delimiter.
953.PP
954But, by extracting the directive using
955\&\f(CW\*(C`extract_codeblock($text,\ '{}',\ undef,\ '<>')\*(C'\fR
956the '>' character is only treated as a delimiter at the outermost
957level of the code block, so the directive is parsed correctly.
958.ie n .Sh """extract_multiple"""
959.el .Sh "\f(CWextract_multiple\fP"
960.IX Subsection "extract_multiple"
961The \f(CW\*(C`extract_multiple\*(C'\fR subroutine takes a string to be processed and a
962list of extractors (subroutines or regular expressions) to apply to that string.
963.PP
964In an array context \f(CW\*(C`extract_multiple\*(C'\fR returns an array of substrings
965of the original string, as extracted by the specified extractors.
966In a scalar context, \f(CW\*(C`extract_multiple\*(C'\fR returns the first
967substring successfully extracted from the original string. In both
968scalar and void contexts the original string has the first successfully
969extracted substring removed from it. In all contexts
970\&\f(CW\*(C`extract_multiple\*(C'\fR starts at the current \f(CW\*(C`pos\*(C'\fR of the string, and
971sets that \f(CW\*(C`pos\*(C'\fR appropriately after it matches.
972.PP
973Hence, the aim of a call to \f(CW\*(C`extract_multiple\*(C'\fR in a list context
974is to split the processed string into as many non-overlapping fields as
975possible, by repeatedly applying each of the specified extractors
976to the remainder of the string. Thus \f(CW\*(C`extract_multiple\*(C'\fR is
977a generalized form of Perl's \f(CW\*(C`split\*(C'\fR subroutine.
978.PP
979The subroutine takes up to four optional arguments:
980.IP "1." 4
981A string to be processed (\f(CW$_\fR if the string is omitted or \f(CW\*(C`undef\*(C'\fR)
982.IP "2." 4
983A reference to a list of subroutine references and/or qr// objects and/or
984literal strings and/or hash references, specifying the extractors
985to be used to split the string. If this argument is omitted (or
986\&\f(CW\*(C`undef\*(C'\fR) the list:
987.Sp
988.Vb 5
989\& [
990\& sub { extract_variable($_[0], '') },
991\& sub { extract_quotelike($_[0],'') },
992\& sub { extract_codeblock($_[0],'{}','') },
993\& ]
994.Ve
995.Sp
996is used.
997.IP "3." 4
998A number specifying the maximum number of fields to return. If this
999argument is omitted (or \f(CW\*(C`undef\*(C'\fR), split continues as long as possible.
1000.Sp
1001If the third argument is \fIN\fR, then extraction continues until \fIN\fR fields
1002have been successfully extracted, or until the string has been completely
1003processed.
1004.Sp
1005Note that in scalar and void contexts the value of this argument is
1006automatically reset to 1 (under \f(CW\*(C`\-w\*(C'\fR, a warning is issued if the argument
1007has to be reset).
1008.IP "4." 4
1009A value indicating whether unmatched substrings (see below) within the
1010text should be skipped or returned as fields. If the value is true,
1011such substrings are skipped. Otherwise, they are returned.
1012.PP
1013The extraction process works by applying each extractor in
1014sequence to the text string. If the extractor is a subroutine it
1015is called in a list
1016context and is expected to return a list of a single element, namely
1017the extracted text.
1018Note that the value returned by an extractor subroutine need not bear any
1019relationship to the corresponding substring of the original text (see
1020examples below).
1021.PP
1022If the extractor is a precompiled regular expression or a string,
1023it is matched against the text in a scalar context with a leading
1024\&'\eG' and the gc modifiers enabled. The extracted value is either
1025\&\f(CW$1\fR if that variable is defined after the match, or else the
1026complete match (i.e. $&).
1027.PP
1028If the extractor is a hash reference, it must contain exactly one element.
1029The value of that element is one of the
1030above extractor types (subroutine reference, regular expression, or string).
1031The key of that element is the name of a class into which the successful
1032return value of the extractor will be blessed.
1033.PP
1034If an extractor returns a defined value, that value is immediately
1035treated as the next extracted field and pushed onto the list of fields.
1036If the extractor was specified in a hash reference, the field is also
1037blessed into the appropriate class.
1038.PP
1039If the extractor fails to match (in the case of a regex extractor), or returns an empty list or an undefined value (in the case of a subroutine extractor), it is
1040assumed to have failed to extract.
1041If none of the extractor subroutines succeeds, then one
1042character is extracted from the start of the text and the extraction
1043subroutines reapplied. Characters which are thus removed are accumulated and
1044eventually become the next field (unless the fourth argument is true, in which
1045case they are discarded).
1046.PP
1047For example, the following extracts substrings that are valid Perl variables:
1048.PP
1049.Vb 3
1050\& @fields = extract_multiple($text,
1051\& [ sub { extract_variable($_[0]) } ],
1052\& undef, 1);
1053.Ve
1054.PP
1055This example separates a text into fields which are quote delimited,
1056curly bracketed, and anything else. The delimited and bracketed
1057parts are also blessed to identify them (the \*(L"anything else\*(R" is unblessed):
1058.PP
1059.Vb 5
1060\& @fields = extract_multiple($text,
1061\& [
1062\& { Delim => sub { extract_delimited($_[0],q{'"}) } },
1063\& { Brack => sub { extract_bracketed($_[0],'{}') } },
1064\& ]);
1065.Ve
1066.PP
1067This call extracts the next single substring that is a valid Perl quotelike
1068operator (and removes it from \f(CW$text\fR):
1069.PP
1070.Vb 4
1071\& $quotelike = extract_multiple($text,
1072\& [
1073\& sub { extract_quotelike($_[0]) },
1074\& ], undef, 1);
1075.Ve
1076.PP
1077Finally, here is yet another way to do comma-separated value parsing:
1078.PP
1079.Vb 6
1080\& @fields = extract_multiple($csv_text,
1081\& [
1082\& sub { extract_delimited($_[0],q{'"}) },
1083\& qr/([^,]+)(.*)/,
1084\& ],
1085\& undef,1);
1086.Ve
1087.PP
1088The list in the second argument means:
1089\&\fI\*(L"Try and extract a ' or " delimited string, otherwise extract anything up to a comma...\*(R"\fR.
1090The undef third argument means:
1091\&\fI\*(L"...as many times as possible...\*(R"\fR,
1092and the true value in the fourth argument means
1093\&\fI\*(L"...discarding anything else that appears (i.e. the commas)\*(R"\fR.
1094.PP
1095If you wanted the commas preserved as separate fields (i.e. like split
1096does if your split pattern has capturing parentheses), you would
1097just make the last parameter undefined (or remove it).
1098.ie n .Sh """gen_delimited_pat"""
1099.el .Sh "\f(CWgen_delimited_pat\fP"
1100.IX Subsection "gen_delimited_pat"
1101The \f(CW\*(C`gen_delimited_pat\*(C'\fR subroutine takes a single (string) argument and
1102builds a Friedl-style optimized regex that matches a string delimited
1103by any one of the characters in the single argument. For example:
1104.PP
1105.Vb 1
1106\& gen_delimited_pat(q{'"})
1107.Ve
1108.PP
1109returns the regex:
1110.PP
1111.Vb 1
1112\& (?:\e"(?:\e\e\e"|(?!\e").)*\e"|\e'(?:\e\e\e'|(?!\e').)*\e')
1113.Ve
1114.PP
1115Note that the specified delimiters are automatically quotemeta'd.
1116.PP
1117A typical use of \f(CW\*(C`gen_delimited_pat\*(C'\fR would be to build special purpose tags
1118for \f(CW\*(C`extract_tagged\*(C'\fR. For example, to properly ignore \*(L"empty\*(R" \s-1XML\s0 elements
1119(which might contain quoted strings):
1120.PP
1121.Vb 1
1122\& my $empty_tag = '<(' . gen_delimited_pat(q{'"}) . '|.)+/>';
1123.Ve
1124.PP
1125.Vb 1
1126\& extract_tagged($text, undef, undef, undef, {ignore => [$empty_tag]} );
1127.Ve
1128.PP
1129\&\f(CW\*(C`gen_delimited_pat\*(C'\fR may also be called with an optional second argument,
1130which specifies the \*(L"escape\*(R" character(s) to be used for each delimiter.
1131For example, to match a Pascal-style string (where ' is the delimiter
1132and '' is a literal ' within the string):
1133.PP
1134.Vb 1
1135\& gen_delimited_pat(q{'},q{'});
1136.Ve
1137.PP
1138Different escape characters can be specified for different delimiters.
1139For example, to specify that '/' is the escape for single quotes
1140and '%' is the escape for double quotes:
1141.PP
1142.Vb 1
1143\& gen_delimited_pat(q{'"},q{/%});
1144.Ve
1145.PP
1146If more delimiters than escape chars are specified, the last escape char
1147is used for the remaining delimiters.
1148If no escape char is specified for a given specified delimiter, '\e' is used.
1149.PP
1150Note that
1151\&\f(CW\*(C`gen_delimited_pat\*(C'\fR was previously called
1152\&\f(CW\*(C`delimited_pat\*(C'\fR. That name may still be used, but is now deprecated.
1153.SH "DIAGNOSTICS"
1154.IX Header "DIAGNOSTICS"
1155In a list context, all the functions return \f(CW\*(C`(undef,$original_text)\*(C'\fR
1156on failure. In a scalar context, failure is indicated by returning \f(CW\*(C`undef\*(C'\fR
1157(in this case the input text is not modified in any way).
1158.PP
1159In addition, on failure in \fIany\fR context, one of the following explanatory
1160diagnostic messages is returned in the standard \f(CW$@\fR variable (on success the
1161\&\f(CW$@\fR variable is guaranteed to be \f(CW\*(C`undef\*(C'\fR):
1162.ie n .IP """Did not find a suitable bracket: ""%s""""" 4
1163.el .IP "\f(CWDid not find a suitable bracket: ``%s''\fR" 4
1164.IX Item "Did not find a suitable bracket: ""%s"""
1165The delimiter provided to \f(CW\*(C`extract_bracketed\*(C'\fR was not one of
1166\&\f(CW'()[]<>{}'\fR.
1167.ie n .IP """Did not find prefix: /%s/""" 4
1168.el .IP "\f(CWDid not find prefix: /%s/\fR" 4
1169.IX Item "Did not find prefix: /%s/"
1170A non-optional prefix was specified but wasn't found at the start of the text.
1171.ie n .IP """Did not find opening bracket after prefix: ""%s""""" 4
1172.el .IP "\f(CWDid not find opening bracket after prefix: ``%s''\fR" 4
1173.IX Item "Did not find opening bracket after prefix: ""%s"""
1174\&\f(CW\*(C`extract_bracketed\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR was expecting a
1175particular kind of bracket at the start of the text, and didn't find it.
1176.ie n .IP """No quotelike operator found after prefix: ""%s""""" 4
1177.el .IP "\f(CWNo quotelike operator found after prefix: ``%s''\fR" 4
1178.IX Item "No quotelike operator found after prefix: ""%s"""
1179\&\f(CW\*(C`extract_quotelike\*(C'\fR didn't find one of the quotelike operators \f(CW\*(C`q\*(C'\fR,
1180\&\f(CW\*(C`qq\*(C'\fR, \f(CW\*(C`qw\*(C'\fR, \f(CW\*(C`qx\*(C'\fR, \f(CW\*(C`s\*(C'\fR, \f(CW\*(C`tr\*(C'\fR or \f(CW\*(C`y\*(C'\fR at the start of the substring
1181it was extracting.
1182.ie n .IP """Unmatched closing bracket: ""%c""""" 4
1183.el .IP "\f(CWUnmatched closing bracket: ``%c''\fR" 4
1184.IX Item "Unmatched closing bracket: ""%c"""
1185\&\f(CW\*(C`extract_bracketed\*(C'\fR, \f(CW\*(C`extract_quotelike\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR encountered
1186a closing bracket where none was expected.
1187.ie n .IP """Unmatched opening bracket(s): ""%s""""" 4
1188.el .IP "\f(CWUnmatched opening bracket(s): ``%s''\fR" 4
1189.IX Item "Unmatched opening bracket(s): ""%s"""
1190\&\f(CW\*(C`extract_bracketed\*(C'\fR, \f(CW\*(C`extract_quotelike\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR ran
1191out of characters in the text before closing one or more levels of nested
1192brackets.
1193.ie n .IP """Unmatched embedded quote (%s)""" 4
1194.el .IP "\f(CWUnmatched embedded quote (%s)\fR" 4
1195.IX Item "Unmatched embedded quote (%s)"
1196\&\f(CW\*(C`extract_bracketed\*(C'\fR attempted to match an embedded quoted substring, but
1197failed to find a closing quote to match it.
1198.ie n .IP """Did not find closing delimiter to match '%s'""" 4
1199.el .IP "\f(CWDid not find closing delimiter to match '%s'\fR" 4
1200.IX Item "Did not find closing delimiter to match '%s'"
1201\&\f(CW\*(C`extract_quotelike\*(C'\fR was unable to find a closing delimiter to match the
1202one that opened the quote-like operation.
1203.ie n .IP """Mismatched closing bracket: expected ""%c"" but found ""%s""""" 4
1204.el .IP "\f(CWMismatched closing bracket: expected ``%c'' but found ``%s''\fR" 4
1205.IX Item "Mismatched closing bracket: expected ""%c"" but found ""%s"""
1206\&\f(CW\*(C`extract_bracketed\*(C'\fR, \f(CW\*(C`extract_quotelike\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR found
1207a valid bracket delimiter, but it was the wrong species. This usually
1208indicates a nesting error, but may indicate incorrect quoting or escaping.
1209.ie n .IP """No block delimiter found after quotelike ""%s""""" 4
1210.el .IP "\f(CWNo block delimiter found after quotelike ``%s''\fR" 4
1211.IX Item "No block delimiter found after quotelike ""%s"""
1212\&\f(CW\*(C`extract_quotelike\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR found one of the
1213quotelike operators \f(CW\*(C`q\*(C'\fR, \f(CW\*(C`qq\*(C'\fR, \f(CW\*(C`qw\*(C'\fR, \f(CW\*(C`qx\*(C'\fR, \f(CW\*(C`s\*(C'\fR, \f(CW\*(C`tr\*(C'\fR or \f(CW\*(C`y\*(C'\fR
1214without a suitable block after it.
1215.ie n .IP """Did not find leading dereferencer""" 4
1216.el .IP "\f(CWDid not find leading dereferencer\fR" 4
1217.IX Item "Did not find leading dereferencer"
1218\&\f(CW\*(C`extract_variable\*(C'\fR was expecting one of '$', '@', or '%' at the start of
1219a variable, but didn't find any of them.
1220.ie n .IP """Bad identifier after dereferencer""" 4
1221.el .IP "\f(CWBad identifier after dereferencer\fR" 4
1222.IX Item "Bad identifier after dereferencer"
1223\&\f(CW\*(C`extract_variable\*(C'\fR found a '$', '@', or '%' indicating a variable, but that
1224character was not followed by a legal Perl identifier.
1225.ie n .IP """Did not find expected opening bracket at %s""" 4
1226.el .IP "\f(CWDid not find expected opening bracket at %s\fR" 4
1227.IX Item "Did not find expected opening bracket at %s"
1228\&\f(CW\*(C`extract_codeblock\*(C'\fR failed to find any of the outermost opening brackets
1229that were specified.
1230.ie n .IP """Improperly nested codeblock at %s""" 4
1231.el .IP "\f(CWImproperly nested codeblock at %s\fR" 4
1232.IX Item "Improperly nested codeblock at %s"
1233A nested code block was found that started with a delimiter that was specified
1234as being only to be used as an outermost bracket.
1235.ie n .IP """Missing second block for quotelike ""%s""""" 4
1236.el .IP "\f(CWMissing second block for quotelike ``%s''\fR" 4
1237.IX Item "Missing second block for quotelike ""%s"""
1238\&\f(CW\*(C`extract_codeblock\*(C'\fR or \f(CW\*(C`extract_quotelike\*(C'\fR found one of the
1239quotelike operators \f(CW\*(C`s\*(C'\fR, \f(CW\*(C`tr\*(C'\fR or \f(CW\*(C`y\*(C'\fR followed by only one block.
1240.ie n .IP """No match found for opening bracket""" 4
1241.el .IP "\f(CWNo match found for opening bracket\fR" 4
1242.IX Item "No match found for opening bracket"
1243\&\f(CW\*(C`extract_codeblock\*(C'\fR failed to find a closing bracket to match the outermost
1244opening bracket.
1245.ie n .IP """Did not find opening tag: /%s/""" 4
1246.el .IP "\f(CWDid not find opening tag: /%s/\fR" 4
1247.IX Item "Did not find opening tag: /%s/"
1248\&\f(CW\*(C`extract_tagged\*(C'\fR did not find a suitable opening tag (after any specified
1249prefix was removed).
1250.ie n .IP """Unable to construct closing tag to match: /%s/""" 4
1251.el .IP "\f(CWUnable to construct closing tag to match: /%s/\fR" 4
1252.IX Item "Unable to construct closing tag to match: /%s/"
1253\&\f(CW\*(C`extract_tagged\*(C'\fR matched the specified opening tag and tried to
1254modify the matched text to produce a matching closing tag (because
1255none was specified). It failed to generate the closing tag, almost
1256certainly because the opening tag did not start with a
1257bracket of some kind.
1258.ie n .IP """Found invalid nested tag: %s""" 4
1259.el .IP "\f(CWFound invalid nested tag: %s\fR" 4
1260.IX Item "Found invalid nested tag: %s"
1261\&\f(CW\*(C`extract_tagged\*(C'\fR found a nested tag that appeared in the \*(L"reject\*(R" list
1262(and the failure mode was not \*(L"\s-1MAX\s0\*(R" or \*(L"\s-1PARA\s0\*(R").
1263.ie n .IP """Found unbalanced nested tag: %s""" 4
1264.el .IP "\f(CWFound unbalanced nested tag: %s\fR" 4
1265.IX Item "Found unbalanced nested tag: %s"
1266\&\f(CW\*(C`extract_tagged\*(C'\fR found a nested opening tag that was not matched by a
1267corresponding nested closing tag (and the failure mode was not \*(L"\s-1MAX\s0\*(R" or \*(L"\s-1PARA\s0\*(R").
1268.ie n .IP """Did not find closing tag""" 4
1269.el .IP "\f(CWDid not find closing tag\fR" 4
1270.IX Item "Did not find closing tag"
1271\&\f(CW\*(C`extract_tagged\*(C'\fR reached the end of the text without finding a closing tag
1272to match the original opening tag (and the failure mode was not
1273\&\*(L"\s-1MAX\s0\*(R" or \*(L"\s-1PARA\s0\*(R").
1274.SH "AUTHOR"
1275.IX Header "AUTHOR"
1276Damian Conway (damian@conway.org)
1277.SH "BUGS AND IRRITATIONS"
1278.IX Header "BUGS AND IRRITATIONS"
1279There are undoubtedly serious bugs lurking somewhere in this code, if
1280only because parts of it give the impression of understanding a great deal
1281more about Perl than they really do.
1282.PP
1283Bug reports and other feedback are most welcome.
1284.SH "COPYRIGHT"
1285.IX Header "COPYRIGHT"
1286.Vb 4
1287\& Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
1288\& This module is free software. It may be used, redistributed
1289\& and/or modified under the terms of the Perl Artistic License
1290\& (see http://www.perl.com/perl/misc/Artistic.html)
1291.Ve