Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "Text::Balanced 3" | |
132 | .TH Text::Balanced 3 "2001-09-21" "perl v5.8.8" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | Text::Balanced \- Extract delimited text sequences from strings. | |
135 | .SH "SYNOPSIS" | |
136 | .IX Header "SYNOPSIS" | |
137 | .Vb 8 | |
138 | \& use Text::Balanced qw ( | |
139 | \& extract_delimited | |
140 | \& extract_bracketed | |
141 | \& extract_quotelike | |
142 | \& extract_codeblock | |
143 | \& extract_variable | |
144 | \& extract_tagged | |
145 | \& extract_multiple | |
146 | .Ve | |
147 | .PP | |
148 | .Vb 3 | |
149 | \& gen_delimited_pat | |
150 | \& gen_extract_tagged | |
151 | \& ); | |
152 | .Ve | |
153 | .PP | |
154 | .Vb 2 | |
155 | \& # Extract the initial substring of $text that is delimited by | |
156 | \& # two (unescaped) instances of the first character in $delim. | |
157 | .Ve | |
158 | .PP | |
159 | .Vb 1 | |
160 | \& ($extracted, $remainder) = extract_delimited($text,$delim); | |
161 | .Ve | |
162 | .PP | |
163 | .Vb 3 | |
164 | \& # Extract the initial substring of $text that is bracketed | |
165 | \& # with a delimiter(s) specified by $delim (where the string | |
166 | \& # in $delim contains one or more of '(){}[]<>'). | |
167 | .Ve | |
168 | .PP | |
169 | .Vb 1 | |
170 | \& ($extracted, $remainder) = extract_bracketed($text,$delim); | |
171 | .Ve | |
172 | .PP | |
173 | .Vb 2 | |
174 | \& # Extract the initial substring of $text that is bounded by | |
175 | \& # an XML tag. | |
176 | .Ve | |
177 | .PP | |
178 | .Vb 1 | |
179 | \& ($extracted, $remainder) = extract_tagged($text); | |
180 | .Ve | |
181 | .PP | |
182 | .Vb 2 | |
183 | \& # Extract the initial substring of $text that is bounded by | |
184 | \& # a BEGIN...END pair. Don't allow nested BEGIN tags | |
185 | .Ve | |
186 | .PP | |
187 | .Vb 2 | |
188 | \& ($extracted, $remainder) = | |
189 | \& extract_tagged($text,"BEGIN","END",undef,{bad=>["BEGIN"]}); | |
190 | .Ve | |
191 | .PP | |
192 | .Vb 2 | |
193 | \& # Extract the initial substring of $text that represents a | |
194 | \& # Perl "quote or quote-like operation" | |
195 | .Ve | |
196 | .PP | |
197 | .Vb 1 | |
198 | \& ($extracted, $remainder) = extract_quotelike($text); | |
199 | .Ve | |
200 | .PP | |
201 | .Vb 3 | |
202 | \& # Extract the initial substring of $text that represents a block | |
203 | \& # of Perl code, bracketed by any of character(s) specified by $delim | |
204 | \& # (where the string $delim contains one or more of '(){}[]<>'). | |
205 | .Ve | |
206 | .PP | |
207 | .Vb 1 | |
208 | \& ($extracted, $remainder) = extract_codeblock($text,$delim); | |
209 | .Ve | |
210 | .PP | |
211 | .Vb 3 | |
212 | \& # Extract the initial substrings of $text that would be extracted by | |
213 | \& # one or more sequential applications of the specified functions | |
214 | \& # or regular expressions | |
215 | .Ve | |
216 | .PP | |
217 | .Vb 7 | |
218 | \& @extracted = extract_multiple($text, | |
219 | \& [ \e&extract_bracketed, | |
220 | \& \e&extract_quotelike, | |
221 | \& \e&some_other_extractor_sub, | |
222 | \& qr/[xyz]*/, | |
223 | \& 'literal', | |
224 | \& ]); | |
225 | .Ve | |
226 | .PP | |
227 | # Create a string representing an optimized pattern (a la Friedl) | |
228 | # that matches a substring delimited by any of the specified characters | |
229 | # (in this case: any type of quote or a slash) | |
230 | .PP | |
231 | .Vb 1 | |
232 | \& $patstring = gen_delimited_pat(q{'"`/}); | |
233 | .Ve | |
234 | .PP | |
235 | # Generate a reference to an anonymous sub that is just like extract_tagged | |
236 | # but pre-compiled and optimized for a specific pair of tags, and consequently | |
237 | # much faster (i.e. 3 times faster). It uses qr// for better performance on | |
238 | # repeated calls, so it only works under Perl 5.005 or later. | |
239 | .PP | |
240 | .Vb 1 | |
241 | \& $extract_head = gen_extract_tagged('<HEAD>','</HEAD>'); | |
242 | .Ve | |
243 | .PP | |
244 | .Vb 1 | |
245 | \& ($extracted, $remainder) = $extract_head->($text); | |
246 | .Ve | |
247 | .SH "DESCRIPTION" | |
248 | .IX Header "DESCRIPTION" | |
249 | The various \f(CW\*(C`extract_...\*(C'\fR subroutines may be used to | |
250 | extract a delimited substring, possibly after skipping a | |
251 | specified prefix string. By default, that prefix is | |
252 | optional whitespace (\f(CW\*(C`/\es*/\*(C'\fR), but you can change it to whatever | |
253 | you wish (see below). | |
254 | .PP | |
255 | The substring to be extracted must appear at the | |
256 | current \f(CW\*(C`pos\*(C'\fR location of the string's variable | |
257 | (or at index zero, if no \f(CW\*(C`pos\*(C'\fR position is defined). | |
258 | In other words, the \f(CW\*(C`extract_...\*(C'\fR subroutines \fIdon't\fR | |
259 | extract the first occurrence of a substring anywhere | |
260 | in a string (like an unanchored regex would). Rather, | |
261 | they extract an occurrence of the substring appearing | |
262 | immediately at the current matching position in the | |
263 | string (like a \f(CW\*(C`\eG\*(C'\fR\-anchored regex would). | |
264 | .Sh "General behaviour in list contexts" | |
265 | .IX Subsection "General behaviour in list contexts" | |
266 | In a list context, all the subroutines return a list, the first three | |
267 | elements of which are always: | |
268 | .IP "[0]" 4 | |
269 | .IX Item "[0]" | |
270 | The extracted string, including the specified delimiters. | |
271 | If the extraction fails an empty string is returned. | |
272 | .IP "[1]" 4 | |
273 | .IX Item "[1]" | |
274 | The remainder of the input string (i.e. the characters after the | |
275 | extracted string). On failure, the entire string is returned. | |
276 | .IP "[2]" 4 | |
277 | .IX Item "[2]" | |
278 | The skipped prefix (i.e. the characters before the extracted string). | |
279 | On failure, the empty string is returned. | |
280 | .PP | |
281 | Note that in a list context, the contents of the original input text (the first | |
282 | argument) are not modified in any way. | |
283 | .PP | |
284 | However, if the input text was passed in a variable, that variable's | |
285 | \&\f(CW\*(C`pos\*(C'\fR value is updated to point at the first character after the | |
286 | extracted text. That means that in a list context the various | |
287 | subroutines can be used much like regular expressions. For example: | |
288 | .PP | |
289 | .Vb 4 | |
290 | \& while ( $next = (extract_quotelike($text))[0] ) | |
291 | \& { | |
292 | \& # process next quote-like (in $next) | |
293 | \& } | |
294 | .Ve | |
295 | .Sh "General behaviour in scalar and void contexts" | |
296 | .IX Subsection "General behaviour in scalar and void contexts" | |
297 | In a scalar context, the extracted string is returned, having first been | |
298 | removed from the input text. Thus, the following code also processes | |
299 | each quote-like operation, but actually removes them from \f(CW$text:\fR | |
300 | .PP | |
301 | .Vb 4 | |
302 | \& while ( $next = extract_quotelike($text) ) | |
303 | \& { | |
304 | \& # process next quote-like (in $next) | |
305 | \& } | |
306 | .Ve | |
307 | .PP | |
308 | Note that if the input text is a read-only string (i.e. a literal), | |
309 | no attempt is made to remove the extracted text. | |
310 | .PP | |
311 | In a void context the behaviour of the extraction subroutines is | |
312 | exactly the same as in a scalar context, except (of course) that the | |
313 | extracted substring is not returned. | |
314 | .Sh "A note about prefixes" | |
315 | .IX Subsection "A note about prefixes" | |
316 | Prefix patterns are matched without any trailing modifiers (\f(CW\*(C`/gimsox\*(C'\fR etc.). | |
317 | This can bite you if you're expecting a prefix specification like | |
318 | \&'.*?(?=<H1>)' to skip everything up to the first <H1> tag. Such a prefix | |
319 | pattern will only succeed if the <H1> tag is on the current line, since | |
320 | \&. normally doesn't match newlines. | |
321 | .PP | |
322 | To overcome this limitation, you need to turn on /s matching within | |
323 | the prefix pattern, using the \f(CW\*(C`(?s)\*(C'\fR directive: '(?s).*?(?=<H1>)' | |
324 | .ie n .Sh """extract_delimited""" | |
325 | .el .Sh "\f(CWextract_delimited\fP" | |
326 | .IX Subsection "extract_delimited" | |
327 | The \f(CW\*(C`extract_delimited\*(C'\fR function formalizes the common idiom | |
328 | of extracting a single-character-delimited substring from the start of | |
329 | a string. For example, to extract a single-quote delimited string, the | |
330 | following code is typically used: | |
331 | .PP | |
332 | .Vb 2 | |
333 | \& ($remainder = $text) =~ s/\eA('(\e\e.|[^'])*')//s; | |
334 | \& $extracted = $1; | |
335 | .Ve | |
336 | .PP | |
337 | but with \f(CW\*(C`extract_delimited\*(C'\fR it can be simplified to: | |
338 | .PP | |
339 | .Vb 1 | |
340 | \& ($extracted,$remainder) = extract_delimited($text, "'"); | |
341 | .Ve | |
342 | .PP | |
343 | \&\f(CW\*(C`extract_delimited\*(C'\fR takes up to four scalars (the input text, the | |
344 | delimiters, a prefix pattern to be skipped, and any escape characters) | |
345 | and extracts the initial substring of the text that | |
346 | is appropriately delimited. If the delimiter string has multiple | |
347 | characters, the first one encountered in the text is taken to delimit | |
348 | the substring. | |
349 | The third argument specifies a prefix pattern that is to be skipped | |
350 | (but must be present!) before the substring is extracted. | |
351 | The final argument specifies the escape character to be used for each | |
352 | delimiter. | |
353 | .PP | |
354 | All arguments are optional. If the escape characters are not specified, | |
355 | every delimiter is escaped with a backslash (\f(CW\*(C`\e\*(C'\fR). | |
356 | If the prefix is not specified, the | |
357 | pattern \f(CW'\es*'\fR \- optional whitespace \- is used. If the delimiter set | |
358 | is also not specified, the set \f(CW\*(C`/["'`]/\*(C'\fR is used. If the text to be processed | |
359 | is not specified either, \f(CW$_\fR is used. | |
360 | .PP | |
361 | In list context, \f(CW\*(C`extract_delimited\*(C'\fR returns an array of three | |
362 | elements, the extracted substring (\fIincluding the surrounding | |
363 | delimiters\fR), the remainder of the text, and the skipped prefix (if | |
364 | any). If a suitable delimited substring is not found, the first | |
365 | element of the array is the empty string, the second is the complete | |
366 | original text, and the prefix returned in the third element is an | |
367 | empty string. | |
368 | .PP | |
369 | In a scalar context, just the extracted substring is returned. In | |
370 | a void context, the extracted substring (and any prefix) are simply | |
371 | removed from the beginning of the first argument. | |
372 | .PP | |
373 | Examples: | |
374 | .PP | |
375 | .Vb 1 | |
376 | \& # Remove a single-quoted substring from the very beginning of $text: | |
377 | .Ve | |
378 | .PP | |
379 | .Vb 1 | |
380 | \& $substring = extract_delimited($text, "'", ''); | |
381 | .Ve | |
382 | .PP | |
383 | .Vb 3 | |
384 | \& # Remove a single-quoted Pascalish substring (i.e. one in which | |
385 | \& # doubling the quote character escapes it) from the very | |
386 | \& # beginning of $text: | |
387 | .Ve | |
388 | .PP | |
389 | .Vb 1 | |
390 | \& $substring = extract_delimited($text, "'", '', "'"); | |
391 | .Ve | |
392 | .PP | |
393 | .Vb 3 | |
394 | \& # Extract a single- or double- quoted substring from the | |
395 | \& # beginning of $text, optionally after some whitespace | |
396 | \& # (note the list context to protect $text from modification): | |
397 | .Ve | |
398 | .PP | |
399 | .Vb 1 | |
400 | \& ($substring) = extract_delimited $text, q{"'}; | |
401 | .Ve | |
402 | .PP | |
403 | .Vb 1 | |
404 | \& # Delete the substring delimited by the first '/' in $text: | |
405 | .Ve | |
406 | .PP | |
407 | .Vb 1 | |
408 | \& $text = join '', (extract_delimited($text,'/','[^/]*'))[2,1]; | |
409 | .Ve | |
410 | .PP | |
411 | Note that this last example is \fInot\fR the same as deleting the first | |
412 | quote-like pattern. For instance, if \f(CW$text\fR contained the string: | |
413 | .PP | |
414 | .Vb 1 | |
415 | \& "if ('./cmd' =~ m/$UNIXCMD/s) { $cmd = $1; }" | |
416 | .Ve | |
417 | .PP | |
418 | then after the deletion it would contain: | |
419 | .PP | |
420 | .Vb 1 | |
421 | \& "if ('.$UNIXCMD/s) { $cmd = $1; }" | |
422 | .Ve | |
423 | .PP | |
424 | not: | |
425 | .PP | |
426 | .Vb 1 | |
427 | \& "if ('./cmd' =~ ms) { $cmd = $1; }" | |
428 | .Ve | |
429 | .PP | |
430 | See \*(L"extract_quotelike\*(R" for a (partial) solution to this problem. | |
431 | .ie n .Sh """extract_bracketed""" | |
432 | .el .Sh "\f(CWextract_bracketed\fP" | |
433 | .IX Subsection "extract_bracketed" | |
434 | Like \f(CW"extract_delimited"\fR, the \f(CW\*(C`extract_bracketed\*(C'\fR function takes | |
435 | up to three optional scalar arguments: a string to extract from, a delimiter | |
436 | specifier, and a prefix pattern. As before, a missing prefix defaults to | |
437 | optional whitespace and a missing text defaults to \f(CW$_\fR. However, a missing | |
438 | delimiter specifier defaults to \f(CW'{}()[]<>'\fR (see below). | |
439 | .PP | |
440 | \&\f(CW\*(C`extract_bracketed\*(C'\fR extracts a balanced-bracket-delimited | |
441 | substring (using any one (or more) of the user-specified delimiter | |
442 | brackets: '(..)', '{..}', '[..]', or '<..>'). Optionally it will also | |
443 | respect quoted unbalanced brackets (see below). | |
444 | .PP | |
445 | A \*(L"delimiter bracket\*(R" is a bracket in the list of delimiters passed as | |
446 | \&\f(CW\*(C`extract_bracketed\*(C'\fR's second argument. Delimiter brackets are | |
447 | specified by giving either the left or right (or both!) versions | |
448 | of the required bracket(s). Note that the order in which | |
449 | two or more delimiter brackets are specified is not significant. | |
450 | .PP | |
451 | A \*(L"balanced\-bracket\-delimited substring\*(R" is a substring bounded by | |
452 | matched brackets, such that any other (left or right) delimiter | |
453 | bracket \fIwithin\fR the substring is also matched by an opposite | |
454 | (right or left) delimiter bracket \fIat the same level of nesting\fR. Any | |
455 | type of bracket not in the delimiter list is treated as an ordinary | |
456 | character. | |
457 | .PP | |
458 | In other words, each type of bracket specified as a delimiter must be | |
459 | balanced and correctly nested within the substring, and any other kind of | |
460 | (\*(L"non\-delimiter\*(R") bracket in the substring is ignored. | |
461 | .PP | |
462 | For example, given the string: | |
463 | .PP | |
464 | .Vb 1 | |
465 | \& $text = "{ an '[irregularly :-(] {} parenthesized >:-)' string }"; | |
466 | .Ve | |
467 | .PP | |
468 | then a call to \f(CW\*(C`extract_bracketed\*(C'\fR in a list context: | |
469 | .PP | |
470 | .Vb 1 | |
471 | \& @result = extract_bracketed( $text, '{}' ); | |
472 | .Ve | |
473 | .PP | |
474 | would return: | |
475 | .PP | |
476 | .Vb 1 | |
477 | \& ( "{ an '[irregularly :-(] {} parenthesized >:-)' string }" , "" , "" ) | |
478 | .Ve | |
479 | .PP | |
480 | since both sets of \f(CW'{..}'\fR brackets are properly nested and evenly balanced. | |
481 | (In a scalar context just the first element of the array would be returned. In | |
482 | a void context, \f(CW$text\fR would be replaced by an empty string.) | |
483 | .PP | |
484 | Likewise the call in: | |
485 | .PP | |
486 | .Vb 1 | |
487 | \& @result = extract_bracketed( $text, '{[' ); | |
488 | .Ve | |
489 | .PP | |
490 | would return the same result, since all sets of both types of specified | |
491 | delimiter brackets are correctly nested and balanced. | |
492 | .PP | |
493 | However, the call in: | |
494 | .PP | |
495 | .Vb 1 | |
496 | \& @result = extract_bracketed( $text, '{([<' ); | |
497 | .Ve | |
498 | .PP | |
499 | would fail, returning: | |
500 | .PP | |
501 | .Vb 1 | |
502 | \& ( undef , "{ an '[irregularly :-(] {} parenthesized >:-)' string }" ); | |
503 | .Ve | |
504 | .PP | |
505 | because the embedded pairs of \f(CW'(..)'\fRs and \f(CW'[..]'\fRs are \*(L"cross\-nested\*(R" and | |
506 | the embedded \f(CW'>'\fR is unbalanced. (In a scalar context, this call would | |
507 | return an empty string. In a void context, \f(CW$text\fR would be unchanged.) | |
508 | .PP | |
509 | Note that the embedded single-quotes in the string don't help in this | |
510 | case, since they have not been specified as acceptable delimiters and are | |
511 | therefore treated as non-delimiter characters (and ignored). | |
512 | .PP | |
513 | However, if a particular species of quote character is included in the | |
514 | delimiter specification, then that type of quote will be correctly handled. | |
515 | For example, if \f(CW$text\fR is: | |
516 | .PP | |
517 | .Vb 1 | |
518 | \& $text = '<A HREF=">>>>">link</A>'; | |
519 | .Ve | |
520 | .PP | |
521 | then | |
522 | .PP | |
523 | .Vb 1 | |
524 | \& @result = extract_bracketed( $text, '<">' ); | |
525 | .Ve | |
526 | .PP | |
527 | returns: | |
528 | .PP | |
529 | .Vb 1 | |
530 | \& ( '<A HREF=">>>>">', 'link</A>', "" ) | |
531 | .Ve | |
532 | .PP | |
533 | as expected. Without the specification of \f(CW\*(C`"\*(C'\fR as an embedded quoter: | |
534 | .PP | |
535 | .Vb 1 | |
536 | \& @result = extract_bracketed( $text, '<>' ); | |
537 | .Ve | |
538 | .PP | |
539 | the result would be: | |
540 | .PP | |
541 | .Vb 1 | |
542 | \& ( '<A HREF=">', '>>>">link</A>', "" ) | |
543 | .Ve | |
544 | .PP | |
545 | In addition to the quote delimiters \f(CW\*(C`'\*(C'\fR, \f(CW\*(C`"\*(C'\fR, and \f(CW\*(C``\*(C'\fR, full Perl quote-like | |
546 | quoting (i.e. q{string}, qq{string}, etc) can be specified by including the | |
547 | letter 'q' as a delimiter. Hence: | |
548 | .PP | |
549 | .Vb 1 | |
550 | \& @result = extract_bracketed( $text, '<q>' ); | |
551 | .Ve | |
552 | .PP | |
553 | would correctly match something like this: | |
554 | .PP | |
555 | .Vb 1 | |
556 | \& $text = '<leftop: conj /and/ conj>'; | |
557 | .Ve | |
558 | .PP | |
559 | See also: \f(CW"extract_quotelike"\fR and \f(CW"extract_codeblock"\fR. | |
560 | .ie n .Sh """extract_variable""" | |
561 | .el .Sh "\f(CWextract_variable\fP" | |
562 | .IX Subsection "extract_variable" | |
563 | \&\f(CW\*(C`extract_variable\*(C'\fR extracts any valid Perl variable or | |
564 | variable-involved expression, including scalars, arrays, hashes, array | |
565 | accesses, hash look\-ups, method calls through objects, subroutine calls | |
566 | through subroutine references, etc. | |
567 | .PP | |
568 | The subroutine takes up to two optional arguments: | |
569 | .IP "1." 4 | |
570 | A string to be processed (\f(CW$_\fR if the string is omitted or \f(CW\*(C`undef\*(C'\fR) | |
571 | .IP "2." 4 | |
572 | A string specifying a pattern to be matched as a prefix (which is to be | |
573 | skipped). If omitted, optional whitespace is skipped. | |
574 | .PP | |
575 | On success in a list context, an array of 3 elements is returned. The | |
576 | elements are: | |
577 | .IP "[0]" 4 | |
578 | .IX Item "[0]" | |
579 | the extracted variable, or variablish expression | |
580 | .IP "[1]" 4 | |
581 | .IX Item "[1]" | |
582 | the remainder of the input text, | |
583 | .IP "[2]" 4 | |
584 | .IX Item "[2]" | |
585 | the prefix substring (if any), | |
586 | .PP | |
587 | On failure, all of these values (except the remaining text) are \f(CW\*(C`undef\*(C'\fR. | |
588 | .PP | |
589 | In a scalar context, \f(CW\*(C`extract_variable\*(C'\fR returns just the complete | |
590 | substring that matched a variablish expression. \f(CW\*(C`undef\*(C'\fR is returned on | |
591 | failure. In addition, the original input text has the returned substring | |
592 | (and any prefix) removed from it. | |
593 | .PP | |
594 | In a void context, the input text just has the matched substring (and | |
595 | any specified prefix) removed. | |
596 | .ie n .Sh """extract_tagged""" | |
597 | .el .Sh "\f(CWextract_tagged\fP" | |
598 | .IX Subsection "extract_tagged" | |
599 | \&\f(CW\*(C`extract_tagged\*(C'\fR extracts and segments text between (balanced) | |
600 | specified tags. | |
601 | .PP | |
602 | The subroutine takes up to five optional arguments: | |
603 | .IP "1." 4 | |
604 | A string to be processed (\f(CW$_\fR if the string is omitted or \f(CW\*(C`undef\*(C'\fR) | |
605 | .IP "2." 4 | |
606 | A string specifying a pattern to be matched as the opening tag. | |
607 | If the pattern string is omitted (or \f(CW\*(C`undef\*(C'\fR) then a pattern | |
608 | that matches any standard \s-1XML\s0 tag is used. | |
609 | .IP "3." 4 | |
610 | A string specifying a pattern to be matched at the closing tag. | |
611 | If the pattern string is omitted (or \f(CW\*(C`undef\*(C'\fR) then the closing | |
612 | tag is constructed by inserting a \f(CW\*(C`/\*(C'\fR after any leading bracket | |
613 | characters in the actual opening tag that was matched (\fInot\fR the pattern | |
614 | that matched the tag). For example, if the opening tag pattern | |
615 | is specified as \f(CW'{{\ew+}}'\fR and actually matched the opening tag | |
616 | \&\f(CW"{{DATA}}"\fR, then the constructed closing tag would be \f(CW"{{/DATA}}"\fR. | |
617 | .IP "4." 4 | |
618 | A string specifying a pattern to be matched as a prefix (which is to be | |
619 | skipped). If omitted, optional whitespace is skipped. | |
620 | .IP "5." 4 | |
621 | A hash reference containing various parsing options (see below) | |
622 | .PP | |
623 | The various options that can be specified are: | |
624 | .ie n .IP """reject => $listref""" 4 | |
625 | .el .IP "\f(CWreject => $listref\fR" 4 | |
626 | .IX Item "reject => $listref" | |
627 | The list reference contains one or more strings specifying patterns | |
628 | that must \fInot\fR appear within the tagged text. | |
629 | .Sp | |
630 | For example, to extract | |
631 | an \s-1HTML\s0 link (which should not contain nested links) use: | |
632 | .Sp | |
633 | .Vb 1 | |
634 | \& extract_tagged($text, '<A>', '</A>', undef, {reject => ['<A>']} ); | |
635 | .Ve | |
636 | .ie n .IP """ignore => $listref""" 4 | |
637 | .el .IP "\f(CWignore => $listref\fR" 4 | |
638 | .IX Item "ignore => $listref" | |
639 | The list reference contains one or more strings specifying patterns | |
640 | that are \fInot\fR to be treated as nested tags within the tagged text | |
641 | (even if they would match the start tag pattern). | |
642 | .Sp | |
643 | For example, to extract an arbitrary \s-1XML\s0 tag, but ignore \*(L"empty\*(R" elements: | |
644 | .Sp | |
645 | .Vb 1 | |
646 | \& extract_tagged($text, undef, undef, undef, {ignore => ['<[^>]*/>']} ); | |
647 | .Ve | |
648 | .Sp | |
649 | (also see \*(L"gen_delimited_pat\*(R" below). | |
650 | .ie n .IP """fail => $str""" 4 | |
651 | .el .IP "\f(CWfail => $str\fR" 4 | |
652 | .IX Item "fail => $str" | |
653 | The \f(CW\*(C`fail\*(C'\fR option indicates the action to be taken if a matching end | |
654 | tag is not encountered (i.e. before the end of the string or some | |
655 | \&\f(CW\*(C`reject\*(C'\fR pattern matches). By default, a failure to match a closing | |
656 | tag causes \f(CW\*(C`extract_tagged\*(C'\fR to immediately fail. | |
657 | .Sp | |
658 | However, if the string value associated with \f(CW\*(C`fail\*(C'\fR is \*(L"\s-1MAX\s0\*(R", then | |
659 | \&\f(CW\*(C`extract_tagged\*(C'\fR returns the complete text up to the point of failure. | |
660 | If the string is \*(L"\s-1PARA\s0\*(R", \f(CW\*(C`extract_tagged\*(C'\fR returns only the first paragraph | |
661 | after the tag (up to the first line that is either empty or contains | |
662 | only whitespace characters). | |
663 | If the string is "", the default behaviour (i.e. failure) is reinstated. | |
664 | .Sp | |
665 | For example, suppose the start tag \*(L"/para\*(R" introduces a paragraph, which then | |
666 | continues until the next \*(L"/endpara\*(R" tag or until another \*(L"/para\*(R" tag is | |
667 | encountered: | |
668 | .Sp | |
669 | .Vb 1 | |
670 | \& $text = "/para line 1\en\enline 3\en/para line 4"; | |
671 | .Ve | |
672 | .Sp | |
673 | .Vb 2 | |
674 | \& extract_tagged($text, '/para', '/endpara', undef, | |
675 | \& {reject => '/para', fail => MAX} ); | |
676 | .Ve | |
677 | .Sp | |
678 | .Vb 1 | |
679 | \& # EXTRACTED: "/para line 1\en\enline 3\en" | |
680 | .Ve | |
681 | .Sp | |
682 | Suppose instead, that if no matching \*(L"/endpara\*(R" tag is found, the \*(L"/para\*(R" | |
683 | tag refers only to the immediately following paragraph: | |
684 | .Sp | |
685 | .Vb 1 | |
686 | \& $text = "/para line 1\en\enline 3\en/para line 4"; | |
687 | .Ve | |
688 | .Sp | |
689 | .Vb 2 | |
690 | \& extract_tagged($text, '/para', '/endpara', undef, | |
691 | \& {reject => '/para', fail => PARA} ); | |
692 | .Ve | |
693 | .Sp | |
694 | .Vb 1 | |
695 | \& # EXTRACTED: "/para line 1\en" | |
696 | .Ve | |
697 | .Sp | |
698 | Note that the specified \f(CW\*(C`fail\*(C'\fR behaviour applies to nested tags as well. | |
699 | .PP | |
700 | On success in a list context, an array of 6 elements is returned. The elements are: | |
701 | .IP "[0]" 4 | |
702 | .IX Item "[0]" | |
703 | the extracted tagged substring (including the outermost tags), | |
704 | .IP "[1]" 4 | |
705 | .IX Item "[1]" | |
706 | the remainder of the input text, | |
707 | .IP "[2]" 4 | |
708 | .IX Item "[2]" | |
709 | the prefix substring (if any), | |
710 | .IP "[3]" 4 | |
711 | .IX Item "[3]" | |
712 | the opening tag | |
713 | .IP "[4]" 4 | |
714 | .IX Item "[4]" | |
715 | the text between the opening and closing tags | |
716 | .IP "[5]" 4 | |
717 | .IX Item "[5]" | |
718 | the closing tag (or "" if no closing tag was found) | |
719 | .PP | |
720 | On failure, all of these values (except the remaining text) are \f(CW\*(C`undef\*(C'\fR. | |
721 | .PP | |
722 | In a scalar context, \f(CW\*(C`extract_tagged\*(C'\fR returns just the complete | |
723 | substring that matched a tagged text (including the start and end | |
724 | tags). \f(CW\*(C`undef\*(C'\fR is returned on failure. In addition, the original input | |
725 | text has the returned substring (and any prefix) removed from it. | |
726 | .PP | |
727 | In a void context, the input text just has the matched substring (and | |
728 | any specified prefix) removed. | |
729 | .ie n .Sh """gen_extract_tagged""" | |
730 | .el .Sh "\f(CWgen_extract_tagged\fP" | |
731 | .IX Subsection "gen_extract_tagged" | |
732 | (Note: This subroutine is only available under Perl 5.005 or later) | |
733 | .PP | |
734 | \&\f(CW\*(C`gen_extract_tagged\*(C'\fR generates a new anonymous subroutine which | |
735 | extracts text between (balanced) specified tags. In other words, | |
736 | it generates a function identical in function to \f(CW\*(C`extract_tagged\*(C'\fR. | |
737 | .PP | |
738 | The difference between \f(CW\*(C`extract_tagged\*(C'\fR and the anonymous | |
739 | subroutines generated by | |
740 | \&\f(CW\*(C`gen_extract_tagged\*(C'\fR, is that those generated subroutines: | |
741 | .IP "\(bu" 4 | |
742 | do not have to reparse tag specification or parsing options every time | |
743 | they are called (whereas \f(CW\*(C`extract_tagged\*(C'\fR has to effectively rebuild | |
744 | its tag parser on every call); | |
745 | .IP "\(bu" 4 | |
746 | make use of the new qr// construct to pre-compile the regexes they use | |
747 | (whereas \f(CW\*(C`extract_tagged\*(C'\fR uses standard string variable interpolation | |
748 | to create tag-matching patterns). | |
749 | .PP | |
750 | The subroutine takes up to four optional arguments (the same set as | |
751 | \&\f(CW\*(C`extract_tagged\*(C'\fR except for the string to be processed). It returns | |
752 | a reference to a subroutine which in turn takes a single argument (the text to | |
753 | be extracted from). | |
754 | .PP | |
755 | In other words, the implementation of \f(CW\*(C`extract_tagged\*(C'\fR is exactly | |
756 | equivalent to: | |
757 | .PP | |
758 | .Vb 6 | |
759 | \& sub extract_tagged | |
760 | \& { | |
761 | \& my $text = shift; | |
762 | \& $extractor = gen_extract_tagged(@_); | |
763 | \& return $extractor->($text); | |
764 | \& } | |
765 | .Ve | |
766 | .PP | |
767 | (although \f(CW\*(C`extract_tagged\*(C'\fR is not currently implemented that way, in order | |
768 | to preserve pre\-5.005 compatibility). | |
769 | .PP | |
770 | Using \f(CW\*(C`gen_extract_tagged\*(C'\fR to create extraction functions for specific tags | |
771 | is a good idea if those functions are going to be called more than once, since | |
772 | their performance is typically twice as good as the more general-purpose | |
773 | \&\f(CW\*(C`extract_tagged\*(C'\fR. | |
774 | .ie n .Sh """extract_quotelike""" | |
775 | .el .Sh "\f(CWextract_quotelike\fP" | |
776 | .IX Subsection "extract_quotelike" | |
777 | \&\f(CW\*(C`extract_quotelike\*(C'\fR attempts to recognize, extract, and segment any | |
778 | one of the various Perl quotes and quotelike operators (see | |
779 | \&\fIperlop\fR\|(3)). Nested backslashed delimiters, embedded balanced bracket | |
780 | delimiters (for the quotelike operators), and trailing modifiers are | |
781 | all caught. For example, in: | |
782 | .PP | |
783 | .Vb 1 | |
784 | \& extract_quotelike 'q # an octothorpe: \e# (not the end of the q!) #' | |
785 | .Ve | |
786 | .PP | |
787 | .Vb 1 | |
788 | \& extract_quotelike ' "You said, \e"Use sed\e"." ' | |
789 | .Ve | |
790 | .PP | |
791 | .Vb 1 | |
792 | \& extract_quotelike ' s{([A-Z]{1,8}\e.[A-Z]{3})} /\eL$1\eE/; ' | |
793 | .Ve | |
794 | .PP | |
795 | .Vb 1 | |
796 | \& extract_quotelike ' tr/\e\e\e/\e\e\e\e/\e\e\e//ds; ' | |
797 | .Ve | |
798 | .PP | |
799 | the full Perl quotelike operations are all extracted correctly. | |
800 | .PP | |
801 | Note too that, when using the /x modifier on a regex, any comment | |
802 | containing the current pattern delimiter will cause the regex to be | |
803 | immediately terminated. In other words: | |
804 | .PP | |
805 | .Vb 5 | |
806 | \& 'm / | |
807 | \& (?i) # CASE INSENSITIVE | |
808 | \& [a-z_] # LEADING ALPHABETIC/UNDERSCORE | |
809 | \& [a-z0-9]* # FOLLOWED BY ANY NUMBER OF ALPHANUMERICS | |
810 | \& /x' | |
811 | .Ve | |
812 | .PP | |
813 | will be extracted as if it were: | |
814 | .PP | |
815 | .Vb 3 | |
816 | \& 'm / | |
817 | \& (?i) # CASE INSENSITIVE | |
818 | \& [a-z_] # LEADING ALPHABETIC/' | |
819 | .Ve | |
820 | .PP | |
821 | This behaviour is identical to that of the actual compiler. | |
822 | .PP | |
823 | \&\f(CW\*(C`extract_quotelike\*(C'\fR takes two arguments: the text to be processed and | |
824 | a prefix to be matched at the very beginning of the text. If no prefix | |
825 | is specified, optional whitespace is the default. If no text is given, | |
826 | \&\f(CW$_\fR is used. | |
827 | .PP | |
828 | In a list context, an array of 11 elements is returned. The elements are: | |
829 | .IP "[0]" 4 | |
830 | .IX Item "[0]" | |
831 | the extracted quotelike substring (including trailing modifiers), | |
832 | .IP "[1]" 4 | |
833 | .IX Item "[1]" | |
834 | the remainder of the input text, | |
835 | .IP "[2]" 4 | |
836 | .IX Item "[2]" | |
837 | the prefix substring (if any), | |
838 | .IP "[3]" 4 | |
839 | .IX Item "[3]" | |
840 | the name of the quotelike operator (if any), | |
841 | .IP "[4]" 4 | |
842 | .IX Item "[4]" | |
843 | the left delimiter of the first block of the operation, | |
844 | .IP "[5]" 4 | |
845 | .IX Item "[5]" | |
846 | the text of the first block of the operation | |
847 | (that is, the contents of | |
848 | a quote, the regex of a match or substitution or the target list of a | |
849 | translation), | |
850 | .IP "[6]" 4 | |
851 | .IX Item "[6]" | |
852 | the right delimiter of the first block of the operation, | |
853 | .IP "[7]" 4 | |
854 | .IX Item "[7]" | |
855 | the left delimiter of the second block of the operation | |
856 | (that is, if it is a \f(CW\*(C`s\*(C'\fR, \f(CW\*(C`tr\*(C'\fR, or \f(CW\*(C`y\*(C'\fR), | |
857 | .IP "[8]" 4 | |
858 | .IX Item "[8]" | |
859 | the text of the second block of the operation | |
860 | (that is, the replacement of a substitution or the translation list | |
861 | of a translation), | |
862 | .IP "[9]" 4 | |
863 | .IX Item "[9]" | |
864 | the right delimiter of the second block of the operation (if any), | |
865 | .IP "[10]" 4 | |
866 | .IX Item "[10]" | |
867 | the trailing modifiers on the operation (if any). | |
868 | .PP | |
869 | For each of the fields marked \*(L"(if any)\*(R" the default value on success is | |
870 | an empty string. | |
871 | On failure, all of these values (except the remaining text) are \f(CW\*(C`undef\*(C'\fR. | |
872 | .PP | |
873 | In a scalar context, \f(CW\*(C`extract_quotelike\*(C'\fR returns just the complete substring | |
874 | that matched a quotelike operation (or \f(CW\*(C`undef\*(C'\fR on failure). In a scalar or | |
875 | void context, the input text has the same substring (and any specified | |
876 | prefix) removed. | |
877 | .PP | |
878 | Examples: | |
879 | .PP | |
880 | .Vb 1 | |
881 | \& # Remove the first quotelike literal that appears in text | |
882 | .Ve | |
883 | .PP | |
884 | .Vb 1 | |
885 | \& $quotelike = extract_quotelike($text,'.*?'); | |
886 | .Ve | |
887 | .PP | |
888 | .Vb 2 | |
889 | \& # Replace one or more leading whitespace-separated quotelike | |
890 | \& # literals in $_ with "<QLL>" | |
891 | .Ve | |
892 | .PP | |
893 | .Vb 1 | |
894 | \& do { $_ = join '<QLL>', (extract_quotelike)[2,1] } until $@; | |
895 | .Ve | |
896 | .PP | |
897 | .Vb 1 | |
898 | \& # Isolate the search pattern in a quotelike operation from $text | |
899 | .Ve | |
900 | .PP | |
901 | .Vb 9 | |
902 | \& ($op,$pat) = (extract_quotelike $text)[3,5]; | |
903 | \& if ($op =~ /[ms]/) | |
904 | \& { | |
905 | \& print "search pattern: $pat\en"; | |
906 | \& } | |
907 | \& else | |
908 | \& { | |
909 | \& print "$op is not a pattern matching operation\en"; | |
910 | \& } | |
911 | .Ve | |
912 | .ie n .Sh """extract_quotelike"" and ""here documents""" | |
913 | .el .Sh "\f(CWextract_quotelike\fP and ``here documents''" | |
914 | .IX Subsection "extract_quotelike and here documents" | |
915 | \&\f(CW\*(C`extract_quotelike\*(C'\fR can successfully extract \*(L"here documents\*(R" from an input | |
916 | string, but with an important caveat in list contexts. | |
917 | .PP | |
918 | Unlike other types of quote-like literals, a here document is rarely | |
919 | a contiguous substring. For example, a typical piece of code using | |
920 | a here document might look like this: | |
921 | .PP | |
922 | .Vb 4 | |
923 | \& <<'EOMSG' || die; | |
924 | \& This is the message. | |
925 | \& EOMSG | |
926 | \& exit; | |
927 | .Ve | |
928 | .PP | |
929 | Given this as an input string in a scalar context, \f(CW\*(C`extract_quotelike\*(C'\fR | |
930 | would correctly return the string \*(L"<<'\s-1EOMSG\s0'\enThis is the message.\enEOMSG\*(R", | |
931 | leaving the string \*(L" || die;\enexit;\*(R" in the original variable. In other words, | |
932 | the two separate pieces of the here document are successfully extracted and | |
933 | concatenated. | |
934 | .PP | |
935 | In a list context, \f(CW\*(C`extract_quotelike\*(C'\fR would return the list | |
936 | .IP "[0]" 4 | |
937 | .IX Item "[0]" | |
938 | \&\*(L"<<'\s-1EOMSG\s0'\enThis is the message.\enEOMSG\en\*(R" (i.e. the full extracted here document, | |
939 | including fore and aft delimiters), | |
940 | .IP "[1]" 4 | |
941 | .IX Item "[1]" | |
942 | \&\*(L" || die;\enexit;\*(R" (i.e. the remainder of the input text, concatenated), | |
943 | .IP "[2]" 4 | |
944 | .IX Item "[2]" | |
945 | "" (i.e. the prefix substring \*(-- trivial in this case), | |
946 | .IP "[3]" 4 | |
947 | .IX Item "[3]" | |
948 | \&\*(L"<<\*(R" (i.e. the \*(L"name\*(R" of the quotelike operator), | |
949 | .IP "[4]" 4 | |
950 | .IX Item "[4]" | |
951 | \&\*(L"'\s-1EOMSG\s0'\*(R" (i.e. the left delimiter of the here document, including any quotes), | |
952 | .IP "[5]" 4 | |
953 | .IX Item "[5]" | |
954 | \&\*(L"This is the message.\en\*(R" (i.e. the text of the here document), | |
955 | .IP "[6]" 4 | |
956 | .IX Item "[6]" | |
957 | \&\*(L"\s-1EOMSG\s0\*(R" (i.e. the right delimiter of the here document), | |
958 | .IP "[7..10]" 4 | |
959 | .IX Item "[7..10]" | |
960 | "" (a here document has no second left delimiter, second text, second right | |
961 | delimiter, or trailing modifiers). | |
962 | .PP | |
963 | However, the matching position of the input variable would be set to | |
964 | \&\*(L"exit;\*(R" (i.e. \fIafter\fR the closing delimiter of the here document), | |
965 | which would cause the earlier \*(L" || die;\enexit;\*(R" to be skipped in any | |
966 | sequence of code fragment extractions. | |
967 | .PP | |
968 | To avoid this problem, when it encounters a here document whilst | |
969 | extracting from a modifiable string, \f(CW\*(C`extract_quotelike\*(C'\fR silently | |
970 | rearranges the string to an equivalent piece of Perl: | |
971 | .PP | |
972 | .Vb 5 | |
973 | \& <<'EOMSG' | |
974 | \& This is the message. | |
975 | \& EOMSG | |
976 | \& || die; | |
977 | \& exit; | |
978 | .Ve | |
979 | .PP | |
980 | in which the here document \fIis\fR contiguous. It still leaves the | |
981 | matching position after the here document, but now the rest of the line | |
982 | on which the here document starts is not skipped. | |
983 | .PP | |
984 | To prevent \f(CW\*(C`extract_quotelike\*(C'\fR from mucking about with the input in this way | |
985 | (this is the only case where a list-context \f(CW\*(C`extract_quotelike\*(C'\fR does so), | |
986 | you can pass the input variable as an interpolated literal: | |
987 | .PP | |
988 | .Vb 1 | |
989 | \& $quotelike = extract_quotelike("$var"); | |
990 | .Ve | |
991 | .ie n .Sh """extract_codeblock""" | |
992 | .el .Sh "\f(CWextract_codeblock\fP" | |
993 | .IX Subsection "extract_codeblock" | |
994 | \&\f(CW\*(C`extract_codeblock\*(C'\fR attempts to recognize and extract a balanced | |
995 | bracket delimited substring that may contain unbalanced brackets | |
996 | inside Perl quotes or quotelike operations. That is, \f(CW\*(C`extract_codeblock\*(C'\fR | |
997 | is like a combination of \f(CW"extract_bracketed"\fR and | |
998 | \&\f(CW"extract_quotelike"\fR. | |
999 | .PP | |
1000 | \&\f(CW\*(C`extract_codeblock\*(C'\fR takes the same initial three parameters as \f(CW\*(C`extract_bracketed\*(C'\fR: | |
1001 | a text to process, a set of delimiter brackets to look for, and a prefix to | |
1002 | match first. It also takes an optional fourth parameter, which allows the | |
1003 | outermost delimiter brackets to be specified separately (see below). | |
1004 | .PP | |
1005 | Omitting the first argument (input text) means process \f(CW$_\fR instead. | |
1006 | Omitting the second argument (delimiter brackets) indicates that only \f(CW'{'\fR is to be used. | |
1007 | Omitting the third argument (prefix argument) implies optional whitespace at the start. | |
1008 | Omitting the fourth argument (outermost delimiter brackets) indicates that the | |
1009 | value of the second argument is to be used for the outermost delimiters. | |
1010 | .PP | |
1011 | Once the prefix and the outermost opening delimiter bracket have been | |
1012 | recognized, code blocks are extracted by stepping through the input text and | |
1013 | trying the following alternatives in sequence: | |
1014 | .IP "1." 4 | |
1015 | Try and match a closing delimiter bracket. If the bracket was the same | |
1016 | species as the last opening bracket, return the substring to that | |
1017 | point. If the bracket was mismatched, return an error. | |
1018 | .IP "2." 4 | |
1019 | Try to match a quote or quotelike operator. If found, call | |
1020 | \&\f(CW\*(C`extract_quotelike\*(C'\fR to eat it. If \f(CW\*(C`extract_quotelike\*(C'\fR fails, return | |
1021 | the error it returned. Otherwise go back to step 1. | |
1022 | .IP "3." 4 | |
1023 | Try to match an opening delimiter bracket. If found, call | |
1024 | \&\f(CW\*(C`extract_codeblock\*(C'\fR recursively to eat the embedded block. If the | |
1025 | recursive call fails, return an error. Otherwise, go back to step 1. | |
1026 | .IP "4." 4 | |
1027 | Unconditionally match a bareword or any other single character, and | |
1028 | then go back to step 1. | |
1029 | .PP | |
1030 | Examples: | |
1031 | .PP | |
1032 | .Vb 1 | |
1033 | \& # Find a while loop in the text | |
1034 | .Ve | |
1035 | .PP | |
1036 | .Vb 4 | |
1037 | \& if ($text =~ s/.*?while\es*\e{/{/) | |
1038 | \& { | |
1039 | \& $loop = "while " . extract_codeblock($text); | |
1040 | \& } | |
1041 | .Ve | |
1042 | .PP | |
1043 | .Vb 2 | |
1044 | \& # Remove the first round-bracketed list (which may include | |
1045 | \& # round- or curly-bracketed code blocks or quotelike operators) | |
1046 | .Ve | |
1047 | .PP | |
1048 | .Vb 1 | |
1049 | \& extract_codeblock $text, "(){}", '[^(]*'; | |
1050 | .Ve | |
1051 | .PP | |
1052 | The ability to specify a different outermost delimiter bracket is useful | |
1053 | in some circumstances. For example, in the Parse::RecDescent module, | |
1054 | parser actions which are to be performed only on a successful parse | |
1055 | are specified using a \f(CW\*(C`<defer:...>\*(C'\fR directive. For example: | |
1056 | .PP | |
1057 | .Vb 2 | |
1058 | \& sentence: subject verb object | |
1059 | \& <defer: {$::theVerb = $item{verb}} > | |
1060 | .Ve | |
1061 | .PP | |
1062 | Parse::RecDescent uses \f(CW\*(C`extract_codeblock($text, '{}<>')\*(C'\fR to extract the code | |
1063 | within the \f(CW\*(C`<defer:...>\*(C'\fR directive, but there's a problem. | |
1064 | .PP | |
1065 | A deferred action like this: | |
1066 | .PP | |
1067 | .Vb 1 | |
1068 | \& <defer: {if ($count>10) {$count--}} > | |
1069 | .Ve | |
1070 | .PP | |
1071 | will be incorrectly parsed as: | |
1072 | .PP | |
1073 | .Vb 1 | |
1074 | \& <defer: {if ($count> | |
1075 | .Ve | |
1076 | .PP | |
1077 | because the \*(L"greater than\*(R" operator is interpreted as a closing delimiter. | |
1078 | .PP | |
1079 | But, by extracting the directive using | |
1080 | \&\f(CW\*(C`extract_codeblock($text,\ '{}',\ undef,\ '<>')\*(C'\fR | |
1081 | the '>' character is only treated as a delimiter at the outermost | |
1082 | level of the code block, so the directive is parsed correctly. | |
1083 | .ie n .Sh """extract_multiple""" | |
1084 | .el .Sh "\f(CWextract_multiple\fP" | |
1085 | .IX Subsection "extract_multiple" | |
1086 | The \f(CW\*(C`extract_multiple\*(C'\fR subroutine takes a string to be processed and a | |
1087 | list of extractors (subroutines or regular expressions) to apply to that string. | |
1088 | .PP | |
1089 | In a list context \f(CW\*(C`extract_multiple\*(C'\fR returns an array of substrings | |
1090 | of the original string, as extracted by the specified extractors. | |
1091 | In a scalar context, \f(CW\*(C`extract_multiple\*(C'\fR returns the first | |
1092 | substring successfully extracted from the original string. In both | |
1093 | scalar and void contexts the original string has the first successfully | |
1094 | extracted substring removed from it. In all contexts | |
1095 | \&\f(CW\*(C`extract_multiple\*(C'\fR starts at the current \f(CW\*(C`pos\*(C'\fR of the string, and | |
1096 | sets that \f(CW\*(C`pos\*(C'\fR appropriately after it matches. | |
1097 | .PP | |
1098 | Hence, the aim of a call to \f(CW\*(C`extract_multiple\*(C'\fR in a list context | |
1099 | is to split the processed string into as many non-overlapping fields as | |
1100 | possible, by repeatedly applying each of the specified extractors | |
1101 | to the remainder of the string. Thus \f(CW\*(C`extract_multiple\*(C'\fR is | |
1102 | a generalized form of Perl's \f(CW\*(C`split\*(C'\fR subroutine. | |
1103 | .PP | |
1104 | The subroutine takes up to four optional arguments: | |
1105 | .IP "1." 4 | |
1106 | A string to be processed (\f(CW$_\fR if the string is omitted or \f(CW\*(C`undef\*(C'\fR) | |
1107 | .IP "2." 4 | |
1108 | A reference to a list of subroutine references and/or qr// objects and/or | |
1109 | literal strings and/or hash references, specifying the extractors | |
1110 | to be used to split the string. If this argument is omitted (or | |
1111 | \&\f(CW\*(C`undef\*(C'\fR) the list: | |
1112 | .Sp | |
1113 | .Vb 5 | |
1114 | \& [ | |
1115 | \& sub { extract_variable($_[0], '') }, | |
1116 | \& sub { extract_quotelike($_[0],'') }, | |
1117 | \& sub { extract_codeblock($_[0],'{}','') }, | |
1118 | \& ] | |
1119 | .Ve | |
1120 | .Sp | |
1121 | is used. | |
1122 | .IP "3." 4 | |
1123 | A number specifying the maximum number of fields to return. If this | |
1124 | argument is omitted (or \f(CW\*(C`undef\*(C'\fR), split continues as long as possible. | |
1125 | .Sp | |
1126 | If the third argument is \fIN\fR, then extraction continues until \fIN\fR fields | |
1127 | have been successfully extracted, or until the string has been completely | |
1128 | processed. | |
1129 | .Sp | |
1130 | Note that in scalar and void contexts the value of this argument is | |
1131 | automatically reset to 1 (under \f(CW\*(C`\-w\*(C'\fR, a warning is issued if the argument | |
1132 | has to be reset). | |
1133 | .IP "4." 4 | |
1134 | A value indicating whether unmatched substrings (see below) within the | |
1135 | text should be skipped or returned as fields. If the value is true, | |
1136 | such substrings are skipped. Otherwise, they are returned. | |
1137 | .PP | |
1138 | The extraction process works by applying each extractor in | |
1139 | sequence to the text string. | |
1140 | .PP | |
1141 | If the extractor is a subroutine it is called in a list context and is | |
1142 | expected to return a list of a single element, namely the extracted | |
1143 | text. It may optionally also return two further arguments: a string | |
1144 | representing the text left after extraction (like $' for a pattern | |
1145 | match), and a string representing any prefix skipped before the | |
1146 | extraction (like $` in a pattern match). Note that this is designed | |
1147 | to facilitate the use of other Text::Balanced subroutines with | |
1148 | \&\f(CW\*(C`extract_multiple\*(C'\fR. Note too that the value returned by an extractor | |
1149 | subroutine need not bear any relationship to the corresponding substring | |
1150 | of the original text (see examples below). | |
1151 | .PP | |
1152 | If the extractor is a precompiled regular expression or a string, | |
1153 | it is matched against the text in a scalar context with a leading | |
1154 | \&'\eG' and the gc modifiers enabled. The extracted value is either | |
1155 | \&\f(CW$1\fR if that variable is defined after the match, or else the | |
1156 | complete match (i.e. $&). | |
1157 | .PP | |
1158 | If the extractor is a hash reference, it must contain exactly one element. | |
1159 | The value of that element is one of the | |
1160 | above extractor types (subroutine reference, regular expression, or string). | |
1161 | The key of that element is the name of a class into which the successful | |
1162 | return value of the extractor will be blessed. | |
1163 | .PP | |
1164 | If an extractor returns a defined value, that value is immediately | |
1165 | treated as the next extracted field and pushed onto the list of fields. | |
1166 | If the extractor was specified in a hash reference, the field is also | |
1167 | blessed into the appropriate class. | |
1168 | .PP | |
1169 | If the extractor fails to match (in the case of a regex extractor), or returns an empty list or an undefined value (in the case of a subroutine extractor), it is | |
1170 | assumed to have failed to extract. | |
1171 | If none of the extractor subroutines succeeds, then one | |
1172 | character is extracted from the start of the text and the extraction | |
1173 | subroutines reapplied. Characters which are thus removed are accumulated and | |
1174 | eventually become the next field (unless the fourth argument is true, in which | |
1175 | case they are discarded). | |
1176 | .PP | |
1177 | For example, the following extracts substrings that are valid Perl variables: | |
1178 | .PP | |
1179 | .Vb 3 | |
1180 | \& @fields = extract_multiple($text, | |
1181 | \& [ sub { extract_variable($_[0]) } ], | |
1182 | \& undef, 1); | |
1183 | .Ve | |
1184 | .PP | |
1185 | This example separates a text into fields which are quote delimited, | |
1186 | curly bracketed, and anything else. The delimited and bracketed | |
1187 | parts are also blessed to identify them (the \*(L"anything else\*(R" is unblessed): | |
1188 | .PP | |
1189 | .Vb 5 | |
1190 | \& @fields = extract_multiple($text, | |
1191 | \& [ | |
1192 | \& { Delim => sub { extract_delimited($_[0],q{'"}) } }, | |
1193 | \& { Brack => sub { extract_bracketed($_[0],'{}') } }, | |
1194 | \& ]); | |
1195 | .Ve | |
1196 | .PP | |
1197 | This call extracts the next single substring that is a valid Perl quotelike | |
1198 | operator (and removes it from \f(CW$text\fR): | |
1199 | .PP | |
1200 | .Vb 4 | |
1201 | \& $quotelike = extract_multiple($text, | |
1202 | \& [ | |
1203 | \& sub { extract_quotelike($_[0]) }, | |
1204 | \& ], undef, 1); | |
1205 | .Ve | |
1206 | .PP | |
1207 | Finally, here is yet another way to do comma-separated value parsing: | |
1208 | .PP | |
1209 | .Vb 6 | |
1210 | \& @fields = extract_multiple($csv_text, | |
1211 | \& [ | |
1212 | \& sub { extract_delimited($_[0],q{'"}) }, | |
1213 | \& qr/([^,]+)(.*)/, | |
1214 | \& ], | |
1215 | \& undef,1); | |
1216 | .Ve | |
1217 | .PP | |
1218 | The list in the second argument means: | |
1219 | \&\fI\*(L"Try and extract a ' or " delimited string, otherwise extract anything up to a comma...\*(R"\fR. | |
1220 | The undef third argument means: | |
1221 | \&\fI\*(L"...as many times as possible...\*(R"\fR, | |
1222 | and the true value in the fourth argument means | |
1223 | \&\fI\*(L"...discarding anything else that appears (i.e. the commas)\*(R"\fR. | |
1224 | .PP | |
1225 | If you wanted the commas preserved as separate fields (i.e. like split | |
1226 | does if your split pattern has capturing parentheses), you would | |
1227 | just make the last parameter undefined (or remove it). | |
1228 | .ie n .Sh """gen_delimited_pat""" | |
1229 | .el .Sh "\f(CWgen_delimited_pat\fP" | |
1230 | .IX Subsection "gen_delimited_pat" | |
1231 | The \f(CW\*(C`gen_delimited_pat\*(C'\fR subroutine takes a single (string) argument and | |
1232 | builds a Friedl-style optimized regex that matches a string delimited | |
1233 | by any one of the characters in the single argument. For example: | |
1234 | .PP | |
1235 | .Vb 1 | |
1236 | \& gen_delimited_pat(q{'"}) | |
1237 | .Ve | |
1238 | .PP | |
1239 | returns the regex: | |
1240 | .PP | |
1241 | .Vb 1 | |
1242 | \& (?:\e"(?:\e\e\e"|(?!\e").)*\e"|\e'(?:\e\e\e'|(?!\e').)*\e') | |
1243 | .Ve | |
1244 | .PP | |
1245 | Note that the specified delimiters are automatically quotemeta'd. | |
1246 | .PP | |
1247 | A typical use of \f(CW\*(C`gen_delimited_pat\*(C'\fR would be to build special purpose tags | |
1248 | for \f(CW\*(C`extract_tagged\*(C'\fR. For example, to properly ignore \*(L"empty\*(R" \s-1XML\s0 elements | |
1249 | (which might contain quoted strings): | |
1250 | .PP | |
1251 | .Vb 1 | |
1252 | \& my $empty_tag = '<(' . gen_delimited_pat(q{'"}) . '|.)+/>'; | |
1253 | .Ve | |
1254 | .PP | |
1255 | .Vb 1 | |
1256 | \& extract_tagged($text, undef, undef, undef, {ignore => [$empty_tag]} ); | |
1257 | .Ve | |
1258 | .PP | |
1259 | \&\f(CW\*(C`gen_delimited_pat\*(C'\fR may also be called with an optional second argument, | |
1260 | which specifies the \*(L"escape\*(R" character(s) to be used for each delimiter. | |
1261 | For example to match a Pascal-style string (where ' is the delimiter | |
1262 | and '' is a literal ' within the string): | |
1263 | .PP | |
1264 | .Vb 1 | |
1265 | \& gen_delimited_pat(q{'},q{'}); | |
1266 | .Ve | |
1267 | .PP | |
1268 | Different escape characters can be specified for different delimiters. | |
1269 | For example, to specify that '/' is the escape for single quotes | |
1270 | and '%' is the escape for double quotes: | |
1271 | .PP | |
1272 | .Vb 1 | |
1273 | \& gen_delimited_pat(q{'"},q{/%}); | |
1274 | .Ve | |
1275 | .PP | |
1276 | If more delimiters than escape chars are specified, the last escape char | |
1277 | is used for the remaining delimiters. | |
1278 | If no escape char is specified for a given specified delimiter, '\e' is used. | |
1279 | .PP | |
1280 | Note that | |
1281 | \&\f(CW\*(C`gen_delimited_pat\*(C'\fR was previously called | |
1282 | \&\f(CW\*(C`delimited_pat\*(C'\fR. That name may still be used, but is now deprecated. | |
1283 | .SH "DIAGNOSTICS" | |
1284 | .IX Header "DIAGNOSTICS" | |
1285 | In a list context, all the functions return \f(CW\*(C`(undef,$original_text)\*(C'\fR | |
1286 | on failure. In a scalar context, failure is indicated by returning \f(CW\*(C`undef\*(C'\fR | |
1287 | (in this case the input text is not modified in any way). | |
1288 | .PP | |
1289 | In addition, on failure in \fIany\fR context, the \f(CW$@\fR variable is set. | |
1290 | Accessing \f(CW\*(C`$@\->{error}\*(C'\fR returns one of the error diagnostics listed | |
1291 | below. | |
1292 | Accessing \f(CW\*(C`$@\->{pos}\*(C'\fR returns the offset into the original string at | |
1293 | which the error was detected (although not necessarily where it occurred!). | |
1294 | Printing \f(CW$@\fR directly produces the error message, with the offset appended. | |
1295 | On success, the \f(CW$@\fR variable is guaranteed to be \f(CW\*(C`undef\*(C'\fR. | |
1296 | .PP | |
1297 | The available diagnostics are: | |
1298 | .ie n .IP """Did not find a suitable bracket: ""%s""""" 4 | |
1299 | .el .IP "\f(CWDid not find a suitable bracket: ``%s''\fR" 4 | |
1300 | .IX Item "Did not find a suitable bracket: ""%s""" | |
1301 | The delimiter provided to \f(CW\*(C`extract_bracketed\*(C'\fR was not one of | |
1302 | \&\f(CW'()[]<>{}'\fR. | |
1303 | .ie n .IP """Did not find prefix: /%s/""" 4 | |
1304 | .el .IP "\f(CWDid not find prefix: /%s/\fR" 4 | |
1305 | .IX Item "Did not find prefix: /%s/" | |
1306 | A non-optional prefix was specified but wasn't found at the start of the text. | |
1307 | .ie n .IP """Did not find opening bracket after prefix: ""%s""""" 4 | |
1308 | .el .IP "\f(CWDid not find opening bracket after prefix: ``%s''\fR" 4 | |
1309 | .IX Item "Did not find opening bracket after prefix: ""%s""" | |
1310 | \&\f(CW\*(C`extract_bracketed\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR was expecting a | |
1311 | particular kind of bracket at the start of the text, and didn't find it. | |
1312 | .ie n .IP """No quotelike operator found after prefix: ""%s""""" 4 | |
1313 | .el .IP "\f(CWNo quotelike operator found after prefix: ``%s''\fR" 4 | |
1314 | .IX Item "No quotelike operator found after prefix: ""%s""" | |
1315 | \&\f(CW\*(C`extract_quotelike\*(C'\fR didn't find one of the quotelike operators \f(CW\*(C`q\*(C'\fR, | |
1316 | \&\f(CW\*(C`qq\*(C'\fR, \f(CW\*(C`qw\*(C'\fR, \f(CW\*(C`qx\*(C'\fR, \f(CW\*(C`s\*(C'\fR, \f(CW\*(C`tr\*(C'\fR or \f(CW\*(C`y\*(C'\fR at the start of the substring | |
1317 | it was extracting. | |
1318 | .ie n .IP """Unmatched closing bracket: ""%c""""" 4 | |
1319 | .el .IP "\f(CWUnmatched closing bracket: ``%c''\fR" 4 | |
1320 | .IX Item "Unmatched closing bracket: ""%c""" | |
1321 | \&\f(CW\*(C`extract_bracketed\*(C'\fR, \f(CW\*(C`extract_quotelike\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR encountered | |
1322 | a closing bracket where none was expected. | |
1323 | .ie n .IP """Unmatched opening bracket(s): ""%s""""" 4 | |
1324 | .el .IP "\f(CWUnmatched opening bracket(s): ``%s''\fR" 4 | |
1325 | .IX Item "Unmatched opening bracket(s): ""%s""" | |
1326 | \&\f(CW\*(C`extract_bracketed\*(C'\fR, \f(CW\*(C`extract_quotelike\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR ran | |
1327 | out of characters in the text before closing one or more levels of nested | |
1328 | brackets. | |
1329 | .ie n .IP """Unmatched embedded quote (%s)""" 4 | |
1330 | .el .IP "\f(CWUnmatched embedded quote (%s)\fR" 4 | |
1331 | .IX Item "Unmatched embedded quote (%s)" | |
1332 | \&\f(CW\*(C`extract_bracketed\*(C'\fR attempted to match an embedded quoted substring, but | |
1333 | failed to find a closing quote to match it. | |
1334 | .ie n .IP """Did not find closing delimiter to match '%s'""" 4 | |
1335 | .el .IP "\f(CWDid not find closing delimiter to match '%s'\fR" 4 | |
1336 | .IX Item "Did not find closing delimiter to match '%s'" | |
1337 | \&\f(CW\*(C`extract_quotelike\*(C'\fR was unable to find a closing delimiter to match the | |
1338 | one that opened the quote-like operation. | |
1339 | .ie n .IP """Mismatched closing bracket: expected ""%c"" but found ""%s""""" 4 | |
1340 | .el .IP "\f(CWMismatched closing bracket: expected ``%c'' but found ``%s''\fR" 4 | |
1341 | .IX Item "Mismatched closing bracket: expected ""%c"" but found ""%s""" | |
1342 | \&\f(CW\*(C`extract_bracketed\*(C'\fR, \f(CW\*(C`extract_quotelike\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR found | |
1343 | a valid bracket delimiter, but it was the wrong species. This usually | |
1344 | indicates a nesting error, but may indicate incorrect quoting or escaping. | |
1345 | .ie n .IP """No block delimiter found after quotelike ""%s""""" 4 | |
1346 | .el .IP "\f(CWNo block delimiter found after quotelike ``%s''\fR" 4 | |
1347 | .IX Item "No block delimiter found after quotelike ""%s""" | |
1348 | \&\f(CW\*(C`extract_quotelike\*(C'\fR or \f(CW\*(C`extract_codeblock\*(C'\fR found one of the | |
1349 | quotelike operators \f(CW\*(C`q\*(C'\fR, \f(CW\*(C`qq\*(C'\fR, \f(CW\*(C`qw\*(C'\fR, \f(CW\*(C`qx\*(C'\fR, \f(CW\*(C`s\*(C'\fR, \f(CW\*(C`tr\*(C'\fR or \f(CW\*(C`y\*(C'\fR | |
1350 | without a suitable block after it. | |
1351 | .ie n .IP """Did not find leading dereferencer""" 4 | |
1352 | .el .IP "\f(CWDid not find leading dereferencer\fR" 4 | |
1353 | .IX Item "Did not find leading dereferencer" | |
1354 | \&\f(CW\*(C`extract_variable\*(C'\fR was expecting one of '$', '@', or '%' at the start of | |
1355 | a variable, but didn't find any of them. | |
1356 | .ie n .IP """Bad identifier after dereferencer""" 4 | |
1357 | .el .IP "\f(CWBad identifier after dereferencer\fR" 4 | |
1358 | .IX Item "Bad identifier after dereferencer" | |
1359 | \&\f(CW\*(C`extract_variable\*(C'\fR found a '$', '@', or '%' indicating a variable, but that | |
1360 | character was not followed by a legal Perl identifier. | |
1361 | .ie n .IP """Did not find expected opening bracket at %s""" 4 | |
1362 | .el .IP "\f(CWDid not find expected opening bracket at %s\fR" 4 | |
1363 | .IX Item "Did not find expected opening bracket at %s" | |
1364 | \&\f(CW\*(C`extract_codeblock\*(C'\fR failed to find any of the outermost opening brackets | |
1365 | that were specified. | |
1366 | .ie n .IP """Improperly nested codeblock at %s""" 4 | |
1367 | .el .IP "\f(CWImproperly nested codeblock at %s\fR" 4 | |
1368 | .IX Item "Improperly nested codeblock at %s" | |
1369 | A nested code block was found that started with a delimiter that was specified | |
1370 | as being only to be used as an outermost bracket. | |
1371 | .ie n .IP """Missing second block for quotelike ""%s""""" 4 | |
1372 | .el .IP "\f(CWMissing second block for quotelike ``%s''\fR" 4 | |
1373 | .IX Item "Missing second block for quotelike ""%s""" | |
1374 | \&\f(CW\*(C`extract_codeblock\*(C'\fR or \f(CW\*(C`extract_quotelike\*(C'\fR found one of the | |
1375 | quotelike operators \f(CW\*(C`s\*(C'\fR, \f(CW\*(C`tr\*(C'\fR or \f(CW\*(C`y\*(C'\fR followed by only one block. | |
1376 | .ie n .IP """No match found for opening bracket""" 4 | |
1377 | .el .IP "\f(CWNo match found for opening bracket\fR" 4 | |
1378 | .IX Item "No match found for opening bracket" | |
1379 | \&\f(CW\*(C`extract_codeblock\*(C'\fR failed to find a closing bracket to match the outermost | |
1380 | opening bracket. | |
1381 | .ie n .IP """Did not find opening tag: /%s/""" 4 | |
1382 | .el .IP "\f(CWDid not find opening tag: /%s/\fR" 4 | |
1383 | .IX Item "Did not find opening tag: /%s/" | |
1384 | \&\f(CW\*(C`extract_tagged\*(C'\fR did not find a suitable opening tag (after any specified | |
1385 | prefix was removed). | |
1386 | .ie n .IP """Unable to construct closing tag to match: /%s/""" 4 | |
1387 | .el .IP "\f(CWUnable to construct closing tag to match: /%s/\fR" 4 | |
1388 | .IX Item "Unable to construct closing tag to match: /%s/" | |
1389 | \&\f(CW\*(C`extract_tagged\*(C'\fR matched the specified opening tag and tried to | |
1390 | modify the matched text to produce a matching closing tag (because | |
1391 | none was specified). It failed to generate the closing tag, almost | |
1392 | certainly because the opening tag did not start with a | |
1393 | bracket of some kind. | |
1394 | .ie n .IP """Found invalid nested tag: %s""" 4 | |
1395 | .el .IP "\f(CWFound invalid nested tag: %s\fR" 4 | |
1396 | .IX Item "Found invalid nested tag: %s" | |
1397 | \&\f(CW\*(C`extract_tagged\*(C'\fR found a nested tag that appeared in the \*(L"reject\*(R" list | |
1398 | (and the failure mode was not \*(L"\s-1MAX\s0\*(R" or \*(L"\s-1PARA\s0\*(R"). | |
1399 | .ie n .IP """Found unbalanced nested tag: %s""" 4 | |
1400 | .el .IP "\f(CWFound unbalanced nested tag: %s\fR" 4 | |
1401 | .IX Item "Found unbalanced nested tag: %s" | |
1402 | \&\f(CW\*(C`extract_tagged\*(C'\fR found a nested opening tag that was not matched by a | |
1403 | corresponding nested closing tag (and the failure mode was not \*(L"\s-1MAX\s0\*(R" or \*(L"\s-1PARA\s0\*(R"). | |
1404 | .ie n .IP """Did not find closing tag""" 4 | |
1405 | .el .IP "\f(CWDid not find closing tag\fR" 4 | |
1406 | .IX Item "Did not find closing tag" | |
1407 | \&\f(CW\*(C`extract_tagged\*(C'\fR reached the end of the text without finding a closing tag | |
1408 | to match the original opening tag (and the failure mode was not | |
1409 | \&\*(L"\s-1MAX\s0\*(R" or \*(L"\s-1PARA\s0\*(R"). | |
1410 | .SH "AUTHOR" | |
1411 | .IX Header "AUTHOR" | |
1412 | Damian Conway (damian@conway.org) | |
1413 | .SH "BUGS AND IRRITATIONS" | |
1414 | .IX Header "BUGS AND IRRITATIONS" | |
1415 | There are undoubtedly serious bugs lurking somewhere in this code, if | |
1416 | only because parts of it give the impression of understanding a great deal | |
1417 | more about Perl than they really do. | |
1418 | .PP | |
1419 | Bug reports and other feedback are most welcome. | |
1420 | .SH "COPYRIGHT" | |
1421 | .IX Header "COPYRIGHT" | |
1422 | .Vb 3 | |
1423 | \& Copyright (c) 1997-2001, Damian Conway. All Rights Reserved. | |
1424 | \& This module is free software. It may be used, redistributed | |
1425 | \& and/or modified under the same terms as Perl itself. | |
1426 | .Ve |