Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / Text / Balanced.pm
CommitLineData
86530b38
AT
1# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS.
2# FOR FULL DOCUMENTATION SEE Balanced.pod
3
4use 5.005;
5use strict;
6
7package Text::Balanced;
8
9use Exporter;
10use SelfLoader;
11use vars qw { $VERSION @ISA %EXPORT_TAGS };
12
13$VERSION = '1.77';
14@ISA = qw ( Exporter );
15
16%EXPORT_TAGS = ( ALL => [ qw(
17 &extract_delimited
18 &extract_bracketed
19 &extract_quotelike
20 &extract_codeblock
21 &extract_variable
22 &extract_tagged
23 &extract_multiple
24
25 &gen_delimited_pat
26 &gen_extract_tagged
27
28 &delimited_pat
29 ) ] );
30
31Exporter::export_ok_tags('ALL');
32
33# PROTOTYPES
34
35sub _match_bracketed($$$$$$);
36sub _match_variable($$);
37sub _match_codeblock($$$$$$$);
38sub _match_quotelike($$$$);
39
40# HANDLE RETURN VALUES IN VARIOUS CONTEXTS
41
42sub _fail
43{
44 my ($wantarray,$textref) = @_;
45 return ("",$$textref,"") if $wantarray;
46 return undef;
47}
48
49sub _succeed
50{
51 $@ = undef;
52 my ($wantarray,$textref) = splice @_, 0, 2;
53 if ($wantarray)
54 {
55 # print join ("|", @_), "\n";
56 my @res;
57 pos($$textref) = $_[2]; # RESET \G
58 while (my ($from, $len) = splice @_, 0, 2)
59 {
60 push @res, substr($$textref,$from,$len);
61 }
62 return @res;
63 }
64 else
65 {
66 my $match = substr($$textref,$_[0],$_[1]);
67 eval {substr($$textref,$_[4],$_[1]+$_[5])=""} ; #CHOP OUT PREFIX & MATCH, IF POSSIBLE
68 pos($$textref) = $_[4]; # RESET \G
69 return $match;
70 }
71}
72
73# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING
74
75sub gen_delimited_pat($;$) # ($delimiters;$escapes)
76{
77 my ($dels, $escs) = @_;
78 return "" unless $dels =~ /\S/;
79 $escs = '\\' unless $escs;
80 $escs .= substr($escs,-1) x (length($dels)-length($escs));
81 my @pat = ();
82 my $i;
83 for ($i=0; $i<length $dels; $i++)
84 {
85 my $del = quotemeta substr($dels,$i,1);
86 my $esc = quotemeta substr($escs,$i,1);
87 if ($del eq $esc)
88 {
89 push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del";
90 }
91 else
92 {
93 push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del";
94 }
95 }
96 my $pat = join '|', @pat;
97 return "(?:$pat)";
98}
99
100*delimited_pat = \&gen_delimited_pat;
101
102
103# THE EXTRACTION FUNCTIONS
104
105sub extract_delimited (;$$$$)
106{
107 my $textref = defined $_[0] ? \$_[0] : \$_;
108 my $wantarray = wantarray;
109 my $del = defined $_[1] ? $_[1] : qq{\'\"\`};
110 my $pre = defined $_[2] ? $_[2] : '\s*';
111 my $esc = defined $_[3] ? $_[3] : qq{\\};
112 my $pat = gen_delimited_pat($del, $esc);
113 my $startpos = pos $$textref || 0;
114 return _fail($wantarray, $textref)
115 unless $$textref =~ m/\G($pre)($pat)/gc;
116 my $prelen = length($1);
117 my $matchpos = $startpos+$prelen;
118 my $endpos = pos $$textref;
119 return _succeed $wantarray, $textref,
120 $matchpos, $endpos-$matchpos, # MATCH
121 $endpos, length($$textref)-$endpos, # REMAINDER
122 $startpos, $prelen; # PREFIX
123}
124
125sub extract_bracketed (;$$$)
126{
127 my $textref = defined $_[0] ? \$_[0] : \$_;
128 my $ldel = defined $_[1] ? $_[1] : '{([<';
129 my $pre = defined $_[2] ? $_[2] : '\s*';
130 my $wantarray = wantarray;
131 my $qdel = "";
132 my $quotelike;
133 $ldel =~ s/'//g and $qdel .= q{'};
134 $ldel =~ s/"//g and $qdel .= q{"};
135 $ldel =~ s/`//g and $qdel .= q{`};
136 $ldel =~ s/q//g and $quotelike = 1;
137 $ldel =~ tr/[](){}<>\0-\377/[[(({{<</ds;
138 my $rdel = $ldel;
139 unless ($rdel =~ tr/[({</])}>/)
140 {
141 $@ = "Did not find a suitable bracket in delimiter: \"$_[1]\"";
142 return _fail $wantarray, $textref;
143 }
144 my $posbug = pos;
145 $ldel = join('|', map { quotemeta $_ } split('', $ldel));
146 $rdel = join('|', map { quotemeta $_ } split('', $rdel));
147 pos = $posbug;
148
149 my $startpos = pos $$textref || 0;
150 my @match = _match_bracketed($textref,$pre, $ldel, $qdel, $quotelike, $rdel);
151
152 return _fail ($wantarray, $textref) unless @match;
153
154 return _succeed ( $wantarray, $textref,
155 $match[2], $match[5]+2, # MATCH
156 @match[8,9], # REMAINDER
157 @match[0,1], # PREFIX
158 );
159}
160
161sub _match_bracketed($$$$$$) # $textref, $pre, $ldel, $qdel, $quotelike, $rdel
162{
163 my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_;
164 my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0);
165 unless ($$textref =~ m/\G$pre/gc)
166 {
167 $@ = "Did not find prefix: /$pre/";
168 return;
169 }
170
171 $ldelpos = pos $$textref;
172
173 unless ($$textref =~ m/\G($ldel)/gc)
174 {
175 $@ = "Did not find opening bracket after prefix: \"$pre\"";
176 pos $$textref = $startpos;
177 return;
178 }
179
180 my @nesting = ( $1 );
181 my $textlen = length $$textref;
182 while (pos $$textref < $textlen)
183 {
184 next if $$textref =~ m/\G\\./gcs;
185
186 if ($$textref =~ m/\G($ldel)/gc)
187 {
188 push @nesting, $1;
189 }
190 elsif ($$textref =~ m/\G($rdel)/gc)
191 {
192 my ($found, $brackettype) = ($1, $1);
193 if ($#nesting < 0)
194 {
195 $@ = "Unmatched closing bracket: \"$found\"";
196 pos $$textref = $startpos;
197 return;
198 }
199 my $expected = pop(@nesting);
200 $expected =~ tr/({[</)}]>/;
201 if ($expected ne $brackettype)
202 {
203 $@ = qq{Mismatched closing bracket: expected "$expected" but found "$found"};
204 pos $$textref = $startpos;
205 return;
206 }
207 last if $#nesting < 0;
208 }
209 elsif ($qdel && $$textref =~ m/\G([$qdel])/gc)
210 {
211 $$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gc and next;
212 $@ = "Unmatched embedded quote ($1)";
213 pos $$textref = $startpos;
214 return;
215 }
216 elsif ($quotelike && _match_quotelike($textref,"",1,0))
217 {
218 next;
219 }
220
221 else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs }
222 }
223 if ($#nesting>=0)
224 {
225 $@ = "Unmatched opening bracket(s): "
226 . join("..",@nesting)."..";
227 pos $$textref = $startpos;
228 return;
229 }
230
231 $endpos = pos $$textref;
232
233 return (
234 $startpos, $ldelpos-$startpos, # PREFIX
235 $ldelpos, 1, # OPENING BRACKET
236 $ldelpos+1, $endpos-$ldelpos-2, # CONTENTS
237 $endpos-1, 1, # CLOSING BRACKET
238 $endpos, length($$textref)-$endpos, # REMAINDER
239 );
240}
241
242sub revbracket($)
243{
244 my $brack = reverse $_[0];
245 $brack =~ tr/[({</])}>/;
246 return $brack;
247}
248
249my $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*};
250
251sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options)
252{
253 my $textref = defined $_[0] ? \$_[0] : \$_;
254 my $ldel = $_[1];
255 my $rdel = $_[2];
256 my $pre = defined $_[3] ? $_[3] : '\s*';
257 my %options = defined $_[4] ? %{$_[4]} : ();
258 my $omode = defined $options{fail} ? $options{fail} : '';
259 my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
260 : defined($options{reject}) ? $options{reject}
261 : ''
262 ;
263 my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
264 : defined($options{ignore}) ? $options{ignore}
265 : ''
266 ;
267
268 if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }
269 $@ = undef;
270
271 my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore);
272
273 return _fail(wantarray, $textref) unless @match;
274 return _succeed wantarray, $textref,
275 $match[2], $match[3]+$match[5]+$match[7], # MATCH
276 @match[8..9,0..1,2..7]; # REM, PRE, BITS
277}
278
279sub _match_tagged # ($$$$$$$)
280{
281 my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_;
282 my $rdelspec;
283
284 my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) = ( pos($$textref) = pos($$textref)||0 );
285
286 unless ($$textref =~ m/\G($pre)/gc)
287 {
288 $@ = "Did not find prefix: /$pre/";
289 goto failed;
290 }
291
292 $opentagpos = pos($$textref);
293
294 unless ($$textref =~ m/\G$ldel/gc)
295 {
296 $@ = "Did not find opening tag: /$ldel/";
297 goto failed;
298 }
299
300 $textpos = pos($$textref);
301
302 if (!defined $rdel)
303 {
304 $rdelspec = $&;
305 unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ "$1\/$2". revbracket($1) /oes)
306 {
307 $@ = "Unable to construct closing tag to match: $rdel";
308 goto failed;
309 }
310 }
311 else
312 {
313 $rdelspec = eval "qq{$rdel}";
314 }
315
316 while (pos($$textref) < length($$textref))
317 {
318 next if $$textref =~ m/\G\\./gc;
319
320 if ($$textref =~ m/\G(\n[ \t]*\n)/gc )
321 {
322 $parapos = pos($$textref) - length($1)
323 unless defined $parapos;
324 }
325 elsif ($$textref =~ m/\G($rdelspec)/gc )
326 {
327 $closetagpos = pos($$textref)-length($1);
328 goto matched;
329 }
330 elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc)
331 {
332 next;
333 }
334 elsif ($bad && $$textref =~ m/\G($bad)/gcs)
335 {
336 pos($$textref) -= length($1); # CUT OFF WHATEVER CAUSED THE SHORTNESS
337 goto short if ($omode eq 'PARA' || $omode eq 'MAX');
338 $@ = "Found invalid nested tag: $1";
339 goto failed;
340 }
341 elsif ($$textref =~ m/\G($ldel)/gc)
342 {
343 my $tag = $1;
344 pos($$textref) -= length($tag); # REWIND TO NESTED TAG
345 unless (_match_tagged(@_)) # MATCH NESTED TAG
346 {
347 goto short if $omode eq 'PARA' || $omode eq 'MAX';
348 $@ = "Found unbalanced nested tag: $tag";
349 goto failed;
350 }
351 }
352 else { $$textref =~ m/./gcs }
353 }
354
355short:
356 $closetagpos = pos($$textref);
357 goto matched if $omode eq 'MAX';
358 goto failed unless $omode eq 'PARA';
359
360 if (defined $parapos) { pos($$textref) = $parapos }
361 else { $parapos = pos($$textref) }
362
363 return (
364 $startpos, $opentagpos-$startpos, # PREFIX
365 $opentagpos, $textpos-$opentagpos, # OPENING TAG
366 $textpos, $parapos-$textpos, # TEXT
367 $parapos, 0, # NO CLOSING TAG
368 $parapos, length($$textref)-$parapos, # REMAINDER
369 );
370
371matched:
372 $endpos = pos($$textref);
373 return (
374 $startpos, $opentagpos-$startpos, # PREFIX
375 $opentagpos, $textpos-$opentagpos, # OPENING TAG
376 $textpos, $closetagpos-$textpos, # TEXT
377 $closetagpos, $endpos-$closetagpos, # CLOSING TAG
378 $endpos, length($$textref)-$endpos, # REMAINDER
379 );
380
381failed:
382 $@ = "Did not find closing tag" unless $@;
383 pos($$textref) = $startpos;
384 return;
385}
386
387sub extract_variable (;$$)
388{
389 my $textref = defined $_[0] ? \$_[0] : \$_;
390 return ("","","") unless defined $$textref;
391 my $pre = defined $_[1] ? $_[1] : '\s*';
392
393 my @match = _match_variable($textref,$pre);
394
395 return _fail wantarray, $textref unless @match;
396
397 return _succeed wantarray, $textref,
398 @match[2..3,4..5,0..1]; # MATCH, REMAINDER, PREFIX
399}
400
401sub _match_variable($$)
402{
403 my ($textref, $pre) = @_;
404 my $startpos = pos($$textref) = pos($$textref)||0;
405 unless ($$textref =~ m/\G($pre)/gc)
406 {
407 $@ = "Did not find prefix: /$pre/";
408 return;
409 }
410 my $varpos = pos($$textref);
411 unless ($$textref =~ m/\G(\$#?|[*\@\%]|\\&)+/gc)
412 {
413 $@ = "Did not find leading dereferencer";
414 pos $$textref = $startpos;
415 return;
416 }
417
418 unless ($$textref =~ m/\G\s*(?:::)?(?:[_a-z]\w*::)*[_a-z]\w*/gci
419 or _match_codeblock($textref, "", '\{', '\}', '\{', '\}', 0))
420 {
421 $@ = "Bad identifier after dereferencer";
422 pos $$textref = $startpos;
423 return;
424 }
425
426 while (1)
427 {
428 next if _match_codeblock($textref,
429 qr/\s*->\s*(?:[a-zA-Z]\w+\s*)?/,
430 qr/[({[]/, qr/[)}\]]/,
431 qr/[({[]/, qr/[)}\]]/, 0);
432 next if _match_codeblock($textref,
433 qr/\s*/, qr/[{[]/, qr/[}\]]/,
434 qr/[{[]/, qr/[}\]]/, 0);
435 next if _match_variable($textref,'\s*->\s*');
436 next if $$textref =~ m/\G\s*->\s*\w+(?![{([])/gc;
437 last;
438 }
439
440 my $endpos = pos($$textref);
441 return ($startpos, $varpos-$startpos,
442 $varpos, $endpos-$varpos,
443 $endpos, length($$textref)-$endpos
444 );
445}
446
447sub extract_codeblock (;$$$$$)
448{
449 my $textref = defined $_[0] ? \$_[0] : \$_;
450 my $wantarray = wantarray;
451 my $ldel_inner = defined $_[1] ? $_[1] : '{';
452 my $pre = defined $_[2] ? $_[2] : '\s*';
453 my $ldel_outer = defined $_[3] ? $_[3] : $ldel_inner;
454 my $rd = $_[4];
455 my $rdel_inner = $ldel_inner;
456 my $rdel_outer = $ldel_outer;
457 my $posbug = pos;
458 for ($ldel_inner, $ldel_outer) { tr/[]()<>{}\0-\377/[[((<<{{/ds }
459 for ($rdel_inner, $rdel_outer) { tr/[]()<>{}\0-\377/]]))>>}}/ds }
460 for ($ldel_inner, $ldel_outer, $rdel_inner, $rdel_outer)
461 {
462 $_ = '('.join('|',map { quotemeta $_ } split('',$_)).')'
463 }
464 pos = $posbug;
465
466 my @match = _match_codeblock($textref, $pre,
467 $ldel_outer, $rdel_outer,
468 $ldel_inner, $rdel_inner,
469 $rd);
470 return _fail($wantarray, $textref) unless @match;
471 return _succeed($wantarray, $textref,
472 @match[2..3,4..5,0..1] # MATCH, REMAINDER, PREFIX
473 );
474
475}
476
477sub _match_codeblock($$$$$$$)
478{
479 my ($textref, $pre, $ldel_outer, $rdel_outer, $ldel_inner, $rdel_inner, $rd) = @_;
480 my $startpos = pos($$textref) = pos($$textref) || 0;
481 unless ($$textref =~ m/\G($pre)/gc)
482 {
483 $@ = qq{Did not match prefix /$pre/ at"} .
484 substr($$textref,pos($$textref),20) .
485 q{..."};
486 return;
487 }
488 my $codepos = pos($$textref);
489 unless ($$textref =~ m/\G($ldel_outer)/gc) # OUTERMOST DELIMITER
490 {
491 $@ = qq{Did not find expected opening bracket at "} .
492 substr($$textref,pos($$textref),20) .
493 q{..."};
494 pos $$textref = $startpos;
495 return;
496 }
497 my $closing = $1;
498 $closing =~ tr/([<{/)]>}/;
499 my $matched;
500 my $patvalid = 1;
501 while (pos($$textref) < length($$textref))
502 {
503 $matched = '';
504 if ($rd && $$textref =~ m#\G(\Q(?)\E|\Q(s?)\E|\Q(s)\E)#gc)
505 {
506 $patvalid = 0;
507 next;
508 }
509
510 if ($$textref =~ m/\G\s*#.*/gc)
511 {
512 next;
513 }
514
515 if ($$textref =~ m/\G\s*($rdel_outer)/gc)
516 {
517 unless ($matched = ($closing && $1 eq $closing) )
518 {
519 next if $1 eq '>'; # MIGHT BE A "LESS THAN"
520 $@ = q{Mismatched closing bracket at "} .
521 substr($$textref,pos($$textref),20) .
522 qq{...". Expected '$closing'};
523 }
524 last;
525 }
526
527 if (_match_variable($textref,'\s*') ||
528 _match_quotelike($textref,'\s*',$patvalid,$patvalid) )
529 {
530 $patvalid = 0;
531 next;
532 }
533
534
535 # NEED TO COVER MANY MORE CASES HERE!!!
536 if ($$textref =~ m#\G\s*( [-+*x/%^&|.]=?
537 | =(?!>)
538 | (\*\*|&&|\|\||<<|>>)=?
539 | [!=][~=]
540 | split|grep|map|return
541 )#gcx)
542 {
543 $patvalid = 1;
544 next;
545 }
546
547 if ( _match_codeblock($textref, '\s*', $ldel_inner, $rdel_inner, $ldel_inner, $rdel_inner, $rd) )
548 {
549 $patvalid = 1;
550 next;
551 }
552
553 if ($$textref =~ m/\G\s*$ldel_outer/gc)
554 {
555 $@ = q{Improperly nested codeblock at "} .
556 substr($$textref,pos($$textref),20) .
557 q{..."};
558 last;
559 }
560
561 $patvalid = 0;
562 $$textref =~ m/\G\s*(\w+|[-=>]>|.|\Z)/gc;
563 }
564
565 unless ($matched)
566 {
567 $@ = 'No match found for opening bracket' unless $@;
568 return;
569 }
570
571 my $endpos = pos($$textref);
572 return ( $startpos, $codepos-$startpos,
573 $codepos, $endpos-$codepos,
574 $endpos, length($$textref)-$endpos,
575 );
576}
577
578
579my %mods = (
580 'none' => '[cgimsox]*',
581 'm' => '[cgimsox]*',
582 's' => '[cegimsox]*',
583 'tr' => '[cds]*',
584 'y' => '[cds]*',
585 'qq' => '',
586 'qx' => '',
587 'qw' => '',
588 'qr' => '[imsx]*',
589 'q' => '',
590 );
591
592sub extract_quotelike (;$$)
593{
594 my $textref = $_[0] ? \$_[0] : \$_;
595 my $wantarray = wantarray;
596 my $pre = defined $_[1] ? $_[1] : '\s*';
597
598 my @match = _match_quotelike($textref,$pre,1,0);
599 return _fail($wantarray, $textref) unless @match;
600 return _succeed($wantarray, $textref,
601 $match[2], $match[18]-$match[2], # MATCH
602 @match[18,19], # REMAINDER
603 @match[0,1], # PREFIX
604 @match[2..17], # THE BITS
605 );
606};
607
608sub _match_quotelike($$$$) # ($textref, $prepat, $allow_raw_match)
609{
610 my ($textref, $pre, $rawmatch, $qmark) = @_;
611
612 my ($textlen,$startpos,
613 $oppos,
614 $preld1pos,$ld1pos,$str1pos,$rd1pos,
615 $preld2pos,$ld2pos,$str2pos,$rd2pos,
616 $modpos) = ( length($$textref), pos($$textref) = pos($$textref) || 0 );
617
618 unless ($$textref =~ m/\G($pre)/gc)
619 {
620 $@ = qq{Did not find prefix /$pre/ at "} .
621 substr($$textref, pos($$textref), 20) .
622 q{..."};
623 return;
624 }
625 $oppos = pos($$textref);
626
627 my $initial = substr($$textref,$oppos,1);
628
629 if ($initial && $initial =~ m|^[\"\'\`]|
630 || $rawmatch && $initial =~ m|^/|
631 || $qmark && $initial =~ m|^\?|)
632 {
633 unless ($$textref =~ m/ \Q$initial\E [^\\$initial]* (\\.[^\\$initial]*)* \Q$initial\E /gcx)
634 {
635 $@ = qq{Did not find closing delimiter to match '$initial' at "} .
636 substr($$textref, $oppos, 20) .
637 q{..."};
638 pos $$textref = $startpos;
639 return;
640 }
641 $modpos= pos($$textref);
642 $rd1pos = $modpos-1;
643
644 if ($initial eq '/' || $initial eq '?')
645 {
646 $$textref =~ m/\G$mods{none}/gc
647 }
648
649 my $endpos = pos($$textref);
650 return (
651 $startpos, $oppos-$startpos, # PREFIX
652 $oppos, 0, # NO OPERATOR
653 $oppos, 1, # LEFT DEL
654 $oppos+1, $rd1pos-$oppos-1, # STR/PAT
655 $rd1pos, 1, # RIGHT DEL
656 $modpos, 0, # NO 2ND LDEL
657 $modpos, 0, # NO 2ND STR
658 $modpos, 0, # NO 2ND RDEL
659 $modpos, $endpos-$modpos, # MODIFIERS
660 $endpos, $textlen-$endpos, # REMAINDER
661 );
662 }
663
664 unless ($$textref =~ m!\G(m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)!gc)
665 {
666 $@ = q{No quotelike operator found after prefix at "} .
667 substr($$textref, pos($$textref), 20) .
668 q{..."};
669 pos $$textref = $startpos;
670 return;
671 }
672
673 my $op = $1;
674 $preld1pos = pos($$textref);
675 $$textref =~ m/\G\s*/gc;
676 $ld1pos = pos($$textref);
677 $str1pos = $ld1pos+1;
678
679 unless ($$textref =~ m/\G(\S)/gc) # SHOULD USE LOOKAHEAD
680 {
681 $@ = "No block delimiter found after quotelike $op";
682 pos $$textref = $startpos;
683 return;
684 }
685 pos($$textref) = $ld1pos; # HAVE TO DO THIS BECAUSE LOOKAHEAD BROKEN
686 my ($ldel1, $rdel1) = ("\Q$1","\Q$1");
687 if ($ldel1 =~ /[[(<{]/)
688 {
689 $rdel1 =~ tr/[({</])}>/;
690 _match_bracketed($textref,"",$ldel1,"","",$rdel1)
691 || do { pos $$textref = $startpos; return };
692 }
693 else
694 {
695 $$textref =~ /$ldel1[^\\$ldel1]*(\\.[^\\$ldel1]*)*$ldel1/gc
696 || do { pos $$textref = $startpos; return };
697 }
698 $ld2pos = $rd1pos = pos($$textref)-1;
699
700 my $second_arg = $op =~ /s|tr|y/ ? 1 : 0;
701 if ($second_arg)
702 {
703 my ($ldel2, $rdel2);
704 if ($ldel1 =~ /[[(<{]/)
705 {
706 unless ($$textref =~ /\G\s*(\S)/gc) # SHOULD USE LOOKAHEAD
707 {
708 $@ = "Missing second block for quotelike $op";
709 pos $$textref = $startpos;
710 return;
711 }
712 $ldel2 = $rdel2 = "\Q$1";
713 $rdel2 =~ tr/[({</])}>/;
714 }
715 else
716 {
717 $ldel2 = $rdel2 = $ldel1;
718 }
719 $str2pos = $ld2pos+1;
720
721 if ($ldel2 =~ /[[(<{]/)
722 {
723 pos($$textref)--; # OVERCOME BROKEN LOOKAHEAD
724 _match_bracketed($textref,"",$ldel2,"","",$rdel2)
725 || do { pos $$textref = $startpos; return };
726 }
727 else
728 {
729 $$textref =~ /[^\\$ldel2]*(\\.[^\\$ldel2]*)*$ldel2/gc
730 || do { pos $$textref = $startpos; return };
731 }
732 $rd2pos = pos($$textref)-1;
733 }
734 else
735 {
736 $ld2pos = $str2pos = $rd2pos = $rd1pos;
737 }
738
739 $modpos = pos $$textref;
740
741 $$textref =~ m/\G($mods{$op})/gc;
742 my $endpos = pos $$textref;
743
744 return (
745 $startpos, $startpos-$oppos, # PREFIX
746 $oppos, length($op), # OPERATOR
747 $ld1pos, 1, # LEFT DEL
748 $str1pos, $rd1pos-$str1pos, # STR/PAT
749 $rd1pos, 1, # RIGHT DEL
750 $ld2pos, $second_arg, # 2ND LDEL (MAYBE)
751 $str2pos, $rd2pos-$str2pos, # 2ND STR (MAYBE)
752 $rd2pos, $second_arg, # 2ND RDEL (MAYBE)
753 $modpos, $endpos-$modpos, # MODIFIERS
754 $endpos, $textlen-$endpos, # REMAINDER
755 );
756}
757
758my $def_func =
759[
760 sub { extract_variable($_[0], '') },
761 sub { extract_quotelike($_[0],'') },
762 sub { extract_codeblock($_[0],'{}','') },
763];
764
765sub extract_multiple (;$$$$) # ($text, $functions_ref, $max_fields, $ignoreunknown)
766{
767 my $textref = defined($_[0]) ? \$_[0] : \$_;
768 my $posbug = pos;
769 my ($lastpos, $firstpos);
770 my @fields = ();
771
772 for ($$textref)
773 {
774 my @func = defined $_[1] ? @{$_[1]} : @{$def_func};
775 my $max = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000;
776 my $igunk = $_[3];
777
778 pos ||= 0;
779
780 unless (wantarray)
781 {
782 use Carp;
783 carp "extract_multiple reset maximal count to 1 in scalar context"
784 if $^W && defined($_[2]) && $max > 1;
785 $max = 1
786 }
787
788 my $unkpos;
789 my $func;
790 my $class;
791
792 my @class;
793 foreach $func ( @func )
794 {
795 if (ref($func) eq 'HASH')
796 {
797 push @class, (keys %$func)[0];
798 $func = (values %$func)[0];
799 }
800 else
801 {
802 push @class, undef;
803 }
804 }
805
806 FIELD: while (pos() < length())
807 {
808 my $field;
809 foreach my $i ( 0..$#func )
810 {
811 $func = $func[$i];
812 $class = $class[$i];
813 $lastpos = pos;
814 if (ref($func) eq 'CODE')
815 { ($field) = $func->($_) }
816 elsif( m/\G$func/gc )
817 { $field = defined($1) ? $1 : $& }
818
819 if (defined($field) && length($field))
820 {
821 if (defined($unkpos) && !$igunk)
822 {
823 push @fields, substr($_, $unkpos, $lastpos-$unkpos);
824 $firstpos = $unkpos unless defined $firstpos;
825 undef $unkpos;
826 last FIELD if @fields == $max;
827 }
828 push @fields, $class
829 ? bless(\$field, $class)
830 : $field;
831 $firstpos = $lastpos unless defined $firstpos;
832 $lastpos = pos;
833 last FIELD if @fields == $max;
834 next FIELD;
835 }
836 }
837 if (/\G(.)/gcs)
838 {
839 $unkpos = pos()-1
840 unless $igunk || defined $unkpos;
841 }
842 }
843
844 if (defined $unkpos)
845 {
846 push @fields, substr($_, $unkpos);
847 $firstpos = $unkpos unless defined $firstpos;
848 $lastpos = length;
849 }
850 last;
851 }
852
853 pos $$textref = $lastpos;
854 return @fields if wantarray;
855
856 $firstpos ||= 0;
857 eval { substr($$textref,$firstpos,$lastpos-$firstpos)="";
858 pos $$textref = $firstpos };
859 return $fields[0];
860}
861
862
863sub gen_extract_tagged # ($opentag, $closetag, $pre, \%options)
864{
865 my $ldel = $_[0];
866 my $rdel = $_[1];
867 my $pre = defined $_[2] ? $_[2] : '\s*';
868 my %options = defined $_[3] ? %{$_[3]} : ();
869 my $omode = defined $options{fail} ? $options{fail} : '';
870 my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
871 : defined($options{reject}) ? $options{reject}
872 : ''
873 ;
874 my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
875 : defined($options{ignore}) ? $options{ignore}
876 : ''
877 ;
878
879 if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }
880
881 my $posbug = pos;
882 for ($ldel, $pre, $bad, $ignore) { $_ = qr/$_/ if $_ }
883 pos = $posbug;
884
885 my $closure = sub
886 {
887 my $textref = defined $_[0] ? \$_[0] : \$_;
888 my @match = Text::Balanced::_match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore);
889
890 return _fail(wantarray, $textref) unless @match;
891 return _succeed wantarray, $textref,
892 $match[2], $match[3]+$match[5]+$match[7], # MATCH
893 @match[8..9,0..1,2..7]; # REM, PRE, BITS
894 };
895
896 bless $closure, 'Text::Balanced::Extractor';
897}
898
899package Text::Balanced::Extractor;
900
901sub extract($$) # ($self, $text)
902{
903 &{$_[0]}($_[1]);
904}
905
9061;