Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / 5.8.0 / Pod / ParseUtils.pm
CommitLineData
86530b38
AT
1#############################################################################
2# Pod/ParseUtils.pm -- helpers for POD parsing and conversion
3#
4# Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved.
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::ParseUtils;
11
12use vars qw($VERSION);
13$VERSION = 0.22; ## Current version of this package
14require 5.005; ## requires this Perl version or later
15
16=head1 NAME
17
18Pod::ParseUtils - helpers for POD parsing and conversion
19
20=head1 SYNOPSIS
21
22 use Pod::ParseUtils;
23
24 my $list = new Pod::List;
25 my $link = Pod::Hyperlink->new('Pod::Parser');
26
27=head1 DESCRIPTION
28
29B<Pod::ParseUtils> contains a few object-oriented helper packages for
30POD parsing and processing (i.e. in POD formatters and translators).
31
32=cut
33
34#-----------------------------------------------------------------------------
35# Pod::List
36#
37# class to hold POD list info (=over, =item, =back)
38#-----------------------------------------------------------------------------
39
40package Pod::List;
41
42use Carp;
43
44=head2 Pod::List
45
46B<Pod::List> can be used to hold information about POD lists
47(written as =over ... =item ... =back) for further processing.
48The following methods are available:
49
50=over 4
51
52=item Pod::List-E<gt>new()
53
54Create a new list object. Properties may be specified through a hash
55reference like this:
56
57 my $list = Pod::List->new({ -start => $., -indent => 4 });
58
59See the individual methods/properties for details.
60
61=cut
62
63sub new {
64 my $this = shift;
65 my $class = ref($this) || $this;
66 my %params = @_;
67 my $self = {%params};
68 bless $self, $class;
69 $self->initialize();
70 return $self;
71}
72
73sub initialize {
74 my $self = shift;
75 $self->{-file} ||= 'unknown';
76 $self->{-start} ||= 'unknown';
77 $self->{-indent} ||= 4; # perlpod: "should be the default"
78 $self->{_items} = [];
79 $self->{-type} ||= '';
80}
81
82=item $list-E<gt>file()
83
84Without argument, retrieves the file name the list is in. This must
85have been set before by either specifying B<-file> in the B<new()>
86method or by calling the B<file()> method with a scalar argument.
87
88=cut
89
90# The POD file name the list appears in
91sub file {
92 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
93}
94
95=item $list-E<gt>start()
96
97Without argument, retrieves the line number where the list started.
98This must have been set before by either specifying B<-start> in the
99B<new()> method or by calling the B<start()> method with a scalar
100argument.
101
102=cut
103
104# The line in the file the node appears
105sub start {
106 return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
107}
108
109=item $list-E<gt>indent()
110
111Without argument, retrieves the indent level of the list as specified
112in C<=over n>. This must have been set before by either specifying
113B<-indent> in the B<new()> method or by calling the B<indent()> method
114with a scalar argument.
115
116=cut
117
118# indent level
119sub indent {
120 return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
121}
122
123=item $list-E<gt>type()
124
125Without argument, retrieves the list type, which can be an arbitrary value,
126e.g. C<OL>, C<UL>, ... when thinking the HTML way.
127This must have been set before by either specifying
128B<-type> in the B<new()> method or by calling the B<type()> method
129with a scalar argument.
130
131=cut
132
133# The type of the list (UL, OL, ...)
134sub type {
135 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
136}
137
138=item $list-E<gt>rx()
139
140Without argument, retrieves a regular expression for simplifying the
141individual item strings once the list type has been determined. Usage:
142E.g. when converting to HTML, one might strip the leading number in
143an ordered list as C<E<lt>OLE<gt>> already prints numbers itself.
144This must have been set before by either specifying
145B<-rx> in the B<new()> method or by calling the B<rx()> method
146with a scalar argument.
147
148=cut
149
150# The regular expression to simplify the items
151sub rx {
152 return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
153}
154
155=item $list-E<gt>item()
156
157Without argument, retrieves the array of the items in this list.
158The items may be represented by any scalar.
159If an argument has been given, it is pushed on the list of items.
160
161=cut
162
163# The individual =items of this list
164sub item {
165 my ($self,$item) = @_;
166 if(defined $item) {
167 push(@{$self->{_items}}, $item);
168 return $item;
169 }
170 else {
171 return @{$self->{_items}};
172 }
173}
174
175=item $list-E<gt>parent()
176
177Without argument, retrieves information about the parent holding this
178list, which is represented as an arbitrary scalar.
179This must have been set before by either specifying
180B<-parent> in the B<new()> method or by calling the B<parent()> method
181with a scalar argument.
182
183=cut
184
185# possibility for parsers/translators to store information about the
186# lists's parent object
187sub parent {
188 return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
189}
190
191=item $list-E<gt>tag()
192
193Without argument, retrieves information about the list tag, which can be
194any scalar.
195This must have been set before by either specifying
196B<-tag> in the B<new()> method or by calling the B<tag()> method
197with a scalar argument.
198
199=back
200
201=cut
202
203# possibility for parsers/translators to store information about the
204# list's object
205sub tag {
206 return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag};
207}
208
209#-----------------------------------------------------------------------------
210# Pod::Hyperlink
211#
212# class to manipulate POD hyperlinks (L<>)
213#-----------------------------------------------------------------------------
214
215package Pod::Hyperlink;
216
217=head2 Pod::Hyperlink
218
219B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage:
220
221 my $link = Pod::Hyperlink->new('alternative text|page/"section in page"');
222
223The B<Pod::Hyperlink> class is mainly designed to parse the contents of the
224C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
225different parts of a POD hyperlink for further processing. It can also be
226used to construct hyperlinks.
227
228=over 4
229
230=item Pod::Hyperlink-E<gt>new()
231
232The B<new()> method can either be passed a set of key/value pairs or a single
233scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
234of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a
235failure, the error message is stored in C<$@>.
236
237=cut
238
239use Carp;
240
241sub new {
242 my $this = shift;
243 my $class = ref($this) || $this;
244 my $self = +{};
245 bless $self, $class;
246 $self->initialize();
247 if(defined $_[0]) {
248 if(ref($_[0])) {
249 # called with a list of parameters
250 %$self = %{$_[0]};
251 $self->_construct_text();
252 }
253 else {
254 # called with L<> contents
255 return undef unless($self->parse($_[0]));
256 }
257 }
258 return $self;
259}
260
261sub initialize {
262 my $self = shift;
263 $self->{-line} ||= 'undef';
264 $self->{-file} ||= 'undef';
265 $self->{-page} ||= '';
266 $self->{-node} ||= '';
267 $self->{-alttext} ||= '';
268 $self->{-type} ||= 'undef';
269 $self->{_warnings} = [];
270}
271
272=item $link-E<gt>parse($string)
273
274This method can be used to (re)parse a (new) hyperlink, i.e. the contents
275of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.
276Warnings are stored in the B<warnings> property.
277E.g. sections like C<LE<lt>open(2)E<gt>> are deprecated, as they do not point
278to Perl documents. C<LE<lt>DBI::foo(3p)E<gt>> is wrong as well, the manpage
279section can simply be dropped.
280
281=cut
282
283sub parse {
284 my $self = shift;
285 local($_) = $_[0];
286 # syntax check the link and extract destination
287 my ($alttext,$page,$node,$type) = (undef,'','','');
288
289 $self->{_warnings} = [];
290
291 # collapse newlines with whitespace
292 s/\s*\n+\s*/ /g;
293
294 # strip leading/trailing whitespace
295 if(s/^[\s\n]+//) {
296 $self->warning("ignoring leading whitespace in link");
297 }
298 if(s/[\s\n]+$//) {
299 $self->warning("ignoring trailing whitespace in link");
300 }
301 unless(length($_)) {
302 _invalid_link("empty link");
303 return undef;
304 }
305
306 ## Check for different possibilities. This is tedious and error-prone
307 # we match all possibilities (alttext, page, section/item)
308 #warn "DEBUG: link=$_\n";
309
310 # only page
311 # problem: a lot of people use (), or (1) or the like to indicate
312 # man page sections. But this collides with L<func()> that is supposed
313 # to point to an internal funtion...
314 my $page_rx = '[\w.]+(?:::[\w.]+)*(?:[(](?:\d\w*|)[)]|)';
315 # page name only
316 if(m!^($page_rx)$!o) {
317 $page = $1;
318 $type = 'page';
319 }
320 # alttext, page and "section"
321 elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) {
322 ($alttext, $page, $node) = ($1, $2, $3);
323 $type = 'section';
324 }
325 # alttext and page
326 elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) {
327 ($alttext, $page) = ($1, $2);
328 $type = 'page';
329 }
330 # alttext and "section"
331 elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
332 ($alttext, $node) = ($1,$2);
333 $type = 'section';
334 }
335 # page and "section"
336 elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) {
337 ($page, $node) = ($1, $2);
338 $type = 'section';
339 }
340 # page and item
341 elsif(m!^($page_rx)\s*/\s*(.+)$!o) {
342 ($page, $node) = ($1, $2);
343 $type = 'item';
344 }
345 # only "section"
346 elsif(m!^/?"(.+)"$!) {
347 $node = $1;
348 $type = 'section';
349 }
350 # only item
351 elsif(m!^\s*/(.+)$!) {
352 $node = $1;
353 $type = 'item';
354 }
355 # non-standard: Hyperlink
356 elsif(m!^((?:http|ftp|mailto|news):.+)$!i) {
357 $node = $1;
358 $type = 'hyperlink';
359 }
360 # alttext, page and item
361 elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) {
362 ($alttext, $page, $node) = ($1, $2, $3);
363 $type = 'item';
364 }
365 # alttext and item
366 elsif(m!^(.*?)\s*[|]\s*/(.+)$!) {
367 ($alttext, $node) = ($1,$2);
368 }
369 # nonstandard: alttext and hyperlink
370 elsif(m!^(.*?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) {
371 ($alttext, $node) = ($1,$2);
372 $type = 'hyperlink';
373 }
374 # must be an item or a "malformed" section (without "")
375 else {
376 $node = $_;
377 $type = 'item';
378 }
379 # collapse whitespace in nodes
380 $node =~ s/\s+/ /gs;
381
382 # empty alternative text expands to node name
383 if(defined $alttext) {
384 if(!length($alttext)) {
385 $alttext = $node | $page;
386 }
387 }
388 else {
389 $alttext = '';
390 }
391
392 if($page =~ /[(]\w*[)]$/) {
393 $self->warning("(section) in '$page' deprecated");
394 }
395 if($node =~ m:[|/]:) {
396 $self->warning("node '$node' contains non-escaped | or /");
397 }
398 if($alttext =~ m:[|/]:) {
399 $self->warning("alternative text '$node' contains non-escaped | or /");
400 }
401 $self->{-page} = $page;
402 $self->{-node} = $node;
403 $self->{-alttext} = $alttext;
404 #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n";
405 $self->{-type} = $type;
406 $self->_construct_text();
407 1;
408}
409
410sub _construct_text {
411 my $self = shift;
412 my $alttext = $self->alttext();
413 my $type = $self->type();
414 my $section = $self->node();
415 my $page = $self->page();
416 my $page_ext = '';
417 $page =~ s/([(]\w*[)])$// && ($page_ext = $1);
418 if($alttext) {
419 $self->{_text} = $alttext;
420 }
421 elsif($type eq 'hyperlink') {
422 $self->{_text} = $section;
423 }
424 else {
425 $self->{_text} = (!$section ? '' :
426 $type eq 'item' ? "the $section entry" :
427 "the section on $section" ) .
428 ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
429 ' elsewhere in this document');
430 }
431 # for being marked up later
432 # use the non-standard markers P<> and Q<>, so that the resulting
433 # text can be parsed by the translators. It's their job to put
434 # the correct hypertext around the linktext
435 if($alttext) {
436 $self->{_markup} = "Q<$alttext>";
437 }
438 elsif($type eq 'hyperlink') {
439 $self->{_markup} = "Q<$section>";
440 }
441 else {
442 $self->{_markup} = (!$section ? '' :
443 $type eq 'item' ? "the Q<$section> entry" :
444 "the section on Q<$section>" ) .
445 ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" :
446 ' elsewhere in this document');
447 }
448}
449
450=item $link-E<gt>markup($string)
451
452Set/retrieve the textual value of the link. This string contains special
453markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
454translator's interior sequence expansion engine to the
455formatter-specific code to highlight/activate the hyperlink. The details
456have to be implemented in the translator.
457
458=cut
459
460#' retrieve/set markuped text
461sub markup {
462 return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
463}
464
465=item $link-E<gt>text()
466
467This method returns the textual representation of the hyperlink as above,
468but without markers (read only). Depending on the link type this is one of
469the following alternatives (the + and * denote the portions of the text
470that are marked up):
471
472 the +perl+ manpage
473 the *$|* entry in the +perlvar+ manpage
474 the section on *OPTIONS* in the +perldoc+ manpage
475 the section on *DESCRIPTION* elsewhere in this document
476
477=cut
478
479# The complete link's text
480sub text {
481 $_[0]->{_text};
482}
483
484=item $link-E<gt>warning()
485
486After parsing, this method returns any warnings encountered during the
487parsing process.
488
489=cut
490
491# Set/retrieve warnings
492sub warning {
493 my $self = shift;
494 if(@_) {
495 push(@{$self->{_warnings}}, @_);
496 return @_;
497 }
498 return @{$self->{_warnings}};
499}
500
501=item $link-E<gt>file()
502
503=item $link-E<gt>line()
504
505Just simple slots for storing information about the line and the file
506the link was encountered in. Has to be filled in manually.
507
508=cut
509
510# The line in the file the link appears
511sub line {
512 return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line};
513}
514
515# The POD file name the link appears in
516sub file {
517 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
518}
519
520=item $link-E<gt>page()
521
522This method sets or returns the POD page this link points to.
523
524=cut
525
526# The POD page the link appears on
527sub page {
528 if (@_ > 1) {
529 $_[0]->{-page} = $_[1];
530 $_[0]->_construct_text();
531 }
532 $_[0]->{-page};
533}
534
535=item $link-E<gt>node()
536
537As above, but the destination node text of the link.
538
539=cut
540
541# The link destination
542sub node {
543 if (@_ > 1) {
544 $_[0]->{-node} = $_[1];
545 $_[0]->_construct_text();
546 }
547 $_[0]->{-node};
548}
549
550=item $link-E<gt>alttext()
551
552Sets or returns an alternative text specified in the link.
553
554=cut
555
556# Potential alternative text
557sub alttext {
558 if (@_ > 1) {
559 $_[0]->{-alttext} = $_[1];
560 $_[0]->_construct_text();
561 }
562 $_[0]->{-alttext};
563}
564
565=item $link-E<gt>type()
566
567The node type, either C<section> or C<item>. As an unofficial type,
568there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
569
570=cut
571
572# The type: item or headn
573sub type {
574 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
575}
576
577=item $link-E<gt>link()
578
579Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.
580
581=back
582
583=cut
584
585# The link itself
586sub link {
587 my $self = shift;
588 my $link = $self->page() || '';
589 if($self->node()) {
590 my $node = $self->node();
591 $text =~ s/\|/E<verbar>/g;
592 $text =~ s:/:E<sol>:g;
593 if($self->type() eq 'section') {
594 $link .= ($link ? '/' : '') . '"' . $node . '"';
595 }
596 elsif($self->type() eq 'hyperlink') {
597 $link = $self->node();
598 }
599 else { # item
600 $link .= '/' . $node;
601 }
602 }
603 if($self->alttext()) {
604 my $text = $self->alttext();
605 $text =~ s/\|/E<verbar>/g;
606 $text =~ s:/:E<sol>:g;
607 $link = "$text|$link";
608 }
609 $link;
610}
611
612sub _invalid_link {
613 my ($msg) = @_;
614 # this sets @_
615 #eval { die "$msg\n" };
616 #chomp $@;
617 $@ = $msg; # this seems to work, too!
618 undef;
619}
620
621#-----------------------------------------------------------------------------
622# Pod::Cache
623#
624# class to hold POD page details
625#-----------------------------------------------------------------------------
626
627package Pod::Cache;
628
629=head2 Pod::Cache
630
631B<Pod::Cache> holds information about a set of POD documents,
632especially the nodes for hyperlinks.
633The following methods are available:
634
635=over 4
636
637=item Pod::Cache-E<gt>new()
638
639Create a new cache object. This object can hold an arbitrary number of
640POD documents of class Pod::Cache::Item.
641
642=cut
643
644sub new {
645 my $this = shift;
646 my $class = ref($this) || $this;
647 my $self = [];
648 bless $self, $class;
649 return $self;
650}
651
652=item $cache-E<gt>item()
653
654Add a new item to the cache. Without arguments, this method returns a
655list of all cache elements.
656
657=cut
658
659sub item {
660 my ($self,%param) = @_;
661 if(%param) {
662 my $item = Pod::Cache::Item->new(%param);
663 push(@$self, $item);
664 return $item;
665 }
666 else {
667 return @{$self};
668 }
669}
670
671=item $cache-E<gt>find_page($name)
672
673Look for a POD document named C<$name> in the cache. Returns the
674reference to the corresponding Pod::Cache::Item object or undef if
675not found.
676
677=back
678
679=cut
680
681sub find_page {
682 my ($self,$page) = @_;
683 foreach(@$self) {
684 if($_->page() eq $page) {
685 return $_;
686 }
687 }
688 undef;
689}
690
691package Pod::Cache::Item;
692
693=head2 Pod::Cache::Item
694
695B<Pod::Cache::Item> holds information about individual POD documents,
696that can be grouped in a Pod::Cache object.
697It is intended to hold information about the hyperlink nodes of POD
698documents.
699The following methods are available:
700
701=over 4
702
703=item Pod::Cache::Item-E<gt>new()
704
705Create a new object.
706
707=cut
708
709sub new {
710 my $this = shift;
711 my $class = ref($this) || $this;
712 my %params = @_;
713 my $self = {%params};
714 bless $self, $class;
715 $self->initialize();
716 return $self;
717}
718
719sub initialize {
720 my $self = shift;
721 $self->{-nodes} = [] unless(defined $self->{-nodes});
722}
723
724=item $cacheitem-E<gt>page()
725
726Set/retrieve the POD document name (e.g. "Pod::Parser").
727
728=cut
729
730# The POD page
731sub page {
732 return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
733}
734
735=item $cacheitem-E<gt>description()
736
737Set/retrieve the POD short description as found in the C<=head1 NAME>
738section.
739
740=cut
741
742# The POD description, taken out of NAME if present
743sub description {
744 return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
745}
746
747=item $cacheitem-E<gt>path()
748
749Set/retrieve the POD file storage path.
750
751=cut
752
753# The file path
754sub path {
755 return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
756}
757
758=item $cacheitem-E<gt>file()
759
760Set/retrieve the POD file name.
761
762=cut
763
764# The POD file name
765sub file {
766 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
767}
768
769=item $cacheitem-E<gt>nodes()
770
771Add a node (or a list of nodes) to the document's node list. Note that
772the order is kept, i.e. start with the first node and end with the last.
773If no argument is given, the current list of nodes is returned in the
774same order the nodes have been added.
775A node can be any scalar, but usually is a pair of node string and
776unique id for the C<find_node> method to work correctly.
777
778=cut
779
780# The POD nodes
781sub nodes {
782 my ($self,@nodes) = @_;
783 if(@nodes) {
784 push(@{$self->{-nodes}}, @nodes);
785 return @nodes;
786 }
787 else {
788 return @{$self->{-nodes}};
789 }
790}
791
792=item $cacheitem-E<gt>find_node($name)
793
794Look for a node or index entry named C<$name> in the object.
795Returns the unique id of the node (i.e. the second element of the array
796stored in the node arry) or undef if not found.
797
798=cut
799
800sub find_node {
801 my ($self,$node) = @_;
802 my @search;
803 push(@search, @{$self->{-nodes}}) if($self->{-nodes});
804 push(@search, @{$self->{-idx}}) if($self->{-idx});
805 foreach(@search) {
806 if($_->[0] eq $node) {
807 return $_->[1]; # id
808 }
809 }
810 undef;
811}
812
813=item $cacheitem-E<gt>idx()
814
815Add an index entry (or a list of them) to the document's index list. Note that
816the order is kept, i.e. start with the first node and end with the last.
817If no argument is given, the current list of index entries is returned in the
818same order the entries have been added.
819An index entry can be any scalar, but usually is a pair of string and
820unique id.
821
822=back
823
824=cut
825
826# The POD index entries
827sub idx {
828 my ($self,@idx) = @_;
829 if(@idx) {
830 push(@{$self->{-idx}}, @idx);
831 return @idx;
832 }
833 else {
834 return @{$self->{-idx}};
835 }
836}
837
838=head1 AUTHOR
839
840Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
841a lot of things from L<pod2man> and L<pod2roff> as well as other POD
842processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.
843
844=head1 SEE ALSO
845
846L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>,
847L<pod2html>
848
849=cut
850
8511;