Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | # Pod::ParseLink -- Parse an L<> formatting code in POD text. |
2 | # $Id: ParseLink.pm,v 1.6 2002/07/15 05:46:00 eagle Exp $ | |
3 | # | |
4 | # Copyright 2001 by Russ Allbery <rra@stanford.edu> | |
5 | # | |
6 | # This program is free software; you may redistribute it and/or modify it | |
7 | # under the same terms as Perl itself. | |
8 | # | |
9 | # This module implements parsing of the text of an L<> formatting code as | |
10 | # defined in perlpodspec. It should be suitable for any POD formatter. It | |
11 | # exports only one function, parselink(), which returns the five-item parse | |
12 | # defined in perlpodspec. | |
13 | # | |
14 | # Perl core hackers, please note that this module is also separately | |
15 | # maintained outside of the Perl core as part of the podlators. Please send | |
16 | # me any patches at the address above in addition to sending them to the | |
17 | # standard Perl mailing lists. | |
18 | ||
19 | ############################################################################## | |
20 | # Modules and declarations | |
21 | ############################################################################## | |
22 | ||
23 | package Pod::ParseLink; | |
24 | ||
25 | require 5.004; | |
26 | ||
27 | use strict; | |
28 | use vars qw(@EXPORT @ISA $VERSION); | |
29 | ||
30 | use Exporter; | |
31 | @ISA = qw(Exporter); | |
32 | @EXPORT = qw(parselink); | |
33 | ||
34 | # Don't use the CVS revision as the version, since this module is also in Perl | |
35 | # core and too many things could munge CVS magic revision strings. This | |
36 | # number should ideally be the same as the CVS revision in podlators, however. | |
37 | $VERSION = 1.06; | |
38 | ||
39 | ||
40 | ############################################################################## | |
41 | # Implementation | |
42 | ############################################################################## | |
43 | ||
# Parse the name and section portion of a link into a name and section.
#
# Takes the target of an L<> code (whatever follows any "text|" prefix) and
# returns the two-element list (page, section).  Leading and trailing
# whitespace is removed, as are double quotes around the section.  In the
# page/section form, a missing or empty page or section is returned as undef.
# A page or section that is the literal string "0" is preserved.
sub _parse_section {
    my ($link) = @_;
    return (undef, undef) unless defined ($link);
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.
    return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/);

    # Split into page and section on the first slash, and then clean up
    # quoting in the section.  Test with defined rather than truth so that a
    # section of "0" is still unquoted and kept.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if defined ($section);

    # If there is no section and the name contains spaces, guess that it's
    # an old-style section link.
    if (defined ($page) && $page =~ / / && !defined ($section)) {
        return (undef, $page);
    }

    # Normalize empty strings to undef without discarding "0".
    $page = undef unless (defined ($page) && length ($page) > 0);
    $section = undef unless (defined ($section) && length ($section) > 0);
    return ($page, $section);
}
68 | ||
# Infer link text from the page and section.
#
# Takes the page and section as returned by _parse_section and returns the
# anchor text to use when none was given explicitly:
#
#     page only         => page
#     section only      => "section"
#     page and section  => "section" in page
#
# Returns undef if neither is present.  Presence is tested with defined and
# length rather than truth so that a page or section named "0" still
# contributes to the text.
sub _infer_text {
    my ($page, $section) = @_;
    my $has_page = (defined ($page) && length ($page) > 0);
    my $has_section = (defined ($section) && length ($section) > 0);
    my $inferred;
    if ($has_page && $has_section) {
        $inferred = '"' . $section . '" in ' . $page;
    } elsif ($has_section) {
        $inferred = '"' . $section . '"';
    } elsif ($has_page) {
        $inferred = $page;
    }
    return $inferred;
}
82 | ||
# Given the contents of an L<> formatting code, parse it into the link text,
# the possibly inferred link text, the name or URL, the section, and the type
# of link (pod, man, or url).
#
# Returns the five-element list (text, inferred, name, section, type):
#
#     text      anchor text given before "|", or undef if there was none
#     inferred  text if it is non-empty, otherwise text derived from the
#               target (the URL itself, or the result of _infer_text)
#     name      page name or URL
#     section   section within the page; always undef for URLs
#     type      'url', 'man' (name contains parentheses), or 'pod'
sub parselink {
    my ($link) = @_;
    $link =~ s/\s+/ /g;

    # A link that is a URL in its entirety is taken as-is, even if it
    # contains a vertical bar.  Otherwise, split off any anchor text and
    # check whether what remains is a URL, so that text|scheme:... is
    # recognized instead of being split on the first slash of the URL.
    my $text;
    my $is_url = ($link =~ /\A\w+:[^:\s]\S*\Z/);
    if (!$is_url && $link =~ /\|/) {
        ($text, $link) = split (/\|/, $link, 2);
        $is_url = ($link =~ /\A\w+:[^:\s]\S*\Z/);
    }

    # Test with defined and length rather than truth so that an anchor text
    # of "0" is used instead of being replaced by inferred text.
    my $has_text = (defined ($text) && length ($text) > 0);

    if ($is_url) {
        my $inferred = $has_text ? $text : $link;
        return ($text, $inferred, $link, undef, 'url');
    } else {
        my ($name, $section) = _parse_section ($link);
        my $inferred = $has_text ? $text : _infer_text ($name, $section);
        my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod';
        return ($text, $inferred, $name, $section, $type);
    }
}
102 | ||
103 | ||
104 | ############################################################################## | |
105 | # Module return value and documentation | |
106 | ############################################################################## | |
107 | ||
108 | # Ensure we evaluate to true. | |
109 | 1; | |
110 | __END__ | |
111 | ||
112 | =head1 NAME | |
113 | ||
114 | Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text | |
115 | ||
116 | =head1 SYNOPSIS | |
117 | ||
118 | use Pod::ParseLink; | |
119 | my ($text, $inferred, $name, $section, $type) = parselink ($link); | |
120 | ||
121 | =head1 DESCRIPTION | |
122 | ||
123 | This module only provides a single function, parselink(), which takes the | |
124 | text of an LE<lt>E<gt> formatting code and parses it. It returns the anchor | |
125 | text for the link (if any was given), the anchor text possibly inferred from | |
126 | the name and section, the name or URL, the section if any, and the type of | |
127 | link. The type will be one of 'url', 'pod', or 'man', indicating a URL, a | |
128 | link to a POD page, or a link to a Unix manual page. | |
129 | ||
130 | Parsing is implemented per L<perlpodspec>. For backward compatibility, | |
131 | links where there is no section and the name contains spaces, or links where the | |
132 | entirety of the link (except for the anchor text if given) is enclosed in | |
133 | double-quotes are interpreted as links to a section (LE<lt>/sectionE<gt>). | |
134 | ||
135 | The inferred anchor text is implemented per L<perlpodspec>: | |
136 | ||
137 | L<name> => L<name|name> | |
138 | L</section> => L<"section"|/section> | |
139 | L<name/section> => L<"section" in name|name/section> | |
140 | ||
141 | The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes, | |
142 | and the section, anchor text, and inferred anchor text may contain any | |
143 | formatting codes. Any double quotes around the section are removed as part | |
144 | of the parsing, as is any leading or trailing whitespace. | |
145 | ||
146 | If the text of the LE<lt>E<gt> escape is entirely enclosed in double quotes, | |
147 | it's interpreted as a link to a section for backwards compatibility. | |
148 | ||
149 | No attempt is made to resolve formatting codes. This must be done after | |
150 | calling parselink, since EE<lt>E<gt> formatting codes can be used to escape | |
151 | characters that would otherwise be significant to the parser and resolving | |
152 | them before parsing would result in an incorrect parse of a formatting code | |
153 | like: | |
154 | ||
155 | L<verticalE<verbar>barE<sol>slash> | |
156 | ||
157 | which should be interpreted as a link to the C<vertical|bar/slash> POD page | |
158 | and not as a link to the C<slash> section of the C<bar> POD page with an | |
159 | anchor text of C<vertical>. Note that not only the anchor text will need to | |
160 | have formatting codes expanded, but so will the target of the link (to deal | |
161 | with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of | |
162 | the section may be necessary depending on whether the translator wants to | |
163 | consider markup in sections to be significant when resolving links. See | |
164 | L<perlpodspec> for more information. | |
165 | ||
166 | =head1 SEE ALSO | |
167 | ||
168 | L<Pod::Parser> | |
169 | ||
170 | The current version of this module is always available from its web site at | |
171 | L<http://www.eyrie.org/~eagle/software/podlators/>. | |
172 | ||
173 | =head1 AUTHOR | |
174 | ||
175 | Russ Allbery <rra@stanford.edu>. | |
176 | ||
177 | =head1 COPYRIGHT AND LICENSE | |
178 | ||
179 | Copyright 2001 by Russ Allbery <rra@stanford.edu>. | |
180 | ||
181 | This program is free software; you may redistribute it and/or modify it | |
182 | under the same terms as Perl itself. | |
183 | ||
184 | =cut |