| 1 | # Pod::ParseLink -- Parse an L<> formatting code in POD text. |
| 2 | # $Id: ParseLink.pm,v 1.6 2002/07/15 05:46:00 eagle Exp $ |
| 3 | # |
| 4 | # Copyright 2001 by Russ Allbery <rra@stanford.edu> |
| 5 | # |
| 6 | # This program is free software; you may redistribute it and/or modify it |
| 7 | # under the same terms as Perl itself. |
| 8 | # |
| 9 | # This module implements parsing of the text of an L<> formatting code as |
| 10 | # defined in perlpodspec. It should be suitable for any POD formatter. It |
| 11 | # exports only one function, parselink(), which returns the five-item parse |
| 12 | # defined in perlpodspec. |
| 13 | # |
| 14 | # Perl core hackers, please note that this module is also separately |
| 15 | # maintained outside of the Perl core as part of the podlators. Please send |
| 16 | # me any patches at the address above in addition to sending them to the |
| 17 | # standard Perl mailing lists. |
| 18 | |
| 19 | ############################################################################## |
| 20 | # Modules and declarations |
| 21 | ############################################################################## |
| 22 | |
package Pod::ParseLink;

# Refuse to load on any Perl older than 5.004.
require 5.004;

use strict;

# Declare the package globals assigned below so that they pass "use strict".
use vars qw($VERSION @ISA @EXPORT);

# Inherit import() from Exporter and export parselink() by default.
use Exporter;
@ISA    = ('Exporter');
@EXPORT = ('parselink');

# The version is maintained by hand rather than derived from the CVS revision
# keyword, because this module also ships in Perl core and too many things
# could munge CVS magic revision strings.  Ideally this number matches the
# CVS revision in podlators.
$VERSION = 1.06;
| 38 | |
| 39 | |
| 40 | ############################################################################## |
| 41 | # Implementation |
| 42 | ############################################################################## |
| 43 | |
# Parse the name and section portion of a link into a name and section.
#
# $link - The target part of an L<> code (anchor text already removed).
#
# Returns: A two-element list of (page, section).  Either element is undef
#          when that part is missing or empty.
#
# Presence of the page and section is tested with defined() and length()
# rather than plain truth so that a page or section literally named "0" is
# kept instead of being mistaken for a missing one.
sub _parse_section {
    my ($link) = @_;
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.  The captured text is returned as-is, so
    # an empty pair of quotes yields an empty-string section here.
    return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/);

    # Split into page and section on the first slash, and then clean up
    # quoting in the section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if defined ($section);

    # If there was no slash at all and the name contains spaces, guess that
    # it's an old-style section link.  This must be checked before empty
    # values are normalized below, since "name with spaces/" (an explicit but
    # empty section) is still treated as a page.
    if (defined ($page) && $page =~ / / && !defined ($section)) {
        return (undef, $page);
    }

    # Normalize missing and empty parts to undef.
    $page    = undef unless defined ($page)    && length ($page)    > 0;
    $section = undef unless defined ($section) && length ($section) > 0;
    return ($page, $section);
}
| 68 | |
# Infer link text from the page and section:
#
#     page only        => page
#     section only     => "section"
#     page and section => "section" in page
#
# $page    - Name of the linked page, or undef/empty if there is none.
# $section - Name of the linked section, or undef/empty if there is none.
#
# Returns: The inferred anchor text, or undef if neither a page nor a section
#          was given.
#
# Presence is tested with defined() and length() rather than plain truth so
# that a page or section literally named "0" is not mistaken for a missing
# one.
sub _infer_text {
    my ($page, $section) = @_;
    my $has_page    = defined ($page)    && length ($page)    > 0;
    my $has_section = defined ($section) && length ($section) > 0;

    my $inferred;
    if ($has_page && $has_section) {
        $inferred = '"' . $section . '" in ' . $page;
    } elsif ($has_section) {
        $inferred = '"' . $section . '"';
    } elsif ($has_page) {
        $inferred = $page;
    }
    return $inferred;
}
| 82 | |
# Given the contents of an L<> formatting code, parse it into the link text,
# the possibly inferred link text, the name or URL, the section, and the type
# of link (pod, man, or url).
#
# $link - The text between the angle brackets of an L<> code.  Formatting
#         codes inside it are not resolved here.
#
# Returns: A five-element list of (text, inferred, name, section, type):
#   text     - Anchor text given before "|", or undef if there was none.
#   inferred - The anchor text if one was given and is non-empty, otherwise
#              text inferred from the name and section (the URL itself for
#              URL links).
#   name     - The page name or URL, or undef.
#   section  - The section, or undef (always undef for URL links).
#   type     - One of 'url', 'pod', or 'man'.
#
# The anchor text is split off before the target is tested for being a URL so
# that L<text|scheme:...> is recognized as a URL link with anchor text, and
# an anchor text of "0" is honored rather than treated as missing.
sub parselink {
    my ($link) = @_;
    $link = '' unless defined ($link);

    # Collapse every run of whitespace (including newlines) to one space.
    $link =~ s/\s+/ /g;

    # Everything before the first vertical bar is the anchor text.
    my $text;
    if ($link =~ /\|/) {
        ($text, $link) = split (/\|/, $link, 2);
    }
    my $has_text = defined ($text) && length ($text) > 0;

    # A target of the form scheme:rest, with no whitespace and with the
    # character after the first colon not itself a colon (so Foo::Bar is not
    # a URL), is a URL link.  It has no section.
    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        my $inferred = $has_text ? $text : $link;
        return ($text, $inferred, $link, undef, 'url');
    }

    # Otherwise it's a POD or man page link.  A name containing a
    # parenthesized part, such as crontab(5), marks a manual page.
    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);
    my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod';
    return ($text, $inferred, $name, $section, $type);
}
| 102 | |
| 103 | |
| 104 | ############################################################################## |
| 105 | # Module return value and documentation |
| 106 | ############################################################################## |
| 107 | |
| 108 | # Ensure we evaluate to true. |
| 109 | 1; |
| 110 | __END__ |
| 111 | |
| 112 | =head1 NAME |
| 113 | |
| 114 | Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text |
| 115 | |
| 116 | =head1 SYNOPSIS |
| 117 | |
| 118 | use Pod::ParseLink; |
| 119 | my ($text, $inferred, $name, $section, $type) = parselink ($link); |
| 120 | |
| 121 | =head1 DESCRIPTION |
| 122 | |
| 123 | This module only provides a single function, parselink(), which takes the |
| 124 | text of an LE<lt>E<gt> formatting code and parses it. It returns the anchor |
| 125 | text for the link (if any was given), the anchor text possibly inferred from |
| 126 | the name and section, the name or URL, the section if any, and the type of |
| 127 | link. The type will be one of 'url', 'pod', or 'man', indicating a URL, a |
| 128 | link to a POD page, or a link to a Unix manual page. |
| 129 | |
| 130 | Parsing is implemented per L<perlpodspec>. For backward compatibility, |
| 131 | links where there is no section and name contains spaces, or links where the |
| 132 | entirety of the link (except for the anchor text if given) is enclosed in |
| 133 | double-quotes are interpreted as links to a section (LE<lt>/sectionE<gt>). |
| 134 | |
| 135 | The inferred anchor text is implemented per L<perlpodspec>: |
| 136 | |
| 137 | L<name> => L<name|name> |
| 138 | L</section> => L<"section"|/section> |
| 139 | L<name/section> => L<"section" in name|name/section> |
| 140 | |
| 141 | The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes, |
| 142 | and the section, anchor text, and inferred anchor text may contain any |
| 143 | formatting codes. Any double quotes around the section are removed as part |
| 144 | of the parsing, as is any leading or trailing whitespace. |
| 145 | |
| 146 | If the text of the LE<lt>E<gt> escape is entirely enclosed in double quotes, |
| 147 | it's interpreted as a link to a section for backwards compatibility. |
| 148 | |
| 149 | No attempt is made to resolve formatting codes. This must be done after |
| 150 | calling parselink (since EE<lt>E<gt> formatting codes can be used to escape |
| 151 | characters that would otherwise be significant to the parser and resolving |
| 152 | them before parsing would result in an incorrect parse of a formatting code |
| 153 | like: |
| 154 | |
| 155 | L<verticalE<verbar>barE<sol>slash> |
| 156 | |
| 157 | which should be interpreted as a link to the C<vertical|bar/slash> POD page |
| 158 | and not as a link to the C<slash> section of the C<bar> POD page with an |
| 159 | anchor text of C<vertical>). Note that not only the anchor text will need to |
| 160 | have formatting codes expanded, but so will the target of the link (to deal |
| 161 | with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of |
| 162 | the section may be necessary depending on whether the translator wants to |
| 163 | consider markup in sections to be significant when resolving links. See |
| 164 | L<perlpodspec> for more information. |
| 165 | |
| 166 | =head1 SEE ALSO |
| 167 | |
| 168 | L<Pod::Parser> |
| 169 | |
| 170 | The current version of this module is always available from its web site at |
| 171 | L<http://www.eyrie.org/~eagle/software/podlators/>. |
| 172 | |
| 173 | =head1 AUTHOR |
| 174 | |
| 175 | Russ Allbery <rra@stanford.edu>. |
| 176 | |
| 177 | =head1 COPYRIGHT AND LICENSE |
| 178 | |
| 179 | Copyright 2001 by Russ Allbery <rra@stanford.edu>. |
| 180 | |
| 181 | This program is free software; you may redistribute it and/or modify it |
| 182 | under the same terms as Perl itself. |
| 183 | |
| 184 | =cut |