| 1 | ############################################################################# |
| 2 | # Pod/Checker.pm -- check pod documents for syntax errors |
| 3 | # |
| 4 | # Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved. |
| 5 | # This file is part of "PodParser". PodParser is free software; |
| 6 | # you can redistribute it and/or modify it under the same terms |
| 7 | # as Perl itself. |
| 8 | ############################################################################# |
| 9 | |
# Package preamble: declares the Pod::Checker namespace, sets its $VERSION,
# states the minimum Perl release and loads Pod::ParseUtils.
# NOTE(review): `our` only exists from Perl 5.6 onwards, which is presumably
# why `use vars` is used for $VERSION given the 5.005 floor below -- confirm
# before modernising.
| 10 | package Pod::Checker; |
| 11 | |
| 12 | use vars qw($VERSION); |
| 13 | $VERSION = 1.43; ## Current version of this package |
| 14 | require 5.005; ## requires this Perl version or later |
| 15 | |
| 16 | use Pod::ParseUtils; ## for hyperlinks and lists |
| 17 | |
| 18 | =head1 NAME |
| 19 | |
| 20 | Pod::Checker, podchecker() - check pod documents for syntax errors |
| 21 | |
| 22 | =head1 SYNOPSIS |
| 23 | |
| 24 | use Pod::Checker; |
| 25 | |
| 26 | $syntax_okay = podchecker($filepath, $outputpath, %options); |
| 27 | |
| 28 | my $checker = new Pod::Checker %options; |
| 29 | $checker->parse_from_file($filepath, \*STDERR); |
| 30 | |
| 31 | =head1 OPTIONS/ARGUMENTS |
| 32 | |
| 33 | C<$filepath> is the input POD to read and C<$outputpath> is |
| 34 | where to write POD syntax error messages. Either argument may be a scalar |
| 35 | indicating a file-path, or else a reference to an open filehandle. |
| 36 | If unspecified, the input-file defaults to C<\*STDIN>, and |
| 37 | the output-file defaults to C<\*STDERR>. |
| 38 | |
| 39 | =head2 podchecker() |
| 40 | |
| 41 | This function can take a hash of options: |
| 42 | |
| 43 | =over 4 |
| 44 | |
| 45 | =item B<-warnings> =E<gt> I<val> |
| 46 | |
| 47 | Turn warnings on/off. I<val> is usually 1 for on, but higher values |
| 48 | trigger additional warnings. See L<"Warnings">. |
| 49 | |
| 50 | =back |
| 51 | |
| 52 | =head1 DESCRIPTION |
| 53 | |
| 54 | B<podchecker> will perform syntax checking of Perl5 POD format documentation. |
| 55 | |
| 56 | Curious/ambitious users are welcome to propose additional features they wish |
| 57 | to see in B<Pod::Checker> and B<podchecker> and verify that the checks are |
| 58 | consistent with L<perlpod>. |
| 59 | |
| 60 | The following checks are currently performed: |
| 61 | |
| 62 | =over 4 |
| 63 | |
| 64 | =item * |
| 65 | |
| 66 | Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences, |
| 67 | and unterminated interior sequences. |
| 68 | |
| 69 | =item * |
| 70 | |
| 71 | Check for proper balancing of C<=begin> and C<=end>. The contents of such |
| 72 | a block are generally ignored, i.e. no syntax checks are performed. |
| 73 | |
| 74 | =item * |
| 75 | |
| 76 | Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>. |
| 77 | |
| 78 | =item * |
| 79 | |
| 80 | Check for identical nested interior-sequences (e.g. |
| 81 | C<LE<lt>...LE<lt>...E<gt>...E<gt>>). |
| 82 | |
| 83 | =item * |
| 84 | |
| 85 | Check for malformed or nonexistent entities C<EE<lt>...E<gt>>. |
| 86 | |
| 87 | =item * |
| 88 | |
| 89 | Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod> |
| 90 | for details. |
| 91 | |
| 92 | =item * |
| 93 | |
| 94 | Check for unresolved document-internal links. This check may also reveal |
| 95 | misspelled links that seem to be internal links but should be links |
| 96 | to something else. |
| 97 | |
| 98 | =back |
| 99 | |
| 100 | =head1 DIAGNOSTICS |
| 101 | |
| 102 | =head2 Errors |
| 103 | |
| 104 | =over 4 |
| 105 | |
| 106 | =item * empty =headn |
| 107 | |
| 108 | A heading (C<=head1> or C<=head2>) without any text? That ain't no |
| 109 | heading! |
| 110 | |
| 111 | =item * =over on line I<N> without closing =back |
| 112 | |
| 113 | The C<=over> command does not have a corresponding C<=back> before the |
| 114 | next heading (C<=head1> or C<=head2>) or the end of the file. |
| 115 | |
| 116 | =item * =item without previous =over |
| 117 | |
| 118 | =item * =back without previous =over |
| 119 | |
| 120 | An C<=item> or C<=back> command has been found outside a |
| 121 | C<=over>/C<=back> block. |
| 122 | |
| 123 | =item * No argument for =begin |
| 124 | |
| 125 | A C<=begin> command was found that is not followed by the formatter |
| 126 | specification. |
| 127 | |
| 128 | =item * =end without =begin |
| 129 | |
| 130 | A standalone C<=end> command was found. |
| 131 | |
| 132 | =item * Nested =begin's |
| 133 | |
| 134 | There were at least two consecutive C<=begin> commands without |
| 135 | the corresponding C<=end>. Only one C<=begin> may be active at |
| 136 | a time. |
| 137 | |
| 138 | =item * =for without formatter specification |
| 139 | |
| 140 | There is no specification of the formatter after the C<=for> command. |
| 141 | |
| 142 | =item * unresolved internal link I<NAME> |
| 143 | |
| 144 | The given link to I<NAME> does not have a matching node in the current |
| 145 | POD. This also happens when a single word node name is not enclosed in |
| 146 | C<"">. |
| 147 | |
| 148 | =item * Unknown command "I<CMD>" |
| 149 | |
| 150 | An invalid POD command has been found. Valid are C<=head1>, C<=head2>, |
| 151 | C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, |
| 152 | C<=for>, C<=pod>, C<=cut> |
| 153 | |
| 154 | =item * Unknown interior-sequence "I<SEQ>" |
| 155 | |
| 156 | An invalid markup command has been encountered. Valid are: |
| 157 | C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>, |
| 158 | C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>, |
| 159 | C<ZE<lt>E<gt>> |
| 160 | |
| 161 | =item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt> |
| 162 | |
| 163 | Two nested identical markup commands have been found. Generally this |
| 164 | does not make sense. |
| 165 | |
| 166 | =item * garbled entity I<STRING> |
| 167 | |
| 168 | The I<STRING> found cannot be interpreted as a character entity. |
| 169 | |
| 170 | =item * Entity number out of range |
| 171 | |
| 172 | An entity specified by number (dec, hex, oct) is out of range (1-255). |
| 173 | |
| 174 | =item * malformed link LE<lt>E<gt> |
| 175 | |
| 176 | The link found cannot be parsed because it does not conform to the |
| 177 | syntax described in L<perlpod>. |
| 178 | |
| 179 | =item * nonempty ZE<lt>E<gt> |
| 180 | |
| 181 | The C<ZE<lt>E<gt>> sequence is supposed to be empty. |
| 182 | |
| 183 | =item * empty XE<lt>E<gt> |
| 184 | |
| 185 | The index entry specified contains nothing but whitespace. |
| 186 | |
| 187 | =item * Spurious text after =pod / =cut |
| 188 | |
| 189 | The commands C<=pod> and C<=cut> do not take any arguments. |
| 190 | |
| 191 | =item * Spurious character(s) after =back |
| 192 | |
| 193 | The C<=back> command does not take any arguments. |
| 194 | |
| 195 | =back |
| 196 | |
| 197 | =head2 Warnings |
| 198 | |
| 199 | These may not necessarily cause trouble, but indicate mediocre style. |
| 200 | |
| 201 | =over 4 |
| 202 | |
| 203 | =item * multiple occurrence of link target I<name> |
| 204 | |
| 205 | The POD file has some C<=item> and/or C<=head> commands that have |
| 206 | the same text. Potential hyperlinks to such a text cannot be unique then. |
| 207 | This warning is printed only with warning level greater than one. |
| 208 | |
| 209 | =item * line containing nothing but whitespace in paragraph |
| 210 | |
| 211 | There is some whitespace on a seemingly empty line. POD is very sensitive |
| 212 | to such things, so this is flagged. B<vi> users switch on the B<list> |
| 213 | option to avoid this problem. |
| 214 | |
| 215 | =begin _disabled_ |
| 216 | |
| 217 | =item * file does not start with =head |
| 218 | |
| 219 | The file starts with a different POD directive than head. |
| 220 | This is most probably something you do not want. |
| 221 | |
| 222 | =end _disabled_ |
| 223 | |
| 224 | =item * previous =item has no contents |
| 225 | |
| 226 | There is a list C<=item> right above the flagged line that has no |
| 227 | text contents. You probably want to delete empty items. |
| 228 | |
| 229 | =item * preceding non-item paragraph(s) |
| 230 | |
| 231 | A list introduced by C<=over> starts with a text or verbatim paragraph, |
| 232 | but continues with C<=item>s. Move the non-item paragraph out of the |
| 233 | C<=over>/C<=back> block. |
| 234 | |
| 235 | =item * =item type mismatch (I<one> vs. I<two>) |
| 236 | |
| 237 | A list started with e.g. a bulleted C<=item> and continued with a |
| 238 | numbered one. This is obviously inconsistent. For most translators the |
| 239 | type of the I<first> C<=item> determines the type of the list. |
| 240 | |
| 241 | =item * I<N> unescaped C<E<lt>E<gt>> in paragraph |
| 242 | |
| 243 | Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>> |
| 244 | can potentially cause errors as they could be misinterpreted as |
| 245 | markup commands. This is only printed when the -warnings level is |
| 246 | greater than 1. |
| 247 | |
| 248 | =item * Unknown entity |
| 249 | |
| 250 | A character entity was found that does not belong to the standard |
| 251 | ISO set or the POD specials C<verbar> and C<sol>. |
| 252 | |
| 253 | =item * No items in =over |
| 254 | |
| 255 | The list opened with C<=over> does not contain any items. |
| 256 | |
| 257 | =item * No argument for =item |
| 258 | |
| 259 | C<=item> without any parameters is deprecated. It should either be followed |
| 260 | by C<*> to indicate an unordered list, by a number (optionally followed |
| 261 | by a dot) to indicate an ordered (numbered) list or simple text for a |
| 262 | definition list. |
| 263 | |
| 264 | =item * empty section in previous paragraph |
| 265 | |
| 266 | The previous section (introduced by a C<=head> command) does not contain |
| 267 | any text. This usually indicates that something is missing. Note: A |
| 268 | C<=head1> followed immediately by C<=head2> does not trigger this warning. |
| 269 | |
| 270 | =item * Verbatim paragraph in NAME section |
| 271 | |
| 272 | The NAME section (C<=head1 NAME>) should consist of a single paragraph |
| 273 | with the script/module name, followed by a dash `-' and a very short |
| 274 | description of what the thing is good for. |
| 275 | |
| 276 | =item * =headI<n> without preceding higher level |
| 277 | |
| 278 | For example if there is a C<=head2> in the POD file prior to a |
| 279 | C<=head1>. |
| 280 | |
| 281 | =back |
| 282 | |
| 283 | =head2 Hyperlinks |
| 284 | |
| 285 | There are some warnings with respect to malformed hyperlinks. |
| 286 | |
| 287 | =over 4 |
| 288 | |
| 289 | =item * ignoring leading/trailing whitespace in link |
| 290 | |
| 291 | There is whitespace at the beginning or the end of the contents of |
| 292 | LE<lt>...E<gt>. |
| 293 | |
| 294 | =item * (section) in '$page' deprecated |
| 295 | |
| 296 | There is a section detected in the page name of LE<lt>...E<gt>, e.g. |
| 297 | C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only. |
| 298 | Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able |
| 299 | to expand this to appropriate code. For links to (builtin) functions, |
| 300 | please say C<LE<lt>perlfunc/mkdirE<gt>>, without (). |
| 301 | |
| 302 | =item * alternative text/node '%s' contains non-escaped | or / |
| 303 | |
| 304 | The characters C<|> and C</> are special in the LE<lt>...E<gt> context. |
| 305 | Although the hyperlink parser does its best to determine which "/" is |
| 306 | text and which is a delimiter in case of doubt, one ought to escape |
| 307 | these literal characters like this: |
| 308 | |
| 309 | / E<sol> |
| 310 | | E<verbar> |
| 311 | |
| 312 | =back |
| 313 | |
| 314 | =head1 RETURN VALUE |
| 315 | |
| 316 | B<podchecker> returns the number of POD syntax errors found or -1 if |
| 317 | there were no POD commands at all found in the file. |
| 318 | |
| 319 | =head1 EXAMPLES |
| 320 | |
| 321 | See L</SYNOPSIS> |
| 322 | |
| 323 | =head1 INTERFACE |
| 324 | |
| 325 | While checking, this module collects document properties, e.g. the nodes |
| 326 | for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>). |
| 327 | POD translators can use this feature to syntax-check and get the nodes in |
| 328 | a first pass before actually starting to convert. This is expensive in terms |
| 329 | of execution time, but allows for very robust conversions. |
| 330 | |
| 331 | Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror> |
| 332 | method to print errors and warnings. The summary output (e.g. |
| 333 | "Pod syntax OK") has been dropped from the module and has been included in |
| 334 | B<podchecker> (the script). This allows users of B<Pod::Checker> to |
| 335 | control completely the output behaviour. Users of B<podchecker> (the script) |
| 336 | get the well-known behaviour. |
| 337 | |
| 338 | =cut |
| 339 | |
| 340 | ############################################################################# |
| 341 | |
# Strictures, helper modules, inheritance and exports for Pod::Checker.
# `use strict` takes effect from this line on; the package preamble that
# precedes the POD block is compiled without it.
# @ISA makes Pod::Checker a subclass of Pod::Parser, and @EXPORT lists
# podchecker as the symbol exported by default.
# NOTE(review): Exporter is loaded but is not listed in @ISA, so import()
# is presumably inherited through Pod::Parser -- confirm.
| 342 | use strict; |
| 343 | #use diagnostics; |
| 344 | use Carp; |
| 345 | use Exporter; |
| 346 | use Pod::Parser; |
| 347 | |
| 348 | use vars qw(@ISA @EXPORT); |
| 349 | @ISA = qw(Pod::Parser); |
| 350 | @EXPORT = qw(&podchecker); |
| 351 | |
# %VALID_COMMANDS: set of POD paragraph command names, written without the
# leading '='. Every key maps to 1, so the hash acts as a membership set.
# The keys are the same commands listed under the "Unknown command"
# diagnostic in the POD above; presumably this table is what that check
# consults -- the lookup itself is not in this part of the file.
# NOTE(review): `use vars` declares the package variables %VALID_COMMANDS
# and %VALID_SEQUENCES, but the `my` below creates a separate file-scoped
# lexical of the same name that hides the package variable from here on.
# The package variables are not assigned anywhere in this part of the file.
# Confirm nothing relies on them before dropping the `use vars` line.
| 352 | use vars qw(%VALID_COMMANDS %VALID_SEQUENCES); |
| 353 | |
| 354 | my %VALID_COMMANDS = ( |
| 355 | 'pod' => 1, |
| 356 | 'cut' => 1, |
| 357 | 'head1' => 1, |
| 358 | 'head2' => 1, |
| 359 | 'head3' => 1, |
| 360 | 'head4' => 1, |
| 361 | 'over' => 1, |
| 362 | 'back' => 1, |
| 363 | 'item' => 1, |
| 364 | 'for' => 1, |
| 365 | 'begin' => 1, |
| 366 | 'end' => 1, |
| 367 | ); |
| 368 | |
# %VALID_SEQUENCES: set of single-letter interior-sequence (markup) names.
# Every key maps to 1, so the hash acts as a membership set.
# The keys are the same letters listed under the "Unknown interior-sequence"
# diagnostic in the POD above; presumably this table is what that check
# consults -- the lookup itself is not in this part of the file.
# NOTE(review): this `my` lexical hides the package variable of the same
# name declared by the earlier `use vars qw(%VALID_COMMANDS %VALID_SEQUENCES)`
# line; that package variable is not assigned in this part of the file.
| 369 | my %VALID_SEQUENCES = ( |
| 370 | 'I' => 1, |
| 371 | 'B' => 1, |
| 372 | 'S' => 1, |
| 373 | 'C' => 1, |
| 374 | 'L' => 1, |
| 375 | 'F' => 1, |
| 376 | 'X' => 1, |
| 377 | 'Z' => 1, |
| 378 | 'E' => 1, |
| 379 | ); |
| 380 | |
| 381 | # stolen from HTML::Entities |
| 382 | my %ENTITIES = ( |
| 383 | # Some normal chars that have special meaning in SGML context |
| 384 | amp => '&', # ampersand |
| 385 | 'gt' => '>', # greater than |
| 386 | 'lt' => '<', # less than |
| 387 | quot => '"', # double quote |
| 388 | |
| 389 | # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML |
| 390 |