Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
2 | .\" | |
3 | .\" Standard preamble: | |
4 | .\" ======================================================================== | |
5 | .de Sh \" Subsection heading | |
6 | .br | |
7 | .if t .Sp | |
8 | .ne 5 | |
9 | .PP | |
10 | \fB\\$1\fR | |
11 | .PP | |
12 | .. | |
13 | .de Sp \" Vertical space (when we can't use .PP) | |
14 | .if t .sp .5v | |
15 | .if n .sp | |
16 | .. | |
17 | .de Vb \" Begin verbatim text | |
18 | .ft CW | |
19 | .nf | |
20 | .ne \\$1 | |
21 | .. | |
22 | .de Ve \" End verbatim text | |
23 | .ft R | |
24 | .fi | |
25 | .. | |
26 | .\" Set up some character translations and predefined strings. \*(-- will | |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a | |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to | |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' | |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. | |
32 | .tr \(*W-|\(bv\*(Tr | |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
34 | .ie n \{\ | |
35 | . ds -- \(*W- | |
36 | . ds PI pi | |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
39 | . ds L" "" | |
40 | . ds R" "" | |
41 | . ds C` "" | |
42 | . ds C' "" | |
43 | 'br\} | |
44 | .el\{\ | |
45 | . ds -- \|\(em\| | |
46 | . ds PI \(*p | |
47 | . ds L" `` | |
48 | . ds R" '' | |
49 | 'br\} | |
50 | .\" | |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for | |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index | |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the | |
54 | .\" output yourself in some meaningful fashion. | |
55 | .if \nF \{\ | |
56 | . de IX | |
57 | . tm Index:\\$1\t\\n%\t"\\$2" | |
58 | .. | |
59 | . nr % 0 | |
60 | . rr F | |
61 | .\} | |
62 | .\" | |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
64 | .\" way too many mistakes in technical documents. | |
65 | .hy 0 | |
66 | .if n .na | |
67 | .\" | |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
70 | . \" fudge factors for nroff and troff | |
71 | .if n \{\ | |
72 | . ds #H 0 | |
73 | . ds #V .8m | |
74 | . ds #F .3m | |
75 | . ds #[ \f1 | |
76 | . ds #] \fP | |
77 | .\} | |
78 | .if t \{\ | |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
80 | . ds #V .6m | |
81 | . ds #F 0 | |
82 | . ds #[ \& | |
83 | . ds #] \& | |
84 | .\} | |
85 | . \" simple accents for nroff and troff | |
86 | .if n \{\ | |
87 | . ds ' \& | |
88 | . ds ` \& | |
89 | . ds ^ \& | |
90 | . ds , \& | |
91 | . ds ~ ~ | |
92 | . ds / | |
93 | .\} | |
94 | .if t \{\ | |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
101 | .\} | |
102 | . \" troff and (daisy-wheel) nroff accents | |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
110 | .ds ae a\h'-(\w'a'u*4/10)'e | |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
112 | . \" corrections for vroff | |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
115 | . \" for low resolution devices (crt and lpr) | |
116 | .if \n(.H>23 .if \n(.V>19 \ | |
117 | \{\ | |
118 | . ds : e | |
119 | . ds 8 ss | |
120 | . ds o a | |
121 | . ds d- d\h'-1'\(ga | |
122 | . ds D- D\h'-1'\(hy | |
123 | . ds th \o'bp' | |
124 | . ds Th \o'LP' | |
125 | . ds ae ae | |
126 | . ds Ae AE | |
127 | .\} | |
128 | .rm #[ #] #H #V #F C | |
129 | .\" ======================================================================== | |
130 | .\" | |
131 | .IX Title "PERLPODSPEC 1" | |
132 | .TH PERLPODSPEC 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide" | |
133 | .SH "NAME" | |
134 | perlpodspec \- Plain Old Documentation: format specification and notes | |
135 | .SH "DESCRIPTION" | |
136 | .IX Header "DESCRIPTION" | |
137 | This document is detailed notes on the Pod markup language. Most | |
138 | people will only have to read perlpod to know how to write | |
139 | in Pod, but this document may answer some incidental questions to do | |
140 | with parsing and rendering Pod. | |
141 | .PP | |
142 | In this document, \*(L"must\*(R" / \*(L"must not\*(R", \*(L"should\*(R" / | |
143 | \&\*(L"should not\*(R", and \*(L"may\*(R" have their conventional (cf. \s-1RFC\s0 2119) | |
144 | meanings: \*(L"X must do Y\*(R" means that if X doesn't do Y, it's against | |
145 | this specification, and should really be fixed. \*(L"X should do Y\*(R" | |
146 | means that it's recommended, but X may fail to do Y, if there's a | |
147 | good reason. \*(L"X may do Y\*(R" is merely a note that X can do Y at | |
148 | will (although it is up to the reader to detect any connotation of | |
149 | \&\*(L"and I think it would be \fInice\fR if X did Y\*(R" versus \*(L"it wouldn't | |
150 | really \fIbother\fR me if X did Y\*(R"). | |
151 | .PP | |
152 | Notably, when I say \*(L"the parser should do Y\*(R", the | |
153 | parser may fail to do Y, if the calling application explicitly | |
154 | requests that the parser \fInot\fR do Y. I often phrase this as | |
155 | \&\*(L"the parser should, by default, do Y.\*(R" This doesn't \fIrequire\fR | |
156 | the parser to provide an option for turning off whatever | |
157 | feature Y is (like expanding tabs in verbatim paragraphs), although | |
158 | it implies that such an option \fImay\fR be provided. | |
159 | .SH "Pod Definitions" | |
160 | .IX Header "Pod Definitions" | |
161 | Pod is embedded in files, typically Perl source files \*(-- although you | |
162 | can write a file that's nothing but Pod. | |
163 | .PP | |
164 | A \fBline\fR in a file consists of zero or more non-newline characters, | |
165 | terminated by either a newline or the end of the file. | |
166 | .PP | |
167 | A \fBnewline sequence\fR is usually a platform-dependent concept, but | |
168 | Pod parsers should understand it to mean any of \s-1CR\s0 (\s-1ASCII\s0 13), \s-1LF\s0 | |
169 | (\s-1ASCII\s0 10), or a \s-1CRLF\s0 (\s-1ASCII\s0 13 followed immediately by \s-1ASCII\s0 10), in | |
170 | addition to any other system-specific meaning. The first \s-1CR/CRLF/LF\s0 | |
171 | sequence in the file may be used as the basis for identifying the | |
172 | newline sequence for parsing the rest of the file. | |
173 | .PP | |
174 | A \fBblank line\fR is a line consisting entirely of zero or more spaces | |
175 | (\s-1ASCII\s0 32) or tabs (\s-1ASCII\s0 9), and terminated by a newline or end\-of\-file. | |
176 | A \fBnon-blank line\fR is a line containing one or more characters other | |
177 | than space or tab (and terminated by a newline or end\-of\-file). | |
178 | .PP | |
179 | (\fINote:\fR Many older Pod parsers did not accept a line consisting of | |
180 | spaces/tabs and then a newline as a blank line \*(-- the only lines they | |
181 | considered blank were lines consisting of \fIno characters at all\fR, | |
182 | terminated by a newline.) | |
183 | .PP | |
184 | \&\fBWhitespace\fR is used in this document as a blanket term for spaces, | |
185 | tabs, and newline sequences. (By itself, this term usually refers | |
186 | to literal whitespace. That is, sequences of whitespace characters | |
187 | in Pod source, as opposed to "E<32>", which is a formatting | |
188 | code that \fIdenotes\fR a whitespace character.) | |
189 | .PP | |
190 | A \fBPod parser\fR is a module meant for parsing Pod (regardless of | |
191 | whether this involves calling callbacks or building a parse tree or | |
192 | directly formatting it). A \fBPod formatter\fR (or \fBPod translator\fR) | |
193 | is a module or program that converts Pod to some other format (\s-1HTML\s0, | |
194 | plaintext, TeX, PostScript, \s-1RTF\s0). A \fBPod processor\fR might be a | |
195 | formatter or translator, or might be a program that does something | |
196 | else with the Pod (like wordcounting it, scanning for index points, | |
197 | etc.). | |
198 | .PP | |
199 | Pod content is contained in \fBPod blocks\fR. A Pod block starts with a | |
200 | line that matches \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR, and continues up to the next line | |
201 | that matches \f(CW\*(C`m/\eA=cut/\*(C'\fR \*(-- or up to the end of the file, if there is | |
202 | no \f(CW\*(C`m/\eA=cut/\*(C'\fR line. | |
203 | .PP | |
204 | Within a Pod block, there are \fBPod paragraphs\fR. A Pod paragraph | |
205 | consists of non-blank lines of text, separated by one or more blank | |
206 | lines. | |
207 | .PP | |
208 | For purposes of Pod processing, there are four types of paragraphs in | |
209 | a Pod block: | |
210 | .IP "\(bu" 4 | |
211 | A command paragraph (also called a \*(L"directive\*(R"). The first line of | |
212 | this paragraph must match \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR. Command paragraphs are | |
213 | typically one line, as in: | |
214 | .Sp | |
215 | .Vb 1 | |
216 | \& =head1 NOTES | |
217 | .Ve | |
218 | .Sp | |
219 | .Vb 1 | |
220 | \& =item * | |
221 | .Ve | |
222 | .Sp | |
223 | But they may span several (non\-blank) lines: | |
224 | .Sp | |
225 | .Vb 3 | |
226 | \& =for comment | |
227 | \& Hm, I wonder what it would look like if | |
228 | \& you tried to write a BNF for Pod from this. | |
229 | .Ve | |
230 | .Sp | |
231 | .Vb 2 | |
232 | \& =head3 Dr. Strangelove, or: How I Learned to | |
233 | \& Stop Worrying and Love the Bomb | |
234 | .Ve | |
235 | .Sp | |
236 | \&\fISome\fR command paragraphs allow formatting codes in their content | |
237 | (i.e., after the part that matches \f(CW\*(C`m/\eA=[a\-zA\-Z]\eS*\es*/\*(C'\fR), as in: | |
238 | .Sp | |
239 | .Vb 1 | |
240 | \& =head1 Did You Remember to C<use strict;>? | |
241 | .Ve | |
242 | .Sp | |
243 | In other words, the Pod processing handler for \*(L"head1\*(R" will apply the | |
244 | same processing to \*(L"Did You Remember to C<use strict;>?\*(R" that it | |
245 | would to an ordinary paragraph \*(-- i.e., formatting codes (like | |
246 | \&\*(L"C<...>\*(R") are parsed and presumably formatted appropriately, and | |
247 | whitespace in the form of literal spaces and/or tabs is not | |
248 | significant. | |
249 | .IP "\(bu" 4 | |
250 | A \fBverbatim paragraph\fR. The first line of this paragraph must begin with a | |
251 | literal space or tab, and this paragraph must not be inside a \*(L"=begin | |
252 | \&\fIidentifier\fR\*(R", ... \*(L"=end \fIidentifier\fR\*(R" sequence unless | |
253 | \&\*(L"\fIidentifier\fR\*(R" begins with a colon (\*(L":\*(R"). That is, if a paragraph | |
254 | starts with a literal space or tab, but \fIis\fR inside a | |
255 | \&\*(L"=begin \fIidentifier\fR\*(R", ... \*(L"=end \fIidentifier\fR\*(R" region, then it's | |
256 | a data paragraph, unless \*(L"\fIidentifier\fR\*(R" begins with a colon. | |
257 | .Sp | |
258 | Whitespace \fIis\fR significant in verbatim paragraphs (although, in | |
259 | processing, tabs are probably expanded). | |
260 | .IP "\(bu" 4 | |
261 | An \fBordinary paragraph\fR. A paragraph is an ordinary paragraph | |
262 | if its first line matches neither \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR nor | |
263 | \&\f(CW\*(C`m/\eA[ \et]/\*(C'\fR, \fIand\fR if it's not inside a \*(L"=begin \fIidentifier\fR\*(R", | |
264 | \&... \*(L"=end \fIidentifier\fR\*(R" sequence unless \*(L"\fIidentifier\fR\*(R" begins with | |
265 | a colon (\*(L":\*(R"). | |
266 | .IP "\(bu" 4 | |
267 | A \fBdata paragraph\fR. This is a paragraph that \fIis\fR inside a \*(L"=begin | |
268 | \&\fIidentifier\fR\*(R" ... \*(L"=end \fIidentifier\fR\*(R" sequence where | |
269 | \&\*(L"\fIidentifier\fR\*(R" does \fInot\fR begin with a literal colon (\*(L":\*(R"). In | |
270 | some sense, a data paragraph is not part of Pod at all (i.e., | |
271 | effectively it's \*(L"out\-of\-band\*(R"), since it's not subject to most kinds | |
272 | of Pod parsing; but it is specified here, since Pod | |
273 | parsers need to be able to call an event for it, or store it in some | |
274 | form in a parse tree, or at least just parse \fIaround\fR it. | |
275 | .PP | |
276 | For example: consider the following paragraphs: | |
277 | .PP | |
278 | .Vb 1 | |
279 | \& # <- that's the 0th column | |
280 | .Ve | |
281 | .PP | |
282 | .Vb 1 | |
283 | \& =head1 Foo | |
284 | .Ve | |
285 | .PP | |
286 | .Vb 1 | |
287 | \& Stuff | |
288 | .Ve | |
289 | .PP | |
290 | .Vb 1 | |
291 | \& $foo->bar | |
292 | .Ve | |
293 | .PP | |
294 | .Vb 1 | |
295 | \& =cut | |
296 | .Ve | |
297 | .PP | |
298 | Here, \*(L"=head1 Foo\*(R" and \*(L"=cut\*(R" are command paragraphs because the first | |
299 | line of each matches \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR. \*(L"\fI[space][space]\fR$foo\->bar\*(R" | |
300 | is a verbatim paragraph, because its first line starts with a literal | |
301 | whitespace character (and there's no \*(L"=begin\*(R"...\*(L"=end\*(R" region around). | |
302 | .PP | |
303 | The \*(L"=begin \fIidentifier\fR\*(R" ... \*(L"=end \fIidentifier\fR\*(R" commands stop | |
304 | paragraphs that they surround from being parsed as ordinary or verbatim | |
305 | paragraphs, if \fIidentifier\fR doesn't begin with a colon. This | |
306 | is discussed in detail in the section | |
307 | \&\*(L"About Data Paragraphs and \*(L"=begin/=end\*(R" Regions\*(R". | |
308 | .SH "Pod Commands" | |
309 | .IX Header "Pod Commands" | |
310 | This section is intended to supplement and clarify the discussion in | |
311 | \&\*(L"Command Paragraph\*(R" in perlpod. These are the currently recognized | |
312 | Pod commands: | |
313 | .ie n .IP """=head1"", ""=head2"", ""=head3"", ""=head4""" 4 | |
314 | .el .IP "``=head1'', ``=head2'', ``=head3'', ``=head4''" 4 | |
315 | .IX Item "=head1, =head2, =head3, =head4" | |
316 | This command indicates that the text in the remainder of the paragraph | |
317 | is a heading. That text may contain formatting codes. Examples: | |
318 | .Sp | |
319 | .Vb 1 | |
320 | \& =head1 Object Attributes | |
321 | .Ve | |
322 | .Sp | |
323 | .Vb 1 | |
324 | \& =head3 What B<Not> to Do! | |
325 | .Ve | |
326 | .ie n .IP """=pod""" 4 | |
327 | .el .IP "``=pod''" 4 | |
328 | .IX Item "=pod" | |
329 | This command indicates that this paragraph begins a Pod block. (If we | |
330 | are already in the middle of a Pod block, this command has no effect at | |
331 | all.) If there is any text in this command paragraph after \*(L"=pod\*(R", | |
332 | it must be ignored. Examples: | |
333 | .Sp | |
334 | .Vb 1 | |
335 | \& =pod | |
336 | .Ve | |
337 | .Sp | |
338 | .Vb 1 | |
339 | \& This is a plain Pod paragraph. | |
340 | .Ve | |
341 | .Sp | |
342 | .Vb 1 | |
343 | \& =pod This text is ignored. | |
344 | .Ve | |
345 | .ie n .IP """=cut""" 4 | |
346 | .el .IP "``=cut''" 4 | |
347 | .IX Item "=cut" | |
348 | This command indicates that this line is the end of this previously | |
349 | started Pod block. If there is any text after \*(L"=cut\*(R" on the line, it must be | |
350 | ignored. Examples: | |
351 | .Sp | |
352 | .Vb 1 | |
353 | \& =cut | |
354 | .Ve | |
355 | .Sp | |
356 | .Vb 1 | |
357 | \& =cut The documentation ends here. | |
358 | .Ve | |
359 | .Sp | |
360 | .Vb 3 | |
361 | \& =cut | |
362 | \& # This is the first line of program text. | |
363 | \& sub foo { # This is the second. | |
364 | .Ve | |
365 | .Sp | |
366 | It is an error to try to \fIstart\fR a Pod block with a \*(L"=cut\*(R" command. In | |
367 | that case, the Pod processor must halt parsing of the input file, and | |
368 | must by default emit a warning. | |
369 | .ie n .IP """=over""" 4 | |
370 | .el .IP "``=over''" 4 | |
371 | .IX Item "=over" | |
372 | This command indicates that this is the start of a list/indent | |
373 | region. If there is any text following the \*(L"=over\*(R", it must consist | |
374 | of only a nonzero positive numeral. The semantics of this numeral is | |
375 | explained in the \*(L"About =over...=back Regions\*(R" section, further | |
376 | below. Formatting codes are not expanded. Examples: | |
377 | .Sp | |
378 | .Vb 1 | |
379 | \& =over 3 | |
380 | .Ve | |
381 | .Sp | |
382 | .Vb 1 | |
383 | \& =over 3.5 | |
384 | .Ve | |
385 | .Sp | |
386 | .Vb 1 | |
387 | \& =over | |
388 | .Ve | |
389 | .ie n .IP """=item""" 4 | |
390 | .el .IP "``=item''" 4 | |
391 | .IX Item "=item" | |
392 | This command indicates that an item in a list begins here. Formatting | |
393 | codes are processed. The semantics of the (optional) text in the | |
394 | remainder of this paragraph are | |
395 | explained in the \*(L"About =over...=back Regions\*(R" section, further | |
396 | below. Examples: | |
397 | .Sp | |
398 | .Vb 1 | |
399 | \& =item | |
400 | .Ve | |
401 | .Sp | |
402 | .Vb 1 | |
403 | \& =item * | |
404 | .Ve | |
405 | .Sp | |
406 | .Vb 1 | |
407 | \& =item * | |
408 | .Ve | |
409 | .Sp | |
410 | .Vb 1 | |
411 | \& =item 14 | |
412 | .Ve | |
413 | .Sp | |
414 | .Vb 1 | |
415 | \& =item 3. | |
416 | .Ve | |
417 | .Sp | |
418 | .Vb 1 | |
419 | \& =item C<< $thing->stuff(I<dodad>) >> | |
420 | .Ve | |
421 | .Sp | |
422 | .Vb 2 | |
423 | \& =item For transporting us beyond seas to be tried for pretended | |
424 | \& offenses | |
425 | .Ve | |
426 | .Sp | |
427 | .Vb 5 | |
428 | \& =item He is at this time transporting large armies of foreign | |
429 | \& mercenaries to complete the works of death, desolation and | |
430 | \& tyranny, already begun with circumstances of cruelty and perfidy | |
431 | \& scarcely paralleled in the most barbarous ages, and totally | |
432 | \& unworthy the head of a civilized nation. | |
433 | .Ve | |
434 | .ie n .IP """=back""" 4 | |
435 | .el .IP "``=back''" 4 | |
436 | .IX Item "=back" | |
437 | This command indicates that this is the end of the region begun | |
438 | by the most recent \*(L"=over\*(R" command. It permits no text after the | |
439 | \&\*(L"=back\*(R" command. | |
440 | .ie n .IP """=begin formatname""" 4 | |
441 | .el .IP "``=begin formatname''" 4 | |
442 | .IX Item "=begin formatname" | |
443 | This marks the following paragraphs (until the matching \*(L"=end | |
444 | formatname\*(R") as being for some special kind of processing. Unless | |
445 | \&\*(L"formatname\*(R" begins with a colon, the contained non-command | |
446 | paragraphs are data paragraphs. But if \*(L"formatname\*(R" \fIdoes\fR begin | |
447 | with a colon, then non-command paragraphs are ordinary paragraphs | |
448 | or data paragraphs. This is discussed in detail in the section | |
449 | \&\*(L"About Data Paragraphs and \*(L"=begin/=end\*(R" Regions\*(R". | |
450 | .Sp | |
451 | It is advised that formatnames match the regexp | |
452 | \&\f(CW\*(C`m/\eA:?[\-a\-zA\-Z0\-9_]+\ez/\*(C'\fR. Implementors should anticipate future | |
453 | expansion in the semantics and syntax of the first parameter | |
454 | to \*(L"=begin\*(R"/\*(L"=end\*(R"/\*(L"=for\*(R". | |
455 | .ie n .IP """=end formatname""" 4 | |
456 | .el .IP "``=end formatname''" 4 | |
457 | .IX Item "=end formatname" | |
458 | This marks the end of the region opened by the matching | |
459 | \&\*(L"=begin formatname\*(R" region. If \*(L"formatname\*(R" is not the formatname | |
460 | of the most recent open \*(L"=begin formatname\*(R" region, then this | |
461 | is an error, and must generate an error message. This | |
462 | is discussed in detail in the section | |
463 | \&\*(L"About Data Paragraphs and \*(L"=begin/=end\*(R" Regions\*(R". | |
464 | .ie n .IP """=for formatname text...""" 4 | |
465 | .el .IP "``=for formatname text...''" 4 | |
466 | .IX Item "=for formatname text..." | |
467 | This is synonymous with: | |
468 | .Sp | |
469 | .Vb 1 | |
470 | \& =begin formatname | |
471 | .Ve | |
472 | .Sp | |
473 | .Vb 1 | |
474 | \& text... | |
475 | .Ve | |
476 | .Sp | |
477 | .Vb 1 | |
478 | \& =end formatname | |
479 | .Ve | |
480 | .Sp | |
481 | That is, it creates a region consisting of a single paragraph; that | |
482 | paragraph is to be treated as a normal paragraph if \*(L"formatname\*(R" | |
483 | begins with a \*(L":\*(R"; if \*(L"formatname\*(R" \fIdoesn't\fR begin with a colon, | |
484 | then \*(L"text...\*(R" will constitute a data paragraph. There is no way | |
485 | to use \*(L"=for formatname text...\*(R" to express \*(L"text...\*(R" as a verbatim | |
486 | paragraph. | |
487 | .ie n .IP """=encoding encodingname""" 4 | |
488 | .el .IP "``=encoding encodingname''" 4 | |
489 | .IX Item "=encoding encodingname" | |
490 | This command, which should occur early in the document (at least | |
491 | before any non-US-ASCII data!), declares that this document is | |
492 | encoded in the encoding \fIencodingname\fR, which must be | |
493 | an encoding name that Encode recognizes. (Encode's list | |
494 | of supported encodings, in Encode::Supported, is useful here.) | |
495 | If the Pod parser cannot decode the declared encoding, it | |
496 | should emit a warning and may abort parsing the document | |
497 | altogether. | |
498 | .Sp | |
499 | A document having more than one \*(L"=encoding\*(R" line should be | |
500 | considered an error. Pod processors may silently tolerate this if | |
501 | the not-first \*(L"=encoding\*(R" lines are just duplicates of the | |
502 | first one (e.g., if there's a \*(L"=encoding utf8\*(R" line, and later on | |
503 | another \*(L"=encoding utf8\*(R" line). But Pod processors should complain if | |
504 | there are contradictory \*(L"=encoding\*(R" lines in the same document | |
505 | (e.g., if there is a \*(L"=encoding utf8\*(R" early in the document and | |
506 | \&\*(L"=encoding big5\*(R" later). Pod processors that recognize BOMs | |
507 | may also complain if they see an \*(L"=encoding\*(R" line | |
508 | that contradicts the \s-1BOM\s0 (e.g., if a document with a \s-1UTF\-16LE\s0 | |
509 | \&\s-1BOM\s0 has an \*(L"=encoding shiftjis\*(R" line). | |
510 | .PP | |
511 | If a Pod processor sees any command other than the ones listed | |
512 | above (like \*(L"=head\*(R", or \*(L"=haed1\*(R", or \*(L"=stuff\*(R", or \*(L"=cuttlefish\*(R", | |
513 | or \*(L"=w123\*(R"), that processor must by default treat this as an | |
514 | error. It must not process the paragraph beginning with that | |
515 | command, must by default warn of this as an error, and may | |
516 | abort the parse. A Pod parser may allow a way for particular | |
517 | applications to add to the above list of known commands, and to | |
518 | stipulate, for each additional command, whether formatting | |
519 | codes should be processed. | |
520 | .PP | |
521 | Future versions of this specification may add additional | |
522 | commands. | |
523 | .SH "Pod Formatting Codes" | |
524 | .IX Header "Pod Formatting Codes" | |
525 | (Note that in previous drafts of this document and of perlpod, | |
526 | formatting codes were referred to as \*(L"interior sequences\*(R", and | |
527 | this term may still be found in the documentation for Pod parsers, | |
528 | and in error messages from Pod processors.) | |
529 | .PP | |
530 | There are two syntaxes for formatting codes: | |
531 | .IP "\(bu" 4 | |
532 | A formatting code starts with a capital letter (just US-ASCII [A\-Z]) | |
533 | followed by a \*(L"<\*(R", any number of characters, and ending with the first | |
534 | matching \*(L">\*(R". Examples: | |
535 | .Sp | |
536 | .Vb 1 | |
537 | \& That's what I<you> think! | |
538 | .Ve | |
539 | .Sp | |
540 | .Vb 1 | |
541 | \& What's C<dump()> for? | |
542 | .Ve | |
543 | .Sp | |
544 | .Vb 1 | |
545 | \& X<C<chmod> and C<unlink()> Under Different Operating Systems> | |
546 | .Ve | |
547 | .IP "\(bu" 4 | |
548 | A formatting code starts with a capital letter (just US-ASCII [A\-Z]) | |
549 | followed by two or more \*(L"<\*(R"'s, one or more whitespace characters, | |
550 | any number of characters, one or more whitespace characters, | |
551 | and ending with the first matching sequence of two or more \*(L">\*(R"'s, where | |
552 | the number of \*(L">\*(R"'s equals the number of \*(L"<\*(R"'s in the opening of this | |
553 | formatting code. Examples: | |
554 | .Sp | |
555 | .Vb 1 | |
556 | \& That's what I<< you >> think! | |
557 | .Ve | |
558 | .Sp | |
559 | .Vb 1 | |
560 | \& C<<< open(X, ">>thing.dat") || die $! >>> | |
561 | .Ve | |
562 | .Sp | |
563 | .Vb 1 | |
564 | \& B<< $foo->bar(); >> | |
565 | .Ve | |
566 | .Sp | |
567 | With this syntax, the whitespace character(s) after the \*(L"C<<<\*(R" | |
568 | and before the \*(L">>>\*(R" (or whatever letter) are \fInot\fR renderable \*(-- they | |
569 | do not signify whitespace, but are merely part of the formatting codes | |
570 | themselves. That is, these are all synonymous: | |
571 | .Sp | |
572 | .Vb 7 | |
573 | \& C<thing> | |
574 | \& C<< thing >> | |
575 | \& C<< thing >> | |
576 | \& C<<< thing >>> | |
577 | \& C<<<< | |
578 | \& thing | |
579 | \& >>>> | |
580 | .Ve | |
581 | .Sp | |
582 | and so on. | |
583 | .PP | |
584 | In parsing Pod, a notably tricky part is the correct parsing of | |
585 | (potentially nested!) formatting codes. Implementors should | |
586 | consult the code in the \f(CW\*(C`parse_text\*(C'\fR routine in Pod::Parser as an | |
587 | example of a correct implementation. | |
588 | .ie n .IP """I<text>"" \*(-- italic text" 4 | |
589 | .el .IP "\f(CWI<text>\fR \*(-- italic text" 4 | |
590 | .IX Item "I<text> italic text" | |
591 | See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod. | |
592 | .ie n .IP """B<text>"" \*(-- bold text" 4 | |
593 | .el .IP "\f(CWB<text>\fR \*(-- bold text" 4 | |
594 | .IX Item "B<text> bold text" | |
595 | See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod. | |
596 | .ie n .IP """C<code>"" \*(-- code text" 4 | |
597 | .el .IP "\f(CWC<code>\fR \*(-- code text" 4 | |
598 | .IX Item "C<code> code text" | |
599 | See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod. | |
600 | .ie n .IP """F<filename>"" \*(-- style for filenames" 4 | |
601 | .el .IP "\f(CWF<filename>\fR \*(-- style for filenames" 4 | |
602 | .IX Item "F<filename> style for filenames" | |
603 | See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod. | |
604 | .ie n .IP """X<topic name>"" \*(-- an index entry" 4 | |
605 | .el .IP "\f(CWX<topic name>\fR \*(-- an index entry" 4 | |
606 | .IX Item "X<topic name> an index entry" | |
607 | See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod. | |
608 | .Sp | |
609 | This code is unusual in that most formatters completely discard | |
610 | this code and its content. Other formatters will render it with | |
611 | invisible codes that can be used in building an index of | |
612 | the current document. | |
613 | .ie n .IP """Z<>"" \*(-- a null (zero\-effect) formatting code" 4 | |
614 | .el .IP "\f(CWZ<>\fR \*(-- a null (zero\-effect) formatting code" 4 | |
615 | .IX Item "Z<> a null (zero-effect) formatting code" | |
616 | Discussed briefly in \*(L"Formatting Codes\*(R" in perlpod. | |
617 | .Sp | |
618 | This code is unusual in that it should have no content. That is, | |
619 | a processor may complain if it sees \f(CW\*(C`Z<potatoes>\*(C'\fR. Whether | |
620 | or not it complains, the \fIpotatoes\fR text should be ignored. | |
621 | .ie n .IP """L<name>"" \*(-- a hyperlink" 4 | |
622 | .el .IP "\f(CWL<name>\fR \*(-- a hyperlink" 4 | |
623 | .IX Item "L<name> a hyperlink" | |
624 | The complicated syntaxes of this code are discussed at length in | |
625 | \&\*(L"Formatting Codes\*(R" in perlpod, and implementation details are | |
626 | discussed below, in "About L<...> Codes". Parsing the | |
627 | contents of L<content> is tricky. Notably, the content has to be | |
628 | checked for whether it looks like a \s-1URL\s0, or whether it has to be split | |
629 | on literal \*(L"|\*(R" and/or \*(L"/\*(R" (in the right order!), and so on, | |
630 | \&\fIbefore\fR E<...> codes are resolved. | |
631 | .ie n .IP """E<escape>"" \*(-- a character escape" 4 | |
632 | .el .IP "\f(CWE<escape>\fR \*(-- a character escape" 4 | |
633 | .IX Item "E<escape> a character escape" | |
634 | See \*(L"Formatting Codes\*(R" in perlpod, and several points in | |
635 | \&\*(L"Notes on Implementing Pod Processors\*(R". | |
636 | .ie n .IP """S<text>"" \*(-- text contains non-breaking spaces" 4 | |
637 | .el .IP "\f(CWS<text>\fR \*(-- text contains non-breaking spaces" 4 | |
638 | .IX Item "S<text> text contains non-breaking spaces" | |
639 | This formatting code is syntactically simple, but semantically | |
640 | complex. What it means is that each space in the printable | |
641 | content of this code signifies a non-breaking space. | |
642 | .Sp | |
643 | Consider: | |
644 | .Sp | |
645 | .Vb 1 | |
646 | \& C<$x ? $y : $z> | |
647 | .Ve | |
648 | .Sp | |
649 | .Vb 1 | |
650 | \& S<C<$x ? $y : $z>> | |
651 | .Ve | |
652 | .Sp | |
653 | Both signify the monospace (c[ode] style) text consisting of | |
654 | \&\*(L"$x\*(R", one space, \*(L"?\*(R", one space, \*(L":\*(R", one space, \*(L"$z\*(R". The | |
655 | difference is that in the latter, with the S code, those spaces | |
656 | are not \*(L"normal\*(R" spaces, but instead are non-breaking spaces. | |
657 | .PP | |
658 | If a Pod processor sees any formatting code other than the ones | |
659 | listed above (as in \*(L"N<...>\*(R", or \*(L"Q<...>\*(R", etc.), that | |
660 | processor must by default treat this as an error. | |
661 | A Pod parser may allow a way for particular | |
662 | applications to add to the above list of known formatting codes; | |
663 | a Pod parser might even allow a way to stipulate, for each additional | |
664 | command, whether it requires some form of special processing, as | |
665 | L<...> does. | |
666 | .PP | |
667 | Future versions of this specification may add additional | |
668 | formatting codes. | |
669 | .PP | |
670 | Historical note: A few older Pod processors would not see a \*(L">\*(R" as | |
671 | closing a \*(L"C<\*(R" code, if the \*(L">\*(R" was immediately preceded by | |
672 | a \*(L"\-\*(R". This was so that this: | |
673 | .PP | |
674 | .Vb 1 | |
675 | \& C<$foo->bar> | |
676 | .Ve | |
677 | .PP | |
678 | would parse as equivalent to this: | |
679 | .PP | |
680 | .Vb 1 | |
681 | \& C<$foo-E<gt>bar> | |
682 | .Ve | |
683 | .PP | |
684 | instead of as equivalent to a \*(L"C\*(R" formatting code containing | |
685 | only \*(L"$foo\-\*(R", and then a \*(L"bar>\*(R" outside the \*(L"C\*(R" formatting code. This | |
686 | problem has since been solved by the addition of syntaxes like this: | |
687 | .PP | |
688 | .Vb 1 | |
689 | \& C<< $foo->bar >> | |
690 | .Ve | |
691 | .PP | |
692 | Compliant parsers must not treat \*(L"\->\*(R" as special. | |
693 | .PP | |
694 | Formatting codes absolutely cannot span paragraphs. If a code is | |
695 | opened in one paragraph, and no closing code is found by the end of | |
696 | that paragraph, the Pod parser must close that formatting code, | |
697 | and should complain (as in \*(L"Unterminated I code in the paragraph | |
698 | starting at line 123: 'Time objects are not...'\*(R"). So these | |
699 | two paragraphs: | |
700 | .PP | |
701 | .Vb 1 | |
702 | \& I<I told you not to do this! | |
703 | .Ve | |
704 | .PP | |
705 | .Vb 1 | |
706 | \& Don't make me say it again!> | |
707 | .Ve | |
708 | .PP | |
709 | \&...must \fInot\fR be parsed as two paragraphs in italics (with the I | |
710 | code starting in one paragraph and ending in another.) Instead, | |
711 | the first paragraph should generate a warning, but that aside, the | |
712 | above code must parse as if it were: | |
713 | .PP | |
714 | .Vb 1 | |
715 | \& I<I told you not to do this!> | |
716 | .Ve | |
717 | .PP | |
718 | .Vb 1 | |
719 | \& Don't make me say it again!E<gt> | |
720 | .Ve | |
721 | .PP | |
722 | (In SGMLish jargon, all Pod commands are like block-level | |
723 | elements, whereas all Pod formatting codes are like inline-level | |
724 | elements.) | |
725 | .SH "Notes on Implementing Pod Processors" | |
726 | .IX Header "Notes on Implementing Pod Processors" | |
727 | The following is a long section of miscellaneous requirements | |
728 | and suggestions to do with Pod processing. | |
729 | .IP "\(bu" 4 | |
730 | Pod formatters should tolerate lines in verbatim blocks that are of | |
731 | any length, even if that means having to break them (possibly several | |
732 | times, for very long lines) to avoid text running off the side of the | |
733 | page. Pod formatters may warn of such line\-breaking. Such warnings | |
734 | are particularly appropriate for lines that are over 100 characters long, which | |
735 | are usually not intentional. | |
736 | .IP "\(bu" 4 | |
737 | Pod parsers must recognize \fIall\fR of the three well-known newline | |
738 | formats: \s-1CR\s0, \s-1LF\s0, and \s-1CRLF\s0. See perlport. | |
739 | .IP "\(bu" 4 | |
740 | Pod parsers should accept input lines that are of any length. | |
741 | .IP "\(bu" 4 | |
742 | Since Perl recognizes a Unicode Byte Order Mark at the start of files | |
743 | as signaling that the file is Unicode encoded as in \s-1UTF\-16\s0 (whether | |
744 | big-endian or little\-endian) or \s-1UTF\-8\s0, Pod parsers should do the | |
745 | same. Otherwise, the character encoding should be understood as | |
746 | being \s-1UTF\-8\s0 if the first highbit byte sequence in the file seems | |
747 | valid as a \s-1UTF\-8\s0 sequence, or otherwise as Latin\-1. | |
748 | .Sp | |
749 | Future versions of this specification may specify | |
750 | how Pod can accept other encodings. Presumably treatment of other | |
751 | encodings in Pod parsing would be as in \s-1XML\s0 parsing: whatever the | |
752 | encoding declared by a particular Pod file, content is to be | |
753 | stored in memory as Unicode characters. | |
754 | .IP "\(bu" 4 | |
755 | The well known Unicode Byte Order Marks are as follows: if the | |
756 | file begins with the two literal byte values 0xFE 0xFF, this is | |
757 | the \s-1BOM\s0 for big-endian \s-1UTF\-16\s0. If the file begins with the two | |
758 | literal byte values 0xFF 0xFE, this is the \s-1BOM\s0 for little-endian | |
759 | \&\s-1UTF\-16\s0. If the file begins with the three literal byte values | |
760 | 0xEF 0xBB 0xBF, this is the \s-1BOM\s0 for \s-1UTF\-8\s0. | |
761 | .IP "\(bu" 4 | |
762 | A naive but sufficient heuristic for testing the first highbit | |
763 | byte-sequence in a BOM-less file (whether in code or in Pod!), to see | |
764 | whether that sequence is valid as \s-1UTF\-8\s0 (\s-1RFC\s0 2279) is to check whether | |
765 | the first byte in the sequence is in the range 0xC0 \- 0xFD | |
766 | \&\fIand\fR whether the next byte is in the range | |
767 | 0x80 \- 0xBF. If so, the parser may conclude that this file is in | |
768 | \&\s-1UTF\-8\s0, and all highbit sequences in the file should be assumed to | |
769 | be \s-1UTF\-8\s0. Otherwise the parser should treat the file as being | |
770 | in Latin\-1. In the unlikely circumstance that the first highbit | |
771 | sequence in a truly non\-UTF\-8 file happens to appear to be \s-1UTF\-8\s0, one | |
772 | can cater to our heuristic (as well as any more intelligent heuristic) | |
773 | by prefacing that line with a comment line containing a highbit | |
774 | sequence that is clearly \fInot\fR valid as \s-1UTF\-8\s0. A line consisting | |
775 | of simply \*(L"#\*(R", an e\-acute, and any non-highbit byte, | |
776 | is sufficient to establish this file's encoding. | |
777 | .IP "\(bu" 4 | |
778 | This document's requirements and suggestions about encodings | |
779 | do not apply to Pod processors running on non-ASCII platforms, | |
780 | notably \s-1EBCDIC\s0 platforms. | |
781 | .IP "\(bu" 4 | |
782 | Pod processors must treat a \*(L"=for [label] [content...]\*(R" paragraph as | |
783 | meaning the same thing as a \*(L"=begin [label]\*(R" paragraph, content, and | |
784 | an \*(L"=end [label]\*(R" paragraph. (The parser may conflate these two | |
785 | constructs, or may leave them distinct, in the expectation that the | |
786 | formatter will nevertheless treat them the same.) | |
787 | .IP "\(bu" 4 | |
788 | When rendering Pod to a format that allows comments (i.e., to nearly | |
789 | any format other than plaintext), a Pod formatter must insert comment | |
790 | text identifying its name and version number, and the name and | |
791 | version numbers of any modules it might be using to process the Pod. | |
792 | Minimal examples: | |
793 | .Sp | |
794 | .Vb 1 | |
795 | \& %% POD::Pod2PS v3.14159, using POD::Parser v1.92 | |
796 | .Ve | |
797 | .Sp | |
798 | .Vb 1 | |
799 | \& <!-- Pod::HTML v3.14159, using POD::Parser v1.92 --> | |
800 | .Ve | |
801 | .Sp | |
802 | .Vb 1 | |
803 | \& {\edoccomm generated by Pod::Tree::RTF 3.14159 using Pod::Tree 1.08} | |
804 | .Ve | |
805 | .Sp | |
806 | .Vb 1 | |
807 | \& .\e" Pod::Man version 3.14159, using POD::Parser version 1.92 | |
808 | .Ve | |
809 | .Sp | |
810 | Formatters may also insert additional comments, including: the | |
811 | release date of the Pod formatter program, the contact address for | |
812 | the author(s) of the formatter, the current time, the name of the input | |
813 | file, the formatting options in effect, version of Perl used, etc. | |
814 | .Sp | |
815 | Formatters may also choose to note errors/warnings as comments, | |
816 | besides or instead of emitting them otherwise (as in messages to | |
817 | \&\s-1STDERR\s0, or \f(CW\*(C`die\*(C'\fRing). | |
818 | .IP "\(bu" 4 | |
819 | Pod parsers \fImay\fR emit warnings or error messages ("Unknown E code | |
820 | E<zslig>!") to \s-1STDERR\s0 (whether through printing to \s-1STDERR\s0, or | |
821 | \&\f(CW\*(C`warn\*(C'\fRing/\f(CW\*(C`carp\*(C'\fRing, or \f(CW\*(C`die\*(C'\fRing/\f(CW\*(C`croak\*(C'\fRing), but \fImust\fR allow | |
822 | suppressing all such \s-1STDERR\s0 output, and instead allow an option for | |
823 | reporting errors/warnings | |
824 | in some other way, whether by triggering a callback, or noting errors | |
825 | in some attribute of the document object, or some similarly unobtrusive | |
826 | mechanism \*(-- or even by appending a \*(L"Pod Errors\*(R" section to the end of | |
827 | the parsed form of the document. | |
828 | .IP "\(bu" 4 | |
829 | In cases of exceptionally aberrant documents, Pod parsers may abort the | |
830 | parse. Even then, using \f(CW\*(C`die\*(C'\fRing/\f(CW\*(C`croak\*(C'\fRing is to be avoided; where | |
831 | possible, the parser library may simply close the input file | |
832 | and add text like \*(L"*** Formatting Aborted ***\*(R" to the end of the | |
833 | (partial) in-memory document. | |
834 | .IP "\(bu" 4 | |
835 | In paragraphs where formatting codes (like E<...>, B<...>) | |
836 | are understood (i.e., \fInot\fR verbatim paragraphs, but \fIincluding\fR | |
837 | ordinary paragraphs, and command paragraphs that produce renderable | |
838 | text, like \*(L"=head1\*(R"), literal whitespace should generally be considered | |
839 | \&\*(L"insignificant\*(R", in that one literal space has the same meaning as any | |
840 | (nonzero) number of literal spaces, literal newlines, and literal tabs | |
841 | (as long as this produces no blank lines, since those would terminate | |
842 | the paragraph). Pod parsers should compact literal whitespace in each | |
843 | processed paragraph, but may provide an option for overriding this | |
844 | (since some processing tasks do not require it), or may follow | |
845 | additional special rules (for example, specially treating | |
846 | period-space-space or period-newline sequences). | |
847 | .IP "\(bu" 4 | |
848 | Pod parsers should not, by default, try to coerce apostrophe (') and | |
849 | quote (") into smart quotes (little 9's, 66's, 99's, etc), nor try to | |
850 | turn backtick (`) into anything else but a single backtick character | |
851 | (distinct from an openquote character!), nor "\-\-" into anything but | |
852 | two minus signs. They \fImust never\fR do any of those things to text | |
853 | in C<...> formatting codes, and never \fIever\fR to text in verbatim | |
854 | paragraphs. | |
855 | .IP "\(bu" 4 | |
856 | When rendering Pod to a format that has two kinds of hyphens (\-), one | |
857 | that's a non-breaking hyphen, and another that's a breakable hyphen | |
858 | (as in \*(L"object\-oriented\*(R", which can be split across lines as | |
859 | \&\*(L"object\-\*(R", newline, \*(L"oriented\*(R"), formatters are encouraged to | |
860 | generally translate \*(L"\-\*(R" to non-breaking hyphen, but may apply | |
861 | heuristics to convert some of these to breaking hyphens. | |
862 | .IP "\(bu" 4 | |
863 | Pod formatters should make reasonable efforts to keep words of Perl | |
864 | code from being broken across lines. For example, \*(L"Foo::Bar\*(R" in some | |
865 | formatting systems is seen as eligible for being broken across lines | |
866 | as \*(L"Foo::\*(R" newline \*(L"Bar\*(R" or even \*(L"Foo::\-\*(R" newline \*(L"Bar\*(R". This should | |
867 | be avoided where possible, either by disabling all line-breaking in | |
868 | mid\-word, or by wrapping particular words with internal punctuation | |
869 | in \*(L"don't break this across lines\*(R" codes (which in some formats may | |
870 | not be a single code, but might be a matter of inserting non-breaking | |
871 | zero-width spaces between every pair of characters in a word.) | |
872 | .IP "\(bu" 4 | |
873 | Pod parsers should, by default, expand tabs in verbatim paragraphs as | |
874 | they are processed, before passing them to the formatter or other | |
875 | processor. Parsers may also allow an option for overriding this. | |
876 | .IP "\(bu" 4 | |
877 | Pod parsers should, by default, remove newlines from the end of | |
878 | ordinary and verbatim paragraphs before passing them to the | |
879 | formatter. For example, while the paragraph you're reading now | |
880 | could be considered, in Pod source, to end with (and contain) | |
881 | the newline(s) that end it, it should be processed as ending with | |
882 | (and containing) the period character that ends this sentence. | |
883 | .IP "\(bu" 4 | |
884 | Pod parsers, when reporting errors, should make some effort to report | |
885 | an approximate line number ("Nested E<>'s in Paragraph #52, near | |
886 | line 633 of Thing/Foo.pm!\*(L"), instead of merely noting the paragraph | |
887 | number (\*(R"Nested E<>'s in Paragraph #52 of Thing/Foo.pm!\*(L"). Where | |
888 | this is problematic, the paragraph number should at least be | |
889 | accompanied by an excerpt from the paragraph (\*(R"Nested E<>'s in | |
890 | Paragraph #52 of Thing/Foo.pm, which begins 'Read/write accessor for | |
891 | the C<interest rate> attribute...'"). | |
892 | .IP "\(bu" 4 | |
893 | Pod parsers, when processing a series of verbatim paragraphs one | |
894 | after another, should consider them to be one large verbatim | |
895 | paragraph that happens to contain blank lines. I.e., these two | |
896 | lines, which have a blank line between them: | |
897 | .Sp | |
898 | .Vb 1 | |
899 | \& use Foo; | |
900 | .Ve | |
901 | .Sp | |
902 | .Vb 1 | |
903 | \& print Foo->VERSION | |
904 | .Ve | |
905 | .Sp | |
906 | should be unified into one paragraph (\*(L"\etuse Foo;\en\en\etprint | |
907 | Foo\->\s-1VERSION\s0\*(R") before being passed to the formatter or other | |
908 | processor. Parsers may also allow an option for overriding this. | |
909 | .Sp | |
910 | While this might be too cumbersome to implement in event-based Pod | |
911 | parsers, it is straightforward for parsers that return parse trees. | |
912 | .IP "\(bu" 4 | |
913 | Pod formatters, where feasible, are advised to avoid splitting short | |
914 | verbatim paragraphs (under twelve lines, say) across pages. | |
915 | .IP "\(bu" 4 | |
916 | Pod parsers must treat a line with only spaces and/or tabs on it as a | |
917 | \&\*(L"blank line\*(R" such as separates paragraphs. (Some older parsers | |
918 | recognized only two adjacent newlines as a \*(L"blank line\*(R" but would not | |
919 | recognize a newline, a space, and a newline, as a blank line. This | |
920 | is noncompliant behavior.) | |
921 | .IP "\(bu" 4 | |
922 | Authors of Pod formatters/processors should make every effort to | |
923 | avoid writing their own Pod parser. There are already several in | |
924 | \&\s-1CPAN\s0, with a wide range of interface styles \*(-- and one of them, | |
925 | Pod::Parser, comes with modern versions of Perl. | |
926 | .IP "\(bu" 4 | |
927 | Characters in Pod documents may be conveyed either as literals, or by | |
928 | number in E<n> codes, or by an equivalent mnemonic, as in | |
929 | E<eacute> which is exactly equivalent to E<233>. | |
930 | .Sp | |
931 | Characters in the range 32\-126 refer to those well known US-ASCII | |
932 | characters (also defined there by Unicode, with the same meaning), | |
933 | which all Pod formatters must render faithfully. Characters | |
934 | in the ranges 0\-31 and 127\-159 should not be used (neither as | |
935 | literals, nor as E<number> codes), except for the | |
936 | literal byte-sequences for newline (13, 13 10, or 10), and tab (9). | |
937 | .Sp | |
938 | Characters in the range 160\-255 refer to Latin\-1 characters (also | |
939 | defined there by Unicode, with the same meaning). Characters above | |
940 | 255 should be understood to refer to Unicode characters. | |
941 | .IP "\(bu" 4 | |
942 | Be warned | |
943 | that some formatters cannot reliably render characters outside 32\-126; | |
944 | and many are able to handle 32\-126 and 160\-255, but nothing above | |
945 | 255. | |
946 | .IP "\(bu" 4 | |
947 | Besides the well-known "E<lt>\*(L" and \*(R"E<gt>\*(L" codes for | |
948 | less-than and greater\-than, Pod parsers must understand \*(R"E<sol>\*(L" | |
949 | for \*(R"/\*(L" (solidus, slash), and \*(R"E<verbar>\*(L" for \*(R"|\*(L" (vertical bar, | |
950 | pipe). Pod parsers should also understand \*(R"E<lchevron>\*(L" and | |
951 | \&\*(R"E<rchevron>\*(L" as legacy codes for characters 171 and 187, i.e., | |
952 | \&\*(R"left\-pointing double angle quotation mark\*(L" = \*(R"left pointing | |
953 | guillemet\*(L" and \*(R"right\-pointing double angle quotation mark\*(L" = \*(R"right | |
954 | pointing guillemet\*(L". (These look like little \*(R"<<\*(L" and \*(R">>\*(L", and they | |
955 | are now preferably expressed with the \s-1HTML/XHTML\s0 codes \*(R"E<laquo>\*(L" | |
956 | and \*(R"E<raquo>".) | |
957 | .IP "\(bu" 4 | |
958 | Pod parsers should understand all "E<html>" codes as defined | |
959 | in the entity declarations in the most recent \s-1XHTML\s0 specification at | |
960 | \&\f(CW\*(C`www.W3.org\*(C'\fR. Pod parsers must understand at least the entities | |
961 | that define characters in the range 160\-255 (Latin\-1). Pod parsers, | |
962 | when faced with some unknown "E<\fIidentifier\fR>" code, | |
963 | shouldn't simply replace it with nullstring (by default, at least), | |
964 | but may pass it through as a string consisting of the literal characters | |
965 | E, less\-than, \fIidentifier\fR, greater\-than. Or Pod parsers may offer the | |
966 | alternative option of processing such unknown | |
967 | "E<\fIidentifier\fR>\*(L" codes by firing an event especially | |
968 | for such codes, or by adding a special node-type to the in-memory | |
969 | document tree. Such \*(R"E<\fIidentifier\fR>" may have special meaning | |
970 | to some processors, or some processors may choose to add them to | |
971 | a special error report. | |
972 | .IP "\(bu" 4 | |
973 | Pod parsers must also support the \s-1XHTML\s0 codes "E<quot>" for | |
974 | character 34 (doublequote, "), "E<amp>" for character 38 | |
975 | (ampersand, &), and "E<apos>" for character 39 (apostrophe, '). | |
976 | .IP "\(bu" 4 | |
977 | Note that in all cases of "E<whatever>", \fIwhatever\fR (whether | |
978 | an htmlname, or a number in any base) must consist only of | |
979 | alphanumeric characters \*(-- that is, \fIwhatever\fR must match | |
980 | \&\f(CW\*(C`m/\eA\ew+\ez/\*(C'\fR. So "E< 0 1 2 3 >" is invalid, because | |
981 | it contains spaces, which aren't alphanumeric characters. This | |
982 | presumably does not \fIneed\fR special treatment by a Pod processor; | |
983 | \&\*(L" 0 1 2 3 \*(R" doesn't look like a number in any base, so it would | |
984 | presumably be looked up in the table of HTML-like names. Since | |
985 | there isn't (and cannot be) an HTML-like entity called \*(L" 0 1 2 3 \*(R", | |
986 | this will be treated as an error. However, Pod processors may | |
987 | treat "E< 0 1 2 3 >\*(L" or \*(R"E<e\-acute>" as \fIsyntactically\fR | |
988 | invalid, potentially earning a different error message than the | |
989 | error message (or warning, or event) generated by a merely unknown | |
990 | (but theoretically valid) htmlname, as in "E<qacute>" | |
991 | [sic]. However, Pod parsers are not required to make this | |
992 | distinction. | |
993 | .IP "\(bu" 4 | |
994 | Note that E<number> \fImust not\fR be interpreted as simply | |
995 | "codepoint \fInumber\fR in the current/native character set\*(L". It always | |
996 | means only \*(R"the character represented by codepoint \fInumber\fR in | |
997 | Unicode." (This is identical to the semantics of &#\fInumber\fR; in \s-1XML\s0.) | |
998 | .Sp | |
999 | This will likely require many formatters to have tables mapping from | |
1000 | treatable Unicode codepoints (such as the \*(L"\exE9\*(R" for the e\-acute | |
1001 | character) to the escape sequences or codes necessary for conveying | |
1002 | such sequences in the target output format. A converter to *roff | |
1003 | would, for example, know that \*(L"\exE9\*(R" (whether conveyed literally, or via | |
1004 | a E<...> sequence) is to be conveyed as \*(L"e\e\e*'\*(R". | |
1005 | Similarly, a program rendering Pod in a Mac \s-1OS\s0 application window, would | |
1006 | presumably need to know that \*(L"\exE9\*(R" maps to codepoint 142 in MacRoman | |
1007 | encoding that (at time of writing) is native for Mac \s-1OS\s0. Such | |
1008 | Unicode2whatever mappings are presumably already widely available for | |
1009 | common output formats. (Such mappings may be incomplete! Implementers | |
1010 | are not expected to bend over backwards in an attempt to render | |
1011 | Cherokee syllabics, Etruscan runes, Byzantine musical symbols, or any | |
1012 | of the other weird things that Unicode can encode.) And | |
1013 | if a Pod document uses a character not found in such a mapping, the | |
1014 | formatter should consider it an unrenderable character. | |
1015 | .IP "\(bu" 4 | |
1016 | If, surprisingly, the implementor of a Pod formatter can't find a | |
1017 | satisfactory pre-existing table mapping from Unicode characters to | |
1018 | escapes in the target format (e.g., a decent table of Unicode | |
1019 | characters to *roff escapes), it will be necessary to build such a | |
1020 | table. If you are in this circumstance, you should begin with the | |
1021 | characters in the range 0x00A0 \- 0x00FF, which is mostly the heavily | |
1022 | used accented characters. Then proceed (as patience permits and | |
1023 | fastidiousness compels) through the characters that the (X)HTML | |
1024 | standards groups judged important enough to merit mnemonics | |
1025 | for. These are declared in the (X)HTML specifications at the | |
1026 | www.W3.org site. At time of writing (September 2001), the most recent | |
1027 | entity declaration files are: | |
1028 | .Sp | |
1029 | .Vb 3 | |
1030 | \& http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent | |
1031 | \& http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent | |
1032 | \& http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent | |
1033 | .Ve | |
1034 | .Sp | |
1035 | Then you can progress through any remaining notable Unicode characters | |
1036 | in the range 0x2000\-0x204D (consult the character tables at | |
1037 | www.unicode.org), and whatever else strikes your fancy. For example, | |
1038 | in \fIxhtml\-symbol.ent\fR, there is the entry: | |
1039 | .Sp | |
1040 | .Vb 1 | |
1041 | \& <!ENTITY infin "&#8734;"> <!-- infinity, U+221E ISOtech --> | |
1042 | .Ve | |
1043 | .Sp | |
1044 | While the mapping \*(L"infin\*(R" to the character \*(L"\ex{221E}\*(R" will (hopefully) | |
1045 | have been already handled by the Pod parser, the presence of the | |
1046 | character in this file means that it's reasonably important enough to | |
1047 | include in a formatter's table that maps from notable Unicode characters | |
1048 | to the codes necessary for rendering them. So for a Unicode\-to\-*roff | |
1049 | mapping, for example, this would merit the entry: | |
1050 | .Sp | |
1051 | .Vb 1 | |
1052 | \& "\ex{221E}" => '\e(in', | |
1053 | .Ve | |
1054 | .Sp | |
1055 | It is eagerly hoped that in the future, increasing numbers of formats | |
1056 | (and formatters) will support Unicode characters directly (as (X)HTML | |
1057 | does with \f(CW\*(C`&infin;\*(C'\fR, \f(CW\*(C`&#8734;\*(C'\fR, or \f(CW\*(C`&#x221E;\*(C'\fR), reducing the need | |
1058 | for idiosyncratic mappings of Unicode\-to\-\fImy_escapes\fR. | |
1059 | .IP "\(bu" 4 | |
1060 | It is up to individual Pod formatters to display good judgment when | |
1061 | confronted with an unrenderable character (which is distinct from an | |
1062 | unknown E<thing> sequence that the parser couldn't resolve to | |
1063 | anything, renderable or not). It is good practice to map Latin letters | |
1064 | with diacritics (like "E<eacute>\*(L"/\*(R"E<233>\*(L") to the corresponding | |
1065 | unaccented US-ASCII letters (like a simple character 101, \*(R"e\*(L"), but | |
1066 | clearly this is often not feasible, and an unrenderable character may | |
1067 | be represented as \*(R"?", or the like. In attempting a sane fallback | |
1068 | (as from E<233> to \*(L"e\*(R"), Pod formatters may use the | |
1069 | \&\f(CW%Latin1Code_to_fallback\fR table in Pod::Escapes, or | |
1070 | Text::Unidecode, if available. | |
1071 | .Sp | |
1072 | For example, this Pod text: | |
1073 | .Sp | |
1074 | .Vb 1 | |
1075 | \& magic is enabled if you set C<$Currency> to 'E<euro>'. | |
1076 | .Ve | |
1077 | .Sp | |
1078 | may be rendered as: | |
1079 | \&\*(L"magic is enabled if you set \f(CW$Currency\fR to '\fI?\fR'\*(R" or as | |
1080 | \&\*(L"magic is enabled if you set \f(CW$Currency\fR to '\fB[euro]\fR'\*(R", or as | |
1081 | \&\*(L"magic is enabled if you set \f(CW$Currency\fR to '[x20AC]'\*(R", etc. | |
1082 | .Sp | |
1083 | A Pod formatter may also note, in a comment or warning, a list of what | |
1084 | unrenderable characters were encountered. | |
1085 | .IP "\(bu" 4 | |
1086 | E<...> may freely appear in any formatting code (other than | |
1087 | in another E<...> or in an Z<>). That is, "X<The | |
1088 | E<euro>1,000,000 Solution>\*(L" is valid, as is \*(R"L<The | |
1089 | E<euro>1,000,000 Solution|Million::Euros>". | |
1090 | .IP "\(bu" 4 | |
1091 | Some Pod formatters output to formats that implement non-breaking | |
1092 | spaces as an individual character (which I'll call \*(L"\s-1NBSP\s0\*(R"), and | |
1093 | others output to formats that implement non-breaking spaces just as | |
1094 | spaces wrapped in a \*(L"don't break this across lines\*(R" code. Note that | |
1095 | at the level of Pod, both sorts of codes can occur: Pod can contain a | |
1096 | \&\s-1NBSP\s0 character (whether as a literal, or as a "E<160>\*(L" or | |
1097 | \&\*(R"E<nbsp>\*(L" code); and Pod can contain \*(R"S<foo | |
1098 | I<bar> baz>\*(L" codes, where \*(R"mere spaces\*(L" (character 32) in | |
1099 | such codes are taken to represent non-breaking spaces. Pod | |
1100 | parsers should consider supporting the optional parsing of \*(R"S<foo | |
1101 | I<bar> baz>\*(L" as if it were | |
1102 | \&\*(R"foo\fI\s-1NBSP\s0\fRI<bar>\fI\s-1NBSP\s0\fRbaz", and, going the other way, the | |
1103 | optional parsing of groups of words joined by \s-1NBSP\s0's as if each group | |
1104 | were in a S<...> code, so that formatters may use the | |
1105 | representation that maps best to what the output format demands. | |
1106 | .IP "\(bu" 4 | |
1107 | Some processors may find that the \f(CW\*(C`S<...>\*(C'\fR code is easiest to | |
1108 | implement by replacing each space in the parse tree under the content | |
1109 | of the S, with an \s-1NBSP\s0. But note: the replacement should apply \fInot\fR to | |
1110 | spaces in \fIall\fR text, but \fIonly\fR to spaces in \fIprintable\fR text. (This | |
1111 | distinction may or may not be evident in the particular tree/event | |
1112 | model implemented by the Pod parser.) For example, consider this | |
1113 | unusual case: | |
1114 | .Sp | |
1115 | .Vb 1 | |
1116 | \& S<L</Autoloaded Functions>> | |
1117 | .Ve | |
1118 | .Sp | |
1119 | This means that the space in the middle of the visible link text must | |
1120 | not be broken across lines. In other words, it's the same as this: | |
1121 | .Sp | |
1122 | .Vb 1 | |
1123 | \& L<"AutoloadedE<160>Functions"/Autoloaded Functions> | |
1124 | .Ve | |
1125 | .Sp | |
1126 | However, a misapplied space-to-NBSP replacement could (wrongly) | |
1127 | produce something equivalent to this: | |
1128 | .Sp | |
1129 | .Vb 1 | |
1130 | \& L<"AutoloadedE<160>Functions"/AutoloadedE<160>Functions> | |
1131 | .Ve | |
1132 | .Sp | |
1133 | \&...which is almost definitely not going to work as a hyperlink (assuming | |
1134 | this formatter outputs a format supporting hypertext). | |
1135 | .Sp | |
1136 | Formatters may choose to just not support the S format code, | |
1137 | especially in cases where the output format simply has no \s-1NBSP\s0 | |
1138 | character/code and no code for \*(L"don't break this stuff across lines\*(R". | |
1139 | .IP "\(bu" 4 | |
1140 | Besides the \s-1NBSP\s0 character discussed above, implementors are reminded | |
1141 | of the existence of the other \*(L"special\*(R" character in Latin\-1, the | |
1142 | \&\*(L"soft hyphen\*(R" character, also known as \*(L"discretionary hyphen\*(R", | |
1143 | i.e. \f(CW\*(C`E<173>\*(C'\fR = \f(CW\*(C`E<0xAD>\*(C'\fR = | |
1144 | \&\f(CW\*(C`E<shy>\*(C'\fR. This character expresses an optional hyphenation | |
1145 | point. That is, it normally renders as nothing, but may render as a | |
1146 | \&\*(L"\-\*(R" if a formatter breaks the word at that point. Pod formatters | |
1147 | should, as appropriate, do one of the following: 1) render this with | |
1148 | a code with the same meaning (e.g., \*(L"\e\-\*(R" in \s-1RTF\s0), 2) pass it through | |
1149 | in the expectation that the formatter understands this character as | |
1150 | such, or 3) delete it. | |
1151 | .Sp | |
1152 | For example: | |
1153 | .Sp | |
1154 | .Vb 3 | |
1155 | \& sigE<shy>action | |
1156 | \& manuE<shy>script | |
1157 | \& JarkE<shy>ko HieE<shy>taE<shy>nieE<shy>mi | |
1158 | .Ve | |
1159 | .Sp | |
1160 | These signal to a formatter that if it is to hyphenate \*(L"sigaction\*(R" | |
1161 | or \*(L"manuscript\*(R", then it should be done as | |
1162 | "sig\-\fI[linebreak]\fRaction\*(L" or \*(R"manu\-\fI[linebreak]\fRscript" | |
1163 | (and if it doesn't hyphenate it, then the \f(CW\*(C`E<shy>\*(C'\fR doesn't | |
1164 | show up at all). And if it is | |
1165 | to hyphenate \*(L"Jarkko\*(R" and/or \*(L"Hietaniemi\*(R", it can do | |
1166 | so only at the points where there is a \f(CW\*(C`E<shy>\*(C'\fR code. | |
1167 | .Sp | |
1168 | In practice, it is anticipated that this character will not be used | |
1169 | often, but formatters should either support it, or delete it. | |
1170 | .IP "\(bu" 4 | |
1171 | If you think that you want to add a new command to Pod (like, say, a | |
1172 | \&\*(L"=biblio\*(R" command), consider whether you could get the same | |
1173 | effect with a for or begin/end sequence: \*(L"=for biblio ...\*(R" or \*(L"=begin | |
1174 | biblio\*(R" ... \*(L"=end biblio\*(R". Pod processors that don't understand | |
1175 | \&\*(L"=for biblio\*(R", etc, will simply ignore it, whereas they may complain | |
1176 | loudly if they see \*(L"=biblio\*(R". | |
1177 | .IP "\(bu" 4 | |
1178 | Throughout this document, \*(L"Pod\*(R" has been the preferred spelling for | |
1179 | the name of the documentation format. One may also use \*(L"\s-1POD\s0\*(R" or | |
1180 | \&\*(L"pod\*(R". For the documentation that is (typically) in the Pod | |
1181 | format, you may use \*(L"pod\*(R", or \*(L"Pod\*(R", or \*(L"\s-1POD\s0\*(R". Understanding these | |
1182 | distinctions is useful; but obsessing over how to spell them, usually | |
1183 | is not. | |
1184 | .SH "About L<...> Codes" | |
1185 | .IX Header "About L<...> Codes" | |
1186 | As you can tell from a glance at perlpod, the L<...> | |
1187 | code is the most complex of the Pod formatting codes. The points below | |
1188 | will hopefully clarify what it means and how processors should deal | |
1189 | with it. | |
1190 | .IP "\(bu" 4 | |
1191 | In parsing an L<...> code, Pod parsers must distinguish at least | |
1192 | four attributes: | |
1193 | .RS 4 | |
1194 | .IP "First:" 4 | |
1195 | .IX Item "First:" | |
1196 | The link\-text. If there is none, this must be undef. (E.g., in | |
1197 | "L<Perl Functions|perlfunc>\*(L", the link-text is \*(R"Perl Functions\*(L". | |
1198 | In \*(R"L<Time::HiRes>\*(L" and even \*(R"L<|Time::HiRes>", there is no | |
1199 | link text. Note that link text may contain formatting.) | |
1200 | .IP "Second:" 4 | |
1201 | .IX Item "Second:" | |
1202 | The possibly inferred link-text \*(-- i.e., if there was no real link | |
1203 | text, then this is the text that we'll infer in its place. (E.g., for | |
1204 | "L<Getopt::Std>\*(L", the inferred link text is \*(R"Getopt::Std".) | |
1205 | .IP "Third:" 4 | |
1206 | .IX Item "Third:" | |
1207 | The name or \s-1URL\s0, or undef if none. (E.g., in "L<Perl | |
1208 | Functions|perlfunc>\*(L", the name \*(-- also sometimes called the page \*(-- | |
1209 | is \*(R"perlfunc\*(L". In \*(R"L</CAVEATS>", the name is undef.) | |
1210 | .IP "Fourth:" 4 | |
1211 | .IX Item "Fourth:" | |
1212 | The section (\s-1AKA\s0 \*(L"item\*(R" in older perlpods), or undef if none. E.g., | |
1213 | in "L<Getopt::Std/DESCRIPTION>", \*(L"\s-1DESCRIPTION\s0\*(R" is the section. (Note | |
1214 | that this is not the same as a manpage section like the \*(L"5\*(R" in \*(L"man 5 | |
1215 | crontab\*(R". \*(L"Section Foo\*(R" in the Pod sense means the part of the text | |
1216 | that's introduced by the heading or item whose text is \*(L"Foo\*(R".) | |
1217 | .RE | |
1218 | .RS 4 | |
1219 | .Sp | |
1220 | Pod parsers may also note additional attributes including: | |
1221 | .IP "Fifth:" 4 | |
1222 | .IX Item "Fifth:" | |
1223 | A flag for whether item 3 (if present) is a \s-1URL\s0 (like | |
1224 | \&\*(L"http://lists.perl.org\*(R" is), in which case there should be no section | |
1225 | attribute; a Pod name (like \*(L"perldoc\*(R" and \*(L"Getopt::Std\*(R" are); or | |
1226 | possibly a man page name (like \*(L"\fIcrontab\fR\|(5)\*(R" is). | |
1227 | .IP "Sixth:" 4 | |
1228 | .IX Item "Sixth:" | |
1229 | The raw original L<...> content, before text is split on | |
1230 | \&\*(L"|\*(R", \*(L"/\*(R", etc, and before E<...> codes are expanded. | |
1231 | .RE | |
1232 | .RS 4 | |
1233 | .Sp | |
1234 | (The above were numbered only for concise reference below. It is not | |
1235 | a requirement that these be passed as an actual list or array.) | |
1236 | .Sp | |
1237 | For example: | |
1238 | .Sp | |
1239 | .Vb 7 | |
1240 | \& L<Foo::Bar> | |
1241 | \& => undef, # link text | |
1242 | \& "Foo::Bar", # possibly inferred link text | |
1243 | \& "Foo::Bar", # name | |
1244 | \& undef, # section | |
1245 | \& 'pod', # what sort of link | |
1246 | \& "Foo::Bar" # original content | |
1247 | .Ve | |
1248 | .Sp | |
1249 | .Vb 7 | |
1250 | \& L<Perlport's section on NL's|perlport/Newlines> | |
1251 | \& => "Perlport's section on NL's", # link text | |
1252 | \& "Perlport's section on NL's", # possibly inferred link text | |
1253 | \& "perlport", # name | |
1254 | \& "Newlines", # section | |
1255 | \& 'pod', # what sort of link | |
1256 | \& "Perlport's section on NL's|perlport/Newlines" # orig. content | |
1257 | .Ve | |
1258 | .Sp | |
1259 | .Vb 7 | |
1260 | \& L<perlport/Newlines> | |
1261 | \& => undef, # link text | |
1262 | \& '"Newlines" in perlport', # possibly inferred link text | |
1263 | \& "perlport", # name | |
1264 | \& "Newlines", # section | |
1265 | \& 'pod', # what sort of link | |
1266 | \& "perlport/Newlines" # original content | |
1267 | .Ve | |
1268 | .Sp | |
1269 | .Vb 7 | |
1270 | \& L<crontab(5)/"DESCRIPTION"> | |
1271 | \& => undef, # link text | |
1272 | \& '"DESCRIPTION" in crontab(5)', # possibly inferred link text | |
1273 | \& "crontab(5)", # name | |
1274 | \& "DESCRIPTION", # section | |
1275 | \& 'man', # what sort of link | |
1276 | \& 'crontab(5)/"DESCRIPTION"' # original content | |
1277 | .Ve | |
1278 | .Sp | |
1279 | .Vb 7 | |
1280 | \& L</Object Attributes> | |
1281 | \& => undef, # link text | |
1282 | \& '"Object Attributes"', # possibly inferred link text | |
1283 | \& undef, # name | |
1284 | \& "Object Attributes", # section | |
1285 | \& 'pod', # what sort of link | |
1286 | \& "/Object Attributes" # original content | |
1287 | .Ve | |
1288 | .Sp | |
1289 | .Vb 7 | |
1290 | \& L<http://www.perl.org/> | |
1291 | \& => undef, # link text | |
1292 | \& "http://www.perl.org/", # possibly inferred link text | |
1293 | \& "http://www.perl.org/", # name | |
1294 | \& undef, # section | |
1295 | \& 'url', # what sort of link | |
1296 | \& "http://www.perl.org/" # original content | |
1297 | .Ve | |
1298 | .Sp | |
1299 | Note that you can distinguish URL-links from anything else by the | |
1300 | fact that they match \f(CW\*(C`m/\eA\ew+:[^:\es]\eS*\ez/\*(C'\fR. So | |
1301 | \&\f(CW\*(C`L<http://www.perl.com>\*(C'\fR is a \s-1URL\s0, but | |
1302 | \&\f(CW\*(C`L<HTTP::Response>\*(C'\fR isn't. | |
1303 | .RE | |
1304 | .IP "\(bu" 4 | |
1305 | In case of L<...> codes with no \*(L"text|\*(R" part in them, | |
1306 | older formatters have exhibited great variation in actually displaying | |
1307 | the link or cross reference. For example, L<\fIcrontab\fR\|(5)> would render | |
1308 | as \*(L"the \f(CWcrontab(5)\fR manpage\*(R", or \*(L"in the \f(CWcrontab(5)\fR manpage\*(R" | |
1309 | or just \*(L"\f(CWcrontab(5)\fR\*(R". | |
1310 | .Sp | |
1311 | Pod processors must now treat \*(L"text|\*(R"\-less links as follows: | |
1312 | .Sp | |
1313 | .Vb 3 | |
1314 | \& L<name> => L<name|name> | |
1315 | \& L</section> => L<"section"|/section> | |
1316 | \& L<name/section> => L<"section" in name|name/section> | |
1317 | .Ve | |
1318 | .IP "\(bu" 4 | |
1319 | Note that section names might contain markup. I.e., if a section | |
1320 | starts with: | |
1321 | .Sp | |
1322 | .Vb 1 | |
1323 | \& =head2 About the C<-M> Operator | |
1324 | .Ve | |
1325 | .Sp | |
1326 | or with: | |
1327 | .Sp | |
1328 | .Vb 1 | |
1329 | \& =item About the C<-M> Operator | |
1330 | .Ve | |
1331 | .Sp | |
1332 | then a link to it would look like this: | |
1333 | .Sp | |
1334 | .Vb 1 | |
1335 | \& L<somedoc/About the C<-M> Operator> | |
1336 | .Ve | |
1337 | .Sp | |
1338 | Formatters may choose to ignore the markup for purposes of resolving | |
1339 | the link and use only the renderable characters in the section name, | |
1340 | as in: | |
1341 | .Sp | |
1342 | .Vb 2 | |
1343 | \& <h1><a name="About_the_-M_Operator">About the <code>-M</code> | |
1344 | \& Operator</a></h1> | |
1345 | .Ve | |
1346 | .Sp | |
1347 | .Vb 1 | |
1348 | \& ... | |
1349 | .Ve | |
1350 | .Sp | |
1351 | .Vb 2 | |
1352 | \& <a href="somedoc#About_the_-M_Operator">"About the <code>-M</code> | |
1353 | \& Operator" in somedoc</a> | |
1354 | .Ve | |
1355 | .IP "\(bu" 4 | |
1356 | Previous versions of perlpod distinguished \f(CW\*(C`L<name/"section">\*(C'\fR | |
1357 | links from \f(CW\*(C`L<name/item>\*(C'\fR links (and their targets). These | |
1358 | have been merged syntactically and semantically in the current | |
1359 | specification, and \fIsection\fR can refer either to a \*(L"=head\fIn\fR Heading | |
1360 | Content\*(R" command or to a \*(L"=item Item Content\*(R" command. This | |
1361 | specification does not specify what behavior should be in the case | |
1362 | of a given document having several things all seeming to produce the | |
1363 | same \fIsection\fR identifier (e.g., in \s-1HTML\s0, several things all producing | |
1364 | the same \fIanchorname\fR in <a name="\fIanchorname\fR">...</a> | |
1365 | elements). Where Pod processors can control this behavior, they should | |
1366 | use the first such anchor. That is, \f(CW\*(C`L<Foo/Bar>\*(C'\fR refers to the | |
1367 | \&\fIfirst\fR \*(L"Bar\*(R" section in Foo. | |
1368 | .Sp | |
1369 | But for some processors/formats this cannot be easily controlled; as | |
1370 | with the \s-1HTML\s0 example, the behavior of multiple ambiguous | |
1371 | <a name="\fIanchorname\fR">...</a> is most easily just left up to | |
1372 | browsers to decide. | |
1373 | .IP "\(bu" 4 | |
1374 | Authors wanting to link to a particular (absolute) \s-1URL\s0, must do so | |
1375 | only with "L<scheme:...>" codes (like | |
1376 | L<http://www.perl.org>), and must not attempt "L<Some Site | |
1377 | Name|scheme:...>" codes. This restriction avoids many problems | |
1378 | in parsing and rendering L<...> codes. | |
1379 | .IP "\(bu" 4 | |
1380 | In a \f(CW\*(C`L<text|...>\*(C'\fR code, text may contain formatting codes | |
1381 | for formatting or for E<...> escapes, as in: | |
1382 | .Sp | |
1383 | .Vb 1 | |
1384 | \& L<B<ummE<234>stuff>|...> | |
1385 | .Ve | |
1386 | .Sp | |
1387 | For \f(CW\*(C`L<...>\*(C'\fR codes without a \*(L"name|\*(R" part, only | |
1388 | \&\f(CW\*(C`E<...>\*(C'\fR and \f(CW\*(C`Z<>\*(C'\fR codes may occur \*(-- no | |
1389 | other formatting codes. That is, authors should not use | |
1390 | "\f(CW\*(C`L<B<Foo::Bar>>\*(C'\fR". | |
1391 | .Sp | |
1392 | Note, however, that formatting codes and Z<>'s can occur in any | |
1393 | and all parts of an L<...> (i.e., in \fIname\fR, \fIsection\fR, \fItext\fR, | |
1394 | and \fIurl\fR). | |
1395 | .Sp | |
1396 | Authors must not nest L<...> codes. For example, "L<The | |
1397 | L<Foo::Bar> man page>" should be treated as an error. | |
1398 | .IP "\(bu" 4 | |
1399 | Note that Pod authors may use formatting codes inside the \*(L"text\*(R" | |
1400 | part of "L<text|name>" (and so on for L<text|/\*(L"sec\*(R">). | |
1401 | .Sp | |
1402 | In other words, this is valid: | |
1403 | .Sp | |
1404 | .Vb 1 | |
1405 | \& Go read L<the docs on C<$.>|perlvar/"$."> | |
1406 | .Ve | |
1407 | .Sp | |
1408 | Some output formats that do allow rendering "L<...>" codes as | |
1409 | hypertext, might not allow the link-text to be formatted; in | |
1410 | that case, formatters will have to just ignore that formatting. | |
1411 | .IP "\(bu" 4 | |
1412 | At time of writing, \f(CW\*(C`L<name>\*(C'\fR values are of two types: | |
1413 | either the name of a Pod page like \f(CW\*(C`L<Foo::Bar>\*(C'\fR (which | |
1414 | might be a real Perl module or program in an \f(CW@INC\fR / \s-1PATH\s0 | |
1415 | directory, or a .pod file in those places); or the name of a \s-1UNIX\s0 | |
1416 | man page, like \f(CW\*(C`L<crontab(5)>\*(C'\fR. In theory, \f(CW\*(C`L<chmod>\*(C'\fR | |
1417 | is ambiguous between a Pod page called \*(L"chmod\*(R", or the Unix man page | |
1418 | \&\*(L"chmod\*(R" (in whatever man\-section). However, the presence of a string | |
1419 | in parens, as in \*(L"\fIcrontab\fR\|(5)\*(R", is sufficient to signal that what | |
1420 | is being discussed is not a Pod page, and so is presumably a | |
1421 | \&\s-1UNIX\s0 man page. The distinction is of no importance to many | |
1422 | Pod processors, but some processors that render to hypertext formats | |
1423 | may need to distinguish them in order to know how to render a | |
1424 | given \f(CW\*(C`L<foo>\*(C'\fR code. | |
1425 | .IP "\(bu" 4 | |
1426 | Previous versions of perlpod allowed for a \f(CW\*(C`L<section>\*(C'\fR syntax | |
1427 | (as in "\f(CW\*(C`L<Object Attributes>\*(C'\fR"), which was not easily distinguishable | |
1428 | from \f(CW\*(C`L<name>\*(C'\fR syntax. This syntax is no longer in the | |
1429 | specification, and has been replaced by the \f(CW\*(C`L<"section">\*(C'\fR syntax | |
1430 | (where the quotes were formerly optional). Pod parsers should tolerate | |
1431 | the \f(CW\*(C`L<section>\*(C'\fR syntax, for a while at least. The suggested | |
1432 | heuristic for distinguishing \f(CW\*(C`L<section>\*(C'\fR from \f(CW\*(C`L<name>\*(C'\fR | |
1433 | is that if it contains any whitespace, it's a \fIsection\fR. Pod processors | |
1434 | may warn about this being deprecated syntax. | |
1435 | .SH "About =over...=back Regions" | |
1436 | .IX Header "About =over...=back Regions" | |
1437 | \&\*(L"=over\*(R"...\*(L"=back\*(R" regions are used for various kinds of list-like | |
1438 | structures. (I use the term \*(L"region\*(R" here simply as a collective | |
1439 | term for everything from the \*(L"=over\*(R" to the matching \*(L"=back\*(R".) | |
1440 | .IP "\(bu" 4 | |
1441 | The non-zero numeric \fIindentlevel\fR in \*(L"=over \fIindentlevel\fR\*(R" ... | |
1442 | \&\*(L"=back\*(R" is used for giving the formatter a clue as to how many | |
1443 | \&\*(L"spaces\*(R" (ems, or roughly equivalent units) it should tab over, | |
1444 | although many formatters will have to convert this to an absolute | |
1445 | measurement that may not exactly match with the size of spaces (or M's) | |
1446 | in the document's base font. Other formatters may have to completely | |
1447 | ignore the number. The lack of any explicit \fIindentlevel\fR parameter is | |
1448 | equivalent to an \fIindentlevel\fR value of 4. Pod processors may | |
1449 | complain if \fIindentlevel\fR is present but is not a positive number | |
1450 | matching \f(CW\*(C`m/\eA(\ed*\e.)?\ed+\ez/\*(C'\fR. | |
1451 | .IP "\(bu" 4 | |
1452 | Authors of Pod formatters are reminded that \*(L"=over\*(R" ... \*(L"=back\*(R" may | |
1453 | map to several different constructs in your output format. For | |
1454 | example, in converting Pod to (X)HTML, it can map to any of | |
1455 | <ul>...</ul>, <ol>...</ol>, <dl>...</dl>, or | |
1456 | <blockquote>...</blockquote>. Similarly, \*(L"=item\*(R" can map to <li> or | |
1457 | <dt>. | |
1458 | .IP "\(bu" 4 | |
1459 | Each \*(L"=over\*(R" ... \*(L"=back\*(R" region should be one of the following: | |
1460 | .RS 4 | |
1461 | .IP "\(bu" 4 | |
1462 | An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing only \*(L"=item *\*(R" commands, | |
1463 | each followed by some number of ordinary/verbatim paragraphs, other | |
1464 | nested \*(L"=over\*(R" ... \*(L"=back\*(R" regions, \*(L"=for...\*(R" paragraphs, and | |
1465 | \&\*(L"=begin\*(R"...\*(L"=end\*(R" regions. | |
1466 | .Sp | |
1467 | (Pod processors must tolerate a bare \*(L"=item\*(R" as if it were \*(L"=item | |
1468 | *\*(R".) Whether \*(L"*\*(R" is rendered as a literal asterisk, an \*(L"o\*(R", or as | |
1469 | some kind of real bullet character, is left up to the Pod formatter, | |
1470 | and may depend on the level of nesting. | |
1471 | .IP "\(bu" 4 | |
1472 | An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing only | |
1473 | \&\f(CW\*(C`m/\eA=item\es+\ed+\e.?\es*\ez/\*(C'\fR paragraphs, each one (or each group of them) | |
1474 | followed by some number of ordinary/verbatim paragraphs, other nested | |
1475 | \&\*(L"=over\*(R" ... \*(L"=back\*(R" regions, \*(L"=for...\*(R" paragraphs, and/or | |
1476 | \&\*(L"=begin\*(R"...\*(L"=end\*(R" regions. Note that the numbers must start at 1 | |
1477 | in each section, and must proceed in order and without skipping | |
1478 | numbers. | |
1479 | .Sp | |
1480 | (Pod processors must tolerate lines like \*(L"=item 1\*(R" as if they were | |
1481 | \&\*(L"=item 1.\*(R", with the period.) | |
1482 | .IP "\(bu" 4 | |
1483 | An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing only \*(L"=item [text]\*(R" | |
1484 | commands, each one (or each group of them) followed by some number of | |
1485 | ordinary/verbatim paragraphs, other nested \*(L"=over\*(R" ... \*(L"=back\*(R" | |
1486 | regions, or \*(L"=for...\*(R" paragraphs, and \*(L"=begin\*(R"...\*(L"=end\*(R" regions. | |
1487 | .Sp | |
1488 | The \*(L"=item [text]\*(R" paragraph should not match | |
1489 | \&\f(CW\*(C`m/\eA=item\es+\ed+\e.?\es*\ez/\*(C'\fR or \f(CW\*(C`m/\eA=item\es+\e*\es*\ez/\*(C'\fR, nor should it | |
1490 | match just \f(CW\*(C`m/\eA=item\es*\ez/\*(C'\fR. | |
1491 | .IP "\(bu" 4 | |
1492 | An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing no \*(L"=item\*(R" paragraphs at | |
1493 | all, and containing only some number of | |
1494 | ordinary/verbatim paragraphs, and possibly also some nested \*(L"=over\*(R" | |
1495 | \&... \*(L"=back\*(R" regions, \*(L"=for...\*(R" paragraphs, and \*(L"=begin\*(R"...\*(L"=end\*(R" | |
1496 | regions. Such an itemless \*(L"=over\*(R" ... \*(L"=back\*(R" region in Pod is | |
1497 | equivalent in meaning to a \*(L"<blockquote>...</blockquote>\*(R" element in | |
1498 | \&\s-1HTML\s0. | |
1499 | .RE | |
1500 | .RS 4 | |
1501 | .Sp | |
1502 | Note that with all the above cases, you can determine which type of | |
1503 | \&\*(L"=over\*(R" ... \*(L"=back\*(R" you have, by examining the first (non\-\*(L"=cut\*(R", | |
1504 | non\-\*(L"=pod\*(R") Pod paragraph after the \*(L"=over\*(R" command. | |
1505 | .RE | |
1506 | .IP "\(bu" 4 | |
1507 | Pod formatters \fImust\fR tolerate arbitrarily large amounts of text | |
1508 | in the "=item \fItext...\fR" paragraph. In practice, most such | |
1509 | paragraphs are short, as in: | |
1510 | .Sp | |
1511 | .Vb 1 | |
1512 | \& =item For cutting off our trade with all parts of the world | |
1513 | .Ve | |
1514 | .Sp | |
1515 | But they may be arbitrarily long: | |
1516 | .Sp | |
1517 | .Vb 2 | |
1518 | \& =item For transporting us beyond seas to be tried for pretended | |
1519 | \& offenses | |
1520 | .Ve | |
1521 | .Sp | |
1522 | .Vb 5 | |
1523 | \& =item He is at this time transporting large armies of foreign | |
1524 | \& mercenaries to complete the works of death, desolation and | |
1525 | \& tyranny, already begun with circumstances of cruelty and perfidy | |
1526 | \& scarcely paralleled in the most barbarous ages, and totally | |
1527 | \& unworthy the head of a civilized nation. | |
1528 | .Ve | |
1529 | .IP "\(bu" 4 | |
1530 | Pod processors should tolerate \*(L"=item *\*(R" / "=item \fInumber\fR" commands | |
1531 | with no accompanying paragraph. The middle item is an example: | |
1532 | .Sp | |
1533 | .Vb 1 | |
1534 | \& =over | |
1535 | .Ve | |
1536 | .Sp | |
1537 | .Vb 1 | |
1538 | \& =item 1 | |
1539 | .Ve | |
1540 | .Sp | |
1541 | .Vb 1 | |
1542 | \& Pick up dry cleaning. | |
1543 | .Ve | |
1544 | .Sp | |
1545 | .Vb 1 | |
1546 | \& =item 2 | |
1547 | .Ve | |
1548 | .Sp | |
1549 | .Vb 1 | |
1550 | \& =item 3 | |
1551 | .Ve | |
1552 | .Sp | |
1553 | .Vb 1 | |
1554 | \& Stop by the store. Get Abba Zabas, Stoli, and cheap lawn chairs. | |
1555 | .Ve | |
1556 | .Sp | |
1557 | .Vb 1 | |
1558 | \& =back | |
1559 | .Ve | |
1560 | .IP "\(bu" 4 | |
1561 | No \*(L"=over\*(R" ... \*(L"=back\*(R" region can contain headings. Processors may | |
1562 | treat such a heading as an error. | |
1563 | .IP "\(bu" 4 | |
1564 | Note that an \*(L"=over\*(R" ... \*(L"=back\*(R" region should have some | |
1565 | content. That is, authors should not have an empty region like this: | |
1566 | .Sp | |
1567 | .Vb 1 | |
1568 | \& =over | |
1569 | .Ve | |
1570 | .Sp | |
1571 | .Vb 1 | |
1572 | \& =back | |
1573 | .Ve | |
1574 | .Sp | |
1575 | Pod processors seeing such a contentless \*(L"=over\*(R" ... \*(L"=back\*(R" region, | |
1576 | may ignore it, or may report it as an error. | |
1577 | .IP "\(bu" 4 | |
1578 | Processors must tolerate an \*(L"=over\*(R" list that goes off the end of the | |
1579 | document (i.e., which has no matching \*(L"=back\*(R"), but they may warn | |
1580 | about such a list. | |
1581 | .IP "\(bu" 4 | |
1582 | Authors of Pod formatters should note that this construct: | |
1583 | .Sp | |
1584 | .Vb 1 | |
1585 | \& =item Neque | |
1586 | .Ve | |
1587 | .Sp | |
1588 | .Vb 1 | |
1589 | \& =item Porro | |
1590 | .Ve | |
1591 | .Sp | |
1592 | .Vb 1 | |
1593 | \& =item Quisquam Est | |
1594 | .Ve | |
1595 | .Sp | |
1596 | .Vb 3 | |
1597 | \& Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci | |
1598 | \& velit, sed quia non numquam eius modi tempora incidunt ut | |
1599 | \& labore et dolore magnam aliquam quaerat voluptatem. | |
1600 | .Ve | |
1601 | .Sp | |
1602 | .Vb 1 | |
1603 | \& =item Ut Enim | |
1604 | .Ve | |
1605 | .Sp | |
1606 | is semantically ambiguous, in a way that makes formatting decisions | |
1607 | a bit difficult. On the one hand, it could be mention of an item | |
1608 | \&\*(L"Neque\*(R", mention of another item \*(L"Porro\*(R", and mention of another | |
1609 | item \*(L"Quisquam Est\*(R", with just the last one requiring the explanatory | |
1610 | paragraph \*(L"Qui dolorem ipsum quia dolor...\*(R"; and then an item | |
1611 | \&\*(L"Ut Enim\*(R". In that case, you'd want to format it like so: | |
1612 | .Sp | |
1613 | .Vb 1 | |
1614 | \& Neque | |
1615 | .Ve | |
1616 | .Sp | |
1617 | .Vb 1 | |
1618 | \& Porro | |
1619 | .Ve | |
1620 | .Sp | |
1621 | .Vb 4 | |
1622 | \& Quisquam Est | |
1623 | \& Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci | |
1624 | \& velit, sed quia non numquam eius modi tempora incidunt ut | |
1625 | \& labore et dolore magnam aliquam quaerat voluptatem. | |
1626 | .Ve | |
1627 | .Sp | |
1628 | .Vb 1 | |
1629 | \& Ut Enim | |
1630 | .Ve | |
1631 | .Sp | |
1632 | But it could equally well be a discussion of three (related or equivalent) | |
1633 | items, \*(L"Neque\*(R", \*(L"Porro\*(R", and \*(L"Quisquam Est\*(R", followed by a paragraph | |
1634 | explaining them all, and then a new item \*(L"Ut Enim\*(R". In that case, you'd | |
1635 | probably want to format it like so: | |
1636 | .Sp | |
1637 | .Vb 6 | |
1638 | \& Neque | |
1639 | \& Porro | |
1640 | \& Quisquam Est | |
1641 | \& Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci | |
1642 | \& velit, sed quia non numquam eius modi tempora incidunt ut | |
1643 | \& labore et dolore magnam aliquam quaerat voluptatem. | |
1644 | .Ve | |
1645 | .Sp | |
1646 | .Vb 1 | |
1647 | \& Ut Enim | |
1648 | .Ve | |
1649 | .Sp | |
1650 | But (for the foreseeable future), Pod does not provide any way for Pod | |
1651 | authors to distinguish which grouping is meant by the above | |
1652 | \&\*(L"=item\*(R"\-cluster structure. So formatters should format it like so: | |
1653 | .Sp | |
1654 | .Vb 1 | |
1655 | \& Neque | |
1656 | .Ve | |
1657 | .Sp | |
1658 | .Vb 1 | |
1659 | \& Porro | |
1660 | .Ve | |
1661 | .Sp | |
1662 | .Vb 1 | |
1663 | \& Quisquam Est | |
1664 | .Ve | |
1665 | .Sp | |
1666 | .Vb 3 | |
1667 | \& Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci | |
1668 | \& velit, sed quia non numquam eius modi tempora incidunt ut | |
1669 | \& labore et dolore magnam aliquam quaerat voluptatem. | |
1670 | .Ve | |
1671 | .Sp | |
1672 | .Vb 1 | |
1673 | \& Ut Enim | |
1674 | .Ve | |
1675 | .Sp | |
1676 | That is, there should be (at least roughly) equal spacing between | |
1677 | items as between paragraphs (although that spacing may well be less | |
1678 | than the full height of a line of text). This leaves it to the reader | |
1679 | to use (con)textual cues to figure out whether the \*(L"Qui dolorem | |
1680 | ipsum...\*(R" paragraph applies to the \*(L"Quisquam Est\*(R" item or to all three | |
1681 | items \*(L"Neque\*(R", \*(L"Porro\*(R", and \*(L"Quisquam Est\*(R". While not an ideal | |
1682 | situation, this is preferable to providing formatting cues that may | |
1683 | be actually contrary to the author's intent. | |
1684 | .ie n .SH "About Data Paragraphs and ""=begin/=end"" Regions" | |
1685 | .el .SH "About Data Paragraphs and ``=begin/=end'' Regions" | |
1686 | .IX Header "About Data Paragraphs and =begin/=end Regions" | |
1687 | Data paragraphs are typically used for inlining non-Pod data that is | |
1688 | to be used (typically passed through) when rendering the document to | |
1689 | a specific format: | |
1690 | .PP | |
1691 | .Vb 1 | |
1692 | \& =begin rtf | |
1693 | .Ve | |
1694 | .PP | |
1695 | .Vb 1 | |
1696 | \& \epar{\epard\eqr\esa4500{\ei Printed\e~\echdate\e~\echtime}\epar} | |
1697 | .Ve | |
1698 | .PP | |
1699 | .Vb 1 | |
1700 | \& =end rtf | |
1701 | .Ve | |
1702 | .PP | |
1703 | The exact same effect could, incidentally, be achieved with a single | |
1704 | \&\*(L"=for\*(R" paragraph: | |
1705 | .PP | |
1706 | .Vb 1 | |
1707 | \& =for rtf \epar{\epard\eqr\esa4500{\ei Printed\e~\echdate\e~\echtime}\epar} | |
1708 | .Ve | |
1709 | .PP | |
1710 | (Although that is not formally a data paragraph, it has the same | |
1711 | meaning as one, and Pod parsers may parse it as one.) | |
1712 | .PP | |
1713 | Another example of a data paragraph: | |
1714 | .PP | |
1715 | .Vb 1 | |
1716 | \& =begin html | |
1717 | .Ve | |
1718 | .PP | |
1719 | .Vb 1 | |
1720 | \& I like <em>PIE</em>! | |
1721 | .Ve | |
1722 | .PP | |
1723 | .Vb 1 | |
1724 | \& <hr>Especially pecan pie! | |
1725 | .Ve | |
1726 | .PP | |
1727 | .Vb 1 | |
1728 | \& =end html | |
1729 | .Ve | |
1730 | .PP | |
1731 | If these were ordinary paragraphs, the Pod parser would try to | |
1732 | expand the \*(L"E</em>\*(R" (in the first paragraph) as a formatting | |
1733 | code, just like \*(L"E<lt>\*(R" or \*(L"E<eacute>\*(R". But since this | |
1734 | is in a \*(L"=begin \fIidentifier\fR\*(R"...\*(L"=end \fIidentifier\fR\*(R" region \fIand\fR | |
1735 | the identifier \*(L"html\*(R" doesn't have a \*(L":\*(R" prefix, the contents | |
1736 | of this region are stored as data paragraphs, instead of being | |
1737 | processed as ordinary paragraphs (or if they began with spaces | |
1738 | and/or tabs, as verbatim paragraphs). | |
1739 | .PP | |
1740 | As a further example: At time of writing, no \*(L"biblio\*(R" identifier is | |
1741 | supported, but suppose some processor were written to recognize it as | |
1742 | a way of (say) denoting a bibliographic reference (necessarily | |
1743 | containing formatting codes in ordinary paragraphs). The fact that | |
1744 | \&\*(L"biblio\*(R" paragraphs were meant for ordinary processing would be | |
1745 | indicated by prefacing each \*(L"biblio\*(R" identifier with a colon: | |
1746 | .PP | |
1747 | .Vb 1 | |
1748 | \& =begin :biblio | |
1749 | .Ve | |
1750 | .PP | |
1751 | .Vb 2 | |
1752 | \& Wirth, Niklaus. 1976. I<Algorithms + Data Structures = | |
1753 | \& Programs.> Prentice-Hall, Englewood Cliffs, NJ. | |
1754 | .Ve | |
1755 | .PP | |
1756 | .Vb 1 | |
1757 | \& =end :biblio | |
1758 | .Ve | |
1759 | .PP | |
1760 | This would signal to the parser that paragraphs in this begin...end | |
1761 | region are subject to normal handling as ordinary/verbatim paragraphs | |
1762 | (while still tagged as meant only for processors that understand the | |
1763 | \&\*(L"biblio\*(R" identifier). The same effect could be had with: | |
1764 | .PP | |
1765 | .Vb 3 | |
1766 | \& =for :biblio | |
1767 | \& Wirth, Niklaus. 1976. I<Algorithms + Data Structures = | |
1768 | \& Programs.> Prentice-Hall, Englewood Cliffs, NJ. | |
1769 | .Ve | |
1770 | .PP | |
1771 | The \*(L":\*(R" on these identifiers means simply \*(L"process this stuff | |
1772 | normally, even though the result will be for some special target\*(R". | |
1773 | I suggest that parser APIs report \*(L"biblio\*(R" as the target identifier, | |
1774 | but also report that it had a \*(L":\*(R" prefix. (And similarly, with the | |
1775 | above \*(L"html\*(R", report \*(L"html\*(R" as the target identifier, and note the | |
1776 | \&\fIlack\fR of a \*(L":\*(R" prefix.) | |
1777 | .PP | |
1778 | Note that a \*(L"=begin \fIidentifier\fR\*(R"...\*(L"=end \fIidentifier\fR\*(R" region where | |
1779 | \&\fIidentifier\fR begins with a colon, \fIcan\fR contain commands. For example: | |
1780 | .PP | |
1781 | .Vb 1 | |
1782 | \& =begin :biblio | |
1783 | .Ve | |
1784 | .PP | |
1785 | .Vb 1 | |
1786 | \& Wirth's classic is available in several editions, including: | |
1787 | .Ve | |
1788 | .PP | |
1789 | .Vb 2 | |
1790 | \& =for comment | |
1791 | \& hm, check abebooks.com for how much used copies cost. | |
1792 | .Ve | |
1793 | .PP | |
1794 | .Vb 1 | |
1795 | \& =over | |
1796 | .Ve | |
1797 | .PP | |
1798 | .Vb 1 | |
1799 | \& =item | |
1800 | .Ve | |
1801 | .PP | |
1802 | .Vb 2 | |
1803 | \& Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.> | |
1804 | \& Teubner, Stuttgart. [Yes, it's in German.] | |
1805 | .Ve | |
1806 | .PP | |
1807 | .Vb 1 | |
1808 | \& =item | |
1809 | .Ve | |
1810 | .PP | |
1811 | .Vb 2 | |
1812 | \& Wirth, Niklaus. 1976. I<Algorithms + Data Structures = | |
1813 | \& Programs.> Prentice-Hall, Englewood Cliffs, NJ. | |
1814 | .Ve | |
1815 | .PP | |
1816 | .Vb 1 | |
1817 | \& =back | |
1818 | .Ve | |
1819 | .PP | |
1820 | .Vb 1 | |
1821 | \& =end :biblio | |
1822 | .Ve | |
1823 | .PP | |
1824 | Note, however, a \*(L"=begin \fIidentifier\fR\*(R"...\*(L"=end \fIidentifier\fR\*(R" | |
1825 | region where \fIidentifier\fR does \fInot\fR begin with a colon, should not | |
1826 | directly contain \*(L"=head1\*(R" ... \*(L"=head4\*(R" commands, nor \*(L"=over\*(R", nor \*(L"=back\*(R", | |
1827 | nor \*(L"=item\*(R". For example, this may be considered invalid: | |
1828 | .PP | |
1829 | .Vb 1 | |
1830 | \& =begin somedata | |
1831 | .Ve | |
1832 | .PP | |
1833 | .Vb 1 | |
1834 | \& This is a data paragraph. | |
1835 | .Ve | |
1836 | .PP | |
1837 | .Vb 1 | |
1838 | \& =head1 Don't do this! | |
1839 | .Ve | |
1840 | .PP | |
1841 | .Vb 1 | |
1842 | \& This is a data paragraph too. | |
1843 | .Ve | |
1844 | .PP | |
1845 | .Vb 1 | |
1846 | \& =end somedata | |
1847 | .Ve | |
1848 | .PP | |
1849 | A Pod processor may signal that the above (specifically the \*(L"=head1\*(R" | |
1850 | paragraph) is an error. Note, however, that the following should | |
1851 | \&\fInot\fR be treated as an error: | |
1852 | .PP | |
1853 | .Vb 1 | |
1854 | \& =begin somedata | |
1855 | .Ve | |
1856 | .PP | |
1857 | .Vb 1 | |
1858 | \& This is a data paragraph. | |
1859 | .Ve | |
1860 | .PP | |
1861 | .Vb 1 | |
1862 | \& =cut | |
1863 | .Ve | |
1864 | .PP | |
1865 | .Vb 2 | |
1866 | \& # Yup, this isn't Pod anymore. | |
1867 | \& sub excl { (rand() > .5) ? "hoo!" : "hah!" } | |
1868 | .Ve | |
1869 | .PP | |
1870 | .Vb 1 | |
1871 | \& =pod | |
1872 | .Ve | |
1873 | .PP | |
1874 | .Vb 1 | |
1875 | \& This is a data paragraph too. | |
1876 | .Ve | |
1877 | .PP | |
1878 | .Vb 1 | |
1879 | \& =end somedata | |
1880 | .Ve | |
1881 | .PP | |
1882 | And this too is valid: | |
1883 | .PP | |
1884 | .Vb 1 | |
1885 | \& =begin someformat | |
1886 | .Ve | |
1887 | .PP | |
1888 | .Vb 1 | |
1889 | \& This is a data paragraph. | |
1890 | .Ve | |
1891 | .PP | |
1892 | .Vb 1 | |
1893 | \& And this is a data paragraph. | |
1894 | .Ve | |
1895 | .PP | |
1896 | .Vb 1 | |
1897 | \& =begin someotherformat | |
1898 | .Ve | |
1899 | .PP | |
1900 | .Vb 1 | |
1901 | \& This is a data paragraph too. | |
1902 | .Ve | |
1903 | .PP | |
1904 | .Vb 1 | |
1905 | \& And this is a data paragraph too. | |
1906 | .Ve | |
1907 | .PP | |
1908 | .Vb 1 | |
1909 | \& =begin :yetanotherformat | |
1910 | .Ve | |
1911 | .PP | |
1912 | .Vb 1 | |
1913 | \& =head2 This is a command paragraph! | |
1914 | .Ve | |
1915 | .PP | |
1916 | .Vb 1 | |
1917 | \& This is an ordinary paragraph! | |
1918 | .Ve | |
1919 | .PP | |
1920 | .Vb 1 | |
1921 | \& And this is a verbatim paragraph! | |
1922 | .Ve | |
1923 | .PP | |
1924 | .Vb 1 | |
1925 | \& =end :yetanotherformat | |
1926 | .Ve | |
1927 | .PP | |
1928 | .Vb 1 | |
1929 | \& =end someotherformat | |
1930 | .Ve | |
1931 | .PP | |
1932 | .Vb 1 | |
1933 | \& Another data paragraph! | |
1934 | .Ve | |
1935 | .PP | |
1936 | .Vb 1 | |
1937 | \& =end someformat | |
1938 | .Ve | |
1939 | .PP | |
1940 | The contents of the above \*(L"=begin :yetanotherformat\*(R" ... | |
1941 | \&\*(L"=end :yetanotherformat\*(R" region \fIaren't\fR data paragraphs, because | |
1942 | the immediately containing region's identifier (\*(L":yetanotherformat\*(R") | |
1943 | begins with a colon. In practice, most regions that contain | |
1944 | data paragraphs will contain \fIonly\fR data paragraphs; however, | |
1945 | the above nesting is syntactically valid as Pod, even if it is | |
1946 | rare. However, the handlers for some formats, like \*(L"html\*(R", | |
1947 | will accept only data paragraphs, not nested regions; and they may | |
1948 | complain if they see (targeted for them) nested regions, or commands, | |
1949 | other than \*(L"=end\*(R", \*(L"=pod\*(R", and \*(L"=cut\*(R". | |
1950 | .PP | |
1951 | Also consider this valid structure: | |
1952 | .PP | |
1953 | .Vb 1 | |
1954 | \& =begin :biblio | |
1955 | .Ve | |
1956 | .PP | |
1957 | .Vb 1 | |
1958 | \& Wirth's classic is available in several editions, including: | |
1959 | .Ve | |
1960 | .PP | |
1961 | .Vb 1 | |
1962 | \& =over | |
1963 | .Ve | |
1964 | .PP | |
1965 | .Vb 1 | |
1966 | \& =item | |
1967 | .Ve | |
1968 | .PP | |
1969 | .Vb 2 | |
1970 | \& Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.> | |
1971 | \& Teubner, Stuttgart. [Yes, it's in German.] | |
1972 | .Ve | |
1973 | .PP | |
1974 | .Vb 1 | |
1975 | \& =item | |
1976 | .Ve | |
1977 | .PP | |
1978 | .Vb 2 | |
1979 | \& Wirth, Niklaus. 1976. I<Algorithms + Data Structures = | |
1980 | \& Programs.> Prentice-Hall, Englewood Cliffs, NJ. | |
1981 | .Ve | |
1982 | .PP | |
1983 | .Vb 1 | |
1984 | \& =back | |
1985 | .Ve | |
1986 | .PP | |
1987 | .Vb 1 | |
1988 | \& Buy buy buy! | |
1989 | .Ve | |
1990 | .PP | |
1991 | .Vb 1 | |
1992 | \& =begin html | |
1993 | .Ve | |
1994 | .PP | |
1995 | .Vb 1 | |
1996 | \& <img src='wirth_spokesmodeling_book.png'> | |
1997 | .Ve | |
1998 | .PP | |
1999 | .Vb 1 | |
2000 | \& <hr> | |
2001 | .Ve | |
2002 | .PP | |
2003 | .Vb 1 | |
2004 | \& =end html | |
2005 | .Ve | |
2006 | .PP | |
2007 | .Vb 1 | |
2008 | \& Now now now! | |
2009 | .Ve | |
2010 | .PP | |
2011 | .Vb 1 | |
2012 | \& =end :biblio | |
2013 | .Ve | |
2014 | .PP | |
2015 | There, the \*(L"=begin html\*(R"...\*(L"=end html\*(R" region is nested inside | |
2016 | the larger \*(L"=begin :biblio\*(R"...\*(L"=end :biblio\*(R" region. Note that the | |
2017 | content of the \*(L"=begin html\*(R"...\*(L"=end html\*(R" region is data | |
2018 | paragraph(s), because the immediately containing region's identifier | |
2019 | (\*(L"html\*(R") \fIdoesn't\fR begin with a colon. | |
2020 | .PP | |
2021 | Pod parsers, when processing a series of data paragraphs one | |
2022 | after another (within a single region), should consider them to | |
2023 | be one large data paragraph that happens to contain blank lines. So | |
2024 | the content of the above \*(L"=begin html\*(R"...\*(L"=end html\*(R" \fImay\fR be stored | |
2025 | as two data paragraphs (one consisting of | |
2026 | \&\*(L"<img src='wirth_spokesmodeling_book.png'>\en\*(R" | |
2027 | and another consisting of \*(L"<hr>\en\*(R"), but \fIshould\fR be stored as | |
2028 | a single data paragraph (consisting of | |
2029 | \&\*(L"<img src='wirth_spokesmodeling_book.png'>\en\en<hr>\en\*(R"). | |
2030 | .PP | |
2031 | Pod processors should tolerate empty | |
2032 | \&\*(L"=begin \fIsomething\fR\*(R"...\*(L"=end \fIsomething\fR\*(R" regions, | |
2033 | empty \*(L"=begin :\fIsomething\fR\*(R"...\*(L"=end :\fIsomething\fR\*(R" regions, and | |
2034 | contentless \*(L"=for \fIsomething\fR\*(R" and \*(L"=for :\fIsomething\fR\*(R" | |
2035 | paragraphs. I.e., these should be tolerated: | |
2036 | .PP | |
2037 | .Vb 1 | |
2038 | \& =for html | |
2039 | .Ve | |
2040 | .PP | |
2041 | .Vb 1 | |
2042 | \& =begin html | |
2043 | .Ve | |
2044 | .PP | |
2045 | .Vb 1 | |
2046 | \& =end html | |
2047 | .Ve | |
2048 | .PP | |
2049 | .Vb 1 | |
2050 | \& =begin :biblio | |
2051 | .Ve | |
2052 | .PP | |
2053 | .Vb 1 | |
2054 | \& =end :biblio | |
2055 | .Ve | |
2056 | .PP | |
2057 | Incidentally, note that there's no easy way to express a data | |
2058 | paragraph starting with something that looks like a command. Consider: | |
2059 | .PP | |
2060 | .Vb 1 | |
2061 | \& =begin stuff | |
2062 | .Ve | |
2063 | .PP | |
2064 | .Vb 1 | |
2065 | \& =shazbot | |
2066 | .Ve | |
2067 | .PP | |
2068 | .Vb 1 | |
2069 | \& =end stuff | |
2070 | .Ve | |
2071 | .PP | |
2072 | There, \*(L"=shazbot\*(R" will be parsed as a Pod command \*(L"shazbot\*(R", not as a data | |
2073 | paragraph \*(L"=shazbot\en\*(R". However, you can express a data paragraph consisting | |
2074 | of \*(L"=shazbot\en\*(R" using this code: | |
2075 | .PP | |
2076 | .Vb 1 | |
2077 | \& =for stuff =shazbot | |
2078 | .Ve | |
2079 | .PP | |
2080 | The situation where this is necessary is presumably quite rare. | |
2081 | .PP | |
2082 | Note that =end commands must match the currently open =begin command. That | |
2083 | is, they must properly nest. For example, this is valid: | |
2084 | .PP | |
2085 | .Vb 1 | |
2086 | \& =begin outer | |
2087 | .Ve | |
2088 | .PP | |
2089 | .Vb 1 | |
2090 | \& X | |
2091 | .Ve | |
2092 | .PP | |
2093 | .Vb 1 | |
2094 | \& =begin inner | |
2095 | .Ve | |
2096 | .PP | |
2097 | .Vb 1 | |
2098 | \& Y | |
2099 | .Ve | |
2100 | .PP | |
2101 | .Vb 1 | |
2102 | \& =end inner | |
2103 | .Ve | |
2104 | .PP | |
2105 | .Vb 1 | |
2106 | \& Z | |
2107 | .Ve | |
2108 | .PP | |
2109 | .Vb 1 | |
2110 | \& =end outer | |
2111 | .Ve | |
2112 | .PP | |
2113 | while this is invalid: | |
2114 | .PP | |
2115 | .Vb 1 | |
2116 | \& =begin outer | |
2117 | .Ve | |
2118 | .PP | |
2119 | .Vb 1 | |
2120 | \& X | |
2121 | .Ve | |
2122 | .PP | |
2123 | .Vb 1 | |
2124 | \& =begin inner | |
2125 | .Ve | |
2126 | .PP | |
2127 | .Vb 1 | |
2128 | \& Y | |
2129 | .Ve | |
2130 | .PP | |
2131 | .Vb 1 | |
2132 | \& =end outer | |
2133 | .Ve | |
2134 | .PP | |
2135 | .Vb 1 | |
2136 | \& Z | |
2137 | .Ve | |
2138 | .PP | |
2139 | .Vb 1 | |
2140 | \& =end inner | |
2141 | .Ve | |
2142 | .PP | |
2143 | This latter is improper because when the \*(L"=end outer\*(R" command is seen, the | |
2144 | currently open region has the formatname \*(L"inner\*(R", not \*(L"outer\*(R". (It just | |
2145 | happens that \*(L"outer\*(R" is the formatname of a higher-up region.) This is | |
2146 | an error. Processors must by default report this as an error, and may halt | |
2147 | processing the document containing that error. A corollary of this is that | |
2148 | regions cannot \*(L"overlap\*(R" \*(-- i.e., the latter block above does not represent | |
2149 | a region called \*(L"outer\*(R" which contains X and Y, overlapping a region called | |
2150 | \&\*(L"inner\*(R" which contains Y and Z. But because it is invalid (as all | |
2151 | apparently overlapping regions would be), it doesn't represent that, or | |
2152 | anything at all. | |
2153 | .PP | |
2154 | Similarly, this is invalid: | |
2155 | .PP | |
2156 | .Vb 1 | |
2157 | \& =begin thing | |
2158 | .Ve | |
2159 | .PP | |
2160 | .Vb 1 | |
2161 | \& =end hting | |
2162 | .Ve | |
2163 | .PP | |
2164 | This is an error because the region is opened by \*(L"thing\*(R", and the \*(L"=end\*(R" | |
2165 | tries to close \*(L"hting\*(R" [sic]. | |
2166 | .PP | |
2167 | This is also invalid: | |
2168 | .PP | |
2169 | .Vb 1 | |
2170 | \& =begin thing | |
2171 | .Ve | |
2172 | .PP | |
2173 | .Vb 1 | |
2174 | \& =end | |
2175 | .Ve | |
2176 | .PP | |
2177 | This is invalid because every \*(L"=end\*(R" command must have a formatname | |
2178 | parameter. | |
2179 | .SH "SEE ALSO" | |
2180 | .IX Header "SEE ALSO" | |
2181 | perlpod, \*(L"PODs: Embedded Documentation\*(R" in perlsyn, | |
2182 | podchecker | |
2183 | .SH "AUTHOR" | |
2184 | .IX Header "AUTHOR" | |
2185 | Sean M. Burke |