Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / devtools / amd64 / man / man1 / perlpodspec.1
CommitLineData
920dae64
AT
1.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sh \" Subsection heading
6.br
7.if t .Sp
8.ne 5
9.PP
10\fB\\$1\fR
11.PP
12..
13.de Sp \" Vertical space (when we can't use .PP)
14.if t .sp .5v
15.if n .sp
16..
17.de Vb \" Begin verbatim text
18.ft CW
19.nf
20.ne \\$1
21..
22.de Ve \" End verbatim text
23.ft R
24.fi
25..
26.\" Set up some character translations and predefined strings. \*(-- will
27.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28.\" double quote, and \*(R" will give a right double quote. | will give a
29.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31.\" expand to `' in nroff, nothing in troff, for use with C<>.
32.tr \(*W-|\(bv\*(Tr
33.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
34.ie n \{\
35. ds -- \(*W-
36. ds PI pi
37. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
39. ds L" ""
40. ds R" ""
41. ds C` ""
42. ds C' ""
43'br\}
44.el\{\
45. ds -- \|\(em\|
46. ds PI \(*p
47. ds L" ``
48. ds R" ''
49'br\}
50.\"
51.\" If the F register is turned on, we'll generate index entries on stderr for
52.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53.\" entries marked with X<> in POD. Of course, you'll have to process the
54.\" output yourself in some meaningful fashion.
55.if \nF \{\
56. de IX
57. tm Index:\\$1\t\\n%\t"\\$2"
58..
59. nr % 0
60. rr F
61.\}
62.\"
63.\" For nroff, turn off justification. Always turn off hyphenation; it makes
64.\" way too many mistakes in technical documents.
65.hy 0
66.if n .na
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "PERLPODSPEC 1"
132.TH PERLPODSPEC 1 "2006-01-07" "perl v5.8.8" "Perl Programmers Reference Guide"
133.SH "NAME"
134perlpodspec \- Plain Old Documentation: format specification and notes
135.SH "DESCRIPTION"
136.IX Header "DESCRIPTION"
137This document is detailed notes on the Pod markup language. Most
138people will only have to read perlpod to know how to write
139in Pod, but this document may answer some incidental questions to do
140with parsing and rendering Pod.
141.PP
142In this document, \*(L"must\*(R" / \*(L"must not\*(R", \*(L"should\*(R" /
143\&\*(L"should not\*(R", and \*(L"may\*(R" have their conventional (cf. \s-1RFC\s0 2119)
144meanings: \*(L"X must do Y\*(R" means that if X doesn't do Y, it's against
145this specification, and should really be fixed. \*(L"X should do Y\*(R"
146means that it's recommended, but X may fail to do Y, if there's a
147good reason. \*(L"X may do Y\*(R" is merely a note that X can do Y at
148will (although it is up to the reader to detect any connotation of
149\&\*(L"and I think it would be \fInice\fR if X did Y\*(R" versus \*(L"it wouldn't
150really \fIbother\fR me if X did Y\*(R").
151.PP
152Notably, when I say \*(L"the parser should do Y\*(R", the
153parser may fail to do Y, if the calling application explicitly
154requests that the parser \fInot\fR do Y. I often phrase this as
155\&\*(L"the parser should, by default, do Y.\*(R" This doesn't \fIrequire\fR
156the parser to provide an option for turning off whatever
157feature Y is (like expanding tabs in verbatim paragraphs), although
158it implies that such an option \fImay\fR be provided.
159.SH "Pod Definitions"
160.IX Header "Pod Definitions"
161Pod is embedded in files, typically Perl source files \*(-- although you
162can write a file that's nothing but Pod.
163.PP
164A \fBline\fR in a file consists of zero or more non-newline characters,
165terminated by either a newline or the end of the file.
166.PP
167A \fBnewline sequence\fR is usually a platform-dependent concept, but
168Pod parsers should understand it to mean any of \s-1CR\s0 (\s-1ASCII\s0 13), \s-1LF\s0
169(\s-1ASCII\s0 10), or a \s-1CRLF\s0 (\s-1ASCII\s0 13 followed immediately by \s-1ASCII\s0 10), in
170addition to any other system-specific meaning. The first \s-1CR/CRLF/LF\s0
171sequence in the file may be used as the basis for identifying the
172newline sequence for parsing the rest of the file.
173.PP
174A \fBblank line\fR is a line consisting entirely of zero or more spaces
175(\s-1ASCII\s0 32) or tabs (\s-1ASCII\s0 9), and terminated by a newline or end\-of\-file.
176A \fBnon-blank line\fR is a line containing one or more characters other
177than space or tab (and terminated by a newline or end\-of\-file).
178.PP
179(\fINote:\fR Many older Pod parsers did not accept a line consisting of
180spaces/tabs and then a newline as a blank line \*(-- the only lines they
181considered blank were lines consisting of \fIno characters at all\fR,
182terminated by a newline.)
183.PP
184\&\fBWhitespace\fR is used in this document as a blanket term for spaces,
185tabs, and newline sequences. (By itself, this term usually refers
186to literal whitespace. That is, sequences of whitespace characters
187in Pod source, as opposed to "E<32>", which is a formatting
188code that \fIdenotes\fR a whitespace character.)
189.PP
190A \fBPod parser\fR is a module meant for parsing Pod (regardless of
191whether this involves calling callbacks or building a parse tree or
192directly formatting it). A \fBPod formatter\fR (or \fBPod translator\fR)
193is a module or program that converts Pod to some other format (\s-1HTML\s0,
194plaintext, TeX, PostScript, \s-1RTF\s0). A \fBPod processor\fR might be a
195formatter or translator, or might be a program that does something
196else with the Pod (like wordcounting it, scanning for index points,
197etc.).
198.PP
199Pod content is contained in \fBPod blocks\fR. A Pod block starts with a
200line that matches \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR, and continues up to the next line
201that matches \f(CW\*(C`m/\eA=cut/\*(C'\fR \*(-- or up to the end of the file, if there is
202no \f(CW\*(C`m/\eA=cut/\*(C'\fR line.
203.PP
204Within a Pod block, there are \fBPod paragraphs\fR. A Pod paragraph
205consists of non-blank lines of text, separated by one or more blank
206lines.
207.PP
208For purposes of Pod processing, there are four types of paragraphs in
209a Pod block:
210.IP "\(bu" 4
211A command paragraph (also called a \*(L"directive\*(R"). The first line of
212this paragraph must match \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR. Command paragraphs are
213typically one line, as in:
214.Sp
215.Vb 1
216\& =head1 NOTES
217.Ve
218.Sp
219.Vb 1
220\& =item *
221.Ve
222.Sp
223But they may span several (non\-blank) lines:
224.Sp
225.Vb 3
226\& =for comment
227\& Hm, I wonder what it would look like if
228\& you tried to write a BNF for Pod from this.
229.Ve
230.Sp
231.Vb 2
232\& =head3 Dr. Strangelove, or: How I Learned to
233\& Stop Worrying and Love the Bomb
234.Ve
235.Sp
236\&\fISome\fR command paragraphs allow formatting codes in their content
237(i.e., after the part that matches \f(CW\*(C`m/\eA=[a\-zA\-Z]\eS*\es*/\*(C'\fR), as in:
238.Sp
239.Vb 1
240\& =head1 Did You Remember to C<use strict;>?
241.Ve
242.Sp
243In other words, the Pod processing handler for \*(L"head1\*(R" will apply the
244same processing to \*(L"Did You Remember to C<use strict;>?\*(R" that it
245would to an ordinary paragraph \*(-- i.e., formatting codes (like
246\&\*(L"C<...>\*(R") are parsed and presumably formatted appropriately, and
247whitespace in the form of literal spaces and/or tabs is not
248significant.
249.IP "\(bu" 4
250A \fBverbatim paragraph\fR. The first line of this paragraph must be a
251literal space or tab, and this paragraph must not be inside a "=begin
252\&\fIidentifier\fR\*(L", ... \*(R"=end \fIidentifier\fR\*(L" sequence unless
253\&\*(R"\fIidentifier\fR\*(L" begins with a colon (\*(R":"). That is, if a paragraph
254starts with a literal space or tab, but \fIis\fR inside a
255"=begin \fIidentifier\fR\*(L", ... \*(R"=end \fIidentifier\fR\*(L" region, then it's
256a data paragraph, unless \*(R"\fIidentifier\fR" begins with a colon.
257.Sp
258Whitespace \fIis\fR significant in verbatim paragraphs (although, in
259processing, tabs are probably expanded).
260.IP "\(bu" 4
261An \fBordinary paragraph\fR. A paragraph is an ordinary paragraph
262if its first line matches neither \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR nor
263\&\f(CW\*(C`m/\eA[ \et]/\*(C'\fR, \fIand\fR if it's not inside a \*(L"=begin \fIidentifier\fR\*(R",
264\&... \*(L"=end \fIidentifier\fR\*(R" sequence unless \*(L"\fIidentifier\fR\*(R" begins with
265a colon (\*(L":\*(R").
266.IP "\(bu" 4
267A \fBdata paragraph\fR. This is a paragraph that \fIis\fR inside a \*(L"=begin
268\&\fIidentifier\fR\*(R" ... \*(L"=end \fIidentifier\fR\*(R" sequence where
269\&\*(L"\fIidentifier\fR\*(R" does \fInot\fR begin with a literal colon (\*(L":\*(R"). In
270some sense, a data paragraph is not part of Pod at all (i.e.,
271effectively it's \*(L"out\-of\-band\*(R"), since it's not subject to most kinds
272of Pod parsing; but it is specified here, since Pod
273parsers need to be able to call an event for it, or store it in some
274form in a parse tree, or at least just parse \fIaround\fR it.
275.PP
276For example: consider the following paragraphs:
277.PP
278.Vb 1
279\& # <- that's the 0th column
280.Ve
281.PP
282.Vb 1
283\& =head1 Foo
284.Ve
285.PP
286.Vb 1
287\& Stuff
288.Ve
289.PP
290.Vb 1
291\& $foo->bar
292.Ve
293.PP
294.Vb 1
295\& =cut
296.Ve
297.PP
298Here, \*(L"=head1 Foo\*(R" and \*(L"=cut\*(R" are command paragraphs because the first
299line of each matches \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR. \*(L"\fI[space][space]\fR$foo\->bar\*(R"
300is a verbatim paragraph, because its first line starts with a literal
301whitespace character (and there's no \*(L"=begin\*(R"...\*(L"=end\*(R" region around).
302.PP
303The \*(L"=begin \fIidentifier\fR\*(R" ... \*(L"=end \fIidentifier\fR\*(R" commands stop
304paragraphs that they surround from being parsed as ordinary or verbatim
305paragraphs, if \fIidentifier\fR doesn't begin with a colon. This
306is discussed in detail in the section
307\&\*(L"About Data Paragraphs and "=begin/=end" Regions\*(R".
308.SH "Pod Commands"
309.IX Header "Pod Commands"
310This section is intended to supplement and clarify the discussion in
311\&\*(L"Command Paragraph\*(R" in perlpod. These are the currently recognized
312Pod commands:
313.ie n .IP """=head1"", ""=head2"", ""=head3"", ""=head4""" 4
314.el .IP "``=head1'', ``=head2'', ``=head3'', ``=head4''" 4
315.IX Item "=head1, =head2, =head3, =head4"
316This command indicates that the text in the remainder of the paragraph
317is a heading. That text may contain formatting codes. Examples:
318.Sp
319.Vb 1
320\& =head1 Object Attributes
321.Ve
322.Sp
323.Vb 1
324\& =head3 What B<Not> to Do!
325.Ve
326.ie n .IP """=pod""" 4
327.el .IP "``=pod''" 4
328.IX Item "=pod"
329This command indicates that this paragraph begins a Pod block. (If we
330are already in the middle of a Pod block, this command has no effect at
331all.) If there is any text in this command paragraph after \*(L"=pod\*(R",
332it must be ignored. Examples:
333.Sp
334.Vb 1
335\& =pod
336.Ve
337.Sp
338.Vb 1
339\& This is a plain Pod paragraph.
340.Ve
341.Sp
342.Vb 1
343\& =pod This text is ignored.
344.Ve
345.ie n .IP """=cut""" 4
346.el .IP "``=cut''" 4
347.IX Item "=cut"
348This command indicates that this line is the end of this previously
349started Pod block. If there is any text after \*(L"=cut\*(R" on the line, it must be
350ignored. Examples:
351.Sp
352.Vb 1
353\& =cut
354.Ve
355.Sp
356.Vb 1
357\& =cut The documentation ends here.
358.Ve
359.Sp
360.Vb 3
361\& =cut
362\& # This is the first line of program text.
363\& sub foo { # This is the second.
364.Ve
365.Sp
366It is an error to try to \fIstart\fR a Pod block with a \*(L"=cut\*(R" command. In
367that case, the Pod processor must halt parsing of the input file, and
368must by default emit a warning.
369.ie n .IP """=over""" 4
370.el .IP "``=over''" 4
371.IX Item "=over"
372This command indicates that this is the start of a list/indent
373region. If there is any text following the \*(L"=over\*(R", it must consist
374of only a nonzero positive numeral. The semantics of this numeral is
375explained in the \*(L"About =over...=back Regions\*(R" section, further
376below. Formatting codes are not expanded. Examples:
377.Sp
378.Vb 1
379\& =over 3
380.Ve
381.Sp
382.Vb 1
383\& =over 3.5
384.Ve
385.Sp
386.Vb 1
387\& =over
388.Ve
389.ie n .IP """=item""" 4
390.el .IP "``=item''" 4
391.IX Item "=item"
392This command indicates that an item in a list begins here. Formatting
393codes are processed. The semantics of the (optional) text in the
394remainder of this paragraph are
395explained in the \*(L"About =over...=back Regions\*(R" section, further
396below. Examples:
397.Sp
398.Vb 1
399\& =item
400.Ve
401.Sp
402.Vb 1
403\& =item *
404.Ve
405.Sp
406.Vb 1
407\& =item *
408.Ve
409.Sp
410.Vb 1
411\& =item 14
412.Ve
413.Sp
414.Vb 1
415\& =item 3.
416.Ve
417.Sp
418.Vb 1
419\& =item C<< $thing->stuff(I<dodad>) >>
420.Ve
421.Sp
422.Vb 2
423\& =item For transporting us beyond seas to be tried for pretended
424\& offenses
425.Ve
426.Sp
427.Vb 5
428\& =item He is at this time transporting large armies of foreign
429\& mercenaries to complete the works of death, desolation and
430\& tyranny, already begun with circumstances of cruelty and perfidy
431\& scarcely paralleled in the most barbarous ages, and totally
432\& unworthy the head of a civilized nation.
433.Ve
434.ie n .IP """=back""" 4
435.el .IP "``=back''" 4
436.IX Item "=back"
437This command indicates that this is the end of the region begun
438by the most recent \*(L"=over\*(R" command. It permits no text after the
439\&\*(L"=back\*(R" command.
440.ie n .IP """=begin formatname""" 4
441.el .IP "``=begin formatname''" 4
442.IX Item "=begin formatname"
443This marks the following paragraphs (until the matching \*(L"=end
444formatname\*(R") as being for some special kind of processing. Unless
445\&\*(L"formatname\*(R" begins with a colon, the contained non-command
446paragraphs are data paragraphs. But if \*(L"formatname\*(R" \fIdoes\fR begin
447with a colon, then non-command paragraphs are ordinary paragraphs
448or data paragraphs. This is discussed in detail in the section
449\&\*(L"About Data Paragraphs and \*(R"=begin/=end\*(L" Regions\*(R".
450.Sp
451It is advised that formatnames match the regexp
452\&\f(CW\*(C`m/\eA:?[\-a\-zA\-Z0\-9_]+\ez/\*(C'\fR. Implementors should anticipate future
453expansion in the semantics and syntax of the first parameter
454to \*(L"=begin\*(R"/\*(L"=end\*(R"/\*(L"=for\*(R".
455.ie n .IP """=end formatname""" 4
456.el .IP "``=end formatname''" 4
457.IX Item "=end formatname"
458This marks the end of the region opened by the matching
459\&\*(L"=begin formatname\*(R" region. If \*(L"formatname\*(R" is not the formatname
460of the most recent open \*(L"=begin formatname\*(R" region, then this
461is an error, and must generate an error message. This
462is discussed in detail in the section
463\&\*(L"About Data Paragraphs and "=begin/=end" Regions\*(R".
464.ie n .IP """=for formatname text...""" 4
465.el .IP "``=for formatname text...''" 4
466.IX Item "=for formatname text..."
467This is synonymous with:
468.Sp
469.Vb 1
470\& =begin formatname
471.Ve
472.Sp
473.Vb 1
474\& text...
475.Ve
476.Sp
477.Vb 1
478\& =end formatname
479.Ve
480.Sp
481That is, it creates a region consisting of a single paragraph; that
482paragraph is to be treated as a normal paragraph if \*(L"formatname\*(R"
483begins with a \*(L":\*(R"; if \*(L"formatname\*(R" \fIdoesn't\fR begin with a colon,
484then \*(L"text...\*(R" will constitute a data paragraph. There is no way
485to use \*(L"=for formatname text...\*(R" to express \*(L"text...\*(R" as a verbatim
486paragraph.
487.ie n .IP """=encoding encodingname""" 4
488.el .IP "``=encoding encodingname''" 4
489.IX Item "=encoding encodingname"
490This command, which should occur early in the document (at least
491before any non-US-ASCII data!), declares that this document is
492encoded in the encoding \fIencodingname\fR, which must be
493an encoding name that Encode recognizes. (Encode's list
494of supported encodings, in Encode::Supported, is useful here.)
495If the Pod parser cannot decode the declared encoding, it
496should emit a warning and may abort parsing the document
497altogether.
498.Sp
499A document having more than one \*(L"=encoding\*(R" line should be
500considered an error. Pod processors may silently tolerate this if
501the not-first \*(L"=encoding\*(R" lines are just duplicates of the
502first one (e.g., if there's a \*(L"=encoding utf8\*(R" line, and later on
503another \*(L"=encoding utf8\*(R" line). But Pod processors should complain if
504there are contradictory \*(L"=encoding\*(R" lines in the same document
505(e.g., if there is a \*(L"=encoding utf8\*(R" early in the document and
506\&\*(L"=encoding big5\*(R" later). Pod processors that recognize BOMs
507may also complain if they see an \*(L"=encoding\*(R" line
508that contradicts the \s-1BOM\s0 (e.g., if a document with a \s-1UTF\-16LE\s0
509\&\s-1BOM\s0 has an \*(L"=encoding shiftjis\*(R" line).
510.PP
511If a Pod processor sees any command other than the ones listed
512above (like \*(L"=head\*(R", or \*(L"=haed1\*(R", or \*(L"=stuff\*(R", or \*(L"=cuttlefish\*(R",
513or \*(L"=w123\*(R"), that processor must by default treat this as an
514error. It must not process the paragraph beginning with that
515command, must by default warn of this as an error, and may
516abort the parse. A Pod parser may allow a way for particular
517applications to add to the above list of known commands, and to
518stipulate, for each additional command, whether formatting
519codes should be processed.
520.PP
521Future versions of this specification may add additional
522commands.
523.SH "Pod Formatting Codes"
524.IX Header "Pod Formatting Codes"
525(Note that in previous drafts of this document and of perlpod,
526formatting codes were referred to as \*(L"interior sequences\*(R", and
527this term may still be found in the documentation for Pod parsers,
528and in error messages from Pod processors.)
529.PP
530There are two syntaxes for formatting codes:
531.IP "\(bu" 4
532A formatting code starts with a capital letter (just US-ASCII [A\-Z])
533followed by a \*(L"<\*(R", any number of characters, and ending with the first
534matching \*(L">\*(R". Examples:
535.Sp
536.Vb 1
537\& That's what I<you> think!
538.Ve
539.Sp
540.Vb 1
541\& What's C<dump()> for?
542.Ve
543.Sp
544.Vb 1
545\& X<C<chmod> and C<unlink()> Under Different Operating Systems>
546.Ve
547.IP "\(bu" 4
548A formatting code starts with a capital letter (just US-ASCII [A\-Z])
549followed by two or more \*(L"<\*(R"'s, one or more whitespace characters,
550any number of characters, one or more whitespace characters,
551and ending with the first matching sequence of two or more \*(L">\*(R"'s, where
552the number of \*(L">\*(R"'s equals the number of \*(L"<\*(R"'s in the opening of this
553formatting code. Examples:
554.Sp
555.Vb 1
556\& That's what I<< you >> think!
557.Ve
558.Sp
559.Vb 1
560\& C<<< open(X, ">>thing.dat") || die $! >>>
561.Ve
562.Sp
563.Vb 1
564\& B<< $foo->bar(); >>
565.Ve
566.Sp
567With this syntax, the whitespace character(s) after the \*(L"C<<<\*(R"
568and before the \*(L">>>\*(R" (or whatever letter) are \fInot\fR renderable \*(-- they
569do not signify whitespace, but are merely part of the formatting codes
570themselves. That is, these are all synonymous:
571.Sp
572.Vb 7
573\& C<thing>
574\& C<< thing >>
575\& C<< thing >>
576\& C<<< thing >>>
577\& C<<<<
578\& thing
579\& >>>>
580.Ve
581.Sp
582and so on.
583.PP
584In parsing Pod, a notably tricky part is the correct parsing of
585(potentially nested!) formatting codes. Implementors should
586consult the code in the \f(CW\*(C`parse_text\*(C'\fR routine in Pod::Parser as an
587example of a correct implementation.
588.ie n .IP """I<text>"" \*(-- italic text" 4
589.el .IP "\f(CWI<text>\fR \*(-- italic text" 4
590.IX Item "I<text> italic text"
591See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
592.ie n .IP """B<text>"" \*(-- bold text" 4
593.el .IP "\f(CWB<text>\fR \*(-- bold text" 4
594.IX Item "B<text> bold text"
595See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
596.ie n .IP """C<code>"" \*(-- code text" 4
597.el .IP "\f(CWC<code>\fR \*(-- code text" 4
598.IX Item "C<code> code text"
599See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
600.ie n .IP """F<filename>"" \*(-- style for filenames" 4
601.el .IP "\f(CWF<filename>\fR \*(-- style for filenames" 4
602.IX Item "F<filename> style for filenames"
603See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
604.ie n .IP """X<topic name>"" \*(-- an index entry" 4
605.el .IP "\f(CWX<topic name>\fR \*(-- an index entry" 4
606.IX Item "X<topic name> an index entry"
607See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
608.Sp
609This code is unusual in that most formatters completely discard
610this code and its content. Other formatters will render it with
611invisible codes that can be used in building an index of
612the current document.
613.ie n .IP """Z<>"" \*(-- a null (zero\-effect) formatting code" 4
614.el .IP "\f(CWZ<>\fR \*(-- a null (zero\-effect) formatting code" 4
615.IX Item "Z<> a null (zero-effect) formatting code"
616Discussed briefly in \*(L"Formatting Codes\*(R" in perlpod.
617.Sp
618This code is unusual in that it should have no content. That is,
619a processor may complain if it sees \f(CW\*(C`Z<potatoes>\*(C'\fR. Whether
620or not it complains, the \fIpotatoes\fR text should be ignored.
621.ie n .IP """L<name>"" \*(-- a hyperlink" 4
622.el .IP "\f(CWL<name>\fR \*(-- a hyperlink" 4
623.IX Item "L<name> a hyperlink"
624The complicated syntaxes of this code are discussed at length in
625\&\*(L"Formatting Codes\*(R" in perlpod, and implementation details are
626discussed below, in "About L<...> Codes". Parsing the
627contents of L<content> is tricky. Notably, the content has to be
628checked for whether it looks like a \s-1URL\s0, or whether it has to be split
629on literal \*(L"|\*(R" and/or \*(L"/\*(R" (in the right order!), and so on,
630\&\fIbefore\fR E<...> codes are resolved.
631.ie n .IP """E<escape>"" \*(-- a character escape" 4
632.el .IP "\f(CWE<escape>\fR \*(-- a character escape" 4
633.IX Item "E<escape> a character escape"
634See \*(L"Formatting Codes\*(R" in perlpod, and several points in
635\&\*(L"Notes on Implementing Pod Processors\*(R".
636.ie n .IP """S<text>"" \*(-- text contains non-breaking spaces" 4
637.el .IP "\f(CWS<text>\fR \*(-- text contains non-breaking spaces" 4
638.IX Item "S<text> text contains non-breaking spaces"
639This formatting code is syntactically simple, but semantically
640complex. What it means is that each space in the printable
641content of this code signifies a non-breaking space.
642.Sp
643Consider:
644.Sp
645.Vb 1
646\& C<$x ? $y : $z>
647.Ve
648.Sp
649.Vb 1
650\& S<C<$x ? $y : $z>>
651.Ve
652.Sp
653Both signify the monospace (c[ode] style) text consisting of
654\&\*(L"$x\*(R", one space, \*(L"?\*(R", one space, \*(L"$y\*(R", one space, \*(L":\*(R", one space, \*(L"$z\*(R". The
655difference is that in the latter, with the S code, those spaces
656are not \*(L"normal\*(R" spaces, but instead are non-breaking spaces.
657.PP
658If a Pod processor sees any formatting code other than the ones
659listed above (as in \*(L"N<...>\*(R", or \*(L"Q<...>\*(R", etc.), that
660processor must by default treat this as an error.
661A Pod parser may allow a way for particular
662applications to add to the above list of known formatting codes;
663a Pod parser might even allow a way to stipulate, for each additional
664command, whether it requires some form of special processing, as
665L<...> does.
666.PP
667Future versions of this specification may add additional
668formatting codes.
669.PP
670Historical note: A few older Pod processors would not see a \*(L">\*(R" as
671closing a \*(L"C<\*(R" code, if the \*(L">\*(R" was immediately preceded by
672a \*(L"\-\*(R". This was so that this:
673.PP
674.Vb 1
675\& C<$foo->bar>
676.Ve
677.PP
678would parse as equivalent to this:
679.PP
680.Vb 1
681\& C<$foo-E<gt>bar>
682.Ve
683.PP
684instead of as equivalent to a \*(L"C\*(R" formatting code containing
685only \*(L"$foo\-\*(R", and then a \*(L"bar>\*(R" outside the \*(L"C\*(R" formatting code. This
686problem has since been solved by the addition of syntaxes like this:
687.PP
688.Vb 1
689\& C<< $foo->bar >>
690.Ve
691.PP
692Compliant parsers must not treat \*(L"\->\*(R" as special.
693.PP
694Formatting codes absolutely cannot span paragraphs. If a code is
695opened in one paragraph, and no closing code is found by the end of
696that paragraph, the Pod parser must close that formatting code,
697and should complain (as in \*(L"Unterminated I code in the paragraph
698starting at line 123: 'Time objects are not...'\*(R"). So these
699two paragraphs:
700.PP
701.Vb 1
702\& I<I told you not to do this!
703.Ve
704.PP
705.Vb 1
706\& Don't make me say it again!>
707.Ve
708.PP
709\&...must \fInot\fR be parsed as two paragraphs in italics (with the I
710code starting in one paragraph and ending in another.) Instead,
711the first paragraph should generate a warning, but that aside, the
712above code must parse as if it were:
713.PP
714.Vb 1
715\& I<I told you not to do this!>
716.Ve
717.PP
718.Vb 1
719\& Don't make me say it again!E<gt>
720.Ve
721.PP
722(In SGMLish jargon, all Pod commands are like block-level
723elements, whereas all Pod formatting codes are like inline-level
724elements.)
725.SH "Notes on Implementing Pod Processors"
726.IX Header "Notes on Implementing Pod Processors"
727The following is a long section of miscellaneous requirements
728and suggestions to do with Pod processing.
729.IP "\(bu" 4
730Pod formatters should tolerate lines in verbatim blocks that are of
731any length, even if that means having to break them (possibly several
732times, for very long lines) to avoid text running off the side of the
733page. Pod formatters may warn of such line\-breaking. Such warnings
734are particularly appropriate for lines that are over 100 characters long, which
735are usually not intentional.
736.IP "\(bu" 4
737Pod parsers must recognize \fIall\fR of the three well-known newline
738formats: \s-1CR\s0, \s-1LF\s0, and \s-1CRLF\s0. See perlport.
739.IP "\(bu" 4
740Pod parsers should accept input lines that are of any length.
741.IP "\(bu" 4
742Since Perl recognizes a Unicode Byte Order Mark at the start of files
743as signaling that the file is Unicode encoded as in \s-1UTF\-16\s0 (whether
744big-endian or little\-endian) or \s-1UTF\-8\s0, Pod parsers should do the
745same. Otherwise, the character encoding should be understood as
746being \s-1UTF\-8\s0 if the first highbit byte sequence in the file seems
747valid as a \s-1UTF\-8\s0 sequence, or otherwise as Latin\-1.
748.Sp
749Future versions of this specification may specify
750how Pod can accept other encodings. Presumably treatment of other
751encodings in Pod parsing would be as in \s-1XML\s0 parsing: whatever the
752encoding declared by a particular Pod file, content is to be
753stored in memory as Unicode characters.
754.IP "\(bu" 4
755The well known Unicode Byte Order Marks are as follows: if the
756file begins with the two literal byte values 0xFE 0xFF, this is
757the \s-1BOM\s0 for big-endian \s-1UTF\-16\s0. If the file begins with the two
758literal byte values 0xFF 0xFE, this is the \s-1BOM\s0 for little-endian
759\&\s-1UTF\-16\s0. If the file begins with the three literal byte values
7600xEF 0xBB 0xBF, this is the \s-1BOM\s0 for \s-1UTF\-8\s0.
761.IP "\(bu" 4
762A naive but sufficient heuristic for testing the first highbit
763byte-sequence in a BOM-less file (whether in code or in Pod!), to see
764whether that sequence is valid as \s-1UTF\-8\s0 (\s-1RFC\s0 2279) is to check whether
765the first byte in the sequence is in the range 0xC0 \- 0xFD
766\&\fIand\fR whether the next byte is in the range
7670x80 \- 0xBF. If so, the parser may conclude that this file is in
768\&\s-1UTF\-8\s0, and all highbit sequences in the file should be assumed to
769be \s-1UTF\-8\s0. Otherwise the parser should treat the file as being
770in Latin\-1. In the unlikely circumstance that the first highbit
771sequence in a truly non\-UTF\-8 file happens to appear to be \s-1UTF\-8\s0, one
772can cater to our heuristic (as well as any more intelligent heuristic)
773by prefacing that line with a comment line containing a highbit
774sequence that is clearly \fInot\fR valid as \s-1UTF\-8\s0. A line consisting
775of simply \*(L"#\*(R", an e\-acute, and any non-highbit byte,
776is sufficient to establish this file's encoding.
777.IP "\(bu" 4
778This document's requirements and suggestions about encodings
779do not apply to Pod processors running on non-ASCII platforms,
780notably \s-1EBCDIC\s0 platforms.
781.IP "\(bu" 4
782Pod processors must treat a \*(L"=for [label] [content...]\*(R" paragraph as
783meaning the same thing as a \*(L"=begin [label]\*(R" paragraph, content, and
784an \*(L"=end [label]\*(R" paragraph. (The parser may conflate these two
785constructs, or may leave them distinct, in the expectation that the
786formatter will nevertheless treat them the same.)
787.IP "\(bu" 4
788When rendering Pod to a format that allows comments (i.e., to nearly
789any format other than plaintext), a Pod formatter must insert comment
790text identifying its name and version number, and the name and
791version numbers of any modules it might be using to process the Pod.
792Minimal examples:
793.Sp
794.Vb 1
795\& %% POD::Pod2PS v3.14159, using POD::Parser v1.92
796.Ve
797.Sp
798.Vb 1
799\& <!-- Pod::HTML v3.14159, using POD::Parser v1.92 -->
800.Ve
801.Sp
802.Vb 1
803\& {\edoccomm generated by Pod::Tree::RTF 3.14159 using Pod::Tree 1.08}
804.Ve
805.Sp
806.Vb 1
807\& .\e" Pod::Man version 3.14159, using POD::Parser version 1.92
808.Ve
809.Sp
810Formatters may also insert additional comments, including: the
811release date of the Pod formatter program, the contact address for
812the author(s) of the formatter, the current time, the name of input
813file, the formatting options in effect, version of Perl used, etc.
814.Sp
815Formatters may also choose to note errors/warnings as comments,
816besides or instead of emitting them otherwise (as in messages to
817\&\s-1STDERR\s0, or \f(CW\*(C`die\*(C'\fRing).
818.IP "\(bu" 4
819Pod parsers \fImay\fR emit warnings or error messages ("Unknown E code
820E<zslig>!") to \s-1STDERR\s0 (whether through printing to \s-1STDERR\s0, or
821\&\f(CW\*(C`warn\*(C'\fRing/\f(CW\*(C`carp\*(C'\fRing, or \f(CW\*(C`die\*(C'\fRing/\f(CW\*(C`croak\*(C'\fRing), but \fImust\fR allow
822suppressing all such \s-1STDERR\s0 output, and instead allow an option for
823reporting errors/warnings
824in some other way, whether by triggering a callback, or noting errors
825in some attribute of the document object, or some similarly unobtrusive
826mechanism \*(-- or even by appending a \*(L"Pod Errors\*(R" section to the end of
827the parsed form of the document.
828.IP "\(bu" 4
829In cases of exceptionally aberrant documents, Pod parsers may abort the
830parse. Even then, using \f(CW\*(C`die\*(C'\fRing/\f(CW\*(C`croak\*(C'\fRing is to be avoided; where
831possible, the parser library may simply close the input file
832and add text like \*(L"*** Formatting Aborted ***\*(R" to the end of the
833(partial) in-memory document.
834.IP "\(bu" 4
835In paragraphs where formatting codes (like E<...>, B<...>)
836are understood (i.e., \fInot\fR verbatim paragraphs, but \fIincluding\fR
837ordinary paragraphs, and command paragraphs that produce renderable
838text, like \*(L"=head1\*(R"), literal whitespace should generally be considered
839\&\*(L"insignificant\*(R", in that one literal space has the same meaning as any
840(nonzero) number of literal spaces, literal newlines, and literal tabs
841(as long as this produces no blank lines, since those would terminate
842the paragraph). Pod parsers should compact literal whitespace in each
843processed paragraph, but may provide an option for overriding this
844(since some processing tasks do not require it), or may follow
845additional special rules (for example, specially treating
846period-space-space or period-newline sequences).
847.IP "\(bu" 4
848Pod parsers should not, by default, try to coerce apostrophe (') and
849quote (") into smart quotes (little 9's, 66's, 99's, etc), nor try to
850turn backtick (`) into anything else but a single backtick character
851(distinct from an openquote character!), nor \*(L"\-\-\*(R" into anything but
852two minus signs. They \fImust never\fR do any of those things to text
853in C<...> formatting codes, and never \fIever\fR to text in verbatim
854paragraphs.
855.IP "\(bu" 4
856When rendering Pod to a format that has two kinds of hyphens (\-), one
857that's a non-breaking hyphen, and another that's a breakable hyphen
858(as in \*(L"object\-oriented\*(R", which can be split across lines as
859\&\*(L"object\-\*(R", newline, \*(L"oriented\*(R"), formatters are encouraged to
860generally translate \*(L"\-\*(R" to non-breaking hyphen, but may apply
861heuristics to convert some of these to breaking hyphens.
862.IP "\(bu" 4
863Pod formatters should make reasonable efforts to keep words of Perl
864code from being broken across lines. For example, \*(L"Foo::Bar\*(R" in some
865formatting systems is seen as eligible for being broken across lines
866as \*(L"Foo::\*(R" newline \*(L"Bar\*(R" or even \*(L"Foo::\-\*(R" newline \*(L"Bar\*(R". This should
867be avoided where possible, either by disabling all line-breaking in
868mid\-word, or by wrapping particular words with internal punctuation
869in \*(L"don't break this across lines\*(R" codes (which in some formats may
870not be a single code, but might be a matter of inserting non-breaking
871zero-width spaces between every pair of characters in a word.)
872.IP "\(bu" 4
873Pod parsers should, by default, expand tabs in verbatim paragraphs as
874they are processed, before passing them to the formatter or other
875processor. Parsers may also allow an option for overriding this.
876.IP "\(bu" 4
877Pod parsers should, by default, remove newlines from the end of
878ordinary and verbatim paragraphs before passing them to the
879formatter. For example, while the paragraph you're reading now
880could be considered, in Pod source, to end with (and contain)
881the newline(s) that end it, it should be processed as ending with
882(and containing) the period character that ends this sentence.
883.IP "\(bu" 4
884Pod parsers, when reporting errors, should make some effort to report
885an approximate line number (\*(L"Nested E<>'s in Paragraph #52, near
886line 633 of Thing/Foo.pm!\*(R"), instead of merely noting the paragraph
887number (\*(L"Nested E<>'s in Paragraph #52 of Thing/Foo.pm!\*(R"). Where
888this is problematic, the paragraph number should at least be
889accompanied by an excerpt from the paragraph (\*(L"Nested E<>'s in
890Paragraph #52 of Thing/Foo.pm, which begins 'Read/write accessor for
891the C<interest rate> attribute...'\*(R").
892.IP "\(bu" 4
893Pod parsers, when processing a series of verbatim paragraphs one
894after another, should consider them to be one large verbatim
895paragraph that happens to contain blank lines. I.e., these two
896lines, which have a blank line between them:
897.Sp
898.Vb 1
899\& use Foo;
900.Ve
901.Sp
902.Vb 1
903\& print Foo->VERSION
904.Ve
905.Sp
906should be unified into one paragraph (\*(L"\etuse Foo;\en\en\etprint
907Foo\->\s-1VERSION\s0\*(R") before being passed to the formatter or other
908processor. Parsers may also allow an option for overriding this.
909.Sp
910While this might be too cumbersome to implement in event-based Pod
911parsers, it is straightforward for parsers that return parse trees.
912.IP "\(bu" 4
913Pod formatters, where feasible, are advised to avoid splitting short
914verbatim paragraphs (under twelve lines, say) across pages.
915.IP "\(bu" 4
916Pod parsers must treat a line with only spaces and/or tabs on it as a
917\&\*(L"blank line\*(R" such as separates paragraphs. (Some older parsers
918recognized only two adjacent newlines as a \*(L"blank line\*(R" but would not
919recognize a newline, a space, and a newline, as a blank line. This
920is noncompliant behavior.)
921.IP "\(bu" 4
922Authors of Pod formatters/processors should make every effort to
923avoid writing their own Pod parser. There are already several in
924\&\s-1CPAN\s0, with a wide range of interface styles \*(-- and one of them,
925Pod::Parser, comes with modern versions of Perl.
926.IP "\(bu" 4
927Characters in Pod documents may be conveyed either as literals, or by
928number in E<n> codes, or by an equivalent mnemonic, as in
929E<eacute> which is exactly equivalent to E<233>.
930.Sp
931Characters in the range 32\-126 refer to those well known US-ASCII
932characters (also defined there by Unicode, with the same meaning),
933which all Pod formatters must render faithfully. Characters
934in the ranges 0\-31 and 127\-159 should not be used (neither as
935literals, nor as E<number> codes), except for the
936literal byte-sequences for newline (13, 13 10, or 10), and tab (9).
937.Sp
938Characters in the range 160\-255 refer to Latin\-1 characters (also
939defined there by Unicode, with the same meaning). Characters above
940255 should be understood to refer to Unicode characters.
941.IP "\(bu" 4
942Be warned
943that some formatters cannot reliably render characters outside 32\-126;
944and many are able to handle 32\-126 and 160\-255, but nothing above
945255.
946.IP "\(bu" 4
947Besides the well-known \*(L"E<lt>\*(R" and \*(L"E<gt>\*(R" codes for
948less-than and greater\-than, Pod parsers must understand \*(L"E<sol>\*(R"
949for \*(L"/\*(R" (solidus, slash), and \*(L"E<verbar>\*(R" for \*(L"|\*(R" (vertical bar,
950pipe). Pod parsers should also understand \*(L"E<lchevron>\*(R" and
951\&\*(L"E<rchevron>\*(R" as legacy codes for characters 171 and 187, i.e.,
952\&\*(L"left\-pointing double angle quotation mark\*(R" = \*(L"left pointing
953guillemet\*(R" and \*(L"right\-pointing double angle quotation mark\*(R" = \*(L"right
954pointing guillemet\*(R". (These look like little \*(L"<<\*(R" and \*(L">>\*(R", and they
955are now preferably expressed with the \s-1HTML/XHTML\s0 codes \*(L"E<laquo>\*(R"
956and \*(L"E<raquo>\*(R".)
957.IP "\(bu" 4
958Pod parsers should understand all "E<html>" codes as defined
959in the entity declarations in the most recent \s-1XHTML\s0 specification at
960\&\f(CW\*(C`www.W3.org\*(C'\fR. Pod parsers must understand at least the entities
961that define characters in the range 160\-255 (Latin\-1). Pod parsers,
962when faced with some unknown "E<\fIidentifier\fR>" code,
963shouldn't simply replace it with nullstring (by default, at least),
964but may pass it through as a string consisting of the literal characters
965E, less\-than, \fIidentifier\fR, greater\-than. Or Pod parsers may offer the
966alternative option of processing such unknown
967\&\*(L"E<\fIidentifier\fR>\*(R" codes by firing an event especially
968for such codes, or by adding a special node-type to the in-memory
969document tree. Such \*(L"E<\fIidentifier\fR>\*(R" may have special meaning
970to some processors, or some processors may choose to add them to
971a special error report.
972.IP "\(bu" 4
973Pod parsers must also support the \s-1XHTML\s0 codes \*(L"E<quot>\*(R" for
974character 34 (doublequote, "), \*(L"E<amp>\*(R" for character 38
975(ampersand, &), and \*(L"E<apos>\*(R" for character 39 (apostrophe, ').
976.IP "\(bu" 4
977Note that in all cases of "E<whatever>", \fIwhatever\fR (whether
978an htmlname, or a number in any base) must consist only of
979alphanumeric characters \*(-- that is, \fIwhatever\fR must match
980\&\f(CW\*(C`m/\eA\ew+\ez/\*(C'\fR. So "E< 0 1 2 3 >" is invalid, because
981it contains spaces, which aren't alphanumeric characters. This
982presumably does not \fIneed\fR special treatment by a Pod processor;
983\&\*(L" 0 1 2 3 \*(R" doesn't look like a number in any base, so it would
984presumably be looked up in the table of HTML-like names. Since
985there isn't (and cannot be) an HTML-like entity called \*(L" 0 1 2 3 \*(R",
986this will be treated as an error. However, Pod processors may
987treat \*(L"E< 0 1 2 3 >\*(R" or \*(L"E<e\-acute>\*(R" as \fIsyntactically\fR
988invalid, potentially earning a different error message than the
989error message (or warning, or event) generated by a merely unknown
990(but theoretically valid) htmlname, as in "E<qacute>"
991[sic]. However, Pod parsers are not required to make this
992distinction.
993.IP "\(bu" 4
994Note that E<number> \fImust not\fR be interpreted as simply
995\&\*(L"codepoint \fInumber\fR in the current/native character set\*(R". It always
996means only \*(L"the character represented by codepoint \fInumber\fR in
997Unicode.\*(R" (This is identical to the semantics of &#\fInumber\fR; in \s-1XML\s0.)
998.Sp
999This will likely require many formatters to have tables mapping from
1000treatable Unicode codepoints (such as the \*(L"\exE9\*(R" for the e\-acute
1001character) to the escape sequences or codes necessary for conveying
1002such sequences in the target output format. A converter to *roff
1003would, for example, know that \*(L"\exE9\*(R" (whether conveyed literally, or via
1004an E<...> sequence) is to be conveyed as \*(L"e\e\e*'\*(R".
1005Similarly, a program rendering Pod in a Mac \s-1OS\s0 application window, would
1006presumably need to know that \*(L"\exE9\*(R" maps to codepoint 142 in MacRoman
1007encoding that (at time of writing) is native for Mac \s-1OS\s0. Such
1008Unicode2whatever mappings are presumably already widely available for
1009common output formats. (Such mappings may be incomplete! Implementers
1010are not expected to bend over backwards in an attempt to render
1011Cherokee syllabics, Etruscan runes, Byzantine musical symbols, or any
1012of the other weird things that Unicode can encode.) And
1013if a Pod document uses a character not found in such a mapping, the
1014formatter should consider it an unrenderable character.
1015.IP "\(bu" 4
1016If, surprisingly, the implementor of a Pod formatter can't find a
1017satisfactory pre-existing table mapping from Unicode characters to
1018escapes in the target format (e.g., a decent table of Unicode
1019characters to *roff escapes), it will be necessary to build such a
1020table. If you are in this circumstance, you should begin with the
1021characters in the range 0x00A0 \- 0x00FF, which is mostly the heavily
1022used accented characters. Then proceed (as patience permits and
1023fastidiousness compels) through the characters that the (X)HTML
1024standards groups judged important enough to merit mnemonics
1025for. These are declared in the (X)HTML specifications at the
1026www.W3.org site. At time of writing (September 2001), the most recent
1027entity declaration files are:
1028.Sp
1029.Vb 3
1030\& http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent
1031\& http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
1032\& http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent
1033.Ve
1034.Sp
1035Then you can progress through any remaining notable Unicode characters
1036in the range 0x2000\-0x204D (consult the character tables at
1037www.unicode.org), and whatever else strikes your fancy. For example,
1038in \fIxhtml\-symbol.ent\fR, there is the entry:
1039.Sp
1040.Vb 1
1041\& <!ENTITY infin "&#8734;"> <!-- infinity, U+221E ISOtech -->
1042.Ve
1043.Sp
1044While the mapping \*(L"infin\*(R" to the character \*(L"\ex{221E}\*(R" will (hopefully)
1045have been already handled by the Pod parser, the presence of the
1046character in this file means that it's reasonably important enough to
1047include in a formatter's table that maps from notable Unicode characters
1048to the codes necessary for rendering them. So for a Unicode\-to\-*roff
1049mapping, for example, this would merit the entry:
1050.Sp
1051.Vb 1
1052\& "\ex{221E}" => '\e(in',
1053.Ve
1054.Sp
1055It is eagerly hoped that in the future, increasing numbers of formats
1056(and formatters) will support Unicode characters directly (as (X)HTML
1057does with \f(CW\*(C`&infin;\*(C'\fR, \f(CW\*(C`&#8734;\*(C'\fR, or \f(CW\*(C`&#x221E;\*(C'\fR), reducing the need
1058for idiosyncratic mappings of Unicode\-to\-\fImy_escapes\fR.
1059.IP "\(bu" 4
1060It is up to individual Pod formatters to display good judgment when
1061confronted with an unrenderable character (which is distinct from an
1062unknown E<thing> sequence that the parser couldn't resolve to
1063anything, renderable or not). It is good practice to map Latin letters
1064with diacritics (like \*(L"E<eacute>\*(R"/\*(L"E<233>\*(R") to the corresponding
1065unaccented US-ASCII letters (like a simple character 101, \*(L"e\*(R"), but
1066clearly this is often not feasible, and an unrenderable character may
1067be represented as \*(L"?\*(R", or the like. In attempting a sane fallback
1068(as from E<233> to \*(L"e\*(R"), Pod formatters may use the
1069\&\f(CW%Latin1Code_to_fallback\fR table in Pod::Escapes, or
1070Text::Unidecode, if available.
1071.Sp
1072For example, this Pod text:
1073.Sp
1074.Vb 1
1075\& magic is enabled if you set C<$Currency> to 'E<euro>'.
1076.Ve
1077.Sp
1078may be rendered as:
1079\&\*(L"magic is enabled if you set \f(CW$Currency\fR to '\fI?\fR'\*(R" or as
1080\&\*(L"magic is enabled if you set \f(CW$Currency\fR to '\fB[euro]\fR'\*(R", or as
1081\&\*(L"magic is enabled if you set \f(CW$Currency\fR to '[x20AC]'\*(R", etc.
1082.Sp
1083A Pod formatter may also note, in a comment or warning, a list of what
1084unrenderable characters were encountered.
1085.IP "\(bu" 4
1086E<...> may freely appear in any formatting code (other than
1087in another E<...> or in a Z<>). That is, \*(L"X<The
1088E<euro>1,000,000 Solution>\*(R" is valid, as is \*(L"L<The
1089E<euro>1,000,000 Solution|Million::Euros>\*(R".
1090.IP "\(bu" 4
1091Some Pod formatters output to formats that implement non-breaking
1092spaces as an individual character (which I'll call \*(L"\s-1NBSP\s0\*(R"), and
1093others output to formats that implement non-breaking spaces just as
1094spaces wrapped in a \*(L"don't break this across lines\*(R" code. Note that
1095at the level of Pod, both sorts of codes can occur: Pod can contain a
1096\&\s-1NBSP\s0 character (whether as a literal, or as an \*(L"E<160>\*(R" or
1097\&\*(L"E<nbsp>\*(R" code); and Pod can contain \*(L"S<foo
1098I<bar> baz>\*(R" codes, where \*(L"mere spaces\*(R" (character 32) in
1099such codes are taken to represent non-breaking spaces. Pod
1100parsers should consider supporting the optional parsing of \*(L"S<foo
1101I<bar> baz>\*(R" as if it were
1102\&\*(L"foo\fI\s-1NBSP\s0\fRI<bar>\fI\s-1NBSP\s0\fRbaz\*(R", and, going the other way, the
1103optional parsing of groups of words joined by \s-1NBSP\s0's as if each group
1104were in an S<...> code, so that formatters may use the
1105representation that maps best to what the output format demands.
1106.IP "\(bu" 4
1107Some processors may find that the \f(CW\*(C`S<...>\*(C'\fR code is easiest to
1108implement by replacing each space in the parse tree under the content
1109of the S, with an \s-1NBSP\s0. But note: the replacement should apply \fInot\fR to
1110spaces in \fIall\fR text, but \fIonly\fR to spaces in \fIprintable\fR text. (This
1111distinction may or may not be evident in the particular tree/event
1112model implemented by the Pod parser.) For example, consider this
1113unusual case:
1114.Sp
1115.Vb 1
1116\& S<L</Autoloaded Functions>>
1117.Ve
1118.Sp
1119This means that the space in the middle of the visible link text must
1120not be broken across lines. In other words, it's the same as this:
1121.Sp
1122.Vb 1
1123\& L<"AutoloadedE<160>Functions"/Autoloaded Functions>
1124.Ve
1125.Sp
1126However, a misapplied space-to-NBSP replacement could (wrongly)
1127produce something equivalent to this:
1128.Sp
1129.Vb 1
1130\& L<"AutoloadedE<160>Functions"/AutoloadedE<160>Functions>
1131.Ve
1132.Sp
1133\&...which is almost definitely not going to work as a hyperlink (assuming
1134this formatter outputs a format supporting hypertext).
1135.Sp
1136Formatters may choose to just not support the S format code,
1137especially in cases where the output format simply has no \s-1NBSP\s0
1138character/code and no code for \*(L"don't break this stuff across lines\*(R".
1139.IP "\(bu" 4
1140Besides the \s-1NBSP\s0 character discussed above, implementors are reminded
1141of the existence of the other \*(L"special\*(R" character in Latin\-1, the
1142\&\*(L"soft hyphen\*(R" character, also known as \*(L"discretionary hyphen\*(R",
1143i.e. \f(CW\*(C`E<173>\*(C'\fR = \f(CW\*(C`E<0xAD>\*(C'\fR =
1144\&\f(CW\*(C`E<shy>\*(C'\fR. This character expresses an optional hyphenation
1145point. That is, it normally renders as nothing, but may render as a
1146\&\*(L"\-\*(R" if a formatter breaks the word at that point. Pod formatters
1147should, as appropriate, do one of the following: 1) render this with
1148a code with the same meaning (e.g., \*(L"\e\-\*(R" in \s-1RTF\s0), 2) pass it through
1149in the expectation that the formatter understands this character as
1150such, or 3) delete it.
1151.Sp
1152For example:
1153.Sp
1154.Vb 3
1155\& sigE<shy>action
1156\& manuE<shy>script
1157\& JarkE<shy>ko HieE<shy>taE<shy>nieE<shy>mi
1158.Ve
1159.Sp
1160These signal to a formatter that if it is to hyphenate \*(L"sigaction\*(R"
1161or \*(L"manuscript\*(R", then it should be done as
1162\&\*(L"sig\-\fI[linebreak]\fRaction\*(R" or \*(L"manu\-\fI[linebreak]\fRscript\*(R"
1163(and if it doesn't hyphenate it, then the \f(CW\*(C`E<shy>\*(C'\fR doesn't
1164show up at all). And if it is
1165to hyphenate \*(L"Jarkko\*(R" and/or \*(L"Hietaniemi\*(R", it can do
1166so only at the points where there is a \f(CW\*(C`E<shy>\*(C'\fR code.
1167.Sp
1168In practice, it is anticipated that this character will not be used
1169often, but formatters should either support it, or delete it.
1170.IP "\(bu" 4
1171If you think that you want to add a new command to Pod (like, say, a
1172\&\*(L"=biblio\*(R" command), consider whether you could get the same
1173effect with a for or begin/end sequence: \*(L"=for biblio ...\*(R" or \*(L"=begin
1174biblio\*(R" ... \*(L"=end biblio\*(R". Pod processors that don't understand
1175\&\*(L"=for biblio\*(R", etc, will simply ignore it, whereas they may complain
1176loudly if they see \*(L"=biblio\*(R".
1177.IP "\(bu" 4
1178Throughout this document, \*(L"Pod\*(R" has been the preferred spelling for
1179the name of the documentation format. One may also use \*(L"\s-1POD\s0\*(R" or
1180\&\*(L"pod\*(R". For the documentation that is (typically) in the Pod
1181format, you may use \*(L"pod\*(R", or \*(L"Pod\*(R", or \*(L"\s-1POD\s0\*(R". Understanding these
1182distinctions is useful; but obsessing over how to spell them, usually
1183is not.
1184.SH "About L<...> Codes"
1185.IX Header "About L<...> Codes"
1186As you can tell from a glance at perlpod, the L<...>
1187code is the most complex of the Pod formatting codes. The points below
1188will hopefully clarify what it means and how processors should deal
1189with it.
1190.IP "\(bu" 4
1191In parsing an L<...> code, Pod parsers must distinguish at least
1192four attributes:
1193.RS 4
1194.IP "First:" 4
1195.IX Item "First:"
1196The link\-text. If there is none, this must be undef. (E.g., in
1197\&\*(L"L<Perl Functions|perlfunc>\*(R", the link-text is \*(L"Perl Functions\*(R".
1198In \*(L"L<Time::HiRes>\*(R" and even \*(L"L<|Time::HiRes>\*(R", there is no
1199link text. Note that link text may contain formatting.)
1200.IP "Second:" 4
1201.IX Item "Second:"
1202The possibly inferred link-text \*(-- i.e., if there was no real link
1203text, then this is the text that we'll infer in its place. (E.g., for
1204\&\*(L"L<Getopt::Std>\*(R", the inferred link text is \*(L"Getopt::Std\*(R".)
1205.IP "Third:" 4
1206.IX Item "Third:"
1207The name or \s-1URL\s0, or undef if none. (E.g., in \*(L"L<Perl
1208Functions|perlfunc>\*(R", the name \*(-- also sometimes called the page \*(--
1209is \*(L"perlfunc\*(R". In \*(L"L</CAVEATS>\*(R", the name is undef.)
1210.IP "Fourth:" 4
1211.IX Item "Fourth:"
1212The section (\s-1AKA\s0 \*(L"item\*(R" in older perlpods), or undef if none. E.g.,
1213in \*(L"\s-1DESCRIPTION\s0\*(R" in Getopt::Std, \*(L"\s-1DESCRIPTION\s0\*(R" is the section. (Note
1214that this is not the same as a manpage section like the \*(L"5\*(R" in \*(L"man 5
1215crontab\*(R". \*(L"Section Foo\*(R" in the Pod sense means the part of the text
1216that's introduced by the heading or item whose text is \*(L"Foo\*(R".)
1217.RE
1218.RS 4
1219.Sp
1220Pod parsers may also note additional attributes including:
1221.IP "Fifth:" 4
1222.IX Item "Fifth:"
1223A flag for whether item 3 (if present) is a \s-1URL\s0 (like
1224\&\*(L"http://lists.perl.org\*(R" is), in which case there should be no section
1225attribute; a Pod name (like \*(L"perldoc\*(R" and \*(L"Getopt::Std\*(R" are); or
1226possibly a man page name (like \*(L"\fIcrontab\fR\|(5)\*(R" is).
1227.IP "Sixth:" 4
1228.IX Item "Sixth:"
1229The raw original L<...> content, before text is split on
1230\&\*(L"|\*(R", \*(L"/\*(R", etc, and before E<...> codes are expanded.
1231.RE
1232.RS 4
1233.Sp
1234(The above were numbered only for concise reference below. It is not
1235a requirement that these be passed as an actual list or array.)
1236.Sp
1237For example:
1238.Sp
1239.Vb 7
1240\& L<Foo::Bar>
1241\& => undef, # link text
1242\& "Foo::Bar", # possibly inferred link text
1243\& "Foo::Bar", # name
1244\& undef, # section
1245\& 'pod', # what sort of link
1246\& "Foo::Bar" # original content
1247.Ve
1248.Sp
1249.Vb 7
1250\& L<Perlport's section on NL's|perlport/Newlines>
1251\& => "Perlport's section on NL's", # link text
1252\& "Perlport's section on NL's", # possibly inferred link text
1253\& "perlport", # name
1254\& "Newlines", # section
1255\& 'pod', # what sort of link
1256\& "Perlport's section on NL's|perlport/Newlines" # orig. content
1257.Ve
1258.Sp
1259.Vb 7
1260\& L<perlport/Newlines>
1261\& => undef, # link text
1262\& '"Newlines" in perlport', # possibly inferred link text
1263\& "perlport", # name
1264\& "Newlines", # section
1265\& 'pod', # what sort of link
1266\& "perlport/Newlines" # original content
1267.Ve
1268.Sp
1269.Vb 7
1270\& L<crontab(5)/"DESCRIPTION">
1271\& => undef, # link text
1272\& '"DESCRIPTION" in crontab(5)', # possibly inferred link text
1273\& "crontab(5)", # name
1274\& "DESCRIPTION", # section
1275\& 'man', # what sort of link
1276\& 'crontab(5)/"DESCRIPTION"' # original content
1277.Ve
1278.Sp
1279.Vb 7
1280\& L</Object Attributes>
1281\& => undef, # link text
1282\& '"Object Attributes"', # possibly inferred link text
1283\& undef, # name
1284\& "Object Attributes", # section
1285\& 'pod', # what sort of link
1286\& "/Object Attributes" # original content
1287.Ve
1288.Sp
1289.Vb 7
1290\& L<http://www.perl.org/>
1291\& => undef, # link text
1292\& "http://www.perl.org/", # possibly inferred link text
1293\& "http://www.perl.org/", # name
1294\& undef, # section
1295\& 'url', # what sort of link
1296\& "http://www.perl.org/" # original content
1297.Ve
1298.Sp
1299Note that you can distinguish URL-links from anything else by the
1300fact that they match \f(CW\*(C`m/\eA\ew+:[^:\es]\eS*\ez/\*(C'\fR. So
1301\&\f(CW\*(C`L<http://www.perl.com>\*(C'\fR is a \s-1URL\s0, but
1302\&\f(CW\*(C`L<HTTP::Response>\*(C'\fR isn't.
1303.RE
1304.IP "\(bu" 4
1305In case of L<...> codes with no \*(L"text|\*(R" part in them,
1306older formatters have exhibited great variation in actually displaying
1307the link or cross reference. For example, L<\fIcrontab\fR\|(5)> would render
1308as \*(L"the \f(CWcrontab(5)\fR manpage\*(R", or \*(L"in the \f(CWcrontab(5)\fR manpage\*(R"
1309or just \*(L"\f(CWcrontab(5)\fR\*(R".
1310.Sp
1311Pod processors must now treat \*(L"text|\*(R"\-less links as follows:
1312.Sp
1313.Vb 3
1314\& L<name> => L<name|name>
1315\& L</section> => L<"section"|/section>
1316\& L<name/section> => L<"section" in name|name/section>
1317.Ve
1318.IP "\(bu" 4
1319Note that section names might contain markup. I.e., if a section
1320starts with:
1321.Sp
1322.Vb 1
1323\& =head2 About the C<-M> Operator
1324.Ve
1325.Sp
1326or with:
1327.Sp
1328.Vb 1
1329\& =item About the C<-M> Operator
1330.Ve
1331.Sp
1332then a link to it would look like this:
1333.Sp
1334.Vb 1
1335\& L<somedoc/About the C<-M> Operator>
1336.Ve
1337.Sp
1338Formatters may choose to ignore the markup for purposes of resolving
1339the link and use only the renderable characters in the section name,
1340as in:
1341.Sp
1342.Vb 2
1343\& <h1><a name="About_the_-M_Operator">About the <code>-M</code>
1344\& Operator</a></h1>
1345.Ve
1346.Sp
1347.Vb 1
1348\& ...
1349.Ve
1350.Sp
1351.Vb 2
1352\& <a href="somedoc#About_the_-M_Operator">"About the <code>-M</code>
1353\& Operator" in somedoc</a>
1354.Ve
1355.IP "\(bu" 4
1356Previous versions of perlpod distinguished \f(CW\*(C`L<name/"section">\*(C'\fR
1357links from \f(CW\*(C`L<name/item>\*(C'\fR links (and their targets). These
1358have been merged syntactically and semantically in the current
1359specification, and \fIsection\fR can refer either to a \*(L"=head\fIn\fR Heading
1360Content\*(R" command or to a \*(L"=item Item Content\*(R" command. This
1361specification does not specify what behavior should be in the case
1362of a given document having several things all seeming to produce the
1363same \fIsection\fR identifier (e.g., in \s-1HTML\s0, several things all producing
1364the same \fIanchorname\fR in <a name="\fIanchorname\fR">...</a>
1365elements). Where Pod processors can control this behavior, they should
1366use the first such anchor. That is, \f(CW\*(C`L<Foo/Bar>\*(C'\fR refers to the
1367\&\fIfirst\fR \*(L"Bar\*(R" section in Foo.
1368.Sp
1369But for some processors/formats this cannot be easily controlled; as
1370with the \s-1HTML\s0 example, the behavior of multiple ambiguous
1371<a name="\fIanchorname\fR">...</a> is most easily just left up to
1372browsers to decide.
1373.IP "\(bu" 4
1374Authors wanting to link to a particular (absolute) \s-1URL\s0, must do so
1375only with "L<scheme:...>" codes (like
1376L<http://www.perl.org>), and must not attempt "L<Some Site
1377Name|scheme:...>" codes. This restriction avoids many problems
1378in parsing and rendering L<...> codes.
1379.IP "\(bu" 4
1380In a \f(CW\*(C`L<text|...>\*(C'\fR code, text may contain formatting codes
1381for formatting or for E<...> escapes, as in:
1382.Sp
1383.Vb 1
1384\& L<B<ummE<234>stuff>|...>
1385.Ve
1386.Sp
1387For \f(CW\*(C`L<...>\*(C'\fR codes without a \*(L"name|\*(R" part, only
1388\&\f(CW\*(C`E<...>\*(C'\fR and \f(CW\*(C`Z<>\*(C'\fR codes may occur \*(-- no
1389other formatting codes. That is, authors should not use
1390"\f(CW\*(C`L<B<Foo::Bar>>\*(C'\fR".
1391.Sp
1392Note, however, that formatting codes and Z<>'s can occur in any
1393and all parts of an L<...> (i.e., in \fIname\fR, \fIsection\fR, \fItext\fR,
1394and \fIurl\fR).
1395.Sp
1396Authors must not nest L<...> codes. For example, "L<The
1397L<Foo::Bar> man page>" should be treated as an error.
1398.IP "\(bu" 4
1399Note that Pod authors may use formatting codes inside the \*(L"text\*(R"
1400part of "L<text|name>" (and so on for L<text|/\*(L"sec\*(R">).
1401.Sp
1402In other words, this is valid:
1403.Sp
1404.Vb 1
1405\& Go read L<the docs on C<$.>|perlvar/"$.">
1406.Ve
1407.Sp
1408Some output formats that do allow rendering "L<...>" codes as
1409hypertext, might not allow the link-text to be formatted; in
1410that case, formatters will have to just ignore that formatting.
1411.IP "\(bu" 4
1412At time of writing, \f(CW\*(C`L<name>\*(C'\fR values are of two types:
1413either the name of a Pod page like \f(CW\*(C`L<Foo::Bar>\*(C'\fR (which
1414might be a real Perl module or program in an \f(CW@INC\fR / \s-1PATH\s0
1415directory, or a .pod file in those places); or the name of a \s-1UNIX\s0
1416man page, like \f(CW\*(C`L<crontab(5)>\*(C'\fR. In theory, \f(CW\*(C`L<chmod>\*(C'\fR
1417is ambiguous between a Pod page called \*(L"chmod\*(R", or the Unix man page
1418\&\*(L"chmod\*(R" (in whatever man\-section). However, the presence of a string
1419in parens, as in \*(L"\fIcrontab\fR\|(5)\*(R", is sufficient to signal that what
1420is being discussed is not a Pod page, and so is presumably a
1421\&\s-1UNIX\s0 man page. The distinction is of no importance to many
1422Pod processors, but some processors that render to hypertext formats
1423may need to distinguish them in order to know how to render a
1424given \f(CW\*(C`L<foo>\*(C'\fR code.
1425.IP "\(bu" 4
1426Previous versions of perlpod allowed for a \f(CW\*(C`L<section>\*(C'\fR syntax
1427(as in "\f(CW\*(C`L<Object Attributes>\*(C'\fR"), which was not easily distinguishable
1428from \f(CW\*(C`L<name>\*(C'\fR syntax. This syntax is no longer in the
1429specification, and has been replaced by the \f(CW\*(C`L<"section">\*(C'\fR syntax
1430(where the quotes were formerly optional). Pod parsers should tolerate
1431the \f(CW\*(C`L<section>\*(C'\fR syntax, for a while at least. The suggested
1432heuristic for distinguishing \f(CW\*(C`L<section>\*(C'\fR from \f(CW\*(C`L<name>\*(C'\fR
1433is that if it contains any whitespace, it's a \fIsection\fR. Pod processors
1434may warn about this being deprecated syntax.
1435.SH "About =over...=back Regions"
1436.IX Header "About =over...=back Regions"
1437\&\*(L"=over\*(R"...\*(L"=back\*(R" regions are used for various kinds of list-like
1438structures. (I use the term \*(L"region\*(R" here simply as a collective
1439term for everything from the \*(L"=over\*(R" to the matching \*(L"=back\*(R".)
1440.IP "\(bu" 4
1441The non-zero numeric \fIindentlevel\fR in \*(L"=over \fIindentlevel\fR\*(R" ...
1442\&\*(L"=back\*(R" is used for giving the formatter a clue as to how many
1443\&\*(L"spaces\*(R" (ems, or roughly equivalent units) it should tab over,
1444although many formatters will have to convert this to an absolute
1445measurement that may not exactly match with the size of spaces (or M's)
1446in the document's base font. Other formatters may have to completely
1447ignore the number. The lack of any explicit \fIindentlevel\fR parameter is
1448equivalent to an \fIindentlevel\fR value of 4. Pod processors may
1449complain if \fIindentlevel\fR is present but is not a positive number
1450matching \f(CW\*(C`m/\eA(\ed*\e.)?\ed+\ez/\*(C'\fR.
1451.IP "\(bu" 4
1452Authors of Pod formatters are reminded that \*(L"=over\*(R" ... \*(L"=back\*(R" may
1453map to several different constructs in your output format. For
1454example, in converting Pod to (X)HTML, it can map to any of
1455<ul>...</ul>, <ol>...</ol>, <dl>...</dl>, or
1456<blockquote>...</blockquote>. Similarly, \*(L"=item\*(R" can map to <li> or
1457<dt>.
1458.IP "\(bu" 4
1459Each \*(L"=over\*(R" ... \*(L"=back\*(R" region should be one of the following:
1460.RS 4
1461.IP "\(bu" 4
1462An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing only \*(L"=item *\*(R" commands,
1463each followed by some number of ordinary/verbatim paragraphs, other
1464nested \*(L"=over\*(R" ... \*(L"=back\*(R" regions, \*(L"=for...\*(R" paragraphs, and
1465\&\*(L"=begin\*(R"...\*(L"=end\*(R" regions.
1466.Sp
1467(Pod processors must tolerate a bare \*(L"=item\*(R" as if it were \*(L"=item
1468*\*(R".) Whether \*(L"*\*(R" is rendered as a literal asterisk, an \*(L"o\*(R", or as
1469some kind of real bullet character, is left up to the Pod formatter,
1470and may depend on the level of nesting.
1471.IP "\(bu" 4
1472An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing only
1473\&\f(CW\*(C`m/\eA=item\es+\ed+\e.?\es*\ez/\*(C'\fR paragraphs, each one (or each group of them)
1474followed by some number of ordinary/verbatim paragraphs, other nested
1475\&\*(L"=over\*(R" ... \*(L"=back\*(R" regions, \*(L"=for...\*(R" paragraphs, and/or
1476\&\*(L"=begin\*(R"...\*(L"=end\*(R" regions. Note that the numbers must start at 1
1477in each section, and must proceed in order and without skipping
1478numbers.
1479.Sp
1480(Pod processors must tolerate lines like \*(L"=item 1\*(R" as if they were
1481\&\*(L"=item 1.\*(R", with the period.)
1482.IP "\(bu" 4
1483An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing only \*(L"=item [text]\*(R"
1484commands, each one (or each group of them) followed by some number of
1485ordinary/verbatim paragraphs, other nested \*(L"=over\*(R" ... \*(L"=back\*(R"
1486regions, or \*(L"=for...\*(R" paragraphs, and \*(L"=begin\*(R"...\*(L"=end\*(R" regions.
1487.Sp
1488The \*(L"=item [text]\*(R" paragraph should not match
1489\&\f(CW\*(C`m/\eA=item\es+\ed+\e.?\es*\ez/\*(C'\fR or \f(CW\*(C`m/\eA=item\es+\e*\es*\ez/\*(C'\fR, nor should it
1490match just \f(CW\*(C`m/\eA=item\es*\ez/\*(C'\fR.
1491.IP "\(bu" 4
1492An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing no \*(L"=item\*(R" paragraphs at
1493all, and containing only some number of
1494ordinary/verbatim paragraphs, and possibly also some nested \*(L"=over\*(R"
1495\&... \*(L"=back\*(R" regions, \*(L"=for...\*(R" paragraphs, and \*(L"=begin\*(R"...\*(L"=end\*(R"
1496regions. Such an itemless \*(L"=over\*(R" ... \*(L"=back\*(R" region in Pod is
1497equivalent in meaning to a \*(L"<blockquote>...</blockquote>\*(R" element in
1498\&\s-1HTML\s0.
1499.RE
1500.RS 4
1501.Sp
1502Note that with all the above cases, you can determine which type of
1503\&\*(L"=over\*(R" ... \*(L"=back\*(R" you have, by examining the first (non\-\*(L"=cut\*(R",
1504non\-\*(L"=pod\*(R") Pod paragraph after the \*(L"=over\*(R" command.
1505.RE
1506.IP "\(bu" 4
1507Pod formatters \fImust\fR tolerate arbitrarily large amounts of text
1508in the "=item \fItext...\fR" paragraph. In practice, most such
1509paragraphs are short, as in:
1510.Sp
1511.Vb 1
1512\& =item For cutting off our trade with all parts of the world
1513.Ve
1514.Sp
1515But they may be arbitrarily long:
1516.Sp
1517.Vb 2
1518\& =item For transporting us beyond seas to be tried for pretended
1519\& offenses
1520.Ve
1521.Sp
1522.Vb 5
1523\& =item He is at this time transporting large armies of foreign
1524\& mercenaries to complete the works of death, desolation and
1525\& tyranny, already begun with circumstances of cruelty and perfidy
1526\& scarcely paralleled in the most barbarous ages, and totally
1527\& unworthy the head of a civilized nation.
1528.Ve
1529.IP "\(bu" 4
1530Pod processors should tolerate \*(L"=item *\*(R" / "=item \fInumber\fR" commands
1531with no accompanying paragraph. The middle item is an example:
1532.Sp
1533.Vb 1
1534\& =over
1535.Ve
1536.Sp
1537.Vb 1
1538\& =item 1
1539.Ve
1540.Sp
1541.Vb 1
1542\& Pick up dry cleaning.
1543.Ve
1544.Sp
1545.Vb 1
1546\& =item 2
1547.Ve
1548.Sp
1549.Vb 1
1550\& =item 3
1551.Ve
1552.Sp
1553.Vb 1
1554\& Stop by the store. Get Abba Zabas, Stoli, and cheap lawn chairs.
1555.Ve
1556.Sp
1557.Vb 1
1558\& =back
1559.Ve
1560.IP "\(bu" 4
1561No \*(L"=over\*(R" ... \*(L"=back\*(R" region can contain headings. Processors may
1562treat such a heading as an error.
1563.IP "\(bu" 4
1564Note that an \*(L"=over\*(R" ... \*(L"=back\*(R" region should have some
1565content. That is, authors should not have an empty region like this:
1566.Sp
1567.Vb 1
1568\& =over
1569.Ve
1570.Sp
1571.Vb 1
1572\& =back
1573.Ve
1574.Sp
1575Pod processors seeing such a contentless \*(L"=over\*(R" ... \*(L"=back\*(R" region,
1576may ignore it, or may report it as an error.
1577.IP "\(bu" 4
1578Processors must tolerate an \*(L"=over\*(R" list that goes off the end of the
1579document (i.e., which has no matching \*(L"=back\*(R"), but they may warn
1580about such a list.
1581.IP "\(bu" 4
1582Authors of Pod formatters should note that this construct:
1583.Sp
1584.Vb 1
1585\& =item Neque
1586.Ve
1587.Sp
1588.Vb 1
1589\& =item Porro
1590.Ve
1591.Sp
1592.Vb 1
1593\& =item Quisquam Est
1594.Ve
1595.Sp
1596.Vb 3
1597\& Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
1598\& velit, sed quia non numquam eius modi tempora incidunt ut
1599\& labore et dolore magnam aliquam quaerat voluptatem.
1600.Ve
1601.Sp
1602.Vb 1
1603\& =item Ut Enim
1604.Ve
1605.Sp
1606is semantically ambiguous, in a way that makes formatting decisions
1607a bit difficult. On the one hand, it could be mention of an item
1608\&\*(L"Neque\*(R", mention of another item \*(L"Porro\*(R", and mention of another
1609item \*(L"Quisquam Est\*(R", with just the last one requiring the explanatory
1610paragraph \*(L"Qui dolorem ipsum quia dolor...\*(R"; and then an item
1611\&\*(L"Ut Enim\*(R". In that case, you'd want to format it like so:
1612.Sp
1613.Vb 1
1614\& Neque
1615.Ve
1616.Sp
1617.Vb 1
1618\& Porro
1619.Ve
1620.Sp
1621.Vb 4
1622\& Quisquam Est
1623\&   Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
1624\&   velit, sed quia non numquam eius modi tempora incidunt ut
1625\&   labore et dolore magnam aliquam quaerat voluptatem.
1626.Ve
1627.Sp
1628.Vb 1
1629\& Ut Enim
1630.Ve
1631.Sp
1632But it could equally well be a discussion of three (related or equivalent)
1633items, \*(L"Neque\*(R", \*(L"Porro\*(R", and \*(L"Quisquam Est\*(R", followed by a paragraph
1634explaining them all, and then a new item \*(L"Ut Enim\*(R". In that case, you'd
1635probably want to format it like so:
1636.Sp
1637.Vb 6
1638\& Neque
1639\& Porro
1640\& Quisquam Est
1641\&   Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
1642\&   velit, sed quia non numquam eius modi tempora incidunt ut
1643\&   labore et dolore magnam aliquam quaerat voluptatem.
1644.Ve
1645.Sp
1646.Vb 1
1647\& Ut Enim
1648.Ve
1649.Sp
1650But (for the foreseeable future), Pod does not provide any way for Pod
1651authors to distinguish which grouping is meant by the above
1652\&\*(L"=item\*(R"\-cluster structure. So formatters should format it like so:
1653.Sp
1654.Vb 1
1655\& Neque
1656.Ve
1657.Sp
1658.Vb 1
1659\& Porro
1660.Ve
1661.Sp
1662.Vb 1
1663\& Quisquam Est
1664.Ve
1665.Sp
1666.Vb 3
1667\& Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
1668\& velit, sed quia non numquam eius modi tempora incidunt ut
1669\& labore et dolore magnam aliquam quaerat voluptatem.
1670.Ve
1671.Sp
1672.Vb 1
1673\& Ut Enim
1674.Ve
1675.Sp
1676That is, there should be (at least roughly) equal spacing between
1677items as between paragraphs (although that spacing may well be less
1678than the full height of a line of text). This leaves it to the reader
1679to use (con)textual cues to figure out whether the \*(L"Qui dolorem
1680ipsum...\*(R" paragraph applies to the \*(L"Quisquam Est\*(R" item or to all three
1681items \*(L"Neque\*(R", \*(L"Porro\*(R", and \*(L"Quisquam Est\*(R". While not an ideal
1682situation, this is preferable to providing formatting cues that may
1683be actually contrary to the author's intent.
1684.ie n .SH "About Data Paragraphs and ""=begin/=end"" Regions"
1685.el .SH "About Data Paragraphs and ``=begin/=end'' Regions"
1686.IX Header "About Data Paragraphs and =begin/=end Regions"
1687Data paragraphs are typically used for inlining non-Pod data that is
1688to be used (typically passed through) when rendering the document to
1689a specific format:
1690.PP
1691.Vb 1
1692\& =begin rtf
1693.Ve
1694.PP
1695.Vb 1
1696\& \epar{\epard\eqr\esa4500{\ei Printed\e~\echdate\e~\echtime}\epar}
1697.Ve
1698.PP
1699.Vb 1
1700\& =end rtf
1701.Ve
1702.PP
1703The exact same effect could, incidentally, be achieved with a single
1704\&\*(L"=for\*(R" paragraph:
1705.PP
1706.Vb 1
1707\& =for rtf \epar{\epard\eqr\esa4500{\ei Printed\e~\echdate\e~\echtime}\epar}
1708.Ve
1709.PP
1710(Although that is not formally a data paragraph, it has the same
1711meaning as one, and Pod parsers may parse it as one.)
1712.PP
1713Another example of a data paragraph:
1714.PP
1715.Vb 1
1716\& =begin html
1717.Ve
1718.PP
1719.Vb 1
1720\& I like <em>PIE</em>!
1721.Ve
1722.PP
1723.Vb 1
1724\& <hr>Especially pecan pie!
1725.Ve
1726.PP
1727.Vb 1
1728\& =end html
1729.Ve
1730.PP
1731If these were ordinary paragraphs, the Pod parser would try to
1732expand the \*(L"E</em>\*(R" (in the first paragraph) as a formatting
1733code, just like \*(L"E<lt>\*(R" or \*(L"E<eacute>\*(R". But since this
1734is in a "=begin \fIidentifier\fR"..."=end \fIidentifier\fR" region \fIand\fR
1735the identifier \*(L"html\*(R" doesn't have a \*(L":\*(R" prefix, the contents
1736of this region are stored as data paragraphs, instead of being
1737processed as ordinary paragraphs (or if they began with spaces
1738and/or tabs, as verbatim paragraphs).
1739.PP
1740As a further example: At time of writing, no \*(L"biblio\*(R" identifier is
1741supported, but suppose some processor were written to recognize it as
1742a way of (say) denoting a bibliographic reference (necessarily
1743containing formatting codes in ordinary paragraphs). The fact that
1744\&\*(L"biblio\*(R" paragraphs were meant for ordinary processing would be
1745indicated by prefacing each \*(L"biblio\*(R" identifier with a colon:
1746.PP
1747.Vb 1
1748\& =begin :biblio
1749.Ve
1750.PP
1751.Vb 2
1752\& Wirth, Niklaus. 1976. I<Algorithms + Data Structures =
1753\& Programs.> Prentice-Hall, Englewood Cliffs, NJ.
1754.Ve
1755.PP
1756.Vb 1
1757\& =end :biblio
1758.Ve
1759.PP
1760This would signal to the parser that paragraphs in this begin...end
1761region are subject to normal handling as ordinary/verbatim paragraphs
1762(while still tagged as meant only for processors that understand the
1763\&\*(L"biblio\*(R" identifier). The same effect could be had with:
1764.PP
1765.Vb 3
1766\& =for :biblio
1767\& Wirth, Niklaus. 1976. I<Algorithms + Data Structures =
1768\& Programs.> Prentice-Hall, Englewood Cliffs, NJ.
1769.Ve
1770.PP
1771The \*(L":\*(R" on these identifiers means simply \*(L"process this stuff
1772normally, even though the result will be for some special target\*(R".
1773I suggest that parser APIs report \*(L"biblio\*(R" as the target identifier,
1774but also report that it had a \*(L":\*(R" prefix. (And similarly, with the
1775above \*(L"html\*(R", report \*(L"html\*(R" as the target identifier, and note the
1776\&\fIlack\fR of a \*(L":\*(R" prefix.)
1777.PP
1778Note that a "=begin \fIidentifier\fR"..."=end \fIidentifier\fR" region where
1779\&\fIidentifier\fR begins with a colon, \fIcan\fR contain commands. For example:
1780.PP
1781.Vb 1
1782\& =begin :biblio
1783.Ve
1784.PP
1785.Vb 1
1786\& Wirth's classic is available in several editions, including:
1787.Ve
1788.PP
1789.Vb 2
1790\& =for comment
1791\& hm, check abebooks.com for how much used copies cost.
1792.Ve
1793.PP
1794.Vb 1
1795\& =over
1796.Ve
1797.PP
1798.Vb 1
1799\& =item
1800.Ve
1801.PP
1802.Vb 2
1803\& Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.>
1804\& Teubner, Stuttgart. [Yes, it's in German.]
1805.Ve
1806.PP
1807.Vb 1
1808\& =item
1809.Ve
1810.PP
1811.Vb 2
1812\& Wirth, Niklaus. 1976. I<Algorithms + Data Structures =
1813\& Programs.> Prentice-Hall, Englewood Cliffs, NJ.
1814.Ve
1815.PP
1816.Vb 1
1817\& =back
1818.Ve
1819.PP
1820.Vb 1
1821\& =end :biblio
1822.Ve
1823.PP
1824Note, however, a "=begin \fIidentifier\fR"..."=end \fIidentifier\fR"
1825region where \fIidentifier\fR does \fInot\fR begin with a colon, should not
1826directly contain \*(L"=head1\*(R" ... \*(L"=head4\*(R" commands, nor \*(L"=over\*(R", nor \*(L"=back\*(R",
1827nor \*(L"=item\*(R". For example, this may be considered invalid:
1828.PP
1829.Vb 1
1830\& =begin somedata
1831.Ve
1832.PP
1833.Vb 1
1834\& This is a data paragraph.
1835.Ve
1836.PP
1837.Vb 1
1838\& =head1 Don't do this!
1839.Ve
1840.PP
1841.Vb 1
1842\& This is a data paragraph too.
1843.Ve
1844.PP
1845.Vb 1
1846\& =end somedata
1847.Ve
1848.PP
1849A Pod processor may signal that the above (specifically the \*(L"=head1\*(R"
1850paragraph) is an error. Note, however, that the following should
1851\&\fInot\fR be treated as an error:
1852.PP
1853.Vb 1
1854\& =begin somedata
1855.Ve
1856.PP
1857.Vb 1
1858\& This is a data paragraph.
1859.Ve
1860.PP
1861.Vb 1
1862\& =cut
1863.Ve
1864.PP
1865.Vb 2
1866\& # Yup, this isn't Pod anymore.
1867\& sub excl { (rand() > .5) ? "hoo!" : "hah!" }
1868.Ve
1869.PP
1870.Vb 1
1871\& =pod
1872.Ve
1873.PP
1874.Vb 1
1875\& This is a data paragraph too.
1876.Ve
1877.PP
1878.Vb 1
1879\& =end somedata
1880.Ve
1881.PP
1882And this too is valid:
1883.PP
1884.Vb 1
1885\& =begin someformat
1886.Ve
1887.PP
1888.Vb 1
1889\& This is a data paragraph.
1890.Ve
1891.PP
1892.Vb 1
1893\& And this is a data paragraph.
1894.Ve
1895.PP
1896.Vb 1
1897\& =begin someotherformat
1898.Ve
1899.PP
1900.Vb 1
1901\& This is a data paragraph too.
1902.Ve
1903.PP
1904.Vb 1
1905\& And this is a data paragraph too.
1906.Ve
1907.PP
1908.Vb 1
1909\& =begin :yetanotherformat
1910.Ve
1911.PP
1912.Vb 1
1913\& =head2 This is a command paragraph!
1914.Ve
1915.PP
1916.Vb 1
1917\& This is an ordinary paragraph!
1918.Ve
1919.PP
1920.Vb 1
1921\& And this is a verbatim paragraph!
1922.Ve
1923.PP
1924.Vb 1
1925\& =end :yetanotherformat
1926.Ve
1927.PP
1928.Vb 1
1929\& =end someotherformat
1930.Ve
1931.PP
1932.Vb 1
1933\& Another data paragraph!
1934.Ve
1935.PP
1936.Vb 1
1937\& =end someformat
1938.Ve
1939.PP
1940The contents of the above \*(L"=begin :yetanotherformat\*(R" ...
1941\&\*(L"=end :yetanotherformat\*(R" region \fIaren't\fR data paragraphs, because
1942the immediately containing region's identifier (\*(L":yetanotherformat\*(R")
1943begins with a colon. In practice, most regions that contain
1944data paragraphs will contain \fIonly\fR data paragraphs; however,
1945the above nesting is syntactically valid as Pod, even if it is
1946rare. However, the handlers for some formats, like \*(L"html\*(R",
1947will accept only data paragraphs, not nested regions; and they may
1948complain if they see (targeted for them) nested regions, or commands,
1949other than \*(L"=end\*(R", \*(L"=pod\*(R", and \*(L"=cut\*(R".
1950.PP
1951Also consider this valid structure:
1952.PP
1953.Vb 1
1954\& =begin :biblio
1955.Ve
1956.PP
1957.Vb 1
1958\& Wirth's classic is available in several editions, including:
1959.Ve
1960.PP
1961.Vb 1
1962\& =over
1963.Ve
1964.PP
1965.Vb 1
1966\& =item
1967.Ve
1968.PP
1969.Vb 2
1970\& Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.>
1971\& Teubner, Stuttgart. [Yes, it's in German.]
1972.Ve
1973.PP
1974.Vb 1
1975\& =item
1976.Ve
1977.PP
1978.Vb 2
1979\& Wirth, Niklaus. 1976. I<Algorithms + Data Structures =
1980\& Programs.> Prentice-Hall, Englewood Cliffs, NJ.
1981.Ve
1982.PP
1983.Vb 1
1984\& =back
1985.Ve
1986.PP
1987.Vb 1
1988\& Buy buy buy!
1989.Ve
1990.PP
1991.Vb 1
1992\& =begin html
1993.Ve
1994.PP
1995.Vb 1
1996\& <img src='wirth_spokesmodeling_book.png'>
1997.Ve
1998.PP
1999.Vb 1
2000\& <hr>
2001.Ve
2002.PP
2003.Vb 1
2004\& =end html
2005.Ve
2006.PP
2007.Vb 1
2008\& Now now now!
2009.Ve
2010.PP
2011.Vb 1
2012\& =end :biblio
2013.Ve
2014.PP
2015There, the \*(L"=begin html\*(R"...\*(L"=end html\*(R" region is nested inside
2016the larger \*(L"=begin :biblio\*(R"...\*(L"=end :biblio\*(R" region. Note that the
2017content of the \*(L"=begin html\*(R"...\*(L"=end html\*(R" region is data
2018paragraph(s), because the immediately containing region's identifier
2019(\*(L"html\*(R") \fIdoesn't\fR begin with a colon.
2020.PP
2021Pod parsers, when processing a series of data paragraphs one
2022after another (within a single region), should consider them to
2023be one large data paragraph that happens to contain blank lines. So
2024the content of the above \*(L"=begin html\*(R"...\*(L"=end html\*(R" \fImay\fR be stored
2025as two data paragraphs (one consisting of
2026\&\*(L"<img src='wirth_spokesmodeling_book.png'>\en\*(R"
2027and another consisting of \*(L"<hr>\en\*(R"), but \fIshould\fR be stored as
2028a single data paragraph (consisting of
2029\&\*(L"<img src='wirth_spokesmodeling_book.png'>\en\en<hr>\en\*(R").
2030.PP
2031Pod processors should tolerate empty
2032"=begin \fIsomething\fR"..."=end \fIsomething\fR" regions,
2033empty "=begin :\fIsomething\fR"..."=end :\fIsomething\fR" regions, and
2034contentless "=for \fIsomething\fR" and "=for :\fIsomething\fR"
2035paragraphs. I.e., these should be tolerated:
2036.PP
2037.Vb 1
2038\& =for html
2039.Ve
2040.PP
2041.Vb 1
2042\& =begin html
2043.Ve
2044.PP
2045.Vb 1
2046\& =end html
2047.Ve
2048.PP
2049.Vb 1
2050\& =begin :biblio
2051.Ve
2052.PP
2053.Vb 1
2054\& =end :biblio
2055.Ve
2056.PP
2057Incidentally, note that there's no easy way to express a data
2058paragraph starting with something that looks like a command. Consider:
2059.PP
2060.Vb 1
2061\& =begin stuff
2062.Ve
2063.PP
2064.Vb 1
2065\& =shazbot
2066.Ve
2067.PP
2068.Vb 1
2069\& =end stuff
2070.Ve
2071.PP
2072There, \*(L"=shazbot\*(R" will be parsed as a Pod command \*(L"shazbot\*(R", not as a data
2073paragraph \*(L"=shazbot\en\*(R". However, you can express a data paragraph consisting
2074of \*(L"=shazbot\en\*(R" using this code:
2075.PP
2076.Vb 1
2077\& =for stuff =shazbot
2078.Ve
2079.PP
2080The situation where this is necessary, is presumably quite rare.
2081.PP
2082Note that =end commands must match the currently open =begin command. That
2083is, they must properly nest. For example, this is valid:
2084.PP
2085.Vb 1
2086\& =begin outer
2087.Ve
2088.PP
2089.Vb 1
2090\& X
2091.Ve
2092.PP
2093.Vb 1
2094\& =begin inner
2095.Ve
2096.PP
2097.Vb 1
2098\& Y
2099.Ve
2100.PP
2101.Vb 1
2102\& =end inner
2103.Ve
2104.PP
2105.Vb 1
2106\& Z
2107.Ve
2108.PP
2109.Vb 1
2110\& =end outer
2111.Ve
2112.PP
2113while this is invalid:
2114.PP
2115.Vb 1
2116\& =begin outer
2117.Ve
2118.PP
2119.Vb 1
2120\& X
2121.Ve
2122.PP
2123.Vb 1
2124\& =begin inner
2125.Ve
2126.PP
2127.Vb 1
2128\& Y
2129.Ve
2130.PP
2131.Vb 1
2132\& =end outer
2133.Ve
2134.PP
2135.Vb 1
2136\& Z
2137.Ve
2138.PP
2139.Vb 1
2140\& =end inner
2141.Ve
2142.PP
2143This latter is improper because when the \*(L"=end outer\*(R" command is seen, the
2144currently open region has the formatname \*(L"inner\*(R", not \*(L"outer\*(R". (It just
2145happens that \*(L"outer\*(R" is the format name of a higher-up region.) This is
2146an error. Processors must by default report this as an error, and may halt
2147processing the document containing that error. A corollary of this is that
2148regions cannot \*(L"overlap\*(R" \*(-- i.e., the latter block above does not represent
2149a region called \*(L"outer\*(R" which contains X and Y, overlapping a region called
2150\&\*(L"inner\*(R" which contains Y and Z. But because it is invalid (as all
2151apparently overlapping regions would be), it doesn't represent that, or
2152anything at all.
2153.PP
2154Similarly, this is invalid:
2155.PP
2156.Vb 1
2157\& =begin thing
2158.Ve
2159.PP
2160.Vb 1
2161\& =end hting
2162.Ve
2163.PP
2164This is an error because the region is opened by \*(L"thing\*(R", and the \*(L"=end\*(R"
2165tries to close \*(L"hting\*(R" [sic].
2166.PP
2167This is also invalid:
2168.PP
2169.Vb 1
2170\& =begin thing
2171.Ve
2172.PP
2173.Vb 1
2174\& =end
2175.Ve
2176.PP
2177This is invalid because every \*(L"=end\*(R" command must have a formatname
2178parameter.
2179.SH "SEE ALSO"
2180.IX Header "SEE ALSO"
2181perlpod, \*(L"PODs: Embedded Documentation\*(R" in perlsyn,
2182podchecker
2183.SH "AUTHOR"
2184.IX Header "AUTHOR"
2185Sean M. Burke