git.subgeniuskitty.com - OpenSPARC-T2-DV/.git/blame_incremental - tools/perl-5.8.0/man/man3/Parse::RecDescent.3

... / ...

Commit	Line	Data
	1	.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
	2	.\"
	3	.\" Standard preamble:
	4	.\" ========================================================================
	5	.de Sh \" Subsection heading
	6	.br
	7	.if t .Sp
	8	.ne 5
	9	.PP
	10	\fB\\$1\fR
	11	.PP
	12	..
	13	.de Sp \" Vertical space (when we can't use .PP)
	14	.if t .sp .5v
	15	.if n .sp
	16	..
	17	.de Vb \" Begin verbatim text
	18	.ft CW
	19	.nf
	20	.ne \\$1
	21	..
	22	.de Ve \" End verbatim text
	23	.ft R
	24	.fi
	25	..
	26	.\" Set up some character translations and predefined strings. \*(-- will
	27	.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
	28	.\" double quote, and \*(R" will give a right double quote. | will give a
	29	.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
	30	.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
	31	.\" expand to `' in nroff, nothing in troff, for use with C<>.
	32	.tr \(*W-|\(bv\*(Tr
	33	.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
	34	.ie n \{\
	35	. ds -- \(*W-
	36	. ds PI pi
	37	. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
	38	. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
	39	. ds L" ""
	40	. ds R" ""
	41	. ds C` ""
	42	. ds C' ""
	43	'br\}
	44	.el\{\
	45	. ds -- \|\(em\|
	46	. ds PI \(*p
	47	. ds L" ``
	48	. ds R" ''
	49	'br\}
	50	.\"
	51	.\" If the F register is turned on, we'll generate index entries on stderr for
	52	.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
	53	.\" entries marked with X<> in POD. Of course, you'll have to process the
	54	.\" output yourself in some meaningful fashion.
	55	.if \nF \{\
	56	. de IX
	57	. tm Index:\\$1\t\\n%\t"\\$2"
	58	..
	59	. nr % 0
	60	. rr F
	61	.\}
	62	.\"
	63	.\" For nroff, turn off justification. Always turn off hyphenation; it makes
	64	.\" way too many mistakes in technical documents.
	65	.hy 0
	66	.if n .na
	67	.\"
	68	.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
	69	.\" Fear. Run. Save yourself. No user-serviceable parts.
	70	. \" fudge factors for nroff and troff
	71	.if n \{\
	72	. ds #H 0
	73	. ds #V .8m
	74	. ds #F .3m
	75	. ds #[ \f1
	76	. ds #] \fP
	77	.\}
	78	.if t \{\
	79	. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
	80	. ds #V .6m
	81	. ds #F 0
	82	. ds #[ \&
	83	. ds #] \&
	84	.\}
	85	. \" simple accents for nroff and troff
	86	.if n \{\
	87	. ds ' \&
	88	. ds ` \&
	89	. ds ^ \&
	90	. ds , \&
	91	. ds ~ ~
	92	. ds /
	93	.\}
	94	.if t \{\
	95	. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
	96	. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
	97	. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
	98	. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
	99	. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
	100	. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
	101	.\}
	102	. \" troff and (daisy-wheel) nroff accents
	103	.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
	104	.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
	105	.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
	106	.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
	107	.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
	108	.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
	109	.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
	110	.ds ae a\h'-(\w'a'u*4/10)'e
	111	.ds Ae A\h'-(\w'A'u*4/10)'E
	112	. \" corrections for vroff
	113	.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
	114	.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
	115	. \" for low resolution devices (crt and lpr)
	116	.if \n(.H>23 .if \n(.V>19 \
	117	\{\
	118	. ds : e
	119	. ds 8 ss
	120	. ds o a
	121	. ds d- d\h'-1'\(ga
	122	. ds D- D\h'-1'\(hy
	123	. ds th \o'bp'
	124	. ds Th \o'LP'
	125	. ds ae ae
	126	. ds Ae AE
	127	.\}
	128	.rm #[ #] #H #V #F C
	129	.\" ========================================================================
	130	.\"
	131	.IX Title "RECDESCENT 1"
	132	.TH RECDESCENT 1 "2000-08-20" "perl v5.8.0" "User Contributed Perl Documentation"
	133	.SH "NAME"
	134	Parse::RecDescent \- Generate Recursive\-Descent Parsers
	135	.SH "VERSION"
	136	.IX Header "VERSION"
	137	This document describes version 1.79 of Parse::RecDescent,
	138	released August 21, 2000.
	139	.SH "SYNOPSIS"
	140	.IX Header "SYNOPSIS"
	141	.Vb 1
	142	\& use Parse::RecDescent;
	143	.Ve
	144	.PP
	145	.Vb 1
	146	\& # Generate a parser from the specification in $grammar:
	147	.Ve
	148	.PP
	149	.Vb 1
	150	\& $parser = new Parse::RecDescent ($grammar);
	151	.Ve
	152	.PP
	153	.Vb 1
	154	\& # Generate a parser from the specification in $othergrammar
	155	.Ve
	156	.PP
	157	.Vb 1
	158	\& $anotherparser = new Parse::RecDescent ($othergrammar);
	159	.Ve
	160	.PP
	161	.Vb 2
	162	\& # Parse $text using rule 'startrule' (which must be
	163	\& # defined in $grammar):
	164	.Ve
	165	.PP
	166	.Vb 1
	167	\& $parser->startrule($text);
	168	.Ve
	169	.PP
	170	.Vb 2
	171	\& # Parse $text using rule 'otherrule' (which must also
	172	\& # be defined in $grammar):
	173	.Ve
	174	.PP
	175	.Vb 1
	176	\& $parser->otherrule($text);
	177	.Ve
	178	.PP
	179	.Vb 2
	180	\& # Change the universal token prefix pattern
	181	\& # (the default is: '\es*'):
	182	.Ve
	183	.PP
	184	.Vb 1
	185	\& $Parse::RecDescent::skip = '[ \et]+';
	186	.Ve
	187	.PP
	188	.Vb 2
	189	\& # Replace productions of existing rules (or create new ones)
	190	\& # with the productions defined in $newgrammar:
	191	.Ve
	192	.PP
	193	.Vb 1
	194	\& $parser->Replace($newgrammar);
	195	.Ve
	196	.PP
	197	.Vb 2
	198	\& # Extend existing rules (or create new ones)
	199	\& # by adding extra productions defined in $moregrammar:
	200	.Ve
	201	.PP
	202	.Vb 1
	203	\& $parser->Extend($moregrammar);
	204	.Ve
	205	.PP
	206	.Vb 1
	207	\& # Global flags (useful as command line arguments under -s):
	208	.Ve
	209	.PP
	210	.Vb 6
	211	\& $::RD_ERRORS # unless undefined, report fatal errors
	212	\& $::RD_WARN # unless undefined, also report non-fatal problems
	213	\& $::RD_HINT # if defined, also suggest remedies
	214	\& $::RD_TRACE # if defined, also trace parsers' behaviour
	215	\& $::RD_AUTOSTUB # if defined, generates "stubs" for undefined rules
	216	\& $::RD_AUTOACTION # if defined, appends specified action to productions
	217	.Ve
	218	.SH "DESCRIPTION"
	219	.IX Header "DESCRIPTION"
	220	.Sh "Overview"
	221	.IX Subsection "Overview"
	222	Parse::RecDescent incrementally generates top-down recursive-descent text
	223	parsers from simple \fIyacc\fR\-like grammar specifications. It provides:
	224	.IP "\(bu" 4
	225	Regular expressions or literal strings as terminals (tokens),
	226	.IP "\(bu" 4
	227	Multiple (non\-contiguous) productions for any rule,
	228	.IP "\(bu" 4
	229	Repeated and optional subrules within productions,
	230	.IP "\(bu" 4
	231	Full access to Perl within actions specified as part of the grammar,
	232	.IP "\(bu" 4
	233	Simple automated error reporting during parser generation and parsing,
	234	.IP "\(bu" 4
	235	The ability to commit to, uncommit to, or reject particular
	236	productions during a parse,
	237	.IP "\(bu" 4
	238	The ability to pass data up and down the parse tree (\(L"down\(R" via subrule
	239	argument lists, \(L"up\(R" via subrule return values)
	240	.IP "\(bu" 4
	241	Incremental extension of the parsing grammar (even during a parse),
	242	.IP "\(bu" 4
	243	Precompilation of parser objects,
	244	.IP "\(bu" 4
	245	User-definable reduce-reduce conflict resolution via
	246	\&\(L"scoring\(R" of matching productions.
	247	.ie n .Sh "Using ""Parse::RecDescent"""
	248	.el .Sh "Using \f(CWParse::RecDescent\fP"
	249	.IX Subsection "Using Parse::RecDescent"
	250	Parser objects are created by calling \f(CW\(C`Parse::RecDescent::new\(C'\fR, passing in a
	251	grammar specification (see the following subsections). If the grammar is
	252	correct, \f(CW\(C`new\(C'\fR returns a blessed reference which can then be used to initiate
	253	parsing through any rule specified in the original grammar. A typical sequence
	254	looks like this:
	255	.PP
	256	.Vb 3
	257	\& $grammar = q {
	258	\& # GRAMMAR SPECIFICATION HERE
	259	\& };
	260	.Ve
	261	.PP
	262	.Vb 1
	263	\& $parser = new Parse::RecDescent ($grammar) or die "Bad grammar!\en";
	264	.Ve
	265	.PP
	266	.Vb 1
	267	\& # acquire $text
	268	.Ve
	269	.PP
	270	.Vb 1
	271	\& defined $parser->startrule($text) or print "Bad text!\en";
	272	.Ve
	273	.PP
	274	The rule through which parsing is initiated must be explicitly defined
	275	in the grammar (i.e. for the above example, the grammar must include a
	276	rule of the form: \*(L"startrule: <subrules>\*(R").
	277	.PP
	278	If the starting rule succeeds, its value (see below)
	279	is returned. Failure to generate the original parser or failure to match a text
	280	is indicated by returning \f(CW\(C`undef\(C'\fR. Note that it's easy to set up grammars
	281	that can succeed, but which return a value of 0, \(L"0\(R", or "". So don't be
	282	tempted to write:
	283	.PP
	284	.Vb 1
	285	\& $parser->startrule($text) or print "Bad text!\en";
	286	.Ve
	287	.PP
	288	Normally, the parser has no effect on the original text. So in the
	289	previous example the value of \f(CW$text\fR would be unchanged after having
	290	been parsed.
	291	.PP
	292	If, however, the text to be matched is passed by reference:
	293	.PP
	294	.Vb 1
	295	\& $parser->startrule(\e$text)
	296	.Ve
	297	.PP
	298	then any text which was consumed during the match will be removed from the
	299	start of \f(CW$text\fR.
	300	.Sh "Rules"
	301	.IX Subsection "Rules"
	302	In the grammar from which the parser is built, rules are specified by
	303	giving an identifier (which must satisfy /[A\-Za\-z]\ew*/), followed by a
	304	colon \fIon the same line\fR, followed by one or more productions,
	305	separated by single vertical bars. The layout of the productions
	306	is entirely free\-format:
	307	.PP
	308	.Vb 3
	309	\& rule1: production1
	310	\& | production2 |
	311	\& production3 | production4
	312	.Ve
	313	.PP
	314	At any point in the grammar previously defined rules may be extended with
	315	additional productions. This is achieved by redeclaring the rule with the new
	316	productions. Thus:
	317	.PP
	318	.Vb 3
	319	\& rule1: a | b | c
	320	\& rule2: d | e | f
	321	\& rule1: g | h
	322	.Ve
	323	.PP
	324	is exactly equivalent to:
	325	.PP
	326	.Vb 2
	327	\& rule1: a | b | c | g | h
	328	\& rule2: d | e | f
	329	.Ve
	330	.PP
	331	Each production in a rule consists of zero or more items, each of which
	332	may be either: the name of another rule to be matched (a \(L"subrule\(R"),
	333	a pattern or string literal to be matched directly (a \(L"token\(R"), a
	334	block of Perl code to be executed (an \(L"action\(R"), a special instruction
	335	to the parser (a \(L"directive\(R"), or a standard Perl comment (which is
	336	ignored).
	337	.PP
	338	A rule matches a text if one of its productions matches. A production
	339	matches if each of its items match consecutive substrings of the
	340	text. The productions of a rule being matched are tried in the same
	341	order that they appear in the original grammar, and the first matching
	342	production terminates the match attempt (successfully). If all
	343	productions are tried and none matches, the match attempt fails.
	344	.PP
	345	Note that this behaviour is quite different from the \(L"prefer the longer match\(R"
	346	behaviour of \fIyacc\fR. For example, if \fIyacc\fR were parsing the rule:
	347	.PP
	348	.Vb 2
	349	\& seq : 'A' 'B'
	350	\& | 'A' 'B' 'C'
	351	.Ve
	352	.PP
	353	upon matching \(L"\s-1AB\s0\(R" it would look ahead to see if a 'C' is next and, if
	354	so, will match the second production in preference to the first. In
	355	other words, \fIyacc\fR effectively tries all the productions of a rule
	356	breadth-first in parallel, and selects the \(L"best\(R" match, where \(L"best\(R"
	357	means longest (note that this is a gross simplification of the true
	358	behaviour of \fIyacc\fR but it will do for our purposes).
	359	.PP
	360	In contrast, \f(CW\(C`Parse::RecDescent\(C'\fR tries each production depth-first in
	361	sequence, and selects the \(L"best\(R" match, where \(L"best\(R" means first. This is
	362	the fundamental difference between \(L"bottom\-up\(R" and \(L"recursive descent\(R"
	363	parsing.
	364	.PP
	365	Each successfully matched item in a production is assigned a value,
	366	which can be accessed in subsequent actions within the same
	367	production (or, in some cases, as the return value of a successful
	368	subrule call). Unsuccessful items don't have an associated value,
	369	since the failure of an item causes the entire surrounding production
	370	to immediately fail. The following sections describe the various types
	371	of items and their success values.
	372	.Sh "Subrules"
	373	.IX Subsection "Subrules"
	374	A subrule which appears in a production is an instruction to the parser to
	375	attempt to match the named rule at that point in the text being
	376	parsed. If the named subrule is not defined when requested the
	377	production containing it immediately fails (unless it was \(L"autostubbed\(R" \- see
	378	Autostubbing).
	379	.PP
	380	A rule may (recursively) call itself as a subrule, but \fInot\fR as the
	381	left-most item in any of its productions (since such recursions are usually
	382	non\-terminating).
	383	.PP
	384	The value associated with a subrule is the value associated with its
	385	\&\f(CW$return\fR variable (see \(L"Actions\(R" below), or with the last successfully
	386	matched item in the subrule match.
	387	.PP
	388	Subrules may also be specified with a trailing repetition specifier,
	389	indicating that they are to be (greedily) matched the specified number
	390	of times. The available specifiers are:
	391	.PP
	392	.Vb 7
	393	\& subrule(?) # Match one-or-zero times
	394	\& subrule(s) # Match one-or-more times
	395	\& subrule(s?) # Match zero-or-more times
	396	\& subrule(N) # Match exactly N times for integer N > 0
	397	\& subrule(N..M) # Match between N and M times
	398	\& subrule(..M) # Match between 1 and M times
	399	\& subrule(N..) # Match at least N times
	400	.Ve
	401	.PP
	402	Repeated subrules keep matching until either the subrule fails to
	403	match, or it has matched the minimal number of times but fails to
	404	consume any of the parsed text (this second condition prevents the
	405	subrule matching forever in some cases).
	406	.PP
	407	Since a repeated subrule may match many instances of the subrule itself, the
	408	value associated with it is not a simple scalar, but rather a reference to a
	409	list of scalars, each of which is the value associated with one of the
	410	individual subrule matches. In other words in the rule:
	411	.PP
	412	.Vb 1
	413	\& program: statement(s)
	414	.Ve
	415	.PP
	416	the value associated with the repeated subrule \(L"statement(s)\(R" is a reference
	417	to an array containing the values matched by each call to the individual
	418	subrule \(L"statement\(R".
	419	.PP
	420	Repetition modifiers may include a separator pattern:
	421	.PP
	422	.Vb 1
	423	\& program: statement(s /;/)
	424	.Ve
	425	.PP
	426	specifying some sequence of characters to be skipped between each repetition.
	427	This is really just a shorthand for the <leftop:...> directive
	428	(see below).
	429	.Sh "Tokens"
	430	.IX Subsection "Tokens"
	431	If a quote-delimited string or a Perl regex appears in a production,
	432	the parser attempts to match that string or pattern at that point in
	433	the text. For example:
	434	.PP
	435	.Vb 1
	436	\& typedef: "typedef" typename identifier ';'
	437	.Ve
	438	.PP
	439	.Vb 1
	440	\& identifier: /[A-Za-z_][A-Za-z0-9_]*/
	441	.Ve
	442	.PP
	443	As in regular Perl, a single quoted string is uninterpolated, whilst
	444	a double-quoted string or a pattern is interpolated (at the time
	445	of matching, \fInot\fR when the parser is constructed). Hence, it is
	446	possible to define rules in which tokens can be set at run\-time:
	447	.PP
	448	.Vb 1
	449	\& typedef: "$::typedefkeyword" typename identifier ';'
	450	.Ve
	451	.PP
	452	.Vb 1
	453	\& identifier: /$::identpat/
	454	.Ve
	455	.PP
	456	Note that, since each rule is implemented inside a special namespace
	457	belonging to its parser, it is necessary to explicitly qualify
	458	variables from the main package.
	459	.PP
	460	Regex tokens can be specified using just slashes as delimiters
	461	or with the explicit \f(CW\(C`m<delimiter>......<delimiter>\(C'\fR syntax:
	462	.PP
	463	.Vb 1
	464	\& typedef: "typedef" typename identifier ';'
	465	.Ve
	466	.PP
	467	.Vb 1
	468	\& typename: /[A-Za-z_][A-Za-z0-9_]*/
	469	.Ve
	470	.PP
	471	.Vb 1
	472	\& identifier: m{[A-Za-z_][A-Za-z0-9_]*}
	473	.Ve
	474	.PP
	475	A regex of either type can also have any valid trailing parameter(s)
	476	(that is, any of [cgimsox]):
	477	.PP
	478	.Vb 1
	479	\& typedef: "typedef" typename identifier ';'
	480	.Ve
	481	.PP
	482	.Vb 3
	483	\& identifier: / [a-z_] # LEADING ALPHA OR UNDERSCORE
	484	\& [a-z0-9_]* # THEN DIGITS ALSO ALLOWED
	485	\& /ix # CASE/SPACE/COMMENT INSENSITIVE
	486	.Ve
	487	.PP
	488	The value associated with any successfully matched token is a string
	489	containing the actual text which was matched by the token.
	490	.PP
	491	It is important to remember that, since each grammar is specified in a
	492	Perl string, all instances of the universal escape character '\e' within
	493	a grammar must be \(L"doubled\(R", so that they interpolate to single '\e's when
	494	the string is compiled. For example, to use the grammar:
	495	.PP
	496	.Vb 3
	497	\& word: /\eS+/ | backslash
	498	\& line: prefix word(s) "\en"
	499	\& backslash: '\e\e'
	500	.Ve
	501	.PP
	502	the following code is required:
	503	.PP
	504	.Vb 1
	505	\& $parser = new Parse::RecDescent (q{
	506	.Ve
	507	.PP
	508	.Vb 3
	509	\& word: /\e\eS+/ | backslash
	510	\& line: prefix word(s) "\e\en"
	511	\& backslash: '\e\e\e\e'
	512	.Ve
	513	.PP
	514	.Vb 1
	515	\& });
	516	.Ve
	517	.Sh "Terminal Separators"
	518	.IX Subsection "Terminal Separators"
	519	For the purpose of matching, each terminal in a production is considered
	520	to be preceded by a \(L"prefix\(R" \- a pattern which must be
	521	matched before a token match is attempted. By default, the
	522	prefix is optional whitespace (which always matches, at
	523	least trivially), but this default may be reset in any production.
	524	.PP
	525	The variable \f(CW$Parse::RecDescent::skip\fR stores the universal
	526	prefix, which is the default for all terminal matches in all parsers
	527	built with \f(CW\(C`Parse::RecDescent\(C'\fR.
	528	.PP
	529	The prefix for an individual production can be altered
	530	by using the \f(CW\(C`<skip:...>\(C'\fR directive (see below).
	531	.Sh "Actions"
	532	.IX Subsection "Actions"
	533	An action is a block of Perl code which is to be executed (as the
	534	block of a \f(CW\(C`do\(C'\fR statement) when the parser reaches that point in a
	535	production. The action executes within a special namespace belonging to
	536	the active parser, so care must be taken in correctly qualifying variable
	537	names (see also \(L"Start\-up Actions\(R" below).
	538	.PP
	539	The action is considered to succeed if the final value of the block
	540	is defined (that is, if the implied \f(CW\(C`do\(C'\fR statement evaluates to a
	541	defined value \- \fIeven one which would be treated as \(L"false\(R"\fR). Note
	542	that the value associated with a successful action is also the final
	543	value in the block.
	544	.PP
	545	An action will \fIfail\fR if its last evaluated value is \f(CW\(C`undef\(C'\fR. This is
	546	surprisingly easy to accomplish by accident. For instance, here's an
	547	infuriating case of an action that makes its production fail, but only
	548	when debugging \fIisn't\fR activated:
	549	.PP
	550	.Vb 4
	551	\& description: name rank serial_number
	552	\& { print "Got $item[2] $item[1] ($item[3])\en"
	553	\& if $::debugging
	554	\& }
	555	.Ve
	556	.PP
	557	If \f(CW$debugging\fR is false, no statement in the block is executed, so
	558	the final value is \f(CW\(C`undef\(C'\fR, and the entire production fails. The solution is:
	559	.PP
	560	.Vb 5
	561	\& description: name rank serial_number
	562	\& { print "Got $item[2] $item[1] ($item[3])\en"
	563	\& if $::debugging;
	564	\& 1;
	565	\& }
	566	.Ve
	567	.PP
	568	Within an action, a number of useful parse-time variables are
	569	available in the special parser namespace (there are other variables
	570	also accessible, but meddling with them will probably just break your
	571	parser. As a general rule, if you avoid referring to unqualified
	572	variables \- especially those starting with an underscore \- inside an action,
	573	things should be okay):
	574	.ie n .IP "@item\fR and \f(CW%item" 4
	575	.el .IP "\f(CW@item\fR and \f(CW%item\fR" 4
	576	.IX Item "@item and %item"
	577	The array slice \f(CW@item[1..$#item]\fR stores the value associated with each item
	578	(that is, each subrule, token, or action) in the current production. The
	579	analogy is to \f(CW$1\fR, \f(CW$2\fR, etc. in a \fIyacc\fR grammar.
	580	Note that, for obvious reasons, \f(CW@item\fR only contains the
	581	values of items \fIbefore\fR the current point in the production.
	582	.Sp
	583	The first element (\f(CW$item[0]\fR) stores the name of the current rule
	584	being matched.
	585	.Sp
	586	\&\f(CW@item\fR is a standard Perl array, so it can also be indexed with negative
	587	numbers, representing the number of items \fIback\fR from the current position in
	588	the parse:
	589	.Sp
	590	.Vb 3
	591	\& stuff: /various/ bits 'and' pieces "then" data 'end'
	592	\& { print $item[-2] } # PRINTS data
	593	\& # (EASIER THAN: $item[6])
	594	.Ve
	595	.Sp
	596	The \f(CW%item\fR hash complements the \f(CW@item\fR array, providing named
	597	access to the same item values:
	598	.Sp
	599	.Vb 3
	600	\& stuff: /various/ bits 'and' pieces "then" data 'end'
	601	\& { print $item{data} } # PRINTS data
	602	\& # (EVEN EASIER THAN USING @item)
	603	.Ve
	604	.Sp
	605	The results of named subrules are stored in the hash under each
	606	subrule's name, whilst all other items are stored under a \*(L"named
	607	positional\*(R" key that indicates their ordinal position within their item
	608	type: _\|_STRING\fIn\fR_\|_, _\|_PATTERN\fIn\fR_\|_, _\|_DIRECTIVE\fIn\fR_\|_, _\|_ACTION\fIn\fR_\|_:
	609	.Sp
	610	.Vb 6
	611	\& stuff: /various/ bits 'and' pieces "then" data 'end' { save }
	612	\& { print $item{__PATTERN1__}, # PRINTS 'various'
	613	\& $item{__STRING2__}, # PRINTS 'then'
	614	\& $item{__ACTION1__}, # PRINTS RETURN
	615	\& # VALUE OF save
	616	\& }
	617	.Ve
	618	.Sp
	619	If you want proper \fInamed\fR access to patterns or literals, you need to turn
	620	them into separate rules:
	621	.Sp
	622	.Vb 3
	623	\& stuff: various bits 'and' pieces "then" data 'end'
	624	\& { print $item{various} # PRINTS various
	625	\& }
	626	.Ve
	627	.Sp
	628	.Vb 1
	629	\& various: /various/
	630	.Ve
	631	.Sp
	632	The special entry \f(CW$item{_\|_RULE_\|_}\fR stores the name of the current
	633	rule (i.e. the same value as \f(CW$item[0]\fR).
	634	.Sp
	635	The advantage of using \f(CW%item\fR, instead of \f(CW@item\fR, is that it
	636	removes the need to track item positions that may change as a grammar
	637	evolves. For example, adding an interim \f(CW\*(C`<skip>\*(C'\fR directive
	638	or action can silently ruin a trailing action, by moving an \f(CW@item\fR
	639	element \*(L"down\*(R" the array one place. In contrast, the named entry
	640	of \f(CW%item\fR is unaffected by such an insertion.
	641	.Sp
	642	A limitation of the \f(CW%item\fR hash is that it only records the \fIlast\fR
	643	value of a particular subrule. For example:
	644	.Sp
	645	.Vb 2
	646	\& range: '(' number '..' number ')'
	647	\& { $return = $item{number} }
	648	.Ve
	649	.Sp
	650	will return only the value corresponding to the \fIsecond\fR match of the
	651	\&\f(CW\(C`number\(C'\fR subrule. In other words, successive calls to a subrule
	652	overwrite the corresponding entry in \f(CW%item\fR. Once again, the
	653	solution is to rename each subrule in its own rule:
	654	.Sp
	655	.Vb 2
	656	\& range: '(' from_num '..' to_num ')'
	657	\& { $return = $item{from_num} }
	658	.Ve
	659	.Sp
	660	.Vb 2
	661	\& from_num: number
	662	\& to_num: number
	663	.Ve
	664	.ie n .IP "@arg\fR and \f(CW%arg" 4
	665	.el .IP "\f(CW@arg\fR and \f(CW%arg\fR" 4
	666	.IX Item "@arg and %arg"
	667	The array \f(CW@arg\fR and the hash \f(CW%arg\fR store any arguments passed to
	668	the rule from some other rule (see \*(L"Subrule argument lists\*(R"). Changes
	669	to the elements of either variable do not propagate back to the calling
	670	rule (data can be passed back from a subrule via the \f(CW$return\fR
	671	variable \- see next item).
	672	.ie n .IP "$return" 4
	673	.el .IP "\f(CW$return\fR" 4
	674	.IX Item "$return"
	675	If a value is assigned to \f(CW$return\fR within an action, that value is
	676	returned if the production containing the action eventually matches
	677	successfully. Note that setting \f(CW$return\fR \fIdoesn't\fR cause the current
	678	production to succeed. It merely tells it what to return if it \fIdoes\fR succeed.
	679	Hence \f(CW$return\fR is analogous to \f(CW$$\fR in a \fIyacc\fR grammar.
	680	.Sp
	681	If \f(CW$return\fR is not assigned within a production, the value of the
	682	last component of the production (namely: \f(CW$item[$#item]\fR) is
	683	returned if the production succeeds.
	684	.ie n .IP "$commit" 4
	685	.el .IP "\f(CW$commit\fR" 4
	686	.IX Item "$commit"
	687	The current state of commitment to the current production (see \(L"Directives\(R"
	688	below).
	689	.ie n .IP "$skip" 4
	690	.el .IP "\f(CW$skip\fR" 4
	691	.IX Item "$skip"
	692	The current terminal prefix (see \(L"Directives\(R" below).
	693	.ie n .IP "$text" 4
	694	.el .IP "\f(CW$text\fR" 4
	695	.IX Item "$text"
	696	The remaining (unparsed) text. Changes to \f(CW$text\fR \fIdo not
	697	propagate\fR out of unsuccessful productions, but \fIdo\fR survive
	698	successful productions. Hence it is possible to dynamically alter the
	699	text being parsed \- for example, to provide a \f(CW\(C`#include\(C'\fR\-like facility:
	700	.Sp
	701	.Vb 2
	702	\& hash_include: '#include' filename
	703	\& { $text = ::loadfile($item[2]) . $text }
	704	.Ve
	705	.Sp
	706	.Vb 2
	707	\& filename: '<' /[a-z0-9._-]+/i '>' { $return = $item[2] }
	708	\& | '"' /[a-z0-9._-]+/i '"' { $return = $item[2] }
	709	.Ve
	710	.ie n .IP "$thisline\fR and \f(CW$prevline" 4
	711	.el .IP "\f(CW$thisline\fR and \f(CW$prevline\fR" 4
	712	.IX Item "$thisline and $prevline"
	713	\&\f(CW$thisline\fR stores the current line number within the current parse
	714	(starting from 1). \f(CW$prevline\fR stores the line number for the last
	715	character which was already successfully parsed (this will be different from
	716	\&\f(CW$thisline\fR at the end of each line).
	717	.Sp
	718	For efficiency, \f(CW$thisline\fR and \f(CW$prevline\fR are actually tied
	719	hashes, and only recompute the required line number when the variable's
	720	value is used.
	721	.Sp
	722	Assignment to \f(CW$thisline\fR adjusts the line number calculator, so that
	723	it believes that the current line number is the value being assigned. Note
	724	that this adjustment will be reflected in all subsequent line numbers
	725	calculations.
	726	.Sp
	727	Modifying the value of the variable \f(CW$text\fR (as in the previous
	728	\&\f(CW\(C`hash_include\(C'\fR example, for instance) will confuse the line
	729	counting mechanism. To prevent this, you should call
	730	\&\f(CW\(C`Parse::RecDescent::LineCounter::resync($thisline)\(C'\fR \fIimmediately\fR
	731	after any assignment to the variable \f(CW$text\fR (or, at least, before the
	732	next attempt to use \f(CW$thisline\fR).
	733	.Sp
	734	Note that if a production fails after assigning to or
	735	resync'ing \f(CW$thisline\fR, the parser's line counter mechanism will
	736	usually be corrupted.
	737	.Sp
	738	Also see the entry for \f(CW@itempos\fR.
	739	.Sp
	740	The line number can be set to values other than 1, by calling the start
	741	rule with a second argument. For example:
	742	.Sp
	743	.Vb 1
	744	\& $parser = new Parse::RecDescent ($grammar);
	745	.Ve
	746	.Sp
	747	.Vb 1
	748	\& $parser->input($text, 10); # START LINE NUMBERS AT 10
	749	.Ve
	750	.ie n .IP "$thiscolumn\fR and \f(CW$prevcolumn" 4
	751	.el .IP "\f(CW$thiscolumn\fR and \f(CW$prevcolumn\fR" 4
	752	.IX Item "$thiscolumn and $prevcolumn"
	753	\&\f(CW$thiscolumn\fR stores the current column number within the current line
	754	being parsed (starting from 1). \f(CW$prevcolumn\fR stores the column number
	755	of the last character which was actually successfully parsed. Usually
	756	\&\f(CW\(C`$prevcolumn == $thiscolumn\-1\(C'\fR, but not at the end of lines.
	757	.Sp
	758	For efficiency, \f(CW$thiscolumn\fR and \f(CW$prevcolumn\fR are
	759	actually tied hashes, and only recompute the required column number
	760	when the variable's value is used.
	761	.Sp
	762	Assignment to \f(CW$thiscolumn\fR or \f(CW$prevcolumn\fR is a fatal error.
	763	.Sp
	764	Modifying the value of the variable \f(CW$text\fR (as in the previous
	765	\&\f(CW\(C`hash_include\(C'\fR example, for instance) may confuse the column
	766	counting mechanism.
	767	.Sp
	768	Note that \f(CW$thiscolumn\fR reports the column number \fIbefore\fR any
	769	whitespace that might be skipped before reading a token. Hence
	770	if you wish to know where a token started (and ended) use something like this:
	771	.Sp
	772	.Vb 2
	773	\& rule: token1 token2 startcol token3 endcol token4
	774	\& { print "token3: columns $item[3] to $item[5]"; }
	775	.Ve
	776	.Sp
	777	.Vb 2
	778	\& startcol: // { $thiscolumn } # NEED THE // TO STEP PAST TOKEN SEP
	779	\& endcol: { $prevcolumn }
	780	.Ve
	781	.Sp
	782	Also see the entry for \f(CW@itempos\fR.
	783	.ie n .IP "$thisoffset\fR and \f(CW$prevoffset" 4
	784	.el .IP "\f(CW$thisoffset\fR and \f(CW$prevoffset\fR" 4
	785	.IX Item "$thisoffset and $prevoffset"
	786	\&\f(CW$thisoffset\fR stores the offset of the current parsing position
	787	within the complete text
	788	being parsed (starting from 0). \f(CW$prevoffset\fR stores the offset
	789	of the last character which was actually successfully parsed. In all
	790	cases \f(CW\(C`$prevoffset == $thisoffset\-1\(C'\fR.
	791	.Sp
	792	For efficiency, \f(CW$thisoffset\fR and \f(CW$prevoffset\fR are
	793	actually tied hashes, and only recompute the required offset
	794	when the variable's value is used.
	795	.Sp
	796	Assignment to \f(CW$thisoffset\fR or \f(CW$prevoffset\fR is a fatal error.
	797	.Sp
	798	Modifying the value of the variable \f(CW$text\fR will \fInot\fR affect the
	799	offset counting mechanism.
	800	.Sp
	801	Also see the entry for \f(CW@itempos\fR.
	802	.ie n .IP "@itempos" 4
	803	.el .IP "\f(CW@itempos\fR" 4
	804	.IX Item "@itempos"
	805	The array \f(CW@itempos\fR stores a hash reference corresponding to
	806	each element of \f(CW@item\fR. The elements of the hash provide the
	807	following:
	808	.Sp
	809	.Vb 6
	810	\& $itempos[$n]{offset}{from} # VALUE OF $thisoffset BEFORE $item[$n]
	811	\& $itempos[$n]{offset}{to} # VALUE OF $prevoffset AFTER $item[$n]
	812	\& $itempos[$n]{line}{from} # VALUE OF $thisline BEFORE $item[$n]
	813	\& $itempos[$n]{line}{to} # VALUE OF $prevline AFTER $item[$n]
	814	\& $itempos[$n]{column}{from} # VALUE OF $thiscolumn BEFORE $item[$n]
	815	\& $itempos[$n]{column}{to} # VALUE OF $prevcolumn AFTER $item[$n]
	816	.Ve
	817	.Sp
	818	Note that the various \f(CW\(C`$itempos[$n]...{from}\(C'\fR values record the
	819	appropriate value \fIafter\fR any token prefix has been skipped.
	820	.Sp
	821	Hence, instead of the somewhat tedious and error\-prone:
	822	.Sp
	823	.Vb 9
	824	\& rule: startcol token1 endcol
	825	\& startcol token2 endcol
	826	\& startcol token3 endcol
	827	\& { print "token1: columns $item[1]
	828	\& to $item[3]
	829	\& token2: columns $item[4]
	830	\& to $item[6]
	831	\& token3: columns $item[7]
	832	\& to $item[9]" }
	833	.Ve
	834	.Sp
	835	.Vb 2
	836	\& startcol: // { $thiscolumn } # NEED THE // TO STEP PAST TOKEN SEP
	837	\& endcol: { $prevcolumn }
	838	.Ve
	839	.Sp
	840	it is possible to write:
	841	.Sp
	842	.Vb 7
	843	\& rule: token1 token2 token3
	844	\& { print "token1: columns $itempos[1]{column}{from}
	845	\& to $itempos[1]{column}{to}
	846	\& token2: columns $itempos[2]{column}{from}
	847	\& to $itempos[2]{column}{to}
	848	\& token3: columns $itempos[3]{column}{from}
	849	\& to $itempos[3]{column}{to}" }
	850	.Ve
	851	.Sp
	852	Note however that (in the current implementation) the use of \f(CW@itempos\fR
	853	anywhere in a grammar implies that item positioning information is
	854	collected \fIeverywhere\fR during the parse. Depending on the grammar
	855	and the size of the text to be parsed, this may be prohibitively
	856	expensive and the explicit use of \f(CW$thisline\fR, \f(CW$thiscolumn\fR, etc. may
	857	be a better choice.
	858	.ie n .IP "$thisparser" 4
	859	.el .IP "\f(CW$thisparser\fR" 4
	860	.IX Item "$thisparser"
	861	A reference to the \f(CW\(C`Parse::RecDescent\(C'\fR object through which
	862	parsing was initiated.
	863	.Sp
	864	The value of \f(CW$thisparser\fR propagates down the subrules of a parse
	865	but not back up. Hence, you can invoke subrules from another parser
	866	for the scope of the current rule as follows:
	867	.Sp
	868	.Vb 4
	869	\& rule: subrule1 subrule2
	870	\& \| { $thisparser = $::otherparser } <reject>
	871	\& \| subrule3 subrule4
	872	\& \| subrule5
	873	.Ve
	874	.Sp
	875	The result is that the first production calls \(L"subrule1\(R" and \(L"subrule2\(R" of
	876	the current parser, and the remaining productions call the named subrules
	877	from \f(CW$::otherparser\fR. Note, however, that \(L"Bad Things\(R" will happen if
	878	\&\f(CW$::otherparser\fR isn't a blessed reference and/or doesn't have methods
	879	with the same names as the required subrules!
	880	.ie n .IP "$thisrule" 4
	881	.el .IP "\f(CW$thisrule\fR" 4
	882	.IX Item "$thisrule"
	883	A reference to the \f(CW\(C`Parse::RecDescent::Rule\(C'\fR object corresponding to the
	884	rule currently being matched.
	885	.ie n .IP "$thisprod" 4
	886	.el .IP "\f(CW$thisprod\fR" 4
	887	.IX Item "$thisprod"
	888	A reference to the \f(CW\(C`Parse::RecDescent::Production\(C'\fR object
	889	corresponding to the production currently being matched.
	890	.ie n .IP "$score\fR and \f(CW$score_return" 4
	891	.el .IP "\f(CW$score\fR and \f(CW$score_return\fR" 4
	892	.IX Item "$score and $score_return"
	893	\&\f(CW$score\fR stores the best production score to date, as specified by
	894	an earlier \f(CW\(C`<score:...>\(C'\fR directive. \f(CW$score_return\fR stores
	895	the corresponding return value for the successful production.
	896	.Sp
	897	See \(L"Scored productions\(R".
	898	.PP
	899	\&\fBWarning:\fR the parser relies on the information in the various \f(CW\(C`this...\(C'\fR
	900	objects in some non-obvious ways. Tinkering with the other members of
	901	these objects will probably cause Bad Things to happen, unless you
	902	\&\fIreally\fR know what you're doing. The only exception to this advice is
	903	that the use of \f(CW\(C`$this...\->{local}\(C'\fR is always safe.
	904	.Sh "Start-up Actions"
	905	.IX Subsection "Start-up Actions"
	906	Any actions which appear \fIbefore\fR the first rule definition in a
	907	grammar are treated as \(L"start\-up\(R" actions. Each such action is
	908	stripped of its outermost brackets and then evaluated (in the parser's
	909	special namespace) just before the rules of the grammar are first
	910	compiled.
	911	.PP
	912	The main use of start-up actions is to declare local variables within the
	913	parser's special namespace:
	914	.PP
	915	.Vb 1
	916	\& { my $lastitem = '???'; }
	917	.Ve
	918	.PP
	919	.Vb 1
	920	\& list: item(s) { $return = $lastitem }
	921	.Ve
	922	.PP
	923	.Vb 3
	924	\& item: book { $lastitem = 'book'; }
	925	\& bell { $lastitem = 'bell'; }
	926	\& candle { $lastitem = 'candle'; }
	927	.Ve
	928	.PP
	929	but start-up actions can be used to execute \fIany\fR valid Perl code
	930	within a parser's special namespace.
	931	.PP
	932	Start-up actions can appear within a grammar extension or replacement
	933	(that is, a partial grammar installed via \f(CW\(C`Parse::RecDescent::Extend()\(C'\fR or
	934	\&\f(CW\(C`Parse::RecDescent::Replace()\(C'\fR \- see \(L"Incremental Parsing\(R"), and will be
	935	executed before the new grammar is installed. Note, however, that a
	936	particular start-up action is only ever executed once.
	937	.Sh "Autoactions"
	938	.IX Subsection "Autoactions"
	939	It is sometimes desirable to be able to specify a default action to be
	940	taken at the end of every production (for example, in order to easily
	941	build a parse tree). If the variable \f(CW$::RD_AUTOACTION\fR is defined
	942	when \f(CW\(C`Parse::RecDescent::new()\(C'\fR is called, the contents of that
	943	variable are treated as a specification of an action which is to be appended
	944	to each production in the corresponding grammar. So, for example, to construct
	945	a simple parse tree:
	946	.PP
	947	.Vb 1
	948	\& $::RD_AUTOACTION = q { [@item] };
	949	.Ve
	950	.PP
	951	.Vb 7
	952	\& $parser = new Parse::RecDescent (q{
	953	\& expression: and_expr '\|\|' expression \| and_expr
	954	\& and_expr: not_expr '&&' and_expr \| not_expr
	955	\& not_expr: '!' brack_expr \| brack_expr
	956	\& brack_expr: '(' expression ')' \| identifier
	957	\& identifier: /[a-z]+/i
	958	\& });
	959	.Ve
	960	.PP
	961	which is equivalent to:
	962	.PP
	963	.Vb 5
	964	\& $parser = new Parse::RecDescent (q{
	965	\& expression: and_expr '\|\|' expression
	966	\& { [@item] }
	967	\& \| and_expr
	968	\& { [@item] }
	969	.Ve
	970	.PP
	971	.Vb 4
	972	\& and_expr: not_expr '&&' and_expr
	973	\& { [@item] }
	974	\& \| not_expr
	975	\& { [@item] }
	976	.Ve
	977	.PP
	978	.Vb 4
	979	\& not_expr: '!' brack_expr
	980	\& { [@item] }
	981	\& \| brack_expr
	982	\& { [@item] }
	983	.Ve
	984	.PP
	985	.Vb 4
	986	\& brack_expr: '(' expression ')'
	987	\& { [@item] }
	988	\& \| identifier
	989	\& { [@item] }
	990	.Ve
	991	.PP
	992	.Vb 3
	993	\& identifier: /[a-z]+/i
	994	\& { [@item] }
	995	\& });
	996	.Ve
	997	.PP
	998	Alternatively, we could take an object-oriented approach, using a different
	999	class for each node (and also eliminating redundant intermediate nodes):
	1000	.PP
	1001	.Vb 2
	1002	\& $::RD_AUTOACTION = q
	1003	\& { $#item==1 ? $item[1] : new ${"$item[0]_node"} (@item[1..$#item]) };
	1004	.Ve
	1005	.PP
	1006	.Vb 7
	1007	\& $parser = new Parse::RecDescent (q{
	1008	\& expression: and_expr '\|\|' expression \| and_expr
	1009	\& and_expr: not_expr '&&' and_expr \| not_expr
	1010	\& not_expr: '!' brack_expr \| brack_expr
	1011	\& brack_expr: '(' expression ')' \| identifier
	1012	\& identifier: /[a-z]+/i
	1013	\& });
	1014	.Ve
	1015	.PP
	1016	which is equivalent to:
	1017	.PP
	1018	.Vb 4
	1019	\& $parser = new Parse::RecDescent (q{
	1020	\& expression: and_expr '\|\|' expression
	1021	\& { new expression_node (@item[1..3]) }
	1022	\& \| and_expr
	1023	.Ve
	1024	.PP
	1025	.Vb 3
	1026	\& and_expr: not_expr '&&' and_expr
	1027	\& { new and_expr_node (@item[1..3]) }
	1028	\& \| not_expr
	1029	.Ve
	1030	.PP
	1031	.Vb 3
	1032	\& not_expr: '!' brack_expr
	1033	\& { new not_expr_node (@item[1..2]) }
	1034	\& \| brack_expr
	1035	.Ve
	1036	.PP
	1037	.Vb 3
	1038	\& brack_expr: '(' expression ')'
	1039	\& { new brack_expr_node (@item[1..3]) }
	1040	\& \| identifier
	1041	.Ve
	1042	.PP
	1043	.Vb 3
	1044	\& identifier: /[a-z]+/i
	1045	\& { new identifier_node (@item[1]) }
	1046	\& });
	1047	.Ve
	1048	.PP
	1049	Note that, if a production already ends in an action, no autoaction is appended
	1050	to it. For example, in this version:
	1051	.PP
	1052	.Vb 2
	1053	\& $::RD_AUTOACTION = q
	1054	\& { $#item==1 ? $item[1] : new ${"$item[0]_node"} (@item[1..$#item]) };
	1055	.Ve
	1056	.PP
	1057	.Vb 8
	1058	\& $parser = new Parse::RecDescent (q{
	1059	\& expression: and_expr '&&' expression \| and_expr
	1060	\& and_expr: not_expr '&&' and_expr \| not_expr
	1061	\& not_expr: '!' brack_expr \| brack_expr
	1062	\& brack_expr: '(' expression ')' \| identifier
	1063	\& identifier: /[a-z]+/i
	1064	\& { new terminal_node($item[1]) }
	1065	\& });
	1066	.Ve
	1067	.PP
	1068	each \f(CW\(C`identifier\(C'\fR match produces a \f(CW\(C`terminal_node\(C'\fR object, \fInot\fR an
	1069	\&\f(CW\(C`identifier_node\(C'\fR object.
	1070	.PP
	1071	A level 1 warning is issued each time an \(L"autoaction\(R" is added to
	1072	some production.
	1073	.Sh "Autotrees"
	1074	.IX Subsection "Autotrees"
	1075	A commonly needed autoaction is one that builds a parse\-tree. It is moderately
	1076	tricky to set up such an action (which must treat terminals differently from
	1077	non\-terminals), so Parse::RecDescent simplifies the process by providing the
	1078	\&\f(CW\(C`<autotree>\(C'\fR directive.
	1079	.PP
	1080	If this directive appears at the start of a grammar, it causes
	1081	Parse::RecDescent to insert autoactions at the end of any rule except
	1082	those which already end in an action. The action inserted depends on whether
	1083	the production is an intermediate rule (two or more items), or a terminal
	1084	of the grammar (i.e. a single pattern or string item).
	1085	.PP
	1086	So, for example, the following grammar:
	1087	.PP
	1088	.Vb 1
	1089	\& <autotree>
	1090	.Ve
	1091	.PP
	1092	.Vb 7
	1093	\& file : command(s)
	1094	\& command : get \| set \| vet
	1095	\& get : 'get' ident ';'
	1096	\& set : 'set' ident 'to' value ';'
	1097	\& vet : 'check' ident 'is' value ';'
	1098	\& ident : /\ew+/
	1099	\& value : /\ed+/
	1100	.Ve
	1101	.PP
	1102	is equivalent to:
	1103	.PP
	1104	.Vb 7
	1105	\& file : command(s) { bless \e%item, $item[0] }
	1106	\& command : get { bless \e%item, $item[0] }
	1107	\& \| set { bless \e%item, $item[0] }
	1108	\& \| vet { bless \e%item, $item[0] }
	1109	\& get : 'get' ident ';' { bless \e%item, $item[0] }
	1110	\& set : 'set' ident 'to' value ';' { bless \e%item, $item[0] }
	1111	\& vet : 'check' ident 'is' value ';' { bless \e%item, $item[0] }
	1112	.Ve
	1113	.PP
	1114	.Vb 2
	1115	\& ident : /\ew+/ { bless {__VALUE__=>$item[1]}, $item[0] }
	1116	\& value : /\ed+/ { bless {__VALUE__=>$item[1]}, $item[0] }
	1117	.Ve
	1118	.PP
	1119	Note that each node in the tree is blessed into a class of the same name
	1120	as the rule itself. This makes it easy to build object-oriented
	1121	processors for the parse-trees that the grammar produces. Note too that
	1122	the last two rules produce special objects with the single attribute
	1123	\&'_\\|_VALUE_\\|_'. This is because they consist solely of a single terminal.
	1124	.PP
	1125	This autoaction-ed grammar would then produce a parse tree in a data
	1126	structure like this:
	1127	.PP
	1128	.Vb 18
	1129	\& {
	1130	\& file => {
	1131	\& command => {
	1132	\& [ get => {
	1133	\& identifier => { __VALUE__ => 'a' },
	1134	\& },
	1135	\& set => {
	1136	\& identifier => { __VALUE__ => 'b' },
	1137	\& value => { __VALUE__ => '7' },
	1138	\& },
	1139	\& vet => {
	1140	\& identifier => { __VALUE__ => 'b' },
	1141	\& value => { __VALUE__ => '7' },
	1142	\& },
	1143	\& ],
	1144	\& },
	1145	\& }
	1146	\& }
	1147	.Ve
	1148	.PP
	1149	(except, of course, that each nested hash would also be blessed into
	1150	the appropriate class).
	1151	.Sh "Autostubbing"
	1152	.IX Subsection "Autostubbing"
	1153	Normally, if a subrule appears in some production, but no rule of that
	1154	name is ever defined in the grammar, the production which refers to the
	1155	non-existent subrule fails immediately. This typically occurs as a
	1156	result of misspellings, and is a sufficiently common occurrence that a
	1157	warning is generated for such situations.
	1158	.PP
	1159	However, when prototyping a grammar it is sometimes useful to be
	1160	able to use subrules before a proper specification of them is
	1161	really possible. For example, a grammar might include a section like:
	1162	.PP
	1163	.Vb 1
	1164	\& function_call: identifier '(' arg(s?) ')'
	1165	.Ve
	1166	.PP
	1167	.Vb 1
	1168	\& identifier: /[a-z]\ew*/i
	1169	.Ve
	1170	.PP
	1171	where the possible format of an argument is sufficiently complex that
	1172	it is not worth specifying in full until the general function call
	1173	syntax has been debugged. In this situation it is convenient to leave
	1174	the real rule \f(CW\(C`arg\(C'\fR undefined and just slip in a placeholder (or
	1175	\&\(L"stub\(R"):
	1176	.PP
	1177	.Vb 1
	1178	\& arg: 'arg'
	1179	.Ve
	1180	.PP
	1181	so that the function call syntax can be tested with dummy input such as:
	1182	.PP
	1183	.Vb 4
	1184	\& f0()
	1185	\& f1(arg)
	1186	\& f2(arg arg)
	1187	\& f3(arg arg arg)
	1188	.Ve
	1189	.PP
	1190	et cetera.
	1191	.PP
	1192	Early in prototyping, many such \(L"stubs\(R" may be required, so
	1193	\&\f(CW\(C`Parse::RecDescent\(C'\fR provides a means of automating their definition.
	1194	If the variable \f(CW$::RD_AUTOSTUB\fR is defined when a parser is built,
	1195	a subrule reference to any non-existent rule (say, \f(CW\(C`sr\(C'\fR),
	1196	causes a \(L"stub\(R" rule of the form:
	1197	.PP
	1198	.Vb 1
	1199	\& sr: 'sr'
	1200	.Ve
	1201	.PP
	1202	to be automatically defined in the generated parser.
	1203	A level 1 warning is issued for each such \(L"autostubbed\(R" rule.
	1204	.PP
	1205	Hence, with \f(CW$::RD_AUTOSTUB\fR defined, it is possible to only partially
	1206	specify a grammar, and then \(L"fake\(R" matches of the unspecified
	1207	(sub)rules by just typing in their name.
	1208	.Sh "Look-ahead"
	1209	.IX Subsection "Look-ahead"
	1210	If a subrule, token, or action is prefixed by \(L"...\(R", then it is
	1211	treated as a \(L"look\-ahead\(R" request. That means that the current production can
	1212	(as usual) only succeed if the specified item is matched, but that the matching
	1213	\&\fIdoes not consume any of the text being parsed\fR. This is very similar to the
	1214	\&\f(CW\(C`/(?=...)/\(C'\fR look-ahead construct in Perl patterns. Thus, the rule:
	1215	.PP
	1216	.Vb 1
	1217	\& inner_word: word ...word
	1218	.Ve
	1219	.PP
	1220	will match whatever the subrule \(L"word\(R" matches, provided that match is followed
	1221	by some more text which subrule \(L"word\(R" would also match (although this
	1222	second substring is not actually consumed by \(L"inner_word\(R").
	1223	.PP
	1224	Likewise, a \(L"...!\(R" prefix causes the following item to succeed (without
	1225	consuming any text) if and only if it would normally fail. Hence, a
	1226	rule such as:
	1227	.PP
	1228	.Vb 1
	1229	\& identifier: ...!keyword ...!'_' /[A-Za-z_]\ew*/
	1230	.Ve
	1231	.PP
	1232	matches a string of characters which satisfies the pattern
	1233	\&\f(CW\*(C`/[A\-Za\-z_]\ew*/\*(C'\fR, but only if the same sequence of characters would
	1234	not match either subrule \(L"keyword\(R" or the literal token '_'.
	1235	.PP
	1236	Sequences of look-ahead prefixes accumulate, multiplying their positive and/or
	1237	negative senses. Hence:
	1238	.PP
	1239	.Vb 1
	1240	\& inner_word: word ...!......!word
	1241	.Ve
	1242	.PP
	1243	is exactly equivalent to the original example above (a warning is issued in
	1244	cases like these, since they often indicate something left out, or
	1245	misunderstood).
	1246	.PP
	1247	Note that actions can also be treated as look\-aheads. In such cases,
	1248	the state of the parser text (in the local variable \f(CW$text\fR)
	1249	\&\fIafter\fR the look-ahead action is guaranteed to be identical to its
	1250	state \fIbefore\fR the action, regardless of how it's changed \fIwithin\fR
	1251	the action (unless you actually undefine \f(CW$text\fR, in which case you
	1252	get the disaster you deserve :\-).
	1253	.Sh "Directives"
	1254	.IX Subsection "Directives"
	1255	Directives are special pre-defined actions which may be used to alter
	1256	the behaviour of the parser. There are currently eighteen directives:
	1257	\&\f(CW\(C`<commit>\(C'\fR,
	1258	\&\f(CW\(C`<uncommit>\(C'\fR,
	1259	\&\f(CW\(C`<reject>\(C'\fR,
	1260	\&\f(CW\(C`<score>\(C'\fR,
	1261	\&\f(CW\(C`<autoscore>\(C'\fR,
	1262	\&\f(CW\(C`<skip>\(C'\fR,
	1263	\&\f(CW\(C`<resync>\(C'\fR,
	1264	\&\f(CW\(C`<error>\(C'\fR,
	1265	\&\f(CW\(C`<rulevar>\(C'\fR,
	1266	\&\f(CW\(C`<matchrule>\(C'\fR,
	1267	\&\f(CW\(C`<leftop>\(C'\fR,
	1268	\&\f(CW\(C`<rightop>\(C'\fR,
	1269	\&\f(CW\(C`<defer>\(C'\fR,
	1270	\&\f(CW\(C`<nocheck>\(C'\fR,
	1271	\&\f(CW\(C`<perl_quotelike>\(C'\fR,
	1272	\&\f(CW\(C`<perl_codeblock>\(C'\fR,
	1273	\&\f(CW\(C`<perl_variable>\(C'\fR,
	1274	and \f(CW\(C`<token>\(C'\fR.
	1275	.IP "Committing and uncommitting" 4
	1276	.IX Item "Committing and uncommitting"
	1277	The \f(CW\(C`<commit>\(C'\fR and \f(CW\(C`<uncommit>\(C'\fR directives permit the recursive
	1278	descent of the parse tree to be pruned (or \(L"cut\(R") for efficiency.
	1279	Within a rule, a \f(CW\(C`<commit>\(C'\fR directive instructs the rule to ignore subsequent
	1280	productions if the current production fails. For example:
	1281	.Sp
	1282	.Vb 3
	1283	\& command: 'find' <commit> filename
	1284	\& \| 'open' <commit> filename
	1285	\& \| 'move' filename filename
	1286	.Ve
	1287	.Sp
	1288	Clearly, if the leading token 'find' is matched in the first production but that
	1289	production fails for some other reason, then the remaining
	1290	productions cannot possibly match. The presence of the
	1291	\&\f(CW\(C`<commit>\(C'\fR causes the \(L"command\(R" rule to fail immediately if
	1292	an invalid \(L"find\(R" command is found, and likewise if an invalid \(L"open\(R"
	1293	command is encountered.
	1294	.Sp
	1295	It is also possible to revoke a previous commitment. For example:
	1296	.Sp
	1297	.Vb 5
	1298	\& if_statement: 'if' <commit> condition
	1299	\& 'then' block <uncommit>
	1300	\& 'else' block
	1301	\& \| 'if' <commit> condition
	1302	\& 'then' block
	1303	.Ve
	1304	.Sp
	1305	In this case, a failure to find an \(L"else\(R" block in the first
	1306	production shouldn't preclude trying the second production, but a
	1307	failure to find a \(L"condition\(R" certainly should.
	1308	.Sp
	1309	As a special case, any production in which the \fIfirst\fR item is an
	1310	\&\f(CW\(C`<uncommit>\(C'\fR immediately revokes a preceding \f(CW\(C`<commit>\(C'\fR
	1311	(even though the production would not otherwise have been tried). For
	1312	example, in the rule:
	1313	.Sp
	1314	.Vb 5
	1315	\& request: 'explain' expression
	1316	\& \| 'explain' <commit> keyword
	1317	\& \| 'save'
	1318	\& \| 'quit'
	1319	\& \| <uncommit> term '?'
	1320	.Ve
	1321	.Sp
	1322	if the text being matched was \(L"explain?\(R", and the first two
	1323	productions failed, then the \f(CW\(C`<commit>\(C'\fR in production two would cause
	1324	productions three and four to be skipped, but the leading
	1325	\&\f(CW\(C`<uncommit>\(C'\fR in production five would allow that production to
	1326	attempt a match.
	1327	.Sp
	1328	Note in the preceding example, that the \f(CW\(C`<commit>\(C'\fR was only placed
	1329	in production two. If production one had been:
	1330	.Sp
	1331	.Vb 1
	1332	\& request: 'explain' <commit> expression
	1333	.Ve
	1334	.Sp
	1335	then production two would be (inappropriately) skipped if a leading
	1336	\&\(L"explain...\(R" was encountered.
	1337	.Sp
	1338	Both \f(CW\(C`<commit>\(C'\fR and \f(CW\(C`<uncommit>\(C'\fR directives always succeed, and their value
	1339	is always 1.
	1340	.IP "Rejecting a production" 4
	1341	.IX Item "Rejecting a production"
	1342	The \f(CW\(C`<reject>\(C'\fR directive immediately causes the current production
	1343	to fail (it is exactly equivalent to, but more obvious than, the
	1344	action \f(CW\(C`{undef}\(C'\fR). A \f(CW\(C`<reject>\(C'\fR is useful when it is desirable to get
	1345	the side effects of the actions in one production, without prejudicing a match
	1346	by some other production later in the rule. For example, to insert
	1347	tracing code into the parse:
	1348	.Sp
	1349	.Vb 1
	1350	\& complex_rule: { print "In complex rule...\en"; } <reject>
	1351	.Ve
	1352	.Sp
	1353	.Vb 3
	1354	\& complex_rule: simple_rule '+' 'i' '*' simple_rule
	1355	\& \| 'i' '*' simple_rule
	1356	\& \| simple_rule
	1357	.Ve
	1358	.Sp
	1359	It is also possible to specify a conditional rejection, using the
	1360	form \f(CW\(C`<reject:\f(CIcondition\f(CW>\(C'\fR, which only rejects if the
	1361	specified condition is true. This form of rejection is exactly
	1362	equivalent to the action \f(CW\(C`{(\f(CIcondition\f(CW)?undef:1}\(C'\fR.
	1363	For example:
	1364	.Sp
	1365	.Vb 4
	1366	\& command: save_command
	1367	\& \| restore_command
	1368	\& \| <reject: defined $::tolerant> { exit }
	1369	\& \| <error: Unknown command. Ignored.>
	1370	.Ve
	1371	.Sp
	1372	A \f(CW\(C`<reject>\(C'\fR directive never succeeds (and hence has no
	1373	associated value). A conditional rejection may succeed (if its
	1374	condition is not satisfied), in which case its value is 1.
	1375	.Sp
	1376	As an extra optimization, \f(CW\(C`Parse::RecDescent\(C'\fR ignores any production
	1377	which \fIbegins\fR with an unconditional \f(CW\(C`<reject>\(C'\fR directive,
	1378	since any such production can never successfully match or have any
	1379	useful side\-effects. A level 1 warning is issued in all such cases.
	1380	.Sp
	1381	Note that productions beginning with conditional
	1382	\&\f(CW\(C`<reject:...>\(C'\fR directives are \fInever\fR \(L"optimized away\(R" in
	1383	this manner, even if they are always guaranteed to fail (for example:
	1384	\&\f(CW\(C`<reject:1>\(C'\fR).
	1385	.Sp
	1386	Due to the way grammars are parsed, there is a minor restriction on the
	1387	condition of a conditional \f(CW\(C`<reject:...>\(C'\fR: it cannot
	1388	contain any raw '<' or '>' characters. For example:
	1389	.Sp
	1390	.Vb 1
	1391	\& line: cmd <reject: $thiscolumn > max> data
	1392	.Ve
	1393	.Sp
	1394	results in an error when a parser is built from this grammar (since the
	1395	grammar parser has no way of knowing whether the first > is a \(L"greater than\(R"
	1396	or the end of the \f(CW\(C`<reject:...>\(C'\fR).
	1397	.Sp
	1398	To overcome this problem, put the condition inside a do{} block:
	1399	.Sp
	1400	.Vb 1
	1401	\& line: cmd <reject: do{$thiscolumn > max}> data
	1402	.Ve
	1403	.Sp
	1404	Note that the same problem may occur in other directives that take
	1405	arguments. The same solution will work in all cases.
	1406	.IP "Skipping between terminals" 4
	1407	.IX Item "Skipping between terminals"
	1408	The \f(CW\(C`<skip>\(C'\fR directive enables the terminal prefix used in
	1409	a production to be changed. For example:
	1410	.Sp
	1411	.Vb 1
	1412	\& OneLiner: Command <skip:'[ \et]*'> Arg(s) /;/
	1413	.Ve
	1414	.Sp
	1415	causes only blanks and tabs to be skipped before terminals in the \f(CW\(C`Arg\(C'\fR
	1416	subrule (and any of \fIits\fR subrules), and also before the final \f(CW\(C`/;/\(C'\fR terminal.
	1417	Once the production is complete, the previous terminal prefix is
	1418	reinstated. Note that this implies that distinct productions of a rule
	1419	must reset their terminal prefixes individually.
	1420	.Sp
	1421	The \f(CW\(C`<skip>\(C'\fR directive evaluates to the \fIprevious\fR terminal prefix,
	1422	so it's easy to reinstate a prefix later in a production:
	1423	.Sp
	1424	.Vb 1
	1425	\& Command: <skip:","> CSV(s) <skip:$item[1]> Modifier
	1426	.Ve
	1427	.Sp
	1428	The value specified after the colon is interpolated into a pattern, so all of
	1429	the following are equivalent (though their efficiency increases down the list):
	1430	.Sp
	1431	.Vb 1
	1432	\& <skip: "$colon\|$comma"> # ASSUMING THE VARS HOLD THE OBVIOUS VALUES
	1433	.Ve
	1434	.Sp
	1435	.Vb 1
	1436	\& <skip: ':\|,'>
	1437	.Ve
	1438	.Sp
	1439	.Vb 1
	1440	\& <skip: q{[:,]}>
	1441	.Ve
	1442	.Sp
	1443	.Vb 1
	1444	\& <skip: qr/[:,]/>
	1445	.Ve
	1446	.Sp
	1447	There is no way of directly setting the prefix for
	1448	an entire rule, except as follows:
	1449	.Sp
	1450	.Vb 3
	1451	\& Rule: <skip: '[ \et]*'> Prod1
	1452	\& \| <skip: '[ \et]*'> Prod2a Prod2b
	1453	\& \| <skip: '[ \et]*'> Prod3
	1454	.Ve
	1455	.Sp
	1456	or, better:
	1457	.Sp
	1458	.Vb 6
	1459	\& Rule: <skip: '[ \et]*'>
	1460	\& (
	1461	\& Prod1
	1462	\& \| Prod2a Prod2b
	1463	\& \| Prod3
	1464	\& )
	1465	.Ve
	1466	.Sp
	1467	\&\fBNote: Up to release 1.51 of Parse::RecDescent, an entirely different
	1468	mechanism was used for specifying terminal prefixes. The current method
	1469	is not backwards-compatible with that early approach. The current approach
	1470	is stable and will not change again.\fR
	1471	.IP "Resynchronization" 4
	1472	.IX Item "Resynchronization"
	1473	The \f(CW\(C`<resync>\(C'\fR directive provides a visually distinctive
	1474	means of consuming some of the text being parsed, usually to skip an
	1475	erroneous input. In its simplest form \f(CW\(C`<resync>\(C'\fR simply
	1476	consumes text up to and including the next newline (\f(CW"\en"\fR)
	1477	character, succeeding only if the newline is found, in which case it
	1478	causes its surrounding rule to return zero on success.
	1479	.Sp
	1480	In other words, a \f(CW\(C`<resync>\(C'\fR is exactly equivalent to the token
	1481	\&\f(CW\*(C`/[^\en]*\en/\*(C'\fR followed by the action \f(CW\*(C`{\ $return\ =\ 0\ }\*(C'\fR (except that
	1482	productions beginning with a \f(CW\(C`<resync>\(C'\fR are ignored when generating
	1483	error messages). A typical use might be:
	1484	.Sp
	1485	.Vb 1
	1486	\& script : command(s)
	1487	.Ve
	1488	.Sp
	1489	.Vb 3
	1490	\& command: save_command
	1491	\& \| restore_command
	1492	\& \| <resync> # TRY NEXT LINE, IF POSSIBLE
	1493	.Ve
	1494	.Sp
	1495	It is also possible to explicitly specify a resynchronization
	1496	pattern, using the \f(CW\(C`<resync:\f(CIpattern\f(CW>\(C'\fR variant. This version
	1497	succeeds only if the specified pattern matches (and consumes) the
	1498	parsed text. In other words, \f(CW\(C`<resync:\f(CIpattern\f(CW>\(C'\fR is exactly
	1499	equivalent to the token \f(CW\(C`/\f(CIpattern\f(CW/\(C'\fR (followed by a \f(CW\(C`{\ $return\ =\ 0\ }\(C'\fR
	1500	action). For example, if commands were terminated by newlines or semi\-colons:
	1501	.Sp
	1502	.Vb 3
	1503	\& command: save_command
	1504	\& \| restore_command
	1505	\& \| <resync:[^;\en]*[;\en]>
	1506	.Ve
	1507	.Sp
	1508	The value of a successfully matched \f(CW\(C`<resync>\(C'\fR directive (of either
	1509	type) is the text that it consumed. Note, however, that since the
	1510	directive also sets \f(CW$return\fR, a production consisting of a lone
	1511	\&\f(CW\(C`<resync>\(C'\fR succeeds but returns the value zero (which a calling rule
	1512	may find useful to distinguish between \(L"true\(R" matches and \(L"tolerant\(R" matches).
	1513	Remember that returning a zero value indicates that the rule \fIsucceeded\fR (since
	1514	only an \f(CW\(C`undef\(C'\fR denotes failure within \f(CW\(C`Parse::RecDescent\(C'\fR parsers).
	1515	.IP "Error handling" 4
	1516	.IX Item "Error handling"
	1517	The \f(CW\(C`<error>\(C'\fR directive provides automatic or user-defined
	1518	generation of error messages during a parse. In its simplest form
	1519	\&\f(CW\(C`<error>\(C'\fR prepares an error message based on
	1520	the mismatch between the last item expected and the text which caused
	1521	it to fail. For example, given the rule:
	1522	.Sp
	1523	.Vb 3
	1524	\& McCoy: curse ',' name ', I'm a doctor, not a' a_profession '!'
	1525	\& \| pronoun 'dead,' name '!'
	1526	\& \| <error>
	1527	.Ve
	1528	.Sp
	1529	the following strings would produce the following messages:
	1530	.RS 4
	1531	.ie n .IP """Amen, Jim!""" 4
	1532	.el .IP "``Amen, Jim!''" 4
	1533	.IX Item "Amen, Jim!"
	1534	.Vb 2
	1535	\& ERROR (line 1): Invalid McCoy: Expected curse or pronoun
	1536	\& not found
	1537	.Ve
	1538	.ie n .IP """Dammit, Jim, I'm a doctor!""" 4
	1539	.el .IP "``Dammit, Jim, I'm a doctor!''" 4
	1540	.IX Item "Dammit, Jim, I'm a doctor!"
	1541	.Vb 2
	1542	\& ERROR (line 1): Invalid McCoy: Expected ", I'm a doctor, not a"
	1543	\& but found ", I'm a doctor!" instead
	1544	.Ve
	1545	.ie n .IP """He's dead,\en""" 4
	1546	.el .IP "``He's dead,\en''" 4
	1547	.IX Item "He's dead,n"
	1548	.Vb 1
	1549	\& ERROR (line 2): Invalid McCoy: Expected name not found
	1550	.Ve
	1551	.ie n .IP """He's alive!""" 4
	1552	.el .IP "``He's alive!''" 4
	1553	.IX Item "He's alive!"
	1554	.Vb 2
	1555	\& ERROR (line 1): Invalid McCoy: Expected 'dead,' but found
	1556	\& "alive!" instead
	1557	.Ve
	1558	.ie n .IP """Dammit, Jim, I'm a doctor, not a pointy-eared Vulcan!""" 4
	1559	.el .IP "``Dammit, Jim, I'm a doctor, not a pointy-eared Vulcan!''" 4
	1560	.IX Item "Dammit, Jim, I'm a doctor, not a pointy-eared Vulcan!"
	1561	.Vb 2
	1562	\& ERROR (line 1): Invalid McCoy: Expected a profession but found
	1563	\& "pointy-eared Vulcan!" instead
	1564	.Ve
	1565	.RE
	1566	.RS 4
	1567	.Sp
	1568	Note that, when autogenerating error messages, all underscores in any
	1569	rule name used in a message are replaced by single spaces (for example
	1570	\&\(L"a_production\(R" becomes \(L"a production\(R"). Judicious choice of rule
	1571	names can therefore considerably improve the readability of automatic
	1572	error messages (as well as the maintainability of the original
	1573	grammar).
	1574	.Sp
	1575	If the automatically generated error is not sufficient, it is possible to
	1576	provide an explicit message as part of the error directive. For example:
	1577	.Sp
	1578	.Vb 3
	1579	\& Spock: "Fascinating" ',' (name \| 'Captain') '.'
	1580	\& \| "Highly illogical, doctor."
	1581	\& \| <error: He never said that!>
	1582	.Ve
	1583	.Sp
	1584	which would result in \fIall\fR failures to parse a \(L"Spock\(R" subrule printing the
	1585	following message:
	1586	.Sp
	1587	.Vb 1
	1588	\& ERROR (line <N>): Invalid Spock: He never said that!
	1589	.Ve
	1590	.Sp
	1591	The error message is treated as a \(L"qq{...}\(R" string and interpolated
	1592	when the error is generated (\fInot\fR when the directive is specified!).
	1593	Hence:
	1594	.Sp
	1595	.Vb 1
	1596	\& <error: Mystical error near "$text">
	1597	.Ve
	1598	.Sp
	1599	would correctly insert the ambient text string which caused the error.
	1600	.Sp
	1601	There are two other forms of error directive: \f(CW\(C`<error?>\(C'\fR and
	1602	\&\f(CW\(C`<error?:\ msg>\(C'\fR. These behave just like \f(CW\(C`<error>\(C'\fR
	1603	and \f(CW\(C`<error:\ msg>\(C'\fR respectively, except that they are
	1604	only triggered if the rule is \(L"committed\(R" at the time they are
	1605	encountered. For example:
	1606	.Sp
	1607	.Vb 3
	1608	\& Scotty: "Ya kenna change the Laws of Phusics," <commit> name
	1609	\& \| name <commit> ',' 'she's goanta blaw!'
	1610	\& \| <error?>
	1611	.Ve
	1612	.Sp
	1613	will only generate an error for a string beginning with \*(L"Ya kenna
	1614	change the Laws of Phusics,\*(R" or a valid name, but which still fails to match the
	1615	corresponding production. That is, \f(CW\(C`$parser\->Scotty("Aye, Cap'ain")\(C'\fR will
	1616	fail silently (since neither production will \(L"commit\(R" the rule on that
	1617	input), whereas \f(CW\(C`$parser\->Scotty("Mr\ Spock,\ ah\ jest\ kenna\ do'ut!")\(C'\fR
	1618	will fail with the error message:
	1619	.Sp
	1620	.Vb 2
	1621	\& ERROR (line 1): Invalid Scotty: expected 'she's goanta blaw!'
	1622	\& but found 'ah jest kenna do'ut!' instead.
	1623	.Ve
	1624	.Sp
	1625	since in that case the second production would commit after matching
	1626	the leading name.
	1627	.Sp
	1628	Note that to allow this behaviour, all \f(CW\(C`<error>\(C'\fR directives which are
	1629	the first item in a production automatically uncommit the rule just
	1630	long enough to allow their production to be attempted (that is, when
	1631	their production fails, the commitment is reinstated so that
	1632	subsequent productions are skipped).
	1633	.Sp
	1634	In order to \fIpermanently\fR uncommit the rule before an error message,
	1635	it is necessary to put an explicit \f(CW\(C`<uncommit>\(C'\fR before the
	1636	\&\f(CW\(C`<error>\(C'\fR. For example:
	1637	.Sp
	1638	.Vb 5
	1639	\& line: 'Kirk:' <commit> Kirk
	1640	\& \| 'Spock:' <commit> Spock
	1641	\& \| 'McCoy:' <commit> McCoy
	1642	\& \| <uncommit> <error?> <reject>
	1643	\& \| <resync>
	1644	.Ve
	1645	.Sp
	1646	Error messages generated by the various \f(CW\(C`<error...>\(C'\fR directives
	1647	are not displayed immediately. Instead, they are \(L"queued\(R" in a buffer and
	1648	are only displayed once parsing ultimately fails. Moreover,
	1649	\&\f(CW\(C`<error...>\(C'\fR directives that cause one production of a rule
	1650	to fail are automatically removed from the message queue
	1651	if another production subsequently causes the entire rule to succeed.
	1652	This means that you can put
	1653	\&\f(CW\(C`<error...>\(C'\fR directives wherever useful diagnosis can be done,
	1654	and only those associated with actual parser failure will ever be
	1655	displayed. Also see \(L"Gotchas\(R".
	1656	.Sp
	1657	As a general rule, the most useful diagnostics are usually generated
	1658	either at the very lowest level within the grammar, or at the very
	1659	highest. A good rule of thumb is to identify those subrules which
	1660	consist mainly (or entirely) of terminals, and then put an
	1661	\&\f(CW\(C`<error...>\(C'\fR directive at the end of any other rule which calls
	1662	one or more of those subrules.
	1663	.Sp
	1664	There is one other situation in which the output of the various types of
	1665	error directive is suppressed; namely, when the rule containing them
	1666	is being parsed as part of a \(L"look\-ahead\(R" (see \(L"Look\-ahead\(R"). In this
	1667	case, the error directive will still cause the rule to fail, but will do
	1668	so silently.
	1669	.Sp
	1670	An unconditional \f(CW\(C`<error>\(C'\fR directive always fails (and hence has no
	1671	associated value). This means that encountering such a directive
	1672	always causes the production containing it to fail. Hence an
	1673	\&\f(CW\(C`<error>\(C'\fR directive will inevitably be the last (useful) item of a
	1674	rule (a level 3 warning is issued if a production contains items after an unconditional
	1675	\&\f(CW\(C`<error>\(C'\fR directive).
	1676	.Sp
	1677	An \f(CW\(C`<error?>\(C'\fR directive will \fIsucceed\fR (that is: fail to fail :\-), if
	1678	the current rule is uncommitted when the directive is encountered. In
	1679	that case the directive's associated value is zero. Hence, this type
	1680	of error directive \fIcan\fR be used before the end of a
	1681	production. For example:
	1682	.Sp
	1683	.Vb 3
	1684	\& command: 'do' <commit> something
	1685	\& \| 'report' <commit> something
	1686	\& \| <error?: Syntax error> <error: Unknown command>
	1687	.Ve
	1688	.Sp
	1689	\&\fBWarning:\fR The \f(CW\*(C`<error?>\*(C'\fR directive does \fInot\fR mean \*(L"always fail (but
	1690	do so silently unless committed)\*(R". It actually means \*(L"only fail (and report) if
	1691	committed, otherwise \fIsucceed\fR\*(R". To achieve the \*(L"fail silently if uncommitted\*(R"
	1692	semantics, it is necessary to use:
	1693	.Sp
	1694	.Vb 2
	1695	\& rule: item <commit> item(s)
	1696	\& \| <error?> <reject> # FAIL SILENTLY UNLESS COMMITTED
	1697	.Ve
	1698	.Sp
	1699	However, because people seem to expect a lone \f(CW\(C`<error?>\(C'\fR directive
	1700	to work like this:
	1701	.Sp
	1702	.Vb 3
	1703	\& rule: item <commit> item(s)
	1704	\& \| <error?: Error message if committed>
	1705	\& \| <error: Error message if uncommitted>
	1706	.Ve
	1707	.Sp
	1708	Parse::RecDescent automatically appends a
	1709	\&\f(CW\(C`<reject>\(C'\fR directive if the \f(CW\(C`<error?>\(C'\fR directive
	1710	is the only item in a production. A level 2 warning (see below)
	1711	is issued when this happens.
	1712	.Sp
	1713	The level of error reporting during both parser construction and
	1714	parsing is controlled by the presence or absence of four global
	1715	variables: \f(CW$::RD_ERRORS\fR, \f(CW$::RD_WARN\fR, \f(CW$::RD_HINT\fR, and
	1716	\&\f(CW$::RD_TRACE\fR. If \f(CW$::RD_ERRORS\fR is defined (and, by default, it is)
	1717	then fatal errors are reported.
	1718	.Sp
	1719	Whenever \f(CW$::RD_WARN\fR is defined, certain non-fatal problems are also reported.
	1720	Warnings have an associated \(L"level\(R": 1, 2, or 3. The higher the level,
	1721	the more serious the warning. The value of the corresponding global
	1722	variable (\f(CW$::RD_WARN\fR) determines the \fIlowest\fR level of warning to
	1723	be displayed. Hence, to see \fIall\fR warnings, set \f(CW$::RD_WARN\fR to 1.
	1724	To see only the most serious warnings set \f(CW$::RD_WARN\fR to 3.
	1725	By default \f(CW$::RD_WARN\fR is initialized to 3, ensuring that serious but
	1726	non-fatal errors are automatically reported.
	1727	.Sp
	1728	See \fI\*(L"\s-1DIAGNOSTICS\s0\*(R"\fR for a list of the various error and warning messages
	1729	that Parse::RecDescent generates when these two variables are defined.
	1730	.Sp
	1731	Defining any of the remaining variables (which are not defined by
	1732	default) further increases the amount of information reported.
	1733	Defining \f(CW$::RD_HINT\fR causes the parser generator to offer
	1734	more detailed analyses and hints on both errors and warnings.
	1735	Note that setting \f(CW$::RD_HINT\fR at any point automagically
	1736	sets \f(CW$::RD_WARN\fR to 1.
	1737	.Sp
	1738	Defining \f(CW$::RD_TRACE\fR causes the parser generator and the parser to
	1739	report their progress to \s-1STDERR\s0 in excruciating detail (although, without hints
	1740	unless \f(CW$::RD_HINT\fR is separately defined). This detail
	1741	can be moderated in only one respect: if \f(CW$::RD_TRACE\fR has an
	1742	integer value (\fIN\fR) greater than 1, only the \fIN\fR characters of
	1743	the \(L"current parsing context\(R" (that is, where in the input string we
	1744	are at any point in the parse) is reported at any time.
	1745	.Sp
	1746	\&\f(CW$::RD_TRACE\fR is mainly useful for debugging a grammar that isn't
	1747	behaving as you expected it to. To this end, if \f(CW$::RD_TRACE\fR is
	1748	defined when a parser is built, any actual parser code which is
	1749	generated is also written to a file named \(L"\s-1RD_TRACE\s0\(R" in the local
	1750	directory.
	1751	.Sp
	1752	Note that the four variables belong to the \(L"main\(R" package, which
	1753	makes them easier to refer to in the code controlling the parser, and
	1754	also makes it easy to turn them into command line flags (\(L"\-RD_ERRORS\(R",
	1755	\&\(L"\-RD_WARN\(R", \(L"\-RD_HINT\(R", \(L"\-RD_TRACE\(R") under \fBperl \-s\fR.
	1756	.RE
	1757	.IP "Specifying local variables" 4
	1758	.IX Item "Specifying local variables"
	1759	It is occasionally convenient to specify variables which are local
	1760	to a single rule. This may be achieved by including a
	1761	\&\f(CW\(C`<rulevar:...>\(C'\fR directive anywhere in the rule. For example:
	1762	.Sp
	1763	.Vb 1
	1764	\& markup: <rulevar: $tag>
	1765	.Ve
	1766	.Sp
	1767	.Vb 1
	1768	\& markup: tag {($tag=$item[1]) =~ s/^<\|>$//g} body[$tag]
	1769	.Ve
	1770	.Sp
	1771	The example \f(CW\(C`<rulevar: $tag>\(C'\fR directive causes a \(L"my\(R" variable named
	1772	\&\f(CW$tag\fR to be declared at the start of the subroutine implementing the
	1773	\&\f(CW\(C`markup\(C'\fR rule (that is, \fIbefore\fR the first production, regardless of
	1774	where in the rule it is specified).
	1775	.Sp
	1776	Specifically, any directive of the form:
	1777	\&\f(CW\(C`<rulevar:\f(CItext\f(CW>\(C'\fR causes a line of the form \f(CW\(C`my \f(CItext\f(CW;\(C'\fR
	1778	to be added at the beginning of the rule subroutine, immediately after
	1779	the definitions of the following local variables:
	1780	.Sp
	1781	.Vb 4
	1782	\& $thisparser $commit
	1783	\& $thisrule @item
	1784	\& $thisline @arg
	1785	\& $text %arg
	1786	.Ve
	1787	.Sp
	1788	This means that the following \f(CW\(C`<rulevar>\(C'\fR directives work
	1789	as expected:
	1790	.Sp
	1791	.Vb 1
	1792	\& <rulevar: $count = 0 >
	1793	.Ve
	1794	.Sp
	1795	.Vb 1
	1796	\& <rulevar: $firstarg = $arg[0] \|\| '' >
	1797	.Ve
	1798	.Sp
	1799	.Vb 1
	1800	\& <rulevar: $myItems = \e@item >
	1801	.Ve
	1802	.Sp
	1803	.Vb 1
	1804	\& <rulevar: @context = ( $thisline, $text, @arg ) >
	1805	.Ve
	1806	.Sp
	1807	.Vb 1
	1808	\& <rulevar: ($name,$age) = @arg{"name","age"} >
	1809	.Ve
	1810	.Sp
	1811	Note however that, because all such variables are \(L"my\(R" variables, their
	1812	values \fIdo not persist\fR between match attempts on a given rule. To
	1813	preserve values between match attempts, values can be stored within the
	1814	\&\(L"local\(R" member of the \f(CW$thisrule\fR object:
	1815	.Sp
	1816	.Vb 6
	1817	\& countedrule: { $thisrule->{"local"}{"count"}++ }
	1818	\& <reject>
	1819	\& \| subrule1
	1820	\& \| subrule2
	1821	\& \| <reject: $thisrule->{"local"}{"count"} == 1>
	1822	\& subrule3
	1823	.Ve
	1824	.Sp
	1825	When matching a rule, each \f(CW\(C`<rulevar>\(C'\fR directive is matched as
	1826	if it were an unconditional \f(CW\(C`<reject>\(C'\fR directive (that is, it
	1827	causes any production in which it appears to immediately fail to match).
	1828	For this reason (and to improve readability) it is usual to specify any
	1829	\&\f(CW\(C`<rulevar>\(C'\fR directive in a separate production at the start of
	1830	the rule (this has the added advantage that it enables
	1831	\&\f(CW\(C`Parse::RecDescent\(C'\fR to optimize away such productions, just as it does
	1832	for the \f(CW\(C`<reject>\(C'\fR directive).
	1833	.IP "Dynamically matched rules" 4
	1834	.IX Item "Dynamically matched rules"
	1835	Because regexes and double-quoted strings are interpolated, it is relatively
	1836	easy to specify productions with \(L"context sensitive\(R" tokens. For example:
	1837	.Sp
	1838	.Vb 1
	1839	\& command: keyword body "end $item[1]"
	1840	.Ve
	1841	.Sp
	1842	which ensures that a command block is bounded by a
	1843	"\fI<keyword>\fR...end \fI<same keyword>\fR" pair.
	1844	.Sp
	1845	Building productions in which subrules are context sensitive is also possible,
	1846	via the \f(CW\(C`<matchrule:...>\(C'\fR directive. This directive behaves
	1847	identically to a subrule item, except that the rule which is invoked to match
	1848	it is determined by the string specified after the colon. For example, we could
	1849	rewrite the \f(CW\(C`command\(C'\fR rule like this:
	1850	.Sp
	1851	.Vb 1
	1852	\& command: keyword <matchrule:body> "end $item[1]"
	1853	.Ve
	1854	.Sp
	1855	Whatever appears after the colon in the directive is treated as an interpolated
	1856	string (that is, as if it appeared in a \f(CW\*(C`qq{...}\*(C'\fR operator) and the value of
	1857	that interpolated string is the name of the subrule to be matched.
	1858	.Sp
	1859	Of course, just putting a constant string like \f(CW\(C`body\(C'\fR in a
	1860	\&\f(CW\(C`<matchrule:...>\(C'\fR directive is of little interest or benefit.
	1861	The power of the directive is seen when we use a string that interpolates
	1862	to something interesting. For example:
	1863	.Sp
	1864	.Vb 1
	1865	\& command: keyword <matchrule:$item[1]_body> "end $item[1]"
	1866	.Ve
	1867	.Sp
	1868	.Vb 1
	1869	\& keyword: 'while' \| 'if' \| 'function'
	1870	.Ve
	1871	.Sp
	1872	.Vb 1
	1873	\& while_body: condition block
	1874	.Ve
	1875	.Sp
	1876	.Vb 1
	1877	\& if_body: condition block ('else' block)(?)
	1878	.Ve
	1879	.Sp
	1880	.Vb 1
	1881	\& function_body: arglist block
	1882	.Ve
	1883	.Sp
	1884	Now the \f(CW\(C`command\(C'\fR rule selects how to proceed on the basis of the keyword
	1885	that is found. It is as if \f(CW\(C`command\(C'\fR were declared:
	1886	.Sp
	1887	.Vb 3
	1888	\& command: 'while' while_body "end while"
	1889	\& \| 'if' if_body "end if"
	1890	\& \| 'function' function_body "end function"
	1891	.Ve
	1892	.Sp
	1893	When a \f(CW\(C`<matchrule:...>\(C'\fR directive is used as a repeated
	1894	subrule, the rule name expression is \(L"late\-bound\(R". That is, the name of
	1895	the rule to be called is re-evaluated \fIeach time\fR a match attempt is
	1896	made. Hence, the following grammar:
	1897	.Sp
	1898	.Vb 1
	1899	\& { $::species = 'dogs' }
	1900	.Ve
	1901	.Sp
	1902	.Vb 1
	1903	\& pair: 'two' <matchrule:$::species>(s)
	1904	.Ve
	1905	.Sp
	1906	.Vb 1
	1907	\& dogs: /dogs/ { $::species = 'cats' }
	1908	.Ve
	1909	.Sp
	1910	.Vb 1
	1911	\& cats: /cats/
	1912	.Ve
	1913	.Sp
	1914	will match the string \(L"two dogs cats cats\(R" completely, whereas it will
	1915	only match the string \(L"two dogs dogs dogs\(R" up to the eighth letter. If
	1916	the rule name were \(L"early bound\(R" (that is, evaluated only the first
	1917	time the directive is encountered in a production), the reverse
	1918	behaviour would be expected.
	1919	.IP "Deferred actions" 4
	1920	.IX Item "Deferred actions"
	1921	The \f(CW\(C`<defer:...>\(C'\fR directive is used to specify an action to be
	1922	performed when (and only if!) the current production ultimately succeeds.
	1923	.Sp
	1924	Whenever a \f(CW\(C`<defer:...>\(C'\fR directive appears, the code it specifies
	1925	is converted to a closure (an anonymous subroutine reference) which is
	1926	queued within the active parser object. Note that,
	1927	because the deferred code is converted to a closure, the values of any
	1928	\&\*(L"local\*(R" variable (such as \f(CW$text\fR, \f(CW@item\fR, etc.) are preserved
	1929	until the deferred code is actually executed.
	1930	.Sp
	1931	If the parse ultimately succeeds
	1932	\&\fIand\fR the production in which the \f(CW\(C`<defer:...>\(C'\fR directive was
	1933	evaluated formed part of the successful parse, then the deferred code is
	1934	executed immediately before the parse returns. If however the production
	1935	which queued a deferred action fails, or one of the higher-level
	1936	rules which called that production fails, then the deferred action is
	1937	removed from the queue, and hence is never executed.
	1938	.Sp
	1939	For example, given the grammar:
	1940	.Sp
	1941	.Vb 2
	1942	\& sentence: noun trans noun
	1943	\& \| noun intrans
	1944	.Ve
	1945	.Sp
	1946	.Vb 4
	1947	\& noun: 'the dog'
	1948	\& { print "$item[1]\et(noun)\en" }
	1949	\& \| 'the meat'
	1950	\& { print "$item[1]\et(noun)\en" }
	1951	.Ve
	1952	.Sp
	1953	.Vb 2
	1954	\& trans: 'ate'
	1955	\& { print "$item[1]\et(transitive)\en" }
	1956	.Ve
	1957	.Sp
	1958	.Vb 4
	1959	\& intrans: 'ate'
	1960	\& { print "$item[1]\et(intransitive)\en" }
	1961	\& \| 'barked'
	1962	\& { print "$item[1]\et(intransitive)\en" }
	1963	.Ve
	1964	.Sp
	1965	then parsing the sentence \f(CW"the dog ate"\fR would produce the output:
	1966	.Sp
	1967	.Vb 4
	1968	\& the dog (noun)
	1969	\& ate (transitive)
	1970	\& the dog (noun)
	1971	\& ate (intransitive)
	1972	.Ve
	1973	.Sp
	1974	This is because, even though the first production of \f(CW\(C`sentence\(C'\fR
	1975	ultimately fails, its initial subrules \f(CW\(C`noun\(C'\fR and \f(CW\(C`trans\(C'\fR do match,
	1976	and hence they execute their associated actions.
	1977	Then the second production of \f(CW\(C`sentence\(C'\fR succeeds, causing the
	1978	actions of the subrules \f(CW\(C`noun\(C'\fR and \f(CW\(C`intrans\(C'\fR to be executed as well.
	1979	.Sp
	1980	On the other hand, if the actions were replaced by \f(CW\(C`<defer:...>\(C'\fR
	1981	directives:
	1982	.Sp
	1983	.Vb 2
	1984	\& sentence: noun trans noun
	1985	\& \| noun intrans
	1986	.Ve
	1987	.Sp
	1988	.Vb 4
	1989	\& noun: 'the dog'
	1990	\& <defer: print "$item[1]\et(noun)\en" >
	1991	\& \| 'the meat'
	1992	\& <defer: print "$item[1]\et(noun)\en" >
	1993	.Ve
	1994	.Sp
	1995	.Vb 2
	1996	\& trans: 'ate'
	1997	\& <defer: print "$item[1]\et(transitive)\en" >
	1998	.Ve
	1999	.Sp
	2000	.Vb 4
	2001	\& intrans: 'ate'
	2002	\& <defer: print "$item[1]\et(intransitive)\en" >
	2003	\& \| 'barked'
	2004	\& <defer: print "$item[1]\et(intransitive)\en" >
	2005	.Ve
	2006	.Sp
	2007	the output would be:
	2008	.Sp
	2009	.Vb 2
	2010	\& the dog (noun)
	2011	\& ate (intransitive)
	2012	.Ve
	2013	.Sp
	2014	since deferred actions are only executed if they were evaluated in
	2015	a production which ultimately contributes to the successful parse.
	2016	.Sp
	2017	In this case, even though the first production of \f(CW\(C`sentence\(C'\fR caused
	2018	the subrules \f(CW\(C`noun\(C'\fR and \f(CW\(C`trans\(C'\fR to match, that production ultimately
	2019	failed and so the deferred actions queued by those subrules were subsequently
	2020	discarded. The second production then succeeded, causing the entire
	2021	parse to succeed, and so the deferred actions queued by the (second) match of
	2022	the \f(CW\(C`noun\(C'\fR subrule and the subsequent match of \f(CW\(C`intrans\(C'\fR \fIare\fR preserved and
	2023	eventually executed.
	2024	.Sp
	2025	Deferred actions provide a means of improving the performance of a parser,
	2026	by only executing those actions which are part of the final parse-tree
	2027	for the input data.
	2028	.Sp
	2029	Alternatively, deferred actions can be viewed as a mechanism for building
	2030	(and executing) a
	2031	customized subroutine corresponding to the given input data, much in the
	2032	same way that autoactions (see \(L"Autoactions\(R") can be used to build a
	2033	customized data structure for specific input.
	2034	.Sp
	2035	Whether or not the action it specifies is ever executed,
	2036	a \f(CW\(C`<defer:...>\(C'\fR directive always succeeds, returning the
	2037	number of deferred actions currently queued at that point.
	2038	.IP "Parsing Perl" 4
	2039	.IX Item "Parsing Perl"
	2040	Parse::RecDescent provides limited support for parsing subsets of Perl,
	2041	namely: quote-like operators, Perl variables, and complete code blocks.
	2042	.Sp
	2043	The \f(CW\(C`<perl_quotelike>\(C'\fR directive can be used to parse any Perl
	2044	quote-like operator: \f(CW'a string'\fR, \f(CW\(C`m/a pattern/\(C'\fR, \f(CW\(C`tr{ans}{lation}\(C'\fR,
	2045	etc. It does this by calling \fIText::Balanced::extract_quotelike()\fR.
	2046	.Sp
	2047	If a quote-like operator is found, a reference to an array of eight elements
	2048	is returned. Those elements are identical to the last eight elements returned
	2049	by \fIText::Balanced::extract_quotelike()\fR in an array context, namely:
	2050	.RS 4
	2051	.IP "[0]" 4
	2052	.IX Item "[0]"
	2053	the name of the quotelike operator \*(-- 'q', 'qq', 'm', 's', 'tr' \*(-- if the
	2054	operator was named; otherwise \f(CW\(C`undef\(C'\fR,
	2055	.IP "[1]" 4
	2056	.IX Item "[1]"
	2057	the left delimiter of the first block of the operation,
	2058	.IP "[2]" 4
	2059	.IX Item "[2]"
	2060	the text of the first block of the operation
	2061	(that is, the contents of
	2062	a quote, the regex of a match or substitution, or the target list of a
	2063	translation),
	2064	.IP "[3]" 4
	2065	.IX Item "[3]"
	2066	the right delimiter of the first block of the operation,
	2067	.IP "[4]" 4
	2068	.IX Item "[4]"
	2069	the left delimiter of the second block of the operation if there is one
	2070	(that is, if it is a \f(CW\(C`s\(C'\fR, \f(CW\(C`tr\(C'\fR, or \f(CW\(C`y\(C'\fR); otherwise \f(CW\(C`undef\(C'\fR,
	2071	.IP "[5]" 4
	2072	.IX Item "[5]"
	2073	the text of the second block of the operation if there is one
	2074	(that is, the replacement of a substitution or the translation list
	2075	of a translation); otherwise \f(CW\(C`undef\(C'\fR,
	2076	.IP "[6]" 4
	2077	.IX Item "[6]"
	2078	the right delimiter of the second block of the operation (if any);
	2079	otherwise \f(CW\(C`undef\(C'\fR,
	2080	.IP "[7]" 4
	2081	.IX Item "[7]"
	2082	the trailing modifiers on the operation (if any); otherwise \f(CW\(C`undef\(C'\fR.
	2083	.RE
	2084	.RS 4
	2085	.Sp
	2086	If a quote-like expression is not found, the directive fails with the usual
	2087	\&\f(CW\(C`undef\(C'\fR value.
	2088	.Sp
	2089	The \f(CW\(C`<perl_variable>\(C'\fR directive can be used to parse any Perl
	2090	variable: \f(CW$scalar\fR, \f(CW@array\fR, \f(CW%hash\fR, \f(CW$ref\fR\->{field}[$index], etc.
	2091	It does this by calling \fIText::Balanced::extract_variable()\fR.
	2092	.Sp
	2093	If the directive matches text representing a valid Perl variable
	2094	specification, it returns that text. Otherwise it fails with the usual
	2095	\&\f(CW\(C`undef\(C'\fR value.
	2096	.Sp
	2097	The \f(CW\*(C`<perl_codeblock>\*(C'\fR directive can be used to parse a curly-brace-delimited block of Perl code, such as: { \f(CW$a\fR = 1; f() =~ m/pat/; }.
	2098	It does this by calling \fIText::Balanced::extract_codeblock()\fR.
	2099	.Sp
	2100	If the directive matches text representing a valid Perl code block,
	2101	it returns that text. Otherwise it fails with the usual \f(CW\(C`undef\(C'\fR value.
	2102	.RE
	2103	.IP "Constructing tokens" 4
	2104	.IX Item "Constructing tokens"
	2105	Eventually, Parse::RecDescent will be able to parse tokenized input, as
	2106	well as ordinary strings. In preparation for this joyous day, the
	2107	\&\f(CW\(C`<token:...>\(C'\fR directive has been provided.
	2108	This directive creates a token which will be suitable for
	2109	input to a Parse::RecDescent parser (when it eventually supports
	2110	tokenized input).
	2111	.Sp
	2112	The text of the token is the value of the
	2113	immediately preceding item in the production. A
	2114	\&\f(CW\(C`<token:...>\(C'\fR directive always succeeds with a return
	2115	value which is the hash reference that is the new token. It also
	2116	sets the return value for the production to that hash ref.
	2117	.Sp
	2118	The \f(CW\(C`<token:...>\(C'\fR directive makes it easy to build
	2119	a Parse::RecDescent\-compatible lexer in Parse::RecDescent:
	2120	.Sp
	2121	.Vb 3
	2122	\& my $lexer = new Parse::RecDescent q
	2123	\& {
	2124	\& lex: token(s)
	2125	.Ve
	2126	.Sp
	2127	.Vb 5
	2128	\& token: /a\eb/ <token:INDEF>
	2129	\& \| /the\eb/ <token:DEF>
	2130	\& \| /fly\eb/ <token:NOUN,VERB>
	2131	\& \| /[a-z]+/i { lc $item[1] } <token:ALPHA>
	2132	\& \| <error: Unknown token>
	2133	.Ve
	2134	.Sp
	2135	.Vb 1
	2136	\& };
	2137	.Ve
	2138	.Sp
	2139	which will eventually be able to be used with a regular Parse::RecDescent
	2140	grammar:
	2141	.Sp
	2142	.Vb 3
	2143	\& my $parser = new Parse::RecDescent q
	2144	\& {
	2145	\& startrule: subrule1 subrule2
	2146	.Ve
	2147	.Sp
	2148	.Vb 2
	2149	\& # ETC...
	2150	\& };
	2151	.Ve
	2152	.Sp
	2153	either with a pre-lexing phase:
	2154	.Sp
	2155	.Vb 1
	2156	\& $parser->startrule( $lexer->lex($data) );
	2157	.Ve
	2158	.Sp
	2159	or with a lex-on-demand approach:
	2160	.Sp
	2161	.Vb 1
	2162	\& $parser->startrule( sub{$lexer->token(\e$data)} );
	2163	.Ve
	2164	.Sp
	2165	But at present, only the \f(CW\(C`<token:...>\(C'\fR directive is
	2166	actually implemented. The rest is vapourware.
	2167	.IP "Specifying operations" 4
	2168	.IX Item "Specifying operations"
	2169	One of the commonest requirements when building a parser is to specify
	2170	binary operators. Unfortunately, in a normal grammar, the rules for
	2171	such things are awkward:
	2172	.Sp
	2173	.Vb 2
	2174	\& disjunction: conjunction ('or' conjunction)(s?)
	2175	\& { $return = [ $item[1], @{$item[2]} ] }
	2176	.Ve
	2177	.Sp
	2178	.Vb 2
	2179	\& conjunction: atom ('and' atom)(s?)
	2180	\& { $return = [ $item[1], @{$item[2]} ] }
	2181	.Ve
	2182	.Sp
	2183	or inefficient:
	2184	.Sp
	2185	.Vb 4
	2186	\& disjunction: conjunction 'or' disjunction
	2187	\& { $return = [ $item[1], @{$item[2]} ] }
	2188	\& \| conjunction
	2189	\& { $return = [ $item[1] ] }
	2190	.Ve
	2191	.Sp
	2192	.Vb 4
	2193	\& conjunction: atom 'and' conjunction
	2194	\& { $return = [ $item[1], @{$item[2]} ] }
	2195	\& \| atom
	2196	\& { $return = [ $item[1] ] }
	2197	.Ve
	2198	.Sp
	2199	and either way is ugly and hard to get right.
	2200	.Sp
	2201	The \f(CW\(C`<leftop:...>\(C'\fR and \f(CW\(C`<rightop:...>\(C'\fR directives provide an
	2202	easier way of specifying such operations. Using \f(CW\(C`<leftop:...>\(C'\fR the
	2203	above examples become:
	2204	.Sp
	2205	.Vb 2
	2206	\& disjunction: <leftop: conjunction 'or' conjunction>
	2207	\& conjunction: <leftop: atom 'and' atom>
	2208	.Ve
	2209	.Sp
	2210	The \f(CW\(C`<leftop:...>\(C'\fR directive specifies a left-associative binary operator.
	2211	It is specified around three other grammar elements
	2212	(typically subrules or terminals), which match the left operand,
	2213	the operator itself, and the right operand respectively.
	2214	.Sp
	2215	A \f(CW\(C`<leftop:...>\(C'\fR directive such as:
	2216	.Sp
	2217	.Vb 1
	2218	\& disjunction: <leftop: conjunction 'or' conjunction>
	2219	.Ve
	2220	.Sp
	2221	is converted to the following:
	2222	.Sp
	2223	.Vb 2
	2224	\& disjunction: ( conjunction ('or' conjunction)(s?)
	2225	\& { $return = [ $item[1], @{$item[2]} ] } )
	2226	.Ve
	2227	.Sp
	2228	In other words, a \f(CW\(C`<leftop:...>\(C'\fR directive matches the left operand followed by zero
	2229	or more repetitions of both the operator and the right operand. It then
	2230	flattens the matched items into an anonymous array which becomes the
	2231	(single) value of the entire \f(CW\(C`<leftop:...>\(C'\fR directive.
	2232	.Sp
	2233	For example, an \f(CW\(C`<leftop:...>\(C'\fR directive such as:
	2234	.Sp
	2235	.Vb 1
	2236	\& output: <leftop: ident '<<' expr >
	2237	.Ve
	2238	.Sp
	2239	when given a string such as:
	2240	.Sp
	2241	.Vb 1
	2242	\& cout << var << "str" << 3
	2243	.Ve
	2244	.Sp
	2245	would match, and \f(CW$item[1]\fR would be set to:
	2246	.Sp
	2247	.Vb 1
	2248	\& [ 'cout', 'var', '"str"', '3' ]
	2249	.Ve
	2250	.Sp
	2251	In other words:
	2252	.Sp
	2253	.Vb 1
	2254	\& output: <leftop: ident '<<' expr >
	2255	.Ve
	2256	.Sp
	2257	is equivalent to a left-associative operator:
	2258	.Sp
	2259	.Vb 5
	2260	\& output: ident { $return = [$item[1]] }
	2261	\& \| ident '<<' expr { $return = [@item[1,3]] }
	2262	\& \| ident '<<' expr '<<' expr { $return = [@item[1,3,5]] }
	2263	\& \| ident '<<' expr '<<' expr '<<' expr { $return = [@item[1,3,5,7]] }
	2264	\& # ...etc...
	2265	.Ve
	2266	.Sp
	2267	Similarly, the \f(CW\(C`<rightop:...>\(C'\fR directive takes a left operand, an operator, and a right operand:
	2268	.Sp
	2269	.Vb 1
	2270	\& assign: <rightop: var '=' expr >
	2271	.Ve
	2272	.Sp
	2273	and converts them to:
	2274	.Sp
	2275	.Vb 2
	2276	\& assign: ( (var '=' {$return=$item[1]})(s?) expr
	2277	\& { $return = [ @{$item[1]}, $item[2] ] } )
	2278	.Ve
	2279	.Sp
	2280	which is equivalent to a right-associative operator:
	2281	.Sp
	2282	.Vb 5
	2283	\& assign: var { $return = [$item[1]] }
	2284	\& \| var '=' expr { $return = [@item[1,3]] }
	2285	\& \| var '=' var '=' expr { $return = [@item[1,3,5]] }
	2286	\& \| var '=' var '=' var '=' expr { $return = [@item[1,3,5,7]] }
	2287	\& # ...etc...
	2288	.Ve
	2289	.Sp
	2290	Note that for both the \f(CW\(C`<leftop:...>\(C'\fR and \f(CW\(C`<rightop:...>\(C'\fR directives, the directive does not normally
	2291	return the operator itself, just a list of the operands involved. This is
	2292	particularly handy for specifying lists:
	2293	.Sp
	2294	.Vb 2
	2295	\& list: '(' <leftop: list_item ',' list_item> ')'
	2296	\& { $return = $item[2] }
	2297	.Ve
	2298	.Sp
	2299	There is, however, a problem: sometimes the operator is itself significant.
	2300	For example, in a Perl list a comma and a \f(CW\(C`=>\(C'\fR are both
	2301	valid separators, but the \f(CW\(C`=>\(C'\fR has additional stringification semantics.
	2302	Hence it's important to know which was used in each case.
	2303	.Sp
	2304	To solve this problem the
	2305	\&\f(CW\(C`<leftop:...>\(C'\fR and \f(CW\(C`<rightop:...>\(C'\fR directives
	2306	\&\fIdo\fR return the operator(s) as well, under two circumstances.
	2307	The first case is where the operator is specified as a subrule. In that instance,
	2308	whatever the operator matches is returned (on the assumption that if the operator
	2309	is important enough to have its own subrule, then it's important enough to return).
	2310	.Sp
	2311	The second case is where the operator is specified as a regular
	2312	expression. In that case, if the first bracketed subpattern of the
	2313	regular expression matches, that matching value is returned (this is analogous to
	2314	the behaviour of the Perl \f(CW\(C`split\(C'\fR function, except that only the first subpattern
	2315	is returned).
	2316	.Sp
	2317	In other words, given the input:
	2318	.Sp
	2319	.Vb 1
	2320	\& ( a=>1, b=>2 )
	2321	.Ve
	2322	.Sp
	2323	the specifications:
	2324	.Sp
	2325	.Vb 1
	2326	\& list: '(' <leftop: list_item separator list_item> ')'
	2327	.Ve
	2328	.Sp
	2329	.Vb 1
	2330	\& separator: ',' \| '=>'
	2331	.Ve
	2332	.Sp
	2333	or:
	2334	.Sp
	2335	.Vb 1
	2336	\& list: '(' <leftop: list_item /(,\|=>)/ list_item> ')'
	2337	.Ve
	2338	.Sp
	2339	cause the list separators to be interleaved with the operands in the
	2340	anonymous array in \f(CW$item[2]\fR:
	2341	.Sp
	2342	.Vb 1
	2343	\& [ 'a', '=>', '1', ',', 'b', '=>', '2' ]
	2344	.Ve
	2345	.Sp
	2346	But the following version:
	2347	.Sp
	2348	.Vb 1
	2349	\& list: '(' <leftop: list_item /,\|=>/ list_item> ')'
	2350	.Ve
	2351	.Sp
	2352	returns only the operands:
	2353	.Sp
	2354	.Vb 1
	2355	\& [ 'a', '1', 'b', '2' ]
	2356	.Ve
	2357	.Sp
	2358	Of course, none of the above specifications handle the case of an empty
	2359	list, since the \f(CW\(C`<leftop:...>\(C'\fR and \f(CW\(C`<rightop:...>\(C'\fR directives
	2360	require at least a single right or left operand to match. To specify
	2361	that the operator can match \(L"trivially\(R",
	2362	it's necessary to add a \f(CW\(C`(?)\(C'\fR qualifier to the directive:
	2363	.Sp
	2364	.Vb 1
	2365	\& list: '(' <leftop: list_item /(,\|=>)/ list_item>(?) ')'
	2366	.Ve
	2367	.Sp
	2368	Note that in almost all the above examples, the first and third arguments
	2369	of the \f(CW\(C`<leftop:...>\(C'\fR directive were the same subrule. That is because
	2370	\&\f(CW\(C`<leftop:...>\(C'\fR's are frequently used to specify \(L"separated\(R" lists of the
	2371	same type of item. To make such lists easier to specify, the following
	2372	syntax:
	2373	.Sp
	2374	.Vb 1
	2375	\& list: element(s /,/)
	2376	.Ve
	2377	.Sp
	2378	is exactly equivalent to:
	2379	.Sp
	2380	.Vb 1
	2381	\& list: <leftop: element /,/ element>
	2382	.Ve
	2383	.Sp
	2384	Note that the separator must be specified as a raw pattern (i.e.
	2385	not a string or subrule).
	2386	.IP "Scored productions" 4
	2387	.IX Item "Scored productions"
	2388	By default, Parse::RecDescent grammar rules always accept the first
	2389	production that matches the input. But if two or more productions may
	2390	potentially match the same input, choosing the first that does so may
	2391	not be optimal.
	2392	.Sp
	2393	For example, if you were parsing the sentence \(L"time flies like an arrow\(R",
	2394	you might use a rule like this:
	2395	.Sp
	2396	.Vb 3
	2397	\& sentence: verb noun preposition article noun { [@item] }
	2398	\& \| adjective noun verb article noun { [@item] }
	2399	\& \| noun verb preposition article noun { [@item] }
	2400	.Ve
	2401	.Sp
	2402	Each of these productions matches the sentence, but the third one
	2403	is the most likely interpretation. However, if the sentence had been
	2404	\&\(L"fruit flies like a banana\(R", then the second production is probably
	2405	the right match.
	2406	.Sp
	2407	To cater for such situations, the \f(CW\*(C`<score:...>\*(C'\fR directive can be used.
	2408	The directive is equivalent to an unconditional \f(CW\(C`<reject>\(C'\fR,
	2409	except that it allows you to specify a \(L"score\(R" for the current
	2410	production. If that score is numerically greater than the best
	2411	score of any preceding production, the current production is cached for later
	2412	consideration. If no later production matches, then the cached
	2413	production is treated as having matched, and the value of the
	2414	item immediately before its \f(CW\(C`<score:...>\(C'\fR directive is returned as the
	2415	result.
	2416	.Sp
	2417	In other words, by putting a \f(CW\(C`<score:...>\(C'\fR directive at the end of
	2418	each production, you can select which production matches using
	2419	criteria other than specification order. For example:
	2420	.Sp
	2421	.Vb 3
	2422	\& sentence: verb noun preposition article noun { [@item] } <score: sensible(@item)>
	2423	\& \| adjective noun verb article noun { [@item] } <score: sensible(@item)>
	2424	\& \| noun verb preposition article noun { [@item] } <score: sensible(@item)>
	2425	.Ve
	2426	.Sp
	2427	Now, when each production reaches its respective \f(CW\(C`<score:...>\(C'\fR
	2428	directive, the subroutine \f(CW\(C`sensible\(C'\fR will be called to evaluate the
	2429	matched items (somehow). Once all productions have been tried, the
	2430	one which \f(CW\(C`sensible\(C'\fR scored most highly will be the one that is
	2431	accepted as a match for the rule.
	2432	.Sp
	2433	The variable \f(CW$score\fR always holds the current best score of any production,
	2434	and the variable \f(CW$score_return\fR holds the corresponding return value.
	2435	.Sp
	2436	As another example, the following grammar matches lines that may be
	2437	separated by commas, colons, or semi\-colons. This can be tricky if
	2438	a colon-separated line also contains commas, or vice versa. The grammar
	2439	resolves the ambiguity by selecting the rule that results in the
	2440	fewest fields:
	2441	.Sp
	2442	.Vb 3
	2443	\& line: seplist[sep=>','] <score: -@{$item[1]}>
	2444	\& \| seplist[sep=>':'] <score: -@{$item[1]}>
	2445	\& \| seplist[sep=>" "] <score: -@{$item[1]}>
	2446	.Ve
	2447	.Sp
	2448	.Vb 1
	2449	\& seplist: <skip:""> <leftop: /[^$arg{sep}]/ "$arg{sep}" /[^$arg{sep}]/>
	2450	.Ve
	2451	.Sp
	2452	Note the use of negation within the \f(CW\(C`<score:...>\(C'\fR directive
	2453	to ensure that the seplist with the most items gets the lowest score.
	2454	.Sp
	2455	As the above examples indicate, it is often the case that all productions
	2456	in a rule use exactly the same \f(CW\(C`<score:...>\(C'\fR directive. It is
	2457	tedious to have to repeat this identical directive in every production, so
	2458	Parse::RecDescent also provides the \f(CW\(C`<autoscore:...>\(C'\fR directive.
	2459	.Sp
	2460	If an \f(CW\(C`<autoscore:...>\(C'\fR directive appears in any
	2461	production of a rule, the code it specifies is used as the scoring
	2462	code for every production of that rule, except productions that already
	2463	end with an explicit \f(CW\(C`<score:...>\(C'\fR directive. Thus the rules above could
	2464	be rewritten:
	2465	.Sp
	2466	.Vb 4
	2467	\& line: <autoscore: -@{$item[1]}>
	2468	\& line: seplist[sep=>',']
	2469	\& \| seplist[sep=>':']
	2470	\& \| seplist[sep=>" "]
	2471	.Ve
	2472	.Sp
	2473	.Vb 4
	2474	\& sentence: <autoscore: sensible(@item)>
	2475	\& \| verb noun preposition article noun { [@item] }
	2476	\& \| adjective noun verb article noun { [@item] }
	2477	\& \| noun verb preposition article noun { [@item] }
	2478	.Ve
	2479	.Sp
	2480	Note that the \f(CW\(C`<autoscore:...>\(C'\fR directive itself acts as an
	2481	unconditional \f(CW\(C`<reject>\(C'\fR, and (like the \f(CW\(C`<rulevar:...>\(C'\fR
	2482	directive) is pruned at compile-time wherever possible.
	2483	.IP "Dispensing with grammar checks" 4
	2484	.IX Item "Dispensing with grammar checks"
	2485	During the compilation phase of parser construction, Parse::RecDescent performs
	2486	a small number of checks on the grammar it's given. Specifically it checks that
	2487	the grammar is not left\-recursive, that there are no \(L"insatiable\(R" constructs of
	2488	the form:
	2489	.Sp
	2490	.Vb 1
	2491	\& rule: subrule(s) subrule
	2492	.Ve
	2493	.Sp
	2494	and that there are no rules missing (i.e. referred to, but never defined).
	2495	.Sp
	2496	These checks are important during development, but can slow down parser
	2497	construction in stable code. So Parse::RecDescent provides the
	2498	<nocheck> directive to turn them off. The directive can only appear
	2499	before the first rule definition, and switches off checking throughout the rest
	2500	of the current grammar.
	2501	.Sp
	2502	Typically, this directive would be added when a parser has been thoroughly
	2503	tested and is ready for release.
	2504	.Sh "Subrule argument lists"
	2505	.IX Subsection "Subrule argument lists"
	2506	It is occasionally useful to pass data to a subrule which is being invoked. For
	2507	example, consider the following grammar fragment:
	2508	.PP
	2509	.Vb 1
	2510	\& classdecl: keyword decl
	2511	.Ve
	2512	.PP
	2513	.Vb 1
	2514	\& keyword: 'struct' \| 'class';
	2515	.Ve
	2516	.PP
	2517	.Vb 1
	2518	\& decl: # WHATEVER
	2519	.Ve
	2520	.PP
	2521	The \f(CW\(C`decl\(C'\fR rule might wish to know which of the two keywords was used
	2522	(since it may affect some aspect of the way the subsequent declaration
	2523	is interpreted). \f(CW\(C`Parse::RecDescent\(C'\fR allows the grammar designer to
	2524	pass data into a rule, by placing that data in an \fIargument list\fR
	2525	(that is, in square brackets) immediately after any subrule item in a
	2526	production. Hence, we could pass the keyword to \f(CW\(C`decl\(C'\fR as follows:
	2527	.PP
	2528	.Vb 1
	2529	\& classdecl: keyword decl[ $item[1] ]
	2530	.Ve
	2531	.PP
	2532	.Vb 1
	2533	\& keyword: 'struct' \| 'class';
	2534	.Ve
	2535	.PP
	2536	.Vb 1
	2537	\& decl: # WHATEVER
	2538	.Ve
	2539	.PP
	2540	The argument list can consist of any number (including zero!) of comma-separated
	2541	Perl expressions. In other words, it looks exactly like a Perl anonymous
	2542	array reference. For example, we could pass the keyword, the name of the
	2543	surrounding rule, and the literal 'keyword' to \f(CW\(C`decl\(C'\fR like so:
	2544	.PP
	2545	.Vb 1
	2546	\& classdecl: keyword decl[$item[1],$item[0],'keyword']
	2547	.Ve
	2548	.PP
	2549	.Vb 1
	2550	\& keyword: 'struct' \| 'class';
	2551	.Ve
	2552	.PP
	2553	.Vb 1
	2554	\& decl: # WHATEVER
	2555	.Ve
	2556	.PP
	2557	Within the rule to which the data is passed (\f(CW\(C`decl\(C'\fR in the above examples)
	2558	that data is available as the elements of a local variable \f(CW@arg\fR. Hence
	2559	\&\f(CW\(C`decl\(C'\fR might report its intentions as follows:
	2560	.PP
	2561	.Vb 1
	2562	\& classdecl: keyword decl[$item[1],$item[0],'keyword']
	2563	.Ve
	2564	.PP
	2565	.Vb 1
	2566	\& keyword: 'struct' \| 'class';
	2567	.Ve
	2568	.PP
	2569	.Vb 2
	2570	\& decl: { print "Declaring $arg[0] (a $arg[2])\en";
	2571	\& print "(this rule called by $arg[1])" }
	2572	.Ve
	2573	.PP
	2574	Subrule argument lists can also be interpreted as hashes, simply by using
	2575	the local variable \f(CW%arg\fR instead of \f(CW@arg\fR. Hence we could rewrite the
	2576	previous example:
	2577	.PP
	2578	.Vb 3
	2579	\& classdecl: keyword decl[keyword => $item[1],
	2580	\& caller => $item[0],
	2581	\& type => 'keyword']
	2582	.Ve
	2583	.PP
	2584	.Vb 1
	2585	\& keyword: 'struct' \| 'class';
	2586	.Ve
	2587	.PP
	2588	.Vb 2
	2589	\& decl: { print "Declaring $arg{keyword} (a $arg{type})\en";
	2590	\& print "(this rule called by $arg{caller})" }
	2591	.Ve
	2592	.PP
	2593	Both \f(CW@arg\fR and \f(CW%arg\fR are always available, so the grammar designer may
	2594	choose whichever convention (or combination of conventions) suits best.
	2595	.PP
	2596	Subrule argument lists are also useful for creating \(L"rule templates\(R"
	2597	(especially when used in conjunction with the \f(CW\(C`<matchrule:...>\(C'\fR
	2598	directive). For example, the subrule:
	2599	.PP
	2600	.Vb 4
	2601	\& list: <matchrule:$arg{rule}> /$arg{sep}/ list[%arg]
	2602	\& { $return = [ $item[1], @{$item[3]} ] }
	2603	\& \| <matchrule:$arg{rule}>
	2604	\& { $return = [ $item[1]] }
	2605	.Ve
	2606	.PP
	2607	is a handy template for the common problem of matching a separated list.
	2608	For example:
	2609	.PP
	2610	.Vb 1
	2611	\& function: 'func' name '(' list[rule=>'param',sep=>';'] ')'
	2612	.Ve
	2613	.PP
	2614	.Vb 1
	2615	\& param: list[rule=>'name',sep=>','] ':' typename
	2616	.Ve
	2617	.PP
	2618	.Vb 1
	2619	\& name: /\ew+/
	2620	.Ve
	2621	.PP
	2622	.Vb 1
	2623	\& typename: name
	2624	.Ve
	2625	.PP
	2626	When a subrule argument list is used with a repeated subrule, the argument list
	2627	goes \fIbefore\fR the repetition specifier:
	2628	.PP
	2629	.Vb 1
	2630	\& list: /some\|many/ thing[ $item[1] ](s)
	2631	.Ve
	2632	.PP
	2633	The argument list is \(L"late bound\(R". That is, it is re-evaluated for every
	2634	repetition of the repeated subrule.
	2635	This means that each repeated attempt to match the subrule may be
	2636	passed a completely different set of arguments if the value of the
	2637	expression in the argument list changes between attempts. So, for
	2638	example, the grammar:
	2639	.PP
	2640	.Vb 1
	2641	\& { $::species = 'dogs' }
	2642	.Ve
	2643	.PP
	2644	.Vb 1
	2645	\& pair: 'two' animal[$::species](s)
	2646	.Ve
	2647	.PP
	2648	.Vb 1
	2649	\& animal: /$arg[0]/ { $::species = 'cats' }
	2650	.Ve
	2651	.PP
	2652	will match the string \(L"two dogs cats cats\(R" completely, whereas
	2653	it will only match the string \(L"two dogs dogs dogs\(R" up to the
	2654	eighth letter. If the value of the argument list were \(L"early bound\(R"
	2655	(that is, evaluated only the first time a repeated subrule match is
	2656	attempted), one would expect the matching behaviours to be reversed.
	2657	.PP
	2658	Of course, it is possible to effectively \(L"early bind\(R" such argument lists
	2659	by passing them a value which does not change on each repetition. For example:
	2660	.PP
	2661	.Vb 1
	2662	\& { $::species = 'dogs' }
	2663	.Ve
	2664	.PP
	2665	.Vb 1
	2666	\& pair: 'two' { $::species } animal[$item[2]](s)
	2667	.Ve
	2668	.PP
	2669	.Vb 1
	2670	\& animal: /$arg[0]/ { $::species = 'cats' }
	2671	.Ve
	2672	.PP
	2673	Arguments can also be passed to the start rule, simply by appending them
	2674	to the argument list with which the start rule is called (\fIafter\fR the
	2675	\&\(L"line number\(R" parameter). For example, given:
	2676	.PP
	2677	.Vb 1
	2678	\& $parser = new Parse::RecDescent ( $grammar );
	2679	.Ve
	2680	.PP
	2681	.Vb 1
	2682	\& $parser->data($text, 1, "str", 2, \e@arr);
	2683	.Ve
	2684	.PP
	2685	.Vb 5
	2686	\& # ^^^^^ ^ ^^^^^^^^^^^^^^^
	2687	\& # \| \| \|
	2688	\& # TEXT TO BE PARSED \| \|
	2689	\& # STARTING LINE NUMBER \|
	2690	\& # ELEMENTS OF @arg WHICH IS PASSED TO RULE data
	2691	.Ve
	2692	.PP
	2693	then within the productions of the rule \f(CW\(C`data\(C'\fR, the array \f(CW@arg\fR will contain
	2694	\&\f(CW\(C`("str", 2, \e@arr)\(C'\fR.
	2695	.Sh "Alternations"
	2696	.IX Subsection "Alternations"
	2697	Alternations are implicit (unnamed) rules defined as part of a production. An
	2698	alternation is defined as a series of '\|'\-separated productions inside a
	2699	pair of round brackets. For example:
	2700	.PP
	2701	.Vb 1
	2702	\& character: 'the' ( good \| bad \| ugly ) /dude/
	2703	.Ve
	2704	.PP
	2705	Every alternation implicitly defines a new subrule, whose
	2706	automatically-generated name indicates its origin:
	2707	\&\(L"_alternation_<I>_of_production_<P>_of_rule_<R>\(R" for the appropriate
	2708	values of <I>, <P>, and <R>. A call to this implicit subrule is then
	2709	inserted in place of the brackets. Hence the above example is merely a
	2710	convenient short-hand for:
	2711	.PP
	2712	.Vb 3
	2713	\& character: 'the'
	2714	\& _alternation_1_of_production_1_of_rule_character
	2715	\& /dude/
	2716	.Ve
	2717	.PP
	2718	.Vb 2
	2719	\& _alternation_1_of_production_1_of_rule_character:
	2720	\& good \| bad \| ugly
	2721	.Ve
	2722	.PP
	2723	Since alternations are parsed by recursively calling the parser generator,
	2724	any type(s) of item can appear in an alternation. For example:
	2725	.PP
	2726	.Vb 5
	2727	\& character: 'the' ( 'high' "plains" # Silent, with poncho
	2728	\& \| /no[- ]name/ # Silent, no poncho
	2729	\& \| vengeance_seeking # Poncho-optional
	2730	\& \| <error>
	2731	\& ) drifter
	2732	.Ve
	2733	.PP
	2734	In this case, if an error occurred, the automatically generated
	2735	message would be:
	2736	.PP
	2737	.Vb 3
	2738	\& ERROR (line <N>): Invalid implicit subrule: Expected
	2739	\& 'high' or /no[- ]name/ or vengeance_seeking,
	2740	\& but found "pacifist" instead
	2741	.Ve
	2742	.PP
	2743	Since every alternation actually has a name, it's even possible
	2744	to extend or replace them:
	2745	.PP
	2746	.Vb 4
	2747	\& $parser->Replace(
	2748	\& "_alternation_1_of_production_1_of_rule_character:
	2749	\& 'generic Eastwood'"
	2750	\& );
	2751	.Ve
	2752	.PP
	2753	More importantly, since alternations are a form of subrule, they can be given
	2754	repetition specifiers:
	2755	.PP
	2756	.Vb 1
	2757	\& character: 'the' ( good \| bad \| ugly )(?) /dude/
	2758	.Ve
	2759	.Sh "Incremental Parsing"
	2760	.IX Subsection "Incremental Parsing"
	2761	\&\f(CW\(C`Parse::RecDescent\(C'\fR provides two methods \- \f(CW\(C`Extend\(C'\fR and \f(CW\(C`Replace\(C'\fR \- which
	2762	can be used to alter the grammar matched by a parser. Both methods
	2763	take the same argument as \f(CW\(C`Parse::RecDescent::new\(C'\fR, namely a
	2764	grammar specification string.
	2765	.PP
	2766	\&\f(CW\(C`Parse::RecDescent::Extend\(C'\fR interprets the grammar specification and adds any
	2767	productions it finds to the end of the rules for which they are specified. For
	2768	example:
	2769	.PP
	2770	.Vb 2
	2771	\& $add = "name: 'Jimmy-Bob' \| 'Bobby-Jim'\endesc: colour /necks?/";
	2772	\& $parser->Extend($add);
	2773	.Ve
	2774	.PP
	2775	adds two productions to the rule \(L"name\(R" (creating it if necessary) and one
	2776	production to the rule \(L"desc\(R".
	2777	.PP
	2778	\&\f(CW\(C`Parse::RecDescent::Replace\(C'\fR is identical, except that it first resets any
	2779	rule specified in the additional grammar, removing any existing productions.
	2780	Hence after:
	2781	.PP
	2782	.Vb 2
	2783	\& $add = "name: 'Jimmy-Bob' \| 'Bobby-Jim'\endesc: colour /necks?/";
	2784	\& $parser->Replace($add);
	2785	.Ve
	2786	.PP
	2787	there are \fIonly\fR two valid \(L"name\(R"s and one possible description.
	2788	.PP
	2789	A more interesting use of the \f(CW\(C`Extend\(C'\fR and \f(CW\(C`Replace\(C'\fR methods is to call them
	2790	inside the action of an executing parser. For example:
	2791	.PP
	2792	.Vb 3
	2793	\& typedef: 'typedef' type_name identifier ';'
	2794	\& { $thisparser->Extend("type_name: '$item[3]'") }
	2795	\& \| <error>
	2796	.Ve
	2797	.PP
	2798	.Vb 1
	2799	\& identifier: ...!type_name /[A-Za-z_]\ew*/
	2800	.Ve
	2801	.PP
	2802	which automatically prevents type names from being typedef'd, or:
	2803	.PP
	2804	.Vb 6
	2805	\& command: 'map' key_name 'to' abort_key
	2806	\& { $thisparser->Replace("abort_key: '$item[2]'") }
	2807	\& \| 'map' key_name 'to' key_name
	2808	\& { map_key($item[2],$item[4]) }
	2809	\& \| abort_key
	2810	\& { exit if confirm("abort?") }
	2811	.Ve
	2812	.PP
	2813	.Vb 1
	2814	\& abort_key: 'q'
	2815	.Ve
	2816	.PP
	2817	.Vb 1
	2818	\& key_name: ...!abort_key /[A-Za-z]/
	2819	.Ve
	2820	.PP
	2821	which allows the user to change the abort key binding, but not to unbind it.
	2822	.PP
	2823	The careful use of such constructs makes it possible to reconfigure a
	2824	running parser, eliminating the need for semantic feedback by
	2825	providing syntactic feedback instead. However, as currently implemented,
	2826	\&\f(CW\(C`Replace()\(C'\fR and \f(CW\(C`Extend()\(C'\fR have to regenerate and re\-\f(CW\(C`eval\(C'\fR the
	2827	entire parser whenever they are called. This makes them quite slow for
	2828	large grammars.
	2829	.PP
	2830	In such cases, the judicious use of an interpolated regex is likely to
	2831	be far more efficient:
	2832	.PP
	2833	.Vb 3
	2834	\& typedef: 'typedef' type_name identifier ';'
	2835	\& { $thisparser->{local}{type_name} .= "\|$item[3]" }
	2836	\& \| <error>
	2837	.Ve
	2838	.PP
	2839	.Vb 1
	2840	\& identifier: ...!type_name /[A-Za-z_]\ew*/
	2841	.Ve
	2842	.PP
	2843	.Vb 1
	2844	\& type_name: /$thisparser->{local}{type_name}/
	2845	.Ve
	2846	.Sh "Precompiling parsers"
	2847	.IX Subsection "Precompiling parsers"
	2848	Normally Parse::RecDescent builds a parser from a grammar at run\-time.
	2849	That approach simplifies the design and implementation of parsing code,
	2850	but has the disadvantage that it slows the parsing process down \- you
	2851	have to wait for Parse::RecDescent to build the parser every time the
	2852	program runs. Long or complex grammars can be particularly slow to
	2853	build, leading to unacceptable delays at start\-up.
	2854	.PP
	2855	To overcome this, the module provides a way of \(L"pre\-building\(R" a parser
	2856	object and saving it in a separate module. That module can then be used
	2857	to create clones of the original parser.
	2858	.PP
	2859	A grammar may be precompiled using the \f(CW\(C`Precompile\(C'\fR class method.
	2860	For example, to precompile a grammar stored in the scalar \f(CW$grammar\fR,
	2861	and produce a class named PreGrammar in a module file named PreGrammar.pm,
	2862	you could use:
	2863	.PP
	2864	.Vb 1
	2865	\& use Parse::RecDescent;
	2866	.Ve
	2867	.PP
	2868	.Vb 1
	2869	\& Parse::RecDescent->Precompile($grammar, "PreGrammar");
	2870	.Ve
	2871	.PP
	2872	The first argument is the grammar string, the second is the name of the class
	2873	to be built. The name of the module file is generated automatically by
	2874	appending \(L".pm\(R" to the last element of the class name. Thus
	2875	.PP
	2876	.Vb 1
	2877	\& Parse::RecDescent->Precompile($grammar, "My::New::Parser");
	2878	.Ve
	2879	.PP
	2880	would produce a module file named Parser.pm.
	2881	.PP
	2882	It is somewhat tedious to have to write a small Perl program just to
	2883	generate a precompiled grammar class, so Parse::RecDescent has some special
	2884	magic that allows you to do the job directly from the command\-line.
	2885	.PP
	2886	If your grammar is specified in a file named \fIgrammar\fR, you can generate
	2887	a class named Yet::Another::Grammar like so:
	2888	.PP
	2889	.Vb 1
	2890	\& > perl -MParse::RecDescent - grammar Yet::Another::Grammar
	2891	.Ve
	2892	.PP
	2893	This would produce a file named \fIGrammar.pm\fR containing the full
	2894	definition of a class called Yet::Another::Grammar. Of course, to use
	2895	that class, you would need to put the \fIGrammar.pm\fR file in a
	2896	directory named \fIYet/Another\fR, somewhere in your Perl include path.
	2897	.PP
	2898	Having created the new class, it's very easy to use it to build
	2899	a parser. You simply \f(CW\(C`use\(C'\fR the new module, and then call its
	2900	\&\f(CW\(C`new\(C'\fR method to create a parser object. For example:
	2901	.PP
	2902	.Vb 2
	2903	\& use Yet::Another::Grammar;
	2904	\& my $parser = Yet::Another::Grammar->new();
	2905	.Ve
	2906	.PP
	2907	The effect of these two lines is exactly the same as:
	2908	.PP
	2909	.Vb 1
	2910	\& use Parse::RecDescent;
	2911	.Ve
	2912	.PP
	2913	.Vb 3
	2914	\& open GRAMMAR_FILE, "grammar" or die;
	2915	\& local $/;
	2916	\& my $grammar = <GRAMMAR_FILE>;
	2917	.Ve
	2918	.PP
	2919	.Vb 1
	2920	\& my $parser = Parse::RecDescent->new($grammar);
	2921	.Ve
	2922	.PP
	2923	only considerably faster.
	2924	.PP
	2925	Note however that the parsers produced by either approach are exactly
	2926	the same, so whilst precompilation has an effect on \fIset-up\fR speed,
	2927	it has no effect on \fIparsing\fR speed. RecDescent 2.0 will address that
	2928	problem.
	2929	.ie n .Sh "A Metagrammar for ""Parse::RecDescent"""
	2930	.el .Sh "A Metagrammar for \f(CWParse::RecDescent\fP"
	2931	.IX Subsection "A Metagrammar for Parse::RecDescent"
	2932	The following is a specification of the grammar format accepted by
	2933	\&\f(CW\(C`Parse::RecDescent::new\(C'\fR (specified in the \f(CW\(C`Parse::RecDescent\(C'\fR grammar format!):
	2934	.PP
	2935	.Vb 1
	2936	\& grammar : component(s)
	2937	.Ve
	2938	.PP
	2939	.Vb 1
	2940	\& component : rule \| comment
	2941	.Ve
	2942	.PP
	2943	.Vb 1
	2944	\& rule : "\en" identifier ":" production(s?)
	2945	.Ve
	2946	.PP
	2947	.Vb 1
	2948	\& production : item(s)
	2949	.Ve
	2950	.PP
	2951	.Vb 3
	2952	\& item : lookahead(?) simpleitem
	2953	\& \| directive
	2954	\& \| comment
	2955	.Ve
	2956	.PP
	2957	.Vb 1
	2958	\& lookahead : '...' \| '...!' # +'ve or -'ve lookahead
	2959	.Ve
	2960	.PP
	2961	.Vb 5
	2962	\& simpleitem : subrule args(?) # match another rule
	2963	\& \| repetition # match repeated subrules
	2964	\& \| terminal # match the next input
	2965	\& \| bracket args(?) # match alternative items
	2966	\& \| action # do something
	2967	.Ve
	2968	.PP
	2969	.Vb 1
	2970	\& subrule : identifier # the name of the rule
	2971	.Ve
	2972	.PP
	2973	.Vb 1
	2974	\& args : {extract_codeblock($text,'[]')} # just like a [...] array ref
	2975	.Ve
	2976	.PP
	2977	.Vb 1
	2978	\& repetition : subrule args(?) howoften
	2979	.Ve
	2980	.PP
	2981	.Vb 6
	2982	\& howoften : '(?)' # 0 or 1 times
	2983	\& \| '(s?)' # 0 or more times
	2984	\& \| '(s)' # 1 or more times
	2985	\& \| /\e((\ed+)[.][.](\ed+)\e)/ # $1 to $2 times
	2986	\& \| /\e([.][.](\ed*)\e)/ # at most $1 times
	2987	\& \| /\e((\ed*)[.][.]\e)/ # at least $1 times
	2988	.Ve
	2989	.PP
	2990	.Vb 3
	2991	\& terminal : /[/]([\e][/]\|[^/])*[/]/ # interpolated pattern
	2992	\& \| /"([\e]"\|[^"])*"/ # interpolated literal
	2993	\& \| /'([\e]'\|[^'])*'/ # uninterpolated literal
	2994	.Ve
	2995	.PP
	2996	.Vb 1
	2997	\& action : { extract_codeblock($text) } # embedded Perl code
	2998	.Ve
	2999	.PP
	3000	.Vb 1
	3001	\& bracket : '(' item(s) production(s?) ')' # alternative subrules
	3002	.Ve
	3003	.PP
	3004	.Vb 12
	3005	\& directive : '<commit>' # commit to production
	3006	\& \| '<uncommit>' # cancel commitment
	3007	\& \| '<resync>' # skip to newline
	3008	\& \| '<resync:' pattern '>' # skip <pattern>
	3009	\& \| '<reject>' # fail this production
	3010	\& \| '<reject:' condition '>' # fail if <condition>
	3011	\& \| '<error>' # report an error
	3012	\& \| '<error:' string '>' # report error as "<string>"
	3013	\& \| '<error?>' # error only if committed
	3014	\& \| '<error?:' string '>' # " " " "
	3015	\& \| '<rulevar:' /[^>]+/ '>' # define rule-local variable
	3016	\& \| '<matchrule:' string '>' # invoke rule named in string
	3017	.Ve
	3018	.PP
	3019	.Vb 1
	3020	\& identifier : /[a-z]\ew*/i # must start with alpha
	3021	.Ve
	3022	.PP
	3023	.Vb 1
	3024	\& comment : /#[^\en]*/ # same as Perl
	3025	.Ve
	3026	.PP
	3027	.Vb 1
	3028	\& pattern : {extract_bracketed($text,'<')} # allow embedded "<..>"
	3029	.Ve
	3030	.PP
	3031	.Vb 1
	3032	\& condition : {extract_codeblock($text,'{<')} # full Perl expression
	3033	.Ve
	3034	.PP
	3035	.Vb 3
	3036	\& string : {extract_variable($text)} # any Perl variable
	3037	\& \| {extract_quotelike($text)} # or quotelike string
	3038	\& \| {extract_bracketed($text,'<')} # or balanced brackets
	3039	.Ve
	3040	.SH "GOTCHAS"
	3041	.IX Header "GOTCHAS"
	3042	This section describes common mistakes that grammar writers seem to
	3043	make on a regular basis.
	3044	.Sh "1. Expecting an error to always invalidate a parse"
	3045	.IX Subsection "1. Expecting an error to always invalidate a parse"
	3046	A common mistake when using error messages is to write the grammar like this:
	3047	.PP
	3048	.Vb 1
	3049	\& file: line(s)
	3050	.Ve
	3051	.PP
	3052	.Vb 4
	3053	\& line: line_type_1
	3054	\& \| line_type_2
	3055	\& \| line_type_3
	3056	\& \| <error>
	3057	.Ve
	3058	.PP
	3059	The expectation seems to be that any line that is not of type 1, 2 or 3 will
	3060	invoke the \f(CW\(C`<error>\(C'\fR directive and thereby cause the parse to fail.
	3061	.PP
	3062	Unfortunately, that only happens if the error occurs in the very first line.
	3063	The first rule states that a \f(CW\(C`file\(C'\fR is matched by one or more lines, so if
	3064	even a single line succeeds, the first rule is completely satisfied and the
	3065	parse as a whole succeeds. That means that any error messages generated by
	3066	subsequent failures in the \f(CW\(C`line\(C'\fR rule are quietly ignored.
	3067	.PP
	3068	Typically what's really needed is this:
	3069	.PP
	3070	.Vb 1
	3071	\& file: line(s) eofile { $return = $item[1] }
	3072	.Ve
	3073	.PP
	3074	.Vb 4
	3075	\& line: line_type_1
	3076	\& \| line_type_2
	3077	\& \| line_type_3
	3078	\& \| <error>
	3079	.Ve
	3080	.PP
	3081	.Vb 1
	3082	\& eofile: /^\eZ/
	3083	.Ve
	3084	.PP
	3085	The addition of the \f(CW\(C`eofile\(C'\fR subrule to the first production means that
	3086	a file only matches a series of successful \f(CW\(C`line\(C'\fR matches \fIthat consume the
	3087	complete input text\fR. If any input text remains after the lines are matched,
	3088	there must have been an error in the last \f(CW\(C`line\(C'\fR. In that case the \f(CW\(C`eofile\(C'\fR
	3089	rule will fail, causing the entire \f(CW\(C`file\(C'\fR rule to fail too.
	3090	.PP
	3091	Note too that \f(CW\(C`eofile\(C'\fR must match \f(CW\(C`/^\eZ/\(C'\fR (end\-of\-text), \fInot\fR
	3092	\&\f(CW\(C`/^\ecZ/\(C'\fR or \f(CW\(C`/^\ecD/\(C'\fR (end\-of\-file).
	3093	.PP
	3094	And don't forget the action at the end of the production. If you just
	3095	write:
	3096	.PP
	3097	.Vb 1
	3098	\& file: line(s) eofile
	3099	.Ve
	3100	.PP
	3101	then the value returned by the \f(CW\(C`file\(C'\fR rule will be the value of its
	3102	last item: \f(CW\(C`eofile\(C'\fR. Since \f(CW\(C`eofile\(C'\fR always returns an empty string
	3103	on success, that will cause the \f(CW\(C`file\(C'\fR rule to return that empty
	3104	string. Apart from returning the wrong value, returning an empty string
	3105	will trip up code such as:
	3106	.PP
	3107	.Vb 1
	3108	\& $parser->file($filetext) \|\| die;
	3109	.Ve
	3110	.PP
	3111	(since "" is false).
	3112	.PP
	3113	Remember that Parse::RecDescent returns undef on failure,
	3114	so the only safe test for failure is:
	3115	.PP
	3116	.Vb 1
	3117	\& defined($parser->file($filetext)) \|\| die;
	3118	.Ve
	3119	.SH "DIAGNOSTICS"
	3120	.IX Header "DIAGNOSTICS"
	3121	Diagnostics are intended to be self-explanatory (particularly if you
	3122	use \fB\-RD_HINT\fR (under \fBperl \-s\fR) or define \f(CW$::RD_HINT\fR inside the program).
	3123	.PP
	3124	\&\f(CW\(C`Parse::RecDescent\(C'\fR currently diagnoses the following:
	3125	.IP "\(bu" 4
	3126	Invalid regular expressions used as pattern terminals (fatal error).
	3127	.IP "\(bu" 4
	3128	Invalid Perl code in code blocks (fatal error).
	3129	.IP "\(bu" 4
	3130	Lookahead used in the wrong place or in a nonsensical way (fatal error).
	3131	.IP "\(bu" 4
	3132	\&\(L"Obvious\(R" cases of left-recursion (fatal error).
	3133	.IP "\(bu" 4
	3134	Missing or extra components in a \f(CW\(C`<leftop>\(C'\fR or \f(CW\(C`<rightop>\(C'\fR
	3135	directive.
	3136	.IP "\(bu" 4
	3137	Unrecognisable components in the grammar specification (fatal error).
	3138	.IP "\(bu" 4
	3139	\&\(L"Orphaned\(R" rule components specified before the first rule (fatal error)
	3140	or after an \f(CW\(C`<error>\(C'\fR directive (level 3 warning).
	3141	.IP "\(bu" 4
	3142	Missing rule definitions (this only generates a level 3 warning, since you
	3143	may be providing them later via \f(CW\(C`Parse::RecDescent::Extend()\(C'\fR).
	3144	.IP "\(bu" 4
	3145	Instances where greedy repetition behaviour will almost certainly
	3146	cause the failure of a production (a level 3 warning \- see
	3147	\&\(L"\s-1ON\-GOING\s0 \s-1ISSUES\s0 \s-1AND\s0 \s-1FUTURE\s0 \s-1DIRECTIONS\s0\(R" below).
	3148	.IP "\(bu" 4
	3149	Attempts to define rules named 'Replace' or 'Extend', which cannot be
	3150	called directly through the parser object because of the predefined
	3151	meaning of \f(CW\(C`Parse::RecDescent::Replace\(C'\fR and
	3152	\&\f(CW\(C`Parse::RecDescent::Extend\(C'\fR. (Only a level 2 warning is generated, since
	3153	such rules \fIcan\fR still be used as subrules).
	3154	.IP "\(bu" 4
	3155	Productions which consist of a single \f(CW\(C`<error?>\(C'\fR
	3156	directive, and which therefore may succeed unexpectedly
	3157	(a level 2 warning, since this might conceivably be the desired effect).
	3158	.IP "\(bu" 4
	3159	Multiple consecutive lookahead specifiers (a level 1 warning only, since their
	3160	effects simply accumulate).
	3161	.IP "\(bu" 4
	3162	Productions which start with a \f(CW\(C`<reject>\(C'\fR or \f(CW\(C`<rulevar:...>\(C'\fR
	3163	directive. Such productions are optimized away (a level 1 warning).
	3164	.IP "\(bu" 4
	3165	Rules which are autogenerated under \f(CW$::AUTOSTUB\fR (a level 1 warning).
	3166	.SH "AUTHOR"
	3167	.IX Header "AUTHOR"
	3168	Damian Conway (damian@conway.org)
	3169	.SH "BUGS AND IRRITATIONS"
	3170	.IX Header "BUGS AND IRRITATIONS"
	3171	There are undoubtedly serious bugs lurking somewhere in this much code :\-)
	3172	Bug reports and other feedback are most welcome.
	3173	.PP
	3174	Ongoing annoyances include:
	3175	.IP "\(bu" 4
	3176	There's no support for parsing directly from an input stream.
	3177	If and when the Perl Gods give us regular expressions on streams,
	3178	this should be trivial (ahem!) to implement.
	3179	.IP "\(bu" 4
	3180	The parser generator can get confused if actions aren't properly
	3181	closed or if they contain particularly nasty Perl syntax errors
	3182	(especially unmatched curly brackets).
	3183	.IP "\(bu" 4
	3184	The generator only detects the most obvious form of left recursion
	3185	(potential recursion on the first subrule in a rule). More subtle
	3186	forms of left recursion (for example, through the second item in a
	3187	rule after a \(L"zero\(R" match of a preceding \(L"zero\-or\-more\(R" repetition,
	3188	or after a match of a subrule with an empty production) are not found.
	3189	.IP "\(bu" 4
	3190	Instead of complaining about left\-recursion, the generator should
	3191	silently transform the grammar to remove it. Don't expect this
	3192	feature any time soon as it would require a more sophisticated
	3193	approach to parser generation than is currently used.
	3194	.IP "\(bu" 4
	3195	The generated parsers don't always run as fast as might be wished.
	3196	.IP "\(bu" 4
	3197	The meta-parser should be bootstrapped using \f(CW\(C`Parse::RecDescent\(C'\fR :\-)
	3198	.SH "ON-GOING ISSUES AND FUTURE DIRECTIONS"
	3199	.IX Header "ON-GOING ISSUES AND FUTURE DIRECTIONS"
	3200	.IP "1." 4
	3201	Repetitions are \(L"incorrigibly greedy\(R" in that they will eat everything they can
	3202	and won't backtrack if that behaviour causes a production to fail needlessly.
	3203	So, for example:
	3204	.Sp
	3205	.Vb 1
	3206	\& rule: subrule(s) subrule
	3207	.Ve
	3208	.Sp
	3209	will \fInever\fR succeed, because the repetition will eat all the
	3210	subrules it finds, leaving none to match the second item. Such
	3211	constructions are relatively rare (and \f(CW\(C`Parse::RecDescent::new\(C'\fR generates a
	3212	warning whenever they occur) so this may not be a problem, especially
	3213	since the insatiable behaviour can be overcome \(L"manually\(R" by writing:
	3214	.Sp
	3215	.Vb 1
	3216	\& rule: penultimate_subrule(s) subrule
	3217	.Ve
	3218	.Sp
	3219	.Vb 1
	3220	\& penultimate_subrule: subrule ...subrule
	3221	.Ve
	3222	.Sp
	3223	The issue is that this construction is exactly twice as expensive as the
	3224	original, whereas backtracking would add only 1/\fIN\fR to the cost (for
	3225	matching \fIN\fR repetitions of \f(CW\(C`subrule\(C'\fR). I would welcome feedback on
	3226	the need for backtracking; particularly on cases where the lack of it
	3227	makes parsing performance problematical.
	3228	.IP "2." 4
	3229	Having opened that can of worms, it's also necessary to consider whether there
	3230	is a need for non-greedy repetition specifiers. Again, it's possible (at some
	3231	cost) to manually provide the required functionality:
	3232	.Sp
	3233	.Vb 1
	3234	\& rule: nongreedy_subrule(s) othersubrule
	3235	.Ve
	3236	.Sp
	3237	.Vb 1
	3238	\& nongreedy_subrule: subrule ...!othersubrule
	3239	.Ve
	3240	.Sp
	3241	Overall, the issue is whether the benefit of this extra functionality
	3242	outweighs the drawbacks of further complicating the (currently
	3243	minimalist) grammar specification syntax, and (worse) introducing more overhead
	3244	into the generated parsers.
	3245	.IP "3." 4
	3246	An \f(CW\(C`<autocommit>\(C'\fR directive would be nice. That is, it would be useful to be
	3247	able to say:
	3248	.Sp
	3249	.Vb 7
	3250	\& command: <autocommit>
	3251	\& command: 'find' name
	3252	\& \| 'find' address
	3253	\& \| 'do' command 'at' time 'if' condition
	3254	\& \| 'do' command 'at' time
	3255	\& \| 'do' command
	3256	\& \| unusual_command
	3257	.Ve
	3258	.Sp
	3259	and have the generator work out that this should be \(L"pruned\(R" thus:
	3260	.Sp
	3261	.Vb 9
	3262	\& command: 'find' name
	3263	\& \| 'find' <commit> address
	3264	\& \| 'do' <commit> command <uncommit>
	3265	\& 'at' time
	3266	\& 'if' <commit> condition
	3267	\& \| 'do' <commit> command <uncommit>
	3268	\& 'at' <commit> time
	3269	\& \| 'do' <commit> command
	3270	\& \| unusual_command
	3271	.Ve
	3272	.Sp
	3273	There are several issues here. Firstly, should the
	3274	\&\f(CW\(C`<autocommit>\(C'\fR automatically install an \f(CW\(C`<uncommit>\(C'\fR
	3275	at the start of the last production (on the grounds that the \(L"command\(R"
	3276	rule doesn't know whether an \(L"unusual_command\(R" might start with \(L"find\(R"
	3277	or \(L"do\(R") or should the \(L"unusual_command\(R" subgraph be analysed (to see
	3278	if it \fImight\fR be viable after a \(L"find\(R" or \(L"do\(R")?
	3279	.Sp
	3280	The second issue is how regular expressions should be treated. The simplest
	3281	approach would be simply to uncommit before them (on the grounds that they
	3282	\&\fImight\fR match). Better efficiency would be obtained by analyzing all preceding
	3283	literal tokens to determine whether the pattern would match them.
	3284	.Sp
	3285	Overall, the issues are: can such automated \(L"pruning\(R" approach a hand-tuned
	3286	version sufficiently closely to warrant the extra set-up expense, and (more
	3287	importantly) is the problem important enough to even warrant the non-trivial
	3288	effort of building an automated solution?
	3289	.SH "COPYRIGHT"
	3290	.IX Header "COPYRIGHT"
	3291	Copyright (c) 1997\-2000, Damian Conway. All Rights Reserved.
	3292	This module is free software. It may be used, redistributed
	3293	and/or modified under the terms of the Perl Artistic License
	3294	(see http://www.perl.com/perl/misc/Artistic.html)