git.subgeniuskitty.com - OpenSPARC-T2-SAM/.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/import/archperf/ws/devtools/4/amd64/bin/perl
	2	eval 'exec /import/archperf/ws/devtools/4/amd64/bin/perl -S $0 ${1+"$@"}'
	3	if $running_under_some_shell;
	4	my $startperl;
	5	my $perlpath;
	6	($startperl = <<'/../') =~ s/\s*\z//;
	7	#!/import/archperf/ws/devtools/4/amd64/bin/perl
	8	/../
	9	($perlpath = <<'/../') =~ s/\s*\z//;
	10	/import/archperf/ws/devtools/4/amd64/bin/perl
	11	/../
	12
	13	$0 =~ s/^.?(\w+)[\.\w]$/$1/;
	14
	15	# (p)sed - a stream editor
	16	# History: Aug 12 2000: Original version.
	17	# Mar 25 2002: Rearrange generated Perl program.
	18
	19	use strict;
	20	use integer;
	21	use Symbol;
	22
	23	=head1 NAME
	24
	25	psed - a stream editor
	26
	27	=head1 SYNOPSIS
	28
	29	psed [-an] script [file ...]
	30	psed [-an] [-e script] [-f script-file] [file ...]
	31
	32	s2p [-an] [-e script] [-f script-file]
	33
	34	=head1 DESCRIPTION
	35
	36	A stream editor reads the input stream consisting of the specified files
	37	(or standard input, if none are given), processes is line by line by
	38	applying a script consisting of edit commands, and writes resulting lines
	39	to standard output. The filename `C<->' may be used to read standard input.
	40
	41	The edit script is composed from arguments of B<-e> options and
	42	script-files, in the given order. A single script argument may be specified
	43	as the first parameter.
	44
	45	If this program is invoked with the name F<s2p>, it will act as a
	46	sed-to-Perl translator. See L<"sed Script Translation">.
	47
	48	B<sed> returns an exit code of 0 on success or >0 if an error occurred.
	49
	50	=head1 OPTIONS
	51
	52	=over 4
	53
	54	=item B<-a>
	55
	56	A file specified as argument to the B<w> edit command is by default
	57	opened before input processing starts. Using B<-a>, opening of such
	58	files is delayed until the first line is actually written to the file.
	59
	60	=item B<-e> I<script>
	61
	62	The editing commands defined by I<script> are appended to the script.
	63	Multiple commands must be separated by newlines.
	64
	65	=item B<-f> I<script-file>
	66
	67	Editing commands from the specified I<script-file> are read and appended
	68	to the script.
	69
	70	=item B<-n>
	71
	72	By default, a line is written to standard output after the editing script
	73	has been applied to it. The B<-n> option suppresses automatic printing.
	74
	75	=back
	76
	77	=head1 COMMANDS
	78
	79	B<sed> command syntax is defined as
	80
	81	Z<> Z<> Z<> Z<>[I<address>[B<,>I<address>]][B<!>]I<function>[I<argument>]
	82
	83	with whitespace being permitted before or after addresses, and between
	84	the function character and the argument. The I<address>es and the
	85	address inverter (C<!>) are used to restrict the application of a
	86	command to the selected line(s) of input.
	87
	88	Each command must be on a line of its own, except where noted in
	89	the synopses below.
	90
	91	The edit cycle performed on each input line consist of reading the line
	92	(without its trailing newline character) into the I<pattern space>,
	93	applying the applicable commands of the edit script, writing the final
	94	contents of the pattern space and a newline to the standard output.
	95	A I<hold space> is provided for saving the contents of the
	96	pattern space for later use.
	97
	98	=head2 Addresses
	99
	100	A sed address is either a line number or a pattern, which may be combined
	101	arbitrarily to construct ranges. Lines are numbered across all input files.
	102
	103	Any address may be followed by an exclamation mark (`C<!>'), selecting
	104	all lines not matching that address.
	105
	106	=over 4
	107
	108	=item I<number>
	109
	110	The line with the given number is selected.
	111
	112	=item B<$>
	113
	114	A dollar sign (C<$>) is the line number of the last line of the input stream.
	115
	116	=item B</>I<regular expression>B</>
	117
	118	A pattern address is a basic regular expression (see
	119	L<"Basic Regular Expressions">), between the delimiting character C</>.
	120	Any other character except C<\> or newline may be used to delimit a
	121	pattern address when the initial delimiter is prefixed with a
	122	backslash (`C<\>').
	123
	124	=back
	125
	126	If no address is given, the command selects every line.
	127
	128	If one address is given, it selects the line (or lines) matching the
	129	address.
	130
	131	Two addresses select a range that begins whenever the first address
	132	matches, and ends (including that line) when the second address matches.
	133	If the first (second) address is a matching pattern, the second
	134	address is not applied to the very same line to determine the end of
	135	the range. Likewise, if the second address is a matching pattern, the
	136	first address is not applied to the very same line to determine the
	137	begin of another range. If both addresses are line numbers,
	138	and the second line number is less than the first line number, then
	139	only the first line is selected.
	140
	141
	142	=head2 Functions
	143
	144	The maximum permitted number of addresses is indicated with each
	145	function synopsis below.
	146
	147	The argument I<text> consists of one or more lines following the command.
	148	Embedded newlines in I<text> must be preceded with a backslash. Other
	149	backslashes in I<text> are deleted and the following character is taken
	150	literally.
	151
	152	=over 4
	153
	154	=cut
	155
	156	my %ComTab;
	157	my %GenKey;
	158	#--------------------------------------------------------------------------
	159	$ComTab{'a'}=[ 1, 'txt', \&Emit, '{ push( @Q, <<'."'TheEnd' ) }\n" ]; #ok
	160
	161	=item [1addr]B<a\> I<text>
	162
	163	Write I<text> (which must start on the line following the command)
	164	to standard output immediately before reading the next line
	165	of input, either by executing the B<N> function or by beginning a new cycle.
	166
	167	=cut
	168
	169	#--------------------------------------------------------------------------
	170	$ComTab{'b'}=[ 2, 'str', \&Branch, '{ goto XXX; }' ]; #ok
	171
	172	=item [2addr]B<b> [I<label>]
	173
	174	Branch to the B<:> function with the specified I<label>. If no label
	175	is given, branch to the end of the script.
	176
	177	=cut
	178
	179	#--------------------------------------------------------------------------
	180	$ComTab{'c'}=[ 2, 'txt', \&Change, <<'-X-' ]; #ok
	181	{ print <<'TheEnd'; } $doPrint = 0; goto EOS;
	182	-X-
	183	### continue OK => next CYCLE;
	184
	185	=item [2addr]B<c\> I<text>
	186
	187	The line, or range of lines, selected by the address is deleted.
	188	The I<text> (which must start on the line following the command)
	189	is written to standard output. With an address range, this occurs at
	190	the end of the range.
	191
	192	=cut
	193
	194	#--------------------------------------------------------------------------
	195	$ComTab{'d'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
	196	{ $doPrint = 0;
	197	goto EOS;
	198	}
	199	-X-
	200	### continue OK => next CYCLE;
	201
	202	=item [2addr]B<d>
	203
	204	Deletes the pattern space and starts the next cycle.
	205
	206	=cut
	207
	208	#--------------------------------------------------------------------------
	209	$ComTab{'D'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
	210	{ s/^.*\n?//;
	211	if(length($_)){ goto BOS } else { goto EOS }
	212	}
	213	-X-
	214	### continue OK => next CYCLE;
	215
	216	=item [2addr]B<D>
	217
	218	Deletes the pattern space through the first embedded newline or to the end.
	219	If the pattern space becomes empty, a new cycle is started, otherwise
	220	execution of the script is restarted.
	221
	222	=cut
	223
	224	#--------------------------------------------------------------------------
	225	$ComTab{'g'}=[ 2, '', \&Emit, '{ $_ = $Hold };' ]; #ok
	226
	227	=item [2addr]B<g>
	228
	229	Replace the contents of the pattern space with the hold space.
	230
	231	=cut
	232
	233	#--------------------------------------------------------------------------
	234	$ComTab{'G'}=[ 2, '', \&Emit, '{ $_ .= "\n"; $_ .= $Hold };' ]; #ok
	235
	236	=item [2addr]B<G>
	237
	238	Append a newline and the contents of the hold space to the pattern space.
	239
	240	=cut
	241
	242	#--------------------------------------------------------------------------
	243	$ComTab{'h'}=[ 2, '', \&Emit, '{ $Hold = $_ }' ]; #ok
	244
	245	=item [2addr]B<h>
	246
	247	Replace the contents of the hold space with the pattern space.
	248
	249	=cut
	250
	251	#--------------------------------------------------------------------------
	252	$ComTab{'H'}=[ 2, '', \&Emit, '{ $Hold .= "\n"; $Hold .= $_; }' ]; #ok
	253
	254	=item [2addr]B<H>
	255
	256	Append a newline and the contents of the pattern space to the hold space.
	257
	258	=cut
	259
	260	#--------------------------------------------------------------------------
	261	$ComTab{'i'}=[ 1, 'txt', \&Emit, '{ print <<'."'TheEnd' }\n" ]; #ok
	262
	263	=item [1addr]B<i\> I<text>
	264
	265	Write the I<text> (which must start on the line following the command)
	266	to standard output.
	267
	268	=cut
	269
	270	#--------------------------------------------------------------------------
	271	$ComTab{'l'}=[ 2, '', \&Emit, '{ _l() }' ]; #okUTF8
	272
	273	=item [2addr]B<l>
	274
	275	Print the contents of the pattern space: non-printable characters are
	276	shown in C-style escaped form; long lines are split and have a trailing
	277	`C<\>' at the point of the split; the true end of a line is marked with
	278	a `C<$>'. Escapes are: `\a', `\t', `\n', `\f', `\r', `\e' for
	279	BEL, HT, LF, FF, CR, ESC, respectively, and `\' followed by a three-digit
	280	octal number for all other non-printable characters.
	281
	282	=cut
	283
	284	#--------------------------------------------------------------------------
	285	$ComTab{'n'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
	286	{ print $_, "\n" if $doPrint;
	287	printQ() if @Q;
	288	$CondReg = 0;
	289	last CYCLE unless getsARGV();
	290	chomp();
	291	}
	292	-X-
	293
	294	=item [2addr]B<n>
	295
	296	If automatic printing is enabled, write the pattern space to the standard
	297	output. Replace the pattern space with the next line of input. If
	298	there is no more input, processing is terminated.
	299
	300	=cut
	301
	302	#--------------------------------------------------------------------------
	303	$ComTab{'N'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
	304	{ printQ() if @Q;
	305	$CondReg = 0;
	306	last CYCLE unless getsARGV( $h );
	307	chomp( $h );
	308	$_ .= "\n$h";
	309	}
	310	-X-
	311
	312	=item [2addr]B<N>
	313
	314	Append a newline and the next line of input to the pattern space. If
	315	there is no more input, processing is terminated.
	316
	317	=cut
	318
	319	#--------------------------------------------------------------------------
	320	$ComTab{'p'}=[ 2, '', \&Emit, '{ print $_, "\n"; }' ]; #ok
	321
	322	=item [2addr]B<p>
	323
	324	Print the pattern space to the standard output. (Use the B<-n> option
	325	to suppress automatic printing at the end of a cycle if you want to
	326	avoid double printing of lines.)
	327
	328	=cut
	329
	330	#--------------------------------------------------------------------------
	331	$ComTab{'P'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
	332	{ if( /^(.*)/ ){ print $1, "\n"; } }
	333	-X-
	334
	335	=item [2addr]B<P>
	336
	337	Prints the pattern space through the first embedded newline or to the end.
	338
	339	=cut
	340
	341	#--------------------------------------------------------------------------
	342	$ComTab{'q'}=[ 1, '', \&Emit, <<'-X-' ]; #ok
	343	{ print $_, "\n" if $doPrint;
	344	last CYCLE;
	345	}
	346	-X-
	347
	348	=item [1addr]B<q>
	349
	350	Branch to the end of the script and quit without starting a new cycle.
	351
	352	=cut
	353
	354	#--------------------------------------------------------------------------
	355	$ComTab{'r'}=[ 1, 'str', \&Emit, "{ _r( '-X-' ) }" ]; #ok
	356
	357	=item [1addr]B<r> I<file>
	358
	359	Copy the contents of the I<file> to standard output immediately before
	360	the next attempt to read a line of input. Any error encountered while
	361	reading I<file> is silently ignored.
	362
	363	=cut
	364
	365	#--------------------------------------------------------------------------
	366	$ComTab{'s'}=[ 2, 'sub', \&Emit, '' ]; #ok
	367
	368	=item [2addr]B<s/>I<regular expression>B</>I<replacement>B</>I<flags>
	369
	370	Substitute the I<replacement> string for the first substring in
	371	the pattern space that matches the I<regular expression>.
	372	Any character other than backslash or newline can be used instead of a
	373	slash to delimit the regular expression and the replacement.
	374	To use the delimiter as a literal character within the regular expression
	375	and the replacement, precede the character by a backslash (`C<\>').
	376
	377	Literal newlines may be embedded in the replacement string by
	378	preceding a newline with a backslash.
	379
	380	Within the replacement, an ampersand (`C<&>') is replaced by the string
	381	matching the regular expression. The strings `C<\1>' through `C<\9>' are
	382	replaced by the corresponding subpattern (see L<"Basic Regular Expressions">).
	383	To get a literal `C<&>' or `C<\>' in the replacement text, precede it
	384	by a backslash.
	385
	386	The following I<flags> modify the behaviour of the B<s> command:
	387
	388	=over 8
	389
	390	=item B<g>
	391
	392	The replacement is performed for all matching, non-overlapping substrings
	393	of the pattern space.
	394
	395	=item B<1>..B<9>
	396
	397	Replace only the n-th matching substring of the pattern space.
	398
	399	=item B<p>
	400
	401	If the substitution was made, print the new value of the pattern space.
	402
	403	=item B<w> I<file>
	404
	405	If the substitution was made, write the new value of the pattern space
	406	to the specified file.
	407
	408	=back
	409
	410	=cut
	411
	412	#--------------------------------------------------------------------------
	413	$ComTab{'t'}=[ 2, 'str', \&Branch, '{ goto XXX if _t() }' ]; #ok
	414
	415	=item [2addr]B<t> [I<label>]
	416
	417	Branch to the B<:> function with the specified I<label> if any B<s>
	418	substitutions have been made since the most recent reading of an input line
	419	or execution of a B<t> function. If no label is given, branch to the end of
	420	the script.
	421
	422
	423	=cut
	424
	425	#--------------------------------------------------------------------------
	426	$ComTab{'w'}=[ 2, 'str', \&Write, "{ _w( '-X-' ) }" ]; #ok
	427
	428	=item [2addr]B<w> I<file>
	429
	430	The contents of the pattern space are written to the I<file>.
	431
	432	=cut
	433
	434	#--------------------------------------------------------------------------
	435	$ComTab{'x'}=[ 2, '', \&Emit, '{ ($Hold, $_) = ($_, $Hold) }' ]; #ok
	436
	437	=item [2addr]B<x>
	438
	439	Swap the contents of the pattern space and the hold space.
	440
	441	=cut
	442
	443	#--------------------------------------------------------------------------
	444	$ComTab{'y'}=[ 2, 'tra', \&Emit, '' ]; #ok
	445	=item [2addr]B<y>B</>I<string1>B</>I<string2>B</>
	446
	447	In the pattern space, replace all characters occuring in I<string1> by the
	448	character at the corresponding position in I<string2>. It is possible
	449	to use any character (other than a backslash or newline) instead of a
	450	slash to delimit the strings. Within I<string1> and I<string2>, a
	451	backslash followed by any character other than a newline is that literal
	452	character, and a backslash followed by an `n' is replaced by a newline
	453	character.
	454
	455	=cut
	456
	457	#--------------------------------------------------------------------------
	458	$ComTab{'='}=[ 1, '', \&Emit, '{ print "$.\n" }' ]; #ok
	459
	460	=item [1addr]B<=>
	461
	462	Prints the current line number on the standard output.
	463
	464	=cut
	465
	466	#--------------------------------------------------------------------------
	467	$ComTab{':'}=[ 0, 'str', \&Label, '' ]; #ok
	468
	469	=item [0addr]B<:> [I<label>]
	470
	471	The command specifies the position of the I<label>. It has no other effect.
	472
	473	=cut
	474
	475	#--------------------------------------------------------------------------
	476	$ComTab{'{'}=[ 2, '', \&BeginBlock, '{' ]; #ok
	477	$ComTab{'}'}=[ 0, '', \&EndBlock, ';}' ]; #ok
	478	# ';' to avoid warning on empty {}-block
	479
	480	=item [2addr]B<{> [I<command>]
	481
	482	=item [0addr]B<}>
	483
	484	These two commands begin and end a command list. The first command may
	485	be given on the same line as the opening B<{> command. The commands
	486	within the list are jointly selected by the address(es) given on the
	487	B<{> command (but may still have individual addresses).
	488
	489	=cut
	490
	491	#--------------------------------------------------------------------------
	492	$ComTab{'#'}=[ 0, 'str', \&Comment, '' ]; #ok
	493
	494	=item [0addr]B<#> [I<comment>]
	495
	496	The entire line is ignored (treated as a comment). If, however, the first
	497	two characters in the script are `C<#n>', automatic printing of output is
	498	suppressed, as if the B<-n> option were given on the command line.
	499
	500	=back
	501
	502	=cut
	503
	504	use vars qw{ $isEOF $Hold %wFiles @Q $CondReg $doPrint };
	505
	506	my $useDEBUG = exists( $ENV{PSEDDEBUG} );
	507	my $useEXTBRE = $ENV{PSEDEXTBRE} \|\| '';
	508	$useEXTBRE =~ s/[^<>wWyB]//g; # gawk RE's handle these
	509
	510	my $doAutoPrint = 1; # automatic printing of pattern space (-n => 0)
	511	my $doOpenWrite = 1; # open w command output files at start (-a => 0)
	512	my $svOpenWrite = 0; # save $doOpenWrite
	513
	514	# lower case $0 below as a VMSism. The VMS build procedure creates the
	515	# s2p file traditionally in upper case on the disk. When VMS is in a
	516	# case preserved or case sensitive mode, $0 will be returned in the exact
	517	# case which will be on the disk, and that is not predictable at this time.
	518
	519	my $doGenerate = lc($0) eq 's2p';
	520
	521	# Collected and compiled script
	522	#
	523	my( @Commands, %Defined, @BlockStack, %Label, $labNum, $Code, $Func );
	524	$Code = '';
	525
	526	##################
	527	# Compile Time
	528	#
	529	# Labels
	530	#
	531	# Error handling
	532	#
	533	sub Warn($;$){
	534	my( $msg, $loc ) = @_;
	535	$loc \|\|= '';
	536	$loc .= ': ' if length( $loc );
	537	warn( "$0: $loc$msg\n" );
	538	}
	539
	540	$labNum = 0;
	541	sub newLabel(){
	542	return 'L_'.++$labNum;
	543	}
	544
	545	# safeHere: create safe here delimiter and modify opcode and argument
	546	#
	547	sub safeHere($$){
	548	my( $codref, $argref ) = @_;
	549	my $eod = 'EOD000';
	550	while( $$argref =~ /^$eod$/m ){
	551	$eod++;
	552	}
	553	$$codref =~ s/TheEnd/$eod/e;
	554	$$argref .= "$eod\n";
	555	}
	556
	557	# Emit: create address logic and emit command
	558	#
	559	sub Emit($$$$$$){
	560	my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
	561	my $cond = '';
	562	if( defined( $addr1 ) ){
	563	if( defined( $addr2 ) ){
	564	$addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2";
	565	} else {
	566	$addr1 .= ' == $.' if $addr1 =~ /^\d+$/;
	567	}
	568	$cond = $negated ? "unless( $addr1 )\n" : "if( $addr1 )\n";
	569	}
	570
	571	if( $opcode eq '' ){
	572	$Code .= "$cond$arg\n";
	573
	574	} elsif( $opcode =~ s/-X-/$arg/e ){
	575	$Code .= "$cond$opcode\n";
	576
	577	} elsif( $opcode =~ /TheEnd/ ){
	578	safeHere( \$opcode, \$arg );
	579	$Code .= "$cond$opcode$arg";
	580
	581	} else {
	582	$Code .= "$cond$opcode\n";
	583	}
	584	0;
	585	}
	586
	587	# Write (w command, w flag): store pathname
	588	#
	589	sub Write($$$$$$){
	590	my( $addr1, $addr2, $negated, $opcode, $path, $fl ) = @_;
	591	$wFiles{$path} = '';
	592	Emit( $addr1, $addr2, $negated, $opcode, $path, $fl );
	593	}
	594
	595
	596	# Label (: command): label definition
	597	#
	598	sub Label($$$$$$){
	599	my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_;
	600	my $rc = 0;
	601	$lab =~ s/\s+//;
	602	if( length( $lab ) ){
	603	my $h;
	604	if( ! exists( $Label{$lab} ) ){
	605	$h = $Label{$lab}{name} = newLabel();
	606	} else {
	607	$h = $Label{$lab}{name};
	608	if( exists( $Label{$lab}{defined} ) ){
	609	my $dl = $Label{$lab}{defined};
	610	Warn( "duplicate label $lab (first defined at $dl)", $fl );
	611	$rc = 1;
	612	}
	613	}
	614	$Label{$lab}{defined} = $fl;
	615	$Code .= "$h:;\n";
	616	}
	617	$rc;
	618	}
	619
	620	# BeginBlock ({ command): push block start
	621	#
	622	sub BeginBlock($$$$$$){
	623	my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
	624	push( @BlockStack, [ $fl, $addr1, $addr2, $negated ] );
	625	Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl );
	626	}
	627
	628	# EndBlock (} command): check proper nesting
	629	#
	630	sub EndBlock($$$$$$){
	631	my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
	632	my $rc;
	633	my $jcom = pop( @BlockStack );
	634	if( defined( $jcom ) ){
	635	$rc = Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl );
	636	} else {
	637	Warn( "unexpected `}'", $fl );
	638	$rc = 1;
	639	}
	640	$rc;
	641	}
	642
	643	# Branch (t, b commands): check or create label, substitute default
	644	#
	645	sub Branch($$$$$$){
	646	my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_;
	647	$lab =~ s/\s+//; # no spaces at end
	648	my $h;
	649	if( length( $lab ) ){
	650	if( ! exists( $Label{$lab} ) ){
	651	$h = $Label{$lab}{name} = newLabel();
	652	} else {
	653	$h = $Label{$lab}{name};
	654	}
	655	push( @{$Label{$lab}{used}}, $fl );
	656	} else {
	657	$h = 'EOS';
	658	}
	659	$opcode =~ s/XXX/$h/e;
	660	Emit( $addr1, $addr2, $negated, $opcode, '', $fl );
	661	}
	662
	663	# Change (c command): is special due to range end watching
	664	#
	665	sub Change($$$$$$){
	666	my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
	667	my $kwd = $negated ? 'unless' : 'if';
	668	if( defined( $addr2 ) ){
	669	$addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2";
	670	if( ! $negated ){
	671	$addr1 = '$icnt = ('.$addr1.')';
	672	$opcode = 'if( $icnt =~ /E0$/ )' . $opcode;
	673	}
	674	} else {
	675	$addr1 .= ' == $.' if $addr1 =~ /^\d+$/;
	676	}
	677	safeHere( \$opcode, \$arg );
	678	$Code .= "$kwd( $addr1 ){\n $opcode$arg}\n";
	679	0;
	680	}
	681
	682
	683	# Comment (# command): A no-op. Who would've thought that!
	684	#
	685	sub Comment($$$$$$){
	686	my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
	687	### $Code .= "# $arg\n";
	688	0;
	689	}
	690
	691
	692	sub stripRegex($$){
	693	my( $del, $sref ) = @_;
	694	my $regex = $del;
	695	print "stripRegex:$del:$$sref:\n" if $useDEBUG;
	696	while( $$sref =~ s{^(.?)(\\)\Q$del\E(\s*)}{}s ){
	697	my $sl = $2;
	698	$regex .= $1.$sl.$del;
	699	if( length( $sl ) % 2 == 0 ){
	700	return $regex;
	701	}
	702	$regex .= $3;
	703	}
	704	undef();
	705	}
	706
	707	# stripTrans: take a <del> terminated string from y command
	708	# honoring and cleaning up of \-escaped <del>'s
	709	#
	710	sub stripTrans($$){
	711	my( $del, $sref ) = @_;
	712	my $t = '';
	713	print "stripTrans:$del:$$sref:\n" if $useDEBUG;
	714	while( $$sref =~ s{^(.?)(\\)\Q$del\E}{}s ){
	715	my $sl = $2;
	716	$t .= $1;
	717	if( length( $sl ) % 2 == 0 ){
	718	$t .= $sl;
	719	$t =~ s/\\\\/\\/g;
	720	return $t;
	721	}
	722	chop( $sl );
	723	$t .= $sl.$del.$3;
	724	}
	725	undef();
	726	}
	727
	728	# makey - construct Perl y/// from sed y///
	729	#
	730	sub makey($$$){
	731	my( $fr, $to, $fl ) = @_;
	732	my $error = 0;
	733
	734	# Ensure that any '-' is up front.
	735	# Diagnose duplicate contradicting mappings
	736	my %tr;
	737	for( my $i = 0; $i < length($fr); $i++ ){
	738	my $fc = substr($fr,$i,1);
	739	my $tc = substr($to,$i,1);
	740	if( exists( $tr{$fc} ) && $tr{$fc} ne $tc ){
	741	Warn( "ambiguous translation for character `$fc' in `y' command",
	742	$fl );
	743	$error++;
	744	}
	745	$tr{$fc} = $tc;
	746	}
	747	$fr = $to = '';
	748	if( exists( $tr{'-'} ) ){
	749	( $fr, $to ) = ( '-', $tr{'-'} );
	750	delete( $tr{'-'} );
	751	} else {
	752	$fr = $to = '';
	753	}
	754	# might just as well sort it...
	755	for my $fc ( sort keys( %tr ) ){
	756	$fr .= $fc;
	757	$to .= $tr{$fc};
	758	}
	759	# make embedded delimiters and newlines safe
	760	$fr =~ s/([{}])/\$1/g;
	761	$to =~ s/([{}])/\$1/g;
	762	$fr =~ s/\n/\\n/g;
	763	$to =~ s/\n/\\n/g;
	764	return $error ? undef() : "{ y{$fr}{$to}; }";
	765	}
	766
	767	######
	768	# makes - construct Perl s/// from sed s///
	769	#
	770	sub makes($$$$$$$){
	771	my( $regex, $subst, $path, $global, $print, $nmatch, $fl ) = @_;
	772
	773	# make embedded newlines safe
	774	$regex =~ s/\n/\\n/g;
	775	$subst =~ s/\n/\\n/g;
	776
	777	my $code;
	778	# n-th occurrence
	779	#
	780	if( length( $nmatch ) ){
	781	$code = <<TheEnd;
	782	{ \$n = $nmatch;
	783	while( --\$n && ( \$s = m ${regex}g ) ){}
	784	\$s = ( substr( \$_, pos() ) =~ s ${regex}${subst}s ) if \$s;
	785	\$CondReg \|\|= \$s;
	786	TheEnd
	787	} else {
	788	$code = <<TheEnd;
	789	{ \$s = s ${regex}${subst}s${global};
	790	\$CondReg \|\|= \$s;
	791	TheEnd
	792	}
	793	if( $print ){
	794	$code .= ' print $_, "\n" if $s;'."\n";
	795	}
	796	if( defined( $path ) ){
	797	$wFiles{$path} = '';
	798	$code .= " _w( '$path' ) if \$s;\n";
	799	$GenKey{'w'} = 1;
	800	}
	801	$code .= "}";
	802	}
	803
	804	=head1 BASIC REGULAR EXPRESSIONS
	805
	806	A I<Basic Regular Expression> (BRE), as defined in POSIX 1003.2, consists
	807	of I<atoms>, for matching parts of a string, and I<bounds>, specifying
	808	repetitions of a preceding atom.
	809
	810	=head2 Atoms
	811
	812	The possible atoms of a BRE are: B<.>, matching any single character;
	813	B<^> and B<$>, matching the null string at the beginning or end
	814	of a string, respectively; a I<bracket expressions>, enclosed
	815	in B<[> and B<]> (see below); and any single character with no
	816	other significance (matching that character). A B<\> before one
	817	of: B<.>, B<^>, B<$>, B<[>, B<*>, B<\>, matching the character
	818	after the backslash. A sequence of atoms enclosed in B<$> and B<$>
	819	becomes an atom and establishes the target for a I<backreference>,
	820	consisting of the substring that actually matches the enclosed atoms.
	821	Finally, B<\> followed by one of the digits B<0> through B<9> is a
	822	backreference.
	823
	824	A B<^> that is not first, or a B<$> that is not last does not have
	825	a special significance and need not be preceded by a backslash to
	826	become literal. The same is true for a B<]>, that does not terminate
	827	a bracket expression.
	828
	829	An unescaped backslash cannot be last in a BRE.
	830
	831	=head2 Bounds
	832
	833	The BRE bounds are: B<*>, specifying 0 or more matches of the preceding
	834	atom; B<\{>I<count>B<\}>, specifying that many repetitions;
	835	B<\{>I<minimum>B<,\}>, giving a lower limit; and
	836	B<\{>I<minimum>B<,>I<maximum>B<\}> finally defines a lower and upper
	837	bound.
	838
	839	A bound appearing as the first item in a BRE is taken literally.
	840
	841	=head2 Bracket Expressions
	842
	843	A I<bracket expression> is a list of characters, character ranges
	844	and character classes enclosed in B<[> and B<]> and matches any
	845	single character from the represented set of characters.
	846
	847	A character range is written as two characters separated by B<-> and
	848	represents all characters (according to the character collating sequence)
	849	that are not less than the first and not greater than the second.
	850	(Ranges are very collating-sequence-dependent, and portable programs
	851	should avoid relying on them.)
	852
	853	A character class is one of the class names
	854
	855	alnum digit punct
	856	alpha graph space
	857	blank lower upper
	858	cntrl print xdigit
	859
	860	enclosed in B<[:> and B<:]> and represents the set of characters
	861	as defined in ctype(3).
	862
	863	If the first character after B<[> is B<^>, the sense of matching is
	864	inverted.
	865
	866	To include a literal `C<^>', place it anywhere else but first. To
	867	include a literal 'C<]>' place it first or immediately after an
	868	initial B<^>. To include a literal `C<->' make it the first (or
	869	second after B<^>) or last character, or the second endpoint of
	870	a range.
	871
	872	The special bracket expression constructs C<[[:E<lt>:]]> and C<[[:E<gt>:]]>
	873	match the null string at the beginning and end of a word respectively.
	874	(Note that neither is identical to Perl's `\b' atom.)
	875
	876	=head2 Additional Atoms
	877
	878	Since some sed implementations provide additional regular expression
	879	atoms (not defined in POSIX 1003.2), B<psed> is capable of translating
	880	the following backslash escapes:
	881
	882	=over 4
	883
	884	=item B<\E<lt>> This is the same as C<[[:E<gt>:]]>.
	885
	886	=item B<\E<gt>> This is the same as C<[[:E<lt>:]]>.
	887
	888	=item B<\w> This is an abbreviation for C<[[:alnum:]_]>.
	889
	890	=item B<\W> This is an abbreviation for C<[^[:alnum:]_]>.
	891
	892	=item B<\y> Match the empty string at a word boundary.
	893
	894	=item B<\B> Match the empty string between any two either word or non-word characters.
	895
	896	=back
	897
	898	To enable this feature, the environment variable PSEDEXTBRE must be set
	899	to a string containing the requested characters, e.g.:
	900	C<PSEDEXTBRE='E<lt>E<gt>wW'>.
	901
	902	=cut
	903
	904	#####
	905	# bre2p - convert BRE to Perl RE
	906	#
	907	sub peek(\$$){
	908	my( $pref, $ic ) = @_;
	909	$ic < length($$pref)-1 ? substr( $$pref, $ic+1, 1 ) : '';
	910	}
	911
	912	sub bre2p($$$){
	913	my( $del, $pat, $fl ) = @_;
	914	my $led = $del;
	915	$led =~ tr/{([</})]>/;
	916	$led = '' if $led eq $del;
	917
	918	$pat = substr( $pat, 1, length($pat) - 2 );
	919	my $res = '';
	920	my $bracklev = 0;
	921	my $backref = 0;
	922	my $parlev = 0;
	923	for( my $ic = 0; $ic < length( $pat ); $ic++ ){
	924	my $c = substr( $pat, $ic, 1 );
	925	if( $c eq '\\' ){
	926	### backslash escapes
	927	my $nc = peek($pat,$ic);
	928	if( $nc eq '' ){
	929	Warn( "`\\' cannot be last in pattern", $fl );
	930	return undef();
	931	}
	932	$ic++;
	933	if( $nc eq $del ){ ## \<pattern del> => \<pattern del>
	934	$res .= "\\$del";
	935
	936	} elsif( $nc =~ /([[.*\\n])/ ){
	937	## check for \-escaped magics and \n:
	938	## \[ \. \* \\ \n stay as they are
	939	$res .= '\\'.$nc;
	940
	941	} elsif( $nc eq '(' ){ ## \( => (
	942	$parlev++;
	943	$res .= '(';
	944
	945	} elsif( $nc eq ')' ){ ## \) => )
	946	$parlev--;
	947	$backref++;
	948	if( $parlev < 0 ){
	949	Warn( "unmatched `\\)'", $fl );
	950	return undef();
	951	}
	952	$res .= ')';
	953
	954	} elsif( $nc eq '{' ){ ## repetition factor \{<i>[,[<j>]]\}
	955	my $endpos = index( $pat, '\\}', $ic );
	956	if( $endpos < 0 ){
	957	Warn( "unmatched `\\{'", $fl );
	958	return undef();
	959	}
	960	my $rep = substr( $pat, $ic+1, $endpos-($ic+1) );
	961	$ic = $endpos + 1;
	962
	963	if( $res =~ /^\^?$/ ){
	964	$res .= "\\{$rep\}";
	965	} elsif( $rep =~ /^(\d+)(,?)(\d*)?$/ ){
	966	my $min = $1;
	967	my $com = $2 \|\| '';
	968	my $max = $3;
	969	if( length( $max ) ){
	970	if( $max < $min ){
	971	Warn( "maximum less than minimum in `\\{$rep\\}'",
	972	$fl );
	973	return undef();
	974	}
	975	} else {
	976	$max = '';
	977	}
	978	# simplify some
	979	if( $min == 0 && $max eq '1' ){
	980	$res .= '?';
	981	} elsif( $min == 1 && "$com$max" eq ',' ){
	982	$res .= '+';
	983	} elsif( $min == 0 && "$com$max" eq ',' ){
	984	$res .= '*';
	985	} else {
	986	$res .= "{$min$com$max}";
	987	}
	988	} else {
	989	Warn( "invalid repeat clause `\\{$rep\\}'", $fl );
	990	return undef();
	991	}
	992
	993	} elsif( $nc =~ /^[1-9]$/ ){
	994	## \1 .. \9 => \1 .. \9, but check for a following digit
	995	if( $nc > $backref ){
	996	Warn( "invalid backreference ($nc)", $fl );
	997	return undef();
	998	}
	999	$res .= "\\$nc";
	1000	if( peek($pat,$ic) =~ /[0-9]/ ){
	1001	$res .= '(?:)';
	1002	}
	1003
	1004	} elsif( $useEXTBRE && ( $nc =~ /[$useEXTBRE]/ ) ){
	1005	## extensions - at most <>wWyB - not in POSIX
	1006	if( $nc eq '<' ){ ## \< => \b(?=\w), be precise
	1007	$res .= '\\b(?<=\\W)';
	1008	} elsif( $nc eq '>' ){ ## \> => \b(?=\W), be precise
	1009	$res .= '\\b(?=\\W)';
	1010	} elsif( $nc eq 'y' ){ ## \y => \b
	1011	$res .= '\\b';
	1012	} else { ## \B, \w, \W remain the same
	1013	$res .= "\\$nc";
	1014	}
	1015	} elsif( $nc eq $led ){
	1016	## \<closing bracketing-delimiter> - keep '\'
	1017	$res .= "\\$nc";
	1018
	1019	} else { ## \ <char> => <char> ("as if `\' were not present")
	1020	$res .= $nc;
	1021	}
	1022
	1023	} elsif( $c eq '.' ){ ## . => .
	1024	$res .= $c;
	1025
	1026	} elsif( $c eq '' ){ ## => * but \* if there's nothing preceding it
	1027	if( $res =~ /^\^?$/ ){
	1028	$res .= '\\*';
	1029	} elsif( substr( $res, -1, 1 ) ne '*' ){
	1030	$res .= $c;
	1031	}
	1032
	1033	} elsif( $c eq '[' ){
	1034	## parse []: [^...] [^]...] [-...]
	1035	my $add = '[';
	1036	if( peek($pat,$ic) eq '^' ){
	1037	$ic++;
	1038	$add .= '^';
	1039	}
	1040	my $nc = peek($pat,$ic);
	1041	if( $nc eq ']' \|\| $nc eq '-' ){
	1042	$add .= $nc;
	1043	$ic++;
	1044	}
	1045	# check that [ is not trailing
	1046	if( $ic >= length( $pat ) - 1 ){
	1047	Warn( "unmatched `['", $fl );
	1048	return undef();
	1049	}
	1050	# look for [:...:] and x-y
	1051	my $rstr = substr( $pat, $ic+1 );
	1052	if( $rstr =~ /^((?:\[:$\w+\|[><]$:\]\|[^]-](?:-[^]])?)*)/ ){
	1053	my $cnt = $1;
	1054	$ic += length( $cnt );
	1055	$cnt =~ s/([\\\$])/\\$1/g; # `\', `$' are magic in Perl []
	1056	# try some simplifications
	1057	my $red = $cnt;
	1058	if( $red =~ s/0-9// ){
	1059	$cnt = $red.'\d';
	1060	if( $red =~ s/A-Z// && $red =~ s/a-z// && $red =~ s/_// ){
	1061	$cnt = $red.'\w';
	1062	}
	1063	}
	1064	$add .= $cnt;
	1065
	1066	# POSIX 1003.2 has this (optional) for begin/end word
	1067	$add = '\\b(?=\\W)' if $add eq '[[:<:]]';
	1068	$add = '\\b(?<=\\W)' if $add eq '[[:>:]]';
	1069
	1070	}
	1071
	1072	## may have a trailing `-' before `]'
	1073	if( $ic < length($pat) - 1 &&
	1074	substr( $pat, $ic+1 ) =~ /^(-?])/ ){
	1075	$ic += length( $1 );
	1076	$add .= $1;
	1077	# another simplification
	1078	$add =~ s/^\[(\^?)(\\[dw])]$/ $1 eq '^' ? uc($2) : $2 /e;
	1079	$res .= $add;
	1080	} else {
	1081	Warn( "unmatched `['", $fl );
	1082	return undef();
	1083	}
	1084
	1085	} elsif( $c eq $led ){ ## unescaped <closing bracketing-delimiter>
	1086	$res .= "\\$c";
	1087
	1088	} elsif( $c eq ']' ){ ## unmatched ] is not magic
	1089	$res .= ']';
	1090
	1091	} elsif( $c =~ /[\|+?{}()]/ ){ ## not magic in BRE, but in Perl: \-quote
	1092	$res .= "\\$c";
	1093
	1094	} elsif( $c eq '^' ){ ## not magic unless 1st, but in Perl: \-quote
	1095	$res .= length( $res ) ? '\\^' : '^';
	1096
	1097	} elsif( $c eq '$' ){ ## not magic unless last, but in Perl: \-quote
	1098	$res .= $ic == length( $pat ) - 1 ? '$' : '\\$';
	1099
	1100	} else {
	1101	$res .= $c;
	1102	}
	1103	}
	1104
	1105	if( $parlev ){
	1106	Warn( "unmatched `\\('", $fl );
	1107	return undef();
	1108	}
	1109
	1110	# final cleanup: eliminate raw HTs
	1111	$res =~ s/\t/\\t/g;
	1112	return $del . $res . ( $led ? $led : $del );
	1113	}
	1114
	1115
	1116	#####
	1117	# sub2p - convert sed substitution to Perl substitution
	1118	#
	1119	sub sub2p($$$){
	1120	my( $del, $subst, $fl ) = @_;
	1121	my $led = $del;
	1122	$led =~ tr/{([</})]>/;
	1123	$led = '' if $led eq $del;
	1124
	1125	$subst = substr( $subst, 1, length($subst) - 2 );
	1126	my $res = '';
	1127
	1128	for( my $ic = 0; $ic < length( $subst ); $ic++ ){
	1129	my $c = substr( $subst, $ic, 1 );
	1130	if( $c eq '\\' ){
	1131	### backslash escapes
	1132	my $nc = peek($subst,$ic);
	1133	if( $nc eq '' ){
	1134	Warn( "`\\' cannot be last in substitution", $fl );
	1135	return undef();
	1136	}
	1137	$ic++;
	1138	if( $nc =~ /[\\$del$led]/ ){ ## \ and delimiter
	1139	$res .= '\\' . $nc;
	1140	} elsif( $nc =~ /[1-9]/ ){ ## \1 - \9 => ${1} - ${9}
	1141	$res .= '${' . $nc . '}';
	1142	} else { ## everything else (includes &): omit \
	1143	$res .= $nc;
	1144	}
	1145	} elsif( $c eq '&' ){ ## & => $&
	1146	$res .= '$&';
	1147	} elsif( $c =~ /[\$\@$led]/ ){ ## magic in Perl's substitution string
	1148	$res .= '\\' . $c;
	1149	} else {
	1150	$res .= $c;
	1151	}
	1152	}
	1153
	1154	# final cleanup: eliminate raw HTs
	1155	$res =~ s/\t/\\t/g;
	1156	return ( $led ? $del : $led ) . $res . ( $led ? $led : $del );
	1157	}
	1158
	1159
	1160	sub Parse(){
	1161	my $error = 0;
	1162	my( $pdef, $pfil, $plin );
	1163	for( my $icom = 0; $icom < @Commands; $icom++ ){
	1164	my $cmd = $Commands[$icom];
	1165	print "Parse:$cmd:\n" if $useDEBUG;
	1166	$cmd =~ s/^\s+//;
	1167	next unless length( $cmd );
	1168	my $scom = $icom;
	1169	if( exists( $Defined{$icom} ) ){
	1170	$pdef = $Defined{$icom};
	1171	if( $pdef =~ /^ #(\d+)/ ){
	1172	$pfil = 'expression #';
	1173	$plin = $1;
	1174	} else {
	1175	$pfil = "$pdef l.";
	1176	$plin = 1;
	1177	}
	1178	} else {
	1179	$plin++;
	1180	}
	1181	my $fl = "$pfil$plin";
	1182
	1183	# insert command as comment in gnerated code
	1184	#
	1185	$Code .= "# $cmd\n" if $doGenerate;
	1186
	1187	# The Address(es)
	1188	#
	1189	my( $negated, $naddr, $addr1, $addr2 );
	1190	$naddr = 0;
	1191	if( $cmd =~ s/^(\d+)\s*// ){
	1192	$addr1 = "$1"; $naddr++;
	1193	} elsif( $cmd =~ s/^\$\s*// ){
	1194	$addr1 = 'eofARGV()'; $naddr++;
	1195	} elsif( $cmd =~ s{^(/)}{} \|\| $cmd =~ s{^\\(.)}{} ){
	1196	my $del = $1;
	1197	my $regex = stripRegex( $del, \$cmd );
	1198	if( defined( $regex ) ){
	1199	$addr1 = 'm '.bre2p( $del, $regex, $fl ).'s';
	1200	$naddr++;
	1201	} else {
	1202	Warn( "malformed regex, 1st address", $fl );
	1203	$error++;
	1204	next;
	1205	}
	1206	}
	1207	if( defined( $addr1 ) && $cmd =~ s/,\s*// ){
	1208	if( $cmd =~ s/^(\d+)\s*// ){
	1209	$addr2 = "$1"; $naddr++;
	1210	} elsif( $cmd =~ s/^\$\s*// ){
	1211	$addr2 = 'eofARGV()'; $naddr++;
	1212	} elsif( $cmd =~ s{^(/)}{} \|\| $cmd =~ s{^\\(.)}{} ){
	1213	my $del = $1;
	1214	my $regex = stripRegex( $del, \$cmd );
	1215	if( defined( $regex ) ){
	1216	$addr2 = 'm '. bre2p( $del, $regex, $fl ).'s';
	1217	$naddr++;
	1218	} else {
	1219	Warn( "malformed regex, 2nd address", $fl );
	1220	$error++;
	1221	next;
	1222	}
	1223	} else {
	1224	Warn( "invalid address after `,'", $fl );
	1225	$error++;
	1226	next;
	1227	}
	1228	}
	1229
	1230	# address modifier `!'
	1231	#
	1232	$negated = $cmd =~ s/^!\s*//;
	1233	if( defined( $addr1 ) ){
	1234	print "Parse: addr1=$addr1" if $useDEBUG;
	1235	if( defined( $addr2 ) ){
	1236	print ", addr2=$addr2 " if $useDEBUG;
	1237	# both numeric and addr1 > addr2 => eliminate addr2
	1238	undef( $addr2 ) if $addr1 =~ /^\d+$/ &&
	1239	$addr2 =~ /^\d+$/ && $addr1 > $addr2;
	1240	}
	1241	}
	1242	print 'negated' if $useDEBUG && $negated;
	1243	print " command:$cmd\n" if $useDEBUG;
	1244
	1245	# The Command
	1246	#
	1247	if( $cmd !~ s/^([:#={}abcdDgGhHilnNpPqrstwxy])\s*// ){
	1248	my $h = substr( $cmd, 0, 1 );
	1249	Warn( "unknown command `$h'", $fl );
	1250	$error++;
	1251	next;
	1252	}
	1253	my $key = $1;
	1254
	1255	my $tabref = $ComTab{$key};
	1256	$GenKey{$key} = 1;
	1257	if( $naddr > $tabref->[0] ){
	1258	Warn( "excess address(es)", $fl );
	1259	$error++;
	1260	next;
	1261	}
	1262
	1263	my $arg = '';
	1264	if( $tabref->[1] eq 'str' ){
	1265	# take remainder - don't care if it is empty
	1266	$arg = $cmd;
	1267	$cmd = '';
	1268
	1269	} elsif( $tabref->[1] eq 'txt' ){
	1270	# multi-line text
	1271	my $goon = $cmd =~ /(.*)\\$/;
	1272	if( length( $1 ) ){
	1273	Warn( "extra characters after command ($cmd)", $fl );
	1274	$error++;
	1275	}
	1276	while( $goon ){
	1277	$icom++;
	1278	if( $icom > $#Commands ){
	1279	Warn( "unexpected end of script", $fl );
	1280	$error++;
	1281	last;
	1282	}
	1283	$cmd = $Commands[$icom];
	1284	$Code .= "# $cmd\n" if $doGenerate;
	1285	$goon = $cmd =~ s/\\$//;
	1286	$cmd =~ s/\\(.)/$1/g;
	1287	$arg .= "\n" if length( $arg );
	1288	$arg .= $cmd;
	1289	}
	1290	$arg .= "\n" if length( $arg );
	1291	$cmd = '';
	1292
	1293	} elsif( $tabref->[1] eq 'sub' ){
	1294	# s///
	1295	if( ! length( $cmd ) ){
	1296	Warn( "`s' command requires argument", $fl );
	1297	$error++;
	1298	next;
	1299	}
	1300	if( $cmd =~ s{^([^\\\n])}{} ){
	1301	my $del = $1;
	1302	my $regex = stripRegex( $del, \$cmd );
	1303	if( ! defined( $regex ) ){
	1304	Warn( "malformed regular expression", $fl );
	1305	$error++;
	1306	next;
	1307	}
	1308	$regex = bre2p( $del, $regex, $fl );
	1309
	1310	# a trailing \ indicates embedded NL (in replacement string)
	1311	while( $cmd =~ s/(?<!\\)\\$/\n/ ){
	1312	$icom++;
	1313	if( $icom > $#Commands ){
	1314	Warn( "unexpected end of script", $fl );
	1315	$error++;
	1316	last;
	1317	}
	1318	$cmd .= $Commands[$icom];
	1319	$Code .= "# $Commands[$icom]\n" if $doGenerate;
	1320	}
	1321
	1322	my $subst = stripRegex( $del, \$cmd );
	1323	if( ! defined( $regex ) ){
	1324	Warn( "malformed substitution expression", $fl );
	1325	$error++;
	1326	next;
	1327	}
	1328	$subst = sub2p( $del, $subst, $fl );
	1329
	1330	# parse s/// modifier: g\|p\|0-9\|w <file>
	1331	my( $global, $nmatch, $print, $write ) =
	1332	( '', '', 0, undef );
	1333	while( $cmd =~ s/^([gp0-9])// ){
	1334	$1 eq 'g' ? ( $global = 'g' ) :
	1335	$1 eq 'p' ? ( $print = $1 ) : ( $nmatch .= $1 );
	1336	}
	1337	$write = $1 if $cmd =~ s/w\s(.)$//;
	1338	### $nmatch =~ s/^(\d)\1*$/$1/; ### may be dangerous?
	1339	if( $global && length( $nmatch ) \|\| length( $nmatch ) > 1 ){
	1340	Warn( "conflicting flags `$global$nmatch'", $fl );
	1341	$error++;
	1342	next;
	1343	}
	1344
	1345	$arg = makes( $regex, $subst,
	1346	$write, $global, $print, $nmatch, $fl );
	1347	if( ! defined( $arg ) ){
	1348	$error++;
	1349	next;
	1350	}
	1351
	1352	} else {
	1353	Warn( "improper delimiter in s command", $fl );
	1354	$error++;
	1355	next;
	1356	}
	1357
	1358	} elsif( $tabref->[1] eq 'tra' ){
	1359	# y///
	1360	# a trailing \ indicates embedded newline
	1361	while( $cmd =~ s/(?<!\\)\\$/\n/ ){
	1362	$icom++;
	1363	if( $icom > $#Commands ){
	1364	Warn( "unexpected end of script", $fl );
	1365	$error++;
	1366	last;
	1367	}
	1368	$cmd .= $Commands[$icom];
	1369	$Code .= "# $Commands[$icom]\n" if $doGenerate;
	1370	}
	1371	if( ! length( $cmd ) ){
	1372	Warn( "`y' command requires argument", $fl );
	1373	$error++;
	1374	next;
	1375	}
	1376	my $d = substr( $cmd, 0, 1 ); $cmd = substr( $cmd, 1 );
	1377	if( $d eq '\\' ){
	1378	Warn( "`\\' not valid as delimiter in `y' command", $fl );
	1379	$error++;
	1380	next;
	1381	}
	1382	my $fr = stripTrans( $d, \$cmd );
	1383	if( ! defined( $fr ) \|\| ! length( $cmd ) ){
	1384	Warn( "malformed `y' command argument", $fl );
	1385	$error++;
	1386	next;
	1387	}
	1388	my $to = stripTrans( $d, \$cmd );
	1389	if( ! defined( $to ) ){
	1390	Warn( "malformed `y' command argument", $fl );
	1391	$error++;
	1392	next;
	1393	}
	1394	if( length($fr) != length($to) ){
	1395	Warn( "string lengths in `y' command differ", $fl );
	1396	$error++;
	1397	next;
	1398	}
	1399	if( ! defined( $arg = makey( $fr, $to, $fl ) ) ){
	1400	$error++;
	1401	next;
	1402	}
	1403
	1404	}
	1405
	1406	# $cmd must be now empty - exception is {
	1407	if( $cmd !~ /^\s*$/ ){
	1408	if( $key eq '{' ){
	1409	# dirty hack to process command on '{' line
	1410	$Commands[$icom--] = $cmd;
	1411	} else {
	1412	Warn( "extra characters after command ($cmd)", $fl );
	1413	$error++;
	1414	next;
	1415	}
	1416	}
	1417
	1418	# Make Code
	1419	#
	1420	if( &{$tabref->[2]}( $addr1, $addr2, $negated,
	1421	$tabref->[3], $arg, $fl ) ){
	1422	$error++;
	1423	}
	1424	}
	1425
	1426	while( @BlockStack ){
	1427	my $bl = pop( @BlockStack );
	1428	Warn( "start of unterminated `{'", $bl );
	1429	$error++;
	1430	}
	1431
	1432	for my $lab ( keys( %Label ) ){
	1433	if( ! exists( $Label{$lab}{defined} ) ){
	1434	for my $used ( @{$Label{$lab}{used}} ){
	1435	Warn( "undefined label `$lab'", $used );
	1436	$error++;
	1437	}
	1438	}
	1439	}
	1440
	1441	exit( 1 ) if $error;
	1442	}
	1443
	1444
	1445	##############
	1446	#### MAIN ####
	1447	##############
	1448
	1449	sub usage(){
	1450	print STDERR "Usage: sed [-an] command [file...]\n";
	1451	print STDERR " [-an] [-e command] [-f script-file] [file...]\n";
	1452	}
	1453
	1454	###################
	1455	# Here we go again...
	1456	#
	1457	my $expr = 0;
	1458	while( @ARGV && $ARGV[0] =~ /^-(.)(.*)$/ ){
	1459	my $opt = $1;
	1460	my $arg = $2;
	1461	shift( @ARGV );
	1462	if( $opt eq 'e' ){
	1463	if( length( $arg ) ){
	1464	push( @Commands, split( "\n", $arg ) );
	1465	} elsif( @ARGV ){
	1466	push( @Commands, shift( @ARGV ) );
	1467	} else {
	1468	Warn( "option -e requires an argument" );
	1469	usage();
	1470	exit( 1 );
	1471	}
	1472	$expr++;
	1473	$Defined{$#Commands} = " #$expr";
	1474	next;
	1475	}
	1476	if( $opt eq 'f' ){
	1477	my $path;
	1478	if( length( $arg ) ){
	1479	$path = $arg;
	1480	} elsif( @ARGV ){
	1481	$path = shift( @ARGV );
	1482	} else {
	1483	Warn( "option -f requires an argument" );
	1484	usage();
	1485	exit( 1 );
	1486	}
	1487	my $fst = $#Commands + 1;
	1488	open( SCRIPT, "<$path" ) \|\| die( "$0: $path: could not open ($!)\n" );
	1489	my $cmd;
	1490	while( defined( $cmd = <SCRIPT> ) ){
	1491	chomp( $cmd );
	1492	push( @Commands, $cmd );
	1493	}
	1494	close( SCRIPT );
	1495	if( $#Commands >= $fst ){
	1496	$Defined{$fst} = "$path";
	1497	}
	1498	next;
	1499	}
	1500	if( $opt eq '-' && $arg eq '' ){
	1501	last;
	1502	}
	1503	if( $opt eq 'h' \|\| $opt eq '?' ){
	1504	usage();
	1505	exit( 0 );
	1506	}
	1507	if( $opt eq 'n' ){
	1508	$doAutoPrint = 0;
	1509	} elsif( $opt eq 'a' ){
	1510	$doOpenWrite = 0;
	1511	} else {
	1512	Warn( "illegal option `$opt'" );
	1513	usage();
	1514	exit( 1 );
	1515	}
	1516	if( length( $arg ) ){
	1517	unshift( @ARGV, "-$arg" );
	1518	}
	1519	}
	1520
	1521	# A singleton command may be the 1st argument when there are no options.
	1522	#
	1523	if( @Commands == 0 ){
	1524	if( @ARGV == 0 ){
	1525	Warn( "no script command given" );
	1526	usage();
	1527	exit( 1 );
	1528	}
	1529	push( @Commands, split( "\n", shift( @ARGV ) ) );
	1530	$Defined{0} = ' #1';
	1531	}
	1532
	1533	print STDERR "Files: @ARGV\n" if $useDEBUG;
	1534
	1535	# generate leading code
	1536	#
	1537	$Func = <<'[TheEnd]';
	1538
	1539	# openARGV: open 1st input file
	1540	#
	1541	sub openARGV(){
	1542	unshift( @ARGV, '-' ) unless @ARGV;
	1543	my $file = shift( @ARGV );
	1544	open( ARG, "<$file" )
	1545	\|\| die( "$0: can't open $file for reading ($!)\n" );
	1546	$isEOF = 0;
	1547	}
	1548
	1549	# getsARGV: Read another input line into argument (default: $_).
	1550	# Move on to next input file, and reset EOF flag $isEOF.
	1551	sub getsARGV(;\$){
	1552	my $argref = @_ ? shift() : \$_;
	1553	while( $isEOF \|\| ! defined( $$argref = <ARG> ) ){
	1554	close( ARG );
	1555	return 0 unless @ARGV;
	1556	my $file = shift( @ARGV );
	1557	open( ARG, "<$file" )
	1558	\|\| die( "$0: can't open $file for reading ($!)\n" );
	1559	$isEOF = 0;
	1560	}
	1561	1;
	1562	}
	1563
	1564	# eofARGV: end-of-file test
	1565	#
	1566	sub eofARGV(){
	1567	return @ARGV == 0 && ( $isEOF = eof( ARG ) );
	1568	}
	1569
	1570	# makeHandle: Generates another file handle for some file (given by its path)
	1571	# to be written due to a w command or an s command's w flag.
	1572	sub makeHandle($){
	1573	my( $path ) = @_;
	1574	my $handle;
	1575	if( ! exists( $wFiles{$path} ) \|\| $wFiles{$path} eq '' ){
	1576	$handle = $wFiles{$path} = gensym();
	1577	if( $doOpenWrite ){
	1578	if( ! open( $handle, ">$path" ) ){
	1579	die( "$0: can't open $path for writing: ($!)\n" );
	1580	}
	1581	}
	1582	} else {
	1583	$handle = $wFiles{$path};
	1584	}
	1585	return $handle;
	1586	}
	1587
	1588	# printQ: Print queued output which is either a string or a reference
	1589	# to a pathname.
	1590	sub printQ(){
	1591	for my $q ( @Q ){
	1592	if( ref( $q ) ){
	1593	# flush open w files so that reading this file gets it all
	1594	if( exists( $wFiles{$$q} ) && $wFiles{$$q} ne '' ){
	1595	open( $wFiles{$$q}, ">>$$q" );
	1596	}
	1597	# copy file to stdout: slow, but safe
	1598	if( open( RF, "<$$q" ) ){
	1599	while( defined( my $line = <RF> ) ){
	1600	print $line;
	1601	}
	1602	close( RF );
	1603	}
	1604	} else {
	1605	print $q;
	1606	}
	1607	}
	1608	undef( @Q );
	1609	}
	1610
	1611	[TheEnd]
	1612
	1613	# generate the sed loop
	1614	#
	1615	$Code .= <<'[TheEnd]';
	1616	sub openARGV();
	1617	sub getsARGV(;\$);
	1618	sub eofARGV();
	1619	sub printQ();
	1620
	1621	# Run: the sed loop reading input and applying the script
	1622	#
	1623	sub Run(){
	1624	my( $h, $icnt, $s, $n );
	1625	# hack (not unbreakable :-/) to avoid // matching an empty string
	1626	my $z = "\000"; $z =~ /$z/;
	1627	# Initialize.
	1628	openARGV();
	1629	$Hold = '';
	1630	$CondReg = 0;
	1631	$doPrint = $doAutoPrint;
	1632	CYCLE:
	1633	while( getsARGV() ){
	1634	chomp();
	1635	$CondReg = 0; # cleared on t
	1636	BOS:;
	1637	[TheEnd]
	1638
	1639	# parse - avoid opening files when doing s2p
	1640	#
	1641	( $svOpenWrite, $doOpenWrite ) = ( $doOpenWrite, $svOpenWrite )
	1642	if $doGenerate;
	1643	Parse();
	1644	( $svOpenWrite, $doOpenWrite ) = ( $doOpenWrite, $svOpenWrite )
	1645	if $doGenerate;
	1646
	1647	# append trailing code
	1648	#
	1649	$Code .= <<'[TheEnd]';
	1650	EOS: if( $doPrint ){
	1651	print $_, "\n";
	1652	} else {
	1653	$doPrint = $doAutoPrint;
	1654	}
	1655	printQ() if @Q;
	1656	}
	1657
	1658	exit( 0 );
	1659	}
	1660	[TheEnd]
	1661
	1662
	1663	# append optional functions, prepend prototypes
	1664	#
	1665	my $Proto = "# prototypes\n";
	1666	if( $GenKey{'l'} ){
	1667	$Proto .= "sub _l();\n";
	1668	$Func .= <<'[TheEnd]';
	1669	# _l: l command processing
	1670	#
	1671	sub _l(){
	1672	my $h = $_;
	1673	my $mcpl = 70;
	1674	# transform non printing chars into escape notation
	1675	$h =~ s/\\/\\\\/g;
	1676	if( $h =~ /[^[:print:]]/ ){
	1677	$h =~ s/\a/\\a/g;
	1678	$h =~ s/\f/\\f/g;
	1679	$h =~ s/\n/\\n/g;
	1680	$h =~ s/\t/\\t/g;
	1681	$h =~ s/\r/\\r/g;
	1682	$h =~ s/\e/\\e/g;
	1683	$h =~ s/([^[:print:]])/sprintf("\\%03o", ord($1))/ge;
	1684	}
	1685	# split into lines of length $mcpl
	1686	while( length( $h ) > $mcpl ){
	1687	my $l = substr( $h, 0, $mcpl-1 );
	1688	$h = substr( $h, $mcpl );
	1689	# remove incomplete \-escape from end of line
	1690	if( $l =~ s/(?<!\\)(\\[0-7]{0,2})$// ){
	1691	$h = $1 . $h;
	1692	}
	1693	print $l, "\\\n";
	1694	}
	1695	print "$h\$\n";
	1696	}
	1697
	1698	[TheEnd]
	1699	}
	1700
	1701	if( $GenKey{'r'} ){
	1702	$Proto .= "sub _r(\$);\n";
	1703	$Func .= <<'[TheEnd]';
	1704	# _r: r command processing: Save a reference to the pathname.
	1705	#
	1706	sub _r($){
	1707	my $path = shift();
	1708	push( @Q, \$path );
	1709	}
	1710
	1711	[TheEnd]
	1712	}
	1713
	1714	if( $GenKey{'t'} ){
	1715	$Proto .= "sub _t();\n";
	1716	$Func .= <<'[TheEnd]';
	1717	# _t: t command - condition register test/reset
	1718	#
	1719	sub _t(){
	1720	my $res = $CondReg;
	1721	$CondReg = 0;
	1722	$res;
	1723	}
	1724
	1725	[TheEnd]
	1726	}
	1727
	1728	if( $GenKey{'w'} ){
	1729	$Proto .= "sub _w(\$);\n";
	1730	$Func .= <<'[TheEnd]';
	1731	# _w: w command and s command's w flag - write to file
	1732	#
	1733	sub _w($){
	1734	my $path = shift();
	1735	my $handle = $wFiles{$path};
	1736	if( ! $doOpenWrite && ! defined( fileno( $handle ) ) ){
	1737	open( $handle, ">$path" )
	1738	\|\| die( "$0: $path: cannot open ($!)\n" );
	1739	}
	1740	print $handle $_, "\n";
	1741	}
	1742
	1743	[TheEnd]
	1744	}
	1745
	1746	$Code = $Proto . $Code;
	1747
	1748	# magic "#n" - same as -n option
	1749	#
	1750	$doAutoPrint = 0 if substr( $Commands[0], 0, 2 ) eq '#n';
	1751
	1752	# eval code - check for errors
	1753	#
	1754	print "Code:\n$Code$Func" if $useDEBUG;
	1755	eval $Code . $Func;
	1756	if( $@ ){
	1757	print "Code:\n$Code$Func";
	1758	die( "$0: internal error - generated incorrect Perl code: $@\n" );
	1759	}
	1760
	1761	if( $doGenerate ){
	1762
	1763	# write full Perl program
	1764	#
	1765
	1766	# bang line, declarations, prototypes
	1767	print <<TheEnd;
	1768	#!$perlpath -w
	1769	eval 'exec $perlpath -S \$0 \${1+"\$@"}'
	1770	if 0;
	1771	\$0 =~ s/^.?(\\w+)\[\\.\\w+\]\$/\$1/;
	1772
	1773	use strict;
	1774	use Symbol;
	1775	use vars qw{ \$isEOF \$Hold \%wFiles \@Q \$CondReg
	1776	\$doAutoPrint \$doOpenWrite \$doPrint };
	1777	\$doAutoPrint = $doAutoPrint;
	1778	\$doOpenWrite = $doOpenWrite;
	1779	TheEnd
	1780
	1781	my $wf = "'" . join( "', '", keys( %wFiles ) ) . "'";
	1782	if( $wf ne "''" ){
	1783	print <<TheEnd;
	1784	sub makeHandle(\$);
	1785	for my \$p ( $wf ){
	1786	exit( 1 ) unless makeHandle( \$p );
	1787	}
	1788	TheEnd
	1789	}
	1790
	1791	print $Code;
	1792	print "Run();\n";
	1793	print $Func;
	1794	exit( 0 );
	1795
	1796	} else {
	1797
	1798	# execute: make handles (and optionally open) all w files; run!
	1799	for my $p ( keys( %wFiles ) ){
	1800	exit( 1 ) unless makeHandle( $p );
	1801	}
	1802	Run();
	1803	}
	1804
	1805
	1806	=head1 ENVIRONMENT
	1807
	1808	The environment variable C<PSEDEXTBRE> may be set to extend BREs.
	1809	See L<"Additional Atoms">.
	1810
	1811	=head1 DIAGNOSTICS
	1812
	1813	=over 4
	1814
	1815	=item ambiguous translation for character `%s' in `y' command
	1816
	1817	The indicated character appears twice, with different translations.
	1818
	1819	=item `[' cannot be last in pattern
	1820
	1821	A `[' in a BRE indicates the beginning of a I<bracket expression>.
	1822
	1823	=item `\' cannot be last in pattern
	1824
	1825	A `\' in a BRE is used to make the subsequent character literal.
	1826
	1827	=item `\' cannot be last in substitution
	1828
	1829	A `\' in a subsitution string is used to make the subsequent character literal.
	1830
	1831	=item conflicting flags `%s'
	1832
	1833	In an B<s> command, either the `g' flag and an n-th occurrence flag, or
	1834	multiple n-th occurrence flags are specified. Note that only the digits
	1835	`1' through `9' are permitted.
	1836
	1837	=item duplicate label %s (first defined at %s)
	1838
	1839	=item excess address(es)
	1840
	1841	The command has more than the permitted number of addresses.
	1842
	1843	=item extra characters after command (%s)
	1844
	1845	=item illegal option `%s'
	1846
	1847	=item improper delimiter in s command
	1848
	1849	The BRE and substitution may not be delimited with `\' or newline.
	1850
	1851	=item invalid address after `,'
	1852
	1853	=item invalid backreference (%s)
	1854
	1855	The specified backreference number exceeds the number of backreferences
	1856	in the BRE.
	1857
	1858	=item invalid repeat clause `\{%s\}'
	1859
	1860	The repeat clause does not contain a valid integer value, or pair of
	1861	values.
	1862
	1863	=item malformed regex, 1st address
	1864
	1865	=item malformed regex, 2nd address
	1866
	1867	=item malformed regular expression
	1868
	1869	=item malformed substitution expression
	1870
	1871	=item malformed `y' command argument
	1872
	1873	The first or second string of a B<y> command is syntactically incorrect.
	1874
	1875	=item maximum less than minimum in `\{%s\}'
	1876
	1877	=item no script command given
	1878
	1879	There must be at least one B<-e> or one B<-f> option specifying a
	1880	script or script file.
	1881
	1882	=item `\' not valid as delimiter in `y' command
	1883
	1884	=item option -e requires an argument
	1885
	1886	=item option -f requires an argument
	1887
	1888	=item `s' command requires argument
	1889
	1890	=item start of unterminated `{'
	1891
	1892	=item string lengths in `y' command differ
	1893
	1894	The translation table strings in a B<y> commanf must have equal lengths.
	1895
	1896	=item undefined label `%s'
	1897
	1898	=item unexpected `}'
	1899
	1900	A B<}> command without a preceding B<{> command was encountered.
	1901
	1902	=item unexpected end of script
	1903
	1904	The end of the script was reached although a text line after a
	1905	B<a>, B<c> or B<i> command indicated another line.
	1906
	1907	=item unknown command `%s'
	1908
	1909	=item unterminated `['
	1910
	1911	A BRE contains an unterminated bracket expression.
	1912
	1913	=item unterminated `\('
	1914
	1915	A BRE contains an unterminated backreference.
	1916
	1917	=item `\{' without closing `\}'
	1918
	1919	A BRE contains an unterminated bounds specification.
	1920
	1921	=item `\)' without preceding `\('
	1922
	1923	=item `y' command requires argument
	1924
	1925	=back
	1926
	1927	=head1 EXAMPLE
	1928
	1929	The basic material for the preceding section was generated by running
	1930	the sed script
	1931
	1932	#no autoprint
	1933	s/^.Warn( "$[^"]$".$/\1/
	1934	t process
	1935	b
	1936	:process
	1937	s/$!/%s/g
	1938	s/$[_[:alnum:]]\{1,\}/%s/g
	1939	s/\\\\/\\/g
	1940	s/^/=item /
	1941	p
	1942
	1943	on the program's own text, and piping the output into C<sort -u>.
	1944
	1945
	1946	=head1 SED SCRIPT TRANSLATION
	1947
	1948	If this program is invoked with the name F<s2p> it will act as a
	1949	sed-to-Perl translator. After option processing (all other
	1950	arguments are ignored), a Perl program is printed on standard
	1951	output, which will process the input stream (as read from all
	1952	arguments) in the way defined by the sed script and the option setting
	1953	used for the translation.
	1954
	1955	=head1 SEE ALSO
	1956
	1957	perl(1), re_format(7)
	1958
	1959	=head1 BUGS
	1960
	1961	The B<l> command will show escape characters (ESC) as `C<\e>', but
	1962	a vertical tab (VT) in octal.
	1963
	1964	Trailing spaces are truncated from labels in B<:>, B<t> and B<b> commands.
	1965
	1966	The meaning of an empty regular expression (`C<//>'), as defined by B<sed>,
	1967	is "the last pattern used, at run time". This deviates from the Perl
	1968	interpretation, which will re-use the "last last successfully executed
	1969	regular expression". Since keeping track of pattern usage would create
	1970	terribly cluttered code, and differences would only appear in obscure
	1971	context (where other B<sed> implementations appear to deviate, too),
	1972	the Perl semantics was adopted. Note that common usage of this feature,
	1973	such as in C</abc/s//xyz/>, will work as expected.
	1974
	1975	Collating elements (of bracket expressions in BREs) are not implemented.
	1976
	1977	=head1 STANDARDS
	1978
	1979	This B<sed> implementation conforms to the IEEE Std1003.2-1992 ("POSIX.2")
	1980	definition of B<sed>, and is compatible with the I<OpenBSD>
	1981	implementation, except where otherwise noted (see L<"BUGS">).
	1982
	1983	=head1 AUTHOR
	1984
	1985	This Perl implementation of I<sed> was written by Wolfgang Laun,
	1986	I<Wolfgang.Laun@alcatel.at>.
	1987
	1988	=head1 COPYRIGHT and LICENSE
	1989
	1990	This program is free and open software. You may use, modify,
	1991	distribute, and sell this program (and any modified variants) in any
	1992	way you wish, provided you do not restrict others from doing the same.
	1993
	1994	=cut
	1995