Initial commit of OpenSPARC T2 design and verification files.
[OpenSPARC-T2-DV] / tools / perl-5.8.0 / lib / site_perl / 5.8.0 / Perl / Tidy.pm
CommitLineData
86530b38
AT
1############################################################
2#
3# perltidy - a perl script indenter and formatter
4#
5# Copyright (c) 2000-2003 by Steve Hancock
6# Distributed under the GPL license agreement; see file COPYING
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21#
22# For brief instructions instructions, try 'perltidy -h'.
23# For more complete documentation, try 'man perltidy'
24# or visit http://perltidy.sourceforge.net
25#
26# This script is an example of the default style. It was formatted with:
27#
28# perltidy Tidy.pm
29#
30# Code Contributions:
31# Michael Cartmell supplied code for adaptation to VMS and helped with
32# v-strings.
33# Hugh S. Myers supplied sub streamhandle and the supporting code to
34# create a Perl::Tidy module which can operate on strings, arrays, etc.
35# Yves Orton supplied coding to help detect Windows versions.
36# Axel Rose supplied a patch for MacPerl.
37# Many others have supplied key ideas, suggestions, and bug reports;
38# see the CHANGES file.
39#
40############################################################
41
42package Perl::Tidy;
43use 5.004; # need IO::File from 5.004 or later
44BEGIN { $^W = 1; } # turn on warnings
45
46use strict;
47use Exporter;
48use Carp;
49$|++;
50
51use vars qw{
52 $VERSION
53 @ISA
54 @EXPORT
55 $missing_file_spec
56};
57
58@ISA = qw( Exporter );
59@EXPORT = qw( &perltidy );
60
61use IO::File;
62use File::Basename;
63
64BEGIN {
65 ( $VERSION = q($Id: Tidy.pm,v 1.46 2003/10/21 14:09:29 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
66}
67
68sub streamhandle {
69
70 # given filename and mode (r or w), create an object which:
71 # has a 'getline' method if mode='r', and
72 # has a 'print' method if mode='w'.
73 # The objects also need a 'close' method.
74 #
75 # How the object is made:
76 #
77 # if $filename is: Make object using:
78 # ---------------- -----------------
79 # '-' (STDIN if mode = 'r', STDOUT if mode='w')
80 # string IO::File
81 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
82 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
83 # object object
84 # (check for 'print' method for 'w' mode)
85 # (check for 'getline' method for 'r' mode)
86 my $ref = ref( my $filename = shift );
87 my $mode = shift;
88 my $New;
89 my $fh;
90
91 # handle a reference
92 if ($ref) {
93 if ( $ref eq 'ARRAY' ) {
94 $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
95 }
96 elsif ( $ref eq 'SCALAR' ) {
97 $New = sub { Perl::Tidy::IOScalar->new(@_) };
98 }
99 else {
100
101 # Accept an object with a getline method for reading. Note:
102 # IO::File is built-in and does not respond to the defined
103 # operator. If this causes trouble, the check can be
104 # skipped and we can just let it crash if there is no
105 # getline.
106 if ( $mode =~ /[rR]/ ) {
107 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
108 $New = sub { $filename };
109 }
110 else {
111 $New = sub { undef };
112 confess <<EOM;
113------------------------------------------------------------------------
114No 'getline' method is defined for object of class $ref
115Please check your call to Perl::Tidy::perltidy. Trace follows.
116------------------------------------------------------------------------
117EOM
118 }
119 }
120
121 # Accept an object with a print method for writing.
122 # See note above about IO::File
123 if ( $mode =~ /[wW]/ ) {
124 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
125 $New = sub { $filename };
126 }
127 else {
128 $New = sub { undef };
129 confess <<EOM;
130------------------------------------------------------------------------
131No 'print' method is defined for object of class $ref
132Please check your call to Perl::Tidy::perltidy. Trace follows.
133------------------------------------------------------------------------
134EOM
135 }
136 }
137 }
138 }
139
140 # handle a string
141 else {
142 if ( $filename eq '-' ) {
143 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
144 }
145 else {
146 $New = sub { IO::File->new(@_) };
147 }
148 }
149 $fh = $New->( $filename, $mode )
150 or warn "Couldn't open file:$filename in mode:$mode : $!\n";
151 return $fh, ( $ref or $filename );
152}
153
154sub find_input_line_ending {
155
156 # Peek at a file and return first line ending character.
157 # Quietly return undef in case of any trouble.
158 my ($input_file) = @_;
159 my $ending;
160
161 # silently ignore input from object or stdin
162 if ( ref($input_file) || $input_file eq '-' ) {
163 return $ending;
164 }
165 open( INFILE, $input_file ) || return $ending;
166
167 binmode INFILE;
168 my $buf;
169 read( INFILE, $buf, 1024 );
170 close INFILE;
171 if ( $buf && $buf =~ /([\012\015]+)/ ) {
172 my $test = $1;
173
174 # dos
175 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
176
177 # mac
178 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
179
180 # unix
181 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
182
183 # unknown
184 else { }
185 }
186
187 # no ending seen
188 else { }
189
190 return $ending;
191}
192
193sub catfile {
194
195 # concatenate a path and file basename
196 # returns undef in case of error
197
198 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
199
200 # use File::Spec if we can
201 unless ($missing_file_spec) {
202 return File::Spec->catfile(@_);
203 }
204
205 # Perl 5.004 systems may not have File::Spec so we'll make
206 # a simple try. We assume File::Basename is available.
207 # return undef if not successful.
208 my $name = pop @_;
209 my $path = join '/', @_;
210 my $test_file = $path . $name;
211 my ( $test_name, $test_path ) = fileparse($test_file);
212 return $test_file if ( $test_name eq $name );
213 return undef if ( $^O eq 'VMS' );
214
215 # this should work at least for Windows and Unix:
216 $test_file = $path . '/' . $name;
217 ( $test_name, $test_path ) = fileparse($test_file);
218 return $test_file if ( $test_name eq $name );
219 return undef;
220}
221
222sub make_temporary_filename {
223
224 # Make a temporary filename.
225 #
226 # The POSIX tmpnam() function tends to be unreliable for non-unix
227 # systems (at least for the win32 systems that I've tested), so use
228 # a pre-defined name. A slight disadvantage of this is that two
229 # perltidy runs in the same working directory may conflict.
230 # However, the chance of that is small and managable by the user.
231 # An alternative would be to check for the file's existance and use,
232 # say .TMP0, .TMP1, etc, but that scheme has its own problems. So,
233 # keep it simple.
234 my $name = "perltidy.TMP";
235 if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
236 return $name;
237 }
238 eval "use POSIX qw(tmpnam)";
239 if ($@) { return $name }
240 use IO::File;
241
242 # just make a couple of tries before giving up and using the default
243 for ( 0 .. 1 ) {
244 my $tmpname = tmpnam();
245 my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
246 if ($fh) {
247 $fh->close();
248 return ($tmpname);
249 last;
250 }
251 }
252 return ($name);
253}
254
255# Here is a map of the flow of data from the input source to the output
256# line sink:
257#
258# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
259# input groups output
260# lines tokens lines of lines lines
261# lines
262#
263# The names correspond to the package names responsible for the unit processes.
264#
265# The overall process is controlled by the "main" package.
266#
267# LineSource is the stream of input lines
268#
269# Tokenizer analyzes a line and breaks it into tokens, peeking ahead
270# if necessary. A token is any section of the input line which should be
271# manipulated as a single entity during formatting. For example, a single
272# ',' character is a token, and so is an entire side comment. It handles
273# the complexities of Perl syntax, such as distinguishing between '<<' as
274# a shift operator and as a here-document, or distinguishing between '/'
275# as a divide symbol and as a pattern delimiter.
276#
277# Formatter inserts and deletes whitespace between tokens, and breaks
278# sequences of tokens at appropriate points as output lines. It bases its
279# decisions on the default rules as modified by any command-line options.
280#
281# VerticalAligner collects groups of lines together and tries to line up
282# certain tokens, such as '=>', '#', and '=' by adding whitespace.
283#
284# FileWriter simply writes lines to the output stream.
285#
286# The Logger package, not shown, records significant events and warning
287# messages. It writes a .LOG file, which may be saved with a
288# '-log' or a '-g' flag.
289
290{
291
292 # variables needed by interrupt handler:
293 my $tokenizer;
294 my $input_file;
295
296 # this routine may be called to give a status report if interrupted. If a
297 # parameter is given, it will call exit with that parameter. This is no
298 # longer used because it works under Unix but not under Windows.
299 sub interrupt_handler {
300
301 my $exit_flag = shift;
302 print STDERR "perltidy interrupted";
303 if ($tokenizer) {
304 my $input_line_number =
305 Perl::Tidy::Tokenizer::get_input_line_number();
306 print STDERR " at line $input_line_number";
307 }
308 if ($input_file) {
309
310 if ( ref $input_file ) { print STDERR " of reference to:" }
311 else { print STDERR " of file:" }
312 print STDERR " $input_file";
313 }
314 print STDERR "\n";
315 exit $exit_flag if defined($exit_flag);
316 }
317
318 sub perltidy {
319
320 my %defaults = (
321 argv => undef,
322 destination => undef,
323 formatter => undef,
324 logfile => undef,
325 errorfile => undef,
326 perltidyrc => undef,
327 source => undef,
328 stderr => undef,
329 );
330
331 # don't overwrite callers ARGV
332 local @ARGV = @ARGV;
333
334 my %input_hash = @_;
335 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
336 local $" = ')(';
337 my @good_keys = sort keys %defaults;
338 @bad_keys = sort @bad_keys;
339 confess <<EOM;
340------------------------------------------------------------------------
341Unknown perltidy parameter : (@bad_keys)
342perltidy only understands : (@good_keys)
343------------------------------------------------------------------------
344
345EOM
346 }
347
348 %input_hash = ( %defaults, %input_hash );
349 my $argv = $input_hash{'argv'};
350 my $destination_stream = $input_hash{'destination'};
351 my $errorfile_stream = $input_hash{'errorfile'};
352 my $logfile_stream = $input_hash{'logfile'};
353 my $perltidyrc_stream = $input_hash{'perltidyrc'};
354 my $source_stream = $input_hash{'source'};
355 my $stderr_stream = $input_hash{'stderr'};
356 my $user_formatter = $input_hash{'formatter'};
357
358 if ($user_formatter) {
359
360 # if the user defines a formatter, there is no output stream,
361 # but we need a null stream to keep coding simple
362 $destination_stream = Perl::Tidy::DevNull->new();
363 }
364
365 # see if ARGV is overridden
366 if ( defined($argv) ) {
367
368 my $rargv = ref $argv;
369 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
370
371 # ref to ARRAY
372 if ($rargv) {
373 if ( $rargv eq 'ARRAY' ) {
374 @ARGV = @$argv;
375 }
376 else {
377 croak <<EOM;
378------------------------------------------------------------------------
379Please check value of -argv in call to perltidy;
380it must be a string or ref to ARRAY but is: $rargv
381------------------------------------------------------------------------
382EOM
383 }
384 }
385
386 # string
387 else {
388 my ( $rargv, $msg ) = parse_args($argv);
389 if ($msg) {
390 die <<EOM;
391Error parsing this string passed to to perltidy with 'argv':
392$msg
393EOM
394 }
395 @ARGV = @{$rargv};
396 }
397 }
398
399 # redirect STDERR if requested
400 if ($stderr_stream) {
401 my ( $fh_stderr, $stderr_file ) =
402 Perl::Tidy::streamhandle( $stderr_stream, 'w' );
403 if ($fh_stderr) { *STDERR = $fh_stderr }
404 else {
405 croak <<EOM;
406------------------------------------------------------------------------
407Unable to redirect STDERR to $stderr_stream
408Please check value of -stderr in call to perltidy
409------------------------------------------------------------------------
410EOM
411 }
412 }
413
414 my $rpending_complaint;
415 $$rpending_complaint = "";
416 my $rpending_logfile_message;
417 $$rpending_logfile_message = "";
418
419 my ( $is_Windows, $Windows_type ) =
420 look_for_Windows($rpending_complaint);
421
422 # VMS file names are restricted to a 40.40 format, so we append _tdy
423 # instead of .tdy, etc. (but see also sub check_vms_filename)
424 my $dot;
425 my $dot_pattern;
426 if ( $^O eq 'VMS' ) {
427 $dot = '_';
428 $dot_pattern = '_';
429 }
430 else {
431 $dot = '.';
432 $dot_pattern = '\.'; # must escape for use in regex
433 }
434
435 # handle command line options
436 my ( $rOpts, $config_file, $rraw_options, $saw_extrude ) =
437 process_command_line(
438 $perltidyrc_stream, $is_Windows,
439 $Windows_type, $rpending_complaint
440 );
441
442 if ($user_formatter) {
443 $rOpts->{'format'} = 'user';
444 }
445
446 # there must be one entry here for every possible format
447 my %default_file_extension = (
448 tidy => 'tdy',
449 html => 'html',
450 user => '',
451 );
452
453 # be sure we have a valid output format
454 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
455 my $formats = join ' ',
456 sort map { "'" . $_ . "'" } keys %default_file_extension;
457 my $fmt = $rOpts->{'format'};
458 die "-format='$fmt' but must be one of: $formats\n";
459 }
460
461 my $output_extension =
462 make_extension( $rOpts->{'output-file-extension'},
463 $default_file_extension{ $rOpts->{'format'} }, $dot );
464
465 my $backup_extension =
466 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
467
468 my $html_toc_extension =
469 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
470
471 my $html_src_extension =
472 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
473
474 # check for -b option;
475 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
476 && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
477 && @ARGV > 0; # silently ignore if standard input;
478 # this allows -b to be in a .perltidyrc file
479 # without error messages when running from an editor
480
481 # turn off -b with warnings in case of conflicts with other options
482 if ($in_place_modify) {
483 if ( $rOpts->{'standard-output'} ) {
484 warn "Ignoring -b; you may not use -b and -st together\n";
485 $in_place_modify = 0;
486 }
487 if ($destination_stream) {
488 warn
489"Ignoring -b; you may not specify a destination array and -b together\n";
490 $in_place_modify = 0;
491 }
492 if ($source_stream) {
493 warn
494"Ignoring -b; you may not specify a source array and -b together\n";
495 $in_place_modify = 0;
496 }
497 if ( $rOpts->{'outfile'} ) {
498 warn "Ignoring -b; you may not use -b and -o together\n";
499 $in_place_modify = 0;
500 }
501 if ( defined( $rOpts->{'output-path'} ) ) {
502 warn "Ignoring -b; you may not use -b and -opath together\n";
503 $in_place_modify = 0;
504 }
505 }
506
507 Perl::Tidy::Formatter::check_options($rOpts);
508 if ( $rOpts->{'format'} eq 'html' ) {
509 Perl::Tidy::HtmlWriter->check_options($rOpts);
510 }
511
512 # make the pattern of file extensions that we shouldn't touch
513 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
514 if ($output_extension) {
515 $_ = quotemeta($output_extension);
516 $forbidden_file_extensions .= "|$_";
517 }
518 if ( $in_place_modify && $backup_extension ) {
519 $_ = quotemeta($backup_extension);
520 $forbidden_file_extensions .= "|$_";
521 }
522 $forbidden_file_extensions .= ')$';
523
524 # Create a diagnostics object if requested;
525 # This is only useful for code development
526 my $diagnostics_object = undef;
527 if ( $rOpts->{'DIAGNOSTICS'} ) {
528 $diagnostics_object = Perl::Tidy::Diagnostics->new();
529 }
530
531 # no filenames should be given if input is from an array
532 if ($source_stream) {
533 if ( @ARGV > 0 ) {
534 die
535"You may not specify any filenames when a source array is given\n";
536 }
537
538 # we'll stuff the source array into ARGV
539 unshift( @ARGV, $source_stream );
540
541 # No special treatment for source stream which is a filename.
542 # This will enable checks for binary files and other bad stuff.
543 $source_stream = undef unless ref($source_stream);
544 }
545
546 # use stdin by default if no source array and no args
547 else {
548 unshift( @ARGV, '-' ) unless @ARGV;
549 }
550
551 # loop to process all files in argument list
552 my $number_of_files = @ARGV;
553 my $formatter = undef;
554 $tokenizer = undef;
555 while ( $input_file = shift @ARGV ) {
556 my $fileroot;
557 my $input_file_permissions;
558
559 #---------------------------------------------------------------
560 # determine the input file name
561 #---------------------------------------------------------------
562 if ($source_stream) {
563 $fileroot = "perltidy";
564 }
565 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
566 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
567 $in_place_modify = 0;
568 }
569 else {
570 $fileroot = $input_file;
571 unless ( -e $input_file ) {
572
573 # file doesn't exist - check for a file glob
574 if ( $input_file =~ /([\?\*\[\{])/ ) {
575
576 # Windows shell may not remove quotes, so do it
577 my $input_file = $input_file;
578 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
579 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
580 my $pattern = fileglob_to_re($input_file);
581 eval "/$pattern/";
582 if ( !$@ && opendir( DIR, './' ) ) {
583 my @files =
584 grep { /$pattern/ && !-d $_ } readdir(DIR);
585 closedir(DIR);
586 if (@files) {
587 unshift @ARGV, @files;
588 next;
589 }
590 }
591 }
592 print "skipping file: '$input_file': no matches found\n";
593 next;
594 }
595
596 unless ( -f $input_file ) {
597 print "skipping file: $input_file: not a regular file\n";
598 next;
599 }
600
601 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
602 print
603"skipping file: $input_file: Non-text (override with -f)\n";
604 next;
605 }
606
607 # we should have a valid filename now
608 $fileroot = $input_file;
609 $input_file_permissions = ( stat $input_file )[2] & 07777;
610
611 if ( $^O eq 'VMS' ) {
612 ( $fileroot, $dot ) = check_vms_filename($fileroot);
613 }
614
615 # add option to change path here
616 if ( defined( $rOpts->{'output-path'} ) ) {
617
618 my ( $base, $old_path ) = fileparse($fileroot);
619 my $new_path = $rOpts->{'output-path'};
620 unless ( -d $new_path ) {
621 unless ( mkdir $new_path, 0777 ) {
622 die "unable to create directory $new_path: $!\n";
623 }
624 }
625 my $path = $new_path;
626 $fileroot = catfile( $path, $base );
627 unless ($fileroot) {
628 die <<EOM;
629------------------------------------------------------------------------
630Problem combining $new_path and $base to make a filename; check -opath
631------------------------------------------------------------------------
632EOM
633 }
634 }
635 }
636
637 # Skip files with same extension as the output files because
638 # this can lead to a messy situation with files like
639 # script.tdy.tdy.tdy ... or worse problems ... when you
640 # rerun perltidy over and over with wildcard input.
641 if (
642 !$source_stream
643 && ( $input_file =~ /$forbidden_file_extensions/o
644 || $input_file eq 'DIAGNOSTICS' )
645 )
646 {
647 print "skipping file: $input_file: wrong extension\n";
648 next;
649 }
650
651 # the 'source_object' supplies a method to read the input file
652 my $source_object =
653 Perl::Tidy::LineSource->new( $input_file, $rOpts,
654 $rpending_logfile_message );
655 next unless ($source_object);
656
657 # register this file name with the Diagnostics package
658 $diagnostics_object->set_input_file($input_file)
659 if $diagnostics_object;
660
661 #---------------------------------------------------------------
662 # determine the output file name
663 #---------------------------------------------------------------
664 my $output_file = undef;
665 my $actual_output_extension;
666
667 if ( $rOpts->{'outfile'} ) {
668
669 if ( $number_of_files <= 1 ) {
670
671 if ( $rOpts->{'standard-output'} ) {
672 die "You may not use -o and -st together\n";
673 }
674 elsif ($destination_stream) {
675 die
676"You may not specify a destination array and -o together\n";
677 }
678 elsif ( defined( $rOpts->{'output-path'} ) ) {
679 die "You may not specify -o and -opath together\n";
680 }
681 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
682 die "You may not specify -o and -oext together\n";
683 }
684 $output_file = $rOpts->{outfile};
685
686 # make sure user gives a file name after -o
687 if ( $output_file =~ /^-/ ) {
688 die "You must specify a valid filename after -o\n";
689 }
690
691 # do not overwrite input file with -o
692 if ( defined($input_file_permissions)
693 && ( $output_file eq $input_file ) )
694 {
695 die
696 "Use 'perltidy -b $input_file' to modify in-place\n";
697 }
698 }
699 else {
700 die "You may not use -o with more than one input file\n";
701 }
702 }
703 elsif ( $rOpts->{'standard-output'} ) {
704 if ($destination_stream) {
705 die
706"You may not specify a destination array and -st together\n";
707 }
708 $output_file = '-';
709
710 if ( $number_of_files <= 1 ) {
711 }
712 else {
713 die "You may not use -st with more than one input file\n";
714 }
715 }
716 elsif ($destination_stream) {
717 $output_file = $destination_stream;
718 }
719 elsif ($source_stream) { # source but no destination goes to stdout
720 $output_file = '-';
721 }
722 elsif ( $input_file eq '-' ) {
723 $output_file = '-';
724 }
725 else {
726 if ($in_place_modify) {
727 $output_file = IO::File->new_tmpfile()
728 or die "cannot open temp file for -b option: $!\n";
729 }
730 else {
731 $actual_output_extension = $output_extension;
732 $output_file = $fileroot . $output_extension;
733 }
734 }
735
736 # the 'sink_object' knows how to write the output file
737 my $tee_file = $fileroot . $dot . "TEE";
738
739 my $line_separator = $rOpts->{'output-line-ending'};
740 if ( $rOpts->{'preserve-line-endings'} ) {
741 $line_separator = find_input_line_ending($input_file);
742 }
743 $line_separator = "\n" unless defined($line_separator);
744
745 my $sink_object =
746 Perl::Tidy::LineSink->new( $output_file, $tee_file,
747 $line_separator, $rOpts, $rpending_logfile_message );
748
749 #---------------------------------------------------------------
750 # initialize the error logger
751 #---------------------------------------------------------------
752 my $warning_file = $fileroot . $dot . "ERR";
753 if ($errorfile_stream) { $warning_file = $errorfile_stream }
754 my $log_file = $fileroot . $dot . "LOG";
755 if ($logfile_stream) { $log_file = $logfile_stream }
756
757 my $logger_object =
758 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
759 $saw_extrude );
760 write_logfile_header(
761 $rOpts, $logger_object, $config_file,
762 $rraw_options, $Windows_type
763 );
764 if ($$rpending_logfile_message) {
765 $logger_object->write_logfile_entry($$rpending_logfile_message);
766 }
767 if ($$rpending_complaint) {
768 $logger_object->complain($$rpending_complaint);
769 }
770
771 #---------------------------------------------------------------
772 # initialize the debug object, if any
773 #---------------------------------------------------------------
774 my $debugger_object = undef;
775 if ( $rOpts->{DEBUG} ) {
776 $debugger_object =
777 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
778 }
779
780 #---------------------------------------------------------------
781 # create a formatter for this file : html writer or pretty printer
782 #---------------------------------------------------------------
783
784 # we have to delete any old formatter because, for safety,
785 # the formatter will check to see that there is only one.
786 $formatter = undef;
787
788 if ($user_formatter) {
789 $formatter = $user_formatter;
790 }
791 elsif ( $rOpts->{'format'} eq 'html' ) {
792 $formatter =
793 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
794 $actual_output_extension, $html_toc_extension,
795 $html_src_extension );
796 }
797 elsif ( $rOpts->{'format'} eq 'tidy' ) {
798 $formatter = Perl::Tidy::Formatter->new(
799 logger_object => $logger_object,
800 diagnostics_object => $diagnostics_object,
801 sink_object => $sink_object,
802 );
803 }
804 else {
805 die "I don't know how to do -format=$rOpts->{'format'}\n";
806 }
807
808 unless ($formatter) {
809 die "Unable to continue with $rOpts->{'format'} formatting\n";
810 }
811
812 #---------------------------------------------------------------
813 # create the tokenizer for this file
814 #---------------------------------------------------------------
815 $tokenizer = undef; # must destroy old tokenizer
816 $tokenizer = Perl::Tidy::Tokenizer->new(
817 source_object => $source_object,
818 logger_object => $logger_object,
819 debugger_object => $debugger_object,
820 diagnostics_object => $diagnostics_object,
821 starting_level => $rOpts->{'starting-indentation-level'},
822 tabs => $rOpts->{'tabs'},
823 indent_columns => $rOpts->{'indent-columns'},
824 look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
825 look_for_autoloader => $rOpts->{'look-for-autoloader'},
826 look_for_selfloader => $rOpts->{'look-for-selfloader'},
827 trim_qw => $rOpts->{'trim-qw'},
828 );
829
830 #---------------------------------------------------------------
831 # now we can do it
832 #---------------------------------------------------------------
833 process_this_file( $tokenizer, $formatter );
834
835 #---------------------------------------------------------------
836 # close the input source and report errors
837 #---------------------------------------------------------------
838 $source_object->close_input_file();
839
840 # get file names to use for syntax check
841 my $ifname = $source_object->get_input_file_copy_name();
842 my $ofname = $sink_object->get_output_file_copy();
843
844 #---------------------------------------------------------------
845 # handle the -b option (backup and modify in-place)
846 #---------------------------------------------------------------
847 if ($in_place_modify) {
848 unless ( -f $input_file ) {
849
850 # oh, oh, no real file to backup ..
851 # shouldn't happen because of numerous preliminary checks
852 die print
853"problem with -b backing up input file '$input_file': not a file\n";
854 }
855 my $backup_name = $input_file . $backup_extension;
856 if ( -f $backup_name ) {
857 unlink($backup_name)
858 or die
859"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
860 }
861 rename( $input_file, $backup_name )
862 or die
863"problem renaming $input_file to $backup_name for -b option: $!\n";
864 $ifname = $backup_name;
865
866 seek( $output_file, 0, 0 )
867 or die "unable to rewind tmp file for -b option: $!\n";
868
869 my $fout = IO::File->new("> $input_file")
870 or die
871"problem opening $input_file for write for -b option; check directory permissions: $!\n";
872 my $line;
873 while ( $line = $output_file->getline() ) {
874 $fout->print($line);
875 }
876 $fout->close();
877 $output_file = $input_file;
878 $ofname = $input_file;
879 }
880
881 #---------------------------------------------------------------
882 # clean up and report errors
883 #---------------------------------------------------------------
884 $sink_object->close_output_file() if $sink_object;
885 $debugger_object->close_debug_file() if $debugger_object;
886
887 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
888 if ($output_file) {
889
890 if ($input_file_permissions) {
891
892 # give output script same permissions as input script, but
893 # make it user-writable or else we can't run perltidy again.
894 # Thus we retain whatever executable flags were set.
895 if ( $rOpts->{'format'} eq 'tidy' ) {
896 chmod( $input_file_permissions | 0600, $output_file );
897 }
898
899 # else use default permissions for html and any other format
900
901 }
902 if ( $logger_object && $rOpts->{'check-syntax'} ) {
903 $infile_syntax_ok =
904 check_syntax( $ifname, $ofname, $logger_object, $rOpts );
905 }
906 }
907
908 $logger_object->finish( $infile_syntax_ok, $formatter )
909 if $logger_object;
910 } # end of loop to process all files
911 } # end of main program
912}
913
914sub fileglob_to_re {
915
916 # modified (corrected) from version in find2perl
917 my $x = shift;
918 $x =~ s#([./^\$()])#\\$1#g; # escape special characters
919 $x =~ s#\*#.*#g; # '*' -> '.*'
920 $x =~ s#\?#.#g; # '?' -> '.'
921 "^$x\\z"; # match whole word
922}
923
924sub make_extension {
925
926 # Make a file extension, including any leading '.' if necessary
927 # The '.' may actually be an '_' under VMS
928 my ( $extension, $default, $dot ) = @_;
929
930 # Use the default if none specified
931 $extension = $default unless ($extension);
932
933 # Only extensions with these leading characters get a '.'
934 # This rule gives the user some freedom
935 if ( $extension =~ /^[a-zA-Z0-9]/ ) {
936 $extension = $dot . $extension;
937 }
938 return $extension;
939}
940
941sub write_logfile_header {
942 my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
943 @_;
944 $logger_object->write_logfile_entry(
945"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
946 );
947 if ($Windows_type) {
948 $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
949 }
950 my $options_string = join( ' ', @$rraw_options );
951
952 if ($config_file) {
953 $logger_object->write_logfile_entry(
954 "Found Configuration File >>> $config_file \n");
955 }
956 $logger_object->write_logfile_entry(
957 "Configuration and command line parameters for this run:\n");
958 $logger_object->write_logfile_entry("$options_string\n");
959
960 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
961 $rOpts->{'logfile'} = 1; # force logfile to be saved
962 $logger_object->write_logfile_entry(
963 "Final parameter set for this run\n");
964 $logger_object->write_logfile_entry(
965 "------------------------------------\n");
966
967 foreach ( keys %{$rOpts} ) {
968 $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
969 }
970 $logger_object->write_logfile_entry(
971 "------------------------------------\n");
972 }
973 $logger_object->write_logfile_entry(
974 "To find error messages search for 'WARNING' with your editor\n");
975}
976
977sub process_command_line {
978
979 my ( $perltidyrc_stream, $is_Windows, $Windows_type, $rpending_complaint ) =
980 @_;
981
982 use Getopt::Long;
983
984 ######################################################################
985 # Note: a few options are not documented in the man page and usage
986 # message. This is because these are experimental or debug options and
987 # may or may not be retained in future versions.
988 #
989 # Here are the undocumented flags as far as I know. Any of them
990 # may disappear at any time. They are mainly for fine-tuning
991 # and debugging.
992 #
993 # fll --> fuzzy-line-length # a trivial parameter which gets
994 # turned off for the extrude option
995 # which is mainly for debugging
996 # chk --> check-multiline-quotes # check for old bug; to be deleted
997 # scl --> short-concatenation-item-length # helps break at '.'
998 # recombine # for debugging line breaks
999 # I --> DIAGNOSTICS # for debugging
1000 ######################################################################
1001
1002 # here is a summary of the Getopt codes:
1003 # <none> does not take an argument
1004 # =s takes a mandatory string
1005 # :s takes an optional string (DO NOT USE - filenames will get eaten up)
1006 # =i takes a mandatory integer
1007 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1008 # ! does not take an argument and may be negated
1009 # i.e., -foo and -nofoo are allowed
1010 # a double dash signals the end of the options list
1011 #
1012 #---------------------------------------------------------------
1013 # Define the option string passed to GetOptions.
1014 #---------------------------------------------------------------
1015
1016 my @option_string = ();
1017 my %expansion = ();
1018 my $rexpansion = \%expansion;
1019
1020 # These options are parsed directly by perltidy:
1021 # help h
1022 # version v
1023 # However, they are included in the option set so that they will
1024 # be seen in the options dump.
1025
1026 # These long option names have no abbreviations or are treated specially
1027 @option_string = qw(
1028 html!
1029 noprofile
1030 no-profile
1031 npro
1032 recombine!
1033 );
1034
1035 # routine to install and check options
1036 my $add_option = sub {
1037 my ( $long_name, $short_name, $flag ) = @_;
1038 push @option_string, $long_name . $flag;
1039 if ($short_name) {
1040 if ( $expansion{$short_name} ) {
1041 my $existing_name = $expansion{$short_name}[0];
1042 die
1043"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1044 }
1045 $expansion{$short_name} = [$long_name];
1046 if ( $flag eq '!' ) {
1047 my $nshort_name = 'n' . $short_name;
1048 my $nolong_name = 'no' . $long_name;
1049 if ( $expansion{$nshort_name} ) {
1050 my $existing_name = $expansion{$nshort_name}[0];
1051 die
1052"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1053 }
1054 $expansion{$nshort_name} = [$nolong_name];
1055 }
1056 }
1057 };
1058
1059 # Install long option names which have a simple abbreviation.
1060 # Options with code '!' get standard negation ('no' for long names,
1061 # 'n' for abbreviations)
1062 $add_option->( 'DEBUG', 'D', '!' );
1063 $add_option->( 'DIAGNOSTICS', 'I', '!' );
1064 $add_option->( 'add-newlines', 'anl', '!' );
1065 $add_option->( 'add-semicolons', 'asc', '!' );
1066 $add_option->( 'add-whitespace', 'aws', '!' );
1067 $add_option->( 'backup-and-modify-in-place', 'b', '!' );
1068 $add_option->( 'backup-file-extension', 'bext', '=s' );
1069 $add_option->( 'blanks-before-blocks', 'bbb', '!' );
1070 $add_option->( 'blanks-before-comments', 'bbc', '!' );
1071 $add_option->( 'blanks-before-subs', 'bbs', '!' );
1072 $add_option->( 'block-brace-tightness', 'bbt', '=i' );
1073 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1074 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1075 $add_option->( 'brace-left-and-indent', 'bli', '!' );
1076 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1077 $add_option->( 'brace-tightness', 'bt', '=i' );
1078 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1079 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1080 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1081 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1082 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1083 $add_option->( 'break-at-old-trinary-breakpoints', 'bot', '!' );
1084 $add_option->( 'check-multiline-quotes', 'chk', '!' );
1085 $add_option->( 'check-syntax', 'syn', '!' );
1086 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1087 $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1088 $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1089 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1090 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1091 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1092 $add_option->( 'closing-side-comments', 'csc', '!' );
1093 $add_option->( 'closing-token-indentation', 'cti', '=i' );
1094 $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1095 $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1096 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1097 $add_option->( 'continuation-indentation', 'ci', '=i' );
1098 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1099 $add_option->( 'cuddled-else', 'ce', '!' );
1100 $add_option->( 'delete-block-comments', 'dbc', '!' );
1101 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1102 $add_option->( 'delete-old-newlines', 'dnl', '!' );
1103 $add_option->( 'delete-old-whitespace', 'dws', '!' );
1104 $add_option->( 'delete-pod', 'dp', '!' );
1105 $add_option->( 'delete-semicolons', 'dsm', '!' );
1106 $add_option->( 'delete-side-comments', 'dsc', '!' );
1107 $add_option->( 'dump-defaults', 'ddf', '!' );
1108 $add_option->( 'dump-long-names', 'dln', '!' );
1109 $add_option->( 'dump-options', 'dop', '!' );
1110 $add_option->( 'dump-profile', 'dpro', '!' );
1111 $add_option->( 'dump-short-names', 'dsn', '!' );
1112 $add_option->( 'dump-token-types', 'dtt', '!' );
1113 $add_option->( 'dump-want-left-space', 'dwls', '!' );
1114 $add_option->( 'dump-want-right-space', 'dwrs', '!' );
1115 $add_option->( 'entab-leading-whitespace', 'et', '=i' );
1116 $add_option->( 'force-read-binary', 'f', '!' );
1117 $add_option->( 'format', 'fmt', '=s' );
1118 $add_option->( 'fuzzy-line-length', 'fll', '!' );
1119 $add_option->( 'hanging-side-comments', 'hsc', '!' );
1120 $add_option->( 'help', 'h', '' );
1121 $add_option->( 'ignore-old-line-breaks', 'iob', '!' );
1122 $add_option->( 'indent-block-comments', 'ibc', '!' );
1123 $add_option->( 'indent-closing-brace', 'icb', '!' );
1124 $add_option->( 'indent-columns', 'i', '=i' );
1125 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1126 $add_option->( 'line-up-parentheses', 'lp', '!' );
1127 $add_option->( 'logfile', 'log', '!' );
1128 $add_option->( 'logfile-gap', 'g', ':i' );
1129 $add_option->( 'long-block-line-count', 'lbl', '=i' );
1130 $add_option->( 'look-for-autoloader', 'lal', '!' );
1131 $add_option->( 'look-for-hash-bang', 'x', '!' );
1132 $add_option->( 'look-for-selfloader', 'lsl', '!' );
1133 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1134 $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1135 $add_option->( 'maximum-line-length', 'l', '=i' );
1136 $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1137 $add_option->( 'nowant-left-space', 'nwls', '=s' );
1138 $add_option->( 'nowant-right-space', 'nwrs', '=s' );
1139 $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1140 $add_option->( 'opening-brace-always-on-right', 'bar', '' );
1141 $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1142 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1143 $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1144 $add_option->( 'outdent-keywords', 'okw', '!' );
1145 $add_option->( 'outdent-labels', 'ola', '!' );
1146 $add_option->( 'outdent-long-comments', 'olc', '!' );
1147 $add_option->( 'outdent-long-quotes', 'olq', '!' );
1148 $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1149 $add_option->( 'outfile', 'o', '=s' );
1150 $add_option->( 'output-file-extension', 'oext', '=s' );
1151 $add_option->( 'output-line-ending', 'ole', '=s' );
1152 $add_option->( 'output-path', 'opath', '=s' );
1153 $add_option->( 'paren-tightness', 'pt', '=i' );
1154 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1155 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1156 $add_option->( 'pass-version-line', 'pvl', '!' );
1157 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1158 $add_option->( 'preserve-line-endings', 'ple', '!' );
1159 $add_option->( 'profile', 'pro', '=s' );
1160 $add_option->( 'quiet', 'q', '!' );
1161 $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1162 $add_option->( 'show-options', 'opt', '!' );
1163 $add_option->( 'space-after-keyword', 'sak', '=s' );
1164 $add_option->( 'space-for-semicolon', 'sfs', '!' );
1165 $add_option->( 'space-terminal-semicolon', 'sts', '!' );
1166 $add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1167 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1168 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1169 $add_option->( 'standard-error-output', 'se', '!' );
1170 $add_option->( 'standard-output', 'st', '!' );
1171 $add_option->( 'starting-indentation-level', 'sil', '=i' );
1172 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1173 $add_option->( 'static-block-comments', 'sbc', '!' );
1174 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1175 $add_option->( 'static-side-comments', 'ssc', '!' );
1176 $add_option->( 'swallow-optional-blank-lines', 'sob', '!' );
1177 $add_option->( 'tabs', 't', '!' );
1178 $add_option->( 'tee-block-comments', 'tbc', '!' );
1179 $add_option->( 'tee-pod', 'tp', '!' );
1180 $add_option->( 'tee-side-comments', 'tsc', '!' );
1181 $add_option->( 'trim-qw', 'tqw', '!' );
1182 $add_option->( 'version', 'v', '' );
1183 $add_option->( 'vertical-tightness', 'vt', '=i' );
1184 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1185 $add_option->( 'want-break-after', 'wba', '=s' );
1186 $add_option->( 'want-break-before', 'wbb', '=s' );
1187 $add_option->( 'want-left-space', 'wls', '=s' );
1188 $add_option->( 'want-right-space', 'wrs', '=s' );
1189 $add_option->( 'warning-output', 'w', '!' );
1190
1191 # The Perl::Tidy::HtmlWriter will add its own options to the string
1192 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1193
1194 #---------------------------------------------------------------
1195 # Assign default values to the above options here, except
1196 # for 'outfile' and 'help'.
1197 # These settings should approximate the perlstyle(1) suggestions.
1198 #---------------------------------------------------------------
1199 my @defaults = qw(
1200 add-newlines
1201 add-semicolons
1202 add-whitespace
1203 blanks-before-blocks
1204 blanks-before-comments
1205 blanks-before-subs
1206 block-brace-tightness=0
1207 block-brace-vertical-tightness=0
1208 brace-tightness=1
1209 brace-vertical-tightness-closing=0
1210 brace-vertical-tightness=0
1211 break-at-old-logical-breakpoints
1212 break-at-old-trinary-breakpoints
1213 break-at-old-keyword-breakpoints
1214 comma-arrow-breakpoints=1
1215 nocheck-syntax
1216 closing-side-comment-interval=6
1217 closing-side-comment-maximum-text=20
1218 closing-side-comment-else-flag=0
1219 closing-paren-indentation=0
1220 closing-brace-indentation=0
1221 closing-square-bracket-indentation=0
1222 continuation-indentation=2
1223 delete-old-newlines
1224 delete-semicolons
1225 fuzzy-line-length
1226 hanging-side-comments
1227 indent-block-comments
1228 indent-columns=4
1229 long-block-line-count=8
1230 look-for-autoloader
1231 look-for-selfloader
1232 maximum-consecutive-blank-lines=1
1233 maximum-fields-per-table=0
1234 maximum-line-length=80
1235 minimum-space-to-comment=4
1236 nobrace-left-and-indent
1237 nocuddled-else
1238 nodelete-old-whitespace
1239 nohtml
1240 nologfile
1241 noquiet
1242 noshow-options
1243 nostatic-side-comments
1244 noswallow-optional-blank-lines
1245 notabs
1246 nowarning-output
1247 outdent-labels
1248 outdent-long-quotes
1249 outdent-long-comments
1250 paren-tightness=1
1251 paren-vertical-tightness-closing=0
1252 paren-vertical-tightness=0
1253 pass-version-line
1254 recombine
1255 short-concatenation-item-length=8
1256 space-for-semicolon
1257 square-bracket-tightness=1
1258 square-bracket-vertical-tightness-closing=0
1259 square-bracket-vertical-tightness=0
1260 static-block-comments
1261 trim-qw
1262 format=tidy
1263 backup-file-extension=bak
1264
1265 pod2html
1266 html-table-of-contents
1267 html-entities
1268 );
1269
1270 push @defaults, "perl-syntax-check-flags=-c -T";
1271
1272 #---------------------------------------------------------------
1273 # set the defaults by passing the above list through GetOptions
1274 #---------------------------------------------------------------
1275 my %Opts = ();
1276 {
1277 local @ARGV;
1278 my $i;
1279
1280 for $i (@defaults) { push @ARGV, "--" . $i }
1281
1282 if ( !GetOptions( \%Opts, @option_string ) ) {
1283 die "Programming Bug: error in setting default options";
1284 }
1285 }
1286
1287 #---------------------------------------------------------------
1288 # Define abbreviations which will be expanded into the above primitives.
1289 # These may be defined recursively.
1290 #---------------------------------------------------------------
1291 %expansion = (
1292 %expansion,
1293 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1294 'fnl' => [qw(freeze-newlines)],
1295 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1296 'fws' => [qw(freeze-whitespace)],
1297 'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1298 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1299 'nooutdent-long-lines' =>
1300 [qw(nooutdent-long-quotes nooutdent-long-comments)],
1301 'noll' => [qw(nooutdent-long-lines)],
1302 'io' => [qw(indent-only)],
1303 'delete-all-comments' =>
1304 [qw(delete-block-comments delete-side-comments delete-pod)],
1305 'nodelete-all-comments' =>
1306 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1307 'dac' => [qw(delete-all-comments)],
1308 'ndac' => [qw(nodelete-all-comments)],
1309 'gnu' => [qw(gnu-style)],
1310 'tee-all-comments' =>
1311 [qw(tee-block-comments tee-side-comments tee-pod)],
1312 'notee-all-comments' =>
1313 [qw(notee-block-comments notee-side-comments notee-pod)],
1314 'tac' => [qw(tee-all-comments)],
1315 'ntac' => [qw(notee-all-comments)],
1316 'html' => [qw(format=html)],
1317 'nhtml' => [qw(format=tidy)],
1318 'tidy' => [qw(format=tidy)],
1319
1320 'break-after-comma-arrows' => [qw(cab=0)],
1321 'nobreak-after-comma-arrows' => [qw(cab=1)],
1322 'baa' => [qw(cab=0)],
1323 'nbaa' => [qw(cab=1)],
1324
1325 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1326 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1327 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1328 'icp' => [qw(cpi=2 cbi=2 csbi=2)],
1329 'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
1330
1331 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1332 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1333 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1334 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
1335 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
1336
1337 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1338 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1339 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1340
1341 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1342 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1343 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1344
1345 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1346 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1347 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1348
1349 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1350 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1351 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1352
1353 # 'mangle' originally deleted pod and comments, but to keep it
1354 # reversible, it no longer does. But if you really want to
1355 # delete them, just use:
1356 # -mangle -dac
1357
1358 # An interesting use for 'mangle' is to do this:
1359 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1360 # which will form as many one-line blocks as possible
1361
1362 'mangle' => [
1363 qw(
1364 check-syntax
1365 delete-old-newlines
1366 delete-old-whitespace
1367 delete-semicolons
1368 indent-columns=0
1369 maximum-consecutive-blank-lines=0
1370 maximum-line-length=100000
1371 noadd-newlines
1372 noadd-semicolons
1373 noadd-whitespace
1374 noblanks-before-blocks
1375 noblanks-before-subs
1376 notabs
1377 )
1378 ],
1379
1380 # 'extrude' originally deleted pod and comments, but to keep it
1381 # reversible, it no longer does. But if you really want to
1382 # delete them, just use
1383 # extrude -dac
1384 #
1385 # An interesting use for 'extrude' is to do this:
1386 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1387 # which will break up all one-line blocks.
1388
1389 'extrude' => [
1390 qw(
1391 check-syntax
1392 ci=0
1393 delete-old-newlines
1394 delete-old-whitespace
1395 delete-semicolons
1396 indent-columns=0
1397 maximum-consecutive-blank-lines=0
1398 maximum-line-length=1
1399 noadd-semicolons
1400 noadd-whitespace
1401 noblanks-before-blocks
1402 noblanks-before-subs
1403 nofuzzy-line-length
1404 notabs
1405 )
1406 ],
1407
1408 # this style tries to follow the GNU Coding Standards (which do
1409 # not really apply to perl but which are followed by some perl
1410 # programmers).
1411 'gnu-style' => [
1412 qw(
1413 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1414 )
1415 ],
1416
1417 # Additional styles can be added here
1418 );
1419
1420 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1421
1422 # Uncomment next line to dump all expansions for debugging:
1423 # dump_short_names(\%expansion);
1424
1425 my $word;
1426 my @raw_options = ();
1427 my $config_file = "";
1428 my $saw_ignore_profile = 0;
1429 my $saw_extrude = 0;
1430 my $saw_dump_profile = 0;
1431 my $i;
1432
1433 #---------------------------------------------------------------
1434 # Take a first look at the command-line parameters. Do as many
1435 # immediate dumps as possible, which can avoid confusion if the
1436 # perltidyrc file has an error.
1437 #---------------------------------------------------------------
1438 foreach $i (@ARGV) {
1439
1440 $i =~ s/^--/-/;
1441 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1442 $saw_ignore_profile = 1;
1443 }
1444
1445 # note: this must come before -pro and -profile, below:
1446 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1447 $saw_dump_profile = 1;
1448 }
1449 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1450 if ($config_file) {
1451 warn
1452"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1453 }
1454 $config_file = $2;
1455 unless ( -e $config_file ) {
1456 warn "cannot find file given with -pro=$config_file: $!\n";
1457 $config_file = "";
1458 }
1459 }
1460 elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1461 die "usage: -pro=filename or --profile=filename, no spaces\n";
1462 }
1463 elsif ( $i =~ /^-extrude$/ ) {
1464 $saw_extrude = 1;
1465 }
1466 elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1467 usage();
1468 exit 1;
1469 }
1470 elsif ( $i =~ /^-(version|v)$/ ) {
1471 show_version();
1472 exit 1;
1473 }
1474 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1475 dump_defaults(@defaults);
1476 exit 1;
1477 }
1478 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1479 dump_long_names(@option_string);
1480 exit 1;
1481 }
1482 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1483 dump_short_names( \%expansion );
1484 exit 1;
1485 }
1486 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1487 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1488 exit 1;
1489 }
1490 }
1491
1492 if ( $saw_dump_profile && $saw_ignore_profile ) {
1493 warn "No profile to dump because of -npro\n";
1494 exit 1;
1495 }
1496
1497 #---------------------------------------------------------------
1498 # read any .perltidyrc configuration file
1499 #---------------------------------------------------------------
1500 unless ($saw_ignore_profile) {
1501
1502 # resolve possible conflict between $perltidyrc_stream passed
1503 # as call parameter to perltidy and -pro=filename on command
1504 # line.
1505 if ($perltidyrc_stream) {
1506 if ($config_file) {
1507 warn <<EOM;
1508 Conflict: a perltidyrc configuration file was specified both as this
1509 perltidy call parameter: $perltidyrc_stream
1510 and with this -profile=$config_file.
1511 Using -profile=$config_file.
1512EOM
1513 }
1514 else {
1515 $config_file = $perltidyrc_stream;
1516 }
1517 }
1518
1519 # look for a config file if we don't have one yet
1520 my $rconfig_file_chatter;
1521 $$rconfig_file_chatter = "";
1522 $config_file =
1523 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
1524 $rpending_complaint )
1525 unless $config_file;
1526
1527 # open any config file
1528 my $fh_config;
1529 if ($config_file) {
1530 ( $fh_config, $config_file ) =
1531 Perl::Tidy::streamhandle( $config_file, 'r' );
1532 unless ($fh_config) {
1533 $$rconfig_file_chatter .=
1534 "# $config_file exists but cannot be opened\n";
1535 }
1536 }
1537
1538 if ($saw_dump_profile) {
1539 if ($saw_dump_profile) {
1540 dump_config_file( $fh_config, $config_file,
1541 $rconfig_file_chatter );
1542 exit 1;
1543 }
1544 }
1545
1546 if ($fh_config) {
1547
1548 my $rconfig_list =
1549 read_config_file( $fh_config, $config_file, \%expansion );
1550
1551 # process any .perltidyrc parameters right now so we can
1552 # localize errors
1553 if (@$rconfig_list) {
1554 local @ARGV = @$rconfig_list;
1555
1556 expand_command_abbreviations( \%expansion, \@raw_options,
1557 $config_file );
1558
1559 if ( !GetOptions( \%Opts, @option_string ) ) {
1560 die
1561"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
1562 }
1563
1564 # Undo any options which cause premature exit. They are not
1565 # appropriate for a config file, and it could be hard to
1566 # diagnose the cause of the premature exit.
1567 foreach (
1568 qw{
1569 dump-defaults
1570 dump-long-names
1571 dump-options
1572 dump-profile
1573 dump-short-names
1574 dump-token-types
1575 dump-want-left-space
1576 dump-want-right-space
1577 help
1578 stylesheet
1579 version
1580 }
1581 )
1582 {
1583 if ( defined( $Opts{$_} ) ) {
1584 delete $Opts{$_};
1585 warn "ignoring --$_ in config file: $config_file\n";
1586 }
1587 }
1588 }
1589 }
1590 }
1591
1592 #---------------------------------------------------------------
1593 # now process the command line parameters
1594 #---------------------------------------------------------------
1595 expand_command_abbreviations( \%expansion, \@raw_options, $config_file );
1596
1597 if ( !GetOptions( \%Opts, @option_string ) ) {
1598 die "Error on command line; for help try 'perltidy -h'\n";
1599 }
1600
1601 if ( $Opts{'dump-options'} ) {
1602 dump_options( \%Opts );
1603 exit 1;
1604 }
1605
1606 #---------------------------------------------------------------
1607 # Now we have to handle any interactions among the options..
1608 #---------------------------------------------------------------
1609
1610 # Since -vt, -vtc, and -cti are abbreviations, but under
1611 # msdos, an unquoted input parameter like vtc=1 will be
1612 # seen as 2 parameters, vtc and 1, so the abbreviations
1613 # won't be seen. Therefore, we will catch them here if
1614 # they get through.
1615
1616 if ( defined $Opts{'vertical-tightness'} ) {
1617 my $vt = $Opts{'vertical-tightness'};
1618 $Opts{'paren-vertical-tightness'} = $vt;
1619 $Opts{'square-bracket-vertical-tightness'} = $vt;
1620 $Opts{'brace-vertical-tightness'} = $vt;
1621 }
1622
1623 if ( defined $Opts{'vertical-tightness-closing'} ) {
1624 my $vtc = $Opts{'vertical-tightness-closing'};
1625 $Opts{'paren-vertical-tightness-closing'} = $vtc;
1626 $Opts{'square-bracket-vertical-tightness-closing'} = $vtc;
1627 $Opts{'brace-vertical-tightness-closing'} = $vtc;
1628 }
1629
1630 if ( defined $Opts{'closing-token-indentation'} ) {
1631 my $cti = $Opts{'closing-token-indentation'};
1632 $Opts{'closing-square-bracket-indentation'} = $cti;
1633 $Opts{'closing-brace-indentation'} = $cti;
1634 $Opts{'closing-paren-indentation'} = $cti;
1635 }
1636
1637 # In quiet mode, there is no log file and hence no way to report
1638 # results of syntax check, so don't do it.
1639 if ( $Opts{'quiet'} ) {
1640 $Opts{'check-syntax'} = 0;
1641 }
1642
1643 # can't check syntax if no output
1644 if ( $Opts{'format'} ne 'tidy' ) {
1645 $Opts{'check-syntax'} = 0;
1646 }
1647
1648 # Never let Windows 9x/Me systems run syntax check -- this will prevent a
1649 # wide variety of nasty problems on these systems, because they cannot
1650 # reliably run backticks. Don't even think about changing this!
1651 if ( $Opts{'check-syntax'}
1652 && $is_Windows
1653 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
1654 {
1655 $Opts{'check-syntax'} = 0;
1656 }
1657
1658 # It's really a bad idea to check syntax as root unless you wrote
1659 # the script yourself. FIXME: not sure if this works with VMS
1660 unless ($is_Windows) {
1661
1662 if ( $< == 0 && $Opts{'check-syntax'} ) {
1663 $Opts{'check-syntax'} = 0;
1664 $$rpending_complaint .=
1665"Syntax check deactivated for safety; you shouldn't run this as root\n";
1666 }
1667 }
1668
1669 # see if user set a non-negative logfile-gap
1670 if ( defined( $Opts{'logfile-gap'} ) && $Opts{'logfile-gap'} >= 0 ) {
1671
1672 # a zero gap will be taken as a 1
1673 if ( $Opts{'logfile-gap'} == 0 ) {
1674 $Opts{'logfile-gap'} = 1;
1675 }
1676
1677 # setting a non-negative logfile gap causes logfile to be saved
1678 $Opts{'logfile'} = 1;
1679 }
1680
1681 # not setting logfile gap, or setting it negative, causes default of 50
1682 else {
1683 $Opts{'logfile-gap'} = 50;
1684 }
1685
1686 # set short-cut flag when only indentation is to be done.
1687 # Note that the user may or may not have already set the
1688 # indent-only flag.
1689 if ( !$Opts{'add-whitespace'}
1690 && !$Opts{'delete-old-whitespace'}
1691 && !$Opts{'add-newlines'}
1692 && !$Opts{'delete-old-newlines'} )
1693 {
1694 $Opts{'indent-only'} = 1;
1695 }
1696
1697 # -isbc implies -ibc
1698 if ( $Opts{'indent-spaced-block-comments'} ) {
1699 $Opts{'indent-block-comments'} = 1;
1700 }
1701
1702 # -bli flag implies -bl
1703 if ( $Opts{'brace-left-and-indent'} ) {
1704 $Opts{'opening-brace-on-new-line'} = 1;
1705 }
1706
1707 if ( $Opts{'opening-brace-always-on-right'}
1708 && $Opts{'opening-brace-on-new-line'} )
1709 {
1710 warn <<EOM;
1711 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
1712 'opening-brace-on-new-line' (-bl). Ignoring -bl.
1713EOM
1714 $Opts{'opening-brace-on-new-line'} = 0;
1715 }
1716
1717 # it simplifies things if -bl is 0 rather than undefined
1718 if ( !defined( $Opts{'opening-brace-on-new-line'} ) ) {
1719 $Opts{'opening-brace-on-new-line'} = 0;
1720 }
1721
1722 # -sbl defaults to -bl if not defined
1723 if ( !defined( $Opts{'opening-sub-brace-on-new-line'} ) ) {
1724 $Opts{'opening-sub-brace-on-new-line'} =
1725 $Opts{'opening-brace-on-new-line'};
1726 }
1727
1728 # set shortcut flag if no blanks to be written
1729 unless ( $Opts{'maximum-consecutive-blank-lines'} ) {
1730 $Opts{'swallow-optional-blank-lines'} = 1;
1731 }
1732
1733 if ( $Opts{'entab-leading-whitespace'} ) {
1734 if ( $Opts{'entab-leading-whitespace'} < 0 ) {
1735 warn "-et=n must use a positive integer; ignoring -et\n";
1736 $Opts{'entab-leading-whitespace'} = undef;
1737 }
1738
1739 # entab leading whitespace has priority over the older 'tabs' option
1740 if ( $Opts{'tabs'} ) { $Opts{'tabs'} = 0; }
1741 }
1742
1743 if ( $Opts{'output-line-ending'} ) {
1744 unless ( is_unix() ) {
1745 warn "ignoring -ole; only works under unix\n";
1746 $Opts{'output-line-ending'} = undef;
1747 }
1748 }
1749 if ( $Opts{'preserve-line-endings'} ) {
1750 unless ( is_unix() ) {
1751 warn "ignoring -ple; only works under unix\n";
1752 $Opts{'preserve-line-endings'} = undef;
1753 }
1754 }
1755
1756 return ( \%Opts, $config_file, \@raw_options, $saw_extrude );
1757
1758} # end of process_command_line
1759
1760sub expand_command_abbreviations {
1761
1762 # go through @ARGV and expand any abbreviations
1763
1764 my ( $rexpansion, $rraw_options, $config_file ) = @_;
1765 my ($word);
1766
1767 # set a pass limit to prevent an infinite loop;
1768 # 10 should be plenty, but it may be increased to allow deeply
1769 # nested expansions.
1770 my $max_passes = 10;
1771 my @new_argv = ();
1772
1773 # keep looping until all expansions have been converted into actual
1774 # dash parameters..
1775 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
1776 my @new_argv = ();
1777 my $abbrev_count = 0;
1778
1779 # loop over each item in @ARGV..
1780 foreach $word (@ARGV) {
1781
1782 # convert any leading 'no-' to just 'no'
1783 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
1784
1785 # if it is a dash flag (instead of a file name)..
1786 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
1787
1788 my $abr = $1;
1789 my $flags = $2;
1790
1791 # save the raw input for debug output in case of circular refs
1792 if ( $pass_count == 0 ) {
1793 push( @$rraw_options, $word );
1794 }
1795
1796 # recombine abbreviation and flag, if necessary,
1797 # to allow abbreviations with arguments such as '-vt=1'
1798 if ( $rexpansion->{ $abr . $flags } ) {
1799 $abr = $abr . $flags;
1800 $flags = "";
1801 }
1802
1803 # if we see this dash item in the expansion hash..
1804 if ( $rexpansion->{$abr} ) {
1805 $abbrev_count++;
1806
1807 # stuff all of the words that it expands to into the
1808 # new arg list for the next pass
1809 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
1810 next unless $abbrev; # for safety; shouldn't happen
1811 push( @new_argv, '--' . $abbrev . $flags );
1812 }
1813 }
1814
1815 # not in expansion hash, must be actual long name
1816 else {
1817 push( @new_argv, $word );
1818 }
1819 }
1820
1821 # not a dash item, so just save it for the next pass
1822 else {
1823 push( @new_argv, $word );
1824 }
1825 } # end of this pass
1826
1827 # update parameter list @ARGV to the new one
1828 @ARGV = @new_argv;
1829 last unless ( $abbrev_count > 0 );
1830
1831 # make sure we are not in an infinite loop
1832 if ( $pass_count == $max_passes ) {
1833 print STDERR
1834"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
1835 print STDERR "Here are the raw options\n";
1836 local $" = ')(';
1837 print STDERR "(@$rraw_options)\n";
1838 my $num = @new_argv;
1839
1840 if ( $num < 50 ) {
1841 print STDERR "After $max_passes passes here is ARGV\n";
1842 print STDERR "(@new_argv)\n";
1843 }
1844 else {
1845 print STDERR "After $max_passes passes ARGV has $num entries\n";
1846 }
1847
1848 if ($config_file) {
1849 die <<"DIE";
1850Please check your configuration file $config_file for circular-references.
1851To deactivate it, use -npro.
1852DIE
1853 }
1854 else {
1855 die <<'DIE';
1856Program bug - circular-references in the %expansion hash, probably due to
1857a recent program change.
1858DIE
1859 }
1860 } # end of check for circular references
1861 } # end of loop over all passes
1862}
1863
1864# Debug routine -- this will dump the expansion hash
1865sub dump_short_names {
1866 my $rexpansion = shift;
1867 print STDOUT <<EOM;
1868List of short names. This list shows how all abbreviations are
1869translated into other abbreviations and, eventually, into long names.
1870New abbreviations may be defined in a .perltidyrc file.
1871For a list of all long names, use perltidy --dump-long-names (-dln).
1872--------------------------------------------------------------------------
1873EOM
1874 foreach my $abbrev ( sort keys %$rexpansion ) {
1875 my @list = @{ $$rexpansion{$abbrev} };
1876 print STDOUT "$abbrev --> @list\n";
1877 }
1878}
1879
1880sub check_vms_filename {
1881
1882 # given a valid filename (the perltidy input file)
1883 # create a modified filename and separator character
1884 # suitable for VMS.
1885 #
1886 # Contributed by Michael Cartmell
1887 #
1888 my ( $base, $path ) = fileparse( $_[0] );
1889
1890 # remove explicit ; version
1891 $base =~ s/;-?\d*$//
1892
1893 # remove explicit . version ie two dots in filename NB ^ escapes a dot
1894 or $base =~ s/( # begin capture $1
1895 (?:^|[^^])\. # match a dot not preceded by a caret
1896 (?: # followed by nothing
1897 | # or
1898 .*[^^] # anything ending in a non caret
1899 )
1900 ) # end capture $1
1901 \.-?\d*$ # match . version number
1902 /$1/x;
1903
1904 # normalise filename, if there are no unescaped dots then append one
1905 $base .= '.' unless $base =~ /(?:^|[^^])\./;
1906
1907 # if we don't already have an extension then we just append the extention
1908 my $separator = ( $base =~ /\.$/ ) ? "" : "_";
1909 return ( $path . $base, $separator );
1910}
1911
1912sub Win_OS_Type {
1913
1914 # Returns a string that determines what MS OS we are on.
1915 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net
1916 # Returns nothing if not an MS system.
1917 # Contributed by: Yves Orton
1918
1919 my $rpending_complaint = shift;
1920 return unless $^O =~ /win32|dos/i; # is it a MS box?
1921
1922 # It _should_ have Win32 unless something is really weird
1923 return unless eval('require Win32');
1924
1925 # Use the standard API call to determine the version
1926 my ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion();
1927
1928 return "win32s" unless $id; # If id==0 then its a win32s box.
1929 my $os = { # Magic numbers from MSDN
1930 # documentation of GetOSVersion
1931 1 => {
1932 0 => "95",
1933 10 => "98",
1934 90 => "Me"
1935 },
1936 2 => {
1937 0 => "2000",
1938 1 => "XP/.Net",
1939 51 => "NT3.51"
1940 }
1941 }->{$id}->{$minor};
1942
1943 # This _really_ shouldnt happen. At least not for quite a while
1944 unless ( defined $os ) {
1945 $$rpending_complaint .= <<EOS;
1946Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
1947We won't be able to look for a system-wide config file.
1948EOS
1949 }
1950
1951 # Unfortunately the logic used for the various versions isnt so clever..
1952 # so we have to handle an outside case.
1953 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
1954}
1955
1956sub is_unix {
1957 return ( $^O !~ /win32|dos/i )
1958 && ( $^O ne 'VMS' )
1959 && ( $^O ne 'OS2' )
1960 && ( $^O ne 'MacOS' );
1961}
1962
1963sub look_for_Windows {
1964
1965 # determine Windows sub-type and location of
1966 # system-wide configuration files
1967 my $rpending_complaint = shift;
1968 my $is_Windows = ( $^O =~ /win32|dos/i );
1969 my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
1970 return ( $is_Windows, $Windows_type );
1971}
1972
1973sub find_config_file {
1974
1975 # look for a .perltidyrc configuration file
1976 my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
1977 $rpending_complaint ) = @_;
1978
1979 $$rconfig_file_chatter .= "# Config file search...system reported as:";
1980 if ($is_Windows) {
1981 $$rconfig_file_chatter .= "Windows $Windows_type\n";
1982 }
1983 else {
1984 $$rconfig_file_chatter .= " $^O\n";
1985 }
1986
1987 # sub to check file existance and record all tests
1988 my $exists_config_file = sub {
1989 my $config_file = shift;
1990 return 0 unless $config_file;
1991 $$rconfig_file_chatter .= "# Testing: $config_file\n";
1992 return -f $config_file;
1993 };
1994
1995 my $config_file;
1996
1997 # look in current directory first
1998 $config_file = ".perltidyrc";
1999 return $config_file if $exists_config_file->($config_file);
2000
2001 # Default environment vars.
2002 my @envs = qw(PERLTIDY HOME);
2003
2004 # Check the NT/2k/XP locations, first a local machine def, then a
2005 # network def
2006 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2007
2008 # Now go through the enviornment ...
2009 foreach my $var (@envs) {
2010 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2011 if ( defined( $ENV{$var} ) ) {
2012 $$rconfig_file_chatter .= " = $ENV{$var}\n";
2013
2014 # test ENV{ PERLTIDY } as file:
2015 if ( $var eq 'PERLTIDY' ) {
2016 $config_file = "$ENV{$var}";
2017 return $config_file if $exists_config_file->($config_file);
2018 }
2019
2020 # test ENV as directory:
2021 $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2022 return $config_file if $exists_config_file->($config_file);
2023 }
2024 else {
2025 $$rconfig_file_chatter .= "\n";
2026 }
2027 }
2028
2029 # then look for a system-wide definition
2030 # where to look varies with OS
2031 if ($is_Windows) {
2032
2033 if ($Windows_type) {
2034 my ( $os, $system, $allusers ) =
2035 Win_Config_Locs( $rpending_complaint, $Windows_type );
2036
2037 # Check All Users directory, if there is one.
2038 if ($allusers) {
2039 $config_file = catfile( $allusers, ".perltidyrc" );
2040 return $config_file if $exists_config_file->($config_file);
2041 }
2042
2043 # Check system directory.
2044 $config_file = catfile( $system, ".perltidyrc" );
2045 return $config_file if $exists_config_file->($config_file);
2046 }
2047 }
2048
2049 # Place to add customization code for other systems
2050 elsif ( $^O eq 'OS2' ) {
2051 }
2052 elsif ( $^O eq 'MacOS' ) {
2053 }
2054 elsif ( $^O eq 'VMS' ) {
2055 }
2056
2057 # Assume some kind of Unix
2058 else {
2059
2060 $config_file = "/usr/local/etc/perltidyrc";
2061 return $config_file if $exists_config_file->($config_file);
2062
2063 $config_file = "/etc/perltidyrc";
2064 return $config_file if $exists_config_file->($config_file);
2065 }
2066
2067 # Couldn't find a config file
2068 return;
2069}
2070
2071sub Win_Config_Locs {
2072
2073 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2074 # or undef if its not a win32 OS. In list context returns OS, System
2075 # Directory, and All Users Directory. All Users will be empty on a
2076 # 9x/Me box. Contributed by: Yves Orton.
2077
2078 my $rpending_complaint = shift;
2079 my $os = (@_) ? shift: Win_OS_Type();
2080 return unless $os;
2081
2082 my $system = "";
2083 my $allusers = "";
2084
2085 if ( $os =~ /9[58]|Me/ ) {
2086 $system = "C:/Windows";
2087 }
2088 elsif ( $os =~ /NT|XP|2000/ ) {
2089 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2090 $allusers =
2091 ( $os =~ /NT/ )
2092 ? "C:/WinNT/profiles/All Users/"
2093 : "C:/Documents and Settings/All Users/";
2094 }
2095 else {
2096
2097 # This currently would only happen on a win32s computer.
2098 # I dont have one to test So I am unsure how to proceed.
2099 # Sorry. :-)
2100 $$rpending_complaint .=
2101"I dont know a sensible place to look for config files on an $os system.\n";
2102 return;
2103 }
2104 return wantarray ? ( $os, $system, $allusers ) : $os;
2105}
2106
2107sub dump_config_file {
2108 my $fh = shift;
2109 my $config_file = shift;
2110 my $rconfig_file_chatter = shift;
2111 print STDOUT "$$rconfig_file_chatter";
2112 if ($fh) {
2113 print STDOUT "# Dump of file: '$config_file'\n";
2114 while ( $_ = $fh->getline() ) { print STDOUT }
2115 eval { $fh->close() };
2116 }
2117 else {
2118 print STDOUT "# ...no config file found\n";
2119 }
2120}
2121
2122sub read_config_file {
2123
2124 my ( $fh, $config_file, $rexpansion ) = @_;
2125 my @config_list = ();
2126
2127 my $name = undef;
2128 my $line_no;
2129 while ( $_ = $fh->getline() ) {
2130 $line_no++;
2131 chomp;
2132 next if /^\s*#/; # skip full-line comment
2133 $_ = strip_comment( $_, $config_file, $line_no );
2134 s/^\s*(.*?)\s*$/$1/; # trim both ends
2135 next unless $_;
2136
2137 # look for something of the general form
2138 # newname { body }
2139 # or just
2140 # body
2141
2142 if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2143 my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2144
2145 # handle a new alias definition
2146 if ($newname) {
2147 if ($name) {
2148 die
2149"No '}' seen after $name and before $newname in config file $config_file line $.\n";
2150 }
2151 $name = $newname;
2152
2153 if ( ${$rexpansion}{$name} ) {
2154 local $" = ')(';
2155 my @names = sort keys %$rexpansion;
2156 print "Here is a list of all installed aliases\n(@names)\n";
2157 die
2158"Attempting to redefine alias ($name) in config file $config_file line $.\n";
2159 }
2160 ${$rexpansion}{$name} = [];
2161 }
2162
2163 # now do the body
2164 if ($body) {
2165
2166 my ( $rbody_parts, $msg ) = parse_args($body);
2167 if ($msg) {
2168 die <<EOM;
2169Error reading file $config_file at line number $line_no.
2170$msg
2171Please fix this line or use -npro to avoid reading this file
2172EOM
2173 }
2174
2175 if ($name) {
2176
2177 # remove leading dashes if this is an alias
2178 foreach (@$rbody_parts) { s/^\-+//; }
2179 push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2180 }
2181
2182 else {
2183 push( @config_list, @$rbody_parts );
2184 }
2185 }
2186
2187 if ($curly) {
2188 unless ($name) {
2189 die
2190"Unexpected '}' seen in config file $config_file line $.\n";
2191 }
2192 $name = undef;
2193 }
2194 }
2195 }
2196 eval { $fh->close() };
2197 return ( \@config_list );
2198}
2199
2200sub strip_comment {
2201
2202 my ( $instr, $config_file, $line_no ) = @_;
2203
2204 # nothing to do if no comments
2205 if ( $instr !~ /#/ ) {
2206 return $instr;
2207 }
2208
2209 # use simple method of no quotes
2210 elsif ( $instr !~ /['"]/ ) {
2211 $instr =~ s/\s*\#.*$//; # simple trim
2212 return $instr;
2213 }
2214
2215 # handle comments and quotes
2216 my $outstr = "";
2217 my $quote_char = "";
2218 while (1) {
2219
2220 # looking for ending quote character
2221 if ($quote_char) {
2222 if ( $instr =~ /\G($quote_char)/gc ) {
2223 $quote_char = "";
2224 $outstr .= $1;
2225 }
2226 elsif ( $instr =~ /\G(.)/gc ) {
2227 $outstr .= $1;
2228 }
2229
2230 # error..we reached the end without seeing the ending quote char
2231 else {
2232 die <<EOM;
2233Error reading file $config_file at line number $line_no.
2234Did not see ending quote character <$quote_char> in this text:
2235$instr
2236Please fix this line or use -npro to avoid reading this file
2237EOM
2238 last;
2239 }
2240 }
2241
2242 # accumulating characters and looking for start of a quoted string
2243 else {
2244 if ( $instr =~ /\G([\"\'])/gc ) {
2245 $outstr .= $1;
2246 $quote_char = $1;
2247 }
2248 elsif ( $instr =~ /\G#/gc ) {
2249 last;
2250 }
2251 elsif ( $instr =~ /\G(.)/gc ) {
2252 $outstr .= $1;
2253 }
2254 else {
2255 last;
2256 }
2257 }
2258 }
2259 return $outstr;
2260}
2261
2262sub parse_args {
2263
2264 # Parse a command string containing multiple string with possible
2265 # quotes, into individual commands. It might look like this, for example:
2266 #
2267 # -wba=" + - " -some-thing -wbb='. && ||'
2268 #
2269 # There is no need, at present, to handle escaped quote characters.
2270 # (They are not perltidy tokens, so needn't be in strings).
2271
2272 my ($body) = @_;
2273 my @body_parts = ();
2274 my $quote_char = "";
2275 my $part = "";
2276 my $msg = "";
2277 while (1) {
2278
2279 # looking for ending quote character
2280 if ($quote_char) {
2281 if ( $body =~ /\G($quote_char)/gc ) {
2282 $quote_char = "";
2283 }
2284 elsif ( $body =~ /\G(.)/gc ) {
2285 $part .= $1;
2286 }
2287
2288 # error..we reached the end without seeing the ending quote char
2289 else {
2290 if ($part) { push @body_parts, $part; }
2291 $msg = <<EOM;
2292Did not see ending quote character <$quote_char> in this text:
2293$body
2294EOM
2295 last;
2296 }
2297 }
2298
2299 # accumulating characters and looking for start of a quoted string
2300 else {
2301 if ( $body =~ /\G([\"\'])/gc ) {
2302 $quote_char = $1;
2303 }
2304 elsif ( $body =~ /\G(\s+)/gc ) {
2305 if ($part) { push @body_parts, $part; }
2306 $part = "";
2307 }
2308 elsif ( $body =~ /\G(.)/gc ) {
2309 $part .= $1;
2310 }
2311 else {
2312 if ($part) { push @body_parts, $part; }
2313 last;
2314 }
2315 }
2316 }
2317 return ( \@body_parts, $msg );
2318}
2319
2320sub dump_long_names {
2321
2322 my @names = sort @_;
2323 print STDOUT <<EOM;
2324# Command line long names (passed to GetOptions)
2325#---------------------------------------------------------------
2326# here is a summary of the Getopt codes:
2327# <none> does not take an argument
2328# =s takes a mandatory string
2329# :s takes an optional string
2330# =i takes a mandatory integer
2331# :i takes an optional integer
2332# ! does not take an argument and may be negated
2333# i.e., -foo and -nofoo are allowed
2334# a double dash signals the end of the options list
2335#
2336#---------------------------------------------------------------
2337EOM
2338
2339 foreach (@names) { print STDOUT "$_\n" }
2340}
2341
2342sub dump_defaults {
2343 my @defaults = sort @_;
2344 print STDOUT "Default command line options:\n";
2345 foreach (@_) { print STDOUT "$_\n" }
2346}
2347
2348sub dump_options {
2349 my ($rOpts) = @_;
2350 local $" = "\n";
2351 print STDOUT "Final parameter set for this run\n";
2352 foreach ( sort keys %{$rOpts} ) {
2353 print STDOUT "$_=$rOpts->{$_}\n";
2354 }
2355}
2356
2357sub show_version {
2358 print <<"EOM";
2359This is perltidy, v$VERSION
2360
2361Copyright 2000-2003, Steve Hancock
2362
2363Perltidy is free software and may be copied under the terms of the GNU
2364General Public License, which is included in the distribution files.
2365
2366Complete documentation for perltidy can be found using 'man perltidy'
2367or on the internet at http://perltidy.sourceforge.net.
2368EOM
2369}
2370
2371sub usage {
2372
2373 print STDOUT <<EOF;
2374This is perltidy version $VERSION, a perl script indenter. Usage:
2375
2376 perltidy [ options ] file1 file2 file3 ...
2377 (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2378 perltidy [ options ] file1 -o outfile
2379 perltidy [ options ] file1 -st >outfile
2380 perltidy [ options ] <infile >outfile
2381
2382Options have short and long forms. Short forms are shown; see
2383man pages for long forms. Note: '=s' indicates a required string,
2384and '=n' indicates a required integer.
2385
2386I/O control
2387 -h show this help
2388 -o=file name of the output file (only if single input file)
2389 -oext=s change output extension from 'tdy' to s
2390 -opath=path change path to be 'path' for output files
2391 -b backup original to .bak and modify file in-place
2392 -bext=s change default backup extension from 'bak' to s
2393 -q deactivate error messages (for running under editor)
2394 -w include non-critical warning messages in the .ERR error output
2395 -syn run perl -c to check syntax (default under unix systems)
2396 -log save .LOG file, which has useful diagnostics
2397 -f force perltidy to read a binary file
2398 -g like -log but writes more detailed .LOG file, for debugging scripts
2399 -opt write the set of options actually used to a .LOG file
2400 -npro ignore .perltidyrc configuration command file
2401 -pro=file read configuration commands from file instead of .perltidyrc
2402 -st send output to standard output, STDOUT
2403 -se send error output to standard error output, STDERR
2404 -v display version number to standard output and quit
2405
2406Basic Options:
2407 -i=n use n columns per indentation level (default n=4)
2408 -t tabs: use one tab character per indentation level, not recommeded
2409 -nt no tabs: use n spaces per indentation level (default)
2410 -et=n entab leading whitespace n spaces per tab; not recommended
2411 -io "indent only": just do indentation, no other formatting.
2412 -sil=n set starting indentation level to n; use if auto detection fails
2413 -ole=s specify output line ending (s=dos or win, mac, unix)
2414 -ple keep output line endings same as input (input must be filename)
2415
2416Whitespace Control
2417 -fws freeze whitespace; this disables all whitespace changes
2418 and disables the following switches:
2419 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
2420 -bbt same as -bt but for code block braces; same as -bt if not given
2421 -bbvt block braces vertically tight; use with -bl or -bli
2422 -bbvtl=s make -bbvt to apply to selected list of block types
2423 -pt=n paren tightness (n=0, 1 or 2)
2424 -sbt=n square bracket tightness (n=0, 1, or 2)
2425 -bvt=n brace vertical tightness,
2426 n=(0=open, 1=close unless multiple steps on a line, 2=always close)
2427 -pvt=n paren vertical tightness (see -bvt for n)
2428 -sbvt=n square bracket vertical tightness (see -bvt for n)
2429 -bvtc=n closing brace vertical tightness:
2430 n=(0=open, 1=sometimes close, 2=always close)
2431 -pvtc=n closing paren vertical tightness, see -bvtc for n.
2432 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
2433 -ci=n sets continuation indentation=n, default is n=2 spaces
2434 -lp line up parentheses, brackets, and non-BLOCK braces
2435 -sfs add space before semicolon in for( ; ; )
2436 -aws allow perltidy to add whitespace (default)
2437 -dws delete all old non-essential whitespace
2438 -icb indent closing brace of a code block
2439 -cti=n closing indentation of paren, square bracket, or non-block brace:
2440 n=0 none, =1 align with opening, =2 one full indentation level
2441 -icp equivalent to -cti=2
2442 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
2443 -wrs=s want space right of tokens in string;
2444 -sts put space before terminal semicolon of a statement
2445 -sak=s put space between keywords given in s and '(';
2446 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
2447
2448Line Break Control
2449 -fnl freeze newlines; this disables all line break changes
2450 and disables the following switches:
2451 -anl add newlines; ok to introduce new line breaks
2452 -bbs add blank line before subs and packages
2453 -bbc add blank line before block comments
2454 -bbb add blank line between major blocks
2455 -sob swallow optional blank lines
2456 -ce cuddled else; use this style: '} else {'
2457 -dnl delete old newlines (default)
2458 -mbl=n maximum consecutive blank lines (default=1)
2459 -l=n maximum line length; default n=80
2460 -bl opening brace on new line
2461 -sbl opening sub brace on new line. value of -bl is used if not given.
2462 -bli opening brace on new line and indented
2463 -bar opening brace always on right, even for long clauses
2464 -vt=n vertical tightness (requires -lp); n controls break after opening
2465 token: 0=never 1=no break if next line balanced 2=no break
2466 -vtc=n vertical tightness of closing container; n controls if closing
2467 token starts new line: 0=always 1=not unless list 1=never
2468 -wba=s want break after tokens in string; i.e. wba=': .'
2469 -wbb=s want break before tokens in string
2470
2471Following Old Breakpoints
2472 -boc break at old comma breaks: turns off all automatic list formatting
2473 -bol break at old logical breakpoints: or, and, ||, && (default)
2474 -bok break at old list keyword breakpoints such as map, sort (default)
2475 -bot break at old conditional (trinary ?:) operator breakpoints (default)
2476 -cab=n break at commas after a comma-arrow (=>):
2477 n=0 break at all commas after =>
2478 n=1 stable: break unless this breaks an existing one-line container
2479 n=2 break only if a one-line container cannot be formed
2480 n=3 do not treat commas after => specially at all
2481
2482Comment controls
2483 -ibc indent block comments (default)
2484 -isbc indent spaced block comments; may indent unless no leading space
2485 -msc=n minimum desired spaces to side comment, default 4
2486 -csc add or update closing side comments after closing BLOCK brace
2487 -dcsc delete closing side comments created by a -csc command
2488 -cscp=s change closing side comment prefix to be other than '## end'
2489 -cscl=s change closing side comment to apply to selected list of blocks
2490 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
2491 -csct=n maximum number of columns of appended text, default n=20
2492 -cscw causes warning if old side comment is overwritten with -csc
2493
2494 -sbc use 'static block comments' identified by leading '##' (default)
2495 -sbcp=s change static block comment identifier to be other than '##'
2496 -osbc outdent static block comments
2497
2498 -ssc use 'static side comments' identified by leading '##' (default)
2499 -sscp=s change static side comment identifier to be other than '##'
2500
2501Delete selected text
2502 -dac delete all comments AND pod
2503 -dbc delete block comments
2504 -dsc delete side comments
2505 -dp delete pod
2506
2507Send selected text to a '.TEE' file
2508 -tac tee all comments AND pod
2509 -tbc tee block comments
2510 -tsc tee side comments
2511 -tp tee pod
2512
2513Outdenting
2514 -olq outdent long quoted strings (default)
2515 -olc outdent a long block comment line
2516 -ola outdent statement labels
2517 -okw outdent control keywords (redo, next, last, goto, return)
2518 -okwl=s specify alternative keywords for -okw command
2519
2520Other controls
2521 -mft=n maximum fields per table; default n=40
2522 -x do not format lines before hash-bang line (i.e., for VMS)
2523 -asc allows perltidy to add a ';' when missing (default)
2524 -dsm allows perltidy to delete an unnecessary ';' (default)
2525
2526Combinations of other parameters
2527 -gnu attempt to follow GNU Coding Standards as applied to perl
2528 -mangle remove as many newlines as possible (but keep comments and pods)
2529 -extrude insert as many newlines as possible
2530
2531Dump and die, debugging
2532 -dop dump options used in this run to standard output and quit
2533 -ddf dump default options to standard output and quit
2534 -dsn dump all option short names to standard output and quit
2535 -dln dump option long names to standard output and quit
2536 -dpro dump whatever configuration file is in effect to standard output
2537 -dtt dump all token types to standard output and quit
2538
2539HTML
2540 -html write an html file (see 'man perl2web' for many options)
2541 Note: when -html is used, no indentation or formatting are done.
2542 Hint: try perltidy -html -css=mystyle.css filename.pl
2543 and edit mystyle.css to change the appearance of filename.html.
2544 -nnn gives line numbers
2545 -pre only writes out <pre>..</pre> code section
2546 -toc places a table of contents to subs at the top (default)
2547 -pod passes pod text through pod2html (default)
2548 -frm write html as a frame (3 files)
2549 -text=s extra extension for table of contents if -frm, default='toc'
2550 -sext=s extra extension for file content if -frm, default='src'
2551
2552A prefix of "n" negates short form toggle switches, and a prefix of "no"
2553negates the long forms. For example, -nasc means don't add missing
2554semicolons.
2555
2556If you are unable to see this entire text, try "perltidy -h | more"
2557For more detailed information, and additional options, try "man perltidy",
2558or go to the perltidy home page at http://perltidy.sourceforge.net
2559EOF
2560
2561}
2562
2563sub process_this_file {
2564
2565 my ( $truth, $beauty ) = @_;
2566
2567 # loop to process each line of this file
2568 while ( my $line_of_tokens = $truth->get_line() ) {
2569 $beauty->write_line($line_of_tokens);
2570 }
2571
2572 # finish up
2573 eval { $beauty->finish_formatting() };
2574 $truth->report_tokenization_errors();
2575}
2576
2577sub check_syntax {
2578
2579 # Use 'perl -c' to make sure that we did not create bad syntax
2580 # This is a very good independent check for programming errors
2581 #
2582 # Given names of the input and output files, ($ifname, $ofname),
2583 # we do the following:
2584 # - check syntax of the input file
2585 # - if bad, all done (could be an incomplete code snippet)
2586 # - if infile syntax ok, then check syntax of the output file;
2587 # - if outfile syntax bad, issue warning; this implies a code bug!
2588 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
2589
2590 my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
2591 my $infile_syntax_ok = 0;
2592 my $line_of_dashes = '-' x 42 . "\n";
2593
2594 my $flags = $rOpts->{'perl-syntax-check-flags'};
2595
2596 # be sure we invoke perl with -c
2597 # note: perl will accept repeated flags like '-c -c'. It is safest
2598 # to append another -c than try to find an interior bundled c, as
2599 # in -Tc, because such a 'c' might be in a quoted string, for example.
2600 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
2601
2602 # be sure we invoke perl with -x if requested
2603 # same comments about repeated parameters applies
2604 if ( $rOpts->{'look-for-hash-bang'} ) {
2605 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
2606 }
2607
2608 # this shouldn't happen unless a termporary file couldn't be made
2609 if ( $ifname eq '-' ) {
2610 $logger_object->write_logfile_entry(
2611 "Cannot run perl -c on STDIN and STDOUT\n");
2612 return $infile_syntax_ok;
2613 }
2614
2615 $logger_object->write_logfile_entry(
2616 "checking input file syntax with perl $flags\n");
2617 $logger_object->write_logfile_entry($line_of_dashes);
2618
2619 # Not all operating systems/shells support redirection of the standard
2620 # error output.
2621 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
2622
2623 my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
2624 $logger_object->write_logfile_entry("$perl_output\n");
2625
2626 if ( $perl_output =~ /syntax\s*OK/ ) {
2627 $infile_syntax_ok = 1;
2628 $logger_object->write_logfile_entry($line_of_dashes);
2629 $logger_object->write_logfile_entry(
2630 "checking output file syntax with perl $flags ...\n");
2631 $logger_object->write_logfile_entry($line_of_dashes);
2632
2633 my $perl_output =
2634 do_syntax_check( $ofname, $flags, $error_redirection );
2635 $logger_object->write_logfile_entry("$perl_output\n");
2636
2637 unless ( $perl_output =~ /syntax\s*OK/ ) {
2638 $logger_object->write_logfile_entry($line_of_dashes);
2639 $logger_object->warning(
2640"The output file has a syntax error when tested with perl $flags $ofname !\n"
2641 );
2642 $logger_object->warning(
2643 "This implies an error in perltidy; the file $ofname is bad\n");
2644 $logger_object->report_definite_bug();
2645
2646 # the perl version number will be helpful for diagnosing the problem
2647 $logger_object->write_logfile_entry(
2648 qx/perl -v $error_redirection/ . "\n" );
2649 }
2650 }
2651 else {
2652
2653 # Only warn of perl -c syntax errors. Other messages,
2654 # such as missing modules, are too common. They can be
2655 # seen by running with perltidy -w
2656 $logger_object->complain("A syntax check using perl $flags gives: \n");
2657 $logger_object->complain($line_of_dashes);
2658 $logger_object->complain("$perl_output\n");
2659 $logger_object->complain($line_of_dashes);
2660 $infile_syntax_ok = -1;
2661 $logger_object->write_logfile_entry($line_of_dashes);
2662 $logger_object->write_logfile_entry(
2663"The output file will not be checked because of input file problems\n"
2664 );
2665 }
2666 return $infile_syntax_ok;
2667}
2668
2669sub do_syntax_check {
2670 my ( $fname, $flags, $error_redirection ) = @_;
2671
2672 # We have to quote the filename in case it has unusual characters
2673 # or spaces. Example: this filename #CM11.pm# gives trouble.
2674 $fname = '"' . $fname . '"';
2675
2676 # Under VMS something like -T will become -t (and an error) so we
2677 # will put quotes around the flags. Double quotes seem to work on
2678 # Unix/Windows/VMS, but this may not work on all systems. (Single
2679 # quotes do not work under Windows). It could become necessary to
2680 # put double quotes around each flag, such as: -"c" -"T"
2681 # We may eventually need some system-dependent coding here.
2682 $flags = '"' . $flags . '"';
2683
2684 # now wish for luck...
2685 return qx/perl $flags $fname $error_redirection/;
2686}
2687
2688#####################################################################
2689#
2690# This is a stripped down version of IO::Scalar
2691# Given a reference to a scalar, it supplies either:
2692# a getline method which reads lines (mode='r'), or
2693# a print method which reads lines (mode='w')
2694#
2695#####################################################################
2696package Perl::Tidy::IOScalar;
2697use Carp;
2698
2699sub new {
2700 my ( $package, $rscalar, $mode ) = @_;
2701 my $ref = ref $rscalar;
2702 if ( $ref ne 'SCALAR' ) {
2703 confess <<EOM;
2704------------------------------------------------------------------------
2705expecting ref to SCALAR but got ref to ($ref); trace follows:
2706------------------------------------------------------------------------
2707EOM
2708
2709 }
2710 if ( $mode eq 'w' ) {
2711 $$rscalar = "";
2712 return bless [ $rscalar, $mode ], $package;
2713 }
2714 elsif ( $mode eq 'r' ) {
2715
2716 # Convert a scalar to an array.
2717 # This avoids looking for "\n" on each call to getline
2718 my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
2719 my $i_next = 0;
2720 return bless [ \@array, $mode, $i_next ], $package;
2721 }
2722 else {
2723 confess <<EOM;
2724------------------------------------------------------------------------
2725expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
2726------------------------------------------------------------------------
2727EOM
2728 }
2729}
2730
2731sub getline {
2732 my $self = shift;
2733 my $mode = $self->[1];
2734 if ( $mode ne 'r' ) {
2735 confess <<EOM;
2736------------------------------------------------------------------------
2737getline call requires mode = 'r' but mode = ($mode); trace follows:
2738------------------------------------------------------------------------
2739EOM
2740 }
2741 my $i = $self->[2]++;
2742 ##my $line = $self->[0]->[$i];
2743 return $self->[0]->[$i];
2744}
2745
2746sub print {
2747 my $self = shift;
2748 my $mode = $self->[1];
2749 if ( $mode ne 'w' ) {
2750 confess <<EOM;
2751------------------------------------------------------------------------
2752print call requires mode = 'w' but mode = ($mode); trace follows:
2753------------------------------------------------------------------------
2754EOM
2755 }
2756 ${ $self->[0] } .= $_[0];
2757}
2758sub close { return }
2759
2760#####################################################################
2761#
2762# This is a stripped down version of IO::ScalarArray
2763# Given a reference to an array, it supplies either:
2764# a getline method which reads lines (mode='r'), or
2765# a print method which reads lines (mode='w')
2766#
2767# NOTE: this routine assumes that that there aren't any embedded
2768# newlines within any of the array elements. There are no checks
2769# for that.
2770#
2771#####################################################################
2772package Perl::Tidy::IOScalarArray;
2773use Carp;
2774
2775sub new {
2776 my ( $package, $rarray, $mode ) = @_;
2777 my $ref = ref $rarray;
2778 if ( $ref ne 'ARRAY' ) {
2779 confess <<EOM;
2780------------------------------------------------------------------------
2781expecting ref to ARRAY but got ref to ($ref); trace follows:
2782------------------------------------------------------------------------
2783EOM
2784
2785 }
2786 if ( $mode eq 'w' ) {
2787 @$rarray = ();
2788 return bless [ $rarray, $mode ], $package;
2789 }
2790 elsif ( $mode eq 'r' ) {
2791 my $i_next = 0;
2792 return bless [ $rarray, $mode, $i_next ], $package;
2793 }
2794 else {
2795 confess <<EOM;
2796------------------------------------------------------------------------
2797expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
2798------------------------------------------------------------------------
2799EOM
2800 }
2801}
2802
2803sub getline {
2804 my $self = shift;
2805 my $mode = $self->[1];
2806 if ( $mode ne 'r' ) {
2807 confess <<EOM;
2808------------------------------------------------------------------------
2809getline requires mode = 'r' but mode = ($mode); trace follows:
2810------------------------------------------------------------------------
2811EOM
2812 }
2813 my $i = $self->[2]++;
2814 ##my $line = $self->[0]->[$i];
2815 return $self->[0]->[$i];
2816}
2817
2818sub print {
2819 my $self = shift;
2820 my $mode = $self->[1];
2821 if ( $mode ne 'w' ) {
2822 confess <<EOM;
2823------------------------------------------------------------------------
2824print requires mode = 'w' but mode = ($mode); trace follows:
2825------------------------------------------------------------------------
2826EOM
2827 }
2828 push @{ $self->[0] }, $_[0];
2829}
2830sub close { return }
2831
2832#####################################################################
2833#
2834# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
2835# which returns the next line to be parsed
2836#
2837#####################################################################
2838
2839package Perl::Tidy::LineSource;
2840
2841sub new {
2842
2843 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
2844 my $input_file_copy = undef;
2845 my $fh_copy;
2846
2847 my $input_line_ending;
2848 if ( $rOpts->{'preserve-line-endings'} ) {
2849 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
2850 }
2851
2852 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
2853 return undef unless $fh;
2854
2855 # in order to check output syntax when standard output is used,
2856 # or when it is an object, we have to make a copy of the file
2857 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
2858 {
2859
2860 # Turning off syntax check when input output is used.
2861 # The reason is that temporary files cause problems on
2862 # on many systems.
2863 $rOpts->{'check-syntax'} = 0;
2864 $input_file_copy = '-';
2865
2866 $$rpending_logfile_message .= <<EOM;
2867Note: --syntax check will be skipped because standard input is used
2868EOM
2869
2870 }
2871
2872 return bless {
2873 _fh => $fh,
2874 _fh_copy => $fh_copy,
2875 _filename => $input_file,
2876 _input_file_copy => $input_file_copy,
2877 _input_line_ending => $input_line_ending,
2878 _rinput_buffer => [],
2879 _started => 0,
2880 }, $class;
2881}
2882
2883sub get_input_file_copy_name {
2884 my $self = shift;
2885 my $ifname = $self->{_input_file_copy};
2886 unless ($ifname) {
2887 $ifname = $self->{_filename};
2888 }
2889 return $ifname;
2890}
2891
2892sub close_input_file {
2893 my $self = shift;
2894 eval { $self->{_fh}->close() };
2895 eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
2896}
2897
2898sub get_line {
2899 my $self = shift;
2900 my $line = undef;
2901 my $fh = $self->{_fh};
2902 my $fh_copy = $self->{_fh_copy};
2903 my $rinput_buffer = $self->{_rinput_buffer};
2904
2905 if ( scalar(@$rinput_buffer) ) {
2906 $line = shift @$rinput_buffer;
2907 }
2908 else {
2909 $line = $fh->getline();
2910
2911 # patch to read raw mac files under unix, dos
2912 # see if the first line has embedded \r's
2913 if ( $line && !$self->{_started} ) {
2914 if ( $line =~ /[\015][^\015\012]/ ) {
2915
2916 # found one -- break the line up and store in a buffer
2917 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
2918 my $count = @$rinput_buffer;
2919 $line = shift @$rinput_buffer;
2920 }
2921 $self->{_started}++;
2922 }
2923 }
2924 if ( $line && $fh_copy ) { $fh_copy->print($line); }
2925 return $line;
2926}
2927
2928sub old_get_line {
2929 my $self = shift;
2930 my $line = undef;
2931 my $fh = $self->{_fh};
2932 my $fh_copy = $self->{_fh_copy};
2933 $line = $fh->getline();
2934 if ( $line && $fh_copy ) { $fh_copy->print($line); }
2935 return $line;
2936}
2937
2938#####################################################################
2939#
2940# the Perl::Tidy::LineSink class supplies a write_line method for
2941# actual file writing
2942#
2943#####################################################################
2944
2945package Perl::Tidy::LineSink;
2946
2947sub new {
2948
2949 my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
2950 $rpending_logfile_message )
2951 = @_;
2952 my $fh = undef;
2953 my $fh_copy = undef;
2954 my $fh_tee = undef;
2955 my $output_file_copy = "";
2956 my $output_file_open = 0;
2957
2958 if ( $rOpts->{'format'} eq 'tidy' ) {
2959 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
2960 unless ($fh) { die "Cannot write to output stream\n"; }
2961 $output_file_open = 1;
2962 }
2963
2964 # in order to check output syntax when standard output is used,
2965 # or when it is an object, we have to make a copy of the file
2966 if ( $output_file eq '-' || ref $output_file ) {
2967 if ( $rOpts->{'check-syntax'} ) {
2968
2969 # Turning off syntax check when standard output is used.
2970 # The reason is that temporary files cause problems on
2971 # on many systems.
2972 $rOpts->{'check-syntax'} = 0;
2973 $output_file_copy = '-';
2974 $$rpending_logfile_message .= <<EOM;
2975Note: --syntax check will be skipped because standard output is used
2976EOM
2977
2978 }
2979 }
2980
2981 bless {
2982 _fh => $fh,
2983 _fh_copy => $fh_copy,
2984 _fh_tee => $fh_tee,
2985 _output_file => $output_file,
2986 _output_file_open => $output_file_open,
2987 _output_file_copy => $output_file_copy,
2988 _tee_flag => 0,
2989 _tee_file => $tee_file,
2990 _tee_file_opened => 0,
2991 _line_separator => $line_separator,
2992 }, $class;
2993}
2994
2995sub write_line {
2996
2997 my $self = shift;
2998 my $fh = $self->{_fh};
2999 my $fh_copy = $self->{_fh_copy};
3000
3001 my $output_file_open = $self->{_output_file_open};
3002 chomp $_[0];
3003 $_[0] .= $self->{_line_separator};
3004
3005 $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3006 print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3007
3008 if ( $self->{_tee_flag} ) {
3009 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3010 my $fh_tee = $self->{_fh_tee};
3011 print $fh_tee $_[0];
3012 }
3013}
3014
3015sub get_output_file_copy {
3016 my $self = shift;
3017 my $ofname = $self->{_output_file_copy};
3018 unless ($ofname) {
3019 $ofname = $self->{_output_file};
3020 }
3021 return $ofname;
3022}
3023
3024sub tee_on {
3025 my $self = shift;
3026 $self->{_tee_flag} = 1;
3027}
3028
3029sub tee_off {
3030 my $self = shift;
3031 $self->{_tee_flag} = 0;
3032}
3033
3034sub really_open_tee_file {
3035 my $self = shift;
3036 my $tee_file = $self->{_tee_file};
3037 my $fh_tee;
3038 $fh_tee = IO::File->new(">$tee_file")
3039 or die("couldn't open TEE file $tee_file: $!\n");
3040 $self->{_tee_file_opened} = 1;
3041 $self->{_fh_tee} = $fh_tee;
3042}
3043
3044sub close_output_file {
3045 my $self = shift;
3046 eval { $self->{_fh}->close() } if $self->{_output_file_open};
3047 eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3048 $self->close_tee_file();
3049}
3050
3051sub close_tee_file {
3052 my $self = shift;
3053
3054 if ( $self->{_tee_file_opened} ) {
3055 eval { $self->{_fh_tee}->close() };
3056 $self->{_tee_file_opened} = 0;
3057 }
3058}
3059
3060#####################################################################
3061#
3062# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3063# useful for program development.
3064#
3065# Only one such file is created regardless of the number of input
3066# files processed. This allows the results of processing many files
3067# to be summarized in a single file.
3068#
3069#####################################################################
3070
3071package Perl::Tidy::Diagnostics;
3072
3073sub new {
3074
3075 my $class = shift;
3076 bless {
3077 _write_diagnostics_count => 0,
3078 _last_diagnostic_file => "",
3079 _input_file => "",
3080 _fh => undef,
3081 }, $class;
3082}
3083
3084sub set_input_file {
3085 my $self = shift;
3086 $self->{_input_file} = $_[0];
3087}
3088
3089# This is a diagnostic routine which is useful for program development.
3090# Output from debug messages go to a file named DIAGNOSTICS, where
3091# they are labeled by file and line. This allows many files to be
3092# scanned at once for some particular condition of interest.
3093sub write_diagnostics {
3094 my $self = shift;
3095
3096 unless ( $self->{_write_diagnostics_count} ) {
3097 open DIAGNOSTICS, ">DIAGNOSTICS"
3098 or death("couldn't open DIAGNOSTICS: $!\n");
3099 }
3100
3101 my $last_diagnostic_file = $self->{_last_diagnostic_file};
3102 my $input_file = $self->{_input_file};
3103 if ( $last_diagnostic_file ne $input_file ) {
3104 print DIAGNOSTICS "\nFILE:$input_file\n";
3105 }
3106 $self->{_last_diagnostic_file} = $input_file;
3107 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3108 print DIAGNOSTICS "$input_line_number:\t@_";
3109 $self->{_write_diagnostics_count}++;
3110}
3111
3112#####################################################################
3113#
3114# The Perl::Tidy::Logger class writes the .LOG and .ERR files
3115#
3116#####################################################################
3117
3118package Perl::Tidy::Logger;
3119
3120sub new {
3121 my $class = shift;
3122 my $fh;
3123 my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3124
3125 # remove any old error output file
3126 unless ( ref($warning_file) ) {
3127 if ( -e $warning_file ) { unlink($warning_file) }
3128 }
3129
3130 bless {
3131 _log_file => $log_file,
3132 _fh_warnings => undef,
3133 _rOpts => $rOpts,
3134 _fh_warnings => undef,
3135 _last_input_line_written => 0,
3136 _at_end_of_file => 0,
3137 _use_prefix => 1,
3138 _block_log_output => 0,
3139 _line_of_tokens => undef,
3140 _output_line_number => undef,
3141 _wrote_line_information_string => 0,
3142 _wrote_column_headings => 0,
3143 _warning_file => $warning_file,
3144 _warning_count => 0,
3145 _complaint_count => 0,
3146 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure
3147 _saw_brace_error => 0,
3148 _saw_extrude => $saw_extrude,
3149 _output_array => [],
3150 }, $class;
3151}
3152
3153sub close_log_file {
3154
3155 my $self = shift;
3156 if ( $self->{_fh_warnings} ) {
3157 eval { $self->{_fh_warnings}->close() };
3158 $self->{_fh_warnings} = undef;
3159 }
3160}
3161
3162sub get_warning_count {
3163 my $self = shift;
3164 return $self->{_warning_count};
3165}
3166
3167sub get_use_prefix {
3168 my $self = shift;
3169 return $self->{_use_prefix};
3170}
3171
3172sub block_log_output {
3173 my $self = shift;
3174 $self->{_block_log_output} = 1;
3175}
3176
3177sub unblock_log_output {
3178 my $self = shift;
3179 $self->{_block_log_output} = 0;
3180}
3181
3182sub interrupt_logfile {
3183 my $self = shift;
3184 $self->{_use_prefix} = 0;
3185 $self->warning("\n");
3186 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
3187}
3188
3189sub resume_logfile {
3190 my $self = shift;
3191 $self->write_logfile_entry( '#' x 60 . "\n" );
3192 $self->{_use_prefix} = 1;
3193}
3194
3195sub we_are_at_the_last_line {
3196 my $self = shift;
3197 unless ( $self->{_wrote_line_information_string} ) {
3198 $self->write_logfile_entry("Last line\n\n");
3199 }
3200 $self->{_at_end_of_file} = 1;
3201}
3202
3203# record some stuff in case we go down in flames
3204sub black_box {
3205 my $self = shift;
3206 my ( $line_of_tokens, $output_line_number ) = @_;
3207 my $input_line = $line_of_tokens->{_line_text};
3208 my $input_line_number = $line_of_tokens->{_line_number};
3209
3210 # save line information in case we have to write a logfile message
3211 $self->{_line_of_tokens} = $line_of_tokens;
3212 $self->{_output_line_number} = $output_line_number;
3213 $self->{_wrote_line_information_string} = 0;
3214
3215 my $last_input_line_written = $self->{_last_input_line_written};
3216 my $rOpts = $self->{_rOpts};
3217 if (
3218 (
3219 ( $input_line_number - $last_input_line_written ) >=
3220 $rOpts->{'logfile-gap'}
3221 )
3222 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3223 )
3224 {
3225 my $rlevels = $line_of_tokens->{_rlevels};
3226 my $structural_indentation_level = $$rlevels[0];
3227 $self->{_last_input_line_written} = $input_line_number;
3228 ( my $out_str = $input_line ) =~ s/^\s*//;
3229 chomp $out_str;
3230
3231 $out_str = ( '.' x $structural_indentation_level ) . $out_str;
3232
3233 if ( length($out_str) > 35 ) {
3234 $out_str = substr( $out_str, 0, 35 ) . " ....";
3235 }
3236 $self->logfile_output( "", "$out_str\n" );
3237 }
3238}
3239
3240sub write_logfile_entry {
3241 my $self = shift;
3242
3243 # add leading >>> to avoid confusing error mesages and code
3244 $self->logfile_output( ">>>", "@_" );
3245}
3246
3247sub write_column_headings {
3248 my $self = shift;
3249
3250 $self->{_wrote_column_headings} = 1;
3251 my $routput_array = $self->{_output_array};
3252 push @{$routput_array}, <<EOM;
3253The nesting depths in the table below are at the start of the lines.
3254The indicated output line numbers are not always exact.
3255ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3256
3257in:out indent c b nesting code + messages; (messages begin with >>>)
3258lines levels i k (code begins with one '.' per indent level)
3259------ ----- - - -------- -------------------------------------------
3260EOM
3261}
3262
3263sub make_line_information_string {
3264
3265 # make columns of information when a logfile message needs to go out
3266 my $self = shift;
3267 my $line_of_tokens = $self->{_line_of_tokens};
3268 my $input_line_number = $line_of_tokens->{_line_number};
3269 my $line_information_string = "";
3270 if ($input_line_number) {
3271
3272 my $output_line_number = $self->{_output_line_number};
3273 my $brace_depth = $line_of_tokens->{_curly_brace_depth};
3274 my $paren_depth = $line_of_tokens->{_paren_depth};
3275 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
3276 my $python_indentation_level =
3277 $line_of_tokens->{_python_indentation_level};
3278 my $rlevels = $line_of_tokens->{_rlevels};
3279 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3280 my $rci_levels = $line_of_tokens->{_rci_levels};
3281 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3282
3283 my $structural_indentation_level = $$rlevels[0];
3284
3285 $self->write_column_headings() unless $self->{_wrote_column_headings};
3286
3287 # keep logfile columns aligned for scripts up to 999 lines;
3288 # for longer scripts it doesn't really matter
3289 my $extra_space = "";
3290 $extra_space .=
3291 ( $input_line_number < 10 ) ? " "
3292 : ( $input_line_number < 100 ) ? " "
3293 : "";
3294 $extra_space .=
3295 ( $output_line_number < 10 ) ? " "
3296 : ( $output_line_number < 100 ) ? " "
3297 : "";
3298
3299 # there are 2 possible nesting strings:
3300 # the original which looks like this: (0 [1 {2
3301 # the new one, which looks like this: {{[
3302 # the new one is easier to read, and shows the order, but
3303 # could be arbitrarily long, so we use it unless it is too long
3304 my $nesting_string =
3305 "($paren_depth [$square_bracket_depth {$brace_depth";
3306 my $nesting_string_new = $$rnesting_tokens[0];
3307
3308 my $ci_level = $$rci_levels[0];
3309 if ( $ci_level > 9 ) { $ci_level = '*' }
3310 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3311
3312 if ( length($nesting_string_new) <= 8 ) {
3313 $nesting_string =
3314 $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3315 }
3316 if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3317 $line_information_string =
3318"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3319 }
3320 return $line_information_string;
3321}
3322
3323sub logfile_output {
3324 my $self = shift;
3325 my ( $prompt, $msg ) = @_;
3326 return if ( $self->{_block_log_output} );
3327
3328 my $routput_array = $self->{_output_array};
3329 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3330 push @{$routput_array}, "$msg";
3331 }
3332 else {
3333 my $line_information_string = $self->make_line_information_string();
3334 $self->{_wrote_line_information_string} = 1;
3335
3336 if ($line_information_string) {
3337 push @{$routput_array}, "$line_information_string $prompt$msg";
3338 }
3339 else {
3340 push @{$routput_array}, "$msg";
3341 }
3342 }
3343}
3344
3345sub get_saw_brace_error {
3346 my $self = shift;
3347 return $self->{_saw_brace_error};
3348}
3349
3350sub increment_brace_error {
3351 my $self = shift;
3352 $self->{_saw_brace_error}++;
3353}
3354
3355sub brace_warning {
3356 my $self = shift;
3357 use constant BRACE_WARNING_LIMIT => 10;
3358 my $saw_brace_error = $self->{_saw_brace_error};
3359
3360 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3361 $self->warning(@_);
3362 }
3363 $saw_brace_error++;
3364 $self->{_saw_brace_error} = $saw_brace_error;
3365
3366 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3367 $self->warning("No further warnings of this type will be given\n");
3368 }
3369}
3370
3371sub complain {
3372
3373 # handle non-critical warning messages based on input flag
3374 my $self = shift;
3375 my $rOpts = $self->{_rOpts};
3376
3377 # these appear in .ERR output only if -w flag is used
3378 if ( $rOpts->{'warning-output'} ) {
3379 $self->warning(@_);
3380 }
3381
3382 # otherwise, they go to the .LOG file
3383 else {
3384 $self->{_complaint_count}++;
3385 $self->write_logfile_entry(@_);
3386 }
3387}
3388
3389sub warning {
3390
3391 # report errors to .ERR file (or stdout)
3392 my $self = shift;
3393 use constant WARNING_LIMIT => 50;
3394
3395 my $rOpts = $self->{_rOpts};
3396 unless ( $rOpts->{'quiet'} ) {
3397
3398 my $warning_count = $self->{_warning_count};
3399 unless ($warning_count) {
3400 my $warning_file = $self->{_warning_file};
3401 my $fh_warnings;
3402 if ( $rOpts->{'standard-error-output'} ) {
3403 $fh_warnings = *STDERR;
3404 }
3405 else {
3406 ( $fh_warnings, my $filename ) =
3407 Perl::Tidy::streamhandle( $warning_file, 'w' );
3408 $fh_warnings or die("couldn't open $filename $!\n");
3409 warn "## Please see file $filename\n";
3410 }
3411 $self->{_fh_warnings} = $fh_warnings;
3412 }
3413
3414 my $fh_warnings = $self->{_fh_warnings};
3415 if ( $warning_count < WARNING_LIMIT ) {
3416 if ( $self->get_use_prefix() > 0 ) {
3417 my $input_line_number =
3418 Perl::Tidy::Tokenizer::get_input_line_number();
3419 print $fh_warnings "$input_line_number:\t@_";
3420 $self->write_logfile_entry("WARNING: @_");
3421 }
3422 else {
3423 print $fh_warnings @_;
3424 $self->write_logfile_entry(@_);
3425 }
3426 }
3427 $warning_count++;
3428 $self->{_warning_count} = $warning_count;
3429
3430 if ( $warning_count == WARNING_LIMIT ) {
3431 print $fh_warnings "No further warnings will be given";
3432 }
3433 }
3434}
3435
3436# programming bug codes:
3437# -1 = no bug
3438# 0 = maybe, not sure.
3439# 1 = definitely
3440sub report_possible_bug {
3441 my $self = shift;
3442 my $saw_code_bug = $self->{_saw_code_bug};
3443 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
3444}
3445
3446sub report_definite_bug {
3447 my $self = shift;
3448 $self->{_saw_code_bug} = 1;
3449}
3450
3451sub ask_user_for_bug_report {
3452 my $self = shift;
3453
3454 my ( $infile_syntax_ok, $formatter ) = @_;
3455 my $saw_code_bug = $self->{_saw_code_bug};
3456 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
3457 $self->warning(<<EOM);
3458
3459You may have encountered a code bug in perltidy. If you think so, and
3460the problem is not listed in the BUGS file at
3461http://perltidy.sourceforge.net, please report it so that it can be
3462corrected. Include the smallest possible script which has the problem,
3463along with the .LOG file. See the manual pages for contact information.
3464Thank you!
3465EOM
3466
3467 }
3468 elsif ( $saw_code_bug == 1 ) {
3469 if ( $self->{_saw_extrude} ) {
3470 $self->warning(<<EOM);
3471You may have encountered a bug in perltidy. However, since you are
3472using the -extrude option, the problem may be with perl itself, which
3473has occasional parsing problems with this type of file. If you believe
3474that the problem is with perltidy, and the problem is not listed in the
3475BUGS file at http://perltidy.sourceforge.net, please report it so that
3476it can be corrected. Include the smallest possible script which has the
3477problem, along with the .LOG file. See the manual pages for contact
3478information.
3479Thank you!
3480EOM
3481 }
3482 else {
3483 $self->warning(<<EOM);
3484
3485Oops, you seem to have encountered a bug in perltidy. Please check the
3486BUGS file at http://perltidy.sourceforge.net. If the problem is not
3487listed there, please report it so that it can be corrected. Include the
3488smallest possible script which produces this message, along with the
3489.LOG file if appropriate. See the manual pages for contact information.
3490Your efforts are appreciated.
3491Thank you!
3492EOM
3493 my $added_semicolon_count = 0;
3494 eval {
3495 $added_semicolon_count =
3496 $formatter->get_added_semicolon_count();
3497 };
3498 if ( $added_semicolon_count > 0 ) {
3499 $self->warning(<<EOM);
3500
3501The log file shows that perltidy added $added_semicolon_count semicolons.
3502Please rerun with -nasc to see if that is the cause of the syntax error. Even
3503if that is the problem, please report it so that it can be fixed.
3504EOM
3505
3506 }
3507 }
3508 }
3509}
3510
3511sub finish {
3512
3513 # called after all formatting to summarize errors
3514 my $self = shift;
3515 my ( $infile_syntax_ok, $formatter ) = @_;
3516
3517 my $rOpts = $self->{_rOpts};
3518 my $warning_count = $self->{_warning_count};
3519 my $saw_code_bug = $self->{_saw_code_bug};
3520
3521 my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
3522 || $saw_code_bug == 1
3523 || $rOpts->{'logfile'};
3524 my $log_file = $self->{_log_file};
3525 if ($warning_count) {
3526 if ($save_logfile) {
3527 $self->block_log_output(); # avoid echoing this to the logfile
3528 $self->warning(
3529 "The logfile $log_file may contain useful information\n");
3530 $self->unblock_log_output();
3531 }
3532
3533 if ( $self->{_complaint_count} > 0 ) {
3534 $self->warning(
3535"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
3536 );
3537 }
3538
3539 if ( $self->{_saw_brace_error}
3540 && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
3541 {
3542 $self->warning("To save a full .LOG file rerun with -g\n");
3543 }
3544 }
3545 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
3546
3547 if ($save_logfile) {
3548 my $log_file = $self->{_log_file};
3549 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
3550 if ($fh) {
3551 my $routput_array = $self->{_output_array};
3552 foreach ( @{$routput_array} ) { $fh->print($_) }
3553 eval { $fh->close() };
3554 }
3555 }
3556}
3557
3558#####################################################################
3559#
3560# The Perl::Tidy::DevNull class supplies a dummy print method
3561#
3562#####################################################################
3563
3564package Perl::Tidy::DevNull;
3565sub new { return bless {}, $_[0] }
3566sub print { return }
3567sub close { return }
3568
3569#####################################################################
3570#
3571# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
3572#
3573#####################################################################
3574
3575package Perl::Tidy::HtmlWriter;
3576
3577use File::Basename;
3578
3579# class variables
3580use vars qw{
3581 %html_color
3582 %html_bold
3583 %html_italic
3584 %token_short_names
3585 %short_to_long_names
3586 $rOpts
3587 $css_filename
3588 $css_linkname
3589 $missing_html_entities
3590};
3591
3592# replace unsafe characters with HTML entity representation if HTML::Entities
3593# is available
3594{ eval "use HTML::Entities"; $missing_html_entities = $@; }
3595
3596sub new {
3597
3598 my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
3599 $html_src_extension )
3600 = @_;
3601
3602 my $html_file_opened = 0;
3603 my $html_fh;
3604 ( $html_fh, my $html_filename ) =
3605 Perl::Tidy::streamhandle( $html_file, 'w' );
3606 unless ($html_fh) {
3607 warn("can't open $html_file: $!\n");
3608 return undef;
3609 }
3610 $html_file_opened = 1;
3611
3612 if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
3613 $input_file = "NONAME";
3614 }
3615
3616 # write the table of contents to a string
3617 my $toc_string;
3618 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
3619
3620 my $html_pre_fh;
3621 my @pre_string_stack;
3622 if ( $rOpts->{'html-pre-only'} ) {
3623
3624 # pre section goes directly to the output stream
3625 $html_pre_fh = $html_fh;
3626 $html_pre_fh->print( <<"PRE_END");
3627<pre>
3628PRE_END
3629 }
3630 else {
3631
3632 # pre section go out to a temporary string
3633 my $pre_string;
3634 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
3635 push @pre_string_stack, \$pre_string;
3636 }
3637
3638 # pod text gets diverted if the 'pod2html' is used
3639 my $html_pod_fh;
3640 my $pod_string;
3641 if ( $rOpts->{'pod2html'} ) {
3642 if ( $rOpts->{'html-pre-only'} ) {
3643 undef $rOpts->{'pod2html'};
3644 }
3645 else {
3646 eval "use Pod::Html";
3647 if ($@) {
3648 warn
3649"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
3650 undef $rOpts->{'pod2html'};
3651 }
3652 else {
3653 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
3654 }
3655 }
3656 }
3657
3658 my $toc_filename;
3659 my $src_filename;
3660 if ( $rOpts->{'frames'} ) {
3661 unless ($extension) {
3662 warn
3663"cannot use frames without a specified output extension; ignoring -frm\n";
3664 undef $rOpts->{'frames'};
3665 }
3666 else {
3667 $toc_filename = $input_file . $html_toc_extension . $extension;
3668 $src_filename = $input_file . $html_src_extension . $extension;
3669 }
3670 }
3671
3672 # ----------------------------------------------------------
3673 # Output is now directed as follows:
3674 # html_toc_fh <-- table of contents items
3675 # html_pre_fh <-- the <pre> section of formatted code, except:
3676 # html_pod_fh <-- pod goes here with the pod2html option
3677 # ----------------------------------------------------------
3678
3679 my $title = $rOpts->{'title'};
3680 unless ($title) {
3681 ( $title, my $path ) = fileparse($input_file);
3682 }
3683 my $toc_item_count = 0;
3684 my $in_toc_package = "";
3685 my $last_level = 0;
3686 bless {
3687 _input_file => $input_file, # name of input file
3688 _title => $title, # title, unescaped
3689 _html_file => $html_file, # name of .html output file
3690 _toc_filename => $toc_filename, # for frames option
3691 _src_filename => $src_filename, # for frames option
3692 _html_file_opened => $html_file_opened, # a flag
3693 _html_fh => $html_fh, # the output stream
3694 _html_pre_fh => $html_pre_fh, # pre section goes here
3695 _rpre_string_stack => \@pre_string_stack, # stack of pre sections
3696 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
3697 _rpod_string => \$pod_string, # string holding pod
3698 _pod_cut_count => 0, # how many =cut's?
3699 _html_toc_fh => $html_toc_fh, # fh for table of contents
3700 _rtoc_string => \$toc_string, # string holding toc
3701 _rtoc_item_count => \$toc_item_count, # how many toc items
3702 _rin_toc_package => \$in_toc_package, # package name
3703 _rtoc_name_count => {}, # hash to track unique names
3704 _rpackage_stack => [], # stack to check for package
3705 # name changes
3706 _rlast_level => \$last_level, # brace indentation level
3707 }, $class;
3708}
3709
3710sub add_toc_item {
3711
3712 # Add an item to the html table of contents.
3713 # This is called even if no table of contents is written,
3714 # because we still want to put the anchors in the <pre> text.
3715 # We are given an anchor name and its type; types are:
3716 # 'package', 'sub', '__END__', '__DATA__', 'EOF'
3717 # There must be an 'EOF' call at the end to wrap things up.
3718 my $self = shift;
3719 my ( $name, $type ) = @_;
3720 my $html_toc_fh = $self->{_html_toc_fh};
3721 my $html_pre_fh = $self->{_html_pre_fh};
3722 my $rtoc_name_count = $self->{_rtoc_name_count};
3723 my $rtoc_item_count = $self->{_rtoc_item_count};
3724 my $rlast_level = $self->{_rlast_level};
3725 my $rin_toc_package = $self->{_rin_toc_package};
3726 my $rpackage_stack = $self->{_rpackage_stack};
3727
3728 # packages contain sublists of subs, so to avoid errors all package
3729 # items are written and finished with the following routines
3730 my $end_package_list = sub {
3731 if ($$rin_toc_package) {
3732 $html_toc_fh->print("</ul>\n</li>\n");
3733 $$rin_toc_package = "";
3734 }
3735 };
3736
3737 my $start_package_list = sub {
3738 my ( $unique_name, $package ) = @_;
3739 if ($$rin_toc_package) { $end_package_list->() }
3740 $html_toc_fh->print(<<EOM);
3741<li><a href=\"#$unique_name\">package $package</a>
3742<ul>
3743EOM
3744 $$rin_toc_package = $package;
3745 };
3746
3747 # start the table of contents on the first item
3748 unless ($$rtoc_item_count) {
3749
3750 # but just quit if we hit EOF without any other entries
3751 # in this case, there will be no toc
3752 return if ( $type eq 'EOF' );
3753 $html_toc_fh->print( <<"TOC_END");
3754<!-- BEGIN CODE INDEX --><a name="code-index"></a>
3755<ul>
3756TOC_END
3757 }
3758 $$rtoc_item_count++;
3759
3760 # make a unique anchor name for this location:
3761 # - packages get a 'package-' prefix
3762 # - subs use their names
3763 my $unique_name = $name;
3764 if ( $type eq 'package' ) { $unique_name = "package-$name" }
3765
3766 # append '-1', '-2', etc if necessary to make unique; this will
3767 # be unique because subs and packages cannot have a '-'
3768 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
3769 $unique_name .= "-$count";
3770 }
3771
3772 # - all names get terminal '-' if pod2html is used, to avoid
3773 # conflicts with anchor names created by pod2html
3774 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
3775
3776 # start/stop lists of subs
3777 if ( $type eq 'sub' ) {
3778 my $package = $rpackage_stack->[$$rlast_level];
3779 unless ($package) { $package = 'main' }
3780
3781 # if we're already in a package/sub list, be sure its the right
3782 # package or else close it
3783 if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
3784 $end_package_list->();
3785 }
3786
3787 # start a package/sub list if necessary
3788 unless ($$rin_toc_package) {
3789 $start_package_list->( $unique_name, $package );
3790 }
3791 }
3792
3793 # now write an entry in the toc for this item
3794 if ( $type eq 'package' ) {
3795 $start_package_list->( $unique_name, $name );
3796 }
3797 elsif ( $type eq 'sub' ) {
3798 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
3799 }
3800 else {
3801 $end_package_list->();
3802 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
3803 }
3804
3805 # write the anchor in the <pre> section
3806 $html_pre_fh->print("<a name=\"$unique_name\"></a>");
3807
3808 # end the table of contents, if any, on the end of file
3809 if ( $type eq 'EOF' ) {
3810 $html_toc_fh->print( <<"TOC_END");
3811</ul>
3812<!-- END CODE INDEX -->
3813TOC_END
3814 }
3815}
3816
3817BEGIN {
3818
3819 # This is the official list of tokens which may be identified by the
3820 # user. Long names are used as getopt keys. Short names are
3821 # convenient short abbreviations for specifying input. Short names
3822 # somewhat resemble token type characters, but are often different
3823 # because they may only be alphanumeric, to allow command line
3824 # input. Also, note that because of case insensitivity of html,
3825 # this table must be in a single case only (I've chosen to use all
3826 # lower case).
3827 # When adding NEW_TOKENS: update this hash table
3828 # short names => long names
3829 %short_to_long_names = (
3830 'n' => 'numeric',
3831 'p' => 'paren',
3832 'q' => 'quote',
3833 's' => 'structure',
3834 'c' => 'comment',
3835 'v' => 'v-string',
3836 'cm' => 'comma',
3837 'w' => 'bareword',
3838 'co' => 'colon',
3839 'pu' => 'punctuation',
3840 'i' => 'identifier',
3841 'j' => 'label',
3842 'h' => 'here-doc-target',
3843 'hh' => 'here-doc-text',
3844 'k' => 'keyword',
3845 'sc' => 'semicolon',
3846 'm' => 'subroutine',
3847 'pd' => 'pod-text',
3848 );
3849
3850 # Now we have to map actual token types into one of the above short
3851 # names; any token types not mapped will get 'punctuation'
3852 # properties.
3853
3854 # The values of this hash table correspond to the keys of the
3855 # previous hash table.
3856 # The keys of this hash table are token types and can be seen
3857 # by running with --dump-token-types (-dtt).
3858
3859 # When adding NEW_TOKENS: update this hash table
3860 # $type => $short_name
3861 %token_short_names = (
3862 '#' => 'c',
3863 'n' => 'n',
3864 'v' => 'v',
3865 'k' => 'k',
3866 'F' => 'k',
3867 'Q' => 'q',
3868 'q' => 'q',
3869 'J' => 'j',
3870 'j' => 'j',
3871 'h' => 'h',
3872 'H' => 'hh',
3873 'w' => 'w',
3874 ',' => 'cm',
3875 '=>' => 'cm',
3876 ';' => 'sc',
3877 ':' => 'co',
3878 'f' => 'sc',
3879 '(' => 'p',
3880 ')' => 'p',
3881 'M' => 'm',
3882 'P' => 'pd',
3883 'A' => 'co',
3884 );
3885
3886 # These token types will all be called identifiers for now
3887 # FIXME: need to separate user defined modules as separate type
3888 my @identifier = qw" i t U C Y Z G :: ";
3889 @token_short_names{@identifier} = ('i') x scalar(@identifier);
3890
3891 # These token types will be called 'structure'
3892 my @structure = qw" { } ";
3893 @token_short_names{@structure} = ('s') x scalar(@structure);
3894
3895 # OLD NOTES: save for reference
3896 # Any of these could be added later if it would be useful.
3897 # For now, they will by default become punctuation
3898 # my @list = qw" L R [ ] ";
3899 # @token_long_names{@list} = ('non-structure') x scalar(@list);
3900 #
3901 # my @list = qw"
3902 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
3903 # ";
3904 # @token_long_names{@list} = ('math') x scalar(@list);
3905 #
3906 # my @list = qw" & &= ~ ~= ^ ^= | |= ";
3907 # @token_long_names{@list} = ('bit') x scalar(@list);
3908 #
3909 # my @list = qw" == != < > <= <=> ";
3910 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
3911 #
3912 # my @list = qw" && || ! &&= ||= ";
3913 # @token_long_names{@list} = ('logical') x scalar(@list);
3914 #
3915 # my @list = qw" . .= =~ !~ x x= ";
3916 # @token_long_names{@list} = ('string-operators') x scalar(@list);
3917 #
3918 # # Incomplete..
3919 # my @list = qw" .. -> <> ... \ ? ";
3920 # @token_long_names{@list} = ('misc-operators') x scalar(@list);
3921
3922}
3923
3924sub make_getopt_long_names {
3925 my $class = shift;
3926 my ($rgetopt_names) = @_;
3927 while ( my ( $short_name, $name ) = each %short_to_long_names ) {
3928 push @$rgetopt_names, "html-color-$name=s";
3929 push @$rgetopt_names, "html-italic-$name!";
3930 push @$rgetopt_names, "html-bold-$name!";
3931 }
3932 push @$rgetopt_names, "html-color-background=s";
3933 push @$rgetopt_names, "html-linked-style-sheet=s";
3934 push @$rgetopt_names, "nohtml-style-sheets";
3935 push @$rgetopt_names, "html-pre-only";
3936 push @$rgetopt_names, "html-line-numbers";
3937 push @$rgetopt_names, "html-entities!";
3938 push @$rgetopt_names, "stylesheet";
3939 push @$rgetopt_names, "html-table-of-contents!";
3940 push @$rgetopt_names, "pod2html!";
3941 push @$rgetopt_names, "frames!";
3942 push @$rgetopt_names, "html-toc-extension=s";
3943 push @$rgetopt_names, "html-src-extension=s";
3944
3945 # Pod::Html parameters:
3946 push @$rgetopt_names, "backlink=s";
3947 push @$rgetopt_names, "cachedir=s";
3948 push @$rgetopt_names, "htmlroot=s";
3949 push @$rgetopt_names, "libpods=s";
3950 push @$rgetopt_names, "podpath=s";
3951 push @$rgetopt_names, "podroot=s";
3952 push @$rgetopt_names, "title=s";
3953
3954 # Pod::Html parameters with leading 'pod' which will be removed
3955 # before the call to Pod::Html
3956 push @$rgetopt_names, "podquiet!";
3957 push @$rgetopt_names, "podverbose!";
3958 push @$rgetopt_names, "podrecurse!";
3959 push @$rgetopt_names, "podflush";
3960 push @$rgetopt_names, "podheader!";
3961 push @$rgetopt_names, "podindex!";
3962}
3963
3964sub make_abbreviated_names {
3965
3966 # We're appending things like this to the expansion list:
3967 # 'hcc' => [qw(html-color-comment)],
3968 # 'hck' => [qw(html-color-keyword)],
3969 # etc
3970 my $class = shift;
3971 my ($rexpansion) = @_;
3972
3973 # abbreviations for color/bold/italic properties
3974 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
3975 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
3976 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
3977 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
3978 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
3979 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
3980 }
3981
3982 # abbreviations for all other html options
3983 ${$rexpansion}{"hcbg"} = ["html-color-background"];
3984 ${$rexpansion}{"pre"} = ["html-pre-only"];
3985 ${$rexpansion}{"toc"} = ["html-table-of-contents"];
3986 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
3987 ${$rexpansion}{"nnn"} = ["html-line-numbers"];
3988 ${$rexpansion}{"hent"} = ["html-entities"];
3989 ${$rexpansion}{"nhent"} = ["nohtml-entities"];
3990 ${$rexpansion}{"css"} = ["html-linked-style-sheet"];
3991 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
3992 ${$rexpansion}{"ss"} = ["stylesheet"];
3993 ${$rexpansion}{"pod"} = ["pod2html"];
3994 ${$rexpansion}{"npod"} = ["nopod2html"];
3995 ${$rexpansion}{"frm"} = ["frames"];
3996 ${$rexpansion}{"nfrm"} = ["noframes"];
3997 ${$rexpansion}{"text"} = ["html-toc-extension"];
3998 ${$rexpansion}{"sext"} = ["html-src-extension"];
3999}
4000
4001sub check_options {
4002
4003 # This will be called once after options have been parsed
4004 my $class = shift;
4005 $rOpts = shift;
4006
4007 # X11 color names for default settings that seemed to look ok
4008 # (these color names are only used for programming clarity; the hex
4009 # numbers are actually written)
4010 use constant ForestGreen => "#228B22";
4011 use constant SaddleBrown => "#8B4513";
4012 use constant magenta4 => "#8B008B";
4013 use constant IndianRed3 => "#CD5555";
4014 use constant DeepSkyBlue4 => "#00688B";
4015 use constant MediumOrchid3 => "#B452CD";
4016 use constant black => "#000000";
4017 use constant white => "#FFFFFF";
4018 use constant red => "#FF0000";
4019
4020 # set default color, bold, italic properties
4021 # anything not listed here will be given the default (punctuation) color --
4022 # these types currently not listed and get default: ws pu s sc cm co p
4023 # When adding NEW_TOKENS: add an entry here if you don't want defaults
4024
4025 # set_default_properties( $short_name, default_color, bold?, italic? );
4026 set_default_properties( 'c', ForestGreen, 0, 0 );
4027 set_default_properties( 'pd', ForestGreen, 0, 1 );
4028 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
4029 set_default_properties( 'q', IndianRed3, 0, 0 );
4030 set_default_properties( 'hh', IndianRed3, 0, 1 );
4031 set_default_properties( 'h', IndianRed3, 1, 0 );
4032 set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
4033 set_default_properties( 'w', black, 0, 0 );
4034 set_default_properties( 'n', MediumOrchid3, 0, 0 );
4035 set_default_properties( 'v', MediumOrchid3, 0, 0 );
4036 set_default_properties( 'j', IndianRed3, 1, 0 );
4037 set_default_properties( 'm', red, 1, 0 );
4038
4039 set_default_color( 'html-color-background', white );
4040 set_default_color( 'html-color-punctuation', black );
4041
4042 # setup property lookup tables for tokens based on their short names
4043 # every token type has a short name, and will use these tables
4044 # to do the html markup
4045 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4046 $html_color{$short_name} = $rOpts->{"html-color-$long_name"};
4047 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
4048 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4049 }
4050
4051 # write style sheet to STDOUT and die if requested
4052 if ( defined( $rOpts->{'stylesheet'} ) ) {
4053 write_style_sheet_file('-');
4054 exit 1;
4055 }
4056
4057 # make sure user gives a file name after -css
4058 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4059 $css_linkname = $rOpts->{'html-linked-style-sheet'};
4060 if ( $css_linkname =~ /^-/ ) {
4061 die "You must specify a valid filename after -css\n";
4062 }
4063 }
4064
4065 # check for conflict
4066 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4067 $rOpts->{'nohtml-style-sheets'} = 0;
4068 warning("You can't specify both -css and -nss; -nss ignored\n");
4069 }
4070
4071 # write a style sheet file if necessary
4072 if ($css_linkname) {
4073
4074 # if the selected filename exists, don't write, because user may
4075 # have done some work by hand to create it; use backup name instead
4076 # Also, this will avoid a potential disaster in which the user
4077 # forgets to specify the style sheet, like this:
4078 # perltidy -html -css myfile1.pl myfile2.pl
4079 # This would cause myfile1.pl to parsed as the style sheet by GetOpts
4080 my $css_filename = $css_linkname;
4081 unless ( -e $css_filename ) {
4082 write_style_sheet_file($css_filename);
4083 }
4084 }
4085 $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4086}
4087
4088sub write_style_sheet_file {
4089
4090 my $css_filename = shift;
4091 my $fh;
4092 unless ( $fh = IO::File->new("> $css_filename") ) {
4093 die "can't open $css_filename: $!\n";
4094 }
4095 write_style_sheet_data($fh);
4096 eval { $fh->close };
4097}
4098
4099sub write_style_sheet_data {
4100
4101 # write the style sheet data to an open file handle
4102 my $fh = shift;
4103
4104 my $bg_color = $rOpts->{'html-color-background'};
4105 my $text_color = $rOpts->{'html-color-punctuation'};
4106
4107 # pre-bgcolor is new, and may not be defined
4108 my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4109 $pre_bg_color = $bg_color unless $pre_bg_color;
4110
4111 $fh->print(<<"EOM");
4112/* default style sheet generated by perltidy */
4113body {background: $bg_color; color: $text_color}
4114pre { color: $text_color;
4115 background: $pre_bg_color;
4116 font-family: courier;
4117 }
4118
4119EOM
4120
4121 foreach my $short_name ( sort keys %short_to_long_names ) {
4122 my $long_name = $short_to_long_names{$short_name};
4123
4124 my $abbrev = '.' . $short_name;
4125 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
4126 my $color = $html_color{$short_name};
4127 if ( !defined($color) ) { $color = $text_color }
4128 $fh->print("$abbrev \{ color: $color;");
4129
4130 if ( $html_bold{$short_name} ) {
4131 $fh->print(" font-weight:bold;");
4132 }
4133
4134 if ( $html_italic{$short_name} ) {
4135 $fh->print(" font-style:italic;");
4136 }
4137 $fh->print("} /* $long_name */\n");
4138 }
4139}
4140
4141sub set_default_color {
4142
4143 # make sure that options hash $rOpts->{$key} contains a valid color
4144 my ( $key, $color ) = @_;
4145 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4146 $rOpts->{$key} = check_RGB($color);
4147}
4148
4149sub check_RGB {
4150
4151 # if color is a 6 digit hex RGB value, prepend a #, otherwise
4152 # assume that it is a valid ascii color name
4153 my ($color) = @_;
4154 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4155 return $color;
4156}
4157
4158sub set_default_properties {
4159 my ( $short_name, $color, $bold, $italic ) = @_;
4160
4161 set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4162 my $key;
4163 $key = "html-bold-$short_to_long_names{$short_name}";
4164 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4165 $key = "html-italic-$short_to_long_names{$short_name}";
4166 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4167}
4168
4169sub pod_to_html {
4170
4171 # Use Pod::Html to process the pod and make the page
4172 # then merge the perltidy code sections into it.
4173 # return 1 if success, 0 otherwise
4174 my $self = shift;
4175 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4176 my $input_file = $self->{_input_file};
4177 my $title = $self->{_title};
4178 my $success_flag = 0;
4179
4180 # don't try to use pod2html if no pod
4181 unless ($pod_string) {
4182 return $success_flag;
4183 }
4184
4185 # Pod::Html requires a real temporary filename
4186 # If we are making a frame, we have a name available
4187 # Otherwise, we have to fine one
4188 my $tmpfile;
4189 if ( $rOpts->{'frames'} ) {
4190 $tmpfile = $self->{_toc_filename};
4191 }
4192 else {
4193 $tmpfile = Perl::Tidy::make_temporary_filename();
4194 }
4195 my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4196 unless ($fh_tmp) {
4197 warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4198 return $success_flag;
4199 }
4200
4201 #------------------------------------------------------------------
4202 # Warning: a temporary file is open; we have to clean up if
4203 # things go bad. From here on all returns should be by going to
4204 # RETURN so that the temporary file gets unlinked.
4205 #------------------------------------------------------------------
4206
4207 # write the pod text to the temporary file
4208 $fh_tmp->print($pod_string);
4209 $fh_tmp->close();
4210
4211 # Hand off the pod to pod2html.
4212 # Note that we can use the same temporary filename for input and output
4213 # because of the way pod2html works.
4214 {
4215
4216 my @args;
4217 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4218 my $kw;
4219
4220 # Flags with string args:
4221 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4222 # "podpath=s", "podroot=s"
4223 # Note: -css=s is handled by perltidy itself
4224 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4225 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4226 }
4227
4228 # Toggle switches; these have extra leading 'pod'
4229 # "header!", "index!", "recurse!", "quiet!", "verbose!"
4230 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4231 my $kwd = $kw; # allows us to strip 'pod'
4232 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4233 elsif ( defined( $rOpts->{$kw} ) ) {
4234 $kwd =~ s/^pod//;
4235 push @args, "--no$kwd";
4236 }
4237 }
4238
4239 # "flush",
4240 $kw = 'podflush';
4241 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4242
4243 # Must clean up if pod2html dies (it can);
4244 # Be careful not to overwrite callers __DIE__ routine
4245 local $SIG{__DIE__} = sub {
4246 print $_[0];
4247 unlink $tmpfile if -e $tmpfile;
4248 exit 1;
4249 };
4250
4251 pod2html(@args);
4252 }
4253 $fh_tmp = IO::File->new( $tmpfile, 'r' );
4254 unless ($fh_tmp) {
4255
4256 # this error shouldn't happen ... we just used this filename
4257 warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4258 goto RETURN;
4259 }
4260
4261 my $html_fh = $self->{_html_fh};
4262 my @toc;
4263 my $in_toc;
4264 my $no_print;
4265
4266 # This routine will write the html selectively and store the toc
4267 my $html_print = sub {
4268 foreach (@_) {
4269 $html_fh->print($_) unless ($no_print);
4270 if ($in_toc) { push @toc, $_ }
4271 }
4272 };
4273
4274 # loop over lines of html output from pod2html and merge in
4275 # the necessary perltidy html sections
4276 my ( $saw_body, $saw_index, $saw_body_end );
4277 while ( my $line = $fh_tmp->getline() ) {
4278
4279 if ( $line =~ /^\s*<html>\s*$/i ) {
4280 my $date = localtime;
4281 $html_print->("<!-- Generated by perltidy on $date -->\n");
4282 $html_print->($line);
4283 }
4284
4285 # Copy the perltidy css, if any, after <body> tag
4286 elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4287 $saw_body = 1;
4288 $html_print->($css_string) if $css_string;
4289 $html_print->($line);
4290
4291 # add a top anchor and heading
4292 $html_print->("<a name=\"-top-\"></a>\n");
4293 $title = escape_html($title);
4294 $html_print->("<h1>$title</h1>\n");
4295 }
4296 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4297 $in_toc = 1;
4298
4299 # when frames are used, an extra table of contents in the
4300 # contents panel is confusing, so don't print it
4301 $no_print = $rOpts->{'frames'}
4302 || !$rOpts->{'html-table-of-contents'};
4303 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4304 $html_print->($line);
4305 }
4306
4307 # Copy the perltidy toc, if any, after the Pod::Html toc
4308 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4309 $saw_index = 1;
4310 $html_print->($line);
4311 if ($toc_string) {
4312 $html_print->("<hr />\n") if $rOpts->{'frames'};
4313 $html_print->("<h2>Code Index:</h2>\n");
4314 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4315 $html_print->(@toc);
4316 }
4317 $in_toc = 0;
4318 $no_print = 0;
4319 }
4320
4321 # Copy one perltidy section after each marker
4322 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4323 $line = $2;
4324 $html_print->($1) if $1;
4325
4326 # Intermingle code and pod sections if we saw multiple =cut's.
4327 if ( $self->{_pod_cut_count} > 1 ) {
4328 my $rpre_string = shift(@$rpre_string_stack);
4329 if ($$rpre_string) {
4330 $html_print->('<pre>');
4331 $html_print->($$rpre_string);
4332 $html_print->('</pre>');
4333 }
4334 else {
4335
4336 # shouldn't happen: we stored a string before writing
4337 # each marker.
4338 warn
4339"Problem merging html stream with pod2html; order may be wrong\n";
4340 }
4341 $html_print->($line);
4342 }
4343
4344 # If didn't see multiple =cut lines, we'll put the pod out first
4345 # and then the code, because it's less confusing.
4346 else {
4347
4348 # since we are not intermixing code and pod, we don't need
4349 # or want any <hr> lines which separated pod and code
4350 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4351 }
4352 }
4353
4354 # Copy any remaining code section before the </body> tag
4355 elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4356 $saw_body_end = 1;
4357 if (@$rpre_string_stack) {
4358 unless ( $self->{_pod_cut_count} > 1 ) {
4359 $html_print->('<hr />');
4360 }
4361 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4362 $html_print->('<pre>');
4363 $html_print->($$rpre_string);
4364 $html_print->('</pre>');
4365 }
4366 }
4367 $html_print->($line);
4368 }
4369 else {
4370 $html_print->($line);
4371 }
4372 }
4373
4374 $success_flag = 1;
4375 unless ($saw_body) {
4376 warn "Did not see <body> in pod2html output\n";
4377 $success_flag = 0;
4378 }
4379 unless ($saw_body_end) {
4380 warn "Did not see </body> in pod2html output\n";
4381 $success_flag = 0;
4382 }
4383 unless ($saw_index) {
4384 warn "Did not find INDEX END in pod2html output\n";
4385 $success_flag = 0;
4386 }
4387
4388 RETURN:
4389 eval { $html_fh->close() };
4390
4391 # note that we have to unlink tmpfile before making frames
4392 # because the tmpfile may be one of the names used for frames
4393 unlink $tmpfile if -e $tmpfile;
4394 if ( $success_flag && $rOpts->{'frames'} ) {
4395 $self->make_frame( \@toc );
4396 }
4397 return $success_flag;
4398}
4399
4400sub make_frame {
4401
4402 # Make a frame with table of contents in the left panel
4403 # and the text in the right panel.
4404 # On entry:
4405 # $html_filename contains the no-frames html output
4406 # $rtoc is a reference to an array with the table of contents
4407 my $self = shift;
4408 my ($rtoc) = @_;
4409 my $input_file = $self->{_input_file};
4410 my $html_filename = $self->{_html_file};
4411 my $toc_filename = $self->{_toc_filename};
4412 my $src_filename = $self->{_src_filename};
4413 my $title = $self->{_title};
4414 $title = escape_html($title);
4415
4416 # FUTURE input parameter:
4417 my $top_basename = "";
4418
4419 # We need to produce 3 html files:
4420 # 1. - the table of contents
4421 # 2. - the contents (source code) itself
4422 # 3. - the frame which contains them
4423
4424 # get basenames for relative links
4425 my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
4426 my ( $src_basename, $src_path ) = fileparse($src_filename);
4427
4428 # 1. Make the table of contents panel, with appropriate changes
4429 # to the anchor names
4430 my $src_frame_name = 'SRC';
4431 my $first_anchor =
4432 write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
4433 $src_frame_name );
4434
4435 # 2. The current .html filename is renamed to be the contents panel
4436 rename( $html_filename, $src_filename )
4437 or die "Cannot rename $html_filename to $src_filename:$!\n";
4438
4439 # 3. Then use the original html filename for the frame
4440 write_frame_html(
4441 $title, $html_filename, $top_basename,
4442 $toc_basename, $src_basename, $src_frame_name
4443 );
4444}
4445
4446sub write_toc_html {
4447
4448 # write a separate html table of contents file for frames
4449 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
4450 my $fh = IO::File->new( $toc_filename, 'w' )
4451 or die "Cannot open $toc_filename:$!\n";
4452 $fh->print(<<EOM);
4453<html>
4454<head>
4455<title>$title</title>
4456</head>
4457<body>
4458<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
4459EOM
4460
4461 my $first_anchor =
4462 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
4463 $fh->print( join "", @$rtoc );
4464
4465 $fh->print(<<EOM);
4466</body>
4467</html>
4468EOM
4469
4470}
4471
4472sub write_frame_html {
4473
4474 # write an html file to be the table of contents frame
4475 my (
4476 $title, $frame_filename, $top_basename,
4477 $toc_basename, $src_basename, $src_frame_name
4478 )
4479 = @_;
4480
4481 my $fh = IO::File->new( $frame_filename, 'w' )
4482 or die "Cannot open $toc_basename:$!\n";
4483
4484 $fh->print(<<EOM);
4485<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4486 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
4487<?xml version="1.0" encoding="iso-8859-1" ?>
4488<html xmlns="http://www.w3.org/1999/xhtml">
4489<head>
4490<title>$title</title>
4491</head>
4492EOM
4493
4494 # two left panels, one right, if master index file
4495 if ($top_basename) {
4496 $fh->print(<<EOM);
4497<frameset cols="20%,80%">
4498<frameset rows="30%,70%">
4499<frame src = "$top_basename" />
4500<frame src = "$toc_basename" />
4501</frameset>
4502EOM
4503 }
4504
4505 # one left panels, one right, if no master index file
4506 else {
4507 $fh->print(<<EOM);
4508<frameset cols="20%,*">
4509<frame src = "$toc_basename" />
4510EOM
4511 }
4512 $fh->print(<<EOM);
4513<frame src = "$src_basename" name = "$src_frame_name" />
4514<noframes>
4515<body>
4516<p>If you see this message, you are using a non-frame-capable web client.</p>
4517<p>This document contains:</p>
4518<ul>
4519<li><a href="$toc_basename">A table of contents</a></li>
4520<li><a href="$src_basename">The source code</a></li>
4521</ul>
4522</body>
4523</noframes>
4524</frameset>
4525</html>
4526EOM
4527}
4528
4529sub change_anchor_names {
4530
4531 # add a filename and target to anchors
4532 # also return the first anchor
4533 my ( $rlines, $filename, $target ) = @_;
4534 my $first_anchor;
4535 foreach my $line (@$rlines) {
4536
4537 # We're looking for lines like this:
4538 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
4539 # ---- - -------- -----------------
4540 # $1 $4 $5
4541 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
4542 my $pre = $1;
4543 my $name = $4;
4544 my $post = $5;
4545 my $href = "$filename#$name";
4546 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
4547 unless ($first_anchor) { $first_anchor = $href }
4548 }
4549 }
4550 return $first_anchor;
4551}
4552
4553sub close_html_file {
4554 my $self = shift;
4555 return unless $self->{_html_file_opened};
4556
4557 my $html_fh = $self->{_html_fh};
4558 my $rtoc_string = $self->{_rtoc_string};
4559
4560 # There are 3 basic paths to html output...
4561
4562 # ---------------------------------
4563 # Path 1: finish up if in -pre mode
4564 # ---------------------------------
4565 if ( $rOpts->{'html-pre-only'} ) {
4566 $html_fh->print( <<"PRE_END");
4567</pre>
4568PRE_END
4569 eval { $html_fh->close() };
4570 return;
4571 }
4572
4573 # Finish the index
4574 $self->add_toc_item( 'EOF', 'EOF' );
4575
4576 my $rpre_string_stack = $self->{_rpre_string_stack};
4577
4578 # Patch to darken the <pre> background color in case of pod2html and
4579 # interleaved code/documentation. Otherwise, the distinction
4580 # between code and documentation is blurred.
4581 if ( $rOpts->{pod2html}
4582 && $self->{_pod_cut_count} >= 1
4583 && $rOpts->{'html-color-background'} eq '#FFFFFF' )
4584 {
4585 $rOpts->{'html-pre-color-background'} = '#F0F0F0';
4586 }
4587
4588 # put the css or its link into a string, if used
4589 my $css_string;
4590 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
4591
4592 # use css linked to another file
4593 if ( $rOpts->{'html-linked-style-sheet'} ) {
4594 $fh_css->print(
4595 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
4596 );
4597 }
4598
4599 # use css embedded in this file
4600 elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
4601 $fh_css->print( <<'ENDCSS');
4602<style type="text/css">
4603<!--
4604ENDCSS
4605 write_style_sheet_data($fh_css);
4606 $fh_css->print( <<"ENDCSS");
4607-->
4608</style>
4609ENDCSS
4610 }
4611
4612 # -----------------------------------------------------------
4613 # path 2: use pod2html if requested
4614 # If we fail for some reason, continue on to path 3
4615 # -----------------------------------------------------------
4616 if ( $rOpts->{'pod2html'} ) {
4617 my $rpod_string = $self->{_rpod_string};
4618 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
4619 $rpre_string_stack )
4620 && return;
4621 }
4622
4623 # --------------------------------------------------
4624 # path 3: write code in html, with pod only in italics
4625 # --------------------------------------------------
4626 my $input_file = $self->{_input_file};
4627 my $title = escape_html($input_file);
4628 my $date = localtime;
4629 $html_fh->print( <<"HTML_START");
4630<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4631 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
4632<!-- Generated by perltidy on $date -->
4633<html xmlns="http://www.w3.org/1999/xhtml">
4634<head>
4635<title>$title</title>
4636HTML_START
4637
4638 # output the css, if used
4639 if ($css_string) {
4640 $html_fh->print($css_string);
4641 $html_fh->print( <<"ENDCSS");
4642</head>
4643<body>
4644ENDCSS
4645 }
4646 else {
4647
4648 $html_fh->print( <<"HTML_START");
4649</head>
4650<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
4651HTML_START
4652 }
4653
4654 $html_fh->print("<a name=\"-top-\"></a>\n");
4655 $html_fh->print( <<"EOM");
4656<h1>$title</h1>
4657EOM
4658
4659 # copy the table of contents
4660 if ( $$rtoc_string
4661 && !$rOpts->{'frames'}
4662 && $rOpts->{'html-table-of-contents'} )
4663 {
4664 $html_fh->print($$rtoc_string);
4665 }
4666
4667 # copy the pre section(s)
4668 my $fname_comment = $input_file;
4669 $fname_comment =~ s/--+/-/g; # protect HTML comment tags
4670 $html_fh->print( <<"END_PRE");
4671<hr />
4672<!-- contents of filename: $fname_comment -->
4673<pre>
4674END_PRE
4675
4676 foreach my $rpre_string (@$rpre_string_stack) {
4677 $html_fh->print($$rpre_string);
4678 }
4679
4680 # and finish the html page
4681 $html_fh->print( <<"HTML_END");
4682</pre>
4683</body>
4684</html>
4685HTML_END
4686 eval { $html_fh->close() }; # could be object without close method
4687
4688 if ( $rOpts->{'frames'} ) {
4689 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
4690 $self->make_frame( \@toc );
4691 }
4692}
4693
4694sub markup_tokens {
4695 my $self = shift;
4696 my ( $rtokens, $rtoken_type, $rlevels ) = @_;
4697 my ( @colored_tokens, $j, $string, $type, $token, $level );
4698 my $rlast_level = $self->{_rlast_level};
4699 my $rpackage_stack = $self->{_rpackage_stack};
4700
4701 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
4702 $type = $$rtoken_type[$j];
4703 $token = $$rtokens[$j];
4704 $level = $$rlevels[$j];
4705 $level = 0 if ( $level < 0 );
4706
4707 #-------------------------------------------------------
4708 # Update the package stack. The package stack is needed to keep
4709 # the toc correct because some packages may be declared within
4710 # blocks and go out of scope when we leave the block.
4711 #-------------------------------------------------------
4712 if ( $level > $$rlast_level ) {
4713 unless ( $rpackage_stack->[ $level - 1 ] ) {
4714 $rpackage_stack->[ $level - 1 ] = 'main';
4715 }
4716 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
4717 }
4718 elsif ( $level < $$rlast_level ) {
4719 my $package = $rpackage_stack->[$level];
4720 unless ($package) { $package = 'main' }
4721
4722 # if we change packages due to a nesting change, we
4723 # have to make an entry in the toc
4724 if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
4725 $self->add_toc_item( $package, 'package' );
4726 }
4727 }
4728 $$rlast_level = $level;
4729
4730 #-------------------------------------------------------
4731 # Intercept a sub name here; split it
4732 # into keyword 'sub' and sub name; and add an
4733 # entry in the toc
4734 #-------------------------------------------------------
4735 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
4736 $token = $self->markup_html_element( $1, 'k' );
4737 push @colored_tokens, $token;
4738 $token = $2;
4739 $type = 'M';
4740
4741 # but don't include sub declarations in the toc;
4742 # these wlll have leading token types 'i;'
4743 my $signature = join "", @$rtoken_type;
4744 unless ( $signature =~ /^i;/ ) {
4745 my $subname = $token;
4746 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
4747 $self->add_toc_item( $subname, 'sub' );
4748 }
4749 }
4750
4751 #-------------------------------------------------------
4752 # Intercept a package name here; split it
4753 # into keyword 'package' and name; add to the toc,
4754 # and update the package stack
4755 #-------------------------------------------------------
4756 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
4757 $token = $self->markup_html_element( $1, 'k' );
4758 push @colored_tokens, $token;
4759 $token = $2;
4760 $type = 'i';
4761 $self->add_toc_item( "$token", 'package' );
4762 $rpackage_stack->[$level] = $token;
4763 }
4764
4765 $token = $self->markup_html_element( $token, $type );
4766 push @colored_tokens, $token;
4767 }
4768 return ( \@colored_tokens );
4769}
4770
4771sub markup_html_element {
4772 my $self = shift;
4773 my ( $token, $type ) = @_;
4774
4775 return $token if ( $type eq 'b' ); # skip a blank token
4776 return $token if ( $token =~ /^\s*$/ ); # skip a blank line
4777 $token = escape_html($token);
4778
4779 # get the short abbreviation for this token type
4780 my $short_name = $token_short_names{$type};
4781 if ( !defined($short_name) ) {
4782 $short_name = "pu"; # punctuation is default
4783 }
4784
4785 # handle style sheets..
4786 if ( !$rOpts->{'nohtml-style-sheets'} ) {
4787 if ( $short_name ne 'pu' ) {
4788 $token = qq(<span class="$short_name">) . $token . "</span>";
4789 }
4790 }
4791
4792 # handle no style sheets..
4793 else {
4794 my $color = $html_color{$short_name};
4795
4796 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
4797 $token = qq(<font color="$color">) . $token . "</font>";
4798 }
4799 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
4800 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
4801 }
4802 return $token;
4803}
4804
4805sub escape_html {
4806
4807 my $token = shift;
4808 if ($missing_html_entities) {
4809 $token =~ s/\&/&amp;/g;
4810 $token =~ s/\</&lt;/g;
4811 $token =~ s/\>/&gt;/g;
4812 $token =~ s/\"/&quot;/g;
4813 }
4814 else {
4815 HTML::Entities::encode_entities($token);
4816 }
4817 return $token;
4818}
4819
4820sub finish_formatting {
4821
4822 # called after last line
4823 my $self = shift;
4824 $self->close_html_file();
4825 return;
4826}
4827
4828sub write_line {
4829
4830 my $self = shift;
4831 return unless $self->{_html_file_opened};
4832 my $html_pre_fh = $self->{_html_pre_fh};
4833 my ($line_of_tokens) = @_;
4834 my $line_type = $line_of_tokens->{_line_type};
4835 my $input_line = $line_of_tokens->{_line_text};
4836 my $line_number = $line_of_tokens->{_line_number};
4837 chomp $input_line;
4838
4839 # markup line of code..
4840 my $html_line;
4841 if ( $line_type eq 'CODE' ) {
4842 my $rtoken_type = $line_of_tokens->{_rtoken_type};
4843 my $rtokens = $line_of_tokens->{_rtokens};
4844 my $rlevels = $line_of_tokens->{_rlevels};
4845
4846 if ( $input_line =~ /(^\s*)/ ) {
4847 $html_line = $1;
4848 }
4849 else {
4850 $html_line = "";
4851 }
4852 my ($rcolored_tokens) =
4853 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
4854 $html_line .= join '', @$rcolored_tokens;
4855 }
4856
4857 # markup line of non-code..
4858 else {
4859 my $line_character;
4860 if ( $line_type eq 'HERE' ) { $line_character = 'H' }
4861 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
4862 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
4863 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
4864 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
4865 elsif ( $line_type eq 'END_START' ) {
4866 $line_character = 'k';
4867 $self->add_toc_item( '__END__', '__END__' );
4868 }
4869 elsif ( $line_type eq 'DATA_START' ) {
4870 $line_character = 'k';
4871 $self->add_toc_item( '__DATA__', '__DATA__' );
4872 }
4873 elsif ( $line_type =~ /^POD/ ) {
4874 $line_character = 'P';
4875 if ( $rOpts->{'pod2html'} ) {
4876 my $html_pod_fh = $self->{_html_pod_fh};
4877 if ( $line_type eq 'POD_START' ) {
4878
4879 my $rpre_string_stack = $self->{_rpre_string_stack};
4880 my $rpre_string = $rpre_string_stack->[-1];
4881
4882 # if we have written any non-blank lines to the
4883 # current pre section, start writing to a new output
4884 # string
4885 if ( $$rpre_string =~ /\S/ ) {
4886 my $pre_string;
4887 $html_pre_fh =
4888 Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4889 $self->{_html_pre_fh} = $html_pre_fh;
4890 push @$rpre_string_stack, \$pre_string;
4891
4892 # leave a marker in the pod stream so we know
4893 # where to put the pre section we just
4894 # finished.
4895 my $for_html = '=for html'; # don't confuse pod utils
4896 $html_pod_fh->print(<<EOM);
4897
4898$for_html
4899<!-- pERLTIDY sECTION -->
4900
4901EOM
4902 }
4903
4904 # otherwise, just clear the current string and start
4905 # over
4906 else {
4907 $$rpre_string = "";
4908 $html_pod_fh->print("\n");
4909 }
4910 }
4911 $html_pod_fh->print( $input_line . "\n" );
4912 if ( $line_type eq 'POD_END' ) {
4913 $self->{_pod_cut_count}++;
4914 $html_pod_fh->print("\n");
4915 }
4916 return;
4917 }
4918 }
4919 else { $line_character = 'Q' }
4920 $html_line = $self->markup_html_element( $input_line, $line_character );
4921 }
4922
4923 # add the line number if requested
4924 if ( $rOpts->{'html-line-numbers'} ) {
4925 my $extra_space .=
4926 ( $line_number < 10 ) ? " "
4927 : ( $line_number < 100 ) ? " "
4928 : ( $line_number < 1000 ) ? " "
4929 : "";
4930 $html_line = $extra_space . $line_number . " " . $html_line;
4931 }
4932
4933 # write the line
4934 $html_pre_fh->print("$html_line\n");
4935}
4936
4937#####################################################################
4938#
4939# The Perl::Tidy::Formatter package adds indentation, whitespace, and
4940# line breaks to the token stream
4941#
4942# WARNING: This is not a real class for speed reasons. Only one
4943# Formatter may be used.
4944#
4945#####################################################################
4946
4947package Perl::Tidy::Formatter;
4948
4949BEGIN {
4950
4951 # Caution: these debug flags produce a lot of output
4952 # They should all be 0 except when debugging small scripts
4953 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
4954 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
4955 use constant FORMATTER_DEBUG_FLAG_CI => 0;
4956 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
4957 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
4958 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
4959 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
4960 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
4961 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
4962 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
4963 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
4964 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
4965
4966 my $debug_warning = sub {
4967 print "FORMATTER_DEBUGGING with key $_[0]\n";
4968 };
4969
4970 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
4971 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
4972 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
4973 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
4974 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
4975 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
4976 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
4977 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
4978 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
4979 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
4980 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
4981 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
4982}
4983
4984use Carp;
4985use vars qw{
4986
4987 @gnu_stack
4988 $max_gnu_stack_index
4989 $gnu_position_predictor
4990 $line_start_index_to_go
4991 $last_indentation_written
4992 $last_unadjusted_indentation
4993 $last_leading_token
4994
4995 $saw_VERSION_in_this_file
4996 $saw_END_or_DATA_
4997
4998 @gnu_item_list
4999 $max_gnu_item_index
5000 $gnu_sequence_number
5001 $last_output_indentation
5002 %last_gnu_equals
5003 %gnu_comma_count
5004 %gnu_arrow_count
5005
5006 @block_type_to_go
5007 @type_sequence_to_go
5008 @container_environment_to_go
5009 @bond_strength_to_go
5010 @forced_breakpoint_to_go
5011 @lengths_to_go
5012 @levels_to_go
5013 @leading_spaces_to_go
5014 @reduced_spaces_to_go
5015 @matching_token_to_go
5016 @mate_index_to_go
5017 @nesting_blocks_to_go
5018 @ci_levels_to_go
5019 @nesting_depth_to_go
5020 @nobreak_to_go
5021 @old_breakpoint_to_go
5022 @tokens_to_go
5023 @types_to_go
5024
5025 %saved_opening_indentation
5026
5027 $max_index_to_go
5028 $comma_count_in_batch
5029 $old_line_count_in_batch
5030 $last_nonblank_index_to_go
5031 $last_nonblank_type_to_go
5032 $last_nonblank_token_to_go
5033 $last_last_nonblank_index_to_go
5034 $last_last_nonblank_type_to_go
5035 $last_last_nonblank_token_to_go
5036 @nonblank_lines_at_depth
5037 $starting_in_quote
5038
5039 $forced_breakpoint_count
5040 $forced_breakpoint_undo_count
5041 @forced_breakpoint_undo_stack
5042 %postponed_breakpoint
5043
5044 $tabbing
5045 $embedded_tab_count
5046 $first_embedded_tab_at
5047 $last_embedded_tab_at
5048 $deleted_semicolon_count
5049 $first_deleted_semicolon_at
5050 $last_deleted_semicolon_at
5051 $added_semicolon_count
5052 $first_added_semicolon_at
5053 $last_added_semicolon_at
5054 $saw_negative_indentation
5055 $first_tabbing_disagreement
5056 $last_tabbing_disagreement
5057 $in_tabbing_disagreement
5058 $tabbing_disagreement_count
5059 $input_line_tabbing
5060
5061 $last_line_type
5062 $last_line_leading_type
5063 $last_line_leading_level
5064 $last_last_line_leading_level
5065
5066 %block_leading_text
5067 %block_opening_line_number
5068 $csc_new_statement_ok
5069 $accumulating_text_for_block
5070 $leading_block_text
5071 $rleading_block_if_elsif_text
5072 $leading_block_text_level
5073 $leading_block_text_length_exceeded
5074 $leading_block_text_line_length
5075 $leading_block_text_line_number
5076 $closing_side_comment_prefix_pattern
5077 $closing_side_comment_list_pattern
5078
5079 $last_nonblank_token
5080 $last_nonblank_type
5081 $last_last_nonblank_token
5082 $last_last_nonblank_type
5083 $last_nonblank_block_type
5084 $last_output_level
5085 %is_do_follower
5086 %is_if_brace_follower
5087 %space_after_keyword
5088 $rbrace_follower
5089 $looking_for_else
5090 %is_last_next_redo_return
5091 %is_other_brace_follower
5092 %is_else_brace_follower
5093 %is_anon_sub_brace_follower
5094 %is_anon_sub_1_brace_follower
5095 %is_sort_map_grep
5096 %is_sort_map_grep_eval
5097 %is_sort_map_grep_eval_do
5098 %is_block_without_semicolon
5099 %is_if_unless
5100 %is_and_or
5101 %is_assignment
5102 %is_chain_operator
5103 %is_if_unless_and_or_last_next_redo_return
5104
5105 @has_broken_sublist
5106 @dont_align
5107 @want_comma_break
5108
5109 $index_start_one_line_block
5110 $semicolons_before_block_self_destruct
5111 $index_max_forced_break
5112 $input_line_number
5113 $diagnostics_object
5114 $vertical_aligner_object
5115 $logger_object
5116 $file_writer_object
5117 $formatter_self
5118 @ci_stack
5119 $last_line_had_side_comment
5120 %want_break_before
5121 %outdent_keyword
5122 $static_block_comment_pattern
5123 $static_side_comment_pattern
5124 %opening_vertical_tightness
5125 %closing_vertical_tightness
5126 %closing_token_indentation
5127 $block_brace_vertical_tightness_pattern
5128
5129 $rOpts_add_newlines
5130 $rOpts_add_whitespace
5131 $rOpts_block_brace_tightness
5132 $rOpts_block_brace_vertical_tightness
5133 $rOpts_brace_left_and_indent
5134 $rOpts_comma_arrow_breakpoints
5135 $rOpts_break_at_old_keyword_breakpoints
5136 $rOpts_break_at_old_comma_breakpoints
5137 $rOpts_break_at_old_logical_breakpoints
5138 $rOpts_break_at_old_trinary_breakpoints
5139 $rOpts_closing_side_comment_else_flag
5140 $rOpts_closing_side_comment_maximum_text
5141 $rOpts_continuation_indentation
5142 $rOpts_cuddled_else
5143 $rOpts_delete_old_whitespace
5144 $rOpts_fuzzy_line_length
5145 $rOpts_indent_columns
5146 $rOpts_line_up_parentheses
5147 $rOpts_maximum_fields_per_table
5148 $rOpts_maximum_line_length
5149 $rOpts_short_concatenation_item_length
5150 $rOpts_swallow_optional_blank_lines
5151 $rOpts_ignore_old_line_breaks
5152
5153 $half_maximum_line_length
5154
5155 %is_opening_type
5156 %is_closing_type
5157 %is_keyword_returning_list
5158 %tightness
5159 %matching_token
5160 $rOpts
5161 %right_bond_strength
5162 %left_bond_strength
5163 %binary_ws_rules
5164 %want_left_space
5165 %want_right_space
5166 %is_digraph
5167 %is_trigraph
5168 $bli_pattern
5169 $bli_list_string
5170 %is_closing_type
5171 %is_opening_type
5172 %is_closing_token
5173 %is_opening_token
5174};
5175
5176BEGIN {
5177
5178 # default list of block types for which -bli would apply
5179 $bli_list_string = 'if else elsif unless while for foreach do : sub';
5180
5181 @_ = qw(
5182 .. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <>
5183 <= >= == =~ !~ != ++ -- /= x=
5184 );
5185 @is_digraph{@_} = (1) x scalar(@_);
5186
5187 @_ = qw( ... **= <<= >>= &&= ||= <=> );
5188 @is_trigraph{@_} = (1) x scalar(@_);
5189
5190 @_ = qw(
5191 = **= += *= &= <<= &&=
5192 -= /= |= >>= ||=
5193 .= %= ^=
5194 x=
5195 );
5196 @is_assignment{@_} = (1) x scalar(@_);
5197
5198 @_ = qw(
5199 grep
5200 keys
5201 map
5202 reverse
5203 sort
5204 split
5205 );
5206 @is_keyword_returning_list{@_} = (1) x scalar(@_);
5207
5208 @_ = qw(is if unless and or last next redo return);
5209 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5210
5211 @_ = qw(last next redo return);
5212 @is_last_next_redo_return{@_} = (1) x scalar(@_);
5213
5214 @_ = qw(sort map grep);
5215 @is_sort_map_grep{@_} = (1) x scalar(@_);
5216
5217 @_ = qw(sort map grep eval);
5218 @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5219
5220 @_ = qw(sort map grep eval do);
5221 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5222
5223 @_ = qw(if unless);
5224 @is_if_unless{@_} = (1) x scalar(@_);
5225
5226 @_ = qw(and or);
5227 @is_and_or{@_} = (1) x scalar(@_);
5228
5229 # We can remove semicolons after blocks preceded by these keywords
5230 @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
5231 unless while until for foreach);
5232 @is_block_without_semicolon{@_} = (1) x scalar(@_);
5233
5234 # 'L' is token for opening { at hash key
5235 @_ = qw" L { ( [ ";
5236 @is_opening_type{@_} = (1) x scalar(@_);
5237
5238 # 'R' is token for closing } at hash key
5239 @_ = qw" R } ) ] ";
5240 @is_closing_type{@_} = (1) x scalar(@_);
5241
5242 @_ = qw" { ( [ ";
5243 @is_opening_token{@_} = (1) x scalar(@_);
5244
5245 @_ = qw" } ) ] ";
5246 @is_closing_token{@_} = (1) x scalar(@_);
5247}
5248
5249# whitespace codes
5250use constant WS_YES => 1;
5251use constant WS_OPTIONAL => 0;
5252use constant WS_NO => -1;
5253
5254# Token bond strengths.
5255use constant NO_BREAK => 10000;
5256use constant VERY_STRONG => 100;
5257use constant STRONG => 2.1;
5258use constant NOMINAL => 1.1;
5259use constant WEAK => 0.8;
5260use constant VERY_WEAK => 0.55;
5261
5262# values for testing indexes in output array
5263use constant UNDEFINED_INDEX => -1;
5264
5265# Maximum number of little messages; probably need not be changed.
5266use constant MAX_NAG_MESSAGES => 6;
5267
5268# increment between sequence numbers for each type
5269# For example, ?: pairs might have numbers 7,11,15,...
5270use constant TYPE_SEQUENCE_INCREMENT => 4;
5271
5272{
5273
5274 # methods to count instances
5275 my $_count = 0;
5276 sub get_count { $_count; }
5277 sub _increment_count { ++$_count }
5278 sub _decrement_count { --$_count }
5279}
5280
5281# interface to Perl::Tidy::Logger routines
5282sub warning {
5283 if ($logger_object) {
5284 $logger_object->warning(@_);
5285 }
5286}
5287
5288sub complain {
5289 if ($logger_object) {
5290 $logger_object->complain(@_);
5291 }
5292}
5293
5294sub write_logfile_entry {
5295 if ($logger_object) {
5296 $logger_object->write_logfile_entry(@_);
5297 }
5298}
5299
5300sub black_box {
5301 if ($logger_object) {
5302 $logger_object->black_box(@_);
5303 }
5304}
5305
5306sub report_definite_bug {
5307 if ($logger_object) {
5308 $logger_object->report_definite_bug();
5309 }
5310}
5311
5312sub get_saw_brace_error {
5313 if ($logger_object) {
5314 $logger_object->get_saw_brace_error();
5315 }
5316}
5317
5318sub we_are_at_the_last_line {
5319 if ($logger_object) {
5320 $logger_object->we_are_at_the_last_line();
5321 }
5322}
5323
5324# interface to Perl::Tidy::Diagnostics routine
5325sub write_diagnostics {
5326
5327 if ($diagnostics_object) {
5328 $diagnostics_object->write_diagnostics(@_);
5329 }
5330}
5331
5332sub get_added_semicolon_count {
5333 my $self = shift;
5334 return $added_semicolon_count;
5335}
5336
5337sub DESTROY {
5338 $_[0]->_decrement_count();
5339}
5340
5341sub new {
5342
5343 my $class = shift;
5344
5345 # we are given an object with a write_line() method to take lines
5346 my %defaults = (
5347 sink_object => undef,
5348 diagnostics_object => undef,
5349 logger_object => undef,
5350 );
5351 my %args = ( %defaults, @_ );
5352
5353 $logger_object = $args{logger_object};
5354 $diagnostics_object = $args{diagnostics_object};
5355
5356 # we create another object with a get_line() and peek_ahead() method
5357 my $sink_object = $args{sink_object};
5358 $file_writer_object =
5359 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
5360
5361 # initialize the leading whitespace stack to negative levels
5362 # so that we can never run off the end of the stack
5363 $gnu_position_predictor = 0; # where the current token is predicted to be
5364 $max_gnu_stack_index = 0;
5365 $max_gnu_item_index = -1;
5366 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
5367 @gnu_item_list = ();
5368 $last_output_indentation = 0;
5369 $last_indentation_written = 0;
5370 $last_unadjusted_indentation = 0;
5371 $last_leading_token = "";
5372
5373 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
5374 $saw_END_or_DATA_ = 0;
5375
5376 @block_type_to_go = ();
5377 @type_sequence_to_go = ();
5378 @container_environment_to_go = ();
5379 @bond_strength_to_go = ();
5380 @forced_breakpoint_to_go = ();
5381 @lengths_to_go = (); # line length to start of ith token
5382 @levels_to_go = ();
5383 @matching_token_to_go = ();
5384 @mate_index_to_go = ();
5385 @nesting_blocks_to_go = ();
5386 @ci_levels_to_go = ();
5387 @nesting_depth_to_go = (0);
5388 @nobreak_to_go = ();
5389 @old_breakpoint_to_go = ();
5390 @tokens_to_go = ();
5391 @types_to_go = ();
5392 @leading_spaces_to_go = ();
5393 @reduced_spaces_to_go = ();
5394
5395 @dont_align = ();
5396 @has_broken_sublist = ();
5397 @want_comma_break = ();
5398
5399 @ci_stack = ("");
5400 $saw_negative_indentation = 0;
5401 $first_tabbing_disagreement = 0;
5402 $last_tabbing_disagreement = 0;
5403 $tabbing_disagreement_count = 0;
5404 $in_tabbing_disagreement = 0;
5405 $input_line_tabbing = undef;
5406
5407 $last_line_type = "";
5408 $last_last_line_leading_level = 0;
5409 $last_line_leading_level = 0;
5410 $last_line_leading_type = '#';
5411
5412 $last_nonblank_token = ';';
5413 $last_nonblank_type = ';';
5414 $last_last_nonblank_token = ';';
5415 $last_last_nonblank_type = ';';
5416 $last_nonblank_block_type = "";
5417 $last_output_level = 0;
5418 $looking_for_else = 0;
5419 $embedded_tab_count = 0;
5420 $first_embedded_tab_at = 0;
5421 $last_embedded_tab_at = 0;
5422 $deleted_semicolon_count = 0;
5423 $first_deleted_semicolon_at = 0;
5424 $last_deleted_semicolon_at = 0;
5425 $added_semicolon_count = 0;
5426 $first_added_semicolon_at = 0;
5427 $last_added_semicolon_at = 0;
5428 $last_line_had_side_comment = 0;
5429 %postponed_breakpoint = ();
5430
5431 # variables for adding side comments
5432 %block_leading_text = ();
5433 %block_opening_line_number = ();
5434 $csc_new_statement_ok = 1;
5435
5436 %saved_opening_indentation = ();
5437
5438 reset_block_text_accumulator();
5439
5440 prepare_for_new_input_lines();
5441
5442 $vertical_aligner_object =
5443 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
5444 $logger_object, $diagnostics_object );
5445
5446 if ( $rOpts->{'entab-leading-whitespace'} ) {
5447 write_logfile_entry(
5448"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
5449 );
5450 }
5451 elsif ( $rOpts->{'tabs'} ) {
5452 write_logfile_entry("Indentation will be with a tab character\n");
5453 }
5454 else {
5455 write_logfile_entry(
5456 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
5457 }
5458
5459 # This was the start of a formatter referent, but object-oriented
5460 # coding has turned out to be too slow here.
5461 $formatter_self = {};
5462
5463 bless $formatter_self, $class;
5464
5465 # Safety check..this is not a class yet
5466 if ( _increment_count() > 1 ) {
5467 confess
5468"Attempt to create more than 1 object in $class, which is not a true class yet\n";
5469 }
5470 return $formatter_self;
5471}
5472
5473sub prepare_for_new_input_lines {
5474
5475 $gnu_sequence_number++; # increment output batch counter
5476 %last_gnu_equals = ();
5477 %gnu_comma_count = ();
5478 %gnu_arrow_count = ();
5479 $line_start_index_to_go = 0;
5480 $max_gnu_item_index = UNDEFINED_INDEX;
5481 $index_max_forced_break = UNDEFINED_INDEX;
5482 $max_index_to_go = UNDEFINED_INDEX;
5483 $last_nonblank_index_to_go = UNDEFINED_INDEX;
5484 $last_nonblank_type_to_go = '';
5485 $last_nonblank_token_to_go = '';
5486 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
5487 $last_last_nonblank_type_to_go = '';
5488 $last_last_nonblank_token_to_go = '';
5489 $forced_breakpoint_count = 0;
5490 $forced_breakpoint_undo_count = 0;
5491 $rbrace_follower = undef;
5492 $lengths_to_go[0] = 0;
5493 $old_line_count_in_batch = 1;
5494 $comma_count_in_batch = 0;
5495 $starting_in_quote = 0;
5496
5497 destroy_one_line_block();
5498}
5499
5500sub write_line {
5501
5502 my $self = shift;
5503 my ($line_of_tokens) = @_;
5504
5505 my $line_type = $line_of_tokens->{_line_type};
5506 my $input_line = $line_of_tokens->{_line_text};
5507
5508 my $want_blank_line_next = 0;
5509
5510 # _line_type codes are:
5511 # SYSTEM - system-specific code before hash-bang line
5512 # CODE - line of perl code (including comments)
5513 # POD_START - line starting pod, such as '=head'
5514 # POD - pod documentation text
5515 # POD_END - last line of pod section, '=cut'
5516 # HERE - text of here-document
5517 # HERE_END - last line of here-doc (target word)
5518 # FORMAT - format section
5519 # FORMAT_END - last line of format section, '.'
5520 # DATA_START - __DATA__ line
5521 # DATA - unidentified text following __DATA__
5522 # END_START - __END__ line
5523 # END - unidentified text following __END__
5524 # ERROR - we are in big trouble, probably not a perl script
5525 #
5526 # handle line of code..
5527 if ( $line_type eq 'CODE' ) {
5528
5529 # let logger see all non-blank lines of code
5530 if ( $input_line !~ /^\s*$/ ) {
5531 my $output_line_number =
5532 $vertical_aligner_object->get_output_line_number();
5533 black_box( $line_of_tokens, $output_line_number );
5534 }
5535 print_line_of_tokens($line_of_tokens);
5536 }
5537
5538 # handle line of non-code..
5539 else {
5540
5541 # set special flags
5542 my $skip_line = 0;
5543 my $tee_line = 0;
5544 if ( $line_type =~ /^POD/ ) {
5545
5546 # Pod docs should have a preceding blank line. But be
5547 # very careful in __END__ and __DATA__ sections, because:
5548 # 1. the user may be using this section for any purpose whatsoever
5549 # 2. the blank counters are not active there
5550 # It should be safe to request a blank line between an
5551 # __END__ or __DATA__ and an immediately following '=head'
5552 # type line, (types END_START and DATA_START), but not for
5553 # any other lines of type END or DATA.
5554 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
5555 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
5556 if ( !$skip_line
5557 && $line_type eq 'POD_START'
5558 && $last_line_type !~ /^(END|DATA)$/ )
5559 {
5560 want_blank_line();
5561 }
5562
5563 # patch to put a blank line after =cut
5564 # (required by podchecker)
5565 if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
5566 $file_writer_object->reset_consecutive_blank_lines();
5567 $want_blank_line_next = 1;
5568 }
5569 }
5570
5571 # leave the blank counters in a predictable state
5572 # after __END__ or __DATA__
5573 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
5574 $file_writer_object->reset_consecutive_blank_lines();
5575 $saw_END_or_DATA_ = 1;
5576 }
5577
5578 # write unindented non-code line
5579 if ( !$skip_line ) {
5580 if ($tee_line) { $file_writer_object->tee_on() }
5581 write_unindented_line($input_line);
5582 if ($tee_line) { $file_writer_object->tee_off() }
5583 if ($want_blank_line_next) { want_blank_line(); }
5584 }
5585 }
5586 $last_line_type = $line_type;
5587}
5588
5589sub create_one_line_block {
5590 $index_start_one_line_block = $_[0];
5591 $semicolons_before_block_self_destruct = $_[1];
5592}
5593
5594sub destroy_one_line_block {
5595 $index_start_one_line_block = UNDEFINED_INDEX;
5596 $semicolons_before_block_self_destruct = 0;
5597}
5598
5599sub leading_spaces_to_go {
5600
5601 # return the number of indentation spaces for a token in the output stream;
5602 # these were previously stored by 'set_leading_whitespace'.
5603
5604 return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
5605
5606}
5607
5608sub get_SPACES {
5609
5610 # return the number of leading spaces associated with an indentation
5611 # variable $indentation is either a constant number of spaces or an object
5612 # with a get_SPACES method.
5613 my $indentation = shift;
5614 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
5615}
5616
5617sub get_RECOVERABLE_SPACES {
5618
5619 # return the number of spaces (+ means shift right, - means shift left)
5620 # that we would like to shift a group of lines with the same indentation
5621 # to get them to line up with their opening parens
5622 my $indentation = shift;
5623 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
5624}
5625
5626sub get_AVAILABLE_SPACES_to_go {
5627
5628 my $item = $leading_spaces_to_go[ $_[0] ];
5629
5630 # return the number of available leading spaces associated with an
5631 # indentation variable. $indentation is either a constant number of
5632 # spaces or an object with a get_AVAILABLE_SPACES method.
5633 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
5634}
5635
5636sub new_lp_indentation_item {
5637
5638 # this is an interface to the IndentationItem class
5639 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
5640
5641 # A negative level implies not to store the item in the item_list
5642 my $index = 0;
5643 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
5644
5645 my $item = Perl::Tidy::IndentationItem->new(
5646 $spaces, $level,
5647 $ci_level, $available_spaces,
5648 $index, $gnu_sequence_number,
5649 $align_paren, $max_gnu_stack_index,
5650 $line_start_index_to_go,
5651 );
5652
5653 if ( $level >= 0 ) {
5654 $gnu_item_list[$max_gnu_item_index] = $item;
5655 }
5656
5657 return $item;
5658}
5659
5660sub set_leading_whitespace {
5661
5662 # This routine defines leading whitespace
5663 # given: the level and continuation_level of a token,
5664 # define: space count of leading string which would apply if it
5665 # were the first token of a new line.
5666
5667 my ( $level, $ci_level, $in_continued_quote ) = @_;
5668
5669 # modify for -bli, which adds one continuation indentation for
5670 # opening braces
5671 if ( $rOpts_brace_left_and_indent
5672 && $max_index_to_go == 0
5673 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
5674 {
5675 $ci_level++;
5676 }
5677
5678 # patch to avoid trouble when input file has negative indentation.
5679 # other logic should catch this error.
5680 if ( $level < 0 ) { $level = 0 }
5681
5682 #-------------------------------------------
5683 # handle the standard indentation scheme
5684 #-------------------------------------------
5685 unless ($rOpts_line_up_parentheses) {
5686 my $space_count = $ci_level * $rOpts_continuation_indentation + $level *
5687 $rOpts_indent_columns;
5688 my $ci_spaces =
5689 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
5690
5691 if ($in_continued_quote) {
5692 $space_count = 0;
5693 $ci_spaces = 0;
5694 }
5695 $leading_spaces_to_go[$max_index_to_go] = $space_count;
5696 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
5697 return;
5698 }
5699
5700 #-------------------------------------------------------------
5701 # handle case of -lp indentation..
5702 #-------------------------------------------------------------
5703
5704 # The continued_quote flag means that this is the first token of a
5705 # line, and it is the continuation of some kind of multi-line quote
5706 # or pattern. It requires special treatment because it must have no
5707 # added leading whitespace. So we create a special indentation item
5708 # which is not in the stack.
5709 if ($in_continued_quote) {
5710 my $space_count = 0;
5711 my $available_space = 0;
5712 $level = -1; # flag to prevent storing in item_list
5713 $leading_spaces_to_go[$max_index_to_go] =
5714 $reduced_spaces_to_go[$max_index_to_go] =
5715 new_lp_indentation_item( $space_count, $level, $ci_level,
5716 $available_space, 0 );
5717 return;
5718 }
5719
5720 # get the top state from the stack
5721 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
5722 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
5723 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
5724
5725 my $type = $types_to_go[$max_index_to_go];
5726 my $token = $tokens_to_go[$max_index_to_go];
5727 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
5728
5729 if ( $type eq '{' || $type eq '(' ) {
5730
5731 $gnu_comma_count{ $total_depth + 1 } = 0;
5732 $gnu_arrow_count{ $total_depth + 1 } = 0;
5733
5734 # If we come to an opening token after an '=' token of some type,
5735 # see if it would be helpful to 'break' after the '=' to save space
5736 my $last_equals = $last_gnu_equals{$total_depth};
5737 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
5738
5739 # find the position if we break at the '='
5740 my $i_test = $last_equals;
5741 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
5742 my $test_position = total_line_length( $i_test, $max_index_to_go );
5743
5744 if (
5745
5746 # if we are beyond the midpoint
5747 $gnu_position_predictor > $half_maximum_line_length
5748
5749 # or if we can save some space by breaking at the '='
5750 # without obscuring the second line by the first
5751 || ( $test_position > 1 +
5752 total_line_length( $line_start_index_to_go, $last_equals ) )
5753 )
5754 {
5755
5756 # then make the switch -- note that we do not set a real
5757 # breakpoint here because we may not really need one; sub
5758 # scan_list will do that if necessary
5759 $line_start_index_to_go = $i_test + 1;
5760 $gnu_position_predictor = $test_position;
5761 }
5762 }
5763 }
5764
5765 # Check for decreasing depth ..
5766 # Note that one token may have both decreasing and then increasing
5767 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
5768 # in this example we would first go back to (1,0) then up to (2,0)
5769 # in a single call.
5770 if ( $level < $current_level || $ci_level < $current_ci_level ) {
5771
5772 # loop to find the first entry at or completely below this level
5773 my ( $lev, $ci_lev );
5774 while (1) {
5775 if ($max_gnu_stack_index) {
5776
5777 # save index of token which closes this level
5778 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
5779
5780 # Undo any extra indentation if we saw no commas
5781 my $available_spaces =
5782 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
5783
5784 my $comma_count = 0;
5785 my $arrow_count = 0;
5786 if ( $type eq '}' || $type eq ')' ) {
5787 $comma_count = $gnu_comma_count{$total_depth};
5788 $arrow_count = $gnu_arrow_count{$total_depth};
5789 $comma_count = 0 unless $comma_count;
5790 $arrow_count = 0 unless $arrow_count;
5791 }
5792 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
5793 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
5794
5795 if ( $available_spaces > 0 ) {
5796
5797 if ( $comma_count <= 0 || $arrow_count > 0 ) {
5798
5799 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
5800 my $seqno =
5801 $gnu_stack[$max_gnu_stack_index]
5802 ->get_SEQUENCE_NUMBER();
5803
5804 # Be sure this item was created in this batch. This
5805 # should be true because we delete any available
5806 # space from open items at the end of each batch.
5807 if ( $gnu_sequence_number != $seqno
5808 || $i > $max_gnu_item_index )
5809 {
5810 warning(
5811"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
5812 );
5813 report_definite_bug();
5814 }
5815
5816 else {
5817 if ( $arrow_count == 0 ) {
5818 $gnu_item_list[$i]
5819 ->permanently_decrease_AVAILABLE_SPACES(
5820 $available_spaces);
5821 }
5822 else {
5823 $gnu_item_list[$i]
5824 ->tentatively_decrease_AVAILABLE_SPACES(
5825 $available_spaces);
5826 }
5827
5828 my $j;
5829 for (
5830 $j = $i + 1 ;
5831 $j <= $max_gnu_item_index ;
5832 $j++
5833 )
5834 {
5835 $gnu_item_list[$j]
5836 ->decrease_SPACES($available_spaces);
5837 }
5838 }
5839 }
5840 }
5841
5842 # go down one level
5843 --$max_gnu_stack_index;
5844 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
5845 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
5846
5847 # stop when we reach a level at or below the current level
5848 if ( $lev <= $level && $ci_lev <= $ci_level ) {
5849 $space_count =
5850 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
5851 $current_level = $lev;
5852 $current_ci_level = $ci_lev;
5853 last;
5854 }
5855 }
5856
5857 # reached bottom of stack .. should never happen because
5858 # only negative levels can get here, and $level was forced
5859 # to be positive above.
5860 else {
5861 warning(
5862"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
5863 );
5864 report_definite_bug();
5865 last;
5866 }
5867 }
5868 }
5869
5870 # handle increasing depth
5871 if ( $level > $current_level || $ci_level > $current_ci_level ) {
5872
5873 # Compute the standard incremental whitespace. This will be
5874 # the minimum incremental whitespace that will be used. This
5875 # choice results in a smooth transition between the gnu-style
5876 # and the standard style.
5877 my $standard_increment =
5878 ( $level - $current_level ) * $rOpts_indent_columns +
5879 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
5880
5881 # Now we have to define how much extra incremental space
5882 # ("$available_space") we want. This extra space will be
5883 # reduced as necessary when long lines are encountered or when
5884 # it becomes clear that we do not have a good list.
5885 my $available_space = 0;
5886 my $align_paren = 0;
5887 my $excess = 0;
5888
5889 # initialization on empty stack..
5890 if ( $max_gnu_stack_index == 0 ) {
5891 $space_count = $level * $rOpts_indent_columns;
5892 }
5893
5894 # if this is a BLOCK, add the standard increment
5895 elsif ($last_nonblank_block_type) {
5896 $space_count += $standard_increment;
5897 }
5898
5899 # if last nonblank token was not structural indentation,
5900 # just use standard increment
5901 elsif ( $last_nonblank_type ne '{' ) {
5902 $space_count += $standard_increment;
5903 }
5904
5905 # otherwise use the space to the first non-blank level change token
5906 else {
5907
5908 $space_count = $gnu_position_predictor;
5909
5910 my $min_gnu_indentation =
5911 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
5912
5913 $available_space = $space_count - $min_gnu_indentation;
5914 if ( $available_space >= $standard_increment ) {
5915 $min_gnu_indentation += $standard_increment;
5916 }
5917 elsif ( $available_space > 1 ) {
5918 $min_gnu_indentation += $available_space + 1;
5919 }
5920 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
5921 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
5922 $min_gnu_indentation += 2;
5923 }
5924 else {
5925 $min_gnu_indentation += 1;
5926 }
5927 }
5928 else {
5929 $min_gnu_indentation += $standard_increment;
5930 }
5931 $available_space = $space_count - $min_gnu_indentation;
5932
5933 if ( $available_space < 0 ) {
5934 $space_count = $min_gnu_indentation;
5935 $available_space = 0;
5936 }
5937 $align_paren = 1;
5938 }
5939
5940 # update state, but not on a blank token
5941 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
5942
5943 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
5944
5945 ++$max_gnu_stack_index;
5946 $gnu_stack[$max_gnu_stack_index] =
5947 new_lp_indentation_item( $space_count, $level, $ci_level,
5948 $available_space, $align_paren );
5949
5950 # If the opening paren is beyond the half-line length, then
5951 # we will use the minimum (standard) indentation. This will
5952 # help avoid problems associated with running out of space
5953 # near the end of a line. As a result, in deeply nested
5954 # lists, there will be some indentations which are limited
5955 # to this minimum standard indentation. But the most deeply
5956 # nested container will still probably be able to shift its
5957 # parameters to the right for proper alignment, so in most
5958 # cases this will not be noticable.
5959 if ( $available_space > 0
5960 && $space_count > $half_maximum_line_length )
5961 {
5962 $gnu_stack[$max_gnu_stack_index]
5963 ->tentatively_decrease_AVAILABLE_SPACES($available_space);
5964 }
5965 }
5966 }
5967
5968 # Count commas and look for non-list characters. Once we see a
5969 # non-list character, we give up and don't look for any more commas.
5970 if ( $type eq '=>' ) {
5971 $gnu_arrow_count{$total_depth}++;
5972
5973 # tentatively treating '=>' like '=' for estimating breaks
5974 # TODO: this could use some experimentation
5975 $last_gnu_equals{$total_depth} = $max_index_to_go;
5976 }
5977
5978 elsif ( $type eq ',' ) {
5979 $gnu_comma_count{$total_depth}++;
5980 }
5981
5982 elsif ( $is_assignment{$type} ) {
5983 $last_gnu_equals{$total_depth} = $max_index_to_go;
5984 }
5985
5986 # this token might start a new line
5987 # if this is a non-blank..
5988 if ( $type ne 'b' ) {
5989
5990 # and if ..
5991 if (
5992
5993 # this is the first nonblank token of the line
5994 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
5995
5996 # or previous character was one of these:
5997 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
5998
5999 # or previous character was opening and this does not close it
6000 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6001 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6002
6003 # or this token is one of these:
6004 || $type =~ /^([\.]|\|\||\&\&)$/
6005
6006 # or this is a closing structure
6007 || ( $last_nonblank_type_to_go eq '}'
6008 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6009
6010 # or previous token was keyword 'return'
6011 || ( $last_nonblank_type_to_go eq 'k'
6012 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6013
6014 # or starting a new line at certain keywords is fine
6015 || ( $type eq 'k'
6016 && $is_if_unless_and_or_last_next_redo_return{$token} )
6017
6018 # or this is after an assignment after a closing structure
6019 || (
6020 $is_assignment{$last_nonblank_type_to_go}
6021 && (
6022 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6023
6024 # and it is significantly to the right
6025 || $gnu_position_predictor > $half_maximum_line_length
6026 )
6027 )
6028 )
6029 {
6030 check_for_long_gnu_style_lines();
6031 $line_start_index_to_go = $max_index_to_go;
6032
6033 # back up 1 token if we want to break before that type
6034 # otherwise, we may strand tokens like '?' or ':' on a line
6035 if ( $line_start_index_to_go > 0 ) {
6036 if ( $last_nonblank_type_to_go eq 'k' ) {
6037
6038 if ( $want_break_before{$last_nonblank_token_to_go} ) {
6039 $line_start_index_to_go--;
6040 }
6041 }
6042 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6043 $line_start_index_to_go--;
6044 }
6045 }
6046 }
6047 }
6048
6049 # remember the predicted position of this token on the output line
6050 if ( $max_index_to_go > $line_start_index_to_go ) {
6051 $gnu_position_predictor =
6052 total_line_length( $line_start_index_to_go, $max_index_to_go );
6053 }
6054 else {
6055 $gnu_position_predictor = $space_count +
6056 token_sequence_length( $max_index_to_go, $max_index_to_go );
6057 }
6058
6059 # store the indentation object for this token
6060 # this allows us to manipulate the leading whitespace
6061 # (in case we have to reduce indentation to fit a line) without
6062 # having to change any token values
6063 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6064 $reduced_spaces_to_go[$max_index_to_go] =
6065 ( $max_gnu_stack_index > 0 && $ci_level )
6066 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6067 : $gnu_stack[$max_gnu_stack_index];
6068 return;
6069}
6070
6071sub check_for_long_gnu_style_lines {
6072
6073 # look at the current estimated maximum line length, and
6074 # remove some whitespace if it exceeds the desired maximum
6075
6076 # this is only for the '-lp' style
6077 return unless ($rOpts_line_up_parentheses);
6078
6079 # nothing can be done if no stack items defined for this line
6080 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6081
6082 # see if we have exceeded the maximum desired line length
6083 # keep 2 extra free because they are needed in some cases
6084 # (result of trial-and-error testing)
6085 my $spaces_needed =
6086 $gnu_position_predictor - $rOpts_maximum_line_length + 2;
6087
6088 return if ( $spaces_needed < 0 );
6089
6090 # We are over the limit, so try to remove a requested number of
6091 # spaces from leading whitespace. We are only allowed to remove
6092 # from whitespace items created on this batch, since others have
6093 # already been used and cannot be undone.
6094 my @candidates = ();
6095 my $i;
6096
6097 # loop over all whitespace items created for the current batch
6098 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6099 my $item = $gnu_item_list[$i];
6100
6101 # item must still be open to be a candidate (otherwise it
6102 # cannot influence the current token)
6103 next if ( $item->get_CLOSED() >= 0 );
6104
6105 my $available_spaces = $item->get_AVAILABLE_SPACES();
6106
6107 if ( $available_spaces > 0 ) {
6108 push( @candidates, [ $i, $available_spaces ] );
6109 }
6110 }
6111
6112 return unless (@candidates);
6113
6114 # sort by available whitespace so that we can remove whitespace
6115 # from the maximum available first
6116 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
6117
6118 # keep removing whitespace until we are done or have no more
6119 my $candidate;
6120 foreach $candidate (@candidates) {
6121 my ( $i, $available_spaces ) = @{$candidate};
6122 my $deleted_spaces =
6123 ( $available_spaces > $spaces_needed )
6124 ? $spaces_needed
6125 : $available_spaces;
6126
6127 # remove the incremental space from this item
6128 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
6129
6130 my $i_debug = $i;
6131
6132 # update the leading whitespace of this item and all items
6133 # that came after it
6134 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
6135
6136 my $old_spaces = $gnu_item_list[$i]->get_SPACES();
6137 if ( $old_spaces > $deleted_spaces ) {
6138 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
6139 }
6140
6141 # shouldn't happen except for code bug:
6142 else {
6143 my $level = $gnu_item_list[$i_debug]->get_LEVEL();
6144 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
6145 my $old_level = $gnu_item_list[$i]->get_LEVEL();
6146 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
6147 warning(
6148"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
6149 );
6150 report_definite_bug();
6151 }
6152 }
6153 $gnu_position_predictor -= $deleted_spaces;
6154 $spaces_needed -= $deleted_spaces;
6155 last unless ( $spaces_needed > 0 );
6156 }
6157}
6158
6159sub finish_lp_batch {
6160
6161 # This routine is called once after each each output stream batch is
6162 # finished to undo indentation for all incomplete -lp
6163 # indentation levels. It is too risky to leave a level open,
6164 # because then we can't backtrack in case of a long line to follow.
6165 # This means that comments and blank lines will disrupt this
6166 # indentation style. But the vertical aligner may be able to
6167 # get the space back if there are side comments.
6168
6169 # this is only for the 'lp' style
6170 return unless ($rOpts_line_up_parentheses);
6171
6172 # nothing can be done if no stack items defined for this line
6173 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6174
6175 # loop over all whitespace items created for the current batch
6176 my $i;
6177 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6178 my $item = $gnu_item_list[$i];
6179
6180 # only look for open items
6181 next if ( $item->get_CLOSED() >= 0 );
6182
6183 # Tentatively remove all of the available space
6184 # (The vertical aligner will try to get it back later)
6185 my $available_spaces = $item->get_AVAILABLE_SPACES();
6186 if ( $available_spaces > 0 ) {
6187
6188 # delete incremental space for this item
6189 $gnu_item_list[$i]
6190 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
6191
6192 # Reduce the total indentation space of any nodes that follow
6193 # Note that any such nodes must necessarily be dependents
6194 # of this node.
6195 foreach ( $i + 1 .. $max_gnu_item_index ) {
6196 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
6197 }
6198 }
6199 }
6200 return;
6201}
6202
6203sub reduce_lp_indentation {
6204
6205 # reduce the leading whitespace at token $i if possible by $spaces_needed
6206 # (a large value of $spaces_needed will remove all excess space)
6207 # NOTE: to be called from scan_list only for a sequence of tokens
6208 # contained between opening and closing parens/braces/brackets
6209
6210 my ( $i, $spaces_wanted ) = @_;
6211 my $deleted_spaces = 0;
6212
6213 my $item = $leading_spaces_to_go[$i];
6214 my $available_spaces = $item->get_AVAILABLE_SPACES();
6215
6216 if (
6217 $available_spaces > 0
6218 && ( ( $spaces_wanted <= $available_spaces )
6219 || !$item->get_HAVE_CHILD() )
6220 )
6221 {
6222
6223 # we'll remove these spaces, but mark them as recoverable
6224 $deleted_spaces =
6225 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
6226 }
6227
6228 return $deleted_spaces;
6229}
6230
6231sub token_sequence_length {
6232
6233 # return length of tokens ($ifirst .. $ilast) including first & last
6234 # returns 0 if $ifirst > $ilast
6235 my $ifirst = shift;
6236 my $ilast = shift;
6237 return 0 if ( $ilast < 0 || $ifirst > $ilast );
6238 return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
6239 return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
6240}
6241
6242sub total_line_length {
6243
6244 # return length of a line of tokens ($ifirst .. $ilast)
6245 my $ifirst = shift;
6246 my $ilast = shift;
6247 if ( $ifirst < 0 ) { $ifirst = 0 }
6248
6249 return leading_spaces_to_go($ifirst) +
6250 token_sequence_length( $ifirst, $ilast );
6251}
6252
6253sub excess_line_length {
6254
6255 # return number of characters by which a line of tokens ($ifirst..$ilast)
6256 # exceeds the allowable line length.
6257 my $ifirst = shift;
6258 my $ilast = shift;
6259 if ( $ifirst < 0 ) { $ifirst = 0 }
6260 return leading_spaces_to_go($ifirst) +
6261 token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
6262}
6263
6264sub finish_formatting {
6265
6266 # flush buffer and write any informative messages
6267 my $self = shift;
6268
6269 flush();
6270 $file_writer_object->decrement_output_line_number()
6271 ; # fix up line number since it was incremented
6272 we_are_at_the_last_line();
6273 if ( $added_semicolon_count > 0 ) {
6274 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
6275 my $what =
6276 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
6277 write_logfile_entry("$added_semicolon_count $what added:\n");
6278 write_logfile_entry(
6279 " $first at input line $first_added_semicolon_at\n");
6280
6281 if ( $added_semicolon_count > 1 ) {
6282 write_logfile_entry(
6283 " Last at input line $last_added_semicolon_at\n");
6284 }
6285 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
6286 write_logfile_entry("\n");
6287 }
6288
6289 if ( $deleted_semicolon_count > 0 ) {
6290 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
6291 my $what =
6292 ( $deleted_semicolon_count > 1 )
6293 ? "semicolons were"
6294 : "semicolon was";
6295 write_logfile_entry(
6296 "$deleted_semicolon_count unnecessary $what deleted:\n");
6297 write_logfile_entry(
6298 " $first at input line $first_deleted_semicolon_at\n");
6299
6300 if ( $deleted_semicolon_count > 1 ) {
6301 write_logfile_entry(
6302 " Last at input line $last_deleted_semicolon_at\n");
6303 }
6304 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
6305 write_logfile_entry("\n");
6306 }
6307
6308 if ( $embedded_tab_count > 0 ) {
6309 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
6310 my $what =
6311 ( $embedded_tab_count > 1 )
6312 ? "quotes or patterns"
6313 : "quote or pattern";
6314 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
6315 write_logfile_entry(
6316"This means the display of this script could vary with device or software\n"
6317 );
6318 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
6319
6320 if ( $embedded_tab_count > 1 ) {
6321 write_logfile_entry(
6322 " Last at input line $last_embedded_tab_at\n");
6323 }
6324 write_logfile_entry("\n");
6325 }
6326
6327 if ($first_tabbing_disagreement) {
6328 write_logfile_entry(
6329"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
6330 );
6331 }
6332
6333 if ($in_tabbing_disagreement) {
6334 write_logfile_entry(
6335"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
6336 );
6337 }
6338 else {
6339
6340 if ($last_tabbing_disagreement) {
6341
6342 write_logfile_entry(
6343"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
6344 );
6345 }
6346 else {
6347 write_logfile_entry("No indentation disagreement seen\n");
6348 }
6349 }
6350 write_logfile_entry("\n");
6351
6352 $vertical_aligner_object->report_anything_unusual();
6353
6354 $file_writer_object->report_line_length_errors();
6355}
6356
6357sub check_options {
6358
6359 # This routine is called to check the Opts hash after it is defined
6360
6361 ($rOpts) = @_;
6362 my ( $tabbing_string, $tab_msg );
6363
6364 make_static_block_comment_pattern();
6365 make_static_side_comment_pattern();
6366 make_closing_side_comment_prefix();
6367 make_closing_side_comment_list_pattern();
6368
6369 # If closing side comments ARE selected, then we can safely
6370 # delete old closing side comments unless closing side comment
6371 # warnings are requested. This is a good idea because it will
6372 # eliminate any old csc's which fall below the line count threshold.
6373 # We cannot do this if warnings are turned on, though, because we
6374 # might delete some text which has been added. So that must
6375 # be handled when comments are created.
6376 if ( $rOpts->{'closing-side-comments'} ) {
6377 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
6378 $rOpts->{'delete-closing-side-comments'} = 1;
6379 }
6380 }
6381
6382 # If closing side comments ARE NOT selected, but warnings ARE
6383 # selected and we ARE DELETING csc's, then we will pretend to be
6384 # adding with a huge interval. This will force the comments to be
6385 # generated for comparison with the old comments, but not added.
6386 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
6387 if ( $rOpts->{'delete-closing-side-comments'} ) {
6388 $rOpts->{'delete-closing-side-comments'} = 0;
6389 $rOpts->{'closing-side-comments'} = 1;
6390 $rOpts->{'closing-side-comment-interval'} = 100000000;
6391 }
6392 }
6393
6394 make_bli_pattern();
6395 make_block_brace_vertical_tightness_pattern();
6396
6397 if ( $rOpts->{'line-up-parentheses'} ) {
6398
6399 if ( $rOpts->{'indent-only'}
6400 || !$rOpts->{'add-newlines'}
6401 || !$rOpts->{'delete-old-newlines'} )
6402 {
6403 warn <<EOM;
6404-----------------------------------------------------------------------
6405Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
6406
6407The -lp indentation logic requires that perltidy be able to coordinate
6408arbitrarily large numbers of line breakpoints. This isn't possible
6409with these flags. Sometimes an acceptable workaround is to use -wocb=3
6410-----------------------------------------------------------------------
6411EOM
6412 $rOpts->{'line-up-parentheses'} = 0;
6413 }
6414 }
6415
6416 # At present, tabs are not compatable with the line-up-parentheses style
6417 # (it would be possible to entab the total leading whitespace
6418 # just prior to writing the line, if desired).
6419 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
6420 warn <<EOM;
6421Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
6422EOM
6423 $rOpts->{'tabs'} = 0;
6424 }
6425
6426 # Likewise, tabs are not compatable with outdenting..
6427 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
6428 warn <<EOM;
6429Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
6430EOM
6431 $rOpts->{'tabs'} = 0;
6432 }
6433
6434 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
6435 warn <<EOM;
6436Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
6437EOM
6438 $rOpts->{'tabs'} = 0;
6439 }
6440
6441 if ( !$rOpts->{'space-for-semicolon'} ) {
6442 $want_left_space{'f'} = -1;
6443 }
6444
6445 if ( $rOpts->{'space-terminal-semicolon'} ) {
6446 $want_left_space{';'} = 1;
6447 }
6448
6449 # implement outdenting preferences for keywords
6450 %outdent_keyword = ();
6451
6452 # load defaults
6453 @_ = qw(next last redo goto return);
6454
6455 # override defaults if requested
6456 if ( $_ = $rOpts->{'outdent-keyword-list'} ) {
6457 s/^\s+//;
6458 s/\s+$//;
6459 @_ = split /\s+/;
6460 }
6461
6462 # FUTURE: if not a keyword, assume that it is an identifier
6463 foreach (@_) {
6464 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
6465 $outdent_keyword{$_} = 1;
6466 }
6467 else {
6468 warn "ignoring '$_' in -okwl list; not a perl keyword";
6469 }
6470 }
6471
6472 # implement user whitespace preferences
6473 if ( $_ = $rOpts->{'want-left-space'} ) {
6474 s/^\s+//;
6475 s/\s+$//;
6476 @_ = split /\s+/;
6477 @want_left_space{@_} = (1) x scalar(@_);
6478 }
6479
6480 if ( $_ = $rOpts->{'want-right-space'} ) {
6481 s/^\s+//;
6482 s/\s+$//;
6483 @_ = split /\s+/;
6484 @want_right_space{@_} = (1) x scalar(@_);
6485 }
6486 if ( $_ = $rOpts->{'nowant-left-space'} ) {
6487 s/^\s+//;
6488 s/\s+$//;
6489 @_ = split /\s+/;
6490 @want_left_space{@_} = (-1) x scalar(@_);
6491 }
6492
6493 if ( $_ = $rOpts->{'nowant-right-space'} ) {
6494 s/^\s+//;
6495 s/\s+$//;
6496 @want_right_space{@_} = (-1) x scalar(@_);
6497 }
6498 if ( $rOpts->{'dump-want-left-space'} ) {
6499 dump_want_left_space(*STDOUT);
6500 exit 1;
6501 }
6502
6503 if ( $rOpts->{'dump-want-right-space'} ) {
6504 dump_want_right_space(*STDOUT);
6505 exit 1;
6506 }
6507
6508 # default keywords for which space is introduced before an opening paren
6509 # (at present, including them messes up vertical alignment)
6510 @_ = qw(my local our and or eq ne if else elsif until
6511 unless while for foreach return switch case given when);
6512 @space_after_keyword{@_} = (1) x scalar(@_);
6513
6514 # allow user to modify these defaults
6515 if ( $_ = $rOpts->{'space-after-keyword'} ) {
6516 s/^\s+//;
6517 s/\s+$//;
6518 @_ = split /\s+/;
6519 @space_after_keyword{@_} = (1) x scalar(@_);
6520 }
6521
6522 if ( $_ = $rOpts->{'nospace-after-keyword'} ) {
6523 s/^\s+//;
6524 s/\s+$//;
6525 @_ = split /\s+/;
6526 @space_after_keyword{@_} = (0) x scalar(@_);
6527 }
6528
6529 # implement user break preferences
6530 if ( $_ = $rOpts->{'want-break-after'} ) {
6531 @_ = split /\s+/;
6532 foreach my $tok (@_) {
6533 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
6534 my $lbs = $left_bond_strength{$tok};
6535 my $rbs = $right_bond_strength{$tok};
6536 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
6537 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
6538 ( $lbs, $rbs );
6539 }
6540 }
6541 }
6542
6543 if ( $_ = $rOpts->{'want-break-before'} ) {
6544 s/^\s+//;
6545 s/\s+$//;
6546 @_ = split /\s+/;
6547 foreach my $tok (@_) {
6548 my $lbs = $left_bond_strength{$tok};
6549 my $rbs = $right_bond_strength{$tok};
6550 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
6551 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
6552 ( $lbs, $rbs );
6553 }
6554 }
6555 }
6556
6557 # make note if breaks are before certain key types
6558 %want_break_before = ();
6559
6560 foreach my $tok ( '.', ',', ':', '?', '&&', '||', 'and', 'or', 'xor' ) {
6561 $want_break_before{$tok} =
6562 $left_bond_strength{$tok} < $right_bond_strength{$tok};
6563 }
6564
6565 # Coordinate ?/: breaks, which must be similar
6566 if ( !$want_break_before{':'} ) {
6567 $want_break_before{'?'} = $want_break_before{':'};
6568 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
6569 $left_bond_strength{'?'} = NO_BREAK;
6570 }
6571
6572 # Define here tokens which may follow the closing brace of a do statement
6573 # on the same line, as in:
6574 # } while ( $something);
6575 @_ = qw(until while unless if ; );
6576 push @_, ',';
6577 @is_do_follower{@_} = (1) x scalar(@_);
6578
6579 # These tokens may follow the closing brace of an if or elsif block.
6580 # In other words, for cuddled else we want code to look like:
6581 # } elsif ( $something) {
6582 # } else {
6583 if ( $rOpts->{'cuddled-else'} ) {
6584 @_ = qw(else elsif);
6585 @is_if_brace_follower{@_} = (1) x scalar(@_);
6586 }
6587 else {
6588 %is_if_brace_follower = ();
6589 }
6590
6591 # nothing can follow the closing curly of an else { } block:
6592 %is_else_brace_follower = ();
6593
6594 # what can follow a multi-line anonymous sub definition closing curly:
6595 @_ = qw# ; : => or and && || ) #;
6596 push @_, ',';
6597 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
6598
6599 # what can follow a one-line anonynomous sub closing curly:
6600 # one-line anonumous subs also have ']' here...
6601 # see tk3.t and PP.pm
6602 @_ = qw# ; : => or and && || ) ] #;
6603 push @_, ',';
6604 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
6605
6606 # What can follow a closing curly of a block
6607 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
6608 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
6609 @_ = qw# ; : => or and && || ) #;
6610 push @_, ',';
6611
6612 # allow cuddled continue if cuddled else is specified
6613 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
6614
6615 @is_other_brace_follower{@_} = (1) x scalar(@_);
6616
6617 $right_bond_strength{'{'} = WEAK;
6618 $left_bond_strength{'{'} = VERY_STRONG;
6619
6620 # make -l=0 equal to -l=infinite
6621 if ( !$rOpts->{'maximum-line-length'} ) {
6622 $rOpts->{'maximum-line-length'} = 1000000;
6623 }
6624
6625 # make -lbl=0 equal to -lbl=infinite
6626 if ( !$rOpts->{'long-block-line-count'} ) {
6627 $rOpts->{'long-block-line-count'} = 1000000;
6628 }
6629
6630 my $ole = $rOpts->{'output-line-ending'};
6631 ##if ($^O =~ /^(VMS|
6632 if ($ole) {
6633 my %endings = (
6634 dos => "\015\012",
6635 win => "\015\012",
6636 mac => "\015",
6637 unix => "\012",
6638 );
6639 $ole = lc $ole;
6640 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
6641 my $str = join " ", keys %endings;
6642 die <<EOM;
6643Unrecognized line ending '$ole'; expecting one of: $str
6644EOM
6645 }
6646 if ( $rOpts->{'preserve-line-endings'} ) {
6647 warn "Ignoring -ple; conflicts with -ole\n";
6648 $rOpts->{'preserve-line-endings'} = undef;
6649 }
6650 }
6651
6652 # hashes used to simplify setting whitespace
6653 %tightness = (
6654 '{' => $rOpts->{'brace-tightness'},
6655 '}' => $rOpts->{'brace-tightness'},
6656 '(' => $rOpts->{'paren-tightness'},
6657 ')' => $rOpts->{'paren-tightness'},
6658 '[' => $rOpts->{'square-bracket-tightness'},
6659 ']' => $rOpts->{'square-bracket-tightness'},
6660 );
6661 %matching_token = (
6662 '{' => '}',
6663 '(' => ')',
6664 '[' => ']',
6665 '?' => ':',
6666 );
6667
6668 # frequently used parameters
6669 $rOpts_add_newlines = $rOpts->{'add-newlines'};
6670 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
6671 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
6672 $rOpts_block_brace_vertical_tightness =
6673 $rOpts->{'block-brace-vertical-tightness'};
6674 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
6675 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
6676 $rOpts_break_at_old_trinary_breakpoints =
6677 $rOpts->{'break-at-old-trinary-breakpoints'};
6678 $rOpts_break_at_old_comma_breakpoints =
6679 $rOpts->{'break-at-old-comma-breakpoints'};
6680 $rOpts_break_at_old_keyword_breakpoints =
6681 $rOpts->{'break-at-old-keyword-breakpoints'};
6682 $rOpts_break_at_old_logical_breakpoints =
6683 $rOpts->{'break-at-old-logical-breakpoints'};
6684 $rOpts_closing_side_comment_else_flag =
6685 $rOpts->{'closing-side-comment-else-flag'};
6686 $rOpts_closing_side_comment_maximum_text =
6687 $rOpts->{'closing-side-comment-maximum-text'};
6688 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
6689 $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
6690 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
6691 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
6692 $rOpts_indent_columns = $rOpts->{'indent-columns'};
6693 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
6694 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
6695 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
6696 $rOpts_short_concatenation_item_length =
6697 $rOpts->{'short-concatenation-item-length'};
6698 $rOpts_swallow_optional_blank_lines =
6699 $rOpts->{'swallow-optional-blank-lines'};
6700 $rOpts_ignore_old_line_breaks = $rOpts->{'ignore-old-line-breaks'};
6701 $half_maximum_line_length = $rOpts_maximum_line_length / 2;
6702
6703 # Note that both opening and closing tokens can access the opening
6704 # and closing flags of their container types.
6705 %opening_vertical_tightness = (
6706 '(' => $rOpts->{'paren-vertical-tightness'},
6707 '{' => $rOpts->{'brace-vertical-tightness'},
6708 '[' => $rOpts->{'square-bracket-vertical-tightness'},
6709 ')' => $rOpts->{'paren-vertical-tightness'},
6710 '}' => $rOpts->{'brace-vertical-tightness'},
6711 ']' => $rOpts->{'square-bracket-vertical-tightness'},
6712 );
6713
6714 %closing_vertical_tightness = (
6715 '(' => $rOpts->{'paren-vertical-tightness-closing'},
6716 '{' => $rOpts->{'brace-vertical-tightness-closing'},
6717 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
6718 ')' => $rOpts->{'paren-vertical-tightness-closing'},
6719 '}' => $rOpts->{'brace-vertical-tightness-closing'},
6720 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
6721 );
6722
6723 # assume flag for '>' same as ')' for closing qw quotes
6724 %closing_token_indentation = (
6725 ')' => $rOpts->{'closing-paren-indentation'},
6726 '}' => $rOpts->{'closing-brace-indentation'},
6727 ']' => $rOpts->{'closing-square-bracket-indentation'},
6728 '>' => $rOpts->{'closing-paren-indentation'},
6729 );
6730}
6731
6732sub make_static_block_comment_pattern {
6733
6734 # create the pattern used to identify static block comments
6735 $static_block_comment_pattern = '^(\s*)##';
6736
6737 # allow the user to change it
6738 if ( $rOpts->{'static-block-comment-prefix'} ) {
6739 my $prefix = $rOpts->{'static-block-comment-prefix'};
6740 $prefix =~ s/^\s*//;
6741 if ( $prefix !~ /^#/ ) {
6742 die "ERROR: the -sbcp prefix '$prefix' must begin with '#'\n";
6743
6744 }
6745 my $pattern = '^(\s*)' . $prefix;
6746 eval "'##'=~/$pattern/";
6747 if ($@) {
6748 die
6749"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
6750 }
6751 $static_block_comment_pattern = $pattern;
6752 }
6753}
6754
6755sub make_closing_side_comment_list_pattern {
6756
6757 # turn any input list into a regex for recognizing selected block types
6758 $closing_side_comment_list_pattern = '^\w+';
6759 if ( defined( $rOpts->{'closing-side-comment-list'} )
6760 && $rOpts->{'closing-side-comment-list'} )
6761 {
6762 $closing_side_comment_list_pattern =
6763 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
6764 }
6765}
6766
6767sub make_bli_pattern {
6768
6769 if (
6770 defined(
6771 $rOpts->{'brace-left-and-indent-list'}
6772 && $rOpts->{'brace-left-and-indent-list'}
6773 )
6774 )
6775 {
6776 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
6777 }
6778
6779 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
6780}
6781
6782sub make_block_brace_vertical_tightness_pattern {
6783
6784 # turn any input list into a regex for recognizing selected block types
6785 $block_brace_vertical_tightness_pattern =
6786 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
6787
6788 if (
6789 defined(
6790 $rOpts->{'block-brace-vertical-tightness-list'}
6791 && $rOpts->{'block-brace-vertical-tightness-list'}
6792 )
6793 )
6794 {
6795 $block_brace_vertical_tightness_pattern =
6796 make_block_pattern( '-bbvtl',
6797 $rOpts->{'block-brace-vertical-tightness-list'} );
6798 }
6799}
6800
6801sub make_block_pattern {
6802
6803 # given a string of block-type keywords, return a regex to match them
6804 # The only tricky part is that labels are indicated with a single ':'
6805 # and the 'sub' token text may have additional text after it (name of
6806 # sub).
6807 #
6808 # Example:
6809 #
6810 # input string: "if else elsif unless while for foreach do : sub";
6811 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
6812
6813 my ( $abbrev, $string ) = @_;
6814 $string =~ s/^\s+//;
6815 $string =~ s/\s+$//;
6816 my @list = split /\s+/, $string;
6817 my @words = ();
6818 my %seen;
6819 for my $i (@list) {
6820 next if $seen{$i};
6821 $seen{$i} = 1;
6822 if ( $i eq 'sub' ) {
6823 }
6824 elsif ( $i eq ':' ) {
6825 push @words, '\w+:';
6826 }
6827 elsif ( $i =~ /^\w/ ) {
6828 push @words, $i;
6829 }
6830 else {
6831 warn "unrecognized block type $i after $abbrev, ignoring\n";
6832 }
6833 }
6834 my $pattern = '(' . join( '|', @words ) . ')$';
6835 if ( $seen{'sub'} ) {
6836 $pattern = '(' . $pattern . '|sub)';
6837 }
6838 $pattern = '^' . $pattern;
6839 return $pattern;
6840}
6841
6842sub make_static_side_comment_pattern {
6843
6844 # create the pattern used to identify static side comments
6845 $static_side_comment_pattern = '^##';
6846
6847 # allow the user to change it
6848 if ( $rOpts->{'static-side-comment-prefix'} ) {
6849 my $prefix = $rOpts->{'static-side-comment-prefix'};
6850 $prefix =~ s/^\s*//;
6851 my $pattern = '^' . $prefix;
6852 eval "'##'=~/$pattern/";
6853 if ($@) {
6854 die
6855"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
6856 }
6857 $static_side_comment_pattern = $pattern;
6858 }
6859}
6860
6861sub make_closing_side_comment_prefix {
6862
6863 # Be sure we have a valid closing side comment prefix
6864 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
6865 my $csc_prefix_pattern;
6866 if ( !defined($csc_prefix) ) {
6867 $csc_prefix = '## end';
6868 $csc_prefix_pattern = '^##\s+end';
6869 }
6870 else {
6871 my $test_csc_prefix = $csc_prefix;
6872 if ( $test_csc_prefix !~ /^#/ ) {
6873 $test_csc_prefix = '#' . $test_csc_prefix;
6874 }
6875
6876 # make a regex to recognize the prefix
6877 my $test_csc_prefix_pattern = $test_csc_prefix;
6878
6879 # escape any special characters
6880 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
6881
6882 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
6883
6884 # allow exact number of intermediate spaces to vary
6885 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
6886
6887 # make sure we have a good pattern
6888 # if we fail this we probably have an error in escaping
6889 # characters.
6890 eval "'##'=~/$test_csc_prefix_pattern/";
6891 if ($@) {
6892
6893 # shouldn't happen..must have screwed up escaping, above
6894 report_definite_bug();
6895 warn
6896"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
6897
6898 # just warn and keep going with defaults
6899 warn "Please consider using a simpler -cscp prefix\n";
6900 warn "Using default -cscp instead; please check output\n";
6901 }
6902 else {
6903 $csc_prefix = $test_csc_prefix;
6904 $csc_prefix_pattern = $test_csc_prefix_pattern;
6905 }
6906 }
6907 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
6908 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
6909}
6910
6911sub dump_want_left_space {
6912 my $fh = shift;
6913 local $" = "\n";
6914 print $fh <<EOM;
6915These values are the main control of whitespace to the left of a token type;
6916They may be altered with the -wls parameter.
6917For a list of token types, use perltidy --dump-token-types (-dtt)
6918 1 means the token wants a space to its left
6919-1 means the token does not want a space to its left
6920------------------------------------------------------------------------
6921EOM
6922 foreach ( sort keys %want_left_space ) {
6923 print $fh "$_\t$want_left_space{$_}\n";
6924 }
6925}
6926
6927sub dump_want_right_space {
6928 my $fh = shift;
6929 local $" = "\n";
6930 print $fh <<EOM;
6931These values are the main control of whitespace to the right of a token type;
6932They may be altered with the -wrs parameter.
6933For a list of token types, use perltidy --dump-token-types (-dtt)
6934 1 means the token wants a space to its right
6935-1 means the token does not want a space to its right
6936------------------------------------------------------------------------
6937EOM
6938 foreach ( sort keys %want_right_space ) {
6939 print $fh "$_\t$want_right_space{$_}\n";
6940 }
6941}
6942
6943{ # begin is_essential_whitespace
6944
6945 my %is_sort_grep_map;
6946 my %is_for_foreach;
6947
6948 BEGIN {
6949
6950 @_ = qw(sort grep map);
6951 @is_sort_grep_map{@_} = (1) x scalar(@_);
6952
6953 @_ = qw(for foreach);
6954 @is_for_foreach{@_} = (1) x scalar(@_);
6955
6956 }
6957
6958 sub is_essential_whitespace {
6959
6960 # Essential whitespace means whitespace which cannot be safely deleted.
6961 # We are given three tokens and their types:
6962 # ($tokenl, $typel) is the token to the left of the space in question
6963 # ($tokenr, $typer) is the token to the right of the space in question
6964 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
6965 #
6966 # This is a slow routine but is not needed too often except when -mangle
6967 # is used.
6968 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
6969
6970 # never combine two bare words or numbers
6971 my $result = ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) )
6972
6973 # do not combine a number with a concatination dot
6974 # example: pom.caputo:
6975 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
6976 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
6977 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
6978
6979 # do not join a minus with a bare word, because you might form
6980 # a file test operator. Example from Complex.pm:
6981 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
6982 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
6983
6984 # and something like this could become ambiguous without space
6985 # after the '-':
6986 # use constant III=>1;
6987 # $a = $b - III;
6988 # and even this:
6989 # $a = - III;
6990 || ( ( $tokenl eq '-' )
6991 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
6992
6993 # '= -' should not become =- or you will get a warning
6994 # about reversed -=
6995 # || ($tokenr eq '-')
6996
6997 # keep a space between a quote and a bareword to prevent the
6998 # bareword from becomming a quote modifier.
6999 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7000
7001 # keep a space between a token ending in '$' and any word;
7002 # this caused trouble: "die @$ if $@"
7003 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7004 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7005
7006 # perl is very fussy about spaces before <<
7007 || ( $tokenr =~ /^\<\</ )
7008
7009 # avoid combining tokens to create new meanings. Example:
7010 # $a+ +$b must not become $a++$b
7011 || ( $is_digraph{ $tokenl . $tokenr } )
7012 || ( $is_trigraph{ $tokenl . $tokenr } )
7013
7014 # another example: do not combine these two &'s:
7015 # allow_options & &OPT_EXECCGI
7016 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
7017
7018 # don't combine $$ or $# with any alphanumeric
7019 # (testfile mangle.t with --mangle)
7020 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
7021
7022 # retain any space after possible filehandle
7023 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
7024 || ( $typel eq 'Z' || $typell eq 'Z' )
7025
7026 # keep paren separate in 'use Foo::Bar ()'
7027 || ( $tokenr eq '('
7028 && $typel eq 'w'
7029 && $typell eq 'k'
7030 && $tokenll eq 'use' )
7031
7032 # keep any space between filehandle and paren:
7033 # file mangle.t with --mangle:
7034 || ( $typel eq 'Y' && $tokenr eq '(' )
7035
7036 # retain any space after here doc operator ( hereerr.t)
7037 || ( $typel eq 'h' )
7038
7039 # FIXME: this needs some further work; extrude.t has test cases
7040 # it is safest to retain any space after start of ? : operator
7041 # because of perl's quirky parser.
7042 # ie, this line will fail if you remove the space after the '?':
7043 # $b=join $comma ? ',' : ':', @_; # ok
7044 # $b=join $comma ?',' : ':', @_; # error!
7045 # but this is ok :)
7046 # $b=join $comma?',' : ':', @_; # not a problem!
7047 ## || ($typel eq '?')
7048
7049 # be careful with a space around ++ and --, to avoid ambiguity as to
7050 # which token it applies
7051 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
7052 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
7053
7054 # need space after foreach my; for example, this will fail in
7055 # older versions of Perl:
7056 # foreach my$ft(@filetypes)...
7057 || (
7058 $tokenl eq 'my'
7059
7060 # /^(for|foreach)$/
7061 && $is_for_foreach{$tokenll} && $tokenr =~ /^\$/
7062 )
7063
7064 # must have space between grep and left paren; "grep(" will fail
7065 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
7066
7067 # don't stick numbers next to left parens, as in:
7068 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
7069 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
7070
7071 # don't join something like: for bla::bla:: abc
7072 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7073 || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) )
7074 ; # the value of this long logic sequence is the result we want
7075 return $result;
7076 }
7077}
7078
7079sub set_white_space_flag {
7080
7081 # This routine examines each pair of nonblank tokens and
7082 # sets values for array @white_space_flag.
7083 #
7084 # $white_space_flag[$j] is a flag indicating whether a white space
7085 # BEFORE token $j is needed, with the following values:
7086 #
7087 # -1 do not want a space before token $j
7088 # 0 optional space or $j is a whitespace
7089 # 1 want a space before token $j
7090 #
7091 #
7092 # The values for the first token will be defined based
7093 # upon the contents of the "to_go" output array.
7094 #
7095 # Note: retain debug print statements because they are usually
7096 # required after adding new token types.
7097
7098 BEGIN {
7099
7100 # initialize these global hashes, which control the use of
7101 # whitespace around tokens:
7102 #
7103 # %binary_ws_rules
7104 # %want_left_space
7105 # %want_right_space
7106 # %space_after_keyword
7107 #
7108 # Many token types are identical to the tokens themselves.
7109 # See the tokenizer for a complete list. Here are some special types:
7110 # k = perl keyword
7111 # f = semicolon in for statement
7112 # m = unary minus
7113 # p = unary plus
7114 # Note that :: is excluded since it should be contained in an identifier
7115 # Note that '->' is excluded because it never gets space
7116 # parentheses and brackets are excluded since they are handled specially
7117 # curly braces are included but may be overridden by logic, such as
7118 # newline logic.
7119
7120 # NEW_TOKENS: create a whitespace rule here. This can be as
7121 # simple as adding your new letter to @spaces_both_sides, for
7122 # example.
7123
7124 @_ = qw" L { ( [ ";
7125 @is_opening_type{@_} = (1) x scalar(@_);
7126
7127 @_ = qw" R } ) ] ";
7128 @is_closing_type{@_} = (1) x scalar(@_);
7129
7130 my @spaces_both_sides = qw"
7131 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || => += -=
7132 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>=
7133 &&= ||= <=> A k f w F n C Y U G v
7134 ";
7135
7136 my @spaces_left_side = qw"
7137 t ! ~ m p { \ h pp mm Z j
7138 ";
7139 push( @spaces_left_side, '#' ); # avoids warning message
7140
7141 my @spaces_right_side = qw"
7142 ; } ) ] R J ++ -- **=
7143 ";
7144 push( @spaces_right_side, ',' ); # avoids warning message
7145 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7146 @want_right_space{@spaces_both_sides} =
7147 (1) x scalar(@spaces_both_sides);
7148 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
7149 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7150 @want_left_space{@spaces_right_side} =
7151 (-1) x scalar(@spaces_right_side);
7152 @want_right_space{@spaces_right_side} =
7153 (1) x scalar(@spaces_right_side);
7154 $want_left_space{'L'} = WS_NO;
7155 $want_left_space{'->'} = WS_NO;
7156 $want_right_space{'->'} = WS_NO;
7157 $want_left_space{'**'} = WS_NO;
7158 $want_right_space{'**'} = WS_NO;
7159
7160 # hash type information must stay tightly bound
7161 # as in : ${xxxx}
7162 $binary_ws_rules{'i'}{'L'} = WS_NO;
7163 $binary_ws_rules{'i'}{'{'} = WS_YES;
7164 $binary_ws_rules{'k'}{'{'} = WS_YES;
7165 $binary_ws_rules{'U'}{'{'} = WS_YES;
7166 $binary_ws_rules{'i'}{'['} = WS_NO;
7167 $binary_ws_rules{'R'}{'L'} = WS_NO;
7168 $binary_ws_rules{'R'}{'{'} = WS_NO;
7169 $binary_ws_rules{'t'}{'L'} = WS_NO;
7170 $binary_ws_rules{'t'}{'{'} = WS_NO;
7171 $binary_ws_rules{'}'}{'L'} = WS_NO;
7172 $binary_ws_rules{'}'}{'{'} = WS_NO;
7173 $binary_ws_rules{'$'}{'L'} = WS_NO;
7174 $binary_ws_rules{'$'}{'{'} = WS_NO;
7175 $binary_ws_rules{'@'}{'L'} = WS_NO;
7176 $binary_ws_rules{'@'}{'{'} = WS_NO;
7177 $binary_ws_rules{'='}{'L'} = WS_YES;
7178
7179 # the following includes ') {'
7180 # as in : if ( xxx ) { yyy }
7181 $binary_ws_rules{']'}{'L'} = WS_NO;
7182 $binary_ws_rules{']'}{'{'} = WS_NO;
7183 $binary_ws_rules{')'}{'{'} = WS_YES;
7184 $binary_ws_rules{')'}{'['} = WS_NO;
7185 $binary_ws_rules{']'}{'['} = WS_NO;
7186 $binary_ws_rules{']'}{'{'} = WS_NO;
7187 $binary_ws_rules{'}'}{'['} = WS_NO;
7188 $binary_ws_rules{'R'}{'['} = WS_NO;
7189
7190 $binary_ws_rules{']'}{'++'} = WS_NO;
7191 $binary_ws_rules{']'}{'--'} = WS_NO;
7192 $binary_ws_rules{')'}{'++'} = WS_NO;
7193 $binary_ws_rules{')'}{'--'} = WS_NO;
7194
7195 $binary_ws_rules{'R'}{'++'} = WS_NO;
7196 $binary_ws_rules{'R'}{'--'} = WS_NO;
7197
7198 $binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label
7199 $binary_ws_rules{'w'}{':'} = WS_NO;
7200 $binary_ws_rules{'i'}{'Q'} = WS_YES;
7201 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
7202
7203 # FIXME: we need to split 'i' into variables and functions
7204 # and have no space for functions but space for variables. For now,
7205 # I have a special patch in the special rules below
7206 $binary_ws_rules{'i'}{'('} = WS_NO;
7207
7208 $binary_ws_rules{'w'}{'('} = WS_NO;
7209 $binary_ws_rules{'w'}{'{'} = WS_YES;
7210 }
7211 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7212 my ( $last_token, $last_type, $last_block_type, $token, $type,
7213 $block_type );
7214 my (@white_space_flag);
7215 my $j_tight_closing_paren = -1;
7216
7217 if ( $max_index_to_go >= 0 ) {
7218 $token = $tokens_to_go[$max_index_to_go];
7219 $type = $types_to_go[$max_index_to_go];
7220 $block_type = $block_type_to_go[$max_index_to_go];
7221 }
7222 else {
7223 $token = ' ';
7224 $type = 'b';
7225 $block_type = '';
7226 }
7227
7228 # loop over all tokens
7229 my ( $j, $ws );
7230
7231 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7232
7233 if ( $$rtoken_type[$j] eq 'b' ) {
7234 $white_space_flag[$j] = WS_OPTIONAL;
7235 next;
7236 }
7237
7238 # set a default value, to be changed as needed
7239 $ws = undef;
7240 $last_token = $token;
7241 $last_type = $type;
7242 $last_block_type = $block_type;
7243 $token = $$rtokens[$j];
7244 $type = $$rtoken_type[$j];
7245 $block_type = $$rblock_type[$j];
7246
7247 #---------------------------------------------------------------
7248 # section 1:
7249 # handle space on the inside of opening braces
7250 #---------------------------------------------------------------
7251
7252 # /^[L\{\(\[]$/
7253 if ( $is_opening_type{$last_type} ) {
7254
7255 $j_tight_closing_paren = -1;
7256
7257 # let's keep empty matched braces together: () {} []
7258 # except for BLOCKS
7259 if ( $token eq $matching_token{$last_token} ) {
7260 if ($block_type) {
7261 $ws = WS_YES;
7262 }
7263 else {
7264 $ws = WS_NO;
7265 }
7266 }
7267 else {
7268
7269 # we're considering the right of an opening brace
7270 # tightness = 0 means always pad inside with space
7271 # tightness = 1 means pad inside if "complex"
7272 # tightness = 2 means never pad inside with space
7273
7274 my $tightness;
7275 if ( $last_type eq '{'
7276 && $last_token eq '{'
7277 && $last_block_type )
7278 {
7279 $tightness = $rOpts_block_brace_tightness;
7280 }
7281 else { $tightness = $tightness{$last_token} }
7282
7283 if ( $tightness <= 0 ) {
7284 $ws = WS_YES;
7285 }
7286 elsif ( $tightness > 1 ) {
7287 $ws = WS_NO;
7288 }
7289 else {
7290
7291 # Patch to count '-foo' as single token so that
7292 # each of $a{-foo} and $a{foo} and $a{'foo'} do
7293 # not get spaces with default formatting.
7294 my $j_here = $j;
7295 ++$j_here
7296 if ( $token eq '-'
7297 && $last_token eq '{'
7298 && $$rtoken_type[ $j + 1 ] eq 'w' );
7299
7300 # $j_next is where a closing token should be if
7301 # the container has a single token
7302 my $j_next =
7303 ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
7304 ? $j_here + 2
7305 : $j_here + 1;
7306 my $tok_next = $$rtokens[$j_next];
7307 my $type_next = $$rtoken_type[$j_next];
7308
7309 # for tightness = 1, if there is just one token
7310 # within the matching pair, we will keep it tight
7311 if (
7312 $tok_next eq $matching_token{$last_token}
7313
7314 # but watch out for this: [ [ ] (misc.t)
7315 && $last_token ne $token
7316 )
7317 {
7318
7319 # remember where to put the space for the closing paren
7320 $j_tight_closing_paren = $j_next;
7321 $ws = WS_NO;
7322 }
7323 else {
7324 $ws = WS_YES;
7325 }
7326 }
7327 }
7328 } # done with opening braces and brackets
7329 my $ws_1 = $ws
7330 if FORMATTER_DEBUG_FLAG_WHITE;
7331
7332 #---------------------------------------------------------------
7333 # section 2:
7334 # handle space on inside of closing brace pairs
7335 #---------------------------------------------------------------
7336
7337 # /[\}\)\]R]/
7338 if ( $is_closing_type{$type} ) {
7339
7340 if ( $j == $j_tight_closing_paren ) {
7341
7342 $j_tight_closing_paren = -1;
7343 $ws = WS_NO;
7344 }
7345 else {
7346
7347 if ( !defined($ws) ) {
7348
7349 my $tightness;
7350 if ( $type eq '}' && $token eq '}' && $block_type ) {
7351 $tightness = $rOpts_block_brace_tightness;
7352 }
7353 else { $tightness = $tightness{$token} }
7354
7355 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7356 }
7357 }
7358 }
7359
7360 my $ws_2 = $ws
7361 if FORMATTER_DEBUG_FLAG_WHITE;
7362
7363 #---------------------------------------------------------------
7364 # section 3:
7365 # use the binary table
7366 #---------------------------------------------------------------
7367 if ( !defined($ws) ) {
7368 $ws = $binary_ws_rules{$last_type}{$type};
7369 }
7370 my $ws_3 = $ws
7371 if FORMATTER_DEBUG_FLAG_WHITE;
7372
7373 #---------------------------------------------------------------
7374 # section 4:
7375 # some special cases
7376 #---------------------------------------------------------------
7377 if ( $token eq '(' ) {
7378
7379 # This will have to be tweaked as tokenization changes.
7380 # We want a space after certain block types:
7381 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7382 #
7383 # But not others:
7384 # &{ $_->[1] } ( delete $_[$#_]{ $_->[0] } );
7385 # At present, the & block is not marked as a code block, so
7386 # this works:
7387 if ( $last_type eq '}' ) {
7388
7389 if ( $is_sort_map_grep{$last_block_type} ) {
7390 $ws = WS_YES;
7391 }
7392 else {
7393 $ws = WS_NO;
7394 }
7395 }
7396
7397 # -----------------------------------------------------
7398 # 'w' and 'i' checks for something like:
7399 # myfun( &myfun( ->myfun(
7400 # -----------------------------------------------------
7401 if ( ( $last_type =~ /^[wkU]$/ )
7402 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7403 {
7404
7405 # Do not introduce new space between keyword or function
7406 # ( except in special cases) because this can
7407 # introduce errors in some cases ( prnterr1.t )
7408 unless ( $last_type eq 'k'
7409 && $space_after_keyword{$last_token} )
7410 {
7411 $ws = WS_NO;
7412 }
7413 }
7414
7415 # space between something like $i and ( in
7416 # for $i ( 0 .. 20 ) {
7417 # FIXME: eventually, type 'i' needs to be split into multiple
7418 # token types so this can be a hardwired rule.
7419 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7420 $ws = WS_YES;
7421 }
7422
7423 # allow constant function followed by '()' to retain no space
7424 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
7425 ;
7426 $ws = WS_NO;
7427 }
7428 }
7429
7430 # patch for SWITCH/CASE: make space at ']{' optional
7431 # since the '{' might begin a case or when block
7432 elsif ( $token eq '{' && $last_token eq ']' ) {
7433 $ws = WS_OPTIONAL;
7434 }
7435
7436 # keep space between 'sub' and '{' for anonymous sub definition
7437 if ( $type eq '{' ) {
7438 if ( $last_token eq 'sub' ) {
7439 $ws = WS_YES;
7440 }
7441
7442 # this is needed to avoid no space in '){'
7443 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
7444
7445 # avoid any space before the brace or bracket in something like
7446 # @opts{'a','b',...}
7447 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
7448 $ws = WS_NO;
7449 }
7450 }
7451
7452 elsif ( $type eq 'i' ) {
7453
7454 # never a space before ->
7455 if ( $token =~ /^\-\>/ ) {
7456 $ws = WS_NO;
7457 }
7458 }
7459
7460 # retain any space between '-' and bare word
7461 elsif ( $type eq 'w' || $type eq 'C' ) {
7462 $ws = WS_OPTIONAL if $last_type eq '-';
7463
7464 # never a space before ->
7465 if ( $token =~ /^\-\>/ ) {
7466 $ws = WS_NO;
7467 }
7468 }
7469
7470 # retain any space between '-' and bare word
7471 # example: avoid space between 'USER' and '-' here:
7472 # $myhash{USER-NAME}='steve';
7473 elsif ( $type eq 'm' || $type eq '-' ) {
7474 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
7475 }
7476
7477 # always space before side comment
7478 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
7479
7480 # always preserver whatever space was used after a possible
7481 # filehandle or here doc operator
7482 if ( $type ne '#' && ( $last_type eq 'Z' || $last_type eq 'h' ) ) {
7483 $ws = WS_OPTIONAL;
7484 }
7485
7486 my $ws_4 = $ws
7487 if FORMATTER_DEBUG_FLAG_WHITE;
7488
7489 #---------------------------------------------------------------
7490 # section 5:
7491 # default rules not covered above
7492 #---------------------------------------------------------------
7493 # if we fall through to here,
7494 # look at the pre-defined hash tables for the two tokens, and
7495 # if (they are equal) use the common value
7496 # if (either is zero or undef) use the other
7497 # if (either is -1) use it
7498 # That is,
7499 # left vs right
7500 # 1 vs 1 --> 1
7501 # 0 vs 0 --> 0
7502 # -1 vs -1 --> -1
7503 #
7504 # 0 vs -1 --> -1
7505 # 0 vs 1 --> 1
7506 # 1 vs 0 --> 1
7507 # -1 vs 0 --> -1
7508 #
7509 # -1 vs 1 --> -1
7510 # 1 vs -1 --> -1
7511 if ( !defined($ws) ) {
7512 my $wl = $want_left_space{$type};
7513 my $wr = $want_right_space{$last_type};
7514 if ( !defined($wl) ) { $wl = 0 }
7515 if ( !defined($wr) ) { $wr = 0 }
7516 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
7517 }
7518
7519 if ( !defined($ws) ) {
7520 $ws = 0;
7521 write_diagnostics(
7522 "WS flag is undefined for tokens $last_token $token\n");
7523 }
7524
7525 # Treat newline as a whitespace. Otherwise, we might combine
7526 # 'Send' and '-recipients' here according to the above rules:
7527 # my $msg = new Fax::Send
7528 # -recipients => $to,
7529 # -data => $data;
7530 if ( $ws == 0 && $j == 0 ) { $ws = 1 }
7531
7532 if ( ( $ws == 0 )
7533 && $j > 0
7534 && $j < $jmax
7535 && ( $last_type !~ /^[Zh]$/ ) )
7536 {
7537
7538 # If this happens, we have a non-fatal but undesirable
7539 # hole in the above rules which should be patched.
7540 write_diagnostics(
7541 "WS flag is zero for tokens $last_token $token\n");
7542 }
7543 $white_space_flag[$j] = $ws;
7544
7545 FORMATTER_DEBUG_FLAG_WHITE && do {
7546 my $str = substr( $last_token, 0, 15 );
7547 $str .= ' ' x ( 16 - length($str) );
7548 if ( !defined($ws_1) ) { $ws_1 = "*" }
7549 if ( !defined($ws_2) ) { $ws_2 = "*" }
7550 if ( !defined($ws_3) ) { $ws_3 = "*" }
7551 if ( !defined($ws_4) ) { $ws_4 = "*" }
7552 print
7553"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
7554 };
7555 }
7556 return \@white_space_flag;
7557}
7558
7559{ # begin print_line_of_tokens
7560
7561 my $rtoken_type;
7562 my $rtokens;
7563 my $rlevels;
7564 my $rslevels;
7565 my $rblock_type;
7566 my $rcontainer_type;
7567 my $rcontainer_environment;
7568 my $rtype_sequence;
7569 my $input_line;
7570 my $rnesting_tokens;
7571 my $rci_levels;
7572 my $rnesting_blocks;
7573
7574 my $in_quote;
7575 my $python_indentation_level;
7576
7577 # These local token variables are stored by store_token_to_go:
7578 my $block_type;
7579 my $ci_level;
7580 my $container_environment;
7581 my $container_type;
7582 my $in_continued_quote;
7583 my $level;
7584 my $nesting_blocks;
7585 my $no_internal_newlines;
7586 my $slevel;
7587 my $token;
7588 my $type;
7589 my $type_sequence;
7590
7591 # routine to pull the jth token from the line of tokens
7592 sub extract_token {
7593 my $j = shift;
7594 $token = $$rtokens[$j];
7595 $type = $$rtoken_type[$j];
7596 $block_type = $$rblock_type[$j];
7597 $container_type = $$rcontainer_type[$j];
7598 $container_environment = $$rcontainer_environment[$j];
7599 $type_sequence = $$rtype_sequence[$j];
7600 $level = $$rlevels[$j];
7601 $slevel = $$rslevels[$j];
7602 $nesting_blocks = $$rnesting_blocks[$j];
7603 $ci_level = $$rci_levels[$j];
7604 }
7605
7606 {
7607 my @saved_token;
7608
7609 sub save_current_token {
7610
7611 @saved_token = (
7612 $block_type, $ci_level,
7613 $container_environment, $container_type,
7614 $in_continued_quote, $level,
7615 $nesting_blocks, $no_internal_newlines,
7616 $slevel, $token,
7617 $type, $type_sequence,
7618 );
7619 }
7620
7621 sub restore_current_token {
7622 (
7623 $block_type, $ci_level,
7624 $container_environment, $container_type,
7625 $in_continued_quote, $level,
7626 $nesting_blocks, $no_internal_newlines,
7627 $slevel, $token,
7628 $type, $type_sequence,
7629 )
7630 = @saved_token;
7631 }
7632 }
7633
7634 # Routine to place the current token into the output stream.
7635 # Called once per output token.
7636 sub store_token_to_go {
7637
7638 my $flag = $no_internal_newlines;
7639 if ( $_[0] ) { $flag = 1 }
7640
7641 $tokens_to_go[ ++$max_index_to_go ] = $token;
7642 $types_to_go[$max_index_to_go] = $type;
7643 $nobreak_to_go[$max_index_to_go] = $flag;
7644 $old_breakpoint_to_go[$max_index_to_go] = 0;
7645 $forced_breakpoint_to_go[$max_index_to_go] = 0;
7646 $block_type_to_go[$max_index_to_go] = $block_type;
7647 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
7648 $container_environment_to_go[$max_index_to_go] = $container_environment;
7649 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
7650 $ci_levels_to_go[$max_index_to_go] = $ci_level;
7651 $mate_index_to_go[$max_index_to_go] = -1;
7652 $matching_token_to_go[$max_index_to_go] = '';
7653
7654 # Note: negative levels are currently retained as a diagnostic so that
7655 # the 'final indentation level' is correctly reported for bad scripts.
7656 # But this means that every use of $level as an index must be checked.
7657 # If this becomes too much of a problem, we might give up and just clip
7658 # them at zero.
7659 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
7660 $levels_to_go[$max_index_to_go] = $level;
7661 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
7662 $lengths_to_go[ $max_index_to_go + 1 ] =
7663 $lengths_to_go[$max_index_to_go] + length($token);
7664
7665 # Define the indentation that this token would have if it started
7666 # a new line. We have to do this now because we need to know this
7667 # when considering one-line blocks.
7668 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
7669
7670 if ( $type ne 'b' ) {
7671 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
7672 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
7673 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
7674 $last_nonblank_index_to_go = $max_index_to_go;
7675 $last_nonblank_type_to_go = $type;
7676 $last_nonblank_token_to_go = $token;
7677 if ( $type eq ',' ) {
7678 $comma_count_in_batch++;
7679 }
7680 }
7681
7682 FORMATTER_DEBUG_FLAG_STORE && do {
7683 my ( $a, $b, $c ) = caller();
7684 print
7685"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
7686 };
7687 }
7688
7689 sub insert_new_token_to_go {
7690
7691 # insert a new token into the output stream. use same level as
7692 # previous token; assumes a character at max_index_to_go.
7693 save_current_token();
7694 ( $token, $type, $slevel, $no_internal_newlines ) = @_;
7695
7696 if ( $max_index_to_go == UNDEFINED_INDEX ) {
7697 warning("code bug: bad call to insert_new_token_to_go\n");
7698 }
7699 $level = $levels_to_go[$max_index_to_go];
7700
7701 # FIXME: it seems to be necessary to use the next, rather than
7702 # previous, value of this variable when creating a new blank (align.t)
7703 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
7704 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
7705 $ci_level = $ci_levels_to_go[$max_index_to_go];
7706 $container_environment = $container_environment_to_go[$max_index_to_go];
7707 $in_continued_quote = 0;
7708 $block_type = "";
7709 $type_sequence = "";
7710 store_token_to_go();
7711 restore_current_token();
7712 return;
7713 }
7714
7715 my %is_until_while_for_if_elsif_else;
7716
7717 BEGIN {
7718
7719 # always break after a closing curly of these block types:
7720 @_ = qw(until while for if elsif else);
7721 @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
7722
7723 }
7724
7725 sub print_line_of_tokens {
7726
7727 my $line_of_tokens = shift;
7728
7729 # This routine is called once per input line to process all of
7730 # the tokens on that line. This is the first stage of
7731 # beautification.
7732 #
7733 # Full-line comments and blank lines may be processed immediately.
7734 #
7735 # For normal lines of code, the tokens are stored one-by-one,
7736 # via calls to 'sub store_token_to_go', until a known line break
7737 # point is reached. Then, the batch of collected tokens is
7738 # passed along to 'sub output_line_to_go' for further
7739 # processing. This routine decides if there should be
7740 # whitespace between each pair of non-white tokens, so later
7741 # routines only need to decide on any additional line breaks.
7742 # Any whitespace is initally a single space character. Later,
7743 # the vertical aligner may expand that to be multiple space
7744 # characters if necessary for alignment.
7745
7746 # extract input line number for error messages
7747 $input_line_number = $line_of_tokens->{_line_number};
7748
7749 $rtoken_type = $line_of_tokens->{_rtoken_type};
7750 $rtokens = $line_of_tokens->{_rtokens};
7751 $rlevels = $line_of_tokens->{_rlevels};
7752 $rslevels = $line_of_tokens->{_rslevels};
7753 $rblock_type = $line_of_tokens->{_rblock_type};
7754 $rcontainer_type = $line_of_tokens->{_rcontainer_type};
7755 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
7756 $rtype_sequence = $line_of_tokens->{_rtype_sequence};
7757 $input_line = $line_of_tokens->{_line_text};
7758 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
7759 $rci_levels = $line_of_tokens->{_rci_levels};
7760 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
7761
7762 $in_continued_quote = $starting_in_quote =
7763 $line_of_tokens->{_starting_in_quote};
7764 $in_quote = $line_of_tokens->{_ending_in_quote};
7765 $python_indentation_level =
7766 $line_of_tokens->{_python_indentation_level};
7767
7768 my $j;
7769 my $j_next;
7770 my $jmax;
7771 my $next_nonblank_token;
7772 my $next_nonblank_token_type;
7773 my $rwhite_space_flag;
7774
7775 $jmax = @$rtokens - 1;
7776 $block_type = "";
7777 $container_type = "";
7778 $container_environment = "";
7779 $type_sequence = "";
7780 $no_internal_newlines = 1 - $rOpts_add_newlines;
7781
7782 # Handle a continued quote..
7783 if ($in_continued_quote) {
7784
7785 # A line which is entirely a quote or pattern must go out
7786 # verbatim. Note: the \n is contained in $input_line.
7787 if ( $jmax <= 0 ) {
7788 if ( ( $input_line =~ "\t" ) ) {
7789 note_embedded_tab();
7790 }
7791 write_unindented_line("$input_line");
7792 $last_line_had_side_comment = 0;
7793 return;
7794 }
7795
7796 # prior to version 20010406, perltidy had a bug which placed
7797 # continuation indentation before the last line of some multiline
7798 # quotes and patterns -- exactly the lines passing this way.
7799 # To help find affected lines in scripts run with these
7800 # versions, run with '-chk', and it will warn of any quotes or
7801 # patterns which might have been modified by these early
7802 # versions.
7803 if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
7804 warning(
7805"-chk: please check this line for extra leading whitespace\n"
7806 );
7807 }
7808 }
7809
7810 # delete trailing blank tokens
7811 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
7812
7813 # Handle a blank line..
7814 if ( $jmax < 0 ) {
7815
7816 # For the 'swallow-optional-blank-lines' option, we delete all
7817 # old blank lines and let the blank line rules generate any
7818 # needed blanks.
7819 if ( !$rOpts_swallow_optional_blank_lines ) {
7820 flush();
7821 $file_writer_object->write_blank_code_line();
7822 $last_line_leading_type = 'b';
7823 }
7824 $last_line_had_side_comment = 0;
7825 return;
7826 }
7827
7828 # see if this is a static block comment (starts with ##)
7829 my $is_static_block_comment = 0;
7830 my $is_static_block_comment_without_leading_space = 0;
7831 if ( $jmax == 0
7832 && $$rtoken_type[0] eq '#'
7833 && $rOpts->{'static-block-comments'}
7834 && $input_line =~ /$static_block_comment_pattern/o )
7835 {
7836 $is_static_block_comment = 1;
7837 $is_static_block_comment_without_leading_space =
7838 ( length($1) <= 0 );
7839 }
7840
7841 # create a hanging side comment if appropriate
7842 if (
7843 $jmax == 0
7844 && $$rtoken_type[0] eq '#' # only token is a comment
7845 && $last_line_had_side_comment # last line had side comment
7846 && $input_line =~ /^\s/ # there is some leading space
7847 && !$is_static_block_comment # do not make static comment hanging
7848 && $rOpts->{'hanging-side-comments'} # user is allowing this
7849 )
7850 {
7851
7852 # We will insert an empty qw string at the start of the token list
7853 # to force this comment to be a side comment. The vertical aligner
7854 # should then line it up with the previous side comment.
7855 unshift @$rtoken_type, 'q';
7856 unshift @$rtokens, '';
7857 unshift @$rlevels, $$rlevels[0];
7858 unshift @$rslevels, $$rslevels[0];
7859 unshift @$rblock_type, '';
7860 unshift @$rcontainer_type, '';
7861 unshift @$rcontainer_environment, '';
7862 unshift @$rtype_sequence, '';
7863 unshift @$rnesting_tokens, $$rnesting_tokens[0];
7864 unshift @$rci_levels, $$rci_levels[0];
7865 unshift @$rnesting_blocks, $$rnesting_blocks[0];
7866 $jmax = 1;
7867 }
7868
7869 # remember if this line has a side comment
7870 $last_line_had_side_comment =
7871 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
7872
7873 # Handle a block (full-line) comment..
7874 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
7875
7876 if ( $rOpts->{'delete-block-comments'} ) { return }
7877
7878 if ( $rOpts->{'tee-block-comments'} ) {
7879 $file_writer_object->tee_on();
7880 }
7881
7882 destroy_one_line_block();
7883 output_line_to_go();
7884
7885 # output a blank line before block comments
7886 if (
7887 $last_line_leading_type !~ /^[#b]$/
7888 && $rOpts->{'blanks-before-comments'} # only if allowed
7889 && !
7890 $is_static_block_comment # never before static block comments
7891 )
7892 {
7893 flush(); # switching to new output stream
7894 $file_writer_object->write_blank_code_line();
7895 $last_line_leading_type = 'b';
7896 }
7897
7898 # TRIM COMMENTS -- This could be turned off as a option
7899 $$rtokens[0] =~ s/\s*$//; # trim right end
7900
7901 if (
7902 $rOpts->{'indent-block-comments'}
7903 && ( !$rOpts->{'indent-spaced-block-comments'}
7904 || $input_line =~ /^\s+/ )
7905 && !$is_static_block_comment_without_leading_space
7906 )
7907 {
7908 extract_token(0);
7909 store_token_to_go();
7910 output_line_to_go();
7911 }
7912 else {
7913 flush(); # switching to new output stream
7914 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
7915 $last_line_leading_type = '#';
7916 }
7917 if ( $rOpts->{'tee-block-comments'} ) {
7918 $file_writer_object->tee_off();
7919 }
7920 return;
7921 }
7922
7923 # compare input/output indentation except for continuation lines
7924 # (because they have an unknown amount of initial blank space)
7925 # and lines which are quotes (because they may have been outdented)
7926 # Note: this test is placed here because we know the continuation flag
7927 # at this point, which allows us to avoid non-meaningful checks.
7928 my $structural_indentation_level = $$rlevels[0];
7929 compare_indentation_levels( $python_indentation_level,
7930 $structural_indentation_level )
7931 unless ( $python_indentation_level < 0
7932 || ( $$rci_levels[0] > 0 )
7933 || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
7934 );
7935
7936 # Patch needed for MakeMaker. Do not break a statement
7937 # in which $VERSION may be calculated. See MakeMaker.pm;
7938 # this is based on the coding in it.
7939 # The first line of a file that matches this will be eval'd:
7940 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
7941 # Examples:
7942 # *VERSION = \'1.01';
7943 # ( $VERSION ) = '$Revision: 1.46 $ ' =~ /\$Revision:\s+([^\s]+)/;
7944 # We will pass such a line straight through without breaking
7945 # it unless -npvl is used
7946
7947 my $is_VERSION_statement = 0;
7948
7949 if (
7950 !$saw_VERSION_in_this_file
7951 && $input_line =~ /VERSION/ # quick check to reject most lines
7952 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
7953 )
7954 {
7955 $saw_VERSION_in_this_file = 1;
7956 $is_VERSION_statement = 1;
7957 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
7958 $no_internal_newlines = 1;
7959 }
7960
7961 # take care of indentation-only
7962 # also write a line which is entirely a 'qw' list
7963 if ( $rOpts->{'indent-only'}
7964 || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) )
7965 {
7966 flush();
7967 $input_line =~ s/^\s*//; # trim left end
7968 $input_line =~ s/\s*$//; # trim right end
7969
7970 extract_token(0);
7971 $token = $input_line;
7972 $type = 'q';
7973 $block_type = "";
7974 $container_type = "";
7975 $container_environment = "";
7976 $type_sequence = "";
7977 store_token_to_go();
7978 output_line_to_go();
7979 return;
7980 }
7981
7982 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
7983 push( @$rtoken_type, 'b', 'b' );
7984 ($rwhite_space_flag) =
7985 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
7986
7987 # find input tabbing to allow checks for tabbing disagreement
7988 ## not used for now
7989 ##$input_line_tabbing = "";
7990 ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
7991
7992 # if the buffer hasn't been flushed, add a leading space if
7993 # necessary to keep essential whitespace. This is really only
7994 # necessary if we are squeezing out all ws.
7995 if ( $max_index_to_go >= 0 ) {
7996
7997 $old_line_count_in_batch++;
7998
7999 if (
8000 is_essential_whitespace(
8001 $last_last_nonblank_token,
8002 $last_last_nonblank_type,
8003 $tokens_to_go[$max_index_to_go],
8004 $types_to_go[$max_index_to_go],
8005 $$rtokens[0],
8006 $$rtoken_type[0]
8007 )
8008 )
8009 {
8010 my $slevel = $$rslevels[0];
8011 insert_new_token_to_go( ' ', 'b', $slevel,
8012 $no_internal_newlines );
8013 }
8014 }
8015
8016 # If we just saw the end of an elsif block, write nag message
8017 # if we do not see another elseif or an else.
8018 if ($looking_for_else) {
8019
8020 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8021 write_logfile_entry("(No else block)\n");
8022 }
8023 $looking_for_else = 0;
8024 }
8025
8026 # This is a good place to kill incomplete one-line blocks
8027 if ( ( $semicolons_before_block_self_destruct == 0 )
8028 && ( $max_index_to_go >= 0 )
8029 && ( $types_to_go[$max_index_to_go] eq ';' )
8030 && ( $$rtokens[0] ne '}' ) )
8031 {
8032 destroy_one_line_block();
8033 output_line_to_go();
8034 }
8035
8036 # loop to process the tokens one-by-one
8037 $type = 'b';
8038 $token = "";
8039
8040 foreach $j ( 0 .. $jmax ) {
8041
8042 # pull out the local values for this token
8043 extract_token($j);
8044
8045 if ( $type eq '#' ) {
8046
8047 # trim trailing whitespace
8048 # (there is no option at present to prevent this)
8049 $token =~ s/\s*$//;
8050
8051 if (
8052 $rOpts->{'delete-side-comments'}
8053
8054 # delete closing side comments if necessary
8055 || ( $rOpts->{'delete-closing-side-comments'}
8056 && $token =~ /$closing_side_comment_prefix_pattern/o
8057 && $last_nonblank_block_type =~
8058 /$closing_side_comment_list_pattern/o )
8059 )
8060 {
8061 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8062 unstore_token_to_go();
8063 }
8064 last;
8065 }
8066 }
8067
8068 # If we are continuing after seeing a right curly brace, flush
8069 # buffer unless we see what we are looking for, as in
8070 # } else ...
8071 if ( $rbrace_follower && $type ne 'b' ) {
8072
8073 unless ( $rbrace_follower->{$token} ) {
8074 output_line_to_go();
8075 }
8076 $rbrace_follower = undef;
8077 }
8078
8079 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8080 $next_nonblank_token = $$rtokens[$j_next];
8081 $next_nonblank_token_type = $$rtoken_type[$j_next];
8082
8083 #--------------------------------------------------------
8084 # Start of section to patch token text
8085 #--------------------------------------------------------
8086
8087 # Modify certain tokens here for whitespace
8088 # The following is not yet done, but could be:
8089 # sub (x x x)
8090 if ( $type =~ /^[wit]$/ ) {
8091
8092 # Examples:
8093 # change '$ var' to '$var' etc
8094 # '-> new' to '->new'
8095 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8096 $token =~ s/\s*//g;
8097 }
8098
8099 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8100 }
8101
8102 # change 'LABEL :' to 'LABEL:'
8103 elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8104
8105 # patch to add space to something like "x10"
8106 # This avoids having to split this token in the pre-tokenizer
8107 elsif ( $type eq 'n' ) {
8108 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8109 }
8110
8111 elsif ( $type eq 'Q' ) {
8112 note_embedded_tab() if ( $token =~ "\t" );
8113
8114 # make note of something like '$var = s/xxx/yyy/;'
8115 # in case it should have been '$var =~ s/xxx/yyy/;'
8116 if (
8117 $token =~ /^(s|tr|y|m|\/)/
8118 && $last_nonblank_token =~ /^(=|==|!=)$/
8119
8120 # precededed by simple scalar
8121 && $last_last_nonblank_type eq 'i'
8122 && $last_last_nonblank_token =~ /^\$/
8123
8124 # followed by some kind of termination
8125 # (but give complaint if we can's see far enough ahead)
8126 && $next_nonblank_token =~ /^[; \)\}]$/
8127
8128 # scalar is not decleared
8129 && !(
8130 $types_to_go[0] eq 'k'
8131 && $tokens_to_go[0] =~ /^(my|our|local)$/
8132 )
8133 )
8134 {
8135 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8136 complain(
8137"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8138 );
8139 }
8140 }
8141
8142 # trim blanks from right of qw quotes
8143 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8144 elsif ( $type eq 'q' ) {
8145 $token =~ s/\s*$//;
8146 note_embedded_tab() if ( $token =~ "\t" );
8147 }
8148
8149 #--------------------------------------------------------
8150 # End of section to patch token text
8151 #--------------------------------------------------------
8152
8153 # insert any needed whitespace
8154 if ( ( $type ne 'b' )
8155 && ( $max_index_to_go >= 0 )
8156 && ( $types_to_go[$max_index_to_go] ne 'b' )
8157 && $rOpts_add_whitespace )
8158 {
8159 my $ws = $$rwhite_space_flag[$j];
8160
8161 if ( $ws == 1 ) {
8162 insert_new_token_to_go( ' ', 'b', $slevel,
8163 $no_internal_newlines );
8164 }
8165 }
8166
8167 # Do not allow breaks which would promote a side comment to a
8168 # block comment. In order to allow a break before an opening
8169 # or closing BLOCK, followed by a side comment, those sections
8170 # of code will handle this flag separately.
8171 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8172 my $is_opening_BLOCK =
8173 ( $type eq '{'
8174 && $token eq '{'
8175 && $block_type
8176 && $block_type ne 't' );
8177 my $is_closing_BLOCK =
8178 ( $type eq '}'
8179 && $token eq '}'
8180 && $block_type
8181 && $block_type ne 't' );
8182
8183 if ( $side_comment_follows
8184 && !$is_opening_BLOCK
8185 && !$is_closing_BLOCK )
8186 {
8187 $no_internal_newlines = 1;
8188 }
8189
8190 # We're only going to handle breaking for code BLOCKS at this
8191 # (top) level. Other indentation breaks will be handled by
8192 # sub scan_list, which is better suited to dealing with them.
8193 if ($is_opening_BLOCK) {
8194
8195 # Tentatively output this token. This is required before
8196 # calling starting_one_line_block. We may have to unstore
8197 # it, though, if we have to break before it.
8198 store_token_to_go($side_comment_follows);
8199
8200 # Look ahead to see if we might form a one-line block
8201 my $too_long =
8202 starting_one_line_block( $j, $jmax, $level, $slevel,
8203 $ci_level, $rtokens, $rtoken_type, $rblock_type );
8204 clear_breakpoint_undo_stack();
8205
8206 # to simplify the logic below, set a flag to indicate if
8207 # this opening brace is far from the keyword which introduces it
8208 my $keyword_on_same_line = 1;
8209 if ( ( $max_index_to_go >= 0 )
8210 && ( $last_nonblank_type eq ')' ) )
8211 {
8212 if ( $block_type =~ /^(if|else|elsif)$/
8213 && ( $tokens_to_go[0] eq '}' )
8214 && $rOpts_cuddled_else )
8215 {
8216 $keyword_on_same_line = 1;
8217 }
8218 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8219 {
8220 $keyword_on_same_line = 0;
8221 }
8222 }
8223
8224 # decide if user requested break before '{'
8225 my $want_break =
8226
8227 # use -bl flag if not a sub block of any type
8228 $block_type !~ /^sub/
8229 ? $rOpts->{'opening-brace-on-new-line'}
8230
8231 # use -sbl flag unless this is an anonymous sub block
8232 : $block_type !~ /^sub\W*$/
8233 ? $rOpts->{'opening-sub-brace-on-new-line'}
8234
8235 # do not break for anonymous subs
8236 : 0;
8237
8238 # Break before an opening '{' ...
8239 if (
8240
8241 # if requested
8242 $want_break
8243
8244 # and we were unable to start looking for a block,
8245 && $index_start_one_line_block == UNDEFINED_INDEX
8246
8247 # or if it will not be on same line as its keyword, so that
8248 # it will be outdented (eval.t, overload.t), and the user
8249 # has not insisted on keeping it on the right
8250 || ( !$keyword_on_same_line
8251 && !$rOpts->{'opening-brace-always-on-right'} )
8252
8253 )
8254 {
8255
8256 # but only if allowed
8257 unless ($no_internal_newlines) {
8258
8259 # since we already stored this token, we must unstore it
8260 unstore_token_to_go();
8261
8262 # then output the line
8263 output_line_to_go();
8264
8265 # and now store this token at the start of a new line
8266 store_token_to_go($side_comment_follows);
8267 }
8268 }
8269
8270 # Now update for side comment
8271 if ($side_comment_follows) { $no_internal_newlines = 1 }
8272
8273 # now output this line
8274 unless ($no_internal_newlines) {
8275 output_line_to_go();
8276 }
8277 }
8278
8279 elsif ($is_closing_BLOCK) {
8280
8281 # If there is a pending one-line block ..
8282 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8283
8284 # we have to terminate it if..
8285 if (
8286
8287 # it is too long (final length may be different from
8288 # initial estimate). note: must allow 1 space for this token
8289 excess_line_length( $index_start_one_line_block,
8290 $max_index_to_go ) >= 0
8291
8292 # or if it has too many semicolons
8293 || ( $semicolons_before_block_self_destruct == 0
8294 && $last_nonblank_type ne ';' )
8295 )
8296 {
8297 destroy_one_line_block();
8298 }
8299 }
8300
8301 # put a break before this closing curly brace if appropriate
8302 unless ( $no_internal_newlines
8303 || $index_start_one_line_block != UNDEFINED_INDEX )
8304 {
8305
8306 # add missing semicolon if ...
8307 # there are some tokens
8308 if (
8309 ( $max_index_to_go > 0 )
8310
8311 # and we don't have one
8312 && ( $last_nonblank_type ne ';' )
8313
8314 # patch until some block type issues are fixed:
8315 # Do not add semi-colon for block types '{',
8316 # '}', and ';' because we cannot be sure yet
8317 # that this is a block and not an anonomyous
8318 # hash (blktype.t, blktype1.t)
8319 && ( $block_type !~ /^[\{\};]$/ )
8320
8321 # it seems best not to add semicolons in these
8322 # special block types: sort|map|grep
8323 && ( !$is_sort_map_grep{$block_type} )
8324
8325 # and we are allowed to do so.
8326 && $rOpts->{'add-semicolons'}
8327 )
8328 {
8329
8330 save_current_token();
8331 $token = ';';
8332 $type = ';';
8333 $level = $levels_to_go[$max_index_to_go];
8334 $slevel = $nesting_depth_to_go[$max_index_to_go];
8335 $nesting_blocks =
8336 $nesting_blocks_to_go[$max_index_to_go];
8337 $ci_level = $ci_levels_to_go[$max_index_to_go];
8338 $block_type = "";
8339 $container_type = "";
8340 $container_environment = "";
8341 $type_sequence = "";
8342
8343 # Note - we remove any blank AFTER extracting its
8344 # parameters such as level, etc, above
8345 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8346 unstore_token_to_go();
8347 }
8348 store_token_to_go();
8349
8350 note_added_semicolon();
8351 restore_current_token();
8352 }
8353
8354 # then write out everything before this closing curly brace
8355 output_line_to_go();
8356
8357 }
8358
8359 # Now update for side comment
8360 if ($side_comment_follows) { $no_internal_newlines = 1 }
8361
8362 # store the closing curly brace
8363 store_token_to_go();
8364
8365 # ok, we just stored a closing curly brace. Often, but
8366 # not always, we want to end the line immediately.
8367 # So now we have to check for special cases.
8368
8369 # if this '}' successfully ends a one-line block..
8370 my $is_one_line_block = 0;
8371 my $keep_going = 0;
8372 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8373
8374 # Remember the type of token just before the
8375 # opening brace. It would be more general to use
8376 # a stack, but this will work for one-line blocks.
8377 $is_one_line_block =
8378 $types_to_go[$index_start_one_line_block];
8379
8380 # we have to actually make it by removing tentative
8381 # breaks that were set within it
8382 undo_forced_breakpoint_stack(0);
8383 set_nobreaks( $index_start_one_line_block,
8384 $max_index_to_go - 1 );
8385
8386 # then re-initialize for the next one-line block
8387 destroy_one_line_block();
8388
8389 # then decide if we want to break after the '}' ..
8390 # We will keep going to allow certain brace followers as in:
8391 # do { $ifclosed = 1; last } unless $losing;
8392 #
8393 # But make a line break if the curly ends a
8394 # significant block:
8395 if ( $is_until_while_for_if_elsif_else{$block_type} ) {
8396 output_line_to_go() unless ($no_internal_newlines);
8397 }
8398 }
8399
8400 # set string indicating what we need to look for brace follower
8401 # tokens
8402 if ( $block_type eq 'do' ) {
8403 $rbrace_follower = \%is_do_follower;
8404 }
8405 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
8406 $rbrace_follower = \%is_if_brace_follower;
8407 }
8408 elsif ( $block_type eq 'else' ) {
8409 $rbrace_follower = \%is_else_brace_follower;
8410 }
8411
8412 # added eval for borris.t
8413 elsif ($is_sort_map_grep_eval{$block_type}
8414 || $is_one_line_block eq 'G' )
8415 {
8416 $rbrace_follower = undef;
8417 $keep_going = 1;
8418 }
8419
8420 # anonymous sub
8421 elsif ( $block_type =~ /^sub\W*$/ ) {
8422
8423 if ($is_one_line_block) {
8424 $rbrace_follower = \%is_anon_sub_1_brace_follower;
8425 }
8426 else {
8427 $rbrace_follower = \%is_anon_sub_brace_follower;
8428 }
8429 }
8430
8431 # TESTING ONLY for SWITCH/CASE - this is where to start
8432 # recoding to retain else's on the same line as a case,
8433 # but there is a lot more that would need to be done.
8434 ##elsif ($block_type eq 'case') {$rbrace_follower = {else=>1};}
8435
8436 # None of the above: specify what can follow a closing
8437 # brace of a block which is not an
8438 # if/elsif/else/do/sort/map/grep/eval
8439 # Testfiles:
8440 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
8441 else {
8442 $rbrace_follower = \%is_other_brace_follower;
8443 }
8444
8445 # See if an elsif block is followed by another elsif or else;
8446 # complain if not.
8447 if ( $block_type eq 'elsif' ) {
8448
8449 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
8450 $looking_for_else = 1; # ok, check on next line
8451 }
8452 else {
8453
8454 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
8455 write_logfile_entry("No else block :(\n");
8456 }
8457 }
8458 }
8459
8460 # keep going after certain block types (map,sort,grep,eval)
8461 # added eval for borris.t
8462 if ($keep_going) {
8463
8464 # keep going
8465 }
8466
8467 # if no more tokens, postpone decision until re-entring
8468 elsif ( ( $next_nonblank_token_type eq 'b' )
8469 && $rOpts_add_newlines )
8470 {
8471 unless ($rbrace_follower) {
8472 output_line_to_go() unless ($no_internal_newlines);
8473 }
8474 }
8475
8476 elsif ($rbrace_follower) {
8477
8478 unless ( $rbrace_follower->{$next_nonblank_token} ) {
8479 output_line_to_go() unless ($no_internal_newlines);
8480 }
8481 $rbrace_follower = undef;
8482 }
8483
8484 else {
8485 output_line_to_go() unless ($no_internal_newlines);
8486 }
8487
8488 } # end treatment of closing block token
8489
8490 # handle semicolon
8491 elsif ( $type eq ';' ) {
8492
8493 # kill one-line blocks with too many semicolons
8494 $semicolons_before_block_self_destruct--;
8495 if (
8496 ( $semicolons_before_block_self_destruct < 0 )
8497 || ( $semicolons_before_block_self_destruct == 0
8498 && $next_nonblank_token_type !~ /^[b\}]$/ )
8499 )
8500 {
8501 destroy_one_line_block();
8502 }
8503
8504 # Remove unnecessary semicolons, but not after bare
8505 # blocks, where it could be unsafe if the brace is
8506 # mistokenized.
8507 if (
8508 (
8509 $last_nonblank_token eq '}'
8510 && (
8511 $is_block_without_semicolon{
8512 $last_nonblank_block_type}
8513 || $last_nonblank_block_type =~ /^sub\s+\w/
8514 || $last_nonblank_block_type =~ /^\w+:$/ )
8515 )
8516 || $last_nonblank_type eq ';'
8517 )
8518 {
8519
8520 if (
8521 $rOpts->{'delete-semicolons'}
8522
8523 # don't delete ; before a # because it would promote it
8524 # to a block comment
8525 && ( $next_nonblank_token_type ne '#' )
8526 )
8527 {
8528 note_deleted_semicolon();
8529 output_line_to_go()
8530 unless ( $no_internal_newlines
8531 || $index_start_one_line_block != UNDEFINED_INDEX );
8532 next;
8533 }
8534 else {
8535 write_logfile_entry("Extra ';'\n");
8536 }
8537 }
8538 store_token_to_go();
8539
8540 output_line_to_go()
8541 unless ( $no_internal_newlines
8542 || ( $next_nonblank_token eq '}' ) );
8543
8544 }
8545
8546 # handle here_doc target string
8547 elsif ( $type eq 'h' ) {
8548 $no_internal_newlines =
8549 1; # no newlines after seeing here-target
8550 destroy_one_line_block();
8551 store_token_to_go();
8552 }
8553
8554 # handle all other token types
8555 else {
8556
8557 # if this is a blank...
8558 if ( $type eq 'b' ) {
8559
8560 # make it just one character
8561 $token = ' ' if $rOpts_add_whitespace;
8562
8563 # delete it if unwanted by whitespace rules
8564 # or we are deleting all whitespace
8565 my $ws = $$rwhite_space_flag[ $j + 1 ];
8566 if ( ( defined($ws) && $ws == -1 )
8567 || $rOpts_delete_old_whitespace )
8568 {
8569
8570 # unless it might make a syntax error
8571 next
8572 unless is_essential_whitespace(
8573 $last_last_nonblank_token,
8574 $last_last_nonblank_type,
8575 $tokens_to_go[$max_index_to_go],
8576 $types_to_go[$max_index_to_go],
8577 $$rtokens[ $j + 1 ],
8578 $$rtoken_type[ $j + 1 ]
8579 );
8580 }
8581 }
8582 store_token_to_go();
8583 }
8584
8585 # remember two previous nonblank OUTPUT tokens
8586 if ( $type ne '#' && $type ne 'b' ) {
8587 $last_last_nonblank_token = $last_nonblank_token;
8588 $last_last_nonblank_type = $last_nonblank_type;
8589 $last_nonblank_token = $token;
8590 $last_nonblank_type = $type;
8591 $last_nonblank_block_type = $block_type;
8592 }
8593
8594 # unset the continued-quote flag since it only applies to the
8595 # first token, and we want to resume normal formatting if
8596 # there are additional tokens on the line
8597 $in_continued_quote = 0;
8598
8599 } # end of loop over all tokens in this 'line_of_tokens'
8600
8601 # we have to flush ..
8602 if (
8603
8604 # if there is a side comment
8605 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
8606
8607 # if this line which ends in a quote
8608 || $in_quote
8609
8610 # if this is a VERSION statement
8611 || $is_VERSION_statement
8612
8613 # to keep a label on one line if that is how it is now
8614 || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
8615
8616 # if we are instructed to keep all old line breaks
8617 || !$rOpts->{'delete-old-newlines'}
8618 )
8619 {
8620 destroy_one_line_block();
8621 output_line_to_go();
8622 }
8623
8624 # mark old line breakpoints in current output stream
8625 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_line_breaks ) {
8626 $old_breakpoint_to_go[$max_index_to_go] = 1;
8627 }
8628 }
8629} # end print_line_of_tokens
8630
8631sub note_added_semicolon {
8632 $last_added_semicolon_at = $input_line_number;
8633 if ( $added_semicolon_count == 0 ) {
8634 $first_added_semicolon_at = $last_added_semicolon_at;
8635 }
8636 $added_semicolon_count++;
8637 write_logfile_entry("Added ';' here\n");
8638}
8639
8640sub note_deleted_semicolon {
8641 $last_deleted_semicolon_at = $input_line_number;
8642 if ( $deleted_semicolon_count == 0 ) {
8643 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
8644 }
8645 $deleted_semicolon_count++;
8646 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
8647}
8648
8649sub note_embedded_tab {
8650 $embedded_tab_count++;
8651 $last_embedded_tab_at = $input_line_number;
8652 if ( !$first_embedded_tab_at ) {
8653 $first_embedded_tab_at = $last_embedded_tab_at;
8654 }
8655
8656 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
8657 write_logfile_entry("Embedded tabs in quote or pattern\n");
8658 }
8659}
8660
8661sub starting_one_line_block {
8662
8663 # after seeing an opening curly brace, look for the closing brace
8664 # and see if the entire block will fit on a line. This routine is
8665 # not always right because it uses the old whitespace, so a check
8666 # is made later (at the closing brace) to make sure we really
8667 # have a one-line block. We have to do this preliminary check,
8668 # though, because otherwise we would always break at a semicolon
8669 # within a one-line block if the block contains multiple statements.
8670
8671 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
8672 $rblock_type )
8673 = @_;
8674
8675 # kill any current block - we can only go 1 deep
8676 destroy_one_line_block();
8677
8678 # return value:
8679 # 1=distance from start of block to opening brace exceeds line length
8680 # 0=otherwise
8681
8682 my $i_start = 0;
8683
8684 # shouldn't happen: there must have been a prior call to
8685 # store_token_to_go to put the opening brace in the output stream
8686 if ( $max_index_to_go < 0 ) {
8687 warning("program bug: store_token_to_go called incorrectly\n");
8688 report_definite_bug();
8689 }
8690 else {
8691
8692 # cannot use one-line blocks with cuddled else else/elsif lines
8693 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
8694 return 0;
8695 }
8696 }
8697
8698 my $block_type = $$rblock_type[$j];
8699
8700 # find the starting keyword for this block (such as 'if', 'else', ...)
8701
8702 if ( $block_type =~ /^[\{\}\;\:]$/ ) {
8703 $i_start = $max_index_to_go;
8704 }
8705
8706 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
8707
8708 # For something like "if (xxx) {", the keyword "if" will be
8709 # just after the most recent break. This will be 0 unless
8710 # we have just killed a one-line block and are starting another.
8711 # (doif.t)
8712 $i_start = $index_max_forced_break + 1;
8713 if ( $types_to_go[$i_start] eq 'b' ) {
8714 $i_start++;
8715 }
8716
8717 unless ( $tokens_to_go[$i_start] eq $block_type ) {
8718 return 0;
8719 }
8720 }
8721
8722 # the previous nonblank token should start these block types
8723 elsif (
8724 ( $last_last_nonblank_token_to_go eq $block_type )
8725 || ( $block_type =~ /^sub/
8726 && $last_last_nonblank_token_to_go =~ /^sub/ )
8727 )
8728 {
8729 $i_start = $last_last_nonblank_index_to_go;
8730 }
8731
8732 # patch for SWITCH/CASE to retain one-line case/when blocks
8733 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
8734 $i_start = $index_max_forced_break + 1;
8735 if ( $types_to_go[$i_start] eq 'b' ) {
8736 $i_start++;
8737 }
8738 unless ( $tokens_to_go[$i_start] eq $block_type ) {
8739 return 0;
8740 }
8741 }
8742
8743 else {
8744 return 1;
8745 }
8746
8747 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
8748
8749 my $i;
8750
8751 # see if length is too long to even start
8752 if ( $pos > $rOpts_maximum_line_length ) {
8753 return 1;
8754 }
8755
8756 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
8757
8758 # old whitespace could be arbitrarily large, so don't use it
8759 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
8760 else { $pos += length( $$rtokens[$i] ) }
8761
8762 # Return false result if we exceed the maximum line length,
8763 if ( $pos > $rOpts_maximum_line_length ) {
8764 return 0;
8765 }
8766
8767 # or encounter another opening brace before finding the closing brace.
8768 elsif ($$rtokens[$i] eq '{'
8769 && $$rtoken_type[$i] eq '{'
8770 && $$rblock_type[$i] )
8771 {
8772 return 0;
8773 }
8774
8775 # if we find our closing brace..
8776 elsif ($$rtokens[$i] eq '}'
8777 && $$rtoken_type[$i] eq '}'
8778 && $$rblock_type[$i] )
8779 {
8780
8781 # be sure any trailing comment also fits on the line
8782 my $i_nonblank =
8783 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
8784
8785 if ( $$rtoken_type[$i_nonblank] eq '#' ) {
8786 $pos += length( $$rtokens[$i_nonblank] );
8787
8788 if ( $i_nonblank > $i + 1 ) {
8789 $pos += length( $$rtokens[ $i + 1 ] );
8790 }
8791
8792 if ( $pos > $rOpts_maximum_line_length ) {
8793 return 0;
8794 }
8795 }
8796
8797 # ok, it's a one-line block
8798 create_one_line_block( $i_start, 20 );
8799 return 0;
8800 }
8801
8802 # just keep going for other characters
8803 else {
8804 }
8805 }
8806
8807 # Allow certain types of new one-line blocks to form by joining
8808 # input lines. These can be safely done, but for other block types,
8809 # we keep old one-line blocks but do not form new ones. It is not
8810 # always a good idea to make as many one-line blocks as possible,
8811 # so other types are not done. The user can always use -mangle.
8812 if ( $is_sort_map_grep_eval{$block_type} ) {
8813 create_one_line_block( $i_start, 1 );
8814 }
8815
8816 return 0;
8817}
8818
8819sub unstore_token_to_go {
8820
8821 # remove most recent token from output stream
8822 if ( $max_index_to_go > 0 ) {
8823 $max_index_to_go--;
8824 }
8825 else {
8826 $max_index_to_go = UNDEFINED_INDEX;
8827 }
8828
8829}
8830
8831sub want_blank_line {
8832 flush();
8833 $file_writer_object->want_blank_line();
8834}
8835
8836sub write_unindented_line {
8837 flush();
8838 $file_writer_object->write_line( $_[0] );
8839}
8840
8841sub undo_lp_ci {
8842
8843 # If there is a single, long parameter within parens, like this:
8844 #
8845 # $self->command( "/msg "
8846 # . $infoline->chan
8847 # . " You said $1, but did you know that it's square was "
8848 # . $1 * $1 . " ?" );
8849 #
8850 # we can remove the continuation indentation of the 2nd and higher lines
8851 # to achieve this effect, which is more pleasing:
8852 #
8853 # $self->command("/msg "
8854 # . $infoline->chan
8855 # . " You said $1, but did you know that it's square was "
8856 # . $1 * $1 . " ?");
8857
8858 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
8859 my $max_line = @$ri_first - 1;
8860
8861 # must be multiple lines
8862 return unless $max_line > $line_open;
8863
8864 my $lev_start = $levels_to_go[$i_start];
8865 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
8866
8867 # see if all additional lines in this container have continuation
8868 # indentation
8869 my $n;
8870 my $line_1 = 1 + $line_open;
8871 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
8872 my $ibeg = $$ri_first[$n];
8873 my $iend = $$ri_last[$n];
8874 if ( $ibeg eq $closing_index ) { $n--; last }
8875 return if ( $lev_start != $levels_to_go[$ibeg] );
8876 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
8877 last if ( $closing_index <= $iend );
8878 }
8879
8880 # we can reduce the indentation of all continuation lines
8881 my $continuation_line_count = $n - $line_open;
8882 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
8883 (0) x ($continuation_line_count);
8884 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
8885 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
8886}
8887
8888{
8889
8890 # Identify certain operators which often occur in chains.
8891 # We will try to improve alignment when these lead a line.
8892 my %is_chain_operator;
8893
8894 BEGIN {
8895 @_ = qw(&& || and or : ? .);
8896 @is_chain_operator{@_} = (1) x scalar(@_);
8897 }
8898
8899 sub set_logical_padding {
8900
8901 # Look at a batch of lines and see if extra padding can improve the
8902 # alignment when there are certain leading operators. Here is an
8903 # example, in which some extra space is introduced before
8904 # '( $year' to make it line up with the subsequent lines:
8905 #
8906 # if ( ( $Year < 1601 )
8907 # || ( $Year > 2899 )
8908 # || ( $EndYear < 1601 )
8909 # || ( $EndYear > 2899 ) )
8910 # {
8911 # &Error_OutOfRange;
8912 # }
8913 #
8914 my ( $ri_first, $ri_last ) = @_;
8915 my $max_line = @$ri_first - 1;
8916
8917 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line,
8918 $pad_spaces, $tok_next, $has_leading_op_next, $has_leading_op );
8919
8920 # looking at each line of this batch..
8921 foreach $line ( 0 .. $max_line - 1 ) {
8922
8923 # see if the next line begins with a logical operator
8924 $ibeg = $$ri_first[$line];
8925 $iend = $$ri_last[$line];
8926 $ibeg_next = $$ri_first[ $line + 1 ];
8927 $tok_next = $tokens_to_go[$ibeg_next];
8928 $has_leading_op_next = $is_chain_operator{$tok_next};
8929 next unless ($has_leading_op_next);
8930
8931 # next line must not be at lesser depth
8932 next
8933 if ( $nesting_depth_to_go[$ibeg] >
8934 $nesting_depth_to_go[$ibeg_next] );
8935
8936 # identify the token in this line to be padded on the left
8937 $ipad = undef;
8938
8939 # handle lines at same depth...
8940 if ( $nesting_depth_to_go[$ibeg] ==
8941 $nesting_depth_to_go[$ibeg_next] )
8942 {
8943
8944 # if this is not first line of the batch ...
8945 if ( $line > 0 ) {
8946
8947 # and we have leading operator
8948 next if $has_leading_op;
8949
8950 # and ..
8951 # 1. the previous line is at lesser depth, or
8952 # 2. the previous line ends in an assignment
8953 #
8954 # Example 1: previous line at lesser depth
8955 # if ( ( $Year < 1601 ) # <- we are here but
8956 # || ( $Year > 2899 ) # list has not yet
8957 # || ( $EndYear < 1601 ) # collapsed vertically
8958 # || ( $EndYear > 2899 ) )
8959 # {
8960 #
8961 # Example 2: previous line ending in assignment:
8962 # $leapyear =
8963 # $year % 4 ? 0 # <- We are here
8964 # : $year % 100 ? 1
8965 # : $year % 400 ? 0
8966 # : 1;
8967 next
8968 unless (
8969 $is_assignment{ $types_to_go[$iendm] }
8970 || ( $nesting_depth_to_go[$ibegm] <
8971 $nesting_depth_to_go[$ibeg] )
8972 );
8973
8974 # we will add padding before the first token
8975 $ipad = $ibeg;
8976 }
8977
8978 # for first line of the batch..
8979 else {
8980
8981 # WARNING: Never indent if first line is starting in a
8982 # continued quote, which would change the quote.
8983 next if $starting_in_quote;
8984
8985 # if this is text after closing '}'
8986 # then look for an interior token to pad
8987 if ( $types_to_go[$ibeg] eq '}' ) {
8988
8989 }
8990
8991 # otherwise, we might pad if it looks really good
8992 else {
8993
8994 # we might pad token $ibeg, so be sure that it
8995 # is at the same depth as the next line.
8996 next
8997 if ( $nesting_depth_to_go[ $ibeg + 1 ] !=
8998 $nesting_depth_to_go[$ibeg_next] );
8999
9000 # We can pad on line 1 of a statement if at least 3
9001 # lines will be aligned. Otherwise, it
9002 # can look very confusing.
9003 if ( $max_line > 2 ) {
9004 my $leading_token = $tokens_to_go[$ibeg_next];
9005
9006 # never indent line 1 of a '.' series because
9007 # previous line is most likely at same level.
9008 # TODO: we should also look at the leasing_spaces
9009 # of the last output line and skip if it is same
9010 # as this line.
9011 next if ( $leading_token eq '.' );
9012
9013 my $count = 1;
9014 foreach my $l ( 2 .. 3 ) {
9015 my $ibeg_next_next = $$ri_first[ $line + $l ];
9016 next
9017 unless $tokens_to_go[$ibeg_next_next] eq
9018 $leading_token;
9019 $count++;
9020 }
9021 next unless $count == 3;
9022 $ipad = $ibeg;
9023 }
9024 else {
9025 next;
9026 }
9027 }
9028 }
9029 }
9030
9031 # find interior token to pad if necessary
9032 if ( !defined($ipad) ) {
9033
9034 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
9035
9036 # find any unclosed container
9037 next
9038 unless ( $type_sequence_to_go[$i]
9039 && $mate_index_to_go[$i] > $iend );
9040
9041 # find next nonblank token to pad
9042 $ipad = $i + 1;
9043 if ( $types_to_go[$ipad] eq 'b' ) {
9044 $ipad++;
9045 last if ( $ipad > $iend );
9046 }
9047 }
9048 last unless $ipad;
9049 }
9050
9051 # next line must not be at greater depth
9052 my $iend_next = $$ri_last[ $line + 1 ];
9053 next
9054 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
9055 $nesting_depth_to_go[$ipad] );
9056
9057 # lines must be somewhat similar to be padded..
9058 my $inext_next = $ibeg_next + 1;
9059 if ( $types_to_go[$inext_next] eq 'b' ) {
9060 $inext_next++;
9061 }
9062 my $type = $types_to_go[$ipad];
9063
9064 # see if there are multiple continuation lines
9065 my $logical_continuation_lines = 1;
9066 if ( $line + 2 <= $max_line ) {
9067 my $leading_token = $tokens_to_go[$ibeg_next];
9068 my $ibeg_next_next = $$ri_first[ $line + 2 ];
9069 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
9070 && $nesting_depth_to_go[$ibeg_next] eq
9071 $nesting_depth_to_go[$ibeg_next_next] )
9072 {
9073 $logical_continuation_lines++;
9074 }
9075 }
9076 if (
9077
9078 # either we have multiple continuation lines to follow
9079 # and we are not padding the first token
9080 ( $logical_continuation_lines > 1 && $ipad > 0 )
9081
9082 # or..
9083 || (
9084
9085 # types must match
9086 $types_to_go[$inext_next] eq $type
9087
9088 # and keywords must match if keyword
9089 && !(
9090 $type eq 'k'
9091 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
9092 )
9093 )
9094 )
9095 {
9096
9097 #----------------------begin special check---------------
9098 #
9099 # One more check is needed before we can make the pad.
9100 # If we are in a list with some long items, we want each
9101 # item to stand out. So in the following example, the
9102 # first line begining with '$casefold->' would look good
9103 # padded to align with the next line, but then it
9104 # would be indented more than the last line, so we
9105 # won't do it.
9106 #
9107 # ok(
9108 # $casefold->{code} eq '0041'
9109 # && $casefold->{status} eq 'C'
9110 # && $casefold->{mapping} eq '0061',
9111 # 'casefold 0x41'
9112 # );
9113 #
9114 # Note:
9115 # It would be faster, and almost as good, to use a comma
9116 # count, and not pad if comma_count > 1 and the previous
9117 # line did not end with a comma.
9118 #
9119 my $ok_to_pad = 1;
9120
9121 my $ibg = $$ri_first[ $line + 1 ];
9122 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
9123
9124 # just use simplified formula for leading spaces to avoid
9125 # needless sub calls
9126 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
9127
9128 # look at each line beyond the next ..
9129 my $l = $line + 1;
9130 foreach $l ( $line + 2 .. $max_line ) {
9131 my $ibg = $$ri_first[$l];
9132
9133 # quit looking at the end of this container
9134 last
9135 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
9136 || ( $nesting_depth_to_go[$ibg] < $depth );
9137
9138 # cannot do the pad if a later line would be
9139 # outdented more
9140 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
9141 $ok_to_pad = 0;
9142 last;
9143 }
9144 }
9145
9146 # don't pad if we end in a broken list
9147 if ( $l == $max_line ) {
9148 my $i2 = $$ri_last[$l];
9149 if ( $types_to_go[$i2] eq '#' ) {
9150 my $i1 = $$ri_first[$l];
9151 next
9152 if (
9153 terminal_type( \@types_to_go, \@block_type_to_go,
9154 $i1, $i2 ) eq ','
9155 );
9156 }
9157 }
9158 next unless $ok_to_pad;
9159
9160 #----------------------end special check---------------
9161
9162 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
9163 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
9164 $pad_spaces = $length_2 - $length_1;
9165
9166 # make sure this won't change if -lp is used
9167 my $indentation_1 = $leading_spaces_to_go[$ibeg];
9168 if ( ref($indentation_1) ) {
9169 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
9170 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
9171 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 )
9172 {
9173 $pad_spaces = 0;
9174 }
9175 }
9176 }
9177
9178 # we might be able to handle a pad of -1 by removing a blank
9179 # token
9180 if ( $pad_spaces < 0 ) {
9181 if ( $pad_spaces == -1 ) {
9182 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
9183 {
9184 $tokens_to_go[ $ipad - 1 ] = '';
9185 }
9186 }
9187 $pad_spaces = 0;
9188 }
9189
9190 # now apply any padding for alignment
9191 if ( $ipad >= 0 && $pad_spaces ) {
9192 my $length_t = total_line_length( $ibeg, $iend );
9193 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length )
9194 {
9195 $tokens_to_go[$ipad] =
9196 ' ' x $pad_spaces . $tokens_to_go[$ipad];
9197 }
9198 }
9199 }
9200 }
9201 continue {
9202 $iendm = $iend;
9203 $ibegm = $ibeg;
9204 $has_leading_op = $has_leading_op_next;
9205 } # end of loop over lines
9206 return;
9207 }
9208}
9209
9210sub correct_lp_indentation {
9211
9212 # When the -lp option is used, we need to make a last pass through
9213 # each line to correct the indentation positions in case they differ
9214 # from the predictions. This is necessary because perltidy uses a
9215 # predictor/corrector method for aligning with opening parens. The
9216 # predictor is usually good, but sometimes stumbles. The corrector
9217 # tries to patch things up once the actual opening paren locations
9218 # are known.
9219 my ( $ri_first, $ri_last ) = @_;
9220 my $do_not_pad = 0;
9221
9222 # Note on flag '$do_not_pad':
9223 # We want to avoid a situation like this, where the aligner inserts
9224 # whitespace before the '=' to align it with a previous '=', because
9225 # otherwise the parens might become mis-aligned in a situation like
9226 # this, where the '=' has become aligned with the previous line,
9227 # pushing the opening '(' forward beyond where we want it.
9228 #
9229 # $mkFloor::currentRoom = '';
9230 # $mkFloor::c_entry = $c->Entry(
9231 # -width => '10',
9232 # -relief => 'sunken',
9233 # ...
9234 # );
9235 #
9236 # We leave it to the aligner to decide how to do this.
9237
9238 # first remove continuation indentation if appropriate
9239 my $max_line = @$ri_first - 1;
9240
9241 # looking at each line of this batch..
9242 my ( $ibeg, $iend );
9243 my $line;
9244 foreach $line ( 0 .. $max_line ) {
9245 $ibeg = $$ri_first[$line];
9246 $iend = $$ri_last[$line];
9247
9248 # looking at each token in this output line..
9249 my $i;
9250 foreach $i ( $ibeg .. $iend ) {
9251
9252 # How many space characters to place before this token
9253 # for special alignment. Actual padding is done in the
9254 # continue block.
9255
9256 # looking for next unvisited indentation item
9257 my $indentation = $leading_spaces_to_go[$i];
9258 if ( !$indentation->get_MARKED() ) {
9259 $indentation->set_MARKED(1);
9260
9261 # looking for indentation item for which we are aligning
9262 # with parens, braces, and brackets
9263 next unless ( $indentation->get_ALIGN_PAREN() );
9264
9265 # skip closed container on this line
9266 if ( $i > $ibeg ) {
9267 my $im = $i - 1;
9268 if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
9269 if ( $type_sequence_to_go[$im]
9270 && $mate_index_to_go[$im] <= $iend )
9271 {
9272 next;
9273 }
9274 }
9275
9276 if ( $line == 1 && $i == $ibeg ) {
9277 $do_not_pad = 1;
9278 }
9279
9280 # Ok, let's see what the error is and try to fix it
9281 my $actual_pos;
9282 my $predicted_pos = $indentation->get_SPACES();
9283 if ( $i > $ibeg ) {
9284
9285 # token is mid-line - use length to previous token
9286 $actual_pos = total_line_length( $ibeg, $i - 1 );
9287
9288 # for mid-line token, we must check to see if all
9289 # additional lines have continuation indentation,
9290 # and remove it if so. Otherwise, we do not get
9291 # good alignment.
9292 my $closing_index = $indentation->get_CLOSED();
9293 if ( $closing_index > $iend ) {
9294 my $ibeg_next = $$ri_first[ $line + 1 ];
9295 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
9296 undo_lp_ci( $line, $i, $closing_index, $ri_first,
9297 $ri_last );
9298 }
9299 }
9300 }
9301 elsif ( $line > 0 ) {
9302
9303 # handle case where token starts a new line;
9304 # use length of previous line
9305 my $ibegm = $$ri_first[ $line - 1 ];
9306 my $iendm = $$ri_last[ $line - 1 ];
9307 $actual_pos = total_line_length( $ibegm, $iendm );
9308
9309 # follow -pt style
9310 ++$actual_pos
9311 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
9312 }
9313 else {
9314
9315 # token is first character of first line of batch
9316 $actual_pos = $predicted_pos;
9317 }
9318
9319 my $move_right = $actual_pos - $predicted_pos;
9320
9321 # done if no error to correct (gnu2.t)
9322 if ( $move_right == 0 ) {
9323 $indentation->set_RECOVERABLE_SPACES($move_right);
9324 next;
9325 }
9326
9327 # if we have not seen closure for this indentation in
9328 # this batch, we can only pass on a request to the
9329 # vertical aligner
9330 my $closing_index = $indentation->get_CLOSED();
9331
9332 if ( $closing_index < 0 ) {
9333 $indentation->set_RECOVERABLE_SPACES($move_right);
9334 next;
9335 }
9336
9337 # If necessary, look ahead to see if there is really any
9338 # leading whitespace dependent on this whitespace, and
9339 # also find the longest line using this whitespace.
9340 # Since it is always safe to move left if there are no
9341 # dependents, we only need to do this if we may have
9342 # dependent nodes or need to move right.
9343
9344 my $right_margin = 0;
9345 my $have_child = $indentation->get_HAVE_CHILD();
9346
9347 my %saw_indentation;
9348 my $line_count = 1;
9349 $saw_indentation{$indentation} = $indentation;
9350
9351 if ( $have_child || $move_right > 0 ) {
9352 $have_child = 0;
9353 my $max_length = 0;
9354 if ( $i == $ibeg ) {
9355 $max_length = total_line_length( $ibeg, $iend );
9356 }
9357
9358 # look ahead at the rest of the lines of this batch..
9359 my $line_t;
9360 foreach $line_t ( $line + 1 .. $max_line ) {
9361 my $ibeg_t = $$ri_first[$line_t];
9362 my $iend_t = $$ri_last[$line_t];
9363 last if ( $closing_index <= $ibeg_t );
9364
9365 # remember all different indentation objects
9366 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
9367 $saw_indentation{$indentation_t} = $indentation_t;
9368 $line_count++;
9369
9370 # remember longest line in the group
9371 my $length_t = total_line_length( $ibeg_t, $iend_t );
9372 if ( $length_t > $max_length ) {
9373 $max_length = $length_t;
9374 }
9375 }
9376 $right_margin = $rOpts_maximum_line_length - $max_length;
9377 if ( $right_margin < 0 ) { $right_margin = 0 }
9378 }
9379
9380 my $first_line_comma_count =
9381 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
9382 my $comma_count = $indentation->get_COMMA_COUNT();
9383 my $arrow_count = $indentation->get_ARROW_COUNT();
9384
9385 # This is a simple approximate test for vertical alignment:
9386 # if we broke just after an opening paren, brace, bracket,
9387 # and there are 2 or more commas in the first line,
9388 # and there are no '=>'s,
9389 # then we are probably vertically aligned. We could set
9390 # an exact flag in sub scan_list, but this is good
9391 # enough.
9392 my $indentation_count = keys %saw_indentation;
9393 my $is_vertically_aligned =
9394 ( $i == $ibeg
9395 && $first_line_comma_count > 1
9396 && $indentation_count == 1
9397 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
9398
9399 # Make the move if possible ..
9400 if (
9401
9402 # we can always move left
9403 $move_right < 0
9404
9405 # but we should only move right if we are sure it will
9406 # not spoil vertical alignment
9407 || ( $comma_count == 0 )
9408 || ( $comma_count > 0 && !$is_vertically_aligned )
9409 )
9410 {
9411 my $move =
9412 ( $move_right <= $right_margin )
9413 ? $move_right
9414 : $right_margin;
9415
9416 foreach ( keys %saw_indentation ) {
9417 $saw_indentation{$_}
9418 ->permanently_decrease_AVAILABLE_SPACES( -$move );
9419 }
9420 }
9421
9422 # Otherwise, record what we want and the vertical aligner
9423 # will try to recover it.
9424 else {
9425 $indentation->set_RECOVERABLE_SPACES($move_right);
9426 }
9427 }
9428 }
9429 }
9430 return $do_not_pad;
9431}
9432
9433# flush is called to output any tokens in the pipeline, so that
9434# an alternate source of lines can be written in the correct order
9435
9436sub flush {
9437 destroy_one_line_block();
9438 output_line_to_go();
9439 Perl::Tidy::VerticalAligner::flush();
9440}
9441
9442# output_line_to_go sends one logical line of tokens on down the
9443# pipeline to the VerticalAligner package, breaking the line into continuation
9444# lines as necessary. The line of tokens is ready to go in the "to_go"
9445# arrays.
9446
9447sub output_line_to_go {
9448
9449 # debug stuff; this routine can be called from many points
9450 FORMATTER_DEBUG_FLAG_OUTPUT && do {
9451 my ( $a, $b, $c ) = caller;
9452 write_diagnostics(
9453"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9454 );
9455 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9456 write_diagnostics("$output_str\n");
9457 };
9458
9459 # just set a tentative breakpoint if we might be in a one-line block
9460 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9461 set_forced_breakpoint($max_index_to_go);
9462 return;
9463 }
9464
9465 my $cscw_block_comment;
9466 $cscw_block_comment = add_closing_side_comment()
9467 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9468
9469 match_opening_and_closing_tokens();
9470
9471 # tell the -lp option we are outputting a batch so it can close
9472 # any unfinished items in its stack
9473 finish_lp_batch();
9474
9475 my $imin = 0;
9476 my $imax = $max_index_to_go;
9477
9478 # trim any blank tokens
9479 if ( $max_index_to_go >= 0 ) {
9480 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9481 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9482 }
9483
9484 # anything left to write?
9485 if ( $imin <= $imax ) {
9486
9487 # add a blank line before certain key types
9488 if ( $last_line_leading_type !~ /^[#b]/ ) {
9489 my $want_blank = 0;
9490 my $leading_token = $tokens_to_go[$imin];
9491 my $leading_type = $types_to_go[$imin];
9492
9493 # blank lines before subs except declarations and one-liners
9494 # MCONVERSION LOCATION - for sub tokenization change
9495 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9496 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9497 && (
9498 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9499 $imax ) !~ /^[\;\}]$/
9500 );
9501 }
9502
9503 # break before all package declarations
9504 # MCONVERSION LOCATION - for tokenizaton change
9505 elsif ( $leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) {
9506 $want_blank = ( $rOpts->{'blanks-before-subs'} );
9507 }
9508
9509 # break before certain key blocks except one-liners
9510 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9511 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9512 && (
9513 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9514 $imax ) ne '}'
9515 );
9516 }
9517
9518 # Break before certain block types if we haven't had a break at this
9519 # level for a while. This is the difficult decision..
9520 elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
9521 && $leading_type eq 'k' )
9522 {
9523 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9524 if ( !defined($lc) ) { $lc = 0 }
9525
9526 $want_blank = $rOpts->{'blanks-before-blocks'}
9527 && $lc >= $rOpts->{'long-block-line-count'}
9528 && $file_writer_object->get_consecutive_nonblank_lines() >=
9529 $rOpts->{'long-block-line-count'}
9530 && (
9531 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9532 $imax ) ne '}'
9533 );
9534 }
9535
9536 if ($want_blank) {
9537
9538 # future: send blank line down normal path to VerticalAligner
9539 Perl::Tidy::VerticalAligner::flush();
9540 $file_writer_object->write_blank_code_line();
9541 }
9542 }
9543
9544 # update blank line variables and count number of consecutive
9545 # non-blank, non-comment lines at this level
9546 $last_last_line_leading_level = $last_line_leading_level;
9547 $last_line_leading_level = $levels_to_go[$imin];
9548 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
9549 $last_line_leading_type = $types_to_go[$imin];
9550 if ( $last_line_leading_level == $last_last_line_leading_level
9551 && $last_line_leading_type ne 'b'
9552 && $last_line_leading_type ne '#'
9553 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9554 {
9555 $nonblank_lines_at_depth[$last_line_leading_level]++;
9556 }
9557 else {
9558 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
9559 }
9560
9561 FORMATTER_DEBUG_FLAG_FLUSH && do {
9562 my ( $package, $file, $line ) = caller;
9563 print
9564"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9565 };
9566
9567 # add a couple of extra terminal blank tokens
9568 pad_array_to_go();
9569
9570 # set all forced breakpoints for good list formatting
9571 my $saw_good_break = 0;
9572 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
9573
9574 if (
9575 $max_index_to_go > 0
9576 && (
9577 $is_long_line
9578 || $old_line_count_in_batch > 1
9579 || is_unbalanced_batch()
9580 || (
9581 $comma_count_in_batch
9582 && ( $rOpts_maximum_fields_per_table > 0
9583 || $rOpts_comma_arrow_breakpoints == 0 )
9584 )
9585 )
9586 )
9587 {
9588 $saw_good_break = scan_list();
9589 }
9590
9591 # let $ri_first and $ri_last be references to lists of
9592 # first and last tokens of line fragments to output..
9593 my ( $ri_first, $ri_last );
9594
9595 # write a single line if..
9596 if (
9597
9598 # we aren't allowed to add any newlines
9599 !$rOpts_add_newlines
9600
9601 # or, we don't already have an interior breakpoint
9602 # and we didn't see a good breakpoint
9603 || (
9604 !$forced_breakpoint_count
9605 && !$saw_good_break
9606
9607 # and this line is 'short'
9608 && !$is_long_line
9609 )
9610 )
9611 {
9612 @$ri_first = ($imin);
9613 @$ri_last = ($imax);
9614 }
9615
9616 # otherwise use multiple lines
9617 else {
9618
9619 ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
9620
9621 # now we do a correction step to clean this up a bit
9622 # (The only time we would not do this is for debugging)
9623 if ( $rOpts->{'recombine'} ) {
9624 ( $ri_first, $ri_last ) =
9625 recombine_breakpoints( $ri_first, $ri_last );
9626 }
9627 }
9628
9629 # do corrector step if -lp option is used
9630 my $do_not_pad = 0;
9631 if ($rOpts_line_up_parentheses) {
9632 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9633 }
9634 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9635 }
9636 prepare_for_new_input_lines();
9637
9638 # output any new -cscw block comment
9639 if ($cscw_block_comment) {
9640 flush();
9641 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9642 }
9643}
9644
9645sub reset_block_text_accumulator {
9646
9647 # save text after 'if' and 'elsif' to append after 'else'
9648 if ($accumulating_text_for_block) {
9649
9650 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
9651 push @{$rleading_block_if_elsif_text}, $leading_block_text;
9652 }
9653 }
9654 $accumulating_text_for_block = "";
9655 $leading_block_text = "";
9656 $leading_block_text_level = 0;
9657 $leading_block_text_length_exceeded = 0;
9658 $leading_block_text_line_number = 0;
9659 $leading_block_text_line_length = 0;
9660}
9661
9662sub set_block_text_accumulator {
9663 my $i = shift;
9664 $accumulating_text_for_block = $tokens_to_go[$i];
9665 if ( $accumulating_text_for_block !~ /^els/ ) {
9666 $rleading_block_if_elsif_text = [];
9667 }
9668 $leading_block_text = "";
9669 $leading_block_text_level = $levels_to_go[$i];
9670 $leading_block_text_line_number =
9671 $vertical_aligner_object->get_output_line_number();
9672 $leading_block_text_length_exceeded = 0;
9673
9674 # this will contain the column number of the last character
9675 # of the closing side comment
9676 $leading_block_text_line_length =
9677 length($accumulating_text_for_block) +
9678 length( $rOpts->{'closing-side-comment-prefix'} ) +
9679 $leading_block_text_level * $rOpts_indent_columns + 3;
9680}
9681
9682sub accumulate_block_text {
9683 my $i = shift;
9684
9685 # accumulate leading text for -csc, ignoring any side comments
9686 if ( $accumulating_text_for_block
9687 && !$leading_block_text_length_exceeded
9688 && $types_to_go[$i] ne '#' )
9689 {
9690
9691 my $added_length = length( $tokens_to_go[$i] );
9692 $added_length += 1 if $i == 0;
9693 my $new_line_length = $leading_block_text_line_length + $added_length;
9694
9695 # we can add this text if we don't exceed some limits..
9696 if (
9697
9698 # we must not have already exceeded the text length limit
9699 length($leading_block_text) <
9700 $rOpts_closing_side_comment_maximum_text
9701
9702 # and either:
9703 # the new total line length must be below the line length limit
9704 # or the new length must be below the text length limit
9705 # (ie, we may allow one token to exceed the text length limit)
9706 && ( $new_line_length < $rOpts_maximum_line_length
9707 || length($leading_block_text) + $added_length <
9708 $rOpts_closing_side_comment_maximum_text )
9709
9710 # UNLESS: we are adding a closing paren before the brace we seek.
9711 # This is an attempt to avoid situations where the ... to be
9712 # added are longer than the omitted right paren, as in:
9713
9714 # foreach my $item (@a_rather_long_variable_name_here) {
9715 # &whatever;
9716 # } ## end foreach my $item (@a_rather_long_variable_name_here...
9717
9718 || (
9719 $tokens_to_go[$i] eq ')'
9720 && (
9721 (
9722 $i + 1 <= $max_index_to_go
9723 && $block_type_to_go[ $i + 1 ] eq
9724 $accumulating_text_for_block
9725 )
9726 || ( $i + 2 <= $max_index_to_go
9727 && $block_type_to_go[ $i + 2 ] eq
9728 $accumulating_text_for_block )
9729 )
9730 )
9731 )
9732 {
9733
9734 # add an extra space at each newline
9735 if ( $i == 0 ) { $leading_block_text .= ' ' }
9736
9737 # add the token text
9738 $leading_block_text .= $tokens_to_go[$i];
9739 $leading_block_text_line_length = $new_line_length;
9740 }
9741
9742 # show that text was truncated if necessary
9743 elsif ( $types_to_go[$i] ne 'b' ) {
9744 $leading_block_text_length_exceeded = 1;
9745 $leading_block_text .= '...';
9746 }
9747 }
9748}
9749
9750{
9751 my %is_if_elsif_else_unless_while_until_for_foreach;
9752
9753 BEGIN {
9754
9755 # These block types may have text between the keyword and opening
9756 # curly. Note: 'else' does not, but must be included to allow trailing
9757 # if/elsif text to be appended.
9758 # patch for SWITCH/CASE: added 'case' and 'when'
9759 @_ = qw(if elsif else unless while until for foreach case when);
9760 @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
9761 }
9762
9763 sub accumulate_csc_text {
9764
9765 # called once per output buffer when -csc is used. Accumulates
9766 # the text placed after certain closing block braces.
9767 # Defines and returns the following for this buffer:
9768
9769 my $block_leading_text = ""; # the leading text of the last '}'
9770 my $rblock_leading_if_elsif_text;
9771 my $i_block_leading_text =
9772 -1; # index of token owning block_leading_text
9773 my $block_line_count = 100; # how many lines the block spans
9774 my $terminal_type = 'b'; # type of last nonblank token
9775 my $i_terminal = 0; # index of last nonblank token
9776 my $terminal_block_type = "";
9777
9778 for my $i ( 0 .. $max_index_to_go ) {
9779 my $type = $types_to_go[$i];
9780 my $block_type = $block_type_to_go[$i];
9781 my $token = $tokens_to_go[$i];
9782
9783 # remember last nonblank token type
9784 if ( $type ne '#' && $type ne 'b' ) {
9785 $terminal_type = $type;
9786 $terminal_block_type = $block_type;
9787 $i_terminal = $i;
9788 }
9789
9790 my $type_sequence = $type_sequence_to_go[$i];
9791 if ( $block_type && $type_sequence ) {
9792
9793 if ( $token eq '}' ) {
9794
9795 # restore any leading text saved when we entered this block
9796 if ( defined( $block_leading_text{$type_sequence} ) ) {
9797 ( $block_leading_text, $rblock_leading_if_elsif_text ) =
9798 @{ $block_leading_text{$type_sequence} };
9799 $i_block_leading_text = $i;
9800 delete $block_leading_text{$type_sequence};
9801 $rleading_block_if_elsif_text =
9802 $rblock_leading_if_elsif_text;
9803 }
9804
9805 # if we run into a '}' then we probably started accumulating
9806 # at something like a trailing 'if' clause..no harm done.
9807 if ( $accumulating_text_for_block
9808 && $levels_to_go[$i] <= $leading_block_text_level )
9809 {
9810 my $lev = $levels_to_go[$i];
9811 reset_block_text_accumulator();
9812 }
9813
9814 if ( defined( $block_opening_line_number{$type_sequence} ) )
9815 {
9816 my $output_line_number =
9817 $vertical_aligner_object->get_output_line_number();
9818 $block_line_count = $output_line_number -
9819 $block_opening_line_number{$type_sequence} + 1;
9820 delete $block_opening_line_number{$type_sequence};
9821 }
9822 else {
9823
9824 # Error: block opening line undefined for this line..
9825 # This shouldn't be possible, but it is not a
9826 # significant problem.
9827 }
9828 }
9829
9830 elsif ( $token eq '{' ) {
9831
9832 my $line_number =
9833 $vertical_aligner_object->get_output_line_number();
9834 $block_opening_line_number{$type_sequence} = $line_number;
9835
9836 if ( $accumulating_text_for_block
9837 && $levels_to_go[$i] == $leading_block_text_level )
9838 {
9839
9840 if ( $accumulating_text_for_block eq $block_type ) {
9841
9842 # save any leading text before we enter this block
9843 $block_leading_text{$type_sequence} = [
9844 $leading_block_text,
9845 $rleading_block_if_elsif_text
9846 ];
9847 $block_opening_line_number{$type_sequence} =
9848 $leading_block_text_line_number;
9849 reset_block_text_accumulator();
9850 }
9851 else {
9852
9853 # shouldn't happen, but not a serious error.
9854 # We were accumulating -csc text for block type
9855 # $accumulating_text_for_block and unexpectedly
9856 # encountered a '{' for block type $block_type.
9857 }
9858 }
9859 }
9860 }
9861
9862 if ( $type eq 'k'
9863 && $csc_new_statement_ok
9864 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
9865 && $token =~ /$closing_side_comment_list_pattern/o )
9866 {
9867 set_block_text_accumulator($i);
9868 }
9869 else {
9870
9871 # note: ignoring type 'q' because of tricks being played
9872 # with 'q' for hanging side comments
9873 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
9874 $csc_new_statement_ok =
9875 ( $block_type || $type eq 'J' || $type eq ';' );
9876 }
9877 if ( $type eq ';'
9878 && $accumulating_text_for_block
9879 && $levels_to_go[$i] == $leading_block_text_level )
9880 {
9881 reset_block_text_accumulator();
9882 }
9883 else {
9884 accumulate_block_text($i);
9885 }
9886 }
9887 }
9888
9889 # Treat an 'else' block specially by adding preceding 'if' and
9890 # 'elsif' text. Otherwise, the 'end else' is not helpful,
9891 # especially for cuddled-else formatting.
9892 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
9893 $block_leading_text =
9894 make_else_csc_text( $i_terminal, $terminal_block_type,
9895 $block_leading_text, $rblock_leading_if_elsif_text );
9896 }
9897
9898 return ( $terminal_type, $i_terminal, $i_block_leading_text,
9899 $block_leading_text, $block_line_count );
9900 }
9901}
9902
9903sub make_else_csc_text {
9904
9905 # create additional -csc text for an 'else' and optionally 'elsif',
9906 # depending on the value of switch
9907 # $rOpts_closing_side_comment_else_flag:
9908 #
9909 # = 0 add 'if' text to trailing else
9910 # = 1 same as 0 plus:
9911 # add 'if' to 'elsif's if can fit in line length
9912 # add last 'elsif' to trailing else if can fit in one line
9913 # = 2 same as 1 but do not check if exceed line length
9914 #
9915 # $rif_elsif_text = a reference to a list of all previous closing
9916 # side comments created for this if block
9917 #
9918 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
9919 my $csc_text = $block_leading_text;
9920
9921 if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
9922 {
9923 return $csc_text;
9924 }
9925
9926 my $count = @{$rif_elsif_text};
9927 return $csc_text unless ($count);
9928
9929 my $if_text = '[ if' . $rif_elsif_text->[0];
9930
9931 # always show the leading 'if' text on 'else'
9932 if ( $block_type eq 'else' ) {
9933 $csc_text .= $if_text;
9934 }
9935
9936 # see if that's all
9937 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
9938 return $csc_text;
9939 }
9940
9941 my $last_elsif_text = "";
9942 if ( $count > 1 ) {
9943 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
9944 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
9945 }
9946
9947 # tentatively append one more item
9948 my $saved_text = $csc_text;
9949 if ( $block_type eq 'else' ) {
9950 $csc_text .= $last_elsif_text;
9951 }
9952 else {
9953 $csc_text .= ' ' . $if_text;
9954 }
9955
9956 # all done if no length checks requested
9957 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
9958 return $csc_text;
9959 }
9960
9961 # undo it if line length exceeded
9962 my $length =
9963 length($csc_text) + length($block_type) +
9964 length( $rOpts->{'closing-side-comment-prefix'} ) +
9965 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
9966 if ( $length > $rOpts_maximum_line_length ) {
9967 $csc_text = $saved_text;
9968 }
9969 return $csc_text;
9970}
9971
9972sub add_closing_side_comment {
9973
9974 # add closing side comments after closing block braces if -csc used
9975 my $cscw_block_comment;
9976
9977 #---------------------------------------------------------------
9978 # Step 1: loop through all tokens of this line to accumulate
9979 # the text needed to create the closing side comments. Also see
9980 # how the line ends.
9981 #---------------------------------------------------------------
9982
9983 my ( $terminal_type, $i_terminal, $i_block_leading_text,
9984 $block_leading_text, $block_line_count )
9985 = accumulate_csc_text();
9986
9987 #---------------------------------------------------------------
9988 # Step 2: make the closing side comment if this ends a block
9989 #---------------------------------------------------------------
9990 my $have_side_comment = $i_terminal != $max_index_to_go;
9991
9992 # if this line might end in a block closure..
9993 if (
9994 $terminal_type eq '}'
9995
9996 # ..and either
9997 && (
9998
9999 # the block is long enough
10000 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
10001
10002 # or there is an existing comment to check
10003 || ( $have_side_comment
10004 && $rOpts->{'closing-side-comment-warnings'} )
10005 )
10006
10007 # .. and if this is one of the types of interest
10008 && $block_type_to_go[$i_terminal] =~
10009 /$closing_side_comment_list_pattern/o
10010
10011 # ..and the corresponding opening brace must is not in this batch
10012 # (because we do not need to tag one-line blocks, although this
10013 # should also be caught with a positive -csci value)
10014 && $mate_index_to_go[$i_terminal] < 0
10015
10016 # ..and either
10017 && (
10018
10019 # this is the last token (line doesnt have a side comment)
10020 !$have_side_comment
10021
10022 # or the old side comment is a closing side comment
10023 || $tokens_to_go[$max_index_to_go] =~
10024 /$closing_side_comment_prefix_pattern/o
10025 )
10026 )
10027 {
10028
10029 # then make the closing side comment text
10030 my $token =
10031"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
10032
10033 # append any extra descriptive text collected above
10034 if ( $i_block_leading_text == $i_terminal ) {
10035 $token .= $block_leading_text;
10036 }
10037 $token =~ s/\s*$//; # trim any trailing whitespace
10038
10039 # handle case of existing closing side comment
10040 if ($have_side_comment) {
10041
10042 # warn if requested and tokens differ significantly
10043 if ( $rOpts->{'closing-side-comment-warnings'} ) {
10044 my $old_csc = $tokens_to_go[$max_index_to_go];
10045 my $new_csc = $token;
10046 $new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
10047 my $new_trailing_dots = $1;
10048 $old_csc =~ s/\.\.\.\s*$//;
10049 $new_csc =~ s/\s+//g; # trim all whitespace
10050 $old_csc =~ s/\s+//g;
10051
10052 # Patch to handle multiple closing side comments at
10053 # else and elsif's. These have become too complicated
10054 # to check, so if we see an indication of
10055 # '[ if' or '[ # elsif', then assume they were made
10056 # by perltidy.
10057 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
10058 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
10059 }
10060 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
10061 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
10062 }
10063
10064 # if old comment is contained in new comment,
10065 # only compare the common part.
10066 if ( length($new_csc) > length($old_csc) ) {
10067 $new_csc = substr( $new_csc, 0, length($old_csc) );
10068 }
10069
10070 # if the new comment is shorter and has been limited,
10071 # only compare the common part.
10072 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
10073 {
10074 $old_csc = substr( $old_csc, 0, length($new_csc) );
10075 }
10076
10077 # any remaining difference?
10078 if ( $new_csc ne $old_csc ) {
10079
10080 # just leave the old comment if we are below the threshold
10081 # for creating side comments
10082 if ( $block_line_count <
10083 $rOpts->{'closing-side-comment-interval'} )
10084 {
10085 $token = undef;
10086 }
10087
10088 # otherwise we'll make a note of it
10089 else {
10090
10091 warning(
10092"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
10093 );
10094
10095 # save the old side comment in a new trailing block comment
10096 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
10097 $year += 1900;
10098 $month += 1;
10099 $cscw_block_comment =
10100"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
10101 }
10102 }
10103 else {
10104
10105 # No differences.. we can safely delete old comment if we
10106 # are below the threshold
10107 if ( $block_line_count <
10108 $rOpts->{'closing-side-comment-interval'} )
10109 {
10110 $token = undef;
10111 unstore_token_to_go()
10112 if ( $types_to_go[$max_index_to_go] eq '#' );
10113 unstore_token_to_go()
10114 if ( $types_to_go[$max_index_to_go] eq 'b' );
10115 }
10116 }
10117 }
10118
10119 # switch to the new csc (unless we deleted it!)
10120 $tokens_to_go[$max_index_to_go] = $token if $token;
10121 }
10122
10123 # handle case of NO existing closing side comment
10124 else {
10125
10126 # insert the new side comment into the output token stream
10127 my $type = '#';
10128 my $block_type = '';
10129 my $type_sequence = '';
10130 my $container_environment =
10131 $container_environment_to_go[$max_index_to_go];
10132 my $level = $levels_to_go[$max_index_to_go];
10133 my $slevel = $nesting_depth_to_go[$max_index_to_go];
10134 my $no_internal_newlines = 0;
10135
10136 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
10137 my $ci_level = $ci_levels_to_go[$max_index_to_go];
10138 my $in_continued_quote = 0;
10139
10140 # first insert a blank token
10141 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
10142
10143 # then the side comment
10144 insert_new_token_to_go( $token, $type, $slevel,
10145 $no_internal_newlines );
10146 }
10147 }
10148 return $cscw_block_comment;
10149}
10150
10151sub previous_nonblank_token {
10152 my ($i) = @_;
10153 if ( $i <= 0 ) {
10154 return "";
10155 }
10156 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
10157 return $tokens_to_go[ $i - 1 ];
10158 }
10159 elsif ( $i > 1 ) {
10160 return $tokens_to_go[ $i - 2 ];
10161 }
10162 else {
10163 return "";
10164 }
10165}
10166
10167sub send_lines_to_vertical_aligner {
10168
10169 my ( $ri_first, $ri_last, $do_not_pad ) = @_;
10170
10171 my $rindentation_list = [0]; # ref to indentations for each line
10172
10173 set_vertical_alignment_markers( $ri_first, $ri_last );
10174
10175 # flush if necessary to avoid unwanted alignment
10176 my $must_flush = 0;
10177 if ( @$ri_first > 1 ) {
10178
10179 # flush before a long if statement
10180 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
10181 $must_flush = 1;
10182 }
10183 }
10184 if ($must_flush) {
10185 Perl::Tidy::VerticalAligner::flush();
10186 }
10187
10188 set_logical_padding( $ri_first, $ri_last );
10189
10190 # loop to prepare each line for shipment
10191 my $n_last_line = @$ri_first - 1;
10192 my $in_comma_list;
10193 for my $n ( 0 .. $n_last_line ) {
10194 my $ibeg = $$ri_first[$n];
10195 my $iend = $$ri_last[$n];
10196
10197 my @patterns = ();
10198 my @tokens = ();
10199 my @fields = ();
10200 my $i_start = $ibeg;
10201 my $i;
10202
10203 my $depth = 0;
10204 my @container_name = ("");
10205 my @multiple_comma_arrows = (undef);
10206
10207 my $j = 0; # field index
10208
10209 $patterns[0] = "";
10210 for $i ( $ibeg .. $iend ) {
10211
10212 # Keep track of containers balanced on this line only.
10213 # These are used below to prevent unwanted cross-line alignments.
10214 # Unbalanced containers already avoid aligning across
10215 # container boundaries.
10216 if ( $tokens_to_go[$i] eq '(' ) {
10217 my $i_mate = $mate_index_to_go[$i];
10218 if ( $i_mate > $i && $i_mate <= $iend ) {
10219 $depth++;
10220 my $seqno = $type_sequence_to_go[$i];
10221 my $count = comma_arrow_count($seqno);
10222 $multiple_comma_arrows[$depth] = $count && $count > 1;
10223 my $name = previous_nonblank_token($i);
10224 $name =~ s/^->//;
10225 $container_name[$depth] = "+" . $name;
10226 }
10227 }
10228 elsif ( $tokens_to_go[$i] eq ')' ) {
10229 $depth-- if $depth > 0;
10230 }
10231
10232 # if we find a new synchronization token, we are done with
10233 # a field
10234 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
10235
10236 my $tok = my $raw_tok = $matching_token_to_go[$i];
10237
10238 # make separators in different nesting depths unique
10239 # by appending the nesting depth digit.
10240 if ( $raw_tok ne '#' ) {
10241 $tok .= "$nesting_depth_to_go[$i]";
10242 }
10243
10244 # do any special decorations for commas to avoid unwanted
10245 # cross-line alignments.
10246 if ( $raw_tok eq ',' ) {
10247 if ( $container_name[$depth] ) {
10248 $tok .= $container_name[$depth];
10249 }
10250 }
10251
10252 # decorate '=>' with:
10253 # - Nothing if this container is unbalanced on this line.
10254 # - The previous token if it is balanced and multiple '=>'s
10255 # - The container name if it is bananced and no other '=>'s
10256 elsif ( $raw_tok eq '=>' ) {
10257 if ( $container_name[$depth] ) {
10258 if ( $multiple_comma_arrows[$depth] ) {
10259 $tok .= "+" . previous_nonblank_token($i);
10260 }
10261 else {
10262 $tok .= $container_name[$depth];
10263 }
10264 }
10265 }
10266
10267 # concatenate the text of the consecutive tokens to form
10268 # the field
10269 push( @fields,
10270 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
10271
10272 # store the alignment token for this field
10273 push( @tokens, $tok );
10274
10275 # get ready for the next batch
10276 $i_start = $i;
10277 $j++;
10278 $patterns[$j] = "";
10279 }
10280
10281 # continue accumulating tokens
10282 # handle non-keywords..
10283 if ( $types_to_go[$i] ne 'k' ) {
10284 my $type = $types_to_go[$i];
10285
10286 # Mark most things before arrows as a quote to
10287 # get them to line up. Testfile: mixed.pl.
10288 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
10289 my $next_type = $types_to_go[ $i + 1 ];
10290 my $i_next_nonblank =
10291 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
10292
10293 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
10294 $type = 'Q';
10295 }
10296 }
10297
10298 # minor patch to make numbers and quotes align
10299 if ( $type eq 'n' ) { $type = 'Q' }
10300
10301 $patterns[$j] .= $type;
10302 }
10303
10304 # for keywords we have to use the actual text
10305 else {
10306
10307 # map certain keywords to the same 'if' class to align
10308 # long if/elsif sequences. my testfile: elsif.pl
10309 my $tok = $tokens_to_go[$i];
10310 if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
10311 $tok = 'if';
10312 }
10313 $patterns[$j] .= $tok;
10314 }
10315 }
10316
10317 # done with this line .. join text of tokens to make the last field
10318 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
10319
10320 my ( $indentation, $lev, $level_end, $is_semicolon_terminated,
10321 $is_outdented_line )
10322 = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
10323 $ri_first, $ri_last, $rindentation_list );
10324
10325 # we will allow outdenting of long lines..
10326 my $outdent_long_lines = (
10327
10328 # which are long quotes, if allowed
10329 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
10330
10331 # which are long block comments, if allowed
10332 || (
10333 $types_to_go[$ibeg] eq '#'
10334 && $rOpts->{'outdent-long-comments'}
10335
10336 # but not if this is a static block comment
10337 && !(
10338 $rOpts->{'static-block-comments'}
10339 && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o
10340 )
10341 )
10342 );
10343
10344 my $level_jump =
10345 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
10346
10347 my $rvertical_tightness_flags =
10348 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
10349 $ri_first, $ri_last );
10350
10351 # flush an outdented line to avoid any unwanted vertical alignment
10352 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10353
10354 # send this new line down the pipe
10355 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
10356 Perl::Tidy::VerticalAligner::append_line(
10357 $lev,
10358 $level_end,
10359 $indentation,
10360 \@fields,
10361 \@tokens,
10362 \@patterns,
10363 $forced_breakpoint_to_go[$iend] || $in_comma_list,
10364 $outdent_long_lines,
10365 $is_semicolon_terminated,
10366 $do_not_pad,
10367 $rvertical_tightness_flags,
10368 $level_jump,
10369 );
10370 $in_comma_list =
10371 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
10372
10373 # flush an outdented line to avoid any unwanted vertical alignment
10374 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10375
10376 $do_not_pad = 0;
10377
10378 } # end of loop to output each line
10379
10380 # remember indentation of lines containing opening containers for
10381 # later use by sub set_adjusted_indentation
10382 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
10383}
10384
10385{ # begin unmatched_indexes
10386
10387 # closure to keep track of unbalanced containers.
10388 # arrays shared by the routines in this block:
10389 my @unmatched_opening_indexes_in_this_batch;
10390 my @unmatched_closing_indexes_in_this_batch;
10391 my %comma_arrow_count;
10392
10393 sub is_unbalanced_batch {
10394 @unmatched_opening_indexes_in_this_batch +
10395 @unmatched_closing_indexes_in_this_batch;
10396 }
10397
10398 sub comma_arrow_count {
10399 my $seqno = $_[0];
10400 return $comma_arrow_count{$seqno};
10401 }
10402
10403 sub match_opening_and_closing_tokens {
10404
10405 # Match up indexes of opening and closing braces, etc, in this batch.
10406 # This has to be done after all tokens are stored because unstoring
10407 # of tokens would otherwise cause trouble.
10408
10409 @unmatched_opening_indexes_in_this_batch = ();
10410 @unmatched_closing_indexes_in_this_batch = ();
10411 %comma_arrow_count = ();
10412
10413 my ( $i, $i_mate, $token );
10414 foreach $i ( 0 .. $max_index_to_go ) {
10415 if ( $type_sequence_to_go[$i] ) {
10416 $token = $tokens_to_go[$i];
10417 if ( $token =~ /^[\(\[\{\?]$/ ) {
10418 push @unmatched_opening_indexes_in_this_batch, $i;
10419 }
10420 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
10421
10422 $i_mate = pop @unmatched_opening_indexes_in_this_batch;
10423 if ( defined($i_mate) && $i_mate >= 0 ) {
10424 if ( $type_sequence_to_go[$i_mate] ==
10425 $type_sequence_to_go[$i] )
10426 {
10427 $mate_index_to_go[$i] = $i_mate;
10428 $mate_index_to_go[$i_mate] = $i;
10429 }
10430 else {
10431 push @unmatched_opening_indexes_in_this_batch,
10432 $i_mate;
10433 push @unmatched_closing_indexes_in_this_batch, $i;
10434 }
10435 }
10436 else {
10437 push @unmatched_closing_indexes_in_this_batch, $i;
10438 }
10439 }
10440 }
10441 elsif ( $tokens_to_go[$i] eq '=>' ) {
10442 if (@unmatched_opening_indexes_in_this_batch) {
10443 my $j = $unmatched_opening_indexes_in_this_batch[-1];
10444 my $seqno = $type_sequence_to_go[$j];
10445 $comma_arrow_count{$seqno}++;
10446 }
10447 }
10448 }
10449 }
10450
10451 sub save_opening_indentation {
10452
10453 # This should be called after each batch of tokens is output. It
10454 # saves indentations of lines of all unmatched opening tokens.
10455 # These will be used by sub get_opening_indentation.
10456
10457 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
10458
10459 # we no longer need indentations of any saved indentations which
10460 # are unmatched closing tokens in this batch, because we will
10461 # never encounter them again. So we can delete them to keep
10462 # the hash size down.
10463 foreach (@unmatched_closing_indexes_in_this_batch) {
10464 my $seqno = $type_sequence_to_go[$_];
10465 delete $saved_opening_indentation{$seqno};
10466 }
10467
10468 # we need to save indentations of any unmatched opening tokens
10469 # in this batch because we may need them in a subsequent batch.
10470 foreach (@unmatched_opening_indexes_in_this_batch) {
10471 my $seqno = $type_sequence_to_go[$_];
10472 $saved_opening_indentation{$seqno} = [
10473 lookup_opening_indentation(
10474 $_, $ri_first, $ri_last, $rindentation_list
10475 )
10476 ];
10477 }
10478 }
10479} # end unmatched_indexes
10480
10481sub get_opening_indentation {
10482
10483 # get the indentation of the line which output the opening token
10484 # corresponding to a given closing token in the current output batch.
10485 #
10486 # given:
10487 # $i_closing - index in this line of a closing token ')' '}' or ']'
10488 #
10489 # $ri_first - reference to list of the first index $i for each output
10490 # line in this batch
10491 # $ri_last - reference to list of the last index $i for each output line
10492 # in this batch
10493 # $rindentation_list - reference to a list containing the indentation
10494 # used for each line.
10495 #
10496 # return:
10497 # -the indentation of the line which contained the opening token
10498 # which matches the token at index $i_opening
10499 # -and its offset (number of columns) from the start of the line
10500 #
10501 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
10502
10503 # first, see if the opening token is in the current batch
10504 my $i_opening = $mate_index_to_go[$i_closing];
10505 my ( $indent, $offset );
10506 if ( $i_opening >= 0 ) {
10507
10508 # it is..look up the indentation
10509 ( $indent, $offset ) =
10510 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
10511 $rindentation_list );
10512 }
10513
10514 # if not, it should have been stored in the hash by a previous batch
10515 else {
10516 my $seqno = $type_sequence_to_go[$i_closing];
10517 if ($seqno) {
10518 if ( $saved_opening_indentation{$seqno} ) {
10519 ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
10520 }
10521 }
10522
10523 # if no sequence number it must be an unbalanced container
10524 else {
10525 $indent = 0;
10526 $offset = 0;
10527 }
10528 }
10529 return ( $indent, $offset );
10530}
10531
10532sub lookup_opening_indentation {
10533
10534 # get the indentation of the line in the current output batch
10535 # which output a selected opening token
10536 #
10537 # given:
10538 # $i_opening - index of an opening token in the current output batch
10539 # whose line indentation we need
10540 # $ri_first - reference to list of the first index $i for each output
10541 # line in this batch
10542 # $ri_last - reference to list of the last index $i for each output line
10543 # in this batch
10544 # $rindentation_list - reference to a list containing the indentation
10545 # used for each line. (NOTE: the first slot in
10546 # this list is the last returned line number, and this is
10547 # followed by the list of indentations).
10548 #
10549 # return
10550 # -the indentation of the line which contained token $i_opening
10551 # -and its offset (number of columns) from the start of the line
10552
10553 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
10554
10555 my $nline = $rindentation_list->[0]; # line number of previous lookup
10556
10557 # reset line location if necessary
10558 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
10559
10560 # find the correct line
10561 unless ( $i_opening > $ri_last->[-1] ) {
10562 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
10563 }
10564
10565 # error - token index is out of bounds - shouldn't happen
10566 else {
10567 warning(
10568"non-fatal program bug in lookup_opening_indentation - index out of range\n"
10569 );
10570 report_definite_bug();
10571 $nline = $#{$ri_last};
10572 }
10573
10574 $rindentation_list->[0] =
10575 $nline; # save line number to start looking next call
10576 my $ibeg = $ri_start->[$nline];
10577 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
10578 return ( $rindentation_list->[ $nline + 1 ], $offset );
10579}
10580
10581sub set_adjusted_indentation {
10582
10583 # This routine has the final say regarding the actual indentation of
10584 # a line. It starts with the basic indentation which has been
10585 # defined for the leading token, and then takes into account any
10586 # options that the user has set regarding special indenting and
10587 # outdenting.
10588
10589 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
10590 $rindentation_list )
10591 = @_;
10592
10593 # we need to know the last token of this line
10594 my ( $terminal_type, $i_terminal ) =
10595 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
10596
10597 my $is_outdented_line = 0;
10598
10599 my $is_semicolon_terminated = $terminal_type eq ';'
10600 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
10601
10602 # Most lines are indented according to the initial token.
10603 # But it is common to outdent to the level just after the
10604 # terminal token in certain cases...
10605 # adjust_indentation flag:
10606 # 0 - do not adjust
10607 # 1 - outdent
10608 # 2 - vertically align with opening token
10609 # 3 - indent
10610 my $adjust_indentation = 0;
10611 my $default_adjust_indentation = $adjust_indentation;
10612
10613 my ( $opening_indentation, $opening_offset );
10614
10615 # if we are at a closing token of some type..
10616 if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
10617
10618 # get the indentation of the line containing the corresponding
10619 # opening token
10620 ( $opening_indentation, $opening_offset ) =
10621 get_opening_indentation( $ibeg, $ri_first, $ri_last,
10622 $rindentation_list );
10623
10624 # First set the default behavior:
10625 # default behavior is to outdent closing lines
10626 # of the form: "); }; ]; )->xxx;"
10627 if (
10628 $is_semicolon_terminated
10629
10630 # and 'cuddled parens' of the form: ")->pack("
10631 || (
10632 $terminal_type eq '('
10633 && $types_to_go[$ibeg] eq ')'
10634 && ( $nesting_depth_to_go[$iend] + 1 ==
10635 $nesting_depth_to_go[$ibeg] )
10636 )
10637 )
10638 {
10639 $adjust_indentation = 1;
10640 }
10641
10642 # TESTING: outdent something like '),'
10643 if (
10644 $terminal_type eq ','
10645
10646 # allow just one character before the comma
10647 && $i_terminal == $ibeg + 1
10648
10649 # requre LIST environment; otherwise, we may outdent too much --
10650 # this can happen in calls without parentheses (overload.t);
10651 && $container_environment_to_go[$i_terminal] eq 'LIST'
10652 )
10653 {
10654 $adjust_indentation = 1;
10655 }
10656
10657 # undo continuation indentation of a terminal closing token if
10658 # it is the last token before a level decrease. This will allow
10659 # a closing token to line up with its opening counterpart, and
10660 # avoids a indentation jump larger than 1 level.
10661 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
10662 && $i_terminal == $ibeg )
10663 {
10664 my $ci = $ci_levels_to_go[$ibeg];
10665 my $lev = $levels_to_go[$ibeg];
10666 my $next_type = $types_to_go[ $ibeg + 1 ];
10667 my $i_next_nonblank =
10668 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
10669 if ( $i_next_nonblank <= $max_index_to_go
10670 && $levels_to_go[$i_next_nonblank] < $lev )
10671 {
10672 $adjust_indentation = 1;
10673 }
10674 }
10675
10676 $default_adjust_indentation = $adjust_indentation;
10677
10678 # Now modify default behavior according to user request:
10679 # handle option to indent non-blocks of the form ); }; ];
10680 # But don't do special indentation to something like ')->pack('
10681 if ( !$block_type_to_go[$ibeg] ) {
10682 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
10683 if (
10684 $cti == 1
10685 && ( $i_terminal <= $ibeg + 1
10686 || $is_semicolon_terminated )
10687 )
10688 {
10689 $adjust_indentation = 2;
10690 }
10691 elsif ($cti == 2
10692 && $is_semicolon_terminated
10693 && $i_terminal == $ibeg + 1 )
10694 {
10695 $adjust_indentation = 3;
10696 }
10697 }
10698
10699 # handle option to indent blocks
10700 else {
10701 if (
10702 $rOpts->{'indent-closing-brace'}
10703 && (
10704 $i_terminal == $ibeg # isolated terminal '}'
10705 || $is_semicolon_terminated
10706 )
10707 ) # } xxxx ;
10708 {
10709 $adjust_indentation = 3;
10710 }
10711 }
10712 }
10713
10714 # if at ');', '};', '>;', and '];' of a terminal qw quote
10715 elsif ( $$rpatterns[0] =~ /^qb*;$/ && $$rfields[0] =~ /^([\)\}\]\>]);$/ ) {
10716 if ( $closing_token_indentation{$1} == 0 ) {
10717 $adjust_indentation = 1;
10718 }
10719 else {
10720 $adjust_indentation = 3;
10721 }
10722 }
10723
10724 # Handle variation in indentation styles...
10725 # Select the indentation object to define leading
10726 # whitespace. If we are outdenting something like '} } );'
10727 # then we want to use one level below the last token
10728 # ($i_terminal) in order to get it to fully outdent through
10729 # all levels.
10730 my $indentation;
10731 my $lev;
10732 my $level_end = $levels_to_go[$iend];
10733
10734 if ( $adjust_indentation == 0 ) {
10735 $indentation = $leading_spaces_to_go[$ibeg];
10736 $lev = $levels_to_go[$ibeg];
10737 }
10738 elsif ( $adjust_indentation == 1 ) {
10739 $indentation = $reduced_spaces_to_go[$i_terminal];
10740 $lev = $levels_to_go[$i_terminal];
10741 }
10742
10743 # handle indented closing token which aligns with opening token
10744 elsif ( $adjust_indentation == 2 ) {
10745
10746 # handle option to align closing token with opening token
10747 $lev = $levels_to_go[$ibeg];
10748
10749 # calculate spaces needed to align with opening token
10750 my $space_count = get_SPACES($opening_indentation) + $opening_offset;
10751
10752 # Indent less than the previous line.
10753 #
10754 # Problem: For -lp we don't exactly know what it was if there were
10755 # recoverable spaces sent to the aligner. A good solution would be to
10756 # force a flush of the vertical alignment buffer, so that we would
10757 # know. For now, this rule is used for -lp:
10758 #
10759 # When the last line did not start with a closing token we will be
10760 # optimistic that the aligner will recover everything wanted.
10761 #
10762 # This rule will prevent us from breaking a hierarchy of closing
10763 # tokens, and in a worst case will leave a closing paren too far
10764 # indented, but this is better than frequently leaving it not indented
10765 # enough.
10766 my $last_spaces = get_SPACES($last_indentation_written);
10767 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
10768 $last_spaces += get_RECOVERABLE_SPACES($last_indentation_written);
10769 }
10770
10771 # reset the indentation to the new space count if it works
10772 # only options are all or none: nothing in-between looks good
10773 $lev = $levels_to_go[$ibeg];
10774 if ( $space_count < $last_spaces ) {
10775 if ($rOpts_line_up_parentheses) {
10776 my $lev = $levels_to_go[$ibeg];
10777 $indentation =
10778 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
10779 }
10780 else {
10781 $indentation = $space_count;
10782 }
10783 }
10784
10785 # revert to default if it doesnt work
10786 else {
10787 $space_count = leading_spaces_to_go($ibeg);
10788 if ( $default_adjust_indentation == 0 ) {
10789 $indentation = $leading_spaces_to_go[$ibeg];
10790 }
10791 elsif ( $default_adjust_indentation == 1 ) {
10792 $indentation = $reduced_spaces_to_go[$i_terminal];
10793 $lev = $levels_to_go[$i_terminal];
10794 }
10795 }
10796 }
10797
10798 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
10799 else {
10800
10801 # There are two ways to handle -icb and -icp...
10802 # One way is to use the indentation of the previous line:
10803 # $indentation = $last_indentation_written;
10804
10805 # The other way is to use the indentation that the previous line
10806 # would have had if it hadn't been adjusted:
10807 $indentation = $last_unadjusted_indentation;
10808
10809 # Current method: use the minimum of the two. This avoids inconsistent
10810 # indentation.
10811 if ( get_SPACES($last_indentation_written) < get_SPACES($indentation) )
10812 {
10813 $indentation = $last_indentation_written;
10814 }
10815
10816 # use previous indentation but use own level
10817 # to cause list to be flushed properly
10818 $lev = $levels_to_go[$ibeg];
10819 }
10820
10821 # remember indentation except for multi-line quotes, which get
10822 # no indentation
10823 unless ( $types_to_go[$ibeg] eq 'Q' && $lev == 0 ) {
10824 $last_indentation_written = $indentation;
10825 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
10826 $last_leading_token = $tokens_to_go[$ibeg];
10827 }
10828
10829 # be sure lines with leading closing tokens are not outdented more
10830 # than the line which contained the corresponding opening token.
10831 my $is_isolated_block_brace =
10832 ( $iend == $ibeg ) && $block_type_to_go[$ibeg];
10833 if ( !$is_isolated_block_brace && defined($opening_indentation) ) {
10834 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
10835 $indentation = $opening_indentation;
10836 }
10837 }
10838
10839 # remember the indentation of each line of this batch
10840 push @{$rindentation_list}, $indentation;
10841
10842 # outdent lines with certain leading tokens...
10843 if (
10844
10845 # must be first word of this batch
10846 $ibeg == 0
10847
10848 # and ...
10849 && (
10850
10851 # certain leading keywords if requested
10852 (
10853 $rOpts->{'outdent-keywords'}
10854 && $types_to_go[$ibeg] eq 'k'
10855 && $outdent_keyword{ $tokens_to_go[$ibeg] }
10856 )
10857
10858 # or labels if requested
10859 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
10860
10861 # or static block comments if requested
10862 || ( $types_to_go[$ibeg] eq '#'
10863 && $rOpts->{'outdent-static-block-comments'}
10864 && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o
10865 && $rOpts->{'static-block-comments'} )
10866 )
10867 )
10868
10869 {
10870 my $space_count = leading_spaces_to_go($ibeg);
10871 if ( $space_count > 0 ) {
10872 $space_count -= $rOpts_continuation_indentation;
10873 $is_outdented_line = 1;
10874 if ( $space_count < 0 ) { $space_count = 0 }
10875
10876 # do not promote a spaced static block comment to non-spaced;
10877 # this is not normally necessary but could be for some
10878 # unusual user inputs (such as -ci = -i)
10879 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
10880 $space_count = 1;
10881 }
10882
10883 if ($rOpts_line_up_parentheses) {
10884 $indentation =
10885 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
10886 }
10887 else {
10888 $indentation = $space_count;
10889 }
10890 }
10891 }
10892
10893 return ( $indentation, $lev, $level_end, $is_semicolon_terminated,
10894 $is_outdented_line );
10895}
10896
10897sub set_vertical_tightness_flags {
10898
10899 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
10900
10901 # Define vertical tightness controls for the nth line of a batch.
10902 # We create an array of parameters which tell the vertical aligner
10903 # if we should combine this line with the next line to achieve the
10904 # desired vertical tightness. The array of parameters contains:
10905 #
10906 # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace
10907 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
10908 # if closing: spaces of padding to use
10909 # [2] sequence number of container
10910 # [3] valid flag: do not append if this flag is false. Will be
10911 # true if appropriate -vt flag is set. Otherwise, Will be
10912 # made true only for 2 line container in parens with -lp
10913 #
10914 # These flags are used by sub set_leading_whitespace in
10915 # the vertical aligner
10916
10917 my $rvertical_tightness_flags;
10918
10919 # For non-BLOCK tokens, we will need to examine the next line
10920 # too, so we won't consider the last line.
10921 if ( $n < $n_last_line ) {
10922
10923 # see if last token is an opening token...not a BLOCK...
10924 my $ibeg_next = $$ri_first[ $n + 1 ];
10925 my $token_end = $tokens_to_go[$iend];
10926 my $iend_next = $$ri_last[ $n + 1 ];
10927 if (
10928 $type_sequence_to_go[$iend]
10929 && !$block_type_to_go[$iend]
10930 && $is_opening_token{$token_end}
10931 && (
10932 $opening_vertical_tightness{$token_end} > 0
10933
10934 # allow 2-line method call to be closed up
10935 || ( $rOpts_line_up_parentheses
10936 && $token_end eq '('
10937 && $iend > $ibeg
10938 && $types_to_go[ $iend - 1 ] ne 'b' )
10939 )
10940 )
10941 {
10942
10943 # avoid multiple jumps in nesting depth in one line if
10944 # requested
10945 my $ovt = $opening_vertical_tightness{$token_end};
10946 my $iend_next = $$ri_last[ $n + 1 ];
10947 unless (
10948 $ovt < 2
10949 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
10950 $nesting_depth_to_go[$ibeg_next] )
10951 )
10952 {
10953
10954 # If -vt flag has not been set, mark this as invalid
10955 # and aligner will validate it if it sees the closing paren
10956 # within 2 lines.
10957 my $valid_flag = $ovt;
10958 @{$rvertical_tightness_flags} =
10959 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
10960 }
10961 }
10962
10963 # see if first token of next line is a closing token...
10964 # ..and be sure this line does not have a side comment
10965 my $token_next = $tokens_to_go[$ibeg_next];
10966 if ( $type_sequence_to_go[$ibeg_next]
10967 && !$block_type_to_go[$ibeg_next]
10968 && $is_closing_token{$token_next}
10969 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
10970 {
10971 my $ovt = $opening_vertical_tightness{$token_next};
10972 my $cvt = $closing_vertical_tightness{$token_next};
10973 if (
10974
10975 # never append a trailing line like )->pack(
10976 # because it will throw off later alignment
10977 (
10978 $nesting_depth_to_go[$ibeg_next] ==
10979 $nesting_depth_to_go[ $iend_next + 1 ] + 1
10980 )
10981 && (
10982 $cvt == 2
10983 || (
10984 $container_environment_to_go[$ibeg_next] ne 'LIST'
10985 && (
10986 $cvt == 1
10987
10988 # allow closing up 2-line method calls
10989 || ( $rOpts_line_up_parentheses
10990 && $token_next eq ')' )
10991 )
10992 )
10993 )
10994 )
10995 {
10996
10997 # decide which trailing closing tokens to append..
10998 my $ok = 0;
10999 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
11000 else {
11001 my $str = join( '',
11002 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
11003
11004 # append closing token if followed by comment or ';'
11005 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
11006 }
11007
11008 if ($ok) {
11009 my $valid_flag = $cvt;
11010 @{$rvertical_tightness_flags} = (
11011 2,
11012 $tightness{$token_next} == 2 ? 0 : 1,
11013 $type_sequence_to_go[$ibeg_next], $valid_flag,
11014 );
11015 }
11016 }
11017 }
11018 }
11019
11020 # Check for a last line with isolated opening BLOCK curly
11021 elsif ($rOpts_block_brace_vertical_tightness
11022 && $ibeg eq $iend
11023 && $types_to_go[$iend] eq '{'
11024 && $block_type_to_go[$iend] =~
11025 /$block_brace_vertical_tightness_pattern/o )
11026 {
11027 @{$rvertical_tightness_flags} =
11028 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
11029 }
11030
11031 return $rvertical_tightness_flags;
11032}
11033
11034{
11035 my %is_vertical_alignment_type;
11036 my %is_vertical_alignment_keyword;
11037
11038 BEGIN {
11039
11040 @_ = qw#
11041 = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=
11042 { ? : => =~ && ||
11043 #;
11044 @is_vertical_alignment_type{@_} = (1) x scalar(@_);
11045
11046 @_ = qw(if unless and or eq ne for foreach while until);
11047 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
11048 }
11049
11050 sub set_vertical_alignment_markers {
11051
11052 # Look at the tokens in this output batch and define the array
11053 # 'matching_token_to_go' which marks tokens at which we would
11054 # accept vertical alignment.
11055
11056 # nothing to do if we aren't allowed to change whitespace
11057 if ( !$rOpts_add_whitespace ) {
11058 for my $i ( 0 .. $max_index_to_go ) {
11059 $matching_token_to_go[$i] = '';
11060 }
11061 return;
11062 }
11063
11064 my ( $ri_first, $ri_last ) = @_;
11065
11066 # look at each line of this batch..
11067 my $last_vertical_alignment_before_index;
11068 my $vert_last_nonblank_type;
11069 my $vert_last_nonblank_token;
11070 my $vert_last_nonblank_block_type;
11071 my $max_line = @$ri_first - 1;
11072 my ( $i, $type, $token, $block_type, $alignment_type );
11073 my ( $ibeg, $iend, $line );
11074 foreach $line ( 0 .. $max_line ) {
11075 $ibeg = $$ri_first[$line];
11076 $iend = $$ri_last[$line];
11077 $last_vertical_alignment_before_index = -1;
11078 $vert_last_nonblank_type = '';
11079 $vert_last_nonblank_token = '';
11080 $vert_last_nonblank_block_type = '';
11081
11082 # look at each token in this output line..
11083 foreach $i ( $ibeg .. $iend ) {
11084 $alignment_type = '';
11085 $type = $types_to_go[$i];
11086 $block_type = $block_type_to_go[$i];
11087 $token = $tokens_to_go[$i];
11088
11089 # check for flag indicating that we should not align
11090 # this token
11091 if ( $matching_token_to_go[$i] ) {
11092 $matching_token_to_go[$i] = '';
11093 next;
11094 }
11095
11096 #--------------------------------------------------------
11097 # First see if we want to align BEFORE this token
11098 #--------------------------------------------------------
11099
11100 # The first possible token that we can align before
11101 # is index 2 because: 1) it doesn't normally make sense to
11102 # align before the first token and 2) the second
11103 # token must be a blank if we are to align before
11104 # the third
11105 if ( $i < $ibeg + 2 ) {
11106 }
11107
11108 # must follow a blank token
11109 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
11110 }
11111
11112 # align a side comment --
11113 elsif ( $type eq '#' ) {
11114
11115 unless (
11116
11117 # it is a static side comment
11118 (
11119 $rOpts->{'static-side-comments'}
11120 && $token =~ /$static_side_comment_pattern/o
11121 )
11122
11123 # or a closing side comment
11124 || ( $vert_last_nonblank_block_type
11125 && $token =~
11126 /$closing_side_comment_prefix_pattern/o )
11127 )
11128 {
11129 $alignment_type = $type;
11130 } ## Example of a static side comment
11131 }
11132
11133 # otherwise, do not align two in a row to create a
11134 # blank field
11135 elsif ( $last_vertical_alignment_before_index == $i - 2 ) {
11136 }
11137
11138 # align before one of these keywords
11139 # (within a line, since $i>1)
11140 elsif ( $type eq 'k' ) {
11141
11142 # /^(if|unless|and|or|eq|ne)$/
11143 if ( $is_vertical_alignment_keyword{$token} ) {
11144 $alignment_type = $token;
11145 }
11146 }
11147
11148 # align before one of these types..
11149 # Note: add '.' after new vertical aligner is operational
11150 elsif ( $is_vertical_alignment_type{$type} ) {
11151 $alignment_type = $token;
11152
11153 # For a paren after keyword, only align something like this:
11154 # if ( $a ) { &a }
11155 # elsif ( $b ) { &b }
11156 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
11157 $alignment_type = ""
11158 unless $vert_last_nonblank_token =~
11159 /^(if|unless|elsif)$/;
11160 }
11161
11162 # be sure the alignment tokens are unique
11163 # This didn't work well: reason not determined
11164 # if ($token ne $type) {$alignment_type .= $type}
11165 }
11166
11167 # NOTE: This is deactivated until the new vertical aligner
11168 # is finished because it causes the previous if/elsif alignment
11169 # to fail
11170 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) {
11171 # $alignment_type = $type;
11172 #}
11173
11174 if ($alignment_type) {
11175 $last_vertical_alignment_before_index = $i;
11176 }
11177
11178 #--------------------------------------------------------
11179 # Next see if we want to align AFTER the previous nonblank
11180 #--------------------------------------------------------
11181
11182 # We want to line up ',' and interior ';' tokens, with the added
11183 # space AFTER these tokens. (Note: interior ';' is included
11184 # because it may occur in short blocks).
11185 if (
11186
11187 # we haven't already set it
11188 !$alignment_type
11189
11190 # and its not the first token of the line
11191 && ( $i > $ibeg )
11192
11193 # and it follows a blank
11194 && $types_to_go[ $i - 1 ] eq 'b'
11195
11196 # and previous token IS one of these:
11197 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
11198
11199 # and it's NOT one of these
11200 && ( $type !~ /^[b\#\)\]\}]$/ )
11201
11202 # then go ahead and align
11203 )
11204
11205 {
11206 $alignment_type = $vert_last_nonblank_type;
11207 }
11208
11209 #--------------------------------------------------------
11210 # then store the value
11211 #--------------------------------------------------------
11212 $matching_token_to_go[$i] = $alignment_type;
11213 if ( $type ne 'b' ) {
11214 $vert_last_nonblank_type = $type;
11215 $vert_last_nonblank_token = $token;
11216 $vert_last_nonblank_block_type = $block_type;
11217 }
11218 }
11219 }
11220 }
11221}
11222
11223sub terminal_type {
11224
11225 # returns type of last token on this line (terminal token), as follows:
11226 # returns # for a full-line comment
11227 # returns ' ' for a blank line
11228 # otherwise returns final token type
11229
11230 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
11231
11232 # check for full-line comment..
11233 if ( $$rtype[$ibeg] eq '#' ) {
11234 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
11235 }
11236 else {
11237
11238 # start at end and walk bakwards..
11239 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
11240
11241 # skip past any side comment and blanks
11242 next if ( $$rtype[$i] eq 'b' );
11243 next if ( $$rtype[$i] eq '#' );
11244
11245 # found it..make sure it is a BLOCK termination,
11246 # but hide a terminal } after sort/grep/map because it is not
11247 # necessarily the end of the line. (terminal.t)
11248 my $terminal_type = $$rtype[$i];
11249 if (
11250 $terminal_type eq '}'
11251 && ( !$$rblock_type[$i]
11252 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
11253 )
11254 {
11255 $terminal_type = 'b';
11256 }
11257 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
11258 }
11259
11260 # empty line
11261 return wantarray ? ( ' ', $ibeg ) : ' ';
11262 }
11263}
11264
11265{
11266 my %is_good_keyword_breakpoint;
11267 my %is_lt_gt_le_ge;
11268
11269 sub set_bond_strengths {
11270
11271 BEGIN {
11272
11273 @_ = qw(if unless while until for foreach);
11274 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
11275
11276 @_ = qw(lt gt le ge);
11277 @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
11278
11279 ###############################################################
11280 # NOTE: NO_BREAK's set here are HINTS which may not be honored;
11281 # essential NO_BREAKS's must be enforced in section 2, below.
11282 ###############################################################
11283
11284 # adding NEW_TOKENS: add a left and right bond strength by
11285 # mimmicking what is done for an existing token type. You
11286 # can skip this step at first and take the default, then
11287 # tweak later to get desired results.
11288
11289 # The bond strengths should roughly follow precenence order where
11290 # possible. If you make changes, please check the results very
11291 # carefully on a variety of scripts.
11292
11293 # no break around possible filehandle
11294 $left_bond_strength{'Z'} = NO_BREAK;
11295 $right_bond_strength{'Z'} = NO_BREAK;
11296
11297 # never put a bare word on a new line:
11298 # example print (STDERR, "bla"); will fail with break after (
11299 $left_bond_strength{'w'} = NO_BREAK;
11300
11301 # blanks always have infinite strength to force breaks after real tokens
11302 $right_bond_strength{'b'} = NO_BREAK;
11303
11304 # try not to break on exponentation
11305 @_ = qw" ** .. ... <=> ";
11306 @left_bond_strength{@_} = (STRONG) x scalar(@_);
11307 @right_bond_strength{@_} = (STRONG) x scalar(@_);
11308
11309 # The comma-arrow has very low precedence but not a good break point
11310 $left_bond_strength{'=>'} = NO_BREAK;
11311 $right_bond_strength{'=>'} = NOMINAL;
11312
11313 # ok to break after label
11314 $left_bond_strength{'J'} = NO_BREAK;
11315 $right_bond_strength{'J'} = NOMINAL;
11316 $left_bond_strength{'j'} = STRONG;
11317 $right_bond_strength{'j'} = STRONG;
11318 $left_bond_strength{'A'} = STRONG;
11319 $right_bond_strength{'A'} = STRONG;
11320
11321 $left_bond_strength{'->'} = STRONG;
11322 $right_bond_strength{'->'} = VERY_STRONG;
11323
11324 # breaking AFTER these is just ok:
11325 @_ = qw" % + - * / x ";
11326 @left_bond_strength{@_} = (STRONG) x scalar(@_);
11327 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
11328
11329 # breaking BEFORE these is just ok:
11330 @_ = qw" >> << ";
11331 @right_bond_strength{@_} = (STRONG) x scalar(@_);
11332 @left_bond_strength{@_} = (NOMINAL) x scalar(@_);
11333
11334 # I prefer breaking before the string concatenation operator
11335 # because it can be hard to see at the end of a line
11336 # swap these to break after a '.'
11337 # this could be a future option
11338 $right_bond_strength{'.'} = STRONG;
11339 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
11340
11341 @_ = qw"} ] ) ";
11342 @left_bond_strength{@_} = (STRONG) x scalar(@_);
11343 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
11344
11345 # make these a little weaker than nominal so that they get
11346 # favored for end-of-line characters
11347 @_ = qw"!= == =~ !~";
11348 @left_bond_strength{@_} = (STRONG) x scalar(@_);
11349 @right_bond_strength{@_} =
11350 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
11351
11352 # break AFTER these
11353 @_ = qw" < > | & >= <=";
11354 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
11355 @right_bond_strength{@_} =
11356 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
11357
11358 # breaking either before or after a quote is ok
11359 # but bias for breaking before a quote
11360 $left_bond_strength{'Q'} = NOMINAL;
11361 $right_bond_strength{'Q'} = NOMINAL + 0.02;
11362 $left_bond_strength{'q'} = NOMINAL;
11363 $right_bond_strength{'q'} = NOMINAL;
11364
11365 # starting a line with a keyword is usually ok
11366 $left_bond_strength{'k'} = NOMINAL;
11367
11368 # we usually want to bond a keyword strongly to what immediately
11369 # follows, rather than leaving it stranded at the end of a line
11370 $right_bond_strength{'k'} = STRONG;
11371
11372 $left_bond_strength{'G'} = NOMINAL;
11373 $right_bond_strength{'G'} = STRONG;
11374
11375 # it is very good to break AFTER various assignment operators
11376 @_ = qw(
11377 = **= += *= &= <<= &&=
11378 -= /= |= >>= ||=
11379 .= %= ^=
11380 x=
11381 );
11382 @left_bond_strength{@_} = (STRONG) x scalar(@_);
11383 @right_bond_strength{@_} =
11384 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
11385
11386 # break BEFORE '&&' and '||'
11387 # set strength of '||' to same as '=' so that chains like
11388 # $a = $b || $c || $d will break before the first '||'
11389 $right_bond_strength{'||'} = NOMINAL;
11390 $left_bond_strength{'||'} = $right_bond_strength{'='};
11391
11392 # set strength of && a little higher than ||
11393 $right_bond_strength{'&&'} = NOMINAL;
11394 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
11395
11396 $left_bond_strength{';'} = VERY_STRONG;
11397 $right_bond_strength{';'} = VERY_WEAK;
11398 $left_bond_strength{'f'} = VERY_STRONG;
11399
11400 # make right strength of for ';' a little less than '='
11401 # to make for contents break after the ';' to avoid this:
11402 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
11403 # $number_of_fields )
11404 # and make it weaker than ',' and 'and' too
11405 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
11406
11407 # The strengths of ?/: should be somewhere between
11408 # an '=' and a quote (NOMINAL),
11409 # make strength of ':' slightly less than '?' to help
11410 # break long chains of ? : after the colons
11411 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
11412 $right_bond_strength{':'} = NO_BREAK;
11413 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
11414 $right_bond_strength{'?'} = NO_BREAK;
11415
11416 $left_bond_strength{','} = VERY_STRONG;
11417 $right_bond_strength{','} = VERY_WEAK;
11418
11419 # Set bond strengths of certain keywords
11420 # make 'or', 'and' slightly weaker than a ','
11421 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
11422 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
11423 $left_bond_strength{'xor'} = NOMINAL;
11424 $right_bond_strength{'and'} = NOMINAL;
11425 $right_bond_strength{'or'} = NOMINAL;
11426 $right_bond_strength{'xor'} = STRONG;
11427 }
11428
11429 # patch-its always ok to break at end of line
11430 $nobreak_to_go[$max_index_to_go] = 0;
11431
11432 # adding a small 'bias' to strengths is a simple way to make a line
11433 # break at the first of a sequence of identical terms. For example,
11434 # to force long string of conditional operators to break with
11435 # each line ending in a ':', we can add a small number to the bond
11436 # strength of each ':'
11437 my $colon_bias = 0;
11438 my $amp_bias = 0;
11439 my $bar_bias = 0;
11440 my $and_bias = 0;
11441 my $or_bias = 0;
11442 my $dot_bias = 0;
11443 my $f_bias = 0;
11444 my $code_bias = -.01;
11445 my $type = 'b';
11446 my $token = ' ';
11447 my $last_type;
11448 my $last_nonblank_type = $type;
11449 my $last_nonblank_token = $token;
11450 my $delta_bias = 0.0001;
11451 my $list_str = $left_bond_strength{'?'};
11452
11453 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
11454 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
11455 );
11456
11457 # preliminary loop to compute bond strengths
11458 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
11459 $last_type = $type;
11460 if ( $type ne 'b' ) {
11461 $last_nonblank_type = $type;
11462 $last_nonblank_token = $token;
11463 }
11464 $type = $types_to_go[$i];
11465
11466 # strength on both sides of a blank is the same
11467 if ( $type eq 'b' && $last_type ne 'b' ) {
11468 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
11469 next;
11470 }
11471
11472 $token = $tokens_to_go[$i];
11473 $block_type = $block_type_to_go[$i];
11474 $i_next = $i + 1;
11475 $next_type = $types_to_go[$i_next];
11476 $next_token = $tokens_to_go[$i_next];
11477 $total_nesting_depth = $nesting_depth_to_go[$i_next];
11478 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
11479 $next_nonblank_type = $types_to_go[$i_next_nonblank];
11480 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
11481
11482 # Some token chemistry... The decision about where to break a
11483 # line depends upon a "bond strength" between tokens. The LOWER
11484 # the bond strength, the MORE likely a break. The strength
11485 # values are based on trial-and-error, and need to be tweaked
11486 # occasionally to get desired results. Things to keep in mind
11487 # are:
11488 # 1. relative strengths are important. small differences
11489 # in strengths can make big formatting differences.
11490 # 2. each indentation level adds one unit of bond strength
11491 # 3. a value of NO_BREAK makes an unbreakable bond
11492 # 4. a value of VERY_WEAK is the strength of a ','
11493 # 5. values below NOMINAL are considered ok break points
11494 # 6. values above NOMINAL are considered poor break points
11495 # We are computing the strength of the bond between the current
11496 # token and the NEXT token.
11497 my $bond_str = VERY_STRONG; # a default, high strength
11498
11499 #---------------------------------------------------------------
11500 # section 1:
11501 # use minimum of left and right bond strengths if defined;
11502 # digraphs and trigraphs like to break on their left
11503 #---------------------------------------------------------------
11504 my $bsr = $right_bond_strength{$type};
11505
11506 if ( !defined($bsr) ) {
11507
11508 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
11509 $bsr = STRONG;
11510 }
11511 else {
11512 $bsr = VERY_STRONG;
11513 }
11514 }
11515
11516 # define right bond strengths of certain keywords
11517 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
11518 $bsr = $right_bond_strength{$token};
11519 }
11520 elsif ( $token eq 'ne' or $token eq 'eq' ) {
11521 $bsr = NOMINAL;
11522 }
11523 my $bsl = $left_bond_strength{$next_nonblank_type};
11524
11525 # set terminal bond strength to the nominal value
11526 # this will cause good preceding breaks to be retained
11527 if ( $i_next_nonblank > $max_index_to_go ) {
11528 $bsl = NOMINAL;
11529 }
11530
11531 if ( !defined($bsl) ) {
11532
11533 if ( $is_digraph{$next_nonblank_type}
11534 || $is_trigraph{$next_nonblank_type} )
11535 {
11536 $bsl = WEAK;
11537 }
11538 else {
11539 $bsl = VERY_STRONG;
11540 }
11541 }
11542
11543 # define right bond strengths of certain keywords
11544 if ( $next_nonblank_type eq 'k'
11545 && defined( $left_bond_strength{$next_nonblank_token} ) )
11546 {
11547 $bsl = $left_bond_strength{$next_nonblank_token};
11548 }
11549 elsif ($next_nonblank_token eq 'ne'
11550 or $next_nonblank_token eq 'eq' )
11551 {
11552 $bsl = NOMINAL;
11553 }
11554 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
11555 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
11556 }
11557
11558 # Note: it might seem that we would want to keep a NO_BREAK if
11559 # either token has this value. This didn't work, because in an
11560 # arrow list, it prevents the comma from separating from the
11561 # following bare word (which is probably quoted by its arrow).
11562 # So necessary NO_BREAK's have to be handled as special cases
11563 # in the final section.
11564 $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
11565 my $bond_str_1 = $bond_str;
11566
11567 #---------------------------------------------------------------
11568 # section 2:
11569 # special cases
11570 #---------------------------------------------------------------
11571
11572 # allow long lines before final { in an if statement, as in:
11573 # if (..........
11574 # ..........)
11575 # {
11576 #
11577 # Otherwise, the line before the { tends to be too short.
11578 if ( $type eq ')' ) {
11579 if ( $next_nonblank_type eq '{' ) {
11580 $bond_str = VERY_WEAK + 0.03;
11581 }
11582 }
11583
11584 elsif ( $type eq '(' ) {
11585 if ( $next_nonblank_type eq '{' ) {
11586 $bond_str = NOMINAL;
11587 }
11588 }
11589
11590 # break on something like '} (', but keep this stronger than a ','
11591 # example is in 'howe.pl'
11592 elsif ( $type eq 'R' or $type eq '}' ) {
11593 if ( $next_nonblank_type eq '(' ) {
11594 $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
11595 }
11596 }
11597
11598 #-----------------------------------------------------------------
11599 # adjust bond strength bias
11600 #-----------------------------------------------------------------
11601
11602 elsif ( $type eq 'f' ) {
11603 $bond_str += $f_bias;
11604 $f_bias += $delta_bias;
11605 }
11606
11607 # in long ?: conditionals, bias toward just one set per line (colon.t)
11608 elsif ( $type eq ':' ) {
11609 if ( !$want_break_before{$type} ) {
11610 $bond_str += $colon_bias;
11611 $colon_bias += $delta_bias;
11612 }
11613 }
11614
11615 if ( $next_nonblank_type eq ':'
11616 && $want_break_before{$next_nonblank_type} )
11617 {
11618 $bond_str += $colon_bias;
11619 $colon_bias += $delta_bias;
11620 }
11621
11622 # if leading '.' is used, align all but 'short' quotes;
11623 # the idea is to not place something like "\n" on a single line.
11624 elsif ( $next_nonblank_type eq '.' ) {
11625 if ( $want_break_before{'.'} ) {
11626 unless (
11627 $last_nonblank_type eq '.'
11628 && (
11629 length($token) <=
11630 $rOpts_short_concatenation_item_length )
11631 && ( $token !~ /^[\)\]\}]$/ )
11632 )
11633 {
11634 $dot_bias += $delta_bias;
11635 }
11636 $bond_str += $dot_bias;
11637 }
11638 }
11639 elsif ($next_nonblank_type eq '&&'
11640 && $want_break_before{$next_nonblank_type} )
11641 {
11642 $bond_str += $amp_bias;
11643 $amp_bias += $delta_bias;
11644 }
11645 elsif ($next_nonblank_type eq '||'
11646 && $want_break_before{$next_nonblank_type} )
11647 {
11648 $bond_str += $bar_bias;
11649 $bar_bias += $delta_bias;
11650 }
11651 elsif ( $next_nonblank_type eq 'k' ) {
11652
11653 if ( $next_nonblank_token eq 'and'
11654 && $want_break_before{$next_nonblank_token} )
11655 {
11656 $bond_str += $and_bias;
11657 $and_bias += $delta_bias;
11658 }
11659 elsif ($next_nonblank_token eq 'or'
11660 && $want_break_before{$next_nonblank_token} )
11661 {
11662 $bond_str += $or_bias;
11663 $or_bias += $delta_bias;
11664 }
11665
11666 # FIXME: needs more testing
11667 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
11668 $bond_str = $list_str if ( $bond_str > $list_str );
11669 }
11670 }
11671
11672 if ( $type eq ':'
11673 && !$want_break_before{$type} )
11674 {
11675 $bond_str += $colon_bias;
11676 $colon_bias += $delta_bias;
11677 }
11678 elsif ( $type eq '&&'
11679 && !$want_break_before{$type} )
11680 {
11681 $bond_str += $amp_bias;
11682 $amp_bias += $delta_bias;
11683 }
11684 elsif ( $type eq '||'
11685 && !$want_break_before{$type} )
11686 {
11687 $bond_str += $bar_bias;
11688 $bar_bias += $delta_bias;
11689 }
11690 elsif ( $type eq 'k' ) {
11691
11692 if ( $token eq 'and'
11693 && !$want_break_before{$token} )
11694 {
11695 $bond_str += $and_bias;
11696 $and_bias += $delta_bias;
11697 }
11698 elsif ( $token eq 'or'
11699 && !$want_break_before{$token} )
11700 {
11701 $bond_str += $or_bias;
11702 $or_bias += $delta_bias;
11703 }
11704 }
11705
11706 # keep matrix and hash indices together
11707 # but make them a little below STRONG to allow breaking open
11708 # something like {'some-word'}{'some-very-long-word'} at the }{
11709 # (bracebrk.t)
11710 if ( ( $type eq ']' or $type eq 'R' )
11711 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
11712 )
11713 {
11714 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
11715 }
11716
11717 if ( $next_nonblank_token =~ /^->/ ) {
11718
11719 # increase strength to the point where a break in the following
11720 # will be after the opening paren rather than at the arrow:
11721 # $a->$b($c);
11722 if ( $type eq 'i' ) {
11723 $bond_str = 1.45 * STRONG;
11724 }
11725
11726 elsif ( $type =~ /^[\)\]\}R]$/ ) {
11727 $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
11728 }
11729
11730 # otherwise make strength before an '->' a little over a '+'
11731 else {
11732 if ( $bond_str <= NOMINAL ) {
11733 $bond_str = NOMINAL + 0.01;
11734 }
11735 }
11736 }
11737
11738 if ( $token eq ')' && $next_nonblank_token eq '[' ) {
11739 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
11740 }
11741
11742 # map1.t -- correct for a quirk in perl
11743 if ( $token eq '('
11744 && $next_nonblank_type eq 'i'
11745 && $last_nonblank_type eq 'k'
11746 && $is_sort_map_grep{$last_nonblank_token} )
11747
11748 # /^(sort|map|grep)$/ )
11749 {
11750 $bond_str = NO_BREAK;
11751 }
11752
11753 # extrude.t: do not break before paren at:
11754 # -l pid_filename(
11755 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
11756 $bond_str = NO_BREAK;
11757 }
11758
11759 # good to break after end of code blocks
11760 if ( $type eq '}' && $block_type ) {
11761
11762 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
11763 $code_bias += $delta_bias;
11764 }
11765
11766 if ( $type eq 'k' ) {
11767
11768 # allow certain control keywords to stand out
11769 if ( $next_nonblank_type eq 'k'
11770 && $is_last_next_redo_return{$token} )
11771 {
11772 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
11773 }
11774
11775# Don't break after keyword my. This is a quick fix for a
11776# rare problem with perl. An example is this line from file
11777# Container.pm:
11778# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
11779
11780 if ( $token eq 'my' ) {
11781 $bond_str = NO_BREAK;
11782 }
11783
11784 }
11785
11786 # good to break before 'if', 'unless', etc
11787 if ( $is_if_brace_follower{$next_nonblank_token} ) {
11788 $bond_str = VERY_WEAK;
11789 }
11790
11791 if ( $next_nonblank_type eq 'k' ) {
11792
11793 # keywords like 'unless', 'if', etc, within statements
11794 # make good breaks
11795 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
11796 $bond_str = VERY_WEAK / 1.05;
11797 }
11798 }
11799
11800 # try not to break before a comma-arrow
11801 elsif ( $next_nonblank_type eq '=>' ) {
11802 if ( $bond_str < STRONG ) { $bond_str = STRONG }
11803 }
11804
11805 #----------------------------------------------------------------------
11806 # only set NO_BREAK's from here on
11807 #----------------------------------------------------------------------
11808 if ( $type eq 'C' or $type eq 'U' ) {
11809
11810 # use strict requires that bare word and => not be separated
11811 if ( $next_nonblank_type eq '=>' ) {
11812 $bond_str = NO_BREAK;
11813 }
11814
11815 }
11816
11817 # use strict requires that bare word within braces not start new line
11818 elsif ( $type eq 'L' ) {
11819
11820 if ( $next_nonblank_type eq 'w' ) {
11821 $bond_str = NO_BREAK;
11822 }
11823 }
11824
11825 # in older version of perl, use strict can cause problems with
11826 # breaks before bare words following opening parens. For example,
11827 # this will fail under older versions if a break is made between
11828 # '(' and 'MAIL':
11829 # use strict;
11830 # open( MAIL, "a long filename or command");
11831 # close MAIL;
11832 elsif ( $type eq '{' ) {
11833
11834 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
11835
11836 # but it's fine to break if the word is followed by a '=>'
11837 # or if it is obviously a sub call
11838 my $i_next_next_nonblank = $i_next_nonblank + 1;
11839 my $next_next_type = $types_to_go[$i_next_next_nonblank];
11840 if ( $next_next_type eq 'b'
11841 && $i_next_nonblank < $max_index_to_go )
11842 {
11843 $i_next_next_nonblank++;
11844 $next_next_type = $types_to_go[$i_next_next_nonblank];
11845 }
11846
11847 ##if ( $next_next_type ne '=>' ) {
11848 # these are ok: '->xxx', '=>', '('
11849
11850 # We'll check for an old breakpoint and keep a leading
11851 # bareword if it was that way in the input file. Presumably
11852 # it was ok that way. For example, the following would remain
11853 # unchanged:
11854 #
11855 # @months = (
11856 # January, February, March, April,
11857 # May, June, July, August,
11858 # September, October, November, December,
11859 # );
11860 #
11861 # This should be sufficient:
11862 if ( !$old_breakpoint_to_go[$i]
11863 && ( $next_next_type eq ',' || $next_next_type eq '}' )
11864 )
11865 {
11866 $bond_str = NO_BREAK;
11867 }
11868 }
11869 }
11870
11871 elsif ( $type eq 'w' ) {
11872
11873 if ( $next_nonblank_type eq 'R' ) {
11874 $bond_str = NO_BREAK;
11875 }
11876
11877 # use strict requires that bare word and => not be separated
11878 if ( $next_nonblank_type eq '=>' ) {
11879 $bond_str = NO_BREAK;
11880 }
11881 }
11882
11883 # in fact, use strict hates bare words on any new line. For example,
11884 # a break before the underscore here provokes the wrath of use strict:
11885 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
11886 elsif ( $type eq 'F' ) {
11887 $bond_str = NO_BREAK;
11888 }
11889
11890 # use strict does not allow separating type info from trailing { }
11891 # testfile is readmail.pl
11892 elsif ( $type eq 't' or $type eq 'i' ) {
11893
11894 if ( $next_nonblank_type eq 'L' ) {
11895 $bond_str = NO_BREAK;
11896 }
11897 }
11898
11899 # Do not break between a possible filehandle and a ? or /
11900 # and do not introduce a break after it if there is no blank (extrude.t)
11901 elsif ( $type eq 'Z' ) {
11902
11903 # dont break..
11904 if (
11905
11906 # if there is no blank and we do not want one. Examples:
11907 # print $x++ # do not break after $x
11908 # print HTML"HELLO" # break ok after HTML
11909 (
11910 $next_type ne 'b'
11911 && defined( $want_left_space{$next_type} )
11912 && $want_left_space{$next_type} == WS_NO
11913 )
11914
11915 # or we might be followed by the start of a quote
11916 || $next_nonblank_type =~ /^[\/\?]$/
11917 )
11918 {
11919 $bond_str = NO_BREAK;
11920 }
11921 }
11922
11923 # Do not break before a possible file handle
11924 if ( $next_nonblank_type eq 'Z' ) {
11925 $bond_str = NO_BREAK;
11926 }
11927
11928 # As a defensive measure, do not break between a '(' and a
11929 # filehandle. In some cases, this can cause an error. For
11930 # example, the following program works:
11931 # my $msg="hi!\n";
11932 # print
11933 # ( STDOUT
11934 # $msg
11935 # );
11936 #
11937 # But this program fails:
11938 # my $msg="hi!\n";
11939 # print
11940 # (
11941 # STDOUT
11942 # $msg
11943 # );
11944 #
11945 # This is normally only a problem with the 'extrude' option
11946 if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
11947 $bond_str = NO_BREAK;
11948 }
11949
11950 # patch to put cuddled elses back together when on multiple
11951 # lines, as in: } \n else \n { \n
11952 if ($rOpts_cuddled_else) {
11953
11954 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
11955 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
11956 {
11957 $bond_str = NO_BREAK;
11958 }
11959 }
11960
11961 # keep '}' together with ';'
11962 if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
11963 $bond_str = NO_BREAK;
11964 }
11965
11966 # never break between sub name and opening paren
11967 if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
11968 $bond_str = NO_BREAK;
11969 }
11970
11971 #---------------------------------------------------------------
11972 # section 3:
11973 # now take nesting depth into account
11974 #---------------------------------------------------------------
11975 # final strength incorporates the bond strength and nesting depth
11976 my $strength;
11977
11978 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
11979 if ( $total_nesting_depth > 0 ) {
11980 $strength = $bond_str + $total_nesting_depth;
11981 }
11982 else {
11983 $strength = $bond_str;
11984 }
11985 }
11986 else {
11987 $strength = NO_BREAK;
11988 }
11989
11990 # always break after side comment
11991 if ( $type eq '#' ) { $strength = 0 }
11992
11993 $bond_strength_to_go[$i] = $strength;
11994
11995 FORMATTER_DEBUG_FLAG_BOND && do {
11996 my $str = substr( $token, 0, 15 );
11997 $str .= ' ' x ( 16 - length($str) );
11998 print
11999"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
12000 };
12001 }
12002 }
12003
12004}
12005
12006sub pad_array_to_go {
12007
12008 # to simplify coding in scan_list and set_bond_strengths, it helps
12009 # to create some extra blank tokens at the end of the arrays
12010 $tokens_to_go[ $max_index_to_go + 1 ] = '';
12011 $tokens_to_go[ $max_index_to_go + 2 ] = '';
12012 $types_to_go[ $max_index_to_go + 1 ] = 'b';
12013 $types_to_go[ $max_index_to_go + 2 ] = 'b';
12014 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
12015 $nesting_depth_to_go[$max_index_to_go];
12016
12017 # /^[R\}\)\]]$/
12018 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
12019 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
12020
12021 # shouldn't happen:
12022 unless ( get_saw_brace_error() ) {
12023 warning(
12024"Program bug in scan_list: hit nesting error which should have been caught\n"
12025 );
12026 report_definite_bug();
12027 }
12028 }
12029 else {
12030 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
12031 }
12032 }
12033
12034 # /^[L\{\(\[]$/
12035 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
12036 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
12037 }
12038}
12039
12040{ # begin scan_list
12041
12042 my (
12043 $block_type, $current_depth,
12044 $depth, $i,
12045 $i_last_nonblank_token, $last_colon_sequence_number,
12046 $last_nonblank_token, $last_nonblank_type,
12047 $last_old_breakpoint_count, $minimum_depth,
12048 $next_nonblank_block_type, $next_nonblank_token,
12049 $next_nonblank_type, $old_breakpoint_count,
12050 $starting_breakpoint_count, $starting_depth,
12051 $token, $type,
12052 $type_sequence,
12053 );
12054
12055 my (
12056 @breakpoint_stack, @breakpoint_undo_stack,
12057 @comma_index, @container_type,
12058 @identifier_count_stack, @index_before_arrow,
12059 @interrupted_list, @item_count_stack,
12060 @last_comma_index, @last_dot_index,
12061 @last_nonblank_type, @old_breakpoint_count_stack,
12062 @opening_structure_index_stack, @rfor_semicolon_list,
12063 @has_old_logical_breakpoints, @rand_or_list,
12064 @i_equals,
12065 );
12066
12067 # routine to define essential variables when we go 'up' to
12068 # a new depth
12069 sub check_for_new_minimum_depth {
12070 my $depth = shift;
12071 if ( $depth < $minimum_depth ) {
12072
12073 $minimum_depth = $depth;
12074
12075 # these arrays need not retain values between calls
12076 $breakpoint_stack[$depth] = $starting_breakpoint_count;
12077 $container_type[$depth] = "";
12078 $identifier_count_stack[$depth] = 0;
12079 $index_before_arrow[$depth] = -1;
12080 $interrupted_list[$depth] = 1;
12081 $item_count_stack[$depth] = 0;
12082 $last_nonblank_type[$depth] = "";
12083 $opening_structure_index_stack[$depth] = -1;
12084
12085 $breakpoint_undo_stack[$depth] = undef;
12086 $comma_index[$depth] = undef;
12087 $last_comma_index[$depth] = undef;
12088 $last_dot_index[$depth] = undef;
12089 $old_breakpoint_count_stack[$depth] = undef;
12090 $has_old_logical_breakpoints[$depth] = 0;
12091 $rand_or_list[$depth] = [];
12092 $rfor_semicolon_list[$depth] = [];
12093 $i_equals[$depth] = -1;
12094
12095 # these arrays must retain values between calls
12096 if ( !defined( $has_broken_sublist[$depth] ) ) {
12097 $dont_align[$depth] = 0;
12098 $has_broken_sublist[$depth] = 0;
12099 $want_comma_break[$depth] = 0;
12100 }
12101 }
12102 }
12103
12104 # routine to decide which commas to break at within a container;
12105 # returns:
12106 # $bp_count = number of comma breakpoints set
12107 # $do_not_break_apart = a flag indicating if container need not
12108 # be broken open
12109 sub set_comma_breakpoints {
12110
12111 my $dd = shift;
12112 my $bp_count = 0;
12113 my $do_not_break_apart = 0;
12114 if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
12115
12116 my $fbc = $forced_breakpoint_count;
12117
12118 # always open comma lists not preceded by keywords,
12119 # barewords, identifiers (that is, anything that doesn't
12120 # look like a function call)
12121 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
12122
12123 set_comma_breakpoints_do(
12124 $dd,
12125 $opening_structure_index_stack[$dd],
12126 $i,
12127 $item_count_stack[$dd],
12128 $identifier_count_stack[$dd],
12129 $comma_index[$dd],
12130 $next_nonblank_type,
12131 $container_type[$dd],
12132 $interrupted_list[$dd],
12133 \$do_not_break_apart,
12134 $must_break_open,
12135 );
12136 $bp_count = $forced_breakpoint_count - $fbc;
12137 $do_not_break_apart = 0 if $must_break_open;
12138 }
12139 return ( $bp_count, $do_not_break_apart );
12140 }
12141
12142 my %is_logical_container;
12143
12144 BEGIN {
12145 @_ = qw# if elsif unless while and or not && | || ? : ! #;
12146 @is_logical_container{@_} = (1) x scalar(@_);
12147 }
12148
12149 sub set_for_semicolon_breakpoints {
12150 my $dd = shift;
12151 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
12152 set_forced_breakpoint($_);
12153 }
12154 }
12155
12156 sub set_logical_breakpoints {
12157 my $dd = shift;
12158 if (
12159 $item_count_stack[$dd] == 0
12160 && $is_logical_container{ $container_type[$dd] }
12161
12162 # TESTING:
12163 || $has_old_logical_breakpoints[$dd]
12164 )
12165 {
12166
12167 # Look for breaks in this order:
12168 # 0 1 2 3
12169 # or and || &&
12170 foreach my $i ( 0 .. 3 ) {
12171 if ( $rand_or_list[$dd][$i] ) {
12172 foreach ( @{ $rand_or_list[$dd][$i] } ) {
12173 set_forced_breakpoint($_);
12174 }
12175
12176 # break at any 'if' and 'unless' too
12177 foreach ( @{ $rand_or_list[$dd][4] } ) {
12178 set_forced_breakpoint($_);
12179 }
12180 $rand_or_list[$dd] = [];
12181 last;
12182 }
12183 }
12184 }
12185 }
12186
12187 sub is_unbreakable_container {
12188
12189 # never break a container of one of these types
12190 # because bad things can happen (map1.t)
12191 my $dd = shift;
12192 $is_sort_map_grep{ $container_type[$dd] };
12193 }
12194
12195 sub scan_list {
12196
12197 # This routine is responsible for setting line breaks for all lists,
12198 # so that hierarchical structure can be displayed and so that list
12199 # items can be vertically aligned. The output of this routine is
12200 # stored in the array @forced_breakpoint_to_go, which is used to set
12201 # final breakpoints.
12202
12203 $starting_depth = $nesting_depth_to_go[0];
12204
12205 $block_type = ' ';
12206 $current_depth = $starting_depth;
12207 $i = -1;
12208 $last_colon_sequence_number = -1;
12209 $last_nonblank_token = ';';
12210 $last_nonblank_type = ';';
12211 $last_old_breakpoint_count = 0;
12212 $minimum_depth = $current_depth + 1; # forces update in check below
12213 $old_breakpoint_count = 0;
12214 $starting_breakpoint_count = $forced_breakpoint_count;
12215 $token = ';';
12216 $type = ';';
12217 $type_sequence = '';
12218
12219 check_for_new_minimum_depth($current_depth);
12220
12221 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
12222 my $want_previous_breakpoint = -1;
12223
12224 my $saw_good_breakpoint;
12225 my $i_line_end = -1;
12226 my $i_line_start = -1;
12227
12228 # loop over all tokens in this batch
12229 while ( ++$i <= $max_index_to_go ) {
12230 if ( $type ne 'b' ) {
12231 $i_last_nonblank_token = $i - 1;
12232 $last_nonblank_type = $type;
12233 $last_nonblank_token = $token;
12234 }
12235 $type = $types_to_go[$i];
12236 $block_type = $block_type_to_go[$i];
12237 $token = $tokens_to_go[$i];
12238 $type_sequence = $type_sequence_to_go[$i];
12239 my $next_type = $types_to_go[ $i + 1 ];
12240 my $next_token = $tokens_to_go[ $i + 1 ];
12241 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12242 $next_nonblank_type = $types_to_go[$i_next_nonblank];
12243 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12244 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
12245
12246 # set break if flag was set
12247 if ( $want_previous_breakpoint >= 0 ) {
12248 set_forced_breakpoint($want_previous_breakpoint);
12249 $want_previous_breakpoint = -1;
12250 }
12251
12252 $last_old_breakpoint_count = $old_breakpoint_count;
12253 if ( $old_breakpoint_to_go[$i] ) {
12254 $i_line_end = $i;
12255 $i_line_start = $i_next_nonblank;
12256
12257 $old_breakpoint_count++;
12258
12259 # Break before certain keywords if user broke there and
12260 # this is a 'safe' break point. The idea is to retain
12261 # any preferred breaks for sequential list operations,
12262 # like a schwartzian transform.
12263 if ($rOpts_break_at_old_keyword_breakpoints) {
12264 if (
12265 $next_nonblank_type eq 'k'
12266 && $is_keyword_returning_list{$next_nonblank_token}
12267 && ( $type =~ /^[=\)\]\}Riw]$/
12268 || $type eq 'k'
12269 && $is_keyword_returning_list{$token} )
12270 )
12271 {
12272
12273 # we actually have to set this break next time through
12274 # the loop because if we are at a closing token (such
12275 # as '}') which forms a one-line block, this break might
12276 # get undone.
12277 $want_previous_breakpoint = $i;
12278 }
12279 }
12280 }
12281 next if ( $type eq 'b' );
12282 $depth = $nesting_depth_to_go[ $i + 1 ];
12283
12284 # safety check - be sure we always break after a comment
12285 # Shouldn't happen .. an error here probably means that the
12286 # nobreak flag did not get turned off correctly during
12287 # formatting.
12288 if ( $type eq '#' ) {
12289 if ( $i != $max_index_to_go ) {
12290 warning(
12291"Non-fatal program bug: backup logic needed to break after a comment\n"
12292 );
12293 report_definite_bug();
12294 $nobreak_to_go[$i] = 0;
12295 set_forced_breakpoint($i);
12296 }
12297 }
12298
12299 # Force breakpoints at certain tokens in long lines.
12300 # Note that such breakpoints will be undone later if these tokens
12301 # are fully contained within parens on a line.
12302 if (
12303 $type eq 'k'
12304 && $i > 0
12305 && $token =~ /^(if|unless)$/
12306 && (
12307 $is_long_line
12308
12309 # or container is broken (by side-comment, etc)
12310 || ( $next_nonblank_token eq '('
12311 && $mate_index_to_go[$i_next_nonblank] < $i )
12312 )
12313 )
12314 {
12315 set_forced_breakpoint( $i - 1 );
12316 }
12317
12318 # remember locations of '||' and '&&' for possible breaks if we
12319 # decide this is a long logical expression.
12320 if ( $type eq '||' ) {
12321 push @{ $rand_or_list[$depth][2] }, $i;
12322 ++$has_old_logical_breakpoints[$depth]
12323 if ( ( $i == $i_line_start || $i == $i_line_end )
12324 && $rOpts_break_at_old_logical_breakpoints );
12325 }
12326 elsif ( $type eq '&&' ) {
12327 push @{ $rand_or_list[$depth][3] }, $i;
12328 ++$has_old_logical_breakpoints[$depth]
12329 if ( ( $i == $i_line_start || $i == $i_line_end )
12330 && $rOpts_break_at_old_logical_breakpoints );
12331 }
12332 elsif ( $type eq 'f' ) {
12333 push @{ $rfor_semicolon_list[$depth] }, $i;
12334 }
12335 elsif ( $type eq 'k' ) {
12336 if ( $token eq 'and' ) {
12337 push @{ $rand_or_list[$depth][1] }, $i;
12338 ++$has_old_logical_breakpoints[$depth]
12339 if ( ( $i == $i_line_start || $i == $i_line_end )
12340 && $rOpts_break_at_old_logical_breakpoints );
12341 }
12342
12343 # break immediately at 'or's which are probably not in a logical
12344 # block -- but we will break in logical breaks below so that
12345 # they do not add to the forced_breakpoint_count
12346 elsif ( $token eq 'or' ) {
12347 push @{ $rand_or_list[$depth][0] }, $i;
12348 ++$has_old_logical_breakpoints[$depth]
12349 if ( ( $i == $i_line_start || $i == $i_line_end )
12350 && $rOpts_break_at_old_logical_breakpoints );
12351 if ( $is_logical_container{ $container_type[$depth] } ) {
12352 }
12353 else {
12354 if ($is_long_line) { set_forced_breakpoint($i) }
12355 elsif ( ( $i == $i_line_start || $i == $i_line_end )
12356 && $rOpts_break_at_old_logical_breakpoints )
12357 {
12358 $saw_good_breakpoint = 1;
12359 }
12360 }
12361 }
12362 elsif ( $token eq 'if' || $token eq 'unless' ) {
12363 push @{ $rand_or_list[$depth][4] }, $i;
12364 if ( ( $i == $i_line_start || $i == $i_line_end )
12365 && $rOpts_break_at_old_logical_breakpoints )
12366 {
12367 set_forced_breakpoint($i);
12368 }
12369 }
12370 }
12371 elsif ( $is_assignment{$type} ) {
12372 $i_equals[$depth] = $i;
12373 }
12374
12375 if ($type_sequence) {
12376
12377 # handle any postponed closing breakpoints
12378 if ( $token =~ /^[\)\]\}\:]$/ ) {
12379 if ( $type eq ':' ) {
12380 $last_colon_sequence_number = $type_sequence;
12381
12382 # TESTING: retain break at a ':' line break
12383 if ( ( $i == $i_line_start || $i == $i_line_end )
12384 && $rOpts_break_at_old_trinary_breakpoints )
12385 {
12386
12387 # TESTING:
12388 set_forced_breakpoint($i);
12389
12390 # break at previous '='
12391 if ( $i_equals[$depth] > 0 ) {
12392 set_forced_breakpoint( $i_equals[$depth] );
12393 $i_equals[$depth] = -1;
12394 }
12395 }
12396 }
12397 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
12398 my $inc = ( $type eq ':' ) ? 0 : 1;
12399 set_forced_breakpoint( $i - $inc );
12400 delete $postponed_breakpoint{$type_sequence};
12401 }
12402 }
12403
12404 # set breaks at ?/: if they will get separated (and are
12405 # not a ?/: chain), or if the '?' is at the end of the
12406 # line
12407 elsif ( $token eq '?' ) {
12408 my $i_colon = $mate_index_to_go[$i];
12409 if (
12410 $i_colon <= 0 # the ':' is not in this batch
12411 || $i == 0 # this '?' is the first token of the line
12412 || $i ==
12413 $max_index_to_go # or this '?' is the last token
12414 )
12415 {
12416
12417 # don't break at a '?' if preceded by ':' on
12418 # this line of previous ?/: pair on this line.
12419 # This is an attempt to preserve a chain of ?/:
12420 # expressions (elsif2.t). And don't break if
12421 # this has a side comment.
12422 set_forced_breakpoint($i)
12423 unless (
12424 $type_sequence == (
12425 $last_colon_sequence_number +
12426 TYPE_SEQUENCE_INCREMENT
12427 )
12428 || $tokens_to_go[$max_index_to_go] eq '#'
12429 );
12430 set_closing_breakpoint($i);
12431 }
12432 }
12433 }
12434
12435#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
12436
12437 #------------------------------------------------------------
12438 # Handle Increasing Depth..
12439 #
12440 # prepare for a new list when depth increases
12441 # token $i is a '(','{', or '['
12442 #------------------------------------------------------------
12443 if ( $depth > $current_depth ) {
12444
12445 $breakpoint_stack[$depth] = $forced_breakpoint_count;
12446 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
12447 $has_broken_sublist[$depth] = 0;
12448 $identifier_count_stack[$depth] = 0;
12449 $index_before_arrow[$depth] = -1;
12450 $interrupted_list[$depth] = 0;
12451 $item_count_stack[$depth] = 0;
12452 $last_comma_index[$depth] = undef;
12453 $last_dot_index[$depth] = undef;
12454 $last_nonblank_type[$depth] = $last_nonblank_type;
12455 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
12456 $opening_structure_index_stack[$depth] = $i;
12457 $rand_or_list[$depth] = [];
12458 $rfor_semicolon_list[$depth] = [];
12459 $i_equals[$depth] = -1;
12460 $want_comma_break[$depth] = 0;
12461 $container_type[$depth] =
12462 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
12463 ? $last_nonblank_token
12464 : "";
12465 $has_old_logical_breakpoints[$depth] = 0;
12466
12467 # if line ends here then signal closing token to break
12468 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
12469 {
12470 set_closing_breakpoint($i);
12471 }
12472
12473 # Not all lists of values should be vertically aligned..
12474 $dont_align[$depth] =
12475
12476 # code BLOCKS are handled at a higher level
12477 ( $block_type ne "" )
12478
12479 # certain paren lists
12480 || ( $type eq '(' ) && (
12481
12482 # it does not usually look good to align a list of
12483 # identifiers in a parameter list, as in:
12484 # my($var1, $var2, ...)
12485 # (This test should probably be refined, for now I'm just
12486 # testing for any keyword)
12487 ( $last_nonblank_type eq 'k' )
12488
12489 # a trailing '(' usually indicates a non-list
12490 || ( $next_nonblank_type eq '(' )
12491 );
12492
12493 # patch to outdent opening brace of long if/for/..
12494 # statements (like this one). See similar coding in
12495 # set_continuation breaks. We have also catch it here for
12496 # short line fragments which otherwise will not go through
12497 # set_continuation_breaks.
12498 if (
12499 $block_type
12500
12501 # if we have the ')' but not its '(' in this batch..
12502 && ( $last_nonblank_token eq ')' )
12503 && $mate_index_to_go[$i_last_nonblank_token] < 0
12504
12505 # and user wants brace to left
12506 && !$rOpts->{'opening-brace-always-on-right'}
12507
12508 && ( $type eq '{' ) # should be true
12509 && ( $token eq '{' ) # should be true
12510 )
12511 {
12512 set_forced_breakpoint( $i - 1 );
12513 }
12514 }
12515
12516 #------------------------------------------------------------
12517 # Handle Decreasing Depth..
12518 #
12519 # finish off any old list when depth decreases
12520 # token $i is a ')','}', or ']'
12521 #------------------------------------------------------------
12522 elsif ( $depth < $current_depth ) {
12523
12524 check_for_new_minimum_depth($depth);
12525
12526 # force all outer logical containers to break after we see on
12527 # old breakpoint
12528 $has_old_logical_breakpoints[$depth] ||=
12529 $has_old_logical_breakpoints[$current_depth];
12530
12531 # Patch to break between ') {' if the paren list is broken.
12532 # There is similar logic in set_continuation_breaks for
12533 # non-broken lists.
12534 if ( $token eq ')'
12535 && $next_nonblank_block_type
12536 && $interrupted_list[$current_depth]
12537 && $next_nonblank_type eq '{'
12538 && !$rOpts->{'opening-brace-always-on-right'} )
12539 {
12540 set_forced_breakpoint($i);
12541 }
12542
12543#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
12544
12545 # set breaks at commas if necessary
12546 my ( $bp_count, $do_not_break_apart ) =
12547 set_comma_breakpoints($current_depth);
12548
12549 my $i_opening = $opening_structure_index_stack[$current_depth];
12550 my $saw_opening_structure = ( $i_opening >= 0 );
12551
12552 # this term is long if we had to break at interior commas..
12553 my $is_long_term = $bp_count > 0;
12554
12555 # ..or if the length between opening and closing parens exceeds
12556 # allowed line length
12557 if ( !$is_long_term && $saw_opening_structure ) {
12558 my $i_opening_minus = find_token_starting_list($i_opening);
12559
12560 # Note: we have to allow for one extra space after a
12561 # closing token so that we do not strand a comma or
12562 # semicolon, hence the '>=' here (oneline.t)
12563 $is_long_term =
12564 excess_line_length( $i_opening_minus, $i ) >= 0;
12565 }
12566
12567 # We've set breaks after all comma-arrows. Now we have to
12568 # undo them if this can be a one-line block
12569 # (the only breakpoints set will be due to comma-arrows)
12570 if (
12571
12572 # user doesn't require breaking after all comma-arrows
12573 ( $rOpts_comma_arrow_breakpoints != 0 )
12574
12575 # and if the opening structure is in this batch
12576 && $saw_opening_structure
12577
12578 # and either on the same old line
12579 && (
12580 $old_breakpoint_count_stack[$current_depth] ==
12581 $last_old_breakpoint_count
12582
12583 # or user wants to form long blocks with arrows
12584 || $rOpts_comma_arrow_breakpoints == 2
12585 )
12586
12587 # and we made some breakpoints between the opening and closing
12588 && ( $breakpoint_undo_stack[$current_depth] <
12589 $forced_breakpoint_undo_count )
12590
12591 # and this block is short enough to fit on one line
12592 # Note: use < because need 1 more space for possible comma
12593 && !$is_long_term
12594
12595 )
12596 {
12597 undo_forced_breakpoint_stack(
12598 $breakpoint_undo_stack[$current_depth] );
12599 }
12600
12601 # now see if we have any comma breakpoints left
12602 my $has_comma_breakpoints =
12603 ( $breakpoint_stack[$current_depth] !=
12604 $forced_breakpoint_count );
12605
12606 # update broken-sublist flag of the outer container
12607 $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
12608 || $has_broken_sublist[$current_depth]
12609 || $is_long_term
12610 || $has_comma_breakpoints;
12611
12612# Having come to the closing ')', '}', or ']', now we have to decide if we
12613# should 'open up' the structure by placing breaks at the opening and
12614# closing containers. This is a tricky decision. Here are some of the
12615# basic considerations:
12616#
12617# -If this is a BLOCK container, then any breakpoints will have already
12618# been set (and according to user preferences), so we need do nothing here.
12619#
12620# -If we have a comma-separated list for which we can align the list items,
12621# then we need to do so because otherwise the vertical aligner cannot
12622# currently do the alignment.
12623#
12624# -If this container does itself contain a container which has been broken
12625# open, then it should be broken open to properly show the structure.
12626#
12627# -If there is nothing to align, and no other reason to break apart,
12628# then do not do it.
12629#
12630# We will not break open the parens of a long but 'simple' logical expression.
12631# For example:
12632#
12633# This is an example of a simple logical expression and its formatting:
12634#
12635# if ( $bigwasteofspace1 && $bigwasteofspace2
12636# || $bigwasteofspace3 && $bigwasteofspace4 )
12637#
12638# Most people would prefer this than the 'spacey' version:
12639#
12640# if (
12641# $bigwasteofspace1 && $bigwasteofspace2
12642# || $bigwasteofspace3 && $bigwasteofspace4
12643# )
12644#
12645# To illustrate the rules for breaking logical expressions, consider:
12646#
12647# FULLY DENSE:
12648# if ( $opt_excl
12649# and ( exists $ids_excl_uc{$id_uc}
12650# or grep $id_uc =~ /$_/, @ids_excl_uc ))
12651#
12652# This is on the verge of being difficult to read. The current default is to
12653# open it up like this:
12654#
12655# DEFAULT:
12656# if (
12657# $opt_excl
12658# and ( exists $ids_excl_uc{$id_uc}
12659# or grep $id_uc =~ /$_/, @ids_excl_uc )
12660# )
12661#
12662# This is a compromise which tries to avoid being too dense and to spacey.
12663# A more spaced version would be:
12664#
12665# SPACEY:
12666# if (
12667# $opt_excl
12668# and (
12669# exists $ids_excl_uc{$id_uc}
12670# or grep $id_uc =~ /$_/, @ids_excl_uc
12671# )
12672# )
12673#
12674# Some people might prefer the spacey version -- an option could be added. The
12675# innermost expression contains a long block '( exists $ids_... ')'.
12676#
12677# Here is how the logic goes: We will force a break at the 'or' that the
12678# innermost expression contains, but we will not break apart its opening and
12679# closing containers because (1) it contains no multi-line sub-containers itself,
12680# and (2) there is no alignment to be gained by breaking it open like this
12681#
12682# and (
12683# exists $ids_excl_uc{$id_uc}
12684# or grep $id_uc =~ /$_/, @ids_excl_uc
12685# )
12686#
12687# (although this looks perfectly ok and might be good for long expressions). The
12688# outer 'if' container, though, contains a broken sub-container, so it will be
12689# broken open to avoid too much density. Also, since it contains no 'or's, there
12690# will be a forced break at its 'and'.
12691
12692 # set some flags telling something about this container..
12693 my $is_simple_logical_expression = 0;
12694 if ( $item_count_stack[$current_depth] == 0
12695 && $saw_opening_structure
12696 && $tokens_to_go[$i_opening] eq '('
12697 && $is_logical_container{ $container_type[$current_depth] }
12698 )
12699 {
12700
12701 # This seems to be a simple logical expression with
12702 # no existing breakpoints. Set a flag to prevent
12703 # opening it up.
12704 if ( !$has_comma_breakpoints ) {
12705 $is_simple_logical_expression = 1;
12706 }
12707
12708 # This seems to be a simple logical expression with
12709 # breakpoints (broken sublists, for example). Break
12710 # at all 'or's and '||'s.
12711 else {
12712 set_logical_breakpoints($current_depth);
12713 }
12714 }
12715
12716 if ( $is_long_term
12717 && @{ $rfor_semicolon_list[$current_depth] } )
12718 {
12719 set_for_semicolon_breakpoints($current_depth);
12720
12721 # open up a long 'for' or 'foreach' container to allow
12722 # leading term alignment unless -lp is used.
12723 $has_comma_breakpoints = 1
12724 unless $rOpts_line_up_parentheses;
12725 }
12726
12727 if (
12728
12729 # breaks for code BLOCKS are handled at a higher level
12730 !$block_type
12731
12732 # we do not need to break at the top level of an 'if'
12733 # type expression
12734 && !$is_simple_logical_expression
12735
12736 ## modification to keep ': (' containers vertically tight;
12737 ## but probably better to let user set -vt=1 to avoid
12738 ## inconsistency with other paren types
12739 ## && ($container_type[$current_depth] ne ':')
12740
12741 # otherwise, we require one of these reasons for breaking:
12742 && (
12743
12744 # - this term has forced line breaks
12745 $has_comma_breakpoints
12746
12747 # - the opening container is separated from this batch
12748 # for some reason (comment, blank line, code block)
12749 # - this is a non-paren container spanning multiple lines
12750 || !$saw_opening_structure
12751
12752 # - this is a long block contained in another breakable
12753 # container
12754 || ( $is_long_term
12755 && $container_environment_to_go[$i_opening] ne
12756 'BLOCK' )
12757 )
12758 )
12759 {
12760
12761 # For -lp option, we must put a breakpoint before
12762 # the token which has been identified as starting
12763 # this indentation level. This is necessary for
12764 # proper alignment.
12765 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
12766 {
12767 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
12768 if ( defined($item) ) {
12769 my $i_start_2 = $item->get_STARTING_INDEX();
12770 if (
12771 defined($i_start_2)
12772
12773 # we are breaking after an opening brace, paren,
12774 # so don't break before it too
12775 && $i_start_2 ne $i_opening
12776 )
12777 {
12778
12779 # Only break for breakpoints at the same
12780 # indentation level as the opening paren
12781 my $test1 = $nesting_depth_to_go[$i_opening];
12782 my $test2 = $nesting_depth_to_go[$i_start_2];
12783 if ( $test2 == $test1 ) {
12784 set_forced_breakpoint( $i_start_2 - 1 );
12785 }
12786 }
12787 }
12788 }
12789
12790 # break after opening structure.
12791 # note: break before closing structure will be automatic
12792 if ( $minimum_depth <= $current_depth ) {
12793
12794 set_forced_breakpoint($i_opening)
12795 unless ( $do_not_break_apart
12796 || is_unbreakable_container($current_depth) );
12797
12798 # break at '.' of lower depth level before opening token
12799 if ( $last_dot_index[$depth] ) {
12800 set_forced_breakpoint( $last_dot_index[$depth] );
12801 }
12802
12803 # break before opening structure if preeced by another
12804 # closing structure and a comma. This is normally
12805 # done by the previous closing brace, but not
12806 # if it was a one-line block.
12807 if ( $i_opening > 2 ) {
12808 my $i_prev =
12809 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
12810 ? $i_opening - 2
12811 : $i_opening - 1;
12812
12813 if ( $types_to_go[$i_prev] eq ','
12814 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
12815 {
12816 set_forced_breakpoint($i_prev);
12817 }
12818
12819 # also break before something like ':(' or '?('
12820 # if appropriate.
12821 elsif (
12822 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
12823 {
12824 my $token_prev = $tokens_to_go[$i_prev];
12825 if ( $want_break_before{$token_prev} ) {
12826 set_forced_breakpoint($i_prev);
12827 }
12828 }
12829 }
12830 }
12831
12832 # break after comma following closing structure
12833 if ( $next_type eq ',' ) {
12834 set_forced_breakpoint( $i + 1 );
12835 }
12836
12837 # break before an '=' following closing structure
12838 if (
12839 $is_assignment{$next_nonblank_type}
12840 && ( $breakpoint_stack[$current_depth] !=
12841 $forced_breakpoint_count )
12842 )
12843 {
12844 set_forced_breakpoint($i);
12845 }
12846
12847 # break at any comma before the opening structure Added
12848 # for -lp, but seems to be good in general. It isn't
12849 # obvious how far back to look; the '5' below seems to
12850 # work well and will catch the comma in something like
12851 # push @list, myfunc( $param, $param, ..
12852
12853 my $icomma = $last_comma_index[$depth];
12854 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
12855 unless ( $forced_breakpoint_to_go[$icomma] ) {
12856 set_forced_breakpoint($icomma);
12857 }
12858 }
12859 } # end logic to open up a container
12860
12861 # Break open a logical container open if it was already open
12862 elsif ($is_simple_logical_expression
12863 && $has_old_logical_breakpoints[$current_depth] )
12864 {
12865 set_logical_breakpoints($current_depth);
12866 }
12867
12868 # Handle long container which does not get opened up
12869 elsif ($is_long_term) {
12870
12871 # must set fake breakpoint to alert outer containers that
12872 # they are complex
12873 set_fake_breakpoint();
12874 }
12875 }
12876
12877 #------------------------------------------------------------
12878 # Handle this token
12879 #------------------------------------------------------------
12880
12881 $current_depth = $depth;
12882
12883 # handle comma-arrow
12884 if ( $type eq '=>' ) {
12885 next if ( $last_nonblank_type eq '=>' );
12886 next if $rOpts_break_at_old_comma_breakpoints;
12887 next if $rOpts_comma_arrow_breakpoints == 3;
12888 $want_comma_break[$depth] = 1;
12889 $index_before_arrow[$depth] = $i_last_nonblank_token;
12890 next;
12891 }
12892
12893 elsif ( $type eq '.' ) {
12894 $last_dot_index[$depth] = $i;
12895 }
12896
12897 # Turn off alignment if we are sure that this is not a list
12898 # environment. To be safe, we will do this if we see certain
12899 # non-list tokens, such as ';', and also the environment is
12900 # not a list. Note that '=' could be in any of the = operators
12901 # (lextest.t). We can't just use the reported environment
12902 # because it can be incorrect in some cases.
12903 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
12904 && $container_environment_to_go[$i] ne 'LIST' )
12905 {
12906 $dont_align[$depth] = 1;
12907 $want_comma_break[$depth] = 0;
12908 $index_before_arrow[$depth] = -1;
12909 }
12910
12911 # now just handle any commas
12912 next unless ( $type eq ',' );
12913
12914 $last_dot_index[$depth] = undef;
12915 $last_comma_index[$depth] = $i;
12916
12917 # break here if this comma follows a '=>'
12918 # but not if there is a side comment after the comma
12919 if ( $want_comma_break[$depth] ) {
12920
12921 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
12922 $want_comma_break[$depth] = 0;
12923 $index_before_arrow[$depth] = -1;
12924 next;
12925 }
12926
12927 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
12928
12929 # break before the previous token if it looks safe
12930 # Example of something that we will not try to break before:
12931 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
12932 my $ibreak = $index_before_arrow[$depth] - 1;
12933 if ( $ibreak > 0
12934 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
12935 {
12936 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
12937 if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) {
12938 set_forced_breakpoint($ibreak);
12939 }
12940 }
12941
12942 $want_comma_break[$depth] = 0;
12943 $index_before_arrow[$depth] = -1;
12944
12945 # handle list which mixes '=>'s and ','s:
12946 # treat any list items so far as an interrupted list
12947 $interrupted_list[$depth] = 1;
12948 next;
12949 }
12950
12951 # skip past these commas if we are not supposed to format them
12952 next if ( $dont_align[$depth] );
12953
12954 # break after all commas above starting depth
12955 if ( $depth < $starting_depth ) {
12956 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
12957 next;
12958 }
12959
12960 # add this comma to the list..
12961 my $item_count = $item_count_stack[$depth];
12962 if ( $item_count == 0 ) {
12963
12964 # but do not form a list with no opening structure
12965 # for example:
12966
12967 # open INFILE_COPY, ">$input_file_copy"
12968 # or die ("very long message");
12969
12970 if ( ( $opening_structure_index_stack[$depth] < 0 )
12971 && $container_environment_to_go[$i] eq 'BLOCK' )
12972 {
12973 $dont_align[$depth] = 1;
12974 next;
12975 }
12976 }
12977
12978 $comma_index[$depth][$item_count] = $i;
12979 ++$item_count_stack[$depth];
12980 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
12981 $identifier_count_stack[$depth]++;
12982 }
12983 }
12984
12985 #-------------------------------------------
12986 # end of loop over all tokens in this batch
12987 #-------------------------------------------
12988
12989 # set breaks for any unfinished lists ..
12990 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
12991
12992 $interrupted_list[$dd] = 1;
12993 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
12994 set_comma_breakpoints($dd);
12995 set_logical_breakpoints($dd)
12996 if ( $has_old_logical_breakpoints[$dd] );
12997 set_for_semicolon_breakpoints($dd);
12998
12999 # break open container...
13000 my $i_opening = $opening_structure_index_stack[$dd];
13001 set_forced_breakpoint($i_opening)
13002 unless (
13003 is_unbreakable_container($dd)
13004
13005 # Avoid a break which would place an isolated ' or "
13006 # on a line
13007 || ( $type eq 'Q'
13008 && $i_opening >= $max_index_to_go - 2
13009 && $token =~ /^['"]$/ )
13010 );
13011 }
13012
13013 # Return a flag indicating if the input file had some good breakpoints.
13014 # This flag will be used to force a break in a line shorter than the
13015 # allowed line length.
13016 if ( $has_old_logical_breakpoints[$current_depth] ) {
13017 $saw_good_breakpoint = 1;
13018 }
13019 return $saw_good_breakpoint;
13020 }
13021} # end scan_list
13022
13023sub find_token_starting_list {
13024
13025 # When testing to see if a block will fit on one line, some
13026 # previous token(s) may also need to be on the line; particularly
13027 # if this is a sub call. So we will look back at least one
13028 # token. NOTE: This isn't perfect, but not critical, because
13029 # if we mis-identify a block, it will be wrapped and therefore
13030 # fixed the next time it is formatted.
13031 my $i_opening_paren = shift;
13032 my $i_opening_minus = $i_opening_paren;
13033 my $im1 = $i_opening_paren - 1;
13034 my $im2 = $i_opening_paren - 2;
13035 my $im3 = $i_opening_paren - 3;
13036 my $typem1 = $types_to_go[$im1];
13037 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
13038 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
13039 $i_opening_minus = $i_opening_paren;
13040 }
13041 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
13042 $i_opening_minus = $im1 if $im1 >= 0;
13043
13044 # walk back to improve length estimate
13045 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
13046 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
13047 $i_opening_minus = $j;
13048 }
13049 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
13050 }
13051 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
13052 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
13053 $i_opening_minus = $im2;
13054 }
13055 return $i_opening_minus;
13056}
13057
13058{ # begin set_comma_breakpoints_do
13059
13060 my %is_keyword_with_special_leading_term;
13061
13062 BEGIN {
13063
13064 # These keywords have prototypes which allow a special leading item
13065 # followed by a list
13066 @_ =
13067 qw(formline grep kill map printf sprintf push chmod join pack unshift);
13068 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
13069 }
13070
13071 sub set_comma_breakpoints_do {
13072
13073 # Given a list with some commas, set breakpoints at some of the
13074 # commas, if necessary, to make it easy to read. This list is
13075 # an example:
13076 my (
13077 $depth, $i_opening_paren, $i_closing_paren,
13078 $item_count, $identifier_count, $rcomma_index,
13079 $next_nonblank_type, $list_type, $interrupted,
13080 $rdo_not_break_apart, $must_break_open,
13081 )
13082 = @_;
13083
13084 # nothing to do if no commas seen
13085 return if ( $item_count < 1 );
13086 my $i_first_comma = $$rcomma_index[0];
13087 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
13088 my $i_last_comma = $i_true_last_comma;
13089 if ( $i_last_comma >= $max_index_to_go ) {
13090 $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
13091 return if ( $item_count < 1 );
13092 }
13093
13094 #---------------------------------------------------------------
13095 # find lengths of all items in the list to calculate page layout
13096 #---------------------------------------------------------------
13097 my $comma_count = $item_count;
13098 my @item_lengths;
13099 my @i_term_begin;
13100 my @i_term_end;
13101 my @i_term_comma;
13102 my $i_prev_plus;
13103 my @max_length = ( 0, 0 );
13104 my $first_term_length;
13105 my $i = $i_opening_paren;
13106 my $is_odd = 1;
13107
13108 for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
13109 $is_odd = 1 - $is_odd;
13110 $i_prev_plus = $i + 1;
13111 $i = $$rcomma_index[$j];
13112
13113 my $i_term_end =
13114 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
13115 my $i_term_begin =
13116 ( $types_to_go[$i_prev_plus] eq 'b' )
13117 ? $i_prev_plus + 1
13118 : $i_prev_plus;
13119 push @i_term_begin, $i_term_begin;
13120 push @i_term_end, $i_term_end;
13121 push @i_term_comma, $i;
13122
13123 # note: currently adding 2 to all lengths (for comma and space)
13124 my $length =
13125 2 + token_sequence_length( $i_term_begin, $i_term_end );
13126 push @item_lengths, $length;
13127
13128 if ( $j == 0 ) {
13129 $first_term_length = $length;
13130 }
13131 else {
13132
13133 if ( $length > $max_length[$is_odd] ) {
13134 $max_length[$is_odd] = $length;
13135 }
13136 }
13137 }
13138
13139 # now we have to make a distinction between the comma count and item
13140 # count, because the item count will be one greater than the comma
13141 # count if the last item is not terminated with a comma
13142 my $i_b =
13143 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
13144 ? $i_last_comma + 1
13145 : $i_last_comma;
13146 my $i_e =
13147 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
13148 ? $i_closing_paren - 2
13149 : $i_closing_paren - 1;
13150 my $i_effective_last_comma = $i_last_comma;
13151
13152 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
13153
13154 if ( $last_item_length > 0 ) {
13155
13156 # add 2 to length because other lengths include a comma and a blank
13157 $last_item_length += 2;
13158 push @item_lengths, $last_item_length;
13159 push @i_term_begin, $i_b + 1;
13160 push @i_term_end, $i_e;
13161 push @i_term_comma, undef;
13162
13163 my $i_odd = $item_count % 2;
13164
13165 if ( $last_item_length > $max_length[$i_odd] ) {
13166 $max_length[$i_odd] = $last_item_length;
13167 }
13168
13169 $item_count++;
13170 $i_effective_last_comma = $i_e + 1;
13171
13172 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
13173 $identifier_count++;
13174 }
13175 }
13176
13177 #---------------------------------------------------------------
13178 # End of length calculations
13179 #---------------------------------------------------------------
13180
13181 #---------------------------------------------------------------
13182 # Compound List Rule 1:
13183 # Break at (almost) every comma for a list containing a broken
13184 # sublist. This has higher priority than the Interrupted List
13185 # Rule.
13186 #---------------------------------------------------------------
13187 if ( $has_broken_sublist[$depth] ) {
13188
13189 # Break at every comma except for a comma between two
13190 # simple, small terms. This prevents long vertical
13191 # columns of, say, just 0's.
13192 my $small_length = 10; # 2 + actual maximum length wanted
13193
13194 # We'll insert a break in long runs of small terms to
13195 # allow alignment in uniform tables.
13196 my $skipped_count = 0;
13197 my $columns = table_columns_available($i_first_comma);
13198 my $fields = int( $columns / $small_length );
13199 if ( $rOpts_maximum_fields_per_table
13200 && $fields > $rOpts_maximum_fields_per_table )
13201 {
13202 $fields = $rOpts_maximum_fields_per_table;
13203 }
13204 my $max_skipped_count = $fields - 1;
13205
13206 my $is_simple_last_term = 0;
13207 my $is_simple_next_term = 0;
13208 foreach my $j ( 0 .. $item_count ) {
13209 $is_simple_last_term = $is_simple_next_term;
13210 $is_simple_next_term = 0;
13211 if ( $j < $item_count
13212 && $i_term_end[$j] == $i_term_begin[$j]
13213 && $item_lengths[$j] <= $small_length )
13214 {
13215 $is_simple_next_term = 1;
13216 }
13217 next if $j == 0;
13218 if ( $is_simple_last_term
13219 && $is_simple_next_term
13220 && $skipped_count < $max_skipped_count )
13221 {
13222 $skipped_count++;
13223 }
13224 else {
13225 $skipped_count = 0;
13226 my $i = $i_term_comma[ $j - 1 ];
13227 last unless defined $i;
13228 set_forced_breakpoint($i);
13229 }
13230 }
13231
13232 # always break at the last comma if this list is
13233 # interrupted; we wouldn't want to leave a terminal '{', for
13234 # example.
13235 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
13236 return;
13237 }
13238
13239#my ( $a, $b, $c ) = caller();
13240#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
13241#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
13242#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
13243
13244 #---------------------------------------------------------------
13245 # Interrupted List Rule:
13246 # A list is is forced to use old breakpoints if it was interrupted
13247 # by side comments or blank lines, or requested by user.
13248 #---------------------------------------------------------------
13249 if ( $rOpts_break_at_old_comma_breakpoints
13250 || $interrupted
13251 || $i_opening_paren < 0 )
13252 {
13253 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
13254 return;
13255 }
13256
13257 #---------------------------------------------------------------
13258 # Looks like a list of items. We have to look at it and size it up.
13259 #---------------------------------------------------------------
13260
13261 my $opening_token = $tokens_to_go[$i_opening_paren];
13262 my $opening_environment =
13263 $container_environment_to_go[$i_opening_paren];
13264
13265 #-------------------------------------------------------------------
13266 # Return if this will fit on one line
13267 #-------------------------------------------------------------------
13268
13269 my $i_opening_minus = find_token_starting_list($i_opening_paren);
13270 return
13271 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
13272
13273 #-------------------------------------------------------------------
13274 # Now we know that this block spans multiple lines; we have to set
13275 # at least one breakpoint -- real or fake -- as a signal to break
13276 # open any outer containers.
13277 #-------------------------------------------------------------------
13278 set_fake_breakpoint();
13279
13280 # be sure we do not extend beyond the current list length
13281 if ( $i_effective_last_comma >= $max_index_to_go ) {
13282 $i_effective_last_comma = $max_index_to_go - 1;
13283 }
13284
13285 # Set a flag indicating if we need to break open to keep -lp
13286 # items aligned. This is necessary if any of the list terms
13287 # exceeds the available space after the '('.
13288 my $need_lp_break_open = $must_break_open;
13289 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
13290 my $columns_if_unbroken = $rOpts_maximum_line_length -
13291 total_line_length( $i_opening_minus, $i_opening_paren );
13292 $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
13293 || ( $max_length[1] > $columns_if_unbroken )
13294 || ( $first_term_length > $columns_if_unbroken );
13295 }
13296
13297 # Specify if the list must have an even number of fields or not.
13298 # It is generally safest to assume an even number, because the
13299 # list items might be a hash list. But if we can be sure that
13300 # it is not a hash, then we can allow an odd number for more
13301 # flexibility.
13302 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
13303
13304 if ( $identifier_count >= $item_count - 1
13305 || $is_assignment{$next_nonblank_type}
13306 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
13307 )
13308 {
13309 $odd_or_even = 1;
13310 }
13311
13312 # do we have a long first term which should be
13313 # left on a line by itself?
13314 my $use_separate_first_term = (
13315 $odd_or_even == 1 # only if we can use 1 field/line
13316 && $item_count > 3 # need several items
13317 && $first_term_length >
13318 2 * $max_length[0] - 2 # need long first term
13319 && $first_term_length >
13320 2 * $max_length[1] - 2 # need long first term
13321 );
13322
13323 # or do we know from the type of list that the first term should
13324 # be placed alone?
13325 if ( !$use_separate_first_term ) {
13326 if ( $is_keyword_with_special_leading_term{$list_type} ) {
13327 $use_separate_first_term = 1;
13328
13329 # should the container be broken open?
13330 if ( $item_count < 3 ) {
13331 if ( $i_first_comma - $i_opening_paren < 4 ) {
13332 $$rdo_not_break_apart = 1;
13333 }
13334 }
13335 elsif ($first_term_length < 20
13336 && $i_first_comma - $i_opening_paren < 4 )
13337 {
13338 my $columns = table_columns_available($i_first_comma);
13339 if ( $first_term_length < $columns ) {
13340 $$rdo_not_break_apart = 1;
13341 }
13342 }
13343 }
13344 }
13345
13346 # if so,
13347 if ($use_separate_first_term) {
13348
13349 # ..set a break and update starting values
13350 $use_separate_first_term = 1;
13351 set_forced_breakpoint($i_first_comma);
13352 $i_opening_paren = $i_first_comma;
13353 $i_first_comma = $$rcomma_index[1];
13354 $item_count--;
13355 return if $comma_count == 1;
13356 shift @item_lengths;
13357 shift @i_term_begin;
13358 shift @i_term_end;
13359 shift @i_term_comma;
13360 }
13361
13362 # if not, update the metrics to include the first term
13363 else {
13364 if ( $first_term_length > $max_length[0] ) {
13365 $max_length[0] = $first_term_length;
13366 }
13367 }
13368
13369 # Field width parameters
13370 my $pair_width = ( $max_length[0] + $max_length[1] );
13371 my $max_width =
13372 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
13373
13374 # Number of free columns across the page width for laying out tables
13375 my $columns = table_columns_available($i_first_comma);
13376
13377 # Estimated maximum number of fields which fit this space
13378 # This will be our first guess
13379 my $number_of_fields_max =
13380 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
13381 $pair_width );
13382 my $number_of_fields = $number_of_fields_max;
13383
13384 # Find the best-looking number of fields
13385 # and make this our second guess if possible
13386 my ( $number_of_fields_best, $ri_ragged_break_list,
13387 $new_identifier_count )
13388 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
13389 $max_width );
13390
13391 if ( $number_of_fields_best != 0
13392 && $number_of_fields_best < $number_of_fields_max )
13393 {
13394 $number_of_fields = $number_of_fields_best;
13395 }
13396
13397 # ----------------------------------------------------------------------
13398 # If we are crowded and the -lp option is being used, try to
13399 # undo some indentation
13400 # ----------------------------------------------------------------------
13401 if (
13402 $rOpts_line_up_parentheses
13403 && (
13404 $number_of_fields == 0
13405 || ( $number_of_fields == 1
13406 && $number_of_fields != $number_of_fields_best )
13407 )
13408 )
13409 {
13410 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
13411 if ( $available_spaces > 0 ) {
13412
13413 my $spaces_wanted = $max_width - $columns; # for 1 field
13414
13415 if ( $number_of_fields_best == 0 ) {
13416 $number_of_fields_best =
13417 get_maximum_fields_wanted( \@item_lengths );
13418 }
13419
13420 if ( $number_of_fields_best != 1 ) {
13421 my $spaces_wanted_2 =
13422 1 + $pair_width - $columns; # for 2 fields
13423 if ( $available_spaces > $spaces_wanted_2 ) {
13424 $spaces_wanted = $spaces_wanted_2;
13425 }
13426 }
13427
13428 if ( $spaces_wanted > 0 ) {
13429 my $deleted_spaces =
13430 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
13431
13432 # redo the math
13433 if ( $deleted_spaces > 0 ) {
13434 $columns = table_columns_available($i_first_comma);
13435 $number_of_fields_max =
13436 maximum_number_of_fields( $columns, $odd_or_even,
13437 $max_width, $pair_width );
13438 $number_of_fields = $number_of_fields_max;
13439
13440 if ( $number_of_fields_best == 1
13441 && $number_of_fields >= 1 )
13442 {
13443 $number_of_fields = $number_of_fields_best;
13444 }
13445 }
13446 }
13447 }
13448 }
13449
13450 # try for one column if two won't work
13451 if ( $number_of_fields <= 0 ) {
13452 $number_of_fields = int( $columns / $max_width );
13453 }
13454
13455 # The user can place an upper bound on the number of fields,
13456 # which can be useful for doing maintenance on tables
13457 if ( $rOpts_maximum_fields_per_table
13458 && $number_of_fields > $rOpts_maximum_fields_per_table )
13459 {
13460 $number_of_fields = $rOpts_maximum_fields_per_table;
13461 }
13462
13463 # How many columns (characters) and lines would this container take
13464 # if no additional whitespace were added?
13465 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
13466 $i_effective_last_comma + 1 );
13467 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
13468 my $packed_lines = 1 + int( $packed_columns / $columns );
13469
13470 # are we an item contained in an outer list?
13471 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
13472
13473 if ( $number_of_fields <= 0 ) {
13474
13475# #---------------------------------------------------------------
13476# # We're in trouble. We can't find a single field width that works.
13477# # There is no simple answer here; we may have a single long list
13478# # item, or many.
13479# #---------------------------------------------------------------
13480#
13481# In many cases, it may be best to not force a break if there is just one
13482# comma, because the standard continuation break logic will do a better
13483# job without it.
13484#
13485# In the common case that all but one of the terms can fit
13486# on a single line, it may look better not to break open the
13487# containing parens. Consider, for example
13488#
13489# $color =
13490# join ( '/',
13491# sort { $color_value{$::a} <=> $color_value{$::b}; }
13492# keys %colors );
13493#
13494# which will look like this with the container broken:
13495#
13496# $color = join (
13497# '/',
13498# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
13499# );
13500#
13501# Here is an example of this rule for a long last term:
13502#
13503# log_message( 0, 256, 128,
13504# "Number of routes in adj-RIB-in to be considered: $peercount" );
13505#
13506# And here is an example with a long first term:
13507#
13508# $s = sprintf(
13509# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
13510# $r, $pu, $ps, $cu, $cs, $tt
13511# )
13512# if $style eq 'all';
13513
13514 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
13515 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
13516 my $long_first_term =
13517 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
13518
13519 # break at every comma ...
13520 if (
13521
13522 # if requested by user or is best looking
13523 $number_of_fields_best == 1
13524
13525 # or if this is a sublist of a larger list
13526 || $in_hierarchical_list
13527
13528 # or if multiple commas and we dont have a long first or last
13529 # term
13530 || ( $comma_count > 1
13531 && !( $long_last_term || $long_first_term ) )
13532 )
13533 {
13534 foreach ( 0 .. $comma_count - 1 ) {
13535 set_forced_breakpoint( $$rcomma_index[$_] );
13536 }
13537 }
13538 elsif ($long_last_term) {
13539
13540 set_forced_breakpoint($i_last_comma);
13541 $$rdo_not_break_apart = 1 unless $must_break_open;
13542 }
13543 elsif ($long_first_term) {
13544
13545 set_forced_breakpoint($i_first_comma);
13546 }
13547 else {
13548
13549 # let breaks be defined by default bond strength logic
13550 }
13551 return;
13552 }
13553
13554 # --------------------------------------------------------
13555 # We have a tentative field count that seems to work.
13556 # How many lines will this require?
13557 # --------------------------------------------------------
13558 my $formatted_lines = $item_count / ($number_of_fields);
13559 if ( $formatted_lines != int $formatted_lines ) {
13560 $formatted_lines = 1 + int $formatted_lines;
13561 }
13562
13563 # So far we've been trying to fill out to the right margin. But
13564 # compact tables are easier to read, so let's see if we can use fewer
13565 # fields without increasing the number of lines.
13566 $number_of_fields =
13567 compactify_table( $item_count, $number_of_fields, $formatted_lines,
13568 $odd_or_even );
13569
13570 # How many spaces across the page will we fill?
13571 my $columns_per_line =
13572 ( int $number_of_fields / 2 ) * $pair_width +
13573 ( $number_of_fields % 2 ) * $max_width;
13574
13575 my $formatted_columns;
13576
13577 if ( $number_of_fields > 1 ) {
13578 $formatted_columns =
13579 ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) *
13580 $max_width );
13581 }
13582 else {
13583 $formatted_columns = $max_width * $item_count;
13584 }
13585 if ( $formatted_columns < $packed_columns ) {
13586 $formatted_columns = $packed_columns;
13587 }
13588
13589 my $unused_columns = $formatted_columns - $packed_columns;
13590
13591 # set some empirical parameters to help decide if we should try to
13592 # align; high sparsity does not look good, especially with few lines
13593 my $sparsity = ($unused_columns) / ($formatted_columns);
13594 my $max_allowed_sparsity =
13595 ( $item_count < 3 ) ? 0.1
13596 : ( $packed_lines == 1 ) ? 0.15
13597 : ( $packed_lines == 2 ) ? 0.4
13598 : 0.7;
13599
13600 # Begin check for shortcut methods, which avoid treating a list
13601 # as a table for relatively small parenthesized lists. These
13602 # are usually easier to read if not formatted as tables.
13603 if (
13604 $packed_lines <= 2 # probably can fit in 2 lines
13605 && $item_count < 9 # doesn't have too many items
13606 && $opening_environment eq 'BLOCK' # not a sub-container
13607 && $opening_token eq '(' # is paren list
13608 )
13609 {
13610
13611 # Shortcut method 1: for -lp and just one comma:
13612 # This is a no-brainer, just break at the comma.
13613 if (
13614 $rOpts_line_up_parentheses # -lp
13615 && $item_count == 2 # two items, one comma
13616 && !$must_break_open
13617 )
13618 {
13619 my $i_break = $$rcomma_index[0];
13620 set_forced_breakpoint($i_break);
13621 $$rdo_not_break_apart = 1;
13622 set_non_alignment_flags( $comma_count, $rcomma_index );
13623 return;
13624
13625 }
13626
13627 # method 2 is for most small ragged lists which might look
13628 # best if not displayed as a table.
13629 if (
13630 ( $number_of_fields == 2 && $item_count == 3 )
13631 || (
13632 $new_identifier_count > 0 # isn't all quotes
13633 && $sparsity > 0.15
13634 ) # would be fairly spaced gaps if aligned
13635 )
13636 {
13637
13638 my $break_count =
13639 set_ragged_breakpoints( \@i_term_comma,
13640 $ri_ragged_break_list );
13641 ++$break_count if ($use_separate_first_term);
13642
13643 # NOTE: we should really use the true break count here,
13644 # which can be greater if there are large terms and
13645 # little space, but usually this will work well enough.
13646 unless ($must_break_open) {
13647
13648 if ( $break_count <= 1 ) {
13649 $$rdo_not_break_apart = 1;
13650 }
13651 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
13652 {
13653 $$rdo_not_break_apart = 1;
13654 }
13655 }
13656 set_non_alignment_flags( $comma_count, $rcomma_index );
13657 return;
13658 }
13659
13660 } # end shortcut methods
13661
13662 # debug stuff
13663
13664 FORMATTER_DEBUG_FLAG_SPARSE && do {
13665 print
13666"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
13667
13668 };
13669
13670 #---------------------------------------------------------------
13671 # Compound List Rule 2:
13672 # If this list is too long for one line, and it is an item of a
13673 # larger list, then we must format it, regardless of sparsity
13674 # (ian.t). One reason that we have to do this is to trigger
13675 # Compound List Rule 1, above, which causes breaks at all commas of
13676 # all outer lists. In this way, the structure will be properly
13677 # displayed.
13678 #---------------------------------------------------------------
13679
13680 # Decide if this list is too long for one line unless broken
13681 my $total_columns = table_columns_available($i_opening_paren);
13682 my $too_long = $packed_columns > $total_columns;
13683
13684 # For a paren list, include the length of the token just before the
13685 # '(' because this is likely a sub call, and we would have to
13686 # include the sub name on the same line as the list. This is still
13687 # imprecise, but not too bad. (steve.t)
13688 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
13689
13690 $too_long =
13691 excess_line_length( $i_opening_minus,
13692 $i_effective_last_comma + 1 ) > 0;
13693 }
13694
13695 # FIXME: For an item after a '=>', try to include the length of the
13696 # thing before the '=>'. This is crude and should be improved by
13697 # actually looking back token by token.
13698 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
13699 my $i_opening_minus = $i_opening_paren - 4;
13700 if ( $i_opening_minus >= 0 ) {
13701 $too_long =
13702 excess_line_length( $i_opening_minus,
13703 $i_effective_last_comma + 1 ) > 0;
13704 }
13705 }
13706
13707 # Always break lists contained in '[' and '{' if too long for 1 line,
13708 # and always break lists which are too long and part of a more complex
13709 # structure.
13710 my $must_break_open_container = $must_break_open
13711 || ( $too_long
13712 && ( $in_hierarchical_list || $opening_token ne '(' ) );
13713
13714#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
13715
13716 #---------------------------------------------------------------
13717 # The main decision:
13718 # Now decide if we will align the data into aligned columns. Do not
13719 # attempt to align columns if this is a tiny table or it would be
13720 # too spaced. It seems that the more packed lines we have, the
13721 # sparser the list that can be allowed and still look ok.
13722 #---------------------------------------------------------------
13723
13724 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
13725 || ( $formatted_lines < 2 )
13726 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
13727 )
13728 {
13729
13730 #---------------------------------------------------------------
13731 # too sparse: would look ugly if aligned in a table;
13732 #---------------------------------------------------------------
13733
13734 # use old breakpoints if this is a 'big' list
13735 # FIXME: goal is to improve set_ragged_breakpoints so that
13736 # this is not necessary.
13737 if ( $packed_lines > 2 && $item_count > 10 ) {
13738 write_logfile_entry("List sparse: using old breakpoints\n");
13739 copy_old_breakpoints( $i_first_comma, $i_last_comma );
13740 }
13741
13742 # let the continuation logic handle it if 2 lines
13743 else {
13744
13745 my $break_count =
13746 set_ragged_breakpoints( \@i_term_comma,
13747 $ri_ragged_break_list );
13748 ++$break_count if ($use_separate_first_term);
13749
13750 unless ($must_break_open_container) {
13751 if ( $break_count <= 1 ) {
13752 $$rdo_not_break_apart = 1;
13753 }
13754 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
13755 {
13756 $$rdo_not_break_apart = 1;
13757 }
13758 }
13759 set_non_alignment_flags( $comma_count, $rcomma_index );
13760 }
13761 return;
13762 }
13763
13764 #---------------------------------------------------------------
13765 # go ahead and format as a table
13766 #---------------------------------------------------------------
13767 write_logfile_entry(
13768 "List: auto formatting with $number_of_fields fields/row\n");
13769
13770 my $j_first_break =
13771 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
13772
13773 for (
13774 my $j = $j_first_break ;
13775 $j < $comma_count ;
13776 $j += $number_of_fields
13777 )
13778 {
13779 my $i = $$rcomma_index[$j];
13780 set_forced_breakpoint($i);
13781 }
13782 return;
13783 }
13784}
13785
13786sub set_non_alignment_flags {
13787
13788 # set flag which indicates that these commas should not be
13789 # aligned
13790 my ( $comma_count, $rcomma_index ) = @_;
13791 foreach ( 0 .. $comma_count - 1 ) {
13792 $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
13793 }
13794}
13795
13796sub study_list_complexity {
13797
13798 # Look for complex tables which should be formatted with one term per line.
13799 # Returns the following:
13800 #
13801 # \@i_ragged_break_list = list of good breakpoints to avoid lines
13802 # which are hard to read
13803 # $number_of_fields_best = suggested number of fields based on
13804 # complexity; = 0 if any number may be used.
13805 #
13806 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
13807 my $item_count = @{$ri_term_begin};
13808 my $complex_item_count = 0;
13809 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
13810 my $i_max = @{$ritem_lengths} - 1;
13811 ##my @item_complexity;
13812
13813 my $i_last_last_break = -3;
13814 my $i_last_break = -2;
13815 my @i_ragged_break_list;
13816
13817 my $definitely_complex = 30;
13818 my $definitely_simple = 12;
13819 my $quote_count = 0;
13820
13821 for my $i ( 0 .. $i_max ) {
13822 my $ib = $ri_term_begin->[$i];
13823 my $ie = $ri_term_end->[$i];
13824
13825 # define complexity: start with the actual term length
13826 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
13827
13828 ##TBD: join types here and check for variations
13829 ##my $str=join "", @tokens_to_go[$ib..$ie];
13830
13831 my $is_quote = 0;
13832 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
13833 $is_quote = 1;
13834 $quote_count++;
13835 }
13836 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
13837 $quote_count++;
13838 }
13839
13840 if ( $ib eq $ie ) {
13841 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
13842 $complex_item_count++;
13843 $weighted_length *= 2;
13844 }
13845 else {
13846 }
13847 }
13848 else {
13849 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
13850 $complex_item_count++;
13851 $weighted_length *= 2;
13852 }
13853 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
13854 $weighted_length += 4;
13855 }
13856 }
13857
13858 # add weight for extra tokens.
13859 $weighted_length += 2 * ( $ie - $ib );
13860
13861## my $BUB = join '', @tokens_to_go[$ib..$ie];
13862## print "# COMPLEXITY:$weighted_length $BUB\n";
13863
13864##push @item_complexity, $weighted_length;
13865
13866 # now mark a ragged break after this item it if it is 'long and
13867 # complex':
13868 if ( $weighted_length >= $definitely_complex ) {
13869
13870 # if we broke after the previous term
13871 # then break before it too
13872 if ( $i_last_break == $i - 1
13873 && $i > 1
13874 && $i_last_last_break != $i - 2 )
13875 {
13876
13877 ## FIXME: don't strand a small term
13878 pop @i_ragged_break_list;
13879 push @i_ragged_break_list, $i - 2;
13880 push @i_ragged_break_list, $i - 1;
13881 }
13882
13883 push @i_ragged_break_list, $i;
13884 $i_last_last_break = $i_last_break;
13885 $i_last_break = $i;
13886 }
13887
13888 # don't break before a small last term -- it will
13889 # not look good on a line by itself.
13890 elsif ($i == $i_max
13891 && $i_last_break == $i - 1
13892 && $weighted_length <= $definitely_simple )
13893 {
13894 pop @i_ragged_break_list;
13895 }
13896 }
13897
13898 my $identifier_count = $i_max + 1 - $quote_count;
13899
13900 # Need more tuning here..
13901 if ( $max_width > 12
13902 && $complex_item_count > $item_count / 2
13903 && $number_of_fields_best != 2 )
13904 {
13905 $number_of_fields_best = 1;
13906 }
13907
13908 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
13909}
13910
13911sub get_maximum_fields_wanted {
13912
13913 # Not all tables look good with more than one field of items.
13914 # This routine looks at a table and decides if it should be
13915 # formatted with just one field or not.
13916 # This coding is still under development.
13917 my ($ritem_lengths) = @_;
13918
13919 my $number_of_fields_best = 0;
13920
13921 # For just a few items, we tentatively assume just 1 field.
13922 my $item_count = @{$ritem_lengths};
13923 if ( $item_count <= 5 ) {
13924 $number_of_fields_best = 1;
13925 }
13926
13927 # For larger tables, look at it both ways and see what looks best
13928 else {
13929
13930 my $is_odd = 1;
13931 my @max_length = ( 0, 0 );
13932 my @last_length_2 = ( undef, undef );
13933 my @first_length_2 = ( undef, undef );
13934 my $last_length = undef;
13935 my $total_variation_1 = 0;
13936 my $total_variation_2 = 0;
13937 my @total_variation_2 = ( 0, 0 );
13938 for ( my $j = 0 ; $j < $item_count ; $j++ ) {
13939
13940 $is_odd = 1 - $is_odd;
13941 my $length = $ritem_lengths->[$j];
13942 if ( $length > $max_length[$is_odd] ) {
13943 $max_length[$is_odd] = $length;
13944 }
13945
13946 if ( defined($last_length) ) {
13947 my $dl = abs( $length - $last_length );
13948 $total_variation_1 += $dl;
13949 }
13950 $last_length = $length;
13951
13952 my $ll = $last_length_2[$is_odd];
13953 if ( defined($ll) ) {
13954 my $dl = abs( $length - $ll );
13955 $total_variation_2[$is_odd] += $dl;
13956 }
13957 else {
13958 $first_length_2[$is_odd] = $length;
13959 }
13960 $last_length_2[$is_odd] = $length;
13961 }
13962 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
13963
13964 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
13965 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
13966 $number_of_fields_best = 1;
13967 }
13968 }
13969 return ($number_of_fields_best);
13970}
13971
13972sub table_columns_available {
13973 my $i_first_comma = shift;
13974 my $columns =
13975 $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
13976
13977 # Patch: the vertical formatter does not line up lines whose lengths
13978 # exactly equal the available line length because of allowances
13979 # that must be made for side comments. Therefore, the number of
13980 # available columns is reduced by 1 character.
13981 $columns -= 1;
13982 return $columns;
13983}
13984
13985sub maximum_number_of_fields {
13986
13987 # how many fields will fit in the available space?
13988 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
13989 my $max_pairs = int( $columns / $pair_width );
13990 my $number_of_fields = $max_pairs * 2;
13991 if ( $odd_or_even == 1
13992 && $max_pairs * $pair_width + $max_width <= $columns )
13993 {
13994 $number_of_fields++;
13995 }
13996 return $number_of_fields;
13997}
13998
13999sub compactify_table {
14000
14001 # given a table with a certain number of fields and a certain number
14002 # of lines, see if reducing the number of fields will make it look
14003 # better.
14004 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
14005 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
14006 my $min_fields;
14007
14008 for (
14009 $min_fields = $number_of_fields ;
14010 $min_fields >= $odd_or_even
14011 && $min_fields * $formatted_lines >= $item_count ;
14012 $min_fields -= $odd_or_even
14013 )
14014 {
14015 $number_of_fields = $min_fields;
14016 }
14017 }
14018 return $number_of_fields;
14019}
14020
14021sub set_ragged_breakpoints {
14022
14023 # Set breakpoints in a list that cannot be formatted nicely as a
14024 # table.
14025 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
14026
14027 my $break_count = 0;
14028 foreach (@$ri_ragged_break_list) {
14029 my $j = $ri_term_comma->[$_];
14030 if ($j) {
14031 set_forced_breakpoint($j);
14032 $break_count++;
14033 }
14034 }
14035 return $break_count;
14036}
14037
14038sub copy_old_breakpoints {
14039 my ( $i_first_comma, $i_last_comma ) = @_;
14040 for my $i ( $i_first_comma .. $i_last_comma ) {
14041 if ( $old_breakpoint_to_go[$i] ) {
14042 set_forced_breakpoint($i);
14043 }
14044 }
14045}
14046
14047sub set_nobreaks {
14048 my ( $i, $j ) = @_;
14049 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
14050
14051 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14052 my ( $a, $b, $c ) = caller();
14053 print(
14054"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
14055 );
14056 };
14057
14058 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
14059 }
14060
14061 # shouldn't happen; non-critical error
14062 else {
14063 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14064 my ( $a, $b, $c ) = caller();
14065 print(
14066"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
14067 );
14068 };
14069 }
14070}
14071
14072sub set_fake_breakpoint {
14073
14074 # Just bump up the breakpoint count as a signal that there are breaks.
14075 # This is useful if we have breaks but may want to postpone deciding where
14076 # to make them.
14077 $forced_breakpoint_count++;
14078}
14079
14080sub set_forced_breakpoint {
14081 my $i = shift;
14082
14083 return unless defined $i && $i >= 0;
14084
14085 # when called with certain tokens, use bond strengths to decide
14086 # if we break before or after it
14087 my $token = $tokens_to_go[$i];
14088
14089 if ( $token =~ /^([\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
14090 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14091 }
14092
14093 # breaks are forced before 'if' and 'unless'
14094 elsif ( $is_if_unless{$token} ) { $i-- }
14095
14096 if ( $i >= 0 && $i <= $max_index_to_go ) {
14097 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14098
14099 FORMATTER_DEBUG_FLAG_FORCE && do {
14100 my ( $a, $b, $c ) = caller();
14101 print
14102"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
14103 };
14104
14105 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
14106 $forced_breakpoint_to_go[$i_nonblank] = 1;
14107
14108 if ( $i_nonblank > $index_max_forced_break ) {
14109 $index_max_forced_break = $i_nonblank;
14110 }
14111 $forced_breakpoint_count++;
14112 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
14113 $i_nonblank;
14114
14115 # if we break at an opening container..break at the closing
14116 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
14117 set_closing_breakpoint($i_nonblank);
14118 }
14119 }
14120 }
14121}
14122
14123sub clear_breakpoint_undo_stack {
14124 $forced_breakpoint_undo_count = 0;
14125}
14126
14127sub undo_forced_breakpoint_stack {
14128
14129 my $i_start = shift;
14130 if ( $i_start < 0 ) {
14131 $i_start = 0;
14132 my ( $a, $b, $c ) = caller();
14133 warning(
14134"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
14135 );
14136 }
14137
14138 while ( $forced_breakpoint_undo_count > $i_start ) {
14139 my $i =
14140 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14141 if ( $i >= 0 && $i <= $max_index_to_go ) {
14142 $forced_breakpoint_to_go[$i] = 0;
14143 $forced_breakpoint_count--;
14144
14145 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14146 my ( $a, $b, $c ) = caller();
14147 print(
14148"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
14149 );
14150 };
14151 }
14152
14153 # shouldn't happen, but not a critical error
14154 else {
14155 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14156 my ( $a, $b, $c ) = caller();
14157 print(
14158"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
14159 );
14160 };
14161 }
14162 }
14163}
14164
14165sub recombine_breakpoints {
14166
14167 # sub set_continuation_breaks is very liberal in setting line breaks
14168 # for long lines, always setting breaks at good breakpoints, even
14169 # when that creates small lines. Occasionally small line fragments
14170 # are produced which would look better if they were combined.
14171 # That's the task of this routine, recombine_breakpoints.
14172 my ( $ri_first, $ri_last ) = @_;
14173 my $more_to_do = 1;
14174
14175 # Keep looping until there are no more possible recombinations
14176 my $nmax_last = @$ri_last;
14177 while ($more_to_do) {
14178 my $n_best = 0;
14179 my $bs_best;
14180 my $n;
14181 my $nmax = @$ri_last - 1;
14182
14183 # safety check..
14184 unless ( $nmax < $nmax_last ) {
14185
14186 # shouldn't happen because splice below decreases nmax on each pass:
14187 # but i get paranoid sometimes
14188 die "Program bug-infinite loop in recombine breakpoints\n";
14189 }
14190 $nmax_last = $nmax;
14191 $more_to_do = 0;
14192
14193 # loop over all remaining lines...
14194 for $n ( 1 .. $nmax ) {
14195
14196 #----------------------------------------------------------
14197 # Indexes of the endpoints of the two lines are:
14198 #
14199 # ---left---- | ---right---
14200 # $if $imid | $imidr $il
14201 #
14202 # We want to decide if we should join tokens $imid to $imidr
14203 #----------------------------------------------------------
14204 my $if = $$ri_first[ $n - 1 ];
14205 my $il = $$ri_last[$n];
14206 my $imid = $$ri_last[ $n - 1 ];
14207 my $imidr = $$ri_first[$n];
14208
14209#print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
14210
14211 #----------------------------------------------------------
14212 # Start of special recombination rules
14213 # These are ad-hoc rules which have been found to work ok.
14214 # Skip to next pair to avoid re-combination.
14215 #----------------------------------------------------------
14216
14217 # a terminal '{' should stay where it is
14218 next if ( $n == $nmax && $types_to_go[$imidr] eq '{' );
14219
14220 #----------------------------------------------------------
14221 # examine token at $imid (right end of first line of pair)
14222 #----------------------------------------------------------
14223
14224 # an isolated '}' may join with a ';' terminated segment
14225 if ( $types_to_go[$imid] eq '}' ) {
14226 next
14227 unless (
14228
14229 # join } and ;
14230 ( ( $if == $imid ) && ( $types_to_go[$il] eq ';' ) )
14231
14232 # handle '.' and '?' below
14233 || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
14234 );
14235 }
14236
14237 # do not recombine lines with ending &&, ||, or :
14238 elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) {
14239 next unless $want_break_before{ $types_to_go[$imid] };
14240 }
14241
14242 # for lines ending in a comma...
14243 elsif ( $types_to_go[$imid] eq ',' ) {
14244
14245 # an isolated '},' may join with an identifier + ';'
14246 # this is useful for the class of a 'bless' statement (bless.t)
14247 if ( $types_to_go[$if] eq '}'
14248 && $types_to_go[$imidr] eq 'i' )
14249 {
14250 next
14251 unless ( ( $if == ( $imid - 1 ) )
14252 && ( $il == ( $imidr + 1 ) )
14253 && ( $types_to_go[$il] eq ';' ) );
14254
14255 # override breakpoint
14256 $forced_breakpoint_to_go[$imid] = 0;
14257 }
14258
14259 # but otherwise, do not recombine unless this will leave
14260 # just 1 more line
14261 else {
14262 next unless ( $n + 1 >= $nmax );
14263 }
14264 }
14265
14266 # opening paren..
14267 elsif ( $types_to_go[$imid] eq '(' ) {
14268
14269 # No longer doing this
14270 }
14271
14272 elsif ( $types_to_go[$imid] eq ')' ) {
14273
14274 # No longer doing this
14275 }
14276
14277 # keep a terminal colon
14278 elsif ( $types_to_go[$imid] eq ':' ) {
14279 next;
14280 }
14281
14282 # keep a terminal for-semicolon
14283 elsif ( $types_to_go[$imid] eq 'f' ) {
14284 next;
14285 }
14286
14287 # if '=' at end of line ...
14288 elsif ( $is_assignment{ $types_to_go[$imid] } ) {
14289
14290 # otherwise always ok to join isolated '='
14291 unless ( $if == $imid ) {
14292
14293 my $is_math = (
14294 ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ )
14295
14296 # note no '$' in pattern because -> can
14297 # start long identifier
14298 && !grep { $_ =~ /^(->|=>|[\,])/ }
14299 @types_to_go[ $imidr .. $il ]
14300 );
14301
14302 # retain the break after the '=' unless ...
14303 next
14304 unless (
14305
14306 # '=' is followed by a number and looks like math
14307 ( $types_to_go[$imidr] eq 'n' && $is_math )
14308
14309 # or followed by a scalar and looks like math
14310 || ( ( $types_to_go[$imidr] eq 'i' )
14311 && ( $tokens_to_go[$imidr] =~ /^\$/ )
14312 && $is_math )
14313
14314 # or followed by a single "short" token
14315 # ('12' is arbitrary)
14316 || ( $il == $imidr
14317 && token_sequence_length( $imidr, $imidr ) < 12 )
14318
14319 );
14320 }
14321 unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
14322 $forced_breakpoint_to_go[$imid] = 0;
14323 }
14324 }
14325
14326 # for keywords..
14327 elsif ( $types_to_go[$imid] eq 'k' ) {
14328
14329 # make major control keywords stand out
14330 # (recombine.t)
14331 next
14332 if (
14333
14334 #/^(last|next|redo|return)$/
14335 $is_last_next_redo_return{ $tokens_to_go[$imid] }
14336 );
14337
14338 if ( $is_and_or{ $tokens_to_go[$imid] } ) {
14339 next unless $want_break_before{ $tokens_to_go[$imid] };
14340 }
14341 }
14342
14343 #----------------------------------------------------------
14344 # examine token at $imidr (left end of second line of pair)
14345 #----------------------------------------------------------
14346
14347 # do not recombine lines with leading &&, ||, or :
14348 if ( $types_to_go[$imidr] =~ /^(|:|\&\&|\|\|)$/ ) {
14349 next if $want_break_before{ $types_to_go[$imidr] };
14350 }
14351
14352 # Identify and recombine a broken ?/: chain
14353 elsif ( $types_to_go[$imidr] eq '?' ) {
14354
14355 # indexes of line first tokens --
14356 # mm - line before previous line
14357 # f - previous line
14358 # <-- this line
14359 # ff - next line
14360 # fff - line after next
14361 my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1;
14362 my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
14363 my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1;
14364 my $seqno = $type_sequence_to_go[$imidr];
14365 my $f_ok =
14366 ( $types_to_go[$if] eq ':'
14367 && $type_sequence_to_go[$if] ==
14368 $seqno - TYPE_SEQUENCE_INCREMENT );
14369 my $mm_ok =
14370 ( $imm >= 0
14371 && $types_to_go[$imm] eq ':'
14372 && $type_sequence_to_go[$imm] ==
14373 $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
14374
14375 my $ff_ok =
14376 ( $iff > 0
14377 && $types_to_go[$iff] eq ':'
14378 && $type_sequence_to_go[$iff] == $seqno );
14379 my $fff_ok =
14380 ( $ifff > 0
14381 && $types_to_go[$ifff] eq ':'
14382 && $type_sequence_to_go[$ifff] ==
14383 $seqno + TYPE_SEQUENCE_INCREMENT );
14384
14385 # we require that this '?' be part of a correct sequence
14386 # of 3 in a row or else no recombination is done.
14387 next
14388 unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
14389 $forced_breakpoint_to_go[$imid] = 0;
14390 }
14391
14392 # do not recombine lines with leading '.'
14393 elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
14394 my $i_next_nonblank = $imidr + 1;
14395 if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
14396 $i_next_nonblank++;
14397 }
14398
14399 next
14400 unless (
14401
14402 # ... unless there is just one and we can reduce this to
14403 # two lines if we do. For example, this :
14404 #
14405 # $bodyA .=
14406 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
14407 #
14408 # looks better than this:
14409 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
14410 # . '$args .= $pat;'
14411
14412 (
14413 $n == 2
14414 && $n == $nmax
14415 && $types_to_go[$if] ne $types_to_go[$imidr]
14416 )
14417
14418 #
14419 # ... or this would strand a short quote , like this
14420 # . "some long qoute"
14421 # . "\n";
14422 #
14423
14424 || ( $types_to_go[$i_next_nonblank] eq 'Q'
14425 && $i_next_nonblank >= $il - 1
14426 && length( $tokens_to_go[$i_next_nonblank] ) <
14427 $rOpts_short_concatenation_item_length )
14428 );
14429 }
14430
14431 # handle leading keyword..
14432 elsif ( $types_to_go[$imidr] eq 'k' ) {
14433
14434 # handle leading "and" and "or"
14435 if ( $is_and_or{ $tokens_to_go[$imidr] } ) {
14436
14437 # Decide if we will combine a single terminal 'and' and
14438 # 'or' after an 'if' or 'unless'. We should consider the
14439 # possible vertical alignment, and visual clutter.
14440
14441 # This looks best with the 'and' on the same line as the 'if':
14442 #
14443 # $a = 1
14444 # if $seconds and $nu < 2;
14445 #
14446 # But this looks better as shown:
14447 #
14448 # $a = 1
14449 # if !$this->{Parents}{$_}
14450 # or $this->{Parents}{$_} eq $_;
14451 #
14452 # Eventually, it would be nice to look for similarities (such as 'this' or
14453 # 'Parents'), but for now I'm using a simple rule that says that the
14454 # resulting line length must not be more than half the maximum line length
14455 # (making it 80/2 = 40 characters by default).
14456
14457 next
14458 unless (
14459 $n == $nmax # if this is the last line
14460 && $types_to_go[$il] eq ';' # ending in ';'
14461 && $types_to_go[$if] eq 'k' # after 'if' or 'unless'
14462 # /^(if|unless)$/
14463 && $is_if_unless{ $tokens_to_go[$if] }
14464
14465 # and if this doesn't make a long last line
14466 && total_line_length( $if, $il ) <=
14467 $half_maximum_line_length
14468 );
14469
14470 # override breakpoint
14471 $forced_breakpoint_to_go[$imid] = 0;
14472 }
14473
14474 # handle leading "if" and "unless"
14475 elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
14476
14477 # FIXME: This is still experimental..may not be too useful
14478 next
14479 unless (
14480 $n == $nmax # if this is the last line
14481 && $types_to_go[$il] eq ';' # ending in ';'
14482 && $types_to_go[$if] eq 'k'
14483
14484 # /^(and|or)$/
14485 && $is_and_or{ $tokens_to_go[$if] }
14486
14487 # and if this doesn't make a long last line
14488 && total_line_length( $if, $il ) <=
14489 $half_maximum_line_length
14490 );
14491
14492 # override breakpoint
14493 $forced_breakpoint_to_go[$imid] = 0;
14494 }
14495
14496 # handle all other leading keywords
14497 else {
14498
14499 # keywords look best at start of lines,
14500 # but combine things like "1 while"
14501
14502 unless ( $is_assignment{ $types_to_go[$imid] } ) {
14503 next
14504 if ( ( $types_to_go[$imid] ne 'k' )
14505 && ( $tokens_to_go[$imidr] !~ /^(while)$/ ) );
14506 }
14507 }
14508 }
14509
14510 # similar treatment of && and || as above for 'and' and 'or':
14511 elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
14512
14513 # maybe looking at something like:
14514 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
14515
14516 next
14517 unless (
14518 $n == $nmax # if this is the last line
14519 && $types_to_go[$il] eq ';' # ending in ';'
14520 && $types_to_go[$if] eq 'k' # after an 'if' or 'unless'
14521 # /^(if|unless)$/
14522 && $is_if_unless{ $tokens_to_go[$if] }
14523
14524 # and if this doesn't make a long last line
14525 && total_line_length( $if, $il ) <=
14526 $half_maximum_line_length
14527 );
14528
14529 # override breakpoint
14530 $forced_breakpoint_to_go[$imid] = 0;
14531 }
14532
14533 # honor hard breakpoints
14534 next if ( $forced_breakpoint_to_go[$imid] > 0 );
14535
14536 #----------------------------------------------------------
14537 # end of special recombination rules
14538 #----------------------------------------------------------
14539
14540 my $bs = $bond_strength_to_go[$imid];
14541
14542 # combined line cannot be too long
14543 next
14544 if excess_line_length( $if, $il ) > 0;
14545
14546 # do not recombine if we would skip in indentation levels
14547 if ( $n < $nmax ) {
14548 my $if_next = $$ri_first[ $n + 1 ];
14549 next
14550 if (
14551 $levels_to_go[$if] < $levels_to_go[$imidr]
14552 && $levels_to_go[$imidr] < $levels_to_go[$if_next]
14553
14554 # but an isolated 'if (' is undesirable
14555 && !(
14556 $n == 1
14557 && $imid - $if <= 2
14558 && $types_to_go[$if] eq 'k'
14559 && $tokens_to_go[$if] eq 'if'
14560 && $tokens_to_go[$imid] ne '('
14561 )
14562
14563 #
14564 );
14565 }
14566
14567 # honor no-break's
14568 next if ( $bs == NO_BREAK );
14569
14570 # remember the pair with the greatest bond strength
14571 if ( !$n_best ) {
14572 $n_best = $n;
14573 $bs_best = $bs;
14574 }
14575 else {
14576
14577 if ( $bs > $bs_best ) {
14578 $n_best = $n;
14579 $bs_best = $bs;
14580 }
14581
14582 # we have 2 or more candidates, so need another pass
14583 $more_to_do++;
14584 }
14585 }
14586
14587 # recombine the pair with the greatest bond strength
14588 if ($n_best) {
14589 splice @$ri_first, $n_best, 1;
14590 splice @$ri_last, $n_best - 1, 1;
14591 }
14592 }
14593 return ( $ri_first, $ri_last );
14594}
14595
14596sub set_continuation_breaks {
14597
14598 # Define an array of indexes for inserting newline characters to
14599 # keep the line lengths below the maximum desired length. There is
14600 # an implied break after the last token, so it need not be included.
14601 # We'll break at points where the bond strength is lowest.
14602
14603 my $saw_good_break = shift;
14604 my @i_first = (); # the first index to output
14605 my @i_last = (); # the last index to output
14606 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
14607 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
14608
14609 set_bond_strengths();
14610
14611 my $imin = 0;
14612 my $imax = $max_index_to_go;
14613 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
14614 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
14615 my $i_begin = $imin;
14616
14617 my $leading_spaces = leading_spaces_to_go($imin);
14618 my $line_count = 0;
14619 my $last_break_strength = NO_BREAK;
14620 my $i_last_break = -1;
14621 my $max_bias = 0.001;
14622 my $tiny_bias = 0.0001;
14623 my $leading_alignment_token = "";
14624 my $leading_alignment_type = "";
14625
14626 # see if any ?/:'s are in order
14627 my $colons_in_order = 1;
14628 my $last_tok = "";
14629 my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
14630 foreach (@colon_list) {
14631 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
14632 $last_tok = $_;
14633 }
14634
14635 # This is a sufficient but not necessary condition for colon chain
14636 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
14637
14638 while ( $i_begin <= $imax ) {
14639 my $lowest_strength = NO_BREAK;
14640 my $starting_sum = $lengths_to_go[$i_begin];
14641 my $i_lowest = -1;
14642 my $i_test = -1;
14643 my $lowest_next_token = '';
14644 my $lowest_next_type = 'b';
14645 my $i_lowest_next_nonblank = -1;
14646
14647 # loop to find next break point
14648 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
14649 my $type = $types_to_go[$i_test];
14650 my $token = $tokens_to_go[$i_test];
14651 my $next_type = $types_to_go[ $i_test + 1 ];
14652 my $next_token = $tokens_to_go[ $i_test + 1 ];
14653 my $i_next_nonblank =
14654 ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
14655 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
14656 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14657 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
14658 my $strength = $bond_strength_to_go[$i_test];
14659 my $must_break = 0;
14660
14661 # FIXME: TESTING: Might want to be able to break after these
14662 # force an immediate break at certain operators
14663 # with lower level than the start of the line
14664 if (
14665 (
14666 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
14667 || ( $next_nonblank_type eq 'k'
14668 && $next_nonblank_token =~ /^(and|or)$/ )
14669 )
14670 && ( $nesting_depth_to_go[$i_begin] >
14671 $nesting_depth_to_go[$i_next_nonblank] )
14672 )
14673 {
14674 set_forced_breakpoint($i_next_nonblank);
14675 }
14676
14677 if (
14678
14679 # Try to put a break where requested by scan_list
14680 $forced_breakpoint_to_go[$i_test]
14681
14682 # break between ) { in a continued line so that the '{' can
14683 # be outdented
14684 # See similar logic in scan_list which catches instances
14685 # where a line is just something like ') {'
14686 || ( $line_count
14687 && ( $token eq ')' )
14688 && ( $next_nonblank_type eq '{' )
14689 && ($next_nonblank_block_type)
14690 && !$rOpts->{'opening-brace-always-on-right'} )
14691
14692 # There is an implied forced break at a terminal opening brace
14693 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
14694
14695 )
14696 {
14697
14698 # Forced breakpoints must sometimes be overridden, for example
14699 # because of a side comment causing a NO_BREAK. It is easier
14700 # to catch this here than when they are set.
14701 if ( $strength < NO_BREAK ) {
14702 $strength = $lowest_strength - $tiny_bias;
14703 $must_break = 1;
14704 }
14705 }
14706
14707 # quit if a break here would put a good terminal token on
14708 # the next line and we already have a possible break
14709 if (
14710 !$must_break
14711 && ( $next_nonblank_type =~ /^[\;\,]$/ )
14712 && (
14713 (
14714 $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ]
14715 - $starting_sum
14716 ) > $rOpts_maximum_line_length
14717 )
14718 )
14719 {
14720 last if ( $i_lowest >= 0 );
14721 }
14722
14723 # Avoid a break which would strand a single punctuation
14724 # token. For example, we do not want to strand a leading
14725 # '.' which is followed by a long quoted string.
14726 if (
14727 !$must_break
14728 && ( $i_test == $i_begin )
14729 && ( $i_test < $imax )
14730 && ( $token eq $type )
14731 && (
14732 (
14733 $leading_spaces + $lengths_to_go[ $i_test + 1 ] -
14734 $starting_sum
14735 ) <= $rOpts_maximum_line_length
14736 )
14737 )
14738 {
14739 $i_test++;
14740
14741 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
14742 $i_test++;
14743 }
14744 redo;
14745 }
14746
14747 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
14748 {
14749
14750 # break at previous best break if it would have produced
14751 # a leading alignment of certain common tokens, and it
14752 # is different from the latest candidate break
14753 last
14754 if ($leading_alignment_type);
14755
14756 # Force at least one breakpoint if old code had good
14757 # break It is only called if a breakpoint is required or
14758 # desired. This will probably need some adjustments
14759 # over time. A goal is to try to be sure that, if a new
14760 # side comment is introduced into formated text, then
14761 # the same breakpoints will occur. scbreak.t
14762 last
14763 if (
14764 $i_test == $imax # we are at the end
14765 && !$forced_breakpoint_count #
14766 && $saw_good_break # old line had good break
14767 && $type =~ /^[#;\{]$/ # and this line ends in
14768 # ';' or side comment
14769 && $i_last_break < 0 # and we haven't made a break
14770 && $i_lowest > 0 # and we saw a possible break
14771 && $i_lowest < $imax - 1 # (but not just before this ;)
14772 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
14773 );
14774
14775 $lowest_strength = $strength;
14776 $i_lowest = $i_test;
14777 $lowest_next_token = $next_nonblank_token;
14778 $lowest_next_type = $next_nonblank_type;
14779 $i_lowest_next_nonblank = $i_next_nonblank;
14780 last if $must_break;
14781
14782 # set flags to remember if a break here will produce a
14783 # leading alignment of certain common tokens
14784 if (
14785 $line_count > 0
14786 && $i_test < $imax
14787 && ( $lowest_strength - $last_break_strength <= $max_bias )
14788 && ( $nesting_depth_to_go[$i_begin] >=
14789 $nesting_depth_to_go[$i_next_nonblank] )
14790 && (
14791 (
14792 $types_to_go[$i_begin] =~ /^(\.|\&\&|\|\||:)$/
14793 && $types_to_go[$i_begin] eq $next_nonblank_type
14794 )
14795 || ( $tokens_to_go[$i_begin] =~ /^(and|or)$/
14796 && $tokens_to_go[$i_begin] eq $next_nonblank_token )
14797 )
14798 )
14799 {
14800 $leading_alignment_token = $next_nonblank_token;
14801 $leading_alignment_type = $next_nonblank_type;
14802 }
14803 }
14804
14805 my $too_long =
14806 ( $i_test >= $imax )
14807 ? 1
14808 : (
14809 (
14810 $leading_spaces + $lengths_to_go[ $i_test + 2 ] -
14811 $starting_sum
14812 ) > $rOpts_maximum_line_length
14813 );
14814
14815 FORMATTER_DEBUG_FLAG_BREAK
14816 && print
14817"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
14818
14819 # allow one extra terminal token after exceeding line length
14820 # if it would strand this token.
14821 if ( $rOpts_fuzzy_line_length
14822 && $too_long
14823 && ( $i_lowest == $i_test )
14824 && ( length($token) > 1 )
14825 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
14826 {
14827 $too_long = 0;
14828 }
14829
14830 last
14831 if (
14832 ( $i_test == $imax ) # we're done if no more tokens,
14833 || (
14834 ( $i_lowest >= 0 ) # or no more space and we have a break
14835 && $too_long
14836 )
14837 );
14838 }
14839
14840 # it's always ok to break at imax if no other break was found
14841 if ( $i_lowest < 0 ) { $i_lowest = $imax }
14842
14843 # semi-final index calculation
14844 my $i_next_nonblank = (
14845 ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
14846 ? $i_lowest + 2
14847 : $i_lowest + 1
14848 );
14849 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
14850 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14851
14852 #-------------------------------------------------------
14853 # ?/: rule 1 : if a break here will separate a '?' on this
14854 # line from its closing ':', then break at the '?' instead.
14855 #-------------------------------------------------------
14856 my $i;
14857 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
14858 next unless ( $tokens_to_go[$i] eq '?' );
14859
14860 # do not break if probable sequence of ?/: statements
14861 next if ($is_colon_chain);
14862
14863 # do not break if statement is broken by side comment
14864 next
14865 if (
14866 $tokens_to_go[$max_index_to_go] eq '#'
14867 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
14868 $max_index_to_go ) !~ /^[\;\}]$/
14869 );
14870
14871 # no break needed if matching : is also on the line
14872 next
14873 if ( $mate_index_to_go[$i] >= 0
14874 && $mate_index_to_go[$i] <= $i_next_nonblank );
14875
14876 $i_lowest = $i;
14877 if ( $want_break_before{'?'} ) { $i_lowest-- }
14878 last;
14879 }
14880
14881 # final index calculation
14882 $i_next_nonblank = (
14883 ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
14884 ? $i_lowest + 2
14885 : $i_lowest + 1
14886 );
14887 $next_nonblank_type = $types_to_go[$i_next_nonblank];
14888 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14889
14890 FORMATTER_DEBUG_FLAG_BREAK
14891 && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
14892
14893 #-------------------------------------------------------
14894 # ?/: rule 2 : if we break at a '?', then break at its ':'
14895 #
14896 # Note: this rule is also in sub scan_list to handle a break
14897 # at the start and end of a line (in case breaks are dictated
14898 # by side comments).
14899 #-------------------------------------------------------
14900 if ( $next_nonblank_type eq '?' ) {
14901 set_closing_breakpoint($i_next_nonblank);
14902 }
14903 elsif ( $types_to_go[$i_lowest] eq '?' ) {
14904 set_closing_breakpoint($i_lowest);
14905 }
14906
14907 #-------------------------------------------------------
14908 # ?/: rule 3 : if we break at a ':' then we save
14909 # its location for further work below. We may need to go
14910 # back and break at its '?'.
14911 #-------------------------------------------------------
14912 if ( $next_nonblank_type eq ':' ) {
14913 push @i_colon_breaks, $i_next_nonblank;
14914 }
14915 elsif ( $types_to_go[$i_lowest] eq ':' ) {
14916 push @i_colon_breaks, $i_lowest;
14917 }
14918
14919 # here we should set breaks for all '?'/':' pairs which are
14920 # separated by this line
14921
14922 $line_count++;
14923
14924 # save this line segment, after trimming blanks at the ends
14925 push( @i_first,
14926 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
14927 push( @i_last,
14928 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
14929
14930 # set a forced breakpoint at a container opening, if necessary, to
14931 # signal a break at a closing container. Excepting '(' for now.
14932 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
14933 && !$forced_breakpoint_to_go[$i_lowest] )
14934 {
14935 set_closing_breakpoint($i_lowest);
14936 }
14937
14938 # get ready to go again
14939 $i_begin = $i_lowest + 1;
14940 $last_break_strength = $lowest_strength;
14941 $i_last_break = $i_lowest;
14942 $leading_alignment_token = "";
14943 $leading_alignment_type = "";
14944 $lowest_next_token = '';
14945 $lowest_next_type = 'b';
14946
14947 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
14948 $i_begin++;
14949 }
14950
14951 # update indentation size
14952 if ( $i_begin <= $imax ) {
14953 $leading_spaces = leading_spaces_to_go($i_begin);
14954 }
14955 }
14956
14957 #-------------------------------------------------------
14958 # ?/: rule 4 -- if we broke at a ':', then break at
14959 # corresponding '?' unless this is a chain of ?: expressions
14960 #-------------------------------------------------------
14961 if (@i_colon_breaks) {
14962
14963 # using a simple method for deciding if we are in a ?/: chain --
14964 # this is a chain if it has multiple ?/: pairs all in order;
14965 # otherwise not.
14966 # Note that if line starts in a ':' we count that above as a break
14967 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
14968
14969 unless ($is_chain) {
14970 my @insert_list = ();
14971 foreach (@i_colon_breaks) {
14972 my $i_question = $mate_index_to_go[$_];
14973 if ( $i_question >= 0 ) {
14974 if ( $want_break_before{'?'} ) {
14975 $i_question--;
14976 if ( $i_question > 0
14977 && $types_to_go[$i_question] eq 'b' )
14978 {
14979 $i_question--;
14980 }
14981 }
14982
14983 if ( $i_question >= 0 ) {
14984 push @insert_list, $i_question;
14985 }
14986 }
14987 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
14988 }
14989 }
14990 }
14991 return \@i_first, \@i_last;
14992}
14993
14994sub insert_additional_breaks {
14995
14996 # this routine will add line breaks at requested locations after
14997 # sub set_continuation_breaks has made preliminary breaks.
14998
14999 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
15000 my $i_f;
15001 my $i_l;
15002 my $line_number = 0;
15003 my $i_break_left;
15004 foreach $i_break_left ( sort @$ri_break_list ) {
15005
15006 $i_f = $$ri_first[$line_number];
15007 $i_l = $$ri_last[$line_number];
15008 while ( $i_break_left >= $i_l ) {
15009 $line_number++;
15010
15011 # shouldn't happen unless caller passes bad indexes
15012 if ( $line_number >= @$ri_last ) {
15013 warning(
15014"Non-fatal program bug: couldn't set break at $i_break_left\n"
15015 );
15016 report_definite_bug();
15017 return;
15018 }
15019 $i_f = $$ri_first[$line_number];
15020 $i_l = $$ri_last[$line_number];
15021 }
15022
15023 my $i_break_right = $i_break_left + 1;
15024 if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
15025
15026 if ( $i_break_left >= $i_f
15027 && $i_break_left < $i_l
15028 && $i_break_right > $i_f
15029 && $i_break_right <= $i_l )
15030 {
15031 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
15032 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
15033 }
15034 }
15035}
15036
15037sub set_closing_breakpoint {
15038
15039 # set a breakpoint at a matching closing token
15040 # at present, this is only used to break at a ':' which matches a '?'
15041 my $i_break = shift;
15042
15043 if ( $mate_index_to_go[$i_break] >= 0 ) {
15044
15045 # CAUTION: infinite recursion possible here:
15046 # set_closing_breakpoint calls set_forced_breakpoint, and
15047 # set_forced_breakpoint call set_closing_breakpoint
15048 # ( test files attrib.t, BasicLyx.pm.html).
15049 # Don't reduce the '2' in the statement below
15050 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
15051
15052 # break before } ] and ), but sub set_forced_breakpoint will decide
15053 # to break before or after a ? and :
15054 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
15055 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
15056 }
15057 }
15058 else {
15059 my $type_sequence = $type_sequence_to_go[$i_break];
15060 if ($type_sequence) {
15061 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
15062 $postponed_breakpoint{$type_sequence} = 1;
15063 }
15064 }
15065}
15066
15067# check to see if output line tabbing agrees with input line
15068# this can be very useful for debugging a script which has an extra
15069# or missing brace
15070sub compare_indentation_levels {
15071
15072 my ( $python_indentation_level, $structural_indentation_level ) = @_;
15073 if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
15074 $last_tabbing_disagreement = $input_line_number;
15075
15076 if ($in_tabbing_disagreement) {
15077 }
15078 else {
15079 $tabbing_disagreement_count++;
15080
15081 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15082 write_logfile_entry(
15083"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
15084 );
15085 }
15086 $in_tabbing_disagreement = $input_line_number;
15087 $first_tabbing_disagreement = $in_tabbing_disagreement
15088 unless ($first_tabbing_disagreement);
15089 }
15090 }
15091 else {
15092
15093 if ($in_tabbing_disagreement) {
15094
15095 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15096 write_logfile_entry(
15097"End indentation disagreement from input line $in_tabbing_disagreement\n"
15098 );
15099
15100 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
15101 write_logfile_entry(
15102 "No further tabbing disagreements will be noted\n");
15103 }
15104 }
15105 $in_tabbing_disagreement = 0;
15106 }
15107 }
15108}
15109
15110#####################################################################
15111#
15112# the Perl::Tidy::IndentationItem class supplies items which contain
15113# how much whitespace should be used at the start of a line
15114#
15115#####################################################################
15116
15117package Perl::Tidy::IndentationItem;
15118
15119# Indexes for indentation items
15120use constant SPACES => 0; # total leading white spaces
15121use constant LEVEL => 1; # the indentation 'level'
15122use constant CI_LEVEL => 2; # the 'continuation level'
15123use constant AVAILABLE_SPACES => 3; # how many left spaces available
15124 # for this level
15125use constant CLOSED => 4; # index where we saw closing '}'
15126use constant COMMA_COUNT => 5; # how many commas at this level?
15127use constant SEQUENCE_NUMBER => 6; # output batch number
15128use constant INDEX => 7; # index in output batch list
15129use constant HAVE_CHILD => 8; # any dependents?
15130use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
15131 # we would like to move to get
15132 # alignment (negative if left)
15133use constant ALIGN_PAREN => 10; # do we want to try to align
15134 # with an opening structure?
15135use constant MARKED => 11; # if visited by corrector logic
15136use constant STACK_DEPTH => 12; # indentation nesting depth
15137use constant STARTING_INDEX => 13; # first token index of this level
15138use constant ARROW_COUNT => 14; # how many =>'s
15139
15140sub new {
15141
15142 # Create an 'indentation_item' which describes one level of leading
15143 # whitespace when the '-lp' indentation is used. We return
15144 # a reference to an anonymous array of associated variables.
15145 # See above constants for storage scheme.
15146 my (
15147 $class, $spaces, $level,
15148 $ci_level, $available_spaces, $index,
15149 $gnu_sequence_number, $align_paren, $stack_depth,
15150 $starting_index,
15151 )
15152 = @_;
15153 my $closed = -1;
15154 my $arrow_count = 0;
15155 my $comma_count = 0;
15156 my $have_child = 0;
15157 my $want_right_spaces = 0;
15158 my $marked = 0;
15159 bless [
15160 $spaces, $level, $ci_level,
15161 $available_spaces, $closed, $comma_count,
15162 $gnu_sequence_number, $index, $have_child,
15163 $want_right_spaces, $align_paren, $marked,
15164 $stack_depth, $starting_index, $arrow_count,
15165 ], $class;
15166}
15167
15168sub permanently_decrease_AVAILABLE_SPACES {
15169
15170 # make a permanent reduction in the available indentation spaces
15171 # at one indentation item. NOTE: if there are child nodes, their
15172 # total SPACES must be reduced by the caller.
15173
15174 my ( $item, $spaces_needed ) = @_;
15175 my $available_spaces = $item->get_AVAILABLE_SPACES();
15176 my $deleted_spaces =
15177 ( $available_spaces > $spaces_needed )
15178 ? $spaces_needed
15179 : $available_spaces;
15180 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
15181 $item->decrease_SPACES($deleted_spaces);
15182 $item->set_RECOVERABLE_SPACES(0);
15183
15184 return $deleted_spaces;
15185}
15186
15187sub tentatively_decrease_AVAILABLE_SPACES {
15188
15189 # We are asked to tentatively delete $spaces_needed of indentation
15190 # for a indentation item. We may want to undo this later. NOTE: if
15191 # there are child nodes, their total SPACES must be reduced by the
15192 # caller.
15193 my ( $item, $spaces_needed ) = @_;
15194 my $available_spaces = $item->get_AVAILABLE_SPACES();
15195 my $deleted_spaces =
15196 ( $available_spaces > $spaces_needed )
15197 ? $spaces_needed
15198 : $available_spaces;
15199 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
15200 $item->decrease_SPACES($deleted_spaces);
15201 $item->increase_RECOVERABLE_SPACES($deleted_spaces);
15202 return $deleted_spaces;
15203}
15204
15205sub get_STACK_DEPTH {
15206 my $self = shift;
15207 return $self->[STACK_DEPTH];
15208}
15209
15210sub get_SPACES {
15211 my $self = shift;
15212 return $self->[SPACES];
15213}
15214
15215sub get_MARKED {
15216 my $self = shift;
15217 return $self->[MARKED];
15218}
15219
15220sub set_MARKED {
15221 my ( $self, $value ) = @_;
15222 if ( defined($value) ) {
15223 $self->[MARKED] = $value;
15224 }
15225 return $self->[MARKED];
15226}
15227
15228sub get_AVAILABLE_SPACES {
15229 my $self = shift;
15230 return $self->[AVAILABLE_SPACES];
15231}
15232
15233sub decrease_SPACES {
15234 my ( $self, $value ) = @_;
15235 if ( defined($value) ) {
15236 $self->[SPACES] -= $value;
15237 }
15238 return $self->[SPACES];
15239}
15240
15241sub decrease_AVAILABLE_SPACES {
15242 my ( $self, $value ) = @_;
15243 if ( defined($value) ) {
15244 $self->[AVAILABLE_SPACES] -= $value;
15245 }
15246 return $self->[AVAILABLE_SPACES];
15247}
15248
15249sub get_ALIGN_PAREN {
15250 my $self = shift;
15251 return $self->[ALIGN_PAREN];
15252}
15253
15254sub get_RECOVERABLE_SPACES {
15255 my $self = shift;
15256 return $self->[RECOVERABLE_SPACES];
15257}
15258
15259sub set_RECOVERABLE_SPACES {
15260 my ( $self, $value ) = @_;
15261 if ( defined($value) ) {
15262 $self->[RECOVERABLE_SPACES] = $value;
15263 }
15264 return $self->[RECOVERABLE_SPACES];
15265}
15266
15267sub increase_RECOVERABLE_SPACES {
15268 my ( $self, $value ) = @_;
15269 if ( defined($value) ) {
15270 $self->[RECOVERABLE_SPACES] += $value;
15271 }
15272 return $self->[RECOVERABLE_SPACES];
15273}
15274
15275sub get_CI_LEVEL {
15276 my $self = shift;
15277 return $self->[CI_LEVEL];
15278}
15279
15280sub get_LEVEL {
15281 my $self = shift;
15282 return $self->[LEVEL];
15283}
15284
15285sub get_SEQUENCE_NUMBER {
15286 my $self = shift;
15287 return $self->[SEQUENCE_NUMBER];
15288}
15289
15290sub get_INDEX {
15291 my $self = shift;
15292 return $self->[INDEX];
15293}
15294
15295sub get_STARTING_INDEX {
15296 my $self = shift;
15297 return $self->[STARTING_INDEX];
15298}
15299
15300sub set_HAVE_CHILD {
15301 my ( $self, $value ) = @_;
15302 if ( defined($value) ) {
15303 $self->[HAVE_CHILD] = $value;
15304 }
15305 return $self->[HAVE_CHILD];
15306}
15307
15308sub get_HAVE_CHILD {
15309 my $self = shift;
15310 return $self->[HAVE_CHILD];
15311}
15312
15313sub set_ARROW_COUNT {
15314 my ( $self, $value ) = @_;
15315 if ( defined($value) ) {
15316 $self->[ARROW_COUNT] = $value;
15317 }
15318 return $self->[ARROW_COUNT];
15319}
15320
15321sub get_ARROW_COUNT {
15322 my $self = shift;
15323 return $self->[ARROW_COUNT];
15324}
15325
15326sub set_COMMA_COUNT {
15327 my ( $self, $value ) = @_;
15328 if ( defined($value) ) {
15329 $self->[COMMA_COUNT] = $value;
15330 }
15331 return $self->[COMMA_COUNT];
15332}
15333
15334sub get_COMMA_COUNT {
15335 my $self = shift;
15336 return $self->[COMMA_COUNT];
15337}
15338
15339sub set_CLOSED {
15340 my ( $self, $value ) = @_;
15341 if ( defined($value) ) {
15342 $self->[CLOSED] = $value;
15343 }
15344 return $self->[CLOSED];
15345}
15346
15347sub get_CLOSED {
15348 my $self = shift;
15349 return $self->[CLOSED];
15350}
15351
15352#####################################################################
15353#
15354# the Perl::Tidy::VerticalAligner::Line class supplies an object to
15355# contain a single output line
15356#
15357#####################################################################
15358
15359package Perl::Tidy::VerticalAligner::Line;
15360
15361{
15362
15363 use strict;
15364 use Carp;
15365
15366 use constant JMAX => 0;
15367 use constant JMAX_ORIGINAL_LINE => 1;
15368 use constant RTOKENS => 2;
15369 use constant RFIELDS => 3;
15370 use constant RPATTERNS => 4;
15371 use constant INDENTATION => 5;
15372 use constant LEADING_SPACE_COUNT => 6;
15373 use constant OUTDENT_LONG_LINES => 7;
15374 use constant LIST_TYPE => 8;
15375 use constant IS_HANGING_SIDE_COMMENT => 9;
15376 use constant RALIGNMENTS => 10;
15377 use constant MAXIMUM_LINE_LENGTH => 11;
15378 use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
15379
15380 my %_index_map;
15381 $_index_map{jmax} = JMAX;
15382 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
15383 $_index_map{rtokens} = RTOKENS;
15384 $_index_map{rfields} = RFIELDS;
15385 $_index_map{rpatterns} = RPATTERNS;
15386 $_index_map{indentation} = INDENTATION;
15387 $_index_map{leading_space_count} = LEADING_SPACE_COUNT;
15388 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
15389 $_index_map{list_type} = LIST_TYPE;
15390 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
15391 $_index_map{ralignments} = RALIGNMENTS;
15392 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
15393 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
15394
15395 my @_default_data = ();
15396 $_default_data[JMAX] = undef;
15397 $_default_data[JMAX_ORIGINAL_LINE] = undef;
15398 $_default_data[RTOKENS] = undef;
15399 $_default_data[RFIELDS] = undef;
15400 $_default_data[RPATTERNS] = undef;
15401 $_default_data[INDENTATION] = undef;
15402 $_default_data[LEADING_SPACE_COUNT] = undef;
15403 $_default_data[OUTDENT_LONG_LINES] = undef;
15404 $_default_data[LIST_TYPE] = undef;
15405 $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
15406 $_default_data[RALIGNMENTS] = [];
15407 $_default_data[MAXIMUM_LINE_LENGTH] = undef;
15408 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
15409
15410 {
15411
15412 # methods to count object population
15413 my $_count = 0;
15414 sub get_count { $_count; }
15415 sub _increment_count { ++$_count }
15416 sub _decrement_count { --$_count }
15417 }
15418
15419 # Constructor may be called as a class method
15420 sub new {
15421 my ( $caller, %arg ) = @_;
15422 my $caller_is_obj = ref($caller);
15423 my $class = $caller_is_obj || $caller;
15424 no strict "refs";
15425 my $self = bless [], $class;
15426
15427 $self->[RALIGNMENTS] = [];
15428
15429 my $index;
15430 foreach ( keys %_index_map ) {
15431 $index = $_index_map{$_};
15432 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
15433 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
15434 else { $self->[$index] = $_default_data[$index] }
15435 }
15436
15437 $self->_increment_count();
15438 return $self;
15439 }
15440
15441 sub DESTROY {
15442 $_[0]->_decrement_count();
15443 }
15444
15445 sub get_jmax { $_[0]->[JMAX] }
15446 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
15447 sub get_rtokens { $_[0]->[RTOKENS] }
15448 sub get_rfields { $_[0]->[RFIELDS] }
15449 sub get_rpatterns { $_[0]->[RPATTERNS] }
15450 sub get_indentation { $_[0]->[INDENTATION] }
15451 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
15452 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
15453 sub get_list_type { $_[0]->[LIST_TYPE] }
15454 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
15455 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
15456
15457 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
15458 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
15459 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
15460 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
15461
15462 sub get_starting_column {
15463 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
15464 }
15465
15466 sub increment_column {
15467 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
15468 }
15469 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
15470
15471 sub current_field_width {
15472 my $self = shift;
15473 my ($j) = @_;
15474 if ( $j == 0 ) {
15475 return $self->get_column($j);
15476 }
15477 else {
15478 return $self->get_column($j) - $self->get_column( $j - 1 );
15479 }
15480 }
15481
15482 sub field_width_growth {
15483 my $self = shift;
15484 my $j = shift;
15485 return $self->get_column($j) - $self->get_starting_column($j);
15486 }
15487
15488 sub starting_field_width {
15489 my $self = shift;
15490 my $j = shift;
15491 if ( $j == 0 ) {
15492 return $self->get_starting_column($j);
15493 }
15494 else {
15495 return $self->get_starting_column($j) -
15496 $self->get_starting_column( $j - 1 );
15497 }
15498 }
15499
15500 sub increase_field_width {
15501
15502 my $self = shift;
15503 my ( $j, $pad ) = @_;
15504 my $jmax = $self->get_jmax();
15505 for my $k ( $j .. $jmax ) {
15506 $self->increment_column( $k, $pad );
15507 }
15508 }
15509
15510 sub get_available_space_on_right {
15511 my $self = shift;
15512 my $jmax = $self->get_jmax();
15513 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
15514 }
15515
15516 sub set_jmax { $_[0]->[JMAX] = $_[1] }
15517 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
15518 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
15519 sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
15520 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
15521 sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
15522 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
15523 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
15524 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
15525 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
15526 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
15527
15528}
15529
15530#####################################################################
15531#
15532# the Perl::Tidy::VerticalAligner::Alignment class holds information
15533# on a single column being aligned
15534#
15535#####################################################################
15536package Perl::Tidy::VerticalAligner::Alignment;
15537
15538{
15539
15540 use strict;
15541
15542 #use Carp;
15543
15544 # Symbolic array indexes
15545 use constant COLUMN => 0; # the current column number
15546 use constant STARTING_COLUMN => 1; # column number when created
15547 use constant MATCHING_TOKEN => 2; # what token we are matching
15548 use constant STARTING_LINE => 3; # the line index of creation
15549 use constant ENDING_LINE => 4; # the most recent line to use it
15550 use constant SAVED_COLUMN => 5; # the most recent line to use it
15551 use constant SERIAL_NUMBER => 6; # unique number for this alignment
15552 # (just its index in an array)
15553
15554 # Correspondence between variables and array indexes
15555 my %_index_map;
15556 $_index_map{column} = COLUMN;
15557 $_index_map{starting_column} = STARTING_COLUMN;
15558 $_index_map{matching_token} = MATCHING_TOKEN;
15559 $_index_map{starting_line} = STARTING_LINE;
15560 $_index_map{ending_line} = ENDING_LINE;
15561 $_index_map{saved_column} = SAVED_COLUMN;
15562 $_index_map{serial_number} = SERIAL_NUMBER;
15563
15564 my @_default_data = ();
15565 $_default_data[COLUMN] = undef;
15566 $_default_data[STARTING_COLUMN] = undef;
15567 $_default_data[MATCHING_TOKEN] = undef;
15568 $_default_data[STARTING_LINE] = undef;
15569 $_default_data[ENDING_LINE] = undef;
15570 $_default_data[SAVED_COLUMN] = undef;
15571 $_default_data[SERIAL_NUMBER] = undef;
15572
15573 # class population count
15574 {
15575 my $_count = 0;
15576 sub get_count { $_count; }
15577 sub _increment_count { ++$_count }
15578 sub _decrement_count { --$_count }
15579 }
15580
15581 # constructor
15582 sub new {
15583 my ( $caller, %arg ) = @_;
15584 my $caller_is_obj = ref($caller);
15585 my $class = $caller_is_obj || $caller;
15586 no strict "refs";
15587 my $self = bless [], $class;
15588
15589 foreach ( keys %_index_map ) {
15590 my $index = $_index_map{$_};
15591 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
15592 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
15593 else { $self->[$index] = $_default_data[$index] }
15594 }
15595 $self->_increment_count();
15596 return $self;
15597 }
15598
15599 sub DESTROY {
15600 $_[0]->_decrement_count();
15601 }
15602
15603 sub get_column { return $_[0]->[COLUMN] }
15604 sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
15605 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
15606 sub get_starting_line { return $_[0]->[STARTING_LINE] }
15607 sub get_ending_line { return $_[0]->[ENDING_LINE] }
15608 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
15609
15610 sub set_column { $_[0]->[COLUMN] = $_[1] }
15611 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
15612 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
15613 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
15614 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
15615 sub increment_column { $_[0]->[COLUMN] += $_[1] }
15616
15617 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
15618 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
15619
15620}
15621
15622package Perl::Tidy::VerticalAligner;
15623
15624# The Perl::Tidy::VerticalAligner package collects output lines and
15625# attempts to line up certain common tokens, such as => and #, which are
15626# identified by the calling routine.
15627#
15628# There are two main routines: append_line and flush. Append acts as a
15629# storage buffer, collecting lines into a group which can be vertically
15630# aligned. When alignment is no longer possible or desirable, it dumps
15631# the group to flush.
15632#
15633# append_line -----> flush
15634#
15635# collects writes
15636# vertical one
15637# groups group
15638
15639BEGIN {
15640
15641 # Caution: these debug flags produce a lot of output
15642 # They should all be 0 except when debugging small scripts
15643
15644 use constant VALIGN_DEBUG_FLAG_APPEND => 0;
15645 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
15646
15647 my $debug_warning = sub {
15648 print "VALIGN_DEBUGGING with key $_[0]\n";
15649 };
15650
15651 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
15652 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
15653
15654}
15655
15656use vars qw(
15657 $vertical_aligner_self
15658 $current_line
15659 $maximum_alignment_index
15660 $ralignment_list
15661 $maximum_jmax_seen
15662 $minimum_jmax_seen
15663 $previous_minimum_jmax_seen
15664 $previous_maximum_jmax_seen
15665 $maximum_line_index
15666 $group_level
15667 $group_type
15668 $group_maximum_gap
15669 $marginal_match
15670 $last_group_level_written
15671 $last_leading_space_count
15672 $extra_indent_ok
15673 $zero_count
15674 @group_lines
15675 $last_comment_column
15676 $last_side_comment_line_number
15677 $last_side_comment_length
15678 $last_side_comment_level
15679 $outdented_line_count
15680 $first_outdented_line_at
15681 $last_outdented_line_at
15682 $diagnostics_object
15683 $logger_object
15684 $file_writer_object
15685 @side_comment_history
15686 $comment_leading_space_count
15687
15688 $cached_line_text
15689 $cached_line_type
15690 $cached_line_flag
15691 $cached_seqno
15692 $cached_line_valid
15693
15694 $rOpts
15695
15696 $rOpts_maximum_line_length
15697 $rOpts_continuation_indentation
15698 $rOpts_indent_columns
15699 $rOpts_tabs
15700 $rOpts_entab_leading_whitespace
15701
15702 $rOpts_minimum_space_to_comment
15703
15704);
15705
15706sub initialize {
15707
15708 my $class;
15709
15710 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
15711 = @_;
15712
15713 # variables describing the entire space group:
15714
15715 $ralignment_list = [];
15716 $group_level = 0;
15717 $last_group_level_written = -1;
15718 $extra_indent_ok = 0; # can we move all lines to the right?
15719 $last_side_comment_length = 0;
15720 $maximum_jmax_seen = 0;
15721 $minimum_jmax_seen = 0;
15722 $previous_minimum_jmax_seen = 0;
15723 $previous_maximum_jmax_seen = 0;
15724
15725 # variables describing each line of the group
15726 @group_lines = (); # list of all lines in group
15727
15728 $outdented_line_count = 0;
15729 $first_outdented_line_at = 0;
15730 $last_outdented_line_at = 0;
15731 $last_side_comment_line_number = 0;
15732 $last_side_comment_level = -1;
15733
15734 # most recent 3 side comments; [ line number, column ]
15735 $side_comment_history[0] = [ -300, 0 ];
15736 $side_comment_history[1] = [ -200, 0 ];
15737 $side_comment_history[2] = [ -100, 0 ];
15738
15739 # write_leader_and_string cache:
15740 $cached_line_text = "";
15741 $cached_line_type = 0;
15742 $cached_line_flag = 0;
15743 $cached_seqno = 0;
15744 $cached_line_valid = 0;
15745
15746 # frequently used parameters
15747 $rOpts_indent_columns = $rOpts->{'indent-columns'};
15748 $rOpts_tabs = $rOpts->{'tabs'};
15749 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
15750 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
15751 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
15752
15753 forget_side_comment();
15754
15755 initialize_for_new_group();
15756
15757 $vertical_aligner_self = {};
15758 bless $vertical_aligner_self, $class;
15759 return $vertical_aligner_self;
15760}
15761
15762sub initialize_for_new_group {
15763 $maximum_line_index = -1; # lines in the current group
15764 $maximum_alignment_index = -1; # alignments in current group
15765 $zero_count = 0; # count consecutive lines without tokens
15766 $current_line = undef; # line being matched for alignment
15767 $group_maximum_gap = 0; # largest gap introduced
15768 $group_type = "";
15769 $marginal_match = 0;
15770 $comment_leading_space_count = 0;
15771 $last_leading_space_count = 0;
15772}
15773
15774# interface to Perl::Tidy::Diagnostics routines
15775sub write_diagnostics {
15776 if ($diagnostics_object) {
15777 $diagnostics_object->write_diagnostics(@_);
15778 }
15779}
15780
15781# interface to Perl::Tidy::Logger routines
15782sub warning {
15783 if ($logger_object) {
15784 $logger_object->warning(@_);
15785 }
15786}
15787
15788sub write_logfile_entry {
15789 if ($logger_object) {
15790 $logger_object->write_logfile_entry(@_);
15791 }
15792}
15793
15794sub report_definite_bug {
15795 if ($logger_object) {
15796 $logger_object->report_definite_bug();
15797 }
15798}
15799
15800sub get_SPACES {
15801
15802 # return the number of leading spaces associated with an indentation
15803 # variable $indentation is either a constant number of spaces or an
15804 # object with a get_SPACES method.
15805 my $indentation = shift;
15806 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
15807}
15808
15809sub get_RECOVERABLE_SPACES {
15810
15811 # return the number of spaces (+ means shift right, - means shift left)
15812 # that we would like to shift a group of lines with the same indentation
15813 # to get them to line up with their opening parens
15814 my $indentation = shift;
15815 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
15816}
15817
15818sub get_STACK_DEPTH {
15819
15820 my $indentation = shift;
15821 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
15822}
15823
15824sub make_alignment {
15825 my ( $col, $token ) = @_;
15826
15827 # make one new alignment at column $col which aligns token $token
15828 ++$maximum_alignment_index;
15829 my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
15830 column => $col,
15831 starting_column => $col,
15832 matching_token => $token,
15833 starting_line => $maximum_line_index,
15834 ending_line => $maximum_line_index,
15835 serial_number => $maximum_alignment_index,
15836 );
15837 $ralignment_list->[$maximum_alignment_index] = $alignment;
15838 return $alignment;
15839}
15840
15841sub dump_alignments {
15842 print
15843"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
15844 for my $i ( 0 .. $maximum_alignment_index ) {
15845 my $column = $ralignment_list->[$i]->get_column();
15846 my $starting_column = $ralignment_list->[$i]->get_starting_column();
15847 my $matching_token = $ralignment_list->[$i]->get_matching_token();
15848 my $starting_line = $ralignment_list->[$i]->get_starting_line();
15849 my $ending_line = $ralignment_list->[$i]->get_ending_line();
15850 print
15851"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
15852 }
15853}
15854
15855sub save_alignment_columns {
15856 for my $i ( 0 .. $maximum_alignment_index ) {
15857 $ralignment_list->[$i]->save_column();
15858 }
15859}
15860
15861sub restore_alignment_columns {
15862 for my $i ( 0 .. $maximum_alignment_index ) {
15863 $ralignment_list->[$i]->restore_column();
15864 }
15865}
15866
15867sub forget_side_comment {
15868 $last_comment_column = 0;
15869}
15870
15871sub append_line {
15872
15873 # sub append is called to place one line in the current vertical group.
15874 #
15875 # The input parameters are:
15876 # $level = indentation level of this line
15877 # $rfields = reference to array of fields
15878 # $rpatterns = reference to array of patterns, one per field
15879 # $rtokens = reference to array of tokens starting fields 1,2,..
15880 #
15881 # Here is an example of what this package does. In this example,
15882 # we are trying to line up both the '=>' and the '#'.
15883 #
15884 # '18' => 'grave', # \`
15885 # '19' => 'acute', # `'
15886 # '20' => 'caron', # \v
15887 # <-tabs-><f1-><--field 2 ---><-f3->
15888 # | | | |
15889 # | | | |
15890 # col1 col2 col3 col4
15891 #
15892 # The calling routine has already broken the entire line into 3 fields as
15893 # indicated. (So the work of identifying promising common tokens has
15894 # already been done).
15895 #
15896 # In this example, there will be 2 tokens being matched: '=>' and '#'.
15897 # They are the leading parts of fields 2 and 3, but we do need to know
15898 # what they are so that we can dump a group of lines when these tokens
15899 # change.
15900 #
15901 # The fields contain the actual characters of each field. The patterns
15902 # are like the fields, but they contain mainly token types instead
15903 # of tokens, so they have fewer characters. They are used to be
15904 # sure we are matching fields of similar type.
15905 #
15906 # In this example, there will be 4 column indexes being adjusted. The
15907 # first one is always at zero. The interior columns are at the start of
15908 # the matching tokens, and the last one tracks the maximum line length.
15909 #
15910 # Basically, each time a new line comes in, it joins the current vertical
15911 # group if possible. Otherwise it causes the current group to be dumped
15912 # and a new group is started.
15913 #
15914 # For each new group member, the column locations are increased, as
15915 # necessary, to make room for the new fields. When the group is finally
15916 # output, these column numbers are used to compute the amount of spaces of
15917 # padding needed for each field.
15918 #
15919 # Programming note: the fields are assumed not to have any tab characters.
15920 # Tabs have been previously removed except for tabs in quoted strings and
15921 # side comments. Tabs in these fields can mess up the column counting.
15922 # The log file warns the user if there are any such tabs.
15923
15924 my (
15925 $level, $level_end,
15926 $indentation, $rfields,
15927 $rtokens, $rpatterns,
15928 $is_forced_break, $outdent_long_lines,
15929 $is_terminal_statement, $do_not_pad,
15930 $rvertical_tightness_flags, $level_jump,
15931 )
15932 = @_;
15933
15934 # number of fields is $jmax
15935 # number of tokens between fields is $jmax-1
15936 my $jmax = $#{$rfields};
15937 $previous_minimum_jmax_seen = $minimum_jmax_seen;
15938 $previous_maximum_jmax_seen = $maximum_jmax_seen;
15939
15940 my $leading_space_count = get_SPACES($indentation);
15941
15942 # set outdented flag to be sure we either align within statements or
15943 # across statement boundaries, but not both.
15944 my $is_outdented = $last_leading_space_count > $leading_space_count;
15945 $last_leading_space_count = $leading_space_count;
15946
15947 # Patch: undo for hanging side comment
15948 my $is_hanging_side_comment =
15949 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
15950 $is_outdented = 0 if $is_hanging_side_comment;
15951
15952 VALIGN_DEBUG_FLAG_APPEND0 && do {
15953 print
15954"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
15955 };
15956
15957 # Validate cached line if necessary: If we can produce a container
15958 # with just 2 lines total by combining an existing cached opening
15959 # token with the closing token to follow, then we will mark both
15960 # cached flags as valid.
15961 if ($rvertical_tightness_flags) {
15962 if ( $maximum_line_index <= 0
15963 && $cached_line_type
15964 && $rvertical_tightness_flags->[2] == $cached_seqno )
15965 {
15966 $rvertical_tightness_flags->[3] ||= 1;
15967 $cached_line_valid ||= 1;
15968 }
15969 }
15970
15971 # do not join an opening block brace with an unbalanced line
15972 # unless requested with a flag value of 2
15973 if ( $cached_line_type == 3
15974 && $maximum_line_index < 0
15975 && $cached_line_flag < 2
15976 && $level_jump != 0 )
15977 {
15978 $cached_line_valid = 0;
15979 }
15980
15981 # patch until new aligner is finished
15982 if ($do_not_pad) { my_flush() }
15983
15984 # shouldn't happen:
15985 if ( $level < 0 ) { $level = 0 }
15986
15987 # do not align code across indentation level changes
15988 if ( $level != $group_level || $is_outdented ) {
15989
15990 # we are allowed to shift a group of lines to the right if its
15991 # level is greater than the previous and next group
15992 $extra_indent_ok =
15993 ( $level < $group_level && $last_group_level_written < $group_level );
15994
15995 my_flush();
15996
15997 # If we know that this line will get flushed out by itself because
15998 # of level changes, we can leave the extra_indent_ok flag set.
15999 # That way, if we get an external flush call, we will still be
16000 # able to do some -lp alignment if necessary.
16001 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
16002
16003 $group_level = $level;
16004
16005 # wait until after the above flush to get the leading space
16006 # count because it may have been changed if the -icp flag is in
16007 # effect
16008 $leading_space_count = get_SPACES($indentation);
16009
16010 }
16011
16012 # --------------------------------------------------------------------
16013 # Patch to collect outdentable block COMMENTS
16014 # --------------------------------------------------------------------
16015 my $is_blank_line = "";
16016 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
16017 if ( $group_type eq 'COMMENT' ) {
16018 if (
16019 (
16020 $is_block_comment
16021 && $outdent_long_lines
16022 && $leading_space_count == $comment_leading_space_count
16023 )
16024 || $is_blank_line
16025 )
16026 {
16027 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
16028 return;
16029 }
16030 else {
16031 my_flush();
16032 }
16033 }
16034
16035 # --------------------------------------------------------------------
16036 # Step 1. Handle simple line of code with no fields to match.
16037 # --------------------------------------------------------------------
16038 if ( $jmax <= 0 ) {
16039 $zero_count++;
16040
16041 if ( $maximum_line_index >= 0
16042 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
16043 {
16044
16045 # flush the current group if it has some aligned columns..
16046 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
16047
16048 # flush current group if we are just collecting side comments..
16049 elsif (
16050
16051 # ...and we haven't seen a comment lately
16052 ( $zero_count > 3 )
16053
16054 # ..or if this new line doesn't fit to the left of the comments
16055 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
16056 $group_lines[0]->get_column(0) )
16057 )
16058 {
16059 my_flush();
16060 }
16061 }
16062
16063 # patch to start new COMMENT group if this comment may be outdented
16064 if ( $is_block_comment
16065 && $outdent_long_lines
16066 && $maximum_line_index < 0 )
16067 {
16068 $group_type = 'COMMENT';
16069 $comment_leading_space_count = $leading_space_count;
16070 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
16071 return;
16072 }
16073
16074 # just write this line directly if no current group, no side comment,
16075 # and no space recovery is needed.
16076 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
16077 {
16078 write_leader_and_string( $leading_space_count, $$rfields[0], 0,
16079 $outdent_long_lines, $rvertical_tightness_flags );
16080 return;
16081 }
16082 }
16083 else {
16084 $zero_count = 0;
16085 }
16086
16087 # programming check: (shouldn't happen)
16088 # an error here implies an incorrect call was made
16089 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
16090 warning(
16091"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
16092 );
16093 report_definite_bug();
16094 }
16095
16096 # --------------------------------------------------------------------
16097 # create an object to hold this line
16098 # --------------------------------------------------------------------
16099 my $new_line = new Perl::Tidy::VerticalAligner::Line(
16100 jmax => $jmax,
16101 jmax_original_line => $jmax,
16102 rtokens => $rtokens,
16103 rfields => $rfields,
16104 rpatterns => $rpatterns,
16105 indentation => $indentation,
16106 leading_space_count => $leading_space_count,
16107 outdent_long_lines => $outdent_long_lines,
16108 list_type => "",
16109 is_hanging_side_comment => $is_hanging_side_comment,
16110 maximum_line_length => $rOpts->{'maximum-line-length'},
16111 rvertical_tightness_flags => $rvertical_tightness_flags,
16112 );
16113
16114 # --------------------------------------------------------------------
16115 # It simplifies things to create a zero length side comment
16116 # if none exists.
16117 # --------------------------------------------------------------------
16118 make_side_comment( $new_line, $level_end );
16119
16120 # --------------------------------------------------------------------
16121 # Decide if this is a simple list of items.
16122 # There are 3 list types: none, comma, comma-arrow.
16123 # We use this below to be less restrictive in deciding what to align.
16124 # --------------------------------------------------------------------
16125 if ($is_forced_break) {
16126 decide_if_list($new_line);
16127 }
16128
16129 if ($current_line) {
16130
16131 # --------------------------------------------------------------------
16132 # Allow hanging side comment to join current group, if any
16133 # This will help keep side comments aligned, because otherwise we
16134 # will have to start a new group, making alignment less likely.
16135 # --------------------------------------------------------------------
16136 join_hanging_comment( $new_line, $current_line )
16137 if $is_hanging_side_comment;
16138
16139 # --------------------------------------------------------------------
16140 # If there is just one previous line, and it has more fields
16141 # than the new line, try to join fields together to get a match with
16142 # the new line. At the present time, only a single leading '=' is
16143 # allowed to be compressed out. This is useful in rare cases where
16144 # a table is forced to use old breakpoints because of side comments,
16145 # and the table starts out something like this:
16146 # my %MonthChars = ('0', 'Jan', # side comment
16147 # '1', 'Feb',
16148 # '2', 'Mar',
16149 # Eliminating the '=' field will allow the remaining fields to line up.
16150 # This situation does not occur if there are no side comments
16151 # because scan_list would put a break after the opening '('.
16152 # --------------------------------------------------------------------
16153 eliminate_old_fields( $new_line, $current_line );
16154
16155 # --------------------------------------------------------------------
16156 # If the new line has more fields than the current group,
16157 # see if we can match the first fields and combine the remaining
16158 # fields of the new line.
16159 # --------------------------------------------------------------------
16160 eliminate_new_fields( $new_line, $current_line );
16161
16162 # --------------------------------------------------------------------
16163 # Flush previous group unless all common tokens and patterns match..
16164 # --------------------------------------------------------------------
16165 check_match( $new_line, $current_line );
16166
16167 # --------------------------------------------------------------------
16168 # See if there is space for this line in the current group (if any)
16169 # --------------------------------------------------------------------
16170 if ($current_line) {
16171 check_fit( $new_line, $current_line );
16172 }
16173 }
16174
16175 # --------------------------------------------------------------------
16176 # Append this line to the current group (or start new group)
16177 # --------------------------------------------------------------------
16178 accept_line($new_line);
16179
16180 # Future update to allow this to vary:
16181 $current_line = $new_line if ( $maximum_line_index == 0 );
16182
16183 # --------------------------------------------------------------------
16184 # Step 8. Some old debugging stuff
16185 # --------------------------------------------------------------------
16186 VALIGN_DEBUG_FLAG_APPEND && do {
16187 print "APPEND fields:";
16188 dump_array(@$rfields);
16189 print "APPEND tokens:";
16190 dump_array(@$rtokens);
16191 print "APPEND patterns:";
16192 dump_array(@$rpatterns);
16193 dump_alignments();
16194 };
16195}
16196
16197sub join_hanging_comment {
16198
16199 my $line = shift;
16200 my $jmax = $line->get_jmax();
16201 return 0 unless $jmax == 1; # must be 2 fields
16202 my $rtokens = $line->get_rtokens();
16203 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment..
16204 my $rfields = $line->get_rfields();
16205 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty...
16206 my $old_line = shift;
16207 my $maximum_field_index = $old_line->get_jmax();
16208 return 0
16209 unless $maximum_field_index > $jmax; # the current line has more fields
16210 my $rpatterns = $line->get_rpatterns();
16211
16212 $line->set_is_hanging_side_comment(1);
16213 $jmax = $maximum_field_index;
16214 $line->set_jmax($jmax);
16215 $$rfields[$jmax] = $$rfields[1];
16216 $$rtokens[ $jmax - 1 ] = $$rtokens[0];
16217 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
16218 for ( my $j = 1 ; $j < $jmax ; $j++ ) {
16219 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
16220 $$rtokens[ $j - 1 ] = "";
16221 $$rpatterns[ $j - 1 ] = "";
16222 }
16223 return 1;
16224}
16225
16226sub eliminate_old_fields {
16227
16228 my $new_line = shift;
16229 my $jmax = $new_line->get_jmax();
16230 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
16231 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
16232
16233 # there must be one previous line
16234 return unless ( $maximum_line_index == 0 );
16235
16236 my $old_line = shift;
16237 my $maximum_field_index = $old_line->get_jmax();
16238
16239 # this line must have fewer fields
16240 return unless $maximum_field_index > $jmax;
16241
16242 # Identify specific cases where field elimination is allowed:
16243 # case=1: both lines have comma-separated lists, and the first
16244 # line has an equals
16245 # case=2: both lines have leading equals
16246
16247 # case 1 is the default
16248 my $case = 1;
16249
16250 # See if case 2: both lines have leading '='
16251 # We'll require smiliar leading patterns in this case
16252 my $old_rtokens = $old_line->get_rtokens();
16253 my $rtokens = $new_line->get_rtokens();
16254 my $rpatterns = $new_line->get_rpatterns();
16255 my $old_rpatterns = $old_line->get_rpatterns();
16256 if ( $rtokens->[0] =~ /^=\d*$/
16257 && $old_rtokens->[0] eq $rtokens->[0]
16258 && $old_rpatterns->[0] eq $rpatterns->[0] )
16259 {
16260 $case = 2;
16261 }
16262
16263 # not too many fewer fields in new line for case 1
16264 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
16265
16266 # case 1 must have side comment
16267 my $old_rfields = $old_line->get_rfields();
16268 return
16269 if ( $case == 1
16270 && length( $$old_rfields[$maximum_field_index] ) == 0 );
16271
16272 my $rfields = $new_line->get_rfields();
16273
16274 my $hid_equals = 0;
16275
16276 my @new_alignments = ();
16277 my @new_fields = ();
16278 my @new_matching_patterns = ();
16279 my @new_matching_tokens = ();
16280
16281 my $j = 0;
16282 my $k;
16283 my $current_field = '';
16284 my $current_pattern = '';
16285
16286 # loop over all old tokens
16287 my $in_match = 0;
16288 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
16289 $current_field .= $$old_rfields[$k];
16290 $current_pattern .= $$old_rpatterns[$k];
16291 last if ( $j > $jmax - 1 );
16292
16293 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
16294 $in_match = 1;
16295 $new_fields[$j] = $current_field;
16296 $new_matching_patterns[$j] = $current_pattern;
16297 $current_field = '';
16298 $current_pattern = '';
16299 $new_matching_tokens[$j] = $$old_rtokens[$k];
16300 $new_alignments[$j] = $old_line->get_alignment($k);
16301 $j++;
16302 }
16303 else {
16304
16305 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
16306 last if ( $case == 2 ); # avoid problems with stuff
16307 # like: $a=$b=$c=$d;
16308 $hid_equals = 1;
16309 }
16310 last
16311 if ( $in_match && $case == 1 )
16312 ; # disallow gaps in matching field types in case 1
16313 }
16314 }
16315
16316 # Modify the current state if we are successful.
16317 # We must exactly reach the ends of both lists for success.
16318 if ( ( $j == $jmax )
16319 && ( $current_field eq '' )
16320 && ( $case != 1 || $hid_equals ) )
16321 {
16322 $k = $maximum_field_index;
16323 $current_field .= $$old_rfields[$k];
16324 $current_pattern .= $$old_rpatterns[$k];
16325 $new_fields[$j] = $current_field;
16326 $new_matching_patterns[$j] = $current_pattern;
16327
16328 $new_alignments[$j] = $old_line->get_alignment($k);
16329 $maximum_field_index = $j;
16330
16331 $old_line->set_alignments(@new_alignments);
16332 $old_line->set_jmax($jmax);
16333 $old_line->set_rtokens( \@new_matching_tokens );
16334 $old_line->set_rfields( \@new_fields );
16335 $old_line->set_rpatterns( \@$rpatterns );
16336 }
16337}
16338
16339# create an empty side comment if none exists
16340sub make_side_comment {
16341 my $new_line = shift;
16342 my $level_end = shift;
16343 my $jmax = $new_line->get_jmax();
16344 my $rtokens = $new_line->get_rtokens();
16345
16346 # if line does not have a side comment...
16347 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
16348 my $rfields = $new_line->get_rfields();
16349 my $rpatterns = $new_line->get_rpatterns();
16350 $$rtokens[$jmax] = '#';
16351 $$rfields[ ++$jmax ] = '';
16352 $$rpatterns[$jmax] = '#';
16353 $new_line->set_jmax($jmax);
16354 $new_line->set_jmax_original_line($jmax);
16355 }
16356
16357 # line has a side comment..
16358 else {
16359
16360 # don't remember old side comment location for very long
16361 my $line_number = $vertical_aligner_self->get_output_line_number();
16362 my $rfields = $new_line->get_rfields();
16363 if (
16364 $line_number - $last_side_comment_line_number > 12
16365
16366 # and don't remember comment location across block level changes
16367 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
16368 )
16369 {
16370 forget_side_comment();
16371 }
16372 $last_side_comment_line_number = $line_number;
16373 $last_side_comment_level = $level_end;
16374 }
16375}
16376
16377sub decide_if_list {
16378
16379 my $line = shift;
16380
16381 # A list will be taken to be a line with a forced break in which all
16382 # of the field separators are commas or comma-arrows (except for the
16383 # trailing #)
16384
16385 # List separator tokens are things like ',3' or '=>2',
16386 # where the trailing digit is the nesting depth. Allow braces
16387 # to allow nested list items.
16388 my $rtokens = $line->get_rtokens();
16389 my $test_token = $$rtokens[0];
16390 if ( $test_token =~ /^(\,|=>)/ ) {
16391 my $list_type = $test_token;
16392 my $jmax = $line->get_jmax();
16393
16394 foreach ( 1 .. $jmax - 2 ) {
16395 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
16396 $list_type = "";
16397 last;
16398 }
16399 }
16400 $line->set_list_type($list_type);
16401 }
16402}
16403
16404sub eliminate_new_fields {
16405
16406 return unless ( $maximum_line_index >= 0 );
16407 my $new_line = shift;
16408 my $old_line = shift;
16409 my $jmax = $new_line->get_jmax();
16410
16411 my $old_rtokens = $old_line->get_rtokens();
16412 my $rtokens = $new_line->get_rtokens();
16413 my $is_assignment =
16414 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
16415
16416 # must be monotonic variation
16417 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
16418
16419 # must be more fields in the new line
16420 my $maximum_field_index = $old_line->get_jmax();
16421 return unless ( $maximum_field_index < $jmax );
16422
16423 unless ($is_assignment) {
16424 return
16425 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
16426 ; # only if monotonic
16427
16428 # never combine fields of a comma list
16429 return
16430 unless ( $maximum_field_index > 1 )
16431 && ( $new_line->get_list_type() !~ /^,/ );
16432 }
16433
16434 my $rfields = $new_line->get_rfields();
16435 my $rpatterns = $new_line->get_rpatterns();
16436 my $old_rpatterns = $old_line->get_rpatterns();
16437
16438 # loop over all old tokens except comment
16439 my $match = 1;
16440 my $k;
16441 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
16442 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
16443 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
16444 {
16445 $match = 0;
16446 last;
16447 }
16448 }
16449
16450 # first tokens agree, so combine new tokens
16451 if ($match) {
16452 for $k ( $maximum_field_index .. $jmax - 1 ) {
16453
16454 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
16455 $$rfields[$k] = "";
16456 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
16457 $$rpatterns[$k] = "";
16458 }
16459
16460 $$rtokens[ $maximum_field_index - 1 ] = '#';
16461 $$rfields[$maximum_field_index] = $$rfields[$jmax];
16462 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
16463 $jmax = $maximum_field_index;
16464 }
16465 $new_line->set_jmax($jmax);
16466}
16467
16468sub check_match {
16469
16470 my $new_line = shift;
16471 my $old_line = shift;
16472
16473 my $jmax = $new_line->get_jmax();
16474 my $maximum_field_index = $old_line->get_jmax();
16475
16476 # flush if this line has too many fields
16477 if ( $jmax > $maximum_field_index ) { my_flush(); return }
16478
16479 # flush if adding this line would make a non-monotonic field count
16480 if (
16481 ( $maximum_field_index > $jmax ) # this has too few fields
16482 && (
16483 ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic
16484 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
16485 )
16486 )
16487 {
16488 my_flush();
16489 return;
16490 }
16491
16492 # otherwise append this line if everything matches
16493 my $jmax_original_line = $new_line->get_jmax_original_line();
16494 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
16495 my $rtokens = $new_line->get_rtokens();
16496 my $rfields = $new_line->get_rfields();
16497 my $rpatterns = $new_line->get_rpatterns();
16498 my $list_type = $new_line->get_list_type();
16499
16500 my $group_list_type = $old_line->get_list_type();
16501 my $old_rpatterns = $old_line->get_rpatterns();
16502 my $old_rtokens = $old_line->get_rtokens();
16503
16504 my $jlimit = $jmax - 1;
16505 if ( $maximum_field_index > $jmax ) {
16506 $jlimit = $jmax_original_line;
16507 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
16508 }
16509
16510 my $everything_matches = 1;
16511
16512 # common list types always match
16513 unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
16514 || $is_hanging_side_comment )
16515 {
16516
16517 my $leading_space_count = $new_line->get_leading_space_count();
16518 my $saw_equals = 0;
16519 for my $j ( 0 .. $jlimit ) {
16520 my $match = 1;
16521
16522 my $old_tok = $$old_rtokens[$j];
16523 my $new_tok = $$rtokens[$j];
16524
16525 # dumb down the match after an equals
16526 if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
16527 $new_tok = $1;
16528 $old_tok =~ s/\+.*$//;
16529 }
16530 if ( $new_tok =~ /^=\d*$/ ) { $saw_equals = 1 }
16531
16532 # we never match if the matching tokens differ
16533 if ( $j < $jlimit
16534 && $old_tok ne $new_tok )
16535 {
16536 $match = 0;
16537 }
16538
16539 # otherwise, if patterns match, we always have a match.
16540 # However, if patterns don't match, we have to be careful...
16541 elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
16542
16543 # We have to be very careful about aligning commas when the
16544 # pattern's don't match, because it can be worse to create an
16545 # alignment where none is needed than to omit one. The current
16546 # rule: if we are within a matching sub call (indicated by '+'
16547 # in the matching token), we'll allow a marginal match, but
16548 # otherwise not.
16549 #
16550 # Here's an example where we'd like to align the '='
16551 # my $cfile = File::Spec->catfile( 't', 'callext.c' );
16552 # my $inc = File::Spec->catdir( 'Basic', 'Core' );
16553 # because the function names differ.
16554 # Future alignment logic should make this unnecessary.
16555 #
16556 # Here's an example where the ','s are not contained in a call.
16557 # The first line below should probably not match the next two:
16558 # ( $a, $b ) = ( $b, $r );
16559 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
16560 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
16561 if ( $new_tok =~ /^,/ ) {
16562 if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
16563 $marginal_match = 1;
16564 }
16565 else {
16566 $match = 0;
16567 }
16568 }
16569
16570 # parens don't align well unless patterns match
16571 elsif ( $new_tok =~ /^\(/ ) {
16572 $match = 0;
16573 }
16574
16575 # Handle an '=' alignment with different patterns to
16576 # the left.
16577 elsif ( $new_tok =~ /^=\d*$/ ) {
16578
16579 $saw_equals = 1;
16580
16581 # It is best to be a little restrictive when
16582 # aligning '=' tokens. Here is an example of
16583 # two lines that we will not align:
16584 # my $variable=6;
16585 # $bb=4;
16586 # The problem is that one is a 'my' declaration,
16587 # and the other isn't, so they're not very similar.
16588 # We will filter these out by comparing the first
16589 # letter of the pattern. This is crude, but works
16590 # well enough.
16591 if (
16592 substr( $$old_rpatterns[$j], 0, 1 ) ne
16593 substr( $$rpatterns[$j], 0, 1 ) )
16594 {
16595 $match = 0;
16596 }
16597
16598 # If we pass that test, we'll call it a marginal match.
16599 # Here is an example of a marginal match:
16600 # $done{$$op} = 1;
16601 # $op = compile_bblock($op);
16602 # The left tokens are both identifiers, but
16603 # one accesses a hash and the other doesn't.
16604 # We'll let this be a tentative match and undo
16605 # it later if we don't find more than 2 lines
16606 # in the group.
16607 elsif ( $maximum_line_index == 0 ) {
16608 $marginal_match = 1;
16609 }
16610 }
16611 }
16612
16613 # Don't let line with fewer fields increase column widths
16614 # ( align3.t )
16615 if ( $maximum_field_index > $jmax ) {
16616 my $pad =
16617 length( $$rfields[$j] ) - $old_line->current_field_width($j);
16618
16619 if ( $j == 0 ) {
16620 $pad += $leading_space_count;
16621 }
16622
16623 # TESTING: suspend this rule to allow last lines to join
16624 if ( $pad > 0 ) { $match = 0; }
16625 }
16626
16627 unless ($match) {
16628 $everything_matches = 0;
16629 last;
16630 }
16631 }
16632 }
16633
16634 if ( $maximum_field_index > $jmax ) {
16635
16636 if ($everything_matches) {
16637
16638 my $comment = $$rfields[$jmax];
16639 for $jmax ( $jlimit .. $maximum_field_index ) {
16640 $$rtokens[$jmax] = $$old_rtokens[$jmax];
16641 $$rfields[ ++$jmax ] = '';
16642 $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
16643 }
16644 $$rfields[$jmax] = $comment;
16645 $new_line->set_jmax($jmax);
16646 }
16647 }
16648
16649 my_flush() unless ($everything_matches);
16650}
16651
16652sub check_fit {
16653
16654 return unless ( $maximum_line_index >= 0 );
16655 my $new_line = shift;
16656 my $old_line = shift;
16657
16658 my $jmax = $new_line->get_jmax();
16659 my $leading_space_count = $new_line->get_leading_space_count();
16660 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
16661 my $rtokens = $new_line->get_rtokens();
16662 my $rfields = $new_line->get_rfields();
16663 my $rpatterns = $new_line->get_rpatterns();
16664
16665 my $group_list_type = $group_lines[0]->get_list_type();
16666
16667 my $padding_so_far = 0;
16668 my $padding_available = $old_line->get_available_space_on_right();
16669
16670 # save current columns in case this doesn't work
16671 save_alignment_columns();
16672
16673 my ( $j, $pad, $eight );
16674 my $maximum_field_index = $old_line->get_jmax();
16675 for $j ( 0 .. $jmax ) {
16676
16677 ## testing patch to avoid excessive gaps in previous lines,
16678 # due to a line of fewer fields.
16679 # return join( ".",
16680 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
16681 # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
16682 ## MOVED BELOW AS A TEST
16683 ##next if ($jmax < $maximum_field_index && $j==$jmax-1);
16684
16685 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
16686
16687 if ( $j == 0 ) {
16688 $pad += $leading_space_count;
16689 }
16690
16691 # remember largest gap of the group, excluding gap to side comment
16692 if ( $pad < 0
16693 && $group_maximum_gap < -$pad
16694 && $j > 0
16695 && $j < $jmax - 1 )
16696 {
16697 $group_maximum_gap = -$pad;
16698 }
16699
16700 next if $pad < 0;
16701
16702 # This line will need space; lets see if we want to accept it..
16703 if (
16704
16705 # not if this won't fit
16706 ( $pad > $padding_available )
16707
16708 # previously, there were upper bounds placed on padding here
16709 # (maximum_whitespace_columns), but they were not really helpful
16710
16711 )
16712 {
16713
16714 # revert to starting state then flush; things didn't work out
16715 restore_alignment_columns();
16716 my_flush();
16717 last;
16718 }
16719
16720 # TESTING PATCH moved from above to be sure we fit
16721 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
16722
16723 # looks ok, squeeze this field in
16724 $old_line->increase_field_width( $j, $pad );
16725 $padding_available -= $pad;
16726
16727 # remember largest gap of the group, excluding gap to side comment
16728 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
16729 $group_maximum_gap = $pad;
16730 }
16731 }
16732}
16733
16734sub accept_line {
16735
16736 my $new_line = shift;
16737 $group_lines[ ++$maximum_line_index ] = $new_line;
16738
16739 # initialize field lengths if starting new group
16740 if ( $maximum_line_index == 0 ) {
16741
16742 my $jmax = $new_line->get_jmax();
16743 my $rfields = $new_line->get_rfields();
16744 my $rtokens = $new_line->get_rtokens();
16745 my $j;
16746 my $col = $new_line->get_leading_space_count();
16747
16748 for $j ( 0 .. $jmax ) {
16749 $col += length( $$rfields[$j] );
16750
16751 # create initial alignments for the new group
16752 my $token = "";
16753 if ( $j < $jmax ) { $token = $$rtokens[$j] }
16754 my $alignment = make_alignment( $col, $token );
16755 $new_line->set_alignment( $j, $alignment );
16756 }
16757
16758 $maximum_jmax_seen = $jmax;
16759 $minimum_jmax_seen = $jmax;
16760 }
16761
16762 # use previous alignments otherwise
16763 else {
16764 my @new_alignments =
16765 $group_lines[ $maximum_line_index - 1 ]->get_alignments();
16766 $new_line->set_alignments(@new_alignments);
16767 }
16768}
16769
16770sub dump_array {
16771
16772 # debug routine to dump array contents
16773 local $" = ')(';
16774 print "(@_)\n";
16775}
16776
16777# flush() sends the current Perl::Tidy::VerticalAligner group down the
16778# pipeline to Perl::Tidy::FileWriter.
16779
16780# This is the external flush, which also empties the cache
16781sub flush {
16782
16783 if ( $maximum_line_index < 0 ) {
16784 if ($cached_line_type) {
16785 $file_writer_object->write_code_line( $cached_line_text . "\n" );
16786 $cached_line_type = 0;
16787 $cached_line_text = "";
16788 }
16789 }
16790 else {
16791 my_flush();
16792 }
16793}
16794
16795# This is the internal flush, which leaves the cache intact
16796sub my_flush {
16797
16798 return if ( $maximum_line_index < 0 );
16799
16800 # handle a group of comment lines
16801 if ( $group_type eq 'COMMENT' ) {
16802
16803 VALIGN_DEBUG_FLAG_APPEND0 && do {
16804 my ( $a, $b, $c ) = caller();
16805 print
16806"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
16807
16808 };
16809 my $leading_space_count = $comment_leading_space_count;
16810 my $leading_string = get_leading_string($leading_space_count);
16811
16812 # zero leading space count if any lines are too long
16813 my $max_excess = 0;
16814 for my $i ( 0 .. $maximum_line_index ) {
16815 my $str = $group_lines[$i];
16816 my $excess =
16817 length($str) + $leading_space_count - $rOpts_maximum_line_length;
16818 if ( $excess > $max_excess ) {
16819 $max_excess = $excess;
16820 }
16821 }
16822
16823 if ( $max_excess > 0 ) {
16824 $leading_space_count -= $max_excess;
16825 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
16826 $last_outdented_line_at =
16827 $file_writer_object->get_output_line_number();
16828 unless ($outdented_line_count) {
16829 $first_outdented_line_at = $last_outdented_line_at;
16830 }
16831 $outdented_line_count += ( $maximum_line_index + 1 );
16832 }
16833
16834 # write the group of lines
16835 my $outdent_long_lines = 0;
16836 for my $i ( 0 .. $maximum_line_index ) {
16837 write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
16838 $outdent_long_lines, "" );
16839 }
16840 }
16841
16842 # handle a group of code lines
16843 else {
16844
16845 VALIGN_DEBUG_FLAG_APPEND0 && do {
16846 my $group_list_type = $group_lines[0]->get_list_type();
16847 my ( $a, $b, $c ) = caller();
16848 my $maximum_field_index = $group_lines[0]->get_jmax();
16849 print
16850"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
16851
16852 };
16853
16854 # some small groups are best left unaligned
16855 my $do_not_align = decide_if_aligned();
16856
16857 # optimize side comment location
16858 $do_not_align = adjust_side_comment($do_not_align);
16859
16860 # recover spaces for -lp option if possible
16861 my $extra_leading_spaces = get_extra_leading_spaces();
16862
16863 # all lines of this group have the same basic leading spacing
16864 my $group_leader_length = $group_lines[0]->get_leading_space_count();
16865
16866 # add extra leading spaces if helpful
16867 my $min_ci_gap =
16868 improve_continuation_indentation( $do_not_align,
16869 $group_leader_length );
16870
16871 # loop to output all lines
16872 for my $i ( 0 .. $maximum_line_index ) {
16873 my $line = $group_lines[$i];
16874 write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
16875 $group_leader_length, $extra_leading_spaces );
16876 }
16877 }
16878 initialize_for_new_group();
16879}
16880
16881sub decide_if_aligned {
16882
16883 # Do not try to align two lines which are not really similar
16884 return unless $maximum_line_index == 1;
16885
16886 my $group_list_type = $group_lines[0]->get_list_type();
16887
16888 my $do_not_align = (
16889
16890 # always align lists
16891 !$group_list_type
16892
16893 && (
16894
16895 # don't align if it was just a marginal match
16896 $marginal_match
16897
16898 # don't align two lines with big gap
16899 || $group_maximum_gap > 12
16900
16901 # or lines with differing number of alignment tokens
16902 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
16903 )
16904 );
16905
16906 # But try to convert them into a simple comment group if the first line
16907 # a has side comment
16908 my $rfields = $group_lines[0]->get_rfields();
16909 my $maximum_field_index = $group_lines[0]->get_jmax();
16910 if ( $do_not_align
16911 && ( $maximum_line_index > 0 )
16912 && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
16913 {
16914 combine_fields();
16915 $do_not_align = 0;
16916 }
16917 return $do_not_align;
16918}
16919
16920sub adjust_side_comment {
16921
16922 my $do_not_align = shift;
16923
16924 # let's see if we can move the side comment field out a little
16925 # to improve readability (the last field is always a side comment field)
16926 my $have_side_comment = 0;
16927 my $first_side_comment_line = -1;
16928 my $maximum_field_index = $group_lines[0]->get_jmax();
16929 for my $i ( 0 .. $maximum_line_index ) {
16930 my $line = $group_lines[$i];
16931
16932 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
16933 $have_side_comment = 1;
16934 $first_side_comment_line = $i;
16935 last;
16936 }
16937 }
16938
16939 my $kmax = $maximum_field_index + 1;
16940
16941 if ($have_side_comment) {
16942
16943 my $line = $group_lines[0];
16944
16945 # the maximum space without exceeding the line length:
16946 my $avail = $line->get_available_space_on_right();
16947
16948 # try to use the previous comment column
16949 my $side_comment_column = $line->get_column( $kmax - 2 );
16950 my $move = $last_comment_column - $side_comment_column;
16951
16952## my $sc_line0 = $side_comment_history[0]->[0];
16953## my $sc_col0 = $side_comment_history[0]->[1];
16954## my $sc_line1 = $side_comment_history[1]->[0];
16955## my $sc_col1 = $side_comment_history[1]->[1];
16956## my $sc_line2 = $side_comment_history[2]->[0];
16957## my $sc_col2 = $side_comment_history[2]->[1];
16958##
16959## # FUTURE UPDATES:
16960## # Be sure to ignore 'do not align' and '} # end comments'
16961## # Find first $move > 0 and $move <= $avail as follows:
16962## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
16963## # 2. try sc_col2 if (line-sc_line2) < 12
16964## # 3. try min possible space, plus up to 8,
16965## # 4. try min possible space
16966
16967 if ( $kmax > 0 && !$do_not_align ) {
16968
16969 # but if this doesn't work, give up and use the minimum space
16970 if ( $move > $avail ) {
16971 $move = $rOpts_minimum_space_to_comment - 1;
16972 }
16973
16974 # but we want some minimum space to the comment
16975 my $min_move = $rOpts_minimum_space_to_comment - 1;
16976 if ( $move >= 0
16977 && $last_side_comment_length > 0
16978 && ( $first_side_comment_line == 0 )
16979 && $group_level == $last_group_level_written )
16980 {
16981 $min_move = 0;
16982 }
16983
16984 if ( $move < $min_move ) {
16985 $move = $min_move;
16986 }
16987
16988 # prevously, an upper bound was placed on $move here,
16989 # (maximum_space_to_comment), but it was not helpful
16990
16991 # don't exceed the available space
16992 if ( $move > $avail ) { $move = $avail }
16993
16994 # we can only increase space, never decrease
16995 if ( $move > 0 ) {
16996 $line->increase_field_width( $maximum_field_index - 1, $move );
16997 }
16998
16999 # remember this column for the next group
17000 $last_comment_column = $line->get_column( $kmax - 2 );
17001 }
17002 else {
17003
17004 # try to at least line up the existing side comment location
17005 if ( $kmax > 0 && $move > 0 && $move < $avail ) {
17006 $line->increase_field_width( $maximum_field_index - 1, $move );
17007 $do_not_align = 0;
17008 }
17009
17010 # reset side comment column if we can't align
17011 else {
17012 forget_side_comment();
17013 }
17014 }
17015 }
17016 return $do_not_align;
17017}
17018
17019sub improve_continuation_indentation {
17020 my ( $do_not_align, $group_leader_length ) = @_;
17021
17022 # See if we can increase the continuation indentation
17023 # to move all continuation lines closer to the next field
17024 # (unless it is a comment).
17025 #
17026 # '$min_ci_gap'is the extra indentation that we may need to introduce.
17027 # We will only introduce this to fields which already have some ci.
17028 # Without this variable, we would occasionally get something like this
17029 # (Complex.pm):
17030 #
17031 # use overload '+' => \&plus,
17032 # '-' => \&minus,
17033 # '*' => \&multiply,
17034 # ...
17035 # 'tan' => \&tan,
17036 # 'atan2' => \&atan2,
17037 #
17038 # Whereas with this variable, we can shift variables over to get this:
17039 #
17040 # use overload '+' => \&plus,
17041 # '-' => \&minus,
17042 # '*' => \&multiply,
17043 # ...
17044 # 'tan' => \&tan,
17045 # 'atan2' => \&atan2,
17046
17047 ## BUB: Deactivated####################
17048 # The trouble with this patch is that it may, for example,
17049 # move in some 'or's or ':'s, and leave some out, so that the
17050 # left edge alignment suffers.
17051 return 0;
17052 ###########################################
17053
17054 my $maximum_field_index = $group_lines[0]->get_jmax();
17055
17056 my $min_ci_gap = $rOpts_maximum_line_length;
17057 if ( $maximum_field_index > 1 && !$do_not_align ) {
17058
17059 for my $i ( 0 .. $maximum_line_index ) {
17060 my $line = $group_lines[$i];
17061 my $leading_space_count = $line->get_leading_space_count();
17062 my $rfields = $line->get_rfields();
17063
17064 my $gap = $line->get_column(0) - $leading_space_count -
17065 length( $$rfields[0] );
17066
17067 if ( $leading_space_count > $group_leader_length ) {
17068 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
17069 }
17070 }
17071
17072 if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
17073 $min_ci_gap = 0;
17074 }
17075 }
17076 else {
17077 $min_ci_gap = 0;
17078 }
17079 return $min_ci_gap;
17080}
17081
17082sub write_vertically_aligned_line {
17083
17084 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
17085 $extra_leading_spaces )
17086 = @_;
17087 my $rfields = $line->get_rfields();
17088 my $leading_space_count = $line->get_leading_space_count();
17089 my $outdent_long_lines = $line->get_outdent_long_lines();
17090 my $maximum_field_index = $line->get_jmax();
17091 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
17092
17093 # add any extra spaces
17094 if ( $leading_space_count > $group_leader_length ) {
17095 $leading_space_count += $min_ci_gap;
17096 }
17097
17098 my $str = $$rfields[0];
17099
17100 # loop to concatenate all fields of this line and needed padding
17101 my $total_pad_count = 0;
17102 my ( $j, $pad );
17103 for $j ( 1 .. $maximum_field_index ) {
17104
17105 # skip zero-length side comments
17106 last
17107 if ( ( $j == $maximum_field_index )
17108 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
17109 );
17110
17111 # compute spaces of padding before this field
17112 my $col = $line->get_column( $j - 1 );
17113 $pad = $col - ( length($str) + $leading_space_count );
17114
17115 if ($do_not_align) {
17116 $pad =
17117 ( $j < $maximum_field_index )
17118 ? 0
17119 : $rOpts_minimum_space_to_comment - 1;
17120 }
17121
17122 # accumulate the padding
17123 if ( $pad > 0 ) { $total_pad_count += $pad; }
17124
17125 # add this field
17126 if ( !defined $$rfields[$j] ) {
17127 write_diagnostics("UNDEFined field at j=$j\n");
17128 }
17129
17130 # only add padding when we have a finite field;
17131 # this avoids extra terminal spaces if we have empty fields
17132 if ( length( $$rfields[$j] ) > 0 ) {
17133 $str .= ' ' x $total_pad_count;
17134 $total_pad_count = 0;
17135 $str .= $$rfields[$j];
17136 }
17137
17138 # update side comment history buffer
17139 if ( $j == $maximum_field_index ) {
17140 my $lineno = $file_writer_object->get_output_line_number();
17141 shift @side_comment_history;
17142 push @side_comment_history, [ $lineno, $col ];
17143 }
17144 }
17145
17146 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
17147
17148 # ship this line off
17149 write_leader_and_string( $leading_space_count + $extra_leading_spaces,
17150 $str, $side_comment_length, $outdent_long_lines,
17151 $rvertical_tightness_flags );
17152}
17153
17154sub get_extra_leading_spaces {
17155
17156 #----------------------------------------------------------
17157 # Define any extra indentation space (for the -lp option).
17158 # Here is why:
17159 # If a list has side comments, sub scan_list must dump the
17160 # list before it sees everything. When this happens, it sets
17161 # the indentation to the standard scheme, but notes how
17162 # many spaces it would have liked to use. We may be able
17163 # to recover that space here in the event that that all of the
17164 # lines of a list are back together again.
17165 #----------------------------------------------------------
17166
17167 my $extra_leading_spaces = 0;
17168 if ($extra_indent_ok) {
17169 my $object = $group_lines[0]->get_indentation();
17170 if ( ref($object) ) {
17171 my $extra_indentation_spaces_wanted =
17172 get_RECOVERABLE_SPACES($object);
17173
17174 # all indentation objects must be the same
17175 my $i;
17176 for $i ( 1 .. $maximum_line_index ) {
17177 if ( $object != $group_lines[$i]->get_indentation() ) {
17178 $extra_indentation_spaces_wanted = 0;
17179 last;
17180 }
17181 }
17182
17183 if ($extra_indentation_spaces_wanted) {
17184
17185 # the maximum space without exceeding the line length:
17186 my $avail = $group_lines[0]->get_available_space_on_right();
17187 $extra_leading_spaces =
17188 ( $avail > $extra_indentation_spaces_wanted )
17189 ? $extra_indentation_spaces_wanted
17190 : $avail;
17191
17192 # update the indentation object because with -icp the terminal
17193 # ');' will use the same adjustment.
17194 $object->permanently_decrease_AVAILABLE_SPACES(
17195 -$extra_leading_spaces );
17196 }
17197 }
17198 }
17199 return $extra_leading_spaces;
17200}
17201
17202sub combine_fields {
17203
17204 # combine all fields except for the comment field ( sidecmt.t )
17205 my ( $j, $k );
17206 my $maximum_field_index = $group_lines[0]->get_jmax();
17207 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
17208 my $line = $group_lines[$j];
17209 my $rfields = $line->get_rfields();
17210 foreach ( 1 .. $maximum_field_index - 1 ) {
17211 $$rfields[0] .= $$rfields[$_];
17212 }
17213 $$rfields[1] = $$rfields[$maximum_field_index];
17214
17215 $line->set_jmax(1);
17216 $line->set_column( 0, 0 );
17217 $line->set_column( 1, 0 );
17218
17219 }
17220 $maximum_field_index = 1;
17221
17222 for $j ( 0 .. $maximum_line_index ) {
17223 my $line = $group_lines[$j];
17224 my $rfields = $line->get_rfields();
17225 for $k ( 0 .. $maximum_field_index ) {
17226 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
17227 if ( $k == 0 ) {
17228 $pad += $group_lines[$j]->get_leading_space_count();
17229 }
17230
17231 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
17232
17233 }
17234 }
17235}
17236
17237sub get_output_line_number {
17238
17239 # the output line number reported to a caller is the number of items
17240 # written plus the number of items in the buffer
17241 my $self = shift;
17242 1 + $maximum_line_index + $file_writer_object->get_output_line_number();
17243}
17244
17245sub write_leader_and_string {
17246
17247 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
17248 $rvertical_tightness_flags )
17249 = @_;
17250
17251 my $leading_string = get_leading_string($leading_space_count);
17252
17253 # handle outdenting of long lines:
17254 if ($outdent_long_lines) {
17255 my $excess =
17256 length($str) - $side_comment_length + $leading_space_count -
17257 $rOpts_maximum_line_length;
17258 if ( $excess > 0 ) {
17259 $leading_string = "";
17260 $last_outdented_line_at =
17261 $file_writer_object->get_output_line_number();
17262
17263 unless ($outdented_line_count) {
17264 $first_outdented_line_at = $last_outdented_line_at;
17265 }
17266 $outdented_line_count++;
17267 }
17268 }
17269
17270 # Unpack any recombination data; it was packed by
17271 # sub send_lines_to_vertical_aligner. Contents:
17272 #
17273 # [0] type: 1=opening 2=closing 3=opening block brace
17274 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
17275 # if closing: spaces of padding to use
17276 # [2] sequence number of container
17277 # [3] valid flag: do not append if this flag is false
17278 #
17279 my ( $open_or_close, $tightness_flag, $seqno, $valid );
17280 if ($rvertical_tightness_flags) {
17281 ( $open_or_close, $tightness_flag, $seqno, $valid ) =
17282 @{$rvertical_tightness_flags};
17283 }
17284
17285 # handle any cached line ..
17286 # either append this line to it or write it out
17287 if ($cached_line_text) {
17288
17289 if ( !$cached_line_valid ) {
17290 $file_writer_object->write_code_line( $cached_line_text . "\n" );
17291 }
17292
17293 # handle cached line with opening container token
17294 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
17295
17296 my $gap = $leading_space_count - length($cached_line_text);
17297
17298 # handle option of just one tight opening per line:
17299 if ( $cached_line_flag == 1 ) {
17300 if ( defined($open_or_close) && $open_or_close == 1 ) {
17301 $gap = -1;
17302 }
17303 }
17304
17305 if ( $gap >= 0 ) {
17306 $leading_string = $cached_line_text . ' ' x $gap;
17307 }
17308 else {
17309 $file_writer_object->write_code_line(
17310 $cached_line_text . "\n" );
17311 }
17312 }
17313
17314 # handle cached line to place before this closing container token
17315 else {
17316 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
17317
17318 if ( length($test_line) <= $rOpts_maximum_line_length ) {
17319 $str = $test_line;
17320 $leading_string = "";
17321 }
17322 else {
17323 $file_writer_object->write_code_line(
17324 $cached_line_text . "\n" );
17325 }
17326 }
17327 }
17328 $cached_line_type = 0;
17329 $cached_line_text = "";
17330
17331 my $line = $leading_string . $str;
17332
17333 # write or cache this line
17334 if ( !$rvertical_tightness_flags || $side_comment_length > 0 ) {
17335 $file_writer_object->write_code_line( $line . "\n" );
17336 }
17337 else {
17338 $cached_line_text = $line;
17339 $cached_line_type = $open_or_close;
17340 $cached_line_flag = $tightness_flag;
17341 $cached_seqno = $seqno;
17342 $cached_line_valid = $valid;
17343 }
17344
17345 $last_group_level_written = $group_level;
17346 $last_side_comment_length = $side_comment_length;
17347 $extra_indent_ok = 0;
17348}
17349
17350{ # begin get_leading_string
17351
17352 my @leading_string_cache;
17353
17354 sub get_leading_string {
17355
17356 # define the leading whitespace string for this line..
17357 my $leading_whitespace_count = shift;
17358
17359 # Handle case of zero whitespace, which includes multi-line quotes
17360 # (which may have a finite level; this prevents tab problems)
17361 if ( $leading_whitespace_count <= 0 ) {
17362 return "";
17363 }
17364
17365 # look for previous result
17366 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
17367 return $leading_string_cache[$leading_whitespace_count];
17368 }
17369
17370 # must compute a string for this number of spaces
17371 my $leading_string;
17372
17373 # Handle simple case of no tabs
17374 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
17375 || $rOpts_indent_columns <= 0 )
17376 {
17377 $leading_string = ( ' ' x $leading_whitespace_count );
17378 }
17379
17380 # Handle entab option
17381 elsif ($rOpts_entab_leading_whitespace) {
17382 my $space_count =
17383 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
17384 my $tab_count =
17385 int(
17386 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
17387 $leading_string = "\t" x $tab_count . ' ' x $space_count;
17388 }
17389
17390 # Handle option of one tab per level
17391 else {
17392 $leading_string = ( "\t" x $group_level );
17393 my $space_count =
17394 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
17395
17396 # shouldn't happen:
17397 if ( $space_count < 0 ) {
17398 warning(
17399"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
17400 );
17401 $leading_string = ( ' ' x $leading_whitespace_count );
17402 }
17403 else {
17404 $leading_string .= ( ' ' x $space_count );
17405 }
17406 }
17407 $leading_string_cache[$leading_whitespace_count] = $leading_string;
17408 return $leading_string;
17409 }
17410} # end get_leading_string
17411
17412sub report_anything_unusual {
17413 my $self = shift;
17414 if ( $outdented_line_count > 0 ) {
17415 write_logfile_entry(
17416 "$outdented_line_count long lines were outdented:\n");
17417 write_logfile_entry(
17418 " First at output line $first_outdented_line_at\n");
17419
17420 if ( $outdented_line_count > 1 ) {
17421 write_logfile_entry(
17422 " Last at output line $last_outdented_line_at\n");
17423 }
17424 write_logfile_entry(
17425 " use -noll to prevent outdenting, -l=n to increase line length\n"
17426 );
17427 write_logfile_entry("\n");
17428 }
17429}
17430
17431#####################################################################
17432#
17433# the Perl::Tidy::FileWriter class writes the output file
17434#
17435#####################################################################
17436
17437package Perl::Tidy::FileWriter;
17438
17439# Maximum number of little messages; probably need not be changed.
17440use constant MAX_NAG_MESSAGES => 6;
17441
17442sub write_logfile_entry {
17443 my $self = shift;
17444 my $logger_object = $self->{_logger_object};
17445 if ($logger_object) {
17446 $logger_object->write_logfile_entry(@_);
17447 }
17448}
17449
17450sub new {
17451 my $class = shift;
17452 my ( $line_sink_object, $rOpts, $logger_object ) = @_;
17453
17454 bless {
17455 _line_sink_object => $line_sink_object,
17456 _logger_object => $logger_object,
17457 _rOpts => $rOpts,
17458 _output_line_number => 1,
17459 _consecutive_blank_lines => 0,
17460 _consecutive_nonblank_lines => 0,
17461 _first_line_length_error => 0,
17462 _max_line_length_error => 0,
17463 _last_line_length_error => 0,
17464 _first_line_length_error_at => 0,
17465 _max_line_length_error_at => 0,
17466 _last_line_length_error_at => 0,
17467 _line_length_error_count => 0,
17468 _max_output_line_length => 0,
17469 _max_output_line_length_at => 0,
17470 }, $class;
17471}
17472
17473sub tee_on {
17474 my $self = shift;
17475 $self->{_line_sink_object}->tee_on();
17476}
17477
17478sub tee_off {
17479 my $self = shift;
17480 $self->{_line_sink_object}->tee_off();
17481}
17482
17483sub get_output_line_number {
17484 my $self = shift;
17485 return $self->{_output_line_number};
17486}
17487
17488sub decrement_output_line_number {
17489 my $self = shift;
17490 $self->{_output_line_number}--;
17491}
17492
17493sub get_consecutive_nonblank_lines {
17494 my $self = shift;
17495 return $self->{_consecutive_nonblank_lines};
17496}
17497
17498sub reset_consecutive_blank_lines {
17499 my $self = shift;
17500 $self->{_consecutive_blank_lines} = 0;
17501}
17502
17503sub want_blank_line {
17504 my $self = shift;
17505 unless ( $self->{_consecutive_blank_lines} ) {
17506 $self->write_blank_code_line();
17507 }
17508}
17509
17510sub write_blank_code_line {
17511 my $self = shift;
17512 my $rOpts = $self->{_rOpts};
17513 return
17514 if ( $self->{_consecutive_blank_lines} >=
17515 $rOpts->{'maximum-consecutive-blank-lines'} );
17516 $self->{_consecutive_blank_lines}++;
17517 $self->{_consecutive_nonblank_lines} = 0;
17518 $self->write_line("\n");
17519}
17520
17521sub write_code_line {
17522 my $self = shift;
17523 my $a = shift;
17524
17525 if ( $a =~ /^\s*$/ ) {
17526 my $rOpts = $self->{_rOpts};
17527 return
17528 if ( $self->{_consecutive_blank_lines} >=
17529 $rOpts->{'maximum-consecutive-blank-lines'} );
17530 $self->{_consecutive_blank_lines}++;
17531 $self->{_consecutive_nonblank_lines} = 0;
17532 }
17533 else {
17534 $self->{_consecutive_blank_lines} = 0;
17535 $self->{_consecutive_nonblank_lines}++;
17536 }
17537 $self->write_line($a);
17538}
17539
17540sub write_line {
17541 my $self = shift;
17542 my $a = shift;
17543
17544 # TODO: go through and see if the test is necessary here
17545 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
17546
17547 $self->{_line_sink_object}->write_line($a);
17548
17549 # This calculation of excess line length ignores any internal tabs
17550 my $rOpts = $self->{_rOpts};
17551 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
17552 if ( $a =~ /^\t+/g ) {
17553 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
17554 }
17555
17556 # Note that we just incremented output line number to future value
17557 # so we must subtract 1 for current line number
17558 if ( length($a) > 1 + $self->{_max_output_line_length} ) {
17559 $self->{_max_output_line_length} = length($a) - 1;
17560 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
17561 }
17562
17563 if ( $exceed > 0 ) {
17564 my $output_line_number = $self->{_output_line_number};
17565 $self->{_last_line_length_error} = $exceed;
17566 $self->{_last_line_length_error_at} = $output_line_number - 1;
17567 if ( $self->{_line_length_error_count} == 0 ) {
17568 $self->{_first_line_length_error} = $exceed;
17569 $self->{_first_line_length_error_at} = $output_line_number - 1;
17570 }
17571
17572 if (
17573 $self->{_last_line_length_error} > $self->{_max_line_length_error} )
17574 {
17575 $self->{_max_line_length_error} = $exceed;
17576 $self->{_max_line_length_error_at} = $output_line_number - 1;
17577 }
17578
17579 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
17580 $self->write_logfile_entry(
17581 "Line length exceeded by $exceed characters\n");
17582 }
17583 $self->{_line_length_error_count}++;
17584 }
17585
17586}
17587
17588sub report_line_length_errors {
17589 my $self = shift;
17590 my $rOpts = $self->{_rOpts};
17591 my $line_length_error_count = $self->{_line_length_error_count};
17592 if ( $line_length_error_count == 0 ) {
17593 $self->write_logfile_entry(
17594 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
17595 my $max_output_line_length = $self->{_max_output_line_length};
17596 my $max_output_line_length_at = $self->{_max_output_line_length_at};
17597 $self->write_logfile_entry(
17598" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
17599 );
17600
17601 }
17602 else {
17603
17604 my $word = ( $line_length_error_count > 1 ) ? "s" : "";
17605 $self->write_logfile_entry(
17606"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
17607 );
17608
17609 $word = ( $line_length_error_count > 1 ) ? "First" : "";
17610 my $first_line_length_error = $self->{_first_line_length_error};
17611 my $first_line_length_error_at = $self->{_first_line_length_error_at};
17612 $self->write_logfile_entry(
17613" $word at line $first_line_length_error_at by $first_line_length_error characters\n"
17614 );
17615
17616 if ( $line_length_error_count > 1 ) {
17617 my $max_line_length_error = $self->{_max_line_length_error};
17618 my $max_line_length_error_at = $self->{_max_line_length_error_at};
17619 my $last_line_length_error = $self->{_last_line_length_error};
17620 my $last_line_length_error_at = $self->{_last_line_length_error_at};
17621 $self->write_logfile_entry(
17622" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
17623 );
17624 $self->write_logfile_entry(
17625" Last at line $last_line_length_error_at by $last_line_length_error characters\n"
17626 );
17627 }
17628 }
17629}
17630
17631#####################################################################
17632#
17633# The Perl::Tidy::Debugger class shows line tokenization
17634#
17635#####################################################################
17636
17637package Perl::Tidy::Debugger;
17638
17639sub new {
17640
17641 my ( $class, $filename ) = @_;
17642
17643 bless {
17644 _debug_file => $filename,
17645 _debug_file_opened => 0,
17646 _fh => undef,
17647 }, $class;
17648}
17649
17650sub really_open_debug_file {
17651
17652 my $self = shift;
17653 my $debug_file = $self->{_debug_file};
17654 my $fh;
17655 unless ( $fh = IO::File->new("> $debug_file") ) {
17656 warn("can't open $debug_file: $!\n");
17657 }
17658 $self->{_debug_file_opened} = 1;
17659 $self->{_fh} = $fh;
17660 print $fh
17661 "Use -dump-token-types (-dtt) to get a list of token type codes\n";
17662}
17663
17664sub close_debug_file {
17665
17666 my $self = shift;
17667 my $fh = $self->{_fh};
17668 if ( $self->{_debug_file_opened} ) {
17669
17670 eval { $self->{_fh}->close() };
17671 }
17672}
17673
17674sub write_debug_entry {
17675
17676 # This is a debug dump routine which may be modified as necessary
17677 # to dump tokens on a line-by-line basis. The output will be written
17678 # to the .DEBUG file when the -D flag is entered.
17679 my $self = shift;
17680 my $line_of_tokens = shift;
17681
17682 my $input_line = $line_of_tokens->{_line_text};
17683 my $rtoken_type = $line_of_tokens->{_rtoken_type};
17684 my $rtokens = $line_of_tokens->{_rtokens};
17685 my $rlevels = $line_of_tokens->{_rlevels};
17686 my $rslevels = $line_of_tokens->{_rslevels};
17687 my $rblock_type = $line_of_tokens->{_rblock_type};
17688 my $input_line_number = $line_of_tokens->{_line_number};
17689 my $line_type = $line_of_tokens->{_line_type};
17690
17691 my ( $j, $num );
17692
17693 my $token_str = "$input_line_number: ";
17694 my $reconstructed_original = "$input_line_number: ";
17695 my $block_str = "$input_line_number: ";
17696
17697 #$token_str .= "$line_type: ";
17698 #$reconstructed_original .= "$line_type: ";
17699
17700 my $pattern = "";
17701 my @next_char = ( '"', '"' );
17702 my $i_next = 0;
17703 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
17704 my $fh = $self->{_fh};
17705
17706 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
17707
17708 # testing patterns
17709 if ( $$rtoken_type[$j] eq 'k' ) {
17710 $pattern .= $$rtokens[$j];
17711 }
17712 else {
17713 $pattern .= $$rtoken_type[$j];
17714 }
17715 $reconstructed_original .= $$rtokens[$j];
17716 $block_str .= "($$rblock_type[$j])";
17717 $num = length( $$rtokens[$j] );
17718 my $type_str = $$rtoken_type[$j];
17719
17720 # be sure there are no blank tokens (shouldn't happen)
17721 # This can only happen if a programming error has been made
17722 # because all valid tokens are non-blank
17723 if ( $type_str eq ' ' ) {
17724 print $fh "BLANK TOKEN on the next line\n";
17725 $type_str = $next_char[$i_next];
17726 $i_next = 1 - $i_next;
17727 }
17728
17729 if ( length($type_str) == 1 ) {
17730 $type_str = $type_str x $num;
17731 }
17732 $token_str .= $type_str;
17733 }
17734
17735 # Write what you want here ...
17736 # print $fh "$input_line\n";
17737 # print $fh "$pattern\n";
17738 print $fh "$reconstructed_original\n";
17739 print $fh "$token_str\n";
17740
17741 #print $fh "$block_str\n";
17742}
17743
17744#####################################################################
17745#
17746# The Perl::Tidy::LineBuffer class supplies a 'get_line()'
17747# method for returning the next line to be parsed, as well as a
17748# 'peek_ahead()' method
17749#
17750# The input parameter is an object with a 'get_line()' method
17751# which returns the next line to be parsed
17752#
17753#####################################################################
17754
17755package Perl::Tidy::LineBuffer;
17756
17757sub new {
17758
17759 my $class = shift;
17760 my $line_source_object = shift;
17761
17762 return bless {
17763 _line_source_object => $line_source_object,
17764 _rlookahead_buffer => [],
17765 }, $class;
17766}
17767
17768sub peek_ahead {
17769 my $self = shift;
17770 my $buffer_index = shift;
17771 my $line = undef;
17772 my $line_source_object = $self->{_line_source_object};
17773 my $rlookahead_buffer = $self->{_rlookahead_buffer};
17774 if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
17775 $line = $$rlookahead_buffer[$buffer_index];
17776 }
17777 else {
17778 $line = $line_source_object->get_line();
17779 push( @$rlookahead_buffer, $line );
17780 }
17781 return $line;
17782}
17783
17784sub get_line {
17785 my $self = shift;
17786 my $line = undef;
17787 my $line_source_object = $self->{_line_source_object};
17788 my $rlookahead_buffer = $self->{_rlookahead_buffer};
17789
17790 if ( scalar(@$rlookahead_buffer) ) {
17791 $line = shift @$rlookahead_buffer;
17792 }
17793 else {
17794 $line = $line_source_object->get_line();
17795 }
17796 return $line;
17797}
17798
17799########################################################################
17800#
17801# the Perl::Tidy::Tokenizer package is essentially a filter which
17802# reads lines of perl source code from a source object and provides
17803# corresponding tokenized lines through its get_line() method. Lines
17804# flow from the source_object to the caller like this:
17805#
17806# source_object --> LineBuffer_object --> Tokenizer --> calling routine
17807# get_line() get_line() get_line() line_of_tokens
17808#
17809# The source object can be any object with a get_line() method which
17810# supplies one line (a character string) perl call.
17811# The LineBuffer object is created by the Tokenizer.
17812# The Tokenizer returns a reference to a data structure 'line_of_tokens'
17813# containing one tokenized line for each call to its get_line() method.
17814#
17815# WARNING: This is not a real class yet. Only one tokenizer my be used.
17816#
17817########################################################################
17818
17819package Perl::Tidy::Tokenizer;
17820
17821BEGIN {
17822
17823 # Caution: these debug flags produce a lot of output
17824 # They should all be 0 except when debugging small scripts
17825
17826 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
17827 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
17828 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
17829 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
17830 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
17831
17832 my $debug_warning = sub {
17833 print "TOKENIZER_DEBUGGING with key $_[0]\n";
17834 };
17835
17836 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
17837 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
17838 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
17839 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
17840 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
17841
17842}
17843
17844use Carp;
17845use vars qw{
17846 $tokenizer_self
17847 $level_in_tokenizer
17848 $slevel_in_tokenizer
17849 $nesting_token_string
17850 $nesting_type_string
17851 $nesting_block_string
17852 $nesting_block_flag
17853 $nesting_list_string
17854 $nesting_list_flag
17855 $saw_negative_indentation
17856 $id_scan_state
17857 $last_nonblank_token
17858 $last_nonblank_type
17859 $last_nonblank_block_type
17860 $last_nonblank_container_type
17861 $last_nonblank_type_sequence
17862 $last_last_nonblank_token
17863 $last_last_nonblank_type
17864 $last_last_nonblank_block_type
17865 $last_last_nonblank_container_type
17866 $last_last_nonblank_type_sequence
17867 $last_nonblank_prototype
17868 $statement_type
17869 $identifier
17870 $in_quote
17871 $quote_type
17872 $quote_character
17873 $quote_pos
17874 $quote_depth
17875 $allowed_quote_modifiers
17876 $paren_depth
17877 @paren_type
17878 @paren_semicolon_count
17879 @paren_structural_type
17880 $brace_depth
17881 @brace_type
17882 @brace_structural_type
17883 @brace_statement_type
17884 @brace_context
17885 @brace_package
17886 $square_bracket_depth
17887 @square_bracket_type
17888 @square_bracket_structural_type
17889 @depth_array
17890 @starting_line_of_current_depth
17891 @current_depth
17892 @current_sequence_number
17893 @nesting_sequence_number
17894 @lower_case_labels_at
17895 $saw_v_string
17896 %is_constant
17897 %is_user_function
17898 %user_function_prototype
17899 %saw_function_definition
17900 $max_token_index
17901 $peeked_ahead
17902 $current_package
17903 $unexpected_error_count
17904 $input_line
17905 $input_line_number
17906 $rpretokens
17907 $rpretoken_map
17908 $rpretoken_type
17909 $want_paren
17910 $context
17911 @slevel_stack
17912 $ci_string_in_tokenizer
17913 $continuation_string_in_tokenizer
17914 $in_statement_continuation
17915 $started_looking_for_here_target_at
17916 $nearly_matched_here_target_at
17917
17918 %is_indirect_object_taker
17919 %is_block_operator
17920 %expecting_operator_token
17921 %expecting_operator_types
17922 %expecting_term_types
17923 %expecting_term_token
17924 %is_block_function
17925 %is_block_list_function
17926 %is_digraph
17927 %is_file_test_operator
17928 %is_trigraph
17929 %is_valid_token_type
17930 %is_keyword
17931 %is_code_block_token
17932 %really_want_term
17933 @opening_brace_names
17934 @closing_brace_names
17935 %is_keyword_taking_list
17936 %is_q_qq_qw_qx_qr_s_y_tr_m
17937};
17938
17939# possible values of operator_expected()
17940use constant TERM => -1;
17941use constant UNKNOWN => 0;
17942use constant OPERATOR => 1;
17943
17944# possible values of context
17945use constant SCALAR_CONTEXT => -1;
17946use constant UNKNOWN_CONTEXT => 0;
17947use constant LIST_CONTEXT => 1;
17948
17949# Maximum number of little messages; probably need not be changed.
17950use constant MAX_NAG_MESSAGES => 6;
17951
17952{
17953
17954 # methods to count instances
17955 my $_count = 0;
17956 sub get_count { $_count; }
17957 sub _increment_count { ++$_count }
17958 sub _decrement_count { --$_count }
17959}
17960
17961sub DESTROY {
17962 $_[0]->_decrement_count();
17963}
17964
17965sub new {
17966
17967 my $class = shift;
17968
17969 # Note: 'tabs' and 'indent_columns' are temporary and should be
17970 # removed asap
17971 my %defaults = (
17972 source_object => undef,
17973 debugger_object => undef,
17974 diagnostics_object => undef,
17975 logger_object => undef,
17976 starting_level => undef,
17977 indent_columns => 4,
17978 tabs => 0,
17979 look_for_hash_bang => 0,
17980 trim_qw => 1,
17981 look_for_autoloader => 1,
17982 look_for_selfloader => 1,
17983 );
17984 my %args = ( %defaults, @_ );
17985
17986 # we are given an object with a get_line() method to supply source lines
17987 my $source_object = $args{source_object};
17988
17989 # we create another object with a get_line() and peek_ahead() method
17990 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
17991
17992 # Tokenizer state data is as follows:
17993 # _rhere_target_list reference to list of here-doc targets
17994 # _here_doc_target the target string for a here document
17995 # _here_quote_character the type of here-doc quoting (" ' ` or none)
17996 # to determine if interpolation is done
17997 # _quote_target character we seek if chasing a quote
17998 # _line_start_quote line where we started looking for a long quote
17999 # _in_here_doc flag indicating if we are in a here-doc
18000 # _in_pod flag set if we are in pod documentation
18001 # _in_error flag set if we saw severe error (binary in script)
18002 # _in_data flag set if we are in __DATA__ section
18003 # _in_end flag set if we are in __END__ section
18004 # _in_format flag set if we are in a format description
18005 # _in_quote flag telling if we are chasing a quote
18006 # _starting_level indentation level of first line
18007 # _input_tabstr string denoting one indentation level of input file
18008 # _know_input_tabstr flag indicating if we know _input_tabstr
18009 # _line_buffer_object object with get_line() method to supply source code
18010 # _diagnostics_object place to write debugging information
18011 $tokenizer_self = {
18012 _rhere_target_list => undef,
18013 _in_here_doc => 0,
18014 _here_doc_target => "",
18015 _here_quote_character => "",
18016 _in_data => 0,
18017 _in_end => 0,
18018 _in_format => 0,
18019 _in_error => 0,
18020 _in_pod => 0,
18021 _in_quote => 0,
18022 _quote_target => "",
18023 _line_start_quote => -1,
18024 _starting_level => $args{starting_level},
18025 _know_starting_level => defined( $args{starting_level} ),
18026 _tabs => $args{tabs},
18027 _indent_columns => $args{indent_columns},
18028 _look_for_hash_bang => $args{look_for_hash_bang},
18029 _trim_qw => $args{trim_qw},
18030 _input_tabstr => "",
18031 _know_input_tabstr => -1,
18032 _last_line_number => 0,
18033 _saw_perl_dash_P => 0,
18034 _saw_perl_dash_w => 0,
18035 _saw_use_strict => 0,
18036 _look_for_autoloader => $args{look_for_autoloader},
18037 _look_for_selfloader => $args{look_for_selfloader},
18038 _saw_autoloader => 0,
18039 _saw_selfloader => 0,
18040 _saw_hash_bang => 0,
18041 _saw_end => 0,
18042 _saw_data => 0,
18043 _saw_lc_filehandle => 0,
18044 _started_tokenizing => 0,
18045 _line_buffer_object => $line_buffer_object,
18046 _debugger_object => $args{debugger_object},
18047 _diagnostics_object => $args{diagnostics_object},
18048 _logger_object => $args{logger_object},
18049 };
18050
18051 prepare_for_a_new_file();
18052 find_starting_indentation_level();
18053
18054 bless $tokenizer_self, $class;
18055
18056 # This is not a full class yet, so die if an attempt is made to
18057 # create more than one object.
18058
18059 if ( _increment_count() > 1 ) {
18060 confess
18061"Attempt to create more than 1 object in $class, which is not a true class yet\n";
18062 }
18063
18064 return $tokenizer_self;
18065
18066}
18067
18068# interface to Perl::Tidy::Logger routines
18069sub warning {
18070 my $logger_object = $tokenizer_self->{_logger_object};
18071 if ($logger_object) {
18072 $logger_object->warning(@_);
18073 }
18074}
18075
18076sub complain {
18077 my $logger_object = $tokenizer_self->{_logger_object};
18078 if ($logger_object) {
18079 $logger_object->complain(@_);
18080 }
18081}
18082
18083sub write_logfile_entry {
18084 my $logger_object = $tokenizer_self->{_logger_object};
18085 if ($logger_object) {
18086 $logger_object->write_logfile_entry(@_);
18087 }
18088}
18089
18090sub interrupt_logfile {
18091 my $logger_object = $tokenizer_self->{_logger_object};
18092 if ($logger_object) {
18093 $logger_object->interrupt_logfile();
18094 }
18095}
18096
18097sub resume_logfile {
18098 my $logger_object = $tokenizer_self->{_logger_object};
18099 if ($logger_object) {
18100 $logger_object->resume_logfile();
18101 }
18102}
18103
18104sub increment_brace_error {
18105 my $logger_object = $tokenizer_self->{_logger_object};
18106 if ($logger_object) {
18107 $logger_object->increment_brace_error();
18108 }
18109}
18110
18111sub report_definite_bug {
18112 my $logger_object = $tokenizer_self->{_logger_object};
18113 if ($logger_object) {
18114 $logger_object->report_definite_bug();
18115 }
18116}
18117
18118sub brace_warning {
18119 my $logger_object = $tokenizer_self->{_logger_object};
18120 if ($logger_object) {
18121 $logger_object->brace_warning(@_);
18122 }
18123}
18124
18125sub get_saw_brace_error {
18126 my $logger_object = $tokenizer_self->{_logger_object};
18127 if ($logger_object) {
18128 $logger_object->get_saw_brace_error();
18129 }
18130 else {
18131 0;
18132 }
18133}
18134
18135# interface to Perl::Tidy::Diagnostics routines
18136sub write_diagnostics {
18137 if ( $tokenizer_self->{_diagnostics_object} ) {
18138 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
18139 }
18140}
18141
18142sub report_tokenization_errors {
18143
18144 my $self = shift;
18145
18146 my $level = get_indentation_level();
18147 if ( $level != $tokenizer_self->{_starting_level} ) {
18148 warning("final indentation level: $level\n");
18149 }
18150
18151 check_final_nesting_depths();
18152
18153 if ( $tokenizer_self->{_look_for_hash_bang}
18154 && !$tokenizer_self->{_saw_hash_bang} )
18155 {
18156 warning(
18157 "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
18158 }
18159
18160 if ( $tokenizer_self->{_in_format} ) {
18161 warning("hit EOF while in format description\n");
18162 }
18163
18164 # this check may be removed after a year or so
18165 if ( $tokenizer_self->{_saw_lc_filehandle} ) {
18166
18167 warning( <<'EOM' );
18168------------------------------------------------------------------------
18169PLEASE NOTE: If you get this message, it is because perltidy noticed
18170possible ambiguous syntax at one or more places in your script, as
18171noted above. The problem is with statements accepting indirect objects,
18172such as print and printf statements of the form
18173
18174 print bareword ( $etc
18175
18176Perltidy needs your help in deciding if 'bareword' is a filehandle or a
18177function call. The problem is the space between 'bareword' and '('. If
18178'bareword' is a function call, you should remove the trailing space. If
18179'bareword' is a filehandle, you should avoid the opening paren or else
18180globally capitalize 'bareword' to be BAREWORD. So the above line
18181would be:
18182
18183 print bareword( $etc # function
18184or
18185 print bareword @list # filehandle
18186or
18187 print BAREWORD ( $etc # filehandle
18188
18189If you want to keep the line as it is, and are sure it is correct,
18190you can use -w=0 to prevent this message.
18191------------------------------------------------------------------------
18192EOM
18193
18194 }
18195
18196 if ( $tokenizer_self->{_in_pod} ) {
18197
18198 # Just write log entry if this is after __END__ or __DATA__
18199 # because this happens to often, and it is not likely to be
18200 # a parsing error.
18201 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
18202 write_logfile_entry(
18203"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
18204 );
18205 }
18206
18207 else {
18208 complain(
18209"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
18210 );
18211 }
18212
18213 }
18214
18215 if ( $tokenizer_self->{_in_here_doc} ) {
18216 my $here_doc_target = $tokenizer_self->{_here_doc_target};
18217 if ($here_doc_target) {
18218 warning(
18219"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
18220 );
18221 }
18222 else {
18223 warning(
18224"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
18225 );
18226 }
18227 if ($nearly_matched_here_target_at) {
18228 warning(
18229"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
18230 );
18231 }
18232 }
18233
18234 if ( $tokenizer_self->{_in_quote} ) {
18235 my $line_start_quote = $tokenizer_self->{_line_start_quote};
18236 my $quote_target = $tokenizer_self->{_quote_target};
18237 warning(
18238"hit EOF seeking end of quote/pattern starting at line $line_start_quote ending in $quote_target\n"
18239 );
18240 }
18241
18242 unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
18243 if ( $] < 5.006 ) {
18244 write_logfile_entry("Suggest including '-w parameter'\n");
18245 }
18246 else {
18247 write_logfile_entry("Suggest including 'use warnings;'\n");
18248 }
18249 }
18250
18251 if ( $tokenizer_self->{_saw_perl_dash_P} ) {
18252 write_logfile_entry("Use of -P parameter for defines is discouraged\n");
18253 }
18254
18255 unless ( $tokenizer_self->{_saw_use_strict} ) {
18256 write_logfile_entry("Suggest including 'use strict;'\n");
18257 }
18258
18259 # it is suggested that lables have at least one upper case character
18260 # for legibility and to avoid code breakage as new keywords are introduced
18261 if (@lower_case_labels_at) {
18262 my $num = @lower_case_labels_at;
18263 write_logfile_entry(
18264 "Suggest using upper case characters in label(s)\n");
18265 local $" = ')(';
18266 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
18267 }
18268}
18269
18270sub report_v_string {
18271
18272 # warn if this version can't handle v-strings
18273 my $tok = shift;
18274 $saw_v_string = $input_line_number;
18275 if ( $] < 5.006 ) {
18276 warning(
18277"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
18278 );
18279 }
18280}
18281
18282sub get_input_line_number {
18283 return $tokenizer_self->{_last_line_number};
18284}
18285
18286# returns the next tokenized line
18287sub get_line {
18288
18289 my $self = shift;
18290
18291 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
18292
18293 return undef unless ($input_line);
18294
18295 $tokenizer_self->{_last_line_number}++;
18296
18297 # Find and remove what characters terminate this line, including any
18298 # control r
18299 my $input_line_separator = "";
18300 if ( chomp($input_line) ) { $input_line_separator = $/ }
18301
18302 # TODO: what other characters should be included here?
18303 if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
18304 $input_line_separator = $2 . $input_line_separator;
18305 }
18306
18307 # for backwards compatability we keep the line text terminated with
18308 # a newline character
18309 $input_line .= "\n";
18310
18311 my $input_line_number = $tokenizer_self->{_last_line_number};
18312
18313 # create a data structure describing this line which will be
18314 # returned to the caller.
18315
18316 # _line_type codes are:
18317 # SYSTEM - system-specific code before hash-bang line
18318 # CODE - line of perl code (including comments)
18319 # POD_START - line starting pod, such as '=head'
18320 # POD - pod documentation text
18321 # POD_END - last line of pod section, '=cut'
18322 # HERE - text of here-document
18323 # HERE_END - last line of here-doc (target word)
18324 # FORMAT - format section
18325 # FORMAT_END - last line of format section, '.'
18326 # DATA_START - __DATA__ line
18327 # DATA - unidentified text following __DATA__
18328 # END_START - __END__ line
18329 # END - unidentified text following __END__
18330 # ERROR - we are in big trouble, probably not a perl script
18331
18332 # Other variables:
18333 # _curly_brace_depth - depth of curly braces at start of line
18334 # _square_bracket_depth - depth of square brackets at start of line
18335 # _paren_depth - depth of parens at start of line
18336 # _starting_in_quote - this line continues a multi-line quote
18337 # (so don't trim leading blanks!)
18338 # _ending_in_quote - this line ends in a multi-line quote
18339 # (so don't trim trailing blanks!)
18340 my $line_of_tokens = {
18341 _line_type => 'EOF',
18342 _line_text => $input_line,
18343 _line_number => $input_line_number,
18344 _rtoken_type => undef,
18345 _rtokens => undef,
18346 _rlevels => undef,
18347 _rslevels => undef,
18348 _rblock_type => undef,
18349 _rcontainer_type => undef,
18350 _rcontainer_environment => undef,
18351 _rtype_sequence => undef,
18352 _rnesting_tokens => undef,
18353 _rci_levels => undef,
18354 _rnesting_blocks => undef,
18355 _python_indentation_level => -1, ## 0,
18356 _starting_in_quote =>
18357 ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ),
18358 _ending_in_quote => 0,
18359 _curly_brace_depth => $brace_depth,
18360 _square_bracket_depth => $square_bracket_depth,
18361 _paren_depth => $paren_depth,
18362 _quote_character => '',
18363 };
18364
18365 # must print line unchanged if we are in a here document
18366 if ( $tokenizer_self->{_in_here_doc} ) {
18367
18368 $line_of_tokens->{_line_type} = 'HERE';
18369 my $here_doc_target = $tokenizer_self->{_here_doc_target};
18370 my $here_quote_character = $tokenizer_self->{_here_quote_character};
18371 my $candidate_target = $input_line;
18372 chomp $candidate_target;
18373 if ( $candidate_target eq $here_doc_target ) {
18374 $nearly_matched_here_target_at = undef;
18375 $line_of_tokens->{_line_type} = 'HERE_END';
18376 write_logfile_entry("Exiting HERE document $here_doc_target\n");
18377
18378 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
18379 if (@$rhere_target_list) { # there can be multiple here targets
18380 ( $here_doc_target, $here_quote_character ) =
18381 @{ shift @$rhere_target_list };
18382 $tokenizer_self->{_here_doc_target} = $here_doc_target;
18383 $tokenizer_self->{_here_quote_character} =
18384 $here_quote_character;
18385 write_logfile_entry(
18386 "Entering HERE document $here_doc_target\n");
18387 $nearly_matched_here_target_at = undef;
18388 $started_looking_for_here_target_at = $input_line_number;
18389 }
18390 else {
18391 $tokenizer_self->{_in_here_doc} = 0;
18392 $tokenizer_self->{_here_doc_target} = "";
18393 $tokenizer_self->{_here_quote_character} = "";
18394 }
18395 }
18396
18397 # check for error of extra whitespace
18398 # note for PERL6: leading whitespace is allowed
18399 else {
18400 $candidate_target =~ s/\s*$//;
18401 $candidate_target =~ s/^\s*//;
18402 if ( $candidate_target eq $here_doc_target ) {
18403 $nearly_matched_here_target_at = $input_line_number;
18404 }
18405 }
18406 return $line_of_tokens;
18407 }
18408
18409 # must print line unchanged if we are in a format section
18410 elsif ( $tokenizer_self->{_in_format} ) {
18411
18412 if ( $input_line =~ /^\.[\s#]*$/ ) {
18413 write_logfile_entry("Exiting format section\n");
18414 $tokenizer_self->{_in_format} = 0;
18415 $line_of_tokens->{_line_type} = 'FORMAT_END';
18416 }
18417 else {
18418 $line_of_tokens->{_line_type} = 'FORMAT';
18419 }
18420 return $line_of_tokens;
18421 }
18422
18423 # must print line unchanged if we are in pod documentation
18424 elsif ( $tokenizer_self->{_in_pod} ) {
18425
18426 $line_of_tokens->{_line_type} = 'POD';
18427 if ( $input_line =~ /^=cut/ ) {
18428 $line_of_tokens->{_line_type} = 'POD_END';
18429 write_logfile_entry("Exiting POD section\n");
18430 $tokenizer_self->{_in_pod} = 0;
18431 }
18432 if ( $input_line =~ /^\#\!.*perl\b/ ) {
18433 warning("Hash-bang in pod can cause perl to fail! \n");
18434 }
18435
18436 return $line_of_tokens;
18437 }
18438
18439 # must print line unchanged if we have seen a severe error (i.e., we
18440 # are seeing illegal tokens and connot continue. Syntax errors do
18441 # not pass this route). Calling routine can decide what to do, but
18442 # the default can be to just pass all lines as if they were after __END__
18443 elsif ( $tokenizer_self->{_in_error} ) {
18444 $line_of_tokens->{_line_type} = 'ERROR';
18445 return $line_of_tokens;
18446 }
18447
18448 # print line unchanged if we are __DATA__ section
18449 elsif ( $tokenizer_self->{_in_data} ) {
18450
18451 # ...but look for POD
18452 # Note that the _in_data and _in_end flags remain set
18453 # so that we return to that state after seeing the
18454 # end of a pod section
18455 if ( $input_line =~ /^=(?!cut)/ ) {
18456 $line_of_tokens->{_line_type} = 'POD_START';
18457 write_logfile_entry("Entering POD section\n");
18458 $tokenizer_self->{_in_pod} = 1;
18459 return $line_of_tokens;
18460 }
18461 else {
18462 $line_of_tokens->{_line_type} = 'DATA';
18463 return $line_of_tokens;
18464 }
18465 }
18466
18467 # print line unchanged if we are in __END__ section
18468 elsif ( $tokenizer_self->{_in_end} ) {
18469
18470 # ...but look for POD
18471 # Note that the _in_data and _in_end flags remain set
18472 # so that we return to that state after seeing the
18473 # end of a pod section
18474 if ( $input_line =~ /^=(?!cut)/ ) {
18475 $line_of_tokens->{_line_type} = 'POD_START';
18476 write_logfile_entry("Entering POD section\n");
18477 $tokenizer_self->{_in_pod} = 1;
18478 return $line_of_tokens;
18479 }
18480 else {
18481 $line_of_tokens->{_line_type} = 'END';
18482 return $line_of_tokens;
18483 }
18484 }
18485
18486 # check for a hash-bang line if we haven't seen one
18487 if ( !$tokenizer_self->{_saw_hash_bang} ) {
18488 if ( $input_line =~ /^\#\!.*perl\b/ ) {
18489 $tokenizer_self->{_saw_hash_bang} = $input_line_number;
18490
18491 # check for -w and -P flags
18492 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
18493 $tokenizer_self->{_saw_perl_dash_P} = 1;
18494 }
18495
18496 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
18497 $tokenizer_self->{_saw_perl_dash_w} = 1;
18498 }
18499
18500 if ( ( $input_line_number > 1 )
18501 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
18502 {
18503
18504 # this is helpful for VMS systems; we may have accidentally
18505 # tokenized some DCL commands
18506 if ( $tokenizer_self->{_started_tokenizing} ) {
18507 warning(
18508"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
18509 );
18510 }
18511 else {
18512 complain("Useless hash-bang after line 1\n");
18513 }
18514 }
18515
18516 # Report the leading hash-bang as a system line
18517 # This will prevent -dac from deleting it
18518 else {
18519 $line_of_tokens->{_line_type} = 'SYSTEM';
18520 return $line_of_tokens;
18521 }
18522 }
18523 }
18524
18525 # wait for a hash-bang before parsing if the user invoked us with -x
18526 if ( $tokenizer_self->{_look_for_hash_bang}
18527 && !$tokenizer_self->{_saw_hash_bang} )
18528 {
18529 $line_of_tokens->{_line_type} = 'SYSTEM';
18530 return $line_of_tokens;
18531 }
18532
18533 # a first line of the form ': #' will be marked as SYSTEM
18534 # since lines of this form may be used by tcsh
18535 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
18536 $line_of_tokens->{_line_type} = 'SYSTEM';
18537 return $line_of_tokens;
18538 }
18539
18540 # now we know that it is ok to tokenize the line...
18541 # the line tokenizer will modify any of these private variables:
18542 # _rhere_target_list
18543 # _in_data
18544 # _in_end
18545 # _in_format
18546 # _in_error
18547 # _in_pod
18548 # _in_quote
18549 my $ending_in_quote_last = $tokenizer_self->{_in_quote};
18550 tokenize_this_line($line_of_tokens);
18551
18552 # Now finish defining the return structure and return it
18553 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
18554
18555 # handle severe error (binary data in script)
18556 if ( $tokenizer_self->{_in_error} ) {
18557 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
18558 warning("Giving up after error\n");
18559 $line_of_tokens->{_line_type} = 'ERROR';
18560 reset_indentation_level(0); # avoid error messages
18561 return $line_of_tokens;
18562 }
18563
18564 # handle start of pod documentation
18565 if ( $tokenizer_self->{_in_pod} ) {
18566
18567 # This gets tricky..above a __DATA__ or __END__ section, perl
18568 # accepts '=cut' as the start of pod section. But afterwards,
18569 # only pod utilities see it and they may ignore an =cut without
18570 # leading =head. In any case, this isn't good.
18571 if ( $input_line =~ /^=cut\b/ ) {
18572 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
18573 complain("=cut while not in pod ignored\n");
18574 $tokenizer_self->{_in_pod} = 0;
18575 $line_of_tokens->{_line_type} = 'POD_STOP';
18576 }
18577 else {
18578 $line_of_tokens->{_line_type} = 'POD_END';
18579 complain(
18580"=cut starts a pod section .. this can fool pod utilities.\n"
18581 );
18582 write_logfile_entry("Entering POD section\n");
18583 }
18584 }
18585
18586 else {
18587 $line_of_tokens->{_line_type} = 'POD_START';
18588 write_logfile_entry("Entering POD section\n");
18589 }
18590
18591 return $line_of_tokens;
18592 }
18593
18594 # update indentation levels for log messages
18595 if ( $input_line !~ /^\s*$/ ) {
18596 my $rlevels = $line_of_tokens->{_rlevels};
18597 my $structural_indentation_level = $$rlevels[0];
18598 my ( $python_indentation_level, $msg ) =
18599 find_indentation_level( $input_line, $structural_indentation_level );
18600 if ($msg) { write_logfile_entry("$msg") }
18601 if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
18602 $line_of_tokens->{_python_indentation_level} =
18603 $python_indentation_level;
18604 }
18605 }
18606
18607 # see if this line contains here doc targets
18608 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
18609 if (@$rhere_target_list) {
18610
18611 #my $here_doc_target = shift @$rhere_target_list;
18612 my ( $here_doc_target, $here_quote_character ) =
18613 @{ shift @$rhere_target_list };
18614 $tokenizer_self->{_in_here_doc} = 1;
18615 $tokenizer_self->{_here_doc_target} = $here_doc_target;
18616 $tokenizer_self->{_here_quote_character} = $here_quote_character;
18617 write_logfile_entry("Entering HERE document $here_doc_target\n");
18618 $started_looking_for_here_target_at = $input_line_number;
18619 }
18620
18621 # NOTE: __END__ and __DATA__ statements are written unformatted
18622 # because they can theoretically contain additional characters
18623 # which are not tokenized (and cannot be read with <DATA> either!).
18624 if ( $tokenizer_self->{_in_data} ) {
18625 $line_of_tokens->{_line_type} = 'DATA_START';
18626 write_logfile_entry("Starting __DATA__ section\n");
18627 $tokenizer_self->{_saw_data} = 1;
18628
18629 # keep parsing after __DATA__ if use SelfLoader was seen
18630 if ( $tokenizer_self->{_saw_selfloader} ) {
18631 $tokenizer_self->{_in_data} = 0;
18632 write_logfile_entry(
18633 "SelfLoader seen, continuing; -nlsl deactivates\n");
18634 }
18635
18636 return $line_of_tokens;
18637 }
18638
18639 elsif ( $tokenizer_self->{_in_end} ) {
18640 $line_of_tokens->{_line_type} = 'END_START';
18641 write_logfile_entry("Starting __END__ section\n");
18642 $tokenizer_self->{_saw_end} = 1;
18643
18644 # keep parsing after __END__ if use AutoLoader was seen
18645 if ( $tokenizer_self->{_saw_autoloader} ) {
18646 $tokenizer_self->{_in_end} = 0;
18647 write_logfile_entry(
18648 "AutoLoader seen, continuing; -nlal deactivates\n");
18649 }
18650 return $line_of_tokens;
18651 }
18652
18653 # now, finally, we know that this line is type 'CODE'
18654 $line_of_tokens->{_line_type} = 'CODE';
18655
18656 # remember if we have seen any real code
18657 if ( !$tokenizer_self->{_started_tokenizing}
18658 && $input_line !~ /^\s*$/
18659 && $input_line !~ /^\s*#/ )
18660 {
18661 $tokenizer_self->{_started_tokenizing} = 1;
18662 }
18663
18664 if ( $tokenizer_self->{_debugger_object} ) {
18665 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
18666 }
18667
18668 # Note: if keyword 'format' occurs in this line code, it is still CODE
18669 # (keyword 'format' need not start a line)
18670 if ( $tokenizer_self->{_in_format} ) {
18671 write_logfile_entry("Entering format section\n");
18672 }
18673
18674 if ( $tokenizer_self->{_in_quote}
18675 and ( $tokenizer_self->{_line_start_quote} < 0 ) )
18676 {
18677
18678 if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
18679 $tokenizer_self->{_line_start_quote} = $input_line_number;
18680 $tokenizer_self->{_quote_target} = $quote_target;
18681 write_logfile_entry(
18682 "Start multi-line quote or pattern ending in $quote_target\n");
18683 }
18684 }
18685 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
18686 and !$tokenizer_self->{_in_quote} )
18687 {
18688 $tokenizer_self->{_line_start_quote} = -1;
18689 write_logfile_entry("End of multi-line quote or pattern\n");
18690 }
18691
18692 # we are returning a line of CODE
18693 return $line_of_tokens;
18694}
18695
18696sub find_starting_indentation_level {
18697
18698 my $starting_level = 0;
18699 my $know_input_tabstr = -1; # flag for find_indentation_level
18700
18701 # use value if given as parameter
18702 if ( $tokenizer_self->{_know_starting_level} ) {
18703 $starting_level = $tokenizer_self->{_starting_level};
18704 }
18705
18706 # if we know there is a hash_bang line, the level must be zero
18707 elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
18708 $tokenizer_self->{_know_starting_level} = 1;
18709 }
18710
18711 # otherwise figure it out from the input file
18712 else {
18713 my $line;
18714 my $i = 0;
18715 my $structural_indentation_level = -1; # flag for find_indentation_level
18716
18717 my $msg = "";
18718 while ( $line =
18719 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
18720 {
18721
18722 # if first line is #! then assume starting level is zero
18723 if ( $i == 1 && $line =~ /^\#\!/ ) {
18724 $starting_level = 0;
18725 last;
18726 }
18727 next if ( $line =~ /^\s*#/ ); # must not be comment
18728 next if ( $line =~ /^\s*$/ ); # must not be blank
18729 ( $starting_level, $msg ) =
18730 find_indentation_level( $line, $structural_indentation_level );
18731 if ($msg) { write_logfile_entry("$msg") }
18732 last;
18733 }
18734 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
18735
18736 if ( $starting_level > 0 ) {
18737
18738 my $input_tabstr = $tokenizer_self->{_input_tabstr};
18739 if ( $input_tabstr eq "\t" ) {
18740 $msg .= "by guessing input tabbing uses 1 tab per level\n";
18741 }
18742 else {
18743 my $cols = length($input_tabstr);
18744 $msg .=
18745 "by guessing input tabbing uses $cols blanks per level\n";
18746 }
18747 }
18748 write_logfile_entry("$msg");
18749 }
18750 $tokenizer_self->{_starting_level} = $starting_level;
18751 reset_indentation_level($starting_level);
18752}
18753
18754# Find indentation level given a input line. At the same time, try to
18755# figure out the input tabbing scheme.
18756#
18757# There are two types of calls:
18758#
18759# Type 1: $structural_indentation_level < 0
18760# In this case we have to guess $input_tabstr to figure out the level.
18761#
18762# Type 2: $structural_indentation_level >= 0
18763# In this case the level of this line is known, and this routine can
18764# update the tabbing string, if still unknown, to make the level correct.
18765
18766sub find_indentation_level {
18767 my ( $line, $structural_indentation_level ) = @_;
18768 my $level = 0;
18769 my $msg = "";
18770
18771 my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
18772 my $input_tabstr = $tokenizer_self->{_input_tabstr};
18773
18774 # find leading whitespace
18775 my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
18776
18777 # make first guess at input tabbing scheme if necessary
18778 if ( $know_input_tabstr < 0 ) {
18779
18780 $know_input_tabstr = 0;
18781
18782 if ( $tokenizer_self->{_tabs} ) {
18783 $input_tabstr = "\t";
18784 if ( length($leading_whitespace) > 0 ) {
18785 if ( $leading_whitespace !~ /\t/ ) {
18786
18787 my $cols = $tokenizer_self->{_indent_columns};
18788
18789 if ( length($leading_whitespace) < $cols ) {
18790 $cols = length($leading_whitespace);
18791 }
18792 $input_tabstr = " " x $cols;
18793 }
18794 }
18795 }
18796 else {
18797 $input_tabstr = " " x $tokenizer_self->{_indent_columns};
18798
18799 if ( length($leading_whitespace) > 0 ) {
18800 if ( $leading_whitespace =~ /^\t/ ) {
18801 $input_tabstr = "\t";
18802 }
18803 }
18804 }
18805 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
18806 $tokenizer_self->{_input_tabstr} = $input_tabstr;
18807 }
18808
18809 # determine the input tabbing scheme if possible
18810 if ( ( $know_input_tabstr == 0 )
18811 && ( length($leading_whitespace) > 0 )
18812 && ( $structural_indentation_level > 0 ) )
18813 {
18814 my $saved_input_tabstr = $input_tabstr;
18815
18816 # check for common case of one tab per indentation level
18817 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
18818 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
18819 $input_tabstr = "\t";
18820 $msg = "Guessing old indentation was tab character\n";
18821 }
18822 }
18823
18824 else {
18825
18826 # detab any tabs based on 8 blanks per tab
18827 my $entabbed = "";
18828 if ( $leading_whitespace =~ s/^\t+/ /g ) {
18829 $entabbed = "entabbed";
18830 }
18831
18832 # now compute tabbing from number of spaces
18833 my $columns =
18834 length($leading_whitespace) / $structural_indentation_level;
18835 if ( $columns == int $columns ) {
18836 $msg =
18837 "Guessing old indentation was $columns $entabbed spaces\n";
18838 }
18839 else {
18840 $columns = int $columns;
18841 $msg =
18842"old indentation is unclear, using $columns $entabbed spaces\n";
18843 }
18844 $input_tabstr = " " x $columns;
18845 }
18846 $know_input_tabstr = 1;
18847 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
18848 $tokenizer_self->{_input_tabstr} = $input_tabstr;
18849
18850 # see if mistakes were made
18851 if ( ( $tokenizer_self->{_starting_level} > 0 )
18852 && !$tokenizer_self->{_know_starting_level} )
18853 {
18854
18855 if ( $input_tabstr ne $saved_input_tabstr ) {
18856 complain(
18857"I made a bad starting level guess; rerun with a value for -sil \n"
18858 );
18859 }
18860 }
18861 }
18862
18863 # use current guess at input tabbing to get input indentation level
18864 #
18865 # Patch to handle a common case of entabbed leading whitespace
18866 # If the leading whitespace equals 4 spaces and we also have
18867 # tabs, detab the input whitespace assuming 8 spaces per tab.
18868 if ( length($input_tabstr) == 4 ) {
18869 $leading_whitespace =~ s/^\t+/ /g;
18870 }
18871
18872 if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
18873 my $pos = 0;
18874
18875 while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
18876 {
18877 $pos += $len_tab;
18878 $level++;
18879 }
18880 }
18881 return ( $level, $msg );
18882}
18883
18884sub dump_token_types {
18885 my $class = shift;
18886 my $fh = shift;
18887
18888 # This should be the latest list of token types in use
18889 # adding NEW_TOKENS: add a comment here
18890 print $fh <<'END_OF_LIST';
18891
18892Here is a list of the token types currently used for lines of type 'CODE'.
18893For the following tokens, the "type" of a token is just the token itself.
18894
18895.. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <>
18896( ) <= >= == =~ !~ != ++ -- /= x=
18897... **= <<= >>= &&= ||= <=>
18898, + - / * | % ! x ~ = \ ? : . < > ^ &
18899
18900The following additional token types are defined:
18901
18902 type meaning
18903 b blank (white space)
18904 { indent: opening structural curly brace or square bracket or paren
18905 (code block, anonymous hash reference, or anonymous array reference)
18906 } outdent: right structural curly brace or square bracket or paren
18907 [ left non-structural square bracket (enclosing an array index)
18908 ] right non-structural square bracket
18909 ( left non-structural paren (all but a list right of an =)
18910 ) right non-structural parena
18911 L left non-structural curly brace (enclosing a key)
18912 R right non-structural curly brace
18913 ; terminal semicolon
18914 f indicates a semicolon in a "for" statement
18915 h here_doc operator <<
18916 # a comment
18917 Q indicates a quote or pattern
18918 q indicates a qw quote block
18919 k a perl keyword
18920 C user-defined constant or constant function (with void prototype = ())
18921 U user-defined function taking parameters
18922 G user-defined function taking block parameter (like grep/map/eval)
18923 M (unused, but reserved for subroutine definition name)
18924 P (unused, but -html uses it to label pod text)
18925 t type indicater such as %,$,@,*,&,sub
18926 w bare word (perhaps a subroutine call)
18927 i identifier of some type (with leading %, $, @, *, &, sub, -> )
18928 n a number
18929 v a v-string
18930 F a file test operator (like -e)
18931 Y File handle
18932 Z identifier in indirect object slot: may be file handle, object
18933 J LABEL: code block label
18934 j LABEL after next, last, redo, goto
18935 p unary +
18936 m unary -
18937 pp pre-increment operator ++
18938 mm pre-decrement operator --
18939 A : used as attribute separator
18940
18941 Here are the '_line_type' codes used internally:
18942 SYSTEM - system-specific code before hash-bang line
18943 CODE - line of perl code (including comments)
18944 POD_START - line starting pod, such as '=head'
18945 POD - pod documentation text
18946 POD_END - last line of pod section, '=cut'
18947 HERE - text of here-document
18948 HERE_END - last line of here-doc (target word)
18949 FORMAT - format section
18950 FORMAT_END - last line of format section, '.'
18951 DATA_START - __DATA__ line
18952 DATA - unidentified text following __DATA__
18953 END_START - __END__ line
18954 END - unidentified text following __END__
18955 ERROR - we are in big trouble, probably not a perl script
18956END_OF_LIST
18957}
18958
18959# This is a currently unused debug routine
18960sub dump_functions {
18961
18962 my $fh = *STDOUT;
18963 my ( $pkg, $sub );
18964 foreach $pkg ( keys %is_user_function ) {
18965 print $fh "\nnon-constant subs in package $pkg\n";
18966
18967 foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
18968 my $msg = "";
18969 if ( $is_block_list_function{$pkg}{$sub} ) {
18970 $msg = 'block_list';
18971 }
18972
18973 if ( $is_block_function{$pkg}{$sub} ) {
18974 $msg = 'block';
18975 }
18976 print $fh "$sub $msg\n";
18977 }
18978 }
18979
18980 foreach $pkg ( keys %is_constant ) {
18981 print $fh "\nconstants and constant subs in package $pkg\n";
18982
18983 foreach $sub ( keys %{ $is_constant{$pkg} } ) {
18984 print $fh "$sub\n";
18985 }
18986 }
18987}
18988
18989sub prepare_for_a_new_file {
18990 $saw_negative_indentation = 0;
18991 $id_scan_state = '';
18992 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
18993 $last_nonblank_token = ';'; # the only possible starting state which
18994 $last_nonblank_type = ';'; # will make a leading brace a code block
18995 $last_nonblank_block_type = '';
18996 $last_nonblank_container_type = '';
18997 $last_nonblank_type_sequence = '';
18998 $last_last_nonblank_token = ';';
18999 $last_last_nonblank_type = ';';
19000 $last_last_nonblank_block_type = '';
19001 $last_last_nonblank_container_type = '';
19002 $last_last_nonblank_type_sequence = '';
19003 $last_nonblank_prototype = "";
19004 $identifier = '';
19005 $in_quote = 0; # flag telling if we are chasing a quote, and what kind
19006 $quote_type = 'Q';
19007 $quote_character = ""; # character we seek if chasing a quote
19008 $quote_pos = 0; # next character index to check for case of alphanum char
19009 $quote_depth = 0;
19010 $allowed_quote_modifiers = "";
19011 $paren_depth = 0;
19012 $brace_depth = 0;
19013 $square_bracket_depth = 0;
19014 $current_package = "main";
19015 @current_depth[ 0 .. $#closing_brace_names ] =
19016 (0) x scalar @closing_brace_names;
19017 @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
19018 ( 0 .. $#closing_brace_names );
19019 @current_sequence_number = ();
19020
19021 $paren_type[$paren_depth] = '';
19022 $paren_semicolon_count[$paren_depth] = 0;
19023 $brace_type[$brace_depth] = ';'; # identify opening brace as code block
19024 $brace_structural_type[$brace_depth] = '';
19025 $brace_statement_type[$brace_depth] = "";
19026 $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
19027 $paren_structural_type[$brace_depth] = '';
19028 $square_bracket_type[$square_bracket_depth] = '';
19029 $square_bracket_structural_type[$square_bracket_depth] = '';
19030 $brace_package[$paren_depth] = $current_package;
19031 %is_constant = (); # user-defined constants
19032 %is_user_function = (); # user-defined functions
19033 %user_function_prototype = (); # their prototypes
19034 %is_block_function = ();
19035 %is_block_list_function = ();
19036 %saw_function_definition = ();
19037 $unexpected_error_count = 0;
19038 $want_paren = "";
19039 $context = UNKNOWN_CONTEXT;
19040 @slevel_stack = ();
19041 $ci_string_in_tokenizer = "";
19042 $continuation_string_in_tokenizer = "0";
19043 $in_statement_continuation = 0;
19044 @lower_case_labels_at = ();
19045 $saw_v_string = 0; # for warning of v-strings on older perl
19046 $nesting_token_string = "";
19047 $nesting_type_string = "";
19048 $nesting_block_string = '1'; # initially in a block
19049 $nesting_block_flag = 1;
19050 $nesting_list_string = '0'; # initially not in a list
19051 $nesting_list_flag = 0; # initially not in a list
19052 $nearly_matched_here_target_at = undef;
19053}
19054
19055sub get_quote_target {
19056 return matching_end_token($quote_character);
19057}
19058
19059sub get_indentation_level {
19060 return $level_in_tokenizer;
19061}
19062
19063sub reset_indentation_level {
19064 $level_in_tokenizer = $_[0];
19065 $slevel_in_tokenizer = $_[0];
19066 push @slevel_stack, $slevel_in_tokenizer;
19067}
19068
19069{ # begin tokenize_this_line
19070
19071 use constant BRACE => 0;
19072 use constant SQUARE_BRACKET => 1;
19073 use constant PAREN => 2;
19074 use constant QUESTION_COLON => 3;
19075
19076 my (
19077 $block_type, $container_type, $expecting,
19078 $here_doc_target, $here_quote_character, $i,
19079 $i_tok, $last_nonblank_i, $next_tok,
19080 $next_type, $prototype, $rtoken_map,
19081 $rtoken_type, $rtokens, $tok,
19082 $type, $type_sequence,
19083 );
19084
19085 my @output_token_list = (); # stack of output token indexes
19086 my @output_token_type = (); # token types
19087 my @output_block_type = (); # types of code block
19088 my @output_container_type = (); # paren types, such as if, elsif, ..
19089 my @output_type_sequence = (); # nesting sequential number
19090
19091 my @here_target_list = (); # list of here-doc target strings
19092
19093 # ------------------------------------------------------------
19094 # beginning of various scanner interfaces to simplify coding
19095 # ------------------------------------------------------------
19096 sub scan_bare_identifier {
19097 ( $i, $tok, $type, $prototype ) =
19098 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
19099 $rtoken_map );
19100 }
19101
19102 sub scan_identifier {
19103 ( $i, $tok, $type, $id_scan_state, $identifier ) =
19104 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens );
19105 }
19106
19107 sub scan_id {
19108 ( $i, $tok, $type, $id_scan_state ) =
19109 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
19110 $id_scan_state );
19111 }
19112
19113 my $number;
19114
19115 sub scan_number {
19116 ( $i, $type, $number ) =
19117 scan_number_do( $input_line, $i, $rtoken_map, $type );
19118 }
19119
19120 # a sub to warn if token found where term expected
19121 sub error_if_expecting_TERM {
19122 if ( $expecting == TERM ) {
19123 if ( $really_want_term{$last_nonblank_type} ) {
19124 unexpected( $tok, "term", $i_tok, $last_nonblank_i );
19125 1;
19126 }
19127 }
19128 }
19129
19130 # a sub to warn if token found where operator expected
19131 sub error_if_expecting_OPERATOR {
19132 if ( $expecting == OPERATOR ) {
19133 my $thing = defined $_[0] ? $_[0] : $tok;
19134 unexpected( $thing, "operator", $i_tok, $last_nonblank_i );
19135 if ( $i_tok == 0 ) {
19136 interrupt_logfile();
19137 warning("Missing ';' above?\n");
19138 resume_logfile();
19139 }
19140 1;
19141 }
19142 }
19143
19144 # ------------------------------------------------------------
19145 # end scanner interfaces
19146 # ------------------------------------------------------------
19147
19148 my %is_for_foreach;
19149 @_ = qw(for foreach);
19150 @is_for_foreach{@_} = (1) x scalar(@_);
19151
19152 my %is_my_our;
19153 @_ = qw(my our);
19154 @is_my_our{@_} = (1) x scalar(@_);
19155
19156 # These keywords may introduce blocks after parenthesized expressions,
19157 # in the form:
19158 # keyword ( .... ) { BLOCK }
19159 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
19160 my %is_blocktype_with_paren;
19161 @_ = qw(if elsif unless while until for foreach switch case given when);
19162 @is_blocktype_with_paren{@_} = (1) x scalar(@_);
19163
19164 # ------------------------------------------------------------
19165 # begin hash of code for handling most token types
19166 # ------------------------------------------------------------
19167 my $tokenization_code = {
19168
19169 # no special code for these types yet, but syntax checks
19170 # could be added
19171
19172## '!' => undef,
19173## '!=' => undef,
19174## '!~' => undef,
19175## '%=' => undef,
19176## '&&=' => undef,
19177## '&=' => undef,
19178## '+=' => undef,
19179## '-=' => undef,
19180## '..' => undef,
19181## '..' => undef,
19182## '...' => undef,
19183## '.=' => undef,
19184## '<<=' => undef,
19185## '<=' => undef,
19186## '<=>' => undef,
19187## '<>' => undef,
19188## '=' => undef,
19189## '==' => undef,
19190## '=~' => undef,
19191## '>=' => undef,
19192## '>>' => undef,
19193## '>>=' => undef,
19194## '\\' => undef,
19195## '^=' => undef,
19196## '|=' => undef,
19197## '||=' => undef,
19198## '~' => undef,
19199
19200 '>' => sub {
19201 error_if_expecting_TERM()
19202 if ( $expecting == TERM );
19203 },
19204 '|' => sub {
19205 error_if_expecting_TERM()
19206 if ( $expecting == TERM );
19207 },
19208 '$' => sub {
19209
19210 # start looking for a scalar
19211 error_if_expecting_OPERATOR("Scalar")
19212 if ( $expecting == OPERATOR );
19213 scan_identifier();
19214
19215 if ( $identifier eq '$^W' ) {
19216 $tokenizer_self->{_saw_perl_dash_w} = 1;
19217 }
19218
19219 # Check for indentifier in indirect object slot
19220 # (vorboard.pl, sort.t). Something like:
19221 # /^(print|printf|sort|exec|system)$/
19222 if (
19223 $is_indirect_object_taker{$last_nonblank_token}
19224
19225 || ( ( $last_nonblank_token eq '(' )
19226 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
19227 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
19228 )
19229 {
19230 $type = 'Z';
19231 }
19232 },
19233 '(' => sub {
19234
19235 ++$paren_depth;
19236 $paren_semicolon_count[$paren_depth] = 0;
19237 if ($want_paren) {
19238 $container_type = $want_paren;
19239 $want_paren = "";
19240 }
19241 else {
19242 $container_type = $last_nonblank_token;
19243
19244 # We can check for a syntax error here of unexpected '(',
19245 # but this is going to get messy...
19246 if (
19247 $expecting == OPERATOR
19248
19249 # be sure this is not a method call of the form
19250 # &method(...), $method->(..), &{method}(...),
19251 # $ref[2](list) is ok & short for $ref[2]->(list)
19252 # NOTE: at present, braces in something like &{ xxx }
19253 # are not marked as a block, we might have a method call
19254 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
19255
19256 )
19257 {
19258
19259 # ref: camel 3 p 703.
19260 if ( $last_last_nonblank_token eq 'do' ) {
19261 complain(
19262"do SUBROUTINE is deprecated; consider & or -> notation\n"
19263 );
19264 }
19265 else {
19266
19267 # if this is an empty list, (), then it is not an
19268 # error; for example, we might have a constant pi and
19269 # invoke it with pi() or just pi;
19270 my ( $next_nonblank_token, $i_next ) =
19271 find_next_nonblank_token( $i, $rtokens );
19272 if ( $next_nonblank_token ne ')' ) {
19273 my $hint;
19274 error_if_expecting_OPERATOR('(');
19275
19276 if ( $last_nonblank_type eq 'C' ) {
19277 $hint =
19278 "$last_nonblank_token has a void prototype\n";
19279 }
19280 elsif ( $last_nonblank_type eq 'i' ) {
19281 if ( $i_tok > 0
19282 && $last_nonblank_token =~ /^\$/ )
19283 {
19284 $hint =
19285"Do you mean '$last_nonblank_token->(' ?\n";
19286 }
19287 }
19288 if ($hint) {
19289 interrupt_logfile();
19290 warning($hint);
19291 resume_logfile();
19292 }
19293 } ## end if ( $next_nonblank_token...
19294 } ## end else [ if ( $last_last_nonblank_token...
19295 } ## end if ( $expecting == OPERATOR...
19296 }
19297 $paren_type[$paren_depth] = $container_type;
19298 $type_sequence = increase_nesting_depth( PAREN, $i_tok );
19299
19300 # propagate types down through nested parens
19301 # for example: the second paren in 'if ((' would be structural
19302 # since the first is.
19303
19304 if ( $last_nonblank_token eq '(' ) {
19305 $type = $last_nonblank_type;
19306 }
19307
19308 # We exclude parens as structural after a ',' because it
19309 # causes subtle problems with continuation indentation for
19310 # something like this, where the first 'or' will not get
19311 # indented.
19312 #
19313 # assert(
19314 # __LINE__,
19315 # ( not defined $check )
19316 # or ref $check
19317 # or $check eq "new"
19318 # or $check eq "old",
19319 # );
19320 #
19321 # Likewise, we exclude parens where a statement can start
19322 # because of problems with continuation indentation, like
19323 # these:
19324 #
19325 # ($firstline =~ /^#\!.*perl/)
19326 # and (print $File::Find::name, "\n")
19327 # and (return 1);
19328 #
19329 # (ref($usage_fref) =~ /CODE/)
19330 # ? &$usage_fref
19331 # : (&blast_usage, &blast_params, &blast_general_params);
19332
19333 else {
19334 $type = '{';
19335 }
19336
19337 if ( $last_nonblank_type eq ')' ) {
19338 warning(
19339 "Syntax error? found token '$last_nonblank_type' then '('\n"
19340 );
19341 }
19342 $paren_structural_type[$paren_depth] = $type;
19343
19344 },
19345 ')' => sub {
19346 $type_sequence = decrease_nesting_depth( PAREN, $i_tok );
19347
19348 if ( $paren_structural_type[$paren_depth] eq '{' ) {
19349 $type = '}';
19350 }
19351
19352 $container_type = $paren_type[$paren_depth];
19353
19354 # /^(for|foreach)$/
19355 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
19356 my $num_sc = $paren_semicolon_count[$paren_depth];
19357 if ( $num_sc > 0 && $num_sc != 2 ) {
19358 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
19359 }
19360 }
19361
19362 if ( $paren_depth > 0 ) { $paren_depth-- }
19363 },
19364 ',' => sub {
19365 if ( $last_nonblank_type eq ',' ) {
19366 complain("Repeated ','s \n");
19367 }
19368## FIXME: need to move this elsewhere, perhaps check after a '('
19369## elsif ($last_nonblank_token eq '(') {
19370## warning("Leading ','s illegal in some versions of perl\n");
19371## }
19372 },
19373 ';' => sub {
19374 $context = UNKNOWN_CONTEXT;
19375 $statement_type = '';
19376
19377 # /^(for|foreach)$/
19378 if ( $is_for_foreach{ $paren_type[$paren_depth] } )
19379 { # mark ; in for loop
19380
19381 # Be careful: we do not want a semicolon such as the
19382 # following to be included:
19383 #
19384 # for (sort {strcoll($a,$b);} keys %investments) {
19385
19386 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
19387 && $square_bracket_depth ==
19388 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
19389 {
19390
19391 $type = 'f';
19392 $paren_semicolon_count[$paren_depth]++;
19393 }
19394 }
19395
19396 },
19397 '"' => sub {
19398 error_if_expecting_OPERATOR("String")
19399 if ( $expecting == OPERATOR );
19400 $in_quote = 1;
19401 $type = 'Q';
19402 $allowed_quote_modifiers = "";
19403 },
19404 "'" => sub {
19405 error_if_expecting_OPERATOR("String")
19406 if ( $expecting == OPERATOR );
19407 $in_quote = 1;
19408 $type = 'Q';
19409 $allowed_quote_modifiers = "";
19410 },
19411 '`' => sub {
19412 error_if_expecting_OPERATOR("String")
19413 if ( $expecting == OPERATOR );
19414 $in_quote = 1;
19415 $type = 'Q';
19416 $allowed_quote_modifiers = "";
19417 },
19418 '/' => sub {
19419 my $is_pattern;
19420
19421 if ( $expecting == UNKNOWN ) { # indeterminte, must guess..
19422 my $msg;
19423 ( $is_pattern, $msg ) =
19424 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map );
19425
19426 if ($msg) {
19427 write_diagnostics("DIVIDE:$msg\n");
19428 write_logfile_entry($msg);
19429 }
19430 }
19431 else { $is_pattern = ( $expecting == TERM ) }
19432
19433 if ($is_pattern) {
19434 $in_quote = 1;
19435 $type = 'Q';
19436 $allowed_quote_modifiers = '[cgimosx]';
19437 }
19438 else { # not a pattern; check for a /= token
19439
19440 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
19441 $i++;
19442 $tok = '/=';
19443 $type = $tok;
19444 }
19445
19446 #DEBUG - collecting info on what tokens follow a divide
19447 # for development of guessing algorithm
19448 #if ( numerator_expected( $i, $rtokens ) < 0 ) {
19449 # #write_diagnostics( "DIVIDE? $input_line\n" );
19450 #}
19451 }
19452 },
19453 '{' => sub {
19454
19455 # if we just saw a ')', we will label this block with
19456 # its type. We need to do this to allow sub
19457 # code_block_type to determine if this brace starts a
19458 # code block or anonymous hash. (The type of a paren
19459 # pair is the preceding token, such as 'if', 'else',
19460 # etc).
19461 $container_type = "";
19462
19463 # ATTRS: for a '{' following an attribute list, reset
19464 # things to look like we just saw the sub name
19465 if ( $statement_type =~ /^sub/ ) {
19466 $last_nonblank_token = $statement_type;
19467 $last_nonblank_type = 'i';
19468 $statement_type = "";
19469 }
19470
19471 # patch for SWITCH/CASE: hide these keywords from an immediately
19472 # following opening brace
19473 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
19474 && $statement_type eq $last_nonblank_token )
19475 {
19476 $last_nonblank_token = ";";
19477 }
19478
19479 elsif ( $last_nonblank_token eq ')' ) {
19480 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
19481
19482 # defensive move in case of a nesting error (pbug.t)
19483 # in which this ')' had no previous '('
19484 # this nesting error will have been caught
19485 if ( !defined($last_nonblank_token) ) {
19486 $last_nonblank_token = 'if';
19487 }
19488
19489 # check for syntax error here;
19490 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
19491 my $list = join( ' ', sort keys %is_blocktype_with_paren );
19492 warning(
19493 "syntax error at ') {', didn't see one of: $list\n");
19494 }
19495 }
19496
19497 # patch for paren-less for/foreach glitch, part 2.
19498 # see note below under 'qw'
19499 elsif ($last_nonblank_token eq 'qw'
19500 && $is_for_foreach{$want_paren} )
19501 {
19502 $last_nonblank_token = $want_paren;
19503 if ( $last_last_nonblank_token eq $want_paren ) {
19504 warning(
19505"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
19506 );
19507
19508 }
19509 $want_paren = "";
19510 }
19511
19512 # now identify which of the three possible types of
19513 # curly braces we have: hash index container, anonymous
19514 # hash reference, or code block.
19515
19516 # non-structural (hash index) curly brace pair
19517 # get marked 'L' and 'R'
19518 if ( is_non_structural_brace() ) {
19519 $type = 'L';
19520
19521 # patch for SWITCH/CASE:
19522 # allow paren-less identifier after 'when'
19523 # if the brace is preceded by a space
19524 if ( $statement_type eq 'when'
19525 && $last_nonblank_type eq 'i'
19526 && $last_last_nonblank_type eq 'k'
19527 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
19528 {
19529 $type = '{';
19530 $block_type = $statement_type;
19531 }
19532 }
19533
19534 # code and anonymous hash have the same type, '{', but are
19535 # distinguished by 'block_type',
19536 # which will be blank for an anonymous hash
19537 else {
19538
19539 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type );
19540
19541 # patch to promote bareword type to function taking block
19542 if ( $block_type
19543 && $last_nonblank_type eq 'w'
19544 && $last_nonblank_i >= 0 )
19545 {
19546 if ( $output_token_type[$last_nonblank_i] eq 'w' ) {
19547 $output_token_type[$last_nonblank_i] = 'G';
19548 }
19549 }
19550
19551 # patch for SWITCH/CASE: if we find a stray opening block brace
19552 # where we might accept a 'case' or 'when' block, then take it
19553 if ( $statement_type eq 'case'
19554 || $statement_type eq 'when' )
19555 {
19556 if ( !$block_type || $block_type eq '}' ) {
19557 $block_type = $statement_type;
19558 }
19559 }
19560 }
19561 $brace_type[ ++$brace_depth ] = $block_type;
19562 $brace_package[$brace_depth] = $current_package;
19563 $type_sequence = increase_nesting_depth( BRACE, $i_tok );
19564 $brace_structural_type[$brace_depth] = $type;
19565 $brace_context[$brace_depth] = $context;
19566 $brace_statement_type[$brace_depth] = $statement_type;
19567 },
19568 '}' => sub {
19569 $block_type = $brace_type[$brace_depth];
19570 if ($block_type) { $statement_type = '' }
19571 if ( defined( $brace_package[$brace_depth] ) ) {
19572 $current_package = $brace_package[$brace_depth];
19573 }
19574
19575 # can happen on brace error (caught elsewhere)
19576 else {
19577 }
19578 $type_sequence = decrease_nesting_depth( BRACE, $i_tok );
19579
19580 if ( $brace_structural_type[$brace_depth] eq 'L' ) {
19581 $type = 'R';
19582 }
19583
19584 # propagate type information for 'do' and 'eval' blocks.
19585 # This is necessary to enable us to know if an operator
19586 # or term is expected next
19587 if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
19588 $tok = $brace_type[$brace_depth];
19589 }
19590
19591 $context = $brace_context[$brace_depth];
19592 $statement_type = $brace_statement_type[$brace_depth];
19593 if ( $brace_depth > 0 ) { $brace_depth--; }
19594 },
19595 '&' => sub { # maybe sub call? start looking
19596
19597 # We have to check for sub call unless we are sure we
19598 # are expecting an operator. This example from s2p
19599 # got mistaken as a q operator in an early version:
19600 # print BODY &q(<<'EOT');
19601 if ( $expecting != OPERATOR ) {
19602 scan_identifier();
19603 }
19604 else {
19605 }
19606 },
19607 '<' => sub { # angle operator or less than?
19608
19609 if ( $expecting != OPERATOR ) {
19610 ( $i, $type ) =
19611 find_angle_operator_termination( $input_line, $i, $rtoken_map,
19612 $expecting );
19613
19614 }
19615 else {
19616 }
19617 },
19618 '?' => sub { # ?: conditional or starting pattern?
19619
19620 my $is_pattern;
19621
19622 if ( $expecting == UNKNOWN ) {
19623
19624 my $msg;
19625 ( $is_pattern, $msg ) =
19626 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map );
19627
19628 if ($msg) { write_logfile_entry($msg) }
19629 }
19630 else { $is_pattern = ( $expecting == TERM ) }
19631
19632 if ($is_pattern) {
19633 $in_quote = 1;
19634 $type = 'Q';
19635 $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this
19636 }
19637 else {
19638
19639 $type_sequence =
19640 increase_nesting_depth( QUESTION_COLON, $i_tok );
19641 }
19642 },
19643 '*' => sub { # typeglob, or multiply?
19644
19645 if ( $expecting == TERM ) {
19646 scan_identifier();
19647 }
19648 else {
19649
19650 if ( $$rtokens[ $i + 1 ] eq '=' ) {
19651 $tok = '*=';
19652 $type = $tok;
19653 $i++;
19654 }
19655 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
19656 $tok = '**';
19657 $type = $tok;
19658 $i++;
19659 if ( $$rtokens[ $i + 1 ] eq '=' ) {
19660 $tok = '**=';
19661 $type = $tok;
19662 $i++;
19663 }
19664 }
19665 }
19666 },
19667 '.' => sub { # what kind of . ?
19668
19669 if ( $expecting != OPERATOR ) {
19670 scan_number();
19671 if ( $type eq '.' ) {
19672 error_if_expecting_TERM()
19673 if ( $expecting == TERM );
19674 }
19675 }
19676 else {
19677 }
19678 },
19679 ':' => sub {
19680
19681 # if this is the first nonblank character, call it a label
19682 # since perl seems to just swallow it
19683 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
19684 $type = 'J';
19685 }
19686
19687 # ATTRS: check for a ':' which introduces an attribute list
19688 # (this might eventually get its own token type)
19689 elsif ( $statement_type =~ /^sub/ ) {
19690 $type = 'A';
19691 }
19692
19693 # check for scalar attribute, such as
19694 # my $foo : shared = 1;
19695 elsif ($is_my_our{$statement_type}
19696 && $current_depth[QUESTION_COLON] == 0 )
19697 {
19698 $type = 'A';
19699 }
19700
19701 # otherwise, it should be part of a ?/: operator
19702 else {
19703 $type_sequence =
19704 decrease_nesting_depth( QUESTION_COLON, $i_tok );
19705 if ( $last_nonblank_token eq '?' ) {
19706 warning("Syntax error near ? :\n");
19707 }
19708 }
19709 },
19710 '+' => sub { # what kind of plus?
19711
19712 if ( $expecting == TERM ) {
19713 scan_number();
19714
19715 # unary plus is safest assumption if not a number
19716 if ( !defined($number) ) { $type = 'p'; }
19717 }
19718 elsif ( $expecting == OPERATOR ) {
19719 }
19720 else {
19721 if ( $next_type eq 'w' ) { $type = 'p' }
19722 }
19723 },
19724 '@' => sub {
19725
19726 error_if_expecting_OPERATOR("Array")
19727 if ( $expecting == OPERATOR );
19728 scan_identifier();
19729 },
19730 '%' => sub { # hash or modulo?
19731
19732 # first guess is hash if no following blank
19733 if ( $expecting == UNKNOWN ) {
19734 if ( $next_type ne 'b' ) { $expecting = TERM }
19735 }
19736 if ( $expecting == TERM ) {
19737 scan_identifier();
19738 }
19739 },
19740 '[' => sub {
19741 $square_bracket_type[ ++$square_bracket_depth ] =
19742 $last_nonblank_token;
19743 $type_sequence = increase_nesting_depth( SQUARE_BRACKET, $i_tok );
19744
19745 # It may seem odd, but structural square brackets have
19746 # type '{' and '}'. This simplifies the indentation logic.
19747 if ( !is_non_structural_brace() ) {
19748 $type = '{';
19749 }
19750 $square_bracket_structural_type[$square_bracket_depth] = $type;
19751 },
19752 ']' => sub {
19753 $type_sequence = decrease_nesting_depth( SQUARE_BRACKET, $i_tok );
19754
19755 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
19756 {
19757 $type = '}';
19758 }
19759 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
19760 },
19761 '-' => sub { # what kind of minus?
19762
19763 if ( ( $expecting != OPERATOR )
19764 && $is_file_test_operator{$next_tok} )
19765 {
19766 $i++;
19767 $tok .= $next_tok;
19768 $type = 'F';
19769 }
19770 elsif ( $expecting == TERM ) {
19771 scan_number();
19772
19773 # maybe part of bareword token? unary is safest
19774 if ( !defined($number) ) { $type = 'm'; }
19775
19776 }
19777 elsif ( $expecting == OPERATOR ) {
19778 }
19779 else {
19780
19781 if ( $next_type eq 'w' ) {
19782 $type = 'm';
19783 }
19784 }
19785 },
19786
19787 '^' => sub {
19788
19789 # check for special variables like ${^WARNING_BITS}
19790 if ( $expecting == TERM ) {
19791
19792 # FIXME: this should work but will not catch errors
19793 # because we also have to be sure that previous token is
19794 # a type character ($,@,%).
19795 if ( $last_nonblank_token eq '{'
19796 && ( $next_tok =~ /^[A-Za-z_]/ ) )
19797 {
19798
19799 if ( $next_tok eq 'W' ) {
19800 $tokenizer_self->{_saw_perl_dash_w} = 1;
19801 }
19802 $tok = $tok . $next_tok;
19803 $i = $i + 1;
19804 $type = 'w';
19805 }
19806
19807 else {
19808 unless ( error_if_expecting_TERM() ) {
19809
19810 # Something like this is valid but strange:
19811 # undef ^I;
19812 complain("The '^' seems unusual here\n");
19813 }
19814 }
19815 }
19816 },
19817
19818 '::' => sub { # probably a sub call
19819 scan_bare_identifier();
19820 },
19821 '<<' => sub { # maybe a here-doc?
19822 return
19823 unless ( $i < $max_token_index )
19824 ; # here-doc not possible if end of line
19825
19826 if ( $expecting != OPERATOR ) {
19827 my ($found_target);
19828 ( $found_target, $here_doc_target, $here_quote_character, $i ) =
19829 find_here_doc( $expecting, $i, $rtokens, $rtoken_map );
19830
19831 if ($found_target) {
19832 push @here_target_list,
19833 [ $here_doc_target, $here_quote_character ];
19834 $type = 'h';
19835 if ( length($here_doc_target) > 80 ) {
19836 my $truncated = substr( $here_doc_target, 0, 80 );
19837 complain("Long here-target: '$truncated' ...\n");
19838 }
19839 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
19840 complain(
19841 "Unconventional here-target: '$here_doc_target'\n"
19842 );
19843 }
19844 }
19845 elsif ( $expecting == TERM ) {
19846
19847 # shouldn't happen..
19848 warning("Program bug; didn't find here doc target\n");
19849 report_definite_bug();
19850 }
19851 }
19852 else {
19853 }
19854 },
19855 '->' => sub {
19856
19857 # if -> points to a bare word, we must scan for an identifier,
19858 # otherwise something like ->y would look like the y operator
19859 scan_identifier();
19860 },
19861
19862 # type = 'pp' for pre-increment, '++' for post-increment
19863 '++' => sub {
19864 if ( $expecting == TERM ) { $type = 'pp' }
19865 elsif ( $expecting == UNKNOWN ) {
19866 my ( $next_nonblank_token, $i_next ) =
19867 find_next_nonblank_token( $i, $rtokens );
19868 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
19869 }
19870 },
19871
19872 '=>' => sub {
19873 if ( $last_nonblank_type eq $tok ) {
19874 complain("Repeated '=>'s \n");
19875 }
19876 },
19877
19878 # type = 'mm' for pre-decrement, '--' for post-decrement
19879 '--' => sub {
19880
19881 if ( $expecting == TERM ) { $type = 'mm' }
19882 elsif ( $expecting == UNKNOWN ) {
19883 my ( $next_nonblank_token, $i_next ) =
19884 find_next_nonblank_token( $i, $rtokens );
19885 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
19886 }
19887 },
19888
19889 '&&' => sub {
19890 error_if_expecting_TERM()
19891 if ( $expecting == TERM );
19892 },
19893
19894 '||' => sub {
19895 error_if_expecting_TERM()
19896 if ( $expecting == TERM );
19897 },
19898 };
19899
19900 # ------------------------------------------------------------
19901 # end hash of code for handling individual token types
19902 # ------------------------------------------------------------
19903
19904 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
19905
19906 # These block types terminate statements and do not need a trailing
19907 # semicolon
19908 # patched for SWITCH/CASE:
19909 my %is_zero_continuation_block_type;
19910 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
19911 if elsif else unless while until for foreach switch case given when);
19912 @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
19913
19914 my %is_not_zero_continuation_block_type;
19915 @_ = qw(sort grep map do eval);
19916 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
19917
19918 my %is_logical_container;
19919 @_ = qw(if elsif unless while and or not && ! || for foreach);
19920 @is_logical_container{@_} = (1) x scalar(@_);
19921
19922 my %is_binary_type;
19923 @_ = qw(|| &&);
19924 @is_binary_type{@_} = (1) x scalar(@_);
19925
19926 my %is_binary_keyword;
19927 @_ = qw(and or eq ne cmp);
19928 @is_binary_keyword{@_} = (1) x scalar(@_);
19929
19930 # 'L' is token for opening { at hash key
19931 my %is_opening_type;
19932 @_ = qw" L { ( [ ";
19933 @is_opening_type{@_} = (1) x scalar(@_);
19934
19935 # 'R' is token for closing } at hash key
19936 my %is_closing_type;
19937 @_ = qw" R } ) ] ";
19938 @is_closing_type{@_} = (1) x scalar(@_);
19939
19940 my %is_redo_last_next_goto;
19941 @_ = qw(redo last next goto);
19942 @is_redo_last_next_goto{@_} = (1) x scalar(@_);
19943
19944 my %is_use_require;
19945 @_ = qw(use require);
19946 @is_use_require{@_} = (1) x scalar(@_);
19947
19948 my %is_sub_package;
19949 @_ = qw(sub package);
19950 @is_sub_package{@_} = (1) x scalar(@_);
19951
19952 # This hash holds the hash key in $tokenizer_self for these keywords:
19953 my %is_format_END_DATA = (
19954 'format' => '_in_format',
19955 '__END__' => '_in_end',
19956 '__DATA__' => '_in_data',
19957 );
19958
19959 # ref: camel 3 p 147,
19960 # but perl may accept undocumented flags
19961 my %quote_modifiers = (
19962 's' => '[cegimosx]',
19963 'y' => '[cds]',
19964 'tr' => '[cds]',
19965 'm' => '[cgimosx]',
19966 'qr' => '[imosx]',
19967 'q' => "",
19968 'qq' => "",
19969 'qw' => "",
19970 'qx' => "",
19971 );
19972
19973 # table showing how many quoted things to look for after quote operator..
19974 # s, y, tr have 2 (pattern and replacement)
19975 # others have 1 (pattern only)
19976 my %quote_items = (
19977 's' => 2,
19978 'y' => 2,
19979 'tr' => 2,
19980 'm' => 1,
19981 'qr' => 1,
19982 'q' => 1,
19983 'qq' => 1,
19984 'qw' => 1,
19985 'qx' => 1,
19986 );
19987
19988 sub tokenize_this_line {
19989
19990 # This routine breaks a line of perl code into tokens which are of use in
19991 # indentation and reformatting. One of my goals has been to define tokens
19992 # such that a newline may be inserted between any pair of tokens without
19993 # changing or invalidating the program. This version comes close to this,
19994 # although there are necessarily a few exceptions which must be caught by
19995 # the formatter. Many of these involve the treatment of bare words.
19996 #
19997 # The tokens and their types are returned in arrays. See previous
19998 # routine for their names.
19999 #
20000 # See also the array "valid_token_types" in the BEGIN section for an
20001 # up-to-date list.
20002 #
20003 # To simplify things, token types are either a single character, or they
20004 # are identical to the tokens themselves.
20005 #
20006 # As a debugging aid, the -D flag creates a file containing a side-by-side
20007 # comparison of the input string and its tokenization for each line of a file.
20008 # This is an invaluable debugging aid.
20009 #
20010 # In addition to tokens, and some associated quantities, the tokenizer
20011 # also returns flags indication any special line types. These include
20012 # quotes, here_docs, formats.
20013 #
20014 # -----------------------------------------------------------------------
20015 #
20016 # How to add NEW_TOKENS:
20017 #
20018 # New token types will undoubtedly be needed in the future both to keep up
20019 # with changes in perl and to help adapt the tokenizer to other applications.
20020 #
20021 # Here are some notes on the minimal steps. I wrote these notes while
20022 # adding the 'v' token type for v-strings, which are things like version
20023 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
20024 # can use your editor to search for the string "NEW_TOKENS" to find the
20025 # appropriate sections to change):
20026 #
20027 # *. Try to talk somebody else into doing it! If not, ..
20028 #
20029 # *. Make a backup of your current version in case things don't work out!
20030 #
20031 # *. Think of a new, unused character for the token type, and add to
20032 # the array @valid_token_types in the BEGIN section of this package.
20033 # For example, I used 'v' for v-strings.
20034 #
20035 # *. Implement coding to recognize the $type of the token in this routine.
20036 # This is the hardest part, and is best done by immitating or modifying
20037 # some of the existing coding. For example, to recognize v-strings, I
20038 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
20039 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
20040 #
20041 # *. Update sub operator_expected. This update is critically important but
20042 # the coding is trivial. Look at the comments in that routine for help.
20043 # For v-strings, which should behave like numbers, I just added 'v' to the
20044 # regex used to handle numbers and strings (types 'n' and 'Q').
20045 #
20046 # *. Implement a 'bond strength' rule in sub set_bond_strengths in
20047 # Perl::Tidy::Formatter for breaking lines around this token type. You can
20048 # skip this step and take the default at first, then adjust later to get
20049 # desired results. For adding type 'v', I looked at sub bond_strength and
20050 # saw that number type 'n' was using default strengths, so I didn't do
20051 # anything. I may tune it up someday if I don't like the way line
20052 # breaks with v-strings look.
20053 #
20054 # *. Implement a 'whitespace' rule in sub set_white_space_flag in
20055 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
20056 # and saw that type 'n' used spaces on both sides, so I just added 'v'
20057 # to the array @spaces_both_sides.
20058 #
20059 # *. Update HtmlWriter package so that users can colorize the token as
20060 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
20061 # that package. For v-strings, I initially chose to use a default color
20062 # equal to the default for numbers, but it might be nice to change that
20063 # eventually.
20064 #
20065 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
20066 #
20067 # *. Run lots and lots of debug tests. Start with special files designed
20068 # to test the new token type. Run with the -D flag to create a .DEBUG
20069 # file which shows the tokenization. When these work ok, test as many old
20070 # scripts as possible. Start with all of the '.t' files in the 'test'
20071 # directory of the distribution file. Compare .tdy output with previous
20072 # version and updated version to see the differences. Then include as
20073 # many more files as possible. My own technique has been to collect a huge
20074 # number of perl scripts (thousands!) into one directory and run perltidy
20075 # *, then run diff between the output of the previous version and the
20076 # current version.
20077 #
20078 # -----------------------------------------------------------------------
20079
20080 my $line_of_tokens = shift;
20081 my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
20082
20083 # patch while coding change is underway
20084 # make callers private data to allow access
20085 # $tokenizer_self = $caller_tokenizer_self;
20086
20087 # extract line number for use in error messages
20088 $input_line_number = $line_of_tokens->{_line_number};
20089
20090 # check for pod documentation
20091 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
20092
20093 # must not be in multi-line quote
20094 # and must not be in an eqn
20095 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
20096 {
20097 $tokenizer_self->{_in_pod} = 1;
20098 return;
20099 }
20100 }
20101
20102 $input_line = $untrimmed_input_line;
20103
20104 chomp $input_line;
20105
20106 # trim start of this line unless we are continuing a quoted line
20107 # do not trim end because we might end in a quote (test: deken4.pl)
20108 # Perl::Tidy::Formatter will delete needless trailing blanks
20109 unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
20110 $input_line =~ s/^\s*//; # trim left end
20111 }
20112
20113 # re-initialize for the main loop
20114 @output_token_list = (); # stack of output token indexes
20115 @output_token_type = (); # token types
20116 @output_block_type = (); # types of code block
20117 @output_container_type = (); # paren types, such as if, elsif, ..
20118 @output_type_sequence = (); # nesting sequential number
20119
20120 $tok = $last_nonblank_token;
20121 $type = $last_nonblank_type;
20122 $prototype = $last_nonblank_prototype;
20123 $last_nonblank_i = -1;
20124 $block_type = $last_nonblank_block_type;
20125 $container_type = $last_nonblank_container_type;
20126 $type_sequence = $last_nonblank_type_sequence;
20127 @here_target_list = (); # list of here-doc target strings
20128
20129 $peeked_ahead = 0;
20130
20131 # tokenization is done in two stages..
20132 # stage 1 is a very simple pre-tokenization
20133 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
20134
20135 # a little optimization for a full-line comment
20136 if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
20137 $max_tokens_wanted = 1 # no use tokenizing a comment
20138 }
20139
20140 # start by breaking the line into pre-tokens
20141 ( $rpretokens, $rpretoken_map, $rpretoken_type ) =
20142 pre_tokenize( $input_line, $max_tokens_wanted );
20143
20144 $max_token_index = scalar(@$rpretokens) - 1;
20145 push( @$rpretokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
20146 push( @$rpretoken_map, 0, 0, 0 ); # shouldn't be referenced
20147 push( @$rpretoken_type, 'b', 'b', 'b' );
20148
20149 # temporary copies while coding change is underway
20150 ( $rtokens, $rtoken_map, $rtoken_type ) =
20151 ( $rpretokens, $rpretoken_map, $rpretoken_type );
20152
20153 # initialize for main loop
20154 for $i ( 0 .. $max_token_index + 3 ) {
20155 $output_token_type[$i] = "";
20156 $output_block_type[$i] = "";
20157 $output_container_type[$i] = "";
20158 $output_type_sequence[$i] = "";
20159 }
20160 $i = -1;
20161 $i_tok = -1;
20162
20163 # ------------------------------------------------------------
20164 # begin main tokenization loop
20165 # ------------------------------------------------------------
20166
20167 # we are looking at each pre-token of one line and combining them
20168 # into tokens
20169 while ( ++$i <= $max_token_index ) {
20170
20171 if ($in_quote) { # continue looking for end of a quote
20172 $type = $quote_type;
20173
20174 unless (@output_token_list) { # initialize if continuation line
20175 push( @output_token_list, $i );
20176 $output_token_type[$i] = $type;
20177
20178 }
20179 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
20180
20181 # scan for the end of the quote or pattern
20182 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
20183 do_quote( $i, $in_quote, $quote_character, $quote_pos,
20184 $quote_depth, $rtokens, $rtoken_map );
20185
20186 # all done if we didn't find it
20187 last if ($in_quote);
20188
20189 # re-initialize for next search
20190 $quote_character = '';
20191 $quote_pos = 0;
20192 $quote_type = 'Q';
20193 last if ( ++$i > $max_token_index );
20194
20195 # look for any modifiers
20196 if ($allowed_quote_modifiers) {
20197
20198 # check for exact quote modifiers
20199 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
20200 my $str = $$rtokens[$i];
20201 while ( $str =~ /\G$allowed_quote_modifiers/gc ) { }
20202
20203 if ( defined( pos($str) ) ) {
20204
20205 # matched
20206 if ( pos($str) == length($str) ) {
20207 last if ( ++$i > $max_token_index );
20208 }
20209
20210 # Looks like a joined quote modifier
20211 # and keyword, maybe something like
20212 # s/xxx/yyy/gefor @k=...
20213 # Example is "galgen.pl". Would have to split
20214 # the word and insert a new token in the
20215 # pre-token list. This is so rare that I haven't
20216 # done it. Will just issue a warning citation.
20217
20218 # This error might also be triggered if my quote
20219 # modifier characters are incomplete
20220 else {
20221 warning(<<EOM);
20222
20223Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
20224Please put a space between quote modifiers and trailing keywords.
20225EOM
20226
20227 # print "token $$rtokens[$i]\n";
20228 # my $num = length($str) - pos($str);
20229 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
20230 # print "continuing with new token $$rtokens[$i]\n";
20231
20232 # skipping past this token does least damage
20233 last if ( ++$i > $max_token_index );
20234 }
20235 }
20236 else {
20237
20238 # example file: rokicki4.pl
20239 # This error might also be triggered if my quote
20240 # modifier characters are incomplete
20241 write_logfile_entry(
20242"Note: found word $str at quote modifier location\n"
20243 );
20244 }
20245 }
20246
20247 # re-initialize
20248 $allowed_quote_modifiers = "";
20249 }
20250 }
20251
20252 unless ( $tok =~ /^\s*$/ ) {
20253
20254 # try to catch some common errors
20255 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
20256
20257 if ( $last_nonblank_token eq 'eq' ) {
20258 complain("Should 'eq' be '==' here ?\n");
20259 }
20260 elsif ( $last_nonblank_token eq 'ne' ) {
20261 complain("Should 'ne' be '!=' here ?\n");
20262 }
20263 }
20264
20265 $last_last_nonblank_token = $last_nonblank_token;
20266 $last_last_nonblank_type = $last_nonblank_type;
20267 $last_last_nonblank_block_type = $last_nonblank_block_type;
20268 $last_last_nonblank_container_type =
20269 $last_nonblank_container_type;
20270 $last_last_nonblank_type_sequence =
20271 $last_nonblank_type_sequence;
20272 $last_nonblank_token = $tok;
20273 $last_nonblank_type = $type;
20274 $last_nonblank_prototype = $prototype;
20275 $last_nonblank_block_type = $block_type;
20276 $last_nonblank_container_type = $container_type;
20277 $last_nonblank_type_sequence = $type_sequence;
20278 $last_nonblank_i = $i_tok;
20279 }
20280
20281 # store previous token type
20282 if ( $i_tok >= 0 ) {
20283 $output_token_type[$i_tok] = $type;
20284 $output_block_type[$i_tok] = $block_type;
20285 $output_container_type[$i_tok] = $container_type;
20286 $output_type_sequence[$i_tok] = $type_sequence;
20287 }
20288 my $pre_tok = $$rtokens[$i]; # get the next pre-token
20289 my $pre_type = $$rtoken_type[$i]; # and type
20290 $tok = $pre_tok;
20291 $type = $pre_type; # to be modified as necessary
20292 $block_type = ""; # blank for all tokens except code block braces
20293 $container_type = ""; # blank for all tokens except some parens
20294 $type_sequence = ""; # blank for all tokens except ?/:
20295 $prototype = ""; # blank for all tokens except user defined subs
20296 $i_tok = $i;
20297
20298 # this pre-token will start an output token
20299 push( @output_token_list, $i_tok );
20300
20301 # continue gathering identifier if necessary
20302 # but do not start on blanks and comments
20303 if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
20304
20305 if ( $id_scan_state =~ /^(sub|package)/ ) {
20306 scan_id();
20307 }
20308 else {
20309 scan_identifier();
20310 }
20311
20312 last if ($id_scan_state);
20313 next if ( ( $i > 0 ) || $type );
20314
20315 # didn't find any token; start over
20316 $type = $pre_type;
20317 $tok = $pre_tok;
20318 }
20319
20320 # handle whitespace tokens..
20321 next if ( $type eq 'b' );
20322 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
20323 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
20324
20325 # Build larger tokens where possible, since we are not in a quote.
20326 #
20327 # First try to assemble digraphs. The following tokens are
20328 # excluded and handled specially:
20329 # '/=' is excluded because the / might start a pattern.
20330 # 'x=' is excluded since it might be $x=, with $ on previous line
20331 # '**' and *= might be typeglobs of punctuation variables
20332 # I have allowed tokens starting with <, such as <=,
20333 # because I don't think these could be valid angle operators.
20334 # test file: storrs4.pl
20335 my $test_tok = $tok . $$rtokens[ $i + 1 ];
20336
20337 if (
20338 $is_digraph{$test_tok}
20339 && ( $test_tok ne '/=' ) # might be pattern
20340 && ( $test_tok ne 'x=' ) # might be $x
20341 && ( $test_tok ne '**' ) # typeglob?
20342 && ( $test_tok ne '*=' ) # typeglob?
20343 )
20344 {
20345 $tok = $test_tok;
20346 $i++;
20347
20348 # Now try to assemble trigraphs. Note that all possible
20349 # perl trigraphs can be constructed by appending a character
20350 # to a digraph.
20351 $test_tok = $tok . $$rtokens[ $i + 1 ];
20352
20353 if ( $is_trigraph{$test_tok} ) {
20354 $tok = $test_tok;
20355 $i++;
20356 }
20357 }
20358 $type = $tok;
20359 $next_tok = $$rtokens[ $i + 1 ];
20360 $next_type = $$rtoken_type[ $i + 1 ];
20361
20362 TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
20363 local $" = ')(';
20364 my @debug_list = (
20365 $last_nonblank_token, $tok,
20366 $next_tok, $brace_depth,
20367 $brace_type[$brace_depth], $paren_depth,
20368 $paren_type[$paren_depth]
20369 );
20370 print "TOKENIZE:(@debug_list)\n";
20371 };
20372
20373 ###############################################################
20374 # We have the next token, $tok.
20375 # Now we have to examine this token and decide what it is
20376 # and define its $type
20377 #
20378 # section 1: bare words
20379 ###############################################################
20380
20381 if ( $pre_type eq 'w' ) {
20382 $expecting = operator_expected( $prev_type, $tok, $next_type );
20383 my ( $next_nonblank_token, $i_next ) =
20384 find_next_nonblank_token( $i, $rtokens );
20385
20386 # quote a word followed by => operator
20387 if ( $next_nonblank_token eq '=' ) {
20388
20389 if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
20390 if ( $is_constant{$current_package}{$tok} ) {
20391 $type = 'C';
20392 }
20393 elsif ( $is_user_function{$current_package}{$tok} ) {
20394 $type = 'U';
20395 $prototype =
20396 $user_function_prototype{$current_package}{$tok};
20397 }
20398 elsif ( $tok =~ /^v\d+$/ ) {
20399 $type = 'v';
20400 unless ($saw_v_string) { report_v_string($tok) }
20401 }
20402 else { $type = 'w' }
20403
20404 next;
20405 }
20406 }
20407
20408 # quote a bare word within braces..like xxx->{s}; note that we
20409 # must be sure this is not a structural brace, to avoid
20410 # mistaking {s} in the following for a quoted bare word:
20411 # for(@[){s}bla}BLA}
20412 if ( ( $last_nonblank_type eq 'L' )
20413 && ( $next_nonblank_token eq '}' ) )
20414 {
20415 $type = 'w';
20416 next;
20417 }
20418
20419 # a bare word immediately followed by :: is not a keyword;
20420 # use $tok_kw when testing for keywords to avoid a mistake
20421 my $tok_kw = $tok;
20422 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
20423 {
20424 $tok_kw .= '::';
20425 }
20426
20427 # handle operator x (now we know it isn't $x=)
20428 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
20429 if ( $tok eq 'x' ) {
20430
20431 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
20432 $tok = 'x=';
20433 $type = $tok;
20434 $i++;
20435 }
20436 else {
20437 $type = 'x';
20438 }
20439 }
20440
20441 # FIXME: Patch: mark something like x4 as an integer for now
20442 # It gets fixed downstream. This is easier than
20443 # splitting the pretoken.
20444 else {
20445 $type = 'n';
20446 }
20447 }
20448
20449 elsif ( ( $tok eq 'strict' )
20450 and ( $last_nonblank_token eq 'use' ) )
20451 {
20452 $tokenizer_self->{_saw_use_strict} = 1;
20453 scan_bare_identifier();
20454 }
20455
20456 elsif ( ( $tok eq 'warnings' )
20457 and ( $last_nonblank_token eq 'use' ) )
20458 {
20459 $tokenizer_self->{_saw_perl_dash_w} = 1;
20460
20461 # scan as identifier, so that we pick up something like:
20462 # use warnings::register
20463 scan_bare_identifier();
20464 }
20465
20466 elsif (
20467 $tok eq 'AutoLoader'
20468 && $tokenizer_self->{_look_for_autoloader}
20469 && (
20470 $last_nonblank_token eq 'use'
20471
20472 # these regexes are from AutoSplit.pm, which we want
20473 # to mimic
20474 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
20475 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
20476 )
20477 )
20478 {
20479 write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
20480 $tokenizer_self->{_saw_autoloader} = 1;
20481 $tokenizer_self->{_look_for_autoloader} = 0;
20482 scan_bare_identifier();
20483 }
20484
20485 elsif (
20486 $tok eq 'SelfLoader'
20487 && $tokenizer_self->{_look_for_selfloader}
20488 && ( $last_nonblank_token eq 'use'
20489 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
20490 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
20491 )
20492 {
20493 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
20494 $tokenizer_self->{_saw_selfloader} = 1;
20495 $tokenizer_self->{_look_for_selfloader} = 0;
20496 scan_bare_identifier();
20497 }
20498
20499 elsif ( ( $tok eq 'constant' )
20500 and ( $last_nonblank_token eq 'use' ) )
20501 {
20502 scan_bare_identifier();
20503 my ( $next_nonblank_token, $i_next ) =
20504 find_next_nonblank_token( $i, $rtokens );
20505
20506 if ($next_nonblank_token) {
20507
20508 if ( $is_keyword{$next_nonblank_token} ) {
20509 warning(
20510"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
20511 );
20512 }
20513
20514 # FIXME: could check for error in which next token is
20515 # not a word (number, punctuation, ..)
20516 else {
20517 $is_constant{$current_package}
20518 {$next_nonblank_token} = 1;
20519 }
20520 }
20521 }
20522
20523 # various quote operators
20524 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
20525 if ( $expecting == OPERATOR ) {
20526
20527 # patch for paren-less for/foreach glitch, part 1
20528 # perl will accept this construct as valid:
20529 #
20530 # foreach my $key qw\Uno Due Tres Quadro\ {
20531 # print "Set $key\n";
20532 # }
20533 unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
20534 {
20535 error_if_expecting_OPERATOR();
20536 }
20537 }
20538 $in_quote = $quote_items{$tok};
20539 $allowed_quote_modifiers = $quote_modifiers{$tok};
20540
20541 # All quote types are 'Q' except possibly qw quotes.
20542 # qw quotes are special in that they may generally be trimmed
20543 # of leading and trailing whitespace. So they are given a
20544 # separate type, 'q', unless requested otherwise.
20545 $type =
20546 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
20547 ? 'q'
20548 : 'Q';
20549 $quote_type = $type;
20550 }
20551
20552 # check for a statement label
20553 elsif (
20554 ( $next_nonblank_token eq ':' )
20555 && ( $$rtokens[ $i_next + 1 ] ne ':' )
20556 && ( $i_next <= $max_token_index ) # colon on same line
20557 && label_ok()
20558 )
20559 {
20560 if ( $tok !~ /A-Z/ ) {
20561 push @lower_case_labels_at, $input_line_number;
20562 }
20563 $type = 'J';
20564 $tok .= ':';
20565 $i = $i_next;
20566 next;
20567 }
20568
20569 # 'sub' || 'package'
20570 elsif ( $is_sub_package{$tok_kw} ) {
20571 error_if_expecting_OPERATOR()
20572 if ( $expecting == OPERATOR );
20573 scan_id();
20574 }
20575
20576 # Note on token types for format, __DATA__, __END__:
20577 # It simplifies things to give these type ';', so that when we
20578 # start rescanning we will be expecting a token of type TERM.
20579 # We will switch to type 'k' before outputting the tokens.
20580 elsif ( $is_format_END_DATA{$tok_kw} ) {
20581 $type = ';'; # make tokenizer look for TERM next
20582 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
20583 last;
20584 }
20585
20586 elsif ( $is_keyword{$tok_kw} ) {
20587 $type = 'k';
20588
20589 # Since for and foreach may not be followed immediately
20590 # by an opening paren, we have to remember which keyword
20591 # is associated with the next '('
20592 if ( $is_for_foreach{$tok} ) {
20593 if ( new_statement_ok() ) {
20594 $want_paren = $tok;
20595 }
20596 }
20597
20598 # recognize 'use' statements, which are special
20599 elsif ( $is_use_require{$tok} ) {
20600 $statement_type = $tok;
20601 error_if_expecting_OPERATOR()
20602 if ( $expecting == OPERATOR );
20603 }
20604
20605 # remember my and our to check for trailing ": shared"
20606 elsif ( $is_my_our{$tok} ) {
20607 $statement_type = $tok;
20608 }
20609
20610 # Check for misplaced 'elsif' and 'else', but allow isolated
20611 # else or elsif blocks to be formatted. This is indicated
20612 # by a last noblank token of ';'
20613 elsif ( $tok eq 'elsif' ) {
20614 if ( $last_nonblank_token ne ';'
20615 && $last_nonblank_block_type !~
20616 /^(if|elsif|unless)$/ )
20617 {
20618 warning(
20619"expecting '$tok' to follow one of 'if|elsif|unless'\n"
20620 );
20621 }
20622 }
20623 elsif ( $tok eq 'else' ) {
20624
20625 # patched for SWITCH/CASE
20626 if ( $last_nonblank_token ne ';'
20627 && $last_nonblank_block_type !~
20628 /^(if|elsif|unless|case|when)$/ )
20629 {
20630 warning(
20631"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
20632 );
20633 }
20634 }
20635 elsif ( $tok eq 'continue' ) {
20636 if ( $last_nonblank_token ne ';'
20637 && $last_nonblank_block_type !~
20638 /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
20639 {
20640
20641 # note: ';' '{' and '}' in list above
20642 # because continues can follow bare blocks;
20643 # ':' is labeled block
20644 warning("'$tok' should follow a block\n");
20645 }
20646 }
20647
20648 # patch for SWITCH/CASE if 'case' and 'when are
20649 # treated as keywords.
20650 elsif ( $tok eq 'when' || $tok eq 'case' ) {
20651 $statement_type = $tok; # next '{' is block
20652 }
20653 }
20654
20655 # check for inline label following
20656 # /^(redo|last|next|goto)$/
20657 elsif (( $last_nonblank_type eq 'k' )
20658 && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
20659 {
20660 $type = 'j';
20661 next;
20662 }
20663
20664 # something else --
20665 else {
20666
20667 scan_bare_identifier();
20668 if ( $type eq 'w' ) {
20669
20670 if ( $expecting == OPERATOR ) {
20671
20672 # don't complain about possible indirect object
20673 # notation.
20674 # For example:
20675 # package main;
20676 # sub new($) { ... }
20677 # $b = new A::; # calls A::new
20678 # $c = new A; # same thing but suspicious
20679 # This will call A::new but we have a 'new' in
20680 # main:: which looks like a constant.
20681 #
20682 if ( $last_nonblank_type eq 'C' ) {
20683 if ( $tok !~ /::$/ ) {
20684 complain(<<EOM);
20685Expecting operator after '$last_nonblank_token' but found bare word '$tok'
20686 Maybe indirectet object notation?
20687EOM
20688 }
20689 }
20690 else {
20691 error_if_expecting_OPERATOR("bareword");
20692 }
20693 }
20694
20695 # mark bare words immediately followed by a paren as
20696 # functions
20697 $next_tok = $$rtokens[ $i + 1 ];
20698 if ( $next_tok eq '(' ) {
20699 $type = 'U';
20700 }
20701
20702 # mark bare words following a file test operator as
20703 # something that will expect an operator next.
20704 # patch 072901: unless followed immediately by a paren,
20705 # in which case it must be a function call (pid.t)
20706 if ( $last_nonblank_type eq 'F' && $next_tok ne '(' ) {
20707 $type = 'C';
20708 }
20709
20710 # patch for SWITCH/CASE if 'case' and 'when are
20711 # not treated as keywords:
20712 if (
20713 (
20714 $tok eq 'case'
20715 && $brace_type[$brace_depth] eq 'switch'
20716 )
20717 || ( $tok eq 'when'
20718 && $brace_type[$brace_depth] eq 'given' )
20719 )
20720 {
20721 $statement_type = $tok; # next '{' is block
20722 $type = 'k'; # for keyword syntax coloring
20723 }
20724
20725 # patch for SWITCH/CASE if switch and given not keywords
20726 # Switch is not a perl 5 keyword, but we will gamble
20727 # and mark switch followed by paren as a keyword. This
20728 # is only necessary to get html syntax coloring nice,
20729 # and does not commit this as being a switch/case.
20730 if ( $next_nonblank_token eq '('
20731 && ( $tok eq 'switch' || $tok eq 'given' ) )
20732 {
20733 $type = 'k'; # for keyword syntax coloring
20734 }
20735 }
20736 }
20737 }
20738
20739 ###############################################################
20740 # section 2: strings of digits
20741 ###############################################################
20742 elsif ( $pre_type eq 'd' ) {
20743 $expecting = operator_expected( $prev_type, $tok, $next_type );
20744 error_if_expecting_OPERATOR("Number")
20745 if ( $expecting == OPERATOR );
20746 scan_number();
20747 if ( !defined($number) ) {
20748
20749 # shouldn't happen - we should always get a number
20750 warning("non-number beginning with digit--program bug\n");
20751 report_definite_bug();
20752 }
20753 }
20754
20755 ###############################################################
20756 # section 3: all other tokens
20757 ###############################################################
20758
20759 else {
20760 last if ( $tok eq '#' );
20761 my $code = $tokenization_code->{$tok};
20762 if ($code) {
20763 $expecting =
20764 operator_expected( $prev_type, $tok, $next_type );
20765 $code->();
20766 redo if $in_quote;
20767 }
20768 }
20769 }
20770
20771 # -----------------------------
20772 # end of main tokenization loop
20773 # -----------------------------
20774
20775 if ( $i_tok >= 0 ) {
20776 $output_token_type[$i_tok] = $type;
20777 $output_block_type[$i_tok] = $block_type;
20778 $output_container_type[$i_tok] = $container_type;
20779 $output_type_sequence[$i_tok] = $type_sequence;
20780 }
20781
20782 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
20783 $last_last_nonblank_token = $last_nonblank_token;
20784 $last_last_nonblank_type = $last_nonblank_type;
20785 $last_last_nonblank_block_type = $last_nonblank_block_type;
20786 $last_last_nonblank_container_type = $last_nonblank_container_type;
20787 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
20788 $last_nonblank_token = $tok;
20789 $last_nonblank_type = $type;
20790 $last_nonblank_block_type = $block_type;
20791 $last_nonblank_container_type = $container_type;
20792 $last_nonblank_type_sequence = $type_sequence;
20793 $last_nonblank_prototype = $prototype;
20794 }
20795
20796 # reset indentation level if necessary at a sub or package
20797 # in an attempt to recover from a nesting error
20798 if ( $level_in_tokenizer < 0 ) {
20799 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
20800 reset_indentation_level(0);
20801 brace_warning("resetting level to 0 at $1 $2\n");
20802 }
20803 }
20804
20805 # all done tokenizing this line ...
20806 # now prepare the final list of tokens and types
20807
20808 my @token_type = (); # stack of output token types
20809 my @block_type = (); # stack of output code block types
20810 my @container_type = (); # stack of output code container types
20811 my @type_sequence = (); # stack of output type sequence numbers
20812 my @tokens = (); # output tokens
20813 my @levels = (); # structural brace levels of output tokens
20814 my @slevels = (); # secondary nesting levels of output tokens
20815 my @nesting_tokens = (); # string of tokens leading to this depth
20816 my @nesting_types = (); # string of token types leading to this depth
20817 my @nesting_blocks = (); # string of block types leading to this depth
20818 my @nesting_lists = (); # string of list types leading to this depth
20819 my @ci_string = (); # string needed to compute continuation indentation
20820 my @container_environment = (); # BLOCK or LIST
20821 my $container_environment = '';
20822 my $im = -1; # previous $i value
20823 my $num;
20824 my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
20825
20826# =head1 Computing Token Indentation
20827#
20828# The final section of the tokenizer forms tokens and also computes
20829# parameters needed to find indentation. It is much easier to do it
20830# in the tokenizer than elsewhere. Here is a brief description of how
20831# indentation is computed. Perl::Tidy computes indentation as the sum
20832# of 2 terms:
20833#
20834# (1) structural indentation, such as if/else/elsif blocks
20835# (2) continuation indentation, such as long parameter call lists.
20836#
20837# These are occasionally called primary and secondary indentation.
20838#
20839# Structural indentation is introduced by tokens of type '{', although
20840# the actual tokens might be '{', '(', or '['. Structural indentation
20841# is of two types: BLOCK and non-BLOCK. Default structural indentation
20842# is 4 characters if the standard indentation scheme is used.
20843#
20844# Continuation indentation is introduced whenever a line at BLOCK level
20845# is broken before its termination. Default continuation indentation
20846# is 2 characters in the standard indentation scheme.
20847#
20848# Both types of indentation may be nested arbitrarily deep and
20849# interlaced. The distinction between the two is somewhat arbitrary.
20850#
20851# For each token, we will define two variables which would apply if
20852# the current statement were broken just before that token, so that
20853# that token started a new line:
20854#
20855# $level = the structural indentation level,
20856# $ci_level = the continuation indentation level
20857#
20858# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
20859# assuming defaults. However, in some special cases it is customary
20860# to modify $ci_level from this strict value.
20861#
20862# The total structural indentation is easy to compute by adding and
20863# subtracting 1 from a saved value as types '{' and '}' are seen. The
20864# running value of this variable is $level_in_tokenizer.
20865#
20866# The total continuation is much more difficult to compute, and requires
20867# several variables. These veriables are:
20868#
20869# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
20870# each indentation level, if there are intervening open secondary
20871# structures just prior to that level.
20872# $continuation_string_in_tokenizer = a string of 1's and 0's indicating
20873# if the last token at that level is "continued", meaning that it
20874# is not the first token of an expression.
20875# $nesting_block_string = a string of 1's and 0's indicating, for each
20876# indentation level, if the level is of type BLOCK or not.
20877# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
20878# $nesting_list_string = a string of 1's and 0's indicating, for each
20879# indentation level, if it is is appropriate for list formatting.
20880# If so, continuation indentation is used to indent long list items.
20881# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
20882# @slevel_stack = a stack of total nesting depths at each
20883# structural indentation level, where "total nesting depth" means
20884# the nesting depth that would occur if every nesting token -- '{', '[',
20885# and '(' -- , regardless of context, is used to compute a nesting
20886# depth.
20887
20888 #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
20889 #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
20890
20891 my ( $ci_string_i, $level_i, $nesting_block_string_i,
20892 $nesting_list_string_i, $nesting_token_string_i,
20893 $nesting_type_string_i, );
20894
20895 foreach $i (@output_token_list) { # scan the list of pre-tokens indexes
20896
20897 # self-checking for valid token types
20898 my $type = $output_token_type[$i];
20899 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
20900 $level_i = $level_in_tokenizer;
20901
20902 # This can happen by running perltidy on non-scripts
20903 # although it could also be bug introduced by programming change.
20904 # Perl silently accepts a 032 (^Z) and takes it as the end
20905 if ( !$is_valid_token_type{$type} ) {
20906 my $val = ord($type);
20907 warning(
20908 "unexpected character decimal $val ($type) in script\n");
20909 $tokenizer_self->{_in_error} = 1;
20910 }
20911
20912 # ----------------------------------------------------------------
20913 # TOKEN TYPE PATCHES
20914 # output __END__, __DATA__, and format as type 'k' instead of ';'
20915 # to make html colors correct, etc.
20916 my $fix_type = $type;
20917 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
20918
20919 # output anonymous 'sub' as keyword
20920 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
20921
20922 # -----------------------------------------------------------------
20923
20924 $nesting_token_string_i = $nesting_token_string;
20925 $nesting_type_string_i = $nesting_type_string;
20926 $nesting_block_string_i = $nesting_block_string;
20927 $nesting_list_string_i = $nesting_list_string;
20928
20929 # set primary indentation levels based on structural braces
20930 # Note: these are set so that the leading braces have a HIGHER
20931 # level than their CONTENTS, which is convenient for indentation
20932 # Also, define continuation indentation for each token.
20933 if ( $type eq '{' || $type eq 'L' ) {
20934
20935 # use environment before updating
20936 $container_environment =
20937 $nesting_block_flag ? 'BLOCK'
20938 : $nesting_list_flag ? 'LIST'
20939 : "";
20940
20941 # if the difference between total nesting levels is not 1,
20942 # there are intervening non-structural nesting types between
20943 # this '{' and the previous unclosed '{'
20944 my $intervening_secondary_structure = 0;
20945 if (@slevel_stack) {
20946 $intervening_secondary_structure =
20947 $slevel_in_tokenizer - $slevel_stack[-1];
20948 }
20949
20950 # =head1 Continuation Indentation
20951 #
20952 # Having tried setting continuation indentation both in the formatter and
20953 # in the tokenizer, I can say that setting it in the tokenizer is much,
20954 # much easier. The formatter already has too much to do, and can't
20955 # make decisions on line breaks without knowing what 'ci' will be at
20956 # arbitrary locations.
20957 #
20958 # But a problem with setting the continuation indentation (ci) here
20959 # in the tokenizer is that we do not know where line breaks will actually
20960 # be. As a result, we don't know if we should propagate continuation
20961 # indentation to higher levels of structure.
20962 #
20963 # For nesting of only structural indentation, we never need to do this.
20964 # For example, in a long if statement, like this
20965 #
20966 # if ( !$output_block_type[$i]
20967 # && ($in_statement_continuation) )
20968 # { <--outdented
20969 # do_something();
20970 # }
20971 #
20972 # the second line has ci but we do normally give the lines within the BLOCK
20973 # any ci. This would be true if we had blocks nested arbitrarily deeply.
20974 #
20975 # But consider something like this, where we have created a break after
20976 # an opening paren on line 1, and the paren is not (currently) a
20977 # structural indentation token:
20978 #
20979 # my $file = $menubar->Menubutton(
20980 # qw/-text File -underline 0 -menuitems/ => [
20981 # [
20982 # Cascade => '~View',
20983 # -menuitems => [
20984 # ...
20985 #
20986 # The second line has ci, so it would seem reasonable to propagate it
20987 # down, giving the third line 1 ci + 1 indentation. This suggests the
20988 # following rule, which is currently used to propagating ci down: if there
20989 # are any non-structural opening parens (or brackets, or braces), before
20990 # an opening structural brace, then ci is propagated down, and otherwise
20991 # not. The variable $intervening_secondary_structure contains this
20992 # information for the current token, and the string
20993 # "$ci_string_in_tokenizer" is a stack of previous values of this
20994 # variable.
20995
20996 # save the current states
20997 push( @slevel_stack, 1 + $slevel_in_tokenizer );
20998 $level_in_tokenizer++;
20999
21000 if ( $output_block_type[$i] ) {
21001 $nesting_block_flag = 1;
21002 $nesting_block_string .= '1';
21003 }
21004 else {
21005 $nesting_block_flag = 0;
21006 $nesting_block_string .= '0';
21007 }
21008
21009 # we will use continuation indentation within containers
21010 # which are not blocks and not logical expressions
21011 my $bit = 0;
21012 if ( !$output_block_type[$i] ) {
21013
21014 # propagate flag down at nested open parens
21015 if ( $output_container_type[$i] eq '(' ) {
21016 $bit = 1 if $nesting_list_flag;
21017 }
21018
21019 # use list continuation if not a logical grouping
21020 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
21021 else {
21022 $bit = 1
21023 unless
21024 $is_logical_container{ $output_container_type[$i] };
21025 }
21026 }
21027 $nesting_list_string .= $bit;
21028 $nesting_list_flag = $bit;
21029
21030 $ci_string_in_tokenizer .=
21031 ( $intervening_secondary_structure != 0 ) ? '1' : '0';
21032 $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
21033 $continuation_string_in_tokenizer .=
21034 ( $in_statement_continuation > 0 ) ? '1' : '0';
21035
21036 # Sometimes we want to give an opening brace continuation indentation,
21037 # and sometimes not. For code blocks, we don't do it, so that the leading
21038 # '{' gets outdented, like this:
21039 #
21040 # if ( !$output_block_type[$i]
21041 # && ($in_statement_continuation) )
21042 # { <--outdented
21043 #
21044 # For other types, we will give them continuation indentation. For example,
21045 # here is how a list looks with the opening paren indented:
21046 #
21047 # @LoL =
21048 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
21049 # [ "homer", "marge", "bart" ], );
21050 #
21051 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
21052
21053 my $total_ci = $ci_string_sum;
21054 if (
21055 !$output_block_type[$i] # patch: skip for BLOCK
21056 && ($in_statement_continuation)
21057 )
21058 {
21059 $total_ci += $in_statement_continuation
21060 unless ( $ci_string_in_tokenizer =~ /1$/ );
21061 }
21062
21063 $ci_string_i = $total_ci;
21064 $in_statement_continuation = 0;
21065 }
21066
21067 elsif ( $type eq '}' || $type eq 'R' ) {
21068
21069 # only a nesting error in the script would prevent popping here
21070 if ( @slevel_stack > 1 ) { pop(@slevel_stack); }
21071
21072 $level_i = --$level_in_tokenizer;
21073
21074 # restore previous level values
21075 if ( length($nesting_block_string) > 1 )
21076 { # true for valid script
21077 chop $nesting_block_string;
21078 $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
21079 chop $nesting_list_string;
21080 $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
21081
21082 chop $ci_string_in_tokenizer;
21083 $ci_string_sum =
21084 ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
21085
21086 $in_statement_continuation =
21087 chop $continuation_string_in_tokenizer;
21088
21089 # zero continuation flag at terminal BLOCK '}' which
21090 # ends a statement.
21091 if ( $output_block_type[$i] ) {
21092
21093 # ...These include non-anonymous subs
21094 # note: could be sub ::abc { or sub 'abc
21095 if ( $output_block_type[$i] =~ m/^sub\s*/gc ) {
21096
21097 # note: older versions of perl require the /gc modifier
21098 # here or else the \G does not work.
21099 if ( $output_block_type[$i] =~ /\G('|::|\w)/gc ) {
21100 $in_statement_continuation = 0;
21101 }
21102 }
21103
21104# ...and include all block types except user subs with
21105# block prototypes and these: (sort|grep|map|do|eval)
21106# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
21107 elsif (
21108 $is_zero_continuation_block_type{ $output_block_type
21109 [$i] } )
21110 {
21111 $in_statement_continuation = 0;
21112 }
21113
21114 # ..but these are not terminal types:
21115 # /^(sort|grep|map|do|eval)$/ )
21116 elsif (
21117 $is_not_zero_continuation_block_type{
21118 $output_block_type[$i] } )
21119 {
21120 }
21121
21122 # ..and a block introduced by a label
21123 # /^\w+\s*:$/gc ) {
21124 elsif ( $output_block_type[$i] =~ /:$/ ) {
21125 $in_statement_continuation = 0;
21126 }
21127
21128 # ..nor user function with block prototype
21129 else {
21130 }
21131 }
21132
21133 # If we are in a list, then
21134 # we must set continuatoin indentation at the closing
21135 # paren of something like this (paren after $check):
21136 # assert(
21137 # __LINE__,
21138 # ( not defined $check )
21139 # or ref $check
21140 # or $check eq "new"
21141 # or $check eq "old",
21142 # );
21143 elsif ( $tok eq ')' ) {
21144 $in_statement_continuation = 1
21145 if $output_container_type[$i] =~ /^[;,\{\}]$/;
21146 }
21147 }
21148
21149 # use environment after updating
21150 $container_environment =
21151 $nesting_block_flag ? 'BLOCK'
21152 : $nesting_list_flag ? 'LIST'
21153 : "";
21154 $ci_string_i = $ci_string_sum + $in_statement_continuation;
21155 $nesting_block_string_i = $nesting_block_string;
21156 $nesting_list_string_i = $nesting_list_string;
21157 }
21158
21159 # not a structural indentation type..
21160 else {
21161
21162 $container_environment =
21163 $nesting_block_flag ? 'BLOCK'
21164 : $nesting_list_flag ? 'LIST'
21165 : "";
21166
21167 # zero the continuation indentation at certain tokens so
21168 # that they will be at the same level as its container. For
21169 # commas, this simplifies the -lp indentation logic, which
21170 # counts commas. For ?: it makes them stand out.
21171 if ($nesting_list_flag) {
21172 if ( $type =~ /^[,\?\:]$/ ) {
21173 $in_statement_continuation = 0;
21174 }
21175 }
21176
21177 # be sure binary operators get continuation indentation
21178 if (
21179 $container_environment
21180 && ( $type eq 'k' && $is_binary_keyword{$tok}
21181 || $is_binary_type{$type} )
21182 )
21183 {
21184 $in_statement_continuation = 1;
21185 }
21186
21187 # continuation indentation is sum of any open ci from previous
21188 # levels plus the current level
21189 $ci_string_i = $ci_string_sum + $in_statement_continuation;
21190
21191 # update continuation flag ...
21192 # if this isn't a blank or comment..
21193 if ( $type ne 'b' && $type ne '#' ) {
21194
21195 # and we are in a BLOCK
21196 if ($nesting_block_flag) {
21197
21198 # the next token after a ';' and label starts a new stmt
21199 if ( $type eq ';' || $type eq 'J' ) {
21200 $in_statement_continuation = 0;
21201 }
21202
21203 # otherwise, we are continuing the current statement
21204 else {
21205 $in_statement_continuation = 1;
21206 }
21207 }
21208
21209 # if we are not in a BLOCK..
21210 else {
21211
21212 # do not use continuation indentation if not list
21213 # environment (could be within if/elsif clause)
21214 if ( !$nesting_list_flag ) {
21215 $in_statement_continuation = 0;
21216 }
21217
21218 # otherwise, the next token after a ',' starts a new term
21219 elsif ( $type eq ',' ) {
21220 $in_statement_continuation = 0;
21221 }
21222
21223 # otherwise, we are continuing the current term
21224 else {
21225 $in_statement_continuation = 1;
21226 }
21227 }
21228 }
21229 }
21230
21231 if ( $level_in_tokenizer < 0 ) {
21232 unless ($saw_negative_indentation) {
21233 $saw_negative_indentation = 1;
21234 warning("Starting negative indentation\n");
21235 }
21236 }
21237
21238 # set secondary nesting levels based on all continment token types
21239 # Note: these are set so that the nesting depth is the depth
21240 # of the PREVIOUS TOKEN, which is convenient for setting
21241 # the stength of token bonds
21242 my $slevel_i = $slevel_in_tokenizer;
21243
21244 # /^[L\{\(\[]$/
21245 if ( $is_opening_type{$type} ) {
21246 $slevel_in_tokenizer++;
21247 $nesting_token_string .= $tok;
21248 $nesting_type_string .= $type;
21249 }
21250
21251 # /^[R\}\)\]]$/
21252 elsif ( $is_closing_type{$type} ) {
21253 $slevel_in_tokenizer--;
21254 my $char = chop $nesting_token_string;
21255
21256 if ( $char ne $matching_start_token{$tok} ) {
21257 $nesting_token_string .= $char . $tok;
21258 $nesting_type_string .= $type;
21259 }
21260 else {
21261 chop $nesting_type_string;
21262 }
21263 }
21264
21265 push( @block_type, $output_block_type[$i] );
21266 push( @ci_string, $ci_string_i );
21267 push( @container_environment, $container_environment );
21268 push( @container_type, $output_container_type[$i] );
21269 push( @levels, $level_i );
21270 push( @nesting_tokens, $nesting_token_string_i );
21271 push( @nesting_types, $nesting_type_string_i );
21272 push( @slevels, $slevel_i );
21273 push( @token_type, $fix_type );
21274 push( @type_sequence, $output_type_sequence[$i] );
21275 push( @nesting_blocks, $nesting_block_string );
21276 push( @nesting_lists, $nesting_list_string );
21277
21278 # now form the previous token
21279 if ( $im >= 0 ) {
21280 $num =
21281 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
21282
21283 if ( $num > 0 ) {
21284 push( @tokens,
21285 substr( $input_line, $$rtoken_map[$im], $num ) );
21286 }
21287 }
21288 $im = $i;
21289 }
21290
21291 $num = length($input_line) - $$rtoken_map[$im]; # make the last token
21292 if ( $num > 0 ) {
21293 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
21294 }
21295
21296 $tokenizer_self->{_in_quote} = $in_quote;
21297 $tokenizer_self->{_rhere_target_list} = \@here_target_list;
21298
21299 $line_of_tokens->{_rtoken_type} = \@token_type;
21300 $line_of_tokens->{_rtokens} = \@tokens;
21301 $line_of_tokens->{_rblock_type} = \@block_type;
21302 $line_of_tokens->{_rcontainer_type} = \@container_type;
21303 $line_of_tokens->{_rcontainer_environment} = \@container_environment;
21304 $line_of_tokens->{_rtype_sequence} = \@type_sequence;
21305 $line_of_tokens->{_rlevels} = \@levels;
21306 $line_of_tokens->{_rslevels} = \@slevels;
21307 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
21308 $line_of_tokens->{_rci_levels} = \@ci_string;
21309 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
21310
21311 return;
21312 }
21313} # end tokenize_this_line
21314
21315sub new_statement_ok {
21316
21317 # return true if the current token can start a new statement
21318
21319 return label_ok() # a label would be ok here
21320
21321 || $last_nonblank_type eq 'J'; # or we follow a label
21322
21323}
21324
21325sub label_ok {
21326
21327 # Decide if a bare word followed by a colon here is a label
21328
21329 # if it follows an opening or closing code block curly brace..
21330 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
21331 && $last_nonblank_type eq $last_nonblank_token )
21332 {
21333
21334 # it is a label if and only if the curly encloses a code block
21335 return $brace_type[$brace_depth];
21336 }
21337
21338 # otherwise, it is a label if and only if it follows a ';'
21339 # (real or fake)
21340 else {
21341 return ( $last_nonblank_type eq ';' );
21342 }
21343}
21344
21345sub code_block_type {
21346
21347 # Decide if this is a block of code, and its type.
21348 # Must be called only when $type = $token = '{'
21349 # The problem is to distinguish between the start of a block of code
21350 # and the start of an anonymous hash reference
21351 # Returns "" if not code block, otherwise returns 'last_nonblank_token'
21352 # to indicate the type of code block. (For example, 'last_nonblank_token'
21353 # might be 'if' for an if block, 'else' for an else block, etc).
21354
21355 # handle case of multiple '{'s
21356
21357# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
21358
21359 my ( $i, $rtokens, $rtoken_type ) = @_;
21360 if ( $last_nonblank_token eq '{'
21361 && $last_nonblank_type eq $last_nonblank_token )
21362 {
21363
21364 # opening brace where a statement may appear is probably
21365 # a code block but might be and anonymous hash reference
21366 if ( $brace_type[$brace_depth] ) {
21367 return decide_if_code_block( $i, $rtokens, $rtoken_type );
21368 }
21369
21370 # cannot start a code block within an anonymous hash
21371 else {
21372 return "";
21373 }
21374 }
21375
21376 elsif ( $last_nonblank_token eq ';' ) {
21377
21378 # an opening brace where a statement may appear is probably
21379 # a code block but might be and anonymous hash reference
21380 return decide_if_code_block( $i, $rtokens, $rtoken_type );
21381 }
21382
21383 # handle case of '}{'
21384 elsif ($last_nonblank_token eq '}'
21385 && $last_nonblank_type eq $last_nonblank_token )
21386 {
21387
21388 # a } { situation ...
21389 # could be hash reference after code block..(blktype1.t)
21390 if ($last_nonblank_block_type) {
21391 return decide_if_code_block( $i, $rtokens, $rtoken_type );
21392 }
21393
21394 # must be a block if it follows a closing hash reference
21395 else {
21396 return $last_nonblank_token;
21397 }
21398 }
21399
21400 # NOTE: braces after type characters start code blocks, but for
21401 # simplicity these are not identified as such. See also
21402 # sub is_non_structural_brace.
21403 # elsif ( $last_nonblank_type eq 't' ) {
21404 # return $last_nonblank_token;
21405 # }
21406
21407 # brace after label:
21408 elsif ( $last_nonblank_type eq 'J' ) {
21409 return $last_nonblank_token;
21410 }
21411
21412# otherwise, look at previous token. This must be a code block if
21413# it follows any of these:
21414# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
21415 elsif ( $is_code_block_token{$last_nonblank_token} ) {
21416 return $last_nonblank_token;
21417 }
21418
21419 # or a sub definition
21420 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
21421 && $last_nonblank_token =~ /^sub\b/ )
21422 {
21423 return $last_nonblank_token;
21424 }
21425
21426 # user-defined subs with block parameters (like grep/map/eval)
21427 elsif ( $last_nonblank_type eq 'G' ) {
21428 return $last_nonblank_token;
21429 }
21430
21431 # check bareword
21432 elsif ( $last_nonblank_type eq 'w' ) {
21433 return decide_if_code_block( $i, $rtokens, $rtoken_type );
21434 }
21435
21436 # anything else must be anonymous hash reference
21437 else {
21438 return "";
21439 }
21440}
21441
21442sub decide_if_code_block {
21443
21444 my ( $i, $rtokens, $rtoken_type ) = @_;
21445 my ( $next_nonblank_token, $i_next ) =
21446 find_next_nonblank_token( $i, $rtokens );
21447
21448 # we are at a '{' where a statement may appear.
21449 # We must decide if this brace starts an anonymous hash or a code
21450 # block.
21451 # return "" if anonymous hash, and $last_nonblank_token otherwise
21452
21453 # initialize to be code BLOCK
21454 my $code_block_type = $last_nonblank_token;
21455
21456 # Check for the common case of an empty anonymous hash reference:
21457 # Maybe something like sub { { } }
21458 if ( $next_nonblank_token eq '}' ) {
21459 $code_block_type = "";
21460 }
21461
21462 else {
21463
21464 # To guess if this '{' is an anonymous hash reference, look ahead
21465 # and test as follows:
21466 #
21467 # it is a hash reference if next come:
21468 # - a string or digit followed by a comma or =>
21469 # - bareword followed by =>
21470 # otherwise it is a code block
21471 #
21472 # Examples of anonymous hash ref:
21473 # {'aa',};
21474 # {1,2}
21475 #
21476 # Examples of code blocks:
21477 # {1; print "hello\n", 1;}
21478 # {$a,1};
21479
21480 # We are only going to look ahead one more (nonblank/comment) line.
21481 # Strange formatting could cause a bad guess, but that's unlikely.
21482 my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
21483 my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
21484 my ( $rpre_tokens, $rpre_types ) =
21485 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
21486 # generous, and prevents
21487 # wasting lots of
21488 # time in mangled files
21489 if ( defined($rpre_types) && @$rpre_types ) {
21490 push @pre_types, @$rpre_types;
21491 push @pre_tokens, @$rpre_tokens;
21492 }
21493
21494 # put a sentinal token to simplify stopping the search
21495 push @pre_types, '}';
21496
21497 my $jbeg = 0;
21498 $jbeg = 1 if $pre_types[0] eq 'b';
21499
21500 # first look for one of these
21501 # - bareword
21502 # - bareword with leading -
21503 # - digit
21504 # - quoted string
21505 my $j = $jbeg;
21506 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
21507
21508 # find the closing quote; don't worry about escapes
21509 my $quote_mark = $pre_types[$j];
21510 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
21511 if ( $pre_types[$k] eq $quote_mark ) {
21512 $j = $k + 1;
21513 my $next = $pre_types[$j];
21514 last;
21515 }
21516 }
21517 }
21518 elsif ( $pre_types[$j] eq 'd' ) {
21519 $j++;
21520 }
21521 elsif ( $pre_types[$j] eq 'w' ) {
21522 unless ( $is_keyword{ $pre_tokens[$j] } ) {
21523 $j++;
21524 }
21525 }
21526 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
21527 $j++;
21528 }
21529 if ( $j > $jbeg ) {
21530
21531 $j++ if $pre_types[$j] eq 'b';
21532
21533 # it's a hash ref if a comma or => follow next
21534 if ( $pre_types[$j] eq ','
21535 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
21536 {
21537 $code_block_type = "";
21538 }
21539 }
21540 }
21541
21542 return $code_block_type;
21543}
21544
21545sub unexpected {
21546
21547 # report unexpected token type and show where it is
21548 my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_;
21549 $unexpected_error_count++;
21550 if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) {
21551 my $msg = "found $found where $expecting expected";
21552 my $pos = $$rpretoken_map[$i_tok];
21553 interrupt_logfile();
21554 my ( $offset, $numbered_line, $underline ) =
21555 make_numbered_line( $input_line_number, $input_line, $pos );
21556 $underline = write_on_underline( $underline, $pos - $offset, '^' );
21557
21558 my $trailer = "";
21559 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
21560 my $pos_prev = $$rpretoken_map[$last_nonblank_i];
21561 my $num;
21562 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
21563 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
21564 }
21565 else {
21566 $num = $pos - $pos_prev;
21567 }
21568 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
21569
21570 $underline =
21571 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
21572 $trailer = " (previous token underlined)";
21573 }
21574 warning( $numbered_line . "\n" );
21575 warning( $underline . "\n" );
21576 warning( $msg . $trailer . "\n" );
21577 resume_logfile();
21578 }
21579}
21580
21581sub indicate_error {
21582 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
21583 interrupt_logfile();
21584 warning($msg);
21585 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
21586 resume_logfile();
21587}
21588
21589sub write_error_indicator_pair {
21590 my ( $line_number, $input_line, $pos, $carrat ) = @_;
21591 my ( $offset, $numbered_line, $underline ) =
21592 make_numbered_line( $line_number, $input_line, $pos );
21593 $underline = write_on_underline( $underline, $pos - $offset, $carrat );
21594 warning( $numbered_line . "\n" );
21595 $underline =~ s/\s*$//;
21596 warning( $underline . "\n" );
21597}
21598
21599sub make_numbered_line {
21600
21601 # Given an input line, its line number, and a character position of
21602 # interest, create a string not longer than 80 characters of the form
21603 # $lineno: sub_string
21604 # such that the sub_string of $str contains the position of interest
21605 #
21606 # Here is an example of what we want, in this case we add trailing
21607 # '...' because the line is long.
21608 #
21609 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
21610 #
21611 # Here is another example, this time in which we used leading '...'
21612 # because of excessive length:
21613 #
21614 # 2: ... er of the World Wide Web Consortium's
21615 #
21616 # input parameters are:
21617 # $lineno = line number
21618 # $str = the text of the line
21619 # $pos = position of interest (the error) : 0 = first character
21620 #
21621 # We return :
21622 # - $offset = an offset which corrects the position in case we only
21623 # display part of a line, such that $pos-$offset is the effective
21624 # position from the start of the displayed line.
21625 # - $numbered_line = the numbered line as above,
21626 # - $underline = a blank 'underline' which is all spaces with the same
21627 # number of characters as the numbered line.
21628
21629 my ( $lineno, $str, $pos ) = @_;
21630 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
21631 my $excess = length($str) - $offset - 68;
21632 my $numc = ( $excess > 0 ) ? 68 : undef;
21633
21634 if ( defined($numc) ) {
21635 if ( $offset == 0 ) {
21636 $str = substr( $str, $offset, $numc - 4 ) . " ...";
21637 }
21638 else {
21639 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
21640 }
21641 }
21642 else {
21643
21644 if ( $offset == 0 ) {
21645 }
21646 else {
21647 $str = "... " . substr( $str, $offset + 4 );
21648 }
21649 }
21650
21651 my $numbered_line = sprintf( "%d: ", $lineno );
21652 $offset -= length($numbered_line);
21653 $numbered_line .= $str;
21654 my $underline = " " x length($numbered_line);
21655 return ( $offset, $numbered_line, $underline );
21656}
21657
21658sub write_on_underline {
21659
21660 # The "underline" is a string that shows where an error is; it starts
21661 # out as a string of blanks with the same length as the numbered line of
21662 # code above it, and we have to add marking to show where an error is.
21663 # In the example below, we want to write the string '--^' just below
21664 # the line of bad code:
21665 #
21666 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
21667 # ---^
21668 # We are given the current underline string, plus a position and a
21669 # string to write on it.
21670 #
21671 # In the above example, there will be 2 calls to do this:
21672 # First call: $pos=19, pos_chr=^
21673 # Second call: $pos=16, pos_chr=---
21674 #
21675 # This is a trivial thing to do with substr, but there is some
21676 # checking to do.
21677
21678 my ( $underline, $pos, $pos_chr ) = @_;
21679
21680 # check for error..shouldn't happen
21681 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
21682 return $underline;
21683 }
21684 my $excess = length($pos_chr) + $pos - length($underline);
21685 if ( $excess > 0 ) {
21686 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
21687 }
21688 substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
21689 return ($underline);
21690}
21691
21692sub is_non_structural_brace {
21693
21694 # Decide if a brace or bracket is structural or non-structural
21695 # by looking at the previous token and type
21696
21697 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
21698 # Tentatively deactivated because it caused the wrong operator expectation
21699 # for this code:
21700 # $user = @vars[1] / 100;
21701 # Must update sub operator_expected before re-implementing.
21702 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
21703 # return 0;
21704 # }
21705
21706 # NOTE: braces after type characters start code blocks, but for
21707 # simplicity these are not identified as such. See also
21708 # sub code_block_type
21709 # if ($last_nonblank_type eq 't') {return 0}
21710
21711 # otherwise, it is non-structural if it is decorated
21712 # by type information.
21713 # For example, the '{' here is non-structural: ${xxx}
21714 (
21715 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
21716
21717 # or if we follow a hash or array closing curly brace or bracket
21718 # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
21719 # because the first '}' would have been given type 'R'
21720 || $last_nonblank_type =~ /^([R\]])$/
21721 );
21722}
21723
21724sub operator_expected {
21725
21726 # Many perl symbols have two or more meanings. For example, '<<'
21727 # can be a shift operator or a here-doc operator. The
21728 # interpretation of these symbols depends on the current state of
21729 # the tokenizer, which may either be expecting a term or an
21730 # operator. For this example, a << would be a shift if an operator
21731 # is expected, and a here-doc if a term is expected. This routine
21732 # is called to make this decision for any current token. It returns
21733 # one of three possible values:
21734 #
21735 # OPERATOR - operator expected (or at least, not a term)
21736 # UNKNOWN - can't tell
21737 # TERM - a term is expected (or at least, not an operator)
21738 #
21739 # The decision is based on what has been seen so far. This
21740 # information is stored in the "$last_nonblank_type" and
21741 # "$last_nonblank_token" variables. For example, if the
21742 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
21743 # if $last_nonblank_type is 'n' (numeric), we are expecting an
21744 # OPERATOR.
21745 #
21746 # If a UNKNOWN is returned, the calling routine must guess. A major
21747 # goal of this tokenizer is to minimize the possiblity of returning
21748 # UNKNOWN, because a wrong guess can spoil the formatting of a
21749 # script.
21750 #
21751 # adding NEW_TOKENS: it is critically important that this routine be
21752 # updated to allow it to determine if an operator or term is to be
21753 # expected after the new token. Doing this simply involves adding
21754 # the new token character to one of the regexes in this routine or
21755 # to one of the hash lists
21756 # that it uses, which are initialized in the BEGIN section.
21757
21758 my ( $prev_type, $tok, $next_type ) = @_;
21759 my $op_expected = UNKNOWN;
21760
21761# Note: function prototype is available for token type 'U' for future
21762# program development. It contains the leading and trailing parens,
21763# and no blanks. It might be used to eliminate token type 'C', for
21764# example (prototype = '()'). Thus:
21765# if ($last_nonblank_type eq 'U') {
21766# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
21767# }
21768
21769 # A possible filehandle (or object) requires some care...
21770 if ( $last_nonblank_type eq 'Z' ) {
21771
21772 # angle.t
21773 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
21774 $op_expected = UNKNOWN;
21775 }
21776
21777 # For possible file handle like "$a", Perl uses weird parsing rules.
21778 # For example:
21779 # print $a/2,"/hi"; - division
21780 # print $a / 2,"/hi"; - division
21781 # print $a/ 2,"/hi"; - division
21782 # print $a /2,"/hi"; - pattern (and error)!
21783 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
21784 $op_expected = TERM;
21785 }
21786
21787 # Note when an operation is being done where a
21788 # filehandle might be expected, since a change in whitespace
21789 # could change the interpretation of the statement.
21790 else {
21791 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
21792 complain("operator in print statement not recommended\n");
21793 $op_expected = OPERATOR;
21794 }
21795 }
21796 }
21797
21798 # handle something after 'do' and 'eval'
21799 elsif ( $is_block_operator{$last_nonblank_token} ) {
21800
21801 # something like $a = eval "expression";
21802 # ^
21803 if ( $last_nonblank_type eq 'k' ) {
21804 $op_expected = TERM; # expression or list mode following keyword
21805 }
21806
21807 # something like $a = do { BLOCK } / 2;
21808 # ^
21809 else {
21810 $op_expected = OPERATOR; # block mode following }
21811 }
21812 }
21813
21814 # handle bare word..
21815 elsif ( $last_nonblank_type eq 'w' ) {
21816
21817 # unfortunately, we can't tell what type of token to expect next
21818 # after most bare words
21819 $op_expected = UNKNOWN;
21820 }
21821
21822 # operator, but not term possible after these types
21823 # Note: moved ')' from type to token because parens in list context
21824 # get marked as '{' '}' now. This is a minor glitch in the following:
21825 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
21826 #
21827 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
21828 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
21829 {
21830 $op_expected = OPERATOR;
21831
21832 # in a 'use' statement, numbers and v-strings are not really
21833 # numbers, so to avoid incorrect error messages, we will
21834 # mark them as unknown for now (use.t)
21835 if ( ( $statement_type eq 'use' )
21836 && ( $last_nonblank_type =~ /^[nv]$/ ) )
21837 {
21838 $op_expected = UNKNOWN;
21839 }
21840 }
21841
21842 # no operator after many keywords, such as "die", "warn", etc
21843 elsif ( $expecting_term_token{$last_nonblank_token} ) {
21844 $op_expected = TERM;
21845 }
21846
21847 # no operator after things like + - ** (i.e., other operators)
21848 elsif ( $expecting_term_types{$last_nonblank_type} ) {
21849 $op_expected = TERM;
21850 }
21851
21852 # a few operators, like "time", have an empty prototype () and so
21853 # take no parameters but produce a value to operate on
21854 elsif ( $expecting_operator_token{$last_nonblank_token} ) {
21855 $op_expected = OPERATOR;
21856 }
21857
21858 # post-increment and decrement produce values to be operated on
21859 elsif ( $expecting_operator_types{$last_nonblank_type} ) {
21860 $op_expected = OPERATOR;
21861 }
21862
21863 # no value to operate on after sub block
21864 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
21865
21866 # a right brace here indicates the end of a simple block.
21867 # all non-structural right braces have type 'R'
21868 # all braces associated with block operator keywords have been given those
21869 # keywords as "last_nonblank_token" and caught above.
21870 # (This statement is order dependent, and must come after checking
21871 # $last_nonblank_token).
21872 elsif ( $last_nonblank_type eq '}' ) {
21873 $op_expected = TERM;
21874 }
21875
21876 # something else..what did I forget?
21877 else {
21878
21879 # collecting diagnostics on unknown operator types..see what was missed
21880 $op_expected = UNKNOWN;
21881 write_diagnostics(
21882"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
21883 );
21884 }
21885
21886 TOKENIZER_DEBUG_FLAG_EXPECT && do {
21887 print
21888"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
21889 };
21890 return $op_expected;
21891}
21892
21893# The following routines keep track of nesting depths of the nesting
21894# types, ( [ { and ?. This is necessary for determining the indentation
21895# level, and also for debugging programs. Not only do they keep track of
21896# nesting depths of the individual brace types, but they check that each
21897# of the other brace types is balanced within matching pairs. For
21898# example, if the program sees this sequence:
21899#
21900# { ( ( ) }
21901#
21902# then it can determine that there is an extra left paren somewhere
21903# between the { and the }. And so on with every other possible
21904# combination of outer and inner brace types. For another
21905# example:
21906#
21907# ( [ ..... ] ] )
21908#
21909# which has an extra ] within the parens.
21910#
21911# The brace types have indexes 0 .. 3 which are indexes into
21912# the matrices.
21913#
21914# The pair ? : are treated as just another nesting type, with ? acting
21915# as the opening brace and : acting as the closing brace.
21916#
21917# The matrix
21918#
21919# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
21920#
21921# saves the nesting depth of brace type $b (where $b is either of the other
21922# nesting types) when brace type $a enters a new depth. When this depth
21923# decreases, a check is made that the current depth of brace types $b is
21924# unchanged, or otherwise there must have been an error. This can
21925# be very useful for localizing errors, particularly when perl runs to
21926# the end of a large file (such as this one) and announces that there
21927# is a problem somewhere.
21928#
21929# A numerical sequence number is maintained for every nesting type,
21930# so that each matching pair can be uniquely identified in a simple
21931# way.
21932
21933sub increase_nesting_depth {
21934 my ( $a, $i_tok ) = @_;
21935 my $b;
21936 $current_depth[$a]++;
21937
21938 # Sequence numbers increment by number of items. This keeps
21939 # a unique set of numbers but still allows the relative location
21940 # of any type to be determined.
21941 $nesting_sequence_number[$a] += scalar(@closing_brace_names);
21942 my $seqno = $nesting_sequence_number[$a];
21943 $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
21944
21945 my $pos = $$rpretoken_map[$i_tok];
21946 $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
21947 [ $input_line_number, $input_line, $pos ];
21948
21949 for $b ( 0 .. $#closing_brace_names ) {
21950 next if ( $b == $a );
21951 $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
21952 }
21953 return $seqno;
21954}
21955
21956sub decrease_nesting_depth {
21957
21958 my ( $a, $i_tok ) = @_;
21959 my $pos = $$rpretoken_map[$i_tok];
21960 my $b;
21961 my $seqno = 0;
21962
21963 if ( $current_depth[$a] > 0 ) {
21964
21965 $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
21966
21967 # check that any brace types $b contained within are balanced
21968 for $b ( 0 .. $#closing_brace_names ) {
21969 next if ( $b == $a );
21970
21971 unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
21972 $current_depth[$b] )
21973 {
21974 my $diff = $current_depth[$b] -
21975 $depth_array[$a][$b][ $current_depth[$a] ];
21976
21977 # don't whine too many times
21978 my $saw_brace_error = get_saw_brace_error();
21979 if (
21980 $saw_brace_error <= MAX_NAG_MESSAGES
21981
21982 # if too many closing types have occured, we probably
21983 # already caught this error
21984 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
21985 )
21986 {
21987 interrupt_logfile();
21988 my $rsl =
21989 $starting_line_of_current_depth[$a][ $current_depth[$a] ];
21990 my $sl = $$rsl[0];
21991 my $rel = [ $input_line_number, $input_line, $pos ];
21992 my $el = $$rel[0];
21993 my ($ess);
21994
21995 if ( $diff == 1 || $diff == -1 ) {
21996 $ess = '';
21997 }
21998 else {
21999 $ess = 's';
22000 }
22001 my $bname =
22002 ( $diff > 0 )
22003 ? $opening_brace_names[$b]
22004 : $closing_brace_names[$b];
22005 write_error_indicator_pair( @$rsl, '^' );
22006 my $msg = <<"EOM";
22007Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
22008EOM
22009
22010 if ( $diff > 0 ) {
22011 my $rml =
22012 $starting_line_of_current_depth[$b]
22013 [ $current_depth[$b] ];
22014 my $ml = $$rml[0];
22015 $msg .=
22016" The most recent un-matched $bname is on line $ml\n";
22017 write_error_indicator_pair( @$rml, '^' );
22018 }
22019 write_error_indicator_pair( @$rel, '^' );
22020 warning($msg);
22021 resume_logfile();
22022 }
22023 increment_brace_error();
22024 }
22025 }
22026 $current_depth[$a]--;
22027 }
22028 else {
22029
22030 my $saw_brace_error = get_saw_brace_error();
22031 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
22032 my $msg = <<"EOM";
22033There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
22034EOM
22035 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
22036 }
22037 increment_brace_error();
22038 }
22039 return $seqno;
22040}
22041
22042sub check_final_nesting_depths {
22043 my ($a);
22044
22045 for $a ( 0 .. $#closing_brace_names ) {
22046
22047 if ( $current_depth[$a] ) {
22048 my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
22049 my $sl = $$rsl[0];
22050 my $msg = <<"EOM";
22051Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
22052The most recent un-matched $opening_brace_names[$a] is on line $sl
22053EOM
22054 indicate_error( $msg, @$rsl, '^' );
22055 increment_brace_error();
22056 }
22057 }
22058}
22059
22060sub numerator_expected {
22061
22062 # this is a filter for a possible numerator, in support of guessing
22063 # for the / pattern delimiter token.
22064 # returns -
22065 # 1 - yes
22066 # 0 - can't tell
22067 # -1 - no
22068 # Note: I am using the convention that variables ending in
22069 # _expected have these 3 possible values.
22070 my ( $i, $rtokens ) = @_;
22071 my $next_token = $$rtokens[ $i + 1 ];
22072 if ( $next_token eq '=' ) { $i++; } # handle /=
22073 my ( $next_nonblank_token, $i_next ) =
22074 find_next_nonblank_token( $i, $rtokens );
22075
22076 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
22077 1;
22078 }
22079 else {
22080
22081 if ( $next_nonblank_token =~ /^\s*$/ ) {
22082 0;
22083 }
22084 else {
22085 -1;
22086 }
22087 }
22088}
22089
22090sub pattern_expected {
22091
22092 # This is the start of a filter for a possible pattern.
22093 # It looks at the token after a possbible pattern and tries to
22094 # determine if that token could end a pattern.
22095 # returns -
22096 # 1 - yes
22097 # 0 - can't tell
22098 # -1 - no
22099 my ( $i, $rtokens ) = @_;
22100 my $next_token = $$rtokens[ $i + 1 ];
22101 if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier
22102 my ( $next_nonblank_token, $i_next ) =
22103 find_next_nonblank_token( $i, $rtokens );
22104
22105 # list of tokens which may follow a pattern
22106 # (can probably be expanded)
22107 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
22108 {
22109 1;
22110 }
22111 else {
22112
22113 if ( $next_nonblank_token =~ /^\s*$/ ) {
22114 0;
22115 }
22116 else {
22117 -1;
22118 }
22119 }
22120}
22121
22122sub find_next_nonblank_token_on_this_line {
22123 my ( $i, $rtokens ) = @_;
22124 my $next_nonblank_token;
22125
22126 if ( $i < $max_token_index ) {
22127 $next_nonblank_token = $$rtokens[ ++$i ];
22128
22129 if ( $next_nonblank_token =~ /^\s*$/ ) {
22130
22131 if ( $i < $max_token_index ) {
22132 $next_nonblank_token = $$rtokens[ ++$i ];
22133 }
22134 }
22135 }
22136 else {
22137 $next_nonblank_token = "";
22138 }
22139 return ( $next_nonblank_token, $i );
22140}
22141
22142sub find_next_nonblank_token {
22143 my ( $i, $rtokens ) = @_;
22144
22145 if ( $i >= $max_token_index ) {
22146
22147 if ( !$peeked_ahead ) {
22148 $peeked_ahead = 1;
22149 $rtokens = peek_ahead_for_nonblank_token($rtokens);
22150 }
22151 }
22152 my $next_nonblank_token = $$rtokens[ ++$i ];
22153
22154 if ( $next_nonblank_token =~ /^\s*$/ ) {
22155 $next_nonblank_token = $$rtokens[ ++$i ];
22156 }
22157 return ( $next_nonblank_token, $i );
22158}
22159
22160sub peek_ahead_for_n_nonblank_pre_tokens {
22161
22162 # returns next n pretokens if they exist
22163 # returns undef's if hits eof without seeing any pretokens
22164 my $max_pretokens = shift;
22165 my $line;
22166 my $i = 0;
22167 my ( $rpre_tokens, $rmap, $rpre_types );
22168
22169 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
22170 {
22171 $line =~ s/^\s*//; # trim leading blanks
22172 next if ( length($line) <= 0 ); # skip blank
22173 next if ( $line =~ /^#/ ); # skip comment
22174 ( $rpre_tokens, $rmap, $rpre_types ) =
22175 pre_tokenize( $line, $max_pretokens );
22176 last;
22177 }
22178 return ( $rpre_tokens, $rpre_types );
22179}
22180
22181# look ahead for next non-blank, non-comment line of code
22182sub peek_ahead_for_nonblank_token {
22183 my $rtokens = shift;
22184 my $line;
22185 my $i = 0;
22186
22187 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
22188 {
22189 $line =~ s/^\s*//; # trim leading blanks
22190 next if ( length($line) <= 0 ); # skip blank
22191 next if ( $line =~ /^#/ ); # skip comment
22192 my ( $rtok, $rmap, $rtype ) =
22193 pre_tokenize( $line, 2 ); # only need 2 pre-tokens
22194 my $j = $max_token_index + 1;
22195 my $tok;
22196
22197 foreach $tok (@$rtok) {
22198 last if ( $tok =~ "\n" );
22199 $$rtokens[ ++$j ] = $tok;
22200 }
22201 last;
22202 }
22203 return $rtokens;
22204}
22205
22206sub pre_tokenize {
22207
22208 # Break a string, $str, into a sequence of preliminary tokens. We
22209 # are interested in these types of tokens:
22210 # words (type='w'), example: 'max_tokens_wanted'
22211 # digits (type = 'd'), example: '0755'
22212 # whitespace (type = 'b'), example: ' '
22213 # any other single character (i.e. punct; type = the character itself).
22214 # We cannot do better than this yet because we might be in a quoted
22215 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
22216 # tokens.
22217 my ( $str, $max_tokens_wanted ) = @_;
22218
22219 # we return references to these 3 arrays:
22220 my @tokens = (); # array of the tokens themselves
22221 my @token_map = (0); # string position of start of each token
22222 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
22223
22224 do {
22225
22226 # whitespace
22227 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
22228
22229 # numbers
22230 # note that this must come before words!
22231 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
22232
22233 # words
22234 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
22235
22236 # single-character punctuation
22237 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
22238
22239 # that's all..
22240 else {
22241 return ( \@tokens, \@token_map, \@type );
22242 }
22243
22244 push @tokens, $1;
22245 push @token_map, pos($str);
22246
22247 } while ( --$max_tokens_wanted != 0 );
22248
22249 return ( \@tokens, \@token_map, \@type );
22250}
22251
22252sub show_tokens {
22253
22254 # this is an old debug routine
22255 my ( $rtokens, $rtoken_map ) = @_;
22256 my $num = scalar(@$rtokens);
22257 my $i;
22258
22259 for ( $i = 0 ; $i < $num ; $i++ ) {
22260 my $len = length( $$rtokens[$i] );
22261 print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
22262 }
22263}
22264
22265sub find_angle_operator_termination {
22266
22267 # We are looking at a '<' and want to know if it is an angle operator.
22268 # We are to return:
22269 # $i = pretoken index of ending '>' if found, current $i otherwise
22270 # $type = 'Q' if found, '>' otherwise
22271 my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_;
22272 my $i = $i_beg;
22273 my $type = '<';
22274 pos($input_line) = 1 + $$rtoken_map[$i];
22275
22276 my $filter;
22277
22278 # we just have to find the next '>' if a term is expected
22279 if ( $expecting == TERM ) { $filter = '[\>]' }
22280
22281 # we have to guess if we don't know what is expected
22282 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
22283
22284 # shouldn't happen - we shouldn't be here if operator is expected
22285 else { warning("Program Bug in find_angle_operator_termination\n") }
22286
22287 # To illustrate what we might be looking at, in case we are
22288 # guessing, here are some examples of valid angle operators
22289 # (or file globs):
22290 # <tmp_imp/*>
22291 # <FH>
22292 # <$fh>
22293 # <*.c *.h>
22294 # <_>
22295 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
22296 # <${PREFIX}*img*.$IMAGE_TYPE>
22297 # <img*.$IMAGE_TYPE>
22298 # <Timg*.$IMAGE_TYPE>
22299 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
22300 #
22301 # Here are some examples of lines which do not have angle operators:
22302 # return undef unless $self->[2]++ < $#{$self->[1]};
22303 # < 2 || @$t >
22304 #
22305 # the following line from dlister.pl caused trouble:
22306 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
22307 #
22308 # If the '<' starts an angle operator, it must end on this line and
22309 # it must not have certain characters like ';' and '=' in it. I use
22310 # this to limit the testing. This filter should be improved if
22311 # possible.
22312
22313 if ( $input_line =~ /($filter)/g ) {
22314
22315 if ( $1 eq '>' ) {
22316
22317 # We MAY have found an angle operator termination if we get
22318 # here, but we need to do more to be sure we haven't been
22319 # fooled.
22320 my $pos = pos($input_line);
22321
22322 my $pos_beg = $$rtoken_map[$i];
22323 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
22324
22325 ######################################debug#####
22326 #write_diagnostics( "ANGLE? :$str\n");
22327 #print "ANGLE: found $1 at pos=$pos\n";
22328 ######################################debug#####
22329 $type = 'Q';
22330 my $error;
22331 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
22332
22333 # It may be possible that a quote ends midway in a pretoken.
22334 # If this happens, it may be necessary to split the pretoken.
22335 if ($error) {
22336 warning(
22337 "Possible tokinization error..please check this line\n");
22338 report_possible_bug();
22339 }
22340
22341 # Now let's see where we stand....
22342 # OK if math op not possible
22343 if ( $expecting == TERM ) {
22344 }
22345
22346 # OK if there are no more than 2 pre-tokens inside
22347 # (not possible to write 2 token math between < and >)
22348 # This catches most common cases
22349 elsif ( $i <= $i_beg + 3 ) {
22350 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
22351 }
22352
22353 # Not sure..
22354 else {
22355
22356 # Let's try a Brace Test: any braces inside must balance
22357 my $br = 0;
22358 while ( $str =~ /\{/g ) { $br++ }
22359 while ( $str =~ /\}/g ) { $br-- }
22360 my $sb = 0;
22361 while ( $str =~ /\[/g ) { $sb++ }
22362 while ( $str =~ /\]/g ) { $sb-- }
22363 my $pr = 0;
22364 while ( $str =~ /\(/g ) { $pr++ }
22365 while ( $str =~ /\)/g ) { $pr-- }
22366
22367 # if braces do not balance - not angle operator
22368 if ( $br || $sb || $pr ) {
22369 $i = $i_beg;
22370 $type = '<';
22371 write_diagnostics(
22372 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
22373 }
22374
22375 # we should keep doing more checks here...to be continued
22376 # Tentatively accepting this as a valid angle operator.
22377 # There are lots more things that can be checked.
22378 else {
22379 write_diagnostics(
22380 "ANGLE-Guessing yes: $str expecting=$expecting\n");
22381 write_logfile_entry("Guessing angle operator here: $str\n");
22382 }
22383 }
22384 }
22385
22386 # didn't find ending >
22387 else {
22388 if ( $expecting == TERM ) {
22389 warning("No ending > for angle operator\n");
22390 }
22391 }
22392 }
22393 return ( $i, $type );
22394}
22395
22396sub inverse_pretoken_map {
22397
22398 # Starting with the current pre_token index $i, scan forward until
22399 # finding the index of the next pre_token whose position is $pos.
22400 my ( $i, $pos, $rtoken_map ) = @_;
22401 my $error = 0;
22402
22403 while ( ++$i <= $max_token_index ) {
22404
22405 if ( $pos <= $$rtoken_map[$i] ) {
22406
22407 # Let the calling routine handle errors in which we do not
22408 # land on a pre-token boundary. It can happen by running
22409 # perltidy on some non-perl scripts, for example.
22410 if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
22411 $i--;
22412 last;
22413 }
22414 }
22415 return ( $i, $error );
22416}
22417
22418sub guess_if_pattern_or_conditional {
22419
22420 # this routine is called when we have encountered a ? following an
22421 # unknown bareword, and we must decide if it starts a pattern or not
22422 # input parameters:
22423 # $i - token index of the ? starting possible pattern
22424 # output parameters:
22425 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
22426 # msg = a warning or diagnostic message
22427 my ( $i, $rtokens, $rtoken_map ) = @_;
22428 my $is_pattern = 0;
22429 my $msg = "guessing that ? after $last_nonblank_token starts a ";
22430
22431 if ( $i >= $max_token_index ) {
22432 $msg .= "conditional (no end to pattern found on the line)\n";
22433 }
22434 else {
22435 my $ibeg = $i;
22436 $i = $ibeg + 1;
22437 my $next_token = $$rtokens[$i]; # first token after ?
22438
22439 # look for a possible ending ? on this line..
22440 my $in_quote = 1;
22441 my $quote_depth = 0;
22442 my $quote_character = '';
22443 my $quote_pos = 0;
22444 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
22445 follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
22446 $quote_pos, $quote_depth );
22447
22448 if ($in_quote) {
22449
22450 # we didn't find an ending ? on this line,
22451 # so we bias towards conditional
22452 $is_pattern = 0;
22453 $msg .= "conditional (no ending ? on this line)\n";
22454
22455 # we found an ending ?, so we bias towards a pattern
22456 }
22457 else {
22458
22459 if ( pattern_expected( $i, $rtokens ) >= 0 ) {
22460 $is_pattern = 1;
22461 $msg .= "pattern (found ending ? and pattern expected)\n";
22462 }
22463 else {
22464 $msg .= "pattern (uncertain, but found ending ?)\n";
22465 }
22466 }
22467 }
22468 return ( $is_pattern, $msg );
22469}
22470
22471sub guess_if_pattern_or_division {
22472
22473 # this routine is called when we have encountered a / following an
22474 # unknown bareword, and we must decide if it starts a pattern or is a
22475 # division
22476 # input parameters:
22477 # $i - token index of the / starting possible pattern
22478 # output parameters:
22479 # $is_pattern = 0 if probably division, =1 if probably a pattern
22480 # msg = a warning or diagnostic message
22481 my ( $i, $rtokens, $rtoken_map ) = @_;
22482 my $is_pattern = 0;
22483 my $msg = "guessing that / after $last_nonblank_token starts a ";
22484
22485 if ( $i >= $max_token_index ) {
22486 "division (no end to pattern found on the line)\n";
22487 }
22488 else {
22489 my $ibeg = $i;
22490 my $divide_expected = numerator_expected( $i, $rtokens );
22491 $i = $ibeg + 1;
22492 my $next_token = $$rtokens[$i]; # first token after slash
22493
22494 # look for a possible ending / on this line..
22495 my $in_quote = 1;
22496 my $quote_depth = 0;
22497 my $quote_character = '';
22498 my $quote_pos = 0;
22499 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
22500 follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
22501 $quote_pos, $quote_depth );
22502
22503 if ($in_quote) {
22504
22505 # we didn't find an ending / on this line,
22506 # so we bias towards division
22507 if ( $divide_expected >= 0 ) {
22508 $is_pattern = 0;
22509 $msg .= "division (no ending / on this line)\n";
22510 }
22511 else {
22512 $msg = "multi-line pattern (division not possible)\n";
22513 $is_pattern = 1;
22514 }
22515
22516 }
22517
22518 # we found an ending /, so we bias towards a pattern
22519 else {
22520
22521 if ( pattern_expected( $i, $rtokens ) >= 0 ) {
22522
22523 if ( $divide_expected >= 0 ) {
22524
22525 if ( $i - $ibeg > 60 ) {
22526 $msg .= "division (matching / too distant)\n";
22527 $is_pattern = 0;
22528 }
22529 else {
22530 $msg .= "pattern (but division possible too)\n";
22531 $is_pattern = 1;
22532 }
22533 }
22534 else {
22535 $is_pattern = 1;
22536 $msg .= "pattern (division not possible)\n";
22537 }
22538 }
22539 else {
22540
22541 if ( $divide_expected >= 0 ) {
22542 $is_pattern = 0;
22543 $msg .= "division (pattern not possible)\n";
22544 }
22545 else {
22546 $is_pattern = 1;
22547 $msg .=
22548 "pattern (uncertain, but division would not work here)\n";
22549 }
22550 }
22551 }
22552 }
22553 return ( $is_pattern, $msg );
22554}
22555
22556sub find_here_doc {
22557
22558 # find the target of a here document, if any
22559 # input parameters:
22560 # $i - token index of the second < of <<
22561 # ($i must be less than the last token index if this is called)
22562 # output parameters:
22563 # $found_target = 0 didn't find target; =1 found target
22564 # HERE_TARGET - the target string (may be empty string)
22565 # $i - unchanged if not here doc,
22566 # or index of the last token of the here target
22567 my ( $expecting, $i, $rtokens, $rtoken_map ) = @_;
22568 my $ibeg = $i;
22569 my $found_target = 0;
22570 my $here_doc_target = '';
22571 my $here_quote_character = '';
22572 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
22573 $next_token = $$rtokens[ $i + 1 ];
22574
22575 # perl allows a backslash before the target string (heredoc.t)
22576 my $backslash = 0;
22577 if ( $next_token eq '\\' ) {
22578 $backslash = 1;
22579 $next_token = $$rtokens[ $i + 2 ];
22580 }
22581
22582 ( $next_nonblank_token, $i_next_nonblank ) =
22583 find_next_nonblank_token_on_this_line( $i, $rtokens );
22584
22585 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
22586
22587 my $in_quote = 1;
22588 my $quote_depth = 0;
22589 my $quote_pos = 0;
22590
22591 ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) =
22592 follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
22593 $here_quote_character, $quote_pos, $quote_depth );
22594
22595 if ($in_quote) { # didn't find end of quote, so no target found
22596 $i = $ibeg;
22597 }
22598 else { # found ending quote
22599 my $j;
22600 $found_target = 1;
22601
22602 my $tokj;
22603 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
22604 $tokj = $$rtokens[$j];
22605
22606 # we have to remove any backslash before the quote character
22607 # so that the here-doc-target exactly matches this string
22608 next
22609 if ( $tokj eq "\\"
22610 && $j < $i - 1
22611 && $$rtokens[ $j + 1 ] eq $here_quote_character );
22612 $here_doc_target .= $tokj;
22613 }
22614 }
22615 }
22616
22617 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
22618 $found_target = 1;
22619 write_logfile_entry(
22620 "found blank here-target after <<; suggest using \"\"\n");
22621 $i = $ibeg;
22622 }
22623 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
22624
22625 my $here_doc_expected;
22626 if ( $expecting == UNKNOWN ) {
22627 $here_doc_expected = guess_if_here_doc($next_token);
22628 }
22629 else {
22630 $here_doc_expected = 1;
22631 }
22632
22633 if ($here_doc_expected) {
22634 $found_target = 1;
22635 $here_doc_target = $next_token;
22636 $i = $ibeg + 1;
22637 }
22638
22639 }
22640 else {
22641
22642 if ( $expecting == TERM ) {
22643 $found_target = 1;
22644 write_logfile_entry("Note: bare here-doc operator <<\n");
22645 }
22646 else {
22647 $i = $ibeg;
22648 }
22649 }
22650
22651 # patch to neglect any prepended backslash
22652 if ( $found_target && $backslash ) { $i++ }
22653
22654 return ( $found_target, $here_doc_target, $here_quote_character, $i );
22655}
22656
22657# try to resolve here-doc vs. shift by looking ahead for
22658# non-code or the end token (currently only looks for end token)
22659# returns 1 if it is probably a here doc, 0 if not
22660sub guess_if_here_doc {
22661
22662 # This is how many lines we will search for a target as part of the
22663 # guessing strategy. It is a constant because there is probably
22664 # little reason to change it.
22665 use constant HERE_DOC_WINDOW => 40;
22666
22667 my $next_token = shift;
22668 my $here_doc_expected = 0;
22669 my $line;
22670 my $k = 0;
22671 my $msg = "checking <<";
22672
22673 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
22674 {
22675 chomp $line;
22676
22677 if ( $line =~ /^$next_token$/ ) {
22678 $msg .= " -- found target $next_token ahead $k lines\n";
22679 $here_doc_expected = 1; # got it
22680 last;
22681 }
22682 last if ( $k >= HERE_DOC_WINDOW );
22683 }
22684
22685 unless ($here_doc_expected) {
22686
22687 if ( !defined($line) ) {
22688 $here_doc_expected = -1; # hit eof without seeing target
22689 $msg .= " -- must be shift; target $next_token not in file\n";
22690
22691 }
22692 else { # still unsure..taking a wild guess
22693
22694 if ( !$is_constant{$current_package}{$next_token} ) {
22695 $here_doc_expected = 1;
22696 $msg .=
22697 " -- guessing it's a here-doc ($next_token not a constant)\n";
22698 }
22699 else {
22700 $msg .=
22701 " -- guessing it's a shift ($next_token is a constant)\n";
22702 }
22703 }
22704 }
22705 write_logfile_entry($msg);
22706 return $here_doc_expected;
22707}
22708
22709sub do_quote {
22710
22711 # follow (or continue following) quoted string or pattern
22712 # $in_quote return code:
22713 # 0 - ok, found end
22714 # 1 - still must find end of quote whose target is $quote_character
22715 # 2 - still looking for end of first of two quotes
22716 my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens,
22717 $rtoken_map )
22718 = @_;
22719
22720 if ( $in_quote == 2 ) { # two quotes/patterns to follow
22721 my $ibeg = $i;
22722 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
22723 follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
22724 $quote_pos, $quote_depth );
22725
22726 if ( $in_quote == 1 ) {
22727 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
22728 $quote_character = '';
22729 }
22730 }
22731
22732 if ( $in_quote == 1 ) { # one (more) quote to follow
22733 my $ibeg = $i;
22734 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
22735 follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
22736 $quote_pos, $quote_depth );
22737 }
22738 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth );
22739}
22740
22741sub scan_number_do {
22742
22743 # scan a number in any of the formats that Perl accepts
22744 # Underbars (_) are allowed in decimal numbers.
22745 # input parameters -
22746 # $input_line - the string to scan
22747 # $i - pre_token index to start scanning
22748 # $rtoken_map - reference to the pre_token map giving starting
22749 # character position in $input_line of token $i
22750 # output parameters -
22751 # $i - last pre_token index of the number just scanned
22752 # number - the number (characters); or undef if not a number
22753
22754 my ( $input_line, $i, $rtoken_map, $input_type ) = @_;
22755 my $pos_beg = $$rtoken_map[$i];
22756 my $pos;
22757 my $i_begin = $i;
22758 my $number = undef;
22759 my $type = $input_type;
22760
22761 my $first_char = substr( $input_line, $pos_beg, 1 );
22762
22763 # Look for bad starting characters; Shouldn't happen..
22764 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
22765 warning("Program bug - scan_number given character $first_char\n");
22766 report_definite_bug();
22767 return ( $i, $type, $number );
22768 }
22769
22770 # handle v-string without leading 'v' character ('Two Dot' rule)
22771 # (vstring.t)
22772 pos($input_line) = $pos_beg;
22773 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
22774 $pos = pos($input_line);
22775 my $numc = $pos - $pos_beg;
22776 $number = substr( $input_line, $pos_beg, $numc );
22777 $type = 'v';
22778 unless ($saw_v_string) { report_v_string($number) }
22779 }
22780
22781 # handle octal, hex, binary
22782 if ( !defined($number) ) {
22783 pos($input_line) = $pos_beg;
22784 if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
22785 {
22786 $pos = pos($input_line);
22787 my $numc = $pos - $pos_beg;
22788 $number = substr( $input_line, $pos_beg, $numc );
22789 $type = 'n';
22790 }
22791 }
22792
22793 # handle decimal
22794 if ( !defined($number) ) {
22795 pos($input_line) = $pos_beg;
22796
22797 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
22798 $pos = pos($input_line);
22799
22800 # watch out for things like 0..40 which would give 0. by this;
22801 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
22802 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
22803 {
22804 $pos--;
22805 }
22806 my $numc = $pos - $pos_beg;
22807 $number = substr( $input_line, $pos_beg, $numc );
22808 $type = 'n';
22809 }
22810 }
22811
22812 # filter out non-numbers like e + - . e2 .e3 +e6
22813 # the rule: at least one digit, and any 'e' must be preceded by a digit
22814 if (
22815 $number !~ /\d/ # no digits
22816 || ( $number =~ /^(.*)[eE]/
22817 && $1 !~ /\d/ ) # or no digits before the 'e'
22818 )
22819 {
22820 $number = undef;
22821 $type = $input_type;
22822 return ( $i, $type, $number );
22823 }
22824
22825 # Found a number; now we must convert back from character position
22826 # to pre_token index. An error here implies user syntax error.
22827 # An example would be an invalid octal number like '009'.
22828 my $error;
22829 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
22830 if ($error) { warning("Possibly invalid number\n") }
22831
22832 return ( $i, $type, $number );
22833}
22834
22835sub scan_bare_identifier_do {
22836
22837 # this routine is called to scan a token starting with an alphanumeric
22838 # variable or package separator, :: or '.
22839
22840 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_;
22841 my $i_begin = $i;
22842 my $package = undef;
22843
22844 my $i_beg = $i;
22845
22846 # we have to back up one pretoken at a :: since each : is one pretoken
22847 if ( $tok eq '::' ) { $i_beg-- }
22848 if ( $tok eq '->' ) { $i_beg-- }
22849 my $pos_beg = $$rtoken_map[$i_beg];
22850 pos($input_line) = $pos_beg;
22851
22852 # Examples:
22853 # A::B::C
22854 # A::
22855 # ::A
22856 # A'B
22857 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
22858
22859 my $pos = pos($input_line);
22860 my $numc = $pos - $pos_beg;
22861 $tok = substr( $input_line, $pos_beg, $numc );
22862
22863 # type 'w' includes anything without leading type info
22864 # ($,%,@,*) including something like abc::def::ghi
22865 $type = 'w';
22866
22867 my $sub_name = "";
22868 if ( defined($2) ) { $sub_name = $2; }
22869 if ( defined($1) ) {
22870 $package = $1;
22871
22872 # patch: don't allow isolated package name which just ends
22873 # in the old style package separator (single quote). Example:
22874 # use CGI':all';
22875 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
22876 $pos--;
22877 }
22878
22879 $package =~ s/\'/::/g;
22880 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
22881 $package =~ s/::$//;
22882 }
22883 else {
22884 $package = $current_package;
22885
22886 if ( $is_keyword{$tok} ) {
22887 $type = 'k';
22888 }
22889 }
22890
22891 # if it is a bareword..
22892 if ( $type eq 'w' ) {
22893
22894 # check for v-string with leading 'v' type character
22895 # (This seems to have presidence over filehandle, type 'Y')
22896 if ( $tok =~ /^v\d+$/ ) {
22897
22898 # we only have the first part - something like 'v101' -
22899 # look for more
22900 if ( $input_line =~ m/\G(\.\d+)+/gc ) {
22901 $pos = pos($input_line);
22902 $numc = $pos - $pos_beg;
22903 $tok = substr( $input_line, $pos_beg, $numc );
22904 }
22905 $type = 'v';
22906
22907 # warn if this version can't handle v-strings
22908 unless ($saw_v_string) { report_v_string($tok) }
22909 }
22910
22911 elsif ( $is_constant{$package}{$sub_name} ) {
22912 $type = 'C';
22913 }
22914
22915 # bareword after sort has implied empty prototype; for example:
22916 # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
22917 # This has priority over whatever the user has specified.
22918 elsif ($last_nonblank_token eq 'sort'
22919 && $last_nonblank_type eq 'k' )
22920 {
22921 $type = 'Z';
22922 }
22923
22924 # Note: strangely, perl does not seem to really let you create
22925 # functions which act like eval and do, in the sense that eval
22926 # and do may have operators following the final }, but any operators
22927 # that you create with prototype (&) apparently do not allow
22928 # trailing operators, only terms. This seems strange.
22929 # If this ever changes, here is the update
22930 # to make perltidy behave accordingly:
22931
22932 # elsif ( $is_block_function{$package}{$tok} ) {
22933 # $tok='eval'; # patch to do braces like eval - doesn't work
22934 # $type = 'k';
22935 #}
22936 # FIXME: This could become a separate type to allow for different
22937 # future behavior:
22938 elsif ( $is_block_function{$package}{$sub_name} ) {
22939 $type = 'G';
22940 }
22941
22942 elsif ( $is_block_list_function{$package}{$sub_name} ) {
22943 $type = 'G';
22944 }
22945 elsif ( $is_user_function{$package}{$sub_name} ) {
22946 $type = 'U';
22947 $prototype = $user_function_prototype{$package}{$sub_name};
22948 }
22949
22950 # check for indirect object
22951 elsif (
22952
22953 # added 2001-03-27: must not be followed immediately by '('
22954 # see fhandle.t
22955 ( $input_line !~ m/\G\(/gc )
22956
22957 # and
22958 && (
22959
22960 # preceded by keyword like 'print', 'printf' and friends
22961 $is_indirect_object_taker{$last_nonblank_token}
22962
22963 # or preceded by something like 'print(' or 'printf('
22964 || (
22965 ( $last_nonblank_token eq '(' )
22966 && $is_indirect_object_taker{ $paren_type[$paren_depth]
22967 }
22968
22969 )
22970 )
22971 )
22972 {
22973
22974 # may not be indirect object unless followed by a space
22975 if ( $input_line =~ m/\G\s+/gc ) {
22976 $type = 'Y';
22977
22978 # Abandon Hope ...
22979 # Perl's indirect object notation is a very bad
22980 # thing and can cause subtle bugs, especially for
22981 # beginning programmers. And I haven't even been
22982 # able to figure out a sane warning scheme which
22983 # doesn't get in the way of good scripts.
22984
22985 # Complain if a filehandle has any lower case
22986 # letters. This is suggested good practice, but the
22987 # main reason for this warning is that prior to
22988 # release 20010328, perltidy incorrectly parsed a
22989 # function call after a print/printf, with the
22990 # result that a space got added before the opening
22991 # paren, thereby converting the function name to a
22992 # filehandle according to perl's weird rules. This
22993 # will not usually generate a syntax error, so this
22994 # is a potentially serious bug. By warning
22995 # of filehandles with any lower case letters,
22996 # followed by opening parens, we will help the user
22997 # find almost all of these older errors.
22998 # use 'sub_name' because something like
22999 # main::MYHANDLE is ok for filehandle
23000 if ( $sub_name =~ /[a-z]/ ) {
23001
23002 # could be bug caused by older perltidy if
23003 # followed by '('
23004 if ( $input_line =~ m/\G\s*\(/gc ) {
23005 complain(
23006"Caution: unknown word '$tok' in indirect object slot\n"
23007 );
23008 }
23009 }
23010 }
23011
23012 # bareword not followed by a space -- may not be filehandle
23013 # (may be function call defined in a 'use' statement)
23014 else {
23015 $type = 'Z';
23016 }
23017 }
23018 }
23019
23020 # Now we must convert back from character position
23021 # to pre_token index.
23022 # I don't think an error flag can occur here ..but who knows
23023 my $error;
23024 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
23025 if ($error) {
23026 warning("scan_bare_identifier: Possibly invalid tokenization\n");
23027 }
23028 }
23029
23030 # no match but line not blank - could be syntax error
23031 # perl will take '::' alone without complaint
23032 else {
23033 $type = 'w';
23034
23035 # change this warning to log message if it becomes annoying
23036 warning("didn't find identifier after leading ::\n");
23037 }
23038 return ( $i, $tok, $type, $prototype );
23039}
23040
23041sub scan_id_do {
23042
23043 # This is the new scanner and will eventually replace scan_identifier.
23044 # Only type 'sub' and 'package' are implemented.
23045 # Token types $ * % @ & -> are not yet implemented.
23046 #
23047 # Scan identifier following a type token.
23048 # The type of call depends on $id_scan_state: $id_scan_state = ''
23049 # for starting call, in which case $tok must be the token defining
23050 # the type.
23051 #
23052 # If the type token is the last nonblank token on the line, a value
23053 # of $id_scan_state = $tok is returned, indicating that further
23054 # calls must be made to get the identifier. If the type token is
23055 # not the last nonblank token on the line, the identifier is
23056 # scanned and handled and a value of '' is returned.
23057
23058 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_;
23059 my $type = '';
23060 my ( $i_beg, $pos_beg );
23061
23062 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
23063 #my ($a,$b,$c) = caller;
23064 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
23065
23066 # on re-entry, start scanning at first token on the line
23067 if ($id_scan_state) {
23068 $i_beg = $i;
23069 $type = '';
23070 }
23071
23072 # on initial entry, start scanning just after type token
23073 else {
23074 $i_beg = $i + 1;
23075 $id_scan_state = $tok;
23076 $type = 't';
23077 }
23078
23079 # find $i_beg = index of next nonblank token,
23080 # and handle empty lines
23081 my $blank_line = 0;
23082 my $next_nonblank_token = $$rtokens[$i_beg];
23083 if ( $i_beg > $max_token_index ) {
23084 $blank_line = 1;
23085 }
23086 else {
23087
23088 # only a '#' immediately after a '$' is not a comment
23089 if ( $next_nonblank_token eq '#' ) {
23090 unless ( $tok eq '$' ) {
23091 $blank_line = 1;
23092 }
23093 }
23094
23095 if ( $next_nonblank_token =~ /^\s/ ) {
23096 ( $next_nonblank_token, $i_beg ) =
23097 find_next_nonblank_token_on_this_line( $i_beg, $rtokens );
23098 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
23099 $blank_line = 1;
23100 }
23101 }
23102 }
23103
23104 # handle non-blank line; identifier, if any, must follow
23105 unless ($blank_line) {
23106
23107 if ( $id_scan_state eq 'sub' ) {
23108 ( $i, $tok, $type, $id_scan_state ) =
23109 do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens,
23110 $rtoken_map, $id_scan_state );
23111 }
23112
23113 elsif ( $id_scan_state eq 'package' ) {
23114 ( $i, $tok, $type ) =
23115 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
23116 $rtoken_map );
23117 $id_scan_state = '';
23118 }
23119
23120 else {
23121 warning("invalid token in scan_id: $tok\n");
23122 $id_scan_state = '';
23123 }
23124 }
23125
23126 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
23127
23128 # shouldn't happen:
23129 warning(
23130"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
23131 );
23132 report_definite_bug();
23133 }
23134
23135 TOKENIZER_DEBUG_FLAG_NSCAN && do {
23136 print
23137 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
23138 };
23139 return ( $i, $tok, $type, $id_scan_state );
23140}
23141
23142{
23143
23144 # saved package and subnames in case prototype is on separate line
23145 my ( $package_saved, $subname_saved );
23146
23147 sub do_scan_sub {
23148
23149 # do_scan_sub parses a sub name and prototype
23150 # it is called with $i_beg equal to the index of the first nonblank
23151 # token following a 'sub' token.
23152
23153 # TODO: add future error checks to be sure we have a valid
23154 # sub name. For example, 'sub &doit' is wrong. Also, be sure
23155 # a name is given if and only if a non-anonymous sub is
23156 # appropriate.
23157
23158 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
23159 $id_scan_state )
23160 = @_;
23161 $id_scan_state = ""; # normally we get everything in one call
23162 my $subname = undef;
23163 my $package = undef;
23164 my $proto = undef;
23165 my $attrs = undef;
23166 my $match;
23167
23168 my $pos_beg = $$rtoken_map[$i_beg];
23169 pos($input_line) = $pos_beg;
23170
23171 # sub NAME PROTO ATTRS
23172 if (
23173 $input_line =~ m/\G\s*
23174 ((?:\w*(?:'|::))*) # package - something that ends in :: or '
23175 (\w+) # NAME - required
23176 (\s*\([^){]*\))? # PROTO - something in parens
23177 (\s*:)? # ATTRS - leading : of attribute list
23178 /gcx
23179 )
23180 {
23181 $match = 1;
23182 $subname = $2;
23183 $proto = $3;
23184 $attrs = $4;
23185
23186 $package = ( defined($1) && $1 ) ? $1 : $current_package;
23187 $package =~ s/\'/::/g;
23188 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
23189 $package =~ s/::$//;
23190 my $pos = pos($input_line);
23191 my $numc = $pos - $pos_beg;
23192 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
23193 $type = 'i';
23194 }
23195
23196 # Look for prototype/attributes not preceded on this line by subname;
23197 # This might be an anonymous sub with attributes,
23198 # or a prototype on a separate line from its sub name
23199 elsif (
23200 $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
23201 (\s*:)? # ATTRS leading ':'
23202 /gcx
23203 && ( $1 || $2 )
23204 )
23205 {
23206 $match = 1;
23207 $proto = $1;
23208 $attrs = $2;
23209
23210 # Handle prototype on separate line from subname
23211 if ($subname_saved) {
23212 $package = $package_saved;
23213 $subname = $subname_saved;
23214 $tok = $last_nonblank_token;
23215 }
23216 $type = 'i';
23217 }
23218
23219 if ($match) {
23220
23221 # ATTRS: if there are attributes, back up and let the ':' be
23222 # found later by the scanner.
23223 my $pos = pos($input_line);
23224 if ($attrs) {
23225 $pos -= length($attrs);
23226 }
23227
23228 my $next_nonblank_token = $tok;
23229
23230 # catch case of line with leading ATTR ':' after anonymous sub
23231 if ( $pos == $pos_beg && $tok eq ':' ) {
23232 $type = 'A';
23233 }
23234
23235 # We must convert back from character position
23236 # to pre_token index.
23237 else {
23238
23239 # I don't think an error flag can occur here ..but ?
23240 my $error;
23241 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
23242 if ($error) { warning("Possibly invalid sub\n") }
23243
23244 # check for multiple definitions of a sub
23245 ( $next_nonblank_token, my $i_next ) =
23246 find_next_nonblank_token_on_this_line( $i, $rtokens );
23247 }
23248
23249 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
23250 { # skip blank or side comment
23251 my ( $rpre_tokens, $rpre_types ) =
23252 peek_ahead_for_n_nonblank_pre_tokens(1);
23253 if ( defined($rpre_tokens) && @$rpre_tokens ) {
23254 $next_nonblank_token = $rpre_tokens->[0];
23255 }
23256 else {
23257 $next_nonblank_token = '}';
23258 }
23259 }
23260 $package_saved = "";
23261 $subname_saved = "";
23262 if ( $next_nonblank_token eq '{' ) {
23263 if ($subname) {
23264 if ( $saw_function_definition{$package}{$subname} ) {
23265 my $lno = $saw_function_definition{$package}{$subname};
23266 warning(
23267"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
23268 );
23269 }
23270 $saw_function_definition{$package}{$subname} =
23271 $input_line_number;
23272 }
23273 }
23274 elsif ( $next_nonblank_token eq ';' ) {
23275 }
23276 elsif ( $next_nonblank_token eq '}' ) {
23277 }
23278
23279 # ATTRS - if an attribute list follows, remember the name
23280 # of the sub so the next opening brace can be labeled.
23281 # Setting 'statement_type' causes any ':'s to introduce
23282 # attributes.
23283 elsif ( $next_nonblank_token eq ':' ) {
23284 $statement_type = $tok;
23285 }
23286
23287 # see if PROTO follows on another line:
23288 elsif ( $next_nonblank_token eq '(' ) {
23289 if ( $attrs || $proto ) {
23290 warning(
23291"unexpected '(' after definition or declaration of sub '$subname'\n"
23292 );
23293 }
23294 else {
23295 $id_scan_state = 'sub'; # we must come back to get proto
23296 $statement_type = $tok;
23297 $package_saved = $package;
23298 $subname_saved = $subname;
23299 }
23300 }
23301 elsif ($next_nonblank_token) { # EOF technically ok
23302 warning(
23303"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
23304 );
23305 }
23306 check_prototype( $proto, $package, $subname );
23307 }
23308
23309 # no match but line not blank
23310 else {
23311 }
23312 return ( $i, $tok, $type, $id_scan_state );
23313 }
23314}
23315
23316sub check_prototype {
23317 my ( $proto, $package, $subname ) = @_;
23318 return unless ( defined($package) && defined($subname) );
23319 if ( defined($proto) ) {
23320 $proto =~ s/^\s*\(\s*//;
23321 $proto =~ s/\s*\)$//;
23322 if ($proto) {
23323 $is_user_function{$package}{$subname} = 1;
23324 $user_function_prototype{$package}{$subname} = "($proto)";
23325
23326 # prototypes containing '&' must be treated specially..
23327 if ( $proto =~ /\&/ ) {
23328
23329 # right curly braces of prototypes ending in
23330 # '&' may be followed by an operator
23331 if ( $proto =~ /\&$/ ) {
23332 $is_block_function{$package}{$subname} = 1;
23333 }
23334
23335 # right curly braces of prototypes NOT ending in
23336 # '&' may NOT be followed by an operator
23337 elsif ( $proto !~ /\&$/ ) {
23338 $is_block_list_function{$package}{$subname} = 1;
23339 }
23340 }
23341 }
23342 else {
23343 $is_constant{$package}{$subname} = 1;
23344 }
23345 }
23346 else {
23347 $is_user_function{$package}{$subname} = 1;
23348 }
23349}
23350
23351sub do_scan_package {
23352
23353 # do_scan_package parses a package name
23354 # it is called with $i_beg equal to the index of the first nonblank
23355 # token following a 'package' token.
23356
23357 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_;
23358 my $package = undef;
23359 my $pos_beg = $$rtoken_map[$i_beg];
23360 pos($input_line) = $pos_beg;
23361
23362 # handle non-blank line; package name, if any, must follow
23363 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
23364 $package = $1;
23365 $package = ( defined($1) && $1 ) ? $1 : 'main';
23366 $package =~ s/\'/::/g;
23367 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
23368 $package =~ s/::$//;
23369 my $pos = pos($input_line);
23370 my $numc = $pos - $pos_beg;
23371 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
23372 $type = 'i';
23373
23374 # Now we must convert back from character position
23375 # to pre_token index.
23376 # I don't think an error flag can occur here ..but ?
23377 my $error;
23378 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
23379 if ($error) { warning("Possibly invalid package\n") }
23380 $current_package = $package;
23381
23382 # check for error
23383 my ( $next_nonblank_token, $i_next ) =
23384 find_next_nonblank_token( $i, $rtokens );
23385 if ( $next_nonblank_token !~ /^[;\}]$/ ) {
23386 warning(
23387 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
23388 );
23389 }
23390 }
23391
23392 # no match but line not blank --
23393 # could be a label with name package, like package: , for example.
23394 else {
23395 $type = 'k';
23396 }
23397
23398 return ( $i, $tok, $type );
23399}
23400
23401sub scan_identifier_do {
23402
23403 # This routine assembles tokens into identifiers. It maintains a
23404 # scan state, id_scan_state. It updates id_scan_state based upon
23405 # current id_scan_state and token, and returns an updated
23406 # id_scan_state and the next index after the identifier.
23407
23408 my ( $i, $id_scan_state, $identifier, $rtokens ) = @_;
23409 my $i_begin = $i;
23410 my $type = '';
23411 my $tok_begin = $$rtokens[$i_begin];
23412 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
23413 my $id_scan_state_begin = $id_scan_state;
23414 my $identifier_begin = $identifier;
23415 my $tok = $tok_begin;
23416 my $message = "";
23417
23418 # these flags will be used to help figure out the type:
23419 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
23420 my $saw_type;
23421
23422 # allow old package separator (') except in 'use' statement
23423 my $allow_tick = ( $last_nonblank_token ne 'use' );
23424
23425 # get started by defining a type and a state if necessary
23426 unless ($id_scan_state) {
23427 $context = UNKNOWN_CONTEXT;
23428
23429 # fixup for digraph
23430 if ( $tok eq '>' ) {
23431 $tok = '->';
23432 $tok_begin = $tok;
23433 }
23434 $identifier = $tok;
23435
23436 if ( $tok eq '$' || $tok eq '*' ) {
23437 $id_scan_state = '$';
23438 $context = SCALAR_CONTEXT;
23439 }
23440 elsif ( $tok eq '%' || $tok eq '@' ) {
23441 $id_scan_state = '$';
23442 $context = LIST_CONTEXT;
23443 }
23444 elsif ( $tok eq '&' ) {
23445 $id_scan_state = '&';
23446 }
23447 elsif ( $tok eq 'sub' or $tok eq 'package' ) {
23448 $saw_alpha = 0; # 'sub' is considered type info here
23449 $id_scan_state = '$';
23450 $identifier .= ' '; # need a space to separate sub from sub name
23451 }
23452 elsif ( $tok eq '::' ) {
23453 $id_scan_state = 'A';
23454 }
23455 elsif ( $tok =~ /^[A-Za-z_]/ ) {
23456 $id_scan_state = ':';
23457 }
23458 elsif ( $tok eq '->' ) {
23459 $id_scan_state = '$';
23460 }
23461 else {
23462
23463 # shouldn't happen
23464 my ( $a, $b, $c ) = caller;
23465 warning("Program Bug: scan_identifier given bad token = $tok \n");
23466 warning(" called from sub $a line: $c\n");
23467 report_definite_bug();
23468 }
23469 $saw_type = !$saw_alpha;
23470 }
23471 else {
23472 $i--;
23473 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
23474 }
23475
23476 # now loop to gather the identifier
23477 my $i_save = $i;
23478
23479 while ( $i < $max_token_index ) {
23480 $i_save = $i unless ( $tok =~ /^\s*$/ );
23481 $tok = $$rtokens[ ++$i ];
23482
23483 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
23484 $tok = '::';
23485 $i++;
23486 }
23487
23488 if ( $id_scan_state eq '$' ) { # starting variable name
23489
23490 if ( $tok eq '$' ) {
23491
23492 $identifier .= $tok;
23493
23494 # we've got a punctuation variable if end of line (punct.t)
23495 if ( $i == $max_token_index ) {
23496 $type = 'i';
23497 $id_scan_state = '';
23498 last;
23499 }
23500 }
23501 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
23502 $saw_alpha = 1;
23503 $id_scan_state = ':'; # now need ::
23504 $identifier .= $tok;
23505 }
23506 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
23507 $saw_alpha = 1;
23508 $id_scan_state = ':'; # now need ::
23509 $identifier .= $tok;
23510
23511 # Perl will accept leading digits in identifiers,
23512 # although they may not always produce useful results.
23513 # Something like $main::0 is ok. But this also works:
23514 #
23515 # sub howdy::123::bubba{ print "bubba $54321!\n" }
23516 # howdy::123::bubba();
23517 #
23518 }
23519 elsif ( $tok =~ /^[0-9]/ ) { # numeric
23520 $saw_alpha = 1;
23521 $id_scan_state = ':'; # now need ::
23522 $identifier .= $tok;
23523 }
23524 elsif ( $tok eq '::' ) {
23525 $id_scan_state = 'A';
23526 $identifier .= $tok;
23527 }
23528 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
23529 $identifier .= $tok; # keep same state, a $ could follow
23530 }
23531 elsif ( $tok eq '{' ) {
23532
23533