Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | ############################################################ |
2 | # | |
3 | # perltidy - a perl script indenter and formatter | |
4 | # | |
5 | # Copyright (c) 2000-2003 by Steve Hancock | |
6 | # Distributed under the GPL license agreement; see file COPYING | |
7 | # | |
8 | # This program is free software; you can redistribute it and/or modify | |
9 | # it under the terms of the GNU General Public License as published by | |
10 | # the Free Software Foundation; either version 2 of the License, or | |
11 | # (at your option) any later version. | |
12 | # | |
13 | # This program is distributed in the hope that it will be useful, | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU General Public License for more details. | |
17 | # | |
18 | # You should have received a copy of the GNU General Public License | |
19 | # along with this program; if not, write to the Free Software | |
20 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
21 | # | |
22 | # For brief instructions instructions, try 'perltidy -h'. | |
23 | # For more complete documentation, try 'man perltidy' | |
24 | # or visit http://perltidy.sourceforge.net | |
25 | # | |
26 | # This script is an example of the default style. It was formatted with: | |
27 | # | |
28 | # perltidy Tidy.pm | |
29 | # | |
30 | # Code Contributions: | |
31 | # Michael Cartmell supplied code for adaptation to VMS and helped with | |
32 | # v-strings. | |
33 | # Hugh S. Myers supplied sub streamhandle and the supporting code to | |
34 | # create a Perl::Tidy module which can operate on strings, arrays, etc. | |
35 | # Yves Orton supplied coding to help detect Windows versions. | |
36 | # Axel Rose supplied a patch for MacPerl. | |
37 | # Many others have supplied key ideas, suggestions, and bug reports; | |
38 | # see the CHANGES file. | |
39 | # | |
40 | ############################################################ | |
41 | ||
42 | package Perl::Tidy; | |
43 | use 5.004; # need IO::File from 5.004 or later | |
44 | BEGIN { $^W = 1; } # turn on warnings | |
45 | ||
46 | use strict; | |
47 | use Exporter; | |
48 | use Carp; | |
49 | $|++; | |
50 | ||
51 | use vars qw{ | |
52 | $VERSION | |
53 | @ISA | |
54 | @EXPORT | |
55 | $missing_file_spec | |
56 | }; | |
57 | ||
58 | @ISA = qw( Exporter ); | |
59 | @EXPORT = qw( &perltidy ); | |
60 | ||
61 | use IO::File; | |
62 | use File::Basename; | |
63 | ||
64 | BEGIN { | |
65 | ( $VERSION = q($Id: Tidy.pm,v 1.46 2003/10/21 14:09:29 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker | |
66 | } | |
67 | ||
68 | sub streamhandle { | |
69 | ||
70 | # given filename and mode (r or w), create an object which: | |
71 | # has a 'getline' method if mode='r', and | |
72 | # has a 'print' method if mode='w'. | |
73 | # The objects also need a 'close' method. | |
74 | # | |
75 | # How the object is made: | |
76 | # | |
77 | # if $filename is: Make object using: | |
78 | # ---------------- ----------------- | |
79 | # '-' (STDIN if mode = 'r', STDOUT if mode='w') | |
80 | # string IO::File | |
81 | # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray) | |
82 | # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar) | |
83 | # object object | |
84 | # (check for 'print' method for 'w' mode) | |
85 | # (check for 'getline' method for 'r' mode) | |
86 | my $ref = ref( my $filename = shift ); | |
87 | my $mode = shift; | |
88 | my $New; | |
89 | my $fh; | |
90 | ||
91 | # handle a reference | |
92 | if ($ref) { | |
93 | if ( $ref eq 'ARRAY' ) { | |
94 | $New = sub { Perl::Tidy::IOScalarArray->new(@_) }; | |
95 | } | |
96 | elsif ( $ref eq 'SCALAR' ) { | |
97 | $New = sub { Perl::Tidy::IOScalar->new(@_) }; | |
98 | } | |
99 | else { | |
100 | ||
101 | # Accept an object with a getline method for reading. Note: | |
102 | # IO::File is built-in and does not respond to the defined | |
103 | # operator. If this causes trouble, the check can be | |
104 | # skipped and we can just let it crash if there is no | |
105 | # getline. | |
106 | if ( $mode =~ /[rR]/ ) { | |
107 | if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) { | |
108 | $New = sub { $filename }; | |
109 | } | |
110 | else { | |
111 | $New = sub { undef }; | |
112 | confess <<EOM; | |
113 | ------------------------------------------------------------------------ | |
114 | No 'getline' method is defined for object of class $ref | |
115 | Please check your call to Perl::Tidy::perltidy. Trace follows. | |
116 | ------------------------------------------------------------------------ | |
117 | EOM | |
118 | } | |
119 | } | |
120 | ||
121 | # Accept an object with a print method for writing. | |
122 | # See note above about IO::File | |
123 | if ( $mode =~ /[wW]/ ) { | |
124 | if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) { | |
125 | $New = sub { $filename }; | |
126 | } | |
127 | else { | |
128 | $New = sub { undef }; | |
129 | confess <<EOM; | |
130 | ------------------------------------------------------------------------ | |
131 | No 'print' method is defined for object of class $ref | |
132 | Please check your call to Perl::Tidy::perltidy. Trace follows. | |
133 | ------------------------------------------------------------------------ | |
134 | EOM | |
135 | } | |
136 | } | |
137 | } | |
138 | } | |
139 | ||
140 | # handle a string | |
141 | else { | |
142 | if ( $filename eq '-' ) { | |
143 | $New = sub { $mode eq 'w' ? *STDOUT : *STDIN } | |
144 | } | |
145 | else { | |
146 | $New = sub { IO::File->new(@_) }; | |
147 | } | |
148 | } | |
149 | $fh = $New->( $filename, $mode ) | |
150 | or warn "Couldn't open file:$filename in mode:$mode : $!\n"; | |
151 | return $fh, ( $ref or $filename ); | |
152 | } | |
153 | ||
154 | sub find_input_line_ending { | |
155 | ||
156 | # Peek at a file and return first line ending character. | |
157 | # Quietly return undef in case of any trouble. | |
158 | my ($input_file) = @_; | |
159 | my $ending; | |
160 | ||
161 | # silently ignore input from object or stdin | |
162 | if ( ref($input_file) || $input_file eq '-' ) { | |
163 | return $ending; | |
164 | } | |
165 | open( INFILE, $input_file ) || return $ending; | |
166 | ||
167 | binmode INFILE; | |
168 | my $buf; | |
169 | read( INFILE, $buf, 1024 ); | |
170 | close INFILE; | |
171 | if ( $buf && $buf =~ /([\012\015]+)/ ) { | |
172 | my $test = $1; | |
173 | ||
174 | # dos | |
175 | if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" } | |
176 | ||
177 | # mac | |
178 | elsif ( $test =~ /^\015+$/ ) { $ending = "\015" } | |
179 | ||
180 | # unix | |
181 | elsif ( $test =~ /^\012+$/ ) { $ending = "\012" } | |
182 | ||
183 | # unknown | |
184 | else { } | |
185 | } | |
186 | ||
187 | # no ending seen | |
188 | else { } | |
189 | ||
190 | return $ending; | |
191 | } | |
192 | ||
193 | sub catfile { | |
194 | ||
195 | # concatenate a path and file basename | |
196 | # returns undef in case of error | |
197 | ||
198 | BEGIN { eval "require File::Spec"; $missing_file_spec = $@; } | |
199 | ||
200 | # use File::Spec if we can | |
201 | unless ($missing_file_spec) { | |
202 | return File::Spec->catfile(@_); | |
203 | } | |
204 | ||
205 | # Perl 5.004 systems may not have File::Spec so we'll make | |
206 | # a simple try. We assume File::Basename is available. | |
207 | # return undef if not successful. | |
208 | my $name = pop @_; | |
209 | my $path = join '/', @_; | |
210 | my $test_file = $path . $name; | |
211 | my ( $test_name, $test_path ) = fileparse($test_file); | |
212 | return $test_file if ( $test_name eq $name ); | |
213 | return undef if ( $^O eq 'VMS' ); | |
214 | ||
215 | # this should work at least for Windows and Unix: | |
216 | $test_file = $path . '/' . $name; | |
217 | ( $test_name, $test_path ) = fileparse($test_file); | |
218 | return $test_file if ( $test_name eq $name ); | |
219 | return undef; | |
220 | } | |
221 | ||
222 | sub make_temporary_filename { | |
223 | ||
224 | # Make a temporary filename. | |
225 | # | |
226 | # The POSIX tmpnam() function tends to be unreliable for non-unix | |
227 | # systems (at least for the win32 systems that I've tested), so use | |
228 | # a pre-defined name. A slight disadvantage of this is that two | |
229 | # perltidy runs in the same working directory may conflict. | |
230 | # However, the chance of that is small and managable by the user. | |
231 | # An alternative would be to check for the file's existance and use, | |
232 | # say .TMP0, .TMP1, etc, but that scheme has its own problems. So, | |
233 | # keep it simple. | |
234 | my $name = "perltidy.TMP"; | |
235 | if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) { | |
236 | return $name; | |
237 | } | |
238 | eval "use POSIX qw(tmpnam)"; | |
239 | if ($@) { return $name } | |
240 | use IO::File; | |
241 | ||
242 | # just make a couple of tries before giving up and using the default | |
243 | for ( 0 .. 1 ) { | |
244 | my $tmpname = tmpnam(); | |
245 | my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL ); | |
246 | if ($fh) { | |
247 | $fh->close(); | |
248 | return ($tmpname); | |
249 | last; | |
250 | } | |
251 | } | |
252 | return ($name); | |
253 | } | |
254 | ||
255 | # Here is a map of the flow of data from the input source to the output | |
256 | # line sink: | |
257 | # | |
258 | # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter--> | |
259 | # input groups output | |
260 | # lines tokens lines of lines lines | |
261 | # lines | |
262 | # | |
263 | # The names correspond to the package names responsible for the unit processes. | |
264 | # | |
265 | # The overall process is controlled by the "main" package. | |
266 | # | |
267 | # LineSource is the stream of input lines | |
268 | # | |
269 | # Tokenizer analyzes a line and breaks it into tokens, peeking ahead | |
270 | # if necessary. A token is any section of the input line which should be | |
271 | # manipulated as a single entity during formatting. For example, a single | |
272 | # ',' character is a token, and so is an entire side comment. It handles | |
273 | # the complexities of Perl syntax, such as distinguishing between '<<' as | |
274 | # a shift operator and as a here-document, or distinguishing between '/' | |
275 | # as a divide symbol and as a pattern delimiter. | |
276 | # | |
277 | # Formatter inserts and deletes whitespace between tokens, and breaks | |
278 | # sequences of tokens at appropriate points as output lines. It bases its | |
279 | # decisions on the default rules as modified by any command-line options. | |
280 | # | |
281 | # VerticalAligner collects groups of lines together and tries to line up | |
282 | # certain tokens, such as '=>', '#', and '=' by adding whitespace. | |
283 | # | |
284 | # FileWriter simply writes lines to the output stream. | |
285 | # | |
286 | # The Logger package, not shown, records significant events and warning | |
287 | # messages. It writes a .LOG file, which may be saved with a | |
288 | # '-log' or a '-g' flag. | |
289 | ||
290 | { | |
291 | ||
292 | # variables needed by interrupt handler: | |
293 | my $tokenizer; | |
294 | my $input_file; | |
295 | ||
296 | # this routine may be called to give a status report if interrupted. If a | |
297 | # parameter is given, it will call exit with that parameter. This is no | |
298 | # longer used because it works under Unix but not under Windows. | |
299 | sub interrupt_handler { | |
300 | ||
301 | my $exit_flag = shift; | |
302 | print STDERR "perltidy interrupted"; | |
303 | if ($tokenizer) { | |
304 | my $input_line_number = | |
305 | Perl::Tidy::Tokenizer::get_input_line_number(); | |
306 | print STDERR " at line $input_line_number"; | |
307 | } | |
308 | if ($input_file) { | |
309 | ||
310 | if ( ref $input_file ) { print STDERR " of reference to:" } | |
311 | else { print STDERR " of file:" } | |
312 | print STDERR " $input_file"; | |
313 | } | |
314 | print STDERR "\n"; | |
315 | exit $exit_flag if defined($exit_flag); | |
316 | } | |
317 | ||
318 | sub perltidy { | |
319 | ||
320 | my %defaults = ( | |
321 | argv => undef, | |
322 | destination => undef, | |
323 | formatter => undef, | |
324 | logfile => undef, | |
325 | errorfile => undef, | |
326 | perltidyrc => undef, | |
327 | source => undef, | |
328 | stderr => undef, | |
329 | ); | |
330 | ||
331 | # don't overwrite callers ARGV | |
332 | local @ARGV = @ARGV; | |
333 | ||
334 | my %input_hash = @_; | |
335 | if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { | |
336 | local $" = ')('; | |
337 | my @good_keys = sort keys %defaults; | |
338 | @bad_keys = sort @bad_keys; | |
339 | confess <<EOM; | |
340 | ------------------------------------------------------------------------ | |
341 | Unknown perltidy parameter : (@bad_keys) | |
342 | perltidy only understands : (@good_keys) | |
343 | ------------------------------------------------------------------------ | |
344 | ||
345 | EOM | |
346 | } | |
347 | ||
348 | %input_hash = ( %defaults, %input_hash ); | |
349 | my $argv = $input_hash{'argv'}; | |
350 | my $destination_stream = $input_hash{'destination'}; | |
351 | my $errorfile_stream = $input_hash{'errorfile'}; | |
352 | my $logfile_stream = $input_hash{'logfile'}; | |
353 | my $perltidyrc_stream = $input_hash{'perltidyrc'}; | |
354 | my $source_stream = $input_hash{'source'}; | |
355 | my $stderr_stream = $input_hash{'stderr'}; | |
356 | my $user_formatter = $input_hash{'formatter'}; | |
357 | ||
358 | if ($user_formatter) { | |
359 | ||
360 | # if the user defines a formatter, there is no output stream, | |
361 | # but we need a null stream to keep coding simple | |
362 | $destination_stream = Perl::Tidy::DevNull->new(); | |
363 | } | |
364 | ||
365 | # see if ARGV is overridden | |
366 | if ( defined($argv) ) { | |
367 | ||
368 | my $rargv = ref $argv; | |
369 | if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef } | |
370 | ||
371 | # ref to ARRAY | |
372 | if ($rargv) { | |
373 | if ( $rargv eq 'ARRAY' ) { | |
374 | @ARGV = @$argv; | |
375 | } | |
376 | else { | |
377 | croak <<EOM; | |
378 | ------------------------------------------------------------------------ | |
379 | Please check value of -argv in call to perltidy; | |
380 | it must be a string or ref to ARRAY but is: $rargv | |
381 | ------------------------------------------------------------------------ | |
382 | EOM | |
383 | } | |
384 | } | |
385 | ||
386 | # string | |
387 | else { | |
388 | my ( $rargv, $msg ) = parse_args($argv); | |
389 | if ($msg) { | |
390 | die <<EOM; | |
391 | Error parsing this string passed to to perltidy with 'argv': | |
392 | $msg | |
393 | EOM | |
394 | } | |
395 | @ARGV = @{$rargv}; | |
396 | } | |
397 | } | |
398 | ||
399 | # redirect STDERR if requested | |
400 | if ($stderr_stream) { | |
401 | my ( $fh_stderr, $stderr_file ) = | |
402 | Perl::Tidy::streamhandle( $stderr_stream, 'w' ); | |
403 | if ($fh_stderr) { *STDERR = $fh_stderr } | |
404 | else { | |
405 | croak <<EOM; | |
406 | ------------------------------------------------------------------------ | |
407 | Unable to redirect STDERR to $stderr_stream | |
408 | Please check value of -stderr in call to perltidy | |
409 | ------------------------------------------------------------------------ | |
410 | EOM | |
411 | } | |
412 | } | |
413 | ||
414 | my $rpending_complaint; | |
415 | $$rpending_complaint = ""; | |
416 | my $rpending_logfile_message; | |
417 | $$rpending_logfile_message = ""; | |
418 | ||
419 | my ( $is_Windows, $Windows_type ) = | |
420 | look_for_Windows($rpending_complaint); | |
421 | ||
422 | # VMS file names are restricted to a 40.40 format, so we append _tdy | |
423 | # instead of .tdy, etc. (but see also sub check_vms_filename) | |
424 | my $dot; | |
425 | my $dot_pattern; | |
426 | if ( $^O eq 'VMS' ) { | |
427 | $dot = '_'; | |
428 | $dot_pattern = '_'; | |
429 | } | |
430 | else { | |
431 | $dot = '.'; | |
432 | $dot_pattern = '\.'; # must escape for use in regex | |
433 | } | |
434 | ||
435 | # handle command line options | |
436 | my ( $rOpts, $config_file, $rraw_options, $saw_extrude ) = | |
437 | process_command_line( | |
438 | $perltidyrc_stream, $is_Windows, | |
439 | $Windows_type, $rpending_complaint | |
440 | ); | |
441 | ||
442 | if ($user_formatter) { | |
443 | $rOpts->{'format'} = 'user'; | |
444 | } | |
445 | ||
446 | # there must be one entry here for every possible format | |
447 | my %default_file_extension = ( | |
448 | tidy => 'tdy', | |
449 | html => 'html', | |
450 | user => '', | |
451 | ); | |
452 | ||
453 | # be sure we have a valid output format | |
454 | unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { | |
455 | my $formats = join ' ', | |
456 | sort map { "'" . $_ . "'" } keys %default_file_extension; | |
457 | my $fmt = $rOpts->{'format'}; | |
458 | die "-format='$fmt' but must be one of: $formats\n"; | |
459 | } | |
460 | ||
461 | my $output_extension = | |
462 | make_extension( $rOpts->{'output-file-extension'}, | |
463 | $default_file_extension{ $rOpts->{'format'} }, $dot ); | |
464 | ||
465 | my $backup_extension = | |
466 | make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); | |
467 | ||
468 | my $html_toc_extension = | |
469 | make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); | |
470 | ||
471 | my $html_src_extension = | |
472 | make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); | |
473 | ||
474 | # check for -b option; | |
475 | my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} | |
476 | && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode | |
477 | && @ARGV > 0; # silently ignore if standard input; | |
478 | # this allows -b to be in a .perltidyrc file | |
479 | # without error messages when running from an editor | |
480 | ||
481 | # turn off -b with warnings in case of conflicts with other options | |
482 | if ($in_place_modify) { | |
483 | if ( $rOpts->{'standard-output'} ) { | |
484 | warn "Ignoring -b; you may not use -b and -st together\n"; | |
485 | $in_place_modify = 0; | |
486 | } | |
487 | if ($destination_stream) { | |
488 | warn | |
489 | "Ignoring -b; you may not specify a destination array and -b together\n"; | |
490 | $in_place_modify = 0; | |
491 | } | |
492 | if ($source_stream) { | |
493 | warn | |
494 | "Ignoring -b; you may not specify a source array and -b together\n"; | |
495 | $in_place_modify = 0; | |
496 | } | |
497 | if ( $rOpts->{'outfile'} ) { | |
498 | warn "Ignoring -b; you may not use -b and -o together\n"; | |
499 | $in_place_modify = 0; | |
500 | } | |
501 | if ( defined( $rOpts->{'output-path'} ) ) { | |
502 | warn "Ignoring -b; you may not use -b and -opath together\n"; | |
503 | $in_place_modify = 0; | |
504 | } | |
505 | } | |
506 | ||
507 | Perl::Tidy::Formatter::check_options($rOpts); | |
508 | if ( $rOpts->{'format'} eq 'html' ) { | |
509 | Perl::Tidy::HtmlWriter->check_options($rOpts); | |
510 | } | |
511 | ||
512 | # make the pattern of file extensions that we shouldn't touch | |
513 | my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; | |
514 | if ($output_extension) { | |
515 | $_ = quotemeta($output_extension); | |
516 | $forbidden_file_extensions .= "|$_"; | |
517 | } | |
518 | if ( $in_place_modify && $backup_extension ) { | |
519 | $_ = quotemeta($backup_extension); | |
520 | $forbidden_file_extensions .= "|$_"; | |
521 | } | |
522 | $forbidden_file_extensions .= ')$'; | |
523 | ||
524 | # Create a diagnostics object if requested; | |
525 | # This is only useful for code development | |
526 | my $diagnostics_object = undef; | |
527 | if ( $rOpts->{'DIAGNOSTICS'} ) { | |
528 | $diagnostics_object = Perl::Tidy::Diagnostics->new(); | |
529 | } | |
530 | ||
531 | # no filenames should be given if input is from an array | |
532 | if ($source_stream) { | |
533 | if ( @ARGV > 0 ) { | |
534 | die | |
535 | "You may not specify any filenames when a source array is given\n"; | |
536 | } | |
537 | ||
538 | # we'll stuff the source array into ARGV | |
539 | unshift( @ARGV, $source_stream ); | |
540 | ||
541 | # No special treatment for source stream which is a filename. | |
542 | # This will enable checks for binary files and other bad stuff. | |
543 | $source_stream = undef unless ref($source_stream); | |
544 | } | |
545 | ||
546 | # use stdin by default if no source array and no args | |
547 | else { | |
548 | unshift( @ARGV, '-' ) unless @ARGV; | |
549 | } | |
550 | ||
551 | # loop to process all files in argument list | |
552 | my $number_of_files = @ARGV; | |
553 | my $formatter = undef; | |
554 | $tokenizer = undef; | |
555 | while ( $input_file = shift @ARGV ) { | |
556 | my $fileroot; | |
557 | my $input_file_permissions; | |
558 | ||
559 | #--------------------------------------------------------------- | |
560 | # determine the input file name | |
561 | #--------------------------------------------------------------- | |
562 | if ($source_stream) { | |
563 | $fileroot = "perltidy"; | |
564 | } | |
565 | elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN | |
566 | $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc | |
567 | $in_place_modify = 0; | |
568 | } | |
569 | else { | |
570 | $fileroot = $input_file; | |
571 | unless ( -e $input_file ) { | |
572 | ||
573 | # file doesn't exist - check for a file glob | |
574 | if ( $input_file =~ /([\?\*\[\{])/ ) { | |
575 | ||
576 | # Windows shell may not remove quotes, so do it | |
577 | my $input_file = $input_file; | |
578 | if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } | |
579 | if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } | |
580 | my $pattern = fileglob_to_re($input_file); | |
581 | eval "/$pattern/"; | |
582 | if ( !$@ && opendir( DIR, './' ) ) { | |
583 | my @files = | |
584 | grep { /$pattern/ && !-d $_ } readdir(DIR); | |
585 | closedir(DIR); | |
586 | if (@files) { | |
587 | unshift @ARGV, @files; | |
588 | next; | |
589 | } | |
590 | } | |
591 | } | |
592 | print "skipping file: '$input_file': no matches found\n"; | |
593 | next; | |
594 | } | |
595 | ||
596 | unless ( -f $input_file ) { | |
597 | print "skipping file: $input_file: not a regular file\n"; | |
598 | next; | |
599 | } | |
600 | ||
601 | unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { | |
602 | ||
603 | "skipping file: $input_file: Non-text (override with -f)\n"; | |
604 | next; | |
605 | } | |
606 | ||
607 | # we should have a valid filename now | |
608 | $fileroot = $input_file; | |
609 | $input_file_permissions = ( stat $input_file )[2] & 07777; | |
610 | ||
611 | if ( $^O eq 'VMS' ) { | |
612 | ( $fileroot, $dot ) = check_vms_filename($fileroot); | |
613 | } | |
614 | ||
615 | # add option to change path here | |
616 | if ( defined( $rOpts->{'output-path'} ) ) { | |
617 | ||
618 | my ( $base, $old_path ) = fileparse($fileroot); | |
619 | my $new_path = $rOpts->{'output-path'}; | |
620 | unless ( -d $new_path ) { | |
621 | unless ( mkdir $new_path, 0777 ) { | |
622 | die "unable to create directory $new_path: $!\n"; | |
623 | } | |
624 | } | |
625 | my $path = $new_path; | |
626 | $fileroot = catfile( $path, $base ); | |
627 | unless ($fileroot) { | |
628 | die <<EOM; | |
629 | ------------------------------------------------------------------------ | |
630 | Problem combining $new_path and $base to make a filename; check -opath | |
631 | ------------------------------------------------------------------------ | |
632 | EOM | |
633 | } | |
634 | } | |
635 | } | |
636 | ||
637 | # Skip files with same extension as the output files because | |
638 | # this can lead to a messy situation with files like | |
639 | # script.tdy.tdy.tdy ... or worse problems ... when you | |
640 | # rerun perltidy over and over with wildcard input. | |
641 | if ( | |
642 | !$source_stream | |
643 | && ( $input_file =~ /$forbidden_file_extensions/o | |
644 | || $input_file eq 'DIAGNOSTICS' ) | |
645 | ) | |
646 | { | |
647 | print "skipping file: $input_file: wrong extension\n"; | |
648 | next; | |
649 | } | |
650 | ||
651 | # the 'source_object' supplies a method to read the input file | |
652 | my $source_object = | |
653 | Perl::Tidy::LineSource->new( $input_file, $rOpts, | |
654 | $rpending_logfile_message ); | |
655 | next unless ($source_object); | |
656 | ||
657 | # register this file name with the Diagnostics package | |
658 | $diagnostics_object->set_input_file($input_file) | |
659 | if $diagnostics_object; | |
660 | ||
661 | #--------------------------------------------------------------- | |
662 | # determine the output file name | |
663 | #--------------------------------------------------------------- | |
664 | my $output_file = undef; | |
665 | my $actual_output_extension; | |
666 | ||
667 | if ( $rOpts->{'outfile'} ) { | |
668 | ||
669 | if ( $number_of_files <= 1 ) { | |
670 | ||
671 | if ( $rOpts->{'standard-output'} ) { | |
672 | die "You may not use -o and -st together\n"; | |
673 | } | |
674 | elsif ($destination_stream) { | |
675 | die | |
676 | "You may not specify a destination array and -o together\n"; | |
677 | } | |
678 | elsif ( defined( $rOpts->{'output-path'} ) ) { | |
679 | die "You may not specify -o and -opath together\n"; | |
680 | } | |
681 | elsif ( defined( $rOpts->{'output-file-extension'} ) ) { | |
682 | die "You may not specify -o and -oext together\n"; | |
683 | } | |
684 | $output_file = $rOpts->{outfile}; | |
685 | ||
686 | # make sure user gives a file name after -o | |
687 | if ( $output_file =~ /^-/ ) { | |
688 | die "You must specify a valid filename after -o\n"; | |
689 | } | |
690 | ||
691 | # do not overwrite input file with -o | |
692 | if ( defined($input_file_permissions) | |
693 | && ( $output_file eq $input_file ) ) | |
694 | { | |
695 | die | |
696 | "Use 'perltidy -b $input_file' to modify in-place\n"; | |
697 | } | |
698 | } | |
699 | else { | |
700 | die "You may not use -o with more than one input file\n"; | |
701 | } | |
702 | } | |
703 | elsif ( $rOpts->{'standard-output'} ) { | |
704 | if ($destination_stream) { | |
705 | die | |
706 | "You may not specify a destination array and -st together\n"; | |
707 | } | |
708 | $output_file = '-'; | |
709 | ||
710 | if ( $number_of_files <= 1 ) { | |
711 | } | |
712 | else { | |
713 | die "You may not use -st with more than one input file\n"; | |
714 | } | |
715 | } | |
716 | elsif ($destination_stream) { | |
717 | $output_file = $destination_stream; | |
718 | } | |
719 | elsif ($source_stream) { # source but no destination goes to stdout | |
720 | $output_file = '-'; | |
721 | } | |
722 | elsif ( $input_file eq '-' ) { | |
723 | $output_file = '-'; | |
724 | } | |
725 | else { | |
726 | if ($in_place_modify) { | |
727 | $output_file = IO::File->new_tmpfile() | |
728 | or die "cannot open temp file for -b option: $!\n"; | |
729 | } | |
730 | else { | |
731 | $actual_output_extension = $output_extension; | |
732 | $output_file = $fileroot . $output_extension; | |
733 | } | |
734 | } | |
735 | ||
736 | # the 'sink_object' knows how to write the output file | |
737 | my $tee_file = $fileroot . $dot . "TEE"; | |
738 | ||
739 | my $line_separator = $rOpts->{'output-line-ending'}; | |
740 | if ( $rOpts->{'preserve-line-endings'} ) { | |
741 | $line_separator = find_input_line_ending($input_file); | |
742 | } | |
743 | $line_separator = "\n" unless defined($line_separator); | |
744 | ||
745 | my $sink_object = | |
746 | Perl::Tidy::LineSink->new( $output_file, $tee_file, | |
747 | $line_separator, $rOpts, $rpending_logfile_message ); | |
748 | ||
749 | #--------------------------------------------------------------- | |
750 | # initialize the error logger | |
751 | #--------------------------------------------------------------- | |
752 | my $warning_file = $fileroot . $dot . "ERR"; | |
753 | if ($errorfile_stream) { $warning_file = $errorfile_stream } | |
754 | my $log_file = $fileroot . $dot . "LOG"; | |
755 | if ($logfile_stream) { $log_file = $logfile_stream } | |
756 | ||
757 | my $logger_object = | |
758 | Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, | |
759 | $saw_extrude ); | |
760 | write_logfile_header( | |
761 | $rOpts, $logger_object, $config_file, | |
762 | $rraw_options, $Windows_type | |
763 | ); | |
764 | if ($$rpending_logfile_message) { | |
765 | $logger_object->write_logfile_entry($$rpending_logfile_message); | |
766 | } | |
767 | if ($$rpending_complaint) { | |
768 | $logger_object->complain($$rpending_complaint); | |
769 | } | |
770 | ||
771 | #--------------------------------------------------------------- | |
772 | # initialize the debug object, if any | |
773 | #--------------------------------------------------------------- | |
774 | my $debugger_object = undef; | |
775 | if ( $rOpts->{DEBUG} ) { | |
776 | $debugger_object = | |
777 | Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); | |
778 | } | |
779 | ||
780 | #--------------------------------------------------------------- | |
781 | # create a formatter for this file : html writer or pretty printer | |
782 | #--------------------------------------------------------------- | |
783 | ||
784 | # we have to delete any old formatter because, for safety, | |
785 | # the formatter will check to see that there is only one. | |
786 | $formatter = undef; | |
787 | ||
788 | if ($user_formatter) { | |
789 | $formatter = $user_formatter; | |
790 | } | |
791 | elsif ( $rOpts->{'format'} eq 'html' ) { | |
792 | $formatter = | |
793 | Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, | |
794 | $actual_output_extension, $html_toc_extension, | |
795 | $html_src_extension ); | |
796 | } | |
797 | elsif ( $rOpts->{'format'} eq 'tidy' ) { | |
798 | $formatter = Perl::Tidy::Formatter->new( | |
799 | logger_object => $logger_object, | |
800 | diagnostics_object => $diagnostics_object, | |
801 | sink_object => $sink_object, | |
802 | ); | |
803 | } | |
804 | else { | |
805 | die "I don't know how to do -format=$rOpts->{'format'}\n"; | |
806 | } | |
807 | ||
808 | unless ($formatter) { | |
809 | die "Unable to continue with $rOpts->{'format'} formatting\n"; | |
810 | } | |
811 | ||
812 | #--------------------------------------------------------------- | |
813 | # create the tokenizer for this file | |
814 | #--------------------------------------------------------------- | |
815 | $tokenizer = undef; # must destroy old tokenizer | |
816 | $tokenizer = Perl::Tidy::Tokenizer->new( | |
817 | source_object => $source_object, | |
818 | logger_object => $logger_object, | |
819 | debugger_object => $debugger_object, | |
820 | diagnostics_object => $diagnostics_object, | |
821 | starting_level => $rOpts->{'starting-indentation-level'}, | |
822 | tabs => $rOpts->{'tabs'}, | |
823 | indent_columns => $rOpts->{'indent-columns'}, | |
824 | look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, | |
825 | look_for_autoloader => $rOpts->{'look-for-autoloader'}, | |
826 | look_for_selfloader => $rOpts->{'look-for-selfloader'}, | |
827 | trim_qw => $rOpts->{'trim-qw'}, | |
828 | ); | |
829 | ||
830 | #--------------------------------------------------------------- | |
831 | # now we can do it | |
832 | #--------------------------------------------------------------- | |
833 | process_this_file( $tokenizer, $formatter ); | |
834 | ||
835 | #--------------------------------------------------------------- | |
836 | # close the input source and report errors | |
837 | #--------------------------------------------------------------- | |
838 | $source_object->close_input_file(); | |
839 | ||
840 | # get file names to use for syntax check | |
841 | my $ifname = $source_object->get_input_file_copy_name(); | |
842 | my $ofname = $sink_object->get_output_file_copy(); | |
843 | ||
844 | #--------------------------------------------------------------- | |
845 | # handle the -b option (backup and modify in-place) | |
846 | #--------------------------------------------------------------- | |
847 | if ($in_place_modify) { | |
848 | unless ( -f $input_file ) { | |
849 | ||
850 | # oh, oh, no real file to backup .. | |
851 | # shouldn't happen because of numerous preliminary checks | |
852 | die print | |
853 | "problem with -b backing up input file '$input_file': not a file\n"; | |
854 | } | |
855 | my $backup_name = $input_file . $backup_extension; | |
856 | if ( -f $backup_name ) { | |
857 | unlink($backup_name) | |
858 | or die | |
859 | "unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; | |
860 | } | |
861 | rename( $input_file, $backup_name ) | |
862 | or die | |
863 | "problem renaming $input_file to $backup_name for -b option: $!\n"; | |
864 | $ifname = $backup_name; | |
865 | ||
866 | seek( $output_file, 0, 0 ) | |
867 | or die "unable to rewind tmp file for -b option: $!\n"; | |
868 | ||
869 | my $fout = IO::File->new("> $input_file") | |
870 | or die | |
871 | "problem opening $input_file for write for -b option; check directory permissions: $!\n"; | |
872 | my $line; | |
873 | while ( $line = $output_file->getline() ) { | |
874 | $fout->print($line); | |
875 | } | |
876 | $fout->close(); | |
877 | $output_file = $input_file; | |
878 | $ofname = $input_file; | |
879 | } | |
880 | ||
881 | #--------------------------------------------------------------- | |
882 | # clean up and report errors | |
883 | #--------------------------------------------------------------- | |
884 | $sink_object->close_output_file() if $sink_object; | |
885 | $debugger_object->close_debug_file() if $debugger_object; | |
886 | ||
887 | my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes | |
888 | if ($output_file) { | |
889 | ||
890 | if ($input_file_permissions) { | |
891 | ||
892 | # give output script same permissions as input script, but | |
893 | # make it user-writable or else we can't run perltidy again. | |
894 | # Thus we retain whatever executable flags were set. | |
895 | if ( $rOpts->{'format'} eq 'tidy' ) { | |
896 | chmod( $input_file_permissions | 0600, $output_file ); | |
897 | } | |
898 | ||
899 | # else use default permissions for html and any other format | |
900 | ||
901 | } | |
902 | if ( $logger_object && $rOpts->{'check-syntax'} ) { | |
903 | $infile_syntax_ok = | |
904 | check_syntax( $ifname, $ofname, $logger_object, $rOpts ); | |
905 | } | |
906 | } | |
907 | ||
908 | $logger_object->finish( $infile_syntax_ok, $formatter ) | |
909 | if $logger_object; | |
910 | } # end of loop to process all files | |
911 | } # end of main program | |
912 | } | |
913 | ||
914 | sub fileglob_to_re { | |
915 | ||
916 | # modified (corrected) from version in find2perl | |
917 | my $x = shift; | |
918 | $x =~ s#([./^\$()])#\\$1#g; # escape special characters | |
919 | $x =~ s#\*#.*#g; # '*' -> '.*' | |
920 | $x =~ s#\?#.#g; # '?' -> '.' | |
921 | "^$x\\z"; # match whole word | |
922 | } | |
923 | ||
924 | sub make_extension { | |
925 | ||
926 | # Make a file extension, including any leading '.' if necessary | |
927 | # The '.' may actually be an '_' under VMS | |
928 | my ( $extension, $default, $dot ) = @_; | |
929 | ||
930 | # Use the default if none specified | |
931 | $extension = $default unless ($extension); | |
932 | ||
933 | # Only extensions with these leading characters get a '.' | |
934 | # This rule gives the user some freedom | |
935 | if ( $extension =~ /^[a-zA-Z0-9]/ ) { | |
936 | $extension = $dot . $extension; | |
937 | } | |
938 | return $extension; | |
939 | } | |
940 | ||
941 | sub write_logfile_header { | |
942 | my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) = | |
943 | @_; | |
944 | $logger_object->write_logfile_entry( | |
945 | "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" | |
946 | ); | |
947 | if ($Windows_type) { | |
948 | $logger_object->write_logfile_entry("Windows type is $Windows_type\n"); | |
949 | } | |
950 | my $options_string = join( ' ', @$rraw_options ); | |
951 | ||
952 | if ($config_file) { | |
953 | $logger_object->write_logfile_entry( | |
954 | "Found Configuration File >>> $config_file \n"); | |
955 | } | |
956 | $logger_object->write_logfile_entry( | |
957 | "Configuration and command line parameters for this run:\n"); | |
958 | $logger_object->write_logfile_entry("$options_string\n"); | |
959 | ||
960 | if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { | |
961 | $rOpts->{'logfile'} = 1; # force logfile to be saved | |
962 | $logger_object->write_logfile_entry( | |
963 | "Final parameter set for this run\n"); | |
964 | $logger_object->write_logfile_entry( | |
965 | "------------------------------------\n"); | |
966 | ||
967 | foreach ( keys %{$rOpts} ) { | |
968 | $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" ); | |
969 | } | |
970 | $logger_object->write_logfile_entry( | |
971 | "------------------------------------\n"); | |
972 | } | |
973 | $logger_object->write_logfile_entry( | |
974 | "To find error messages search for 'WARNING' with your editor\n"); | |
975 | } | |
976 | ||
977 | sub process_command_line { | |
978 | ||
979 | my ( $perltidyrc_stream, $is_Windows, $Windows_type, $rpending_complaint ) = | |
980 | @_; | |
981 | ||
982 | use Getopt::Long; | |
983 | ||
984 | ###################################################################### | |
985 | # Note: a few options are not documented in the man page and usage | |
986 | # message. This is because these are experimental or debug options and | |
987 | # may or may not be retained in future versions. | |
988 | # | |
989 | # Here are the undocumented flags as far as I know. Any of them | |
990 | # may disappear at any time. They are mainly for fine-tuning | |
991 | # and debugging. | |
992 | # | |
993 | # fll --> fuzzy-line-length # a trivial parameter which gets | |
994 | # turned off for the extrude option | |
995 | # which is mainly for debugging | |
996 | # chk --> check-multiline-quotes # check for old bug; to be deleted | |
997 | # scl --> short-concatenation-item-length # helps break at '.' | |
998 | # recombine # for debugging line breaks | |
999 | # I --> DIAGNOSTICS # for debugging | |
1000 | ###################################################################### | |
1001 | ||
1002 | # here is a summary of the Getopt codes: | |
1003 | # <none> does not take an argument | |
1004 | # =s takes a mandatory string | |
1005 | # :s takes an optional string (DO NOT USE - filenames will get eaten up) | |
1006 | # =i takes a mandatory integer | |
1007 | # :i takes an optional integer (NOT RECOMMENDED - can cause trouble) | |
1008 | # ! does not take an argument and may be negated | |
1009 | # i.e., -foo and -nofoo are allowed | |
1010 | # a double dash signals the end of the options list | |
1011 | # | |
1012 | #--------------------------------------------------------------- | |
1013 | # Define the option string passed to GetOptions. | |
1014 | #--------------------------------------------------------------- | |
1015 | ||
1016 | my @option_string = (); | |
1017 | my %expansion = (); | |
1018 | my $rexpansion = \%expansion; | |
1019 | ||
1020 | # These options are parsed directly by perltidy: | |
1021 | # help h | |
1022 | # version v | |
1023 | # However, they are included in the option set so that they will | |
1024 | # be seen in the options dump. | |
1025 | ||
1026 | # These long option names have no abbreviations or are treated specially | |
1027 | @option_string = qw( | |
1028 | html! | |
1029 | noprofile | |
1030 | no-profile | |
1031 | npro | |
1032 | recombine! | |
1033 | ); | |
1034 | ||
1035 | # routine to install and check options | |
1036 | my $add_option = sub { | |
1037 | my ( $long_name, $short_name, $flag ) = @_; | |
1038 | push @option_string, $long_name . $flag; | |
1039 | if ($short_name) { | |
1040 | if ( $expansion{$short_name} ) { | |
1041 | my $existing_name = $expansion{$short_name}[0]; | |
1042 | die | |
1043 | "redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; | |
1044 | } | |
1045 | $expansion{$short_name} = [$long_name]; | |
1046 | if ( $flag eq '!' ) { | |
1047 | my $nshort_name = 'n' . $short_name; | |
1048 | my $nolong_name = 'no' . $long_name; | |
1049 | if ( $expansion{$nshort_name} ) { | |
1050 | my $existing_name = $expansion{$nshort_name}[0]; | |
1051 | die | |
1052 | "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; | |
1053 | } | |
1054 | $expansion{$nshort_name} = [$nolong_name]; | |
1055 | } | |
1056 | } | |
1057 | }; | |
1058 | ||
1059 | # Install long option names which have a simple abbreviation. | |
1060 | # Options with code '!' get standard negation ('no' for long names, | |
1061 | # 'n' for abbreviations) | |
1062 | $add_option->( 'DEBUG', 'D', '!' ); | |
1063 | $add_option->( 'DIAGNOSTICS', 'I', '!' ); | |
1064 | $add_option->( 'add-newlines', 'anl', '!' ); | |
1065 | $add_option->( 'add-semicolons', 'asc', '!' ); | |
1066 | $add_option->( 'add-whitespace', 'aws', '!' ); | |
1067 | $add_option->( 'backup-and-modify-in-place', 'b', '!' ); | |
1068 | $add_option->( 'backup-file-extension', 'bext', '=s' ); | |
1069 | $add_option->( 'blanks-before-blocks', 'bbb', '!' ); | |
1070 | $add_option->( 'blanks-before-comments', 'bbc', '!' ); | |
1071 | $add_option->( 'blanks-before-subs', 'bbs', '!' ); | |
1072 | $add_option->( 'block-brace-tightness', 'bbt', '=i' ); | |
1073 | $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); | |
1074 | $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); | |
1075 | $add_option->( 'brace-left-and-indent', 'bli', '!' ); | |
1076 | $add_option->( 'brace-left-and-indent-list', 'blil', '=s' ); | |
1077 | $add_option->( 'brace-tightness', 'bt', '=i' ); | |
1078 | $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); | |
1079 | $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); | |
1080 | $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' ); | |
1081 | $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); | |
1082 | $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); | |
1083 | $add_option->( 'break-at-old-trinary-breakpoints', 'bot', '!' ); | |
1084 | $add_option->( 'check-multiline-quotes', 'chk', '!' ); | |
1085 | $add_option->( 'check-syntax', 'syn', '!' ); | |
1086 | $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' ); | |
1087 | $add_option->( 'closing-side-comment-interval', 'csci', '=i' ); | |
1088 | $add_option->( 'closing-side-comment-list', 'cscl', '=s' ); | |
1089 | $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' ); | |
1090 | $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); | |
1091 | $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); | |
1092 | $add_option->( 'closing-side-comments', 'csc', '!' ); | |
1093 | $add_option->( 'closing-token-indentation', 'cti', '=i' ); | |
1094 | $add_option->( 'closing-paren-indentation', 'cpi', '=i' ); | |
1095 | $add_option->( 'closing-brace-indentation', 'cbi', '=i' ); | |
1096 | $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' ); | |
1097 | $add_option->( 'continuation-indentation', 'ci', '=i' ); | |
1098 | $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' ); | |
1099 | $add_option->( 'cuddled-else', 'ce', '!' ); | |
1100 | $add_option->( 'delete-block-comments', 'dbc', '!' ); | |
1101 | $add_option->( 'delete-closing-side-comments', 'dcsc', '!' ); | |
1102 | $add_option->( 'delete-old-newlines', 'dnl', '!' ); | |
1103 | $add_option->( 'delete-old-whitespace', 'dws', '!' ); | |
1104 | $add_option->( 'delete-pod', 'dp', '!' ); | |
1105 | $add_option->( 'delete-semicolons', 'dsm', '!' ); | |
1106 | $add_option->( 'delete-side-comments', 'dsc', '!' ); | |
1107 | $add_option->( 'dump-defaults', 'ddf', '!' ); | |
1108 | $add_option->( 'dump-long-names', 'dln', '!' ); | |
1109 | $add_option->( 'dump-options', 'dop', '!' ); | |
1110 | $add_option->( 'dump-profile', 'dpro', '!' ); | |
1111 | $add_option->( 'dump-short-names', 'dsn', '!' ); | |
1112 | $add_option->( 'dump-token-types', 'dtt', '!' ); | |
1113 | $add_option->( 'dump-want-left-space', 'dwls', '!' ); | |
1114 | $add_option->( 'dump-want-right-space', 'dwrs', '!' ); | |
1115 | $add_option->( 'entab-leading-whitespace', 'et', '=i' ); | |
1116 | $add_option->( 'force-read-binary', 'f', '!' ); | |
1117 | $add_option->( 'format', 'fmt', '=s' ); | |
1118 | $add_option->( 'fuzzy-line-length', 'fll', '!' ); | |
1119 | $add_option->( 'hanging-side-comments', 'hsc', '!' ); | |
1120 | $add_option->( 'help', 'h', '' ); | |
1121 | $add_option->( 'ignore-old-line-breaks', 'iob', '!' ); | |
1122 | $add_option->( 'indent-block-comments', 'ibc', '!' ); | |
1123 | $add_option->( 'indent-closing-brace', 'icb', '!' ); | |
1124 | $add_option->( 'indent-columns', 'i', '=i' ); | |
1125 | $add_option->( 'indent-spaced-block-comments', 'isbc', '!' ); | |
1126 | $add_option->( 'line-up-parentheses', 'lp', '!' ); | |
1127 | $add_option->( 'logfile', 'log', '!' ); | |
1128 | $add_option->( 'logfile-gap', 'g', ':i' ); | |
1129 | $add_option->( 'long-block-line-count', 'lbl', '=i' ); | |
1130 | $add_option->( 'look-for-autoloader', 'lal', '!' ); | |
1131 | $add_option->( 'look-for-hash-bang', 'x', '!' ); | |
1132 | $add_option->( 'look-for-selfloader', 'lsl', '!' ); | |
1133 | $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); | |
1134 | $add_option->( 'maximum-fields-per-table', 'mft', '=i' ); | |
1135 | $add_option->( 'maximum-line-length', 'l', '=i' ); | |
1136 | $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); | |
1137 | $add_option->( 'nowant-left-space', 'nwls', '=s' ); | |
1138 | $add_option->( 'nowant-right-space', 'nwrs', '=s' ); | |
1139 | $add_option->( 'nospace-after-keyword', 'nsak', '=s' ); | |
1140 | $add_option->( 'opening-brace-always-on-right', 'bar', '' ); | |
1141 | $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); | |
1142 | $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); | |
1143 | $add_option->( 'outdent-keyword-list', 'okwl', '=s' ); | |
1144 | $add_option->( 'outdent-keywords', 'okw', '!' ); | |
1145 | $add_option->( 'outdent-labels', 'ola', '!' ); | |
1146 | $add_option->( 'outdent-long-comments', 'olc', '!' ); | |
1147 | $add_option->( 'outdent-long-quotes', 'olq', '!' ); | |
1148 | $add_option->( 'outdent-static-block-comments', 'osbc', '!' ); | |
1149 | $add_option->( 'outfile', 'o', '=s' ); | |
1150 | $add_option->( 'output-file-extension', 'oext', '=s' ); | |
1151 | $add_option->( 'output-line-ending', 'ole', '=s' ); | |
1152 | $add_option->( 'output-path', 'opath', '=s' ); | |
1153 | $add_option->( 'paren-tightness', 'pt', '=i' ); | |
1154 | $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); | |
1155 | $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); | |
1156 | $add_option->( 'pass-version-line', 'pvl', '!' ); | |
1157 | $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); | |
1158 | $add_option->( 'preserve-line-endings', 'ple', '!' ); | |
1159 | $add_option->( 'profile', 'pro', '=s' ); | |
1160 | $add_option->( 'quiet', 'q', '!' ); | |
1161 | $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); | |
1162 | $add_option->( 'show-options', 'opt', '!' ); | |
1163 | $add_option->( 'space-after-keyword', 'sak', '=s' ); | |
1164 | $add_option->( 'space-for-semicolon', 'sfs', '!' ); | |
1165 | $add_option->( 'space-terminal-semicolon', 'sts', '!' ); | |
1166 | $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); | |
1167 | $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' ); | |
1168 | $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' ); | |
1169 | $add_option->( 'standard-error-output', 'se', '!' ); | |
1170 | $add_option->( 'standard-output', 'st', '!' ); | |
1171 | $add_option->( 'starting-indentation-level', 'sil', '=i' ); | |
1172 | $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' ); | |
1173 | $add_option->( 'static-block-comments', 'sbc', '!' ); | |
1174 | $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); | |
1175 | $add_option->( 'static-side-comments', 'ssc', '!' ); | |
1176 | $add_option->( 'swallow-optional-blank-lines', 'sob', '!' ); | |
1177 | $add_option->( 'tabs', 't', '!' ); | |
1178 | $add_option->( 'tee-block-comments', 'tbc', '!' ); | |
1179 | $add_option->( 'tee-pod', 'tp', '!' ); | |
1180 | $add_option->( 'tee-side-comments', 'tsc', '!' ); | |
1181 | $add_option->( 'trim-qw', 'tqw', '!' ); | |
1182 | $add_option->( 'version', 'v', '' ); | |
1183 | $add_option->( 'vertical-tightness', 'vt', '=i' ); | |
1184 | $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); | |
1185 | $add_option->( 'want-break-after', 'wba', '=s' ); | |
1186 | $add_option->( 'want-break-before', 'wbb', '=s' ); | |
1187 | $add_option->( 'want-left-space', 'wls', '=s' ); | |
1188 | $add_option->( 'want-right-space', 'wrs', '=s' ); | |
1189 | $add_option->( 'warning-output', 'w', '!' ); | |
1190 | ||
1191 | # The Perl::Tidy::HtmlWriter will add its own options to the string | |
1192 | Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string ); | |
1193 | ||
1194 | #--------------------------------------------------------------- | |
1195 | # Assign default values to the above options here, except | |
1196 | # for 'outfile' and 'help'. | |
1197 | # These settings should approximate the perlstyle(1) suggestions. | |
1198 | #--------------------------------------------------------------- | |
1199 | my @defaults = qw( | |
1200 | add-newlines | |
1201 | add-semicolons | |
1202 | add-whitespace | |
1203 | blanks-before-blocks | |
1204 | blanks-before-comments | |
1205 | blanks-before-subs | |
1206 | block-brace-tightness=0 | |
1207 | block-brace-vertical-tightness=0 | |
1208 | brace-tightness=1 | |
1209 | brace-vertical-tightness-closing=0 | |
1210 | brace-vertical-tightness=0 | |
1211 | break-at-old-logical-breakpoints | |
1212 | break-at-old-trinary-breakpoints | |
1213 | break-at-old-keyword-breakpoints | |
1214 | comma-arrow-breakpoints=1 | |
1215 | nocheck-syntax | |
1216 | closing-side-comment-interval=6 | |
1217 | closing-side-comment-maximum-text=20 | |
1218 | closing-side-comment-else-flag=0 | |
1219 | closing-paren-indentation=0 | |
1220 | closing-brace-indentation=0 | |
1221 | closing-square-bracket-indentation=0 | |
1222 | continuation-indentation=2 | |
1223 | delete-old-newlines | |
1224 | delete-semicolons | |
1225 | fuzzy-line-length | |
1226 | hanging-side-comments | |
1227 | indent-block-comments | |
1228 | indent-columns=4 | |
1229 | long-block-line-count=8 | |
1230 | look-for-autoloader | |
1231 | look-for-selfloader | |
1232 | maximum-consecutive-blank-lines=1 | |
1233 | maximum-fields-per-table=0 | |
1234 | maximum-line-length=80 | |
1235 | minimum-space-to-comment=4 | |
1236 | nobrace-left-and-indent | |
1237 | nocuddled-else | |
1238 | nodelete-old-whitespace | |
1239 | nohtml | |
1240 | nologfile | |
1241 | noquiet | |
1242 | noshow-options | |
1243 | nostatic-side-comments | |
1244 | noswallow-optional-blank-lines | |
1245 | notabs | |
1246 | nowarning-output | |
1247 | outdent-labels | |
1248 | outdent-long-quotes | |
1249 | outdent-long-comments | |
1250 | paren-tightness=1 | |
1251 | paren-vertical-tightness-closing=0 | |
1252 | paren-vertical-tightness=0 | |
1253 | pass-version-line | |
1254 | recombine | |
1255 | short-concatenation-item-length=8 | |
1256 | space-for-semicolon | |
1257 | square-bracket-tightness=1 | |
1258 | square-bracket-vertical-tightness-closing=0 | |
1259 | square-bracket-vertical-tightness=0 | |
1260 | static-block-comments | |
1261 | trim-qw | |
1262 | format=tidy | |
1263 | backup-file-extension=bak | |
1264 | ||
1265 | pod2html | |
1266 | html-table-of-contents | |
1267 | html-entities | |
1268 | ); | |
1269 | ||
1270 | push @defaults, "perl-syntax-check-flags=-c -T"; | |
1271 | ||
1272 | #--------------------------------------------------------------- | |
1273 | # set the defaults by passing the above list through GetOptions | |
1274 | #--------------------------------------------------------------- | |
1275 | my %Opts = (); | |
1276 | { | |
1277 | local @ARGV; | |
1278 | my $i; | |
1279 | ||
1280 | for $i (@defaults) { push @ARGV, "--" . $i } | |
1281 | ||
1282 | if ( !GetOptions( \%Opts, @option_string ) ) { | |
1283 | die "Programming Bug: error in setting default options"; | |
1284 | } | |
1285 | } | |
1286 | ||
1287 | #--------------------------------------------------------------- | |
1288 | # Define abbreviations which will be expanded into the above primitives. | |
1289 | # These may be defined recursively. | |
1290 | #--------------------------------------------------------------- | |
1291 | %expansion = ( | |
1292 | %expansion, | |
1293 | 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], | |
1294 | 'fnl' => [qw(freeze-newlines)], | |
1295 | 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], | |
1296 | 'fws' => [qw(freeze-whitespace)], | |
1297 | 'indent-only' => [qw(freeze-newlines freeze-whitespace)], | |
1298 | 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], | |
1299 | 'nooutdent-long-lines' => | |
1300 | [qw(nooutdent-long-quotes nooutdent-long-comments)], | |
1301 | 'noll' => [qw(nooutdent-long-lines)], | |
1302 | 'io' => [qw(indent-only)], | |
1303 | 'delete-all-comments' => | |
1304 | [qw(delete-block-comments delete-side-comments delete-pod)], | |
1305 | 'nodelete-all-comments' => | |
1306 | [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], | |
1307 | 'dac' => [qw(delete-all-comments)], | |
1308 | 'ndac' => [qw(nodelete-all-comments)], | |
1309 | 'gnu' => [qw(gnu-style)], | |
1310 | 'tee-all-comments' => | |
1311 | [qw(tee-block-comments tee-side-comments tee-pod)], | |
1312 | 'notee-all-comments' => | |
1313 | [qw(notee-block-comments notee-side-comments notee-pod)], | |
1314 | 'tac' => [qw(tee-all-comments)], | |
1315 | 'ntac' => [qw(notee-all-comments)], | |
1316 | 'html' => [qw(format=html)], | |
1317 | 'nhtml' => [qw(format=tidy)], | |
1318 | 'tidy' => [qw(format=tidy)], | |
1319 | ||
1320 | 'break-after-comma-arrows' => [qw(cab=0)], | |
1321 | 'nobreak-after-comma-arrows' => [qw(cab=1)], | |
1322 | 'baa' => [qw(cab=0)], | |
1323 | 'nbaa' => [qw(cab=1)], | |
1324 | ||
1325 | 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], | |
1326 | 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)], | |
1327 | 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)], | |
1328 | 'icp' => [qw(cpi=2 cbi=2 csbi=2)], | |
1329 | 'nicp' => [qw(cpi=0 cbi=0 csbi=0)], | |
1330 | ||
1331 | 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)], | |
1332 | 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)], | |
1333 | 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)], | |
1334 | 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)], | |
1335 | 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)], | |
1336 | ||
1337 | 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)], | |
1338 | 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)], | |
1339 | 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)], | |
1340 | ||
1341 | 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)], | |
1342 | 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)], | |
1343 | 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)], | |
1344 | ||
1345 | 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], | |
1346 | 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], | |
1347 | 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], | |
1348 | ||
1349 | 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], | |
1350 | 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], | |
1351 | 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], | |
1352 | ||
1353 | # 'mangle' originally deleted pod and comments, but to keep it | |
1354 | # reversible, it no longer does. But if you really want to | |
1355 | # delete them, just use: | |
1356 | # -mangle -dac | |
1357 | ||
1358 | # An interesting use for 'mangle' is to do this: | |
1359 | # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new | |
1360 | # which will form as many one-line blocks as possible | |
1361 | ||
1362 | 'mangle' => [ | |
1363 | qw( | |
1364 | check-syntax | |
1365 | delete-old-newlines | |
1366 | delete-old-whitespace | |
1367 | delete-semicolons | |
1368 | indent-columns=0 | |
1369 | maximum-consecutive-blank-lines=0 | |
1370 | maximum-line-length=100000 | |
1371 | noadd-newlines | |
1372 | noadd-semicolons | |
1373 | noadd-whitespace | |
1374 | noblanks-before-blocks | |
1375 | noblanks-before-subs | |
1376 | notabs | |
1377 | ) | |
1378 | ], | |
1379 | ||
1380 | # 'extrude' originally deleted pod and comments, but to keep it | |
1381 | # reversible, it no longer does. But if you really want to | |
1382 | # delete them, just use | |
1383 | # extrude -dac | |
1384 | # | |
1385 | # An interesting use for 'extrude' is to do this: | |
1386 | # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new | |
1387 | # which will break up all one-line blocks. | |
1388 | ||
1389 | 'extrude' => [ | |
1390 | qw( | |
1391 | check-syntax | |
1392 | ci=0 | |
1393 | delete-old-newlines | |
1394 | delete-old-whitespace | |
1395 | delete-semicolons | |
1396 | indent-columns=0 | |
1397 | maximum-consecutive-blank-lines=0 | |
1398 | maximum-line-length=1 | |
1399 | noadd-semicolons | |
1400 | noadd-whitespace | |
1401 | noblanks-before-blocks | |
1402 | noblanks-before-subs | |
1403 | nofuzzy-line-length | |
1404 | notabs | |
1405 | ) | |
1406 | ], | |
1407 | ||
1408 | # this style tries to follow the GNU Coding Standards (which do | |
1409 | # not really apply to perl but which are followed by some perl | |
1410 | # programmers). | |
1411 | 'gnu-style' => [ | |
1412 | qw( | |
1413 | lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1 | |
1414 | ) | |
1415 | ], | |
1416 | ||
1417 | # Additional styles can be added here | |
1418 | ); | |
1419 | ||
1420 | Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion ); | |
1421 | ||
1422 | # Uncomment next line to dump all expansions for debugging: | |
1423 | # dump_short_names(\%expansion); | |
1424 | ||
1425 | my $word; | |
1426 | my @raw_options = (); | |
1427 | my $config_file = ""; | |
1428 | my $saw_ignore_profile = 0; | |
1429 | my $saw_extrude = 0; | |
1430 | my $saw_dump_profile = 0; | |
1431 | my $i; | |
1432 | ||
1433 | #--------------------------------------------------------------- | |
1434 | # Take a first look at the command-line parameters. Do as many | |
1435 | # immediate dumps as possible, which can avoid confusion if the | |
1436 | # perltidyrc file has an error. | |
1437 | #--------------------------------------------------------------- | |
1438 | foreach $i (@ARGV) { | |
1439 | ||
1440 | $i =~ s/^--/-/; | |
1441 | if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) { | |
1442 | $saw_ignore_profile = 1; | |
1443 | } | |
1444 | ||
1445 | # note: this must come before -pro and -profile, below: | |
1446 | elsif ( $i =~ /^-(dump-profile|dpro)$/ ) { | |
1447 | $saw_dump_profile = 1; | |
1448 | } | |
1449 | elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { | |
1450 | if ($config_file) { | |
1451 | warn | |
1452 | "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; | |
1453 | } | |
1454 | $config_file = $2; | |
1455 | unless ( -e $config_file ) { | |
1456 | warn "cannot find file given with -pro=$config_file: $!\n"; | |
1457 | $config_file = ""; | |
1458 | } | |
1459 | } | |
1460 | elsif ( $i =~ /^-(pro|profile)=?$/ ) { | |
1461 | die "usage: -pro=filename or --profile=filename, no spaces\n"; | |
1462 | } | |
1463 | elsif ( $i =~ /^-extrude$/ ) { | |
1464 | $saw_extrude = 1; | |
1465 | } | |
1466 | elsif ( $i =~ /^-(help|h|HELP|H)$/ ) { | |
1467 | usage(); | |
1468 | exit 1; | |
1469 | } | |
1470 | elsif ( $i =~ /^-(version|v)$/ ) { | |
1471 | show_version(); | |
1472 | exit 1; | |
1473 | } | |
1474 | elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { | |
1475 | dump_defaults(@defaults); | |
1476 | exit 1; | |
1477 | } | |
1478 | elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { | |
1479 | dump_long_names(@option_string); | |
1480 | exit 1; | |
1481 | } | |
1482 | elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { | |
1483 | dump_short_names( \%expansion ); | |
1484 | exit 1; | |
1485 | } | |
1486 | elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { | |
1487 | Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); | |
1488 | exit 1; | |
1489 | } | |
1490 | } | |
1491 | ||
1492 | if ( $saw_dump_profile && $saw_ignore_profile ) { | |
1493 | warn "No profile to dump because of -npro\n"; | |
1494 | exit 1; | |
1495 | } | |
1496 | ||
1497 | #--------------------------------------------------------------- | |
1498 | # read any .perltidyrc configuration file | |
1499 | #--------------------------------------------------------------- | |
1500 | unless ($saw_ignore_profile) { | |
1501 | ||
1502 | # resolve possible conflict between $perltidyrc_stream passed | |
1503 | # as call parameter to perltidy and -pro=filename on command | |
1504 | # line. | |
1505 | if ($perltidyrc_stream) { | |
1506 | if ($config_file) { | |
1507 | warn <<EOM; | |
1508 | Conflict: a perltidyrc configuration file was specified both as this | |
1509 | perltidy call parameter: $perltidyrc_stream | |
1510 | and with this -profile=$config_file. | |
1511 | Using -profile=$config_file. | |
1512 | EOM | |
1513 | } | |
1514 | else { | |
1515 | $config_file = $perltidyrc_stream; | |
1516 | } | |
1517 | } | |
1518 | ||
1519 | # look for a config file if we don't have one yet | |
1520 | my $rconfig_file_chatter; | |
1521 | $$rconfig_file_chatter = ""; | |
1522 | $config_file = | |
1523 | find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter, | |
1524 | $rpending_complaint ) | |
1525 | unless $config_file; | |
1526 | ||
1527 | # open any config file | |
1528 | my $fh_config; | |
1529 | if ($config_file) { | |
1530 | ( $fh_config, $config_file ) = | |
1531 | Perl::Tidy::streamhandle( $config_file, 'r' ); | |
1532 | unless ($fh_config) { | |
1533 | $$rconfig_file_chatter .= | |
1534 | "# $config_file exists but cannot be opened\n"; | |
1535 | } | |
1536 | } | |
1537 | ||
1538 | if ($saw_dump_profile) { | |
1539 | if ($saw_dump_profile) { | |
1540 | dump_config_file( $fh_config, $config_file, | |
1541 | $rconfig_file_chatter ); | |
1542 | exit 1; | |
1543 | } | |
1544 | } | |
1545 | ||
1546 | if ($fh_config) { | |
1547 | ||
1548 | my $rconfig_list = | |
1549 | read_config_file( $fh_config, $config_file, \%expansion ); | |
1550 | ||
1551 | # process any .perltidyrc parameters right now so we can | |
1552 | # localize errors | |
1553 | if (@$rconfig_list) { | |
1554 | local @ARGV = @$rconfig_list; | |
1555 | ||
1556 | expand_command_abbreviations( \%expansion, \@raw_options, | |
1557 | $config_file ); | |
1558 | ||
1559 | if ( !GetOptions( \%Opts, @option_string ) ) { | |
1560 | die | |
1561 | "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n"; | |
1562 | } | |
1563 | ||
1564 | # Undo any options which cause premature exit. They are not | |
1565 | # appropriate for a config file, and it could be hard to | |
1566 | # diagnose the cause of the premature exit. | |
1567 | foreach ( | |
1568 | qw{ | |
1569 | dump-defaults | |
1570 | dump-long-names | |
1571 | dump-options | |
1572 | dump-profile | |
1573 | dump-short-names | |
1574 | dump-token-types | |
1575 | dump-want-left-space | |
1576 | dump-want-right-space | |
1577 | help | |
1578 | stylesheet | |
1579 | version | |
1580 | } | |
1581 | ) | |
1582 | { | |
1583 | if ( defined( $Opts{$_} ) ) { | |
1584 | delete $Opts{$_}; | |
1585 | warn "ignoring --$_ in config file: $config_file\n"; | |
1586 | } | |
1587 | } | |
1588 | } | |
1589 | } | |
1590 | } | |
1591 | ||
1592 | #--------------------------------------------------------------- | |
1593 | # now process the command line parameters | |
1594 | #--------------------------------------------------------------- | |
1595 | expand_command_abbreviations( \%expansion, \@raw_options, $config_file ); | |
1596 | ||
1597 | if ( !GetOptions( \%Opts, @option_string ) ) { | |
1598 | die "Error on command line; for help try 'perltidy -h'\n"; | |
1599 | } | |
1600 | ||
1601 | if ( $Opts{'dump-options'} ) { | |
1602 | dump_options( \%Opts ); | |
1603 | exit 1; | |
1604 | } | |
1605 | ||
1606 | #--------------------------------------------------------------- | |
1607 | # Now we have to handle any interactions among the options.. | |
1608 | #--------------------------------------------------------------- | |
1609 | ||
1610 | # Since -vt, -vtc, and -cti are abbreviations, but under | |
1611 | # msdos, an unquoted input parameter like vtc=1 will be | |
1612 | # seen as 2 parameters, vtc and 1, so the abbreviations | |
1613 | # won't be seen. Therefore, we will catch them here if | |
1614 | # they get through. | |
1615 | ||
1616 | if ( defined $Opts{'vertical-tightness'} ) { | |
1617 | my $vt = $Opts{'vertical-tightness'}; | |
1618 | $Opts{'paren-vertical-tightness'} = $vt; | |
1619 | $Opts{'square-bracket-vertical-tightness'} = $vt; | |
1620 | $Opts{'brace-vertical-tightness'} = $vt; | |
1621 | } | |
1622 | ||
1623 | if ( defined $Opts{'vertical-tightness-closing'} ) { | |
1624 | my $vtc = $Opts{'vertical-tightness-closing'}; | |
1625 | $Opts{'paren-vertical-tightness-closing'} = $vtc; | |
1626 | $Opts{'square-bracket-vertical-tightness-closing'} = $vtc; | |
1627 | $Opts{'brace-vertical-tightness-closing'} = $vtc; | |
1628 | } | |
1629 | ||
1630 | if ( defined $Opts{'closing-token-indentation'} ) { | |
1631 | my $cti = $Opts{'closing-token-indentation'}; | |
1632 | $Opts{'closing-square-bracket-indentation'} = $cti; | |
1633 | $Opts{'closing-brace-indentation'} = $cti; | |
1634 | $Opts{'closing-paren-indentation'} = $cti; | |
1635 | } | |
1636 | ||
1637 | # In quiet mode, there is no log file and hence no way to report | |
1638 | # results of syntax check, so don't do it. | |
1639 | if ( $Opts{'quiet'} ) { | |
1640 | $Opts{'check-syntax'} = 0; | |
1641 | } | |
1642 | ||
1643 | # can't check syntax if no output | |
1644 | if ( $Opts{'format'} ne 'tidy' ) { | |
1645 | $Opts{'check-syntax'} = 0; | |
1646 | } | |
1647 | ||
1648 | # Never let Windows 9x/Me systems run syntax check -- this will prevent a | |
1649 | # wide variety of nasty problems on these systems, because they cannot | |
1650 | # reliably run backticks. Don't even think about changing this! | |
1651 | if ( $Opts{'check-syntax'} | |
1652 | && $is_Windows | |
1653 | && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) ) | |
1654 | { | |
1655 | $Opts{'check-syntax'} = 0; | |
1656 | } | |
1657 | ||
1658 | # It's really a bad idea to check syntax as root unless you wrote | |
1659 | # the script yourself. FIXME: not sure if this works with VMS | |
1660 | unless ($is_Windows) { | |
1661 | ||
1662 | if ( $< == 0 && $Opts{'check-syntax'} ) { | |
1663 | $Opts{'check-syntax'} = 0; | |
1664 | $$rpending_complaint .= | |
1665 | "Syntax check deactivated for safety; you shouldn't run this as root\n"; | |
1666 | } | |
1667 | } | |
1668 | ||
1669 | # see if user set a non-negative logfile-gap | |
1670 | if ( defined( $Opts{'logfile-gap'} ) && $Opts{'logfile-gap'} >= 0 ) { | |
1671 | ||
1672 | # a zero gap will be taken as a 1 | |
1673 | if ( $Opts{'logfile-gap'} == 0 ) { | |
1674 | $Opts{'logfile-gap'} = 1; | |
1675 | } | |
1676 | ||
1677 | # setting a non-negative logfile gap causes logfile to be saved | |
1678 | $Opts{'logfile'} = 1; | |
1679 | } | |
1680 | ||
1681 | # not setting logfile gap, or setting it negative, causes default of 50 | |
1682 | else { | |
1683 | $Opts{'logfile-gap'} = 50; | |
1684 | } | |
1685 | ||
1686 | # set short-cut flag when only indentation is to be done. | |
1687 | # Note that the user may or may not have already set the | |
1688 | # indent-only flag. | |
1689 | if ( !$Opts{'add-whitespace'} | |
1690 | && !$Opts{'delete-old-whitespace'} | |
1691 | && !$Opts{'add-newlines'} | |
1692 | && !$Opts{'delete-old-newlines'} ) | |
1693 | { | |
1694 | $Opts{'indent-only'} = 1; | |
1695 | } | |
1696 | ||
1697 | # -isbc implies -ibc | |
1698 | if ( $Opts{'indent-spaced-block-comments'} ) { | |
1699 | $Opts{'indent-block-comments'} = 1; | |
1700 | } | |
1701 | ||
1702 | # -bli flag implies -bl | |
1703 | if ( $Opts{'brace-left-and-indent'} ) { | |
1704 | $Opts{'opening-brace-on-new-line'} = 1; | |
1705 | } | |
1706 | ||
1707 | if ( $Opts{'opening-brace-always-on-right'} | |
1708 | && $Opts{'opening-brace-on-new-line'} ) | |
1709 | { | |
1710 | warn <<EOM; | |
1711 | Conflict: you specified both 'opening-brace-always-on-right' (-bar) and | |
1712 | 'opening-brace-on-new-line' (-bl). Ignoring -bl. | |
1713 | EOM | |
1714 | $Opts{'opening-brace-on-new-line'} = 0; | |
1715 | } | |
1716 | ||
1717 | # it simplifies things if -bl is 0 rather than undefined | |
1718 | if ( !defined( $Opts{'opening-brace-on-new-line'} ) ) { | |
1719 | $Opts{'opening-brace-on-new-line'} = 0; | |
1720 | } | |
1721 | ||
1722 | # -sbl defaults to -bl if not defined | |
1723 | if ( !defined( $Opts{'opening-sub-brace-on-new-line'} ) ) { | |
1724 | $Opts{'opening-sub-brace-on-new-line'} = | |
1725 | $Opts{'opening-brace-on-new-line'}; | |
1726 | } | |
1727 | ||
1728 | # set shortcut flag if no blanks to be written | |
1729 | unless ( $Opts{'maximum-consecutive-blank-lines'} ) { | |
1730 | $Opts{'swallow-optional-blank-lines'} = 1; | |
1731 | } | |
1732 | ||
1733 | if ( $Opts{'entab-leading-whitespace'} ) { | |
1734 | if ( $Opts{'entab-leading-whitespace'} < 0 ) { | |
1735 | warn "-et=n must use a positive integer; ignoring -et\n"; | |
1736 | $Opts{'entab-leading-whitespace'} = undef; | |
1737 | } | |
1738 | ||
1739 | # entab leading whitespace has priority over the older 'tabs' option | |
1740 | if ( $Opts{'tabs'} ) { $Opts{'tabs'} = 0; } | |
1741 | } | |
1742 | ||
1743 | if ( $Opts{'output-line-ending'} ) { | |
1744 | unless ( is_unix() ) { | |
1745 | warn "ignoring -ole; only works under unix\n"; | |
1746 | $Opts{'output-line-ending'} = undef; | |
1747 | } | |
1748 | } | |
1749 | if ( $Opts{'preserve-line-endings'} ) { | |
1750 | unless ( is_unix() ) { | |
1751 | warn "ignoring -ple; only works under unix\n"; | |
1752 | $Opts{'preserve-line-endings'} = undef; | |
1753 | } | |
1754 | } | |
1755 | ||
1756 | return ( \%Opts, $config_file, \@raw_options, $saw_extrude ); | |
1757 | ||
1758 | } # end of process_command_line | |
1759 | ||
1760 | sub expand_command_abbreviations { | |
1761 | ||
1762 | # go through @ARGV and expand any abbreviations | |
1763 | ||
1764 | my ( $rexpansion, $rraw_options, $config_file ) = @_; | |
1765 | my ($word); | |
1766 | ||
1767 | # set a pass limit to prevent an infinite loop; | |
1768 | # 10 should be plenty, but it may be increased to allow deeply | |
1769 | # nested expansions. | |
1770 | my $max_passes = 10; | |
1771 | my @new_argv = (); | |
1772 | ||
1773 | # keep looping until all expansions have been converted into actual | |
1774 | # dash parameters.. | |
1775 | for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) { | |
1776 | my @new_argv = (); | |
1777 | my $abbrev_count = 0; | |
1778 | ||
1779 | # loop over each item in @ARGV.. | |
1780 | foreach $word (@ARGV) { | |
1781 | ||
1782 | # convert any leading 'no-' to just 'no' | |
1783 | if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 } | |
1784 | ||
1785 | # if it is a dash flag (instead of a file name).. | |
1786 | if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) { | |
1787 | ||
1788 | my $abr = $1; | |
1789 | my $flags = $2; | |
1790 | ||
1791 | # save the raw input for debug output in case of circular refs | |
1792 | if ( $pass_count == 0 ) { | |
1793 | push( @$rraw_options, $word ); | |
1794 | } | |
1795 | ||
1796 | # recombine abbreviation and flag, if necessary, | |
1797 | # to allow abbreviations with arguments such as '-vt=1' | |
1798 | if ( $rexpansion->{ $abr . $flags } ) { | |
1799 | $abr = $abr . $flags; | |
1800 | $flags = ""; | |
1801 | } | |
1802 | ||
1803 | # if we see this dash item in the expansion hash.. | |
1804 | if ( $rexpansion->{$abr} ) { | |
1805 | $abbrev_count++; | |
1806 | ||
1807 | # stuff all of the words that it expands to into the | |
1808 | # new arg list for the next pass | |
1809 | foreach my $abbrev ( @{ $rexpansion->{$abr} } ) { | |
1810 | next unless $abbrev; # for safety; shouldn't happen | |
1811 | push( @new_argv, '--' . $abbrev . $flags ); | |
1812 | } | |
1813 | } | |
1814 | ||
1815 | # not in expansion hash, must be actual long name | |
1816 | else { | |
1817 | push( @new_argv, $word ); | |
1818 | } | |
1819 | } | |
1820 | ||
1821 | # not a dash item, so just save it for the next pass | |
1822 | else { | |
1823 | push( @new_argv, $word ); | |
1824 | } | |
1825 | } # end of this pass | |
1826 | ||
1827 | # update parameter list @ARGV to the new one | |
1828 | @ARGV = @new_argv; | |
1829 | last unless ( $abbrev_count > 0 ); | |
1830 | ||
1831 | # make sure we are not in an infinite loop | |
1832 | if ( $pass_count == $max_passes ) { | |
1833 | print STDERR | |
1834 | "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n"; | |
1835 | print STDERR "Here are the raw options\n"; | |
1836 | local $" = ')('; | |
1837 | print STDERR "(@$rraw_options)\n"; | |
1838 | my $num = @new_argv; | |
1839 | ||
1840 | if ( $num < 50 ) { | |
1841 | print STDERR "After $max_passes passes here is ARGV\n"; | |
1842 | print STDERR "(@new_argv)\n"; | |
1843 | } | |
1844 | else { | |
1845 | print STDERR "After $max_passes passes ARGV has $num entries\n"; | |
1846 | } | |
1847 | ||
1848 | if ($config_file) { | |
1849 | die <<"DIE"; | |
1850 | Please check your configuration file $config_file for circular-references. | |
1851 | To deactivate it, use -npro. | |
1852 | DIE | |
1853 | } | |
1854 | else { | |
1855 | die <<'DIE'; | |
1856 | Program bug - circular-references in the %expansion hash, probably due to | |
1857 | a recent program change. | |
1858 | DIE | |
1859 | } | |
1860 | } # end of check for circular references | |
1861 | } # end of loop over all passes | |
1862 | } | |
1863 | ||
1864 | # Debug routine -- this will dump the expansion hash | |
1865 | sub dump_short_names { | |
1866 | my $rexpansion = shift; | |
1867 | print STDOUT <<EOM; | |
1868 | List of short names. This list shows how all abbreviations are | |
1869 | translated into other abbreviations and, eventually, into long names. | |
1870 | New abbreviations may be defined in a .perltidyrc file. | |
1871 | For a list of all long names, use perltidy --dump-long-names (-dln). | |
1872 | -------------------------------------------------------------------------- | |
1873 | EOM | |
1874 | foreach my $abbrev ( sort keys %$rexpansion ) { | |
1875 | my @list = @{ $$rexpansion{$abbrev} }; | |
1876 | print STDOUT "$abbrev --> @list\n"; | |
1877 | } | |
1878 | } | |
1879 | ||
1880 | sub check_vms_filename { | |
1881 | ||
1882 | # given a valid filename (the perltidy input file) | |
1883 | # create a modified filename and separator character | |
1884 | # suitable for VMS. | |
1885 | # | |
1886 | # Contributed by Michael Cartmell | |
1887 | # | |
1888 | my ( $base, $path ) = fileparse( $_[0] ); | |
1889 | ||
1890 | # remove explicit ; version | |
1891 | $base =~ s/;-?\d*$// | |
1892 | ||
1893 | # remove explicit . version ie two dots in filename NB ^ escapes a dot | |
1894 | or $base =~ s/( # begin capture $1 | |
1895 | (?:^|[^^])\. # match a dot not preceded by a caret | |
1896 | (?: # followed by nothing | |
1897 | | # or | |
1898 | .*[^^] # anything ending in a non caret | |
1899 | ) | |
1900 | ) # end capture $1 | |
1901 | \.-?\d*$ # match . version number | |
1902 | /$1/x; | |
1903 | ||
1904 | # normalise filename, if there are no unescaped dots then append one | |
1905 | $base .= '.' unless $base =~ /(?:^|[^^])\./; | |
1906 | ||
1907 | # if we don't already have an extension then we just append the extention | |
1908 | my $separator = ( $base =~ /\.$/ ) ? "" : "_"; | |
1909 | return ( $path . $base, $separator ); | |
1910 | } | |
1911 | ||
1912 | sub Win_OS_Type { | |
1913 | ||
1914 | # Returns a string that determines what MS OS we are on. | |
1915 | # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net | |
1916 | # Returns nothing if not an MS system. | |
1917 | # Contributed by: Yves Orton | |
1918 | ||
1919 | my $rpending_complaint = shift; | |
1920 | return unless $^O =~ /win32|dos/i; # is it a MS box? | |
1921 | ||
1922 | # It _should_ have Win32 unless something is really weird | |
1923 | return unless eval('require Win32'); | |
1924 | ||
1925 | # Use the standard API call to determine the version | |
1926 | my ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion(); | |
1927 | ||
1928 | return "win32s" unless $id; # If id==0 then its a win32s box. | |
1929 | my $os = { # Magic numbers from MSDN | |
1930 | # documentation of GetOSVersion | |
1931 | 1 => { | |
1932 | 0 => "95", | |
1933 | 10 => "98", | |
1934 | 90 => "Me" | |
1935 | }, | |
1936 | 2 => { | |
1937 | 0 => "2000", | |
1938 | 1 => "XP/.Net", | |
1939 | 51 => "NT3.51" | |
1940 | } | |
1941 | }->{$id}->{$minor}; | |
1942 | ||
1943 | # This _really_ shouldnt happen. At least not for quite a while | |
1944 | unless ( defined $os ) { | |
1945 | $$rpending_complaint .= <<EOS; | |
1946 | Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record! | |
1947 | We won't be able to look for a system-wide config file. | |
1948 | EOS | |
1949 | } | |
1950 | ||
1951 | # Unfortunately the logic used for the various versions isnt so clever.. | |
1952 | # so we have to handle an outside case. | |
1953 | return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os; | |
1954 | } | |
1955 | ||
1956 | sub is_unix { | |
1957 | return ( $^O !~ /win32|dos/i ) | |
1958 | && ( $^O ne 'VMS' ) | |
1959 | && ( $^O ne 'OS2' ) | |
1960 | && ( $^O ne 'MacOS' ); | |
1961 | } | |
1962 | ||
1963 | sub look_for_Windows { | |
1964 | ||
1965 | # determine Windows sub-type and location of | |
1966 | # system-wide configuration files | |
1967 | my $rpending_complaint = shift; | |
1968 | my $is_Windows = ( $^O =~ /win32|dos/i ); | |
1969 | my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows; | |
1970 | return ( $is_Windows, $Windows_type ); | |
1971 | } | |
1972 | ||
1973 | sub find_config_file { | |
1974 | ||
1975 | # look for a .perltidyrc configuration file | |
1976 | my ( $is_Windows, $Windows_type, $rconfig_file_chatter, | |
1977 | $rpending_complaint ) = @_; | |
1978 | ||
1979 | $$rconfig_file_chatter .= "# Config file search...system reported as:"; | |
1980 | if ($is_Windows) { | |
1981 | $$rconfig_file_chatter .= "Windows $Windows_type\n"; | |
1982 | } | |
1983 | else { | |
1984 | $$rconfig_file_chatter .= " $^O\n"; | |
1985 | } | |
1986 | ||
1987 | # sub to check file existance and record all tests | |
1988 | my $exists_config_file = sub { | |
1989 | my $config_file = shift; | |
1990 | return 0 unless $config_file; | |
1991 | $$rconfig_file_chatter .= "# Testing: $config_file\n"; | |
1992 | return -f $config_file; | |
1993 | }; | |
1994 | ||
1995 | my $config_file; | |
1996 | ||
1997 | # look in current directory first | |
1998 | $config_file = ".perltidyrc"; | |
1999 | return $config_file if $exists_config_file->($config_file); | |
2000 | ||
2001 | # Default environment vars. | |
2002 | my @envs = qw(PERLTIDY HOME); | |
2003 | ||
2004 | # Check the NT/2k/XP locations, first a local machine def, then a | |
2005 | # network def | |
2006 | push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i; | |
2007 | ||
2008 | # Now go through the enviornment ... | |
2009 | foreach my $var (@envs) { | |
2010 | $$rconfig_file_chatter .= "# Examining: \$ENV{$var}"; | |
2011 | if ( defined( $ENV{$var} ) ) { | |
2012 | $$rconfig_file_chatter .= " = $ENV{$var}\n"; | |
2013 | ||
2014 | # test ENV{ PERLTIDY } as file: | |
2015 | if ( $var eq 'PERLTIDY' ) { | |
2016 | $config_file = "$ENV{$var}"; | |
2017 | return $config_file if $exists_config_file->($config_file); | |
2018 | } | |
2019 | ||
2020 | # test ENV as directory: | |
2021 | $config_file = catfile( $ENV{$var}, ".perltidyrc" ); | |
2022 | return $config_file if $exists_config_file->($config_file); | |
2023 | } | |
2024 | else { | |
2025 | $$rconfig_file_chatter .= "\n"; | |
2026 | } | |
2027 | } | |
2028 | ||
2029 | # then look for a system-wide definition | |
2030 | # where to look varies with OS | |
2031 | if ($is_Windows) { | |
2032 | ||
2033 | if ($Windows_type) { | |
2034 | my ( $os, $system, $allusers ) = | |
2035 | Win_Config_Locs( $rpending_complaint, $Windows_type ); | |
2036 | ||
2037 | # Check All Users directory, if there is one. | |
2038 | if ($allusers) { | |
2039 | $config_file = catfile( $allusers, ".perltidyrc" ); | |
2040 | return $config_file if $exists_config_file->($config_file); | |
2041 | } | |
2042 | ||
2043 | # Check system directory. | |
2044 | $config_file = catfile( $system, ".perltidyrc" ); | |
2045 | return $config_file if $exists_config_file->($config_file); | |
2046 | } | |
2047 | } | |
2048 | ||
2049 | # Place to add customization code for other systems | |
2050 | elsif ( $^O eq 'OS2' ) { | |
2051 | } | |
2052 | elsif ( $^O eq 'MacOS' ) { | |
2053 | } | |
2054 | elsif ( $^O eq 'VMS' ) { | |
2055 | } | |
2056 | ||
2057 | # Assume some kind of Unix | |
2058 | else { | |
2059 | ||
2060 | $config_file = "/usr/local/etc/perltidyrc"; | |
2061 | return $config_file if $exists_config_file->($config_file); | |
2062 | ||
2063 | $config_file = "/etc/perltidyrc"; | |
2064 | return $config_file if $exists_config_file->($config_file); | |
2065 | } | |
2066 | ||
2067 | # Couldn't find a config file | |
2068 | return; | |
2069 | } | |
2070 | ||
2071 | sub Win_Config_Locs { | |
2072 | ||
2073 | # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP), | |
2074 | # or undef if its not a win32 OS. In list context returns OS, System | |
2075 | # Directory, and All Users Directory. All Users will be empty on a | |
2076 | # 9x/Me box. Contributed by: Yves Orton. | |
2077 | ||
2078 | my $rpending_complaint = shift; | |
2079 | my $os = (@_) ? shift: Win_OS_Type(); | |
2080 | return unless $os; | |
2081 | ||
2082 | my $system = ""; | |
2083 | my $allusers = ""; | |
2084 | ||
2085 | if ( $os =~ /9[58]|Me/ ) { | |
2086 | $system = "C:/Windows"; | |
2087 | } | |
2088 | elsif ( $os =~ /NT|XP|2000/ ) { | |
2089 | $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/"; | |
2090 | $allusers = | |
2091 | ( $os =~ /NT/ ) | |
2092 | ? "C:/WinNT/profiles/All Users/" | |
2093 | : "C:/Documents and Settings/All Users/"; | |
2094 | } | |
2095 | else { | |
2096 | ||
2097 | # This currently would only happen on a win32s computer. | |
2098 | # I dont have one to test So I am unsure how to proceed. | |
2099 | # Sorry. :-) | |
2100 | $$rpending_complaint .= | |
2101 | "I dont know a sensible place to look for config files on an $os system.\n"; | |
2102 | return; | |
2103 | } | |
2104 | return wantarray ? ( $os, $system, $allusers ) : $os; | |
2105 | } | |
2106 | ||
2107 | sub dump_config_file { | |
2108 | my $fh = shift; | |
2109 | my $config_file = shift; | |
2110 | my $rconfig_file_chatter = shift; | |
2111 | print STDOUT "$$rconfig_file_chatter"; | |
2112 | if ($fh) { | |
2113 | print STDOUT "# Dump of file: '$config_file'\n"; | |
2114 | while ( $_ = $fh->getline() ) { print STDOUT } | |
2115 | eval { $fh->close() }; | |
2116 | } | |
2117 | else { | |
2118 | print STDOUT "# ...no config file found\n"; | |
2119 | } | |
2120 | } | |
2121 | ||
2122 | sub read_config_file { | |
2123 | ||
2124 | my ( $fh, $config_file, $rexpansion ) = @_; | |
2125 | my @config_list = (); | |
2126 | ||
2127 | my $name = undef; | |
2128 | my $line_no; | |
2129 | while ( $_ = $fh->getline() ) { | |
2130 | $line_no++; | |
2131 | chomp; | |
2132 | next if /^\s*#/; # skip full-line comment | |
2133 | $_ = strip_comment( $_, $config_file, $line_no ); | |
2134 | s/^\s*(.*?)\s*$/$1/; # trim both ends | |
2135 | next unless $_; | |
2136 | ||
2137 | # look for something of the general form | |
2138 | # newname { body } | |
2139 | # or just | |
2140 | # body | |
2141 | ||
2142 | if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) { | |
2143 | my ( $newname, $body, $curly ) = ( $2, $3, $4 ); | |
2144 | ||
2145 | # handle a new alias definition | |
2146 | if ($newname) { | |
2147 | if ($name) { | |
2148 | die | |
2149 | "No '}' seen after $name and before $newname in config file $config_file line $.\n"; | |
2150 | } | |
2151 | $name = $newname; | |
2152 | ||
2153 | if ( ${$rexpansion}{$name} ) { | |
2154 | local $" = ')('; | |
2155 | my @names = sort keys %$rexpansion; | |
2156 | print "Here is a list of all installed aliases\n(@names)\n"; | |
2157 | die | |
2158 | "Attempting to redefine alias ($name) in config file $config_file line $.\n"; | |
2159 | } | |
2160 | ${$rexpansion}{$name} = []; | |
2161 | } | |
2162 | ||
2163 | # now do the body | |
2164 | if ($body) { | |
2165 | ||
2166 | my ( $rbody_parts, $msg ) = parse_args($body); | |
2167 | if ($msg) { | |
2168 | die <<EOM; | |
2169 | Error reading file $config_file at line number $line_no. | |
2170 | $msg | |
2171 | Please fix this line or use -npro to avoid reading this file | |
2172 | EOM | |
2173 | } | |
2174 | ||
2175 | if ($name) { | |
2176 | ||
2177 | # remove leading dashes if this is an alias | |
2178 | foreach (@$rbody_parts) { s/^\-+//; } | |
2179 | push @{ ${$rexpansion}{$name} }, @$rbody_parts; | |
2180 | } | |
2181 | ||
2182 | else { | |
2183 | push( @config_list, @$rbody_parts ); | |
2184 | } | |
2185 | } | |
2186 | ||
2187 | if ($curly) { | |
2188 | unless ($name) { | |
2189 | die | |
2190 | "Unexpected '}' seen in config file $config_file line $.\n"; | |
2191 | } | |
2192 | $name = undef; | |
2193 | } | |
2194 | } | |
2195 | } | |
2196 | eval { $fh->close() }; | |
2197 | return ( \@config_list ); | |
2198 | } | |
2199 | ||
2200 | sub strip_comment { | |
2201 | ||
2202 | my ( $instr, $config_file, $line_no ) = @_; | |
2203 | ||
2204 | # nothing to do if no comments | |
2205 | if ( $instr !~ /#/ ) { | |
2206 | return $instr; | |
2207 | } | |
2208 | ||
2209 | # use simple method of no quotes | |
2210 | elsif ( $instr !~ /['"]/ ) { | |
2211 | $instr =~ s/\s*\#.*$//; # simple trim | |
2212 | return $instr; | |
2213 | } | |
2214 | ||
2215 | # handle comments and quotes | |
2216 | my $outstr = ""; | |
2217 | my $quote_char = ""; | |
2218 | while (1) { | |
2219 | ||
2220 | # looking for ending quote character | |
2221 | if ($quote_char) { | |
2222 | if ( $instr =~ /\G($quote_char)/gc ) { | |
2223 | $quote_char = ""; | |
2224 | $outstr .= $1; | |
2225 | } | |
2226 | elsif ( $instr =~ /\G(.)/gc ) { | |
2227 | $outstr .= $1; | |
2228 | } | |
2229 | ||
2230 | # error..we reached the end without seeing the ending quote char | |
2231 | else { | |
2232 | die <<EOM; | |
2233 | Error reading file $config_file at line number $line_no. | |
2234 | Did not see ending quote character <$quote_char> in this text: | |
2235 | $instr | |
2236 | Please fix this line or use -npro to avoid reading this file | |
2237 | EOM | |
2238 | last; | |
2239 | } | |
2240 | } | |
2241 | ||
2242 | # accumulating characters and looking for start of a quoted string | |
2243 | else { | |
2244 | if ( $instr =~ /\G([\"\'])/gc ) { | |
2245 | $outstr .= $1; | |
2246 | $quote_char = $1; | |
2247 | } | |
2248 | elsif ( $instr =~ /\G#/gc ) { | |
2249 | last; | |
2250 | } | |
2251 | elsif ( $instr =~ /\G(.)/gc ) { | |
2252 | $outstr .= $1; | |
2253 | } | |
2254 | else { | |
2255 | last; | |
2256 | } | |
2257 | } | |
2258 | } | |
2259 | return $outstr; | |
2260 | } | |
2261 | ||
2262 | sub parse_args { | |
2263 | ||
2264 | # Parse a command string containing multiple string with possible | |
2265 | # quotes, into individual commands. It might look like this, for example: | |
2266 | # | |
2267 | # -wba=" + - " -some-thing -wbb='. && ||' | |
2268 | # | |
2269 | # There is no need, at present, to handle escaped quote characters. | |
2270 | # (They are not perltidy tokens, so needn't be in strings). | |
2271 | ||
2272 | my ($body) = @_; | |
2273 | my @body_parts = (); | |
2274 | my $quote_char = ""; | |
2275 | my $part = ""; | |
2276 | my $msg = ""; | |
2277 | while (1) { | |
2278 | ||
2279 | # looking for ending quote character | |
2280 | if ($quote_char) { | |
2281 | if ( $body =~ /\G($quote_char)/gc ) { | |
2282 | $quote_char = ""; | |
2283 | } | |
2284 | elsif ( $body =~ /\G(.)/gc ) { | |
2285 | $part .= $1; | |
2286 | } | |
2287 | ||
2288 | # error..we reached the end without seeing the ending quote char | |
2289 | else { | |
2290 | if ($part) { push @body_parts, $part; } | |
2291 | $msg = <<EOM; | |
2292 | Did not see ending quote character <$quote_char> in this text: | |
2293 | $body | |
2294 | EOM | |
2295 | last; | |
2296 | } | |
2297 | } | |
2298 | ||
2299 | # accumulating characters and looking for start of a quoted string | |
2300 | else { | |
2301 | if ( $body =~ /\G([\"\'])/gc ) { | |
2302 | $quote_char = $1; | |
2303 | } | |
2304 | elsif ( $body =~ /\G(\s+)/gc ) { | |
2305 | if ($part) { push @body_parts, $part; } | |
2306 | $part = ""; | |
2307 | } | |
2308 | elsif ( $body =~ /\G(.)/gc ) { | |
2309 | $part .= $1; | |
2310 | } | |
2311 | else { | |
2312 | if ($part) { push @body_parts, $part; } | |
2313 | last; | |
2314 | } | |
2315 | } | |
2316 | } | |
2317 | return ( \@body_parts, $msg ); | |
2318 | } | |
2319 | ||
2320 | sub dump_long_names { | |
2321 | ||
2322 | my @names = sort @_; | |
2323 | print STDOUT <<EOM; | |
2324 | # Command line long names (passed to GetOptions) | |
2325 | #--------------------------------------------------------------- | |
2326 | # here is a summary of the Getopt codes: | |
2327 | # <none> does not take an argument | |
2328 | # =s takes a mandatory string | |
2329 | # :s takes an optional string | |
2330 | # =i takes a mandatory integer | |
2331 | # :i takes an optional integer | |
2332 | # ! does not take an argument and may be negated | |
2333 | # i.e., -foo and -nofoo are allowed | |
2334 | # a double dash signals the end of the options list | |
2335 | # | |
2336 | #--------------------------------------------------------------- | |
2337 | EOM | |
2338 | ||
2339 | foreach (@names) { print STDOUT "$_\n" } | |
2340 | } | |
2341 | ||
2342 | sub dump_defaults { | |
2343 | my @defaults = sort @_; | |
2344 | print STDOUT "Default command line options:\n"; | |
2345 | foreach (@_) { print STDOUT "$_\n" } | |
2346 | } | |
2347 | ||
2348 | sub dump_options { | |
2349 | my ($rOpts) = @_; | |
2350 | local $" = "\n"; | |
2351 | print STDOUT "Final parameter set for this run\n"; | |
2352 | foreach ( sort keys %{$rOpts} ) { | |
2353 | print STDOUT "$_=$rOpts->{$_}\n"; | |
2354 | } | |
2355 | } | |
2356 | ||
2357 | sub show_version { | |
2358 | print <<"EOM"; | |
2359 | This is perltidy, v$VERSION | |
2360 | ||
2361 | Copyright 2000-2003, Steve Hancock | |
2362 | ||
2363 | Perltidy is free software and may be copied under the terms of the GNU | |
2364 | General Public License, which is included in the distribution files. | |
2365 | ||
2366 | Complete documentation for perltidy can be found using 'man perltidy' | |
2367 | or on the internet at http://perltidy.sourceforge.net. | |
2368 | EOM | |
2369 | } | |
2370 | ||
2371 | sub usage { | |
2372 | ||
2373 | print STDOUT <<EOF; | |
2374 | This is perltidy version $VERSION, a perl script indenter. Usage: | |
2375 | ||
2376 | perltidy [ options ] file1 file2 file3 ... | |
2377 | (output goes to file1.tdy, file2.tdy, file3.tdy, ...) | |
2378 | perltidy [ options ] file1 -o outfile | |
2379 | perltidy [ options ] file1 -st >outfile | |
2380 | perltidy [ options ] <infile >outfile | |
2381 | ||
2382 | Options have short and long forms. Short forms are shown; see | |
2383 | man pages for long forms. Note: '=s' indicates a required string, | |
2384 | and '=n' indicates a required integer. | |
2385 | ||
2386 | I/O control | |
2387 | -h show this help | |
2388 | -o=file name of the output file (only if single input file) | |
2389 | -oext=s change output extension from 'tdy' to s | |
2390 | -opath=path change path to be 'path' for output files | |
2391 | -b backup original to .bak and modify file in-place | |
2392 | -bext=s change default backup extension from 'bak' to s | |
2393 | -q deactivate error messages (for running under editor) | |
2394 | -w include non-critical warning messages in the .ERR error output | |
2395 | -syn run perl -c to check syntax (default under unix systems) | |
2396 | -log save .LOG file, which has useful diagnostics | |
2397 | -f force perltidy to read a binary file | |
2398 | -g like -log but writes more detailed .LOG file, for debugging scripts | |
2399 | -opt write the set of options actually used to a .LOG file | |
2400 | -npro ignore .perltidyrc configuration command file | |
2401 | -pro=file read configuration commands from file instead of .perltidyrc | |
2402 | -st send output to standard output, STDOUT | |
2403 | -se send error output to standard error output, STDERR | |
2404 | -v display version number to standard output and quit | |
2405 | ||
2406 | Basic Options: | |
2407 | -i=n use n columns per indentation level (default n=4) | |
2408 | -t tabs: use one tab character per indentation level, not recommeded | |
2409 | -nt no tabs: use n spaces per indentation level (default) | |
2410 | -et=n entab leading whitespace n spaces per tab; not recommended | |
2411 | -io "indent only": just do indentation, no other formatting. | |
2412 | -sil=n set starting indentation level to n; use if auto detection fails | |
2413 | -ole=s specify output line ending (s=dos or win, mac, unix) | |
2414 | -ple keep output line endings same as input (input must be filename) | |
2415 | ||
2416 | Whitespace Control | |
2417 | -fws freeze whitespace; this disables all whitespace changes | |
2418 | and disables the following switches: | |
2419 | -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) | |
2420 | -bbt same as -bt but for code block braces; same as -bt if not given | |
2421 | -bbvt block braces vertically tight; use with -bl or -bli | |
2422 | -bbvtl=s make -bbvt to apply to selected list of block types | |
2423 | -pt=n paren tightness (n=0, 1 or 2) | |
2424 | -sbt=n square bracket tightness (n=0, 1, or 2) | |
2425 | -bvt=n brace vertical tightness, | |
2426 | n=(0=open, 1=close unless multiple steps on a line, 2=always close) | |
2427 | -pvt=n paren vertical tightness (see -bvt for n) | |
2428 | -sbvt=n square bracket vertical tightness (see -bvt for n) | |
2429 | -bvtc=n closing brace vertical tightness: | |
2430 | n=(0=open, 1=sometimes close, 2=always close) | |
2431 | -pvtc=n closing paren vertical tightness, see -bvtc for n. | |
2432 | -sbvtc=n closing square bracket vertical tightness, see -bvtc for n. | |
2433 | -ci=n sets continuation indentation=n, default is n=2 spaces | |
2434 | -lp line up parentheses, brackets, and non-BLOCK braces | |
2435 | -sfs add space before semicolon in for( ; ; ) | |
2436 | -aws allow perltidy to add whitespace (default) | |
2437 | -dws delete all old non-essential whitespace | |
2438 | -icb indent closing brace of a code block | |
2439 | -cti=n closing indentation of paren, square bracket, or non-block brace: | |
2440 | n=0 none, =1 align with opening, =2 one full indentation level | |
2441 | -icp equivalent to -cti=2 | |
2442 | -wls=s want space left of tokens in string; i.e. -nwls='+ - * /' | |
2443 | -wrs=s want space right of tokens in string; | |
2444 | -sts put space before terminal semicolon of a statement | |
2445 | -sak=s put space between keywords given in s and '('; | |
2446 | -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local' | |
2447 | ||
2448 | Line Break Control | |
2449 | -fnl freeze newlines; this disables all line break changes | |
2450 | and disables the following switches: | |
2451 | -anl add newlines; ok to introduce new line breaks | |
2452 | -bbs add blank line before subs and packages | |
2453 | -bbc add blank line before block comments | |
2454 | -bbb add blank line between major blocks | |
2455 | -sob swallow optional blank lines | |
2456 | -ce cuddled else; use this style: '} else {' | |
2457 | -dnl delete old newlines (default) | |
2458 | -mbl=n maximum consecutive blank lines (default=1) | |
2459 | -l=n maximum line length; default n=80 | |
2460 | -bl opening brace on new line | |
2461 | -sbl opening sub brace on new line. value of -bl is used if not given. | |
2462 | -bli opening brace on new line and indented | |
2463 | -bar opening brace always on right, even for long clauses | |
2464 | -vt=n vertical tightness (requires -lp); n controls break after opening | |
2465 | token: 0=never 1=no break if next line balanced 2=no break | |
2466 | -vtc=n vertical tightness of closing container; n controls if closing | |
2467 | token starts new line: 0=always 1=not unless list 1=never | |
2468 | -wba=s want break after tokens in string; i.e. wba=': .' | |
2469 | -wbb=s want break before tokens in string | |
2470 | ||
2471 | Following Old Breakpoints | |
2472 | -boc break at old comma breaks: turns off all automatic list formatting | |
2473 | -bol break at old logical breakpoints: or, and, ||, && (default) | |
2474 | -bok break at old list keyword breakpoints such as map, sort (default) | |
2475 | -bot break at old conditional (trinary ?:) operator breakpoints (default) | |
2476 | -cab=n break at commas after a comma-arrow (=>): | |
2477 | n=0 break at all commas after => | |
2478 | n=1 stable: break unless this breaks an existing one-line container | |
2479 | n=2 break only if a one-line container cannot be formed | |
2480 | n=3 do not treat commas after => specially at all | |
2481 | ||
2482 | Comment controls | |
2483 | -ibc indent block comments (default) | |
2484 | -isbc indent spaced block comments; may indent unless no leading space | |
2485 | -msc=n minimum desired spaces to side comment, default 4 | |
2486 | -csc add or update closing side comments after closing BLOCK brace | |
2487 | -dcsc delete closing side comments created by a -csc command | |
2488 | -cscp=s change closing side comment prefix to be other than '## end' | |
2489 | -cscl=s change closing side comment to apply to selected list of blocks | |
2490 | -csci=n minimum number of lines needed to apply a -csc tag, default n=6 | |
2491 | -csct=n maximum number of columns of appended text, default n=20 | |
2492 | -cscw causes warning if old side comment is overwritten with -csc | |
2493 | ||
2494 | -sbc use 'static block comments' identified by leading '##' (default) | |
2495 | -sbcp=s change static block comment identifier to be other than '##' | |
2496 | -osbc outdent static block comments | |
2497 | ||
2498 | -ssc use 'static side comments' identified by leading '##' (default) | |
2499 | -sscp=s change static side comment identifier to be other than '##' | |
2500 | ||
2501 | Delete selected text | |
2502 | -dac delete all comments AND pod | |
2503 | -dbc delete block comments | |
2504 | -dsc delete side comments | |
2505 | -dp delete pod | |
2506 | ||
2507 | Send selected text to a '.TEE' file | |
2508 | -tac tee all comments AND pod | |
2509 | -tbc tee block comments | |
2510 | -tsc tee side comments | |
2511 | -tp tee pod | |
2512 | ||
2513 | Outdenting | |
2514 | -olq outdent long quoted strings (default) | |
2515 | -olc outdent a long block comment line | |
2516 | -ola outdent statement labels | |
2517 | -okw outdent control keywords (redo, next, last, goto, return) | |
2518 | -okwl=s specify alternative keywords for -okw command | |
2519 | ||
2520 | Other controls | |
2521 | -mft=n maximum fields per table; default n=40 | |
2522 | -x do not format lines before hash-bang line (i.e., for VMS) | |
2523 | -asc allows perltidy to add a ';' when missing (default) | |
2524 | -dsm allows perltidy to delete an unnecessary ';' (default) | |
2525 | ||
2526 | Combinations of other parameters | |
2527 | -gnu attempt to follow GNU Coding Standards as applied to perl | |
2528 | -mangle remove as many newlines as possible (but keep comments and pods) | |
2529 | -extrude insert as many newlines as possible | |
2530 | ||
2531 | Dump and die, debugging | |
2532 | -dop dump options used in this run to standard output and quit | |
2533 | -ddf dump default options to standard output and quit | |
2534 | -dsn dump all option short names to standard output and quit | |
2535 | -dln dump option long names to standard output and quit | |
2536 | -dpro dump whatever configuration file is in effect to standard output | |
2537 | -dtt dump all token types to standard output and quit | |
2538 | ||
2539 | HTML | |
2540 | -html write an html file (see 'man perl2web' for many options) | |
2541 | Note: when -html is used, no indentation or formatting are done. | |
2542 | Hint: try perltidy -html -css=mystyle.css filename.pl | |
2543 | and edit mystyle.css to change the appearance of filename.html. | |
2544 | -nnn gives line numbers | |
2545 | -pre only writes out <pre>..</pre> code section | |
2546 | -toc places a table of contents to subs at the top (default) | |
2547 | -pod passes pod text through pod2html (default) | |
2548 | -frm write html as a frame (3 files) | |
2549 | -text=s extra extension for table of contents if -frm, default='toc' | |
2550 | -sext=s extra extension for file content if -frm, default='src' | |
2551 | ||
2552 | A prefix of "n" negates short form toggle switches, and a prefix of "no" | |
2553 | negates the long forms. For example, -nasc means don't add missing | |
2554 | semicolons. | |
2555 | ||
2556 | If you are unable to see this entire text, try "perltidy -h | more" | |
2557 | For more detailed information, and additional options, try "man perltidy", | |
2558 | or go to the perltidy home page at http://perltidy.sourceforge.net | |
2559 | EOF | |
2560 | ||
2561 | } | |
2562 | ||
2563 | sub process_this_file { | |
2564 | ||
2565 | my ( $truth, $beauty ) = @_; | |
2566 | ||
2567 | # loop to process each line of this file | |
2568 | while ( my $line_of_tokens = $truth->get_line() ) { | |
2569 | $beauty->write_line($line_of_tokens); | |
2570 | } | |
2571 | ||
2572 | # finish up | |
2573 | eval { $beauty->finish_formatting() }; | |
2574 | $truth->report_tokenization_errors(); | |
2575 | } | |
2576 | ||
2577 | sub check_syntax { | |
2578 | ||
2579 | # Use 'perl -c' to make sure that we did not create bad syntax | |
2580 | # This is a very good independent check for programming errors | |
2581 | # | |
2582 | # Given names of the input and output files, ($ifname, $ofname), | |
2583 | # we do the following: | |
2584 | # - check syntax of the input file | |
2585 | # - if bad, all done (could be an incomplete code snippet) | |
2586 | # - if infile syntax ok, then check syntax of the output file; | |
2587 | # - if outfile syntax bad, issue warning; this implies a code bug! | |
2588 | # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good | |
2589 | ||
2590 | my ( $ifname, $ofname, $logger_object, $rOpts ) = @_; | |
2591 | my $infile_syntax_ok = 0; | |
2592 | my $line_of_dashes = '-' x 42 . "\n"; | |
2593 | ||
2594 | my $flags = $rOpts->{'perl-syntax-check-flags'}; | |
2595 | ||
2596 | # be sure we invoke perl with -c | |
2597 | # note: perl will accept repeated flags like '-c -c'. It is safest | |
2598 | # to append another -c than try to find an interior bundled c, as | |
2599 | # in -Tc, because such a 'c' might be in a quoted string, for example. | |
2600 | if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" } | |
2601 | ||
2602 | # be sure we invoke perl with -x if requested | |
2603 | # same comments about repeated parameters applies | |
2604 | if ( $rOpts->{'look-for-hash-bang'} ) { | |
2605 | if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" } | |
2606 | } | |
2607 | ||
2608 | # this shouldn't happen unless a termporary file couldn't be made | |
2609 | if ( $ifname eq '-' ) { | |
2610 | $logger_object->write_logfile_entry( | |
2611 | "Cannot run perl -c on STDIN and STDOUT\n"); | |
2612 | return $infile_syntax_ok; | |
2613 | } | |
2614 | ||
2615 | $logger_object->write_logfile_entry( | |
2616 | "checking input file syntax with perl $flags\n"); | |
2617 | $logger_object->write_logfile_entry($line_of_dashes); | |
2618 | ||
2619 | # Not all operating systems/shells support redirection of the standard | |
2620 | # error output. | |
2621 | my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; | |
2622 | ||
2623 | my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection ); | |
2624 | $logger_object->write_logfile_entry("$perl_output\n"); | |
2625 | ||
2626 | if ( $perl_output =~ /syntax\s*OK/ ) { | |
2627 | $infile_syntax_ok = 1; | |
2628 | $logger_object->write_logfile_entry($line_of_dashes); | |
2629 | $logger_object->write_logfile_entry( | |
2630 | "checking output file syntax with perl $flags ...\n"); | |
2631 | $logger_object->write_logfile_entry($line_of_dashes); | |
2632 | ||
2633 | my $perl_output = | |
2634 | do_syntax_check( $ofname, $flags, $error_redirection ); | |
2635 | $logger_object->write_logfile_entry("$perl_output\n"); | |
2636 | ||
2637 | unless ( $perl_output =~ /syntax\s*OK/ ) { | |
2638 | $logger_object->write_logfile_entry($line_of_dashes); | |
2639 | $logger_object->warning( | |
2640 | "The output file has a syntax error when tested with perl $flags $ofname !\n" | |
2641 | ); | |
2642 | $logger_object->warning( | |
2643 | "This implies an error in perltidy; the file $ofname is bad\n"); | |
2644 | $logger_object->report_definite_bug(); | |
2645 | ||
2646 | # the perl version number will be helpful for diagnosing the problem | |
2647 | $logger_object->write_logfile_entry( | |
2648 | qx/perl -v $error_redirection/ . "\n" ); | |
2649 | } | |
2650 | } | |
2651 | else { | |
2652 | ||
2653 | # Only warn of perl -c syntax errors. Other messages, | |
2654 | # such as missing modules, are too common. They can be | |
2655 | # seen by running with perltidy -w | |
2656 | $logger_object->complain("A syntax check using perl $flags gives: \n"); | |
2657 | $logger_object->complain($line_of_dashes); | |
2658 | $logger_object->complain("$perl_output\n"); | |
2659 | $logger_object->complain($line_of_dashes); | |
2660 | $infile_syntax_ok = -1; | |
2661 | $logger_object->write_logfile_entry($line_of_dashes); | |
2662 | $logger_object->write_logfile_entry( | |
2663 | "The output file will not be checked because of input file problems\n" | |
2664 | ); | |
2665 | } | |
2666 | return $infile_syntax_ok; | |
2667 | } | |
2668 | ||
2669 | sub do_syntax_check { | |
2670 | my ( $fname, $flags, $error_redirection ) = @_; | |
2671 | ||
2672 | # We have to quote the filename in case it has unusual characters | |
2673 | # or spaces. Example: this filename #CM11.pm# gives trouble. | |
2674 | $fname = '"' . $fname . '"'; | |
2675 | ||
2676 | # Under VMS something like -T will become -t (and an error) so we | |
2677 | # will put quotes around the flags. Double quotes seem to work on | |
2678 | # Unix/Windows/VMS, but this may not work on all systems. (Single | |
2679 | # quotes do not work under Windows). It could become necessary to | |
2680 | # put double quotes around each flag, such as: -"c" -"T" | |
2681 | # We may eventually need some system-dependent coding here. | |
2682 | $flags = '"' . $flags . '"'; | |
2683 | ||
2684 | # now wish for luck... | |
2685 | return qx/perl $flags $fname $error_redirection/; | |
2686 | } | |
2687 | ||
2688 | ##################################################################### | |
2689 | # | |
2690 | # This is a stripped down version of IO::Scalar | |
2691 | # Given a reference to a scalar, it supplies either: | |
2692 | # a getline method which reads lines (mode='r'), or | |
2693 | # a print method which reads lines (mode='w') | |
2694 | # | |
2695 | ##################################################################### | |
2696 | package Perl::Tidy::IOScalar; | |
2697 | use Carp; | |
2698 | ||
2699 | sub new { | |
2700 | my ( $package, $rscalar, $mode ) = @_; | |
2701 | my $ref = ref $rscalar; | |
2702 | if ( $ref ne 'SCALAR' ) { | |
2703 | confess <<EOM; | |
2704 | ------------------------------------------------------------------------ | |
2705 | expecting ref to SCALAR but got ref to ($ref); trace follows: | |
2706 | ------------------------------------------------------------------------ | |
2707 | EOM | |
2708 | ||
2709 | } | |
2710 | if ( $mode eq 'w' ) { | |
2711 | $$rscalar = ""; | |
2712 | return bless [ $rscalar, $mode ], $package; | |
2713 | } | |
2714 | elsif ( $mode eq 'r' ) { | |
2715 | ||
2716 | # Convert a scalar to an array. | |
2717 | # This avoids looking for "\n" on each call to getline | |
2718 | my @array = map { $_ .= "\n" } split /\n/, ${$rscalar}; | |
2719 | my $i_next = 0; | |
2720 | return bless [ \@array, $mode, $i_next ], $package; | |
2721 | } | |
2722 | else { | |
2723 | confess <<EOM; | |
2724 | ------------------------------------------------------------------------ | |
2725 | expecting mode = 'r' or 'w' but got mode ($mode); trace follows: | |
2726 | ------------------------------------------------------------------------ | |
2727 | EOM | |
2728 | } | |
2729 | } | |
2730 | ||
2731 | sub getline { | |
2732 | my $self = shift; | |
2733 | my $mode = $self->[1]; | |
2734 | if ( $mode ne 'r' ) { | |
2735 | confess <<EOM; | |
2736 | ------------------------------------------------------------------------ | |
2737 | getline call requires mode = 'r' but mode = ($mode); trace follows: | |
2738 | ------------------------------------------------------------------------ | |
2739 | EOM | |
2740 | } | |
2741 | my $i = $self->[2]++; | |
2742 | ##my $line = $self->[0]->[$i]; | |
2743 | return $self->[0]->[$i]; | |
2744 | } | |
2745 | ||
2746 | sub print { | |
2747 | my $self = shift; | |
2748 | my $mode = $self->[1]; | |
2749 | if ( $mode ne 'w' ) { | |
2750 | confess <<EOM; | |
2751 | ------------------------------------------------------------------------ | |
2752 | print call requires mode = 'w' but mode = ($mode); trace follows: | |
2753 | ------------------------------------------------------------------------ | |
2754 | EOM | |
2755 | } | |
2756 | ${ $self->[0] } .= $_[0]; | |
2757 | } | |
2758 | sub close { return } | |
2759 | ||
2760 | ##################################################################### | |
2761 | # | |
2762 | # This is a stripped down version of IO::ScalarArray | |
2763 | # Given a reference to an array, it supplies either: | |
2764 | # a getline method which reads lines (mode='r'), or | |
2765 | # a print method which reads lines (mode='w') | |
2766 | # | |
2767 | # NOTE: this routine assumes that that there aren't any embedded | |
2768 | # newlines within any of the array elements. There are no checks | |
2769 | # for that. | |
2770 | # | |
2771 | ##################################################################### | |
2772 | package Perl::Tidy::IOScalarArray; | |
2773 | use Carp; | |
2774 | ||
2775 | sub new { | |
2776 | my ( $package, $rarray, $mode ) = @_; | |
2777 | my $ref = ref $rarray; | |
2778 | if ( $ref ne 'ARRAY' ) { | |
2779 | confess <<EOM; | |
2780 | ------------------------------------------------------------------------ | |
2781 | expecting ref to ARRAY but got ref to ($ref); trace follows: | |
2782 | ------------------------------------------------------------------------ | |
2783 | EOM | |
2784 | ||
2785 | } | |
2786 | if ( $mode eq 'w' ) { | |
2787 | @$rarray = (); | |
2788 | return bless [ $rarray, $mode ], $package; | |
2789 | } | |
2790 | elsif ( $mode eq 'r' ) { | |
2791 | my $i_next = 0; | |
2792 | return bless [ $rarray, $mode, $i_next ], $package; | |
2793 | } | |
2794 | else { | |
2795 | confess <<EOM; | |
2796 | ------------------------------------------------------------------------ | |
2797 | expecting mode = 'r' or 'w' but got mode ($mode); trace follows: | |
2798 | ------------------------------------------------------------------------ | |
2799 | EOM | |
2800 | } | |
2801 | } | |
2802 | ||
2803 | sub getline { | |
2804 | my $self = shift; | |
2805 | my $mode = $self->[1]; | |
2806 | if ( $mode ne 'r' ) { | |
2807 | confess <<EOM; | |
2808 | ------------------------------------------------------------------------ | |
2809 | getline requires mode = 'r' but mode = ($mode); trace follows: | |
2810 | ------------------------------------------------------------------------ | |
2811 | EOM | |
2812 | } | |
2813 | my $i = $self->[2]++; | |
2814 | ##my $line = $self->[0]->[$i]; | |
2815 | return $self->[0]->[$i]; | |
2816 | } | |
2817 | ||
2818 | sub print { | |
2819 | my $self = shift; | |
2820 | my $mode = $self->[1]; | |
2821 | if ( $mode ne 'w' ) { | |
2822 | confess <<EOM; | |
2823 | ------------------------------------------------------------------------ | |
2824 | print requires mode = 'w' but mode = ($mode); trace follows: | |
2825 | ------------------------------------------------------------------------ | |
2826 | EOM | |
2827 | } | |
2828 | push @{ $self->[0] }, $_[0]; | |
2829 | } | |
2830 | sub close { return } | |
2831 | ||
2832 | ##################################################################### | |
2833 | # | |
2834 | # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method | |
2835 | # which returns the next line to be parsed | |
2836 | # | |
2837 | ##################################################################### | |
2838 | ||
2839 | package Perl::Tidy::LineSource; | |
2840 | ||
2841 | sub new { | |
2842 | ||
2843 | my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; | |
2844 | my $input_file_copy = undef; | |
2845 | my $fh_copy; | |
2846 | ||
2847 | my $input_line_ending; | |
2848 | if ( $rOpts->{'preserve-line-endings'} ) { | |
2849 | $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); | |
2850 | } | |
2851 | ||
2852 | ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); | |
2853 | return undef unless $fh; | |
2854 | ||
2855 | # in order to check output syntax when standard output is used, | |
2856 | # or when it is an object, we have to make a copy of the file | |
2857 | if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) | |
2858 | { | |
2859 | ||
2860 | # Turning off syntax check when input output is used. | |
2861 | # The reason is that temporary files cause problems on | |
2862 | # on many systems. | |
2863 | $rOpts->{'check-syntax'} = 0; | |
2864 | $input_file_copy = '-'; | |
2865 | ||
2866 | $$rpending_logfile_message .= <<EOM; | |
2867 | Note: --syntax check will be skipped because standard input is used | |
2868 | EOM | |
2869 | ||
2870 | } | |
2871 | ||
2872 | return bless { | |
2873 | _fh => $fh, | |
2874 | _fh_copy => $fh_copy, | |
2875 | _filename => $input_file, | |
2876 | _input_file_copy => $input_file_copy, | |
2877 | _input_line_ending => $input_line_ending, | |
2878 | _rinput_buffer => [], | |
2879 | _started => 0, | |
2880 | }, $class; | |
2881 | } | |
2882 | ||
2883 | sub get_input_file_copy_name { | |
2884 | my $self = shift; | |
2885 | my $ifname = $self->{_input_file_copy}; | |
2886 | unless ($ifname) { | |
2887 | $ifname = $self->{_filename}; | |
2888 | } | |
2889 | return $ifname; | |
2890 | } | |
2891 | ||
2892 | sub close_input_file { | |
2893 | my $self = shift; | |
2894 | eval { $self->{_fh}->close() }; | |
2895 | eval { $self->{_fh_copy}->close() } if $self->{_fh_copy}; | |
2896 | } | |
2897 | ||
2898 | sub get_line { | |
2899 | my $self = shift; | |
2900 | my $line = undef; | |
2901 | my $fh = $self->{_fh}; | |
2902 | my $fh_copy = $self->{_fh_copy}; | |
2903 | my $rinput_buffer = $self->{_rinput_buffer}; | |
2904 | ||
2905 | if ( scalar(@$rinput_buffer) ) { | |
2906 | $line = shift @$rinput_buffer; | |
2907 | } | |
2908 | else { | |
2909 | $line = $fh->getline(); | |
2910 | ||
2911 | # patch to read raw mac files under unix, dos | |
2912 | # see if the first line has embedded \r's | |
2913 | if ( $line && !$self->{_started} ) { | |
2914 | if ( $line =~ /[\015][^\015\012]/ ) { | |
2915 | ||
2916 | # found one -- break the line up and store in a buffer | |
2917 | @$rinput_buffer = map { $_ . "\n" } split /\015/, $line; | |
2918 | my $count = @$rinput_buffer; | |
2919 | $line = shift @$rinput_buffer; | |
2920 | } | |
2921 | $self->{_started}++; | |
2922 | } | |
2923 | } | |
2924 | if ( $line && $fh_copy ) { $fh_copy->print($line); } | |
2925 | return $line; | |
2926 | } | |
2927 | ||
2928 | sub old_get_line { | |
2929 | my $self = shift; | |
2930 | my $line = undef; | |
2931 | my $fh = $self->{_fh}; | |
2932 | my $fh_copy = $self->{_fh_copy}; | |
2933 | $line = $fh->getline(); | |
2934 | if ( $line && $fh_copy ) { $fh_copy->print($line); } | |
2935 | return $line; | |
2936 | } | |
2937 | ||
2938 | ##################################################################### | |
2939 | # | |
2940 | # the Perl::Tidy::LineSink class supplies a write_line method for | |
2941 | # actual file writing | |
2942 | # | |
2943 | ##################################################################### | |
2944 | ||
2945 | package Perl::Tidy::LineSink; | |
2946 | ||
2947 | sub new { | |
2948 | ||
2949 | my ( $class, $output_file, $tee_file, $line_separator, $rOpts, | |
2950 | $rpending_logfile_message ) | |
2951 | = @_; | |
2952 | my $fh = undef; | |
2953 | my $fh_copy = undef; | |
2954 | my $fh_tee = undef; | |
2955 | my $output_file_copy = ""; | |
2956 | my $output_file_open = 0; | |
2957 | ||
2958 | if ( $rOpts->{'format'} eq 'tidy' ) { | |
2959 | ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); | |
2960 | unless ($fh) { die "Cannot write to output stream\n"; } | |
2961 | $output_file_open = 1; | |
2962 | } | |
2963 | ||
2964 | # in order to check output syntax when standard output is used, | |
2965 | # or when it is an object, we have to make a copy of the file | |
2966 | if ( $output_file eq '-' || ref $output_file ) { | |
2967 | if ( $rOpts->{'check-syntax'} ) { | |
2968 | ||
2969 | # Turning off syntax check when standard output is used. | |
2970 | # The reason is that temporary files cause problems on | |
2971 | # on many systems. | |
2972 | $rOpts->{'check-syntax'} = 0; | |
2973 | $output_file_copy = '-'; | |
2974 | $$rpending_logfile_message .= <<EOM; | |
2975 | Note: --syntax check will be skipped because standard output is used | |
2976 | EOM | |
2977 | ||
2978 | } | |
2979 | } | |
2980 | ||
2981 | bless { | |
2982 | _fh => $fh, | |
2983 | _fh_copy => $fh_copy, | |
2984 | _fh_tee => $fh_tee, | |
2985 | _output_file => $output_file, | |
2986 | _output_file_open => $output_file_open, | |
2987 | _output_file_copy => $output_file_copy, | |
2988 | _tee_flag => 0, | |
2989 | _tee_file => $tee_file, | |
2990 | _tee_file_opened => 0, | |
2991 | _line_separator => $line_separator, | |
2992 | }, $class; | |
2993 | } | |
2994 | ||
2995 | sub write_line { | |
2996 | ||
2997 | my $self = shift; | |
2998 | my $fh = $self->{_fh}; | |
2999 | my $fh_copy = $self->{_fh_copy}; | |
3000 | ||
3001 | my $output_file_open = $self->{_output_file_open}; | |
3002 | chomp $_[0]; | |
3003 | $_[0] .= $self->{_line_separator}; | |
3004 | ||
3005 | $fh->print( $_[0] ) if ( $self->{_output_file_open} ); | |
3006 | print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} ); | |
3007 | ||
3008 | if ( $self->{_tee_flag} ) { | |
3009 | unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } | |
3010 | my $fh_tee = $self->{_fh_tee}; | |
3011 | print $fh_tee $_[0]; | |
3012 | } | |
3013 | } | |
3014 | ||
3015 | sub get_output_file_copy { | |
3016 | my $self = shift; | |
3017 | my $ofname = $self->{_output_file_copy}; | |
3018 | unless ($ofname) { | |
3019 | $ofname = $self->{_output_file}; | |
3020 | } | |
3021 | return $ofname; | |
3022 | } | |
3023 | ||
3024 | sub tee_on { | |
3025 | my $self = shift; | |
3026 | $self->{_tee_flag} = 1; | |
3027 | } | |
3028 | ||
3029 | sub tee_off { | |
3030 | my $self = shift; | |
3031 | $self->{_tee_flag} = 0; | |
3032 | } | |
3033 | ||
3034 | sub really_open_tee_file { | |
3035 | my $self = shift; | |
3036 | my $tee_file = $self->{_tee_file}; | |
3037 | my $fh_tee; | |
3038 | $fh_tee = IO::File->new(">$tee_file") | |
3039 | or die("couldn't open TEE file $tee_file: $!\n"); | |
3040 | $self->{_tee_file_opened} = 1; | |
3041 | $self->{_fh_tee} = $fh_tee; | |
3042 | } | |
3043 | ||
3044 | sub close_output_file { | |
3045 | my $self = shift; | |
3046 | eval { $self->{_fh}->close() } if $self->{_output_file_open}; | |
3047 | eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} ); | |
3048 | $self->close_tee_file(); | |
3049 | } | |
3050 | ||
3051 | sub close_tee_file { | |
3052 | my $self = shift; | |
3053 | ||
3054 | if ( $self->{_tee_file_opened} ) { | |
3055 | eval { $self->{_fh_tee}->close() }; | |
3056 | $self->{_tee_file_opened} = 0; | |
3057 | } | |
3058 | } | |
3059 | ||
3060 | ##################################################################### | |
3061 | # | |
3062 | # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is | |
3063 | # useful for program development. | |
3064 | # | |
3065 | # Only one such file is created regardless of the number of input | |
3066 | # files processed. This allows the results of processing many files | |
3067 | # to be summarized in a single file. | |
3068 | # | |
3069 | ##################################################################### | |
3070 | ||
3071 | package Perl::Tidy::Diagnostics; | |
3072 | ||
3073 | sub new { | |
3074 | ||
3075 | my $class = shift; | |
3076 | bless { | |
3077 | _write_diagnostics_count => 0, | |
3078 | _last_diagnostic_file => "", | |
3079 | _input_file => "", | |
3080 | _fh => undef, | |
3081 | }, $class; | |
3082 | } | |
3083 | ||
3084 | sub set_input_file { | |
3085 | my $self = shift; | |
3086 | $self->{_input_file} = $_[0]; | |
3087 | } | |
3088 | ||
3089 | # This is a diagnostic routine which is useful for program development. | |
3090 | # Output from debug messages go to a file named DIAGNOSTICS, where | |
3091 | # they are labeled by file and line. This allows many files to be | |
3092 | # scanned at once for some particular condition of interest. | |
3093 | sub write_diagnostics { | |
3094 | my $self = shift; | |
3095 | ||
3096 | unless ( $self->{_write_diagnostics_count} ) { | |
3097 | open DIAGNOSTICS, ">DIAGNOSTICS" | |
3098 | or death("couldn't open DIAGNOSTICS: $!\n"); | |
3099 | } | |
3100 | ||
3101 | my $last_diagnostic_file = $self->{_last_diagnostic_file}; | |
3102 | my $input_file = $self->{_input_file}; | |
3103 | if ( $last_diagnostic_file ne $input_file ) { | |
3104 | print DIAGNOSTICS "\nFILE:$input_file\n"; | |
3105 | } | |
3106 | $self->{_last_diagnostic_file} = $input_file; | |
3107 | my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); | |
3108 | print DIAGNOSTICS "$input_line_number:\t@_"; | |
3109 | $self->{_write_diagnostics_count}++; | |
3110 | } | |
3111 | ||
3112 | ##################################################################### | |
3113 | # | |
3114 | # The Perl::Tidy::Logger class writes the .LOG and .ERR files | |
3115 | # | |
3116 | ##################################################################### | |
3117 | ||
3118 | package Perl::Tidy::Logger; | |
3119 | ||
3120 | sub new { | |
3121 | my $class = shift; | |
3122 | my $fh; | |
3123 | my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_; | |
3124 | ||
3125 | # remove any old error output file | |
3126 | unless ( ref($warning_file) ) { | |
3127 | if ( -e $warning_file ) { unlink($warning_file) } | |
3128 | } | |
3129 | ||
3130 | bless { | |
3131 | _log_file => $log_file, | |
3132 | _fh_warnings => undef, | |
3133 | _rOpts => $rOpts, | |
3134 | _fh_warnings => undef, | |
3135 | _last_input_line_written => 0, | |
3136 | _at_end_of_file => 0, | |
3137 | _use_prefix => 1, | |
3138 | _block_log_output => 0, | |
3139 | _line_of_tokens => undef, | |
3140 | _output_line_number => undef, | |
3141 | _wrote_line_information_string => 0, | |
3142 | _wrote_column_headings => 0, | |
3143 | _warning_file => $warning_file, | |
3144 | _warning_count => 0, | |
3145 | _complaint_count => 0, | |
3146 | _saw_code_bug => -1, # -1=no 0=maybe 1=for sure | |
3147 | _saw_brace_error => 0, | |
3148 | _saw_extrude => $saw_extrude, | |
3149 | _output_array => [], | |
3150 | }, $class; | |
3151 | } | |
3152 | ||
3153 | sub close_log_file { | |
3154 | ||
3155 | my $self = shift; | |
3156 | if ( $self->{_fh_warnings} ) { | |
3157 | eval { $self->{_fh_warnings}->close() }; | |
3158 | $self->{_fh_warnings} = undef; | |
3159 | } | |
3160 | } | |
3161 | ||
3162 | sub get_warning_count { | |
3163 | my $self = shift; | |
3164 | return $self->{_warning_count}; | |
3165 | } | |
3166 | ||
3167 | sub get_use_prefix { | |
3168 | my $self = shift; | |
3169 | return $self->{_use_prefix}; | |
3170 | } | |
3171 | ||
3172 | sub block_log_output { | |
3173 | my $self = shift; | |
3174 | $self->{_block_log_output} = 1; | |
3175 | } | |
3176 | ||
3177 | sub unblock_log_output { | |
3178 | my $self = shift; | |
3179 | $self->{_block_log_output} = 0; | |
3180 | } | |
3181 | ||
3182 | sub interrupt_logfile { | |
3183 | my $self = shift; | |
3184 | $self->{_use_prefix} = 0; | |
3185 | $self->warning("\n"); | |
3186 | $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); | |
3187 | } | |
3188 | ||
3189 | sub resume_logfile { | |
3190 | my $self = shift; | |
3191 | $self->write_logfile_entry( '#' x 60 . "\n" ); | |
3192 | $self->{_use_prefix} = 1; | |
3193 | } | |
3194 | ||
3195 | sub we_are_at_the_last_line { | |
3196 | my $self = shift; | |
3197 | unless ( $self->{_wrote_line_information_string} ) { | |
3198 | $self->write_logfile_entry("Last line\n\n"); | |
3199 | } | |
3200 | $self->{_at_end_of_file} = 1; | |
3201 | } | |
3202 | ||
3203 | # record some stuff in case we go down in flames | |
3204 | sub black_box { | |
3205 | my $self = shift; | |
3206 | my ( $line_of_tokens, $output_line_number ) = @_; | |
3207 | my $input_line = $line_of_tokens->{_line_text}; | |
3208 | my $input_line_number = $line_of_tokens->{_line_number}; | |
3209 | ||
3210 | # save line information in case we have to write a logfile message | |
3211 | $self->{_line_of_tokens} = $line_of_tokens; | |
3212 | $self->{_output_line_number} = $output_line_number; | |
3213 | $self->{_wrote_line_information_string} = 0; | |
3214 | ||
3215 | my $last_input_line_written = $self->{_last_input_line_written}; | |
3216 | my $rOpts = $self->{_rOpts}; | |
3217 | if ( | |
3218 | ( | |
3219 | ( $input_line_number - $last_input_line_written ) >= | |
3220 | $rOpts->{'logfile-gap'} | |
3221 | ) | |
3222 | || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) | |
3223 | ) | |
3224 | { | |
3225 | my $rlevels = $line_of_tokens->{_rlevels}; | |
3226 | my $structural_indentation_level = $$rlevels[0]; | |
3227 | $self->{_last_input_line_written} = $input_line_number; | |
3228 | ( my $out_str = $input_line ) =~ s/^\s*//; | |
3229 | chomp $out_str; | |
3230 | ||
3231 | $out_str = ( '.' x $structural_indentation_level ) . $out_str; | |
3232 | ||
3233 | if ( length($out_str) > 35 ) { | |
3234 | $out_str = substr( $out_str, 0, 35 ) . " ...."; | |
3235 | } | |
3236 | $self->logfile_output( "", "$out_str\n" ); | |
3237 | } | |
3238 | } | |
3239 | ||
3240 | sub write_logfile_entry { | |
3241 | my $self = shift; | |
3242 | ||
3243 | # add leading >>> to avoid confusing error mesages and code | |
3244 | $self->logfile_output( ">>>", "@_" ); | |
3245 | } | |
3246 | ||
3247 | sub write_column_headings { | |
3248 | my $self = shift; | |
3249 | ||
3250 | $self->{_wrote_column_headings} = 1; | |
3251 | my $routput_array = $self->{_output_array}; | |
3252 | push @{$routput_array}, <<EOM; | |
3253 | The nesting depths in the table below are at the start of the lines. | |
3254 | The indicated output line numbers are not always exact. | |
3255 | ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not. | |
3256 | ||
3257 | in:out indent c b nesting code + messages; (messages begin with >>>) | |
3258 | lines levels i k (code begins with one '.' per indent level) | |
3259 | ------ ----- - - -------- ------------------------------------------- | |
3260 | EOM | |
3261 | } | |
3262 | ||
3263 | sub make_line_information_string { | |
3264 | ||
3265 | # make columns of information when a logfile message needs to go out | |
3266 | my $self = shift; | |
3267 | my $line_of_tokens = $self->{_line_of_tokens}; | |
3268 | my $input_line_number = $line_of_tokens->{_line_number}; | |
3269 | my $line_information_string = ""; | |
3270 | if ($input_line_number) { | |
3271 | ||
3272 | my $output_line_number = $self->{_output_line_number}; | |
3273 | my $brace_depth = $line_of_tokens->{_curly_brace_depth}; | |
3274 | my $paren_depth = $line_of_tokens->{_paren_depth}; | |
3275 | my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; | |
3276 | my $python_indentation_level = | |
3277 | $line_of_tokens->{_python_indentation_level}; | |
3278 | my $rlevels = $line_of_tokens->{_rlevels}; | |
3279 | my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; | |
3280 | my $rci_levels = $line_of_tokens->{_rci_levels}; | |
3281 | my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; | |
3282 | ||
3283 | my $structural_indentation_level = $$rlevels[0]; | |
3284 | ||
3285 | $self->write_column_headings() unless $self->{_wrote_column_headings}; | |
3286 | ||
3287 | # keep logfile columns aligned for scripts up to 999 lines; | |
3288 | # for longer scripts it doesn't really matter | |
3289 | my $extra_space = ""; | |
3290 | $extra_space .= | |
3291 | ( $input_line_number < 10 ) ? " " | |
3292 | : ( $input_line_number < 100 ) ? " " | |
3293 | : ""; | |
3294 | $extra_space .= | |
3295 | ( $output_line_number < 10 ) ? " " | |
3296 | : ( $output_line_number < 100 ) ? " " | |
3297 | : ""; | |
3298 | ||
3299 | # there are 2 possible nesting strings: | |
3300 | # the original which looks like this: (0 [1 {2 | |
3301 | # the new one, which looks like this: {{[ | |
3302 | # the new one is easier to read, and shows the order, but | |
3303 | # could be arbitrarily long, so we use it unless it is too long | |
3304 | my $nesting_string = | |
3305 | "($paren_depth [$square_bracket_depth {$brace_depth"; | |
3306 | my $nesting_string_new = $$rnesting_tokens[0]; | |
3307 | ||
3308 | my $ci_level = $$rci_levels[0]; | |
3309 | if ( $ci_level > 9 ) { $ci_level = '*' } | |
3310 | my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0'; | |
3311 | ||
3312 | if ( length($nesting_string_new) <= 8 ) { | |
3313 | $nesting_string = | |
3314 | $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); | |
3315 | } | |
3316 | if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 } | |
3317 | $line_information_string = | |
3318 | "L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; | |
3319 | } | |
3320 | return $line_information_string; | |
3321 | } | |
3322 | ||
3323 | sub logfile_output { | |
3324 | my $self = shift; | |
3325 | my ( $prompt, $msg ) = @_; | |
3326 | return if ( $self->{_block_log_output} ); | |
3327 | ||
3328 | my $routput_array = $self->{_output_array}; | |
3329 | if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { | |
3330 | push @{$routput_array}, "$msg"; | |
3331 | } | |
3332 | else { | |
3333 | my $line_information_string = $self->make_line_information_string(); | |
3334 | $self->{_wrote_line_information_string} = 1; | |
3335 | ||
3336 | if ($line_information_string) { | |
3337 | push @{$routput_array}, "$line_information_string $prompt$msg"; | |
3338 | } | |
3339 | else { | |
3340 | push @{$routput_array}, "$msg"; | |
3341 | } | |
3342 | } | |
3343 | } | |
3344 | ||
3345 | sub get_saw_brace_error { | |
3346 | my $self = shift; | |
3347 | return $self->{_saw_brace_error}; | |
3348 | } | |
3349 | ||
3350 | sub increment_brace_error { | |
3351 | my $self = shift; | |
3352 | $self->{_saw_brace_error}++; | |
3353 | } | |
3354 | ||
3355 | sub brace_warning { | |
3356 | my $self = shift; | |
3357 | use constant BRACE_WARNING_LIMIT => 10; | |
3358 | my $saw_brace_error = $self->{_saw_brace_error}; | |
3359 | ||
3360 | if ( $saw_brace_error < BRACE_WARNING_LIMIT ) { | |
3361 | $self->warning(@_); | |
3362 | } | |
3363 | $saw_brace_error++; | |
3364 | $self->{_saw_brace_error} = $saw_brace_error; | |
3365 | ||
3366 | if ( $saw_brace_error == BRACE_WARNING_LIMIT ) { | |
3367 | $self->warning("No further warnings of this type will be given\n"); | |
3368 | } | |
3369 | } | |
3370 | ||
3371 | sub complain { | |
3372 | ||
3373 | # handle non-critical warning messages based on input flag | |
3374 | my $self = shift; | |
3375 | my $rOpts = $self->{_rOpts}; | |
3376 | ||
3377 | # these appear in .ERR output only if -w flag is used | |
3378 | if ( $rOpts->{'warning-output'} ) { | |
3379 | $self->warning(@_); | |
3380 | } | |
3381 | ||
3382 | # otherwise, they go to the .LOG file | |
3383 | else { | |
3384 | $self->{_complaint_count}++; | |
3385 | $self->write_logfile_entry(@_); | |
3386 | } | |
3387 | } | |
3388 | ||
3389 | sub warning { | |
3390 | ||
3391 | # report errors to .ERR file (or stdout) | |
3392 | my $self = shift; | |
3393 | use constant WARNING_LIMIT => 50; | |
3394 | ||
3395 | my $rOpts = $self->{_rOpts}; | |
3396 | unless ( $rOpts->{'quiet'} ) { | |
3397 | ||
3398 | my $warning_count = $self->{_warning_count}; | |
3399 | unless ($warning_count) { | |
3400 | my $warning_file = $self->{_warning_file}; | |
3401 | my $fh_warnings; | |
3402 | if ( $rOpts->{'standard-error-output'} ) { | |
3403 | $fh_warnings = *STDERR; | |
3404 | } | |
3405 | else { | |
3406 | ( $fh_warnings, my $filename ) = | |
3407 | Perl::Tidy::streamhandle( $warning_file, 'w' ); | |
3408 | $fh_warnings or die("couldn't open $filename $!\n"); | |
3409 | warn "## Please see file $filename\n"; | |
3410 | } | |
3411 | $self->{_fh_warnings} = $fh_warnings; | |
3412 | } | |
3413 | ||
3414 | my $fh_warnings = $self->{_fh_warnings}; | |
3415 | if ( $warning_count < WARNING_LIMIT ) { | |
3416 | if ( $self->get_use_prefix() > 0 ) { | |
3417 | my $input_line_number = | |
3418 | Perl::Tidy::Tokenizer::get_input_line_number(); | |
3419 | print $fh_warnings "$input_line_number:\t@_"; | |
3420 | $self->write_logfile_entry("WARNING: @_"); | |
3421 | } | |
3422 | else { | |
3423 | print $fh_warnings @_; | |
3424 | $self->write_logfile_entry(@_); | |
3425 | } | |
3426 | } | |
3427 | $warning_count++; | |
3428 | $self->{_warning_count} = $warning_count; | |
3429 | ||
3430 | if ( $warning_count == WARNING_LIMIT ) { | |
3431 | print $fh_warnings "No further warnings will be given"; | |
3432 | } | |
3433 | } | |
3434 | } | |
3435 | ||
3436 | # programming bug codes: | |
3437 | # -1 = no bug | |
3438 | # 0 = maybe, not sure. | |
3439 | # 1 = definitely | |
3440 | sub report_possible_bug { | |
3441 | my $self = shift; | |
3442 | my $saw_code_bug = $self->{_saw_code_bug}; | |
3443 | $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; | |
3444 | } | |
3445 | ||
3446 | sub report_definite_bug { | |
3447 | my $self = shift; | |
3448 | $self->{_saw_code_bug} = 1; | |
3449 | } | |
3450 | ||
3451 | sub ask_user_for_bug_report { | |
3452 | my $self = shift; | |
3453 | ||
3454 | my ( $infile_syntax_ok, $formatter ) = @_; | |
3455 | my $saw_code_bug = $self->{_saw_code_bug}; | |
3456 | if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { | |
3457 | $self->warning(<<EOM); | |
3458 | ||
3459 | You may have encountered a code bug in perltidy. If you think so, and | |
3460 | the problem is not listed in the BUGS file at | |
3461 | http://perltidy.sourceforge.net, please report it so that it can be | |
3462 | corrected. Include the smallest possible script which has the problem, | |
3463 | along with the .LOG file. See the manual pages for contact information. | |
3464 | Thank you! | |
3465 | EOM | |
3466 | ||
3467 | } | |
3468 | elsif ( $saw_code_bug == 1 ) { | |
3469 | if ( $self->{_saw_extrude} ) { | |
3470 | $self->warning(<<EOM); | |
3471 | You may have encountered a bug in perltidy. However, since you are | |
3472 | using the -extrude option, the problem may be with perl itself, which | |
3473 | has occasional parsing problems with this type of file. If you believe | |
3474 | that the problem is with perltidy, and the problem is not listed in the | |
3475 | BUGS file at http://perltidy.sourceforge.net, please report it so that | |
3476 | it can be corrected. Include the smallest possible script which has the | |
3477 | problem, along with the .LOG file. See the manual pages for contact | |
3478 | information. | |
3479 | Thank you! | |
3480 | EOM | |
3481 | } | |
3482 | else { | |
3483 | $self->warning(<<EOM); | |
3484 | ||
3485 | Oops, you seem to have encountered a bug in perltidy. Please check the | |
3486 | BUGS file at http://perltidy.sourceforge.net. If the problem is not | |
3487 | listed there, please report it so that it can be corrected. Include the | |
3488 | smallest possible script which produces this message, along with the | |
3489 | .LOG file if appropriate. See the manual pages for contact information. | |
3490 | Your efforts are appreciated. | |
3491 | Thank you! | |
3492 | EOM | |
3493 | my $added_semicolon_count = 0; | |
3494 | eval { | |
3495 | $added_semicolon_count = | |
3496 | $formatter->get_added_semicolon_count(); | |
3497 | }; | |
3498 | if ( $added_semicolon_count > 0 ) { | |
3499 | $self->warning(<<EOM); | |
3500 | ||
3501 | The log file shows that perltidy added $added_semicolon_count semicolons. | |
3502 | Please rerun with -nasc to see if that is the cause of the syntax error. Even | |
3503 | if that is the problem, please report it so that it can be fixed. | |
3504 | EOM | |
3505 | ||
3506 | } | |
3507 | } | |
3508 | } | |
3509 | } | |
3510 | ||
3511 | sub finish { | |
3512 | ||
3513 | # called after all formatting to summarize errors | |
3514 | my $self = shift; | |
3515 | my ( $infile_syntax_ok, $formatter ) = @_; | |
3516 | ||
3517 | my $rOpts = $self->{_rOpts}; | |
3518 | my $warning_count = $self->{_warning_count}; | |
3519 | my $saw_code_bug = $self->{_saw_code_bug}; | |
3520 | ||
3521 | my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) | |
3522 | || $saw_code_bug == 1 | |
3523 | || $rOpts->{'logfile'}; | |
3524 | my $log_file = $self->{_log_file}; | |
3525 | if ($warning_count) { | |
3526 | if ($save_logfile) { | |
3527 | $self->block_log_output(); # avoid echoing this to the logfile | |
3528 | $self->warning( | |
3529 | "The logfile $log_file may contain useful information\n"); | |
3530 | $self->unblock_log_output(); | |
3531 | } | |
3532 | ||
3533 | if ( $self->{_complaint_count} > 0 ) { | |
3534 | $self->warning( | |
3535 | "To see $self->{_complaint_count} non-critical warnings rerun with -w\n" | |
3536 | ); | |
3537 | } | |
3538 | ||
3539 | if ( $self->{_saw_brace_error} | |
3540 | && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) ) | |
3541 | { | |
3542 | $self->warning("To save a full .LOG file rerun with -g\n"); | |
3543 | } | |
3544 | } | |
3545 | $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); | |
3546 | ||
3547 | if ($save_logfile) { | |
3548 | my $log_file = $self->{_log_file}; | |
3549 | my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); | |
3550 | if ($fh) { | |
3551 | my $routput_array = $self->{_output_array}; | |
3552 | foreach ( @{$routput_array} ) { $fh->print($_) } | |
3553 | eval { $fh->close() }; | |
3554 | } | |
3555 | } | |
3556 | } | |
3557 | ||
3558 | ##################################################################### | |
3559 | # | |
3560 | # The Perl::Tidy::DevNull class supplies a dummy print method | |
3561 | # | |
3562 | ##################################################################### | |
3563 | ||
3564 | package Perl::Tidy::DevNull; | |
3565 | sub new { return bless {}, $_[0] } | |
3566 | sub print { return } | |
3567 | sub close { return } | |
3568 | ||
3569 | ##################################################################### | |
3570 | # | |
3571 | # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html | |
3572 | # | |
3573 | ##################################################################### | |
3574 | ||
3575 | package Perl::Tidy::HtmlWriter; | |
3576 | ||
3577 | use File::Basename; | |
3578 | ||
3579 | # class variables | |
3580 | use vars qw{ | |
3581 | %html_color | |
3582 | %html_bold | |
3583 | %html_italic | |
3584 | %token_short_names | |
3585 | %short_to_long_names | |
3586 | $rOpts | |
3587 | $css_filename | |
3588 | $css_linkname | |
3589 | $missing_html_entities | |
3590 | }; | |
3591 | ||
3592 | # replace unsafe characters with HTML entity representation if HTML::Entities | |
3593 | # is available | |
3594 | { eval "use HTML::Entities"; $missing_html_entities = $@; } | |
3595 | ||
3596 | sub new { | |
3597 | ||
3598 | my ( $class, $input_file, $html_file, $extension, $html_toc_extension, | |
3599 | $html_src_extension ) | |
3600 | = @_; | |
3601 | ||
3602 | my $html_file_opened = 0; | |
3603 | my $html_fh; | |
3604 | ( $html_fh, my $html_filename ) = | |
3605 | Perl::Tidy::streamhandle( $html_file, 'w' ); | |
3606 | unless ($html_fh) { | |
3607 | warn("can't open $html_file: $!\n"); | |
3608 | return undef; | |
3609 | } | |
3610 | $html_file_opened = 1; | |
3611 | ||
3612 | if ( !$input_file || $input_file eq '-' || ref($input_file) ) { | |
3613 | $input_file = "NONAME"; | |
3614 | } | |
3615 | ||
3616 | # write the table of contents to a string | |
3617 | my $toc_string; | |
3618 | my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' ); | |
3619 | ||
3620 | my $html_pre_fh; | |
3621 | my @pre_string_stack; | |
3622 | if ( $rOpts->{'html-pre-only'} ) { | |
3623 | ||
3624 | # pre section goes directly to the output stream | |
3625 | $html_pre_fh = $html_fh; | |
3626 | $html_pre_fh->print( <<"PRE_END"); | |
3627 | <pre> | |
3628 | PRE_END | |
3629 | } | |
3630 | else { | |
3631 | ||
3632 | # pre section go out to a temporary string | |
3633 | my $pre_string; | |
3634 | $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); | |
3635 | push @pre_string_stack, \$pre_string; | |
3636 | } | |
3637 | ||
3638 | # pod text gets diverted if the 'pod2html' is used | |
3639 | my $html_pod_fh; | |
3640 | my $pod_string; | |
3641 | if ( $rOpts->{'pod2html'} ) { | |
3642 | if ( $rOpts->{'html-pre-only'} ) { | |
3643 | undef $rOpts->{'pod2html'}; | |
3644 | } | |
3645 | else { | |
3646 | eval "use Pod::Html"; | |
3647 | if ($@) { | |
3648 | warn | |
3649 | "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n"; | |
3650 | undef $rOpts->{'pod2html'}; | |
3651 | } | |
3652 | else { | |
3653 | $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' ); | |
3654 | } | |
3655 | } | |
3656 | } | |
3657 | ||
3658 | my $toc_filename; | |
3659 | my $src_filename; | |
3660 | if ( $rOpts->{'frames'} ) { | |
3661 | unless ($extension) { | |
3662 | warn | |
3663 | "cannot use frames without a specified output extension; ignoring -frm\n"; | |
3664 | undef $rOpts->{'frames'}; | |
3665 | } | |
3666 | else { | |
3667 | $toc_filename = $input_file . $html_toc_extension . $extension; | |
3668 | $src_filename = $input_file . $html_src_extension . $extension; | |
3669 | } | |
3670 | } | |
3671 | ||
3672 | # ---------------------------------------------------------- | |
3673 | # Output is now directed as follows: | |
3674 | # html_toc_fh <-- table of contents items | |
3675 | # html_pre_fh <-- the <pre> section of formatted code, except: | |
3676 | # html_pod_fh <-- pod goes here with the pod2html option | |
3677 | # ---------------------------------------------------------- | |
3678 | ||
3679 | my $title = $rOpts->{'title'}; | |
3680 | unless ($title) { | |
3681 | ( $title, my $path ) = fileparse($input_file); | |
3682 | } | |
3683 | my $toc_item_count = 0; | |
3684 | my $in_toc_package = ""; | |
3685 | my $last_level = 0; | |
3686 | bless { | |
3687 | _input_file => $input_file, # name of input file | |
3688 | _title => $title, # title, unescaped | |
3689 | _html_file => $html_file, # name of .html output file | |
3690 | _toc_filename => $toc_filename, # for frames option | |
3691 | _src_filename => $src_filename, # for frames option | |
3692 | _html_file_opened => $html_file_opened, # a flag | |
3693 | _html_fh => $html_fh, # the output stream | |
3694 | _html_pre_fh => $html_pre_fh, # pre section goes here | |
3695 | _rpre_string_stack => \@pre_string_stack, # stack of pre sections | |
3696 | _html_pod_fh => $html_pod_fh, # pod goes here if pod2html | |
3697 | _rpod_string => \$pod_string, # string holding pod | |
3698 | _pod_cut_count => 0, # how many =cut's? | |
3699 | _html_toc_fh => $html_toc_fh, # fh for table of contents | |
3700 | _rtoc_string => \$toc_string, # string holding toc | |
3701 | _rtoc_item_count => \$toc_item_count, # how many toc items | |
3702 | _rin_toc_package => \$in_toc_package, # package name | |
3703 | _rtoc_name_count => {}, # hash to track unique names | |
3704 | _rpackage_stack => [], # stack to check for package | |
3705 | # name changes | |
3706 | _rlast_level => \$last_level, # brace indentation level | |
3707 | }, $class; | |
3708 | } | |
3709 | ||
3710 | sub add_toc_item { | |
3711 | ||
3712 | # Add an item to the html table of contents. | |
3713 | # This is called even if no table of contents is written, | |
3714 | # because we still want to put the anchors in the <pre> text. | |
3715 | # We are given an anchor name and its type; types are: | |
3716 | # 'package', 'sub', '__END__', '__DATA__', 'EOF' | |
3717 | # There must be an 'EOF' call at the end to wrap things up. | |
3718 | my $self = shift; | |
3719 | my ( $name, $type ) = @_; | |
3720 | my $html_toc_fh = $self->{_html_toc_fh}; | |
3721 | my $html_pre_fh = $self->{_html_pre_fh}; | |
3722 | my $rtoc_name_count = $self->{_rtoc_name_count}; | |
3723 | my $rtoc_item_count = $self->{_rtoc_item_count}; | |
3724 | my $rlast_level = $self->{_rlast_level}; | |
3725 | my $rin_toc_package = $self->{_rin_toc_package}; | |
3726 | my $rpackage_stack = $self->{_rpackage_stack}; | |
3727 | ||
3728 | # packages contain sublists of subs, so to avoid errors all package | |
3729 | # items are written and finished with the following routines | |
3730 | my $end_package_list = sub { | |
3731 | if ($$rin_toc_package) { | |
3732 | $html_toc_fh->print("</ul>\n</li>\n"); | |
3733 | $$rin_toc_package = ""; | |
3734 | } | |
3735 | }; | |
3736 | ||
3737 | my $start_package_list = sub { | |
3738 | my ( $unique_name, $package ) = @_; | |
3739 | if ($$rin_toc_package) { $end_package_list->() } | |
3740 | $html_toc_fh->print(<<EOM); | |
3741 | <li><a href=\"#$unique_name\">package $package</a> | |
3742 | <ul> | |
3743 | EOM | |
3744 | $$rin_toc_package = $package; | |
3745 | }; | |
3746 | ||
3747 | # start the table of contents on the first item | |
3748 | unless ($$rtoc_item_count) { | |
3749 | ||
3750 | # but just quit if we hit EOF without any other entries | |
3751 | # in this case, there will be no toc | |
3752 | return if ( $type eq 'EOF' ); | |
3753 | $html_toc_fh->print( <<"TOC_END"); | |
3754 | <!-- BEGIN CODE INDEX --><a name="code-index"></a> | |
3755 | <ul> | |
3756 | TOC_END | |
3757 | } | |
3758 | $$rtoc_item_count++; | |
3759 | ||
3760 | # make a unique anchor name for this location: | |
3761 | # - packages get a 'package-' prefix | |
3762 | # - subs use their names | |
3763 | my $unique_name = $name; | |
3764 | if ( $type eq 'package' ) { $unique_name = "package-$name" } | |
3765 | ||
3766 | # append '-1', '-2', etc if necessary to make unique; this will | |
3767 | # be unique because subs and packages cannot have a '-' | |
3768 | if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { | |
3769 | $unique_name .= "-$count"; | |
3770 | } | |
3771 | ||
3772 | # - all names get terminal '-' if pod2html is used, to avoid | |
3773 | # conflicts with anchor names created by pod2html | |
3774 | if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } | |
3775 | ||
3776 | # start/stop lists of subs | |
3777 | if ( $type eq 'sub' ) { | |
3778 | my $package = $rpackage_stack->[$$rlast_level]; | |
3779 | unless ($package) { $package = 'main' } | |
3780 | ||
3781 | # if we're already in a package/sub list, be sure its the right | |
3782 | # package or else close it | |
3783 | if ( $$rin_toc_package && $$rin_toc_package ne $package ) { | |
3784 | $end_package_list->(); | |
3785 | } | |
3786 | ||
3787 | # start a package/sub list if necessary | |
3788 | unless ($$rin_toc_package) { | |
3789 | $start_package_list->( $unique_name, $package ); | |
3790 | } | |
3791 | } | |
3792 | ||
3793 | # now write an entry in the toc for this item | |
3794 | if ( $type eq 'package' ) { | |
3795 | $start_package_list->( $unique_name, $name ); | |
3796 | } | |
3797 | elsif ( $type eq 'sub' ) { | |
3798 | $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); | |
3799 | } | |
3800 | else { | |
3801 | $end_package_list->(); | |
3802 | $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); | |
3803 | } | |
3804 | ||
3805 | # write the anchor in the <pre> section | |
3806 | $html_pre_fh->print("<a name=\"$unique_name\"></a>"); | |
3807 | ||
3808 | # end the table of contents, if any, on the end of file | |
3809 | if ( $type eq 'EOF' ) { | |
3810 | $html_toc_fh->print( <<"TOC_END"); | |
3811 | </ul> | |
3812 | <!-- END CODE INDEX --> | |
3813 | TOC_END | |
3814 | } | |
3815 | } | |
3816 | ||
3817 | BEGIN { | |
3818 | ||
3819 | # This is the official list of tokens which may be identified by the | |
3820 | # user. Long names are used as getopt keys. Short names are | |
3821 | # convenient short abbreviations for specifying input. Short names | |
3822 | # somewhat resemble token type characters, but are often different | |
3823 | # because they may only be alphanumeric, to allow command line | |
3824 | # input. Also, note that because of case insensitivity of html, | |
3825 | # this table must be in a single case only (I've chosen to use all | |
3826 | # lower case). | |
3827 | # When adding NEW_TOKENS: update this hash table | |
3828 | # short names => long names | |
3829 | %short_to_long_names = ( | |
3830 | 'n' => 'numeric', | |
3831 | 'p' => 'paren', | |
3832 | 'q' => 'quote', | |
3833 | 's' => 'structure', | |
3834 | 'c' => 'comment', | |
3835 | 'v' => 'v-string', | |
3836 | 'cm' => 'comma', | |
3837 | 'w' => 'bareword', | |
3838 | 'co' => 'colon', | |
3839 | 'pu' => 'punctuation', | |
3840 | 'i' => 'identifier', | |
3841 | 'j' => 'label', | |
3842 | 'h' => 'here-doc-target', | |
3843 | 'hh' => 'here-doc-text', | |
3844 | 'k' => 'keyword', | |
3845 | 'sc' => 'semicolon', | |
3846 | 'm' => 'subroutine', | |
3847 | 'pd' => 'pod-text', | |
3848 | ); | |
3849 | ||
3850 | # Now we have to map actual token types into one of the above short | |
3851 | # names; any token types not mapped will get 'punctuation' | |
3852 | # properties. | |
3853 | ||
3854 | # The values of this hash table correspond to the keys of the | |
3855 | # previous hash table. | |
3856 | # The keys of this hash table are token types and can be seen | |
3857 | # by running with --dump-token-types (-dtt). | |
3858 | ||
3859 | # When adding NEW_TOKENS: update this hash table | |
3860 | # $type => $short_name | |
3861 | %token_short_names = ( | |
3862 | '#' => 'c', | |
3863 | 'n' => 'n', | |
3864 | 'v' => 'v', | |
3865 | 'k' => 'k', | |
3866 | 'F' => 'k', | |
3867 | 'Q' => 'q', | |
3868 | 'q' => 'q', | |
3869 | 'J' => 'j', | |
3870 | 'j' => 'j', | |
3871 | 'h' => 'h', | |
3872 | 'H' => 'hh', | |
3873 | 'w' => 'w', | |
3874 | ',' => 'cm', | |
3875 | '=>' => 'cm', | |
3876 | ';' => 'sc', | |
3877 | ':' => 'co', | |
3878 | 'f' => 'sc', | |
3879 | '(' => 'p', | |
3880 | ')' => 'p', | |
3881 | 'M' => 'm', | |
3882 | 'P' => 'pd', | |
3883 | 'A' => 'co', | |
3884 | ); | |
3885 | ||
3886 | # These token types will all be called identifiers for now | |
3887 | # FIXME: need to separate user defined modules as separate type | |
3888 | my @identifier = qw" i t U C Y Z G :: "; | |
3889 | @token_short_names{@identifier} = ('i') x scalar(@identifier); | |
3890 | ||
3891 | # These token types will be called 'structure' | |
3892 | my @structure = qw" { } "; | |
3893 | @token_short_names{@structure} = ('s') x scalar(@structure); | |
3894 | ||
3895 | # OLD NOTES: save for reference | |
3896 | # Any of these could be added later if it would be useful. | |
3897 | # For now, they will by default become punctuation | |
3898 | # my @list = qw" L R [ ] "; | |
3899 | # @token_long_names{@list} = ('non-structure') x scalar(@list); | |
3900 | # | |
3901 | # my @list = qw" | |
3902 | # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm | |
3903 | # "; | |
3904 | # @token_long_names{@list} = ('math') x scalar(@list); | |
3905 | # | |
3906 | # my @list = qw" & &= ~ ~= ^ ^= | |= "; | |
3907 | # @token_long_names{@list} = ('bit') x scalar(@list); | |
3908 | # | |
3909 | # my @list = qw" == != < > <= <=> "; | |
3910 | # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); | |
3911 | # | |
3912 | # my @list = qw" && || ! &&= ||= "; | |
3913 | # @token_long_names{@list} = ('logical') x scalar(@list); | |
3914 | # | |
3915 | # my @list = qw" . .= =~ !~ x x= "; | |
3916 | # @token_long_names{@list} = ('string-operators') x scalar(@list); | |
3917 | # | |
3918 | # # Incomplete.. | |
3919 | # my @list = qw" .. -> <> ... \ ? "; | |
3920 | # @token_long_names{@list} = ('misc-operators') x scalar(@list); | |
3921 | ||
3922 | } | |
3923 | ||
3924 | sub make_getopt_long_names { | |
3925 | my $class = shift; | |
3926 | my ($rgetopt_names) = @_; | |
3927 | while ( my ( $short_name, $name ) = each %short_to_long_names ) { | |
3928 | push @$rgetopt_names, "html-color-$name=s"; | |
3929 | push @$rgetopt_names, "html-italic-$name!"; | |
3930 | push @$rgetopt_names, "html-bold-$name!"; | |
3931 | } | |
3932 | push @$rgetopt_names, "html-color-background=s"; | |
3933 | push @$rgetopt_names, "html-linked-style-sheet=s"; | |
3934 | push @$rgetopt_names, "nohtml-style-sheets"; | |
3935 | push @$rgetopt_names, "html-pre-only"; | |
3936 | push @$rgetopt_names, "html-line-numbers"; | |
3937 | push @$rgetopt_names, "html-entities!"; | |
3938 | push @$rgetopt_names, "stylesheet"; | |
3939 | push @$rgetopt_names, "html-table-of-contents!"; | |
3940 | push @$rgetopt_names, "pod2html!"; | |
3941 | push @$rgetopt_names, "frames!"; | |
3942 | push @$rgetopt_names, "html-toc-extension=s"; | |
3943 | push @$rgetopt_names, "html-src-extension=s"; | |
3944 | ||
3945 | # Pod::Html parameters: | |
3946 | push @$rgetopt_names, "backlink=s"; | |
3947 | push @$rgetopt_names, "cachedir=s"; | |
3948 | push @$rgetopt_names, "htmlroot=s"; | |
3949 | push @$rgetopt_names, "libpods=s"; | |
3950 | push @$rgetopt_names, "podpath=s"; | |
3951 | push @$rgetopt_names, "podroot=s"; | |
3952 | push @$rgetopt_names, "title=s"; | |
3953 | ||
3954 | # Pod::Html parameters with leading 'pod' which will be removed | |
3955 | # before the call to Pod::Html | |
3956 | push @$rgetopt_names, "podquiet!"; | |
3957 | push @$rgetopt_names, "podverbose!"; | |
3958 | push @$rgetopt_names, "podrecurse!"; | |
3959 | push @$rgetopt_names, "podflush"; | |
3960 | push @$rgetopt_names, "podheader!"; | |
3961 | push @$rgetopt_names, "podindex!"; | |
3962 | } | |
3963 | ||
3964 | sub make_abbreviated_names { | |
3965 | ||
3966 | # We're appending things like this to the expansion list: | |
3967 | # 'hcc' => [qw(html-color-comment)], | |
3968 | # 'hck' => [qw(html-color-keyword)], | |
3969 | # etc | |
3970 | my $class = shift; | |
3971 | my ($rexpansion) = @_; | |
3972 | ||
3973 | # abbreviations for color/bold/italic properties | |
3974 | while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { | |
3975 | ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; | |
3976 | ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; | |
3977 | ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; | |
3978 | ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; | |
3979 | ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; | |
3980 | } | |
3981 | ||
3982 | # abbreviations for all other html options | |
3983 | ${$rexpansion}{"hcbg"} = ["html-color-background"]; | |
3984 | ${$rexpansion}{"pre"} = ["html-pre-only"]; | |
3985 | ${$rexpansion}{"toc"} = ["html-table-of-contents"]; | |
3986 | ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; | |
3987 | ${$rexpansion}{"nnn"} = ["html-line-numbers"]; | |
3988 | ${$rexpansion}{"hent"} = ["html-entities"]; | |
3989 | ${$rexpansion}{"nhent"} = ["nohtml-entities"]; | |
3990 | ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; | |
3991 | ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; | |
3992 | ${$rexpansion}{"ss"} = ["stylesheet"]; | |
3993 | ${$rexpansion}{"pod"} = ["pod2html"]; | |
3994 | ${$rexpansion}{"npod"} = ["nopod2html"]; | |
3995 | ${$rexpansion}{"frm"} = ["frames"]; | |
3996 | ${$rexpansion}{"nfrm"} = ["noframes"]; | |
3997 | ${$rexpansion}{"text"} = ["html-toc-extension"]; | |
3998 | ${$rexpansion}{"sext"} = ["html-src-extension"]; | |
3999 | } | |
4000 | ||
4001 | sub check_options { | |
4002 | ||
4003 | # This will be called once after options have been parsed | |
4004 | my $class = shift; | |
4005 | $rOpts = shift; | |
4006 | ||
4007 | # X11 color names for default settings that seemed to look ok | |
4008 | # (these color names are only used for programming clarity; the hex | |
4009 | # numbers are actually written) | |
4010 | use constant ForestGreen => "#228B22"; | |
4011 | use constant SaddleBrown => "#8B4513"; | |
4012 | use constant magenta4 => "#8B008B"; | |
4013 | use constant IndianRed3 => "#CD5555"; | |
4014 | use constant DeepSkyBlue4 => "#00688B"; | |
4015 | use constant MediumOrchid3 => "#B452CD"; | |
4016 | use constant black => "#000000"; | |
4017 | use constant white => "#FFFFFF"; | |
4018 | use constant red => "#FF0000"; | |
4019 | ||
4020 | # set default color, bold, italic properties | |
4021 | # anything not listed here will be given the default (punctuation) color -- | |
4022 | # these types currently not listed and get default: ws pu s sc cm co p | |
4023 | # When adding NEW_TOKENS: add an entry here if you don't want defaults | |
4024 | ||
4025 | # set_default_properties( $short_name, default_color, bold?, italic? ); | |
4026 | set_default_properties( 'c', ForestGreen, 0, 0 ); | |
4027 | set_default_properties( 'pd', ForestGreen, 0, 1 ); | |
4028 | set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown | |
4029 | set_default_properties( 'q', IndianRed3, 0, 0 ); | |
4030 | set_default_properties( 'hh', IndianRed3, 0, 1 ); | |
4031 | set_default_properties( 'h', IndianRed3, 1, 0 ); | |
4032 | set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); | |
4033 | set_default_properties( 'w', black, 0, 0 ); | |
4034 | set_default_properties( 'n', MediumOrchid3, 0, 0 ); | |
4035 | set_default_properties( 'v', MediumOrchid3, 0, 0 ); | |
4036 | set_default_properties( 'j', IndianRed3, 1, 0 ); | |
4037 | set_default_properties( 'm', red, 1, 0 ); | |
4038 | ||
4039 | set_default_color( 'html-color-background', white ); | |
4040 | set_default_color( 'html-color-punctuation', black ); | |
4041 | ||
4042 | # setup property lookup tables for tokens based on their short names | |
4043 | # every token type has a short name, and will use these tables | |
4044 | # to do the html markup | |
4045 | while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { | |
4046 | $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; | |
4047 | $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; | |
4048 | $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; | |
4049 | } | |
4050 | ||
4051 | # write style sheet to STDOUT and die if requested | |
4052 | if ( defined( $rOpts->{'stylesheet'} ) ) { | |
4053 | write_style_sheet_file('-'); | |
4054 | exit 1; | |
4055 | } | |
4056 | ||
4057 | # make sure user gives a file name after -css | |
4058 | if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { | |
4059 | $css_linkname = $rOpts->{'html-linked-style-sheet'}; | |
4060 | if ( $css_linkname =~ /^-/ ) { | |
4061 | die "You must specify a valid filename after -css\n"; | |
4062 | } | |
4063 | } | |
4064 | ||
4065 | # check for conflict | |
4066 | if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { | |
4067 | $rOpts->{'nohtml-style-sheets'} = 0; | |
4068 | warning("You can't specify both -css and -nss; -nss ignored\n"); | |
4069 | } | |
4070 | ||
4071 | # write a style sheet file if necessary | |
4072 | if ($css_linkname) { | |
4073 | ||
4074 | # if the selected filename exists, don't write, because user may | |
4075 | # have done some work by hand to create it; use backup name instead | |
4076 | # Also, this will avoid a potential disaster in which the user | |
4077 | # forgets to specify the style sheet, like this: | |
4078 | # perltidy -html -css myfile1.pl myfile2.pl | |
4079 | # This would cause myfile1.pl to parsed as the style sheet by GetOpts | |
4080 | my $css_filename = $css_linkname; | |
4081 | unless ( -e $css_filename ) { | |
4082 | write_style_sheet_file($css_filename); | |
4083 | } | |
4084 | } | |
4085 | $missing_html_entities = 1 unless $rOpts->{'html-entities'}; | |
4086 | } | |
4087 | ||
4088 | sub write_style_sheet_file { | |
4089 | ||
4090 | my $css_filename = shift; | |
4091 | my $fh; | |
4092 | unless ( $fh = IO::File->new("> $css_filename") ) { | |
4093 | die "can't open $css_filename: $!\n"; | |
4094 | } | |
4095 | write_style_sheet_data($fh); | |
4096 | eval { $fh->close }; | |
4097 | } | |
4098 | ||
4099 | sub write_style_sheet_data { | |
4100 | ||
4101 | # write the style sheet data to an open file handle | |
4102 | my $fh = shift; | |
4103 | ||
4104 | my $bg_color = $rOpts->{'html-color-background'}; | |
4105 | my $text_color = $rOpts->{'html-color-punctuation'}; | |
4106 | ||
4107 | # pre-bgcolor is new, and may not be defined | |
4108 | my $pre_bg_color = $rOpts->{'html-pre-color-background'}; | |
4109 | $pre_bg_color = $bg_color unless $pre_bg_color; | |
4110 | ||
4111 | $fh->print(<<"EOM"); | |
4112 | /* default style sheet generated by perltidy */ | |
4113 | body {background: $bg_color; color: $text_color} | |
4114 | pre { color: $text_color; | |
4115 | background: $pre_bg_color; | |
4116 | font-family: courier; | |
4117 | } | |
4118 | ||
4119 | EOM | |
4120 | ||
4121 | foreach my $short_name ( sort keys %short_to_long_names ) { | |
4122 | my $long_name = $short_to_long_names{$short_name}; | |
4123 | ||
4124 | my $abbrev = '.' . $short_name; | |
4125 | if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment | |
4126 | my $color = $html_color{$short_name}; | |
4127 | if ( !defined($color) ) { $color = $text_color } | |
4128 | $fh->print("$abbrev \{ color: $color;"); | |
4129 | ||
4130 | if ( $html_bold{$short_name} ) { | |
4131 | $fh->print(" font-weight:bold;"); | |
4132 | } | |
4133 | ||
4134 | if ( $html_italic{$short_name} ) { | |
4135 | $fh->print(" font-style:italic;"); | |
4136 | } | |
4137 | $fh->print("} /* $long_name */\n"); | |
4138 | } | |
4139 | } | |
4140 | ||
4141 | sub set_default_color { | |
4142 | ||
4143 | # make sure that options hash $rOpts->{$key} contains a valid color | |
4144 | my ( $key, $color ) = @_; | |
4145 | if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } | |
4146 | $rOpts->{$key} = check_RGB($color); | |
4147 | } | |
4148 | ||
4149 | sub check_RGB { | |
4150 | ||
4151 | # if color is a 6 digit hex RGB value, prepend a #, otherwise | |
4152 | # assume that it is a valid ascii color name | |
4153 | my ($color) = @_; | |
4154 | if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } | |
4155 | return $color; | |
4156 | } | |
4157 | ||
4158 | sub set_default_properties { | |
4159 | my ( $short_name, $color, $bold, $italic ) = @_; | |
4160 | ||
4161 | set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); | |
4162 | my $key; | |
4163 | $key = "html-bold-$short_to_long_names{$short_name}"; | |
4164 | $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; | |
4165 | $key = "html-italic-$short_to_long_names{$short_name}"; | |
4166 | $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; | |
4167 | } | |
4168 | ||
4169 | sub pod_to_html { | |
4170 | ||
4171 | # Use Pod::Html to process the pod and make the page | |
4172 | # then merge the perltidy code sections into it. | |
4173 | # return 1 if success, 0 otherwise | |
4174 | my $self = shift; | |
4175 | my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_; | |
4176 | my $input_file = $self->{_input_file}; | |
4177 | my $title = $self->{_title}; | |
4178 | my $success_flag = 0; | |
4179 | ||
4180 | # don't try to use pod2html if no pod | |
4181 | unless ($pod_string) { | |
4182 | return $success_flag; | |
4183 | } | |
4184 | ||
4185 | # Pod::Html requires a real temporary filename | |
4186 | # If we are making a frame, we have a name available | |
4187 | # Otherwise, we have to fine one | |
4188 | my $tmpfile; | |
4189 | if ( $rOpts->{'frames'} ) { | |
4190 | $tmpfile = $self->{_toc_filename}; | |
4191 | } | |
4192 | else { | |
4193 | $tmpfile = Perl::Tidy::make_temporary_filename(); | |
4194 | } | |
4195 | my $fh_tmp = IO::File->new( $tmpfile, 'w' ); | |
4196 | unless ($fh_tmp) { | |
4197 | warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; | |
4198 | return $success_flag; | |
4199 | } | |
4200 | ||
4201 | #------------------------------------------------------------------ | |
4202 | # Warning: a temporary file is open; we have to clean up if | |
4203 | # things go bad. From here on all returns should be by going to | |
4204 | # RETURN so that the temporary file gets unlinked. | |
4205 | #------------------------------------------------------------------ | |
4206 | ||
4207 | # write the pod text to the temporary file | |
4208 | $fh_tmp->print($pod_string); | |
4209 | $fh_tmp->close(); | |
4210 | ||
4211 | # Hand off the pod to pod2html. | |
4212 | # Note that we can use the same temporary filename for input and output | |
4213 | # because of the way pod2html works. | |
4214 | { | |
4215 | ||
4216 | my @args; | |
4217 | push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; | |
4218 | my $kw; | |
4219 | ||
4220 | # Flags with string args: | |
4221 | # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", | |
4222 | # "podpath=s", "podroot=s" | |
4223 | # Note: -css=s is handled by perltidy itself | |
4224 | foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) { | |
4225 | if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } | |
4226 | } | |
4227 | ||
4228 | # Toggle switches; these have extra leading 'pod' | |
4229 | # "header!", "index!", "recurse!", "quiet!", "verbose!" | |
4230 | foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) { | |
4231 | my $kwd = $kw; # allows us to strip 'pod' | |
4232 | if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } | |
4233 | elsif ( defined( $rOpts->{$kw} ) ) { | |
4234 | $kwd =~ s/^pod//; | |
4235 | push @args, "--no$kwd"; | |
4236 | } | |
4237 | } | |
4238 | ||
4239 | # "flush", | |
4240 | $kw = 'podflush'; | |
4241 | if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } | |
4242 | ||
4243 | # Must clean up if pod2html dies (it can); | |
4244 | # Be careful not to overwrite callers __DIE__ routine | |
4245 | local $SIG{__DIE__} = sub { | |
4246 | print $_[0]; | |
4247 | unlink $tmpfile if -e $tmpfile; | |
4248 | exit 1; | |
4249 | }; | |
4250 | ||
4251 | pod2html(@args); | |
4252 | } | |
4253 | $fh_tmp = IO::File->new( $tmpfile, 'r' ); | |
4254 | unless ($fh_tmp) { | |
4255 | ||
4256 | # this error shouldn't happen ... we just used this filename | |
4257 | warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; | |
4258 | goto RETURN; | |
4259 | } | |
4260 | ||
4261 | my $html_fh = $self->{_html_fh}; | |
4262 | my @toc; | |
4263 | my $in_toc; | |
4264 | my $no_print; | |
4265 | ||
4266 | # This routine will write the html selectively and store the toc | |
4267 | my $html_print = sub { | |
4268 | foreach (@_) { | |
4269 | $html_fh->print($_) unless ($no_print); | |
4270 | if ($in_toc) { push @toc, $_ } | |
4271 | } | |
4272 | }; | |
4273 | ||
4274 | # loop over lines of html output from pod2html and merge in | |
4275 | # the necessary perltidy html sections | |
4276 | my ( $saw_body, $saw_index, $saw_body_end ); | |
4277 | while ( my $line = $fh_tmp->getline() ) { | |
4278 | ||
4279 | if ( $line =~ /^\s*<html>\s*$/i ) { | |
4280 | my $date = localtime; | |
4281 | $html_print->("<!-- Generated by perltidy on $date -->\n"); | |
4282 | $html_print->($line); | |
4283 | } | |
4284 | ||
4285 | # Copy the perltidy css, if any, after <body> tag | |
4286 | elsif ( $line =~ /^\s*<body.*>\s*$/i ) { | |
4287 | $saw_body = 1; | |
4288 | $html_print->($css_string) if $css_string; | |
4289 | $html_print->($line); | |
4290 | ||
4291 | # add a top anchor and heading | |
4292 | $html_print->("<a name=\"-top-\"></a>\n"); | |
4293 | $title = escape_html($title); | |
4294 | $html_print->("<h1>$title</h1>\n"); | |
4295 | } | |
4296 | elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) { | |
4297 | $in_toc = 1; | |
4298 | ||
4299 | # when frames are used, an extra table of contents in the | |
4300 | # contents panel is confusing, so don't print it | |
4301 | $no_print = $rOpts->{'frames'} | |
4302 | || !$rOpts->{'html-table-of-contents'}; | |
4303 | $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'}; | |
4304 | $html_print->($line); | |
4305 | } | |
4306 | ||
4307 | # Copy the perltidy toc, if any, after the Pod::Html toc | |
4308 | elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) { | |
4309 | $saw_index = 1; | |
4310 | $html_print->($line); | |
4311 | if ($toc_string) { | |
4312 | $html_print->("<hr />\n") if $rOpts->{'frames'}; | |
4313 | $html_print->("<h2>Code Index:</h2>\n"); | |
4314 | my @toc = map { $_ .= "\n" } split /\n/, $toc_string; | |
4315 | $html_print->(@toc); | |
4316 | } | |
4317 | $in_toc = 0; | |
4318 | $no_print = 0; | |
4319 | } | |
4320 | ||
4321 | # Copy one perltidy section after each marker | |
4322 | elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) { | |
4323 | $line = $2; | |
4324 | $html_print->($1) if $1; | |
4325 | ||
4326 | # Intermingle code and pod sections if we saw multiple =cut's. | |
4327 | if ( $self->{_pod_cut_count} > 1 ) { | |
4328 | my $rpre_string = shift(@$rpre_string_stack); | |
4329 | if ($$rpre_string) { | |
4330 | $html_print->('<pre>'); | |
4331 | $html_print->($$rpre_string); | |
4332 | $html_print->('</pre>'); | |
4333 | } | |
4334 | else { | |
4335 | ||
4336 | # shouldn't happen: we stored a string before writing | |
4337 | # each marker. | |
4338 | warn | |
4339 | "Problem merging html stream with pod2html; order may be wrong\n"; | |
4340 | } | |
4341 | $html_print->($line); | |
4342 | } | |
4343 | ||
4344 | # If didn't see multiple =cut lines, we'll put the pod out first | |
4345 | # and then the code, because it's less confusing. | |
4346 | else { | |
4347 | ||
4348 | # since we are not intermixing code and pod, we don't need | |
4349 | # or want any <hr> lines which separated pod and code | |
4350 | $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i ); | |
4351 | } | |
4352 | } | |
4353 | ||
4354 | # Copy any remaining code section before the </body> tag | |
4355 | elsif ( $line =~ /^\s*<\/body>\s*$/i ) { | |
4356 | $saw_body_end = 1; | |
4357 | if (@$rpre_string_stack) { | |
4358 | unless ( $self->{_pod_cut_count} > 1 ) { | |
4359 | $html_print->('<hr />'); | |
4360 | } | |
4361 | while ( my $rpre_string = shift(@$rpre_string_stack) ) { | |
4362 | $html_print->('<pre>'); | |
4363 | $html_print->($$rpre_string); | |
4364 | $html_print->('</pre>'); | |
4365 | } | |
4366 | } | |
4367 | $html_print->($line); | |
4368 | } | |
4369 | else { | |
4370 | $html_print->($line); | |
4371 | } | |
4372 | } | |
4373 | ||
4374 | $success_flag = 1; | |
4375 | unless ($saw_body) { | |
4376 | warn "Did not see <body> in pod2html output\n"; | |
4377 | $success_flag = 0; | |
4378 | } | |
4379 | unless ($saw_body_end) { | |
4380 | warn "Did not see </body> in pod2html output\n"; | |
4381 | $success_flag = 0; | |
4382 | } | |
4383 | unless ($saw_index) { | |
4384 | warn "Did not find INDEX END in pod2html output\n"; | |
4385 | $success_flag = 0; | |
4386 | } | |
4387 | ||
4388 | RETURN: | |
4389 | eval { $html_fh->close() }; | |
4390 | ||
4391 | # note that we have to unlink tmpfile before making frames | |
4392 | # because the tmpfile may be one of the names used for frames | |
4393 | unlink $tmpfile if -e $tmpfile; | |
4394 | if ( $success_flag && $rOpts->{'frames'} ) { | |
4395 | $self->make_frame( \@toc ); | |
4396 | } | |
4397 | return $success_flag; | |
4398 | } | |
4399 | ||
4400 | sub make_frame { | |
4401 | ||
4402 | # Make a frame with table of contents in the left panel | |
4403 | # and the text in the right panel. | |
4404 | # On entry: | |
4405 | # $html_filename contains the no-frames html output | |
4406 | # $rtoc is a reference to an array with the table of contents | |
4407 | my $self = shift; | |
4408 | my ($rtoc) = @_; | |
4409 | my $input_file = $self->{_input_file}; | |
4410 | my $html_filename = $self->{_html_file}; | |
4411 | my $toc_filename = $self->{_toc_filename}; | |
4412 | my $src_filename = $self->{_src_filename}; | |
4413 | my $title = $self->{_title}; | |
4414 | $title = escape_html($title); | |
4415 | ||
4416 | # FUTURE input parameter: | |
4417 | my $top_basename = ""; | |
4418 | ||
4419 | # We need to produce 3 html files: | |
4420 | # 1. - the table of contents | |
4421 | # 2. - the contents (source code) itself | |
4422 | # 3. - the frame which contains them | |
4423 | ||
4424 | # get basenames for relative links | |
4425 | my ( $toc_basename, $toc_path ) = fileparse($toc_filename); | |
4426 | my ( $src_basename, $src_path ) = fileparse($src_filename); | |
4427 | ||
4428 | # 1. Make the table of contents panel, with appropriate changes | |
4429 | # to the anchor names | |
4430 | my $src_frame_name = 'SRC'; | |
4431 | my $first_anchor = | |
4432 | write_toc_html( $title, $toc_filename, $src_basename, $rtoc, | |
4433 | $src_frame_name ); | |
4434 | ||
4435 | # 2. The current .html filename is renamed to be the contents panel | |
4436 | rename( $html_filename, $src_filename ) | |
4437 | or die "Cannot rename $html_filename to $src_filename:$!\n"; | |
4438 | ||
4439 | # 3. Then use the original html filename for the frame | |
4440 | write_frame_html( | |
4441 | $title, $html_filename, $top_basename, | |
4442 | $toc_basename, $src_basename, $src_frame_name | |
4443 | ); | |
4444 | } | |
4445 | ||
4446 | sub write_toc_html { | |
4447 | ||
4448 | # write a separate html table of contents file for frames | |
4449 | my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; | |
4450 | my $fh = IO::File->new( $toc_filename, 'w' ) | |
4451 | or die "Cannot open $toc_filename:$!\n"; | |
4452 | $fh->print(<<EOM); | |
4453 | <html> | |
4454 | <head> | |
4455 | <title>$title</title> | |
4456 | </head> | |
4457 | <body> | |
4458 | <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1> | |
4459 | EOM | |
4460 | ||
4461 | my $first_anchor = | |
4462 | change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); | |
4463 | $fh->print( join "", @$rtoc ); | |
4464 | ||
4465 | $fh->print(<<EOM); | |
4466 | </body> | |
4467 | </html> | |
4468 | EOM | |
4469 | ||
4470 | } | |
4471 | ||
4472 | sub write_frame_html { | |
4473 | ||
4474 | # write an html file to be the table of contents frame | |
4475 | my ( | |
4476 | $title, $frame_filename, $top_basename, | |
4477 | $toc_basename, $src_basename, $src_frame_name | |
4478 | ) | |
4479 | = @_; | |
4480 | ||
4481 | my $fh = IO::File->new( $frame_filename, 'w' ) | |
4482 | or die "Cannot open $toc_basename:$!\n"; | |
4483 | ||
4484 | $fh->print(<<EOM); | |
4485 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" | |
4486 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> | |
4487 | <?xml version="1.0" encoding="iso-8859-1" ?> | |
4488 | <html xmlns="http://www.w3.org/1999/xhtml"> | |
4489 | <head> | |
4490 | <title>$title</title> | |
4491 | </head> | |
4492 | EOM | |
4493 | ||
4494 | # two left panels, one right, if master index file | |
4495 | if ($top_basename) { | |
4496 | $fh->print(<<EOM); | |
4497 | <frameset cols="20%,80%"> | |
4498 | <frameset rows="30%,70%"> | |
4499 | <frame src = "$top_basename" /> | |
4500 | <frame src = "$toc_basename" /> | |
4501 | </frameset> | |
4502 | EOM | |
4503 | } | |
4504 | ||
4505 | # one left panels, one right, if no master index file | |
4506 | else { | |
4507 | $fh->print(<<EOM); | |
4508 | <frameset cols="20%,*"> | |
4509 | <frame src = "$toc_basename" /> | |
4510 | EOM | |
4511 | } | |
4512 | $fh->print(<<EOM); | |
4513 | <frame src = "$src_basename" name = "$src_frame_name" /> | |
4514 | <noframes> | |
4515 | <body> | |
4516 | <p>If you see this message, you are using a non-frame-capable web client.</p> | |
4517 | <p>This document contains:</p> | |
4518 | <ul> | |
4519 | <li><a href="$toc_basename">A table of contents</a></li> | |
4520 | <li><a href="$src_basename">The source code</a></li> | |
4521 | </ul> | |
4522 | </body> | |
4523 | </noframes> | |
4524 | </frameset> | |
4525 | </html> | |
4526 | EOM | |
4527 | } | |
4528 | ||
4529 | sub change_anchor_names { | |
4530 | ||
4531 | # add a filename and target to anchors | |
4532 | # also return the first anchor | |
4533 | my ( $rlines, $filename, $target ) = @_; | |
4534 | my $first_anchor; | |
4535 | foreach my $line (@$rlines) { | |
4536 | ||
4537 | # We're looking for lines like this: | |
4538 | # <LI><A HREF="#synopsis">SYNOPSIS</A></LI> | |
4539 | # ---- - -------- ----------------- | |
4540 | # $1 $4 $5 | |
4541 | if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) { | |
4542 | my $pre = $1; | |
4543 | my $name = $4; | |
4544 | my $post = $5; | |
4545 | my $href = "$filename#$name"; | |
4546 | $line = "$pre<a href=\"$href\" target=\"$target\">$post\n"; | |
4547 | unless ($first_anchor) { $first_anchor = $href } | |
4548 | } | |
4549 | } | |
4550 | return $first_anchor; | |
4551 | } | |
4552 | ||
4553 | sub close_html_file { | |
4554 | my $self = shift; | |
4555 | return unless $self->{_html_file_opened}; | |
4556 | ||
4557 | my $html_fh = $self->{_html_fh}; | |
4558 | my $rtoc_string = $self->{_rtoc_string}; | |
4559 | ||
4560 | # There are 3 basic paths to html output... | |
4561 | ||
4562 | # --------------------------------- | |
4563 | # Path 1: finish up if in -pre mode | |
4564 | # --------------------------------- | |
4565 | if ( $rOpts->{'html-pre-only'} ) { | |
4566 | $html_fh->print( <<"PRE_END"); | |
4567 | </pre> | |
4568 | PRE_END | |
4569 | eval { $html_fh->close() }; | |
4570 | return; | |
4571 | } | |
4572 | ||
4573 | # Finish the index | |
4574 | $self->add_toc_item( 'EOF', 'EOF' ); | |
4575 | ||
4576 | my $rpre_string_stack = $self->{_rpre_string_stack}; | |
4577 | ||
4578 | # Patch to darken the <pre> background color in case of pod2html and | |
4579 | # interleaved code/documentation. Otherwise, the distinction | |
4580 | # between code and documentation is blurred. | |
4581 | if ( $rOpts->{pod2html} | |
4582 | && $self->{_pod_cut_count} >= 1 | |
4583 | && $rOpts->{'html-color-background'} eq '#FFFFFF' ) | |
4584 | { | |
4585 | $rOpts->{'html-pre-color-background'} = '#F0F0F0'; | |
4586 | } | |
4587 | ||
4588 | # put the css or its link into a string, if used | |
4589 | my $css_string; | |
4590 | my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' ); | |
4591 | ||
4592 | # use css linked to another file | |
4593 | if ( $rOpts->{'html-linked-style-sheet'} ) { | |
4594 | $fh_css->print( | |
4595 | qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />) | |
4596 | ); | |
4597 | } | |
4598 | ||
4599 | # use css embedded in this file | |
4600 | elsif ( !$rOpts->{'nohtml-style-sheets'} ) { | |
4601 | $fh_css->print( <<'ENDCSS'); | |
4602 | <style type="text/css"> | |
4603 | <!-- | |
4604 | ENDCSS | |
4605 | write_style_sheet_data($fh_css); | |
4606 | $fh_css->print( <<"ENDCSS"); | |
4607 | --> | |
4608 | </style> | |
4609 | ENDCSS | |
4610 | } | |
4611 | ||
4612 | # ----------------------------------------------------------- | |
4613 | # path 2: use pod2html if requested | |
4614 | # If we fail for some reason, continue on to path 3 | |
4615 | # ----------------------------------------------------------- | |
4616 | if ( $rOpts->{'pod2html'} ) { | |
4617 | my $rpod_string = $self->{_rpod_string}; | |
4618 | $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string, | |
4619 | $rpre_string_stack ) | |
4620 | && return; | |
4621 | } | |
4622 | ||
4623 | # -------------------------------------------------- | |
4624 | # path 3: write code in html, with pod only in italics | |
4625 | # -------------------------------------------------- | |
4626 | my $input_file = $self->{_input_file}; | |
4627 | my $title = escape_html($input_file); | |
4628 | my $date = localtime; | |
4629 | $html_fh->print( <<"HTML_START"); | |
4630 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" | |
4631 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | |
4632 | <!-- Generated by perltidy on $date --> | |
4633 | <html xmlns="http://www.w3.org/1999/xhtml"> | |
4634 | <head> | |
4635 | <title>$title</title> | |
4636 | HTML_START | |
4637 | ||
4638 | # output the css, if used | |
4639 | if ($css_string) { | |
4640 | $html_fh->print($css_string); | |
4641 | $html_fh->print( <<"ENDCSS"); | |
4642 | </head> | |
4643 | <body> | |
4644 | ENDCSS | |
4645 | } | |
4646 | else { | |
4647 | ||
4648 | $html_fh->print( <<"HTML_START"); | |
4649 | </head> | |
4650 | <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\"> | |
4651 | HTML_START | |
4652 | } | |
4653 | ||
4654 | $html_fh->print("<a name=\"-top-\"></a>\n"); | |
4655 | $html_fh->print( <<"EOM"); | |
4656 | <h1>$title</h1> | |
4657 | EOM | |
4658 | ||
4659 | # copy the table of contents | |
4660 | if ( $$rtoc_string | |
4661 | && !$rOpts->{'frames'} | |
4662 | && $rOpts->{'html-table-of-contents'} ) | |
4663 | { | |
4664 | $html_fh->print($$rtoc_string); | |
4665 | } | |
4666 | ||
4667 | # copy the pre section(s) | |
4668 | my $fname_comment = $input_file; | |
4669 | $fname_comment =~ s/--+/-/g; # protect HTML comment tags | |
4670 | $html_fh->print( <<"END_PRE"); | |
4671 | <hr /> | |
4672 | <!-- contents of filename: $fname_comment --> | |
4673 | <pre> | |
4674 | END_PRE | |
4675 | ||
4676 | foreach my $rpre_string (@$rpre_string_stack) { | |
4677 | $html_fh->print($$rpre_string); | |
4678 | } | |
4679 | ||
4680 | # and finish the html page | |
4681 | $html_fh->print( <<"HTML_END"); | |
4682 | </pre> | |
4683 | </body> | |
4684 | </html> | |
4685 | HTML_END | |
4686 | eval { $html_fh->close() }; # could be object without close method | |
4687 | ||
4688 | if ( $rOpts->{'frames'} ) { | |
4689 | my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string; | |
4690 | $self->make_frame( \@toc ); | |
4691 | } | |
4692 | } | |
4693 | ||
4694 | sub markup_tokens { | |
4695 | my $self = shift; | |
4696 | my ( $rtokens, $rtoken_type, $rlevels ) = @_; | |
4697 | my ( @colored_tokens, $j, $string, $type, $token, $level ); | |
4698 | my $rlast_level = $self->{_rlast_level}; | |
4699 | my $rpackage_stack = $self->{_rpackage_stack}; | |
4700 | ||
4701 | for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { | |
4702 | $type = $$rtoken_type[$j]; | |
4703 | $token = $$rtokens[$j]; | |
4704 | $level = $$rlevels[$j]; | |
4705 | $level = 0 if ( $level < 0 ); | |
4706 | ||
4707 | #------------------------------------------------------- | |
4708 | # Update the package stack. The package stack is needed to keep | |
4709 | # the toc correct because some packages may be declared within | |
4710 | # blocks and go out of scope when we leave the block. | |
4711 | #------------------------------------------------------- | |
4712 | if ( $level > $$rlast_level ) { | |
4713 | unless ( $rpackage_stack->[ $level - 1 ] ) { | |
4714 | $rpackage_stack->[ $level - 1 ] = 'main'; | |
4715 | } | |
4716 | $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; | |
4717 | } | |
4718 | elsif ( $level < $$rlast_level ) { | |
4719 | my $package = $rpackage_stack->[$level]; | |
4720 | unless ($package) { $package = 'main' } | |
4721 | ||
4722 | # if we change packages due to a nesting change, we | |
4723 | # have to make an entry in the toc | |
4724 | if ( $package ne $rpackage_stack->[ $level + 1 ] ) { | |
4725 | $self->add_toc_item( $package, 'package' ); | |
4726 | } | |
4727 | } | |
4728 | $$rlast_level = $level; | |
4729 | ||
4730 | #------------------------------------------------------- | |
4731 | # Intercept a sub name here; split it | |
4732 | # into keyword 'sub' and sub name; and add an | |
4733 | # entry in the toc | |
4734 | #------------------------------------------------------- | |
4735 | if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { | |
4736 | $token = $self->markup_html_element( $1, 'k' ); | |
4737 | push @colored_tokens, $token; | |
4738 | $token = $2; | |
4739 | $type = 'M'; | |
4740 | ||
4741 | # but don't include sub declarations in the toc; | |
4742 | # these wlll have leading token types 'i;' | |
4743 | my $signature = join "", @$rtoken_type; | |
4744 | unless ( $signature =~ /^i;/ ) { | |
4745 | my $subname = $token; | |
4746 | $subname =~ s/[\s\(].*$//; # remove any attributes and prototype | |
4747 | $self->add_toc_item( $subname, 'sub' ); | |
4748 | } | |
4749 | } | |
4750 | ||
4751 | #------------------------------------------------------- | |
4752 | # Intercept a package name here; split it | |
4753 | # into keyword 'package' and name; add to the toc, | |
4754 | # and update the package stack | |
4755 | #------------------------------------------------------- | |
4756 | if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { | |
4757 | $token = $self->markup_html_element( $1, 'k' ); | |
4758 | push @colored_tokens, $token; | |
4759 | $token = $2; | |
4760 | $type = 'i'; | |
4761 | $self->add_toc_item( "$token", 'package' ); | |
4762 | $rpackage_stack->[$level] = $token; | |
4763 | } | |
4764 | ||
4765 | $token = $self->markup_html_element( $token, $type ); | |
4766 | push @colored_tokens, $token; | |
4767 | } | |
4768 | return ( \@colored_tokens ); | |
4769 | } | |
4770 | ||
4771 | sub markup_html_element { | |
4772 | my $self = shift; | |
4773 | my ( $token, $type ) = @_; | |
4774 | ||
4775 | return $token if ( $type eq 'b' ); # skip a blank token | |
4776 | return $token if ( $token =~ /^\s*$/ ); # skip a blank line | |
4777 | $token = escape_html($token); | |
4778 | ||
4779 | # get the short abbreviation for this token type | |
4780 | my $short_name = $token_short_names{$type}; | |
4781 | if ( !defined($short_name) ) { | |
4782 | $short_name = "pu"; # punctuation is default | |
4783 | } | |
4784 | ||
4785 | # handle style sheets.. | |
4786 | if ( !$rOpts->{'nohtml-style-sheets'} ) { | |
4787 | if ( $short_name ne 'pu' ) { | |
4788 | $token = qq(<span class="$short_name">) . $token . "</span>"; | |
4789 | } | |
4790 | } | |
4791 | ||
4792 | # handle no style sheets.. | |
4793 | else { | |
4794 | my $color = $html_color{$short_name}; | |
4795 | ||
4796 | if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { | |
4797 | $token = qq(<font color="$color">) . $token . "</font>"; | |
4798 | } | |
4799 | if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" } | |
4800 | if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" } | |
4801 | } | |
4802 | return $token; | |
4803 | } | |
4804 | ||
4805 | sub escape_html { | |
4806 | ||
4807 | my $token = shift; | |
4808 | if ($missing_html_entities) { | |
4809 | $token =~ s/\&/&/g; | |
4810 | $token =~ s/\</</g; | |
4811 | $token =~ s/\>/>/g; | |
4812 | $token =~ s/\"/"/g; | |
4813 | } | |
4814 | else { | |
4815 | HTML::Entities::encode_entities($token); | |
4816 | } | |
4817 | return $token; | |
4818 | } | |
4819 | ||
4820 | sub finish_formatting { | |
4821 | ||
4822 | # called after last line | |
4823 | my $self = shift; | |
4824 | $self->close_html_file(); | |
4825 | return; | |
4826 | } | |
4827 | ||
4828 | sub write_line { | |
4829 | ||
4830 | my $self = shift; | |
4831 | return unless $self->{_html_file_opened}; | |
4832 | my $html_pre_fh = $self->{_html_pre_fh}; | |
4833 | my ($line_of_tokens) = @_; | |
4834 | my $line_type = $line_of_tokens->{_line_type}; | |
4835 | my $input_line = $line_of_tokens->{_line_text}; | |
4836 | my $line_number = $line_of_tokens->{_line_number}; | |
4837 | chomp $input_line; | |
4838 | ||
4839 | # markup line of code.. | |
4840 | my $html_line; | |
4841 | if ( $line_type eq 'CODE' ) { | |
4842 | my $rtoken_type = $line_of_tokens->{_rtoken_type}; | |
4843 | my $rtokens = $line_of_tokens->{_rtokens}; | |
4844 | my $rlevels = $line_of_tokens->{_rlevels}; | |
4845 | ||
4846 | if ( $input_line =~ /(^\s*)/ ) { | |
4847 | $html_line = $1; | |
4848 | } | |
4849 | else { | |
4850 | $html_line = ""; | |
4851 | } | |
4852 | my ($rcolored_tokens) = | |
4853 | $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); | |
4854 | $html_line .= join '', @$rcolored_tokens; | |
4855 | } | |
4856 | ||
4857 | # markup line of non-code.. | |
4858 | else { | |
4859 | my $line_character; | |
4860 | if ( $line_type eq 'HERE' ) { $line_character = 'H' } | |
4861 | elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } | |
4862 | elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } | |
4863 | elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } | |
4864 | elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } | |
4865 | elsif ( $line_type eq 'END_START' ) { | |
4866 | $line_character = 'k'; | |
4867 | $self->add_toc_item( '__END__', '__END__' ); | |
4868 | } | |
4869 | elsif ( $line_type eq 'DATA_START' ) { | |
4870 | $line_character = 'k'; | |
4871 | $self->add_toc_item( '__DATA__', '__DATA__' ); | |
4872 | } | |
4873 | elsif ( $line_type =~ /^POD/ ) { | |
4874 | $line_character = 'P'; | |
4875 | if ( $rOpts->{'pod2html'} ) { | |
4876 | my $html_pod_fh = $self->{_html_pod_fh}; | |
4877 | if ( $line_type eq 'POD_START' ) { | |
4878 | ||
4879 | my $rpre_string_stack = $self->{_rpre_string_stack}; | |
4880 | my $rpre_string = $rpre_string_stack->[-1]; | |
4881 | ||
4882 | # if we have written any non-blank lines to the | |
4883 | # current pre section, start writing to a new output | |
4884 | # string | |
4885 | if ( $$rpre_string =~ /\S/ ) { | |
4886 | my $pre_string; | |
4887 | $html_pre_fh = | |
4888 | Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); | |
4889 | $self->{_html_pre_fh} = $html_pre_fh; | |
4890 | push @$rpre_string_stack, \$pre_string; | |
4891 | ||
4892 | # leave a marker in the pod stream so we know | |
4893 | # where to put the pre section we just | |
4894 | # finished. | |
4895 | my $for_html = '=for html'; # don't confuse pod utils | |
4896 | $html_pod_fh->print(<<EOM); | |
4897 | ||
4898 | $for_html | |
4899 | <!-- pERLTIDY sECTION --> | |
4900 | ||
4901 | EOM | |
4902 | } | |
4903 | ||
4904 | # otherwise, just clear the current string and start | |
4905 | # over | |
4906 | else { | |
4907 | $$rpre_string = ""; | |
4908 | $html_pod_fh->print("\n"); | |
4909 | } | |
4910 | } | |
4911 | $html_pod_fh->print( $input_line . "\n" ); | |
4912 | if ( $line_type eq 'POD_END' ) { | |
4913 | $self->{_pod_cut_count}++; | |
4914 | $html_pod_fh->print("\n"); | |
4915 | } | |
4916 | return; | |
4917 | } | |
4918 | } | |
4919 | else { $line_character = 'Q' } | |
4920 | $html_line = $self->markup_html_element( $input_line, $line_character ); | |
4921 | } | |
4922 | ||
4923 | # add the line number if requested | |
4924 | if ( $rOpts->{'html-line-numbers'} ) { | |
4925 | my $extra_space .= | |
4926 | ( $line_number < 10 ) ? " " | |
4927 | : ( $line_number < 100 ) ? " " | |
4928 | : ( $line_number < 1000 ) ? " " | |
4929 | : ""; | |
4930 | $html_line = $extra_space . $line_number . " " . $html_line; | |
4931 | } | |
4932 | ||
4933 | # write the line | |
4934 | $html_pre_fh->print("$html_line\n"); | |
4935 | } | |
4936 | ||
4937 | ##################################################################### | |
4938 | # | |
4939 | # The Perl::Tidy::Formatter package adds indentation, whitespace, and | |
4940 | # line breaks to the token stream | |
4941 | # | |
4942 | # WARNING: This is not a real class for speed reasons. Only one | |
4943 | # Formatter may be used. | |
4944 | # | |
4945 | ##################################################################### | |
4946 | ||
4947 | package Perl::Tidy::Formatter; | |
4948 | ||
4949 | BEGIN { | |
4950 | ||
4951 | # Caution: these debug flags produce a lot of output | |
4952 | # They should all be 0 except when debugging small scripts | |
4953 | use constant FORMATTER_DEBUG_FLAG_BOND => 0; | |
4954 | use constant FORMATTER_DEBUG_FLAG_BREAK => 0; | |
4955 | use constant FORMATTER_DEBUG_FLAG_CI => 0; | |
4956 | use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; | |
4957 | use constant FORMATTER_DEBUG_FLAG_FORCE => 0; | |
4958 | use constant FORMATTER_DEBUG_FLAG_LIST => 0; | |
4959 | use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; | |
4960 | use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; | |
4961 | use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; | |
4962 | use constant FORMATTER_DEBUG_FLAG_STORE => 0; | |
4963 | use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; | |
4964 | use constant FORMATTER_DEBUG_FLAG_WHITE => 0; | |
4965 | ||
4966 | my $debug_warning = sub { | |
4967 | print "FORMATTER_DEBUGGING with key $_[0]\n"; | |
4968 | }; | |
4969 | ||
4970 | FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); | |
4971 | FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); | |
4972 | FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); | |
4973 | FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); | |
4974 | FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); | |
4975 | FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); | |
4976 | FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); | |
4977 | FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); | |
4978 | FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); | |
4979 | FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); | |
4980 | FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); | |
4981 | FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); | |
4982 | } | |
4983 | ||
4984 | use Carp; | |
4985 | use vars qw{ | |
4986 | ||
4987 | @gnu_stack | |
4988 | $max_gnu_stack_index | |
4989 | $gnu_position_predictor | |
4990 | $line_start_index_to_go | |
4991 | $last_indentation_written | |
4992 | $last_unadjusted_indentation | |
4993 | $last_leading_token | |
4994 | ||
4995 | $saw_VERSION_in_this_file | |
4996 | $saw_END_or_DATA_ | |
4997 | ||
4998 | @gnu_item_list | |
4999 | $max_gnu_item_index | |
5000 | $gnu_sequence_number | |
5001 | $last_output_indentation | |
5002 | %last_gnu_equals | |
5003 | %gnu_comma_count | |
5004 | %gnu_arrow_count | |
5005 | ||
5006 | @block_type_to_go | |
5007 | @type_sequence_to_go | |
5008 | @container_environment_to_go | |
5009 | @bond_strength_to_go | |
5010 | @forced_breakpoint_to_go | |
5011 | @lengths_to_go | |
5012 | @levels_to_go | |
5013 | @leading_spaces_to_go | |
5014 | @reduced_spaces_to_go | |
5015 | @matching_token_to_go | |
5016 | @mate_index_to_go | |
5017 | @nesting_blocks_to_go | |
5018 | @ci_levels_to_go | |
5019 | @nesting_depth_to_go | |
5020 | @nobreak_to_go | |
5021 | @old_breakpoint_to_go | |
5022 | @tokens_to_go | |
5023 | @types_to_go | |
5024 | ||
5025 | %saved_opening_indentation | |
5026 | ||
5027 | $max_index_to_go | |
5028 | $comma_count_in_batch | |
5029 | $old_line_count_in_batch | |
5030 | $last_nonblank_index_to_go | |
5031 | $last_nonblank_type_to_go | |
5032 | $last_nonblank_token_to_go | |
5033 | $last_last_nonblank_index_to_go | |
5034 | $last_last_nonblank_type_to_go | |
5035 | $last_last_nonblank_token_to_go | |
5036 | @nonblank_lines_at_depth | |
5037 | $starting_in_quote | |
5038 | ||
5039 | $forced_breakpoint_count | |
5040 | $forced_breakpoint_undo_count | |
5041 | @forced_breakpoint_undo_stack | |
5042 | %postponed_breakpoint | |
5043 | ||
5044 | $tabbing | |
5045 | $embedded_tab_count | |
5046 | $first_embedded_tab_at | |
5047 | $last_embedded_tab_at | |
5048 | $deleted_semicolon_count | |
5049 | $first_deleted_semicolon_at | |
5050 | $last_deleted_semicolon_at | |
5051 | $added_semicolon_count | |
5052 | $first_added_semicolon_at | |
5053 | $last_added_semicolon_at | |
5054 | $saw_negative_indentation | |
5055 | $first_tabbing_disagreement | |
5056 | $last_tabbing_disagreement | |
5057 | $in_tabbing_disagreement | |
5058 | $tabbing_disagreement_count | |
5059 | $input_line_tabbing | |
5060 | ||
5061 | $last_line_type | |
5062 | $last_line_leading_type | |
5063 | $last_line_leading_level | |
5064 | $last_last_line_leading_level | |
5065 | ||
5066 | %block_leading_text | |
5067 | %block_opening_line_number | |
5068 | $csc_new_statement_ok | |
5069 | $accumulating_text_for_block | |
5070 | $leading_block_text | |
5071 | $rleading_block_if_elsif_text | |
5072 | $leading_block_text_level | |
5073 | $leading_block_text_length_exceeded | |
5074 | $leading_block_text_line_length | |
5075 | $leading_block_text_line_number | |
5076 | $closing_side_comment_prefix_pattern | |
5077 | $closing_side_comment_list_pattern | |
5078 | ||
5079 | $last_nonblank_token | |
5080 | $last_nonblank_type | |
5081 | $last_last_nonblank_token | |
5082 | $last_last_nonblank_type | |
5083 | $last_nonblank_block_type | |
5084 | $last_output_level | |
5085 | %is_do_follower | |
5086 | %is_if_brace_follower | |
5087 | %space_after_keyword | |
5088 | $rbrace_follower | |
5089 | $looking_for_else | |
5090 | %is_last_next_redo_return | |
5091 | %is_other_brace_follower | |
5092 | %is_else_brace_follower | |
5093 | %is_anon_sub_brace_follower | |
5094 | %is_anon_sub_1_brace_follower | |
5095 | %is_sort_map_grep | |
5096 | %is_sort_map_grep_eval | |
5097 | %is_sort_map_grep_eval_do | |
5098 | %is_block_without_semicolon | |
5099 | %is_if_unless | |
5100 | %is_and_or | |
5101 | %is_assignment | |
5102 | %is_chain_operator | |
5103 | %is_if_unless_and_or_last_next_redo_return | |
5104 | ||
5105 | @has_broken_sublist | |
5106 | @dont_align | |
5107 | @want_comma_break | |
5108 | ||
5109 | $index_start_one_line_block | |
5110 | $semicolons_before_block_self_destruct | |
5111 | $index_max_forced_break | |
5112 | $input_line_number | |
5113 | $diagnostics_object | |
5114 | $vertical_aligner_object | |
5115 | $logger_object | |
5116 | $file_writer_object | |
5117 | $formatter_self | |
5118 | @ci_stack | |
5119 | $last_line_had_side_comment | |
5120 | %want_break_before | |
5121 | %outdent_keyword | |
5122 | $static_block_comment_pattern | |
5123 | $static_side_comment_pattern | |
5124 | %opening_vertical_tightness | |
5125 | %closing_vertical_tightness | |
5126 | %closing_token_indentation | |
5127 | $block_brace_vertical_tightness_pattern | |
5128 | ||
5129 | $rOpts_add_newlines | |
5130 | $rOpts_add_whitespace | |
5131 | $rOpts_block_brace_tightness | |
5132 | $rOpts_block_brace_vertical_tightness | |
5133 | $rOpts_brace_left_and_indent | |
5134 | $rOpts_comma_arrow_breakpoints | |
5135 | $rOpts_break_at_old_keyword_breakpoints | |
5136 | $rOpts_break_at_old_comma_breakpoints | |
5137 | $rOpts_break_at_old_logical_breakpoints | |
5138 | $rOpts_break_at_old_trinary_breakpoints | |
5139 | $rOpts_closing_side_comment_else_flag | |
5140 | $rOpts_closing_side_comment_maximum_text | |
5141 | $rOpts_continuation_indentation | |
5142 | $rOpts_cuddled_else | |
5143 | $rOpts_delete_old_whitespace | |
5144 | $rOpts_fuzzy_line_length | |
5145 | $rOpts_indent_columns | |
5146 | $rOpts_line_up_parentheses | |
5147 | $rOpts_maximum_fields_per_table | |
5148 | $rOpts_maximum_line_length | |
5149 | $rOpts_short_concatenation_item_length | |
5150 | $rOpts_swallow_optional_blank_lines | |
5151 | $rOpts_ignore_old_line_breaks | |
5152 | ||
5153 | $half_maximum_line_length | |
5154 | ||
5155 | %is_opening_type | |
5156 | %is_closing_type | |
5157 | %is_keyword_returning_list | |
5158 | %tightness | |
5159 | %matching_token | |
5160 | $rOpts | |
5161 | %right_bond_strength | |
5162 | %left_bond_strength | |
5163 | %binary_ws_rules | |
5164 | %want_left_space | |
5165 | %want_right_space | |
5166 | %is_digraph | |
5167 | %is_trigraph | |
5168 | $bli_pattern | |
5169 | $bli_list_string | |
5170 | %is_closing_type | |
5171 | %is_opening_type | |
5172 | %is_closing_token | |
5173 | %is_opening_token | |
5174 | }; | |
5175 | ||
5176 | BEGIN { | |
5177 | ||
5178 | # default list of block types for which -bli would apply | |
5179 | $bli_list_string = 'if else elsif unless while for foreach do : sub'; | |
5180 | ||
5181 | @_ = qw( | |
5182 | .. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <> | |
5183 | <= >= == =~ !~ != ++ -- /= x= | |
5184 | ); | |
5185 | @is_digraph{@_} = (1) x scalar(@_); | |
5186 | ||
5187 | @_ = qw( ... **= <<= >>= &&= ||= <=> ); | |
5188 | @is_trigraph{@_} = (1) x scalar(@_); | |
5189 | ||
5190 | @_ = qw( | |
5191 | = **= += *= &= <<= &&= | |
5192 | -= /= |= >>= ||= | |
5193 | .= %= ^= | |
5194 | x= | |
5195 | ); | |
5196 | @is_assignment{@_} = (1) x scalar(@_); | |
5197 | ||
5198 | @_ = qw( | |
5199 | grep | |
5200 | keys | |
5201 | map | |
5202 | reverse | |
5203 | sort | |
5204 | split | |
5205 | ); | |
5206 | @is_keyword_returning_list{@_} = (1) x scalar(@_); | |
5207 | ||
5208 | @_ = qw(is if unless and or last next redo return); | |
5209 | @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_); | |
5210 | ||
5211 | @_ = qw(last next redo return); | |
5212 | @is_last_next_redo_return{@_} = (1) x scalar(@_); | |
5213 | ||
5214 | @_ = qw(sort map grep); | |
5215 | @is_sort_map_grep{@_} = (1) x scalar(@_); | |
5216 | ||
5217 | @_ = qw(sort map grep eval); | |
5218 | @is_sort_map_grep_eval{@_} = (1) x scalar(@_); | |
5219 | ||
5220 | @_ = qw(sort map grep eval do); | |
5221 | @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_); | |
5222 | ||
5223 | @_ = qw(if unless); | |
5224 | @is_if_unless{@_} = (1) x scalar(@_); | |
5225 | ||
5226 | @_ = qw(and or); | |
5227 | @is_and_or{@_} = (1) x scalar(@_); | |
5228 | ||
5229 | # We can remove semicolons after blocks preceded by these keywords | |
5230 | @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else | |
5231 | unless while until for foreach); | |
5232 | @is_block_without_semicolon{@_} = (1) x scalar(@_); | |
5233 | ||
5234 | # 'L' is token for opening { at hash key | |
5235 | @_ = qw" L { ( [ "; | |
5236 | @is_opening_type{@_} = (1) x scalar(@_); | |
5237 | ||
5238 | # 'R' is token for closing } at hash key | |
5239 | @_ = qw" R } ) ] "; | |
5240 | @is_closing_type{@_} = (1) x scalar(@_); | |
5241 | ||
5242 | @_ = qw" { ( [ "; | |
5243 | @is_opening_token{@_} = (1) x scalar(@_); | |
5244 | ||
5245 | @_ = qw" } ) ] "; | |
5246 | @is_closing_token{@_} = (1) x scalar(@_); | |
5247 | } | |
5248 | ||
5249 | # whitespace codes | |
5250 | use constant WS_YES => 1; | |
5251 | use constant WS_OPTIONAL => 0; | |
5252 | use constant WS_NO => -1; | |
5253 | ||
5254 | # Token bond strengths. | |
5255 | use constant NO_BREAK => 10000; | |
5256 | use constant VERY_STRONG => 100; | |
5257 | use constant STRONG => 2.1; | |
5258 | use constant NOMINAL => 1.1; | |
5259 | use constant WEAK => 0.8; | |
5260 | use constant VERY_WEAK => 0.55; | |
5261 | ||
5262 | # values for testing indexes in output array | |
5263 | use constant UNDEFINED_INDEX => -1; | |
5264 | ||
5265 | # Maximum number of little messages; probably need not be changed. | |
5266 | use constant MAX_NAG_MESSAGES => 6; | |
5267 | ||
5268 | # increment between sequence numbers for each type | |
5269 | # For example, ?: pairs might have numbers 7,11,15,... | |
5270 | use constant TYPE_SEQUENCE_INCREMENT => 4; | |
5271 | ||
5272 | { | |
5273 | ||
5274 | # methods to count instances | |
5275 | my $_count = 0; | |
5276 | sub get_count { $_count; } | |
5277 | sub _increment_count { ++$_count } | |
5278 | sub _decrement_count { --$_count } | |
5279 | } | |
5280 | ||
5281 | # interface to Perl::Tidy::Logger routines | |
5282 | sub warning { | |
5283 | if ($logger_object) { | |
5284 | $logger_object->warning(@_); | |
5285 | } | |
5286 | } | |
5287 | ||
5288 | sub complain { | |
5289 | if ($logger_object) { | |
5290 | $logger_object->complain(@_); | |
5291 | } | |
5292 | } | |
5293 | ||
5294 | sub write_logfile_entry { | |
5295 | if ($logger_object) { | |
5296 | $logger_object->write_logfile_entry(@_); | |
5297 | } | |
5298 | } | |
5299 | ||
5300 | sub black_box { | |
5301 | if ($logger_object) { | |
5302 | $logger_object->black_box(@_); | |
5303 | } | |
5304 | } | |
5305 | ||
5306 | sub report_definite_bug { | |
5307 | if ($logger_object) { | |
5308 | $logger_object->report_definite_bug(); | |
5309 | } | |
5310 | } | |
5311 | ||
5312 | sub get_saw_brace_error { | |
5313 | if ($logger_object) { | |
5314 | $logger_object->get_saw_brace_error(); | |
5315 | } | |
5316 | } | |
5317 | ||
5318 | sub we_are_at_the_last_line { | |
5319 | if ($logger_object) { | |
5320 | $logger_object->we_are_at_the_last_line(); | |
5321 | } | |
5322 | } | |
5323 | ||
5324 | # interface to Perl::Tidy::Diagnostics routine | |
5325 | sub write_diagnostics { | |
5326 | ||
5327 | if ($diagnostics_object) { | |
5328 | $diagnostics_object->write_diagnostics(@_); | |
5329 | } | |
5330 | } | |
5331 | ||
5332 | sub get_added_semicolon_count { | |
5333 | my $self = shift; | |
5334 | return $added_semicolon_count; | |
5335 | } | |
5336 | ||
5337 | sub DESTROY { | |
5338 | $_[0]->_decrement_count(); | |
5339 | } | |
5340 | ||
5341 | sub new { | |
5342 | ||
5343 | my $class = shift; | |
5344 | ||
5345 | # we are given an object with a write_line() method to take lines | |
5346 | my %defaults = ( | |
5347 | sink_object => undef, | |
5348 | diagnostics_object => undef, | |
5349 | logger_object => undef, | |
5350 | ); | |
5351 | my %args = ( %defaults, @_ ); | |
5352 | ||
5353 | $logger_object = $args{logger_object}; | |
5354 | $diagnostics_object = $args{diagnostics_object}; | |
5355 | ||
5356 | # we create another object with a get_line() and peek_ahead() method | |
5357 | my $sink_object = $args{sink_object}; | |
5358 | $file_writer_object = | |
5359 | Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); | |
5360 | ||
5361 | # initialize the leading whitespace stack to negative levels | |
5362 | # so that we can never run off the end of the stack | |
5363 | $gnu_position_predictor = 0; # where the current token is predicted to be | |
5364 | $max_gnu_stack_index = 0; | |
5365 | $max_gnu_item_index = -1; | |
5366 | $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); | |
5367 | @gnu_item_list = (); | |
5368 | $last_output_indentation = 0; | |
5369 | $last_indentation_written = 0; | |
5370 | $last_unadjusted_indentation = 0; | |
5371 | $last_leading_token = ""; | |
5372 | ||
5373 | $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; | |
5374 | $saw_END_or_DATA_ = 0; | |
5375 | ||
5376 | @block_type_to_go = (); | |
5377 | @type_sequence_to_go = (); | |
5378 | @container_environment_to_go = (); | |
5379 | @bond_strength_to_go = (); | |
5380 | @forced_breakpoint_to_go = (); | |
5381 | @lengths_to_go = (); # line length to start of ith token | |
5382 | @levels_to_go = (); | |
5383 | @matching_token_to_go = (); | |
5384 | @mate_index_to_go = (); | |
5385 | @nesting_blocks_to_go = (); | |
5386 | @ci_levels_to_go = (); | |
5387 | @nesting_depth_to_go = (0); | |
5388 | @nobreak_to_go = (); | |
5389 | @old_breakpoint_to_go = (); | |
5390 | @tokens_to_go = (); | |
5391 | @types_to_go = (); | |
5392 | @leading_spaces_to_go = (); | |
5393 | @reduced_spaces_to_go = (); | |
5394 | ||
5395 | @dont_align = (); | |
5396 | @has_broken_sublist = (); | |
5397 | @want_comma_break = (); | |
5398 | ||
5399 | @ci_stack = (""); | |
5400 | $saw_negative_indentation = 0; | |
5401 | $first_tabbing_disagreement = 0; | |
5402 | $last_tabbing_disagreement = 0; | |
5403 | $tabbing_disagreement_count = 0; | |
5404 | $in_tabbing_disagreement = 0; | |
5405 | $input_line_tabbing = undef; | |
5406 | ||
5407 | $last_line_type = ""; | |
5408 | $last_last_line_leading_level = 0; | |
5409 | $last_line_leading_level = 0; | |
5410 | $last_line_leading_type = '#'; | |
5411 | ||
5412 | $last_nonblank_token = ';'; | |
5413 | $last_nonblank_type = ';'; | |
5414 | $last_last_nonblank_token = ';'; | |
5415 | $last_last_nonblank_type = ';'; | |
5416 | $last_nonblank_block_type = ""; | |
5417 | $last_output_level = 0; | |
5418 | $looking_for_else = 0; | |
5419 | $embedded_tab_count = 0; | |
5420 | $first_embedded_tab_at = 0; | |
5421 | $last_embedded_tab_at = 0; | |
5422 | $deleted_semicolon_count = 0; | |
5423 | $first_deleted_semicolon_at = 0; | |
5424 | $last_deleted_semicolon_at = 0; | |
5425 | $added_semicolon_count = 0; | |
5426 | $first_added_semicolon_at = 0; | |
5427 | $last_added_semicolon_at = 0; | |
5428 | $last_line_had_side_comment = 0; | |
5429 | %postponed_breakpoint = (); | |
5430 | ||
5431 | # variables for adding side comments | |
5432 | %block_leading_text = (); | |
5433 | %block_opening_line_number = (); | |
5434 | $csc_new_statement_ok = 1; | |
5435 | ||
5436 | %saved_opening_indentation = (); | |
5437 | ||
5438 | reset_block_text_accumulator(); | |
5439 | ||
5440 | prepare_for_new_input_lines(); | |
5441 | ||
5442 | $vertical_aligner_object = | |
5443 | Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, | |
5444 | $logger_object, $diagnostics_object ); | |
5445 | ||
5446 | if ( $rOpts->{'entab-leading-whitespace'} ) { | |
5447 | write_logfile_entry( | |
5448 | "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" | |
5449 | ); | |
5450 | } | |
5451 | elsif ( $rOpts->{'tabs'} ) { | |
5452 | write_logfile_entry("Indentation will be with a tab character\n"); | |
5453 | } | |
5454 | else { | |
5455 | write_logfile_entry( | |
5456 | "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); | |
5457 | } | |
5458 | ||
5459 | # This was the start of a formatter referent, but object-oriented | |
5460 | # coding has turned out to be too slow here. | |
5461 | $formatter_self = {}; | |
5462 | ||
5463 | bless $formatter_self, $class; | |
5464 | ||
5465 | # Safety check..this is not a class yet | |
5466 | if ( _increment_count() > 1 ) { | |
5467 | confess | |
5468 | "Attempt to create more than 1 object in $class, which is not a true class yet\n"; | |
5469 | } | |
5470 | return $formatter_self; | |
5471 | } | |
5472 | ||
5473 | sub prepare_for_new_input_lines { | |
5474 | ||
5475 | $gnu_sequence_number++; # increment output batch counter | |
5476 | %last_gnu_equals = (); | |
5477 | %gnu_comma_count = (); | |
5478 | %gnu_arrow_count = (); | |
5479 | $line_start_index_to_go = 0; | |
5480 | $max_gnu_item_index = UNDEFINED_INDEX; | |
5481 | $index_max_forced_break = UNDEFINED_INDEX; | |
5482 | $max_index_to_go = UNDEFINED_INDEX; | |
5483 | $last_nonblank_index_to_go = UNDEFINED_INDEX; | |
5484 | $last_nonblank_type_to_go = ''; | |
5485 | $last_nonblank_token_to_go = ''; | |
5486 | $last_last_nonblank_index_to_go = UNDEFINED_INDEX; | |
5487 | $last_last_nonblank_type_to_go = ''; | |
5488 | $last_last_nonblank_token_to_go = ''; | |
5489 | $forced_breakpoint_count = 0; | |
5490 | $forced_breakpoint_undo_count = 0; | |
5491 | $rbrace_follower = undef; | |
5492 | $lengths_to_go[0] = 0; | |
5493 | $old_line_count_in_batch = 1; | |
5494 | $comma_count_in_batch = 0; | |
5495 | $starting_in_quote = 0; | |
5496 | ||
5497 | destroy_one_line_block(); | |
5498 | } | |
5499 | ||
5500 | sub write_line { | |
5501 | ||
5502 | my $self = shift; | |
5503 | my ($line_of_tokens) = @_; | |
5504 | ||
5505 | my $line_type = $line_of_tokens->{_line_type}; | |
5506 | my $input_line = $line_of_tokens->{_line_text}; | |
5507 | ||
5508 | my $want_blank_line_next = 0; | |
5509 | ||
5510 | # _line_type codes are: | |
5511 | # SYSTEM - system-specific code before hash-bang line | |
5512 | # CODE - line of perl code (including comments) | |
5513 | # POD_START - line starting pod, such as '=head' | |
5514 | # POD - pod documentation text | |
5515 | # POD_END - last line of pod section, '=cut' | |
5516 | # HERE - text of here-document | |
5517 | # HERE_END - last line of here-doc (target word) | |
5518 | # FORMAT - format section | |
5519 | # FORMAT_END - last line of format section, '.' | |
5520 | # DATA_START - __DATA__ line | |
5521 | # DATA - unidentified text following __DATA__ | |
5522 | # END_START - __END__ line | |
5523 | # END - unidentified text following __END__ | |
5524 | # ERROR - we are in big trouble, probably not a perl script | |
5525 | # | |
5526 | # handle line of code.. | |
5527 | if ( $line_type eq 'CODE' ) { | |
5528 | ||
5529 | # let logger see all non-blank lines of code | |
5530 | if ( $input_line !~ /^\s*$/ ) { | |
5531 | my $output_line_number = | |
5532 | $vertical_aligner_object->get_output_line_number(); | |
5533 | black_box( $line_of_tokens, $output_line_number ); | |
5534 | } | |
5535 | print_line_of_tokens($line_of_tokens); | |
5536 | } | |
5537 | ||
5538 | # handle line of non-code.. | |
5539 | else { | |
5540 | ||
5541 | # set special flags | |
5542 | my $skip_line = 0; | |
5543 | my $tee_line = 0; | |
5544 | if ( $line_type =~ /^POD/ ) { | |
5545 | ||
5546 | # Pod docs should have a preceding blank line. But be | |
5547 | # very careful in __END__ and __DATA__ sections, because: | |
5548 | # 1. the user may be using this section for any purpose whatsoever | |
5549 | # 2. the blank counters are not active there | |
5550 | # It should be safe to request a blank line between an | |
5551 | # __END__ or __DATA__ and an immediately following '=head' | |
5552 | # type line, (types END_START and DATA_START), but not for | |
5553 | # any other lines of type END or DATA. | |
5554 | if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } | |
5555 | if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } | |
5556 | if ( !$skip_line | |
5557 | && $line_type eq 'POD_START' | |
5558 | && $last_line_type !~ /^(END|DATA)$/ ) | |
5559 | { | |
5560 | want_blank_line(); | |
5561 | } | |
5562 | ||
5563 | # patch to put a blank line after =cut | |
5564 | # (required by podchecker) | |
5565 | if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { | |
5566 | $file_writer_object->reset_consecutive_blank_lines(); | |
5567 | $want_blank_line_next = 1; | |
5568 | } | |
5569 | } | |
5570 | ||
5571 | # leave the blank counters in a predictable state | |
5572 | # after __END__ or __DATA__ | |
5573 | elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { | |
5574 | $file_writer_object->reset_consecutive_blank_lines(); | |
5575 | $saw_END_or_DATA_ = 1; | |
5576 | } | |
5577 | ||
5578 | # write unindented non-code line | |
5579 | if ( !$skip_line ) { | |
5580 | if ($tee_line) { $file_writer_object->tee_on() } | |
5581 | write_unindented_line($input_line); | |
5582 | if ($tee_line) { $file_writer_object->tee_off() } | |
5583 | if ($want_blank_line_next) { want_blank_line(); } | |
5584 | } | |
5585 | } | |
5586 | $last_line_type = $line_type; | |
5587 | } | |
5588 | ||
5589 | sub create_one_line_block { | |
5590 | $index_start_one_line_block = $_[0]; | |
5591 | $semicolons_before_block_self_destruct = $_[1]; | |
5592 | } | |
5593 | ||
5594 | sub destroy_one_line_block { | |
5595 | $index_start_one_line_block = UNDEFINED_INDEX; | |
5596 | $semicolons_before_block_self_destruct = 0; | |
5597 | } | |
5598 | ||
5599 | sub leading_spaces_to_go { | |
5600 | ||
5601 | # return the number of indentation spaces for a token in the output stream; | |
5602 | # these were previously stored by 'set_leading_whitespace'. | |
5603 | ||
5604 | return get_SPACES( $leading_spaces_to_go[ $_[0] ] ); | |
5605 | ||
5606 | } | |
5607 | ||
5608 | sub get_SPACES { | |
5609 | ||
5610 | # return the number of leading spaces associated with an indentation | |
5611 | # variable $indentation is either a constant number of spaces or an object | |
5612 | # with a get_SPACES method. | |
5613 | my $indentation = shift; | |
5614 | return ref($indentation) ? $indentation->get_SPACES() : $indentation; | |
5615 | } | |
5616 | ||
5617 | sub get_RECOVERABLE_SPACES { | |
5618 | ||
5619 | # return the number of spaces (+ means shift right, - means shift left) | |
5620 | # that we would like to shift a group of lines with the same indentation | |
5621 | # to get them to line up with their opening parens | |
5622 | my $indentation = shift; | |
5623 | return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; | |
5624 | } | |
5625 | ||
5626 | sub get_AVAILABLE_SPACES_to_go { | |
5627 | ||
5628 | my $item = $leading_spaces_to_go[ $_[0] ]; | |
5629 | ||
5630 | # return the number of available leading spaces associated with an | |
5631 | # indentation variable. $indentation is either a constant number of | |
5632 | # spaces or an object with a get_AVAILABLE_SPACES method. | |
5633 | return ref($item) ? $item->get_AVAILABLE_SPACES() : 0; | |
5634 | } | |
5635 | ||
5636 | sub new_lp_indentation_item { | |
5637 | ||
5638 | # this is an interface to the IndentationItem class | |
5639 | my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; | |
5640 | ||
5641 | # A negative level implies not to store the item in the item_list | |
5642 | my $index = 0; | |
5643 | if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } | |
5644 | ||
5645 | my $item = Perl::Tidy::IndentationItem->new( | |
5646 | $spaces, $level, | |
5647 | $ci_level, $available_spaces, | |
5648 | $index, $gnu_sequence_number, | |
5649 | $align_paren, $max_gnu_stack_index, | |
5650 | $line_start_index_to_go, | |
5651 | ); | |
5652 | ||
5653 | if ( $level >= 0 ) { | |
5654 | $gnu_item_list[$max_gnu_item_index] = $item; | |
5655 | } | |
5656 | ||
5657 | return $item; | |
5658 | } | |
5659 | ||
5660 | sub set_leading_whitespace { | |
5661 | ||
5662 | # This routine defines leading whitespace | |
5663 | # given: the level and continuation_level of a token, | |
5664 | # define: space count of leading string which would apply if it | |
5665 | # were the first token of a new line. | |
5666 | ||
5667 | my ( $level, $ci_level, $in_continued_quote ) = @_; | |
5668 | ||
5669 | # modify for -bli, which adds one continuation indentation for | |
5670 | # opening braces | |
5671 | if ( $rOpts_brace_left_and_indent | |
5672 | && $max_index_to_go == 0 | |
5673 | && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) | |
5674 | { | |
5675 | $ci_level++; | |
5676 | } | |
5677 | ||
5678 | # patch to avoid trouble when input file has negative indentation. | |
5679 | # other logic should catch this error. | |
5680 | if ( $level < 0 ) { $level = 0 } | |
5681 | ||
5682 | #------------------------------------------- | |
5683 | # handle the standard indentation scheme | |
5684 | #------------------------------------------- | |
5685 | unless ($rOpts_line_up_parentheses) { | |
5686 | my $space_count = $ci_level * $rOpts_continuation_indentation + $level * | |
5687 | $rOpts_indent_columns; | |
5688 | my $ci_spaces = | |
5689 | ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; | |
5690 | ||
5691 | if ($in_continued_quote) { | |
5692 | $space_count = 0; | |
5693 | $ci_spaces = 0; | |
5694 | } | |
5695 | $leading_spaces_to_go[$max_index_to_go] = $space_count; | |
5696 | $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; | |
5697 | return; | |
5698 | } | |
5699 | ||
5700 | #------------------------------------------------------------- | |
5701 | # handle case of -lp indentation.. | |
5702 | #------------------------------------------------------------- | |
5703 | ||
5704 | # The continued_quote flag means that this is the first token of a | |
5705 | # line, and it is the continuation of some kind of multi-line quote | |
5706 | # or pattern. It requires special treatment because it must have no | |
5707 | # added leading whitespace. So we create a special indentation item | |
5708 | # which is not in the stack. | |
5709 | if ($in_continued_quote) { | |
5710 | my $space_count = 0; | |
5711 | my $available_space = 0; | |
5712 | $level = -1; # flag to prevent storing in item_list | |
5713 | $leading_spaces_to_go[$max_index_to_go] = | |
5714 | $reduced_spaces_to_go[$max_index_to_go] = | |
5715 | new_lp_indentation_item( $space_count, $level, $ci_level, | |
5716 | $available_space, 0 ); | |
5717 | return; | |
5718 | } | |
5719 | ||
5720 | # get the top state from the stack | |
5721 | my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); | |
5722 | my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); | |
5723 | my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); | |
5724 | ||
5725 | my $type = $types_to_go[$max_index_to_go]; | |
5726 | my $token = $tokens_to_go[$max_index_to_go]; | |
5727 | my $total_depth = $nesting_depth_to_go[$max_index_to_go]; | |
5728 | ||
5729 | if ( $type eq '{' || $type eq '(' ) { | |
5730 | ||
5731 | $gnu_comma_count{ $total_depth + 1 } = 0; | |
5732 | $gnu_arrow_count{ $total_depth + 1 } = 0; | |
5733 | ||
5734 | # If we come to an opening token after an '=' token of some type, | |
5735 | # see if it would be helpful to 'break' after the '=' to save space | |
5736 | my $last_equals = $last_gnu_equals{$total_depth}; | |
5737 | if ( $last_equals && $last_equals > $line_start_index_to_go ) { | |
5738 | ||
5739 | # find the position if we break at the '=' | |
5740 | my $i_test = $last_equals; | |
5741 | if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } | |
5742 | my $test_position = total_line_length( $i_test, $max_index_to_go ); | |
5743 | ||
5744 | if ( | |
5745 | ||
5746 | # if we are beyond the midpoint | |
5747 | $gnu_position_predictor > $half_maximum_line_length | |
5748 | ||
5749 | # or if we can save some space by breaking at the '=' | |
5750 | # without obscuring the second line by the first | |
5751 | || ( $test_position > 1 + | |
5752 | total_line_length( $line_start_index_to_go, $last_equals ) ) | |
5753 | ) | |
5754 | { | |
5755 | ||
5756 | # then make the switch -- note that we do not set a real | |
5757 | # breakpoint here because we may not really need one; sub | |
5758 | # scan_list will do that if necessary | |
5759 | $line_start_index_to_go = $i_test + 1; | |
5760 | $gnu_position_predictor = $test_position; | |
5761 | } | |
5762 | } | |
5763 | } | |
5764 | ||
5765 | # Check for decreasing depth .. | |
5766 | # Note that one token may have both decreasing and then increasing | |
5767 | # depth. For example, (level, ci) can go from (1,1) to (2,0). So, | |
5768 | # in this example we would first go back to (1,0) then up to (2,0) | |
5769 | # in a single call. | |
5770 | if ( $level < $current_level || $ci_level < $current_ci_level ) { | |
5771 | ||
5772 | # loop to find the first entry at or completely below this level | |
5773 | my ( $lev, $ci_lev ); | |
5774 | while (1) { | |
5775 | if ($max_gnu_stack_index) { | |
5776 | ||
5777 | # save index of token which closes this level | |
5778 | $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go); | |
5779 | ||
5780 | # Undo any extra indentation if we saw no commas | |
5781 | my $available_spaces = | |
5782 | $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES(); | |
5783 | ||
5784 | my $comma_count = 0; | |
5785 | my $arrow_count = 0; | |
5786 | if ( $type eq '}' || $type eq ')' ) { | |
5787 | $comma_count = $gnu_comma_count{$total_depth}; | |
5788 | $arrow_count = $gnu_arrow_count{$total_depth}; | |
5789 | $comma_count = 0 unless $comma_count; | |
5790 | $arrow_count = 0 unless $arrow_count; | |
5791 | } | |
5792 | $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count); | |
5793 | $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count); | |
5794 | ||
5795 | if ( $available_spaces > 0 ) { | |
5796 | ||
5797 | if ( $comma_count <= 0 || $arrow_count > 0 ) { | |
5798 | ||
5799 | my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX(); | |
5800 | my $seqno = | |
5801 | $gnu_stack[$max_gnu_stack_index] | |
5802 | ->get_SEQUENCE_NUMBER(); | |
5803 | ||
5804 | # Be sure this item was created in this batch. This | |
5805 | # should be true because we delete any available | |
5806 | # space from open items at the end of each batch. | |
5807 | if ( $gnu_sequence_number != $seqno | |
5808 | || $i > $max_gnu_item_index ) | |
5809 | { | |
5810 | warning( | |
5811 | "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" | |
5812 | ); | |
5813 | report_definite_bug(); | |
5814 | } | |
5815 | ||
5816 | else { | |
5817 | if ( $arrow_count == 0 ) { | |
5818 | $gnu_item_list[$i] | |
5819 | ->permanently_decrease_AVAILABLE_SPACES( | |
5820 | $available_spaces); | |
5821 | } | |
5822 | else { | |
5823 | $gnu_item_list[$i] | |
5824 | ->tentatively_decrease_AVAILABLE_SPACES( | |
5825 | $available_spaces); | |
5826 | } | |
5827 | ||
5828 | my $j; | |
5829 | for ( | |
5830 | $j = $i + 1 ; | |
5831 | $j <= $max_gnu_item_index ; | |
5832 | $j++ | |
5833 | ) | |
5834 | { | |
5835 | $gnu_item_list[$j] | |
5836 | ->decrease_SPACES($available_spaces); | |
5837 | } | |
5838 | } | |
5839 | } | |
5840 | } | |
5841 | ||
5842 | # go down one level | |
5843 | --$max_gnu_stack_index; | |
5844 | $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); | |
5845 | $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); | |
5846 | ||
5847 | # stop when we reach a level at or below the current level | |
5848 | if ( $lev <= $level && $ci_lev <= $ci_level ) { | |
5849 | $space_count = | |
5850 | $gnu_stack[$max_gnu_stack_index]->get_SPACES(); | |
5851 | $current_level = $lev; | |
5852 | $current_ci_level = $ci_lev; | |
5853 | last; | |
5854 | } | |
5855 | } | |
5856 | ||
5857 | # reached bottom of stack .. should never happen because | |
5858 | # only negative levels can get here, and $level was forced | |
5859 | # to be positive above. | |
5860 | else { | |
5861 | warning( | |
5862 | "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" | |
5863 | ); | |
5864 | report_definite_bug(); | |
5865 | last; | |
5866 | } | |
5867 | } | |
5868 | } | |
5869 | ||
5870 | # handle increasing depth | |
5871 | if ( $level > $current_level || $ci_level > $current_ci_level ) { | |
5872 | ||
5873 | # Compute the standard incremental whitespace. This will be | |
5874 | # the minimum incremental whitespace that will be used. This | |
5875 | # choice results in a smooth transition between the gnu-style | |
5876 | # and the standard style. | |
5877 | my $standard_increment = | |
5878 | ( $level - $current_level ) * $rOpts_indent_columns + | |
5879 | ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; | |
5880 | ||
5881 | # Now we have to define how much extra incremental space | |
5882 | # ("$available_space") we want. This extra space will be | |
5883 | # reduced as necessary when long lines are encountered or when | |
5884 | # it becomes clear that we do not have a good list. | |
5885 | my $available_space = 0; | |
5886 | my $align_paren = 0; | |
5887 | my $excess = 0; | |
5888 | ||
5889 | # initialization on empty stack.. | |
5890 | if ( $max_gnu_stack_index == 0 ) { | |
5891 | $space_count = $level * $rOpts_indent_columns; | |
5892 | } | |
5893 | ||
5894 | # if this is a BLOCK, add the standard increment | |
5895 | elsif ($last_nonblank_block_type) { | |
5896 | $space_count += $standard_increment; | |
5897 | } | |
5898 | ||
5899 | # if last nonblank token was not structural indentation, | |
5900 | # just use standard increment | |
5901 | elsif ( $last_nonblank_type ne '{' ) { | |
5902 | $space_count += $standard_increment; | |
5903 | } | |
5904 | ||
5905 | # otherwise use the space to the first non-blank level change token | |
5906 | else { | |
5907 | ||
5908 | $space_count = $gnu_position_predictor; | |
5909 | ||
5910 | my $min_gnu_indentation = | |
5911 | $gnu_stack[$max_gnu_stack_index]->get_SPACES(); | |
5912 | ||
5913 | $available_space = $space_count - $min_gnu_indentation; | |
5914 | if ( $available_space >= $standard_increment ) { | |
5915 | $min_gnu_indentation += $standard_increment; | |
5916 | } | |
5917 | elsif ( $available_space > 1 ) { | |
5918 | $min_gnu_indentation += $available_space + 1; | |
5919 | } | |
5920 | elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { | |
5921 | if ( ( $tightness{$last_nonblank_token} < 2 ) ) { | |
5922 | $min_gnu_indentation += 2; | |
5923 | } | |
5924 | else { | |
5925 | $min_gnu_indentation += 1; | |
5926 | } | |
5927 | } | |
5928 | else { | |
5929 | $min_gnu_indentation += $standard_increment; | |
5930 | } | |
5931 | $available_space = $space_count - $min_gnu_indentation; | |
5932 | ||
5933 | if ( $available_space < 0 ) { | |
5934 | $space_count = $min_gnu_indentation; | |
5935 | $available_space = 0; | |
5936 | } | |
5937 | $align_paren = 1; | |
5938 | } | |
5939 | ||
5940 | # update state, but not on a blank token | |
5941 | if ( $types_to_go[$max_index_to_go] ne 'b' ) { | |
5942 | ||
5943 | $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1); | |
5944 | ||
5945 | ++$max_gnu_stack_index; | |
5946 | $gnu_stack[$max_gnu_stack_index] = | |
5947 | new_lp_indentation_item( $space_count, $level, $ci_level, | |
5948 | $available_space, $align_paren ); | |
5949 | ||
5950 | # If the opening paren is beyond the half-line length, then | |
5951 | # we will use the minimum (standard) indentation. This will | |
5952 | # help avoid problems associated with running out of space | |
5953 | # near the end of a line. As a result, in deeply nested | |
5954 | # lists, there will be some indentations which are limited | |
5955 | # to this minimum standard indentation. But the most deeply | |
5956 | # nested container will still probably be able to shift its | |
5957 | # parameters to the right for proper alignment, so in most | |
5958 | # cases this will not be noticable. | |
5959 | if ( $available_space > 0 | |
5960 | && $space_count > $half_maximum_line_length ) | |
5961 | { | |
5962 | $gnu_stack[$max_gnu_stack_index] | |
5963 | ->tentatively_decrease_AVAILABLE_SPACES($available_space); | |
5964 | } | |
5965 | } | |
5966 | } | |
5967 | ||
5968 | # Count commas and look for non-list characters. Once we see a | |
5969 | # non-list character, we give up and don't look for any more commas. | |
5970 | if ( $type eq '=>' ) { | |
5971 | $gnu_arrow_count{$total_depth}++; | |
5972 | ||
5973 | # tentatively treating '=>' like '=' for estimating breaks | |
5974 | # TODO: this could use some experimentation | |
5975 | $last_gnu_equals{$total_depth} = $max_index_to_go; | |
5976 | } | |
5977 | ||
5978 | elsif ( $type eq ',' ) { | |
5979 | $gnu_comma_count{$total_depth}++; | |
5980 | } | |
5981 | ||
5982 | elsif ( $is_assignment{$type} ) { | |
5983 | $last_gnu_equals{$total_depth} = $max_index_to_go; | |
5984 | } | |
5985 | ||
5986 | # this token might start a new line | |
5987 | # if this is a non-blank.. | |
5988 | if ( $type ne 'b' ) { | |
5989 | ||
5990 | # and if .. | |
5991 | if ( | |
5992 | ||
5993 | # this is the first nonblank token of the line | |
5994 | $max_index_to_go == 1 && $types_to_go[0] eq 'b' | |
5995 | ||
5996 | # or previous character was one of these: | |
5997 | || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ | |
5998 | ||
5999 | # or previous character was opening and this does not close it | |
6000 | || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) | |
6001 | || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) | |
6002 | ||
6003 | # or this token is one of these: | |
6004 | || $type =~ /^([\.]|\|\||\&\&)$/ | |
6005 | ||
6006 | # or this is a closing structure | |
6007 | || ( $last_nonblank_type_to_go eq '}' | |
6008 | && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) | |
6009 | ||
6010 | # or previous token was keyword 'return' | |
6011 | || ( $last_nonblank_type_to_go eq 'k' | |
6012 | && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) | |
6013 | ||
6014 | # or starting a new line at certain keywords is fine | |
6015 | || ( $type eq 'k' | |
6016 | && $is_if_unless_and_or_last_next_redo_return{$token} ) | |
6017 | ||
6018 | # or this is after an assignment after a closing structure | |
6019 | || ( | |
6020 | $is_assignment{$last_nonblank_type_to_go} | |
6021 | && ( | |
6022 | $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ | |
6023 | ||
6024 | # and it is significantly to the right | |
6025 | || $gnu_position_predictor > $half_maximum_line_length | |
6026 | ) | |
6027 | ) | |
6028 | ) | |
6029 | { | |
6030 | check_for_long_gnu_style_lines(); | |
6031 | $line_start_index_to_go = $max_index_to_go; | |
6032 | ||
6033 | # back up 1 token if we want to break before that type | |
6034 | # otherwise, we may strand tokens like '?' or ':' on a line | |
6035 | if ( $line_start_index_to_go > 0 ) { | |
6036 | if ( $last_nonblank_type_to_go eq 'k' ) { | |
6037 | ||
6038 | if ( $want_break_before{$last_nonblank_token_to_go} ) { | |
6039 | $line_start_index_to_go--; | |
6040 | } | |
6041 | } | |
6042 | elsif ( $want_break_before{$last_nonblank_type_to_go} ) { | |
6043 | $line_start_index_to_go--; | |
6044 | } | |
6045 | } | |
6046 | } | |
6047 | } | |
6048 | ||
6049 | # remember the predicted position of this token on the output line | |
6050 | if ( $max_index_to_go > $line_start_index_to_go ) { | |
6051 | $gnu_position_predictor = | |
6052 | total_line_length( $line_start_index_to_go, $max_index_to_go ); | |
6053 | } | |
6054 | else { | |
6055 | $gnu_position_predictor = $space_count + | |
6056 | token_sequence_length( $max_index_to_go, $max_index_to_go ); | |
6057 | } | |
6058 | ||
6059 | # store the indentation object for this token | |
6060 | # this allows us to manipulate the leading whitespace | |
6061 | # (in case we have to reduce indentation to fit a line) without | |
6062 | # having to change any token values | |
6063 | $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; | |
6064 | $reduced_spaces_to_go[$max_index_to_go] = | |
6065 | ( $max_gnu_stack_index > 0 && $ci_level ) | |
6066 | ? $gnu_stack[ $max_gnu_stack_index - 1 ] | |
6067 | : $gnu_stack[$max_gnu_stack_index]; | |
6068 | return; | |
6069 | } | |
6070 | ||
6071 | sub check_for_long_gnu_style_lines { | |
6072 | ||
6073 | # look at the current estimated maximum line length, and | |
6074 | # remove some whitespace if it exceeds the desired maximum | |
6075 | ||
6076 | # this is only for the '-lp' style | |
6077 | return unless ($rOpts_line_up_parentheses); | |
6078 | ||
6079 | # nothing can be done if no stack items defined for this line | |
6080 | return if ( $max_gnu_item_index == UNDEFINED_INDEX ); | |
6081 | ||
6082 | # see if we have exceeded the maximum desired line length | |
6083 | # keep 2 extra free because they are needed in some cases | |
6084 | # (result of trial-and-error testing) | |
6085 | my $spaces_needed = | |
6086 | $gnu_position_predictor - $rOpts_maximum_line_length + 2; | |
6087 | ||
6088 | return if ( $spaces_needed < 0 ); | |
6089 | ||
6090 | # We are over the limit, so try to remove a requested number of | |
6091 | # spaces from leading whitespace. We are only allowed to remove | |
6092 | # from whitespace items created on this batch, since others have | |
6093 | # already been used and cannot be undone. | |
6094 | my @candidates = (); | |
6095 | my $i; | |
6096 | ||
6097 | # loop over all whitespace items created for the current batch | |
6098 | for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { | |
6099 | my $item = $gnu_item_list[$i]; | |
6100 | ||
6101 | # item must still be open to be a candidate (otherwise it | |
6102 | # cannot influence the current token) | |
6103 | next if ( $item->get_CLOSED() >= 0 ); | |
6104 | ||
6105 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | |
6106 | ||
6107 | if ( $available_spaces > 0 ) { | |
6108 | push( @candidates, [ $i, $available_spaces ] ); | |
6109 | } | |
6110 | } | |
6111 | ||
6112 | return unless (@candidates); | |
6113 | ||
6114 | # sort by available whitespace so that we can remove whitespace | |
6115 | # from the maximum available first | |
6116 | @candidates = sort { $b->[1] <=> $a->[1] } @candidates; | |
6117 | ||
6118 | # keep removing whitespace until we are done or have no more | |
6119 | my $candidate; | |
6120 | foreach $candidate (@candidates) { | |
6121 | my ( $i, $available_spaces ) = @{$candidate}; | |
6122 | my $deleted_spaces = | |
6123 | ( $available_spaces > $spaces_needed ) | |
6124 | ? $spaces_needed | |
6125 | : $available_spaces; | |
6126 | ||
6127 | # remove the incremental space from this item | |
6128 | $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces); | |
6129 | ||
6130 | my $i_debug = $i; | |
6131 | ||
6132 | # update the leading whitespace of this item and all items | |
6133 | # that came after it | |
6134 | for ( ; $i <= $max_gnu_item_index ; $i++ ) { | |
6135 | ||
6136 | my $old_spaces = $gnu_item_list[$i]->get_SPACES(); | |
6137 | if ( $old_spaces > $deleted_spaces ) { | |
6138 | $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); | |
6139 | } | |
6140 | ||
6141 | # shouldn't happen except for code bug: | |
6142 | else { | |
6143 | my $level = $gnu_item_list[$i_debug]->get_LEVEL(); | |
6144 | my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL(); | |
6145 | my $old_level = $gnu_item_list[$i]->get_LEVEL(); | |
6146 | my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL(); | |
6147 | warning( | |
6148 | "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" | |
6149 | ); | |
6150 | report_definite_bug(); | |
6151 | } | |
6152 | } | |
6153 | $gnu_position_predictor -= $deleted_spaces; | |
6154 | $spaces_needed -= $deleted_spaces; | |
6155 | last unless ( $spaces_needed > 0 ); | |
6156 | } | |
6157 | } | |
6158 | ||
6159 | sub finish_lp_batch { | |
6160 | ||
6161 | # This routine is called once after each each output stream batch is | |
6162 | # finished to undo indentation for all incomplete -lp | |
6163 | # indentation levels. It is too risky to leave a level open, | |
6164 | # because then we can't backtrack in case of a long line to follow. | |
6165 | # This means that comments and blank lines will disrupt this | |
6166 | # indentation style. But the vertical aligner may be able to | |
6167 | # get the space back if there are side comments. | |
6168 | ||
6169 | # this is only for the 'lp' style | |
6170 | return unless ($rOpts_line_up_parentheses); | |
6171 | ||
6172 | # nothing can be done if no stack items defined for this line | |
6173 | return if ( $max_gnu_item_index == UNDEFINED_INDEX ); | |
6174 | ||
6175 | # loop over all whitespace items created for the current batch | |
6176 | my $i; | |
6177 | for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { | |
6178 | my $item = $gnu_item_list[$i]; | |
6179 | ||
6180 | # only look for open items | |
6181 | next if ( $item->get_CLOSED() >= 0 ); | |
6182 | ||
6183 | # Tentatively remove all of the available space | |
6184 | # (The vertical aligner will try to get it back later) | |
6185 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | |
6186 | if ( $available_spaces > 0 ) { | |
6187 | ||
6188 | # delete incremental space for this item | |
6189 | $gnu_item_list[$i] | |
6190 | ->tentatively_decrease_AVAILABLE_SPACES($available_spaces); | |
6191 | ||
6192 | # Reduce the total indentation space of any nodes that follow | |
6193 | # Note that any such nodes must necessarily be dependents | |
6194 | # of this node. | |
6195 | foreach ( $i + 1 .. $max_gnu_item_index ) { | |
6196 | $gnu_item_list[$_]->decrease_SPACES($available_spaces); | |
6197 | } | |
6198 | } | |
6199 | } | |
6200 | return; | |
6201 | } | |
6202 | ||
6203 | sub reduce_lp_indentation { | |
6204 | ||
6205 | # reduce the leading whitespace at token $i if possible by $spaces_needed | |
6206 | # (a large value of $spaces_needed will remove all excess space) | |
6207 | # NOTE: to be called from scan_list only for a sequence of tokens | |
6208 | # contained between opening and closing parens/braces/brackets | |
6209 | ||
6210 | my ( $i, $spaces_wanted ) = @_; | |
6211 | my $deleted_spaces = 0; | |
6212 | ||
6213 | my $item = $leading_spaces_to_go[$i]; | |
6214 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | |
6215 | ||
6216 | if ( | |
6217 | $available_spaces > 0 | |
6218 | && ( ( $spaces_wanted <= $available_spaces ) | |
6219 | || !$item->get_HAVE_CHILD() ) | |
6220 | ) | |
6221 | { | |
6222 | ||
6223 | # we'll remove these spaces, but mark them as recoverable | |
6224 | $deleted_spaces = | |
6225 | $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted); | |
6226 | } | |
6227 | ||
6228 | return $deleted_spaces; | |
6229 | } | |
6230 | ||
6231 | sub token_sequence_length { | |
6232 | ||
6233 | # return length of tokens ($ifirst .. $ilast) including first & last | |
6234 | # returns 0 if $ifirst > $ilast | |
6235 | my $ifirst = shift; | |
6236 | my $ilast = shift; | |
6237 | return 0 if ( $ilast < 0 || $ifirst > $ilast ); | |
6238 | return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 ); | |
6239 | return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst]; | |
6240 | } | |
6241 | ||
6242 | sub total_line_length { | |
6243 | ||
6244 | # return length of a line of tokens ($ifirst .. $ilast) | |
6245 | my $ifirst = shift; | |
6246 | my $ilast = shift; | |
6247 | if ( $ifirst < 0 ) { $ifirst = 0 } | |
6248 | ||
6249 | return leading_spaces_to_go($ifirst) + | |
6250 | token_sequence_length( $ifirst, $ilast ); | |
6251 | } | |
6252 | ||
6253 | sub excess_line_length { | |
6254 | ||
6255 | # return number of characters by which a line of tokens ($ifirst..$ilast) | |
6256 | # exceeds the allowable line length. | |
6257 | my $ifirst = shift; | |
6258 | my $ilast = shift; | |
6259 | if ( $ifirst < 0 ) { $ifirst = 0 } | |
6260 | return leading_spaces_to_go($ifirst) + | |
6261 | token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length; | |
6262 | } | |
6263 | ||
6264 | sub finish_formatting { | |
6265 | ||
6266 | # flush buffer and write any informative messages | |
6267 | my $self = shift; | |
6268 | ||
6269 | flush(); | |
6270 | $file_writer_object->decrement_output_line_number() | |
6271 | ; # fix up line number since it was incremented | |
6272 | we_are_at_the_last_line(); | |
6273 | if ( $added_semicolon_count > 0 ) { | |
6274 | my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; | |
6275 | my $what = | |
6276 | ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; | |
6277 | write_logfile_entry("$added_semicolon_count $what added:\n"); | |
6278 | write_logfile_entry( | |
6279 | " $first at input line $first_added_semicolon_at\n"); | |
6280 | ||
6281 | if ( $added_semicolon_count > 1 ) { | |
6282 | write_logfile_entry( | |
6283 | " Last at input line $last_added_semicolon_at\n"); | |
6284 | } | |
6285 | write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); | |
6286 | write_logfile_entry("\n"); | |
6287 | } | |
6288 | ||
6289 | if ( $deleted_semicolon_count > 0 ) { | |
6290 | my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; | |
6291 | my $what = | |
6292 | ( $deleted_semicolon_count > 1 ) | |
6293 | ? "semicolons were" | |
6294 | : "semicolon was"; | |
6295 | write_logfile_entry( | |
6296 | "$deleted_semicolon_count unnecessary $what deleted:\n"); | |
6297 | write_logfile_entry( | |
6298 | " $first at input line $first_deleted_semicolon_at\n"); | |
6299 | ||
6300 | if ( $deleted_semicolon_count > 1 ) { | |
6301 | write_logfile_entry( | |
6302 | " Last at input line $last_deleted_semicolon_at\n"); | |
6303 | } | |
6304 | write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); | |
6305 | write_logfile_entry("\n"); | |
6306 | } | |
6307 | ||
6308 | if ( $embedded_tab_count > 0 ) { | |
6309 | my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; | |
6310 | my $what = | |
6311 | ( $embedded_tab_count > 1 ) | |
6312 | ? "quotes or patterns" | |
6313 | : "quote or pattern"; | |
6314 | write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); | |
6315 | write_logfile_entry( | |
6316 | "This means the display of this script could vary with device or software\n" | |
6317 | ); | |
6318 | write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); | |
6319 | ||
6320 | if ( $embedded_tab_count > 1 ) { | |
6321 | write_logfile_entry( | |
6322 | " Last at input line $last_embedded_tab_at\n"); | |
6323 | } | |
6324 | write_logfile_entry("\n"); | |
6325 | } | |
6326 | ||
6327 | if ($first_tabbing_disagreement) { | |
6328 | write_logfile_entry( | |
6329 | "First indentation disagreement seen at input line $first_tabbing_disagreement\n" | |
6330 | ); | |
6331 | } | |
6332 | ||
6333 | if ($in_tabbing_disagreement) { | |
6334 | write_logfile_entry( | |
6335 | "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" | |
6336 | ); | |
6337 | } | |
6338 | else { | |
6339 | ||
6340 | if ($last_tabbing_disagreement) { | |
6341 | ||
6342 | write_logfile_entry( | |
6343 | "Last indentation disagreement seen at input line $last_tabbing_disagreement\n" | |
6344 | ); | |
6345 | } | |
6346 | else { | |
6347 | write_logfile_entry("No indentation disagreement seen\n"); | |
6348 | } | |
6349 | } | |
6350 | write_logfile_entry("\n"); | |
6351 | ||
6352 | $vertical_aligner_object->report_anything_unusual(); | |
6353 | ||
6354 | $file_writer_object->report_line_length_errors(); | |
6355 | } | |
6356 | ||
6357 | sub check_options { | |
6358 | ||
6359 | # This routine is called to check the Opts hash after it is defined | |
6360 | ||
6361 | ($rOpts) = @_; | |
6362 | my ( $tabbing_string, $tab_msg ); | |
6363 | ||
6364 | make_static_block_comment_pattern(); | |
6365 | make_static_side_comment_pattern(); | |
6366 | make_closing_side_comment_prefix(); | |
6367 | make_closing_side_comment_list_pattern(); | |
6368 | ||
6369 | # If closing side comments ARE selected, then we can safely | |
6370 | # delete old closing side comments unless closing side comment | |
6371 | # warnings are requested. This is a good idea because it will | |
6372 | # eliminate any old csc's which fall below the line count threshold. | |
6373 | # We cannot do this if warnings are turned on, though, because we | |
6374 | # might delete some text which has been added. So that must | |
6375 | # be handled when comments are created. | |
6376 | if ( $rOpts->{'closing-side-comments'} ) { | |
6377 | if ( !$rOpts->{'closing-side-comment-warnings'} ) { | |
6378 | $rOpts->{'delete-closing-side-comments'} = 1; | |
6379 | } | |
6380 | } | |
6381 | ||
6382 | # If closing side comments ARE NOT selected, but warnings ARE | |
6383 | # selected and we ARE DELETING csc's, then we will pretend to be | |
6384 | # adding with a huge interval. This will force the comments to be | |
6385 | # generated for comparison with the old comments, but not added. | |
6386 | elsif ( $rOpts->{'closing-side-comment-warnings'} ) { | |
6387 | if ( $rOpts->{'delete-closing-side-comments'} ) { | |
6388 | $rOpts->{'delete-closing-side-comments'} = 0; | |
6389 | $rOpts->{'closing-side-comments'} = 1; | |
6390 | $rOpts->{'closing-side-comment-interval'} = 100000000; | |
6391 | } | |
6392 | } | |
6393 | ||
6394 | make_bli_pattern(); | |
6395 | make_block_brace_vertical_tightness_pattern(); | |
6396 | ||
6397 | if ( $rOpts->{'line-up-parentheses'} ) { | |
6398 | ||
6399 | if ( $rOpts->{'indent-only'} | |
6400 | || !$rOpts->{'add-newlines'} | |
6401 | || !$rOpts->{'delete-old-newlines'} ) | |
6402 | { | |
6403 | warn <<EOM; | |
6404 | ----------------------------------------------------------------------- | |
6405 | Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp | |
6406 | ||
6407 | The -lp indentation logic requires that perltidy be able to coordinate | |
6408 | arbitrarily large numbers of line breakpoints. This isn't possible | |
6409 | with these flags. Sometimes an acceptable workaround is to use -wocb=3 | |
6410 | ----------------------------------------------------------------------- | |
6411 | EOM | |
6412 | $rOpts->{'line-up-parentheses'} = 0; | |
6413 | } | |
6414 | } | |
6415 | ||
6416 | # At present, tabs are not compatable with the line-up-parentheses style | |
6417 | # (it would be possible to entab the total leading whitespace | |
6418 | # just prior to writing the line, if desired). | |
6419 | if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { | |
6420 | warn <<EOM; | |
6421 | Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et. | |
6422 | EOM | |
6423 | $rOpts->{'tabs'} = 0; | |
6424 | } | |
6425 | ||
6426 | # Likewise, tabs are not compatable with outdenting.. | |
6427 | if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { | |
6428 | warn <<EOM; | |
6429 | Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et. | |
6430 | EOM | |
6431 | $rOpts->{'tabs'} = 0; | |
6432 | } | |
6433 | ||
6434 | if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { | |
6435 | warn <<EOM; | |
6436 | Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et. | |
6437 | EOM | |
6438 | $rOpts->{'tabs'} = 0; | |
6439 | } | |
6440 | ||
6441 | if ( !$rOpts->{'space-for-semicolon'} ) { | |
6442 | $want_left_space{'f'} = -1; | |
6443 | } | |
6444 | ||
6445 | if ( $rOpts->{'space-terminal-semicolon'} ) { | |
6446 | $want_left_space{';'} = 1; | |
6447 | } | |
6448 | ||
6449 | # implement outdenting preferences for keywords | |
6450 | %outdent_keyword = (); | |
6451 | ||
6452 | # load defaults | |
6453 | @_ = qw(next last redo goto return); | |
6454 | ||
6455 | # override defaults if requested | |
6456 | if ( $_ = $rOpts->{'outdent-keyword-list'} ) { | |
6457 | s/^\s+//; | |
6458 | s/\s+$//; | |
6459 | @_ = split /\s+/; | |
6460 | } | |
6461 | ||
6462 | # FUTURE: if not a keyword, assume that it is an identifier | |
6463 | foreach (@_) { | |
6464 | if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { | |
6465 | $outdent_keyword{$_} = 1; | |
6466 | } | |
6467 | else { | |
6468 | warn "ignoring '$_' in -okwl list; not a perl keyword"; | |
6469 | } | |
6470 | } | |
6471 | ||
6472 | # implement user whitespace preferences | |
6473 | if ( $_ = $rOpts->{'want-left-space'} ) { | |
6474 | s/^\s+//; | |
6475 | s/\s+$//; | |
6476 | @_ = split /\s+/; | |
6477 | @want_left_space{@_} = (1) x scalar(@_); | |
6478 | } | |
6479 | ||
6480 | if ( $_ = $rOpts->{'want-right-space'} ) { | |
6481 | s/^\s+//; | |
6482 | s/\s+$//; | |
6483 | @_ = split /\s+/; | |
6484 | @want_right_space{@_} = (1) x scalar(@_); | |
6485 | } | |
6486 | if ( $_ = $rOpts->{'nowant-left-space'} ) { | |
6487 | s/^\s+//; | |
6488 | s/\s+$//; | |
6489 | @_ = split /\s+/; | |
6490 | @want_left_space{@_} = (-1) x scalar(@_); | |
6491 | } | |
6492 | ||
6493 | if ( $_ = $rOpts->{'nowant-right-space'} ) { | |
6494 | s/^\s+//; | |
6495 | s/\s+$//; | |
6496 | @want_right_space{@_} = (-1) x scalar(@_); | |
6497 | } | |
6498 | if ( $rOpts->{'dump-want-left-space'} ) { | |
6499 | dump_want_left_space(*STDOUT); | |
6500 | exit 1; | |
6501 | } | |
6502 | ||
6503 | if ( $rOpts->{'dump-want-right-space'} ) { | |
6504 | dump_want_right_space(*STDOUT); | |
6505 | exit 1; | |
6506 | } | |
6507 | ||
6508 | # default keywords for which space is introduced before an opening paren | |
6509 | # (at present, including them messes up vertical alignment) | |
6510 | @_ = qw(my local our and or eq ne if else elsif until | |
6511 | unless while for foreach return switch case given when); | |
6512 | @space_after_keyword{@_} = (1) x scalar(@_); | |
6513 | ||
6514 | # allow user to modify these defaults | |
6515 | if ( $_ = $rOpts->{'space-after-keyword'} ) { | |
6516 | s/^\s+//; | |
6517 | s/\s+$//; | |
6518 | @_ = split /\s+/; | |
6519 | @space_after_keyword{@_} = (1) x scalar(@_); | |
6520 | } | |
6521 | ||
6522 | if ( $_ = $rOpts->{'nospace-after-keyword'} ) { | |
6523 | s/^\s+//; | |
6524 | s/\s+$//; | |
6525 | @_ = split /\s+/; | |
6526 | @space_after_keyword{@_} = (0) x scalar(@_); | |
6527 | } | |
6528 | ||
6529 | # implement user break preferences | |
6530 | if ( $_ = $rOpts->{'want-break-after'} ) { | |
6531 | @_ = split /\s+/; | |
6532 | foreach my $tok (@_) { | |
6533 | if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: | |
6534 | my $lbs = $left_bond_strength{$tok}; | |
6535 | my $rbs = $right_bond_strength{$tok}; | |
6536 | if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { | |
6537 | ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = | |
6538 | ( $lbs, $rbs ); | |
6539 | } | |
6540 | } | |
6541 | } | |
6542 | ||
6543 | if ( $_ = $rOpts->{'want-break-before'} ) { | |
6544 | s/^\s+//; | |
6545 | s/\s+$//; | |
6546 | @_ = split /\s+/; | |
6547 | foreach my $tok (@_) { | |
6548 | my $lbs = $left_bond_strength{$tok}; | |
6549 | my $rbs = $right_bond_strength{$tok}; | |
6550 | if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { | |
6551 | ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = | |
6552 | ( $lbs, $rbs ); | |
6553 | } | |
6554 | } | |
6555 | } | |
6556 | ||
6557 | # make note if breaks are before certain key types | |
6558 | %want_break_before = (); | |
6559 | ||
6560 | foreach my $tok ( '.', ',', ':', '?', '&&', '||', 'and', 'or', 'xor' ) { | |
6561 | $want_break_before{$tok} = | |
6562 | $left_bond_strength{$tok} < $right_bond_strength{$tok}; | |
6563 | } | |
6564 | ||
6565 | # Coordinate ?/: breaks, which must be similar | |
6566 | if ( !$want_break_before{':'} ) { | |
6567 | $want_break_before{'?'} = $want_break_before{':'}; | |
6568 | $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; | |
6569 | $left_bond_strength{'?'} = NO_BREAK; | |
6570 | } | |
6571 | ||
6572 | # Define here tokens which may follow the closing brace of a do statement | |
6573 | # on the same line, as in: | |
6574 | # } while ( $something); | |
6575 | @_ = qw(until while unless if ; ); | |
6576 | push @_, ','; | |
6577 | @is_do_follower{@_} = (1) x scalar(@_); | |
6578 | ||
6579 | # These tokens may follow the closing brace of an if or elsif block. | |
6580 | # In other words, for cuddled else we want code to look like: | |
6581 | # } elsif ( $something) { | |
6582 | # } else { | |
6583 | if ( $rOpts->{'cuddled-else'} ) { | |
6584 | @_ = qw(else elsif); | |
6585 | @is_if_brace_follower{@_} = (1) x scalar(@_); | |
6586 | } | |
6587 | else { | |
6588 | %is_if_brace_follower = (); | |
6589 | } | |
6590 | ||
6591 | # nothing can follow the closing curly of an else { } block: | |
6592 | %is_else_brace_follower = (); | |
6593 | ||
6594 | # what can follow a multi-line anonymous sub definition closing curly: | |
6595 | @_ = qw# ; : => or and && || ) #; | |
6596 | push @_, ','; | |
6597 | @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); | |
6598 | ||
6599 | # what can follow a one-line anonynomous sub closing curly: | |
6600 | # one-line anonumous subs also have ']' here... | |
6601 | # see tk3.t and PP.pm | |
6602 | @_ = qw# ; : => or and && || ) ] #; | |
6603 | push @_, ','; | |
6604 | @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_); | |
6605 | ||
6606 | # What can follow a closing curly of a block | |
6607 | # which is not an if/elsif/else/do/sort/map/grep/eval/sub | |
6608 | # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' | |
6609 | @_ = qw# ; : => or and && || ) #; | |
6610 | push @_, ','; | |
6611 | ||
6612 | # allow cuddled continue if cuddled else is specified | |
6613 | if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; } | |
6614 | ||
6615 | @is_other_brace_follower{@_} = (1) x scalar(@_); | |
6616 | ||
6617 | $right_bond_strength{'{'} = WEAK; | |
6618 | $left_bond_strength{'{'} = VERY_STRONG; | |
6619 | ||
6620 | # make -l=0 equal to -l=infinite | |
6621 | if ( !$rOpts->{'maximum-line-length'} ) { | |
6622 | $rOpts->{'maximum-line-length'} = 1000000; | |
6623 | } | |
6624 | ||
6625 | # make -lbl=0 equal to -lbl=infinite | |
6626 | if ( !$rOpts->{'long-block-line-count'} ) { | |
6627 | $rOpts->{'long-block-line-count'} = 1000000; | |
6628 | } | |
6629 | ||
6630 | my $ole = $rOpts->{'output-line-ending'}; | |
6631 | ##if ($^O =~ /^(VMS| | |
6632 | if ($ole) { | |
6633 | my %endings = ( | |
6634 | dos => "\015\012", | |
6635 | win => "\015\012", | |
6636 | mac => "\015", | |
6637 | unix => "\012", | |
6638 | ); | |
6639 | $ole = lc $ole; | |
6640 | unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { | |
6641 | my $str = join " ", keys %endings; | |
6642 | die <<EOM; | |
6643 | Unrecognized line ending '$ole'; expecting one of: $str | |
6644 | EOM | |
6645 | } | |
6646 | if ( $rOpts->{'preserve-line-endings'} ) { | |
6647 | warn "Ignoring -ple; conflicts with -ole\n"; | |
6648 | $rOpts->{'preserve-line-endings'} = undef; | |
6649 | } | |
6650 | } | |
6651 | ||
6652 | # hashes used to simplify setting whitespace | |
6653 | %tightness = ( | |
6654 | '{' => $rOpts->{'brace-tightness'}, | |
6655 | '}' => $rOpts->{'brace-tightness'}, | |
6656 | '(' => $rOpts->{'paren-tightness'}, | |
6657 | ')' => $rOpts->{'paren-tightness'}, | |
6658 | '[' => $rOpts->{'square-bracket-tightness'}, | |
6659 | ']' => $rOpts->{'square-bracket-tightness'}, | |
6660 | ); | |
6661 | %matching_token = ( | |
6662 | '{' => '}', | |
6663 | '(' => ')', | |
6664 | '[' => ']', | |
6665 | '?' => ':', | |
6666 | ); | |
6667 | ||
6668 | # frequently used parameters | |
6669 | $rOpts_add_newlines = $rOpts->{'add-newlines'}; | |
6670 | $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; | |
6671 | $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; | |
6672 | $rOpts_block_brace_vertical_tightness = | |
6673 | $rOpts->{'block-brace-vertical-tightness'}; | |
6674 | $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; | |
6675 | $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; | |
6676 | $rOpts_break_at_old_trinary_breakpoints = | |
6677 | $rOpts->{'break-at-old-trinary-breakpoints'}; | |
6678 | $rOpts_break_at_old_comma_breakpoints = | |
6679 | $rOpts->{'break-at-old-comma-breakpoints'}; | |
6680 | $rOpts_break_at_old_keyword_breakpoints = | |
6681 | $rOpts->{'break-at-old-keyword-breakpoints'}; | |
6682 | $rOpts_break_at_old_logical_breakpoints = | |
6683 | $rOpts->{'break-at-old-logical-breakpoints'}; | |
6684 | $rOpts_closing_side_comment_else_flag = | |
6685 | $rOpts->{'closing-side-comment-else-flag'}; | |
6686 | $rOpts_closing_side_comment_maximum_text = | |
6687 | $rOpts->{'closing-side-comment-maximum-text'}; | |
6688 | $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; | |
6689 | $rOpts_cuddled_else = $rOpts->{'cuddled-else'}; | |
6690 | $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; | |
6691 | $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; | |
6692 | $rOpts_indent_columns = $rOpts->{'indent-columns'}; | |
6693 | $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; | |
6694 | $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; | |
6695 | $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; | |
6696 | $rOpts_short_concatenation_item_length = | |
6697 | $rOpts->{'short-concatenation-item-length'}; | |
6698 | $rOpts_swallow_optional_blank_lines = | |
6699 | $rOpts->{'swallow-optional-blank-lines'}; | |
6700 | $rOpts_ignore_old_line_breaks = $rOpts->{'ignore-old-line-breaks'}; | |
6701 | $half_maximum_line_length = $rOpts_maximum_line_length / 2; | |
6702 | ||
6703 | # Note that both opening and closing tokens can access the opening | |
6704 | # and closing flags of their container types. | |
6705 | %opening_vertical_tightness = ( | |
6706 | '(' => $rOpts->{'paren-vertical-tightness'}, | |
6707 | '{' => $rOpts->{'brace-vertical-tightness'}, | |
6708 | '[' => $rOpts->{'square-bracket-vertical-tightness'}, | |
6709 | ')' => $rOpts->{'paren-vertical-tightness'}, | |
6710 | '}' => $rOpts->{'brace-vertical-tightness'}, | |
6711 | ']' => $rOpts->{'square-bracket-vertical-tightness'}, | |
6712 | ); | |
6713 | ||
6714 | %closing_vertical_tightness = ( | |
6715 | '(' => $rOpts->{'paren-vertical-tightness-closing'}, | |
6716 | '{' => $rOpts->{'brace-vertical-tightness-closing'}, | |
6717 | '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, | |
6718 | ')' => $rOpts->{'paren-vertical-tightness-closing'}, | |
6719 | '}' => $rOpts->{'brace-vertical-tightness-closing'}, | |
6720 | ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, | |
6721 | ); | |
6722 | ||
6723 | # assume flag for '>' same as ')' for closing qw quotes | |
6724 | %closing_token_indentation = ( | |
6725 | ')' => $rOpts->{'closing-paren-indentation'}, | |
6726 | '}' => $rOpts->{'closing-brace-indentation'}, | |
6727 | ']' => $rOpts->{'closing-square-bracket-indentation'}, | |
6728 | '>' => $rOpts->{'closing-paren-indentation'}, | |
6729 | ); | |
6730 | } | |
6731 | ||
6732 | sub make_static_block_comment_pattern { | |
6733 | ||
6734 | # create the pattern used to identify static block comments | |
6735 | $static_block_comment_pattern = '^(\s*)##'; | |
6736 | ||
6737 | # allow the user to change it | |
6738 | if ( $rOpts->{'static-block-comment-prefix'} ) { | |
6739 | my $prefix = $rOpts->{'static-block-comment-prefix'}; | |
6740 | $prefix =~ s/^\s*//; | |
6741 | if ( $prefix !~ /^#/ ) { | |
6742 | die "ERROR: the -sbcp prefix '$prefix' must begin with '#'\n"; | |
6743 | ||
6744 | } | |
6745 | my $pattern = '^(\s*)' . $prefix; | |
6746 | eval "'##'=~/$pattern/"; | |
6747 | if ($@) { | |
6748 | die | |
6749 | "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; | |
6750 | } | |
6751 | $static_block_comment_pattern = $pattern; | |
6752 | } | |
6753 | } | |
6754 | ||
6755 | sub make_closing_side_comment_list_pattern { | |
6756 | ||
6757 | # turn any input list into a regex for recognizing selected block types | |
6758 | $closing_side_comment_list_pattern = '^\w+'; | |
6759 | if ( defined( $rOpts->{'closing-side-comment-list'} ) | |
6760 | && $rOpts->{'closing-side-comment-list'} ) | |
6761 | { | |
6762 | $closing_side_comment_list_pattern = | |
6763 | make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); | |
6764 | } | |
6765 | } | |
6766 | ||
6767 | sub make_bli_pattern { | |
6768 | ||
6769 | if ( | |
6770 | defined( | |
6771 | $rOpts->{'brace-left-and-indent-list'} | |
6772 | && $rOpts->{'brace-left-and-indent-list'} | |
6773 | ) | |
6774 | ) | |
6775 | { | |
6776 | $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; | |
6777 | } | |
6778 | ||
6779 | $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); | |
6780 | } | |
6781 | ||
6782 | sub make_block_brace_vertical_tightness_pattern { | |
6783 | ||
6784 | # turn any input list into a regex for recognizing selected block types | |
6785 | $block_brace_vertical_tightness_pattern = | |
6786 | '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; | |
6787 | ||
6788 | if ( | |
6789 | defined( | |
6790 | $rOpts->{'block-brace-vertical-tightness-list'} | |
6791 | && $rOpts->{'block-brace-vertical-tightness-list'} | |
6792 | ) | |
6793 | ) | |
6794 | { | |
6795 | $block_brace_vertical_tightness_pattern = | |
6796 | make_block_pattern( '-bbvtl', | |
6797 | $rOpts->{'block-brace-vertical-tightness-list'} ); | |
6798 | } | |
6799 | } | |
6800 | ||
6801 | sub make_block_pattern { | |
6802 | ||
6803 | # given a string of block-type keywords, return a regex to match them | |
6804 | # The only tricky part is that labels are indicated with a single ':' | |
6805 | # and the 'sub' token text may have additional text after it (name of | |
6806 | # sub). | |
6807 | # | |
6808 | # Example: | |
6809 | # | |
6810 | # input string: "if else elsif unless while for foreach do : sub"; | |
6811 | # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; | |
6812 | ||
6813 | my ( $abbrev, $string ) = @_; | |
6814 | $string =~ s/^\s+//; | |
6815 | $string =~ s/\s+$//; | |
6816 | my @list = split /\s+/, $string; | |
6817 | my @words = (); | |
6818 | my %seen; | |
6819 | for my $i (@list) { | |
6820 | next if $seen{$i}; | |
6821 | $seen{$i} = 1; | |
6822 | if ( $i eq 'sub' ) { | |
6823 | } | |
6824 | elsif ( $i eq ':' ) { | |
6825 | push @words, '\w+:'; | |
6826 | } | |
6827 | elsif ( $i =~ /^\w/ ) { | |
6828 | push @words, $i; | |
6829 | } | |
6830 | else { | |
6831 | warn "unrecognized block type $i after $abbrev, ignoring\n"; | |
6832 | } | |
6833 | } | |
6834 | my $pattern = '(' . join( '|', @words ) . ')$'; | |
6835 | if ( $seen{'sub'} ) { | |
6836 | $pattern = '(' . $pattern . '|sub)'; | |
6837 | } | |
6838 | $pattern = '^' . $pattern; | |
6839 | return $pattern; | |
6840 | } | |
6841 | ||
6842 | sub make_static_side_comment_pattern { | |
6843 | ||
6844 | # create the pattern used to identify static side comments | |
6845 | $static_side_comment_pattern = '^##'; | |
6846 | ||
6847 | # allow the user to change it | |
6848 | if ( $rOpts->{'static-side-comment-prefix'} ) { | |
6849 | my $prefix = $rOpts->{'static-side-comment-prefix'}; | |
6850 | $prefix =~ s/^\s*//; | |
6851 | my $pattern = '^' . $prefix; | |
6852 | eval "'##'=~/$pattern/"; | |
6853 | if ($@) { | |
6854 | die | |
6855 | "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; | |
6856 | } | |
6857 | $static_side_comment_pattern = $pattern; | |
6858 | } | |
6859 | } | |
6860 | ||
6861 | sub make_closing_side_comment_prefix { | |
6862 | ||
6863 | # Be sure we have a valid closing side comment prefix | |
6864 | my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; | |
6865 | my $csc_prefix_pattern; | |
6866 | if ( !defined($csc_prefix) ) { | |
6867 | $csc_prefix = '## end'; | |
6868 | $csc_prefix_pattern = '^##\s+end'; | |
6869 | } | |
6870 | else { | |
6871 | my $test_csc_prefix = $csc_prefix; | |
6872 | if ( $test_csc_prefix !~ /^#/ ) { | |
6873 | $test_csc_prefix = '#' . $test_csc_prefix; | |
6874 | } | |
6875 | ||
6876 | # make a regex to recognize the prefix | |
6877 | my $test_csc_prefix_pattern = $test_csc_prefix; | |
6878 | ||
6879 | # escape any special characters | |
6880 | $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; | |
6881 | ||
6882 | $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; | |
6883 | ||
6884 | # allow exact number of intermediate spaces to vary | |
6885 | $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; | |
6886 | ||
6887 | # make sure we have a good pattern | |
6888 | # if we fail this we probably have an error in escaping | |
6889 | # characters. | |
6890 | eval "'##'=~/$test_csc_prefix_pattern/"; | |
6891 | if ($@) { | |
6892 | ||
6893 | # shouldn't happen..must have screwed up escaping, above | |
6894 | report_definite_bug(); | |
6895 | warn | |
6896 | "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; | |
6897 | ||
6898 | # just warn and keep going with defaults | |
6899 | warn "Please consider using a simpler -cscp prefix\n"; | |
6900 | warn "Using default -cscp instead; please check output\n"; | |
6901 | } | |
6902 | else { | |
6903 | $csc_prefix = $test_csc_prefix; | |
6904 | $csc_prefix_pattern = $test_csc_prefix_pattern; | |
6905 | } | |
6906 | } | |
6907 | $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; | |
6908 | $closing_side_comment_prefix_pattern = $csc_prefix_pattern; | |
6909 | } | |
6910 | ||
6911 | sub dump_want_left_space { | |
6912 | my $fh = shift; | |
6913 | local $" = "\n"; | |
6914 | print $fh <<EOM; | |
6915 | These values are the main control of whitespace to the left of a token type; | |
6916 | They may be altered with the -wls parameter. | |
6917 | For a list of token types, use perltidy --dump-token-types (-dtt) | |
6918 | 1 means the token wants a space to its left | |
6919 | -1 means the token does not want a space to its left | |
6920 | ------------------------------------------------------------------------ | |
6921 | EOM | |
6922 | foreach ( sort keys %want_left_space ) { | |
6923 | print $fh "$_\t$want_left_space{$_}\n"; | |
6924 | } | |
6925 | } | |
6926 | ||
6927 | sub dump_want_right_space { | |
6928 | my $fh = shift; | |
6929 | local $" = "\n"; | |
6930 | print $fh <<EOM; | |
6931 | These values are the main control of whitespace to the right of a token type; | |
6932 | They may be altered with the -wrs parameter. | |
6933 | For a list of token types, use perltidy --dump-token-types (-dtt) | |
6934 | 1 means the token wants a space to its right | |
6935 | -1 means the token does not want a space to its right | |
6936 | ------------------------------------------------------------------------ | |
6937 | EOM | |
6938 | foreach ( sort keys %want_right_space ) { | |
6939 | print $fh "$_\t$want_right_space{$_}\n"; | |
6940 | } | |
6941 | } | |
6942 | ||
6943 | { # begin is_essential_whitespace | |
6944 | ||
6945 | my %is_sort_grep_map; | |
6946 | my %is_for_foreach; | |
6947 | ||
6948 | BEGIN { | |
6949 | ||
6950 | @_ = qw(sort grep map); | |
6951 | @is_sort_grep_map{@_} = (1) x scalar(@_); | |
6952 | ||
6953 | @_ = qw(for foreach); | |
6954 | @is_for_foreach{@_} = (1) x scalar(@_); | |
6955 | ||
6956 | } | |
6957 | ||
6958 | sub is_essential_whitespace { | |
6959 | ||
6960 | # Essential whitespace means whitespace which cannot be safely deleted. | |
6961 | # We are given three tokens and their types: | |
6962 | # ($tokenl, $typel) is the token to the left of the space in question | |
6963 | # ($tokenr, $typer) is the token to the right of the space in question | |
6964 | # ($tokenll, $typell) is previous nonblank token to the left of $tokenl | |
6965 | # | |
6966 | # This is a slow routine but is not needed too often except when -mangle | |
6967 | # is used. | |
6968 | my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; | |
6969 | ||
6970 | # never combine two bare words or numbers | |
6971 | my $result = ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) ) | |
6972 | ||
6973 | # do not combine a number with a concatination dot | |
6974 | # example: pom.caputo: | |
6975 | # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n"); | |
6976 | || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) ) | |
6977 | || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) ) | |
6978 | ||
6979 | # do not join a minus with a bare word, because you might form | |
6980 | # a file test operator. Example from Complex.pm: | |
6981 | # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test. | |
6982 | || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) ) | |
6983 | ||
6984 | # and something like this could become ambiguous without space | |
6985 | # after the '-': | |
6986 | # use constant III=>1; | |
6987 | # $a = $b - III; | |
6988 | # and even this: | |
6989 | # $a = - III; | |
6990 | || ( ( $tokenl eq '-' ) | |
6991 | && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) | |
6992 | ||
6993 | # '= -' should not become =- or you will get a warning | |
6994 | # about reversed -= | |
6995 | # || ($tokenr eq '-') | |
6996 | ||
6997 | # keep a space between a quote and a bareword to prevent the | |
6998 | # bareword from becomming a quote modifier. | |
6999 | || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) | |
7000 | ||
7001 | # keep a space between a token ending in '$' and any word; | |
7002 | # this caused trouble: "die @$ if $@" | |
7003 | || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) | |
7004 | && ( $tokenr =~ /^[a-zA-Z_]/ ) ) | |
7005 | ||
7006 | # perl is very fussy about spaces before << | |
7007 | || ( $tokenr =~ /^\<\</ ) | |
7008 | ||
7009 | # avoid combining tokens to create new meanings. Example: | |
7010 | # $a+ +$b must not become $a++$b | |
7011 | || ( $is_digraph{ $tokenl . $tokenr } ) | |
7012 | || ( $is_trigraph{ $tokenl . $tokenr } ) | |
7013 | ||
7014 | # another example: do not combine these two &'s: | |
7015 | # allow_options & &OPT_EXECCGI | |
7016 | || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } ) | |
7017 | ||
7018 | # don't combine $$ or $# with any alphanumeric | |
7019 | # (testfile mangle.t with --mangle) | |
7020 | || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) ) | |
7021 | ||
7022 | # retain any space after possible filehandle | |
7023 | # (testfiles prnterr1.t with --extrude and mangle.t with --mangle) | |
7024 | || ( $typel eq 'Z' || $typell eq 'Z' ) | |
7025 | ||
7026 | # keep paren separate in 'use Foo::Bar ()' | |
7027 | || ( $tokenr eq '(' | |
7028 | && $typel eq 'w' | |
7029 | && $typell eq 'k' | |
7030 | && $tokenll eq 'use' ) | |
7031 | ||
7032 | # keep any space between filehandle and paren: | |
7033 | # file mangle.t with --mangle: | |
7034 | || ( $typel eq 'Y' && $tokenr eq '(' ) | |
7035 | ||
7036 | # retain any space after here doc operator ( hereerr.t) | |
7037 | || ( $typel eq 'h' ) | |
7038 | ||
7039 | # FIXME: this needs some further work; extrude.t has test cases | |
7040 | # it is safest to retain any space after start of ? : operator | |
7041 | # because of perl's quirky parser. | |
7042 | # ie, this line will fail if you remove the space after the '?': | |
7043 | # $b=join $comma ? ',' : ':', @_; # ok | |
7044 | # $b=join $comma ?',' : ':', @_; # error! | |
7045 | # but this is ok :) | |
7046 | # $b=join $comma?',' : ':', @_; # not a problem! | |
7047 | ## || ($typel eq '?') | |
7048 | ||
7049 | # be careful with a space around ++ and --, to avoid ambiguity as to | |
7050 | # which token it applies | |
7051 | || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) ) | |
7052 | || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) ) | |
7053 | ||
7054 | # need space after foreach my; for example, this will fail in | |
7055 | # older versions of Perl: | |
7056 | # foreach my$ft(@filetypes)... | |
7057 | || ( | |
7058 | $tokenl eq 'my' | |
7059 | ||
7060 | # /^(for|foreach)$/ | |
7061 | && $is_for_foreach{$tokenll} && $tokenr =~ /^\$/ | |
7062 | ) | |
7063 | ||
7064 | # must have space between grep and left paren; "grep(" will fail | |
7065 | || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} ) | |
7066 | ||
7067 | # don't stick numbers next to left parens, as in: | |
7068 | #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm) | |
7069 | || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) ) | |
7070 | ||
7071 | # don't join something like: for bla::bla:: abc | |
7072 | # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl | |
7073 | || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) ) | |
7074 | ; # the value of this long logic sequence is the result we want | |
7075 | return $result; | |
7076 | } | |
7077 | } | |
7078 | ||
7079 | sub set_white_space_flag { | |
7080 | ||
7081 | # This routine examines each pair of nonblank tokens and | |
7082 | # sets values for array @white_space_flag. | |
7083 | # | |
7084 | # $white_space_flag[$j] is a flag indicating whether a white space | |
7085 | # BEFORE token $j is needed, with the following values: | |
7086 | # | |
7087 | # -1 do not want a space before token $j | |
7088 | # 0 optional space or $j is a whitespace | |
7089 | # 1 want a space before token $j | |
7090 | # | |
7091 | # | |
7092 | # The values for the first token will be defined based | |
7093 | # upon the contents of the "to_go" output array. | |
7094 | # | |
7095 | # Note: retain debug print statements because they are usually | |
7096 | # required after adding new token types. | |
7097 | ||
7098 | BEGIN { | |
7099 | ||
7100 | # initialize these global hashes, which control the use of | |
7101 | # whitespace around tokens: | |
7102 | # | |
7103 | # %binary_ws_rules | |
7104 | # %want_left_space | |
7105 | # %want_right_space | |
7106 | # %space_after_keyword | |
7107 | # | |
7108 | # Many token types are identical to the tokens themselves. | |
7109 | # See the tokenizer for a complete list. Here are some special types: | |
7110 | # k = perl keyword | |
7111 | # f = semicolon in for statement | |
7112 | # m = unary minus | |
7113 | # p = unary plus | |
7114 | # Note that :: is excluded since it should be contained in an identifier | |
7115 | # Note that '->' is excluded because it never gets space | |
7116 | # parentheses and brackets are excluded since they are handled specially | |
7117 | # curly braces are included but may be overridden by logic, such as | |
7118 | # newline logic. | |
7119 | ||
7120 | # NEW_TOKENS: create a whitespace rule here. This can be as | |
7121 | # simple as adding your new letter to @spaces_both_sides, for | |
7122 | # example. | |
7123 | ||
7124 | @_ = qw" L { ( [ "; | |
7125 | @is_opening_type{@_} = (1) x scalar(@_); | |
7126 | ||
7127 | @_ = qw" R } ) ] "; | |
7128 | @is_closing_type{@_} = (1) x scalar(@_); | |
7129 | ||
7130 | my @spaces_both_sides = qw" | |
7131 | + - * / % ? = . : x < > | & ^ .. << >> ** && .. || => += -= | |
7132 | .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= | |
7133 | &&= ||= <=> A k f w F n C Y U G v | |
7134 | "; | |
7135 | ||
7136 | my @spaces_left_side = qw" | |
7137 | t ! ~ m p { \ h pp mm Z j | |
7138 | "; | |
7139 | push( @spaces_left_side, '#' ); # avoids warning message | |
7140 | ||
7141 | my @spaces_right_side = qw" | |
7142 | ; } ) ] R J ++ -- **= | |
7143 | "; | |
7144 | push( @spaces_right_side, ',' ); # avoids warning message | |
7145 | @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); | |
7146 | @want_right_space{@spaces_both_sides} = | |
7147 | (1) x scalar(@spaces_both_sides); | |
7148 | @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); | |
7149 | @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); | |
7150 | @want_left_space{@spaces_right_side} = | |
7151 | (-1) x scalar(@spaces_right_side); | |
7152 | @want_right_space{@spaces_right_side} = | |
7153 | (1) x scalar(@spaces_right_side); | |
7154 | $want_left_space{'L'} = WS_NO; | |
7155 | $want_left_space{'->'} = WS_NO; | |
7156 | $want_right_space{'->'} = WS_NO; | |
7157 | $want_left_space{'**'} = WS_NO; | |
7158 | $want_right_space{'**'} = WS_NO; | |
7159 | ||
7160 | # hash type information must stay tightly bound | |
7161 | # as in : ${xxxx} | |
7162 | $binary_ws_rules{'i'}{'L'} = WS_NO; | |
7163 | $binary_ws_rules{'i'}{'{'} = WS_YES; | |
7164 | $binary_ws_rules{'k'}{'{'} = WS_YES; | |
7165 | $binary_ws_rules{'U'}{'{'} = WS_YES; | |
7166 | $binary_ws_rules{'i'}{'['} = WS_NO; | |
7167 | $binary_ws_rules{'R'}{'L'} = WS_NO; | |
7168 | $binary_ws_rules{'R'}{'{'} = WS_NO; | |
7169 | $binary_ws_rules{'t'}{'L'} = WS_NO; | |
7170 | $binary_ws_rules{'t'}{'{'} = WS_NO; | |
7171 | $binary_ws_rules{'}'}{'L'} = WS_NO; | |
7172 | $binary_ws_rules{'}'}{'{'} = WS_NO; | |
7173 | $binary_ws_rules{'$'}{'L'} = WS_NO; | |
7174 | $binary_ws_rules{'$'}{'{'} = WS_NO; | |
7175 | $binary_ws_rules{'@'}{'L'} = WS_NO; | |
7176 | $binary_ws_rules{'@'}{'{'} = WS_NO; | |
7177 | $binary_ws_rules{'='}{'L'} = WS_YES; | |
7178 | ||
7179 | # the following includes ') {' | |
7180 | # as in : if ( xxx ) { yyy } | |
7181 | $binary_ws_rules{']'}{'L'} = WS_NO; | |
7182 | $binary_ws_rules{']'}{'{'} = WS_NO; | |
7183 | $binary_ws_rules{')'}{'{'} = WS_YES; | |
7184 | $binary_ws_rules{')'}{'['} = WS_NO; | |
7185 | $binary_ws_rules{']'}{'['} = WS_NO; | |
7186 | $binary_ws_rules{']'}{'{'} = WS_NO; | |
7187 | $binary_ws_rules{'}'}{'['} = WS_NO; | |
7188 | $binary_ws_rules{'R'}{'['} = WS_NO; | |
7189 | ||
7190 | $binary_ws_rules{']'}{'++'} = WS_NO; | |
7191 | $binary_ws_rules{']'}{'--'} = WS_NO; | |
7192 | $binary_ws_rules{')'}{'++'} = WS_NO; | |
7193 | $binary_ws_rules{')'}{'--'} = WS_NO; | |
7194 | ||
7195 | $binary_ws_rules{'R'}{'++'} = WS_NO; | |
7196 | $binary_ws_rules{'R'}{'--'} = WS_NO; | |
7197 | ||
7198 | $binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label | |
7199 | $binary_ws_rules{'w'}{':'} = WS_NO; | |
7200 | $binary_ws_rules{'i'}{'Q'} = WS_YES; | |
7201 | $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' | |
7202 | ||
7203 | # FIXME: we need to split 'i' into variables and functions | |
7204 | # and have no space for functions but space for variables. For now, | |
7205 | # I have a special patch in the special rules below | |
7206 | $binary_ws_rules{'i'}{'('} = WS_NO; | |
7207 | ||
7208 | $binary_ws_rules{'w'}{'('} = WS_NO; | |
7209 | $binary_ws_rules{'w'}{'{'} = WS_YES; | |
7210 | } | |
7211 | my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; | |
7212 | my ( $last_token, $last_type, $last_block_type, $token, $type, | |
7213 | $block_type ); | |
7214 | my (@white_space_flag); | |
7215 | my $j_tight_closing_paren = -1; | |
7216 | ||
7217 | if ( $max_index_to_go >= 0 ) { | |
7218 | $token = $tokens_to_go[$max_index_to_go]; | |
7219 | $type = $types_to_go[$max_index_to_go]; | |
7220 | $block_type = $block_type_to_go[$max_index_to_go]; | |
7221 | } | |
7222 | else { | |
7223 | $token = ' '; | |
7224 | $type = 'b'; | |
7225 | $block_type = ''; | |
7226 | } | |
7227 | ||
7228 | # loop over all tokens | |
7229 | my ( $j, $ws ); | |
7230 | ||
7231 | for ( $j = 0 ; $j <= $jmax ; $j++ ) { | |
7232 | ||
7233 | if ( $$rtoken_type[$j] eq 'b' ) { | |
7234 | $white_space_flag[$j] = WS_OPTIONAL; | |
7235 | next; | |
7236 | } | |
7237 | ||
7238 | # set a default value, to be changed as needed | |
7239 | $ws = undef; | |
7240 | $last_token = $token; | |
7241 | $last_type = $type; | |
7242 | $last_block_type = $block_type; | |
7243 | $token = $$rtokens[$j]; | |
7244 | $type = $$rtoken_type[$j]; | |
7245 | $block_type = $$rblock_type[$j]; | |
7246 | ||
7247 | #--------------------------------------------------------------- | |
7248 | # section 1: | |
7249 | # handle space on the inside of opening braces | |
7250 | #--------------------------------------------------------------- | |
7251 | ||
7252 | # /^[L\{\(\[]$/ | |
7253 | if ( $is_opening_type{$last_type} ) { | |
7254 | ||
7255 | $j_tight_closing_paren = -1; | |
7256 | ||
7257 | # let's keep empty matched braces together: () {} [] | |
7258 | # except for BLOCKS | |
7259 | if ( $token eq $matching_token{$last_token} ) { | |
7260 | if ($block_type) { | |
7261 | $ws = WS_YES; | |
7262 | } | |
7263 | else { | |
7264 | $ws = WS_NO; | |
7265 | } | |
7266 | } | |
7267 | else { | |
7268 | ||
7269 | # we're considering the right of an opening brace | |
7270 | # tightness = 0 means always pad inside with space | |
7271 | # tightness = 1 means pad inside if "complex" | |
7272 | # tightness = 2 means never pad inside with space | |
7273 | ||
7274 | my $tightness; | |
7275 | if ( $last_type eq '{' | |
7276 | && $last_token eq '{' | |
7277 | && $last_block_type ) | |
7278 | { | |
7279 | $tightness = $rOpts_block_brace_tightness; | |
7280 | } | |
7281 | else { $tightness = $tightness{$last_token} } | |
7282 | ||
7283 | if ( $tightness <= 0 ) { | |
7284 | $ws = WS_YES; | |
7285 | } | |
7286 | elsif ( $tightness > 1 ) { | |
7287 | $ws = WS_NO; | |
7288 | } | |
7289 | else { | |
7290 | ||
7291 | # Patch to count '-foo' as single token so that | |
7292 | # each of $a{-foo} and $a{foo} and $a{'foo'} do | |
7293 | # not get spaces with default formatting. | |
7294 | my $j_here = $j; | |
7295 | ++$j_here | |
7296 | if ( $token eq '-' | |
7297 | && $last_token eq '{' | |
7298 | && $$rtoken_type[ $j + 1 ] eq 'w' ); | |
7299 | ||
7300 | # $j_next is where a closing token should be if | |
7301 | # the container has a single token | |
7302 | my $j_next = | |
7303 | ( $$rtoken_type[ $j_here + 1 ] eq 'b' ) | |
7304 | ? $j_here + 2 | |
7305 | : $j_here + 1; | |
7306 | my $tok_next = $$rtokens[$j_next]; | |
7307 | my $type_next = $$rtoken_type[$j_next]; | |
7308 | ||
7309 | # for tightness = 1, if there is just one token | |
7310 | # within the matching pair, we will keep it tight | |
7311 | if ( | |
7312 | $tok_next eq $matching_token{$last_token} | |
7313 | ||
7314 | # but watch out for this: [ [ ] (misc.t) | |
7315 | && $last_token ne $token | |
7316 | ) | |
7317 | { | |
7318 | ||
7319 | # remember where to put the space for the closing paren | |
7320 | $j_tight_closing_paren = $j_next; | |
7321 | $ws = WS_NO; | |
7322 | } | |
7323 | else { | |
7324 | $ws = WS_YES; | |
7325 | } | |
7326 | } | |
7327 | } | |
7328 | } # done with opening braces and brackets | |
7329 | my $ws_1 = $ws | |
7330 | if FORMATTER_DEBUG_FLAG_WHITE; | |
7331 | ||
7332 | #--------------------------------------------------------------- | |
7333 | # section 2: | |
7334 | # handle space on inside of closing brace pairs | |
7335 | #--------------------------------------------------------------- | |
7336 | ||
7337 | # /[\}\)\]R]/ | |
7338 | if ( $is_closing_type{$type} ) { | |
7339 | ||
7340 | if ( $j == $j_tight_closing_paren ) { | |
7341 | ||
7342 | $j_tight_closing_paren = -1; | |
7343 | $ws = WS_NO; | |
7344 | } | |
7345 | else { | |
7346 | ||
7347 | if ( !defined($ws) ) { | |
7348 | ||
7349 | my $tightness; | |
7350 | if ( $type eq '}' && $token eq '}' && $block_type ) { | |
7351 | $tightness = $rOpts_block_brace_tightness; | |
7352 | } | |
7353 | else { $tightness = $tightness{$token} } | |
7354 | ||
7355 | $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; | |
7356 | } | |
7357 | } | |
7358 | } | |
7359 | ||
7360 | my $ws_2 = $ws | |
7361 | if FORMATTER_DEBUG_FLAG_WHITE; | |
7362 | ||
7363 | #--------------------------------------------------------------- | |
7364 | # section 3: | |
7365 | # use the binary table | |
7366 | #--------------------------------------------------------------- | |
7367 | if ( !defined($ws) ) { | |
7368 | $ws = $binary_ws_rules{$last_type}{$type}; | |
7369 | } | |
7370 | my $ws_3 = $ws | |
7371 | if FORMATTER_DEBUG_FLAG_WHITE; | |
7372 | ||
7373 | #--------------------------------------------------------------- | |
7374 | # section 4: | |
7375 | # some special cases | |
7376 | #--------------------------------------------------------------- | |
7377 | if ( $token eq '(' ) { | |
7378 | ||
7379 | # This will have to be tweaked as tokenization changes. | |
7380 | # We want a space after certain block types: | |
7381 | # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); | |
7382 | # | |
7383 | # But not others: | |
7384 | # &{ $_->[1] } ( delete $_[$#_]{ $_->[0] } ); | |
7385 | # At present, the & block is not marked as a code block, so | |
7386 | # this works: | |
7387 | if ( $last_type eq '}' ) { | |
7388 | ||
7389 | if ( $is_sort_map_grep{$last_block_type} ) { | |
7390 | $ws = WS_YES; | |
7391 | } | |
7392 | else { | |
7393 | $ws = WS_NO; | |
7394 | } | |
7395 | } | |
7396 | ||
7397 | # ----------------------------------------------------- | |
7398 | # 'w' and 'i' checks for something like: | |
7399 | # myfun( &myfun( ->myfun( | |
7400 | # ----------------------------------------------------- | |
7401 | if ( ( $last_type =~ /^[wkU]$/ ) | |
7402 | || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) | |
7403 | { | |
7404 | ||
7405 | # Do not introduce new space between keyword or function | |
7406 | # ( except in special cases) because this can | |
7407 | # introduce errors in some cases ( prnterr1.t ) | |
7408 | unless ( $last_type eq 'k' | |
7409 | && $space_after_keyword{$last_token} ) | |
7410 | { | |
7411 | $ws = WS_NO; | |
7412 | } | |
7413 | } | |
7414 | ||
7415 | # space between something like $i and ( in | |
7416 | # for $i ( 0 .. 20 ) { | |
7417 | # FIXME: eventually, type 'i' needs to be split into multiple | |
7418 | # token types so this can be a hardwired rule. | |
7419 | elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { | |
7420 | $ws = WS_YES; | |
7421 | } | |
7422 | ||
7423 | # allow constant function followed by '()' to retain no space | |
7424 | elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) { | |
7425 | ; | |
7426 | $ws = WS_NO; | |
7427 | } | |
7428 | } | |
7429 | ||
7430 | # patch for SWITCH/CASE: make space at ']{' optional | |
7431 | # since the '{' might begin a case or when block | |
7432 | elsif ( $token eq '{' && $last_token eq ']' ) { | |
7433 | $ws = WS_OPTIONAL; | |
7434 | } | |
7435 | ||
7436 | # keep space between 'sub' and '{' for anonymous sub definition | |
7437 | if ( $type eq '{' ) { | |
7438 | if ( $last_token eq 'sub' ) { | |
7439 | $ws = WS_YES; | |
7440 | } | |
7441 | ||
7442 | # this is needed to avoid no space in '){' | |
7443 | if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } | |
7444 | ||
7445 | # avoid any space before the brace or bracket in something like | |
7446 | # @opts{'a','b',...} | |
7447 | if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { | |
7448 | $ws = WS_NO; | |
7449 | } | |
7450 | } | |
7451 | ||
7452 | elsif ( $type eq 'i' ) { | |
7453 | ||
7454 | # never a space before -> | |
7455 | if ( $token =~ /^\-\>/ ) { | |
7456 | $ws = WS_NO; | |
7457 | } | |
7458 | } | |
7459 | ||
7460 | # retain any space between '-' and bare word | |
7461 | elsif ( $type eq 'w' || $type eq 'C' ) { | |
7462 | $ws = WS_OPTIONAL if $last_type eq '-'; | |
7463 | ||
7464 | # never a space before -> | |
7465 | if ( $token =~ /^\-\>/ ) { | |
7466 | $ws = WS_NO; | |
7467 | } | |
7468 | } | |
7469 | ||
7470 | # retain any space between '-' and bare word | |
7471 | # example: avoid space between 'USER' and '-' here: | |
7472 | # $myhash{USER-NAME}='steve'; | |
7473 | elsif ( $type eq 'm' || $type eq '-' ) { | |
7474 | $ws = WS_OPTIONAL if ( $last_type eq 'w' ); | |
7475 | } | |
7476 | ||
7477 | # always space before side comment | |
7478 | elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } | |
7479 | ||
7480 | # always preserver whatever space was used after a possible | |
7481 | # filehandle or here doc operator | |
7482 | if ( $type ne '#' && ( $last_type eq 'Z' || $last_type eq 'h' ) ) { | |
7483 | $ws = WS_OPTIONAL; | |
7484 | } | |
7485 | ||
7486 | my $ws_4 = $ws | |
7487 | if FORMATTER_DEBUG_FLAG_WHITE; | |
7488 | ||
7489 | #--------------------------------------------------------------- | |
7490 | # section 5: | |
7491 | # default rules not covered above | |
7492 | #--------------------------------------------------------------- | |
7493 | # if we fall through to here, | |
7494 | # look at the pre-defined hash tables for the two tokens, and | |
7495 | # if (they are equal) use the common value | |
7496 | # if (either is zero or undef) use the other | |
7497 | # if (either is -1) use it | |
7498 | # That is, | |
7499 | # left vs right | |
7500 | # 1 vs 1 --> 1 | |
7501 | # 0 vs 0 --> 0 | |
7502 | # -1 vs -1 --> -1 | |
7503 | # | |
7504 | # 0 vs -1 --> -1 | |
7505 | # 0 vs 1 --> 1 | |
7506 | # 1 vs 0 --> 1 | |
7507 | # -1 vs 0 --> -1 | |
7508 | # | |
7509 | # -1 vs 1 --> -1 | |
7510 | # 1 vs -1 --> -1 | |
7511 | if ( !defined($ws) ) { | |
7512 | my $wl = $want_left_space{$type}; | |
7513 | my $wr = $want_right_space{$last_type}; | |
7514 | if ( !defined($wl) ) { $wl = 0 } | |
7515 | if ( !defined($wr) ) { $wr = 0 } | |
7516 | $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; | |
7517 | } | |
7518 | ||
7519 | if ( !defined($ws) ) { | |
7520 | $ws = 0; | |
7521 | write_diagnostics( | |
7522 | "WS flag is undefined for tokens $last_token $token\n"); | |
7523 | } | |
7524 | ||
7525 | # Treat newline as a whitespace. Otherwise, we might combine | |
7526 | # 'Send' and '-recipients' here according to the above rules: | |
7527 | # my $msg = new Fax::Send | |
7528 | # -recipients => $to, | |
7529 | # -data => $data; | |
7530 | if ( $ws == 0 && $j == 0 ) { $ws = 1 } | |
7531 | ||
7532 | if ( ( $ws == 0 ) | |
7533 | && $j > 0 | |
7534 | && $j < $jmax | |
7535 | && ( $last_type !~ /^[Zh]$/ ) ) | |
7536 | { | |
7537 | ||
7538 | # If this happens, we have a non-fatal but undesirable | |
7539 | # hole in the above rules which should be patched. | |
7540 | write_diagnostics( | |
7541 | "WS flag is zero for tokens $last_token $token\n"); | |
7542 | } | |
7543 | $white_space_flag[$j] = $ws; | |
7544 | ||
7545 | FORMATTER_DEBUG_FLAG_WHITE && do { | |
7546 | my $str = substr( $last_token, 0, 15 ); | |
7547 | $str .= ' ' x ( 16 - length($str) ); | |
7548 | if ( !defined($ws_1) ) { $ws_1 = "*" } | |
7549 | if ( !defined($ws_2) ) { $ws_2 = "*" } | |
7550 | if ( !defined($ws_3) ) { $ws_3 = "*" } | |
7551 | if ( !defined($ws_4) ) { $ws_4 = "*" } | |
7552 | ||
7553 | "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; | |
7554 | }; | |
7555 | } | |
7556 | return \@white_space_flag; | |
7557 | } | |
7558 | ||
7559 | { # begin print_line_of_tokens | |
7560 | ||
7561 | my $rtoken_type; | |
7562 | my $rtokens; | |
7563 | my $rlevels; | |
7564 | my $rslevels; | |
7565 | my $rblock_type; | |
7566 | my $rcontainer_type; | |
7567 | my $rcontainer_environment; | |
7568 | my $rtype_sequence; | |
7569 | my $input_line; | |
7570 | my $rnesting_tokens; | |
7571 | my $rci_levels; | |
7572 | my $rnesting_blocks; | |
7573 | ||
7574 | my $in_quote; | |
7575 | my $python_indentation_level; | |
7576 | ||
7577 | # These local token variables are stored by store_token_to_go: | |
7578 | my $block_type; | |
7579 | my $ci_level; | |
7580 | my $container_environment; | |
7581 | my $container_type; | |
7582 | my $in_continued_quote; | |
7583 | my $level; | |
7584 | my $nesting_blocks; | |
7585 | my $no_internal_newlines; | |
7586 | my $slevel; | |
7587 | my $token; | |
7588 | my $type; | |
7589 | my $type_sequence; | |
7590 | ||
7591 | # routine to pull the jth token from the line of tokens | |
7592 | sub extract_token { | |
7593 | my $j = shift; | |
7594 | $token = $$rtokens[$j]; | |
7595 | $type = $$rtoken_type[$j]; | |
7596 | $block_type = $$rblock_type[$j]; | |
7597 | $container_type = $$rcontainer_type[$j]; | |
7598 | $container_environment = $$rcontainer_environment[$j]; | |
7599 | $type_sequence = $$rtype_sequence[$j]; | |
7600 | $level = $$rlevels[$j]; | |
7601 | $slevel = $$rslevels[$j]; | |
7602 | $nesting_blocks = $$rnesting_blocks[$j]; | |
7603 | $ci_level = $$rci_levels[$j]; | |
7604 | } | |
7605 | ||
7606 | { | |
7607 | my @saved_token; | |
7608 | ||
7609 | sub save_current_token { | |
7610 | ||
7611 | @saved_token = ( | |
7612 | $block_type, $ci_level, | |
7613 | $container_environment, $container_type, | |
7614 | $in_continued_quote, $level, | |
7615 | $nesting_blocks, $no_internal_newlines, | |
7616 | $slevel, $token, | |
7617 | $type, $type_sequence, | |
7618 | ); | |
7619 | } | |
7620 | ||
7621 | sub restore_current_token { | |
7622 | ( | |
7623 | $block_type, $ci_level, | |
7624 | $container_environment, $container_type, | |
7625 | $in_continued_quote, $level, | |
7626 | $nesting_blocks, $no_internal_newlines, | |
7627 | $slevel, $token, | |
7628 | $type, $type_sequence, | |
7629 | ) | |
7630 | = @saved_token; | |
7631 | } | |
7632 | } | |
7633 | ||
7634 | # Routine to place the current token into the output stream. | |
7635 | # Called once per output token. | |
7636 | sub store_token_to_go { | |
7637 | ||
7638 | my $flag = $no_internal_newlines; | |
7639 | if ( $_[0] ) { $flag = 1 } | |
7640 | ||
7641 | $tokens_to_go[ ++$max_index_to_go ] = $token; | |
7642 | $types_to_go[$max_index_to_go] = $type; | |
7643 | $nobreak_to_go[$max_index_to_go] = $flag; | |
7644 | $old_breakpoint_to_go[$max_index_to_go] = 0; | |
7645 | $forced_breakpoint_to_go[$max_index_to_go] = 0; | |
7646 | $block_type_to_go[$max_index_to_go] = $block_type; | |
7647 | $type_sequence_to_go[$max_index_to_go] = $type_sequence; | |
7648 | $container_environment_to_go[$max_index_to_go] = $container_environment; | |
7649 | $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks; | |
7650 | $ci_levels_to_go[$max_index_to_go] = $ci_level; | |
7651 | $mate_index_to_go[$max_index_to_go] = -1; | |
7652 | $matching_token_to_go[$max_index_to_go] = ''; | |
7653 | ||
7654 | # Note: negative levels are currently retained as a diagnostic so that | |
7655 | # the 'final indentation level' is correctly reported for bad scripts. | |
7656 | # But this means that every use of $level as an index must be checked. | |
7657 | # If this becomes too much of a problem, we might give up and just clip | |
7658 | # them at zero. | |
7659 | ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; | |
7660 | $levels_to_go[$max_index_to_go] = $level; | |
7661 | $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; | |
7662 | $lengths_to_go[ $max_index_to_go + 1 ] = | |
7663 | $lengths_to_go[$max_index_to_go] + length($token); | |
7664 | ||
7665 | # Define the indentation that this token would have if it started | |
7666 | # a new line. We have to do this now because we need to know this | |
7667 | # when considering one-line blocks. | |
7668 | set_leading_whitespace( $level, $ci_level, $in_continued_quote ); | |
7669 | ||
7670 | if ( $type ne 'b' ) { | |
7671 | $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; | |
7672 | $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; | |
7673 | $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; | |
7674 | $last_nonblank_index_to_go = $max_index_to_go; | |
7675 | $last_nonblank_type_to_go = $type; | |
7676 | $last_nonblank_token_to_go = $token; | |
7677 | if ( $type eq ',' ) { | |
7678 | $comma_count_in_batch++; | |
7679 | } | |
7680 | } | |
7681 | ||
7682 | FORMATTER_DEBUG_FLAG_STORE && do { | |
7683 | my ( $a, $b, $c ) = caller(); | |
7684 | ||
7685 | "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; | |
7686 | }; | |
7687 | } | |
7688 | ||
7689 | sub insert_new_token_to_go { | |
7690 | ||
7691 | # insert a new token into the output stream. use same level as | |
7692 | # previous token; assumes a character at max_index_to_go. | |
7693 | save_current_token(); | |
7694 | ( $token, $type, $slevel, $no_internal_newlines ) = @_; | |
7695 | ||
7696 | if ( $max_index_to_go == UNDEFINED_INDEX ) { | |
7697 | warning("code bug: bad call to insert_new_token_to_go\n"); | |
7698 | } | |
7699 | $level = $levels_to_go[$max_index_to_go]; | |
7700 | ||
7701 | # FIXME: it seems to be necessary to use the next, rather than | |
7702 | # previous, value of this variable when creating a new blank (align.t) | |
7703 | #my $slevel = $nesting_depth_to_go[$max_index_to_go]; | |
7704 | $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; | |
7705 | $ci_level = $ci_levels_to_go[$max_index_to_go]; | |
7706 | $container_environment = $container_environment_to_go[$max_index_to_go]; | |
7707 | $in_continued_quote = 0; | |
7708 | $block_type = ""; | |
7709 | $type_sequence = ""; | |
7710 | store_token_to_go(); | |
7711 | restore_current_token(); | |
7712 | return; | |
7713 | } | |
7714 | ||
7715 | my %is_until_while_for_if_elsif_else; | |
7716 | ||
7717 | BEGIN { | |
7718 | ||
7719 | # always break after a closing curly of these block types: | |
7720 | @_ = qw(until while for if elsif else); | |
7721 | @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_); | |
7722 | ||
7723 | } | |
7724 | ||
7725 | sub print_line_of_tokens { | |
7726 | ||
7727 | my $line_of_tokens = shift; | |
7728 | ||
7729 | # This routine is called once per input line to process all of | |
7730 | # the tokens on that line. This is the first stage of | |
7731 | # beautification. | |
7732 | # | |
7733 | # Full-line comments and blank lines may be processed immediately. | |
7734 | # | |
7735 | # For normal lines of code, the tokens are stored one-by-one, | |
7736 | # via calls to 'sub store_token_to_go', until a known line break | |
7737 | # point is reached. Then, the batch of collected tokens is | |
7738 | # passed along to 'sub output_line_to_go' for further | |
7739 | # processing. This routine decides if there should be | |
7740 | # whitespace between each pair of non-white tokens, so later | |
7741 | # routines only need to decide on any additional line breaks. | |
7742 | # Any whitespace is initally a single space character. Later, | |
7743 | # the vertical aligner may expand that to be multiple space | |
7744 | # characters if necessary for alignment. | |
7745 | ||
7746 | # extract input line number for error messages | |
7747 | $input_line_number = $line_of_tokens->{_line_number}; | |
7748 | ||
7749 | $rtoken_type = $line_of_tokens->{_rtoken_type}; | |
7750 | $rtokens = $line_of_tokens->{_rtokens}; | |
7751 | $rlevels = $line_of_tokens->{_rlevels}; | |
7752 | $rslevels = $line_of_tokens->{_rslevels}; | |
7753 | $rblock_type = $line_of_tokens->{_rblock_type}; | |
7754 | $rcontainer_type = $line_of_tokens->{_rcontainer_type}; | |
7755 | $rcontainer_environment = $line_of_tokens->{_rcontainer_environment}; | |
7756 | $rtype_sequence = $line_of_tokens->{_rtype_sequence}; | |
7757 | $input_line = $line_of_tokens->{_line_text}; | |
7758 | $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; | |
7759 | $rci_levels = $line_of_tokens->{_rci_levels}; | |
7760 | $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; | |
7761 | ||
7762 | $in_continued_quote = $starting_in_quote = | |
7763 | $line_of_tokens->{_starting_in_quote}; | |
7764 | $in_quote = $line_of_tokens->{_ending_in_quote}; | |
7765 | $python_indentation_level = | |
7766 | $line_of_tokens->{_python_indentation_level}; | |
7767 | ||
7768 | my $j; | |
7769 | my $j_next; | |
7770 | my $jmax; | |
7771 | my $next_nonblank_token; | |
7772 | my $next_nonblank_token_type; | |
7773 | my $rwhite_space_flag; | |
7774 | ||
7775 | $jmax = @$rtokens - 1; | |
7776 | $block_type = ""; | |
7777 | $container_type = ""; | |
7778 | $container_environment = ""; | |
7779 | $type_sequence = ""; | |
7780 | $no_internal_newlines = 1 - $rOpts_add_newlines; | |
7781 | ||
7782 | # Handle a continued quote.. | |
7783 | if ($in_continued_quote) { | |
7784 | ||
7785 | # A line which is entirely a quote or pattern must go out | |
7786 | # verbatim. Note: the \n is contained in $input_line. | |
7787 | if ( $jmax <= 0 ) { | |
7788 | if ( ( $input_line =~ "\t" ) ) { | |
7789 | note_embedded_tab(); | |
7790 | } | |
7791 | write_unindented_line("$input_line"); | |
7792 | $last_line_had_side_comment = 0; | |
7793 | return; | |
7794 | } | |
7795 | ||
7796 | # prior to version 20010406, perltidy had a bug which placed | |
7797 | # continuation indentation before the last line of some multiline | |
7798 | # quotes and patterns -- exactly the lines passing this way. | |
7799 | # To help find affected lines in scripts run with these | |
7800 | # versions, run with '-chk', and it will warn of any quotes or | |
7801 | # patterns which might have been modified by these early | |
7802 | # versions. | |
7803 | if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) { | |
7804 | warning( | |
7805 | "-chk: please check this line for extra leading whitespace\n" | |
7806 | ); | |
7807 | } | |
7808 | } | |
7809 | ||
7810 | # delete trailing blank tokens | |
7811 | if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- } | |
7812 | ||
7813 | # Handle a blank line.. | |
7814 | if ( $jmax < 0 ) { | |
7815 | ||
7816 | # For the 'swallow-optional-blank-lines' option, we delete all | |
7817 | # old blank lines and let the blank line rules generate any | |
7818 | # needed blanks. | |
7819 | if ( !$rOpts_swallow_optional_blank_lines ) { | |
7820 | flush(); | |
7821 | $file_writer_object->write_blank_code_line(); | |
7822 | $last_line_leading_type = 'b'; | |
7823 | } | |
7824 | $last_line_had_side_comment = 0; | |
7825 | return; | |
7826 | } | |
7827 | ||
7828 | # see if this is a static block comment (starts with ##) | |
7829 | my $is_static_block_comment = 0; | |
7830 | my $is_static_block_comment_without_leading_space = 0; | |
7831 | if ( $jmax == 0 | |
7832 | && $$rtoken_type[0] eq '#' | |
7833 | && $rOpts->{'static-block-comments'} | |
7834 | && $input_line =~ /$static_block_comment_pattern/o ) | |
7835 | { | |
7836 | $is_static_block_comment = 1; | |
7837 | $is_static_block_comment_without_leading_space = | |
7838 | ( length($1) <= 0 ); | |
7839 | } | |
7840 | ||
7841 | # create a hanging side comment if appropriate | |
7842 | if ( | |
7843 | $jmax == 0 | |
7844 | && $$rtoken_type[0] eq '#' # only token is a comment | |
7845 | && $last_line_had_side_comment # last line had side comment | |
7846 | && $input_line =~ /^\s/ # there is some leading space | |
7847 | && !$is_static_block_comment # do not make static comment hanging | |
7848 | && $rOpts->{'hanging-side-comments'} # user is allowing this | |
7849 | ) | |
7850 | { | |
7851 | ||
7852 | # We will insert an empty qw string at the start of the token list | |
7853 | # to force this comment to be a side comment. The vertical aligner | |
7854 | # should then line it up with the previous side comment. | |
7855 | unshift @$rtoken_type, 'q'; | |
7856 | unshift @$rtokens, ''; | |
7857 | unshift @$rlevels, $$rlevels[0]; | |
7858 | unshift @$rslevels, $$rslevels[0]; | |
7859 | unshift @$rblock_type, ''; | |
7860 | unshift @$rcontainer_type, ''; | |
7861 | unshift @$rcontainer_environment, ''; | |
7862 | unshift @$rtype_sequence, ''; | |
7863 | unshift @$rnesting_tokens, $$rnesting_tokens[0]; | |
7864 | unshift @$rci_levels, $$rci_levels[0]; | |
7865 | unshift @$rnesting_blocks, $$rnesting_blocks[0]; | |
7866 | $jmax = 1; | |
7867 | } | |
7868 | ||
7869 | # remember if this line has a side comment | |
7870 | $last_line_had_side_comment = | |
7871 | ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' ); | |
7872 | ||
7873 | # Handle a block (full-line) comment.. | |
7874 | if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) { | |
7875 | ||
7876 | if ( $rOpts->{'delete-block-comments'} ) { return } | |
7877 | ||
7878 | if ( $rOpts->{'tee-block-comments'} ) { | |
7879 | $file_writer_object->tee_on(); | |
7880 | } | |
7881 | ||
7882 | destroy_one_line_block(); | |
7883 | output_line_to_go(); | |
7884 | ||
7885 | # output a blank line before block comments | |
7886 | if ( | |
7887 | $last_line_leading_type !~ /^[#b]$/ | |
7888 | && $rOpts->{'blanks-before-comments'} # only if allowed | |
7889 | && ! | |
7890 | $is_static_block_comment # never before static block comments | |
7891 | ) | |
7892 | { | |
7893 | flush(); # switching to new output stream | |
7894 | $file_writer_object->write_blank_code_line(); | |
7895 | $last_line_leading_type = 'b'; | |
7896 | } | |
7897 | ||
7898 | # TRIM COMMENTS -- This could be turned off as a option | |
7899 | $$rtokens[0] =~ s/\s*$//; # trim right end | |
7900 | ||
7901 | if ( | |
7902 | $rOpts->{'indent-block-comments'} | |
7903 | && ( !$rOpts->{'indent-spaced-block-comments'} | |
7904 | || $input_line =~ /^\s+/ ) | |
7905 | && !$is_static_block_comment_without_leading_space | |
7906 | ) | |
7907 | { | |
7908 | extract_token(0); | |
7909 | store_token_to_go(); | |
7910 | output_line_to_go(); | |
7911 | } | |
7912 | else { | |
7913 | flush(); # switching to new output stream | |
7914 | $file_writer_object->write_code_line( $$rtokens[0] . "\n" ); | |
7915 | $last_line_leading_type = '#'; | |
7916 | } | |
7917 | if ( $rOpts->{'tee-block-comments'} ) { | |
7918 | $file_writer_object->tee_off(); | |
7919 | } | |
7920 | return; | |
7921 | } | |
7922 | ||
7923 | # compare input/output indentation except for continuation lines | |
7924 | # (because they have an unknown amount of initial blank space) | |
7925 | # and lines which are quotes (because they may have been outdented) | |
7926 | # Note: this test is placed here because we know the continuation flag | |
7927 | # at this point, which allows us to avoid non-meaningful checks. | |
7928 | my $structural_indentation_level = $$rlevels[0]; | |
7929 | compare_indentation_levels( $python_indentation_level, | |
7930 | $structural_indentation_level ) | |
7931 | unless ( $python_indentation_level < 0 | |
7932 | || ( $$rci_levels[0] > 0 ) | |
7933 | || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' ) | |
7934 | ); | |
7935 | ||
7936 | # Patch needed for MakeMaker. Do not break a statement | |
7937 | # in which $VERSION may be calculated. See MakeMaker.pm; | |
7938 | # this is based on the coding in it. | |
7939 | # The first line of a file that matches this will be eval'd: | |
7940 | # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ | |
7941 | # Examples: | |
7942 | # *VERSION = \'1.01'; | |
7943 | # ( $VERSION ) = '$Revision: 1.46 $ ' =~ /\$Revision:\s+([^\s]+)/; | |
7944 | # We will pass such a line straight through without breaking | |
7945 | # it unless -npvl is used | |
7946 | ||
7947 | my $is_VERSION_statement = 0; | |
7948 | ||
7949 | if ( | |
7950 | !$saw_VERSION_in_this_file | |
7951 | && $input_line =~ /VERSION/ # quick check to reject most lines | |
7952 | && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ | |
7953 | ) | |
7954 | { | |
7955 | $saw_VERSION_in_this_file = 1; | |
7956 | $is_VERSION_statement = 1; | |
7957 | write_logfile_entry("passing VERSION line; -npvl deactivates\n"); | |
7958 | $no_internal_newlines = 1; | |
7959 | } | |
7960 | ||
7961 | # take care of indentation-only | |
7962 | # also write a line which is entirely a 'qw' list | |
7963 | if ( $rOpts->{'indent-only'} | |
7964 | || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) ) | |
7965 | { | |
7966 | flush(); | |
7967 | $input_line =~ s/^\s*//; # trim left end | |
7968 | $input_line =~ s/\s*$//; # trim right end | |
7969 | ||
7970 | extract_token(0); | |
7971 | $token = $input_line; | |
7972 | $type = 'q'; | |
7973 | $block_type = ""; | |
7974 | $container_type = ""; | |
7975 | $container_environment = ""; | |
7976 | $type_sequence = ""; | |
7977 | store_token_to_go(); | |
7978 | output_line_to_go(); | |
7979 | return; | |
7980 | } | |
7981 | ||
7982 | push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding | |
7983 | push( @$rtoken_type, 'b', 'b' ); | |
7984 | ($rwhite_space_flag) = | |
7985 | set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); | |
7986 | ||
7987 | # find input tabbing to allow checks for tabbing disagreement | |
7988 | ## not used for now | |
7989 | ##$input_line_tabbing = ""; | |
7990 | ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; } | |
7991 | ||
7992 | # if the buffer hasn't been flushed, add a leading space if | |
7993 | # necessary to keep essential whitespace. This is really only | |
7994 | # necessary if we are squeezing out all ws. | |
7995 | if ( $max_index_to_go >= 0 ) { | |
7996 | ||
7997 | $old_line_count_in_batch++; | |
7998 | ||
7999 | if ( | |
8000 | is_essential_whitespace( | |
8001 | $last_last_nonblank_token, | |
8002 | $last_last_nonblank_type, | |
8003 | $tokens_to_go[$max_index_to_go], | |
8004 | $types_to_go[$max_index_to_go], | |
8005 | $$rtokens[0], | |
8006 | $$rtoken_type[0] | |
8007 | ) | |
8008 | ) | |
8009 | { | |
8010 | my $slevel = $$rslevels[0]; | |
8011 | insert_new_token_to_go( ' ', 'b', $slevel, | |
8012 | $no_internal_newlines ); | |
8013 | } | |
8014 | } | |
8015 | ||
8016 | # If we just saw the end of an elsif block, write nag message | |
8017 | # if we do not see another elseif or an else. | |
8018 | if ($looking_for_else) { | |
8019 | ||
8020 | unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) { | |
8021 | write_logfile_entry("(No else block)\n"); | |
8022 | } | |
8023 | $looking_for_else = 0; | |
8024 | } | |
8025 | ||
8026 | # This is a good place to kill incomplete one-line blocks | |
8027 | if ( ( $semicolons_before_block_self_destruct == 0 ) | |
8028 | && ( $max_index_to_go >= 0 ) | |
8029 | && ( $types_to_go[$max_index_to_go] eq ';' ) | |
8030 | && ( $$rtokens[0] ne '}' ) ) | |
8031 | { | |
8032 | destroy_one_line_block(); | |
8033 | output_line_to_go(); | |
8034 | } | |
8035 | ||
8036 | # loop to process the tokens one-by-one | |
8037 | $type = 'b'; | |
8038 | $token = ""; | |
8039 | ||
8040 | foreach $j ( 0 .. $jmax ) { | |
8041 | ||
8042 | # pull out the local values for this token | |
8043 | extract_token($j); | |
8044 | ||
8045 | if ( $type eq '#' ) { | |
8046 | ||
8047 | # trim trailing whitespace | |
8048 | # (there is no option at present to prevent this) | |
8049 | $token =~ s/\s*$//; | |
8050 | ||
8051 | if ( | |
8052 | $rOpts->{'delete-side-comments'} | |
8053 | ||
8054 | # delete closing side comments if necessary | |
8055 | || ( $rOpts->{'delete-closing-side-comments'} | |
8056 | && $token =~ /$closing_side_comment_prefix_pattern/o | |
8057 | && $last_nonblank_block_type =~ | |
8058 | /$closing_side_comment_list_pattern/o ) | |
8059 | ) | |
8060 | { | |
8061 | if ( $types_to_go[$max_index_to_go] eq 'b' ) { | |
8062 | unstore_token_to_go(); | |
8063 | } | |
8064 | last; | |
8065 | } | |
8066 | } | |
8067 | ||
8068 | # If we are continuing after seeing a right curly brace, flush | |
8069 | # buffer unless we see what we are looking for, as in | |
8070 | # } else ... | |
8071 | if ( $rbrace_follower && $type ne 'b' ) { | |
8072 | ||
8073 | unless ( $rbrace_follower->{$token} ) { | |
8074 | output_line_to_go(); | |
8075 | } | |
8076 | $rbrace_follower = undef; | |
8077 | } | |
8078 | ||
8079 | $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1; | |
8080 | $next_nonblank_token = $$rtokens[$j_next]; | |
8081 | $next_nonblank_token_type = $$rtoken_type[$j_next]; | |
8082 | ||
8083 | #-------------------------------------------------------- | |
8084 | # Start of section to patch token text | |
8085 | #-------------------------------------------------------- | |
8086 | ||
8087 | # Modify certain tokens here for whitespace | |
8088 | # The following is not yet done, but could be: | |
8089 | # sub (x x x) | |
8090 | if ( $type =~ /^[wit]$/ ) { | |
8091 | ||
8092 | # Examples: | |
8093 | # change '$ var' to '$var' etc | |
8094 | # '-> new' to '->new' | |
8095 | if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { | |
8096 | $token =~ s/\s*//g; | |
8097 | } | |
8098 | ||
8099 | if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } | |
8100 | } | |
8101 | ||
8102 | # change 'LABEL :' to 'LABEL:' | |
8103 | elsif ( $type eq 'J' ) { $token =~ s/\s+//g } | |
8104 | ||
8105 | # patch to add space to something like "x10" | |
8106 | # This avoids having to split this token in the pre-tokenizer | |
8107 | elsif ( $type eq 'n' ) { | |
8108 | if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / } | |
8109 | } | |
8110 | ||
8111 | elsif ( $type eq 'Q' ) { | |
8112 | note_embedded_tab() if ( $token =~ "\t" ); | |
8113 | ||
8114 | # make note of something like '$var = s/xxx/yyy/;' | |
8115 | # in case it should have been '$var =~ s/xxx/yyy/;' | |
8116 | if ( | |
8117 | $token =~ /^(s|tr|y|m|\/)/ | |
8118 | && $last_nonblank_token =~ /^(=|==|!=)$/ | |
8119 | ||
8120 | # precededed by simple scalar | |
8121 | && $last_last_nonblank_type eq 'i' | |
8122 | && $last_last_nonblank_token =~ /^\$/ | |
8123 | ||
8124 | # followed by some kind of termination | |
8125 | # (but give complaint if we can's see far enough ahead) | |
8126 | && $next_nonblank_token =~ /^[; \)\}]$/ | |
8127 | ||
8128 | # scalar is not decleared | |
8129 | && !( | |
8130 | $types_to_go[0] eq 'k' | |
8131 | && $tokens_to_go[0] =~ /^(my|our|local)$/ | |
8132 | ) | |
8133 | ) | |
8134 | { | |
8135 | my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; | |
8136 | complain( | |
8137 | "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" | |
8138 | ); | |
8139 | } | |
8140 | } | |
8141 | ||
8142 | # trim blanks from right of qw quotes | |
8143 | # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) | |
8144 | elsif ( $type eq 'q' ) { | |
8145 | $token =~ s/\s*$//; | |
8146 | note_embedded_tab() if ( $token =~ "\t" ); | |
8147 | } | |
8148 | ||
8149 | #-------------------------------------------------------- | |
8150 | # End of section to patch token text | |
8151 | #-------------------------------------------------------- | |
8152 | ||
8153 | # insert any needed whitespace | |
8154 | if ( ( $type ne 'b' ) | |
8155 | && ( $max_index_to_go >= 0 ) | |
8156 | && ( $types_to_go[$max_index_to_go] ne 'b' ) | |
8157 | && $rOpts_add_whitespace ) | |
8158 | { | |
8159 | my $ws = $$rwhite_space_flag[$j]; | |
8160 | ||
8161 | if ( $ws == 1 ) { | |
8162 | insert_new_token_to_go( ' ', 'b', $slevel, | |
8163 | $no_internal_newlines ); | |
8164 | } | |
8165 | } | |
8166 | ||
8167 | # Do not allow breaks which would promote a side comment to a | |
8168 | # block comment. In order to allow a break before an opening | |
8169 | # or closing BLOCK, followed by a side comment, those sections | |
8170 | # of code will handle this flag separately. | |
8171 | my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); | |
8172 | my $is_opening_BLOCK = | |
8173 | ( $type eq '{' | |
8174 | && $token eq '{' | |
8175 | && $block_type | |
8176 | && $block_type ne 't' ); | |
8177 | my $is_closing_BLOCK = | |
8178 | ( $type eq '}' | |
8179 | && $token eq '}' | |
8180 | && $block_type | |
8181 | && $block_type ne 't' ); | |
8182 | ||
8183 | if ( $side_comment_follows | |
8184 | && !$is_opening_BLOCK | |
8185 | && !$is_closing_BLOCK ) | |
8186 | { | |
8187 | $no_internal_newlines = 1; | |
8188 | } | |
8189 | ||
8190 | # We're only going to handle breaking for code BLOCKS at this | |
8191 | # (top) level. Other indentation breaks will be handled by | |
8192 | # sub scan_list, which is better suited to dealing with them. | |
8193 | if ($is_opening_BLOCK) { | |
8194 | ||
8195 | # Tentatively output this token. This is required before | |
8196 | # calling starting_one_line_block. We may have to unstore | |
8197 | # it, though, if we have to break before it. | |
8198 | store_token_to_go($side_comment_follows); | |
8199 | ||
8200 | # Look ahead to see if we might form a one-line block | |
8201 | my $too_long = | |
8202 | starting_one_line_block( $j, $jmax, $level, $slevel, | |
8203 | $ci_level, $rtokens, $rtoken_type, $rblock_type ); | |
8204 | clear_breakpoint_undo_stack(); | |
8205 | ||
8206 | # to simplify the logic below, set a flag to indicate if | |
8207 | # this opening brace is far from the keyword which introduces it | |
8208 | my $keyword_on_same_line = 1; | |
8209 | if ( ( $max_index_to_go >= 0 ) | |
8210 | && ( $last_nonblank_type eq ')' ) ) | |
8211 | { | |
8212 | if ( $block_type =~ /^(if|else|elsif)$/ | |
8213 | && ( $tokens_to_go[0] eq '}' ) | |
8214 | && $rOpts_cuddled_else ) | |
8215 | { | |
8216 | $keyword_on_same_line = 1; | |
8217 | } | |
8218 | elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) | |
8219 | { | |
8220 | $keyword_on_same_line = 0; | |
8221 | } | |
8222 | } | |
8223 | ||
8224 | # decide if user requested break before '{' | |
8225 | my $want_break = | |
8226 | ||
8227 | # use -bl flag if not a sub block of any type | |
8228 | $block_type !~ /^sub/ | |
8229 | ? $rOpts->{'opening-brace-on-new-line'} | |
8230 | ||
8231 | # use -sbl flag unless this is an anonymous sub block | |
8232 | : $block_type !~ /^sub\W*$/ | |
8233 | ? $rOpts->{'opening-sub-brace-on-new-line'} | |
8234 | ||
8235 | # do not break for anonymous subs | |
8236 | : 0; | |
8237 | ||
8238 | # Break before an opening '{' ... | |
8239 | if ( | |
8240 | ||
8241 | # if requested | |
8242 | $want_break | |
8243 | ||
8244 | # and we were unable to start looking for a block, | |
8245 | && $index_start_one_line_block == UNDEFINED_INDEX | |
8246 | ||
8247 | # or if it will not be on same line as its keyword, so that | |
8248 | # it will be outdented (eval.t, overload.t), and the user | |
8249 | # has not insisted on keeping it on the right | |
8250 | || ( !$keyword_on_same_line | |
8251 | && !$rOpts->{'opening-brace-always-on-right'} ) | |
8252 | ||
8253 | ) | |
8254 | { | |
8255 | ||
8256 | # but only if allowed | |
8257 | unless ($no_internal_newlines) { | |
8258 | ||
8259 | # since we already stored this token, we must unstore it | |
8260 | unstore_token_to_go(); | |
8261 | ||
8262 | # then output the line | |
8263 | output_line_to_go(); | |
8264 | ||
8265 | # and now store this token at the start of a new line | |
8266 | store_token_to_go($side_comment_follows); | |
8267 | } | |
8268 | } | |
8269 | ||
8270 | # Now update for side comment | |
8271 | if ($side_comment_follows) { $no_internal_newlines = 1 } | |
8272 | ||
8273 | # now output this line | |
8274 | unless ($no_internal_newlines) { | |
8275 | output_line_to_go(); | |
8276 | } | |
8277 | } | |
8278 | ||
8279 | elsif ($is_closing_BLOCK) { | |
8280 | ||
8281 | # If there is a pending one-line block .. | |
8282 | if ( $index_start_one_line_block != UNDEFINED_INDEX ) { | |
8283 | ||
8284 | # we have to terminate it if.. | |
8285 | if ( | |
8286 | ||
8287 | # it is too long (final length may be different from | |
8288 | # initial estimate). note: must allow 1 space for this token | |
8289 | excess_line_length( $index_start_one_line_block, | |
8290 | $max_index_to_go ) >= 0 | |
8291 | ||
8292 | # or if it has too many semicolons | |
8293 | || ( $semicolons_before_block_self_destruct == 0 | |
8294 | && $last_nonblank_type ne ';' ) | |
8295 | ) | |
8296 | { | |
8297 | destroy_one_line_block(); | |
8298 | } | |
8299 | } | |
8300 | ||
8301 | # put a break before this closing curly brace if appropriate | |
8302 | unless ( $no_internal_newlines | |
8303 | || $index_start_one_line_block != UNDEFINED_INDEX ) | |
8304 | { | |
8305 | ||
8306 | # add missing semicolon if ... | |
8307 | # there are some tokens | |
8308 | if ( | |
8309 | ( $max_index_to_go > 0 ) | |
8310 | ||
8311 | # and we don't have one | |
8312 | && ( $last_nonblank_type ne ';' ) | |
8313 | ||
8314 | # patch until some block type issues are fixed: | |
8315 | # Do not add semi-colon for block types '{', | |
8316 | # '}', and ';' because we cannot be sure yet | |
8317 | # that this is a block and not an anonomyous | |
8318 | # hash (blktype.t, blktype1.t) | |
8319 | && ( $block_type !~ /^[\{\};]$/ ) | |
8320 | ||
8321 | # it seems best not to add semicolons in these | |
8322 | # special block types: sort|map|grep | |
8323 | && ( !$is_sort_map_grep{$block_type} ) | |
8324 | ||
8325 | # and we are allowed to do so. | |
8326 | && $rOpts->{'add-semicolons'} | |
8327 | ) | |
8328 | { | |
8329 | ||
8330 | save_current_token(); | |
8331 | $token = ';'; | |
8332 | $type = ';'; | |
8333 | $level = $levels_to_go[$max_index_to_go]; | |
8334 | $slevel = $nesting_depth_to_go[$max_index_to_go]; | |
8335 | $nesting_blocks = | |
8336 | $nesting_blocks_to_go[$max_index_to_go]; | |
8337 | $ci_level = $ci_levels_to_go[$max_index_to_go]; | |
8338 | $block_type = ""; | |
8339 | $container_type = ""; | |
8340 | $container_environment = ""; | |
8341 | $type_sequence = ""; | |
8342 | ||
8343 | # Note - we remove any blank AFTER extracting its | |
8344 | # parameters such as level, etc, above | |
8345 | if ( $types_to_go[$max_index_to_go] eq 'b' ) { | |
8346 | unstore_token_to_go(); | |
8347 | } | |
8348 | store_token_to_go(); | |
8349 | ||
8350 | note_added_semicolon(); | |
8351 | restore_current_token(); | |
8352 | } | |
8353 | ||
8354 | # then write out everything before this closing curly brace | |
8355 | output_line_to_go(); | |
8356 | ||
8357 | } | |
8358 | ||
8359 | # Now update for side comment | |
8360 | if ($side_comment_follows) { $no_internal_newlines = 1 } | |
8361 | ||
8362 | # store the closing curly brace | |
8363 | store_token_to_go(); | |
8364 | ||
8365 | # ok, we just stored a closing curly brace. Often, but | |
8366 | # not always, we want to end the line immediately. | |
8367 | # So now we have to check for special cases. | |
8368 | ||
8369 | # if this '}' successfully ends a one-line block.. | |
8370 | my $is_one_line_block = 0; | |
8371 | my $keep_going = 0; | |
8372 | if ( $index_start_one_line_block != UNDEFINED_INDEX ) { | |
8373 | ||
8374 | # Remember the type of token just before the | |
8375 | # opening brace. It would be more general to use | |
8376 | # a stack, but this will work for one-line blocks. | |
8377 | $is_one_line_block = | |
8378 | $types_to_go[$index_start_one_line_block]; | |
8379 | ||
8380 | # we have to actually make it by removing tentative | |
8381 | # breaks that were set within it | |
8382 | undo_forced_breakpoint_stack(0); | |
8383 | set_nobreaks( $index_start_one_line_block, | |
8384 | $max_index_to_go - 1 ); | |
8385 | ||
8386 | # then re-initialize for the next one-line block | |
8387 | destroy_one_line_block(); | |
8388 | ||
8389 | # then decide if we want to break after the '}' .. | |
8390 | # We will keep going to allow certain brace followers as in: | |
8391 | # do { $ifclosed = 1; last } unless $losing; | |
8392 | # | |
8393 | # But make a line break if the curly ends a | |
8394 | # significant block: | |
8395 | if ( $is_until_while_for_if_elsif_else{$block_type} ) { | |
8396 | output_line_to_go() unless ($no_internal_newlines); | |
8397 | } | |
8398 | } | |
8399 | ||
8400 | # set string indicating what we need to look for brace follower | |
8401 | # tokens | |
8402 | if ( $block_type eq 'do' ) { | |
8403 | $rbrace_follower = \%is_do_follower; | |
8404 | } | |
8405 | elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { | |
8406 | $rbrace_follower = \%is_if_brace_follower; | |
8407 | } | |
8408 | elsif ( $block_type eq 'else' ) { | |
8409 | $rbrace_follower = \%is_else_brace_follower; | |
8410 | } | |
8411 | ||
8412 | # added eval for borris.t | |
8413 | elsif ($is_sort_map_grep_eval{$block_type} | |
8414 | || $is_one_line_block eq 'G' ) | |
8415 | { | |
8416 | $rbrace_follower = undef; | |
8417 | $keep_going = 1; | |
8418 | } | |
8419 | ||
8420 | # anonymous sub | |
8421 | elsif ( $block_type =~ /^sub\W*$/ ) { | |
8422 | ||
8423 | if ($is_one_line_block) { | |
8424 | $rbrace_follower = \%is_anon_sub_1_brace_follower; | |
8425 | } | |
8426 | else { | |
8427 | $rbrace_follower = \%is_anon_sub_brace_follower; | |
8428 | } | |
8429 | } | |
8430 | ||
8431 | # TESTING ONLY for SWITCH/CASE - this is where to start | |
8432 | # recoding to retain else's on the same line as a case, | |
8433 | # but there is a lot more that would need to be done. | |
8434 | ##elsif ($block_type eq 'case') {$rbrace_follower = {else=>1};} | |
8435 | ||
8436 | # None of the above: specify what can follow a closing | |
8437 | # brace of a block which is not an | |
8438 | # if/elsif/else/do/sort/map/grep/eval | |
8439 | # Testfiles: | |
8440 | # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t | |
8441 | else { | |
8442 | $rbrace_follower = \%is_other_brace_follower; | |
8443 | } | |
8444 | ||
8445 | # See if an elsif block is followed by another elsif or else; | |
8446 | # complain if not. | |
8447 | if ( $block_type eq 'elsif' ) { | |
8448 | ||
8449 | if ( $next_nonblank_token_type eq 'b' ) { # end of line? | |
8450 | $looking_for_else = 1; # ok, check on next line | |
8451 | } | |
8452 | else { | |
8453 | ||
8454 | unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { | |
8455 | write_logfile_entry("No else block :(\n"); | |
8456 | } | |
8457 | } | |
8458 | } | |
8459 | ||
8460 | # keep going after certain block types (map,sort,grep,eval) | |
8461 | # added eval for borris.t | |
8462 | if ($keep_going) { | |
8463 | ||
8464 | # keep going | |
8465 | } | |
8466 | ||
8467 | # if no more tokens, postpone decision until re-entring | |
8468 | elsif ( ( $next_nonblank_token_type eq 'b' ) | |
8469 | && $rOpts_add_newlines ) | |
8470 | { | |
8471 | unless ($rbrace_follower) { | |
8472 | output_line_to_go() unless ($no_internal_newlines); | |
8473 | } | |
8474 | } | |
8475 | ||
8476 | elsif ($rbrace_follower) { | |
8477 | ||
8478 | unless ( $rbrace_follower->{$next_nonblank_token} ) { | |
8479 | output_line_to_go() unless ($no_internal_newlines); | |
8480 | } | |
8481 | $rbrace_follower = undef; | |
8482 | } | |
8483 | ||
8484 | else { | |
8485 | output_line_to_go() unless ($no_internal_newlines); | |
8486 | } | |
8487 | ||
8488 | } # end treatment of closing block token | |
8489 | ||
8490 | # handle semicolon | |
8491 | elsif ( $type eq ';' ) { | |
8492 | ||
8493 | # kill one-line blocks with too many semicolons | |
8494 | $semicolons_before_block_self_destruct--; | |
8495 | if ( | |
8496 | ( $semicolons_before_block_self_destruct < 0 ) | |
8497 | || ( $semicolons_before_block_self_destruct == 0 | |
8498 | && $next_nonblank_token_type !~ /^[b\}]$/ ) | |
8499 | ) | |
8500 | { | |
8501 | destroy_one_line_block(); | |
8502 | } | |
8503 | ||
8504 | # Remove unnecessary semicolons, but not after bare | |
8505 | # blocks, where it could be unsafe if the brace is | |
8506 | # mistokenized. | |
8507 | if ( | |
8508 | ( | |
8509 | $last_nonblank_token eq '}' | |
8510 | && ( | |
8511 | $is_block_without_semicolon{ | |
8512 | $last_nonblank_block_type} | |
8513 | || $last_nonblank_block_type =~ /^sub\s+\w/ | |
8514 | || $last_nonblank_block_type =~ /^\w+:$/ ) | |
8515 | ) | |
8516 | || $last_nonblank_type eq ';' | |
8517 | ) | |
8518 | { | |
8519 | ||
8520 | if ( | |
8521 | $rOpts->{'delete-semicolons'} | |
8522 | ||
8523 | # don't delete ; before a # because it would promote it | |
8524 | # to a block comment | |
8525 | && ( $next_nonblank_token_type ne '#' ) | |
8526 | ) | |
8527 | { | |
8528 | note_deleted_semicolon(); | |
8529 | output_line_to_go() | |
8530 | unless ( $no_internal_newlines | |
8531 | || $index_start_one_line_block != UNDEFINED_INDEX ); | |
8532 | next; | |
8533 | } | |
8534 | else { | |
8535 | write_logfile_entry("Extra ';'\n"); | |
8536 | } | |
8537 | } | |
8538 | store_token_to_go(); | |
8539 | ||
8540 | output_line_to_go() | |
8541 | unless ( $no_internal_newlines | |
8542 | || ( $next_nonblank_token eq '}' ) ); | |
8543 | ||
8544 | } | |
8545 | ||
8546 | # handle here_doc target string | |
8547 | elsif ( $type eq 'h' ) { | |
8548 | $no_internal_newlines = | |
8549 | 1; # no newlines after seeing here-target | |
8550 | destroy_one_line_block(); | |
8551 | store_token_to_go(); | |
8552 | } | |
8553 | ||
8554 | # handle all other token types | |
8555 | else { | |
8556 | ||
8557 | # if this is a blank... | |
8558 | if ( $type eq 'b' ) { | |
8559 | ||
8560 | # make it just one character | |
8561 | $token = ' ' if $rOpts_add_whitespace; | |
8562 | ||
8563 | # delete it if unwanted by whitespace rules | |
8564 | # or we are deleting all whitespace | |
8565 | my $ws = $$rwhite_space_flag[ $j + 1 ]; | |
8566 | if ( ( defined($ws) && $ws == -1 ) | |
8567 | || $rOpts_delete_old_whitespace ) | |
8568 | { | |
8569 | ||
8570 | # unless it might make a syntax error | |
8571 | next | |
8572 | unless is_essential_whitespace( | |
8573 | $last_last_nonblank_token, | |
8574 | $last_last_nonblank_type, | |
8575 | $tokens_to_go[$max_index_to_go], | |
8576 | $types_to_go[$max_index_to_go], | |
8577 | $$rtokens[ $j + 1 ], | |
8578 | $$rtoken_type[ $j + 1 ] | |
8579 | ); | |
8580 | } | |
8581 | } | |
8582 | store_token_to_go(); | |
8583 | } | |
8584 | ||
8585 | # remember two previous nonblank OUTPUT tokens | |
8586 | if ( $type ne '#' && $type ne 'b' ) { | |
8587 | $last_last_nonblank_token = $last_nonblank_token; | |
8588 | $last_last_nonblank_type = $last_nonblank_type; | |
8589 | $last_nonblank_token = $token; | |
8590 | $last_nonblank_type = $type; | |
8591 | $last_nonblank_block_type = $block_type; | |
8592 | } | |
8593 | ||
8594 | # unset the continued-quote flag since it only applies to the | |
8595 | # first token, and we want to resume normal formatting if | |
8596 | # there are additional tokens on the line | |
8597 | $in_continued_quote = 0; | |
8598 | ||
8599 | } # end of loop over all tokens in this 'line_of_tokens' | |
8600 | ||
8601 | # we have to flush .. | |
8602 | if ( | |
8603 | ||
8604 | # if there is a side comment | |
8605 | ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) | |
8606 | ||
8607 | # if this line which ends in a quote | |
8608 | || $in_quote | |
8609 | ||
8610 | # if this is a VERSION statement | |
8611 | || $is_VERSION_statement | |
8612 | ||
8613 | # to keep a label on one line if that is how it is now | |
8614 | || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) ) | |
8615 | ||
8616 | # if we are instructed to keep all old line breaks | |
8617 | || !$rOpts->{'delete-old-newlines'} | |
8618 | ) | |
8619 | { | |
8620 | destroy_one_line_block(); | |
8621 | output_line_to_go(); | |
8622 | } | |
8623 | ||
8624 | # mark old line breakpoints in current output stream | |
8625 | if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_line_breaks ) { | |
8626 | $old_breakpoint_to_go[$max_index_to_go] = 1; | |
8627 | } | |
8628 | } | |
8629 | } # end print_line_of_tokens | |
8630 | ||
8631 | sub note_added_semicolon { | |
8632 | $last_added_semicolon_at = $input_line_number; | |
8633 | if ( $added_semicolon_count == 0 ) { | |
8634 | $first_added_semicolon_at = $last_added_semicolon_at; | |
8635 | } | |
8636 | $added_semicolon_count++; | |
8637 | write_logfile_entry("Added ';' here\n"); | |
8638 | } | |
8639 | ||
8640 | sub note_deleted_semicolon { | |
8641 | $last_deleted_semicolon_at = $input_line_number; | |
8642 | if ( $deleted_semicolon_count == 0 ) { | |
8643 | $first_deleted_semicolon_at = $last_deleted_semicolon_at; | |
8644 | } | |
8645 | $deleted_semicolon_count++; | |
8646 | write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) | |
8647 | } | |
8648 | ||
8649 | sub note_embedded_tab { | |
8650 | $embedded_tab_count++; | |
8651 | $last_embedded_tab_at = $input_line_number; | |
8652 | if ( !$first_embedded_tab_at ) { | |
8653 | $first_embedded_tab_at = $last_embedded_tab_at; | |
8654 | } | |
8655 | ||
8656 | if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { | |
8657 | write_logfile_entry("Embedded tabs in quote or pattern\n"); | |
8658 | } | |
8659 | } | |
8660 | ||
8661 | sub starting_one_line_block { | |
8662 | ||
8663 | # after seeing an opening curly brace, look for the closing brace | |
8664 | # and see if the entire block will fit on a line. This routine is | |
8665 | # not always right because it uses the old whitespace, so a check | |
8666 | # is made later (at the closing brace) to make sure we really | |
8667 | # have a one-line block. We have to do this preliminary check, | |
8668 | # though, because otherwise we would always break at a semicolon | |
8669 | # within a one-line block if the block contains multiple statements. | |
8670 | ||
8671 | my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type, | |
8672 | $rblock_type ) | |
8673 | = @_; | |
8674 | ||
8675 | # kill any current block - we can only go 1 deep | |
8676 | destroy_one_line_block(); | |
8677 | ||
8678 | # return value: | |
8679 | # 1=distance from start of block to opening brace exceeds line length | |
8680 | # 0=otherwise | |
8681 | ||
8682 | my $i_start = 0; | |
8683 | ||
8684 | # shouldn't happen: there must have been a prior call to | |
8685 | # store_token_to_go to put the opening brace in the output stream | |
8686 | if ( $max_index_to_go < 0 ) { | |
8687 | warning("program bug: store_token_to_go called incorrectly\n"); | |
8688 | report_definite_bug(); | |
8689 | } | |
8690 | else { | |
8691 | ||
8692 | # cannot use one-line blocks with cuddled else else/elsif lines | |
8693 | if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { | |
8694 | return 0; | |
8695 | } | |
8696 | } | |
8697 | ||
8698 | my $block_type = $$rblock_type[$j]; | |
8699 | ||
8700 | # find the starting keyword for this block (such as 'if', 'else', ...) | |
8701 | ||
8702 | if ( $block_type =~ /^[\{\}\;\:]$/ ) { | |
8703 | $i_start = $max_index_to_go; | |
8704 | } | |
8705 | ||
8706 | elsif ( $last_last_nonblank_token_to_go eq ')' ) { | |
8707 | ||
8708 | # For something like "if (xxx) {", the keyword "if" will be | |
8709 | # just after the most recent break. This will be 0 unless | |
8710 | # we have just killed a one-line block and are starting another. | |
8711 | # (doif.t) | |
8712 | $i_start = $index_max_forced_break + 1; | |
8713 | if ( $types_to_go[$i_start] eq 'b' ) { | |
8714 | $i_start++; | |
8715 | } | |
8716 | ||
8717 | unless ( $tokens_to_go[$i_start] eq $block_type ) { | |
8718 | return 0; | |
8719 | } | |
8720 | } | |
8721 | ||
8722 | # the previous nonblank token should start these block types | |
8723 | elsif ( | |
8724 | ( $last_last_nonblank_token_to_go eq $block_type ) | |
8725 | || ( $block_type =~ /^sub/ | |
8726 | && $last_last_nonblank_token_to_go =~ /^sub/ ) | |
8727 | ) | |
8728 | { | |
8729 | $i_start = $last_last_nonblank_index_to_go; | |
8730 | } | |
8731 | ||
8732 | # patch for SWITCH/CASE to retain one-line case/when blocks | |
8733 | elsif ( $block_type eq 'case' || $block_type eq 'when' ) { | |
8734 | $i_start = $index_max_forced_break + 1; | |
8735 | if ( $types_to_go[$i_start] eq 'b' ) { | |
8736 | $i_start++; | |
8737 | } | |
8738 | unless ( $tokens_to_go[$i_start] eq $block_type ) { | |
8739 | return 0; | |
8740 | } | |
8741 | } | |
8742 | ||
8743 | else { | |
8744 | return 1; | |
8745 | } | |
8746 | ||
8747 | my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; | |
8748 | ||
8749 | my $i; | |
8750 | ||
8751 | # see if length is too long to even start | |
8752 | if ( $pos > $rOpts_maximum_line_length ) { | |
8753 | return 1; | |
8754 | } | |
8755 | ||
8756 | for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) { | |
8757 | ||
8758 | # old whitespace could be arbitrarily large, so don't use it | |
8759 | if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 } | |
8760 | else { $pos += length( $$rtokens[$i] ) } | |
8761 | ||
8762 | # Return false result if we exceed the maximum line length, | |
8763 | if ( $pos > $rOpts_maximum_line_length ) { | |
8764 | return 0; | |
8765 | } | |
8766 | ||
8767 | # or encounter another opening brace before finding the closing brace. | |
8768 | elsif ($$rtokens[$i] eq '{' | |
8769 | && $$rtoken_type[$i] eq '{' | |
8770 | && $$rblock_type[$i] ) | |
8771 | { | |
8772 | return 0; | |
8773 | } | |
8774 | ||
8775 | # if we find our closing brace.. | |
8776 | elsif ($$rtokens[$i] eq '}' | |
8777 | && $$rtoken_type[$i] eq '}' | |
8778 | && $$rblock_type[$i] ) | |
8779 | { | |
8780 | ||
8781 | # be sure any trailing comment also fits on the line | |
8782 | my $i_nonblank = | |
8783 | ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1; | |
8784 | ||
8785 | if ( $$rtoken_type[$i_nonblank] eq '#' ) { | |
8786 | $pos += length( $$rtokens[$i_nonblank] ); | |
8787 | ||
8788 | if ( $i_nonblank > $i + 1 ) { | |
8789 | $pos += length( $$rtokens[ $i + 1 ] ); | |
8790 | } | |
8791 | ||
8792 | if ( $pos > $rOpts_maximum_line_length ) { | |
8793 | return 0; | |
8794 | } | |
8795 | } | |
8796 | ||
8797 | # ok, it's a one-line block | |
8798 | create_one_line_block( $i_start, 20 ); | |
8799 | return 0; | |
8800 | } | |
8801 | ||
8802 | # just keep going for other characters | |
8803 | else { | |
8804 | } | |
8805 | } | |
8806 | ||
8807 | # Allow certain types of new one-line blocks to form by joining | |
8808 | # input lines. These can be safely done, but for other block types, | |
8809 | # we keep old one-line blocks but do not form new ones. It is not | |
8810 | # always a good idea to make as many one-line blocks as possible, | |
8811 | # so other types are not done. The user can always use -mangle. | |
8812 | if ( $is_sort_map_grep_eval{$block_type} ) { | |
8813 | create_one_line_block( $i_start, 1 ); | |
8814 | } | |
8815 | ||
8816 | return 0; | |
8817 | } | |
8818 | ||
8819 | sub unstore_token_to_go { | |
8820 | ||
8821 | # remove most recent token from output stream | |
8822 | if ( $max_index_to_go > 0 ) { | |
8823 | $max_index_to_go--; | |
8824 | } | |
8825 | else { | |
8826 | $max_index_to_go = UNDEFINED_INDEX; | |
8827 | } | |
8828 | ||
8829 | } | |
8830 | ||
8831 | sub want_blank_line { | |
8832 | flush(); | |
8833 | $file_writer_object->want_blank_line(); | |
8834 | } | |
8835 | ||
8836 | sub write_unindented_line { | |
8837 | flush(); | |
8838 | $file_writer_object->write_line( $_[0] ); | |
8839 | } | |
8840 | ||
8841 | sub undo_lp_ci { | |
8842 | ||
8843 | # If there is a single, long parameter within parens, like this: | |
8844 | # | |
8845 | # $self->command( "/msg " | |
8846 | # . $infoline->chan | |
8847 | # . " You said $1, but did you know that it's square was " | |
8848 | # . $1 * $1 . " ?" ); | |
8849 | # | |
8850 | # we can remove the continuation indentation of the 2nd and higher lines | |
8851 | # to achieve this effect, which is more pleasing: | |
8852 | # | |
8853 | # $self->command("/msg " | |
8854 | # . $infoline->chan | |
8855 | # . " You said $1, but did you know that it's square was " | |
8856 | # . $1 * $1 . " ?"); | |
8857 | ||
8858 | my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; | |
8859 | my $max_line = @$ri_first - 1; | |
8860 | ||
8861 | # must be multiple lines | |
8862 | return unless $max_line > $line_open; | |
8863 | ||
8864 | my $lev_start = $levels_to_go[$i_start]; | |
8865 | my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; | |
8866 | ||
8867 | # see if all additional lines in this container have continuation | |
8868 | # indentation | |
8869 | my $n; | |
8870 | my $line_1 = 1 + $line_open; | |
8871 | for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { | |
8872 | my $ibeg = $$ri_first[$n]; | |
8873 | my $iend = $$ri_last[$n]; | |
8874 | if ( $ibeg eq $closing_index ) { $n--; last } | |
8875 | return if ( $lev_start != $levels_to_go[$ibeg] ); | |
8876 | return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); | |
8877 | last if ( $closing_index <= $iend ); | |
8878 | } | |
8879 | ||
8880 | # we can reduce the indentation of all continuation lines | |
8881 | my $continuation_line_count = $n - $line_open; | |
8882 | @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] = | |
8883 | (0) x ($continuation_line_count); | |
8884 | @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] = | |
8885 | @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ]; | |
8886 | } | |
8887 | ||
8888 | { | |
8889 | ||
8890 | # Identify certain operators which often occur in chains. | |
8891 | # We will try to improve alignment when these lead a line. | |
8892 | my %is_chain_operator; | |
8893 | ||
8894 | BEGIN { | |
8895 | @_ = qw(&& || and or : ? .); | |
8896 | @is_chain_operator{@_} = (1) x scalar(@_); | |
8897 | } | |
8898 | ||
8899 | sub set_logical_padding { | |
8900 | ||
8901 | # Look at a batch of lines and see if extra padding can improve the | |
8902 | # alignment when there are certain leading operators. Here is an | |
8903 | # example, in which some extra space is introduced before | |
8904 | # '( $year' to make it line up with the subsequent lines: | |
8905 | # | |
8906 | # if ( ( $Year < 1601 ) | |
8907 | # || ( $Year > 2899 ) | |
8908 | # || ( $EndYear < 1601 ) | |
8909 | # || ( $EndYear > 2899 ) ) | |
8910 | # { | |
8911 | # &Error_OutOfRange; | |
8912 | # } | |
8913 | # | |
8914 | my ( $ri_first, $ri_last ) = @_; | |
8915 | my $max_line = @$ri_first - 1; | |
8916 | ||
8917 | my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, | |
8918 | $pad_spaces, $tok_next, $has_leading_op_next, $has_leading_op ); | |
8919 | ||
8920 | # looking at each line of this batch.. | |
8921 | foreach $line ( 0 .. $max_line - 1 ) { | |
8922 | ||
8923 | # see if the next line begins with a logical operator | |
8924 | $ibeg = $$ri_first[$line]; | |
8925 | $iend = $$ri_last[$line]; | |
8926 | $ibeg_next = $$ri_first[ $line + 1 ]; | |
8927 | $tok_next = $tokens_to_go[$ibeg_next]; | |
8928 | $has_leading_op_next = $is_chain_operator{$tok_next}; | |
8929 | next unless ($has_leading_op_next); | |
8930 | ||
8931 | # next line must not be at lesser depth | |
8932 | next | |
8933 | if ( $nesting_depth_to_go[$ibeg] > | |
8934 | $nesting_depth_to_go[$ibeg_next] ); | |
8935 | ||
8936 | # identify the token in this line to be padded on the left | |
8937 | $ipad = undef; | |
8938 | ||
8939 | # handle lines at same depth... | |
8940 | if ( $nesting_depth_to_go[$ibeg] == | |
8941 | $nesting_depth_to_go[$ibeg_next] ) | |
8942 | { | |
8943 | ||
8944 | # if this is not first line of the batch ... | |
8945 | if ( $line > 0 ) { | |
8946 | ||
8947 | # and we have leading operator | |
8948 | next if $has_leading_op; | |
8949 | ||
8950 | # and .. | |
8951 | # 1. the previous line is at lesser depth, or | |
8952 | # 2. the previous line ends in an assignment | |
8953 | # | |
8954 | # Example 1: previous line at lesser depth | |
8955 | # if ( ( $Year < 1601 ) # <- we are here but | |
8956 | # || ( $Year > 2899 ) # list has not yet | |
8957 | # || ( $EndYear < 1601 ) # collapsed vertically | |
8958 | # || ( $EndYear > 2899 ) ) | |
8959 | # { | |
8960 | # | |
8961 | # Example 2: previous line ending in assignment: | |
8962 | # $leapyear = | |
8963 | # $year % 4 ? 0 # <- We are here | |
8964 | # : $year % 100 ? 1 | |
8965 | # : $year % 400 ? 0 | |
8966 | # : 1; | |
8967 | next | |
8968 | unless ( | |
8969 | $is_assignment{ $types_to_go[$iendm] } | |
8970 | || ( $nesting_depth_to_go[$ibegm] < | |
8971 | $nesting_depth_to_go[$ibeg] ) | |
8972 | ); | |
8973 | ||
8974 | # we will add padding before the first token | |
8975 | $ipad = $ibeg; | |
8976 | } | |
8977 | ||
8978 | # for first line of the batch.. | |
8979 | else { | |
8980 | ||
8981 | # WARNING: Never indent if first line is starting in a | |
8982 | # continued quote, which would change the quote. | |
8983 | next if $starting_in_quote; | |
8984 | ||
8985 | # if this is text after closing '}' | |
8986 | # then look for an interior token to pad | |
8987 | if ( $types_to_go[$ibeg] eq '}' ) { | |
8988 | ||
8989 | } | |
8990 | ||
8991 | # otherwise, we might pad if it looks really good | |
8992 | else { | |
8993 | ||
8994 | # we might pad token $ibeg, so be sure that it | |
8995 | # is at the same depth as the next line. | |
8996 | next | |
8997 | if ( $nesting_depth_to_go[ $ibeg + 1 ] != | |
8998 | $nesting_depth_to_go[$ibeg_next] ); | |
8999 | ||
9000 | # We can pad on line 1 of a statement if at least 3 | |
9001 | # lines will be aligned. Otherwise, it | |
9002 | # can look very confusing. | |
9003 | if ( $max_line > 2 ) { | |
9004 | my $leading_token = $tokens_to_go[$ibeg_next]; | |
9005 | ||
9006 | # never indent line 1 of a '.' series because | |
9007 | # previous line is most likely at same level. | |
9008 | # TODO: we should also look at the leasing_spaces | |
9009 | # of the last output line and skip if it is same | |
9010 | # as this line. | |
9011 | next if ( $leading_token eq '.' ); | |
9012 | ||
9013 | my $count = 1; | |
9014 | foreach my $l ( 2 .. 3 ) { | |
9015 | my $ibeg_next_next = $$ri_first[ $line + $l ]; | |
9016 | next | |
9017 | unless $tokens_to_go[$ibeg_next_next] eq | |
9018 | $leading_token; | |
9019 | $count++; | |
9020 | } | |
9021 | next unless $count == 3; | |
9022 | $ipad = $ibeg; | |
9023 | } | |
9024 | else { | |
9025 | next; | |
9026 | } | |
9027 | } | |
9028 | } | |
9029 | } | |
9030 | ||
9031 | # find interior token to pad if necessary | |
9032 | if ( !defined($ipad) ) { | |
9033 | ||
9034 | for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { | |
9035 | ||
9036 | # find any unclosed container | |
9037 | next | |
9038 | unless ( $type_sequence_to_go[$i] | |
9039 | && $mate_index_to_go[$i] > $iend ); | |
9040 | ||
9041 | # find next nonblank token to pad | |
9042 | $ipad = $i + 1; | |
9043 | if ( $types_to_go[$ipad] eq 'b' ) { | |
9044 | $ipad++; | |
9045 | last if ( $ipad > $iend ); | |
9046 | } | |
9047 | } | |
9048 | last unless $ipad; | |
9049 | } | |
9050 | ||
9051 | # next line must not be at greater depth | |
9052 | my $iend_next = $$ri_last[ $line + 1 ]; | |
9053 | next | |
9054 | if ( $nesting_depth_to_go[ $iend_next + 1 ] > | |
9055 | $nesting_depth_to_go[$ipad] ); | |
9056 | ||
9057 | # lines must be somewhat similar to be padded.. | |
9058 | my $inext_next = $ibeg_next + 1; | |
9059 | if ( $types_to_go[$inext_next] eq 'b' ) { | |
9060 | $inext_next++; | |
9061 | } | |
9062 | my $type = $types_to_go[$ipad]; | |
9063 | ||
9064 | # see if there are multiple continuation lines | |
9065 | my $logical_continuation_lines = 1; | |
9066 | if ( $line + 2 <= $max_line ) { | |
9067 | my $leading_token = $tokens_to_go[$ibeg_next]; | |
9068 | my $ibeg_next_next = $$ri_first[ $line + 2 ]; | |
9069 | if ( $tokens_to_go[$ibeg_next_next] eq $leading_token | |
9070 | && $nesting_depth_to_go[$ibeg_next] eq | |
9071 | $nesting_depth_to_go[$ibeg_next_next] ) | |
9072 | { | |
9073 | $logical_continuation_lines++; | |
9074 | } | |
9075 | } | |
9076 | if ( | |
9077 | ||
9078 | # either we have multiple continuation lines to follow | |
9079 | # and we are not padding the first token | |
9080 | ( $logical_continuation_lines > 1 && $ipad > 0 ) | |
9081 | ||
9082 | # or.. | |
9083 | || ( | |
9084 | ||
9085 | # types must match | |
9086 | $types_to_go[$inext_next] eq $type | |
9087 | ||
9088 | # and keywords must match if keyword | |
9089 | && !( | |
9090 | $type eq 'k' | |
9091 | && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] | |
9092 | ) | |
9093 | ) | |
9094 | ) | |
9095 | { | |
9096 | ||
9097 | #----------------------begin special check--------------- | |
9098 | # | |
9099 | # One more check is needed before we can make the pad. | |
9100 | # If we are in a list with some long items, we want each | |
9101 | # item to stand out. So in the following example, the | |
9102 | # first line begining with '$casefold->' would look good | |
9103 | # padded to align with the next line, but then it | |
9104 | # would be indented more than the last line, so we | |
9105 | # won't do it. | |
9106 | # | |
9107 | # ok( | |
9108 | # $casefold->{code} eq '0041' | |
9109 | # && $casefold->{status} eq 'C' | |
9110 | # && $casefold->{mapping} eq '0061', | |
9111 | # 'casefold 0x41' | |
9112 | # ); | |
9113 | # | |
9114 | # Note: | |
9115 | # It would be faster, and almost as good, to use a comma | |
9116 | # count, and not pad if comma_count > 1 and the previous | |
9117 | # line did not end with a comma. | |
9118 | # | |
9119 | my $ok_to_pad = 1; | |
9120 | ||
9121 | my $ibg = $$ri_first[ $line + 1 ]; | |
9122 | my $depth = $nesting_depth_to_go[ $ibg + 1 ]; | |
9123 | ||
9124 | # just use simplified formula for leading spaces to avoid | |
9125 | # needless sub calls | |
9126 | my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; | |
9127 | ||
9128 | # look at each line beyond the next .. | |
9129 | my $l = $line + 1; | |
9130 | foreach $l ( $line + 2 .. $max_line ) { | |
9131 | my $ibg = $$ri_first[$l]; | |
9132 | ||
9133 | # quit looking at the end of this container | |
9134 | last | |
9135 | if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) | |
9136 | || ( $nesting_depth_to_go[$ibg] < $depth ); | |
9137 | ||
9138 | # cannot do the pad if a later line would be | |
9139 | # outdented more | |
9140 | if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { | |
9141 | $ok_to_pad = 0; | |
9142 | last; | |
9143 | } | |
9144 | } | |
9145 | ||
9146 | # don't pad if we end in a broken list | |
9147 | if ( $l == $max_line ) { | |
9148 | my $i2 = $$ri_last[$l]; | |
9149 | if ( $types_to_go[$i2] eq '#' ) { | |
9150 | my $i1 = $$ri_first[$l]; | |
9151 | next | |
9152 | if ( | |
9153 | terminal_type( \@types_to_go, \@block_type_to_go, | |
9154 | $i1, $i2 ) eq ',' | |
9155 | ); | |
9156 | } | |
9157 | } | |
9158 | next unless $ok_to_pad; | |
9159 | ||
9160 | #----------------------end special check--------------- | |
9161 | ||
9162 | my $length_1 = total_line_length( $ibeg, $ipad - 1 ); | |
9163 | my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); | |
9164 | $pad_spaces = $length_2 - $length_1; | |
9165 | ||
9166 | # make sure this won't change if -lp is used | |
9167 | my $indentation_1 = $leading_spaces_to_go[$ibeg]; | |
9168 | if ( ref($indentation_1) ) { | |
9169 | if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) { | |
9170 | my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; | |
9171 | unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) | |
9172 | { | |
9173 | $pad_spaces = 0; | |
9174 | } | |
9175 | } | |
9176 | } | |
9177 | ||
9178 | # we might be able to handle a pad of -1 by removing a blank | |
9179 | # token | |
9180 | if ( $pad_spaces < 0 ) { | |
9181 | if ( $pad_spaces == -1 ) { | |
9182 | if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) | |
9183 | { | |
9184 | $tokens_to_go[ $ipad - 1 ] = ''; | |
9185 | } | |
9186 | } | |
9187 | $pad_spaces = 0; | |
9188 | } | |
9189 | ||
9190 | # now apply any padding for alignment | |
9191 | if ( $ipad >= 0 && $pad_spaces ) { | |
9192 | my $length_t = total_line_length( $ibeg, $iend ); | |
9193 | if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) | |
9194 | { | |
9195 | $tokens_to_go[$ipad] = | |
9196 | ' ' x $pad_spaces . $tokens_to_go[$ipad]; | |
9197 | } | |
9198 | } | |
9199 | } | |
9200 | } | |
9201 | continue { | |
9202 | $iendm = $iend; | |
9203 | $ibegm = $ibeg; | |
9204 | $has_leading_op = $has_leading_op_next; | |
9205 | } # end of loop over lines | |
9206 | return; | |
9207 | } | |
9208 | } | |
9209 | ||
9210 | sub correct_lp_indentation { | |
9211 | ||
9212 | # When the -lp option is used, we need to make a last pass through | |
9213 | # each line to correct the indentation positions in case they differ | |
9214 | # from the predictions. This is necessary because perltidy uses a | |
9215 | # predictor/corrector method for aligning with opening parens. The | |
9216 | # predictor is usually good, but sometimes stumbles. The corrector | |
9217 | # tries to patch things up once the actual opening paren locations | |
9218 | # are known. | |
9219 | my ( $ri_first, $ri_last ) = @_; | |
9220 | my $do_not_pad = 0; | |
9221 | ||
9222 | # Note on flag '$do_not_pad': | |
9223 | # We want to avoid a situation like this, where the aligner inserts | |
9224 | # whitespace before the '=' to align it with a previous '=', because | |
9225 | # otherwise the parens might become mis-aligned in a situation like | |
9226 | # this, where the '=' has become aligned with the previous line, | |
9227 | # pushing the opening '(' forward beyond where we want it. | |
9228 | # | |
9229 | # $mkFloor::currentRoom = ''; | |
9230 | # $mkFloor::c_entry = $c->Entry( | |
9231 | # -width => '10', | |
9232 | # -relief => 'sunken', | |
9233 | # ... | |
9234 | # ); | |
9235 | # | |
9236 | # We leave it to the aligner to decide how to do this. | |
9237 | ||
9238 | # first remove continuation indentation if appropriate | |
9239 | my $max_line = @$ri_first - 1; | |
9240 | ||
9241 | # looking at each line of this batch.. | |
9242 | my ( $ibeg, $iend ); | |
9243 | my $line; | |
9244 | foreach $line ( 0 .. $max_line ) { | |
9245 | $ibeg = $$ri_first[$line]; | |
9246 | $iend = $$ri_last[$line]; | |
9247 | ||
9248 | # looking at each token in this output line.. | |
9249 | my $i; | |
9250 | foreach $i ( $ibeg .. $iend ) { | |
9251 | ||
9252 | # How many space characters to place before this token | |
9253 | # for special alignment. Actual padding is done in the | |
9254 | # continue block. | |
9255 | ||
9256 | # looking for next unvisited indentation item | |
9257 | my $indentation = $leading_spaces_to_go[$i]; | |
9258 | if ( !$indentation->get_MARKED() ) { | |
9259 | $indentation->set_MARKED(1); | |
9260 | ||
9261 | # looking for indentation item for which we are aligning | |
9262 | # with parens, braces, and brackets | |
9263 | next unless ( $indentation->get_ALIGN_PAREN() ); | |
9264 | ||
9265 | # skip closed container on this line | |
9266 | if ( $i > $ibeg ) { | |
9267 | my $im = $i - 1; | |
9268 | if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- } | |
9269 | if ( $type_sequence_to_go[$im] | |
9270 | && $mate_index_to_go[$im] <= $iend ) | |
9271 | { | |
9272 | next; | |
9273 | } | |
9274 | } | |
9275 | ||
9276 | if ( $line == 1 && $i == $ibeg ) { | |
9277 | $do_not_pad = 1; | |
9278 | } | |
9279 | ||
9280 | # Ok, let's see what the error is and try to fix it | |
9281 | my $actual_pos; | |
9282 | my $predicted_pos = $indentation->get_SPACES(); | |
9283 | if ( $i > $ibeg ) { | |
9284 | ||
9285 | # token is mid-line - use length to previous token | |
9286 | $actual_pos = total_line_length( $ibeg, $i - 1 ); | |
9287 | ||
9288 | # for mid-line token, we must check to see if all | |
9289 | # additional lines have continuation indentation, | |
9290 | # and remove it if so. Otherwise, we do not get | |
9291 | # good alignment. | |
9292 | my $closing_index = $indentation->get_CLOSED(); | |
9293 | if ( $closing_index > $iend ) { | |
9294 | my $ibeg_next = $$ri_first[ $line + 1 ]; | |
9295 | if ( $ci_levels_to_go[$ibeg_next] > 0 ) { | |
9296 | undo_lp_ci( $line, $i, $closing_index, $ri_first, | |
9297 | $ri_last ); | |
9298 | } | |
9299 | } | |
9300 | } | |
9301 | elsif ( $line > 0 ) { | |
9302 | ||
9303 | # handle case where token starts a new line; | |
9304 | # use length of previous line | |
9305 | my $ibegm = $$ri_first[ $line - 1 ]; | |
9306 | my $iendm = $$ri_last[ $line - 1 ]; | |
9307 | $actual_pos = total_line_length( $ibegm, $iendm ); | |
9308 | ||
9309 | # follow -pt style | |
9310 | ++$actual_pos | |
9311 | if ( $types_to_go[ $iendm + 1 ] eq 'b' ); | |
9312 | } | |
9313 | else { | |
9314 | ||
9315 | # token is first character of first line of batch | |
9316 | $actual_pos = $predicted_pos; | |
9317 | } | |
9318 | ||
9319 | my $move_right = $actual_pos - $predicted_pos; | |
9320 | ||
9321 | # done if no error to correct (gnu2.t) | |
9322 | if ( $move_right == 0 ) { | |
9323 | $indentation->set_RECOVERABLE_SPACES($move_right); | |
9324 | next; | |
9325 | } | |
9326 | ||
9327 | # if we have not seen closure for this indentation in | |
9328 | # this batch, we can only pass on a request to the | |
9329 | # vertical aligner | |
9330 | my $closing_index = $indentation->get_CLOSED(); | |
9331 | ||
9332 | if ( $closing_index < 0 ) { | |
9333 | $indentation->set_RECOVERABLE_SPACES($move_right); | |
9334 | next; | |
9335 | } | |
9336 | ||
9337 | # If necessary, look ahead to see if there is really any | |
9338 | # leading whitespace dependent on this whitespace, and | |
9339 | # also find the longest line using this whitespace. | |
9340 | # Since it is always safe to move left if there are no | |
9341 | # dependents, we only need to do this if we may have | |
9342 | # dependent nodes or need to move right. | |
9343 | ||
9344 | my $right_margin = 0; | |
9345 | my $have_child = $indentation->get_HAVE_CHILD(); | |
9346 | ||
9347 | my %saw_indentation; | |
9348 | my $line_count = 1; | |
9349 | $saw_indentation{$indentation} = $indentation; | |
9350 | ||
9351 | if ( $have_child || $move_right > 0 ) { | |
9352 | $have_child = 0; | |
9353 | my $max_length = 0; | |
9354 | if ( $i == $ibeg ) { | |
9355 | $max_length = total_line_length( $ibeg, $iend ); | |
9356 | } | |
9357 | ||
9358 | # look ahead at the rest of the lines of this batch.. | |
9359 | my $line_t; | |
9360 | foreach $line_t ( $line + 1 .. $max_line ) { | |
9361 | my $ibeg_t = $$ri_first[$line_t]; | |
9362 | my $iend_t = $$ri_last[$line_t]; | |
9363 | last if ( $closing_index <= $ibeg_t ); | |
9364 | ||
9365 | # remember all different indentation objects | |
9366 | my $indentation_t = $leading_spaces_to_go[$ibeg_t]; | |
9367 | $saw_indentation{$indentation_t} = $indentation_t; | |
9368 | $line_count++; | |
9369 | ||
9370 | # remember longest line in the group | |
9371 | my $length_t = total_line_length( $ibeg_t, $iend_t ); | |
9372 | if ( $length_t > $max_length ) { | |
9373 | $max_length = $length_t; | |
9374 | } | |
9375 | } | |
9376 | $right_margin = $rOpts_maximum_line_length - $max_length; | |
9377 | if ( $right_margin < 0 ) { $right_margin = 0 } | |
9378 | } | |
9379 | ||
9380 | my $first_line_comma_count = | |
9381 | grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; | |
9382 | my $comma_count = $indentation->get_COMMA_COUNT(); | |
9383 | my $arrow_count = $indentation->get_ARROW_COUNT(); | |
9384 | ||
9385 | # This is a simple approximate test for vertical alignment: | |
9386 | # if we broke just after an opening paren, brace, bracket, | |
9387 | # and there are 2 or more commas in the first line, | |
9388 | # and there are no '=>'s, | |
9389 | # then we are probably vertically aligned. We could set | |
9390 | # an exact flag in sub scan_list, but this is good | |
9391 | # enough. | |
9392 | my $indentation_count = keys %saw_indentation; | |
9393 | my $is_vertically_aligned = | |
9394 | ( $i == $ibeg | |
9395 | && $first_line_comma_count > 1 | |
9396 | && $indentation_count == 1 | |
9397 | && ( $arrow_count == 0 || $arrow_count == $line_count ) ); | |
9398 | ||
9399 | # Make the move if possible .. | |
9400 | if ( | |
9401 | ||
9402 | # we can always move left | |
9403 | $move_right < 0 | |
9404 | ||
9405 | # but we should only move right if we are sure it will | |
9406 | # not spoil vertical alignment | |
9407 | || ( $comma_count == 0 ) | |
9408 | || ( $comma_count > 0 && !$is_vertically_aligned ) | |
9409 | ) | |
9410 | { | |
9411 | my $move = | |
9412 | ( $move_right <= $right_margin ) | |
9413 | ? $move_right | |
9414 | : $right_margin; | |
9415 | ||
9416 | foreach ( keys %saw_indentation ) { | |
9417 | $saw_indentation{$_} | |
9418 | ->permanently_decrease_AVAILABLE_SPACES( -$move ); | |
9419 | } | |
9420 | } | |
9421 | ||
9422 | # Otherwise, record what we want and the vertical aligner | |
9423 | # will try to recover it. | |
9424 | else { | |
9425 | $indentation->set_RECOVERABLE_SPACES($move_right); | |
9426 | } | |
9427 | } | |
9428 | } | |
9429 | } | |
9430 | return $do_not_pad; | |
9431 | } | |
9432 | ||
9433 | # flush is called to output any tokens in the pipeline, so that | |
9434 | # an alternate source of lines can be written in the correct order | |
9435 | ||
9436 | sub flush { | |
9437 | destroy_one_line_block(); | |
9438 | output_line_to_go(); | |
9439 | Perl::Tidy::VerticalAligner::flush(); | |
9440 | } | |
9441 | ||
9442 | # output_line_to_go sends one logical line of tokens on down the | |
9443 | # pipeline to the VerticalAligner package, breaking the line into continuation | |
9444 | # lines as necessary. The line of tokens is ready to go in the "to_go" | |
9445 | # arrays. | |
9446 | ||
9447 | sub output_line_to_go { | |
9448 | ||
9449 | # debug stuff; this routine can be called from many points | |
9450 | FORMATTER_DEBUG_FLAG_OUTPUT && do { | |
9451 | my ( $a, $b, $c ) = caller; | |
9452 | write_diagnostics( | |
9453 | "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" | |
9454 | ); | |
9455 | my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; | |
9456 | write_diagnostics("$output_str\n"); | |
9457 | }; | |
9458 | ||
9459 | # just set a tentative breakpoint if we might be in a one-line block | |
9460 | if ( $index_start_one_line_block != UNDEFINED_INDEX ) { | |
9461 | set_forced_breakpoint($max_index_to_go); | |
9462 | return; | |
9463 | } | |
9464 | ||
9465 | my $cscw_block_comment; | |
9466 | $cscw_block_comment = add_closing_side_comment() | |
9467 | if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); | |
9468 | ||
9469 | match_opening_and_closing_tokens(); | |
9470 | ||
9471 | # tell the -lp option we are outputting a batch so it can close | |
9472 | # any unfinished items in its stack | |
9473 | finish_lp_batch(); | |
9474 | ||
9475 | my $imin = 0; | |
9476 | my $imax = $max_index_to_go; | |
9477 | ||
9478 | # trim any blank tokens | |
9479 | if ( $max_index_to_go >= 0 ) { | |
9480 | if ( $types_to_go[$imin] eq 'b' ) { $imin++ } | |
9481 | if ( $types_to_go[$imax] eq 'b' ) { $imax-- } | |
9482 | } | |
9483 | ||
9484 | # anything left to write? | |
9485 | if ( $imin <= $imax ) { | |
9486 | ||
9487 | # add a blank line before certain key types | |
9488 | if ( $last_line_leading_type !~ /^[#b]/ ) { | |
9489 | my $want_blank = 0; | |
9490 | my $leading_token = $tokens_to_go[$imin]; | |
9491 | my $leading_type = $types_to_go[$imin]; | |
9492 | ||
9493 | # blank lines before subs except declarations and one-liners | |
9494 | # MCONVERSION LOCATION - for sub tokenization change | |
9495 | if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { | |
9496 | $want_blank = ( $rOpts->{'blanks-before-subs'} ) | |
9497 | && ( | |
9498 | terminal_type( \@types_to_go, \@block_type_to_go, $imin, | |
9499 | $imax ) !~ /^[\;\}]$/ | |
9500 | ); | |
9501 | } | |
9502 | ||
9503 | # break before all package declarations | |
9504 | # MCONVERSION LOCATION - for tokenizaton change | |
9505 | elsif ( $leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) { | |
9506 | $want_blank = ( $rOpts->{'blanks-before-subs'} ); | |
9507 | } | |
9508 | ||
9509 | # break before certain key blocks except one-liners | |
9510 | if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { | |
9511 | $want_blank = ( $rOpts->{'blanks-before-subs'} ) | |
9512 | && ( | |
9513 | terminal_type( \@types_to_go, \@block_type_to_go, $imin, | |
9514 | $imax ) ne '}' | |
9515 | ); | |
9516 | } | |
9517 | ||
9518 | # Break before certain block types if we haven't had a break at this | |
9519 | # level for a while. This is the difficult decision.. | |
9520 | elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/ | |
9521 | && $leading_type eq 'k' ) | |
9522 | { | |
9523 | my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; | |
9524 | if ( !defined($lc) ) { $lc = 0 } | |
9525 | ||
9526 | $want_blank = $rOpts->{'blanks-before-blocks'} | |
9527 | && $lc >= $rOpts->{'long-block-line-count'} | |
9528 | && $file_writer_object->get_consecutive_nonblank_lines() >= | |
9529 | $rOpts->{'long-block-line-count'} | |
9530 | && ( | |
9531 | terminal_type( \@types_to_go, \@block_type_to_go, $imin, | |
9532 | $imax ) ne '}' | |
9533 | ); | |
9534 | } | |
9535 | ||
9536 | if ($want_blank) { | |
9537 | ||
9538 | # future: send blank line down normal path to VerticalAligner | |
9539 | Perl::Tidy::VerticalAligner::flush(); | |
9540 | $file_writer_object->write_blank_code_line(); | |
9541 | } | |
9542 | } | |
9543 | ||
9544 | # update blank line variables and count number of consecutive | |
9545 | # non-blank, non-comment lines at this level | |
9546 | $last_last_line_leading_level = $last_line_leading_level; | |
9547 | $last_line_leading_level = $levels_to_go[$imin]; | |
9548 | if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } | |
9549 | $last_line_leading_type = $types_to_go[$imin]; | |
9550 | if ( $last_line_leading_level == $last_last_line_leading_level | |
9551 | && $last_line_leading_type ne 'b' | |
9552 | && $last_line_leading_type ne '#' | |
9553 | && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) | |
9554 | { | |
9555 | $nonblank_lines_at_depth[$last_line_leading_level]++; | |
9556 | } | |
9557 | else { | |
9558 | $nonblank_lines_at_depth[$last_line_leading_level] = 1; | |
9559 | } | |
9560 | ||
9561 | FORMATTER_DEBUG_FLAG_FLUSH && do { | |
9562 | my ( $package, $file, $line ) = caller; | |
9563 | ||
9564 | "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; | |
9565 | }; | |
9566 | ||
9567 | # add a couple of extra terminal blank tokens | |
9568 | pad_array_to_go(); | |
9569 | ||
9570 | # set all forced breakpoints for good list formatting | |
9571 | my $saw_good_break = 0; | |
9572 | my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; | |
9573 | ||
9574 | if ( | |
9575 | $max_index_to_go > 0 | |
9576 | && ( | |
9577 | $is_long_line | |
9578 | || $old_line_count_in_batch > 1 | |
9579 | || is_unbalanced_batch() | |
9580 | || ( | |
9581 | $comma_count_in_batch | |
9582 | && ( $rOpts_maximum_fields_per_table > 0 | |
9583 | || $rOpts_comma_arrow_breakpoints == 0 ) | |
9584 | ) | |
9585 | ) | |
9586 | ) | |
9587 | { | |
9588 | $saw_good_break = scan_list(); | |
9589 | } | |
9590 | ||
9591 | # let $ri_first and $ri_last be references to lists of | |
9592 | # first and last tokens of line fragments to output.. | |
9593 | my ( $ri_first, $ri_last ); | |
9594 | ||
9595 | # write a single line if.. | |
9596 | if ( | |
9597 | ||
9598 | # we aren't allowed to add any newlines | |
9599 | !$rOpts_add_newlines | |
9600 | ||
9601 | # or, we don't already have an interior breakpoint | |
9602 | # and we didn't see a good breakpoint | |
9603 | || ( | |
9604 | !$forced_breakpoint_count | |
9605 | && !$saw_good_break | |
9606 | ||
9607 | # and this line is 'short' | |
9608 | && !$is_long_line | |
9609 | ) | |
9610 | ) | |
9611 | { | |
9612 | @$ri_first = ($imin); | |
9613 | @$ri_last = ($imax); | |
9614 | } | |
9615 | ||
9616 | # otherwise use multiple lines | |
9617 | else { | |
9618 | ||
9619 | ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break); | |
9620 | ||
9621 | # now we do a correction step to clean this up a bit | |
9622 | # (The only time we would not do this is for debugging) | |
9623 | if ( $rOpts->{'recombine'} ) { | |
9624 | ( $ri_first, $ri_last ) = | |
9625 | recombine_breakpoints( $ri_first, $ri_last ); | |
9626 | } | |
9627 | } | |
9628 | ||
9629 | # do corrector step if -lp option is used | |
9630 | my $do_not_pad = 0; | |
9631 | if ($rOpts_line_up_parentheses) { | |
9632 | $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); | |
9633 | } | |
9634 | send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad ); | |
9635 | } | |
9636 | prepare_for_new_input_lines(); | |
9637 | ||
9638 | # output any new -cscw block comment | |
9639 | if ($cscw_block_comment) { | |
9640 | flush(); | |
9641 | $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); | |
9642 | } | |
9643 | } | |
9644 | ||
9645 | sub reset_block_text_accumulator { | |
9646 | ||
9647 | # save text after 'if' and 'elsif' to append after 'else' | |
9648 | if ($accumulating_text_for_block) { | |
9649 | ||
9650 | if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { | |
9651 | push @{$rleading_block_if_elsif_text}, $leading_block_text; | |
9652 | } | |
9653 | } | |
9654 | $accumulating_text_for_block = ""; | |
9655 | $leading_block_text = ""; | |
9656 | $leading_block_text_level = 0; | |
9657 | $leading_block_text_length_exceeded = 0; | |
9658 | $leading_block_text_line_number = 0; | |
9659 | $leading_block_text_line_length = 0; | |
9660 | } | |
9661 | ||
9662 | sub set_block_text_accumulator { | |
9663 | my $i = shift; | |
9664 | $accumulating_text_for_block = $tokens_to_go[$i]; | |
9665 | if ( $accumulating_text_for_block !~ /^els/ ) { | |
9666 | $rleading_block_if_elsif_text = []; | |
9667 | } | |
9668 | $leading_block_text = ""; | |
9669 | $leading_block_text_level = $levels_to_go[$i]; | |
9670 | $leading_block_text_line_number = | |
9671 | $vertical_aligner_object->get_output_line_number(); | |
9672 | $leading_block_text_length_exceeded = 0; | |
9673 | ||
9674 | # this will contain the column number of the last character | |
9675 | # of the closing side comment | |
9676 | $leading_block_text_line_length = | |
9677 | length($accumulating_text_for_block) + | |
9678 | length( $rOpts->{'closing-side-comment-prefix'} ) + | |
9679 | $leading_block_text_level * $rOpts_indent_columns + 3; | |
9680 | } | |
9681 | ||
9682 | sub accumulate_block_text { | |
9683 | my $i = shift; | |
9684 | ||
9685 | # accumulate leading text for -csc, ignoring any side comments | |
9686 | if ( $accumulating_text_for_block | |
9687 | && !$leading_block_text_length_exceeded | |
9688 | && $types_to_go[$i] ne '#' ) | |
9689 | { | |
9690 | ||
9691 | my $added_length = length( $tokens_to_go[$i] ); | |
9692 | $added_length += 1 if $i == 0; | |
9693 | my $new_line_length = $leading_block_text_line_length + $added_length; | |
9694 | ||
9695 | # we can add this text if we don't exceed some limits.. | |
9696 | if ( | |
9697 | ||
9698 | # we must not have already exceeded the text length limit | |
9699 | length($leading_block_text) < | |
9700 | $rOpts_closing_side_comment_maximum_text | |
9701 | ||
9702 | # and either: | |
9703 | # the new total line length must be below the line length limit | |
9704 | # or the new length must be below the text length limit | |
9705 | # (ie, we may allow one token to exceed the text length limit) | |
9706 | && ( $new_line_length < $rOpts_maximum_line_length | |
9707 | || length($leading_block_text) + $added_length < | |
9708 | $rOpts_closing_side_comment_maximum_text ) | |
9709 | ||
9710 | # UNLESS: we are adding a closing paren before the brace we seek. | |
9711 | # This is an attempt to avoid situations where the ... to be | |
9712 | # added are longer than the omitted right paren, as in: | |
9713 | ||
9714 | # foreach my $item (@a_rather_long_variable_name_here) { | |
9715 | # &whatever; | |
9716 | # } ## end foreach my $item (@a_rather_long_variable_name_here... | |
9717 | ||
9718 | || ( | |
9719 | $tokens_to_go[$i] eq ')' | |
9720 | && ( | |
9721 | ( | |
9722 | $i + 1 <= $max_index_to_go | |
9723 | && $block_type_to_go[ $i + 1 ] eq | |
9724 | $accumulating_text_for_block | |
9725 | ) | |
9726 | || ( $i + 2 <= $max_index_to_go | |
9727 | && $block_type_to_go[ $i + 2 ] eq | |
9728 | $accumulating_text_for_block ) | |
9729 | ) | |
9730 | ) | |
9731 | ) | |
9732 | { | |
9733 | ||
9734 | # add an extra space at each newline | |
9735 | if ( $i == 0 ) { $leading_block_text .= ' ' } | |
9736 | ||
9737 | # add the token text | |
9738 | $leading_block_text .= $tokens_to_go[$i]; | |
9739 | $leading_block_text_line_length = $new_line_length; | |
9740 | } | |
9741 | ||
9742 | # show that text was truncated if necessary | |
9743 | elsif ( $types_to_go[$i] ne 'b' ) { | |
9744 | $leading_block_text_length_exceeded = 1; | |
9745 | $leading_block_text .= '...'; | |
9746 | } | |
9747 | } | |
9748 | } | |
9749 | ||
9750 | { | |
9751 | my %is_if_elsif_else_unless_while_until_for_foreach; | |
9752 | ||
9753 | BEGIN { | |
9754 | ||
9755 | # These block types may have text between the keyword and opening | |
9756 | # curly. Note: 'else' does not, but must be included to allow trailing | |
9757 | # if/elsif text to be appended. | |
9758 | # patch for SWITCH/CASE: added 'case' and 'when' | |
9759 | @_ = qw(if elsif else unless while until for foreach case when); | |
9760 | @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_); | |
9761 | } | |
9762 | ||
9763 | sub accumulate_csc_text { | |
9764 | ||
9765 | # called once per output buffer when -csc is used. Accumulates | |
9766 | # the text placed after certain closing block braces. | |
9767 | # Defines and returns the following for this buffer: | |
9768 | ||
9769 | my $block_leading_text = ""; # the leading text of the last '}' | |
9770 | my $rblock_leading_if_elsif_text; | |
9771 | my $i_block_leading_text = | |
9772 | -1; # index of token owning block_leading_text | |
9773 | my $block_line_count = 100; # how many lines the block spans | |
9774 | my $terminal_type = 'b'; # type of last nonblank token | |
9775 | my $i_terminal = 0; # index of last nonblank token | |
9776 | my $terminal_block_type = ""; | |
9777 | ||
9778 | for my $i ( 0 .. $max_index_to_go ) { | |
9779 | my $type = $types_to_go[$i]; | |
9780 | my $block_type = $block_type_to_go[$i]; | |
9781 | my $token = $tokens_to_go[$i]; | |
9782 | ||
9783 | # remember last nonblank token type | |
9784 | if ( $type ne '#' && $type ne 'b' ) { | |
9785 | $terminal_type = $type; | |
9786 | $terminal_block_type = $block_type; | |
9787 | $i_terminal = $i; | |
9788 | } | |
9789 | ||
9790 | my $type_sequence = $type_sequence_to_go[$i]; | |
9791 | if ( $block_type && $type_sequence ) { | |
9792 | ||
9793 | if ( $token eq '}' ) { | |
9794 | ||
9795 | # restore any leading text saved when we entered this block | |
9796 | if ( defined( $block_leading_text{$type_sequence} ) ) { | |
9797 | ( $block_leading_text, $rblock_leading_if_elsif_text ) = | |
9798 | @{ $block_leading_text{$type_sequence} }; | |
9799 | $i_block_leading_text = $i; | |
9800 | delete $block_leading_text{$type_sequence}; | |
9801 | $rleading_block_if_elsif_text = | |
9802 | $rblock_leading_if_elsif_text; | |
9803 | } | |
9804 | ||
9805 | # if we run into a '}' then we probably started accumulating | |
9806 | # at something like a trailing 'if' clause..no harm done. | |
9807 | if ( $accumulating_text_for_block | |
9808 | && $levels_to_go[$i] <= $leading_block_text_level ) | |
9809 | { | |
9810 | my $lev = $levels_to_go[$i]; | |
9811 | reset_block_text_accumulator(); | |
9812 | } | |
9813 | ||
9814 | if ( defined( $block_opening_line_number{$type_sequence} ) ) | |
9815 | { | |
9816 | my $output_line_number = | |
9817 | $vertical_aligner_object->get_output_line_number(); | |
9818 | $block_line_count = $output_line_number - | |
9819 | $block_opening_line_number{$type_sequence} + 1; | |
9820 | delete $block_opening_line_number{$type_sequence}; | |
9821 | } | |
9822 | else { | |
9823 | ||
9824 | # Error: block opening line undefined for this line.. | |
9825 | # This shouldn't be possible, but it is not a | |
9826 | # significant problem. | |
9827 | } | |
9828 | } | |
9829 | ||
9830 | elsif ( $token eq '{' ) { | |
9831 | ||
9832 | my $line_number = | |
9833 | $vertical_aligner_object->get_output_line_number(); | |
9834 | $block_opening_line_number{$type_sequence} = $line_number; | |
9835 | ||
9836 | if ( $accumulating_text_for_block | |
9837 | && $levels_to_go[$i] == $leading_block_text_level ) | |
9838 | { | |
9839 | ||
9840 | if ( $accumulating_text_for_block eq $block_type ) { | |
9841 | ||
9842 | # save any leading text before we enter this block | |
9843 | $block_leading_text{$type_sequence} = [ | |
9844 | $leading_block_text, | |
9845 | $rleading_block_if_elsif_text | |
9846 | ]; | |
9847 | $block_opening_line_number{$type_sequence} = | |
9848 | $leading_block_text_line_number; | |
9849 | reset_block_text_accumulator(); | |
9850 | } | |
9851 | else { | |
9852 | ||
9853 | # shouldn't happen, but not a serious error. | |
9854 | # We were accumulating -csc text for block type | |
9855 | # $accumulating_text_for_block and unexpectedly | |
9856 | # encountered a '{' for block type $block_type. | |
9857 | } | |
9858 | } | |
9859 | } | |
9860 | } | |
9861 | ||
9862 | if ( $type eq 'k' | |
9863 | && $csc_new_statement_ok | |
9864 | && $is_if_elsif_else_unless_while_until_for_foreach{$token} | |
9865 | && $token =~ /$closing_side_comment_list_pattern/o ) | |
9866 | { | |
9867 | set_block_text_accumulator($i); | |
9868 | } | |
9869 | else { | |
9870 | ||
9871 | # note: ignoring type 'q' because of tricks being played | |
9872 | # with 'q' for hanging side comments | |
9873 | if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { | |
9874 | $csc_new_statement_ok = | |
9875 | ( $block_type || $type eq 'J' || $type eq ';' ); | |
9876 | } | |
9877 | if ( $type eq ';' | |
9878 | && $accumulating_text_for_block | |
9879 | && $levels_to_go[$i] == $leading_block_text_level ) | |
9880 | { | |
9881 | reset_block_text_accumulator(); | |
9882 | } | |
9883 | else { | |
9884 | accumulate_block_text($i); | |
9885 | } | |
9886 | } | |
9887 | } | |
9888 | ||
9889 | # Treat an 'else' block specially by adding preceding 'if' and | |
9890 | # 'elsif' text. Otherwise, the 'end else' is not helpful, | |
9891 | # especially for cuddled-else formatting. | |
9892 | if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { | |
9893 | $block_leading_text = | |
9894 | make_else_csc_text( $i_terminal, $terminal_block_type, | |
9895 | $block_leading_text, $rblock_leading_if_elsif_text ); | |
9896 | } | |
9897 | ||
9898 | return ( $terminal_type, $i_terminal, $i_block_leading_text, | |
9899 | $block_leading_text, $block_line_count ); | |
9900 | } | |
9901 | } | |
9902 | ||
9903 | sub make_else_csc_text { | |
9904 | ||
9905 | # create additional -csc text for an 'else' and optionally 'elsif', | |
9906 | # depending on the value of switch | |
9907 | # $rOpts_closing_side_comment_else_flag: | |
9908 | # | |
9909 | # = 0 add 'if' text to trailing else | |
9910 | # = 1 same as 0 plus: | |
9911 | # add 'if' to 'elsif's if can fit in line length | |
9912 | # add last 'elsif' to trailing else if can fit in one line | |
9913 | # = 2 same as 1 but do not check if exceed line length | |
9914 | # | |
9915 | # $rif_elsif_text = a reference to a list of all previous closing | |
9916 | # side comments created for this if block | |
9917 | # | |
9918 | my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; | |
9919 | my $csc_text = $block_leading_text; | |
9920 | ||
9921 | if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 ) | |
9922 | { | |
9923 | return $csc_text; | |
9924 | } | |
9925 | ||
9926 | my $count = @{$rif_elsif_text}; | |
9927 | return $csc_text unless ($count); | |
9928 | ||
9929 | my $if_text = '[ if' . $rif_elsif_text->[0]; | |
9930 | ||
9931 | # always show the leading 'if' text on 'else' | |
9932 | if ( $block_type eq 'else' ) { | |
9933 | $csc_text .= $if_text; | |
9934 | } | |
9935 | ||
9936 | # see if that's all | |
9937 | if ( $rOpts_closing_side_comment_else_flag == 0 ) { | |
9938 | return $csc_text; | |
9939 | } | |
9940 | ||
9941 | my $last_elsif_text = ""; | |
9942 | if ( $count > 1 ) { | |
9943 | $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; | |
9944 | if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } | |
9945 | } | |
9946 | ||
9947 | # tentatively append one more item | |
9948 | my $saved_text = $csc_text; | |
9949 | if ( $block_type eq 'else' ) { | |
9950 | $csc_text .= $last_elsif_text; | |
9951 | } | |
9952 | else { | |
9953 | $csc_text .= ' ' . $if_text; | |
9954 | } | |
9955 | ||
9956 | # all done if no length checks requested | |
9957 | if ( $rOpts_closing_side_comment_else_flag == 2 ) { | |
9958 | return $csc_text; | |
9959 | } | |
9960 | ||
9961 | # undo it if line length exceeded | |
9962 | my $length = | |
9963 | length($csc_text) + length($block_type) + | |
9964 | length( $rOpts->{'closing-side-comment-prefix'} ) + | |
9965 | $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; | |
9966 | if ( $length > $rOpts_maximum_line_length ) { | |
9967 | $csc_text = $saved_text; | |
9968 | } | |
9969 | return $csc_text; | |
9970 | } | |
9971 | ||
9972 | sub add_closing_side_comment { | |
9973 | ||
9974 | # add closing side comments after closing block braces if -csc used | |
9975 | my $cscw_block_comment; | |
9976 | ||
9977 | #--------------------------------------------------------------- | |
9978 | # Step 1: loop through all tokens of this line to accumulate | |
9979 | # the text needed to create the closing side comments. Also see | |
9980 | # how the line ends. | |
9981 | #--------------------------------------------------------------- | |
9982 | ||
9983 | my ( $terminal_type, $i_terminal, $i_block_leading_text, | |
9984 | $block_leading_text, $block_line_count ) | |
9985 | = accumulate_csc_text(); | |
9986 | ||
9987 | #--------------------------------------------------------------- | |
9988 | # Step 2: make the closing side comment if this ends a block | |
9989 | #--------------------------------------------------------------- | |
9990 | my $have_side_comment = $i_terminal != $max_index_to_go; | |
9991 | ||
9992 | # if this line might end in a block closure.. | |
9993 | if ( | |
9994 | $terminal_type eq '}' | |
9995 | ||
9996 | # ..and either | |
9997 | && ( | |
9998 | ||
9999 | # the block is long enough | |
10000 | ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) | |
10001 | ||
10002 | # or there is an existing comment to check | |
10003 | || ( $have_side_comment | |
10004 | && $rOpts->{'closing-side-comment-warnings'} ) | |
10005 | ) | |
10006 | ||
10007 | # .. and if this is one of the types of interest | |
10008 | && $block_type_to_go[$i_terminal] =~ | |
10009 | /$closing_side_comment_list_pattern/o | |
10010 | ||
10011 | # ..and the corresponding opening brace must is not in this batch | |
10012 | # (because we do not need to tag one-line blocks, although this | |
10013 | # should also be caught with a positive -csci value) | |
10014 | && $mate_index_to_go[$i_terminal] < 0 | |
10015 | ||
10016 | # ..and either | |
10017 | && ( | |
10018 | ||
10019 | # this is the last token (line doesnt have a side comment) | |
10020 | !$have_side_comment | |
10021 | ||
10022 | # or the old side comment is a closing side comment | |
10023 | || $tokens_to_go[$max_index_to_go] =~ | |
10024 | /$closing_side_comment_prefix_pattern/o | |
10025 | ) | |
10026 | ) | |
10027 | { | |
10028 | ||
10029 | # then make the closing side comment text | |
10030 | my $token = | |
10031 | "$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]"; | |
10032 | ||
10033 | # append any extra descriptive text collected above | |
10034 | if ( $i_block_leading_text == $i_terminal ) { | |
10035 | $token .= $block_leading_text; | |
10036 | } | |
10037 | $token =~ s/\s*$//; # trim any trailing whitespace | |
10038 | ||
10039 | # handle case of existing closing side comment | |
10040 | if ($have_side_comment) { | |
10041 | ||
10042 | # warn if requested and tokens differ significantly | |
10043 | if ( $rOpts->{'closing-side-comment-warnings'} ) { | |
10044 | my $old_csc = $tokens_to_go[$max_index_to_go]; | |
10045 | my $new_csc = $token; | |
10046 | $new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' | |
10047 | my $new_trailing_dots = $1; | |
10048 | $old_csc =~ s/\.\.\.\s*$//; | |
10049 | $new_csc =~ s/\s+//g; # trim all whitespace | |
10050 | $old_csc =~ s/\s+//g; | |
10051 | ||
10052 | # Patch to handle multiple closing side comments at | |
10053 | # else and elsif's. These have become too complicated | |
10054 | # to check, so if we see an indication of | |
10055 | # '[ if' or '[ # elsif', then assume they were made | |
10056 | # by perltidy. | |
10057 | if ( $block_type_to_go[$i_terminal] eq 'else' ) { | |
10058 | if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } | |
10059 | } | |
10060 | elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { | |
10061 | if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } | |
10062 | } | |
10063 | ||
10064 | # if old comment is contained in new comment, | |
10065 | # only compare the common part. | |
10066 | if ( length($new_csc) > length($old_csc) ) { | |
10067 | $new_csc = substr( $new_csc, 0, length($old_csc) ); | |
10068 | } | |
10069 | ||
10070 | # if the new comment is shorter and has been limited, | |
10071 | # only compare the common part. | |
10072 | if ( length($new_csc) < length($old_csc) && $new_trailing_dots ) | |
10073 | { | |
10074 | $old_csc = substr( $old_csc, 0, length($new_csc) ); | |
10075 | } | |
10076 | ||
10077 | # any remaining difference? | |
10078 | if ( $new_csc ne $old_csc ) { | |
10079 | ||
10080 | # just leave the old comment if we are below the threshold | |
10081 | # for creating side comments | |
10082 | if ( $block_line_count < | |
10083 | $rOpts->{'closing-side-comment-interval'} ) | |
10084 | { | |
10085 | $token = undef; | |
10086 | } | |
10087 | ||
10088 | # otherwise we'll make a note of it | |
10089 | else { | |
10090 | ||
10091 | warning( | |
10092 | "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" | |
10093 | ); | |
10094 | ||
10095 | # save the old side comment in a new trailing block comment | |
10096 | my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; | |
10097 | $year += 1900; | |
10098 | $month += 1; | |
10099 | $cscw_block_comment = | |
10100 | "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; | |
10101 | } | |
10102 | } | |
10103 | else { | |
10104 | ||
10105 | # No differences.. we can safely delete old comment if we | |
10106 | # are below the threshold | |
10107 | if ( $block_line_count < | |
10108 | $rOpts->{'closing-side-comment-interval'} ) | |
10109 | { | |
10110 | $token = undef; | |
10111 | unstore_token_to_go() | |
10112 | if ( $types_to_go[$max_index_to_go] eq '#' ); | |
10113 | unstore_token_to_go() | |
10114 | if ( $types_to_go[$max_index_to_go] eq 'b' ); | |
10115 | } | |
10116 | } | |
10117 | } | |
10118 | ||
10119 | # switch to the new csc (unless we deleted it!) | |
10120 | $tokens_to_go[$max_index_to_go] = $token if $token; | |
10121 | } | |
10122 | ||
10123 | # handle case of NO existing closing side comment | |
10124 | else { | |
10125 | ||
10126 | # insert the new side comment into the output token stream | |
10127 | my $type = '#'; | |
10128 | my $block_type = ''; | |
10129 | my $type_sequence = ''; | |
10130 | my $container_environment = | |
10131 | $container_environment_to_go[$max_index_to_go]; | |
10132 | my $level = $levels_to_go[$max_index_to_go]; | |
10133 | my $slevel = $nesting_depth_to_go[$max_index_to_go]; | |
10134 | my $no_internal_newlines = 0; | |
10135 | ||
10136 | my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; | |
10137 | my $ci_level = $ci_levels_to_go[$max_index_to_go]; | |
10138 | my $in_continued_quote = 0; | |
10139 | ||
10140 | # first insert a blank token | |
10141 | insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); | |
10142 | ||
10143 | # then the side comment | |
10144 | insert_new_token_to_go( $token, $type, $slevel, | |
10145 | $no_internal_newlines ); | |
10146 | } | |
10147 | } | |
10148 | return $cscw_block_comment; | |
10149 | } | |
10150 | ||
10151 | sub previous_nonblank_token { | |
10152 | my ($i) = @_; | |
10153 | if ( $i <= 0 ) { | |
10154 | return ""; | |
10155 | } | |
10156 | elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { | |
10157 | return $tokens_to_go[ $i - 1 ]; | |
10158 | } | |
10159 | elsif ( $i > 1 ) { | |
10160 | return $tokens_to_go[ $i - 2 ]; | |
10161 | } | |
10162 | else { | |
10163 | return ""; | |
10164 | } | |
10165 | } | |
10166 | ||
10167 | sub send_lines_to_vertical_aligner { | |
10168 | ||
10169 | my ( $ri_first, $ri_last, $do_not_pad ) = @_; | |
10170 | ||
10171 | my $rindentation_list = [0]; # ref to indentations for each line | |
10172 | ||
10173 | set_vertical_alignment_markers( $ri_first, $ri_last ); | |
10174 | ||
10175 | # flush if necessary to avoid unwanted alignment | |
10176 | my $must_flush = 0; | |
10177 | if ( @$ri_first > 1 ) { | |
10178 | ||
10179 | # flush before a long if statement | |
10180 | if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { | |
10181 | $must_flush = 1; | |
10182 | } | |
10183 | } | |
10184 | if ($must_flush) { | |
10185 | Perl::Tidy::VerticalAligner::flush(); | |
10186 | } | |
10187 | ||
10188 | set_logical_padding( $ri_first, $ri_last ); | |
10189 | ||
10190 | # loop to prepare each line for shipment | |
10191 | my $n_last_line = @$ri_first - 1; | |
10192 | my $in_comma_list; | |
10193 | for my $n ( 0 .. $n_last_line ) { | |
10194 | my $ibeg = $$ri_first[$n]; | |
10195 | my $iend = $$ri_last[$n]; | |
10196 | ||
10197 | my @patterns = (); | |
10198 | my @tokens = (); | |
10199 | my @fields = (); | |
10200 | my $i_start = $ibeg; | |
10201 | my $i; | |
10202 | ||
10203 | my $depth = 0; | |
10204 | my @container_name = (""); | |
10205 | my @multiple_comma_arrows = (undef); | |
10206 | ||
10207 | my $j = 0; # field index | |
10208 | ||
10209 | $patterns[0] = ""; | |
10210 | for $i ( $ibeg .. $iend ) { | |
10211 | ||
10212 | # Keep track of containers balanced on this line only. | |
10213 | # These are used below to prevent unwanted cross-line alignments. | |
10214 | # Unbalanced containers already avoid aligning across | |
10215 | # container boundaries. | |
10216 | if ( $tokens_to_go[$i] eq '(' ) { | |
10217 | my $i_mate = $mate_index_to_go[$i]; | |
10218 | if ( $i_mate > $i && $i_mate <= $iend ) { | |
10219 | $depth++; | |
10220 | my $seqno = $type_sequence_to_go[$i]; | |
10221 | my $count = comma_arrow_count($seqno); | |
10222 | $multiple_comma_arrows[$depth] = $count && $count > 1; | |
10223 | my $name = previous_nonblank_token($i); | |
10224 | $name =~ s/^->//; | |
10225 | $container_name[$depth] = "+" . $name; | |
10226 | } | |
10227 | } | |
10228 | elsif ( $tokens_to_go[$i] eq ')' ) { | |
10229 | $depth-- if $depth > 0; | |
10230 | } | |
10231 | ||
10232 | # if we find a new synchronization token, we are done with | |
10233 | # a field | |
10234 | if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { | |
10235 | ||
10236 | my $tok = my $raw_tok = $matching_token_to_go[$i]; | |
10237 | ||
10238 | # make separators in different nesting depths unique | |
10239 | # by appending the nesting depth digit. | |
10240 | if ( $raw_tok ne '#' ) { | |
10241 | $tok .= "$nesting_depth_to_go[$i]"; | |
10242 | } | |
10243 | ||
10244 | # do any special decorations for commas to avoid unwanted | |
10245 | # cross-line alignments. | |
10246 | if ( $raw_tok eq ',' ) { | |
10247 | if ( $container_name[$depth] ) { | |
10248 | $tok .= $container_name[$depth]; | |
10249 | } | |
10250 | } | |
10251 | ||
10252 | # decorate '=>' with: | |
10253 | # - Nothing if this container is unbalanced on this line. | |
10254 | # - The previous token if it is balanced and multiple '=>'s | |
10255 | # - The container name if it is bananced and no other '=>'s | |
10256 | elsif ( $raw_tok eq '=>' ) { | |
10257 | if ( $container_name[$depth] ) { | |
10258 | if ( $multiple_comma_arrows[$depth] ) { | |
10259 | $tok .= "+" . previous_nonblank_token($i); | |
10260 | } | |
10261 | else { | |
10262 | $tok .= $container_name[$depth]; | |
10263 | } | |
10264 | } | |
10265 | } | |
10266 | ||
10267 | # concatenate the text of the consecutive tokens to form | |
10268 | # the field | |
10269 | push( @fields, | |
10270 | join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); | |
10271 | ||
10272 | # store the alignment token for this field | |
10273 | push( @tokens, $tok ); | |
10274 | ||
10275 | # get ready for the next batch | |
10276 | $i_start = $i; | |
10277 | $j++; | |
10278 | $patterns[$j] = ""; | |
10279 | } | |
10280 | ||
10281 | # continue accumulating tokens | |
10282 | # handle non-keywords.. | |
10283 | if ( $types_to_go[$i] ne 'k' ) { | |
10284 | my $type = $types_to_go[$i]; | |
10285 | ||
10286 | # Mark most things before arrows as a quote to | |
10287 | # get them to line up. Testfile: mixed.pl. | |
10288 | if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { | |
10289 | my $next_type = $types_to_go[ $i + 1 ]; | |
10290 | my $i_next_nonblank = | |
10291 | ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); | |
10292 | ||
10293 | if ( $types_to_go[$i_next_nonblank] eq '=>' ) { | |
10294 | $type = 'Q'; | |
10295 | } | |
10296 | } | |
10297 | ||
10298 | # minor patch to make numbers and quotes align | |
10299 | if ( $type eq 'n' ) { $type = 'Q' } | |
10300 | ||
10301 | $patterns[$j] .= $type; | |
10302 | } | |
10303 | ||
10304 | # for keywords we have to use the actual text | |
10305 | else { | |
10306 | ||
10307 | # map certain keywords to the same 'if' class to align | |
10308 | # long if/elsif sequences. my testfile: elsif.pl | |
10309 | my $tok = $tokens_to_go[$i]; | |
10310 | if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) { | |
10311 | $tok = 'if'; | |
10312 | } | |
10313 | $patterns[$j] .= $tok; | |
10314 | } | |
10315 | } | |
10316 | ||
10317 | # done with this line .. join text of tokens to make the last field | |
10318 | push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); | |
10319 | ||
10320 | my ( $indentation, $lev, $level_end, $is_semicolon_terminated, | |
10321 | $is_outdented_line ) | |
10322 | = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns, | |
10323 | $ri_first, $ri_last, $rindentation_list ); | |
10324 | ||
10325 | # we will allow outdenting of long lines.. | |
10326 | my $outdent_long_lines = ( | |
10327 | ||
10328 | # which are long quotes, if allowed | |
10329 | ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) | |
10330 | ||
10331 | # which are long block comments, if allowed | |
10332 | || ( | |
10333 | $types_to_go[$ibeg] eq '#' | |
10334 | && $rOpts->{'outdent-long-comments'} | |
10335 | ||
10336 | # but not if this is a static block comment | |
10337 | && !( | |
10338 | $rOpts->{'static-block-comments'} | |
10339 | && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o | |
10340 | ) | |
10341 | ) | |
10342 | ); | |
10343 | ||
10344 | my $level_jump = | |
10345 | $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; | |
10346 | ||
10347 | my $rvertical_tightness_flags = | |
10348 | set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, | |
10349 | $ri_first, $ri_last ); | |
10350 | ||
10351 | # flush an outdented line to avoid any unwanted vertical alignment | |
10352 | Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); | |
10353 | ||
10354 | # send this new line down the pipe | |
10355 | my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; | |
10356 | Perl::Tidy::VerticalAligner::append_line( | |
10357 | $lev, | |
10358 | $level_end, | |
10359 | $indentation, | |
10360 | \@fields, | |
10361 | \@tokens, | |
10362 | \@patterns, | |
10363 | $forced_breakpoint_to_go[$iend] || $in_comma_list, | |
10364 | $outdent_long_lines, | |
10365 | $is_semicolon_terminated, | |
10366 | $do_not_pad, | |
10367 | $rvertical_tightness_flags, | |
10368 | $level_jump, | |
10369 | ); | |
10370 | $in_comma_list = | |
10371 | $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; | |
10372 | ||
10373 | # flush an outdented line to avoid any unwanted vertical alignment | |
10374 | Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); | |
10375 | ||
10376 | $do_not_pad = 0; | |
10377 | ||
10378 | } # end of loop to output each line | |
10379 | ||
10380 | # remember indentation of lines containing opening containers for | |
10381 | # later use by sub set_adjusted_indentation | |
10382 | save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); | |
10383 | } | |
10384 | ||
10385 | { # begin unmatched_indexes | |
10386 | ||
10387 | # closure to keep track of unbalanced containers. | |
10388 | # arrays shared by the routines in this block: | |
10389 | my @unmatched_opening_indexes_in_this_batch; | |
10390 | my @unmatched_closing_indexes_in_this_batch; | |
10391 | my %comma_arrow_count; | |
10392 | ||
10393 | sub is_unbalanced_batch { | |
10394 | @unmatched_opening_indexes_in_this_batch + | |
10395 | @unmatched_closing_indexes_in_this_batch; | |
10396 | } | |
10397 | ||
10398 | sub comma_arrow_count { | |
10399 | my $seqno = $_[0]; | |
10400 | return $comma_arrow_count{$seqno}; | |
10401 | } | |
10402 | ||
10403 | sub match_opening_and_closing_tokens { | |
10404 | ||
10405 | # Match up indexes of opening and closing braces, etc, in this batch. | |
10406 | # This has to be done after all tokens are stored because unstoring | |
10407 | # of tokens would otherwise cause trouble. | |
10408 | ||
10409 | @unmatched_opening_indexes_in_this_batch = (); | |
10410 | @unmatched_closing_indexes_in_this_batch = (); | |
10411 | %comma_arrow_count = (); | |
10412 | ||
10413 | my ( $i, $i_mate, $token ); | |
10414 | foreach $i ( 0 .. $max_index_to_go ) { | |
10415 | if ( $type_sequence_to_go[$i] ) { | |
10416 | $token = $tokens_to_go[$i]; | |
10417 | if ( $token =~ /^[\(\[\{\?]$/ ) { | |
10418 | push @unmatched_opening_indexes_in_this_batch, $i; | |
10419 | } | |
10420 | elsif ( $token =~ /^[\)\]\}\:]$/ ) { | |
10421 | ||
10422 | $i_mate = pop @unmatched_opening_indexes_in_this_batch; | |
10423 | if ( defined($i_mate) && $i_mate >= 0 ) { | |
10424 | if ( $type_sequence_to_go[$i_mate] == | |
10425 | $type_sequence_to_go[$i] ) | |
10426 | { | |
10427 | $mate_index_to_go[$i] = $i_mate; | |
10428 | $mate_index_to_go[$i_mate] = $i; | |
10429 | } | |
10430 | else { | |
10431 | push @unmatched_opening_indexes_in_this_batch, | |
10432 | $i_mate; | |
10433 | push @unmatched_closing_indexes_in_this_batch, $i; | |
10434 | } | |
10435 | } | |
10436 | else { | |
10437 | push @unmatched_closing_indexes_in_this_batch, $i; | |
10438 | } | |
10439 | } | |
10440 | } | |
10441 | elsif ( $tokens_to_go[$i] eq '=>' ) { | |
10442 | if (@unmatched_opening_indexes_in_this_batch) { | |
10443 | my $j = $unmatched_opening_indexes_in_this_batch[-1]; | |
10444 | my $seqno = $type_sequence_to_go[$j]; | |
10445 | $comma_arrow_count{$seqno}++; | |
10446 | } | |
10447 | } | |
10448 | } | |
10449 | } | |
10450 | ||
10451 | sub save_opening_indentation { | |
10452 | ||
10453 | # This should be called after each batch of tokens is output. It | |
10454 | # saves indentations of lines of all unmatched opening tokens. | |
10455 | # These will be used by sub get_opening_indentation. | |
10456 | ||
10457 | my ( $ri_first, $ri_last, $rindentation_list ) = @_; | |
10458 | ||
10459 | # we no longer need indentations of any saved indentations which | |
10460 | # are unmatched closing tokens in this batch, because we will | |
10461 | # never encounter them again. So we can delete them to keep | |
10462 | # the hash size down. | |
10463 | foreach (@unmatched_closing_indexes_in_this_batch) { | |
10464 | my $seqno = $type_sequence_to_go[$_]; | |
10465 | delete $saved_opening_indentation{$seqno}; | |
10466 | } | |
10467 | ||
10468 | # we need to save indentations of any unmatched opening tokens | |
10469 | # in this batch because we may need them in a subsequent batch. | |
10470 | foreach (@unmatched_opening_indexes_in_this_batch) { | |
10471 | my $seqno = $type_sequence_to_go[$_]; | |
10472 | $saved_opening_indentation{$seqno} = [ | |
10473 | lookup_opening_indentation( | |
10474 | $_, $ri_first, $ri_last, $rindentation_list | |
10475 | ) | |
10476 | ]; | |
10477 | } | |
10478 | } | |
10479 | } # end unmatched_indexes | |
10480 | ||
10481 | sub get_opening_indentation { | |
10482 | ||
10483 | # get the indentation of the line which output the opening token | |
10484 | # corresponding to a given closing token in the current output batch. | |
10485 | # | |
10486 | # given: | |
10487 | # $i_closing - index in this line of a closing token ')' '}' or ']' | |
10488 | # | |
10489 | # $ri_first - reference to list of the first index $i for each output | |
10490 | # line in this batch | |
10491 | # $ri_last - reference to list of the last index $i for each output line | |
10492 | # in this batch | |
10493 | # $rindentation_list - reference to a list containing the indentation | |
10494 | # used for each line. | |
10495 | # | |
10496 | # return: | |
10497 | # -the indentation of the line which contained the opening token | |
10498 | # which matches the token at index $i_opening | |
10499 | # -and its offset (number of columns) from the start of the line | |
10500 | # | |
10501 | my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; | |
10502 | ||
10503 | # first, see if the opening token is in the current batch | |
10504 | my $i_opening = $mate_index_to_go[$i_closing]; | |
10505 | my ( $indent, $offset ); | |
10506 | if ( $i_opening >= 0 ) { | |
10507 | ||
10508 | # it is..look up the indentation | |
10509 | ( $indent, $offset ) = | |
10510 | lookup_opening_indentation( $i_opening, $ri_first, $ri_last, | |
10511 | $rindentation_list ); | |
10512 | } | |
10513 | ||
10514 | # if not, it should have been stored in the hash by a previous batch | |
10515 | else { | |
10516 | my $seqno = $type_sequence_to_go[$i_closing]; | |
10517 | if ($seqno) { | |
10518 | if ( $saved_opening_indentation{$seqno} ) { | |
10519 | ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} }; | |
10520 | } | |
10521 | } | |
10522 | ||
10523 | # if no sequence number it must be an unbalanced container | |
10524 | else { | |
10525 | $indent = 0; | |
10526 | $offset = 0; | |
10527 | } | |
10528 | } | |
10529 | return ( $indent, $offset ); | |
10530 | } | |
10531 | ||
10532 | sub lookup_opening_indentation { | |
10533 | ||
10534 | # get the indentation of the line in the current output batch | |
10535 | # which output a selected opening token | |
10536 | # | |
10537 | # given: | |
10538 | # $i_opening - index of an opening token in the current output batch | |
10539 | # whose line indentation we need | |
10540 | # $ri_first - reference to list of the first index $i for each output | |
10541 | # line in this batch | |
10542 | # $ri_last - reference to list of the last index $i for each output line | |
10543 | # in this batch | |
10544 | # $rindentation_list - reference to a list containing the indentation | |
10545 | # used for each line. (NOTE: the first slot in | |
10546 | # this list is the last returned line number, and this is | |
10547 | # followed by the list of indentations). | |
10548 | # | |
10549 | # return | |
10550 | # -the indentation of the line which contained token $i_opening | |
10551 | # -and its offset (number of columns) from the start of the line | |
10552 | ||
10553 | my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; | |
10554 | ||
10555 | my $nline = $rindentation_list->[0]; # line number of previous lookup | |
10556 | ||
10557 | # reset line location if necessary | |
10558 | $nline = 0 if ( $i_opening < $ri_start->[$nline] ); | |
10559 | ||
10560 | # find the correct line | |
10561 | unless ( $i_opening > $ri_last->[-1] ) { | |
10562 | while ( $i_opening > $ri_last->[$nline] ) { $nline++; } | |
10563 | } | |
10564 | ||
10565 | # error - token index is out of bounds - shouldn't happen | |
10566 | else { | |
10567 | warning( | |
10568 | "non-fatal program bug in lookup_opening_indentation - index out of range\n" | |
10569 | ); | |
10570 | report_definite_bug(); | |
10571 | $nline = $#{$ri_last}; | |
10572 | } | |
10573 | ||
10574 | $rindentation_list->[0] = | |
10575 | $nline; # save line number to start looking next call | |
10576 | my $ibeg = $ri_start->[$nline]; | |
10577 | my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; | |
10578 | return ( $rindentation_list->[ $nline + 1 ], $offset ); | |
10579 | } | |
10580 | ||
10581 | sub set_adjusted_indentation { | |
10582 | ||
10583 | # This routine has the final say regarding the actual indentation of | |
10584 | # a line. It starts with the basic indentation which has been | |
10585 | # defined for the leading token, and then takes into account any | |
10586 | # options that the user has set regarding special indenting and | |
10587 | # outdenting. | |
10588 | ||
10589 | my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, | |
10590 | $rindentation_list ) | |
10591 | = @_; | |
10592 | ||
10593 | # we need to know the last token of this line | |
10594 | my ( $terminal_type, $i_terminal ) = | |
10595 | terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); | |
10596 | ||
10597 | my $is_outdented_line = 0; | |
10598 | ||
10599 | my $is_semicolon_terminated = $terminal_type eq ';' | |
10600 | && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; | |
10601 | ||
10602 | # Most lines are indented according to the initial token. | |
10603 | # But it is common to outdent to the level just after the | |
10604 | # terminal token in certain cases... | |
10605 | # adjust_indentation flag: | |
10606 | # 0 - do not adjust | |
10607 | # 1 - outdent | |
10608 | # 2 - vertically align with opening token | |
10609 | # 3 - indent | |
10610 | my $adjust_indentation = 0; | |
10611 | my $default_adjust_indentation = $adjust_indentation; | |
10612 | ||
10613 | my ( $opening_indentation, $opening_offset ); | |
10614 | ||
10615 | # if we are at a closing token of some type.. | |
10616 | if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) { | |
10617 | ||
10618 | # get the indentation of the line containing the corresponding | |
10619 | # opening token | |
10620 | ( $opening_indentation, $opening_offset ) = | |
10621 | get_opening_indentation( $ibeg, $ri_first, $ri_last, | |
10622 | $rindentation_list ); | |
10623 | ||
10624 | # First set the default behavior: | |
10625 | # default behavior is to outdent closing lines | |
10626 | # of the form: "); }; ]; )->xxx;" | |
10627 | if ( | |
10628 | $is_semicolon_terminated | |
10629 | ||
10630 | # and 'cuddled parens' of the form: ")->pack(" | |
10631 | || ( | |
10632 | $terminal_type eq '(' | |
10633 | && $types_to_go[$ibeg] eq ')' | |
10634 | && ( $nesting_depth_to_go[$iend] + 1 == | |
10635 | $nesting_depth_to_go[$ibeg] ) | |
10636 | ) | |
10637 | ) | |
10638 | { | |
10639 | $adjust_indentation = 1; | |
10640 | } | |
10641 | ||
10642 | # TESTING: outdent something like '),' | |
10643 | if ( | |
10644 | $terminal_type eq ',' | |
10645 | ||
10646 | # allow just one character before the comma | |
10647 | && $i_terminal == $ibeg + 1 | |
10648 | ||
10649 | # requre LIST environment; otherwise, we may outdent too much -- | |
10650 | # this can happen in calls without parentheses (overload.t); | |
10651 | && $container_environment_to_go[$i_terminal] eq 'LIST' | |
10652 | ) | |
10653 | { | |
10654 | $adjust_indentation = 1; | |
10655 | } | |
10656 | ||
10657 | # undo continuation indentation of a terminal closing token if | |
10658 | # it is the last token before a level decrease. This will allow | |
10659 | # a closing token to line up with its opening counterpart, and | |
10660 | # avoids a indentation jump larger than 1 level. | |
10661 | if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ | |
10662 | && $i_terminal == $ibeg ) | |
10663 | { | |
10664 | my $ci = $ci_levels_to_go[$ibeg]; | |
10665 | my $lev = $levels_to_go[$ibeg]; | |
10666 | my $next_type = $types_to_go[ $ibeg + 1 ]; | |
10667 | my $i_next_nonblank = | |
10668 | ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 ); | |
10669 | if ( $i_next_nonblank <= $max_index_to_go | |
10670 | && $levels_to_go[$i_next_nonblank] < $lev ) | |
10671 | { | |
10672 | $adjust_indentation = 1; | |
10673 | } | |
10674 | } | |
10675 | ||
10676 | $default_adjust_indentation = $adjust_indentation; | |
10677 | ||
10678 | # Now modify default behavior according to user request: | |
10679 | # handle option to indent non-blocks of the form ); }; ]; | |
10680 | # But don't do special indentation to something like ')->pack(' | |
10681 | if ( !$block_type_to_go[$ibeg] ) { | |
10682 | my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] }; | |
10683 | if ( | |
10684 | $cti == 1 | |
10685 | && ( $i_terminal <= $ibeg + 1 | |
10686 | || $is_semicolon_terminated ) | |
10687 | ) | |
10688 | { | |
10689 | $adjust_indentation = 2; | |
10690 | } | |
10691 | elsif ($cti == 2 | |
10692 | && $is_semicolon_terminated | |
10693 | && $i_terminal == $ibeg + 1 ) | |
10694 | { | |
10695 | $adjust_indentation = 3; | |
10696 | } | |
10697 | } | |
10698 | ||
10699 | # handle option to indent blocks | |
10700 | else { | |
10701 | if ( | |
10702 | $rOpts->{'indent-closing-brace'} | |
10703 | && ( | |
10704 | $i_terminal == $ibeg # isolated terminal '}' | |
10705 | || $is_semicolon_terminated | |
10706 | ) | |
10707 | ) # } xxxx ; | |
10708 | { | |
10709 | $adjust_indentation = 3; | |
10710 | } | |
10711 | } | |
10712 | } | |
10713 | ||
10714 | # if at ');', '};', '>;', and '];' of a terminal qw quote | |
10715 | elsif ( $$rpatterns[0] =~ /^qb*;$/ && $$rfields[0] =~ /^([\)\}\]\>]);$/ ) { | |
10716 | if ( $closing_token_indentation{$1} == 0 ) { | |
10717 | $adjust_indentation = 1; | |
10718 | } | |
10719 | else { | |
10720 | $adjust_indentation = 3; | |
10721 | } | |
10722 | } | |
10723 | ||
10724 | # Handle variation in indentation styles... | |
10725 | # Select the indentation object to define leading | |
10726 | # whitespace. If we are outdenting something like '} } );' | |
10727 | # then we want to use one level below the last token | |
10728 | # ($i_terminal) in order to get it to fully outdent through | |
10729 | # all levels. | |
10730 | my $indentation; | |
10731 | my $lev; | |
10732 | my $level_end = $levels_to_go[$iend]; | |
10733 | ||
10734 | if ( $adjust_indentation == 0 ) { | |
10735 | $indentation = $leading_spaces_to_go[$ibeg]; | |
10736 | $lev = $levels_to_go[$ibeg]; | |
10737 | } | |
10738 | elsif ( $adjust_indentation == 1 ) { | |
10739 | $indentation = $reduced_spaces_to_go[$i_terminal]; | |
10740 | $lev = $levels_to_go[$i_terminal]; | |
10741 | } | |
10742 | ||
10743 | # handle indented closing token which aligns with opening token | |
10744 | elsif ( $adjust_indentation == 2 ) { | |
10745 | ||
10746 | # handle option to align closing token with opening token | |
10747 | $lev = $levels_to_go[$ibeg]; | |
10748 | ||
10749 | # calculate spaces needed to align with opening token | |
10750 | my $space_count = get_SPACES($opening_indentation) + $opening_offset; | |
10751 | ||
10752 | # Indent less than the previous line. | |
10753 | # | |
10754 | # Problem: For -lp we don't exactly know what it was if there were | |
10755 | # recoverable spaces sent to the aligner. A good solution would be to | |
10756 | # force a flush of the vertical alignment buffer, so that we would | |
10757 | # know. For now, this rule is used for -lp: | |
10758 | # | |
10759 | # When the last line did not start with a closing token we will be | |
10760 | # optimistic that the aligner will recover everything wanted. | |
10761 | # | |
10762 | # This rule will prevent us from breaking a hierarchy of closing | |
10763 | # tokens, and in a worst case will leave a closing paren too far | |
10764 | # indented, but this is better than frequently leaving it not indented | |
10765 | # enough. | |
10766 | my $last_spaces = get_SPACES($last_indentation_written); | |
10767 | if ( $last_leading_token !~ /^[\}\]\)]$/ ) { | |
10768 | $last_spaces += get_RECOVERABLE_SPACES($last_indentation_written); | |
10769 | } | |
10770 | ||
10771 | # reset the indentation to the new space count if it works | |
10772 | # only options are all or none: nothing in-between looks good | |
10773 | $lev = $levels_to_go[$ibeg]; | |
10774 | if ( $space_count < $last_spaces ) { | |
10775 | if ($rOpts_line_up_parentheses) { | |
10776 | my $lev = $levels_to_go[$ibeg]; | |
10777 | $indentation = | |
10778 | new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); | |
10779 | } | |
10780 | else { | |
10781 | $indentation = $space_count; | |
10782 | } | |
10783 | } | |
10784 | ||
10785 | # revert to default if it doesnt work | |
10786 | else { | |
10787 | $space_count = leading_spaces_to_go($ibeg); | |
10788 | if ( $default_adjust_indentation == 0 ) { | |
10789 | $indentation = $leading_spaces_to_go[$ibeg]; | |
10790 | } | |
10791 | elsif ( $default_adjust_indentation == 1 ) { | |
10792 | $indentation = $reduced_spaces_to_go[$i_terminal]; | |
10793 | $lev = $levels_to_go[$i_terminal]; | |
10794 | } | |
10795 | } | |
10796 | } | |
10797 | ||
10798 | # Full indentaion of closing tokens (-icb and -icp or -cti=2) | |
10799 | else { | |
10800 | ||
10801 | # There are two ways to handle -icb and -icp... | |
10802 | # One way is to use the indentation of the previous line: | |
10803 | # $indentation = $last_indentation_written; | |
10804 | ||
10805 | # The other way is to use the indentation that the previous line | |
10806 | # would have had if it hadn't been adjusted: | |
10807 | $indentation = $last_unadjusted_indentation; | |
10808 | ||
10809 | # Current method: use the minimum of the two. This avoids inconsistent | |
10810 | # indentation. | |
10811 | if ( get_SPACES($last_indentation_written) < get_SPACES($indentation) ) | |
10812 | { | |
10813 | $indentation = $last_indentation_written; | |
10814 | } | |
10815 | ||
10816 | # use previous indentation but use own level | |
10817 | # to cause list to be flushed properly | |
10818 | $lev = $levels_to_go[$ibeg]; | |
10819 | } | |
10820 | ||
10821 | # remember indentation except for multi-line quotes, which get | |
10822 | # no indentation | |
10823 | unless ( $types_to_go[$ibeg] eq 'Q' && $lev == 0 ) { | |
10824 | $last_indentation_written = $indentation; | |
10825 | $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; | |
10826 | $last_leading_token = $tokens_to_go[$ibeg]; | |
10827 | } | |
10828 | ||
10829 | # be sure lines with leading closing tokens are not outdented more | |
10830 | # than the line which contained the corresponding opening token. | |
10831 | my $is_isolated_block_brace = | |
10832 | ( $iend == $ibeg ) && $block_type_to_go[$ibeg]; | |
10833 | if ( !$is_isolated_block_brace && defined($opening_indentation) ) { | |
10834 | if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) { | |
10835 | $indentation = $opening_indentation; | |
10836 | } | |
10837 | } | |
10838 | ||
10839 | # remember the indentation of each line of this batch | |
10840 | push @{$rindentation_list}, $indentation; | |
10841 | ||
10842 | # outdent lines with certain leading tokens... | |
10843 | if ( | |
10844 | ||
10845 | # must be first word of this batch | |
10846 | $ibeg == 0 | |
10847 | ||
10848 | # and ... | |
10849 | && ( | |
10850 | ||
10851 | # certain leading keywords if requested | |
10852 | ( | |
10853 | $rOpts->{'outdent-keywords'} | |
10854 | && $types_to_go[$ibeg] eq 'k' | |
10855 | && $outdent_keyword{ $tokens_to_go[$ibeg] } | |
10856 | ) | |
10857 | ||
10858 | # or labels if requested | |
10859 | || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) | |
10860 | ||
10861 | # or static block comments if requested | |
10862 | || ( $types_to_go[$ibeg] eq '#' | |
10863 | && $rOpts->{'outdent-static-block-comments'} | |
10864 | && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o | |
10865 | && $rOpts->{'static-block-comments'} ) | |
10866 | ) | |
10867 | ) | |
10868 | ||
10869 | { | |
10870 | my $space_count = leading_spaces_to_go($ibeg); | |
10871 | if ( $space_count > 0 ) { | |
10872 | $space_count -= $rOpts_continuation_indentation; | |
10873 | $is_outdented_line = 1; | |
10874 | if ( $space_count < 0 ) { $space_count = 0 } | |
10875 | ||
10876 | # do not promote a spaced static block comment to non-spaced; | |
10877 | # this is not normally necessary but could be for some | |
10878 | # unusual user inputs (such as -ci = -i) | |
10879 | if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) { | |
10880 | $space_count = 1; | |
10881 | } | |
10882 | ||
10883 | if ($rOpts_line_up_parentheses) { | |
10884 | $indentation = | |
10885 | new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); | |
10886 | } | |
10887 | else { | |
10888 | $indentation = $space_count; | |
10889 | } | |
10890 | } | |
10891 | } | |
10892 | ||
10893 | return ( $indentation, $lev, $level_end, $is_semicolon_terminated, | |
10894 | $is_outdented_line ); | |
10895 | } | |
10896 | ||
10897 | sub set_vertical_tightness_flags { | |
10898 | ||
10899 | my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; | |
10900 | ||
10901 | # Define vertical tightness controls for the nth line of a batch. | |
10902 | # We create an array of parameters which tell the vertical aligner | |
10903 | # if we should combine this line with the next line to achieve the | |
10904 | # desired vertical tightness. The array of parameters contains: | |
10905 | # | |
10906 | # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace | |
10907 | # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok | |
10908 | # if closing: spaces of padding to use | |
10909 | # [2] sequence number of container | |
10910 | # [3] valid flag: do not append if this flag is false. Will be | |
10911 | # true if appropriate -vt flag is set. Otherwise, Will be | |
10912 | # made true only for 2 line container in parens with -lp | |
10913 | # | |
10914 | # These flags are used by sub set_leading_whitespace in | |
10915 | # the vertical aligner | |
10916 | ||
10917 | my $rvertical_tightness_flags; | |
10918 | ||
10919 | # For non-BLOCK tokens, we will need to examine the next line | |
10920 | # too, so we won't consider the last line. | |
10921 | if ( $n < $n_last_line ) { | |
10922 | ||
10923 | # see if last token is an opening token...not a BLOCK... | |
10924 | my $ibeg_next = $$ri_first[ $n + 1 ]; | |
10925 | my $token_end = $tokens_to_go[$iend]; | |
10926 | my $iend_next = $$ri_last[ $n + 1 ]; | |
10927 | if ( | |
10928 | $type_sequence_to_go[$iend] | |
10929 | && !$block_type_to_go[$iend] | |
10930 | && $is_opening_token{$token_end} | |
10931 | && ( | |
10932 | $opening_vertical_tightness{$token_end} > 0 | |
10933 | ||
10934 | # allow 2-line method call to be closed up | |
10935 | || ( $rOpts_line_up_parentheses | |
10936 | && $token_end eq '(' | |
10937 | && $iend > $ibeg | |
10938 | && $types_to_go[ $iend - 1 ] ne 'b' ) | |
10939 | ) | |
10940 | ) | |
10941 | { | |
10942 | ||
10943 | # avoid multiple jumps in nesting depth in one line if | |
10944 | # requested | |
10945 | my $ovt = $opening_vertical_tightness{$token_end}; | |
10946 | my $iend_next = $$ri_last[ $n + 1 ]; | |
10947 | unless ( | |
10948 | $ovt < 2 | |
10949 | && ( $nesting_depth_to_go[ $iend_next + 1 ] != | |
10950 | $nesting_depth_to_go[$ibeg_next] ) | |
10951 | ) | |
10952 | { | |
10953 | ||
10954 | # If -vt flag has not been set, mark this as invalid | |
10955 | # and aligner will validate it if it sees the closing paren | |
10956 | # within 2 lines. | |
10957 | my $valid_flag = $ovt; | |
10958 | @{$rvertical_tightness_flags} = | |
10959 | ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); | |
10960 | } | |
10961 | } | |
10962 | ||
10963 | # see if first token of next line is a closing token... | |
10964 | # ..and be sure this line does not have a side comment | |
10965 | my $token_next = $tokens_to_go[$ibeg_next]; | |
10966 | if ( $type_sequence_to_go[$ibeg_next] | |
10967 | && !$block_type_to_go[$ibeg_next] | |
10968 | && $is_closing_token{$token_next} | |
10969 | && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! | |
10970 | { | |
10971 | my $ovt = $opening_vertical_tightness{$token_next}; | |
10972 | my $cvt = $closing_vertical_tightness{$token_next}; | |
10973 | if ( | |
10974 | ||
10975 | # never append a trailing line like )->pack( | |
10976 | # because it will throw off later alignment | |
10977 | ( | |
10978 | $nesting_depth_to_go[$ibeg_next] == | |
10979 | $nesting_depth_to_go[ $iend_next + 1 ] + 1 | |
10980 | ) | |
10981 | && ( | |
10982 | $cvt == 2 | |
10983 | || ( | |
10984 | $container_environment_to_go[$ibeg_next] ne 'LIST' | |
10985 | && ( | |
10986 | $cvt == 1 | |
10987 | ||
10988 | # allow closing up 2-line method calls | |
10989 | || ( $rOpts_line_up_parentheses | |
10990 | && $token_next eq ')' ) | |
10991 | ) | |
10992 | ) | |
10993 | ) | |
10994 | ) | |
10995 | { | |
10996 | ||
10997 | # decide which trailing closing tokens to append.. | |
10998 | my $ok = 0; | |
10999 | if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } | |
11000 | else { | |
11001 | my $str = join( '', | |
11002 | @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); | |
11003 | ||
11004 | # append closing token if followed by comment or ';' | |
11005 | if ( $str =~ /^b?[#;]/ ) { $ok = 1 } | |
11006 | } | |
11007 | ||
11008 | if ($ok) { | |
11009 | my $valid_flag = $cvt; | |
11010 | @{$rvertical_tightness_flags} = ( | |
11011 | 2, | |
11012 | $tightness{$token_next} == 2 ? 0 : 1, | |
11013 | $type_sequence_to_go[$ibeg_next], $valid_flag, | |
11014 | ); | |
11015 | } | |
11016 | } | |
11017 | } | |
11018 | } | |
11019 | ||
11020 | # Check for a last line with isolated opening BLOCK curly | |
11021 | elsif ($rOpts_block_brace_vertical_tightness | |
11022 | && $ibeg eq $iend | |
11023 | && $types_to_go[$iend] eq '{' | |
11024 | && $block_type_to_go[$iend] =~ | |
11025 | /$block_brace_vertical_tightness_pattern/o ) | |
11026 | { | |
11027 | @{$rvertical_tightness_flags} = | |
11028 | ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); | |
11029 | } | |
11030 | ||
11031 | return $rvertical_tightness_flags; | |
11032 | } | |
11033 | ||
11034 | { | |
11035 | my %is_vertical_alignment_type; | |
11036 | my %is_vertical_alignment_keyword; | |
11037 | ||
11038 | BEGIN { | |
11039 | ||
11040 | @_ = qw# | |
11041 | = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x= | |
11042 | { ? : => =~ && || | |
11043 | #; | |
11044 | @is_vertical_alignment_type{@_} = (1) x scalar(@_); | |
11045 | ||
11046 | @_ = qw(if unless and or eq ne for foreach while until); | |
11047 | @is_vertical_alignment_keyword{@_} = (1) x scalar(@_); | |
11048 | } | |
11049 | ||
11050 | sub set_vertical_alignment_markers { | |
11051 | ||
11052 | # Look at the tokens in this output batch and define the array | |
11053 | # 'matching_token_to_go' which marks tokens at which we would | |
11054 | # accept vertical alignment. | |
11055 | ||
11056 | # nothing to do if we aren't allowed to change whitespace | |
11057 | if ( !$rOpts_add_whitespace ) { | |
11058 | for my $i ( 0 .. $max_index_to_go ) { | |
11059 | $matching_token_to_go[$i] = ''; | |
11060 | } | |
11061 | return; | |
11062 | } | |
11063 | ||
11064 | my ( $ri_first, $ri_last ) = @_; | |
11065 | ||
11066 | # look at each line of this batch.. | |
11067 | my $last_vertical_alignment_before_index; | |
11068 | my $vert_last_nonblank_type; | |
11069 | my $vert_last_nonblank_token; | |
11070 | my $vert_last_nonblank_block_type; | |
11071 | my $max_line = @$ri_first - 1; | |
11072 | my ( $i, $type, $token, $block_type, $alignment_type ); | |
11073 | my ( $ibeg, $iend, $line ); | |
11074 | foreach $line ( 0 .. $max_line ) { | |
11075 | $ibeg = $$ri_first[$line]; | |
11076 | $iend = $$ri_last[$line]; | |
11077 | $last_vertical_alignment_before_index = -1; | |
11078 | $vert_last_nonblank_type = ''; | |
11079 | $vert_last_nonblank_token = ''; | |
11080 | $vert_last_nonblank_block_type = ''; | |
11081 | ||
11082 | # look at each token in this output line.. | |
11083 | foreach $i ( $ibeg .. $iend ) { | |
11084 | $alignment_type = ''; | |
11085 | $type = $types_to_go[$i]; | |
11086 | $block_type = $block_type_to_go[$i]; | |
11087 | $token = $tokens_to_go[$i]; | |
11088 | ||
11089 | # check for flag indicating that we should not align | |
11090 | # this token | |
11091 | if ( $matching_token_to_go[$i] ) { | |
11092 | $matching_token_to_go[$i] = ''; | |
11093 | next; | |
11094 | } | |
11095 | ||
11096 | #-------------------------------------------------------- | |
11097 | # First see if we want to align BEFORE this token | |
11098 | #-------------------------------------------------------- | |
11099 | ||
11100 | # The first possible token that we can align before | |
11101 | # is index 2 because: 1) it doesn't normally make sense to | |
11102 | # align before the first token and 2) the second | |
11103 | # token must be a blank if we are to align before | |
11104 | # the third | |
11105 | if ( $i < $ibeg + 2 ) { | |
11106 | } | |
11107 | ||
11108 | # must follow a blank token | |
11109 | elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { | |
11110 | } | |
11111 | ||
11112 | # align a side comment -- | |
11113 | elsif ( $type eq '#' ) { | |
11114 | ||
11115 | unless ( | |
11116 | ||
11117 | # it is a static side comment | |
11118 | ( | |
11119 | $rOpts->{'static-side-comments'} | |
11120 | && $token =~ /$static_side_comment_pattern/o | |
11121 | ) | |
11122 | ||
11123 | # or a closing side comment | |
11124 | || ( $vert_last_nonblank_block_type | |
11125 | && $token =~ | |
11126 | /$closing_side_comment_prefix_pattern/o ) | |
11127 | ) | |
11128 | { | |
11129 | $alignment_type = $type; | |
11130 | } ## Example of a static side comment | |
11131 | } | |
11132 | ||
11133 | # otherwise, do not align two in a row to create a | |
11134 | # blank field | |
11135 | elsif ( $last_vertical_alignment_before_index == $i - 2 ) { | |
11136 | } | |
11137 | ||
11138 | # align before one of these keywords | |
11139 | # (within a line, since $i>1) | |
11140 | elsif ( $type eq 'k' ) { | |
11141 | ||
11142 | # /^(if|unless|and|or|eq|ne)$/ | |
11143 | if ( $is_vertical_alignment_keyword{$token} ) { | |
11144 | $alignment_type = $token; | |
11145 | } | |
11146 | } | |
11147 | ||
11148 | # align before one of these types.. | |
11149 | # Note: add '.' after new vertical aligner is operational | |
11150 | elsif ( $is_vertical_alignment_type{$type} ) { | |
11151 | $alignment_type = $token; | |
11152 | ||
11153 | # For a paren after keyword, only align something like this: | |
11154 | # if ( $a ) { &a } | |
11155 | # elsif ( $b ) { &b } | |
11156 | if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { | |
11157 | $alignment_type = "" | |
11158 | unless $vert_last_nonblank_token =~ | |
11159 | /^(if|unless|elsif)$/; | |
11160 | } | |
11161 | ||
11162 | # be sure the alignment tokens are unique | |
11163 | # This didn't work well: reason not determined | |
11164 | # if ($token ne $type) {$alignment_type .= $type} | |
11165 | } | |
11166 | ||
11167 | # NOTE: This is deactivated until the new vertical aligner | |
11168 | # is finished because it causes the previous if/elsif alignment | |
11169 | # to fail | |
11170 | #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) { | |
11171 | # $alignment_type = $type; | |
11172 | #} | |
11173 | ||
11174 | if ($alignment_type) { | |
11175 | $last_vertical_alignment_before_index = $i; | |
11176 | } | |
11177 | ||
11178 | #-------------------------------------------------------- | |
11179 | # Next see if we want to align AFTER the previous nonblank | |
11180 | #-------------------------------------------------------- | |
11181 | ||
11182 | # We want to line up ',' and interior ';' tokens, with the added | |
11183 | # space AFTER these tokens. (Note: interior ';' is included | |
11184 | # because it may occur in short blocks). | |
11185 | if ( | |
11186 | ||
11187 | # we haven't already set it | |
11188 | !$alignment_type | |
11189 | ||
11190 | # and its not the first token of the line | |
11191 | && ( $i > $ibeg ) | |
11192 | ||
11193 | # and it follows a blank | |
11194 | && $types_to_go[ $i - 1 ] eq 'b' | |
11195 | ||
11196 | # and previous token IS one of these: | |
11197 | && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) | |
11198 | ||
11199 | # and it's NOT one of these | |
11200 | && ( $type !~ /^[b\#\)\]\}]$/ ) | |
11201 | ||
11202 | # then go ahead and align | |
11203 | ) | |
11204 | ||
11205 | { | |
11206 | $alignment_type = $vert_last_nonblank_type; | |
11207 | } | |
11208 | ||
11209 | #-------------------------------------------------------- | |
11210 | # then store the value | |
11211 | #-------------------------------------------------------- | |
11212 | $matching_token_to_go[$i] = $alignment_type; | |
11213 | if ( $type ne 'b' ) { | |
11214 | $vert_last_nonblank_type = $type; | |
11215 | $vert_last_nonblank_token = $token; | |
11216 | $vert_last_nonblank_block_type = $block_type; | |
11217 | } | |
11218 | } | |
11219 | } | |
11220 | } | |
11221 | } | |
11222 | ||
11223 | sub terminal_type { | |
11224 | ||
11225 | # returns type of last token on this line (terminal token), as follows: | |
11226 | # returns # for a full-line comment | |
11227 | # returns ' ' for a blank line | |
11228 | # otherwise returns final token type | |
11229 | ||
11230 | my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; | |
11231 | ||
11232 | # check for full-line comment.. | |
11233 | if ( $$rtype[$ibeg] eq '#' ) { | |
11234 | return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg]; | |
11235 | } | |
11236 | else { | |
11237 | ||
11238 | # start at end and walk bakwards.. | |
11239 | for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { | |
11240 | ||
11241 | # skip past any side comment and blanks | |
11242 | next if ( $$rtype[$i] eq 'b' ); | |
11243 | next if ( $$rtype[$i] eq '#' ); | |
11244 | ||
11245 | # found it..make sure it is a BLOCK termination, | |
11246 | # but hide a terminal } after sort/grep/map because it is not | |
11247 | # necessarily the end of the line. (terminal.t) | |
11248 | my $terminal_type = $$rtype[$i]; | |
11249 | if ( | |
11250 | $terminal_type eq '}' | |
11251 | && ( !$$rblock_type[$i] | |
11252 | || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) ) | |
11253 | ) | |
11254 | { | |
11255 | $terminal_type = 'b'; | |
11256 | } | |
11257 | return wantarray ? ( $terminal_type, $i ) : $terminal_type; | |
11258 | } | |
11259 | ||
11260 | # empty line | |
11261 | return wantarray ? ( ' ', $ibeg ) : ' '; | |
11262 | } | |
11263 | } | |
11264 | ||
11265 | { | |
11266 | my %is_good_keyword_breakpoint; | |
11267 | my %is_lt_gt_le_ge; | |
11268 | ||
11269 | sub set_bond_strengths { | |
11270 | ||
11271 | BEGIN { | |
11272 | ||
11273 | @_ = qw(if unless while until for foreach); | |
11274 | @is_good_keyword_breakpoint{@_} = (1) x scalar(@_); | |
11275 | ||
11276 | @_ = qw(lt gt le ge); | |
11277 | @is_lt_gt_le_ge{@_} = (1) x scalar(@_); | |
11278 | ||
11279 | ############################################################### | |
11280 | # NOTE: NO_BREAK's set here are HINTS which may not be honored; | |
11281 | # essential NO_BREAKS's must be enforced in section 2, below. | |
11282 | ############################################################### | |
11283 | ||
11284 | # adding NEW_TOKENS: add a left and right bond strength by | |
11285 | # mimmicking what is done for an existing token type. You | |
11286 | # can skip this step at first and take the default, then | |
11287 | # tweak later to get desired results. | |
11288 | ||
11289 | # The bond strengths should roughly follow precenence order where | |
11290 | # possible. If you make changes, please check the results very | |
11291 | # carefully on a variety of scripts. | |
11292 | ||
11293 | # no break around possible filehandle | |
11294 | $left_bond_strength{'Z'} = NO_BREAK; | |
11295 | $right_bond_strength{'Z'} = NO_BREAK; | |
11296 | ||
11297 | # never put a bare word on a new line: | |
11298 | # example print (STDERR, "bla"); will fail with break after ( | |
11299 | $left_bond_strength{'w'} = NO_BREAK; | |
11300 | ||
11301 | # blanks always have infinite strength to force breaks after real tokens | |
11302 | $right_bond_strength{'b'} = NO_BREAK; | |
11303 | ||
11304 | # try not to break on exponentation | |
11305 | @_ = qw" ** .. ... <=> "; | |
11306 | @left_bond_strength{@_} = (STRONG) x scalar(@_); | |
11307 | @right_bond_strength{@_} = (STRONG) x scalar(@_); | |
11308 | ||
11309 | # The comma-arrow has very low precedence but not a good break point | |
11310 | $left_bond_strength{'=>'} = NO_BREAK; | |
11311 | $right_bond_strength{'=>'} = NOMINAL; | |
11312 | ||
11313 | # ok to break after label | |
11314 | $left_bond_strength{'J'} = NO_BREAK; | |
11315 | $right_bond_strength{'J'} = NOMINAL; | |
11316 | $left_bond_strength{'j'} = STRONG; | |
11317 | $right_bond_strength{'j'} = STRONG; | |
11318 | $left_bond_strength{'A'} = STRONG; | |
11319 | $right_bond_strength{'A'} = STRONG; | |
11320 | ||
11321 | $left_bond_strength{'->'} = STRONG; | |
11322 | $right_bond_strength{'->'} = VERY_STRONG; | |
11323 | ||
11324 | # breaking AFTER these is just ok: | |
11325 | @_ = qw" % + - * / x "; | |
11326 | @left_bond_strength{@_} = (STRONG) x scalar(@_); | |
11327 | @right_bond_strength{@_} = (NOMINAL) x scalar(@_); | |
11328 | ||
11329 | # breaking BEFORE these is just ok: | |
11330 | @_ = qw" >> << "; | |
11331 | @right_bond_strength{@_} = (STRONG) x scalar(@_); | |
11332 | @left_bond_strength{@_} = (NOMINAL) x scalar(@_); | |
11333 | ||
11334 | # I prefer breaking before the string concatenation operator | |
11335 | # because it can be hard to see at the end of a line | |
11336 | # swap these to break after a '.' | |
11337 | # this could be a future option | |
11338 | $right_bond_strength{'.'} = STRONG; | |
11339 | $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; | |
11340 | ||
11341 | @_ = qw"} ] ) "; | |
11342 | @left_bond_strength{@_} = (STRONG) x scalar(@_); | |
11343 | @right_bond_strength{@_} = (NOMINAL) x scalar(@_); | |
11344 | ||
11345 | # make these a little weaker than nominal so that they get | |
11346 | # favored for end-of-line characters | |
11347 | @_ = qw"!= == =~ !~"; | |
11348 | @left_bond_strength{@_} = (STRONG) x scalar(@_); | |
11349 | @right_bond_strength{@_} = | |
11350 | ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_); | |
11351 | ||
11352 | # break AFTER these | |
11353 | @_ = qw" < > | & >= <="; | |
11354 | @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_); | |
11355 | @right_bond_strength{@_} = | |
11356 | ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_); | |
11357 | ||
11358 | # breaking either before or after a quote is ok | |
11359 | # but bias for breaking before a quote | |
11360 | $left_bond_strength{'Q'} = NOMINAL; | |
11361 | $right_bond_strength{'Q'} = NOMINAL + 0.02; | |
11362 | $left_bond_strength{'q'} = NOMINAL; | |
11363 | $right_bond_strength{'q'} = NOMINAL; | |
11364 | ||
11365 | # starting a line with a keyword is usually ok | |
11366 | $left_bond_strength{'k'} = NOMINAL; | |
11367 | ||
11368 | # we usually want to bond a keyword strongly to what immediately | |
11369 | # follows, rather than leaving it stranded at the end of a line | |
11370 | $right_bond_strength{'k'} = STRONG; | |
11371 | ||
11372 | $left_bond_strength{'G'} = NOMINAL; | |
11373 | $right_bond_strength{'G'} = STRONG; | |
11374 | ||
11375 | # it is very good to break AFTER various assignment operators | |
11376 | @_ = qw( | |
11377 | = **= += *= &= <<= &&= | |
11378 | -= /= |= >>= ||= | |
11379 | .= %= ^= | |
11380 | x= | |
11381 | ); | |
11382 | @left_bond_strength{@_} = (STRONG) x scalar(@_); | |
11383 | @right_bond_strength{@_} = | |
11384 | ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); | |
11385 | ||
11386 | # break BEFORE '&&' and '||' | |
11387 | # set strength of '||' to same as '=' so that chains like | |
11388 | # $a = $b || $c || $d will break before the first '||' | |
11389 | $right_bond_strength{'||'} = NOMINAL; | |
11390 | $left_bond_strength{'||'} = $right_bond_strength{'='}; | |
11391 | ||
11392 | # set strength of && a little higher than || | |
11393 | $right_bond_strength{'&&'} = NOMINAL; | |
11394 | $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; | |
11395 | ||
11396 | $left_bond_strength{';'} = VERY_STRONG; | |
11397 | $right_bond_strength{';'} = VERY_WEAK; | |
11398 | $left_bond_strength{'f'} = VERY_STRONG; | |
11399 | ||
11400 | # make right strength of for ';' a little less than '=' | |
11401 | # to make for contents break after the ';' to avoid this: | |
11402 | # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += | |
11403 | # $number_of_fields ) | |
11404 | # and make it weaker than ',' and 'and' too | |
11405 | $right_bond_strength{'f'} = VERY_WEAK - 0.03; | |
11406 | ||
11407 | # The strengths of ?/: should be somewhere between | |
11408 | # an '=' and a quote (NOMINAL), | |
11409 | # make strength of ':' slightly less than '?' to help | |
11410 | # break long chains of ? : after the colons | |
11411 | $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; | |
11412 | $right_bond_strength{':'} = NO_BREAK; | |
11413 | $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; | |
11414 | $right_bond_strength{'?'} = NO_BREAK; | |
11415 | ||
11416 | $left_bond_strength{','} = VERY_STRONG; | |
11417 | $right_bond_strength{','} = VERY_WEAK; | |
11418 | ||
11419 | # Set bond strengths of certain keywords | |
11420 | # make 'or', 'and' slightly weaker than a ',' | |
11421 | $left_bond_strength{'and'} = VERY_WEAK - 0.01; | |
11422 | $left_bond_strength{'or'} = VERY_WEAK - 0.02; | |
11423 | $left_bond_strength{'xor'} = NOMINAL; | |
11424 | $right_bond_strength{'and'} = NOMINAL; | |
11425 | $right_bond_strength{'or'} = NOMINAL; | |
11426 | $right_bond_strength{'xor'} = STRONG; | |
11427 | } | |
11428 | ||
11429 | # patch-its always ok to break at end of line | |
11430 | $nobreak_to_go[$max_index_to_go] = 0; | |
11431 | ||
11432 | # adding a small 'bias' to strengths is a simple way to make a line | |
11433 | # break at the first of a sequence of identical terms. For example, | |
11434 | # to force long string of conditional operators to break with | |
11435 | # each line ending in a ':', we can add a small number to the bond | |
11436 | # strength of each ':' | |
11437 | my $colon_bias = 0; | |
11438 | my $amp_bias = 0; | |
11439 | my $bar_bias = 0; | |
11440 | my $and_bias = 0; | |
11441 | my $or_bias = 0; | |
11442 | my $dot_bias = 0; | |
11443 | my $f_bias = 0; | |
11444 | my $code_bias = -.01; | |
11445 | my $type = 'b'; | |
11446 | my $token = ' '; | |
11447 | my $last_type; | |
11448 | my $last_nonblank_type = $type; | |
11449 | my $last_nonblank_token = $token; | |
11450 | my $delta_bias = 0.0001; | |
11451 | my $list_str = $left_bond_strength{'?'}; | |
11452 | ||
11453 | my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, | |
11454 | $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, | |
11455 | ); | |
11456 | ||
11457 | # preliminary loop to compute bond strengths | |
11458 | for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { | |
11459 | $last_type = $type; | |
11460 | if ( $type ne 'b' ) { | |
11461 | $last_nonblank_type = $type; | |
11462 | $last_nonblank_token = $token; | |
11463 | } | |
11464 | $type = $types_to_go[$i]; | |
11465 | ||
11466 | # strength on both sides of a blank is the same | |
11467 | if ( $type eq 'b' && $last_type ne 'b' ) { | |
11468 | $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; | |
11469 | next; | |
11470 | } | |
11471 | ||
11472 | $token = $tokens_to_go[$i]; | |
11473 | $block_type = $block_type_to_go[$i]; | |
11474 | $i_next = $i + 1; | |
11475 | $next_type = $types_to_go[$i_next]; | |
11476 | $next_token = $tokens_to_go[$i_next]; | |
11477 | $total_nesting_depth = $nesting_depth_to_go[$i_next]; | |
11478 | $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); | |
11479 | $next_nonblank_type = $types_to_go[$i_next_nonblank]; | |
11480 | $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | |
11481 | ||
11482 | # Some token chemistry... The decision about where to break a | |
11483 | # line depends upon a "bond strength" between tokens. The LOWER | |
11484 | # the bond strength, the MORE likely a break. The strength | |
11485 | # values are based on trial-and-error, and need to be tweaked | |
11486 | # occasionally to get desired results. Things to keep in mind | |
11487 | # are: | |
11488 | # 1. relative strengths are important. small differences | |
11489 | # in strengths can make big formatting differences. | |
11490 | # 2. each indentation level adds one unit of bond strength | |
11491 | # 3. a value of NO_BREAK makes an unbreakable bond | |
11492 | # 4. a value of VERY_WEAK is the strength of a ',' | |
11493 | # 5. values below NOMINAL are considered ok break points | |
11494 | # 6. values above NOMINAL are considered poor break points | |
11495 | # We are computing the strength of the bond between the current | |
11496 | # token and the NEXT token. | |
11497 | my $bond_str = VERY_STRONG; # a default, high strength | |
11498 | ||
11499 | #--------------------------------------------------------------- | |
11500 | # section 1: | |
11501 | # use minimum of left and right bond strengths if defined; | |
11502 | # digraphs and trigraphs like to break on their left | |
11503 | #--------------------------------------------------------------- | |
11504 | my $bsr = $right_bond_strength{$type}; | |
11505 | ||
11506 | if ( !defined($bsr) ) { | |
11507 | ||
11508 | if ( $is_digraph{$type} || $is_trigraph{$type} ) { | |
11509 | $bsr = STRONG; | |
11510 | } | |
11511 | else { | |
11512 | $bsr = VERY_STRONG; | |
11513 | } | |
11514 | } | |
11515 | ||
11516 | # define right bond strengths of certain keywords | |
11517 | if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { | |
11518 | $bsr = $right_bond_strength{$token}; | |
11519 | } | |
11520 | elsif ( $token eq 'ne' or $token eq 'eq' ) { | |
11521 | $bsr = NOMINAL; | |
11522 | } | |
11523 | my $bsl = $left_bond_strength{$next_nonblank_type}; | |
11524 | ||
11525 | # set terminal bond strength to the nominal value | |
11526 | # this will cause good preceding breaks to be retained | |
11527 | if ( $i_next_nonblank > $max_index_to_go ) { | |
11528 | $bsl = NOMINAL; | |
11529 | } | |
11530 | ||
11531 | if ( !defined($bsl) ) { | |
11532 | ||
11533 | if ( $is_digraph{$next_nonblank_type} | |
11534 | || $is_trigraph{$next_nonblank_type} ) | |
11535 | { | |
11536 | $bsl = WEAK; | |
11537 | } | |
11538 | else { | |
11539 | $bsl = VERY_STRONG; | |
11540 | } | |
11541 | } | |
11542 | ||
11543 | # define right bond strengths of certain keywords | |
11544 | if ( $next_nonblank_type eq 'k' | |
11545 | && defined( $left_bond_strength{$next_nonblank_token} ) ) | |
11546 | { | |
11547 | $bsl = $left_bond_strength{$next_nonblank_token}; | |
11548 | } | |
11549 | elsif ($next_nonblank_token eq 'ne' | |
11550 | or $next_nonblank_token eq 'eq' ) | |
11551 | { | |
11552 | $bsl = NOMINAL; | |
11553 | } | |
11554 | elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { | |
11555 | $bsl = 0.9 * NOMINAL + 0.1 * STRONG; | |
11556 | } | |
11557 | ||
11558 | # Note: it might seem that we would want to keep a NO_BREAK if | |
11559 | # either token has this value. This didn't work, because in an | |
11560 | # arrow list, it prevents the comma from separating from the | |
11561 | # following bare word (which is probably quoted by its arrow). | |
11562 | # So necessary NO_BREAK's have to be handled as special cases | |
11563 | # in the final section. | |
11564 | $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; | |
11565 | my $bond_str_1 = $bond_str; | |
11566 | ||
11567 | #--------------------------------------------------------------- | |
11568 | # section 2: | |
11569 | # special cases | |
11570 | #--------------------------------------------------------------- | |
11571 | ||
11572 | # allow long lines before final { in an if statement, as in: | |
11573 | # if (.......... | |
11574 | # ..........) | |
11575 | # { | |
11576 | # | |
11577 | # Otherwise, the line before the { tends to be too short. | |
11578 | if ( $type eq ')' ) { | |
11579 | if ( $next_nonblank_type eq '{' ) { | |
11580 | $bond_str = VERY_WEAK + 0.03; | |
11581 | } | |
11582 | } | |
11583 | ||
11584 | elsif ( $type eq '(' ) { | |
11585 | if ( $next_nonblank_type eq '{' ) { | |
11586 | $bond_str = NOMINAL; | |
11587 | } | |
11588 | } | |
11589 | ||
11590 | # break on something like '} (', but keep this stronger than a ',' | |
11591 | # example is in 'howe.pl' | |
11592 | elsif ( $type eq 'R' or $type eq '}' ) { | |
11593 | if ( $next_nonblank_type eq '(' ) { | |
11594 | $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK; | |
11595 | } | |
11596 | } | |
11597 | ||
11598 | #----------------------------------------------------------------- | |
11599 | # adjust bond strength bias | |
11600 | #----------------------------------------------------------------- | |
11601 | ||
11602 | elsif ( $type eq 'f' ) { | |
11603 | $bond_str += $f_bias; | |
11604 | $f_bias += $delta_bias; | |
11605 | } | |
11606 | ||
11607 | # in long ?: conditionals, bias toward just one set per line (colon.t) | |
11608 | elsif ( $type eq ':' ) { | |
11609 | if ( !$want_break_before{$type} ) { | |
11610 | $bond_str += $colon_bias; | |
11611 | $colon_bias += $delta_bias; | |
11612 | } | |
11613 | } | |
11614 | ||
11615 | if ( $next_nonblank_type eq ':' | |
11616 | && $want_break_before{$next_nonblank_type} ) | |
11617 | { | |
11618 | $bond_str += $colon_bias; | |
11619 | $colon_bias += $delta_bias; | |
11620 | } | |
11621 | ||
11622 | # if leading '.' is used, align all but 'short' quotes; | |
11623 | # the idea is to not place something like "\n" on a single line. | |
11624 | elsif ( $next_nonblank_type eq '.' ) { | |
11625 | if ( $want_break_before{'.'} ) { | |
11626 | unless ( | |
11627 | $last_nonblank_type eq '.' | |
11628 | && ( | |
11629 | length($token) <= | |
11630 | $rOpts_short_concatenation_item_length ) | |
11631 | && ( $token !~ /^[\)\]\}]$/ ) | |
11632 | ) | |
11633 | { | |
11634 | $dot_bias += $delta_bias; | |
11635 | } | |
11636 | $bond_str += $dot_bias; | |
11637 | } | |
11638 | } | |
11639 | elsif ($next_nonblank_type eq '&&' | |
11640 | && $want_break_before{$next_nonblank_type} ) | |
11641 | { | |
11642 | $bond_str += $amp_bias; | |
11643 | $amp_bias += $delta_bias; | |
11644 | } | |
11645 | elsif ($next_nonblank_type eq '||' | |
11646 | && $want_break_before{$next_nonblank_type} ) | |
11647 | { | |
11648 | $bond_str += $bar_bias; | |
11649 | $bar_bias += $delta_bias; | |
11650 | } | |
11651 | elsif ( $next_nonblank_type eq 'k' ) { | |
11652 | ||
11653 | if ( $next_nonblank_token eq 'and' | |
11654 | && $want_break_before{$next_nonblank_token} ) | |
11655 | { | |
11656 | $bond_str += $and_bias; | |
11657 | $and_bias += $delta_bias; | |
11658 | } | |
11659 | elsif ($next_nonblank_token eq 'or' | |
11660 | && $want_break_before{$next_nonblank_token} ) | |
11661 | { | |
11662 | $bond_str += $or_bias; | |
11663 | $or_bias += $delta_bias; | |
11664 | } | |
11665 | ||
11666 | # FIXME: needs more testing | |
11667 | elsif ( $is_keyword_returning_list{$next_nonblank_token} ) { | |
11668 | $bond_str = $list_str if ( $bond_str > $list_str ); | |
11669 | } | |
11670 | } | |
11671 | ||
11672 | if ( $type eq ':' | |
11673 | && !$want_break_before{$type} ) | |
11674 | { | |
11675 | $bond_str += $colon_bias; | |
11676 | $colon_bias += $delta_bias; | |
11677 | } | |
11678 | elsif ( $type eq '&&' | |
11679 | && !$want_break_before{$type} ) | |
11680 | { | |
11681 | $bond_str += $amp_bias; | |
11682 | $amp_bias += $delta_bias; | |
11683 | } | |
11684 | elsif ( $type eq '||' | |
11685 | && !$want_break_before{$type} ) | |
11686 | { | |
11687 | $bond_str += $bar_bias; | |
11688 | $bar_bias += $delta_bias; | |
11689 | } | |
11690 | elsif ( $type eq 'k' ) { | |
11691 | ||
11692 | if ( $token eq 'and' | |
11693 | && !$want_break_before{$token} ) | |
11694 | { | |
11695 | $bond_str += $and_bias; | |
11696 | $and_bias += $delta_bias; | |
11697 | } | |
11698 | elsif ( $token eq 'or' | |
11699 | && !$want_break_before{$token} ) | |
11700 | { | |
11701 | $bond_str += $or_bias; | |
11702 | $or_bias += $delta_bias; | |
11703 | } | |
11704 | } | |
11705 | ||
11706 | # keep matrix and hash indices together | |
11707 | # but make them a little below STRONG to allow breaking open | |
11708 | # something like {'some-word'}{'some-very-long-word'} at the }{ | |
11709 | # (bracebrk.t) | |
11710 | if ( ( $type eq ']' or $type eq 'R' ) | |
11711 | && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' ) | |
11712 | ) | |
11713 | { | |
11714 | $bond_str = 0.9 * STRONG + 0.1 * NOMINAL; | |
11715 | } | |
11716 | ||
11717 | if ( $next_nonblank_token =~ /^->/ ) { | |
11718 | ||
11719 | # increase strength to the point where a break in the following | |
11720 | # will be after the opening paren rather than at the arrow: | |
11721 | # $a->$b($c); | |
11722 | if ( $type eq 'i' ) { | |
11723 | $bond_str = 1.45 * STRONG; | |
11724 | } | |
11725 | ||
11726 | elsif ( $type =~ /^[\)\]\}R]$/ ) { | |
11727 | $bond_str = 0.1 * STRONG + 0.9 * NOMINAL; | |
11728 | } | |
11729 | ||
11730 | # otherwise make strength before an '->' a little over a '+' | |
11731 | else { | |
11732 | if ( $bond_str <= NOMINAL ) { | |
11733 | $bond_str = NOMINAL + 0.01; | |
11734 | } | |
11735 | } | |
11736 | } | |
11737 | ||
11738 | if ( $token eq ')' && $next_nonblank_token eq '[' ) { | |
11739 | $bond_str = 0.2 * STRONG + 0.8 * NOMINAL; | |
11740 | } | |
11741 | ||
11742 | # map1.t -- correct for a quirk in perl | |
11743 | if ( $token eq '(' | |
11744 | && $next_nonblank_type eq 'i' | |
11745 | && $last_nonblank_type eq 'k' | |
11746 | && $is_sort_map_grep{$last_nonblank_token} ) | |
11747 | ||
11748 | # /^(sort|map|grep)$/ ) | |
11749 | { | |
11750 | $bond_str = NO_BREAK; | |
11751 | } | |
11752 | ||
11753 | # extrude.t: do not break before paren at: | |
11754 | # -l pid_filename( | |
11755 | if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { | |
11756 | $bond_str = NO_BREAK; | |
11757 | } | |
11758 | ||
11759 | # good to break after end of code blocks | |
11760 | if ( $type eq '}' && $block_type ) { | |
11761 | ||
11762 | $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; | |
11763 | $code_bias += $delta_bias; | |
11764 | } | |
11765 | ||
11766 | if ( $type eq 'k' ) { | |
11767 | ||
11768 | # allow certain control keywords to stand out | |
11769 | if ( $next_nonblank_type eq 'k' | |
11770 | && $is_last_next_redo_return{$token} ) | |
11771 | { | |
11772 | $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; | |
11773 | } | |
11774 | ||
11775 | # Don't break after keyword my. This is a quick fix for a | |
11776 | # rare problem with perl. An example is this line from file | |
11777 | # Container.pm: | |
11778 | # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) ) | |
11779 | ||
11780 | if ( $token eq 'my' ) { | |
11781 | $bond_str = NO_BREAK; | |
11782 | } | |
11783 | ||
11784 | } | |
11785 | ||
11786 | # good to break before 'if', 'unless', etc | |
11787 | if ( $is_if_brace_follower{$next_nonblank_token} ) { | |
11788 | $bond_str = VERY_WEAK; | |
11789 | } | |
11790 | ||
11791 | if ( $next_nonblank_type eq 'k' ) { | |
11792 | ||
11793 | # keywords like 'unless', 'if', etc, within statements | |
11794 | # make good breaks | |
11795 | if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { | |
11796 | $bond_str = VERY_WEAK / 1.05; | |
11797 | } | |
11798 | } | |
11799 | ||
11800 | # try not to break before a comma-arrow | |
11801 | elsif ( $next_nonblank_type eq '=>' ) { | |
11802 | if ( $bond_str < STRONG ) { $bond_str = STRONG } | |
11803 | } | |
11804 | ||
11805 | #---------------------------------------------------------------------- | |
11806 | # only set NO_BREAK's from here on | |
11807 | #---------------------------------------------------------------------- | |
11808 | if ( $type eq 'C' or $type eq 'U' ) { | |
11809 | ||
11810 | # use strict requires that bare word and => not be separated | |
11811 | if ( $next_nonblank_type eq '=>' ) { | |
11812 | $bond_str = NO_BREAK; | |
11813 | } | |
11814 | ||
11815 | } | |
11816 | ||
11817 | # use strict requires that bare word within braces not start new line | |
11818 | elsif ( $type eq 'L' ) { | |
11819 | ||
11820 | if ( $next_nonblank_type eq 'w' ) { | |
11821 | $bond_str = NO_BREAK; | |
11822 | } | |
11823 | } | |
11824 | ||
11825 | # in older version of perl, use strict can cause problems with | |
11826 | # breaks before bare words following opening parens. For example, | |
11827 | # this will fail under older versions if a break is made between | |
11828 | # '(' and 'MAIL': | |
11829 | # use strict; | |
11830 | # open( MAIL, "a long filename or command"); | |
11831 | # close MAIL; | |
11832 | elsif ( $type eq '{' ) { | |
11833 | ||
11834 | if ( $token eq '(' && $next_nonblank_type eq 'w' ) { | |
11835 | ||
11836 | # but it's fine to break if the word is followed by a '=>' | |
11837 | # or if it is obviously a sub call | |
11838 | my $i_next_next_nonblank = $i_next_nonblank + 1; | |
11839 | my $next_next_type = $types_to_go[$i_next_next_nonblank]; | |
11840 | if ( $next_next_type eq 'b' | |
11841 | && $i_next_nonblank < $max_index_to_go ) | |
11842 | { | |
11843 | $i_next_next_nonblank++; | |
11844 | $next_next_type = $types_to_go[$i_next_next_nonblank]; | |
11845 | } | |
11846 | ||
11847 | ##if ( $next_next_type ne '=>' ) { | |
11848 | # these are ok: '->xxx', '=>', '(' | |
11849 | ||
11850 | # We'll check for an old breakpoint and keep a leading | |
11851 | # bareword if it was that way in the input file. Presumably | |
11852 | # it was ok that way. For example, the following would remain | |
11853 | # unchanged: | |
11854 | # | |
11855 | # @months = ( | |
11856 | # January, February, March, April, | |
11857 | # May, June, July, August, | |
11858 | # September, October, November, December, | |
11859 | # ); | |
11860 | # | |
11861 | # This should be sufficient: | |
11862 | if ( !$old_breakpoint_to_go[$i] | |
11863 | && ( $next_next_type eq ',' || $next_next_type eq '}' ) | |
11864 | ) | |
11865 | { | |
11866 | $bond_str = NO_BREAK; | |
11867 | } | |
11868 | } | |
11869 | } | |
11870 | ||
11871 | elsif ( $type eq 'w' ) { | |
11872 | ||
11873 | if ( $next_nonblank_type eq 'R' ) { | |
11874 | $bond_str = NO_BREAK; | |
11875 | } | |
11876 | ||
11877 | # use strict requires that bare word and => not be separated | |
11878 | if ( $next_nonblank_type eq '=>' ) { | |
11879 | $bond_str = NO_BREAK; | |
11880 | } | |
11881 | } | |
11882 | ||
11883 | # in fact, use strict hates bare words on any new line. For example, | |
11884 | # a break before the underscore here provokes the wrath of use strict: | |
11885 | # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { | |
11886 | elsif ( $type eq 'F' ) { | |
11887 | $bond_str = NO_BREAK; | |
11888 | } | |
11889 | ||
11890 | # use strict does not allow separating type info from trailing { } | |
11891 | # testfile is readmail.pl | |
11892 | elsif ( $type eq 't' or $type eq 'i' ) { | |
11893 | ||
11894 | if ( $next_nonblank_type eq 'L' ) { | |
11895 | $bond_str = NO_BREAK; | |
11896 | } | |
11897 | } | |
11898 | ||
11899 | # Do not break between a possible filehandle and a ? or / | |
11900 | # and do not introduce a break after it if there is no blank (extrude.t) | |
11901 | elsif ( $type eq 'Z' ) { | |
11902 | ||
11903 | # dont break.. | |
11904 | if ( | |
11905 | ||
11906 | # if there is no blank and we do not want one. Examples: | |
11907 | # print $x++ # do not break after $x | |
11908 | # print HTML"HELLO" # break ok after HTML | |
11909 | ( | |
11910 | $next_type ne 'b' | |
11911 | && defined( $want_left_space{$next_type} ) | |
11912 | && $want_left_space{$next_type} == WS_NO | |
11913 | ) | |
11914 | ||
11915 | # or we might be followed by the start of a quote | |
11916 | || $next_nonblank_type =~ /^[\/\?]$/ | |
11917 | ) | |
11918 | { | |
11919 | $bond_str = NO_BREAK; | |
11920 | } | |
11921 | } | |
11922 | ||
11923 | # Do not break before a possible file handle | |
11924 | if ( $next_nonblank_type eq 'Z' ) { | |
11925 | $bond_str = NO_BREAK; | |
11926 | } | |
11927 | ||
11928 | # As a defensive measure, do not break between a '(' and a | |
11929 | # filehandle. In some cases, this can cause an error. For | |
11930 | # example, the following program works: | |
11931 | # my $msg="hi!\n"; | |
11932 | ||
11933 | # ( STDOUT | |
11934 | # $msg | |
11935 | # ); | |
11936 | # | |
11937 | # But this program fails: | |
11938 | # my $msg="hi!\n"; | |
11939 | ||
11940 | # ( | |
11941 | # STDOUT | |
11942 | # $msg | |
11943 | # ); | |
11944 | # | |
11945 | # This is normally only a problem with the 'extrude' option | |
11946 | if ( $next_nonblank_type eq 'Y' && $token eq '(' ) { | |
11947 | $bond_str = NO_BREAK; | |
11948 | } | |
11949 | ||
11950 | # patch to put cuddled elses back together when on multiple | |
11951 | # lines, as in: } \n else \n { \n | |
11952 | if ($rOpts_cuddled_else) { | |
11953 | ||
11954 | if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) | |
11955 | || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) | |
11956 | { | |
11957 | $bond_str = NO_BREAK; | |
11958 | } | |
11959 | } | |
11960 | ||
11961 | # keep '}' together with ';' | |
11962 | if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) { | |
11963 | $bond_str = NO_BREAK; | |
11964 | } | |
11965 | ||
11966 | # never break between sub name and opening paren | |
11967 | if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) { | |
11968 | $bond_str = NO_BREAK; | |
11969 | } | |
11970 | ||
11971 | #--------------------------------------------------------------- | |
11972 | # section 3: | |
11973 | # now take nesting depth into account | |
11974 | #--------------------------------------------------------------- | |
11975 | # final strength incorporates the bond strength and nesting depth | |
11976 | my $strength; | |
11977 | ||
11978 | if ( defined($bond_str) && !$nobreak_to_go[$i] ) { | |
11979 | if ( $total_nesting_depth > 0 ) { | |
11980 | $strength = $bond_str + $total_nesting_depth; | |
11981 | } | |
11982 | else { | |
11983 | $strength = $bond_str; | |
11984 | } | |
11985 | } | |
11986 | else { | |
11987 | $strength = NO_BREAK; | |
11988 | } | |
11989 | ||
11990 | # always break after side comment | |
11991 | if ( $type eq '#' ) { $strength = 0 } | |
11992 | ||
11993 | $bond_strength_to_go[$i] = $strength; | |
11994 | ||
11995 | FORMATTER_DEBUG_FLAG_BOND && do { | |
11996 | my $str = substr( $token, 0, 15 ); | |
11997 | $str .= ' ' x ( 16 - length($str) ); | |
11998 | ||
11999 | "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n"; | |
12000 | }; | |
12001 | } | |
12002 | } | |
12003 | ||
12004 | } | |
12005 | ||
12006 | sub pad_array_to_go { | |
12007 | ||
12008 | # to simplify coding in scan_list and set_bond_strengths, it helps | |
12009 | # to create some extra blank tokens at the end of the arrays | |
12010 | $tokens_to_go[ $max_index_to_go + 1 ] = ''; | |
12011 | $tokens_to_go[ $max_index_to_go + 2 ] = ''; | |
12012 | $types_to_go[ $max_index_to_go + 1 ] = 'b'; | |
12013 | $types_to_go[ $max_index_to_go + 2 ] = 'b'; | |
12014 | $nesting_depth_to_go[ $max_index_to_go + 1 ] = | |
12015 | $nesting_depth_to_go[$max_index_to_go]; | |
12016 | ||
12017 | # /^[R\}\)\]]$/ | |
12018 | if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { | |
12019 | if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { | |
12020 | ||
12021 | # shouldn't happen: | |
12022 | unless ( get_saw_brace_error() ) { | |
12023 | warning( | |
12024 | "Program bug in scan_list: hit nesting error which should have been caught\n" | |
12025 | ); | |
12026 | report_definite_bug(); | |
12027 | } | |
12028 | } | |
12029 | else { | |
12030 | $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; | |
12031 | } | |
12032 | } | |
12033 | ||
12034 | # /^[L\{\(\[]$/ | |
12035 | elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { | |
12036 | $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; | |
12037 | } | |
12038 | } | |
12039 | ||
12040 | { # begin scan_list | |
12041 | ||
12042 | my ( | |
12043 | $block_type, $current_depth, | |
12044 | $depth, $i, | |
12045 | $i_last_nonblank_token, $last_colon_sequence_number, | |
12046 | $last_nonblank_token, $last_nonblank_type, | |
12047 | $last_old_breakpoint_count, $minimum_depth, | |
12048 | $next_nonblank_block_type, $next_nonblank_token, | |
12049 | $next_nonblank_type, $old_breakpoint_count, | |
12050 | $starting_breakpoint_count, $starting_depth, | |
12051 | $token, $type, | |
12052 | $type_sequence, | |
12053 | ); | |
12054 | ||
12055 | my ( | |
12056 | @breakpoint_stack, @breakpoint_undo_stack, | |
12057 | @comma_index, @container_type, | |
12058 | @identifier_count_stack, @index_before_arrow, | |
12059 | @interrupted_list, @item_count_stack, | |
12060 | @last_comma_index, @last_dot_index, | |
12061 | @last_nonblank_type, @old_breakpoint_count_stack, | |
12062 | @opening_structure_index_stack, @rfor_semicolon_list, | |
12063 | @has_old_logical_breakpoints, @rand_or_list, | |
12064 | @i_equals, | |
12065 | ); | |
12066 | ||
12067 | # routine to define essential variables when we go 'up' to | |
12068 | # a new depth | |
12069 | sub check_for_new_minimum_depth { | |
12070 | my $depth = shift; | |
12071 | if ( $depth < $minimum_depth ) { | |
12072 | ||
12073 | $minimum_depth = $depth; | |
12074 | ||
12075 | # these arrays need not retain values between calls | |
12076 | $breakpoint_stack[$depth] = $starting_breakpoint_count; | |
12077 | $container_type[$depth] = ""; | |
12078 | $identifier_count_stack[$depth] = 0; | |
12079 | $index_before_arrow[$depth] = -1; | |
12080 | $interrupted_list[$depth] = 1; | |
12081 | $item_count_stack[$depth] = 0; | |
12082 | $last_nonblank_type[$depth] = ""; | |
12083 | $opening_structure_index_stack[$depth] = -1; | |
12084 | ||
12085 | $breakpoint_undo_stack[$depth] = undef; | |
12086 | $comma_index[$depth] = undef; | |
12087 | $last_comma_index[$depth] = undef; | |
12088 | $last_dot_index[$depth] = undef; | |
12089 | $old_breakpoint_count_stack[$depth] = undef; | |
12090 | $has_old_logical_breakpoints[$depth] = 0; | |
12091 | $rand_or_list[$depth] = []; | |
12092 | $rfor_semicolon_list[$depth] = []; | |
12093 | $i_equals[$depth] = -1; | |
12094 | ||
12095 | # these arrays must retain values between calls | |
12096 | if ( !defined( $has_broken_sublist[$depth] ) ) { | |
12097 | $dont_align[$depth] = 0; | |
12098 | $has_broken_sublist[$depth] = 0; | |
12099 | $want_comma_break[$depth] = 0; | |
12100 | } | |
12101 | } | |
12102 | } | |
12103 | ||
12104 | # routine to decide which commas to break at within a container; | |
12105 | # returns: | |
12106 | # $bp_count = number of comma breakpoints set | |
12107 | # $do_not_break_apart = a flag indicating if container need not | |
12108 | # be broken open | |
12109 | sub set_comma_breakpoints { | |
12110 | ||
12111 | my $dd = shift; | |
12112 | my $bp_count = 0; | |
12113 | my $do_not_break_apart = 0; | |
12114 | if ( $item_count_stack[$dd] && !$dont_align[$dd] ) { | |
12115 | ||
12116 | my $fbc = $forced_breakpoint_count; | |
12117 | ||
12118 | # always open comma lists not preceded by keywords, | |
12119 | # barewords, identifiers (that is, anything that doesn't | |
12120 | # look like a function call) | |
12121 | my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; | |
12122 | ||
12123 | set_comma_breakpoints_do( | |
12124 | $dd, | |
12125 | $opening_structure_index_stack[$dd], | |
12126 | $i, | |
12127 | $item_count_stack[$dd], | |
12128 | $identifier_count_stack[$dd], | |
12129 | $comma_index[$dd], | |
12130 | $next_nonblank_type, | |
12131 | $container_type[$dd], | |
12132 | $interrupted_list[$dd], | |
12133 | \$do_not_break_apart, | |
12134 | $must_break_open, | |
12135 | ); | |
12136 | $bp_count = $forced_breakpoint_count - $fbc; | |
12137 | $do_not_break_apart = 0 if $must_break_open; | |
12138 | } | |
12139 | return ( $bp_count, $do_not_break_apart ); | |
12140 | } | |
12141 | ||
12142 | my %is_logical_container; | |
12143 | ||
12144 | BEGIN { | |
12145 | @_ = qw# if elsif unless while and or not && | || ? : ! #; | |
12146 | @is_logical_container{@_} = (1) x scalar(@_); | |
12147 | } | |
12148 | ||
12149 | sub set_for_semicolon_breakpoints { | |
12150 | my $dd = shift; | |
12151 | foreach ( @{ $rfor_semicolon_list[$dd] } ) { | |
12152 | set_forced_breakpoint($_); | |
12153 | } | |
12154 | } | |
12155 | ||
12156 | sub set_logical_breakpoints { | |
12157 | my $dd = shift; | |
12158 | if ( | |
12159 | $item_count_stack[$dd] == 0 | |
12160 | && $is_logical_container{ $container_type[$dd] } | |
12161 | ||
12162 | # TESTING: | |
12163 | || $has_old_logical_breakpoints[$dd] | |
12164 | ) | |
12165 | { | |
12166 | ||
12167 | # Look for breaks in this order: | |
12168 | # 0 1 2 3 | |
12169 | # or and || && | |
12170 | foreach my $i ( 0 .. 3 ) { | |
12171 | if ( $rand_or_list[$dd][$i] ) { | |
12172 | foreach ( @{ $rand_or_list[$dd][$i] } ) { | |
12173 | set_forced_breakpoint($_); | |
12174 | } | |
12175 | ||
12176 | # break at any 'if' and 'unless' too | |
12177 | foreach ( @{ $rand_or_list[$dd][4] } ) { | |
12178 | set_forced_breakpoint($_); | |
12179 | } | |
12180 | $rand_or_list[$dd] = []; | |
12181 | last; | |
12182 | } | |
12183 | } | |
12184 | } | |
12185 | } | |
12186 | ||
12187 | sub is_unbreakable_container { | |
12188 | ||
12189 | # never break a container of one of these types | |
12190 | # because bad things can happen (map1.t) | |
12191 | my $dd = shift; | |
12192 | $is_sort_map_grep{ $container_type[$dd] }; | |
12193 | } | |
12194 | ||
12195 | sub scan_list { | |
12196 | ||
12197 | # This routine is responsible for setting line breaks for all lists, | |
12198 | # so that hierarchical structure can be displayed and so that list | |
12199 | # items can be vertically aligned. The output of this routine is | |
12200 | # stored in the array @forced_breakpoint_to_go, which is used to set | |
12201 | # final breakpoints. | |
12202 | ||
12203 | $starting_depth = $nesting_depth_to_go[0]; | |
12204 | ||
12205 | $block_type = ' '; | |
12206 | $current_depth = $starting_depth; | |
12207 | $i = -1; | |
12208 | $last_colon_sequence_number = -1; | |
12209 | $last_nonblank_token = ';'; | |
12210 | $last_nonblank_type = ';'; | |
12211 | $last_old_breakpoint_count = 0; | |
12212 | $minimum_depth = $current_depth + 1; # forces update in check below | |
12213 | $old_breakpoint_count = 0; | |
12214 | $starting_breakpoint_count = $forced_breakpoint_count; | |
12215 | $token = ';'; | |
12216 | $type = ';'; | |
12217 | $type_sequence = ''; | |
12218 | ||
12219 | check_for_new_minimum_depth($current_depth); | |
12220 | ||
12221 | my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; | |
12222 | my $want_previous_breakpoint = -1; | |
12223 | ||
12224 | my $saw_good_breakpoint; | |
12225 | my $i_line_end = -1; | |
12226 | my $i_line_start = -1; | |
12227 | ||
12228 | # loop over all tokens in this batch | |
12229 | while ( ++$i <= $max_index_to_go ) { | |
12230 | if ( $type ne 'b' ) { | |
12231 | $i_last_nonblank_token = $i - 1; | |
12232 | $last_nonblank_type = $type; | |
12233 | $last_nonblank_token = $token; | |
12234 | } | |
12235 | $type = $types_to_go[$i]; | |
12236 | $block_type = $block_type_to_go[$i]; | |
12237 | $token = $tokens_to_go[$i]; | |
12238 | $type_sequence = $type_sequence_to_go[$i]; | |
12239 | my $next_type = $types_to_go[ $i + 1 ]; | |
12240 | my $next_token = $tokens_to_go[ $i + 1 ]; | |
12241 | my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); | |
12242 | $next_nonblank_type = $types_to_go[$i_next_nonblank]; | |
12243 | $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | |
12244 | $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; | |
12245 | ||
12246 | # set break if flag was set | |
12247 | if ( $want_previous_breakpoint >= 0 ) { | |
12248 | set_forced_breakpoint($want_previous_breakpoint); | |
12249 | $want_previous_breakpoint = -1; | |
12250 | } | |
12251 | ||
12252 | $last_old_breakpoint_count = $old_breakpoint_count; | |
12253 | if ( $old_breakpoint_to_go[$i] ) { | |
12254 | $i_line_end = $i; | |
12255 | $i_line_start = $i_next_nonblank; | |
12256 | ||
12257 | $old_breakpoint_count++; | |
12258 | ||
12259 | # Break before certain keywords if user broke there and | |
12260 | # this is a 'safe' break point. The idea is to retain | |
12261 | # any preferred breaks for sequential list operations, | |
12262 | # like a schwartzian transform. | |
12263 | if ($rOpts_break_at_old_keyword_breakpoints) { | |
12264 | if ( | |
12265 | $next_nonblank_type eq 'k' | |
12266 | && $is_keyword_returning_list{$next_nonblank_token} | |
12267 | && ( $type =~ /^[=\)\]\}Riw]$/ | |
12268 | || $type eq 'k' | |
12269 | && $is_keyword_returning_list{$token} ) | |
12270 | ) | |
12271 | { | |
12272 | ||
12273 | # we actually have to set this break next time through | |
12274 | # the loop because if we are at a closing token (such | |
12275 | # as '}') which forms a one-line block, this break might | |
12276 | # get undone. | |
12277 | $want_previous_breakpoint = $i; | |
12278 | } | |
12279 | } | |
12280 | } | |
12281 | next if ( $type eq 'b' ); | |
12282 | $depth = $nesting_depth_to_go[ $i + 1 ]; | |
12283 | ||
12284 | # safety check - be sure we always break after a comment | |
12285 | # Shouldn't happen .. an error here probably means that the | |
12286 | # nobreak flag did not get turned off correctly during | |
12287 | # formatting. | |
12288 | if ( $type eq '#' ) { | |
12289 | if ( $i != $max_index_to_go ) { | |
12290 | warning( | |
12291 | "Non-fatal program bug: backup logic needed to break after a comment\n" | |
12292 | ); | |
12293 | report_definite_bug(); | |
12294 | $nobreak_to_go[$i] = 0; | |
12295 | set_forced_breakpoint($i); | |
12296 | } | |
12297 | } | |
12298 | ||
12299 | # Force breakpoints at certain tokens in long lines. | |
12300 | # Note that such breakpoints will be undone later if these tokens | |
12301 | # are fully contained within parens on a line. | |
12302 | if ( | |
12303 | $type eq 'k' | |
12304 | && $i > 0 | |
12305 | && $token =~ /^(if|unless)$/ | |
12306 | && ( | |
12307 | $is_long_line | |
12308 | ||
12309 | # or container is broken (by side-comment, etc) | |
12310 | || ( $next_nonblank_token eq '(' | |
12311 | && $mate_index_to_go[$i_next_nonblank] < $i ) | |
12312 | ) | |
12313 | ) | |
12314 | { | |
12315 | set_forced_breakpoint( $i - 1 ); | |
12316 | } | |
12317 | ||
12318 | # remember locations of '||' and '&&' for possible breaks if we | |
12319 | # decide this is a long logical expression. | |
12320 | if ( $type eq '||' ) { | |
12321 | push @{ $rand_or_list[$depth][2] }, $i; | |
12322 | ++$has_old_logical_breakpoints[$depth] | |
12323 | if ( ( $i == $i_line_start || $i == $i_line_end ) | |
12324 | && $rOpts_break_at_old_logical_breakpoints ); | |
12325 | } | |
12326 | elsif ( $type eq '&&' ) { | |
12327 | push @{ $rand_or_list[$depth][3] }, $i; | |
12328 | ++$has_old_logical_breakpoints[$depth] | |
12329 | if ( ( $i == $i_line_start || $i == $i_line_end ) | |
12330 | && $rOpts_break_at_old_logical_breakpoints ); | |
12331 | } | |
12332 | elsif ( $type eq 'f' ) { | |
12333 | push @{ $rfor_semicolon_list[$depth] }, $i; | |
12334 | } | |
12335 | elsif ( $type eq 'k' ) { | |
12336 | if ( $token eq 'and' ) { | |
12337 | push @{ $rand_or_list[$depth][1] }, $i; | |
12338 | ++$has_old_logical_breakpoints[$depth] | |
12339 | if ( ( $i == $i_line_start || $i == $i_line_end ) | |
12340 | && $rOpts_break_at_old_logical_breakpoints ); | |
12341 | } | |
12342 | ||
12343 | # break immediately at 'or's which are probably not in a logical | |
12344 | # block -- but we will break in logical breaks below so that | |
12345 | # they do not add to the forced_breakpoint_count | |
12346 | elsif ( $token eq 'or' ) { | |
12347 | push @{ $rand_or_list[$depth][0] }, $i; | |
12348 | ++$has_old_logical_breakpoints[$depth] | |
12349 | if ( ( $i == $i_line_start || $i == $i_line_end ) | |
12350 | && $rOpts_break_at_old_logical_breakpoints ); | |
12351 | if ( $is_logical_container{ $container_type[$depth] } ) { | |
12352 | } | |
12353 | else { | |
12354 | if ($is_long_line) { set_forced_breakpoint($i) } | |
12355 | elsif ( ( $i == $i_line_start || $i == $i_line_end ) | |
12356 | && $rOpts_break_at_old_logical_breakpoints ) | |
12357 | { | |
12358 | $saw_good_breakpoint = 1; | |
12359 | } | |
12360 | } | |
12361 | } | |
12362 | elsif ( $token eq 'if' || $token eq 'unless' ) { | |
12363 | push @{ $rand_or_list[$depth][4] }, $i; | |
12364 | if ( ( $i == $i_line_start || $i == $i_line_end ) | |
12365 | && $rOpts_break_at_old_logical_breakpoints ) | |
12366 | { | |
12367 | set_forced_breakpoint($i); | |
12368 | } | |
12369 | } | |
12370 | } | |
12371 | elsif ( $is_assignment{$type} ) { | |
12372 | $i_equals[$depth] = $i; | |
12373 | } | |
12374 | ||
12375 | if ($type_sequence) { | |
12376 | ||
12377 | # handle any postponed closing breakpoints | |
12378 | if ( $token =~ /^[\)\]\}\:]$/ ) { | |
12379 | if ( $type eq ':' ) { | |
12380 | $last_colon_sequence_number = $type_sequence; | |
12381 | ||
12382 | # TESTING: retain break at a ':' line break | |
12383 | if ( ( $i == $i_line_start || $i == $i_line_end ) | |
12384 | && $rOpts_break_at_old_trinary_breakpoints ) | |
12385 | { | |
12386 | ||
12387 | # TESTING: | |
12388 | set_forced_breakpoint($i); | |
12389 | ||
12390 | # break at previous '=' | |
12391 | if ( $i_equals[$depth] > 0 ) { | |
12392 | set_forced_breakpoint( $i_equals[$depth] ); | |
12393 | $i_equals[$depth] = -1; | |
12394 | } | |
12395 | } | |
12396 | } | |
12397 | if ( defined( $postponed_breakpoint{$type_sequence} ) ) { | |
12398 | my $inc = ( $type eq ':' ) ? 0 : 1; | |
12399 | set_forced_breakpoint( $i - $inc ); | |
12400 | delete $postponed_breakpoint{$type_sequence}; | |
12401 | } | |
12402 | } | |
12403 | ||
12404 | # set breaks at ?/: if they will get separated (and are | |
12405 | # not a ?/: chain), or if the '?' is at the end of the | |
12406 | # line | |
12407 | elsif ( $token eq '?' ) { | |
12408 | my $i_colon = $mate_index_to_go[$i]; | |
12409 | if ( | |
12410 | $i_colon <= 0 # the ':' is not in this batch | |
12411 | || $i == 0 # this '?' is the first token of the line | |
12412 | || $i == | |
12413 | $max_index_to_go # or this '?' is the last token | |
12414 | ) | |
12415 | { | |
12416 | ||
12417 | # don't break at a '?' if preceded by ':' on | |
12418 | # this line of previous ?/: pair on this line. | |
12419 | # This is an attempt to preserve a chain of ?/: | |
12420 | # expressions (elsif2.t). And don't break if | |
12421 | # this has a side comment. | |
12422 | set_forced_breakpoint($i) | |
12423 | unless ( | |
12424 | $type_sequence == ( | |
12425 | $last_colon_sequence_number + | |
12426 | TYPE_SEQUENCE_INCREMENT | |
12427 | ) | |
12428 | || $tokens_to_go[$max_index_to_go] eq '#' | |
12429 | ); | |
12430 | set_closing_breakpoint($i); | |
12431 | } | |
12432 | } | |
12433 | } | |
12434 | ||
12435 | #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; | |
12436 | ||
12437 | #------------------------------------------------------------ | |
12438 | # Handle Increasing Depth.. | |
12439 | # | |
12440 | # prepare for a new list when depth increases | |
12441 | # token $i is a '(','{', or '[' | |
12442 | #------------------------------------------------------------ | |
12443 | if ( $depth > $current_depth ) { | |
12444 | ||
12445 | $breakpoint_stack[$depth] = $forced_breakpoint_count; | |
12446 | $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; | |
12447 | $has_broken_sublist[$depth] = 0; | |
12448 | $identifier_count_stack[$depth] = 0; | |
12449 | $index_before_arrow[$depth] = -1; | |
12450 | $interrupted_list[$depth] = 0; | |
12451 | $item_count_stack[$depth] = 0; | |
12452 | $last_comma_index[$depth] = undef; | |
12453 | $last_dot_index[$depth] = undef; | |
12454 | $last_nonblank_type[$depth] = $last_nonblank_type; | |
12455 | $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; | |
12456 | $opening_structure_index_stack[$depth] = $i; | |
12457 | $rand_or_list[$depth] = []; | |
12458 | $rfor_semicolon_list[$depth] = []; | |
12459 | $i_equals[$depth] = -1; | |
12460 | $want_comma_break[$depth] = 0; | |
12461 | $container_type[$depth] = | |
12462 | ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) | |
12463 | ? $last_nonblank_token | |
12464 | : ""; | |
12465 | $has_old_logical_breakpoints[$depth] = 0; | |
12466 | ||
12467 | # if line ends here then signal closing token to break | |
12468 | if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) | |
12469 | { | |
12470 | set_closing_breakpoint($i); | |
12471 | } | |
12472 | ||
12473 | # Not all lists of values should be vertically aligned.. | |
12474 | $dont_align[$depth] = | |
12475 | ||
12476 | # code BLOCKS are handled at a higher level | |
12477 | ( $block_type ne "" ) | |
12478 | ||
12479 | # certain paren lists | |
12480 | || ( $type eq '(' ) && ( | |
12481 | ||
12482 | # it does not usually look good to align a list of | |
12483 | # identifiers in a parameter list, as in: | |
12484 | # my($var1, $var2, ...) | |
12485 | # (This test should probably be refined, for now I'm just | |
12486 | # testing for any keyword) | |
12487 | ( $last_nonblank_type eq 'k' ) | |
12488 | ||
12489 | # a trailing '(' usually indicates a non-list | |
12490 | || ( $next_nonblank_type eq '(' ) | |
12491 | ); | |
12492 | ||
12493 | # patch to outdent opening brace of long if/for/.. | |
12494 | # statements (like this one). See similar coding in | |
12495 | # set_continuation breaks. We have also catch it here for | |
12496 | # short line fragments which otherwise will not go through | |
12497 | # set_continuation_breaks. | |
12498 | if ( | |
12499 | $block_type | |
12500 | ||
12501 | # if we have the ')' but not its '(' in this batch.. | |
12502 | && ( $last_nonblank_token eq ')' ) | |
12503 | && $mate_index_to_go[$i_last_nonblank_token] < 0 | |
12504 | ||
12505 | # and user wants brace to left | |
12506 | && !$rOpts->{'opening-brace-always-on-right'} | |
12507 | ||
12508 | && ( $type eq '{' ) # should be true | |
12509 | && ( $token eq '{' ) # should be true | |
12510 | ) | |
12511 | { | |
12512 | set_forced_breakpoint( $i - 1 ); | |
12513 | } | |
12514 | } | |
12515 | ||
12516 | #------------------------------------------------------------ | |
12517 | # Handle Decreasing Depth.. | |
12518 | # | |
12519 | # finish off any old list when depth decreases | |
12520 | # token $i is a ')','}', or ']' | |
12521 | #------------------------------------------------------------ | |
12522 | elsif ( $depth < $current_depth ) { | |
12523 | ||
12524 | check_for_new_minimum_depth($depth); | |
12525 | ||
12526 | # force all outer logical containers to break after we see on | |
12527 | # old breakpoint | |
12528 | $has_old_logical_breakpoints[$depth] ||= | |
12529 | $has_old_logical_breakpoints[$current_depth]; | |
12530 | ||
12531 | # Patch to break between ') {' if the paren list is broken. | |
12532 | # There is similar logic in set_continuation_breaks for | |
12533 | # non-broken lists. | |
12534 | if ( $token eq ')' | |
12535 | && $next_nonblank_block_type | |
12536 | && $interrupted_list[$current_depth] | |
12537 | && $next_nonblank_type eq '{' | |
12538 | && !$rOpts->{'opening-brace-always-on-right'} ) | |
12539 | { | |
12540 | set_forced_breakpoint($i); | |
12541 | } | |
12542 | ||
12543 | #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; | |
12544 | ||
12545 | # set breaks at commas if necessary | |
12546 | my ( $bp_count, $do_not_break_apart ) = | |
12547 | set_comma_breakpoints($current_depth); | |
12548 | ||
12549 | my $i_opening = $opening_structure_index_stack[$current_depth]; | |
12550 | my $saw_opening_structure = ( $i_opening >= 0 ); | |
12551 | ||
12552 | # this term is long if we had to break at interior commas.. | |
12553 | my $is_long_term = $bp_count > 0; | |
12554 | ||
12555 | # ..or if the length between opening and closing parens exceeds | |
12556 | # allowed line length | |
12557 | if ( !$is_long_term && $saw_opening_structure ) { | |
12558 | my $i_opening_minus = find_token_starting_list($i_opening); | |
12559 | ||
12560 | # Note: we have to allow for one extra space after a | |
12561 | # closing token so that we do not strand a comma or | |
12562 | # semicolon, hence the '>=' here (oneline.t) | |
12563 | $is_long_term = | |
12564 | excess_line_length( $i_opening_minus, $i ) >= 0; | |
12565 | } | |
12566 | ||
12567 | # We've set breaks after all comma-arrows. Now we have to | |
12568 | # undo them if this can be a one-line block | |
12569 | # (the only breakpoints set will be due to comma-arrows) | |
12570 | if ( | |
12571 | ||
12572 | # user doesn't require breaking after all comma-arrows | |
12573 | ( $rOpts_comma_arrow_breakpoints != 0 ) | |
12574 | ||
12575 | # and if the opening structure is in this batch | |
12576 | && $saw_opening_structure | |
12577 | ||
12578 | # and either on the same old line | |
12579 | && ( | |
12580 | $old_breakpoint_count_stack[$current_depth] == | |
12581 | $last_old_breakpoint_count | |
12582 | ||
12583 | # or user wants to form long blocks with arrows | |
12584 | || $rOpts_comma_arrow_breakpoints == 2 | |
12585 | ) | |
12586 | ||
12587 | # and we made some breakpoints between the opening and closing | |
12588 | && ( $breakpoint_undo_stack[$current_depth] < | |
12589 | $forced_breakpoint_undo_count ) | |
12590 | ||
12591 | # and this block is short enough to fit on one line | |
12592 | # Note: use < because need 1 more space for possible comma | |
12593 | && !$is_long_term | |
12594 | ||
12595 | ) | |
12596 | { | |
12597 | undo_forced_breakpoint_stack( | |
12598 | $breakpoint_undo_stack[$current_depth] ); | |
12599 | } | |
12600 | ||
12601 | # now see if we have any comma breakpoints left | |
12602 | my $has_comma_breakpoints = | |
12603 | ( $breakpoint_stack[$current_depth] != | |
12604 | $forced_breakpoint_count ); | |
12605 | ||
12606 | # update broken-sublist flag of the outer container | |
12607 | $has_broken_sublist[$depth] = $has_broken_sublist[$depth] | |
12608 | || $has_broken_sublist[$current_depth] | |
12609 | || $is_long_term | |
12610 | || $has_comma_breakpoints; | |
12611 | ||
12612 | # Having come to the closing ')', '}', or ']', now we have to decide if we | |
12613 | # should 'open up' the structure by placing breaks at the opening and | |
12614 | # closing containers. This is a tricky decision. Here are some of the | |
12615 | # basic considerations: | |
12616 | # | |
12617 | # -If this is a BLOCK container, then any breakpoints will have already | |
12618 | # been set (and according to user preferences), so we need do nothing here. | |
12619 | # | |
12620 | # -If we have a comma-separated list for which we can align the list items, | |
12621 | # then we need to do so because otherwise the vertical aligner cannot | |
12622 | # currently do the alignment. | |
12623 | # | |
12624 | # -If this container does itself contain a container which has been broken | |
12625 | # open, then it should be broken open to properly show the structure. | |
12626 | # | |
12627 | # -If there is nothing to align, and no other reason to break apart, | |
12628 | # then do not do it. | |
12629 | # | |
12630 | # We will not break open the parens of a long but 'simple' logical expression. | |
12631 | # For example: | |
12632 | # | |
12633 | # This is an example of a simple logical expression and its formatting: | |
12634 | # | |
12635 | # if ( $bigwasteofspace1 && $bigwasteofspace2 | |
12636 | # || $bigwasteofspace3 && $bigwasteofspace4 ) | |
12637 | # | |
12638 | # Most people would prefer this than the 'spacey' version: | |
12639 | # | |
12640 | # if ( | |
12641 | # $bigwasteofspace1 && $bigwasteofspace2 | |
12642 | # || $bigwasteofspace3 && $bigwasteofspace4 | |
12643 | # ) | |
12644 | # | |
12645 | # To illustrate the rules for breaking logical expressions, consider: | |
12646 | # | |
12647 | # FULLY DENSE: | |
12648 | # if ( $opt_excl | |
12649 | # and ( exists $ids_excl_uc{$id_uc} | |
12650 | # or grep $id_uc =~ /$_/, @ids_excl_uc )) | |
12651 | # | |
12652 | # This is on the verge of being difficult to read. The current default is to | |
12653 | # open it up like this: | |
12654 | # | |
12655 | # DEFAULT: | |
12656 | # if ( | |
12657 | # $opt_excl | |
12658 | # and ( exists $ids_excl_uc{$id_uc} | |
12659 | # or grep $id_uc =~ /$_/, @ids_excl_uc ) | |
12660 | # ) | |
12661 | # | |
12662 | # This is a compromise which tries to avoid being too dense and to spacey. | |
12663 | # A more spaced version would be: | |
12664 | # | |
12665 | # SPACEY: | |
12666 | # if ( | |
12667 | # $opt_excl | |
12668 | # and ( | |
12669 | # exists $ids_excl_uc{$id_uc} | |
12670 | # or grep $id_uc =~ /$_/, @ids_excl_uc | |
12671 | # ) | |
12672 | # ) | |
12673 | # | |
12674 | # Some people might prefer the spacey version -- an option could be added. The | |
12675 | # innermost expression contains a long block '( exists $ids_... ')'. | |
12676 | # | |
12677 | # Here is how the logic goes: We will force a break at the 'or' that the | |
12678 | # innermost expression contains, but we will not break apart its opening and | |
12679 | # closing containers because (1) it contains no multi-line sub-containers itself, | |
12680 | # and (2) there is no alignment to be gained by breaking it open like this | |
12681 | # | |
12682 | # and ( | |
12683 | # exists $ids_excl_uc{$id_uc} | |
12684 | # or grep $id_uc =~ /$_/, @ids_excl_uc | |
12685 | # ) | |
12686 | # | |
12687 | # (although this looks perfectly ok and might be good for long expressions). The | |
12688 | # outer 'if' container, though, contains a broken sub-container, so it will be | |
12689 | # broken open to avoid too much density. Also, since it contains no 'or's, there | |
12690 | # will be a forced break at its 'and'. | |
12691 | ||
12692 | # set some flags telling something about this container.. | |
12693 | my $is_simple_logical_expression = 0; | |
12694 | if ( $item_count_stack[$current_depth] == 0 | |
12695 | && $saw_opening_structure | |
12696 | && $tokens_to_go[$i_opening] eq '(' | |
12697 | && $is_logical_container{ $container_type[$current_depth] } | |
12698 | ) | |
12699 | { | |
12700 | ||
12701 | # This seems to be a simple logical expression with | |
12702 | # no existing breakpoints. Set a flag to prevent | |
12703 | # opening it up. | |
12704 | if ( !$has_comma_breakpoints ) { | |
12705 | $is_simple_logical_expression = 1; | |
12706 | } | |
12707 | ||
12708 | # This seems to be a simple logical expression with | |
12709 | # breakpoints (broken sublists, for example). Break | |
12710 | # at all 'or's and '||'s. | |
12711 | else { | |
12712 | set_logical_breakpoints($current_depth); | |
12713 | } | |
12714 | } | |
12715 | ||
12716 | if ( $is_long_term | |
12717 | && @{ $rfor_semicolon_list[$current_depth] } ) | |
12718 | { | |
12719 | set_for_semicolon_breakpoints($current_depth); | |
12720 | ||
12721 | # open up a long 'for' or 'foreach' container to allow | |
12722 | # leading term alignment unless -lp is used. | |
12723 | $has_comma_breakpoints = 1 | |
12724 | unless $rOpts_line_up_parentheses; | |
12725 | } | |
12726 | ||
12727 | if ( | |
12728 | ||
12729 | # breaks for code BLOCKS are handled at a higher level | |
12730 | !$block_type | |
12731 | ||
12732 | # we do not need to break at the top level of an 'if' | |
12733 | # type expression | |
12734 | && !$is_simple_logical_expression | |
12735 | ||
12736 | ## modification to keep ': (' containers vertically tight; | |
12737 | ## but probably better to let user set -vt=1 to avoid | |
12738 | ## inconsistency with other paren types | |
12739 | ## && ($container_type[$current_depth] ne ':') | |
12740 | ||
12741 | # otherwise, we require one of these reasons for breaking: | |
12742 | && ( | |
12743 | ||
12744 | # - this term has forced line breaks | |
12745 | $has_comma_breakpoints | |
12746 | ||
12747 | # - the opening container is separated from this batch | |
12748 | # for some reason (comment, blank line, code block) | |
12749 | # - this is a non-paren container spanning multiple lines | |
12750 | || !$saw_opening_structure | |
12751 | ||
12752 | # - this is a long block contained in another breakable | |
12753 | # container | |
12754 | || ( $is_long_term | |
12755 | && $container_environment_to_go[$i_opening] ne | |
12756 | 'BLOCK' ) | |
12757 | ) | |
12758 | ) | |
12759 | { | |
12760 | ||
12761 | # For -lp option, we must put a breakpoint before | |
12762 | # the token which has been identified as starting | |
12763 | # this indentation level. This is necessary for | |
12764 | # proper alignment. | |
12765 | if ( $rOpts_line_up_parentheses && $saw_opening_structure ) | |
12766 | { | |
12767 | my $item = $leading_spaces_to_go[ $i_opening + 1 ]; | |
12768 | if ( defined($item) ) { | |
12769 | my $i_start_2 = $item->get_STARTING_INDEX(); | |
12770 | if ( | |
12771 | defined($i_start_2) | |
12772 | ||
12773 | # we are breaking after an opening brace, paren, | |
12774 | # so don't break before it too | |
12775 | && $i_start_2 ne $i_opening | |
12776 | ) | |
12777 | { | |
12778 | ||
12779 | # Only break for breakpoints at the same | |
12780 | # indentation level as the opening paren | |
12781 | my $test1 = $nesting_depth_to_go[$i_opening]; | |
12782 | my $test2 = $nesting_depth_to_go[$i_start_2]; | |
12783 | if ( $test2 == $test1 ) { | |
12784 | set_forced_breakpoint( $i_start_2 - 1 ); | |
12785 | } | |
12786 | } | |
12787 | } | |
12788 | } | |
12789 | ||
12790 | # break after opening structure. | |
12791 | # note: break before closing structure will be automatic | |
12792 | if ( $minimum_depth <= $current_depth ) { | |
12793 | ||
12794 | set_forced_breakpoint($i_opening) | |
12795 | unless ( $do_not_break_apart | |
12796 | || is_unbreakable_container($current_depth) ); | |
12797 | ||
12798 | # break at '.' of lower depth level before opening token | |
12799 | if ( $last_dot_index[$depth] ) { | |
12800 | set_forced_breakpoint( $last_dot_index[$depth] ); | |
12801 | } | |
12802 | ||
12803 | # break before opening structure if preeced by another | |
12804 | # closing structure and a comma. This is normally | |
12805 | # done by the previous closing brace, but not | |
12806 | # if it was a one-line block. | |
12807 | if ( $i_opening > 2 ) { | |
12808 | my $i_prev = | |
12809 | ( $types_to_go[ $i_opening - 1 ] eq 'b' ) | |
12810 | ? $i_opening - 2 | |
12811 | : $i_opening - 1; | |
12812 | ||
12813 | if ( $types_to_go[$i_prev] eq ',' | |
12814 | && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) | |
12815 | { | |
12816 | set_forced_breakpoint($i_prev); | |
12817 | } | |
12818 | ||
12819 | # also break before something like ':(' or '?(' | |
12820 | # if appropriate. | |
12821 | elsif ( | |
12822 | $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) | |
12823 | { | |
12824 | my $token_prev = $tokens_to_go[$i_prev]; | |
12825 | if ( $want_break_before{$token_prev} ) { | |
12826 | set_forced_breakpoint($i_prev); | |
12827 | } | |
12828 | } | |
12829 | } | |
12830 | } | |
12831 | ||
12832 | # break after comma following closing structure | |
12833 | if ( $next_type eq ',' ) { | |
12834 | set_forced_breakpoint( $i + 1 ); | |
12835 | } | |
12836 | ||
12837 | # break before an '=' following closing structure | |
12838 | if ( | |
12839 | $is_assignment{$next_nonblank_type} | |
12840 | && ( $breakpoint_stack[$current_depth] != | |
12841 | $forced_breakpoint_count ) | |
12842 | ) | |
12843 | { | |
12844 | set_forced_breakpoint($i); | |
12845 | } | |
12846 | ||
12847 | # break at any comma before the opening structure Added | |
12848 | # for -lp, but seems to be good in general. It isn't | |
12849 | # obvious how far back to look; the '5' below seems to | |
12850 | # work well and will catch the comma in something like | |
12851 | # push @list, myfunc( $param, $param, .. | |
12852 | ||
12853 | my $icomma = $last_comma_index[$depth]; | |
12854 | if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { | |
12855 | unless ( $forced_breakpoint_to_go[$icomma] ) { | |
12856 | set_forced_breakpoint($icomma); | |
12857 | } | |
12858 | } | |
12859 | } # end logic to open up a container | |
12860 | ||
12861 | # Break open a logical container open if it was already open | |
12862 | elsif ($is_simple_logical_expression | |
12863 | && $has_old_logical_breakpoints[$current_depth] ) | |
12864 | { | |
12865 | set_logical_breakpoints($current_depth); | |
12866 | } | |
12867 | ||
12868 | # Handle long container which does not get opened up | |
12869 | elsif ($is_long_term) { | |
12870 | ||
12871 | # must set fake breakpoint to alert outer containers that | |
12872 | # they are complex | |
12873 | set_fake_breakpoint(); | |
12874 | } | |
12875 | } | |
12876 | ||
12877 | #------------------------------------------------------------ | |
12878 | # Handle this token | |
12879 | #------------------------------------------------------------ | |
12880 | ||
12881 | $current_depth = $depth; | |
12882 | ||
12883 | # handle comma-arrow | |
12884 | if ( $type eq '=>' ) { | |
12885 | next if ( $last_nonblank_type eq '=>' ); | |
12886 | next if $rOpts_break_at_old_comma_breakpoints; | |
12887 | next if $rOpts_comma_arrow_breakpoints == 3; | |
12888 | $want_comma_break[$depth] = 1; | |
12889 | $index_before_arrow[$depth] = $i_last_nonblank_token; | |
12890 | next; | |
12891 | } | |
12892 | ||
12893 | elsif ( $type eq '.' ) { | |
12894 | $last_dot_index[$depth] = $i; | |
12895 | } | |
12896 | ||
12897 | # Turn off alignment if we are sure that this is not a list | |
12898 | # environment. To be safe, we will do this if we see certain | |
12899 | # non-list tokens, such as ';', and also the environment is | |
12900 | # not a list. Note that '=' could be in any of the = operators | |
12901 | # (lextest.t). We can't just use the reported environment | |
12902 | # because it can be incorrect in some cases. | |
12903 | elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) | |
12904 | && $container_environment_to_go[$i] ne 'LIST' ) | |
12905 | { | |
12906 | $dont_align[$depth] = 1; | |
12907 | $want_comma_break[$depth] = 0; | |
12908 | $index_before_arrow[$depth] = -1; | |
12909 | } | |
12910 | ||
12911 | # now just handle any commas | |
12912 | next unless ( $type eq ',' ); | |
12913 | ||
12914 | $last_dot_index[$depth] = undef; | |
12915 | $last_comma_index[$depth] = $i; | |
12916 | ||
12917 | # break here if this comma follows a '=>' | |
12918 | # but not if there is a side comment after the comma | |
12919 | if ( $want_comma_break[$depth] ) { | |
12920 | ||
12921 | if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { | |
12922 | $want_comma_break[$depth] = 0; | |
12923 | $index_before_arrow[$depth] = -1; | |
12924 | next; | |
12925 | } | |
12926 | ||
12927 | set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); | |
12928 | ||
12929 | # break before the previous token if it looks safe | |
12930 | # Example of something that we will not try to break before: | |
12931 | # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, | |
12932 | my $ibreak = $index_before_arrow[$depth] - 1; | |
12933 | if ( $ibreak > 0 | |
12934 | && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) | |
12935 | { | |
12936 | if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } | |
12937 | if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) { | |
12938 | set_forced_breakpoint($ibreak); | |
12939 | } | |
12940 | } | |
12941 | ||
12942 | $want_comma_break[$depth] = 0; | |
12943 | $index_before_arrow[$depth] = -1; | |
12944 | ||
12945 | # handle list which mixes '=>'s and ','s: | |
12946 | # treat any list items so far as an interrupted list | |
12947 | $interrupted_list[$depth] = 1; | |
12948 | next; | |
12949 | } | |
12950 | ||
12951 | # skip past these commas if we are not supposed to format them | |
12952 | next if ( $dont_align[$depth] ); | |
12953 | ||
12954 | # break after all commas above starting depth | |
12955 | if ( $depth < $starting_depth ) { | |
12956 | set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); | |
12957 | next; | |
12958 | } | |
12959 | ||
12960 | # add this comma to the list.. | |
12961 | my $item_count = $item_count_stack[$depth]; | |
12962 | if ( $item_count == 0 ) { | |
12963 | ||
12964 | # but do not form a list with no opening structure | |
12965 | # for example: | |
12966 | ||
12967 | # open INFILE_COPY, ">$input_file_copy" | |
12968 | # or die ("very long message"); | |
12969 | ||
12970 | if ( ( $opening_structure_index_stack[$depth] < 0 ) | |
12971 | && $container_environment_to_go[$i] eq 'BLOCK' ) | |
12972 | { | |
12973 | $dont_align[$depth] = 1; | |
12974 | next; | |
12975 | } | |
12976 | } | |
12977 | ||
12978 | $comma_index[$depth][$item_count] = $i; | |
12979 | ++$item_count_stack[$depth]; | |
12980 | if ( $last_nonblank_type =~ /^[iR\]]$/ ) { | |
12981 | $identifier_count_stack[$depth]++; | |
12982 | } | |
12983 | } | |
12984 | ||
12985 | #------------------------------------------- | |
12986 | # end of loop over all tokens in this batch | |
12987 | #------------------------------------------- | |
12988 | ||
12989 | # set breaks for any unfinished lists .. | |
12990 | for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { | |
12991 | ||
12992 | $interrupted_list[$dd] = 1; | |
12993 | $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); | |
12994 | set_comma_breakpoints($dd); | |
12995 | set_logical_breakpoints($dd) | |
12996 | if ( $has_old_logical_breakpoints[$dd] ); | |
12997 | set_for_semicolon_breakpoints($dd); | |
12998 | ||
12999 | # break open container... | |
13000 | my $i_opening = $opening_structure_index_stack[$dd]; | |
13001 | set_forced_breakpoint($i_opening) | |
13002 | unless ( | |
13003 | is_unbreakable_container($dd) | |
13004 | ||
13005 | # Avoid a break which would place an isolated ' or " | |
13006 | # on a line | |
13007 | || ( $type eq 'Q' | |
13008 | && $i_opening >= $max_index_to_go - 2 | |
13009 | && $token =~ /^['"]$/ ) | |
13010 | ); | |
13011 | } | |
13012 | ||
13013 | # Return a flag indicating if the input file had some good breakpoints. | |
13014 | # This flag will be used to force a break in a line shorter than the | |
13015 | # allowed line length. | |
13016 | if ( $has_old_logical_breakpoints[$current_depth] ) { | |
13017 | $saw_good_breakpoint = 1; | |
13018 | } | |
13019 | return $saw_good_breakpoint; | |
13020 | } | |
13021 | } # end scan_list | |
13022 | ||
13023 | sub find_token_starting_list { | |
13024 | ||
13025 | # When testing to see if a block will fit on one line, some | |
13026 | # previous token(s) may also need to be on the line; particularly | |
13027 | # if this is a sub call. So we will look back at least one | |
13028 | # token. NOTE: This isn't perfect, but not critical, because | |
13029 | # if we mis-identify a block, it will be wrapped and therefore | |
13030 | # fixed the next time it is formatted. | |
13031 | my $i_opening_paren = shift; | |
13032 | my $i_opening_minus = $i_opening_paren; | |
13033 | my $im1 = $i_opening_paren - 1; | |
13034 | my $im2 = $i_opening_paren - 2; | |
13035 | my $im3 = $i_opening_paren - 3; | |
13036 | my $typem1 = $types_to_go[$im1]; | |
13037 | my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; | |
13038 | if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { | |
13039 | $i_opening_minus = $i_opening_paren; | |
13040 | } | |
13041 | elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { | |
13042 | $i_opening_minus = $im1 if $im1 >= 0; | |
13043 | ||
13044 | # walk back to improve length estimate | |
13045 | for ( my $j = $im1 ; $j >= 0 ; $j-- ) { | |
13046 | last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); | |
13047 | $i_opening_minus = $j; | |
13048 | } | |
13049 | if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } | |
13050 | } | |
13051 | elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } | |
13052 | elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { | |
13053 | $i_opening_minus = $im2; | |
13054 | } | |
13055 | return $i_opening_minus; | |
13056 | } | |
13057 | ||
13058 | { # begin set_comma_breakpoints_do | |
13059 | ||
13060 | my %is_keyword_with_special_leading_term; | |
13061 | ||
13062 | BEGIN { | |
13063 | ||
13064 | # These keywords have prototypes which allow a special leading item | |
13065 | # followed by a list | |
13066 | @_ = | |
13067 | qw(formline grep kill map printf sprintf push chmod join pack unshift); | |
13068 | @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_); | |
13069 | } | |
13070 | ||
13071 | sub set_comma_breakpoints_do { | |
13072 | ||
13073 | # Given a list with some commas, set breakpoints at some of the | |
13074 | # commas, if necessary, to make it easy to read. This list is | |
13075 | # an example: | |
13076 | my ( | |
13077 | $depth, $i_opening_paren, $i_closing_paren, | |
13078 | $item_count, $identifier_count, $rcomma_index, | |
13079 | $next_nonblank_type, $list_type, $interrupted, | |
13080 | $rdo_not_break_apart, $must_break_open, | |
13081 | ) | |
13082 | = @_; | |
13083 | ||
13084 | # nothing to do if no commas seen | |
13085 | return if ( $item_count < 1 ); | |
13086 | my $i_first_comma = $$rcomma_index[0]; | |
13087 | my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ]; | |
13088 | my $i_last_comma = $i_true_last_comma; | |
13089 | if ( $i_last_comma >= $max_index_to_go ) { | |
13090 | $i_last_comma = $$rcomma_index[ --$item_count - 1 ]; | |
13091 | return if ( $item_count < 1 ); | |
13092 | } | |
13093 | ||
13094 | #--------------------------------------------------------------- | |
13095 | # find lengths of all items in the list to calculate page layout | |
13096 | #--------------------------------------------------------------- | |
13097 | my $comma_count = $item_count; | |
13098 | my @item_lengths; | |
13099 | my @i_term_begin; | |
13100 | my @i_term_end; | |
13101 | my @i_term_comma; | |
13102 | my $i_prev_plus; | |
13103 | my @max_length = ( 0, 0 ); | |
13104 | my $first_term_length; | |
13105 | my $i = $i_opening_paren; | |
13106 | my $is_odd = 1; | |
13107 | ||
13108 | for ( my $j = 0 ; $j < $comma_count ; $j++ ) { | |
13109 | $is_odd = 1 - $is_odd; | |
13110 | $i_prev_plus = $i + 1; | |
13111 | $i = $$rcomma_index[$j]; | |
13112 | ||
13113 | my $i_term_end = | |
13114 | ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; | |
13115 | my $i_term_begin = | |
13116 | ( $types_to_go[$i_prev_plus] eq 'b' ) | |
13117 | ? $i_prev_plus + 1 | |
13118 | : $i_prev_plus; | |
13119 | push @i_term_begin, $i_term_begin; | |
13120 | push @i_term_end, $i_term_end; | |
13121 | push @i_term_comma, $i; | |
13122 | ||
13123 | # note: currently adding 2 to all lengths (for comma and space) | |
13124 | my $length = | |
13125 | 2 + token_sequence_length( $i_term_begin, $i_term_end ); | |
13126 | push @item_lengths, $length; | |
13127 | ||
13128 | if ( $j == 0 ) { | |
13129 | $first_term_length = $length; | |
13130 | } | |
13131 | else { | |
13132 | ||
13133 | if ( $length > $max_length[$is_odd] ) { | |
13134 | $max_length[$is_odd] = $length; | |
13135 | } | |
13136 | } | |
13137 | } | |
13138 | ||
13139 | # now we have to make a distinction between the comma count and item | |
13140 | # count, because the item count will be one greater than the comma | |
13141 | # count if the last item is not terminated with a comma | |
13142 | my $i_b = | |
13143 | ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) | |
13144 | ? $i_last_comma + 1 | |
13145 | : $i_last_comma; | |
13146 | my $i_e = | |
13147 | ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) | |
13148 | ? $i_closing_paren - 2 | |
13149 | : $i_closing_paren - 1; | |
13150 | my $i_effective_last_comma = $i_last_comma; | |
13151 | ||
13152 | my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); | |
13153 | ||
13154 | if ( $last_item_length > 0 ) { | |
13155 | ||
13156 | # add 2 to length because other lengths include a comma and a blank | |
13157 | $last_item_length += 2; | |
13158 | push @item_lengths, $last_item_length; | |
13159 | push @i_term_begin, $i_b + 1; | |
13160 | push @i_term_end, $i_e; | |
13161 | push @i_term_comma, undef; | |
13162 | ||
13163 | my $i_odd = $item_count % 2; | |
13164 | ||
13165 | if ( $last_item_length > $max_length[$i_odd] ) { | |
13166 | $max_length[$i_odd] = $last_item_length; | |
13167 | } | |
13168 | ||
13169 | $item_count++; | |
13170 | $i_effective_last_comma = $i_e + 1; | |
13171 | ||
13172 | if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { | |
13173 | $identifier_count++; | |
13174 | } | |
13175 | } | |
13176 | ||
13177 | #--------------------------------------------------------------- | |
13178 | # End of length calculations | |
13179 | #--------------------------------------------------------------- | |
13180 | ||
13181 | #--------------------------------------------------------------- | |
13182 | # Compound List Rule 1: | |
13183 | # Break at (almost) every comma for a list containing a broken | |
13184 | # sublist. This has higher priority than the Interrupted List | |
13185 | # Rule. | |
13186 | #--------------------------------------------------------------- | |
13187 | if ( $has_broken_sublist[$depth] ) { | |
13188 | ||
13189 | # Break at every comma except for a comma between two | |
13190 | # simple, small terms. This prevents long vertical | |
13191 | # columns of, say, just 0's. | |
13192 | my $small_length = 10; # 2 + actual maximum length wanted | |
13193 | ||
13194 | # We'll insert a break in long runs of small terms to | |
13195 | # allow alignment in uniform tables. | |
13196 | my $skipped_count = 0; | |
13197 | my $columns = table_columns_available($i_first_comma); | |
13198 | my $fields = int( $columns / $small_length ); | |
13199 | if ( $rOpts_maximum_fields_per_table | |
13200 | && $fields > $rOpts_maximum_fields_per_table ) | |
13201 | { | |
13202 | $fields = $rOpts_maximum_fields_per_table; | |
13203 | } | |
13204 | my $max_skipped_count = $fields - 1; | |
13205 | ||
13206 | my $is_simple_last_term = 0; | |
13207 | my $is_simple_next_term = 0; | |
13208 | foreach my $j ( 0 .. $item_count ) { | |
13209 | $is_simple_last_term = $is_simple_next_term; | |
13210 | $is_simple_next_term = 0; | |
13211 | if ( $j < $item_count | |
13212 | && $i_term_end[$j] == $i_term_begin[$j] | |
13213 | && $item_lengths[$j] <= $small_length ) | |
13214 | { | |
13215 | $is_simple_next_term = 1; | |
13216 | } | |
13217 | next if $j == 0; | |
13218 | if ( $is_simple_last_term | |
13219 | && $is_simple_next_term | |
13220 | && $skipped_count < $max_skipped_count ) | |
13221 | { | |
13222 | $skipped_count++; | |
13223 | } | |
13224 | else { | |
13225 | $skipped_count = 0; | |
13226 | my $i = $i_term_comma[ $j - 1 ]; | |
13227 | last unless defined $i; | |
13228 | set_forced_breakpoint($i); | |
13229 | } | |
13230 | } | |
13231 | ||
13232 | # always break at the last comma if this list is | |
13233 | # interrupted; we wouldn't want to leave a terminal '{', for | |
13234 | # example. | |
13235 | if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } | |
13236 | return; | |
13237 | } | |
13238 | ||
13239 | #my ( $a, $b, $c ) = caller(); | |
13240 | #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count | |
13241 | #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; | |
13242 | #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; | |
13243 | ||
13244 | #--------------------------------------------------------------- | |
13245 | # Interrupted List Rule: | |
13246 | # A list is is forced to use old breakpoints if it was interrupted | |
13247 | # by side comments or blank lines, or requested by user. | |
13248 | #--------------------------------------------------------------- | |
13249 | if ( $rOpts_break_at_old_comma_breakpoints | |
13250 | || $interrupted | |
13251 | || $i_opening_paren < 0 ) | |
13252 | { | |
13253 | copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); | |
13254 | return; | |
13255 | } | |
13256 | ||
13257 | #--------------------------------------------------------------- | |
13258 | # Looks like a list of items. We have to look at it and size it up. | |
13259 | #--------------------------------------------------------------- | |
13260 | ||
13261 | my $opening_token = $tokens_to_go[$i_opening_paren]; | |
13262 | my $opening_environment = | |
13263 | $container_environment_to_go[$i_opening_paren]; | |
13264 | ||
13265 | #------------------------------------------------------------------- | |
13266 | # Return if this will fit on one line | |
13267 | #------------------------------------------------------------------- | |
13268 | ||
13269 | my $i_opening_minus = find_token_starting_list($i_opening_paren); | |
13270 | return | |
13271 | unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; | |
13272 | ||
13273 | #------------------------------------------------------------------- | |
13274 | # Now we know that this block spans multiple lines; we have to set | |
13275 | # at least one breakpoint -- real or fake -- as a signal to break | |
13276 | # open any outer containers. | |
13277 | #------------------------------------------------------------------- | |
13278 | set_fake_breakpoint(); | |
13279 | ||
13280 | # be sure we do not extend beyond the current list length | |
13281 | if ( $i_effective_last_comma >= $max_index_to_go ) { | |
13282 | $i_effective_last_comma = $max_index_to_go - 1; | |
13283 | } | |
13284 | ||
13285 | # Set a flag indicating if we need to break open to keep -lp | |
13286 | # items aligned. This is necessary if any of the list terms | |
13287 | # exceeds the available space after the '('. | |
13288 | my $need_lp_break_open = $must_break_open; | |
13289 | if ( $rOpts_line_up_parentheses && !$must_break_open ) { | |
13290 | my $columns_if_unbroken = $rOpts_maximum_line_length - | |
13291 | total_line_length( $i_opening_minus, $i_opening_paren ); | |
13292 | $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken ) | |
13293 | || ( $max_length[1] > $columns_if_unbroken ) | |
13294 | || ( $first_term_length > $columns_if_unbroken ); | |
13295 | } | |
13296 | ||
13297 | # Specify if the list must have an even number of fields or not. | |
13298 | # It is generally safest to assume an even number, because the | |
13299 | # list items might be a hash list. But if we can be sure that | |
13300 | # it is not a hash, then we can allow an odd number for more | |
13301 | # flexibility. | |
13302 | my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count | |
13303 | ||
13304 | if ( $identifier_count >= $item_count - 1 | |
13305 | || $is_assignment{$next_nonblank_type} | |
13306 | || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) | |
13307 | ) | |
13308 | { | |
13309 | $odd_or_even = 1; | |
13310 | } | |
13311 | ||
13312 | # do we have a long first term which should be | |
13313 | # left on a line by itself? | |
13314 | my $use_separate_first_term = ( | |
13315 | $odd_or_even == 1 # only if we can use 1 field/line | |
13316 | && $item_count > 3 # need several items | |
13317 | && $first_term_length > | |
13318 | 2 * $max_length[0] - 2 # need long first term | |
13319 | && $first_term_length > | |
13320 | 2 * $max_length[1] - 2 # need long first term | |
13321 | ); | |
13322 | ||
13323 | # or do we know from the type of list that the first term should | |
13324 | # be placed alone? | |
13325 | if ( !$use_separate_first_term ) { | |
13326 | if ( $is_keyword_with_special_leading_term{$list_type} ) { | |
13327 | $use_separate_first_term = 1; | |
13328 | ||
13329 | # should the container be broken open? | |
13330 | if ( $item_count < 3 ) { | |
13331 | if ( $i_first_comma - $i_opening_paren < 4 ) { | |
13332 | $$rdo_not_break_apart = 1; | |
13333 | } | |
13334 | } | |
13335 | elsif ($first_term_length < 20 | |
13336 | && $i_first_comma - $i_opening_paren < 4 ) | |
13337 | { | |
13338 | my $columns = table_columns_available($i_first_comma); | |
13339 | if ( $first_term_length < $columns ) { | |
13340 | $$rdo_not_break_apart = 1; | |
13341 | } | |
13342 | } | |
13343 | } | |
13344 | } | |
13345 | ||
13346 | # if so, | |
13347 | if ($use_separate_first_term) { | |
13348 | ||
13349 | # ..set a break and update starting values | |
13350 | $use_separate_first_term = 1; | |
13351 | set_forced_breakpoint($i_first_comma); | |
13352 | $i_opening_paren = $i_first_comma; | |
13353 | $i_first_comma = $$rcomma_index[1]; | |
13354 | $item_count--; | |
13355 | return if $comma_count == 1; | |
13356 | shift @item_lengths; | |
13357 | shift @i_term_begin; | |
13358 | shift @i_term_end; | |
13359 | shift @i_term_comma; | |
13360 | } | |
13361 | ||
13362 | # if not, update the metrics to include the first term | |
13363 | else { | |
13364 | if ( $first_term_length > $max_length[0] ) { | |
13365 | $max_length[0] = $first_term_length; | |
13366 | } | |
13367 | } | |
13368 | ||
13369 | # Field width parameters | |
13370 | my $pair_width = ( $max_length[0] + $max_length[1] ); | |
13371 | my $max_width = | |
13372 | ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; | |
13373 | ||
13374 | # Number of free columns across the page width for laying out tables | |
13375 | my $columns = table_columns_available($i_first_comma); | |
13376 | ||
13377 | # Estimated maximum number of fields which fit this space | |
13378 | # This will be our first guess | |
13379 | my $number_of_fields_max = | |
13380 | maximum_number_of_fields( $columns, $odd_or_even, $max_width, | |
13381 | $pair_width ); | |
13382 | my $number_of_fields = $number_of_fields_max; | |
13383 | ||
13384 | # Find the best-looking number of fields | |
13385 | # and make this our second guess if possible | |
13386 | my ( $number_of_fields_best, $ri_ragged_break_list, | |
13387 | $new_identifier_count ) | |
13388 | = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, | |
13389 | $max_width ); | |
13390 | ||
13391 | if ( $number_of_fields_best != 0 | |
13392 | && $number_of_fields_best < $number_of_fields_max ) | |
13393 | { | |
13394 | $number_of_fields = $number_of_fields_best; | |
13395 | } | |
13396 | ||
13397 | # ---------------------------------------------------------------------- | |
13398 | # If we are crowded and the -lp option is being used, try to | |
13399 | # undo some indentation | |
13400 | # ---------------------------------------------------------------------- | |
13401 | if ( | |
13402 | $rOpts_line_up_parentheses | |
13403 | && ( | |
13404 | $number_of_fields == 0 | |
13405 | || ( $number_of_fields == 1 | |
13406 | && $number_of_fields != $number_of_fields_best ) | |
13407 | ) | |
13408 | ) | |
13409 | { | |
13410 | my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma); | |
13411 | if ( $available_spaces > 0 ) { | |
13412 | ||
13413 | my $spaces_wanted = $max_width - $columns; # for 1 field | |
13414 | ||
13415 | if ( $number_of_fields_best == 0 ) { | |
13416 | $number_of_fields_best = | |
13417 | get_maximum_fields_wanted( \@item_lengths ); | |
13418 | } | |
13419 | ||
13420 | if ( $number_of_fields_best != 1 ) { | |
13421 | my $spaces_wanted_2 = | |
13422 | 1 + $pair_width - $columns; # for 2 fields | |
13423 | if ( $available_spaces > $spaces_wanted_2 ) { | |
13424 | $spaces_wanted = $spaces_wanted_2; | |
13425 | } | |
13426 | } | |
13427 | ||
13428 | if ( $spaces_wanted > 0 ) { | |
13429 | my $deleted_spaces = | |
13430 | reduce_lp_indentation( $i_first_comma, $spaces_wanted ); | |
13431 | ||
13432 | # redo the math | |
13433 | if ( $deleted_spaces > 0 ) { | |
13434 | $columns = table_columns_available($i_first_comma); | |
13435 | $number_of_fields_max = | |
13436 | maximum_number_of_fields( $columns, $odd_or_even, | |
13437 | $max_width, $pair_width ); | |
13438 | $number_of_fields = $number_of_fields_max; | |
13439 | ||
13440 | if ( $number_of_fields_best == 1 | |
13441 | && $number_of_fields >= 1 ) | |
13442 | { | |
13443 | $number_of_fields = $number_of_fields_best; | |
13444 | } | |
13445 | } | |
13446 | } | |
13447 | } | |
13448 | } | |
13449 | ||
13450 | # try for one column if two won't work | |
13451 | if ( $number_of_fields <= 0 ) { | |
13452 | $number_of_fields = int( $columns / $max_width ); | |
13453 | } | |
13454 | ||
13455 | # The user can place an upper bound on the number of fields, | |
13456 | # which can be useful for doing maintenance on tables | |
13457 | if ( $rOpts_maximum_fields_per_table | |
13458 | && $number_of_fields > $rOpts_maximum_fields_per_table ) | |
13459 | { | |
13460 | $number_of_fields = $rOpts_maximum_fields_per_table; | |
13461 | } | |
13462 | ||
13463 | # How many columns (characters) and lines would this container take | |
13464 | # if no additional whitespace were added? | |
13465 | my $packed_columns = token_sequence_length( $i_opening_paren + 1, | |
13466 | $i_effective_last_comma + 1 ); | |
13467 | if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero | |
13468 | my $packed_lines = 1 + int( $packed_columns / $columns ); | |
13469 | ||
13470 | # are we an item contained in an outer list? | |
13471 | my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; | |
13472 | ||
13473 | if ( $number_of_fields <= 0 ) { | |
13474 | ||
13475 | # #--------------------------------------------------------------- | |
13476 | # # We're in trouble. We can't find a single field width that works. | |
13477 | # # There is no simple answer here; we may have a single long list | |
13478 | # # item, or many. | |
13479 | # #--------------------------------------------------------------- | |
13480 | # | |
13481 | # In many cases, it may be best to not force a break if there is just one | |
13482 | # comma, because the standard continuation break logic will do a better | |
13483 | # job without it. | |
13484 | # | |
13485 | # In the common case that all but one of the terms can fit | |
13486 | # on a single line, it may look better not to break open the | |
13487 | # containing parens. Consider, for example | |
13488 | # | |
13489 | # $color = | |
13490 | # join ( '/', | |
13491 | # sort { $color_value{$::a} <=> $color_value{$::b}; } | |
13492 | # keys %colors ); | |
13493 | # | |
13494 | # which will look like this with the container broken: | |
13495 | # | |
13496 | # $color = join ( | |
13497 | # '/', | |
13498 | # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors | |
13499 | # ); | |
13500 | # | |
13501 | # Here is an example of this rule for a long last term: | |
13502 | # | |
13503 | # log_message( 0, 256, 128, | |
13504 | # "Number of routes in adj-RIB-in to be considered: $peercount" ); | |
13505 | # | |
13506 | # And here is an example with a long first term: | |
13507 | # | |
13508 | # $s = sprintf( | |
13509 | # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", | |
13510 | # $r, $pu, $ps, $cu, $cs, $tt | |
13511 | # ) | |
13512 | # if $style eq 'all'; | |
13513 | ||
13514 | my $i_last_comma = $$rcomma_index[ $comma_count - 1 ]; | |
13515 | my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; | |
13516 | my $long_first_term = | |
13517 | excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; | |
13518 | ||
13519 | # break at every comma ... | |
13520 | if ( | |
13521 | ||
13522 | # if requested by user or is best looking | |
13523 | $number_of_fields_best == 1 | |
13524 | ||
13525 | # or if this is a sublist of a larger list | |
13526 | || $in_hierarchical_list | |
13527 | ||
13528 | # or if multiple commas and we dont have a long first or last | |
13529 | # term | |
13530 | || ( $comma_count > 1 | |
13531 | && !( $long_last_term || $long_first_term ) ) | |
13532 | ) | |
13533 | { | |
13534 | foreach ( 0 .. $comma_count - 1 ) { | |
13535 | set_forced_breakpoint( $$rcomma_index[$_] ); | |
13536 | } | |
13537 | } | |
13538 | elsif ($long_last_term) { | |
13539 | ||
13540 | set_forced_breakpoint($i_last_comma); | |
13541 | $$rdo_not_break_apart = 1 unless $must_break_open; | |
13542 | } | |
13543 | elsif ($long_first_term) { | |
13544 | ||
13545 | set_forced_breakpoint($i_first_comma); | |
13546 | } | |
13547 | else { | |
13548 | ||
13549 | # let breaks be defined by default bond strength logic | |
13550 | } | |
13551 | return; | |
13552 | } | |
13553 | ||
13554 | # -------------------------------------------------------- | |
13555 | # We have a tentative field count that seems to work. | |
13556 | # How many lines will this require? | |
13557 | # -------------------------------------------------------- | |
13558 | my $formatted_lines = $item_count / ($number_of_fields); | |
13559 | if ( $formatted_lines != int $formatted_lines ) { | |
13560 | $formatted_lines = 1 + int $formatted_lines; | |
13561 | } | |
13562 | ||
13563 | # So far we've been trying to fill out to the right margin. But | |
13564 | # compact tables are easier to read, so let's see if we can use fewer | |
13565 | # fields without increasing the number of lines. | |
13566 | $number_of_fields = | |
13567 | compactify_table( $item_count, $number_of_fields, $formatted_lines, | |
13568 | $odd_or_even ); | |
13569 | ||
13570 | # How many spaces across the page will we fill? | |
13571 | my $columns_per_line = | |
13572 | ( int $number_of_fields / 2 ) * $pair_width + | |
13573 | ( $number_of_fields % 2 ) * $max_width; | |
13574 | ||
13575 | my $formatted_columns; | |
13576 | ||
13577 | if ( $number_of_fields > 1 ) { | |
13578 | $formatted_columns = | |
13579 | ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) * | |
13580 | $max_width ); | |
13581 | } | |
13582 | else { | |
13583 | $formatted_columns = $max_width * $item_count; | |
13584 | } | |
13585 | if ( $formatted_columns < $packed_columns ) { | |
13586 | $formatted_columns = $packed_columns; | |
13587 | } | |
13588 | ||
13589 | my $unused_columns = $formatted_columns - $packed_columns; | |
13590 | ||
13591 | # set some empirical parameters to help decide if we should try to | |
13592 | # align; high sparsity does not look good, especially with few lines | |
13593 | my $sparsity = ($unused_columns) / ($formatted_columns); | |
13594 | my $max_allowed_sparsity = | |
13595 | ( $item_count < 3 ) ? 0.1 | |
13596 | : ( $packed_lines == 1 ) ? 0.15 | |
13597 | : ( $packed_lines == 2 ) ? 0.4 | |
13598 | : 0.7; | |
13599 | ||
13600 | # Begin check for shortcut methods, which avoid treating a list | |
13601 | # as a table for relatively small parenthesized lists. These | |
13602 | # are usually easier to read if not formatted as tables. | |
13603 | if ( | |
13604 | $packed_lines <= 2 # probably can fit in 2 lines | |
13605 | && $item_count < 9 # doesn't have too many items | |
13606 | && $opening_environment eq 'BLOCK' # not a sub-container | |
13607 | && $opening_token eq '(' # is paren list | |
13608 | ) | |
13609 | { | |
13610 | ||
13611 | # Shortcut method 1: for -lp and just one comma: | |
13612 | # This is a no-brainer, just break at the comma. | |
13613 | if ( | |
13614 | $rOpts_line_up_parentheses # -lp | |
13615 | && $item_count == 2 # two items, one comma | |
13616 | && !$must_break_open | |
13617 | ) | |
13618 | { | |
13619 | my $i_break = $$rcomma_index[0]; | |
13620 | set_forced_breakpoint($i_break); | |
13621 | $$rdo_not_break_apart = 1; | |
13622 | set_non_alignment_flags( $comma_count, $rcomma_index ); | |
13623 | return; | |
13624 | ||
13625 | } | |
13626 | ||
13627 | # method 2 is for most small ragged lists which might look | |
13628 | # best if not displayed as a table. | |
13629 | if ( | |
13630 | ( $number_of_fields == 2 && $item_count == 3 ) | |
13631 | || ( | |
13632 | $new_identifier_count > 0 # isn't all quotes | |
13633 | && $sparsity > 0.15 | |
13634 | ) # would be fairly spaced gaps if aligned | |
13635 | ) | |
13636 | { | |
13637 | ||
13638 | my $break_count = | |
13639 | set_ragged_breakpoints( \@i_term_comma, | |
13640 | $ri_ragged_break_list ); | |
13641 | ++$break_count if ($use_separate_first_term); | |
13642 | ||
13643 | # NOTE: we should really use the true break count here, | |
13644 | # which can be greater if there are large terms and | |
13645 | # little space, but usually this will work well enough. | |
13646 | unless ($must_break_open) { | |
13647 | ||
13648 | if ( $break_count <= 1 ) { | |
13649 | $$rdo_not_break_apart = 1; | |
13650 | } | |
13651 | elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) | |
13652 | { | |
13653 | $$rdo_not_break_apart = 1; | |
13654 | } | |
13655 | } | |
13656 | set_non_alignment_flags( $comma_count, $rcomma_index ); | |
13657 | return; | |
13658 | } | |
13659 | ||
13660 | } # end shortcut methods | |
13661 | ||
13662 | # debug stuff | |
13663 | ||
13664 | FORMATTER_DEBUG_FLAG_SPARSE && do { | |
13665 | ||
13666 | "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; | |
13667 | ||
13668 | }; | |
13669 | ||
13670 | #--------------------------------------------------------------- | |
13671 | # Compound List Rule 2: | |
13672 | # If this list is too long for one line, and it is an item of a | |
13673 | # larger list, then we must format it, regardless of sparsity | |
13674 | # (ian.t). One reason that we have to do this is to trigger | |
13675 | # Compound List Rule 1, above, which causes breaks at all commas of | |
13676 | # all outer lists. In this way, the structure will be properly | |
13677 | # displayed. | |
13678 | #--------------------------------------------------------------- | |
13679 | ||
13680 | # Decide if this list is too long for one line unless broken | |
13681 | my $total_columns = table_columns_available($i_opening_paren); | |
13682 | my $too_long = $packed_columns > $total_columns; | |
13683 | ||
13684 | # For a paren list, include the length of the token just before the | |
13685 | # '(' because this is likely a sub call, and we would have to | |
13686 | # include the sub name on the same line as the list. This is still | |
13687 | # imprecise, but not too bad. (steve.t) | |
13688 | if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { | |
13689 | ||
13690 | $too_long = | |
13691 | excess_line_length( $i_opening_minus, | |
13692 | $i_effective_last_comma + 1 ) > 0; | |
13693 | } | |
13694 | ||
13695 | # FIXME: For an item after a '=>', try to include the length of the | |
13696 | # thing before the '=>'. This is crude and should be improved by | |
13697 | # actually looking back token by token. | |
13698 | if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { | |
13699 | my $i_opening_minus = $i_opening_paren - 4; | |
13700 | if ( $i_opening_minus >= 0 ) { | |
13701 | $too_long = | |
13702 | excess_line_length( $i_opening_minus, | |
13703 | $i_effective_last_comma + 1 ) > 0; | |
13704 | } | |
13705 | } | |
13706 | ||
13707 | # Always break lists contained in '[' and '{' if too long for 1 line, | |
13708 | # and always break lists which are too long and part of a more complex | |
13709 | # structure. | |
13710 | my $must_break_open_container = $must_break_open | |
13711 | || ( $too_long | |
13712 | && ( $in_hierarchical_list || $opening_token ne '(' ) ); | |
13713 | ||
13714 | #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; | |
13715 | ||
13716 | #--------------------------------------------------------------- | |
13717 | # The main decision: | |
13718 | # Now decide if we will align the data into aligned columns. Do not | |
13719 | # attempt to align columns if this is a tiny table or it would be | |
13720 | # too spaced. It seems that the more packed lines we have, the | |
13721 | # sparser the list that can be allowed and still look ok. | |
13722 | #--------------------------------------------------------------- | |
13723 | ||
13724 | if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) | |
13725 | || ( $formatted_lines < 2 ) | |
13726 | || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) | |
13727 | ) | |
13728 | { | |
13729 | ||
13730 | #--------------------------------------------------------------- | |
13731 | # too sparse: would look ugly if aligned in a table; | |
13732 | #--------------------------------------------------------------- | |
13733 | ||
13734 | # use old breakpoints if this is a 'big' list | |
13735 | # FIXME: goal is to improve set_ragged_breakpoints so that | |
13736 | # this is not necessary. | |
13737 | if ( $packed_lines > 2 && $item_count > 10 ) { | |
13738 | write_logfile_entry("List sparse: using old breakpoints\n"); | |
13739 | copy_old_breakpoints( $i_first_comma, $i_last_comma ); | |
13740 | } | |
13741 | ||
13742 | # let the continuation logic handle it if 2 lines | |
13743 | else { | |
13744 | ||
13745 | my $break_count = | |
13746 | set_ragged_breakpoints( \@i_term_comma, | |
13747 | $ri_ragged_break_list ); | |
13748 | ++$break_count if ($use_separate_first_term); | |
13749 | ||
13750 | unless ($must_break_open_container) { | |
13751 | if ( $break_count <= 1 ) { | |
13752 | $$rdo_not_break_apart = 1; | |
13753 | } | |
13754 | elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) | |
13755 | { | |
13756 | $$rdo_not_break_apart = 1; | |
13757 | } | |
13758 | } | |
13759 | set_non_alignment_flags( $comma_count, $rcomma_index ); | |
13760 | } | |
13761 | return; | |
13762 | } | |
13763 | ||
13764 | #--------------------------------------------------------------- | |
13765 | # go ahead and format as a table | |
13766 | #--------------------------------------------------------------- | |
13767 | write_logfile_entry( | |
13768 | "List: auto formatting with $number_of_fields fields/row\n"); | |
13769 | ||
13770 | my $j_first_break = | |
13771 | $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; | |
13772 | ||
13773 | for ( | |
13774 | my $j = $j_first_break ; | |
13775 | $j < $comma_count ; | |
13776 | $j += $number_of_fields | |
13777 | ) | |
13778 | { | |
13779 | my $i = $$rcomma_index[$j]; | |
13780 | set_forced_breakpoint($i); | |
13781 | } | |
13782 | return; | |
13783 | } | |
13784 | } | |
13785 | ||
13786 | sub set_non_alignment_flags { | |
13787 | ||
13788 | # set flag which indicates that these commas should not be | |
13789 | # aligned | |
13790 | my ( $comma_count, $rcomma_index ) = @_; | |
13791 | foreach ( 0 .. $comma_count - 1 ) { | |
13792 | $matching_token_to_go[ $$rcomma_index[$_] ] = 1; | |
13793 | } | |
13794 | } | |
13795 | ||
13796 | sub study_list_complexity { | |
13797 | ||
13798 | # Look for complex tables which should be formatted with one term per line. | |
13799 | # Returns the following: | |
13800 | # | |
13801 | # \@i_ragged_break_list = list of good breakpoints to avoid lines | |
13802 | # which are hard to read | |
13803 | # $number_of_fields_best = suggested number of fields based on | |
13804 | # complexity; = 0 if any number may be used. | |
13805 | # | |
13806 | my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; | |
13807 | my $item_count = @{$ri_term_begin}; | |
13808 | my $complex_item_count = 0; | |
13809 | my $number_of_fields_best = $rOpts_maximum_fields_per_table; | |
13810 | my $i_max = @{$ritem_lengths} - 1; | |
13811 | ##my @item_complexity; | |
13812 | ||
13813 | my $i_last_last_break = -3; | |
13814 | my $i_last_break = -2; | |
13815 | my @i_ragged_break_list; | |
13816 | ||
13817 | my $definitely_complex = 30; | |
13818 | my $definitely_simple = 12; | |
13819 | my $quote_count = 0; | |
13820 | ||
13821 | for my $i ( 0 .. $i_max ) { | |
13822 | my $ib = $ri_term_begin->[$i]; | |
13823 | my $ie = $ri_term_end->[$i]; | |
13824 | ||
13825 | # define complexity: start with the actual term length | |
13826 | my $weighted_length = ( $ritem_lengths->[$i] - 2 ); | |
13827 | ||
13828 | ##TBD: join types here and check for variations | |
13829 | ##my $str=join "", @tokens_to_go[$ib..$ie]; | |
13830 | ||
13831 | my $is_quote = 0; | |
13832 | if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { | |
13833 | $is_quote = 1; | |
13834 | $quote_count++; | |
13835 | } | |
13836 | elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { | |
13837 | $quote_count++; | |
13838 | } | |
13839 | ||
13840 | if ( $ib eq $ie ) { | |
13841 | if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { | |
13842 | $complex_item_count++; | |
13843 | $weighted_length *= 2; | |
13844 | } | |
13845 | else { | |
13846 | } | |
13847 | } | |
13848 | else { | |
13849 | if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { | |
13850 | $complex_item_count++; | |
13851 | $weighted_length *= 2; | |
13852 | } | |
13853 | if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { | |
13854 | $weighted_length += 4; | |
13855 | } | |
13856 | } | |
13857 | ||
13858 | # add weight for extra tokens. | |
13859 | $weighted_length += 2 * ( $ie - $ib ); | |
13860 | ||
13861 | ## my $BUB = join '', @tokens_to_go[$ib..$ie]; | |
13862 | ## print "# COMPLEXITY:$weighted_length $BUB\n"; | |
13863 | ||
13864 | ##push @item_complexity, $weighted_length; | |
13865 | ||
13866 | # now mark a ragged break after this item it if it is 'long and | |
13867 | # complex': | |
13868 | if ( $weighted_length >= $definitely_complex ) { | |
13869 | ||
13870 | # if we broke after the previous term | |
13871 | # then break before it too | |
13872 | if ( $i_last_break == $i - 1 | |
13873 | && $i > 1 | |
13874 | && $i_last_last_break != $i - 2 ) | |
13875 | { | |
13876 | ||
13877 | ## FIXME: don't strand a small term | |
13878 | pop @i_ragged_break_list; | |
13879 | push @i_ragged_break_list, $i - 2; | |
13880 | push @i_ragged_break_list, $i - 1; | |
13881 | } | |
13882 | ||
13883 | push @i_ragged_break_list, $i; | |
13884 | $i_last_last_break = $i_last_break; | |
13885 | $i_last_break = $i; | |
13886 | } | |
13887 | ||
13888 | # don't break before a small last term -- it will | |
13889 | # not look good on a line by itself. | |
13890 | elsif ($i == $i_max | |
13891 | && $i_last_break == $i - 1 | |
13892 | && $weighted_length <= $definitely_simple ) | |
13893 | { | |
13894 | pop @i_ragged_break_list; | |
13895 | } | |
13896 | } | |
13897 | ||
13898 | my $identifier_count = $i_max + 1 - $quote_count; | |
13899 | ||
13900 | # Need more tuning here.. | |
13901 | if ( $max_width > 12 | |
13902 | && $complex_item_count > $item_count / 2 | |
13903 | && $number_of_fields_best != 2 ) | |
13904 | { | |
13905 | $number_of_fields_best = 1; | |
13906 | } | |
13907 | ||
13908 | return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); | |
13909 | } | |
13910 | ||
13911 | sub get_maximum_fields_wanted { | |
13912 | ||
13913 | # Not all tables look good with more than one field of items. | |
13914 | # This routine looks at a table and decides if it should be | |
13915 | # formatted with just one field or not. | |
13916 | # This coding is still under development. | |
13917 | my ($ritem_lengths) = @_; | |
13918 | ||
13919 | my $number_of_fields_best = 0; | |
13920 | ||
13921 | # For just a few items, we tentatively assume just 1 field. | |
13922 | my $item_count = @{$ritem_lengths}; | |
13923 | if ( $item_count <= 5 ) { | |
13924 | $number_of_fields_best = 1; | |
13925 | } | |
13926 | ||
13927 | # For larger tables, look at it both ways and see what looks best | |
13928 | else { | |
13929 | ||
13930 | my $is_odd = 1; | |
13931 | my @max_length = ( 0, 0 ); | |
13932 | my @last_length_2 = ( undef, undef ); | |
13933 | my @first_length_2 = ( undef, undef ); | |
13934 | my $last_length = undef; | |
13935 | my $total_variation_1 = 0; | |
13936 | my $total_variation_2 = 0; | |
13937 | my @total_variation_2 = ( 0, 0 ); | |
13938 | for ( my $j = 0 ; $j < $item_count ; $j++ ) { | |
13939 | ||
13940 | $is_odd = 1 - $is_odd; | |
13941 | my $length = $ritem_lengths->[$j]; | |
13942 | if ( $length > $max_length[$is_odd] ) { | |
13943 | $max_length[$is_odd] = $length; | |
13944 | } | |
13945 | ||
13946 | if ( defined($last_length) ) { | |
13947 | my $dl = abs( $length - $last_length ); | |
13948 | $total_variation_1 += $dl; | |
13949 | } | |
13950 | $last_length = $length; | |
13951 | ||
13952 | my $ll = $last_length_2[$is_odd]; | |
13953 | if ( defined($ll) ) { | |
13954 | my $dl = abs( $length - $ll ); | |
13955 | $total_variation_2[$is_odd] += $dl; | |
13956 | } | |
13957 | else { | |
13958 | $first_length_2[$is_odd] = $length; | |
13959 | } | |
13960 | $last_length_2[$is_odd] = $length; | |
13961 | } | |
13962 | $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; | |
13963 | ||
13964 | my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; | |
13965 | unless ( $total_variation_2 < $factor * $total_variation_1 ) { | |
13966 | $number_of_fields_best = 1; | |
13967 | } | |
13968 | } | |
13969 | return ($number_of_fields_best); | |
13970 | } | |
13971 | ||
13972 | sub table_columns_available { | |
13973 | my $i_first_comma = shift; | |
13974 | my $columns = | |
13975 | $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma); | |
13976 | ||
13977 | # Patch: the vertical formatter does not line up lines whose lengths | |
13978 | # exactly equal the available line length because of allowances | |
13979 | # that must be made for side comments. Therefore, the number of | |
13980 | # available columns is reduced by 1 character. | |
13981 | $columns -= 1; | |
13982 | return $columns; | |
13983 | } | |
13984 | ||
13985 | sub maximum_number_of_fields { | |
13986 | ||
13987 | # how many fields will fit in the available space? | |
13988 | my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; | |
13989 | my $max_pairs = int( $columns / $pair_width ); | |
13990 | my $number_of_fields = $max_pairs * 2; | |
13991 | if ( $odd_or_even == 1 | |
13992 | && $max_pairs * $pair_width + $max_width <= $columns ) | |
13993 | { | |
13994 | $number_of_fields++; | |
13995 | } | |
13996 | return $number_of_fields; | |
13997 | } | |
13998 | ||
13999 | sub compactify_table { | |
14000 | ||
14001 | # given a table with a certain number of fields and a certain number | |
14002 | # of lines, see if reducing the number of fields will make it look | |
14003 | # better. | |
14004 | my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; | |
14005 | if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { | |
14006 | my $min_fields; | |
14007 | ||
14008 | for ( | |
14009 | $min_fields = $number_of_fields ; | |
14010 | $min_fields >= $odd_or_even | |
14011 | && $min_fields * $formatted_lines >= $item_count ; | |
14012 | $min_fields -= $odd_or_even | |
14013 | ) | |
14014 | { | |
14015 | $number_of_fields = $min_fields; | |
14016 | } | |
14017 | } | |
14018 | return $number_of_fields; | |
14019 | } | |
14020 | ||
14021 | sub set_ragged_breakpoints { | |
14022 | ||
14023 | # Set breakpoints in a list that cannot be formatted nicely as a | |
14024 | # table. | |
14025 | my ( $ri_term_comma, $ri_ragged_break_list ) = @_; | |
14026 | ||
14027 | my $break_count = 0; | |
14028 | foreach (@$ri_ragged_break_list) { | |
14029 | my $j = $ri_term_comma->[$_]; | |
14030 | if ($j) { | |
14031 | set_forced_breakpoint($j); | |
14032 | $break_count++; | |
14033 | } | |
14034 | } | |
14035 | return $break_count; | |
14036 | } | |
14037 | ||
14038 | sub copy_old_breakpoints { | |
14039 | my ( $i_first_comma, $i_last_comma ) = @_; | |
14040 | for my $i ( $i_first_comma .. $i_last_comma ) { | |
14041 | if ( $old_breakpoint_to_go[$i] ) { | |
14042 | set_forced_breakpoint($i); | |
14043 | } | |
14044 | } | |
14045 | } | |
14046 | ||
14047 | sub set_nobreaks { | |
14048 | my ( $i, $j ) = @_; | |
14049 | if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { | |
14050 | ||
14051 | FORMATTER_DEBUG_FLAG_NOBREAK && do { | |
14052 | my ( $a, $b, $c ) = caller(); | |
14053 | print( | |
14054 | "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n" | |
14055 | ); | |
14056 | }; | |
14057 | ||
14058 | @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); | |
14059 | } | |
14060 | ||
14061 | # shouldn't happen; non-critical error | |
14062 | else { | |
14063 | FORMATTER_DEBUG_FLAG_NOBREAK && do { | |
14064 | my ( $a, $b, $c ) = caller(); | |
14065 | print( | |
14066 | "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n" | |
14067 | ); | |
14068 | }; | |
14069 | } | |
14070 | } | |
14071 | ||
14072 | sub set_fake_breakpoint { | |
14073 | ||
14074 | # Just bump up the breakpoint count as a signal that there are breaks. | |
14075 | # This is useful if we have breaks but may want to postpone deciding where | |
14076 | # to make them. | |
14077 | $forced_breakpoint_count++; | |
14078 | } | |
14079 | ||
14080 | sub set_forced_breakpoint { | |
14081 | my $i = shift; | |
14082 | ||
14083 | return unless defined $i && $i >= 0; | |
14084 | ||
14085 | # when called with certain tokens, use bond strengths to decide | |
14086 | # if we break before or after it | |
14087 | my $token = $tokens_to_go[$i]; | |
14088 | ||
14089 | if ( $token =~ /^([\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) { | |
14090 | if ( $want_break_before{$token} && $i >= 0 ) { $i-- } | |
14091 | } | |
14092 | ||
14093 | # breaks are forced before 'if' and 'unless' | |
14094 | elsif ( $is_if_unless{$token} ) { $i-- } | |
14095 | ||
14096 | if ( $i >= 0 && $i <= $max_index_to_go ) { | |
14097 | my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; | |
14098 | ||
14099 | FORMATTER_DEBUG_FLAG_FORCE && do { | |
14100 | my ( $a, $b, $c ) = caller(); | |
14101 | ||
14102 | "FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; | |
14103 | }; | |
14104 | ||
14105 | if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { | |
14106 | $forced_breakpoint_to_go[$i_nonblank] = 1; | |
14107 | ||
14108 | if ( $i_nonblank > $index_max_forced_break ) { | |
14109 | $index_max_forced_break = $i_nonblank; | |
14110 | } | |
14111 | $forced_breakpoint_count++; | |
14112 | $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = | |
14113 | $i_nonblank; | |
14114 | ||
14115 | # if we break at an opening container..break at the closing | |
14116 | if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { | |
14117 | set_closing_breakpoint($i_nonblank); | |
14118 | } | |
14119 | } | |
14120 | } | |
14121 | } | |
14122 | ||
14123 | sub clear_breakpoint_undo_stack { | |
14124 | $forced_breakpoint_undo_count = 0; | |
14125 | } | |
14126 | ||
14127 | sub undo_forced_breakpoint_stack { | |
14128 | ||
14129 | my $i_start = shift; | |
14130 | if ( $i_start < 0 ) { | |
14131 | $i_start = 0; | |
14132 | my ( $a, $b, $c ) = caller(); | |
14133 | warning( | |
14134 | "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " | |
14135 | ); | |
14136 | } | |
14137 | ||
14138 | while ( $forced_breakpoint_undo_count > $i_start ) { | |
14139 | my $i = | |
14140 | $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; | |
14141 | if ( $i >= 0 && $i <= $max_index_to_go ) { | |
14142 | $forced_breakpoint_to_go[$i] = 0; | |
14143 | $forced_breakpoint_count--; | |
14144 | ||
14145 | FORMATTER_DEBUG_FLAG_UNDOBP && do { | |
14146 | my ( $a, $b, $c ) = caller(); | |
14147 | print( | |
14148 | "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n" | |
14149 | ); | |
14150 | }; | |
14151 | } | |
14152 | ||
14153 | # shouldn't happen, but not a critical error | |
14154 | else { | |
14155 | FORMATTER_DEBUG_FLAG_UNDOBP && do { | |
14156 | my ( $a, $b, $c ) = caller(); | |
14157 | print( | |
14158 | "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go" | |
14159 | ); | |
14160 | }; | |
14161 | } | |
14162 | } | |
14163 | } | |
14164 | ||
14165 | sub recombine_breakpoints { | |
14166 | ||
14167 | # sub set_continuation_breaks is very liberal in setting line breaks | |
14168 | # for long lines, always setting breaks at good breakpoints, even | |
14169 | # when that creates small lines. Occasionally small line fragments | |
14170 | # are produced which would look better if they were combined. | |
14171 | # That's the task of this routine, recombine_breakpoints. | |
14172 | my ( $ri_first, $ri_last ) = @_; | |
14173 | my $more_to_do = 1; | |
14174 | ||
14175 | # Keep looping until there are no more possible recombinations | |
14176 | my $nmax_last = @$ri_last; | |
14177 | while ($more_to_do) { | |
14178 | my $n_best = 0; | |
14179 | my $bs_best; | |
14180 | my $n; | |
14181 | my $nmax = @$ri_last - 1; | |
14182 | ||
14183 | # safety check.. | |
14184 | unless ( $nmax < $nmax_last ) { | |
14185 | ||
14186 | # shouldn't happen because splice below decreases nmax on each pass: | |
14187 | # but i get paranoid sometimes | |
14188 | die "Program bug-infinite loop in recombine breakpoints\n"; | |
14189 | } | |
14190 | $nmax_last = $nmax; | |
14191 | $more_to_do = 0; | |
14192 | ||
14193 | # loop over all remaining lines... | |
14194 | for $n ( 1 .. $nmax ) { | |
14195 | ||
14196 | #---------------------------------------------------------- | |
14197 | # Indexes of the endpoints of the two lines are: | |
14198 | # | |
14199 | # ---left---- | ---right--- | |
14200 | # $if $imid | $imidr $il | |
14201 | # | |
14202 | # We want to decide if we should join tokens $imid to $imidr | |
14203 | #---------------------------------------------------------- | |
14204 | my $if = $$ri_first[ $n - 1 ]; | |
14205 | my $il = $$ri_last[$n]; | |
14206 | my $imid = $$ri_last[ $n - 1 ]; | |
14207 | my $imidr = $$ri_first[$n]; | |
14208 | ||
14209 | #print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n"; | |
14210 | ||
14211 | #---------------------------------------------------------- | |
14212 | # Start of special recombination rules | |
14213 | # These are ad-hoc rules which have been found to work ok. | |
14214 | # Skip to next pair to avoid re-combination. | |
14215 | #---------------------------------------------------------- | |
14216 | ||
14217 | # a terminal '{' should stay where it is | |
14218 | next if ( $n == $nmax && $types_to_go[$imidr] eq '{' ); | |
14219 | ||
14220 | #---------------------------------------------------------- | |
14221 | # examine token at $imid (right end of first line of pair) | |
14222 | #---------------------------------------------------------- | |
14223 | ||
14224 | # an isolated '}' may join with a ';' terminated segment | |
14225 | if ( $types_to_go[$imid] eq '}' ) { | |
14226 | next | |
14227 | unless ( | |
14228 | ||
14229 | # join } and ; | |
14230 | ( ( $if == $imid ) && ( $types_to_go[$il] eq ';' ) ) | |
14231 | ||
14232 | # handle '.' and '?' below | |
14233 | || ( $types_to_go[$imidr] =~ /^[\.\?]$/ ) | |
14234 | ); | |
14235 | } | |
14236 | ||
14237 | # do not recombine lines with ending &&, ||, or : | |
14238 | elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) { | |
14239 | next unless $want_break_before{ $types_to_go[$imid] }; | |
14240 | } | |
14241 | ||
14242 | # for lines ending in a comma... | |
14243 | elsif ( $types_to_go[$imid] eq ',' ) { | |
14244 | ||
14245 | # an isolated '},' may join with an identifier + ';' | |
14246 | # this is useful for the class of a 'bless' statement (bless.t) | |
14247 | if ( $types_to_go[$if] eq '}' | |
14248 | && $types_to_go[$imidr] eq 'i' ) | |
14249 | { | |
14250 | next | |
14251 | unless ( ( $if == ( $imid - 1 ) ) | |
14252 | && ( $il == ( $imidr + 1 ) ) | |
14253 | && ( $types_to_go[$il] eq ';' ) ); | |
14254 | ||
14255 | # override breakpoint | |
14256 | $forced_breakpoint_to_go[$imid] = 0; | |
14257 | } | |
14258 | ||
14259 | # but otherwise, do not recombine unless this will leave | |
14260 | # just 1 more line | |
14261 | else { | |
14262 | next unless ( $n + 1 >= $nmax ); | |
14263 | } | |
14264 | } | |
14265 | ||
14266 | # opening paren.. | |
14267 | elsif ( $types_to_go[$imid] eq '(' ) { | |
14268 | ||
14269 | # No longer doing this | |
14270 | } | |
14271 | ||
14272 | elsif ( $types_to_go[$imid] eq ')' ) { | |
14273 | ||
14274 | # No longer doing this | |
14275 | } | |
14276 | ||
14277 | # keep a terminal colon | |
14278 | elsif ( $types_to_go[$imid] eq ':' ) { | |
14279 | next; | |
14280 | } | |
14281 | ||
14282 | # keep a terminal for-semicolon | |
14283 | elsif ( $types_to_go[$imid] eq 'f' ) { | |
14284 | next; | |
14285 | } | |
14286 | ||
14287 | # if '=' at end of line ... | |
14288 | elsif ( $is_assignment{ $types_to_go[$imid] } ) { | |
14289 | ||
14290 | # otherwise always ok to join isolated '=' | |
14291 | unless ( $if == $imid ) { | |
14292 | ||
14293 | my $is_math = ( | |
14294 | ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ ) | |
14295 | ||
14296 | # note no '$' in pattern because -> can | |
14297 | # start long identifier | |
14298 | && !grep { $_ =~ /^(->|=>|[\,])/ } | |
14299 | @types_to_go[ $imidr .. $il ] | |
14300 | ); | |
14301 | ||
14302 | # retain the break after the '=' unless ... | |
14303 | next | |
14304 | unless ( | |
14305 | ||
14306 | # '=' is followed by a number and looks like math | |
14307 | ( $types_to_go[$imidr] eq 'n' && $is_math ) | |
14308 | ||
14309 | # or followed by a scalar and looks like math | |
14310 | || ( ( $types_to_go[$imidr] eq 'i' ) | |
14311 | && ( $tokens_to_go[$imidr] =~ /^\$/ ) | |
14312 | && $is_math ) | |
14313 | ||
14314 | # or followed by a single "short" token | |
14315 | # ('12' is arbitrary) | |
14316 | || ( $il == $imidr | |
14317 | && token_sequence_length( $imidr, $imidr ) < 12 ) | |
14318 | ||
14319 | ); | |
14320 | } | |
14321 | unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) { | |
14322 | $forced_breakpoint_to_go[$imid] = 0; | |
14323 | } | |
14324 | } | |
14325 | ||
14326 | # for keywords.. | |
14327 | elsif ( $types_to_go[$imid] eq 'k' ) { | |
14328 | ||
14329 | # make major control keywords stand out | |
14330 | # (recombine.t) | |
14331 | next | |
14332 | if ( | |
14333 | ||
14334 | #/^(last|next|redo|return)$/ | |
14335 | $is_last_next_redo_return{ $tokens_to_go[$imid] } | |
14336 | ); | |
14337 | ||
14338 | if ( $is_and_or{ $tokens_to_go[$imid] } ) { | |
14339 | next unless $want_break_before{ $tokens_to_go[$imid] }; | |
14340 | } | |
14341 | } | |
14342 | ||
14343 | #---------------------------------------------------------- | |
14344 | # examine token at $imidr (left end of second line of pair) | |
14345 | #---------------------------------------------------------- | |
14346 | ||
14347 | # do not recombine lines with leading &&, ||, or : | |
14348 | if ( $types_to_go[$imidr] =~ /^(|:|\&\&|\|\|)$/ ) { | |
14349 | next if $want_break_before{ $types_to_go[$imidr] }; | |
14350 | } | |
14351 | ||
14352 | # Identify and recombine a broken ?/: chain | |
14353 | elsif ( $types_to_go[$imidr] eq '?' ) { | |
14354 | ||
14355 | # indexes of line first tokens -- | |
14356 | # mm - line before previous line | |
14357 | # f - previous line | |
14358 | # <-- this line | |
14359 | # ff - next line | |
14360 | # fff - line after next | |
14361 | my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1; | |
14362 | my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1; | |
14363 | my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1; | |
14364 | my $seqno = $type_sequence_to_go[$imidr]; | |
14365 | my $f_ok = | |
14366 | ( $types_to_go[$if] eq ':' | |
14367 | && $type_sequence_to_go[$if] == | |
14368 | $seqno - TYPE_SEQUENCE_INCREMENT ); | |
14369 | my $mm_ok = | |
14370 | ( $imm >= 0 | |
14371 | && $types_to_go[$imm] eq ':' | |
14372 | && $type_sequence_to_go[$imm] == | |
14373 | $seqno - 2 * TYPE_SEQUENCE_INCREMENT ); | |
14374 | ||
14375 | my $ff_ok = | |
14376 | ( $iff > 0 | |
14377 | && $types_to_go[$iff] eq ':' | |
14378 | && $type_sequence_to_go[$iff] == $seqno ); | |
14379 | my $fff_ok = | |
14380 | ( $ifff > 0 | |
14381 | && $types_to_go[$ifff] eq ':' | |
14382 | && $type_sequence_to_go[$ifff] == | |
14383 | $seqno + TYPE_SEQUENCE_INCREMENT ); | |
14384 | ||
14385 | # we require that this '?' be part of a correct sequence | |
14386 | # of 3 in a row or else no recombination is done. | |
14387 | next | |
14388 | unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) ); | |
14389 | $forced_breakpoint_to_go[$imid] = 0; | |
14390 | } | |
14391 | ||
14392 | # do not recombine lines with leading '.' | |
14393 | elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) { | |
14394 | my $i_next_nonblank = $imidr + 1; | |
14395 | if ( $types_to_go[$i_next_nonblank] eq 'b' ) { | |
14396 | $i_next_nonblank++; | |
14397 | } | |
14398 | ||
14399 | next | |
14400 | unless ( | |
14401 | ||
14402 | # ... unless there is just one and we can reduce this to | |
14403 | # two lines if we do. For example, this : | |
14404 | # | |
14405 | # $bodyA .= | |
14406 | # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' | |
14407 | # | |
14408 | # looks better than this: | |
14409 | # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' | |
14410 | # . '$args .= $pat;' | |
14411 | ||
14412 | ( | |
14413 | $n == 2 | |
14414 | && $n == $nmax | |
14415 | && $types_to_go[$if] ne $types_to_go[$imidr] | |
14416 | ) | |
14417 | ||
14418 | # | |
14419 | # ... or this would strand a short quote , like this | |
14420 | # . "some long qoute" | |
14421 | # . "\n"; | |
14422 | # | |
14423 | ||
14424 | || ( $types_to_go[$i_next_nonblank] eq 'Q' | |
14425 | && $i_next_nonblank >= $il - 1 | |
14426 | && length( $tokens_to_go[$i_next_nonblank] ) < | |
14427 | $rOpts_short_concatenation_item_length ) | |
14428 | ); | |
14429 | } | |
14430 | ||
14431 | # handle leading keyword.. | |
14432 | elsif ( $types_to_go[$imidr] eq 'k' ) { | |
14433 | ||
14434 | # handle leading "and" and "or" | |
14435 | if ( $is_and_or{ $tokens_to_go[$imidr] } ) { | |
14436 | ||
14437 | # Decide if we will combine a single terminal 'and' and | |
14438 | # 'or' after an 'if' or 'unless'. We should consider the | |
14439 | # possible vertical alignment, and visual clutter. | |
14440 | ||
14441 | # This looks best with the 'and' on the same line as the 'if': | |
14442 | # | |
14443 | # $a = 1 | |
14444 | # if $seconds and $nu < 2; | |
14445 | # | |
14446 | # But this looks better as shown: | |
14447 | # | |
14448 | # $a = 1 | |
14449 | # if !$this->{Parents}{$_} | |
14450 | # or $this->{Parents}{$_} eq $_; | |
14451 | # | |
14452 | # Eventually, it would be nice to look for similarities (such as 'this' or | |
14453 | # 'Parents'), but for now I'm using a simple rule that says that the | |
14454 | # resulting line length must not be more than half the maximum line length | |
14455 | # (making it 80/2 = 40 characters by default). | |
14456 | ||
14457 | next | |
14458 | unless ( | |
14459 | $n == $nmax # if this is the last line | |
14460 | && $types_to_go[$il] eq ';' # ending in ';' | |
14461 | && $types_to_go[$if] eq 'k' # after 'if' or 'unless' | |
14462 | # /^(if|unless)$/ | |
14463 | && $is_if_unless{ $tokens_to_go[$if] } | |
14464 | ||
14465 | # and if this doesn't make a long last line | |
14466 | && total_line_length( $if, $il ) <= | |
14467 | $half_maximum_line_length | |
14468 | ); | |
14469 | ||
14470 | # override breakpoint | |
14471 | $forced_breakpoint_to_go[$imid] = 0; | |
14472 | } | |
14473 | ||
14474 | # handle leading "if" and "unless" | |
14475 | elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) { | |
14476 | ||
14477 | # FIXME: This is still experimental..may not be too useful | |
14478 | next | |
14479 | unless ( | |
14480 | $n == $nmax # if this is the last line | |
14481 | && $types_to_go[$il] eq ';' # ending in ';' | |
14482 | && $types_to_go[$if] eq 'k' | |
14483 | ||
14484 | # /^(and|or)$/ | |
14485 | && $is_and_or{ $tokens_to_go[$if] } | |
14486 | ||
14487 | # and if this doesn't make a long last line | |
14488 | && total_line_length( $if, $il ) <= | |
14489 | $half_maximum_line_length | |
14490 | ); | |
14491 | ||
14492 | # override breakpoint | |
14493 | $forced_breakpoint_to_go[$imid] = 0; | |
14494 | } | |
14495 | ||
14496 | # handle all other leading keywords | |
14497 | else { | |
14498 | ||
14499 | # keywords look best at start of lines, | |
14500 | # but combine things like "1 while" | |
14501 | ||
14502 | unless ( $is_assignment{ $types_to_go[$imid] } ) { | |
14503 | next | |
14504 | if ( ( $types_to_go[$imid] ne 'k' ) | |
14505 | && ( $tokens_to_go[$imidr] !~ /^(while)$/ ) ); | |
14506 | } | |
14507 | } | |
14508 | } | |
14509 | ||
14510 | # similar treatment of && and || as above for 'and' and 'or': | |
14511 | elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) { | |
14512 | ||
14513 | # maybe looking at something like: | |
14514 | # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i; | |
14515 | ||
14516 | next | |
14517 | unless ( | |
14518 | $n == $nmax # if this is the last line | |
14519 | && $types_to_go[$il] eq ';' # ending in ';' | |
14520 | && $types_to_go[$if] eq 'k' # after an 'if' or 'unless' | |
14521 | # /^(if|unless)$/ | |
14522 | && $is_if_unless{ $tokens_to_go[$if] } | |
14523 | ||
14524 | # and if this doesn't make a long last line | |
14525 | && total_line_length( $if, $il ) <= | |
14526 | $half_maximum_line_length | |
14527 | ); | |
14528 | ||
14529 | # override breakpoint | |
14530 | $forced_breakpoint_to_go[$imid] = 0; | |
14531 | } | |
14532 | ||
14533 | # honor hard breakpoints | |
14534 | next if ( $forced_breakpoint_to_go[$imid] > 0 ); | |
14535 | ||
14536 | #---------------------------------------------------------- | |
14537 | # end of special recombination rules | |
14538 | #---------------------------------------------------------- | |
14539 | ||
14540 | my $bs = $bond_strength_to_go[$imid]; | |
14541 | ||
14542 | # combined line cannot be too long | |
14543 | next | |
14544 | if excess_line_length( $if, $il ) > 0; | |
14545 | ||
14546 | # do not recombine if we would skip in indentation levels | |
14547 | if ( $n < $nmax ) { | |
14548 | my $if_next = $$ri_first[ $n + 1 ]; | |
14549 | next | |
14550 | if ( | |
14551 | $levels_to_go[$if] < $levels_to_go[$imidr] | |
14552 | && $levels_to_go[$imidr] < $levels_to_go[$if_next] | |
14553 | ||
14554 | # but an isolated 'if (' is undesirable | |
14555 | && !( | |
14556 | $n == 1 | |
14557 | && $imid - $if <= 2 | |
14558 | && $types_to_go[$if] eq 'k' | |
14559 | && $tokens_to_go[$if] eq 'if' | |
14560 | && $tokens_to_go[$imid] ne '(' | |
14561 | ) | |
14562 | ||
14563 | # | |
14564 | ); | |
14565 | } | |
14566 | ||
14567 | # honor no-break's | |
14568 | next if ( $bs == NO_BREAK ); | |
14569 | ||
14570 | # remember the pair with the greatest bond strength | |
14571 | if ( !$n_best ) { | |
14572 | $n_best = $n; | |
14573 | $bs_best = $bs; | |
14574 | } | |
14575 | else { | |
14576 | ||
14577 | if ( $bs > $bs_best ) { | |
14578 | $n_best = $n; | |
14579 | $bs_best = $bs; | |
14580 | } | |
14581 | ||
14582 | # we have 2 or more candidates, so need another pass | |
14583 | $more_to_do++; | |
14584 | } | |
14585 | } | |
14586 | ||
14587 | # recombine the pair with the greatest bond strength | |
14588 | if ($n_best) { | |
14589 | splice @$ri_first, $n_best, 1; | |
14590 | splice @$ri_last, $n_best - 1, 1; | |
14591 | } | |
14592 | } | |
14593 | return ( $ri_first, $ri_last ); | |
14594 | } | |
14595 | ||
14596 | sub set_continuation_breaks { | |
14597 | ||
14598 | # Define an array of indexes for inserting newline characters to | |
14599 | # keep the line lengths below the maximum desired length. There is | |
14600 | # an implied break after the last token, so it need not be included. | |
14601 | # We'll break at points where the bond strength is lowest. | |
14602 | ||
14603 | my $saw_good_break = shift; | |
14604 | my @i_first = (); # the first index to output | |
14605 | my @i_last = (); # the last index to output | |
14606 | my @i_colon_breaks = (); # needed to decide if we have to break at ?'s | |
14607 | if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } | |
14608 | ||
14609 | set_bond_strengths(); | |
14610 | ||
14611 | my $imin = 0; | |
14612 | my $imax = $max_index_to_go; | |
14613 | if ( $types_to_go[$imin] eq 'b' ) { $imin++ } | |
14614 | if ( $types_to_go[$imax] eq 'b' ) { $imax-- } | |
14615 | my $i_begin = $imin; | |
14616 | ||
14617 | my $leading_spaces = leading_spaces_to_go($imin); | |
14618 | my $line_count = 0; | |
14619 | my $last_break_strength = NO_BREAK; | |
14620 | my $i_last_break = -1; | |
14621 | my $max_bias = 0.001; | |
14622 | my $tiny_bias = 0.0001; | |
14623 | my $leading_alignment_token = ""; | |
14624 | my $leading_alignment_type = ""; | |
14625 | ||
14626 | # see if any ?/:'s are in order | |
14627 | my $colons_in_order = 1; | |
14628 | my $last_tok = ""; | |
14629 | my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ]; | |
14630 | foreach (@colon_list) { | |
14631 | if ( $_ eq $last_tok ) { $colons_in_order = 0; last } | |
14632 | $last_tok = $_; | |
14633 | } | |
14634 | ||
14635 | # This is a sufficient but not necessary condition for colon chain | |
14636 | my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); | |
14637 | ||
14638 | while ( $i_begin <= $imax ) { | |
14639 | my $lowest_strength = NO_BREAK; | |
14640 | my $starting_sum = $lengths_to_go[$i_begin]; | |
14641 | my $i_lowest = -1; | |
14642 | my $i_test = -1; | |
14643 | my $lowest_next_token = ''; | |
14644 | my $lowest_next_type = 'b'; | |
14645 | my $i_lowest_next_nonblank = -1; | |
14646 | ||
14647 | # loop to find next break point | |
14648 | for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { | |
14649 | my $type = $types_to_go[$i_test]; | |
14650 | my $token = $tokens_to_go[$i_test]; | |
14651 | my $next_type = $types_to_go[ $i_test + 1 ]; | |
14652 | my $next_token = $tokens_to_go[ $i_test + 1 ]; | |
14653 | my $i_next_nonblank = | |
14654 | ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 ); | |
14655 | my $next_nonblank_type = $types_to_go[$i_next_nonblank]; | |
14656 | my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | |
14657 | my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; | |
14658 | my $strength = $bond_strength_to_go[$i_test]; | |
14659 | my $must_break = 0; | |
14660 | ||
14661 | # FIXME: TESTING: Might want to be able to break after these | |
14662 | # force an immediate break at certain operators | |
14663 | # with lower level than the start of the line | |
14664 | if ( | |
14665 | ( | |
14666 | $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ | |
14667 | || ( $next_nonblank_type eq 'k' | |
14668 | && $next_nonblank_token =~ /^(and|or)$/ ) | |
14669 | ) | |
14670 | && ( $nesting_depth_to_go[$i_begin] > | |
14671 | $nesting_depth_to_go[$i_next_nonblank] ) | |
14672 | ) | |
14673 | { | |
14674 | set_forced_breakpoint($i_next_nonblank); | |
14675 | } | |
14676 | ||
14677 | if ( | |
14678 | ||
14679 | # Try to put a break where requested by scan_list | |
14680 | $forced_breakpoint_to_go[$i_test] | |
14681 | ||
14682 | # break between ) { in a continued line so that the '{' can | |
14683 | # be outdented | |
14684 | # See similar logic in scan_list which catches instances | |
14685 | # where a line is just something like ') {' | |
14686 | || ( $line_count | |
14687 | && ( $token eq ')' ) | |
14688 | && ( $next_nonblank_type eq '{' ) | |
14689 | && ($next_nonblank_block_type) | |
14690 | && !$rOpts->{'opening-brace-always-on-right'} ) | |
14691 | ||
14692 | # There is an implied forced break at a terminal opening brace | |
14693 | || ( ( $type eq '{' ) && ( $i_test == $imax ) ) | |
14694 | ||
14695 | ) | |
14696 | { | |
14697 | ||
14698 | # Forced breakpoints must sometimes be overridden, for example | |
14699 | # because of a side comment causing a NO_BREAK. It is easier | |
14700 | # to catch this here than when they are set. | |
14701 | if ( $strength < NO_BREAK ) { | |
14702 | $strength = $lowest_strength - $tiny_bias; | |
14703 | $must_break = 1; | |
14704 | } | |
14705 | } | |
14706 | ||
14707 | # quit if a break here would put a good terminal token on | |
14708 | # the next line and we already have a possible break | |
14709 | if ( | |
14710 | !$must_break | |
14711 | && ( $next_nonblank_type =~ /^[\;\,]$/ ) | |
14712 | && ( | |
14713 | ( | |
14714 | $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ] | |
14715 | - $starting_sum | |
14716 | ) > $rOpts_maximum_line_length | |
14717 | ) | |
14718 | ) | |
14719 | { | |
14720 | last if ( $i_lowest >= 0 ); | |
14721 | } | |
14722 | ||
14723 | # Avoid a break which would strand a single punctuation | |
14724 | # token. For example, we do not want to strand a leading | |
14725 | # '.' which is followed by a long quoted string. | |
14726 | if ( | |
14727 | !$must_break | |
14728 | && ( $i_test == $i_begin ) | |
14729 | && ( $i_test < $imax ) | |
14730 | && ( $token eq $type ) | |
14731 | && ( | |
14732 | ( | |
14733 | $leading_spaces + $lengths_to_go[ $i_test + 1 ] - | |
14734 | $starting_sum | |
14735 | ) <= $rOpts_maximum_line_length | |
14736 | ) | |
14737 | ) | |
14738 | { | |
14739 | $i_test++; | |
14740 | ||
14741 | if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) { | |
14742 | $i_test++; | |
14743 | } | |
14744 | redo; | |
14745 | } | |
14746 | ||
14747 | if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) | |
14748 | { | |
14749 | ||
14750 | # break at previous best break if it would have produced | |
14751 | # a leading alignment of certain common tokens, and it | |
14752 | # is different from the latest candidate break | |
14753 | last | |
14754 | if ($leading_alignment_type); | |
14755 | ||
14756 | # Force at least one breakpoint if old code had good | |
14757 | # break It is only called if a breakpoint is required or | |
14758 | # desired. This will probably need some adjustments | |
14759 | # over time. A goal is to try to be sure that, if a new | |
14760 | # side comment is introduced into formated text, then | |
14761 | # the same breakpoints will occur. scbreak.t | |
14762 | last | |
14763 | if ( | |
14764 | $i_test == $imax # we are at the end | |
14765 | && !$forced_breakpoint_count # | |
14766 | && $saw_good_break # old line had good break | |
14767 | && $type =~ /^[#;\{]$/ # and this line ends in | |
14768 | # ';' or side comment | |
14769 | && $i_last_break < 0 # and we haven't made a break | |
14770 | && $i_lowest > 0 # and we saw a possible break | |
14771 | && $i_lowest < $imax - 1 # (but not just before this ;) | |
14772 | && $strength - $lowest_strength < 0.5 * WEAK # and it's good | |
14773 | ); | |
14774 | ||
14775 | $lowest_strength = $strength; | |
14776 | $i_lowest = $i_test; | |
14777 | $lowest_next_token = $next_nonblank_token; | |
14778 | $lowest_next_type = $next_nonblank_type; | |
14779 | $i_lowest_next_nonblank = $i_next_nonblank; | |
14780 | last if $must_break; | |
14781 | ||
14782 | # set flags to remember if a break here will produce a | |
14783 | # leading alignment of certain common tokens | |
14784 | if ( | |
14785 | $line_count > 0 | |
14786 | && $i_test < $imax | |
14787 | && ( $lowest_strength - $last_break_strength <= $max_bias ) | |
14788 | && ( $nesting_depth_to_go[$i_begin] >= | |
14789 | $nesting_depth_to_go[$i_next_nonblank] ) | |
14790 | && ( | |
14791 | ( | |
14792 | $types_to_go[$i_begin] =~ /^(\.|\&\&|\|\||:)$/ | |
14793 | && $types_to_go[$i_begin] eq $next_nonblank_type | |
14794 | ) | |
14795 | || ( $tokens_to_go[$i_begin] =~ /^(and|or)$/ | |
14796 | && $tokens_to_go[$i_begin] eq $next_nonblank_token ) | |
14797 | ) | |
14798 | ) | |
14799 | { | |
14800 | $leading_alignment_token = $next_nonblank_token; | |
14801 | $leading_alignment_type = $next_nonblank_type; | |
14802 | } | |
14803 | } | |
14804 | ||
14805 | my $too_long = | |
14806 | ( $i_test >= $imax ) | |
14807 | ? 1 | |
14808 | : ( | |
14809 | ( | |
14810 | $leading_spaces + $lengths_to_go[ $i_test + 2 ] - | |
14811 | $starting_sum | |
14812 | ) > $rOpts_maximum_line_length | |
14813 | ); | |
14814 | ||
14815 | FORMATTER_DEBUG_FLAG_BREAK | |
14816 | ||
14817 | "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n"; | |
14818 | ||
14819 | # allow one extra terminal token after exceeding line length | |
14820 | # if it would strand this token. | |
14821 | if ( $rOpts_fuzzy_line_length | |
14822 | && $too_long | |
14823 | && ( $i_lowest == $i_test ) | |
14824 | && ( length($token) > 1 ) | |
14825 | && ( $next_nonblank_type =~ /^[\;\,]$/ ) ) | |
14826 | { | |
14827 | $too_long = 0; | |
14828 | } | |
14829 | ||
14830 | last | |
14831 | if ( | |
14832 | ( $i_test == $imax ) # we're done if no more tokens, | |
14833 | || ( | |
14834 | ( $i_lowest >= 0 ) # or no more space and we have a break | |
14835 | && $too_long | |
14836 | ) | |
14837 | ); | |
14838 | } | |
14839 | ||
14840 | # it's always ok to break at imax if no other break was found | |
14841 | if ( $i_lowest < 0 ) { $i_lowest = $imax } | |
14842 | ||
14843 | # semi-final index calculation | |
14844 | my $i_next_nonblank = ( | |
14845 | ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) | |
14846 | ? $i_lowest + 2 | |
14847 | : $i_lowest + 1 | |
14848 | ); | |
14849 | my $next_nonblank_type = $types_to_go[$i_next_nonblank]; | |
14850 | my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | |
14851 | ||
14852 | #------------------------------------------------------- | |
14853 | # ?/: rule 1 : if a break here will separate a '?' on this | |
14854 | # line from its closing ':', then break at the '?' instead. | |
14855 | #------------------------------------------------------- | |
14856 | my $i; | |
14857 | foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) { | |
14858 | next unless ( $tokens_to_go[$i] eq '?' ); | |
14859 | ||
14860 | # do not break if probable sequence of ?/: statements | |
14861 | next if ($is_colon_chain); | |
14862 | ||
14863 | # do not break if statement is broken by side comment | |
14864 | next | |
14865 | if ( | |
14866 | $tokens_to_go[$max_index_to_go] eq '#' | |
14867 | && terminal_type( \@types_to_go, \@block_type_to_go, 0, | |
14868 | $max_index_to_go ) !~ /^[\;\}]$/ | |
14869 | ); | |
14870 | ||
14871 | # no break needed if matching : is also on the line | |
14872 | next | |
14873 | if ( $mate_index_to_go[$i] >= 0 | |
14874 | && $mate_index_to_go[$i] <= $i_next_nonblank ); | |
14875 | ||
14876 | $i_lowest = $i; | |
14877 | if ( $want_break_before{'?'} ) { $i_lowest-- } | |
14878 | last; | |
14879 | } | |
14880 | ||
14881 | # final index calculation | |
14882 | $i_next_nonblank = ( | |
14883 | ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) | |
14884 | ? $i_lowest + 2 | |
14885 | : $i_lowest + 1 | |
14886 | ); | |
14887 | $next_nonblank_type = $types_to_go[$i_next_nonblank]; | |
14888 | $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | |
14889 | ||
14890 | FORMATTER_DEBUG_FLAG_BREAK | |
14891 | && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; | |
14892 | ||
14893 | #------------------------------------------------------- | |
14894 | # ?/: rule 2 : if we break at a '?', then break at its ':' | |
14895 | # | |
14896 | # Note: this rule is also in sub scan_list to handle a break | |
14897 | # at the start and end of a line (in case breaks are dictated | |
14898 | # by side comments). | |
14899 | #------------------------------------------------------- | |
14900 | if ( $next_nonblank_type eq '?' ) { | |
14901 | set_closing_breakpoint($i_next_nonblank); | |
14902 | } | |
14903 | elsif ( $types_to_go[$i_lowest] eq '?' ) { | |
14904 | set_closing_breakpoint($i_lowest); | |
14905 | } | |
14906 | ||
14907 | #------------------------------------------------------- | |
14908 | # ?/: rule 3 : if we break at a ':' then we save | |
14909 | # its location for further work below. We may need to go | |
14910 | # back and break at its '?'. | |
14911 | #------------------------------------------------------- | |
14912 | if ( $next_nonblank_type eq ':' ) { | |
14913 | push @i_colon_breaks, $i_next_nonblank; | |
14914 | } | |
14915 | elsif ( $types_to_go[$i_lowest] eq ':' ) { | |
14916 | push @i_colon_breaks, $i_lowest; | |
14917 | } | |
14918 | ||
14919 | # here we should set breaks for all '?'/':' pairs which are | |
14920 | # separated by this line | |
14921 | ||
14922 | $line_count++; | |
14923 | ||
14924 | # save this line segment, after trimming blanks at the ends | |
14925 | push( @i_first, | |
14926 | ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); | |
14927 | push( @i_last, | |
14928 | ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); | |
14929 | ||
14930 | # set a forced breakpoint at a container opening, if necessary, to | |
14931 | # signal a break at a closing container. Excepting '(' for now. | |
14932 | if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ | |
14933 | && !$forced_breakpoint_to_go[$i_lowest] ) | |
14934 | { | |
14935 | set_closing_breakpoint($i_lowest); | |
14936 | } | |
14937 | ||
14938 | # get ready to go again | |
14939 | $i_begin = $i_lowest + 1; | |
14940 | $last_break_strength = $lowest_strength; | |
14941 | $i_last_break = $i_lowest; | |
14942 | $leading_alignment_token = ""; | |
14943 | $leading_alignment_type = ""; | |
14944 | $lowest_next_token = ''; | |
14945 | $lowest_next_type = 'b'; | |
14946 | ||
14947 | if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { | |
14948 | $i_begin++; | |
14949 | } | |
14950 | ||
14951 | # update indentation size | |
14952 | if ( $i_begin <= $imax ) { | |
14953 | $leading_spaces = leading_spaces_to_go($i_begin); | |
14954 | } | |
14955 | } | |
14956 | ||
14957 | #------------------------------------------------------- | |
14958 | # ?/: rule 4 -- if we broke at a ':', then break at | |
14959 | # corresponding '?' unless this is a chain of ?: expressions | |
14960 | #------------------------------------------------------- | |
14961 | if (@i_colon_breaks) { | |
14962 | ||
14963 | # using a simple method for deciding if we are in a ?/: chain -- | |
14964 | # this is a chain if it has multiple ?/: pairs all in order; | |
14965 | # otherwise not. | |
14966 | # Note that if line starts in a ':' we count that above as a break | |
14967 | my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); | |
14968 | ||
14969 | unless ($is_chain) { | |
14970 | my @insert_list = (); | |
14971 | foreach (@i_colon_breaks) { | |
14972 | my $i_question = $mate_index_to_go[$_]; | |
14973 | if ( $i_question >= 0 ) { | |
14974 | if ( $want_break_before{'?'} ) { | |
14975 | $i_question--; | |
14976 | if ( $i_question > 0 | |
14977 | && $types_to_go[$i_question] eq 'b' ) | |
14978 | { | |
14979 | $i_question--; | |
14980 | } | |
14981 | } | |
14982 | ||
14983 | if ( $i_question >= 0 ) { | |
14984 | push @insert_list, $i_question; | |
14985 | } | |
14986 | } | |
14987 | insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); | |
14988 | } | |
14989 | } | |
14990 | } | |
14991 | return \@i_first, \@i_last; | |
14992 | } | |
14993 | ||
14994 | sub insert_additional_breaks { | |
14995 | ||
14996 | # this routine will add line breaks at requested locations after | |
14997 | # sub set_continuation_breaks has made preliminary breaks. | |
14998 | ||
14999 | my ( $ri_break_list, $ri_first, $ri_last ) = @_; | |
15000 | my $i_f; | |
15001 | my $i_l; | |
15002 | my $line_number = 0; | |
15003 | my $i_break_left; | |
15004 | foreach $i_break_left ( sort @$ri_break_list ) { | |
15005 | ||
15006 | $i_f = $$ri_first[$line_number]; | |
15007 | $i_l = $$ri_last[$line_number]; | |
15008 | while ( $i_break_left >= $i_l ) { | |
15009 | $line_number++; | |
15010 | ||
15011 | # shouldn't happen unless caller passes bad indexes | |
15012 | if ( $line_number >= @$ri_last ) { | |
15013 | warning( | |
15014 | "Non-fatal program bug: couldn't set break at $i_break_left\n" | |
15015 | ); | |
15016 | report_definite_bug(); | |
15017 | return; | |
15018 | } | |
15019 | $i_f = $$ri_first[$line_number]; | |
15020 | $i_l = $$ri_last[$line_number]; | |
15021 | } | |
15022 | ||
15023 | my $i_break_right = $i_break_left + 1; | |
15024 | if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ } | |
15025 | ||
15026 | if ( $i_break_left >= $i_f | |
15027 | && $i_break_left < $i_l | |
15028 | && $i_break_right > $i_f | |
15029 | && $i_break_right <= $i_l ) | |
15030 | { | |
15031 | splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) ); | |
15032 | splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) ); | |
15033 | } | |
15034 | } | |
15035 | } | |
15036 | ||
15037 | sub set_closing_breakpoint { | |
15038 | ||
15039 | # set a breakpoint at a matching closing token | |
15040 | # at present, this is only used to break at a ':' which matches a '?' | |
15041 | my $i_break = shift; | |
15042 | ||
15043 | if ( $mate_index_to_go[$i_break] >= 0 ) { | |
15044 | ||
15045 | # CAUTION: infinite recursion possible here: | |
15046 | # set_closing_breakpoint calls set_forced_breakpoint, and | |
15047 | # set_forced_breakpoint call set_closing_breakpoint | |
15048 | # ( test files attrib.t, BasicLyx.pm.html). | |
15049 | # Don't reduce the '2' in the statement below | |
15050 | if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { | |
15051 | ||
15052 | # break before } ] and ), but sub set_forced_breakpoint will decide | |
15053 | # to break before or after a ? and : | |
15054 | my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; | |
15055 | set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); | |
15056 | } | |
15057 | } | |
15058 | else { | |
15059 | my $type_sequence = $type_sequence_to_go[$i_break]; | |
15060 | if ($type_sequence) { | |
15061 | my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; | |
15062 | $postponed_breakpoint{$type_sequence} = 1; | |
15063 | } | |
15064 | } | |
15065 | } | |
15066 | ||
15067 | # check to see if output line tabbing agrees with input line | |
15068 | # this can be very useful for debugging a script which has an extra | |
15069 | # or missing brace | |
15070 | sub compare_indentation_levels { | |
15071 | ||
15072 | my ( $python_indentation_level, $structural_indentation_level ) = @_; | |
15073 | if ( ( $python_indentation_level ne $structural_indentation_level ) ) { | |
15074 | $last_tabbing_disagreement = $input_line_number; | |
15075 | ||
15076 | if ($in_tabbing_disagreement) { | |
15077 | } | |
15078 | else { | |
15079 | $tabbing_disagreement_count++; | |
15080 | ||
15081 | if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { | |
15082 | write_logfile_entry( | |
15083 | "Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n" | |
15084 | ); | |
15085 | } | |
15086 | $in_tabbing_disagreement = $input_line_number; | |
15087 | $first_tabbing_disagreement = $in_tabbing_disagreement | |
15088 | unless ($first_tabbing_disagreement); | |
15089 | } | |
15090 | } | |
15091 | else { | |
15092 | ||
15093 | if ($in_tabbing_disagreement) { | |
15094 | ||
15095 | if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { | |
15096 | write_logfile_entry( | |
15097 | "End indentation disagreement from input line $in_tabbing_disagreement\n" | |
15098 | ); | |
15099 | ||
15100 | if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { | |
15101 | write_logfile_entry( | |
15102 | "No further tabbing disagreements will be noted\n"); | |
15103 | } | |
15104 | } | |
15105 | $in_tabbing_disagreement = 0; | |
15106 | } | |
15107 | } | |
15108 | } | |
15109 | ||
15110 | ##################################################################### | |
15111 | # | |
15112 | # the Perl::Tidy::IndentationItem class supplies items which contain | |
15113 | # how much whitespace should be used at the start of a line | |
15114 | # | |
15115 | ##################################################################### | |
15116 | ||
15117 | package Perl::Tidy::IndentationItem; | |
15118 | ||
15119 | # Indexes for indentation items | |
15120 | use constant SPACES => 0; # total leading white spaces | |
15121 | use constant LEVEL => 1; # the indentation 'level' | |
15122 | use constant CI_LEVEL => 2; # the 'continuation level' | |
15123 | use constant AVAILABLE_SPACES => 3; # how many left spaces available | |
15124 | # for this level | |
15125 | use constant CLOSED => 4; # index where we saw closing '}' | |
15126 | use constant COMMA_COUNT => 5; # how many commas at this level? | |
15127 | use constant SEQUENCE_NUMBER => 6; # output batch number | |
15128 | use constant INDEX => 7; # index in output batch list | |
15129 | use constant HAVE_CHILD => 8; # any dependents? | |
15130 | use constant RECOVERABLE_SPACES => 9; # how many spaces to the right | |
15131 | # we would like to move to get | |
15132 | # alignment (negative if left) | |
15133 | use constant ALIGN_PAREN => 10; # do we want to try to align | |
15134 | # with an opening structure? | |
15135 | use constant MARKED => 11; # if visited by corrector logic | |
15136 | use constant STACK_DEPTH => 12; # indentation nesting depth | |
15137 | use constant STARTING_INDEX => 13; # first token index of this level | |
15138 | use constant ARROW_COUNT => 14; # how many =>'s | |
15139 | ||
15140 | sub new { | |
15141 | ||
15142 | # Create an 'indentation_item' which describes one level of leading | |
15143 | # whitespace when the '-lp' indentation is used. We return | |
15144 | # a reference to an anonymous array of associated variables. | |
15145 | # See above constants for storage scheme. | |
15146 | my ( | |
15147 | $class, $spaces, $level, | |
15148 | $ci_level, $available_spaces, $index, | |
15149 | $gnu_sequence_number, $align_paren, $stack_depth, | |
15150 | $starting_index, | |
15151 | ) | |
15152 | = @_; | |
15153 | my $closed = -1; | |
15154 | my $arrow_count = 0; | |
15155 | my $comma_count = 0; | |
15156 | my $have_child = 0; | |
15157 | my $want_right_spaces = 0; | |
15158 | my $marked = 0; | |
15159 | bless [ | |
15160 | $spaces, $level, $ci_level, | |
15161 | $available_spaces, $closed, $comma_count, | |
15162 | $gnu_sequence_number, $index, $have_child, | |
15163 | $want_right_spaces, $align_paren, $marked, | |
15164 | $stack_depth, $starting_index, $arrow_count, | |
15165 | ], $class; | |
15166 | } | |
15167 | ||
15168 | sub permanently_decrease_AVAILABLE_SPACES { | |
15169 | ||
15170 | # make a permanent reduction in the available indentation spaces | |
15171 | # at one indentation item. NOTE: if there are child nodes, their | |
15172 | # total SPACES must be reduced by the caller. | |
15173 | ||
15174 | my ( $item, $spaces_needed ) = @_; | |
15175 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | |
15176 | my $deleted_spaces = | |
15177 | ( $available_spaces > $spaces_needed ) | |
15178 | ? $spaces_needed | |
15179 | : $available_spaces; | |
15180 | $item->decrease_AVAILABLE_SPACES($deleted_spaces); | |
15181 | $item->decrease_SPACES($deleted_spaces); | |
15182 | $item->set_RECOVERABLE_SPACES(0); | |
15183 | ||
15184 | return $deleted_spaces; | |
15185 | } | |
15186 | ||
15187 | sub tentatively_decrease_AVAILABLE_SPACES { | |
15188 | ||
15189 | # We are asked to tentatively delete $spaces_needed of indentation | |
15190 | # for a indentation item. We may want to undo this later. NOTE: if | |
15191 | # there are child nodes, their total SPACES must be reduced by the | |
15192 | # caller. | |
15193 | my ( $item, $spaces_needed ) = @_; | |
15194 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | |
15195 | my $deleted_spaces = | |
15196 | ( $available_spaces > $spaces_needed ) | |
15197 | ? $spaces_needed | |
15198 | : $available_spaces; | |
15199 | $item->decrease_AVAILABLE_SPACES($deleted_spaces); | |
15200 | $item->decrease_SPACES($deleted_spaces); | |
15201 | $item->increase_RECOVERABLE_SPACES($deleted_spaces); | |
15202 | return $deleted_spaces; | |
15203 | } | |
15204 | ||
15205 | sub get_STACK_DEPTH { | |
15206 | my $self = shift; | |
15207 | return $self->[STACK_DEPTH]; | |
15208 | } | |
15209 | ||
15210 | sub get_SPACES { | |
15211 | my $self = shift; | |
15212 | return $self->[SPACES]; | |
15213 | } | |
15214 | ||
15215 | sub get_MARKED { | |
15216 | my $self = shift; | |
15217 | return $self->[MARKED]; | |
15218 | } | |
15219 | ||
15220 | sub set_MARKED { | |
15221 | my ( $self, $value ) = @_; | |
15222 | if ( defined($value) ) { | |
15223 | $self->[MARKED] = $value; | |
15224 | } | |
15225 | return $self->[MARKED]; | |
15226 | } | |
15227 | ||
15228 | sub get_AVAILABLE_SPACES { | |
15229 | my $self = shift; | |
15230 | return $self->[AVAILABLE_SPACES]; | |
15231 | } | |
15232 | ||
15233 | sub decrease_SPACES { | |
15234 | my ( $self, $value ) = @_; | |
15235 | if ( defined($value) ) { | |
15236 | $self->[SPACES] -= $value; | |
15237 | } | |
15238 | return $self->[SPACES]; | |
15239 | } | |
15240 | ||
15241 | sub decrease_AVAILABLE_SPACES { | |
15242 | my ( $self, $value ) = @_; | |
15243 | if ( defined($value) ) { | |
15244 | $self->[AVAILABLE_SPACES] -= $value; | |
15245 | } | |
15246 | return $self->[AVAILABLE_SPACES]; | |
15247 | } | |
15248 | ||
15249 | sub get_ALIGN_PAREN { | |
15250 | my $self = shift; | |
15251 | return $self->[ALIGN_PAREN]; | |
15252 | } | |
15253 | ||
15254 | sub get_RECOVERABLE_SPACES { | |
15255 | my $self = shift; | |
15256 | return $self->[RECOVERABLE_SPACES]; | |
15257 | } | |
15258 | ||
15259 | sub set_RECOVERABLE_SPACES { | |
15260 | my ( $self, $value ) = @_; | |
15261 | if ( defined($value) ) { | |
15262 | $self->[RECOVERABLE_SPACES] = $value; | |
15263 | } | |
15264 | return $self->[RECOVERABLE_SPACES]; | |
15265 | } | |
15266 | ||
15267 | sub increase_RECOVERABLE_SPACES { | |
15268 | my ( $self, $value ) = @_; | |
15269 | if ( defined($value) ) { | |
15270 | $self->[RECOVERABLE_SPACES] += $value; | |
15271 | } | |
15272 | return $self->[RECOVERABLE_SPACES]; | |
15273 | } | |
15274 | ||
15275 | sub get_CI_LEVEL { | |
15276 | my $self = shift; | |
15277 | return $self->[CI_LEVEL]; | |
15278 | } | |
15279 | ||
15280 | sub get_LEVEL { | |
15281 | my $self = shift; | |
15282 | return $self->[LEVEL]; | |
15283 | } | |
15284 | ||
15285 | sub get_SEQUENCE_NUMBER { | |
15286 | my $self = shift; | |
15287 | return $self->[SEQUENCE_NUMBER]; | |
15288 | } | |
15289 | ||
15290 | sub get_INDEX { | |
15291 | my $self = shift; | |
15292 | return $self->[INDEX]; | |
15293 | } | |
15294 | ||
15295 | sub get_STARTING_INDEX { | |
15296 | my $self = shift; | |
15297 | return $self->[STARTING_INDEX]; | |
15298 | } | |
15299 | ||
15300 | sub set_HAVE_CHILD { | |
15301 | my ( $self, $value ) = @_; | |
15302 | if ( defined($value) ) { | |
15303 | $self->[HAVE_CHILD] = $value; | |
15304 | } | |
15305 | return $self->[HAVE_CHILD]; | |
15306 | } | |
15307 | ||
15308 | sub get_HAVE_CHILD { | |
15309 | my $self = shift; | |
15310 | return $self->[HAVE_CHILD]; | |
15311 | } | |
15312 | ||
15313 | sub set_ARROW_COUNT { | |
15314 | my ( $self, $value ) = @_; | |
15315 | if ( defined($value) ) { | |
15316 | $self->[ARROW_COUNT] = $value; | |
15317 | } | |
15318 | return $self->[ARROW_COUNT]; | |
15319 | } | |
15320 | ||
15321 | sub get_ARROW_COUNT { | |
15322 | my $self = shift; | |
15323 | return $self->[ARROW_COUNT]; | |
15324 | } | |
15325 | ||
15326 | sub set_COMMA_COUNT { | |
15327 | my ( $self, $value ) = @_; | |
15328 | if ( defined($value) ) { | |
15329 | $self->[COMMA_COUNT] = $value; | |
15330 | } | |
15331 | return $self->[COMMA_COUNT]; | |
15332 | } | |
15333 | ||
15334 | sub get_COMMA_COUNT { | |
15335 | my $self = shift; | |
15336 | return $self->[COMMA_COUNT]; | |
15337 | } | |
15338 | ||
15339 | sub set_CLOSED { | |
15340 | my ( $self, $value ) = @_; | |
15341 | if ( defined($value) ) { | |
15342 | $self->[CLOSED] = $value; | |
15343 | } | |
15344 | return $self->[CLOSED]; | |
15345 | } | |
15346 | ||
15347 | sub get_CLOSED { | |
15348 | my $self = shift; | |
15349 | return $self->[CLOSED]; | |
15350 | } | |
15351 | ||
15352 | ##################################################################### | |
15353 | # | |
15354 | # the Perl::Tidy::VerticalAligner::Line class supplies an object to | |
15355 | # contain a single output line | |
15356 | # | |
15357 | ##################################################################### | |
15358 | ||
15359 | package Perl::Tidy::VerticalAligner::Line; | |
15360 | ||
15361 | { | |
15362 | ||
15363 | use strict; | |
15364 | use Carp; | |
15365 | ||
15366 | use constant JMAX => 0; | |
15367 | use constant JMAX_ORIGINAL_LINE => 1; | |
15368 | use constant RTOKENS => 2; | |
15369 | use constant RFIELDS => 3; | |
15370 | use constant RPATTERNS => 4; | |
15371 | use constant INDENTATION => 5; | |
15372 | use constant LEADING_SPACE_COUNT => 6; | |
15373 | use constant OUTDENT_LONG_LINES => 7; | |
15374 | use constant LIST_TYPE => 8; | |
15375 | use constant IS_HANGING_SIDE_COMMENT => 9; | |
15376 | use constant RALIGNMENTS => 10; | |
15377 | use constant MAXIMUM_LINE_LENGTH => 11; | |
15378 | use constant RVERTICAL_TIGHTNESS_FLAGS => 12; | |
15379 | ||
15380 | my %_index_map; | |
15381 | $_index_map{jmax} = JMAX; | |
15382 | $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE; | |
15383 | $_index_map{rtokens} = RTOKENS; | |
15384 | $_index_map{rfields} = RFIELDS; | |
15385 | $_index_map{rpatterns} = RPATTERNS; | |
15386 | $_index_map{indentation} = INDENTATION; | |
15387 | $_index_map{leading_space_count} = LEADING_SPACE_COUNT; | |
15388 | $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES; | |
15389 | $_index_map{list_type} = LIST_TYPE; | |
15390 | $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT; | |
15391 | $_index_map{ralignments} = RALIGNMENTS; | |
15392 | $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH; | |
15393 | $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS; | |
15394 | ||
15395 | my @_default_data = (); | |
15396 | $_default_data[JMAX] = undef; | |
15397 | $_default_data[JMAX_ORIGINAL_LINE] = undef; | |
15398 | $_default_data[RTOKENS] = undef; | |
15399 | $_default_data[RFIELDS] = undef; | |
15400 | $_default_data[RPATTERNS] = undef; | |
15401 | $_default_data[INDENTATION] = undef; | |
15402 | $_default_data[LEADING_SPACE_COUNT] = undef; | |
15403 | $_default_data[OUTDENT_LONG_LINES] = undef; | |
15404 | $_default_data[LIST_TYPE] = undef; | |
15405 | $_default_data[IS_HANGING_SIDE_COMMENT] = undef; | |
15406 | $_default_data[RALIGNMENTS] = []; | |
15407 | $_default_data[MAXIMUM_LINE_LENGTH] = undef; | |
15408 | $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef; | |
15409 | ||
15410 | { | |
15411 | ||
15412 | # methods to count object population | |
15413 | my $_count = 0; | |
15414 | sub get_count { $_count; } | |
15415 | sub _increment_count { ++$_count } | |
15416 | sub _decrement_count { --$_count } | |
15417 | } | |
15418 | ||
15419 | # Constructor may be called as a class method | |
15420 | sub new { | |
15421 | my ( $caller, %arg ) = @_; | |
15422 | my $caller_is_obj = ref($caller); | |
15423 | my $class = $caller_is_obj || $caller; | |
15424 | no strict "refs"; | |
15425 | my $self = bless [], $class; | |
15426 | ||
15427 | $self->[RALIGNMENTS] = []; | |
15428 | ||
15429 | my $index; | |
15430 | foreach ( keys %_index_map ) { | |
15431 | $index = $_index_map{$_}; | |
15432 | if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } | |
15433 | elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } | |
15434 | else { $self->[$index] = $_default_data[$index] } | |
15435 | } | |
15436 | ||
15437 | $self->_increment_count(); | |
15438 | return $self; | |
15439 | } | |
15440 | ||
15441 | sub DESTROY { | |
15442 | $_[0]->_decrement_count(); | |
15443 | } | |
15444 | ||
15445 | sub get_jmax { $_[0]->[JMAX] } | |
15446 | sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] } | |
15447 | sub get_rtokens { $_[0]->[RTOKENS] } | |
15448 | sub get_rfields { $_[0]->[RFIELDS] } | |
15449 | sub get_rpatterns { $_[0]->[RPATTERNS] } | |
15450 | sub get_indentation { $_[0]->[INDENTATION] } | |
15451 | sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] } | |
15452 | sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] } | |
15453 | sub get_list_type { $_[0]->[LIST_TYPE] } | |
15454 | sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] } | |
15455 | sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] } | |
15456 | ||
15457 | sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) } | |
15458 | sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] } | |
15459 | sub get_alignments { @{ $_[0]->[RALIGNMENTS] } } | |
15460 | sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() } | |
15461 | ||
15462 | sub get_starting_column { | |
15463 | $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column(); | |
15464 | } | |
15465 | ||
15466 | sub increment_column { | |
15467 | $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] ); | |
15468 | } | |
15469 | sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; } | |
15470 | ||
15471 | sub current_field_width { | |
15472 | my $self = shift; | |
15473 | my ($j) = @_; | |
15474 | if ( $j == 0 ) { | |
15475 | return $self->get_column($j); | |
15476 | } | |
15477 | else { | |
15478 | return $self->get_column($j) - $self->get_column( $j - 1 ); | |
15479 | } | |
15480 | } | |
15481 | ||
15482 | sub field_width_growth { | |
15483 | my $self = shift; | |
15484 | my $j = shift; | |
15485 | return $self->get_column($j) - $self->get_starting_column($j); | |
15486 | } | |
15487 | ||
15488 | sub starting_field_width { | |
15489 | my $self = shift; | |
15490 | my $j = shift; | |
15491 | if ( $j == 0 ) { | |
15492 | return $self->get_starting_column($j); | |
15493 | } | |
15494 | else { | |
15495 | return $self->get_starting_column($j) - | |
15496 | $self->get_starting_column( $j - 1 ); | |
15497 | } | |
15498 | } | |
15499 | ||
15500 | sub increase_field_width { | |
15501 | ||
15502 | my $self = shift; | |
15503 | my ( $j, $pad ) = @_; | |
15504 | my $jmax = $self->get_jmax(); | |
15505 | for my $k ( $j .. $jmax ) { | |
15506 | $self->increment_column( $k, $pad ); | |
15507 | } | |
15508 | } | |
15509 | ||
15510 | sub get_available_space_on_right { | |
15511 | my $self = shift; | |
15512 | my $jmax = $self->get_jmax(); | |
15513 | return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax); | |
15514 | } | |
15515 | ||
15516 | sub set_jmax { $_[0]->[JMAX] = $_[1] } | |
15517 | sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] } | |
15518 | sub set_rtokens { $_[0]->[RTOKENS] = $_[1] } | |
15519 | sub set_rfields { $_[0]->[RFIELDS] = $_[1] } | |
15520 | sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] } | |
15521 | sub set_indentation { $_[0]->[INDENTATION] = $_[1] } | |
15522 | sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] } | |
15523 | sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] } | |
15524 | sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] } | |
15525 | sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] } | |
15526 | sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] } | |
15527 | ||
15528 | } | |
15529 | ||
15530 | ##################################################################### | |
15531 | # | |
15532 | # the Perl::Tidy::VerticalAligner::Alignment class holds information | |
15533 | # on a single column being aligned | |
15534 | # | |
15535 | ##################################################################### | |
15536 | package Perl::Tidy::VerticalAligner::Alignment; | |
15537 | ||
15538 | { | |
15539 | ||
15540 | use strict; | |
15541 | ||
15542 | #use Carp; | |
15543 | ||
15544 | # Symbolic array indexes | |
15545 | use constant COLUMN => 0; # the current column number | |
15546 | use constant STARTING_COLUMN => 1; # column number when created | |
15547 | use constant MATCHING_TOKEN => 2; # what token we are matching | |
15548 | use constant STARTING_LINE => 3; # the line index of creation | |
15549 | use constant ENDING_LINE => 4; # the most recent line to use it | |
15550 | use constant SAVED_COLUMN => 5; # the most recent line to use it | |
15551 | use constant SERIAL_NUMBER => 6; # unique number for this alignment | |
15552 | # (just its index in an array) | |
15553 | ||
15554 | # Correspondence between variables and array indexes | |
15555 | my %_index_map; | |
15556 | $_index_map{column} = COLUMN; | |
15557 | $_index_map{starting_column} = STARTING_COLUMN; | |
15558 | $_index_map{matching_token} = MATCHING_TOKEN; | |
15559 | $_index_map{starting_line} = STARTING_LINE; | |
15560 | $_index_map{ending_line} = ENDING_LINE; | |
15561 | $_index_map{saved_column} = SAVED_COLUMN; | |
15562 | $_index_map{serial_number} = SERIAL_NUMBER; | |
15563 | ||
15564 | my @_default_data = (); | |
15565 | $_default_data[COLUMN] = undef; | |
15566 | $_default_data[STARTING_COLUMN] = undef; | |
15567 | $_default_data[MATCHING_TOKEN] = undef; | |
15568 | $_default_data[STARTING_LINE] = undef; | |
15569 | $_default_data[ENDING_LINE] = undef; | |
15570 | $_default_data[SAVED_COLUMN] = undef; | |
15571 | $_default_data[SERIAL_NUMBER] = undef; | |
15572 | ||
15573 | # class population count | |
15574 | { | |
15575 | my $_count = 0; | |
15576 | sub get_count { $_count; } | |
15577 | sub _increment_count { ++$_count } | |
15578 | sub _decrement_count { --$_count } | |
15579 | } | |
15580 | ||
15581 | # constructor | |
15582 | sub new { | |
15583 | my ( $caller, %arg ) = @_; | |
15584 | my $caller_is_obj = ref($caller); | |
15585 | my $class = $caller_is_obj || $caller; | |
15586 | no strict "refs"; | |
15587 | my $self = bless [], $class; | |
15588 | ||
15589 | foreach ( keys %_index_map ) { | |
15590 | my $index = $_index_map{$_}; | |
15591 | if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } | |
15592 | elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } | |
15593 | else { $self->[$index] = $_default_data[$index] } | |
15594 | } | |
15595 | $self->_increment_count(); | |
15596 | return $self; | |
15597 | } | |
15598 | ||
15599 | sub DESTROY { | |
15600 | $_[0]->_decrement_count(); | |
15601 | } | |
15602 | ||
15603 | sub get_column { return $_[0]->[COLUMN] } | |
15604 | sub get_starting_column { return $_[0]->[STARTING_COLUMN] } | |
15605 | sub get_matching_token { return $_[0]->[MATCHING_TOKEN] } | |
15606 | sub get_starting_line { return $_[0]->[STARTING_LINE] } | |
15607 | sub get_ending_line { return $_[0]->[ENDING_LINE] } | |
15608 | sub get_serial_number { return $_[0]->[SERIAL_NUMBER] } | |
15609 | ||
15610 | sub set_column { $_[0]->[COLUMN] = $_[1] } | |
15611 | sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] } | |
15612 | sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] } | |
15613 | sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] } | |
15614 | sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] } | |
15615 | sub increment_column { $_[0]->[COLUMN] += $_[1] } | |
15616 | ||
15617 | sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] } | |
15618 | sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] } | |
15619 | ||
15620 | } | |
15621 | ||
15622 | package Perl::Tidy::VerticalAligner; | |
15623 | ||
15624 | # The Perl::Tidy::VerticalAligner package collects output lines and | |
15625 | # attempts to line up certain common tokens, such as => and #, which are | |
15626 | # identified by the calling routine. | |
15627 | # | |
15628 | # There are two main routines: append_line and flush. Append acts as a | |
15629 | # storage buffer, collecting lines into a group which can be vertically | |
15630 | # aligned. When alignment is no longer possible or desirable, it dumps | |
15631 | # the group to flush. | |
15632 | # | |
15633 | # append_line -----> flush | |
15634 | # | |
15635 | # collects writes | |
15636 | # vertical one | |
15637 | # groups group | |
15638 | ||
15639 | BEGIN { | |
15640 | ||
15641 | # Caution: these debug flags produce a lot of output | |
15642 | # They should all be 0 except when debugging small scripts | |
15643 | ||
15644 | use constant VALIGN_DEBUG_FLAG_APPEND => 0; | |
15645 | use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; | |
15646 | ||
15647 | my $debug_warning = sub { | |
15648 | print "VALIGN_DEBUGGING with key $_[0]\n"; | |
15649 | }; | |
15650 | ||
15651 | VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); | |
15652 | VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); | |
15653 | ||
15654 | } | |
15655 | ||
15656 | use vars qw( | |
15657 | $vertical_aligner_self | |
15658 | $current_line | |
15659 | $maximum_alignment_index | |
15660 | $ralignment_list | |
15661 | $maximum_jmax_seen | |
15662 | $minimum_jmax_seen | |
15663 | $previous_minimum_jmax_seen | |
15664 | $previous_maximum_jmax_seen | |
15665 | $maximum_line_index | |
15666 | $group_level | |
15667 | $group_type | |
15668 | $group_maximum_gap | |
15669 | $marginal_match | |
15670 | $last_group_level_written | |
15671 | $last_leading_space_count | |
15672 | $extra_indent_ok | |
15673 | $zero_count | |
15674 | @group_lines | |
15675 | $last_comment_column | |
15676 | $last_side_comment_line_number | |
15677 | $last_side_comment_length | |
15678 | $last_side_comment_level | |
15679 | $outdented_line_count | |
15680 | $first_outdented_line_at | |
15681 | $last_outdented_line_at | |
15682 | $diagnostics_object | |
15683 | $logger_object | |
15684 | $file_writer_object | |
15685 | @side_comment_history | |
15686 | $comment_leading_space_count | |
15687 | ||
15688 | $cached_line_text | |
15689 | $cached_line_type | |
15690 | $cached_line_flag | |
15691 | $cached_seqno | |
15692 | $cached_line_valid | |
15693 | ||
15694 | $rOpts | |
15695 | ||
15696 | $rOpts_maximum_line_length | |
15697 | $rOpts_continuation_indentation | |
15698 | $rOpts_indent_columns | |
15699 | $rOpts_tabs | |
15700 | $rOpts_entab_leading_whitespace | |
15701 | ||
15702 | $rOpts_minimum_space_to_comment | |
15703 | ||
15704 | ); | |
15705 | ||
15706 | sub initialize { | |
15707 | ||
15708 | my $class; | |
15709 | ||
15710 | ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) | |
15711 | = @_; | |
15712 | ||
15713 | # variables describing the entire space group: | |
15714 | ||
15715 | $ralignment_list = []; | |
15716 | $group_level = 0; | |
15717 | $last_group_level_written = -1; | |
15718 | $extra_indent_ok = 0; # can we move all lines to the right? | |
15719 | $last_side_comment_length = 0; | |
15720 | $maximum_jmax_seen = 0; | |
15721 | $minimum_jmax_seen = 0; | |
15722 | $previous_minimum_jmax_seen = 0; | |
15723 | $previous_maximum_jmax_seen = 0; | |
15724 | ||
15725 | # variables describing each line of the group | |
15726 | @group_lines = (); # list of all lines in group | |
15727 | ||
15728 | $outdented_line_count = 0; | |
15729 | $first_outdented_line_at = 0; | |
15730 | $last_outdented_line_at = 0; | |
15731 | $last_side_comment_line_number = 0; | |
15732 | $last_side_comment_level = -1; | |
15733 | ||
15734 | # most recent 3 side comments; [ line number, column ] | |
15735 | $side_comment_history[0] = [ -300, 0 ]; | |
15736 | $side_comment_history[1] = [ -200, 0 ]; | |
15737 | $side_comment_history[2] = [ -100, 0 ]; | |
15738 | ||
15739 | # write_leader_and_string cache: | |
15740 | $cached_line_text = ""; | |
15741 | $cached_line_type = 0; | |
15742 | $cached_line_flag = 0; | |
15743 | $cached_seqno = 0; | |
15744 | $cached_line_valid = 0; | |
15745 | ||
15746 | # frequently used parameters | |
15747 | $rOpts_indent_columns = $rOpts->{'indent-columns'}; | |
15748 | $rOpts_tabs = $rOpts->{'tabs'}; | |
15749 | $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; | |
15750 | $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; | |
15751 | $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; | |
15752 | ||
15753 | forget_side_comment(); | |
15754 | ||
15755 | initialize_for_new_group(); | |
15756 | ||
15757 | $vertical_aligner_self = {}; | |
15758 | bless $vertical_aligner_self, $class; | |
15759 | return $vertical_aligner_self; | |
15760 | } | |
15761 | ||
15762 | sub initialize_for_new_group { | |
15763 | $maximum_line_index = -1; # lines in the current group | |
15764 | $maximum_alignment_index = -1; # alignments in current group | |
15765 | $zero_count = 0; # count consecutive lines without tokens | |
15766 | $current_line = undef; # line being matched for alignment | |
15767 | $group_maximum_gap = 0; # largest gap introduced | |
15768 | $group_type = ""; | |
15769 | $marginal_match = 0; | |
15770 | $comment_leading_space_count = 0; | |
15771 | $last_leading_space_count = 0; | |
15772 | } | |
15773 | ||
15774 | # interface to Perl::Tidy::Diagnostics routines | |
15775 | sub write_diagnostics { | |
15776 | if ($diagnostics_object) { | |
15777 | $diagnostics_object->write_diagnostics(@_); | |
15778 | } | |
15779 | } | |
15780 | ||
15781 | # interface to Perl::Tidy::Logger routines | |
15782 | sub warning { | |
15783 | if ($logger_object) { | |
15784 | $logger_object->warning(@_); | |
15785 | } | |
15786 | } | |
15787 | ||
15788 | sub write_logfile_entry { | |
15789 | if ($logger_object) { | |
15790 | $logger_object->write_logfile_entry(@_); | |
15791 | } | |
15792 | } | |
15793 | ||
15794 | sub report_definite_bug { | |
15795 | if ($logger_object) { | |
15796 | $logger_object->report_definite_bug(); | |
15797 | } | |
15798 | } | |
15799 | ||
15800 | sub get_SPACES { | |
15801 | ||
15802 | # return the number of leading spaces associated with an indentation | |
15803 | # variable $indentation is either a constant number of spaces or an | |
15804 | # object with a get_SPACES method. | |
15805 | my $indentation = shift; | |
15806 | return ref($indentation) ? $indentation->get_SPACES() : $indentation; | |
15807 | } | |
15808 | ||
15809 | sub get_RECOVERABLE_SPACES { | |
15810 | ||
15811 | # return the number of spaces (+ means shift right, - means shift left) | |
15812 | # that we would like to shift a group of lines with the same indentation | |
15813 | # to get them to line up with their opening parens | |
15814 | my $indentation = shift; | |
15815 | return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; | |
15816 | } | |
15817 | ||
15818 | sub get_STACK_DEPTH { | |
15819 | ||
15820 | my $indentation = shift; | |
15821 | return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0; | |
15822 | } | |
15823 | ||
15824 | sub make_alignment { | |
15825 | my ( $col, $token ) = @_; | |
15826 | ||
15827 | # make one new alignment at column $col which aligns token $token | |
15828 | ++$maximum_alignment_index; | |
15829 | my $alignment = new Perl::Tidy::VerticalAligner::Alignment( | |
15830 | column => $col, | |
15831 | starting_column => $col, | |
15832 | matching_token => $token, | |
15833 | starting_line => $maximum_line_index, | |
15834 | ending_line => $maximum_line_index, | |
15835 | serial_number => $maximum_alignment_index, | |
15836 | ); | |
15837 | $ralignment_list->[$maximum_alignment_index] = $alignment; | |
15838 | return $alignment; | |
15839 | } | |
15840 | ||
15841 | sub dump_alignments { | |
15842 | ||
15843 | "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; | |
15844 | for my $i ( 0 .. $maximum_alignment_index ) { | |
15845 | my $column = $ralignment_list->[$i]->get_column(); | |
15846 | my $starting_column = $ralignment_list->[$i]->get_starting_column(); | |
15847 | my $matching_token = $ralignment_list->[$i]->get_matching_token(); | |
15848 | my $starting_line = $ralignment_list->[$i]->get_starting_line(); | |
15849 | my $ending_line = $ralignment_list->[$i]->get_ending_line(); | |
15850 | ||
15851 | "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; | |
15852 | } | |
15853 | } | |
15854 | ||
15855 | sub save_alignment_columns { | |
15856 | for my $i ( 0 .. $maximum_alignment_index ) { | |
15857 | $ralignment_list->[$i]->save_column(); | |
15858 | } | |
15859 | } | |
15860 | ||
15861 | sub restore_alignment_columns { | |
15862 | for my $i ( 0 .. $maximum_alignment_index ) { | |
15863 | $ralignment_list->[$i]->restore_column(); | |
15864 | } | |
15865 | } | |
15866 | ||
15867 | sub forget_side_comment { | |
15868 | $last_comment_column = 0; | |
15869 | } | |
15870 | ||
15871 | sub append_line { | |
15872 | ||
15873 | # sub append is called to place one line in the current vertical group. | |
15874 | # | |
15875 | # The input parameters are: | |
15876 | # $level = indentation level of this line | |
15877 | # $rfields = reference to array of fields | |
15878 | # $rpatterns = reference to array of patterns, one per field | |
15879 | # $rtokens = reference to array of tokens starting fields 1,2,.. | |
15880 | # | |
15881 | # Here is an example of what this package does. In this example, | |
15882 | # we are trying to line up both the '=>' and the '#'. | |
15883 | # | |
15884 | # '18' => 'grave', # \` | |
15885 | # '19' => 'acute', # `' | |
15886 | # '20' => 'caron', # \v | |
15887 | # <-tabs-><f1-><--field 2 ---><-f3-> | |
15888 | # | | | | | |
15889 | # | | | | | |
15890 | # col1 col2 col3 col4 | |
15891 | # | |
15892 | # The calling routine has already broken the entire line into 3 fields as | |
15893 | # indicated. (So the work of identifying promising common tokens has | |
15894 | # already been done). | |
15895 | # | |
15896 | # In this example, there will be 2 tokens being matched: '=>' and '#'. | |
15897 | # They are the leading parts of fields 2 and 3, but we do need to know | |
15898 | # what they are so that we can dump a group of lines when these tokens | |
15899 | # change. | |
15900 | # | |
15901 | # The fields contain the actual characters of each field. The patterns | |
15902 | # are like the fields, but they contain mainly token types instead | |
15903 | # of tokens, so they have fewer characters. They are used to be | |
15904 | # sure we are matching fields of similar type. | |
15905 | # | |
15906 | # In this example, there will be 4 column indexes being adjusted. The | |
15907 | # first one is always at zero. The interior columns are at the start of | |
15908 | # the matching tokens, and the last one tracks the maximum line length. | |
15909 | # | |
15910 | # Basically, each time a new line comes in, it joins the current vertical | |
15911 | # group if possible. Otherwise it causes the current group to be dumped | |
15912 | # and a new group is started. | |
15913 | # | |
15914 | # For each new group member, the column locations are increased, as | |
15915 | # necessary, to make room for the new fields. When the group is finally | |
15916 | # output, these column numbers are used to compute the amount of spaces of | |
15917 | # padding needed for each field. | |
15918 | # | |
15919 | # Programming note: the fields are assumed not to have any tab characters. | |
15920 | # Tabs have been previously removed except for tabs in quoted strings and | |
15921 | # side comments. Tabs in these fields can mess up the column counting. | |
15922 | # The log file warns the user if there are any such tabs. | |
15923 | ||
15924 | my ( | |
15925 | $level, $level_end, | |
15926 | $indentation, $rfields, | |
15927 | $rtokens, $rpatterns, | |
15928 | $is_forced_break, $outdent_long_lines, | |
15929 | $is_terminal_statement, $do_not_pad, | |
15930 | $rvertical_tightness_flags, $level_jump, | |
15931 | ) | |
15932 | = @_; | |
15933 | ||
15934 | # number of fields is $jmax | |
15935 | # number of tokens between fields is $jmax-1 | |
15936 | my $jmax = $#{$rfields}; | |
15937 | $previous_minimum_jmax_seen = $minimum_jmax_seen; | |
15938 | $previous_maximum_jmax_seen = $maximum_jmax_seen; | |
15939 | ||
15940 | my $leading_space_count = get_SPACES($indentation); | |
15941 | ||
15942 | # set outdented flag to be sure we either align within statements or | |
15943 | # across statement boundaries, but not both. | |
15944 | my $is_outdented = $last_leading_space_count > $leading_space_count; | |
15945 | $last_leading_space_count = $leading_space_count; | |
15946 | ||
15947 | # Patch: undo for hanging side comment | |
15948 | my $is_hanging_side_comment = | |
15949 | ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); | |
15950 | $is_outdented = 0 if $is_hanging_side_comment; | |
15951 | ||
15952 | VALIGN_DEBUG_FLAG_APPEND0 && do { | |
15953 | ||
15954 | "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; | |
15955 | }; | |
15956 | ||
15957 | # Validate cached line if necessary: If we can produce a container | |
15958 | # with just 2 lines total by combining an existing cached opening | |
15959 | # token with the closing token to follow, then we will mark both | |
15960 | # cached flags as valid. | |
15961 | if ($rvertical_tightness_flags) { | |
15962 | if ( $maximum_line_index <= 0 | |
15963 | && $cached_line_type | |
15964 | && $rvertical_tightness_flags->[2] == $cached_seqno ) | |
15965 | { | |
15966 | $rvertical_tightness_flags->[3] ||= 1; | |
15967 | $cached_line_valid ||= 1; | |
15968 | } | |
15969 | } | |
15970 | ||
15971 | # do not join an opening block brace with an unbalanced line | |
15972 | # unless requested with a flag value of 2 | |
15973 | if ( $cached_line_type == 3 | |
15974 | && $maximum_line_index < 0 | |
15975 | && $cached_line_flag < 2 | |
15976 | && $level_jump != 0 ) | |
15977 | { | |
15978 | $cached_line_valid = 0; | |
15979 | } | |
15980 | ||
15981 | # patch until new aligner is finished | |
15982 | if ($do_not_pad) { my_flush() } | |
15983 | ||
15984 | # shouldn't happen: | |
15985 | if ( $level < 0 ) { $level = 0 } | |
15986 | ||
15987 | # do not align code across indentation level changes | |
15988 | if ( $level != $group_level || $is_outdented ) { | |
15989 | ||
15990 | # we are allowed to shift a group of lines to the right if its | |
15991 | # level is greater than the previous and next group | |
15992 | $extra_indent_ok = | |
15993 | ( $level < $group_level && $last_group_level_written < $group_level ); | |
15994 | ||
15995 | my_flush(); | |
15996 | ||
15997 | # If we know that this line will get flushed out by itself because | |
15998 | # of level changes, we can leave the extra_indent_ok flag set. | |
15999 | # That way, if we get an external flush call, we will still be | |
16000 | # able to do some -lp alignment if necessary. | |
16001 | $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); | |
16002 | ||
16003 | $group_level = $level; | |
16004 | ||
16005 | # wait until after the above flush to get the leading space | |
16006 | # count because it may have been changed if the -icp flag is in | |
16007 | # effect | |
16008 | $leading_space_count = get_SPACES($indentation); | |
16009 | ||
16010 | } | |
16011 | ||
16012 | # -------------------------------------------------------------------- | |
16013 | # Patch to collect outdentable block COMMENTS | |
16014 | # -------------------------------------------------------------------- | |
16015 | my $is_blank_line = ""; | |
16016 | my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); | |
16017 | if ( $group_type eq 'COMMENT' ) { | |
16018 | if ( | |
16019 | ( | |
16020 | $is_block_comment | |
16021 | && $outdent_long_lines | |
16022 | && $leading_space_count == $comment_leading_space_count | |
16023 | ) | |
16024 | || $is_blank_line | |
16025 | ) | |
16026 | { | |
16027 | $group_lines[ ++$maximum_line_index ] = $rfields->[0]; | |
16028 | return; | |
16029 | } | |
16030 | else { | |
16031 | my_flush(); | |
16032 | } | |
16033 | } | |
16034 | ||
16035 | # -------------------------------------------------------------------- | |
16036 | # Step 1. Handle simple line of code with no fields to match. | |
16037 | # -------------------------------------------------------------------- | |
16038 | if ( $jmax <= 0 ) { | |
16039 | $zero_count++; | |
16040 | ||
16041 | if ( $maximum_line_index >= 0 | |
16042 | && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) ) | |
16043 | { | |
16044 | ||
16045 | # flush the current group if it has some aligned columns.. | |
16046 | if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } | |
16047 | ||
16048 | # flush current group if we are just collecting side comments.. | |
16049 | elsif ( | |
16050 | ||
16051 | # ...and we haven't seen a comment lately | |
16052 | ( $zero_count > 3 ) | |
16053 | ||
16054 | # ..or if this new line doesn't fit to the left of the comments | |
16055 | || ( ( $leading_space_count + length( $$rfields[0] ) ) > | |
16056 | $group_lines[0]->get_column(0) ) | |
16057 | ) | |
16058 | { | |
16059 | my_flush(); | |
16060 | } | |
16061 | } | |
16062 | ||
16063 | # patch to start new COMMENT group if this comment may be outdented | |
16064 | if ( $is_block_comment | |
16065 | && $outdent_long_lines | |
16066 | && $maximum_line_index < 0 ) | |
16067 | { | |
16068 | $group_type = 'COMMENT'; | |
16069 | $comment_leading_space_count = $leading_space_count; | |
16070 | $group_lines[ ++$maximum_line_index ] = $rfields->[0]; | |
16071 | return; | |
16072 | } | |
16073 | ||
16074 | # just write this line directly if no current group, no side comment, | |
16075 | # and no space recovery is needed. | |
16076 | if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) ) | |
16077 | { | |
16078 | write_leader_and_string( $leading_space_count, $$rfields[0], 0, | |
16079 | $outdent_long_lines, $rvertical_tightness_flags ); | |
16080 | return; | |
16081 | } | |
16082 | } | |
16083 | else { | |
16084 | $zero_count = 0; | |
16085 | } | |
16086 | ||
16087 | # programming check: (shouldn't happen) | |
16088 | # an error here implies an incorrect call was made | |
16089 | if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { | |
16090 | warning( | |
16091 | "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" | |
16092 | ); | |
16093 | report_definite_bug(); | |
16094 | } | |
16095 | ||
16096 | # -------------------------------------------------------------------- | |
16097 | # create an object to hold this line | |
16098 | # -------------------------------------------------------------------- | |
16099 | my $new_line = new Perl::Tidy::VerticalAligner::Line( | |
16100 | jmax => $jmax, | |
16101 | jmax_original_line => $jmax, | |
16102 | rtokens => $rtokens, | |
16103 | rfields => $rfields, | |
16104 | rpatterns => $rpatterns, | |
16105 | indentation => $indentation, | |
16106 | leading_space_count => $leading_space_count, | |
16107 | outdent_long_lines => $outdent_long_lines, | |
16108 | list_type => "", | |
16109 | is_hanging_side_comment => $is_hanging_side_comment, | |
16110 | maximum_line_length => $rOpts->{'maximum-line-length'}, | |
16111 | rvertical_tightness_flags => $rvertical_tightness_flags, | |
16112 | ); | |
16113 | ||
16114 | # -------------------------------------------------------------------- | |
16115 | # It simplifies things to create a zero length side comment | |
16116 | # if none exists. | |
16117 | # -------------------------------------------------------------------- | |
16118 | make_side_comment( $new_line, $level_end ); | |
16119 | ||
16120 | # -------------------------------------------------------------------- | |
16121 | # Decide if this is a simple list of items. | |
16122 | # There are 3 list types: none, comma, comma-arrow. | |
16123 | # We use this below to be less restrictive in deciding what to align. | |
16124 | # -------------------------------------------------------------------- | |
16125 | if ($is_forced_break) { | |
16126 | decide_if_list($new_line); | |
16127 | } | |
16128 | ||
16129 | if ($current_line) { | |
16130 | ||
16131 | # -------------------------------------------------------------------- | |
16132 | # Allow hanging side comment to join current group, if any | |
16133 | # This will help keep side comments aligned, because otherwise we | |
16134 | # will have to start a new group, making alignment less likely. | |
16135 | # -------------------------------------------------------------------- | |
16136 | join_hanging_comment( $new_line, $current_line ) | |
16137 | if $is_hanging_side_comment; | |
16138 | ||
16139 | # -------------------------------------------------------------------- | |
16140 | # If there is just one previous line, and it has more fields | |
16141 | # than the new line, try to join fields together to get a match with | |
16142 | # the new line. At the present time, only a single leading '=' is | |
16143 | # allowed to be compressed out. This is useful in rare cases where | |
16144 | # a table is forced to use old breakpoints because of side comments, | |
16145 | # and the table starts out something like this: | |
16146 | # my %MonthChars = ('0', 'Jan', # side comment | |
16147 | # '1', 'Feb', | |
16148 | # '2', 'Mar', | |
16149 | # Eliminating the '=' field will allow the remaining fields to line up. | |
16150 | # This situation does not occur if there are no side comments | |
16151 | # because scan_list would put a break after the opening '('. | |
16152 | # -------------------------------------------------------------------- | |
16153 | eliminate_old_fields( $new_line, $current_line ); | |
16154 | ||
16155 | # -------------------------------------------------------------------- | |
16156 | # If the new line has more fields than the current group, | |
16157 | # see if we can match the first fields and combine the remaining | |
16158 | # fields of the new line. | |
16159 | # -------------------------------------------------------------------- | |
16160 | eliminate_new_fields( $new_line, $current_line ); | |
16161 | ||
16162 | # -------------------------------------------------------------------- | |
16163 | # Flush previous group unless all common tokens and patterns match.. | |
16164 | # -------------------------------------------------------------------- | |
16165 | check_match( $new_line, $current_line ); | |
16166 | ||
16167 | # -------------------------------------------------------------------- | |
16168 | # See if there is space for this line in the current group (if any) | |
16169 | # -------------------------------------------------------------------- | |
16170 | if ($current_line) { | |
16171 | check_fit( $new_line, $current_line ); | |
16172 | } | |
16173 | } | |
16174 | ||
16175 | # -------------------------------------------------------------------- | |
16176 | # Append this line to the current group (or start new group) | |
16177 | # -------------------------------------------------------------------- | |
16178 | accept_line($new_line); | |
16179 | ||
16180 | # Future update to allow this to vary: | |
16181 | $current_line = $new_line if ( $maximum_line_index == 0 ); | |
16182 | ||
16183 | # -------------------------------------------------------------------- | |
16184 | # Step 8. Some old debugging stuff | |
16185 | # -------------------------------------------------------------------- | |
16186 | VALIGN_DEBUG_FLAG_APPEND && do { | |
16187 | print "APPEND fields:"; | |
16188 | dump_array(@$rfields); | |
16189 | print "APPEND tokens:"; | |
16190 | dump_array(@$rtokens); | |
16191 | print "APPEND patterns:"; | |
16192 | dump_array(@$rpatterns); | |
16193 | dump_alignments(); | |
16194 | }; | |
16195 | } | |
16196 | ||
16197 | sub join_hanging_comment { | |
16198 | ||
16199 | my $line = shift; | |
16200 | my $jmax = $line->get_jmax(); | |
16201 | return 0 unless $jmax == 1; # must be 2 fields | |
16202 | my $rtokens = $line->get_rtokens(); | |
16203 | return 0 unless $$rtokens[0] eq '#'; # the second field is a comment.. | |
16204 | my $rfields = $line->get_rfields(); | |
16205 | return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty... | |
16206 | my $old_line = shift; | |
16207 | my $maximum_field_index = $old_line->get_jmax(); | |
16208 | return 0 | |
16209 | unless $maximum_field_index > $jmax; # the current line has more fields | |
16210 | my $rpatterns = $line->get_rpatterns(); | |
16211 | ||
16212 | $line->set_is_hanging_side_comment(1); | |
16213 | $jmax = $maximum_field_index; | |
16214 | $line->set_jmax($jmax); | |
16215 | $$rfields[$jmax] = $$rfields[1]; | |
16216 | $$rtokens[ $jmax - 1 ] = $$rtokens[0]; | |
16217 | $$rpatterns[ $jmax - 1 ] = $$rpatterns[0]; | |
16218 | for ( my $j = 1 ; $j < $jmax ; $j++ ) { | |
16219 | $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why? | |
16220 | $$rtokens[ $j - 1 ] = ""; | |
16221 | $$rpatterns[ $j - 1 ] = ""; | |
16222 | } | |
16223 | return 1; | |
16224 | } | |
16225 | ||
16226 | sub eliminate_old_fields { | |
16227 | ||
16228 | my $new_line = shift; | |
16229 | my $jmax = $new_line->get_jmax(); | |
16230 | if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } | |
16231 | if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } | |
16232 | ||
16233 | # there must be one previous line | |
16234 | return unless ( $maximum_line_index == 0 ); | |
16235 | ||
16236 | my $old_line = shift; | |
16237 | my $maximum_field_index = $old_line->get_jmax(); | |
16238 | ||
16239 | # this line must have fewer fields | |
16240 | return unless $maximum_field_index > $jmax; | |
16241 | ||
16242 | # Identify specific cases where field elimination is allowed: | |
16243 | # case=1: both lines have comma-separated lists, and the first | |
16244 | # line has an equals | |
16245 | # case=2: both lines have leading equals | |
16246 | ||
16247 | # case 1 is the default | |
16248 | my $case = 1; | |
16249 | ||
16250 | # See if case 2: both lines have leading '=' | |
16251 | # We'll require smiliar leading patterns in this case | |
16252 | my $old_rtokens = $old_line->get_rtokens(); | |
16253 | my $rtokens = $new_line->get_rtokens(); | |
16254 | my $rpatterns = $new_line->get_rpatterns(); | |
16255 | my $old_rpatterns = $old_line->get_rpatterns(); | |
16256 | if ( $rtokens->[0] =~ /^=\d*$/ | |
16257 | && $old_rtokens->[0] eq $rtokens->[0] | |
16258 | && $old_rpatterns->[0] eq $rpatterns->[0] ) | |
16259 | { | |
16260 | $case = 2; | |
16261 | } | |
16262 | ||
16263 | # not too many fewer fields in new line for case 1 | |
16264 | return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); | |
16265 | ||
16266 | # case 1 must have side comment | |
16267 | my $old_rfields = $old_line->get_rfields(); | |
16268 | return | |
16269 | if ( $case == 1 | |
16270 | && length( $$old_rfields[$maximum_field_index] ) == 0 ); | |
16271 | ||
16272 | my $rfields = $new_line->get_rfields(); | |
16273 | ||
16274 | my $hid_equals = 0; | |
16275 | ||
16276 | my @new_alignments = (); | |
16277 | my @new_fields = (); | |
16278 | my @new_matching_patterns = (); | |
16279 | my @new_matching_tokens = (); | |
16280 | ||
16281 | my $j = 0; | |
16282 | my $k; | |
16283 | my $current_field = ''; | |
16284 | my $current_pattern = ''; | |
16285 | ||
16286 | # loop over all old tokens | |
16287 | my $in_match = 0; | |
16288 | for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) { | |
16289 | $current_field .= $$old_rfields[$k]; | |
16290 | $current_pattern .= $$old_rpatterns[$k]; | |
16291 | last if ( $j > $jmax - 1 ); | |
16292 | ||
16293 | if ( $$old_rtokens[$k] eq $$rtokens[$j] ) { | |
16294 | $in_match = 1; | |
16295 | $new_fields[$j] = $current_field; | |
16296 | $new_matching_patterns[$j] = $current_pattern; | |
16297 | $current_field = ''; | |
16298 | $current_pattern = ''; | |
16299 | $new_matching_tokens[$j] = $$old_rtokens[$k]; | |
16300 | $new_alignments[$j] = $old_line->get_alignment($k); | |
16301 | $j++; | |
16302 | } | |
16303 | else { | |
16304 | ||
16305 | if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) { | |
16306 | last if ( $case == 2 ); # avoid problems with stuff | |
16307 | # like: $a=$b=$c=$d; | |
16308 | $hid_equals = 1; | |
16309 | } | |
16310 | last | |
16311 | if ( $in_match && $case == 1 ) | |
16312 | ; # disallow gaps in matching field types in case 1 | |
16313 | } | |
16314 | } | |
16315 | ||
16316 | # Modify the current state if we are successful. | |
16317 | # We must exactly reach the ends of both lists for success. | |
16318 | if ( ( $j == $jmax ) | |
16319 | && ( $current_field eq '' ) | |
16320 | && ( $case != 1 || $hid_equals ) ) | |
16321 | { | |
16322 | $k = $maximum_field_index; | |
16323 | $current_field .= $$old_rfields[$k]; | |
16324 | $current_pattern .= $$old_rpatterns[$k]; | |
16325 | $new_fields[$j] = $current_field; | |
16326 | $new_matching_patterns[$j] = $current_pattern; | |
16327 | ||
16328 | $new_alignments[$j] = $old_line->get_alignment($k); | |
16329 | $maximum_field_index = $j; | |
16330 | ||
16331 | $old_line->set_alignments(@new_alignments); | |
16332 | $old_line->set_jmax($jmax); | |
16333 | $old_line->set_rtokens( \@new_matching_tokens ); | |
16334 | $old_line->set_rfields( \@new_fields ); | |
16335 | $old_line->set_rpatterns( \@$rpatterns ); | |
16336 | } | |
16337 | } | |
16338 | ||
16339 | # create an empty side comment if none exists | |
16340 | sub make_side_comment { | |
16341 | my $new_line = shift; | |
16342 | my $level_end = shift; | |
16343 | my $jmax = $new_line->get_jmax(); | |
16344 | my $rtokens = $new_line->get_rtokens(); | |
16345 | ||
16346 | # if line does not have a side comment... | |
16347 | if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) { | |
16348 | my $rfields = $new_line->get_rfields(); | |
16349 | my $rpatterns = $new_line->get_rpatterns(); | |
16350 | $$rtokens[$jmax] = '#'; | |
16351 | $$rfields[ ++$jmax ] = ''; | |
16352 | $$rpatterns[$jmax] = '#'; | |
16353 | $new_line->set_jmax($jmax); | |
16354 | $new_line->set_jmax_original_line($jmax); | |
16355 | } | |
16356 | ||
16357 | # line has a side comment.. | |
16358 | else { | |
16359 | ||
16360 | # don't remember old side comment location for very long | |
16361 | my $line_number = $vertical_aligner_self->get_output_line_number(); | |
16362 | my $rfields = $new_line->get_rfields(); | |
16363 | if ( | |
16364 | $line_number - $last_side_comment_line_number > 12 | |
16365 | ||
16366 | # and don't remember comment location across block level changes | |
16367 | || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ ) | |
16368 | ) | |
16369 | { | |
16370 | forget_side_comment(); | |
16371 | } | |
16372 | $last_side_comment_line_number = $line_number; | |
16373 | $last_side_comment_level = $level_end; | |
16374 | } | |
16375 | } | |
16376 | ||
16377 | sub decide_if_list { | |
16378 | ||
16379 | my $line = shift; | |
16380 | ||
16381 | # A list will be taken to be a line with a forced break in which all | |
16382 | # of the field separators are commas or comma-arrows (except for the | |
16383 | # trailing #) | |
16384 | ||
16385 | # List separator tokens are things like ',3' or '=>2', | |
16386 | # where the trailing digit is the nesting depth. Allow braces | |
16387 | # to allow nested list items. | |
16388 | my $rtokens = $line->get_rtokens(); | |
16389 | my $test_token = $$rtokens[0]; | |
16390 | if ( $test_token =~ /^(\,|=>)/ ) { | |
16391 | my $list_type = $test_token; | |
16392 | my $jmax = $line->get_jmax(); | |
16393 | ||
16394 | foreach ( 1 .. $jmax - 2 ) { | |
16395 | if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) { | |
16396 | $list_type = ""; | |
16397 | last; | |
16398 | } | |
16399 | } | |
16400 | $line->set_list_type($list_type); | |
16401 | } | |
16402 | } | |
16403 | ||
16404 | sub eliminate_new_fields { | |
16405 | ||
16406 | return unless ( $maximum_line_index >= 0 ); | |
16407 | my $new_line = shift; | |
16408 | my $old_line = shift; | |
16409 | my $jmax = $new_line->get_jmax(); | |
16410 | ||
16411 | my $old_rtokens = $old_line->get_rtokens(); | |
16412 | my $rtokens = $new_line->get_rtokens(); | |
16413 | my $is_assignment = | |
16414 | ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); | |
16415 | ||
16416 | # must be monotonic variation | |
16417 | return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); | |
16418 | ||
16419 | # must be more fields in the new line | |
16420 | my $maximum_field_index = $old_line->get_jmax(); | |
16421 | return unless ( $maximum_field_index < $jmax ); | |
16422 | ||
16423 | unless ($is_assignment) { | |
16424 | return | |
16425 | unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) | |
16426 | ; # only if monotonic | |
16427 | ||
16428 | # never combine fields of a comma list | |
16429 | return | |
16430 | unless ( $maximum_field_index > 1 ) | |
16431 | && ( $new_line->get_list_type() !~ /^,/ ); | |
16432 | } | |
16433 | ||
16434 | my $rfields = $new_line->get_rfields(); | |
16435 | my $rpatterns = $new_line->get_rpatterns(); | |
16436 | my $old_rpatterns = $old_line->get_rpatterns(); | |
16437 | ||
16438 | # loop over all old tokens except comment | |
16439 | my $match = 1; | |
16440 | my $k; | |
16441 | for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) { | |
16442 | if ( ( $$old_rtokens[$k] ne $$rtokens[$k] ) | |
16443 | || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) ) | |
16444 | { | |
16445 | $match = 0; | |
16446 | last; | |
16447 | } | |
16448 | } | |
16449 | ||
16450 | # first tokens agree, so combine new tokens | |
16451 | if ($match) { | |
16452 | for $k ( $maximum_field_index .. $jmax - 1 ) { | |
16453 | ||
16454 | $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k]; | |
16455 | $$rfields[$k] = ""; | |
16456 | $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k]; | |
16457 | $$rpatterns[$k] = ""; | |
16458 | } | |
16459 | ||
16460 | $$rtokens[ $maximum_field_index - 1 ] = '#'; | |
16461 | $$rfields[$maximum_field_index] = $$rfields[$jmax]; | |
16462 | $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax]; | |
16463 | $jmax = $maximum_field_index; | |
16464 | } | |
16465 | $new_line->set_jmax($jmax); | |
16466 | } | |
16467 | ||
16468 | sub check_match { | |
16469 | ||
16470 | my $new_line = shift; | |
16471 | my $old_line = shift; | |
16472 | ||
16473 | my $jmax = $new_line->get_jmax(); | |
16474 | my $maximum_field_index = $old_line->get_jmax(); | |
16475 | ||
16476 | # flush if this line has too many fields | |
16477 | if ( $jmax > $maximum_field_index ) { my_flush(); return } | |
16478 | ||
16479 | # flush if adding this line would make a non-monotonic field count | |
16480 | if ( | |
16481 | ( $maximum_field_index > $jmax ) # this has too few fields | |
16482 | && ( | |
16483 | ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic | |
16484 | || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) | |
16485 | ) | |
16486 | ) | |
16487 | { | |
16488 | my_flush(); | |
16489 | return; | |
16490 | } | |
16491 | ||
16492 | # otherwise append this line if everything matches | |
16493 | my $jmax_original_line = $new_line->get_jmax_original_line(); | |
16494 | my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); | |
16495 | my $rtokens = $new_line->get_rtokens(); | |
16496 | my $rfields = $new_line->get_rfields(); | |
16497 | my $rpatterns = $new_line->get_rpatterns(); | |
16498 | my $list_type = $new_line->get_list_type(); | |
16499 | ||
16500 | my $group_list_type = $old_line->get_list_type(); | |
16501 | my $old_rpatterns = $old_line->get_rpatterns(); | |
16502 | my $old_rtokens = $old_line->get_rtokens(); | |
16503 | ||
16504 | my $jlimit = $jmax - 1; | |
16505 | if ( $maximum_field_index > $jmax ) { | |
16506 | $jlimit = $jmax_original_line; | |
16507 | --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); | |
16508 | } | |
16509 | ||
16510 | my $everything_matches = 1; | |
16511 | ||
16512 | # common list types always match | |
16513 | unless ( ( $group_list_type && ( $list_type eq $group_list_type ) ) | |
16514 | || $is_hanging_side_comment ) | |
16515 | { | |
16516 | ||
16517 | my $leading_space_count = $new_line->get_leading_space_count(); | |
16518 | my $saw_equals = 0; | |
16519 | for my $j ( 0 .. $jlimit ) { | |
16520 | my $match = 1; | |
16521 | ||
16522 | my $old_tok = $$old_rtokens[$j]; | |
16523 | my $new_tok = $$rtokens[$j]; | |
16524 | ||
16525 | # dumb down the match after an equals | |
16526 | if ( $saw_equals && $new_tok =~ /(.*)\+/ ) { | |
16527 | $new_tok = $1; | |
16528 | $old_tok =~ s/\+.*$//; | |
16529 | } | |
16530 | if ( $new_tok =~ /^=\d*$/ ) { $saw_equals = 1 } | |
16531 | ||
16532 | # we never match if the matching tokens differ | |
16533 | if ( $j < $jlimit | |
16534 | && $old_tok ne $new_tok ) | |
16535 | { | |
16536 | $match = 0; | |
16537 | } | |
16538 | ||
16539 | # otherwise, if patterns match, we always have a match. | |
16540 | # However, if patterns don't match, we have to be careful... | |
16541 | elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) { | |
16542 | ||
16543 | # We have to be very careful about aligning commas when the | |
16544 | # pattern's don't match, because it can be worse to create an | |
16545 | # alignment where none is needed than to omit one. The current | |
16546 | # rule: if we are within a matching sub call (indicated by '+' | |
16547 | # in the matching token), we'll allow a marginal match, but | |
16548 | # otherwise not. | |
16549 | # | |
16550 | # Here's an example where we'd like to align the '=' | |
16551 | # my $cfile = File::Spec->catfile( 't', 'callext.c' ); | |
16552 | # my $inc = File::Spec->catdir( 'Basic', 'Core' ); | |
16553 | # because the function names differ. | |
16554 | # Future alignment logic should make this unnecessary. | |
16555 | # | |
16556 | # Here's an example where the ','s are not contained in a call. | |
16557 | # The first line below should probably not match the next two: | |
16558 | # ( $a, $b ) = ( $b, $r ); | |
16559 | # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); | |
16560 | # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); | |
16561 | if ( $new_tok =~ /^,/ ) { | |
16562 | if ( $$rtokens[$j] =~ /[A-Za-z]/ ) { | |
16563 | $marginal_match = 1; | |
16564 | } | |
16565 | else { | |
16566 | $match = 0; | |
16567 | } | |
16568 | } | |
16569 | ||
16570 | # parens don't align well unless patterns match | |
16571 | elsif ( $new_tok =~ /^\(/ ) { | |
16572 | $match = 0; | |
16573 | } | |
16574 | ||
16575 | # Handle an '=' alignment with different patterns to | |
16576 | # the left. | |
16577 | elsif ( $new_tok =~ /^=\d*$/ ) { | |
16578 | ||
16579 | $saw_equals = 1; | |
16580 | ||
16581 | # It is best to be a little restrictive when | |
16582 | # aligning '=' tokens. Here is an example of | |
16583 | # two lines that we will not align: | |
16584 | # my $variable=6; | |
16585 | # $bb=4; | |
16586 | # The problem is that one is a 'my' declaration, | |
16587 | # and the other isn't, so they're not very similar. | |
16588 | # We will filter these out by comparing the first | |
16589 | # letter of the pattern. This is crude, but works | |
16590 | # well enough. | |
16591 | if ( | |
16592 | substr( $$old_rpatterns[$j], 0, 1 ) ne | |
16593 | substr( $$rpatterns[$j], 0, 1 ) ) | |
16594 | { | |
16595 | $match = 0; | |
16596 | } | |
16597 | ||
16598 | # If we pass that test, we'll call it a marginal match. | |
16599 | # Here is an example of a marginal match: | |
16600 | # $done{$$op} = 1; | |
16601 | # $op = compile_bblock($op); | |
16602 | # The left tokens are both identifiers, but | |
16603 | # one accesses a hash and the other doesn't. | |
16604 | # We'll let this be a tentative match and undo | |
16605 | # it later if we don't find more than 2 lines | |
16606 | # in the group. | |
16607 | elsif ( $maximum_line_index == 0 ) { | |
16608 | $marginal_match = 1; | |
16609 | } | |
16610 | } | |
16611 | } | |
16612 | ||
16613 | # Don't let line with fewer fields increase column widths | |
16614 | # ( align3.t ) | |
16615 | if ( $maximum_field_index > $jmax ) { | |
16616 | my $pad = | |
16617 | length( $$rfields[$j] ) - $old_line->current_field_width($j); | |
16618 | ||
16619 | if ( $j == 0 ) { | |
16620 | $pad += $leading_space_count; | |
16621 | } | |
16622 | ||
16623 | # TESTING: suspend this rule to allow last lines to join | |
16624 | if ( $pad > 0 ) { $match = 0; } | |
16625 | } | |
16626 | ||
16627 | unless ($match) { | |
16628 | $everything_matches = 0; | |
16629 | last; | |
16630 | } | |
16631 | } | |
16632 | } | |
16633 | ||
16634 | if ( $maximum_field_index > $jmax ) { | |
16635 | ||
16636 | if ($everything_matches) { | |
16637 | ||
16638 | my $comment = $$rfields[$jmax]; | |
16639 | for $jmax ( $jlimit .. $maximum_field_index ) { | |
16640 | $$rtokens[$jmax] = $$old_rtokens[$jmax]; | |
16641 | $$rfields[ ++$jmax ] = ''; | |
16642 | $$rpatterns[$jmax] = $$old_rpatterns[$jmax]; | |
16643 | } | |
16644 | $$rfields[$jmax] = $comment; | |
16645 | $new_line->set_jmax($jmax); | |
16646 | } | |
16647 | } | |
16648 | ||
16649 | my_flush() unless ($everything_matches); | |
16650 | } | |
16651 | ||
16652 | sub check_fit { | |
16653 | ||
16654 | return unless ( $maximum_line_index >= 0 ); | |
16655 | my $new_line = shift; | |
16656 | my $old_line = shift; | |
16657 | ||
16658 | my $jmax = $new_line->get_jmax(); | |
16659 | my $leading_space_count = $new_line->get_leading_space_count(); | |
16660 | my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); | |
16661 | my $rtokens = $new_line->get_rtokens(); | |
16662 | my $rfields = $new_line->get_rfields(); | |
16663 | my $rpatterns = $new_line->get_rpatterns(); | |
16664 | ||
16665 | my $group_list_type = $group_lines[0]->get_list_type(); | |
16666 | ||
16667 | my $padding_so_far = 0; | |
16668 | my $padding_available = $old_line->get_available_space_on_right(); | |
16669 | ||
16670 | # save current columns in case this doesn't work | |
16671 | save_alignment_columns(); | |
16672 | ||
16673 | my ( $j, $pad, $eight ); | |
16674 | my $maximum_field_index = $old_line->get_jmax(); | |
16675 | for $j ( 0 .. $jmax ) { | |
16676 | ||
16677 | ## testing patch to avoid excessive gaps in previous lines, | |
16678 | # due to a line of fewer fields. | |
16679 | # return join( ".", | |
16680 | # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, | |
16681 | # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); | |
16682 | ## MOVED BELOW AS A TEST | |
16683 | ##next if ($jmax < $maximum_field_index && $j==$jmax-1); | |
16684 | ||
16685 | $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j); | |
16686 | ||
16687 | if ( $j == 0 ) { | |
16688 | $pad += $leading_space_count; | |
16689 | } | |
16690 | ||
16691 | # remember largest gap of the group, excluding gap to side comment | |
16692 | if ( $pad < 0 | |
16693 | && $group_maximum_gap < -$pad | |
16694 | && $j > 0 | |
16695 | && $j < $jmax - 1 ) | |
16696 | { | |
16697 | $group_maximum_gap = -$pad; | |
16698 | } | |
16699 | ||
16700 | next if $pad < 0; | |
16701 | ||
16702 | # This line will need space; lets see if we want to accept it.. | |
16703 | if ( | |
16704 | ||
16705 | # not if this won't fit | |
16706 | ( $pad > $padding_available ) | |
16707 | ||
16708 | # previously, there were upper bounds placed on padding here | |
16709 | # (maximum_whitespace_columns), but they were not really helpful | |
16710 | ||
16711 | ) | |
16712 | { | |
16713 | ||
16714 | # revert to starting state then flush; things didn't work out | |
16715 | restore_alignment_columns(); | |
16716 | my_flush(); | |
16717 | last; | |
16718 | } | |
16719 | ||
16720 | # TESTING PATCH moved from above to be sure we fit | |
16721 | next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); | |
16722 | ||
16723 | # looks ok, squeeze this field in | |
16724 | $old_line->increase_field_width( $j, $pad ); | |
16725 | $padding_available -= $pad; | |
16726 | ||
16727 | # remember largest gap of the group, excluding gap to side comment | |
16728 | if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { | |
16729 | $group_maximum_gap = $pad; | |
16730 | } | |
16731 | } | |
16732 | } | |
16733 | ||
16734 | sub accept_line { | |
16735 | ||
16736 | my $new_line = shift; | |
16737 | $group_lines[ ++$maximum_line_index ] = $new_line; | |
16738 | ||
16739 | # initialize field lengths if starting new group | |
16740 | if ( $maximum_line_index == 0 ) { | |
16741 | ||
16742 | my $jmax = $new_line->get_jmax(); | |
16743 | my $rfields = $new_line->get_rfields(); | |
16744 | my $rtokens = $new_line->get_rtokens(); | |
16745 | my $j; | |
16746 | my $col = $new_line->get_leading_space_count(); | |
16747 | ||
16748 | for $j ( 0 .. $jmax ) { | |
16749 | $col += length( $$rfields[$j] ); | |
16750 | ||
16751 | # create initial alignments for the new group | |
16752 | my $token = ""; | |
16753 | if ( $j < $jmax ) { $token = $$rtokens[$j] } | |
16754 | my $alignment = make_alignment( $col, $token ); | |
16755 | $new_line->set_alignment( $j, $alignment ); | |
16756 | } | |
16757 | ||
16758 | $maximum_jmax_seen = $jmax; | |
16759 | $minimum_jmax_seen = $jmax; | |
16760 | } | |
16761 | ||
16762 | # use previous alignments otherwise | |
16763 | else { | |
16764 | my @new_alignments = | |
16765 | $group_lines[ $maximum_line_index - 1 ]->get_alignments(); | |
16766 | $new_line->set_alignments(@new_alignments); | |
16767 | } | |
16768 | } | |
16769 | ||
16770 | sub dump_array { | |
16771 | ||
16772 | # debug routine to dump array contents | |
16773 | local $" = ')('; | |
16774 | print "(@_)\n"; | |
16775 | } | |
16776 | ||
16777 | # flush() sends the current Perl::Tidy::VerticalAligner group down the | |
16778 | # pipeline to Perl::Tidy::FileWriter. | |
16779 | ||
16780 | # This is the external flush, which also empties the cache | |
16781 | sub flush { | |
16782 | ||
16783 | if ( $maximum_line_index < 0 ) { | |
16784 | if ($cached_line_type) { | |
16785 | $file_writer_object->write_code_line( $cached_line_text . "\n" ); | |
16786 | $cached_line_type = 0; | |
16787 | $cached_line_text = ""; | |
16788 | } | |
16789 | } | |
16790 | else { | |
16791 | my_flush(); | |
16792 | } | |
16793 | } | |
16794 | ||
16795 | # This is the internal flush, which leaves the cache intact | |
16796 | sub my_flush { | |
16797 | ||
16798 | return if ( $maximum_line_index < 0 ); | |
16799 | ||
16800 | # handle a group of comment lines | |
16801 | if ( $group_type eq 'COMMENT' ) { | |
16802 | ||
16803 | VALIGN_DEBUG_FLAG_APPEND0 && do { | |
16804 | my ( $a, $b, $c ) = caller(); | |
16805 | ||
16806 | "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n"; | |
16807 | ||
16808 | }; | |
16809 | my $leading_space_count = $comment_leading_space_count; | |
16810 | my $leading_string = get_leading_string($leading_space_count); | |
16811 | ||
16812 | # zero leading space count if any lines are too long | |
16813 | my $max_excess = 0; | |
16814 | for my $i ( 0 .. $maximum_line_index ) { | |
16815 | my $str = $group_lines[$i]; | |
16816 | my $excess = | |
16817 | length($str) + $leading_space_count - $rOpts_maximum_line_length; | |
16818 | if ( $excess > $max_excess ) { | |
16819 | $max_excess = $excess; | |
16820 | } | |
16821 | } | |
16822 | ||
16823 | if ( $max_excess > 0 ) { | |
16824 | $leading_space_count -= $max_excess; | |
16825 | if ( $leading_space_count < 0 ) { $leading_space_count = 0 } | |
16826 | $last_outdented_line_at = | |
16827 | $file_writer_object->get_output_line_number(); | |
16828 | unless ($outdented_line_count) { | |
16829 | $first_outdented_line_at = $last_outdented_line_at; | |
16830 | } | |
16831 | $outdented_line_count += ( $maximum_line_index + 1 ); | |
16832 | } | |
16833 | ||
16834 | # write the group of lines | |
16835 | my $outdent_long_lines = 0; | |
16836 | for my $i ( 0 .. $maximum_line_index ) { | |
16837 | write_leader_and_string( $leading_space_count, $group_lines[$i], 0, | |
16838 | $outdent_long_lines, "" ); | |
16839 | } | |
16840 | } | |
16841 | ||
16842 | # handle a group of code lines | |
16843 | else { | |
16844 | ||
16845 | VALIGN_DEBUG_FLAG_APPEND0 && do { | |
16846 | my $group_list_type = $group_lines[0]->get_list_type(); | |
16847 | my ( $a, $b, $c ) = caller(); | |
16848 | my $maximum_field_index = $group_lines[0]->get_jmax(); | |
16849 | ||
16850 | "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; | |
16851 | ||
16852 | }; | |
16853 | ||
16854 | # some small groups are best left unaligned | |
16855 | my $do_not_align = decide_if_aligned(); | |
16856 | ||
16857 | # optimize side comment location | |
16858 | $do_not_align = adjust_side_comment($do_not_align); | |
16859 | ||
16860 | # recover spaces for -lp option if possible | |
16861 | my $extra_leading_spaces = get_extra_leading_spaces(); | |
16862 | ||
16863 | # all lines of this group have the same basic leading spacing | |
16864 | my $group_leader_length = $group_lines[0]->get_leading_space_count(); | |
16865 | ||
16866 | # add extra leading spaces if helpful | |
16867 | my $min_ci_gap = | |
16868 | improve_continuation_indentation( $do_not_align, | |
16869 | $group_leader_length ); | |
16870 | ||
16871 | # loop to output all lines | |
16872 | for my $i ( 0 .. $maximum_line_index ) { | |
16873 | my $line = $group_lines[$i]; | |
16874 | write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align, | |
16875 | $group_leader_length, $extra_leading_spaces ); | |
16876 | } | |
16877 | } | |
16878 | initialize_for_new_group(); | |
16879 | } | |
16880 | ||
16881 | sub decide_if_aligned { | |
16882 | ||
16883 | # Do not try to align two lines which are not really similar | |
16884 | return unless $maximum_line_index == 1; | |
16885 | ||
16886 | my $group_list_type = $group_lines[0]->get_list_type(); | |
16887 | ||
16888 | my $do_not_align = ( | |
16889 | ||
16890 | # always align lists | |
16891 | !$group_list_type | |
16892 | ||
16893 | && ( | |
16894 | ||
16895 | # don't align if it was just a marginal match | |
16896 | $marginal_match | |
16897 | ||
16898 | # don't align two lines with big gap | |
16899 | || $group_maximum_gap > 12 | |
16900 | ||
16901 | # or lines with differing number of alignment tokens | |
16902 | || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen | |
16903 | ) | |
16904 | ); | |
16905 | ||
16906 | # But try to convert them into a simple comment group if the first line | |
16907 | # a has side comment | |
16908 | my $rfields = $group_lines[0]->get_rfields(); | |
16909 | my $maximum_field_index = $group_lines[0]->get_jmax(); | |
16910 | if ( $do_not_align | |
16911 | && ( $maximum_line_index > 0 ) | |
16912 | && ( length( $$rfields[$maximum_field_index] ) > 0 ) ) | |
16913 | { | |
16914 | combine_fields(); | |
16915 | $do_not_align = 0; | |
16916 | } | |
16917 | return $do_not_align; | |
16918 | } | |
16919 | ||
16920 | sub adjust_side_comment { | |
16921 | ||
16922 | my $do_not_align = shift; | |
16923 | ||
16924 | # let's see if we can move the side comment field out a little | |
16925 | # to improve readability (the last field is always a side comment field) | |
16926 | my $have_side_comment = 0; | |
16927 | my $first_side_comment_line = -1; | |
16928 | my $maximum_field_index = $group_lines[0]->get_jmax(); | |
16929 | for my $i ( 0 .. $maximum_line_index ) { | |
16930 | my $line = $group_lines[$i]; | |
16931 | ||
16932 | if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { | |
16933 | $have_side_comment = 1; | |
16934 | $first_side_comment_line = $i; | |
16935 | last; | |
16936 | } | |
16937 | } | |
16938 | ||
16939 | my $kmax = $maximum_field_index + 1; | |
16940 | ||
16941 | if ($have_side_comment) { | |
16942 | ||
16943 | my $line = $group_lines[0]; | |
16944 | ||
16945 | # the maximum space without exceeding the line length: | |
16946 | my $avail = $line->get_available_space_on_right(); | |
16947 | ||
16948 | # try to use the previous comment column | |
16949 | my $side_comment_column = $line->get_column( $kmax - 2 ); | |
16950 | my $move = $last_comment_column - $side_comment_column; | |
16951 | ||
16952 | ## my $sc_line0 = $side_comment_history[0]->[0]; | |
16953 | ## my $sc_col0 = $side_comment_history[0]->[1]; | |
16954 | ## my $sc_line1 = $side_comment_history[1]->[0]; | |
16955 | ## my $sc_col1 = $side_comment_history[1]->[1]; | |
16956 | ## my $sc_line2 = $side_comment_history[2]->[0]; | |
16957 | ## my $sc_col2 = $side_comment_history[2]->[1]; | |
16958 | ## | |
16959 | ## # FUTURE UPDATES: | |
16960 | ## # Be sure to ignore 'do not align' and '} # end comments' | |
16961 | ## # Find first $move > 0 and $move <= $avail as follows: | |
16962 | ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 | |
16963 | ## # 2. try sc_col2 if (line-sc_line2) < 12 | |
16964 | ## # 3. try min possible space, plus up to 8, | |
16965 | ## # 4. try min possible space | |
16966 | ||
16967 | if ( $kmax > 0 && !$do_not_align ) { | |
16968 | ||
16969 | # but if this doesn't work, give up and use the minimum space | |
16970 | if ( $move > $avail ) { | |
16971 | $move = $rOpts_minimum_space_to_comment - 1; | |
16972 | } | |
16973 | ||
16974 | # but we want some minimum space to the comment | |
16975 | my $min_move = $rOpts_minimum_space_to_comment - 1; | |
16976 | if ( $move >= 0 | |
16977 | && $last_side_comment_length > 0 | |
16978 | && ( $first_side_comment_line == 0 ) | |
16979 | && $group_level == $last_group_level_written ) | |
16980 | { | |
16981 | $min_move = 0; | |
16982 | } | |
16983 | ||
16984 | if ( $move < $min_move ) { | |
16985 | $move = $min_move; | |
16986 | } | |
16987 | ||
16988 | # prevously, an upper bound was placed on $move here, | |
16989 | # (maximum_space_to_comment), but it was not helpful | |
16990 | ||
16991 | # don't exceed the available space | |
16992 | if ( $move > $avail ) { $move = $avail } | |
16993 | ||
16994 | # we can only increase space, never decrease | |
16995 | if ( $move > 0 ) { | |
16996 | $line->increase_field_width( $maximum_field_index - 1, $move ); | |
16997 | } | |
16998 | ||
16999 | # remember this column for the next group | |
17000 | $last_comment_column = $line->get_column( $kmax - 2 ); | |
17001 | } | |
17002 | else { | |
17003 | ||
17004 | # try to at least line up the existing side comment location | |
17005 | if ( $kmax > 0 && $move > 0 && $move < $avail ) { | |
17006 | $line->increase_field_width( $maximum_field_index - 1, $move ); | |
17007 | $do_not_align = 0; | |
17008 | } | |
17009 | ||
17010 | # reset side comment column if we can't align | |
17011 | else { | |
17012 | forget_side_comment(); | |
17013 | } | |
17014 | } | |
17015 | } | |
17016 | return $do_not_align; | |
17017 | } | |
17018 | ||
17019 | sub improve_continuation_indentation { | |
17020 | my ( $do_not_align, $group_leader_length ) = @_; | |
17021 | ||
17022 | # See if we can increase the continuation indentation | |
17023 | # to move all continuation lines closer to the next field | |
17024 | # (unless it is a comment). | |
17025 | # | |
17026 | # '$min_ci_gap'is the extra indentation that we may need to introduce. | |
17027 | # We will only introduce this to fields which already have some ci. | |
17028 | # Without this variable, we would occasionally get something like this | |
17029 | # (Complex.pm): | |
17030 | # | |
17031 | # use overload '+' => \&plus, | |
17032 | # '-' => \&minus, | |
17033 | # '*' => \&multiply, | |
17034 | # ... | |
17035 | # 'tan' => \&tan, | |
17036 | # 'atan2' => \&atan2, | |
17037 | # | |
17038 | # Whereas with this variable, we can shift variables over to get this: | |
17039 | # | |
17040 | # use overload '+' => \&plus, | |
17041 | # '-' => \&minus, | |
17042 | # '*' => \&multiply, | |
17043 | # ... | |
17044 | # 'tan' => \&tan, | |
17045 | # 'atan2' => \&atan2, | |
17046 | ||
17047 | ## BUB: Deactivated#################### | |
17048 | # The trouble with this patch is that it may, for example, | |
17049 | # move in some 'or's or ':'s, and leave some out, so that the | |
17050 | # left edge alignment suffers. | |
17051 | return 0; | |
17052 | ########################################### | |
17053 | ||
17054 | my $maximum_field_index = $group_lines[0]->get_jmax(); | |
17055 | ||
17056 | my $min_ci_gap = $rOpts_maximum_line_length; | |
17057 | if ( $maximum_field_index > 1 && !$do_not_align ) { | |
17058 | ||
17059 | for my $i ( 0 .. $maximum_line_index ) { | |
17060 | my $line = $group_lines[$i]; | |
17061 | my $leading_space_count = $line->get_leading_space_count(); | |
17062 | my $rfields = $line->get_rfields(); | |
17063 | ||
17064 | my $gap = $line->get_column(0) - $leading_space_count - | |
17065 | length( $$rfields[0] ); | |
17066 | ||
17067 | if ( $leading_space_count > $group_leader_length ) { | |
17068 | if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap } | |
17069 | } | |
17070 | } | |
17071 | ||
17072 | if ( $min_ci_gap >= $rOpts_maximum_line_length ) { | |
17073 | $min_ci_gap = 0; | |
17074 | } | |
17075 | } | |
17076 | else { | |
17077 | $min_ci_gap = 0; | |
17078 | } | |
17079 | return $min_ci_gap; | |
17080 | } | |
17081 | ||
17082 | sub write_vertically_aligned_line { | |
17083 | ||
17084 | my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, | |
17085 | $extra_leading_spaces ) | |
17086 | = @_; | |
17087 | my $rfields = $line->get_rfields(); | |
17088 | my $leading_space_count = $line->get_leading_space_count(); | |
17089 | my $outdent_long_lines = $line->get_outdent_long_lines(); | |
17090 | my $maximum_field_index = $line->get_jmax(); | |
17091 | my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); | |
17092 | ||
17093 | # add any extra spaces | |
17094 | if ( $leading_space_count > $group_leader_length ) { | |
17095 | $leading_space_count += $min_ci_gap; | |
17096 | } | |
17097 | ||
17098 | my $str = $$rfields[0]; | |
17099 | ||
17100 | # loop to concatenate all fields of this line and needed padding | |
17101 | my $total_pad_count = 0; | |
17102 | my ( $j, $pad ); | |
17103 | for $j ( 1 .. $maximum_field_index ) { | |
17104 | ||
17105 | # skip zero-length side comments | |
17106 | last | |
17107 | if ( ( $j == $maximum_field_index ) | |
17108 | && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) ) | |
17109 | ); | |
17110 | ||
17111 | # compute spaces of padding before this field | |
17112 | my $col = $line->get_column( $j - 1 ); | |
17113 | $pad = $col - ( length($str) + $leading_space_count ); | |
17114 | ||
17115 | if ($do_not_align) { | |
17116 | $pad = | |
17117 | ( $j < $maximum_field_index ) | |
17118 | ? 0 | |
17119 | : $rOpts_minimum_space_to_comment - 1; | |
17120 | } | |
17121 | ||
17122 | # accumulate the padding | |
17123 | if ( $pad > 0 ) { $total_pad_count += $pad; } | |
17124 | ||
17125 | # add this field | |
17126 | if ( !defined $$rfields[$j] ) { | |
17127 | write_diagnostics("UNDEFined field at j=$j\n"); | |
17128 | } | |
17129 | ||
17130 | # only add padding when we have a finite field; | |
17131 | # this avoids extra terminal spaces if we have empty fields | |
17132 | if ( length( $$rfields[$j] ) > 0 ) { | |
17133 | $str .= ' ' x $total_pad_count; | |
17134 | $total_pad_count = 0; | |
17135 | $str .= $$rfields[$j]; | |
17136 | } | |
17137 | ||
17138 | # update side comment history buffer | |
17139 | if ( $j == $maximum_field_index ) { | |
17140 | my $lineno = $file_writer_object->get_output_line_number(); | |
17141 | shift @side_comment_history; | |
17142 | push @side_comment_history, [ $lineno, $col ]; | |
17143 | } | |
17144 | } | |
17145 | ||
17146 | my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) ); | |
17147 | ||
17148 | # ship this line off | |
17149 | write_leader_and_string( $leading_space_count + $extra_leading_spaces, | |
17150 | $str, $side_comment_length, $outdent_long_lines, | |
17151 | $rvertical_tightness_flags ); | |
17152 | } | |
17153 | ||
17154 | sub get_extra_leading_spaces { | |
17155 | ||
17156 | #---------------------------------------------------------- | |
17157 | # Define any extra indentation space (for the -lp option). | |
17158 | # Here is why: | |
17159 | # If a list has side comments, sub scan_list must dump the | |
17160 | # list before it sees everything. When this happens, it sets | |
17161 | # the indentation to the standard scheme, but notes how | |
17162 | # many spaces it would have liked to use. We may be able | |
17163 | # to recover that space here in the event that that all of the | |
17164 | # lines of a list are back together again. | |
17165 | #---------------------------------------------------------- | |
17166 | ||
17167 | my $extra_leading_spaces = 0; | |
17168 | if ($extra_indent_ok) { | |
17169 | my $object = $group_lines[0]->get_indentation(); | |
17170 | if ( ref($object) ) { | |
17171 | my $extra_indentation_spaces_wanted = | |
17172 | get_RECOVERABLE_SPACES($object); | |
17173 | ||
17174 | # all indentation objects must be the same | |
17175 | my $i; | |
17176 | for $i ( 1 .. $maximum_line_index ) { | |
17177 | if ( $object != $group_lines[$i]->get_indentation() ) { | |
17178 | $extra_indentation_spaces_wanted = 0; | |
17179 | last; | |
17180 | } | |
17181 | } | |
17182 | ||
17183 | if ($extra_indentation_spaces_wanted) { | |
17184 | ||
17185 | # the maximum space without exceeding the line length: | |
17186 | my $avail = $group_lines[0]->get_available_space_on_right(); | |
17187 | $extra_leading_spaces = | |
17188 | ( $avail > $extra_indentation_spaces_wanted ) | |
17189 | ? $extra_indentation_spaces_wanted | |
17190 | : $avail; | |
17191 | ||
17192 | # update the indentation object because with -icp the terminal | |
17193 | # ');' will use the same adjustment. | |
17194 | $object->permanently_decrease_AVAILABLE_SPACES( | |
17195 | -$extra_leading_spaces ); | |
17196 | } | |
17197 | } | |
17198 | } | |
17199 | return $extra_leading_spaces; | |
17200 | } | |
17201 | ||
17202 | sub combine_fields { | |
17203 | ||
17204 | # combine all fields except for the comment field ( sidecmt.t ) | |
17205 | my ( $j, $k ); | |
17206 | my $maximum_field_index = $group_lines[0]->get_jmax(); | |
17207 | for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) { | |
17208 | my $line = $group_lines[$j]; | |
17209 | my $rfields = $line->get_rfields(); | |
17210 | foreach ( 1 .. $maximum_field_index - 1 ) { | |
17211 | $$rfields[0] .= $$rfields[$_]; | |
17212 | } | |
17213 | $$rfields[1] = $$rfields[$maximum_field_index]; | |
17214 | ||
17215 | $line->set_jmax(1); | |
17216 | $line->set_column( 0, 0 ); | |
17217 | $line->set_column( 1, 0 ); | |
17218 | ||
17219 | } | |
17220 | $maximum_field_index = 1; | |
17221 | ||
17222 | for $j ( 0 .. $maximum_line_index ) { | |
17223 | my $line = $group_lines[$j]; | |
17224 | my $rfields = $line->get_rfields(); | |
17225 | for $k ( 0 .. $maximum_field_index ) { | |
17226 | my $pad = length( $$rfields[$k] ) - $line->current_field_width($k); | |
17227 | if ( $k == 0 ) { | |
17228 | $pad += $group_lines[$j]->get_leading_space_count(); | |
17229 | } | |
17230 | ||
17231 | if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } | |
17232 | ||
17233 | } | |
17234 | } | |
17235 | } | |
17236 | ||
17237 | sub get_output_line_number { | |
17238 | ||
17239 | # the output line number reported to a caller is the number of items | |
17240 | # written plus the number of items in the buffer | |
17241 | my $self = shift; | |
17242 | 1 + $maximum_line_index + $file_writer_object->get_output_line_number(); | |
17243 | } | |
17244 | ||
17245 | sub write_leader_and_string { | |
17246 | ||
17247 | my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, | |
17248 | $rvertical_tightness_flags ) | |
17249 | = @_; | |
17250 | ||
17251 | my $leading_string = get_leading_string($leading_space_count); | |
17252 | ||
17253 | # handle outdenting of long lines: | |
17254 | if ($outdent_long_lines) { | |
17255 | my $excess = | |
17256 | length($str) - $side_comment_length + $leading_space_count - | |
17257 | $rOpts_maximum_line_length; | |
17258 | if ( $excess > 0 ) { | |
17259 | $leading_string = ""; | |
17260 | $last_outdented_line_at = | |
17261 | $file_writer_object->get_output_line_number(); | |
17262 | ||
17263 | unless ($outdented_line_count) { | |
17264 | $first_outdented_line_at = $last_outdented_line_at; | |
17265 | } | |
17266 | $outdented_line_count++; | |
17267 | } | |
17268 | } | |
17269 | ||
17270 | # Unpack any recombination data; it was packed by | |
17271 | # sub send_lines_to_vertical_aligner. Contents: | |
17272 | # | |
17273 | # [0] type: 1=opening 2=closing 3=opening block brace | |
17274 | # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok | |
17275 | # if closing: spaces of padding to use | |
17276 | # [2] sequence number of container | |
17277 | # [3] valid flag: do not append if this flag is false | |
17278 | # | |
17279 | my ( $open_or_close, $tightness_flag, $seqno, $valid ); | |
17280 | if ($rvertical_tightness_flags) { | |
17281 | ( $open_or_close, $tightness_flag, $seqno, $valid ) = | |
17282 | @{$rvertical_tightness_flags}; | |
17283 | } | |
17284 | ||
17285 | # handle any cached line .. | |
17286 | # either append this line to it or write it out | |
17287 | if ($cached_line_text) { | |
17288 | ||
17289 | if ( !$cached_line_valid ) { | |
17290 | $file_writer_object->write_code_line( $cached_line_text . "\n" ); | |
17291 | } | |
17292 | ||
17293 | # handle cached line with opening container token | |
17294 | elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { | |
17295 | ||
17296 | my $gap = $leading_space_count - length($cached_line_text); | |
17297 | ||
17298 | # handle option of just one tight opening per line: | |
17299 | if ( $cached_line_flag == 1 ) { | |
17300 | if ( defined($open_or_close) && $open_or_close == 1 ) { | |
17301 | $gap = -1; | |
17302 | } | |
17303 | } | |
17304 | ||
17305 | if ( $gap >= 0 ) { | |
17306 | $leading_string = $cached_line_text . ' ' x $gap; | |
17307 | } | |
17308 | else { | |
17309 | $file_writer_object->write_code_line( | |
17310 | $cached_line_text . "\n" ); | |
17311 | } | |
17312 | } | |
17313 | ||
17314 | # handle cached line to place before this closing container token | |
17315 | else { | |
17316 | my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; | |
17317 | ||
17318 | if ( length($test_line) <= $rOpts_maximum_line_length ) { | |
17319 | $str = $test_line; | |
17320 | $leading_string = ""; | |
17321 | } | |
17322 | else { | |
17323 | $file_writer_object->write_code_line( | |
17324 | $cached_line_text . "\n" ); | |
17325 | } | |
17326 | } | |
17327 | } | |
17328 | $cached_line_type = 0; | |
17329 | $cached_line_text = ""; | |
17330 | ||
17331 | my $line = $leading_string . $str; | |
17332 | ||
17333 | # write or cache this line | |
17334 | if ( !$rvertical_tightness_flags || $side_comment_length > 0 ) { | |
17335 | $file_writer_object->write_code_line( $line . "\n" ); | |
17336 | } | |
17337 | else { | |
17338 | $cached_line_text = $line; | |
17339 | $cached_line_type = $open_or_close; | |
17340 | $cached_line_flag = $tightness_flag; | |
17341 | $cached_seqno = $seqno; | |
17342 | $cached_line_valid = $valid; | |
17343 | } | |
17344 | ||
17345 | $last_group_level_written = $group_level; | |
17346 | $last_side_comment_length = $side_comment_length; | |
17347 | $extra_indent_ok = 0; | |
17348 | } | |
17349 | ||
17350 | { # begin get_leading_string | |
17351 | ||
17352 | my @leading_string_cache; | |
17353 | ||
17354 | sub get_leading_string { | |
17355 | ||
17356 | # define the leading whitespace string for this line.. | |
17357 | my $leading_whitespace_count = shift; | |
17358 | ||
17359 | # Handle case of zero whitespace, which includes multi-line quotes | |
17360 | # (which may have a finite level; this prevents tab problems) | |
17361 | if ( $leading_whitespace_count <= 0 ) { | |
17362 | return ""; | |
17363 | } | |
17364 | ||
17365 | # look for previous result | |
17366 | elsif ( $leading_string_cache[$leading_whitespace_count] ) { | |
17367 | return $leading_string_cache[$leading_whitespace_count]; | |
17368 | } | |
17369 | ||
17370 | # must compute a string for this number of spaces | |
17371 | my $leading_string; | |
17372 | ||
17373 | # Handle simple case of no tabs | |
17374 | if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) | |
17375 | || $rOpts_indent_columns <= 0 ) | |
17376 | { | |
17377 | $leading_string = ( ' ' x $leading_whitespace_count ); | |
17378 | } | |
17379 | ||
17380 | # Handle entab option | |
17381 | elsif ($rOpts_entab_leading_whitespace) { | |
17382 | my $space_count = | |
17383 | $leading_whitespace_count % $rOpts_entab_leading_whitespace; | |
17384 | my $tab_count = | |
17385 | int( | |
17386 | $leading_whitespace_count / $rOpts_entab_leading_whitespace ); | |
17387 | $leading_string = "\t" x $tab_count . ' ' x $space_count; | |
17388 | } | |
17389 | ||
17390 | # Handle option of one tab per level | |
17391 | else { | |
17392 | $leading_string = ( "\t" x $group_level ); | |
17393 | my $space_count = | |
17394 | $leading_whitespace_count - $group_level * $rOpts_indent_columns; | |
17395 | ||
17396 | # shouldn't happen: | |
17397 | if ( $space_count < 0 ) { | |
17398 | warning( | |
17399 | "Error in append_line: for level=$group_level count=$leading_whitespace_count\n" | |
17400 | ); | |
17401 | $leading_string = ( ' ' x $leading_whitespace_count ); | |
17402 | } | |
17403 | else { | |
17404 | $leading_string .= ( ' ' x $space_count ); | |
17405 | } | |
17406 | } | |
17407 | $leading_string_cache[$leading_whitespace_count] = $leading_string; | |
17408 | return $leading_string; | |
17409 | } | |
17410 | } # end get_leading_string | |
17411 | ||
17412 | sub report_anything_unusual { | |
17413 | my $self = shift; | |
17414 | if ( $outdented_line_count > 0 ) { | |
17415 | write_logfile_entry( | |
17416 | "$outdented_line_count long lines were outdented:\n"); | |
17417 | write_logfile_entry( | |
17418 | " First at output line $first_outdented_line_at\n"); | |
17419 | ||
17420 | if ( $outdented_line_count > 1 ) { | |
17421 | write_logfile_entry( | |
17422 | " Last at output line $last_outdented_line_at\n"); | |
17423 | } | |
17424 | write_logfile_entry( | |
17425 | " use -noll to prevent outdenting, -l=n to increase line length\n" | |
17426 | ); | |
17427 | write_logfile_entry("\n"); | |
17428 | } | |
17429 | } | |
17430 | ||
17431 | ##################################################################### | |
17432 | # | |
17433 | # the Perl::Tidy::FileWriter class writes the output file | |
17434 | # | |
17435 | ##################################################################### | |
17436 | ||
17437 | package Perl::Tidy::FileWriter; | |
17438 | ||
17439 | # Maximum number of little messages; probably need not be changed. | |
17440 | use constant MAX_NAG_MESSAGES => 6; | |
17441 | ||
17442 | sub write_logfile_entry { | |
17443 | my $self = shift; | |
17444 | my $logger_object = $self->{_logger_object}; | |
17445 | if ($logger_object) { | |
17446 | $logger_object->write_logfile_entry(@_); | |
17447 | } | |
17448 | } | |
17449 | ||
17450 | sub new { | |
17451 | my $class = shift; | |
17452 | my ( $line_sink_object, $rOpts, $logger_object ) = @_; | |
17453 | ||
17454 | bless { | |
17455 | _line_sink_object => $line_sink_object, | |
17456 | _logger_object => $logger_object, | |
17457 | _rOpts => $rOpts, | |
17458 | _output_line_number => 1, | |
17459 | _consecutive_blank_lines => 0, | |
17460 | _consecutive_nonblank_lines => 0, | |
17461 | _first_line_length_error => 0, | |
17462 | _max_line_length_error => 0, | |
17463 | _last_line_length_error => 0, | |
17464 | _first_line_length_error_at => 0, | |
17465 | _max_line_length_error_at => 0, | |
17466 | _last_line_length_error_at => 0, | |
17467 | _line_length_error_count => 0, | |
17468 | _max_output_line_length => 0, | |
17469 | _max_output_line_length_at => 0, | |
17470 | }, $class; | |
17471 | } | |
17472 | ||
17473 | sub tee_on { | |
17474 | my $self = shift; | |
17475 | $self->{_line_sink_object}->tee_on(); | |
17476 | } | |
17477 | ||
17478 | sub tee_off { | |
17479 | my $self = shift; | |
17480 | $self->{_line_sink_object}->tee_off(); | |
17481 | } | |
17482 | ||
17483 | sub get_output_line_number { | |
17484 | my $self = shift; | |
17485 | return $self->{_output_line_number}; | |
17486 | } | |
17487 | ||
17488 | sub decrement_output_line_number { | |
17489 | my $self = shift; | |
17490 | $self->{_output_line_number}--; | |
17491 | } | |
17492 | ||
17493 | sub get_consecutive_nonblank_lines { | |
17494 | my $self = shift; | |
17495 | return $self->{_consecutive_nonblank_lines}; | |
17496 | } | |
17497 | ||
17498 | sub reset_consecutive_blank_lines { | |
17499 | my $self = shift; | |
17500 | $self->{_consecutive_blank_lines} = 0; | |
17501 | } | |
17502 | ||
17503 | sub want_blank_line { | |
17504 | my $self = shift; | |
17505 | unless ( $self->{_consecutive_blank_lines} ) { | |
17506 | $self->write_blank_code_line(); | |
17507 | } | |
17508 | } | |
17509 | ||
17510 | sub write_blank_code_line { | |
17511 | my $self = shift; | |
17512 | my $rOpts = $self->{_rOpts}; | |
17513 | return | |
17514 | if ( $self->{_consecutive_blank_lines} >= | |
17515 | $rOpts->{'maximum-consecutive-blank-lines'} ); | |
17516 | $self->{_consecutive_blank_lines}++; | |
17517 | $self->{_consecutive_nonblank_lines} = 0; | |
17518 | $self->write_line("\n"); | |
17519 | } | |
17520 | ||
17521 | sub write_code_line { | |
17522 | my $self = shift; | |
17523 | my $a = shift; | |
17524 | ||
17525 | if ( $a =~ /^\s*$/ ) { | |
17526 | my $rOpts = $self->{_rOpts}; | |
17527 | return | |
17528 | if ( $self->{_consecutive_blank_lines} >= | |
17529 | $rOpts->{'maximum-consecutive-blank-lines'} ); | |
17530 | $self->{_consecutive_blank_lines}++; | |
17531 | $self->{_consecutive_nonblank_lines} = 0; | |
17532 | } | |
17533 | else { | |
17534 | $self->{_consecutive_blank_lines} = 0; | |
17535 | $self->{_consecutive_nonblank_lines}++; | |
17536 | } | |
17537 | $self->write_line($a); | |
17538 | } | |
17539 | ||
17540 | sub write_line { | |
17541 | my $self = shift; | |
17542 | my $a = shift; | |
17543 | ||
17544 | # TODO: go through and see if the test is necessary here | |
17545 | if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } | |
17546 | ||
17547 | $self->{_line_sink_object}->write_line($a); | |
17548 | ||
17549 | # This calculation of excess line length ignores any internal tabs | |
17550 | my $rOpts = $self->{_rOpts}; | |
17551 | my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; | |
17552 | if ( $a =~ /^\t+/g ) { | |
17553 | $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); | |
17554 | } | |
17555 | ||
17556 | # Note that we just incremented output line number to future value | |
17557 | # so we must subtract 1 for current line number | |
17558 | if ( length($a) > 1 + $self->{_max_output_line_length} ) { | |
17559 | $self->{_max_output_line_length} = length($a) - 1; | |
17560 | $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; | |
17561 | } | |
17562 | ||
17563 | if ( $exceed > 0 ) { | |
17564 | my $output_line_number = $self->{_output_line_number}; | |
17565 | $self->{_last_line_length_error} = $exceed; | |
17566 | $self->{_last_line_length_error_at} = $output_line_number - 1; | |
17567 | if ( $self->{_line_length_error_count} == 0 ) { | |
17568 | $self->{_first_line_length_error} = $exceed; | |
17569 | $self->{_first_line_length_error_at} = $output_line_number - 1; | |
17570 | } | |
17571 | ||
17572 | if ( | |
17573 | $self->{_last_line_length_error} > $self->{_max_line_length_error} ) | |
17574 | { | |
17575 | $self->{_max_line_length_error} = $exceed; | |
17576 | $self->{_max_line_length_error_at} = $output_line_number - 1; | |
17577 | } | |
17578 | ||
17579 | if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) { | |
17580 | $self->write_logfile_entry( | |
17581 | "Line length exceeded by $exceed characters\n"); | |
17582 | } | |
17583 | $self->{_line_length_error_count}++; | |
17584 | } | |
17585 | ||
17586 | } | |
17587 | ||
17588 | sub report_line_length_errors { | |
17589 | my $self = shift; | |
17590 | my $rOpts = $self->{_rOpts}; | |
17591 | my $line_length_error_count = $self->{_line_length_error_count}; | |
17592 | if ( $line_length_error_count == 0 ) { | |
17593 | $self->write_logfile_entry( | |
17594 | "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); | |
17595 | my $max_output_line_length = $self->{_max_output_line_length}; | |
17596 | my $max_output_line_length_at = $self->{_max_output_line_length_at}; | |
17597 | $self->write_logfile_entry( | |
17598 | " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" | |
17599 | ); | |
17600 | ||
17601 | } | |
17602 | else { | |
17603 | ||
17604 | my $word = ( $line_length_error_count > 1 ) ? "s" : ""; | |
17605 | $self->write_logfile_entry( | |
17606 | "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" | |
17607 | ); | |
17608 | ||
17609 | $word = ( $line_length_error_count > 1 ) ? "First" : ""; | |
17610 | my $first_line_length_error = $self->{_first_line_length_error}; | |
17611 | my $first_line_length_error_at = $self->{_first_line_length_error_at}; | |
17612 | $self->write_logfile_entry( | |
17613 | " $word at line $first_line_length_error_at by $first_line_length_error characters\n" | |
17614 | ); | |
17615 | ||
17616 | if ( $line_length_error_count > 1 ) { | |
17617 | my $max_line_length_error = $self->{_max_line_length_error}; | |
17618 | my $max_line_length_error_at = $self->{_max_line_length_error_at}; | |
17619 | my $last_line_length_error = $self->{_last_line_length_error}; | |
17620 | my $last_line_length_error_at = $self->{_last_line_length_error_at}; | |
17621 | $self->write_logfile_entry( | |
17622 | " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" | |
17623 | ); | |
17624 | $self->write_logfile_entry( | |
17625 | " Last at line $last_line_length_error_at by $last_line_length_error characters\n" | |
17626 | ); | |
17627 | } | |
17628 | } | |
17629 | } | |
17630 | ||
17631 | ##################################################################### | |
17632 | # | |
17633 | # The Perl::Tidy::Debugger class shows line tokenization | |
17634 | # | |
17635 | ##################################################################### | |
17636 | ||
17637 | package Perl::Tidy::Debugger; | |
17638 | ||
17639 | sub new { | |
17640 | ||
17641 | my ( $class, $filename ) = @_; | |
17642 | ||
17643 | bless { | |
17644 | _debug_file => $filename, | |
17645 | _debug_file_opened => 0, | |
17646 | _fh => undef, | |
17647 | }, $class; | |
17648 | } | |
17649 | ||
17650 | sub really_open_debug_file { | |
17651 | ||
17652 | my $self = shift; | |
17653 | my $debug_file = $self->{_debug_file}; | |
17654 | my $fh; | |
17655 | unless ( $fh = IO::File->new("> $debug_file") ) { | |
17656 | warn("can't open $debug_file: $!\n"); | |
17657 | } | |
17658 | $self->{_debug_file_opened} = 1; | |
17659 | $self->{_fh} = $fh; | |
17660 | print $fh | |
17661 | "Use -dump-token-types (-dtt) to get a list of token type codes\n"; | |
17662 | } | |
17663 | ||
17664 | sub close_debug_file { | |
17665 | ||
17666 | my $self = shift; | |
17667 | my $fh = $self->{_fh}; | |
17668 | if ( $self->{_debug_file_opened} ) { | |
17669 | ||
17670 | eval { $self->{_fh}->close() }; | |
17671 | } | |
17672 | } | |
17673 | ||
17674 | sub write_debug_entry { | |
17675 | ||
17676 | # This is a debug dump routine which may be modified as necessary | |
17677 | # to dump tokens on a line-by-line basis. The output will be written | |
17678 | # to the .DEBUG file when the -D flag is entered. | |
17679 | my $self = shift; | |
17680 | my $line_of_tokens = shift; | |
17681 | ||
17682 | my $input_line = $line_of_tokens->{_line_text}; | |
17683 | my $rtoken_type = $line_of_tokens->{_rtoken_type}; | |
17684 | my $rtokens = $line_of_tokens->{_rtokens}; | |
17685 | my $rlevels = $line_of_tokens->{_rlevels}; | |
17686 | my $rslevels = $line_of_tokens->{_rslevels}; | |
17687 | my $rblock_type = $line_of_tokens->{_rblock_type}; | |
17688 | my $input_line_number = $line_of_tokens->{_line_number}; | |
17689 | my $line_type = $line_of_tokens->{_line_type}; | |
17690 | ||
17691 | my ( $j, $num ); | |
17692 | ||
17693 | my $token_str = "$input_line_number: "; | |
17694 | my $reconstructed_original = "$input_line_number: "; | |
17695 | my $block_str = "$input_line_number: "; | |
17696 | ||
17697 | #$token_str .= "$line_type: "; | |
17698 | #$reconstructed_original .= "$line_type: "; | |
17699 | ||
17700 | my $pattern = ""; | |
17701 | my @next_char = ( '"', '"' ); | |
17702 | my $i_next = 0; | |
17703 | unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } | |
17704 | my $fh = $self->{_fh}; | |
17705 | ||
17706 | for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { | |
17707 | ||
17708 | # testing patterns | |
17709 | if ( $$rtoken_type[$j] eq 'k' ) { | |
17710 | $pattern .= $$rtokens[$j]; | |
17711 | } | |
17712 | else { | |
17713 | $pattern .= $$rtoken_type[$j]; | |
17714 | } | |
17715 | $reconstructed_original .= $$rtokens[$j]; | |
17716 | $block_str .= "($$rblock_type[$j])"; | |
17717 | $num = length( $$rtokens[$j] ); | |
17718 | my $type_str = $$rtoken_type[$j]; | |
17719 | ||
17720 | # be sure there are no blank tokens (shouldn't happen) | |
17721 | # This can only happen if a programming error has been made | |
17722 | # because all valid tokens are non-blank | |
17723 | if ( $type_str eq ' ' ) { | |
17724 | print $fh "BLANK TOKEN on the next line\n"; | |
17725 | $type_str = $next_char[$i_next]; | |
17726 | $i_next = 1 - $i_next; | |
17727 | } | |
17728 | ||
17729 | if ( length($type_str) == 1 ) { | |
17730 | $type_str = $type_str x $num; | |
17731 | } | |
17732 | $token_str .= $type_str; | |
17733 | } | |
17734 | ||
17735 | # Write what you want here ... | |
17736 | # print $fh "$input_line\n"; | |
17737 | # print $fh "$pattern\n"; | |
17738 | print $fh "$reconstructed_original\n"; | |
17739 | print $fh "$token_str\n"; | |
17740 | ||
17741 | #print $fh "$block_str\n"; | |
17742 | } | |
17743 | ||
17744 | ##################################################################### | |
17745 | # | |
17746 | # The Perl::Tidy::LineBuffer class supplies a 'get_line()' | |
17747 | # method for returning the next line to be parsed, as well as a | |
17748 | # 'peek_ahead()' method | |
17749 | # | |
17750 | # The input parameter is an object with a 'get_line()' method | |
17751 | # which returns the next line to be parsed | |
17752 | # | |
17753 | ##################################################################### | |
17754 | ||
17755 | package Perl::Tidy::LineBuffer; | |
17756 | ||
17757 | sub new { | |
17758 | ||
17759 | my $class = shift; | |
17760 | my $line_source_object = shift; | |
17761 | ||
17762 | return bless { | |
17763 | _line_source_object => $line_source_object, | |
17764 | _rlookahead_buffer => [], | |
17765 | }, $class; | |
17766 | } | |
17767 | ||
17768 | sub peek_ahead { | |
17769 | my $self = shift; | |
17770 | my $buffer_index = shift; | |
17771 | my $line = undef; | |
17772 | my $line_source_object = $self->{_line_source_object}; | |
17773 | my $rlookahead_buffer = $self->{_rlookahead_buffer}; | |
17774 | if ( $buffer_index < scalar(@$rlookahead_buffer) ) { | |
17775 | $line = $$rlookahead_buffer[$buffer_index]; | |
17776 | } | |
17777 | else { | |
17778 | $line = $line_source_object->get_line(); | |
17779 | push( @$rlookahead_buffer, $line ); | |
17780 | } | |
17781 | return $line; | |
17782 | } | |
17783 | ||
17784 | sub get_line { | |
17785 | my $self = shift; | |
17786 | my $line = undef; | |
17787 | my $line_source_object = $self->{_line_source_object}; | |
17788 | my $rlookahead_buffer = $self->{_rlookahead_buffer}; | |
17789 | ||
17790 | if ( scalar(@$rlookahead_buffer) ) { | |
17791 | $line = shift @$rlookahead_buffer; | |
17792 | } | |
17793 | else { | |
17794 | $line = $line_source_object->get_line(); | |
17795 | } | |
17796 | return $line; | |
17797 | } | |
17798 | ||
17799 | ######################################################################## | |
17800 | # | |
17801 | # the Perl::Tidy::Tokenizer package is essentially a filter which | |
17802 | # reads lines of perl source code from a source object and provides | |
17803 | # corresponding tokenized lines through its get_line() method. Lines | |
17804 | # flow from the source_object to the caller like this: | |
17805 | # | |
17806 | # source_object --> LineBuffer_object --> Tokenizer --> calling routine | |
17807 | # get_line() get_line() get_line() line_of_tokens | |
17808 | # | |
17809 | # The source object can be any object with a get_line() method which | |
17810 | # supplies one line (a character string) perl call. | |
17811 | # The LineBuffer object is created by the Tokenizer. | |
17812 | # The Tokenizer returns a reference to a data structure 'line_of_tokens' | |
17813 | # containing one tokenized line for each call to its get_line() method. | |
17814 | # | |
17815 | # WARNING: This is not a real class yet. Only one tokenizer my be used. | |
17816 | # | |
17817 | ######################################################################## | |
17818 | ||
17819 | package Perl::Tidy::Tokenizer; | |
17820 | ||
17821 | BEGIN { | |
17822 | ||
17823 | # Caution: these debug flags produce a lot of output | |
17824 | # They should all be 0 except when debugging small scripts | |
17825 | ||
17826 | use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0; | |
17827 | use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0; | |
17828 | use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0; | |
17829 | use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0; | |
17830 | use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0; | |
17831 | ||
17832 | my $debug_warning = sub { | |
17833 | print "TOKENIZER_DEBUGGING with key $_[0]\n"; | |
17834 | }; | |
17835 | ||
17836 | TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); | |
17837 | TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); | |
17838 | TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); | |
17839 | TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); | |
17840 | TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); | |
17841 | ||
17842 | } | |
17843 | ||
17844 | use Carp; | |
17845 | use vars qw{ | |
17846 | $tokenizer_self | |
17847 | $level_in_tokenizer | |
17848 | $slevel_in_tokenizer | |
17849 | $nesting_token_string | |
17850 | $nesting_type_string | |
17851 | $nesting_block_string | |
17852 | $nesting_block_flag | |
17853 | $nesting_list_string | |
17854 | $nesting_list_flag | |
17855 | $saw_negative_indentation | |
17856 | $id_scan_state | |
17857 | $last_nonblank_token | |
17858 | $last_nonblank_type | |
17859 | $last_nonblank_block_type | |
17860 | $last_nonblank_container_type | |
17861 | $last_nonblank_type_sequence | |
17862 | $last_last_nonblank_token | |
17863 | $last_last_nonblank_type | |
17864 | $last_last_nonblank_block_type | |
17865 | $last_last_nonblank_container_type | |
17866 | $last_last_nonblank_type_sequence | |
17867 | $last_nonblank_prototype | |
17868 | $statement_type | |
17869 | $identifier | |
17870 | $in_quote | |
17871 | $quote_type | |
17872 | $quote_character | |
17873 | $quote_pos | |
17874 | $quote_depth | |
17875 | $allowed_quote_modifiers | |
17876 | $paren_depth | |
17877 | @paren_type | |
17878 | @paren_semicolon_count | |
17879 | @paren_structural_type | |
17880 | $brace_depth | |
17881 | @brace_type | |
17882 | @brace_structural_type | |
17883 | @brace_statement_type | |
17884 | @brace_context | |
17885 | @brace_package | |
17886 | $square_bracket_depth | |
17887 | @square_bracket_type | |
17888 | @square_bracket_structural_type | |
17889 | @depth_array | |
17890 | @starting_line_of_current_depth | |
17891 | @current_depth | |
17892 | @current_sequence_number | |
17893 | @nesting_sequence_number | |
17894 | @lower_case_labels_at | |
17895 | $saw_v_string | |
17896 | %is_constant | |
17897 | %is_user_function | |
17898 | %user_function_prototype | |
17899 | %saw_function_definition | |
17900 | $max_token_index | |
17901 | $peeked_ahead | |
17902 | $current_package | |
17903 | $unexpected_error_count | |
17904 | $input_line | |
17905 | $input_line_number | |
17906 | $rpretokens | |
17907 | $rpretoken_map | |
17908 | $rpretoken_type | |
17909 | $want_paren | |
17910 | $context | |
17911 | @slevel_stack | |
17912 | $ci_string_in_tokenizer | |
17913 | $continuation_string_in_tokenizer | |
17914 | $in_statement_continuation | |
17915 | $started_looking_for_here_target_at | |
17916 | $nearly_matched_here_target_at | |
17917 | ||
17918 | %is_indirect_object_taker | |
17919 | %is_block_operator | |
17920 | %expecting_operator_token | |
17921 | %expecting_operator_types | |
17922 | %expecting_term_types | |
17923 | %expecting_term_token | |
17924 | %is_block_function | |
17925 | %is_block_list_function | |
17926 | %is_digraph | |
17927 | %is_file_test_operator | |
17928 | %is_trigraph | |
17929 | %is_valid_token_type | |
17930 | %is_keyword | |
17931 | %is_code_block_token | |
17932 | %really_want_term | |
17933 | @opening_brace_names | |
17934 | @closing_brace_names | |
17935 | %is_keyword_taking_list | |
17936 | %is_q_qq_qw_qx_qr_s_y_tr_m | |
17937 | }; | |
17938 | ||
17939 | # possible values of operator_expected() | |
17940 | use constant TERM => -1; | |
17941 | use constant UNKNOWN => 0; | |
17942 | use constant OPERATOR => 1; | |
17943 | ||
17944 | # possible values of context | |
17945 | use constant SCALAR_CONTEXT => -1; | |
17946 | use constant UNKNOWN_CONTEXT => 0; | |
17947 | use constant LIST_CONTEXT => 1; | |
17948 | ||
17949 | # Maximum number of little messages; probably need not be changed. | |
17950 | use constant MAX_NAG_MESSAGES => 6; | |
17951 | ||
17952 | { | |
17953 | ||
17954 | # methods to count instances | |
17955 | my $_count = 0; | |
17956 | sub get_count { $_count; } | |
17957 | sub _increment_count { ++$_count } | |
17958 | sub _decrement_count { --$_count } | |
17959 | } | |
17960 | ||
17961 | sub DESTROY { | |
17962 | $_[0]->_decrement_count(); | |
17963 | } | |
17964 | ||
17965 | sub new { | |
17966 | ||
17967 | my $class = shift; | |
17968 | ||
17969 | # Note: 'tabs' and 'indent_columns' are temporary and should be | |
17970 | # removed asap | |
17971 | my %defaults = ( | |
17972 | source_object => undef, | |
17973 | debugger_object => undef, | |
17974 | diagnostics_object => undef, | |
17975 | logger_object => undef, | |
17976 | starting_level => undef, | |
17977 | indent_columns => 4, | |
17978 | tabs => 0, | |
17979 | look_for_hash_bang => 0, | |
17980 | trim_qw => 1, | |
17981 | look_for_autoloader => 1, | |
17982 | look_for_selfloader => 1, | |
17983 | ); | |
17984 | my %args = ( %defaults, @_ ); | |
17985 | ||
17986 | # we are given an object with a get_line() method to supply source lines | |
17987 | my $source_object = $args{source_object}; | |
17988 | ||
17989 | # we create another object with a get_line() and peek_ahead() method | |
17990 | my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); | |
17991 | ||
17992 | # Tokenizer state data is as follows: | |
17993 | # _rhere_target_list reference to list of here-doc targets | |
17994 | # _here_doc_target the target string for a here document | |
17995 | # _here_quote_character the type of here-doc quoting (" ' ` or none) | |
17996 | # to determine if interpolation is done | |
17997 | # _quote_target character we seek if chasing a quote | |
17998 | # _line_start_quote line where we started looking for a long quote | |
17999 | # _in_here_doc flag indicating if we are in a here-doc | |
18000 | # _in_pod flag set if we are in pod documentation | |
18001 | # _in_error flag set if we saw severe error (binary in script) | |
18002 | # _in_data flag set if we are in __DATA__ section | |
18003 | # _in_end flag set if we are in __END__ section | |
18004 | # _in_format flag set if we are in a format description | |
18005 | # _in_quote flag telling if we are chasing a quote | |
18006 | # _starting_level indentation level of first line | |
18007 | # _input_tabstr string denoting one indentation level of input file | |
18008 | # _know_input_tabstr flag indicating if we know _input_tabstr | |
18009 | # _line_buffer_object object with get_line() method to supply source code | |
18010 | # _diagnostics_object place to write debugging information | |
18011 | $tokenizer_self = { | |
18012 | _rhere_target_list => undef, | |
18013 | _in_here_doc => 0, | |
18014 | _here_doc_target => "", | |
18015 | _here_quote_character => "", | |
18016 | _in_data => 0, | |
18017 | _in_end => 0, | |
18018 | _in_format => 0, | |
18019 | _in_error => 0, | |
18020 | _in_pod => 0, | |
18021 | _in_quote => 0, | |
18022 | _quote_target => "", | |
18023 | _line_start_quote => -1, | |
18024 | _starting_level => $args{starting_level}, | |
18025 | _know_starting_level => defined( $args{starting_level} ), | |
18026 | _tabs => $args{tabs}, | |
18027 | _indent_columns => $args{indent_columns}, | |
18028 | _look_for_hash_bang => $args{look_for_hash_bang}, | |
18029 | _trim_qw => $args{trim_qw}, | |
18030 | _input_tabstr => "", | |
18031 | _know_input_tabstr => -1, | |
18032 | _last_line_number => 0, | |
18033 | _saw_perl_dash_P => 0, | |
18034 | _saw_perl_dash_w => 0, | |
18035 | _saw_use_strict => 0, | |
18036 | _look_for_autoloader => $args{look_for_autoloader}, | |
18037 | _look_for_selfloader => $args{look_for_selfloader}, | |
18038 | _saw_autoloader => 0, | |
18039 | _saw_selfloader => 0, | |
18040 | _saw_hash_bang => 0, | |
18041 | _saw_end => 0, | |
18042 | _saw_data => 0, | |
18043 | _saw_lc_filehandle => 0, | |
18044 | _started_tokenizing => 0, | |
18045 | _line_buffer_object => $line_buffer_object, | |
18046 | _debugger_object => $args{debugger_object}, | |
18047 | _diagnostics_object => $args{diagnostics_object}, | |
18048 | _logger_object => $args{logger_object}, | |
18049 | }; | |
18050 | ||
18051 | prepare_for_a_new_file(); | |
18052 | find_starting_indentation_level(); | |
18053 | ||
18054 | bless $tokenizer_self, $class; | |
18055 | ||
18056 | # This is not a full class yet, so die if an attempt is made to | |
18057 | # create more than one object. | |
18058 | ||
18059 | if ( _increment_count() > 1 ) { | |
18060 | confess | |
18061 | "Attempt to create more than 1 object in $class, which is not a true class yet\n"; | |
18062 | } | |
18063 | ||
18064 | return $tokenizer_self; | |
18065 | ||
18066 | } | |
18067 | ||
18068 | # interface to Perl::Tidy::Logger routines | |
18069 | sub warning { | |
18070 | my $logger_object = $tokenizer_self->{_logger_object}; | |
18071 | if ($logger_object) { | |
18072 | $logger_object->warning(@_); | |
18073 | } | |
18074 | } | |
18075 | ||
18076 | sub complain { | |
18077 | my $logger_object = $tokenizer_self->{_logger_object}; | |
18078 | if ($logger_object) { | |
18079 | $logger_object->complain(@_); | |
18080 | } | |
18081 | } | |
18082 | ||
18083 | sub write_logfile_entry { | |
18084 | my $logger_object = $tokenizer_self->{_logger_object}; | |
18085 | if ($logger_object) { | |
18086 | $logger_object->write_logfile_entry(@_); | |
18087 | } | |
18088 | } | |
18089 | ||
18090 | sub interrupt_logfile { | |
18091 | my $logger_object = $tokenizer_self->{_logger_object}; | |
18092 | if ($logger_object) { | |
18093 | $logger_object->interrupt_logfile(); | |
18094 | } | |
18095 | } | |
18096 | ||
18097 | sub resume_logfile { | |
18098 | my $logger_object = $tokenizer_self->{_logger_object}; | |
18099 | if ($logger_object) { | |
18100 | $logger_object->resume_logfile(); | |
18101 | } | |
18102 | } | |
18103 | ||
18104 | sub increment_brace_error { | |
18105 | my $logger_object = $tokenizer_self->{_logger_object}; | |
18106 | if ($logger_object) { | |
18107 | $logger_object->increment_brace_error(); | |
18108 | } | |
18109 | } | |
18110 | ||
18111 | sub report_definite_bug { | |
18112 | my $logger_object = $tokenizer_self->{_logger_object}; | |
18113 | if ($logger_object) { | |
18114 | $logger_object->report_definite_bug(); | |
18115 | } | |
18116 | } | |
18117 | ||
18118 | sub brace_warning { | |
18119 | my $logger_object = $tokenizer_self->{_logger_object}; | |
18120 | if ($logger_object) { | |
18121 | $logger_object->brace_warning(@_); | |
18122 | } | |
18123 | } | |
18124 | ||
18125 | sub get_saw_brace_error { | |
18126 | my $logger_object = $tokenizer_self->{_logger_object}; | |
18127 | if ($logger_object) { | |
18128 | $logger_object->get_saw_brace_error(); | |
18129 | } | |
18130 | else { | |
18131 | 0; | |
18132 | } | |
18133 | } | |
18134 | ||
18135 | # interface to Perl::Tidy::Diagnostics routines | |
18136 | sub write_diagnostics { | |
18137 | if ( $tokenizer_self->{_diagnostics_object} ) { | |
18138 | $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_); | |
18139 | } | |
18140 | } | |
18141 | ||
18142 | sub report_tokenization_errors { | |
18143 | ||
18144 | my $self = shift; | |
18145 | ||
18146 | my $level = get_indentation_level(); | |
18147 | if ( $level != $tokenizer_self->{_starting_level} ) { | |
18148 | warning("final indentation level: $level\n"); | |
18149 | } | |
18150 | ||
18151 | check_final_nesting_depths(); | |
18152 | ||
18153 | if ( $tokenizer_self->{_look_for_hash_bang} | |
18154 | && !$tokenizer_self->{_saw_hash_bang} ) | |
18155 | { | |
18156 | warning( | |
18157 | "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); | |
18158 | } | |
18159 | ||
18160 | if ( $tokenizer_self->{_in_format} ) { | |
18161 | warning("hit EOF while in format description\n"); | |
18162 | } | |
18163 | ||
18164 | # this check may be removed after a year or so | |
18165 | if ( $tokenizer_self->{_saw_lc_filehandle} ) { | |
18166 | ||
18167 | warning( <<'EOM' ); | |
18168 | ------------------------------------------------------------------------ | |
18169 | PLEASE NOTE: If you get this message, it is because perltidy noticed | |
18170 | possible ambiguous syntax at one or more places in your script, as | |
18171 | noted above. The problem is with statements accepting indirect objects, | |
18172 | such as print and printf statements of the form | |
18173 | ||
18174 | print bareword ( $etc | |
18175 | ||
18176 | Perltidy needs your help in deciding if 'bareword' is a filehandle or a | |
18177 | function call. The problem is the space between 'bareword' and '('. If | |
18178 | 'bareword' is a function call, you should remove the trailing space. If | |
18179 | 'bareword' is a filehandle, you should avoid the opening paren or else | |
18180 | globally capitalize 'bareword' to be BAREWORD. So the above line | |
18181 | would be: | |
18182 | ||
18183 | print bareword( $etc # function | |
18184 | or | |
18185 | print bareword @list # filehandle | |
18186 | or | |
18187 | print BAREWORD ( $etc # filehandle | |
18188 | ||
18189 | If you want to keep the line as it is, and are sure it is correct, | |
18190 | you can use -w=0 to prevent this message. | |
18191 | ------------------------------------------------------------------------ | |
18192 | EOM | |
18193 | ||
18194 | } | |
18195 | ||
18196 | if ( $tokenizer_self->{_in_pod} ) { | |
18197 | ||
18198 | # Just write log entry if this is after __END__ or __DATA__ | |
18199 | # because this happens to often, and it is not likely to be | |
18200 | # a parsing error. | |
18201 | if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { | |
18202 | write_logfile_entry( | |
18203 | "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" | |
18204 | ); | |
18205 | } | |
18206 | ||
18207 | else { | |
18208 | complain( | |
18209 | "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" | |
18210 | ); | |
18211 | } | |
18212 | ||
18213 | } | |
18214 | ||
18215 | if ( $tokenizer_self->{_in_here_doc} ) { | |
18216 | my $here_doc_target = $tokenizer_self->{_here_doc_target}; | |
18217 | if ($here_doc_target) { | |
18218 | warning( | |
18219 | "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" | |
18220 | ); | |
18221 | } | |
18222 | else { | |
18223 | warning( | |
18224 | "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" | |
18225 | ); | |
18226 | } | |
18227 | if ($nearly_matched_here_target_at) { | |
18228 | warning( | |
18229 | "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" | |
18230 | ); | |
18231 | } | |
18232 | } | |
18233 | ||
18234 | if ( $tokenizer_self->{_in_quote} ) { | |
18235 | my $line_start_quote = $tokenizer_self->{_line_start_quote}; | |
18236 | my $quote_target = $tokenizer_self->{_quote_target}; | |
18237 | warning( | |
18238 | "hit EOF seeking end of quote/pattern starting at line $line_start_quote ending in $quote_target\n" | |
18239 | ); | |
18240 | } | |
18241 | ||
18242 | unless ( $tokenizer_self->{_saw_perl_dash_w} ) { | |
18243 | if ( $] < 5.006 ) { | |
18244 | write_logfile_entry("Suggest including '-w parameter'\n"); | |
18245 | } | |
18246 | else { | |
18247 | write_logfile_entry("Suggest including 'use warnings;'\n"); | |
18248 | } | |
18249 | } | |
18250 | ||
18251 | if ( $tokenizer_self->{_saw_perl_dash_P} ) { | |
18252 | write_logfile_entry("Use of -P parameter for defines is discouraged\n"); | |
18253 | } | |
18254 | ||
18255 | unless ( $tokenizer_self->{_saw_use_strict} ) { | |
18256 | write_logfile_entry("Suggest including 'use strict;'\n"); | |
18257 | } | |
18258 | ||
18259 | # it is suggested that lables have at least one upper case character | |
18260 | # for legibility and to avoid code breakage as new keywords are introduced | |
18261 | if (@lower_case_labels_at) { | |
18262 | my $num = @lower_case_labels_at; | |
18263 | write_logfile_entry( | |
18264 | "Suggest using upper case characters in label(s)\n"); | |
18265 | local $" = ')('; | |
18266 | write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); | |
18267 | } | |
18268 | } | |
18269 | ||
18270 | sub report_v_string { | |
18271 | ||
18272 | # warn if this version can't handle v-strings | |
18273 | my $tok = shift; | |
18274 | $saw_v_string = $input_line_number; | |
18275 | if ( $] < 5.006 ) { | |
18276 | warning( | |
18277 | "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" | |
18278 | ); | |
18279 | } | |
18280 | } | |
18281 | ||
18282 | sub get_input_line_number { | |
18283 | return $tokenizer_self->{_last_line_number}; | |
18284 | } | |
18285 | ||
18286 | # returns the next tokenized line | |
18287 | sub get_line { | |
18288 | ||
18289 | my $self = shift; | |
18290 | ||
18291 | my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); | |
18292 | ||
18293 | return undef unless ($input_line); | |
18294 | ||
18295 | $tokenizer_self->{_last_line_number}++; | |
18296 | ||
18297 | # Find and remove what characters terminate this line, including any | |
18298 | # control r | |
18299 | my $input_line_separator = ""; | |
18300 | if ( chomp($input_line) ) { $input_line_separator = $/ } | |
18301 | ||
18302 | # TODO: what other characters should be included here? | |
18303 | if ( $input_line =~ s/((\r|\035|\032)+)$// ) { | |
18304 | $input_line_separator = $2 . $input_line_separator; | |
18305 | } | |
18306 | ||
18307 | # for backwards compatability we keep the line text terminated with | |
18308 | # a newline character | |
18309 | $input_line .= "\n"; | |
18310 | ||
18311 | my $input_line_number = $tokenizer_self->{_last_line_number}; | |
18312 | ||
18313 | # create a data structure describing this line which will be | |
18314 | # returned to the caller. | |
18315 | ||
18316 | # _line_type codes are: | |
18317 | # SYSTEM - system-specific code before hash-bang line | |
18318 | # CODE - line of perl code (including comments) | |
18319 | # POD_START - line starting pod, such as '=head' | |
18320 | # POD - pod documentation text | |
18321 | # POD_END - last line of pod section, '=cut' | |
18322 | # HERE - text of here-document | |
18323 | # HERE_END - last line of here-doc (target word) | |
18324 | # FORMAT - format section | |
18325 | # FORMAT_END - last line of format section, '.' | |
18326 | # DATA_START - __DATA__ line | |
18327 | # DATA - unidentified text following __DATA__ | |
18328 | # END_START - __END__ line | |
18329 | # END - unidentified text following __END__ | |
18330 | # ERROR - we are in big trouble, probably not a perl script | |
18331 | ||
18332 | # Other variables: | |
18333 | # _curly_brace_depth - depth of curly braces at start of line | |
18334 | # _square_bracket_depth - depth of square brackets at start of line | |
18335 | # _paren_depth - depth of parens at start of line | |
18336 | # _starting_in_quote - this line continues a multi-line quote | |
18337 | # (so don't trim leading blanks!) | |
18338 | # _ending_in_quote - this line ends in a multi-line quote | |
18339 | # (so don't trim trailing blanks!) | |
18340 | my $line_of_tokens = { | |
18341 | _line_type => 'EOF', | |
18342 | _line_text => $input_line, | |
18343 | _line_number => $input_line_number, | |
18344 | _rtoken_type => undef, | |
18345 | _rtokens => undef, | |
18346 | _rlevels => undef, | |
18347 | _rslevels => undef, | |
18348 | _rblock_type => undef, | |
18349 | _rcontainer_type => undef, | |
18350 | _rcontainer_environment => undef, | |
18351 | _rtype_sequence => undef, | |
18352 | _rnesting_tokens => undef, | |
18353 | _rci_levels => undef, | |
18354 | _rnesting_blocks => undef, | |
18355 | _python_indentation_level => -1, ## 0, | |
18356 | _starting_in_quote => | |
18357 | ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ), | |
18358 | _ending_in_quote => 0, | |
18359 | _curly_brace_depth => $brace_depth, | |
18360 | _square_bracket_depth => $square_bracket_depth, | |
18361 | _paren_depth => $paren_depth, | |
18362 | _quote_character => '', | |
18363 | }; | |
18364 | ||
18365 | # must print line unchanged if we are in a here document | |
18366 | if ( $tokenizer_self->{_in_here_doc} ) { | |
18367 | ||
18368 | $line_of_tokens->{_line_type} = 'HERE'; | |
18369 | my $here_doc_target = $tokenizer_self->{_here_doc_target}; | |
18370 | my $here_quote_character = $tokenizer_self->{_here_quote_character}; | |
18371 | my $candidate_target = $input_line; | |
18372 | chomp $candidate_target; | |
18373 | if ( $candidate_target eq $here_doc_target ) { | |
18374 | $nearly_matched_here_target_at = undef; | |
18375 | $line_of_tokens->{_line_type} = 'HERE_END'; | |
18376 | write_logfile_entry("Exiting HERE document $here_doc_target\n"); | |
18377 | ||
18378 | my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; | |
18379 | if (@$rhere_target_list) { # there can be multiple here targets | |
18380 | ( $here_doc_target, $here_quote_character ) = | |
18381 | @{ shift @$rhere_target_list }; | |
18382 | $tokenizer_self->{_here_doc_target} = $here_doc_target; | |
18383 | $tokenizer_self->{_here_quote_character} = | |
18384 | $here_quote_character; | |
18385 | write_logfile_entry( | |
18386 | "Entering HERE document $here_doc_target\n"); | |
18387 | $nearly_matched_here_target_at = undef; | |
18388 | $started_looking_for_here_target_at = $input_line_number; | |
18389 | } | |
18390 | else { | |
18391 | $tokenizer_self->{_in_here_doc} = 0; | |
18392 | $tokenizer_self->{_here_doc_target} = ""; | |
18393 | $tokenizer_self->{_here_quote_character} = ""; | |
18394 | } | |
18395 | } | |
18396 | ||
18397 | # check for error of extra whitespace | |
18398 | # note for PERL6: leading whitespace is allowed | |
18399 | else { | |
18400 | $candidate_target =~ s/\s*$//; | |
18401 | $candidate_target =~ s/^\s*//; | |
18402 | if ( $candidate_target eq $here_doc_target ) { | |
18403 | $nearly_matched_here_target_at = $input_line_number; | |
18404 | } | |
18405 | } | |
18406 | return $line_of_tokens; | |
18407 | } | |
18408 | ||
18409 | # must print line unchanged if we are in a format section | |
18410 | elsif ( $tokenizer_self->{_in_format} ) { | |
18411 | ||
18412 | if ( $input_line =~ /^\.[\s#]*$/ ) { | |
18413 | write_logfile_entry("Exiting format section\n"); | |
18414 | $tokenizer_self->{_in_format} = 0; | |
18415 | $line_of_tokens->{_line_type} = 'FORMAT_END'; | |
18416 | } | |
18417 | else { | |
18418 | $line_of_tokens->{_line_type} = 'FORMAT'; | |
18419 | } | |
18420 | return $line_of_tokens; | |
18421 | } | |
18422 | ||
18423 | # must print line unchanged if we are in pod documentation | |
18424 | elsif ( $tokenizer_self->{_in_pod} ) { | |
18425 | ||
18426 | $line_of_tokens->{_line_type} = 'POD'; | |
18427 | if ( $input_line =~ /^=cut/ ) { | |
18428 | $line_of_tokens->{_line_type} = 'POD_END'; | |
18429 | write_logfile_entry("Exiting POD section\n"); | |
18430 | $tokenizer_self->{_in_pod} = 0; | |
18431 | } | |
18432 | if ( $input_line =~ /^\#\!.*perl\b/ ) { | |
18433 | warning("Hash-bang in pod can cause perl to fail! \n"); | |
18434 | } | |
18435 | ||
18436 | return $line_of_tokens; | |
18437 | } | |
18438 | ||
18439 | # must print line unchanged if we have seen a severe error (i.e., we | |
18440 | # are seeing illegal tokens and connot continue. Syntax errors do | |
18441 | # not pass this route). Calling routine can decide what to do, but | |
18442 | # the default can be to just pass all lines as if they were after __END__ | |
18443 | elsif ( $tokenizer_self->{_in_error} ) { | |
18444 | $line_of_tokens->{_line_type} = 'ERROR'; | |
18445 | return $line_of_tokens; | |
18446 | } | |
18447 | ||
18448 | # print line unchanged if we are __DATA__ section | |
18449 | elsif ( $tokenizer_self->{_in_data} ) { | |
18450 | ||
18451 | # ...but look for POD | |
18452 | # Note that the _in_data and _in_end flags remain set | |
18453 | # so that we return to that state after seeing the | |
18454 | # end of a pod section | |
18455 | if ( $input_line =~ /^=(?!cut)/ ) { | |
18456 | $line_of_tokens->{_line_type} = 'POD_START'; | |
18457 | write_logfile_entry("Entering POD section\n"); | |
18458 | $tokenizer_self->{_in_pod} = 1; | |
18459 | return $line_of_tokens; | |
18460 | } | |
18461 | else { | |
18462 | $line_of_tokens->{_line_type} = 'DATA'; | |
18463 | return $line_of_tokens; | |
18464 | } | |
18465 | } | |
18466 | ||
18467 | # print line unchanged if we are in __END__ section | |
18468 | elsif ( $tokenizer_self->{_in_end} ) { | |
18469 | ||
18470 | # ...but look for POD | |
18471 | # Note that the _in_data and _in_end flags remain set | |
18472 | # so that we return to that state after seeing the | |
18473 | # end of a pod section | |
18474 | if ( $input_line =~ /^=(?!cut)/ ) { | |
18475 | $line_of_tokens->{_line_type} = 'POD_START'; | |
18476 | write_logfile_entry("Entering POD section\n"); | |
18477 | $tokenizer_self->{_in_pod} = 1; | |
18478 | return $line_of_tokens; | |
18479 | } | |
18480 | else { | |
18481 | $line_of_tokens->{_line_type} = 'END'; | |
18482 | return $line_of_tokens; | |
18483 | } | |
18484 | } | |
18485 | ||
18486 | # check for a hash-bang line if we haven't seen one | |
18487 | if ( !$tokenizer_self->{_saw_hash_bang} ) { | |
18488 | if ( $input_line =~ /^\#\!.*perl\b/ ) { | |
18489 | $tokenizer_self->{_saw_hash_bang} = $input_line_number; | |
18490 | ||
18491 | # check for -w and -P flags | |
18492 | if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { | |
18493 | $tokenizer_self->{_saw_perl_dash_P} = 1; | |
18494 | } | |
18495 | ||
18496 | if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { | |
18497 | $tokenizer_self->{_saw_perl_dash_w} = 1; | |
18498 | } | |
18499 | ||
18500 | if ( ( $input_line_number > 1 ) | |
18501 | && ( !$tokenizer_self->{_look_for_hash_bang} ) ) | |
18502 | { | |
18503 | ||
18504 | # this is helpful for VMS systems; we may have accidentally | |
18505 | # tokenized some DCL commands | |
18506 | if ( $tokenizer_self->{_started_tokenizing} ) { | |
18507 | warning( | |
18508 | "There seems to be a hash-bang after line 1; do you need to run with -x ?\n" | |
18509 | ); | |
18510 | } | |
18511 | else { | |
18512 | complain("Useless hash-bang after line 1\n"); | |
18513 | } | |
18514 | } | |
18515 | ||
18516 | # Report the leading hash-bang as a system line | |
18517 | # This will prevent -dac from deleting it | |
18518 | else { | |
18519 | $line_of_tokens->{_line_type} = 'SYSTEM'; | |
18520 | return $line_of_tokens; | |
18521 | } | |
18522 | } | |
18523 | } | |
18524 | ||
18525 | # wait for a hash-bang before parsing if the user invoked us with -x | |
18526 | if ( $tokenizer_self->{_look_for_hash_bang} | |
18527 | && !$tokenizer_self->{_saw_hash_bang} ) | |
18528 | { | |
18529 | $line_of_tokens->{_line_type} = 'SYSTEM'; | |
18530 | return $line_of_tokens; | |
18531 | } | |
18532 | ||
18533 | # a first line of the form ': #' will be marked as SYSTEM | |
18534 | # since lines of this form may be used by tcsh | |
18535 | if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { | |
18536 | $line_of_tokens->{_line_type} = 'SYSTEM'; | |
18537 | return $line_of_tokens; | |
18538 | } | |
18539 | ||
18540 | # now we know that it is ok to tokenize the line... | |
18541 | # the line tokenizer will modify any of these private variables: | |
18542 | # _rhere_target_list | |
18543 | # _in_data | |
18544 | # _in_end | |
18545 | # _in_format | |
18546 | # _in_error | |
18547 | # _in_pod | |
18548 | # _in_quote | |
18549 | my $ending_in_quote_last = $tokenizer_self->{_in_quote}; | |
18550 | tokenize_this_line($line_of_tokens); | |
18551 | ||
18552 | # Now finish defining the return structure and return it | |
18553 | $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; | |
18554 | ||
18555 | # handle severe error (binary data in script) | |
18556 | if ( $tokenizer_self->{_in_error} ) { | |
18557 | $tokenizer_self->{_in_quote} = 0; # to avoid any more messages | |
18558 | warning("Giving up after error\n"); | |
18559 | $line_of_tokens->{_line_type} = 'ERROR'; | |
18560 | reset_indentation_level(0); # avoid error messages | |
18561 | return $line_of_tokens; | |
18562 | } | |
18563 | ||
18564 | # handle start of pod documentation | |
18565 | if ( $tokenizer_self->{_in_pod} ) { | |
18566 | ||
18567 | # This gets tricky..above a __DATA__ or __END__ section, perl | |
18568 | # accepts '=cut' as the start of pod section. But afterwards, | |
18569 | # only pod utilities see it and they may ignore an =cut without | |
18570 | # leading =head. In any case, this isn't good. | |
18571 | if ( $input_line =~ /^=cut\b/ ) { | |
18572 | if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { | |
18573 | complain("=cut while not in pod ignored\n"); | |
18574 | $tokenizer_self->{_in_pod} = 0; | |
18575 | $line_of_tokens->{_line_type} = 'POD_STOP'; | |
18576 | } | |
18577 | else { | |
18578 | $line_of_tokens->{_line_type} = 'POD_END'; | |
18579 | complain( | |
18580 | "=cut starts a pod section .. this can fool pod utilities.\n" | |
18581 | ); | |
18582 | write_logfile_entry("Entering POD section\n"); | |
18583 | } | |
18584 | } | |
18585 | ||
18586 | else { | |
18587 | $line_of_tokens->{_line_type} = 'POD_START'; | |
18588 | write_logfile_entry("Entering POD section\n"); | |
18589 | } | |
18590 | ||
18591 | return $line_of_tokens; | |
18592 | } | |
18593 | ||
18594 | # update indentation levels for log messages | |
18595 | if ( $input_line !~ /^\s*$/ ) { | |
18596 | my $rlevels = $line_of_tokens->{_rlevels}; | |
18597 | my $structural_indentation_level = $$rlevels[0]; | |
18598 | my ( $python_indentation_level, $msg ) = | |
18599 | find_indentation_level( $input_line, $structural_indentation_level ); | |
18600 | if ($msg) { write_logfile_entry("$msg") } | |
18601 | if ( $tokenizer_self->{_know_input_tabstr} == 1 ) { | |
18602 | $line_of_tokens->{_python_indentation_level} = | |
18603 | $python_indentation_level; | |
18604 | } | |
18605 | } | |
18606 | ||
18607 | # see if this line contains here doc targets | |
18608 | my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; | |
18609 | if (@$rhere_target_list) { | |
18610 | ||
18611 | #my $here_doc_target = shift @$rhere_target_list; | |
18612 | my ( $here_doc_target, $here_quote_character ) = | |
18613 | @{ shift @$rhere_target_list }; | |
18614 | $tokenizer_self->{_in_here_doc} = 1; | |
18615 | $tokenizer_self->{_here_doc_target} = $here_doc_target; | |
18616 | $tokenizer_self->{_here_quote_character} = $here_quote_character; | |
18617 | write_logfile_entry("Entering HERE document $here_doc_target\n"); | |
18618 | $started_looking_for_here_target_at = $input_line_number; | |
18619 | } | |
18620 | ||
18621 | # NOTE: __END__ and __DATA__ statements are written unformatted | |
18622 | # because they can theoretically contain additional characters | |
18623 | # which are not tokenized (and cannot be read with <DATA> either!). | |
18624 | if ( $tokenizer_self->{_in_data} ) { | |
18625 | $line_of_tokens->{_line_type} = 'DATA_START'; | |
18626 | write_logfile_entry("Starting __DATA__ section\n"); | |
18627 | $tokenizer_self->{_saw_data} = 1; | |
18628 | ||
18629 | # keep parsing after __DATA__ if use SelfLoader was seen | |
18630 | if ( $tokenizer_self->{_saw_selfloader} ) { | |
18631 | $tokenizer_self->{_in_data} = 0; | |
18632 | write_logfile_entry( | |
18633 | "SelfLoader seen, continuing; -nlsl deactivates\n"); | |
18634 | } | |
18635 | ||
18636 | return $line_of_tokens; | |
18637 | } | |
18638 | ||
18639 | elsif ( $tokenizer_self->{_in_end} ) { | |
18640 | $line_of_tokens->{_line_type} = 'END_START'; | |
18641 | write_logfile_entry("Starting __END__ section\n"); | |
18642 | $tokenizer_self->{_saw_end} = 1; | |
18643 | ||
18644 | # keep parsing after __END__ if use AutoLoader was seen | |
18645 | if ( $tokenizer_self->{_saw_autoloader} ) { | |
18646 | $tokenizer_self->{_in_end} = 0; | |
18647 | write_logfile_entry( | |
18648 | "AutoLoader seen, continuing; -nlal deactivates\n"); | |
18649 | } | |
18650 | return $line_of_tokens; | |
18651 | } | |
18652 | ||
18653 | # now, finally, we know that this line is type 'CODE' | |
18654 | $line_of_tokens->{_line_type} = 'CODE'; | |
18655 | ||
18656 | # remember if we have seen any real code | |
18657 | if ( !$tokenizer_self->{_started_tokenizing} | |
18658 | && $input_line !~ /^\s*$/ | |
18659 | && $input_line !~ /^\s*#/ ) | |
18660 | { | |
18661 | $tokenizer_self->{_started_tokenizing} = 1; | |
18662 | } | |
18663 | ||
18664 | if ( $tokenizer_self->{_debugger_object} ) { | |
18665 | $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); | |
18666 | } | |
18667 | ||
18668 | # Note: if keyword 'format' occurs in this line code, it is still CODE | |
18669 | # (keyword 'format' need not start a line) | |
18670 | if ( $tokenizer_self->{_in_format} ) { | |
18671 | write_logfile_entry("Entering format section\n"); | |
18672 | } | |
18673 | ||
18674 | if ( $tokenizer_self->{_in_quote} | |
18675 | and ( $tokenizer_self->{_line_start_quote} < 0 ) ) | |
18676 | { | |
18677 | ||
18678 | if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { | |
18679 | $tokenizer_self->{_line_start_quote} = $input_line_number; | |
18680 | $tokenizer_self->{_quote_target} = $quote_target; | |
18681 | write_logfile_entry( | |
18682 | "Start multi-line quote or pattern ending in $quote_target\n"); | |
18683 | } | |
18684 | } | |
18685 | elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) | |
18686 | and !$tokenizer_self->{_in_quote} ) | |
18687 | { | |
18688 | $tokenizer_self->{_line_start_quote} = -1; | |
18689 | write_logfile_entry("End of multi-line quote or pattern\n"); | |
18690 | } | |
18691 | ||
18692 | # we are returning a line of CODE | |
18693 | return $line_of_tokens; | |
18694 | } | |
18695 | ||
18696 | sub find_starting_indentation_level { | |
18697 | ||
18698 | my $starting_level = 0; | |
18699 | my $know_input_tabstr = -1; # flag for find_indentation_level | |
18700 | ||
18701 | # use value if given as parameter | |
18702 | if ( $tokenizer_self->{_know_starting_level} ) { | |
18703 | $starting_level = $tokenizer_self->{_starting_level}; | |
18704 | } | |
18705 | ||
18706 | # if we know there is a hash_bang line, the level must be zero | |
18707 | elsif ( $tokenizer_self->{_look_for_hash_bang} ) { | |
18708 | $tokenizer_self->{_know_starting_level} = 1; | |
18709 | } | |
18710 | ||
18711 | # otherwise figure it out from the input file | |
18712 | else { | |
18713 | my $line; | |
18714 | my $i = 0; | |
18715 | my $structural_indentation_level = -1; # flag for find_indentation_level | |
18716 | ||
18717 | my $msg = ""; | |
18718 | while ( $line = | |
18719 | $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) | |
18720 | { | |
18721 | ||
18722 | # if first line is #! then assume starting level is zero | |
18723 | if ( $i == 1 && $line =~ /^\#\!/ ) { | |
18724 | $starting_level = 0; | |
18725 | last; | |
18726 | } | |
18727 | next if ( $line =~ /^\s*#/ ); # must not be comment | |
18728 | next if ( $line =~ /^\s*$/ ); # must not be blank | |
18729 | ( $starting_level, $msg ) = | |
18730 | find_indentation_level( $line, $structural_indentation_level ); | |
18731 | if ($msg) { write_logfile_entry("$msg") } | |
18732 | last; | |
18733 | } | |
18734 | $msg = "Line $i implies starting-indentation-level = $starting_level\n"; | |
18735 | ||
18736 | if ( $starting_level > 0 ) { | |
18737 | ||
18738 | my $input_tabstr = $tokenizer_self->{_input_tabstr}; | |
18739 | if ( $input_tabstr eq "\t" ) { | |
18740 | $msg .= "by guessing input tabbing uses 1 tab per level\n"; | |
18741 | } | |
18742 | else { | |
18743 | my $cols = length($input_tabstr); | |
18744 | $msg .= | |
18745 | "by guessing input tabbing uses $cols blanks per level\n"; | |
18746 | } | |
18747 | } | |
18748 | write_logfile_entry("$msg"); | |
18749 | } | |
18750 | $tokenizer_self->{_starting_level} = $starting_level; | |
18751 | reset_indentation_level($starting_level); | |
18752 | } | |
18753 | ||
18754 | # Find indentation level given a input line. At the same time, try to | |
18755 | # figure out the input tabbing scheme. | |
18756 | # | |
18757 | # There are two types of calls: | |
18758 | # | |
18759 | # Type 1: $structural_indentation_level < 0 | |
18760 | # In this case we have to guess $input_tabstr to figure out the level. | |
18761 | # | |
18762 | # Type 2: $structural_indentation_level >= 0 | |
18763 | # In this case the level of this line is known, and this routine can | |
18764 | # update the tabbing string, if still unknown, to make the level correct. | |
18765 | ||
18766 | sub find_indentation_level { | |
18767 | my ( $line, $structural_indentation_level ) = @_; | |
18768 | my $level = 0; | |
18769 | my $msg = ""; | |
18770 | ||
18771 | my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr}; | |
18772 | my $input_tabstr = $tokenizer_self->{_input_tabstr}; | |
18773 | ||
18774 | # find leading whitespace | |
18775 | my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : ""; | |
18776 | ||
18777 | # make first guess at input tabbing scheme if necessary | |
18778 | if ( $know_input_tabstr < 0 ) { | |
18779 | ||
18780 | $know_input_tabstr = 0; | |
18781 | ||
18782 | if ( $tokenizer_self->{_tabs} ) { | |
18783 | $input_tabstr = "\t"; | |
18784 | if ( length($leading_whitespace) > 0 ) { | |
18785 | if ( $leading_whitespace !~ /\t/ ) { | |
18786 | ||
18787 | my $cols = $tokenizer_self->{_indent_columns}; | |
18788 | ||
18789 | if ( length($leading_whitespace) < $cols ) { | |
18790 | $cols = length($leading_whitespace); | |
18791 | } | |
18792 | $input_tabstr = " " x $cols; | |
18793 | } | |
18794 | } | |
18795 | } | |
18796 | else { | |
18797 | $input_tabstr = " " x $tokenizer_self->{_indent_columns}; | |
18798 | ||
18799 | if ( length($leading_whitespace) > 0 ) { | |
18800 | if ( $leading_whitespace =~ /^\t/ ) { | |
18801 | $input_tabstr = "\t"; | |
18802 | } | |
18803 | } | |
18804 | } | |
18805 | $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; | |
18806 | $tokenizer_self->{_input_tabstr} = $input_tabstr; | |
18807 | } | |
18808 | ||
18809 | # determine the input tabbing scheme if possible | |
18810 | if ( ( $know_input_tabstr == 0 ) | |
18811 | && ( length($leading_whitespace) > 0 ) | |
18812 | && ( $structural_indentation_level > 0 ) ) | |
18813 | { | |
18814 | my $saved_input_tabstr = $input_tabstr; | |
18815 | ||
18816 | # check for common case of one tab per indentation level | |
18817 | if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { | |
18818 | if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { | |
18819 | $input_tabstr = "\t"; | |
18820 | $msg = "Guessing old indentation was tab character\n"; | |
18821 | } | |
18822 | } | |
18823 | ||
18824 | else { | |
18825 | ||
18826 | # detab any tabs based on 8 blanks per tab | |
18827 | my $entabbed = ""; | |
18828 | if ( $leading_whitespace =~ s/^\t+/ /g ) { | |
18829 | $entabbed = "entabbed"; | |
18830 | } | |
18831 | ||
18832 | # now compute tabbing from number of spaces | |
18833 | my $columns = | |
18834 | length($leading_whitespace) / $structural_indentation_level; | |
18835 | if ( $columns == int $columns ) { | |
18836 | $msg = | |
18837 | "Guessing old indentation was $columns $entabbed spaces\n"; | |
18838 | } | |
18839 | else { | |
18840 | $columns = int $columns; | |
18841 | $msg = | |
18842 | "old indentation is unclear, using $columns $entabbed spaces\n"; | |
18843 | } | |
18844 | $input_tabstr = " " x $columns; | |
18845 | } | |
18846 | $know_input_tabstr = 1; | |
18847 | $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; | |
18848 | $tokenizer_self->{_input_tabstr} = $input_tabstr; | |
18849 | ||
18850 | # see if mistakes were made | |
18851 | if ( ( $tokenizer_self->{_starting_level} > 0 ) | |
18852 | && !$tokenizer_self->{_know_starting_level} ) | |
18853 | { | |
18854 | ||
18855 | if ( $input_tabstr ne $saved_input_tabstr ) { | |
18856 | complain( | |
18857 | "I made a bad starting level guess; rerun with a value for -sil \n" | |
18858 | ); | |
18859 | } | |
18860 | } | |
18861 | } | |
18862 | ||
18863 | # use current guess at input tabbing to get input indentation level | |
18864 | # | |
18865 | # Patch to handle a common case of entabbed leading whitespace | |
18866 | # If the leading whitespace equals 4 spaces and we also have | |
18867 | # tabs, detab the input whitespace assuming 8 spaces per tab. | |
18868 | if ( length($input_tabstr) == 4 ) { | |
18869 | $leading_whitespace =~ s/^\t+/ /g; | |
18870 | } | |
18871 | ||
18872 | if ( ( my $len_tab = length($input_tabstr) ) > 0 ) { | |
18873 | my $pos = 0; | |
18874 | ||
18875 | while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr ) | |
18876 | { | |
18877 | $pos += $len_tab; | |
18878 | $level++; | |
18879 | } | |
18880 | } | |
18881 | return ( $level, $msg ); | |
18882 | } | |
18883 | ||
18884 | sub dump_token_types { | |
18885 | my $class = shift; | |
18886 | my $fh = shift; | |
18887 | ||
18888 | # This should be the latest list of token types in use | |
18889 | # adding NEW_TOKENS: add a comment here | |
18890 | print $fh <<'END_OF_LIST'; | |
18891 | ||
18892 | Here is a list of the token types currently used for lines of type 'CODE'. | |
18893 | For the following tokens, the "type" of a token is just the token itself. | |
18894 | ||
18895 | .. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <> | |
18896 | ( ) <= >= == =~ !~ != ++ -- /= x= | |
18897 | ... **= <<= >>= &&= ||= <=> | |
18898 | , + - / * | % ! x ~ = \ ? : . < > ^ & | |
18899 | ||
18900 | The following additional token types are defined: | |
18901 | ||
18902 | type meaning | |
18903 | b blank (white space) | |
18904 | { indent: opening structural curly brace or square bracket or paren | |
18905 | (code block, anonymous hash reference, or anonymous array reference) | |
18906 | } outdent: right structural curly brace or square bracket or paren | |
18907 | [ left non-structural square bracket (enclosing an array index) | |
18908 | ] right non-structural square bracket | |
18909 | ( left non-structural paren (all but a list right of an =) | |
18910 | ) right non-structural parena | |
18911 | L left non-structural curly brace (enclosing a key) | |
18912 | R right non-structural curly brace | |
18913 | ; terminal semicolon | |
18914 | f indicates a semicolon in a "for" statement | |
18915 | h here_doc operator << | |
18916 | # a comment | |
18917 | Q indicates a quote or pattern | |
18918 | q indicates a qw quote block | |
18919 | k a perl keyword | |
18920 | C user-defined constant or constant function (with void prototype = ()) | |
18921 | U user-defined function taking parameters | |
18922 | G user-defined function taking block parameter (like grep/map/eval) | |
18923 | M (unused, but reserved for subroutine definition name) | |
18924 | P (unused, but -html uses it to label pod text) | |
18925 | t type indicater such as %,$,@,*,&,sub | |
18926 | w bare word (perhaps a subroutine call) | |
18927 | i identifier of some type (with leading %, $, @, *, &, sub, -> ) | |
18928 | n a number | |
18929 | v a v-string | |
18930 | F a file test operator (like -e) | |
18931 | Y File handle | |
18932 | Z identifier in indirect object slot: may be file handle, object | |
18933 | J LABEL: code block label | |
18934 | j LABEL after next, last, redo, goto | |
18935 | p unary + | |
18936 | m unary - | |
18937 | pp pre-increment operator ++ | |
18938 | mm pre-decrement operator -- | |
18939 | A : used as attribute separator | |
18940 | ||
18941 | Here are the '_line_type' codes used internally: | |
18942 | SYSTEM - system-specific code before hash-bang line | |
18943 | CODE - line of perl code (including comments) | |
18944 | POD_START - line starting pod, such as '=head' | |
18945 | POD - pod documentation text | |
18946 | POD_END - last line of pod section, '=cut' | |
18947 | HERE - text of here-document | |
18948 | HERE_END - last line of here-doc (target word) | |
18949 | FORMAT - format section | |
18950 | FORMAT_END - last line of format section, '.' | |
18951 | DATA_START - __DATA__ line | |
18952 | DATA - unidentified text following __DATA__ | |
18953 | END_START - __END__ line | |
18954 | END - unidentified text following __END__ | |
18955 | ERROR - we are in big trouble, probably not a perl script | |
18956 | END_OF_LIST | |
18957 | } | |
18958 | ||
18959 | # This is a currently unused debug routine | |
18960 | sub dump_functions { | |
18961 | ||
18962 | my $fh = *STDOUT; | |
18963 | my ( $pkg, $sub ); | |
18964 | foreach $pkg ( keys %is_user_function ) { | |
18965 | print $fh "\nnon-constant subs in package $pkg\n"; | |
18966 | ||
18967 | foreach $sub ( keys %{ $is_user_function{$pkg} } ) { | |
18968 | my $msg = ""; | |
18969 | if ( $is_block_list_function{$pkg}{$sub} ) { | |
18970 | $msg = 'block_list'; | |
18971 | } | |
18972 | ||
18973 | if ( $is_block_function{$pkg}{$sub} ) { | |
18974 | $msg = 'block'; | |
18975 | } | |
18976 | print $fh "$sub $msg\n"; | |
18977 | } | |
18978 | } | |
18979 | ||
18980 | foreach $pkg ( keys %is_constant ) { | |
18981 | print $fh "\nconstants and constant subs in package $pkg\n"; | |
18982 | ||
18983 | foreach $sub ( keys %{ $is_constant{$pkg} } ) { | |
18984 | print $fh "$sub\n"; | |
18985 | } | |
18986 | } | |
18987 | } | |
18988 | ||
18989 | sub prepare_for_a_new_file { | |
18990 | $saw_negative_indentation = 0; | |
18991 | $id_scan_state = ''; | |
18992 | $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' | |
18993 | $last_nonblank_token = ';'; # the only possible starting state which | |
18994 | $last_nonblank_type = ';'; # will make a leading brace a code block | |
18995 | $last_nonblank_block_type = ''; | |
18996 | $last_nonblank_container_type = ''; | |
18997 | $last_nonblank_type_sequence = ''; | |
18998 | $last_last_nonblank_token = ';'; | |
18999 | $last_last_nonblank_type = ';'; | |
19000 | $last_last_nonblank_block_type = ''; | |
19001 | $last_last_nonblank_container_type = ''; | |
19002 | $last_last_nonblank_type_sequence = ''; | |
19003 | $last_nonblank_prototype = ""; | |
19004 | $identifier = ''; | |
19005 | $in_quote = 0; # flag telling if we are chasing a quote, and what kind | |
19006 | $quote_type = 'Q'; | |
19007 | $quote_character = ""; # character we seek if chasing a quote | |
19008 | $quote_pos = 0; # next character index to check for case of alphanum char | |
19009 | $quote_depth = 0; | |
19010 | $allowed_quote_modifiers = ""; | |
19011 | $paren_depth = 0; | |
19012 | $brace_depth = 0; | |
19013 | $square_bracket_depth = 0; | |
19014 | $current_package = "main"; | |
19015 | @current_depth[ 0 .. $#closing_brace_names ] = | |
19016 | (0) x scalar @closing_brace_names; | |
19017 | @nesting_sequence_number[ 0 .. $#closing_brace_names ] = | |
19018 | ( 0 .. $#closing_brace_names ); | |
19019 | @current_sequence_number = (); | |
19020 | ||
19021 | $paren_type[$paren_depth] = ''; | |
19022 | $paren_semicolon_count[$paren_depth] = 0; | |
19023 | $brace_type[$brace_depth] = ';'; # identify opening brace as code block | |
19024 | $brace_structural_type[$brace_depth] = ''; | |
19025 | $brace_statement_type[$brace_depth] = ""; | |
19026 | $brace_context[$brace_depth] = UNKNOWN_CONTEXT; | |
19027 | $paren_structural_type[$brace_depth] = ''; | |
19028 | $square_bracket_type[$square_bracket_depth] = ''; | |
19029 | $square_bracket_structural_type[$square_bracket_depth] = ''; | |
19030 | $brace_package[$paren_depth] = $current_package; | |
19031 | %is_constant = (); # user-defined constants | |
19032 | %is_user_function = (); # user-defined functions | |
19033 | %user_function_prototype = (); # their prototypes | |
19034 | %is_block_function = (); | |
19035 | %is_block_list_function = (); | |
19036 | %saw_function_definition = (); | |
19037 | $unexpected_error_count = 0; | |
19038 | $want_paren = ""; | |
19039 | $context = UNKNOWN_CONTEXT; | |
19040 | @slevel_stack = (); | |
19041 | $ci_string_in_tokenizer = ""; | |
19042 | $continuation_string_in_tokenizer = "0"; | |
19043 | $in_statement_continuation = 0; | |
19044 | @lower_case_labels_at = (); | |
19045 | $saw_v_string = 0; # for warning of v-strings on older perl | |
19046 | $nesting_token_string = ""; | |
19047 | $nesting_type_string = ""; | |
19048 | $nesting_block_string = '1'; # initially in a block | |
19049 | $nesting_block_flag = 1; | |
19050 | $nesting_list_string = '0'; # initially not in a list | |
19051 | $nesting_list_flag = 0; # initially not in a list | |
19052 | $nearly_matched_here_target_at = undef; | |
19053 | } | |
19054 | ||
19055 | sub get_quote_target { | |
19056 | return matching_end_token($quote_character); | |
19057 | } | |
19058 | ||
19059 | sub get_indentation_level { | |
19060 | return $level_in_tokenizer; | |
19061 | } | |
19062 | ||
19063 | sub reset_indentation_level { | |
19064 | $level_in_tokenizer = $_[0]; | |
19065 | $slevel_in_tokenizer = $_[0]; | |
19066 | push @slevel_stack, $slevel_in_tokenizer; | |
19067 | } | |
19068 | ||
19069 | { # begin tokenize_this_line | |
19070 | ||
19071 | use constant BRACE => 0; | |
19072 | use constant SQUARE_BRACKET => 1; | |
19073 | use constant PAREN => 2; | |
19074 | use constant QUESTION_COLON => 3; | |
19075 | ||
19076 | my ( | |
19077 | $block_type, $container_type, $expecting, | |
19078 | $here_doc_target, $here_quote_character, $i, | |
19079 | $i_tok, $last_nonblank_i, $next_tok, | |
19080 | $next_type, $prototype, $rtoken_map, | |
19081 | $rtoken_type, $rtokens, $tok, | |
19082 | $type, $type_sequence, | |
19083 | ); | |
19084 | ||
19085 | my @output_token_list = (); # stack of output token indexes | |
19086 | my @output_token_type = (); # token types | |
19087 | my @output_block_type = (); # types of code block | |
19088 | my @output_container_type = (); # paren types, such as if, elsif, .. | |
19089 | my @output_type_sequence = (); # nesting sequential number | |
19090 | ||
19091 | my @here_target_list = (); # list of here-doc target strings | |
19092 | ||
19093 | # ------------------------------------------------------------ | |
19094 | # beginning of various scanner interfaces to simplify coding | |
19095 | # ------------------------------------------------------------ | |
19096 | sub scan_bare_identifier { | |
19097 | ( $i, $tok, $type, $prototype ) = | |
19098 | scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, | |
19099 | $rtoken_map ); | |
19100 | } | |
19101 | ||
19102 | sub scan_identifier { | |
19103 | ( $i, $tok, $type, $id_scan_state, $identifier ) = | |
19104 | scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens ); | |
19105 | } | |
19106 | ||
19107 | sub scan_id { | |
19108 | ( $i, $tok, $type, $id_scan_state ) = | |
19109 | scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, | |
19110 | $id_scan_state ); | |
19111 | } | |
19112 | ||
19113 | my $number; | |
19114 | ||
19115 | sub scan_number { | |
19116 | ( $i, $type, $number ) = | |
19117 | scan_number_do( $input_line, $i, $rtoken_map, $type ); | |
19118 | } | |
19119 | ||
19120 | # a sub to warn if token found where term expected | |
19121 | sub error_if_expecting_TERM { | |
19122 | if ( $expecting == TERM ) { | |
19123 | if ( $really_want_term{$last_nonblank_type} ) { | |
19124 | unexpected( $tok, "term", $i_tok, $last_nonblank_i ); | |
19125 | 1; | |
19126 | } | |
19127 | } | |
19128 | } | |
19129 | ||
19130 | # a sub to warn if token found where operator expected | |
19131 | sub error_if_expecting_OPERATOR { | |
19132 | if ( $expecting == OPERATOR ) { | |
19133 | my $thing = defined $_[0] ? $_[0] : $tok; | |
19134 | unexpected( $thing, "operator", $i_tok, $last_nonblank_i ); | |
19135 | if ( $i_tok == 0 ) { | |
19136 | interrupt_logfile(); | |
19137 | warning("Missing ';' above?\n"); | |
19138 | resume_logfile(); | |
19139 | } | |
19140 | 1; | |
19141 | } | |
19142 | } | |
19143 | ||
19144 | # ------------------------------------------------------------ | |
19145 | # end scanner interfaces | |
19146 | # ------------------------------------------------------------ | |
19147 | ||
19148 | my %is_for_foreach; | |
19149 | @_ = qw(for foreach); | |
19150 | @is_for_foreach{@_} = (1) x scalar(@_); | |
19151 | ||
19152 | my %is_my_our; | |
19153 | @_ = qw(my our); | |
19154 | @is_my_our{@_} = (1) x scalar(@_); | |
19155 | ||
19156 | # These keywords may introduce blocks after parenthesized expressions, | |
19157 | # in the form: | |
19158 | # keyword ( .... ) { BLOCK } | |
19159 | # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' | |
19160 | my %is_blocktype_with_paren; | |
19161 | @_ = qw(if elsif unless while until for foreach switch case given when); | |
19162 | @is_blocktype_with_paren{@_} = (1) x scalar(@_); | |
19163 | ||
19164 | # ------------------------------------------------------------ | |
19165 | # begin hash of code for handling most token types | |
19166 | # ------------------------------------------------------------ | |
19167 | my $tokenization_code = { | |
19168 | ||
19169 | # no special code for these types yet, but syntax checks | |
19170 | # could be added | |
19171 | ||
19172 | ## '!' => undef, | |
19173 | ## '!=' => undef, | |
19174 | ## '!~' => undef, | |
19175 | ## '%=' => undef, | |
19176 | ## '&&=' => undef, | |
19177 | ## '&=' => undef, | |
19178 | ## '+=' => undef, | |
19179 | ## '-=' => undef, | |
19180 | ## '..' => undef, | |
19181 | ## '..' => undef, | |
19182 | ## '...' => undef, | |
19183 | ## '.=' => undef, | |
19184 | ## '<<=' => undef, | |
19185 | ## '<=' => undef, | |
19186 | ## '<=>' => undef, | |
19187 | ## '<>' => undef, | |
19188 | ## '=' => undef, | |
19189 | ## '==' => undef, | |
19190 | ## '=~' => undef, | |
19191 | ## '>=' => undef, | |
19192 | ## '>>' => undef, | |
19193 | ## '>>=' => undef, | |
19194 | ## '\\' => undef, | |
19195 | ## '^=' => undef, | |
19196 | ## '|=' => undef, | |
19197 | ## '||=' => undef, | |
19198 | ## '~' => undef, | |
19199 | ||
19200 | '>' => sub { | |
19201 | error_if_expecting_TERM() | |
19202 | if ( $expecting == TERM ); | |
19203 | }, | |
19204 | '|' => sub { | |
19205 | error_if_expecting_TERM() | |
19206 | if ( $expecting == TERM ); | |
19207 | }, | |
19208 | '$' => sub { | |
19209 | ||
19210 | # start looking for a scalar | |
19211 | error_if_expecting_OPERATOR("Scalar") | |
19212 | if ( $expecting == OPERATOR ); | |
19213 | scan_identifier(); | |
19214 | ||
19215 | if ( $identifier eq '$^W' ) { | |
19216 | $tokenizer_self->{_saw_perl_dash_w} = 1; | |
19217 | } | |
19218 | ||
19219 | # Check for indentifier in indirect object slot | |
19220 | # (vorboard.pl, sort.t). Something like: | |
19221 | # /^(print|printf|sort|exec|system)$/ | |
19222 | if ( | |
19223 | $is_indirect_object_taker{$last_nonblank_token} | |
19224 | ||
19225 | || ( ( $last_nonblank_token eq '(' ) | |
19226 | && $is_indirect_object_taker{ $paren_type[$paren_depth] } ) | |
19227 | || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object | |
19228 | ) | |
19229 | { | |
19230 | $type = 'Z'; | |
19231 | } | |
19232 | }, | |
19233 | '(' => sub { | |
19234 | ||
19235 | ++$paren_depth; | |
19236 | $paren_semicolon_count[$paren_depth] = 0; | |
19237 | if ($want_paren) { | |
19238 | $container_type = $want_paren; | |
19239 | $want_paren = ""; | |
19240 | } | |
19241 | else { | |
19242 | $container_type = $last_nonblank_token; | |
19243 | ||
19244 | # We can check for a syntax error here of unexpected '(', | |
19245 | # but this is going to get messy... | |
19246 | if ( | |
19247 | $expecting == OPERATOR | |
19248 | ||
19249 | # be sure this is not a method call of the form | |
19250 | # &method(...), $method->(..), &{method}(...), | |
19251 | # $ref[2](list) is ok & short for $ref[2]->(list) | |
19252 | # NOTE: at present, braces in something like &{ xxx } | |
19253 | # are not marked as a block, we might have a method call | |
19254 | && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ | |
19255 | ||
19256 | ) | |
19257 | { | |
19258 | ||
19259 | # ref: camel 3 p 703. | |
19260 | if ( $last_last_nonblank_token eq 'do' ) { | |
19261 | complain( | |
19262 | "do SUBROUTINE is deprecated; consider & or -> notation\n" | |
19263 | ); | |
19264 | } | |
19265 | else { | |
19266 | ||
19267 | # if this is an empty list, (), then it is not an | |
19268 | # error; for example, we might have a constant pi and | |
19269 | # invoke it with pi() or just pi; | |
19270 | my ( $next_nonblank_token, $i_next ) = | |
19271 | find_next_nonblank_token( $i, $rtokens ); | |
19272 | if ( $next_nonblank_token ne ')' ) { | |
19273 | my $hint; | |
19274 | error_if_expecting_OPERATOR('('); | |
19275 | ||
19276 | if ( $last_nonblank_type eq 'C' ) { | |
19277 | $hint = | |
19278 | "$last_nonblank_token has a void prototype\n"; | |
19279 | } | |
19280 | elsif ( $last_nonblank_type eq 'i' ) { | |
19281 | if ( $i_tok > 0 | |
19282 | && $last_nonblank_token =~ /^\$/ ) | |
19283 | { | |
19284 | $hint = | |
19285 | "Do you mean '$last_nonblank_token->(' ?\n"; | |
19286 | } | |
19287 | } | |
19288 | if ($hint) { | |
19289 | interrupt_logfile(); | |
19290 | warning($hint); | |
19291 | resume_logfile(); | |
19292 | } | |
19293 | } ## end if ( $next_nonblank_token... | |
19294 | } ## end else [ if ( $last_last_nonblank_token... | |
19295 | } ## end if ( $expecting == OPERATOR... | |
19296 | } | |
19297 | $paren_type[$paren_depth] = $container_type; | |
19298 | $type_sequence = increase_nesting_depth( PAREN, $i_tok ); | |
19299 | ||
19300 | # propagate types down through nested parens | |
19301 | # for example: the second paren in 'if ((' would be structural | |
19302 | # since the first is. | |
19303 | ||
19304 | if ( $last_nonblank_token eq '(' ) { | |
19305 | $type = $last_nonblank_type; | |
19306 | } | |
19307 | ||
19308 | # We exclude parens as structural after a ',' because it | |
19309 | # causes subtle problems with continuation indentation for | |
19310 | # something like this, where the first 'or' will not get | |
19311 | # indented. | |
19312 | # | |
19313 | # assert( | |
19314 | # __LINE__, | |
19315 | # ( not defined $check ) | |
19316 | # or ref $check | |
19317 | # or $check eq "new" | |
19318 | # or $check eq "old", | |
19319 | # ); | |
19320 | # | |
19321 | # Likewise, we exclude parens where a statement can start | |
19322 | # because of problems with continuation indentation, like | |
19323 | # these: | |
19324 | # | |
19325 | # ($firstline =~ /^#\!.*perl/) | |
19326 | # and (print $File::Find::name, "\n") | |
19327 | # and (return 1); | |
19328 | # | |
19329 | # (ref($usage_fref) =~ /CODE/) | |
19330 | # ? &$usage_fref | |
19331 | # : (&blast_usage, &blast_params, &blast_general_params); | |
19332 | ||
19333 | else { | |
19334 | $type = '{'; | |
19335 | } | |
19336 | ||
19337 | if ( $last_nonblank_type eq ')' ) { | |
19338 | warning( | |
19339 | "Syntax error? found token '$last_nonblank_type' then '('\n" | |
19340 | ); | |
19341 | } | |
19342 | $paren_structural_type[$paren_depth] = $type; | |
19343 | ||
19344 | }, | |
19345 | ')' => sub { | |
19346 | $type_sequence = decrease_nesting_depth( PAREN, $i_tok ); | |
19347 | ||
19348 | if ( $paren_structural_type[$paren_depth] eq '{' ) { | |
19349 | $type = '}'; | |
19350 | } | |
19351 | ||
19352 | $container_type = $paren_type[$paren_depth]; | |
19353 | ||
19354 | # /^(for|foreach)$/ | |
19355 | if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { | |
19356 | my $num_sc = $paren_semicolon_count[$paren_depth]; | |
19357 | if ( $num_sc > 0 && $num_sc != 2 ) { | |
19358 | warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); | |
19359 | } | |
19360 | } | |
19361 | ||
19362 | if ( $paren_depth > 0 ) { $paren_depth-- } | |
19363 | }, | |
19364 | ',' => sub { | |
19365 | if ( $last_nonblank_type eq ',' ) { | |
19366 | complain("Repeated ','s \n"); | |
19367 | } | |
19368 | ## FIXME: need to move this elsewhere, perhaps check after a '(' | |
19369 | ## elsif ($last_nonblank_token eq '(') { | |
19370 | ## warning("Leading ','s illegal in some versions of perl\n"); | |
19371 | ## } | |
19372 | }, | |
19373 | ';' => sub { | |
19374 | $context = UNKNOWN_CONTEXT; | |
19375 | $statement_type = ''; | |
19376 | ||
19377 | # /^(for|foreach)$/ | |
19378 | if ( $is_for_foreach{ $paren_type[$paren_depth] } ) | |
19379 | { # mark ; in for loop | |
19380 | ||
19381 | # Be careful: we do not want a semicolon such as the | |
19382 | # following to be included: | |
19383 | # | |
19384 | # for (sort {strcoll($a,$b);} keys %investments) { | |
19385 | ||
19386 | if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] | |
19387 | && $square_bracket_depth == | |
19388 | $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) | |
19389 | { | |
19390 | ||
19391 | $type = 'f'; | |
19392 | $paren_semicolon_count[$paren_depth]++; | |
19393 | } | |
19394 | } | |
19395 | ||
19396 | }, | |
19397 | '"' => sub { | |
19398 | error_if_expecting_OPERATOR("String") | |
19399 | if ( $expecting == OPERATOR ); | |
19400 | $in_quote = 1; | |
19401 | $type = 'Q'; | |
19402 | $allowed_quote_modifiers = ""; | |
19403 | }, | |
19404 | "'" => sub { | |
19405 | error_if_expecting_OPERATOR("String") | |
19406 | if ( $expecting == OPERATOR ); | |
19407 | $in_quote = 1; | |
19408 | $type = 'Q'; | |
19409 | $allowed_quote_modifiers = ""; | |
19410 | }, | |
19411 | '`' => sub { | |
19412 | error_if_expecting_OPERATOR("String") | |
19413 | if ( $expecting == OPERATOR ); | |
19414 | $in_quote = 1; | |
19415 | $type = 'Q'; | |
19416 | $allowed_quote_modifiers = ""; | |
19417 | }, | |
19418 | '/' => sub { | |
19419 | my $is_pattern; | |
19420 | ||
19421 | if ( $expecting == UNKNOWN ) { # indeterminte, must guess.. | |
19422 | my $msg; | |
19423 | ( $is_pattern, $msg ) = | |
19424 | guess_if_pattern_or_division( $i, $rtokens, $rtoken_map ); | |
19425 | ||
19426 | if ($msg) { | |
19427 | write_diagnostics("DIVIDE:$msg\n"); | |
19428 | write_logfile_entry($msg); | |
19429 | } | |
19430 | } | |
19431 | else { $is_pattern = ( $expecting == TERM ) } | |
19432 | ||
19433 | if ($is_pattern) { | |
19434 | $in_quote = 1; | |
19435 | $type = 'Q'; | |
19436 | $allowed_quote_modifiers = '[cgimosx]'; | |
19437 | } | |
19438 | else { # not a pattern; check for a /= token | |
19439 | ||
19440 | if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /= | |
19441 | $i++; | |
19442 | $tok = '/='; | |
19443 | $type = $tok; | |
19444 | } | |
19445 | ||
19446 | #DEBUG - collecting info on what tokens follow a divide | |
19447 | # for development of guessing algorithm | |
19448 | #if ( numerator_expected( $i, $rtokens ) < 0 ) { | |
19449 | # #write_diagnostics( "DIVIDE? $input_line\n" ); | |
19450 | #} | |
19451 | } | |
19452 | }, | |
19453 | '{' => sub { | |
19454 | ||
19455 | # if we just saw a ')', we will label this block with | |
19456 | # its type. We need to do this to allow sub | |
19457 | # code_block_type to determine if this brace starts a | |
19458 | # code block or anonymous hash. (The type of a paren | |
19459 | # pair is the preceding token, such as 'if', 'else', | |
19460 | # etc). | |
19461 | $container_type = ""; | |
19462 | ||
19463 | # ATTRS: for a '{' following an attribute list, reset | |
19464 | # things to look like we just saw the sub name | |
19465 | if ( $statement_type =~ /^sub/ ) { | |
19466 | $last_nonblank_token = $statement_type; | |
19467 | $last_nonblank_type = 'i'; | |
19468 | $statement_type = ""; | |
19469 | } | |
19470 | ||
19471 | # patch for SWITCH/CASE: hide these keywords from an immediately | |
19472 | # following opening brace | |
19473 | elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) | |
19474 | && $statement_type eq $last_nonblank_token ) | |
19475 | { | |
19476 | $last_nonblank_token = ";"; | |
19477 | } | |
19478 | ||
19479 | elsif ( $last_nonblank_token eq ')' ) { | |
19480 | $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; | |
19481 | ||
19482 | # defensive move in case of a nesting error (pbug.t) | |
19483 | # in which this ')' had no previous '(' | |
19484 | # this nesting error will have been caught | |
19485 | if ( !defined($last_nonblank_token) ) { | |
19486 | $last_nonblank_token = 'if'; | |
19487 | } | |
19488 | ||
19489 | # check for syntax error here; | |
19490 | unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { | |
19491 | my $list = join( ' ', sort keys %is_blocktype_with_paren ); | |
19492 | warning( | |
19493 | "syntax error at ') {', didn't see one of: $list\n"); | |
19494 | } | |
19495 | } | |
19496 | ||
19497 | # patch for paren-less for/foreach glitch, part 2. | |
19498 | # see note below under 'qw' | |
19499 | elsif ($last_nonblank_token eq 'qw' | |
19500 | && $is_for_foreach{$want_paren} ) | |
19501 | { | |
19502 | $last_nonblank_token = $want_paren; | |
19503 | if ( $last_last_nonblank_token eq $want_paren ) { | |
19504 | warning( | |
19505 | "syntax error at '$want_paren .. {' -- missing \$ loop variable\n" | |
19506 | ); | |
19507 | ||
19508 | } | |
19509 | $want_paren = ""; | |
19510 | } | |
19511 | ||
19512 | # now identify which of the three possible types of | |
19513 | # curly braces we have: hash index container, anonymous | |
19514 | # hash reference, or code block. | |
19515 | ||
19516 | # non-structural (hash index) curly brace pair | |
19517 | # get marked 'L' and 'R' | |
19518 | if ( is_non_structural_brace() ) { | |
19519 | $type = 'L'; | |
19520 | ||
19521 | # patch for SWITCH/CASE: | |
19522 | # allow paren-less identifier after 'when' | |
19523 | # if the brace is preceded by a space | |
19524 | if ( $statement_type eq 'when' | |
19525 | && $last_nonblank_type eq 'i' | |
19526 | && $last_last_nonblank_type eq 'k' | |
19527 | && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) | |
19528 | { | |
19529 | $type = '{'; | |
19530 | $block_type = $statement_type; | |
19531 | } | |
19532 | } | |
19533 | ||
19534 | # code and anonymous hash have the same type, '{', but are | |
19535 | # distinguished by 'block_type', | |
19536 | # which will be blank for an anonymous hash | |
19537 | else { | |
19538 | ||
19539 | $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type ); | |
19540 | ||
19541 | # patch to promote bareword type to function taking block | |
19542 | if ( $block_type | |
19543 | && $last_nonblank_type eq 'w' | |
19544 | && $last_nonblank_i >= 0 ) | |
19545 | { | |
19546 | if ( $output_token_type[$last_nonblank_i] eq 'w' ) { | |
19547 | $output_token_type[$last_nonblank_i] = 'G'; | |
19548 | } | |
19549 | } | |
19550 | ||
19551 | # patch for SWITCH/CASE: if we find a stray opening block brace | |
19552 | # where we might accept a 'case' or 'when' block, then take it | |
19553 | if ( $statement_type eq 'case' | |
19554 | || $statement_type eq 'when' ) | |
19555 | { | |
19556 | if ( !$block_type || $block_type eq '}' ) { | |
19557 | $block_type = $statement_type; | |
19558 | } | |
19559 | } | |
19560 | } | |
19561 | $brace_type[ ++$brace_depth ] = $block_type; | |
19562 | $brace_package[$brace_depth] = $current_package; | |
19563 | $type_sequence = increase_nesting_depth( BRACE, $i_tok ); | |
19564 | $brace_structural_type[$brace_depth] = $type; | |
19565 | $brace_context[$brace_depth] = $context; | |
19566 | $brace_statement_type[$brace_depth] = $statement_type; | |
19567 | }, | |
19568 | '}' => sub { | |
19569 | $block_type = $brace_type[$brace_depth]; | |
19570 | if ($block_type) { $statement_type = '' } | |
19571 | if ( defined( $brace_package[$brace_depth] ) ) { | |
19572 | $current_package = $brace_package[$brace_depth]; | |
19573 | } | |
19574 | ||
19575 | # can happen on brace error (caught elsewhere) | |
19576 | else { | |
19577 | } | |
19578 | $type_sequence = decrease_nesting_depth( BRACE, $i_tok ); | |
19579 | ||
19580 | if ( $brace_structural_type[$brace_depth] eq 'L' ) { | |
19581 | $type = 'R'; | |
19582 | } | |
19583 | ||
19584 | # propagate type information for 'do' and 'eval' blocks. | |
19585 | # This is necessary to enable us to know if an operator | |
19586 | # or term is expected next | |
19587 | if ( $is_block_operator{ $brace_type[$brace_depth] } ) { | |
19588 | $tok = $brace_type[$brace_depth]; | |
19589 | } | |
19590 | ||
19591 | $context = $brace_context[$brace_depth]; | |
19592 | $statement_type = $brace_statement_type[$brace_depth]; | |
19593 | if ( $brace_depth > 0 ) { $brace_depth--; } | |
19594 | }, | |
19595 | '&' => sub { # maybe sub call? start looking | |
19596 | ||
19597 | # We have to check for sub call unless we are sure we | |
19598 | # are expecting an operator. This example from s2p | |
19599 | # got mistaken as a q operator in an early version: | |
19600 | # print BODY &q(<<'EOT'); | |
19601 | if ( $expecting != OPERATOR ) { | |
19602 | scan_identifier(); | |
19603 | } | |
19604 | else { | |
19605 | } | |
19606 | }, | |
19607 | '<' => sub { # angle operator or less than? | |
19608 | ||
19609 | if ( $expecting != OPERATOR ) { | |
19610 | ( $i, $type ) = | |
19611 | find_angle_operator_termination( $input_line, $i, $rtoken_map, | |
19612 | $expecting ); | |
19613 | ||
19614 | } | |
19615 | else { | |
19616 | } | |
19617 | }, | |
19618 | '?' => sub { # ?: conditional or starting pattern? | |
19619 | ||
19620 | my $is_pattern; | |
19621 | ||
19622 | if ( $expecting == UNKNOWN ) { | |
19623 | ||
19624 | my $msg; | |
19625 | ( $is_pattern, $msg ) = | |
19626 | guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map ); | |
19627 | ||
19628 | if ($msg) { write_logfile_entry($msg) } | |
19629 | } | |
19630 | else { $is_pattern = ( $expecting == TERM ) } | |
19631 | ||
19632 | if ($is_pattern) { | |
19633 | $in_quote = 1; | |
19634 | $type = 'Q'; | |
19635 | $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this | |
19636 | } | |
19637 | else { | |
19638 | ||
19639 | $type_sequence = | |
19640 | increase_nesting_depth( QUESTION_COLON, $i_tok ); | |
19641 | } | |
19642 | }, | |
19643 | '*' => sub { # typeglob, or multiply? | |
19644 | ||
19645 | if ( $expecting == TERM ) { | |
19646 | scan_identifier(); | |
19647 | } | |
19648 | else { | |
19649 | ||
19650 | if ( $$rtokens[ $i + 1 ] eq '=' ) { | |
19651 | $tok = '*='; | |
19652 | $type = $tok; | |
19653 | $i++; | |
19654 | } | |
19655 | elsif ( $$rtokens[ $i + 1 ] eq '*' ) { | |
19656 | $tok = '**'; | |
19657 | $type = $tok; | |
19658 | $i++; | |
19659 | if ( $$rtokens[ $i + 1 ] eq '=' ) { | |
19660 | $tok = '**='; | |
19661 | $type = $tok; | |
19662 | $i++; | |
19663 | } | |
19664 | } | |
19665 | } | |
19666 | }, | |
19667 | '.' => sub { # what kind of . ? | |
19668 | ||
19669 | if ( $expecting != OPERATOR ) { | |
19670 | scan_number(); | |
19671 | if ( $type eq '.' ) { | |
19672 | error_if_expecting_TERM() | |
19673 | if ( $expecting == TERM ); | |
19674 | } | |
19675 | } | |
19676 | else { | |
19677 | } | |
19678 | }, | |
19679 | ':' => sub { | |
19680 | ||
19681 | # if this is the first nonblank character, call it a label | |
19682 | # since perl seems to just swallow it | |
19683 | if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { | |
19684 | $type = 'J'; | |
19685 | } | |
19686 | ||
19687 | # ATTRS: check for a ':' which introduces an attribute list | |
19688 | # (this might eventually get its own token type) | |
19689 | elsif ( $statement_type =~ /^sub/ ) { | |
19690 | $type = 'A'; | |
19691 | } | |
19692 | ||
19693 | # check for scalar attribute, such as | |
19694 | # my $foo : shared = 1; | |
19695 | elsif ($is_my_our{$statement_type} | |
19696 | && $current_depth[QUESTION_COLON] == 0 ) | |
19697 | { | |
19698 | $type = 'A'; | |
19699 | } | |
19700 | ||
19701 | # otherwise, it should be part of a ?/: operator | |
19702 | else { | |
19703 | $type_sequence = | |
19704 | decrease_nesting_depth( QUESTION_COLON, $i_tok ); | |
19705 | if ( $last_nonblank_token eq '?' ) { | |
19706 | warning("Syntax error near ? :\n"); | |
19707 | } | |
19708 | } | |
19709 | }, | |
19710 | '+' => sub { # what kind of plus? | |
19711 | ||
19712 | if ( $expecting == TERM ) { | |
19713 | scan_number(); | |
19714 | ||
19715 | # unary plus is safest assumption if not a number | |
19716 | if ( !defined($number) ) { $type = 'p'; } | |
19717 | } | |
19718 | elsif ( $expecting == OPERATOR ) { | |
19719 | } | |
19720 | else { | |
19721 | if ( $next_type eq 'w' ) { $type = 'p' } | |
19722 | } | |
19723 | }, | |
19724 | '@' => sub { | |
19725 | ||
19726 | error_if_expecting_OPERATOR("Array") | |
19727 | if ( $expecting == OPERATOR ); | |
19728 | scan_identifier(); | |
19729 | }, | |
19730 | '%' => sub { # hash or modulo? | |
19731 | ||
19732 | # first guess is hash if no following blank | |
19733 | if ( $expecting == UNKNOWN ) { | |
19734 | if ( $next_type ne 'b' ) { $expecting = TERM } | |
19735 | } | |
19736 | if ( $expecting == TERM ) { | |
19737 | scan_identifier(); | |
19738 | } | |
19739 | }, | |
19740 | '[' => sub { | |
19741 | $square_bracket_type[ ++$square_bracket_depth ] = | |
19742 | $last_nonblank_token; | |
19743 | $type_sequence = increase_nesting_depth( SQUARE_BRACKET, $i_tok ); | |
19744 | ||
19745 | # It may seem odd, but structural square brackets have | |
19746 | # type '{' and '}'. This simplifies the indentation logic. | |
19747 | if ( !is_non_structural_brace() ) { | |
19748 | $type = '{'; | |
19749 | } | |
19750 | $square_bracket_structural_type[$square_bracket_depth] = $type; | |
19751 | }, | |
19752 | ']' => sub { | |
19753 | $type_sequence = decrease_nesting_depth( SQUARE_BRACKET, $i_tok ); | |
19754 | ||
19755 | if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) | |
19756 | { | |
19757 | $type = '}'; | |
19758 | } | |
19759 | if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } | |
19760 | }, | |
19761 | '-' => sub { # what kind of minus? | |
19762 | ||
19763 | if ( ( $expecting != OPERATOR ) | |
19764 | && $is_file_test_operator{$next_tok} ) | |
19765 | { | |
19766 | $i++; | |
19767 | $tok .= $next_tok; | |
19768 | $type = 'F'; | |
19769 | } | |
19770 | elsif ( $expecting == TERM ) { | |
19771 | scan_number(); | |
19772 | ||
19773 | # maybe part of bareword token? unary is safest | |
19774 | if ( !defined($number) ) { $type = 'm'; } | |
19775 | ||
19776 | } | |
19777 | elsif ( $expecting == OPERATOR ) { | |
19778 | } | |
19779 | else { | |
19780 | ||
19781 | if ( $next_type eq 'w' ) { | |
19782 | $type = 'm'; | |
19783 | } | |
19784 | } | |
19785 | }, | |
19786 | ||
19787 | '^' => sub { | |
19788 | ||
19789 | # check for special variables like ${^WARNING_BITS} | |
19790 | if ( $expecting == TERM ) { | |
19791 | ||
19792 | # FIXME: this should work but will not catch errors | |
19793 | # because we also have to be sure that previous token is | |
19794 | # a type character ($,@,%). | |
19795 | if ( $last_nonblank_token eq '{' | |
19796 | && ( $next_tok =~ /^[A-Za-z_]/ ) ) | |
19797 | { | |
19798 | ||
19799 | if ( $next_tok eq 'W' ) { | |
19800 | $tokenizer_self->{_saw_perl_dash_w} = 1; | |
19801 | } | |
19802 | $tok = $tok . $next_tok; | |
19803 | $i = $i + 1; | |
19804 | $type = 'w'; | |
19805 | } | |
19806 | ||
19807 | else { | |
19808 | unless ( error_if_expecting_TERM() ) { | |
19809 | ||
19810 | # Something like this is valid but strange: | |
19811 | # undef ^I; | |
19812 | complain("The '^' seems unusual here\n"); | |
19813 | } | |
19814 | } | |
19815 | } | |
19816 | }, | |
19817 | ||
19818 | '::' => sub { # probably a sub call | |
19819 | scan_bare_identifier(); | |
19820 | }, | |
19821 | '<<' => sub { # maybe a here-doc? | |
19822 | return | |
19823 | unless ( $i < $max_token_index ) | |
19824 | ; # here-doc not possible if end of line | |
19825 | ||
19826 | if ( $expecting != OPERATOR ) { | |
19827 | my ($found_target); | |
19828 | ( $found_target, $here_doc_target, $here_quote_character, $i ) = | |
19829 | find_here_doc( $expecting, $i, $rtokens, $rtoken_map ); | |
19830 | ||
19831 | if ($found_target) { | |
19832 | push @here_target_list, | |
19833 | [ $here_doc_target, $here_quote_character ]; | |
19834 | $type = 'h'; | |
19835 | if ( length($here_doc_target) > 80 ) { | |
19836 | my $truncated = substr( $here_doc_target, 0, 80 ); | |
19837 | complain("Long here-target: '$truncated' ...\n"); | |
19838 | } | |
19839 | elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { | |
19840 | complain( | |
19841 | "Unconventional here-target: '$here_doc_target'\n" | |
19842 | ); | |
19843 | } | |
19844 | } | |
19845 | elsif ( $expecting == TERM ) { | |
19846 | ||
19847 | # shouldn't happen.. | |
19848 | warning("Program bug; didn't find here doc target\n"); | |
19849 | report_definite_bug(); | |
19850 | } | |
19851 | } | |
19852 | else { | |
19853 | } | |
19854 | }, | |
19855 | '->' => sub { | |
19856 | ||
19857 | # if -> points to a bare word, we must scan for an identifier, | |
19858 | # otherwise something like ->y would look like the y operator | |
19859 | scan_identifier(); | |
19860 | }, | |
19861 | ||
19862 | # type = 'pp' for pre-increment, '++' for post-increment | |
19863 | '++' => sub { | |
19864 | if ( $expecting == TERM ) { $type = 'pp' } | |
19865 | elsif ( $expecting == UNKNOWN ) { | |
19866 | my ( $next_nonblank_token, $i_next ) = | |
19867 | find_next_nonblank_token( $i, $rtokens ); | |
19868 | if ( $next_nonblank_token eq '$' ) { $type = 'pp' } | |
19869 | } | |
19870 | }, | |
19871 | ||
19872 | '=>' => sub { | |
19873 | if ( $last_nonblank_type eq $tok ) { | |
19874 | complain("Repeated '=>'s \n"); | |
19875 | } | |
19876 | }, | |
19877 | ||
19878 | # type = 'mm' for pre-decrement, '--' for post-decrement | |
19879 | '--' => sub { | |
19880 | ||
19881 | if ( $expecting == TERM ) { $type = 'mm' } | |
19882 | elsif ( $expecting == UNKNOWN ) { | |
19883 | my ( $next_nonblank_token, $i_next ) = | |
19884 | find_next_nonblank_token( $i, $rtokens ); | |
19885 | if ( $next_nonblank_token eq '$' ) { $type = 'mm' } | |
19886 | } | |
19887 | }, | |
19888 | ||
19889 | '&&' => sub { | |
19890 | error_if_expecting_TERM() | |
19891 | if ( $expecting == TERM ); | |
19892 | }, | |
19893 | ||
19894 | '||' => sub { | |
19895 | error_if_expecting_TERM() | |
19896 | if ( $expecting == TERM ); | |
19897 | }, | |
19898 | }; | |
19899 | ||
19900 | # ------------------------------------------------------------ | |
19901 | # end hash of code for handling individual token types | |
19902 | # ------------------------------------------------------------ | |
19903 | ||
19904 | my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); | |
19905 | ||
19906 | # These block types terminate statements and do not need a trailing | |
19907 | # semicolon | |
19908 | # patched for SWITCH/CASE: | |
19909 | my %is_zero_continuation_block_type; | |
19910 | @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ; | |
19911 | if elsif else unless while until for foreach switch case given when); | |
19912 | @is_zero_continuation_block_type{@_} = (1) x scalar(@_); | |
19913 | ||
19914 | my %is_not_zero_continuation_block_type; | |
19915 | @_ = qw(sort grep map do eval); | |
19916 | @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_); | |
19917 | ||
19918 | my %is_logical_container; | |
19919 | @_ = qw(if elsif unless while and or not && ! || for foreach); | |
19920 | @is_logical_container{@_} = (1) x scalar(@_); | |
19921 | ||
19922 | my %is_binary_type; | |
19923 | @_ = qw(|| &&); | |
19924 | @is_binary_type{@_} = (1) x scalar(@_); | |
19925 | ||
19926 | my %is_binary_keyword; | |
19927 | @_ = qw(and or eq ne cmp); | |
19928 | @is_binary_keyword{@_} = (1) x scalar(@_); | |
19929 | ||
19930 | # 'L' is token for opening { at hash key | |
19931 | my %is_opening_type; | |
19932 | @_ = qw" L { ( [ "; | |
19933 | @is_opening_type{@_} = (1) x scalar(@_); | |
19934 | ||
19935 | # 'R' is token for closing } at hash key | |
19936 | my %is_closing_type; | |
19937 | @_ = qw" R } ) ] "; | |
19938 | @is_closing_type{@_} = (1) x scalar(@_); | |
19939 | ||
19940 | my %is_redo_last_next_goto; | |
19941 | @_ = qw(redo last next goto); | |
19942 | @is_redo_last_next_goto{@_} = (1) x scalar(@_); | |
19943 | ||
19944 | my %is_use_require; | |
19945 | @_ = qw(use require); | |
19946 | @is_use_require{@_} = (1) x scalar(@_); | |
19947 | ||
19948 | my %is_sub_package; | |
19949 | @_ = qw(sub package); | |
19950 | @is_sub_package{@_} = (1) x scalar(@_); | |
19951 | ||
19952 | # This hash holds the hash key in $tokenizer_self for these keywords: | |
19953 | my %is_format_END_DATA = ( | |
19954 | 'format' => '_in_format', | |
19955 | '__END__' => '_in_end', | |
19956 | '__DATA__' => '_in_data', | |
19957 | ); | |
19958 | ||
19959 | # ref: camel 3 p 147, | |
19960 | # but perl may accept undocumented flags | |
19961 | my %quote_modifiers = ( | |
19962 | 's' => '[cegimosx]', | |
19963 | 'y' => '[cds]', | |
19964 | 'tr' => '[cds]', | |
19965 | 'm' => '[cgimosx]', | |
19966 | 'qr' => '[imosx]', | |
19967 | 'q' => "", | |
19968 | 'qq' => "", | |
19969 | 'qw' => "", | |
19970 | 'qx' => "", | |
19971 | ); | |
19972 | ||
19973 | # table showing how many quoted things to look for after quote operator.. | |
19974 | # s, y, tr have 2 (pattern and replacement) | |
19975 | # others have 1 (pattern only) | |
19976 | my %quote_items = ( | |
19977 | 's' => 2, | |
19978 | 'y' => 2, | |
19979 | 'tr' => 2, | |
19980 | 'm' => 1, | |
19981 | 'qr' => 1, | |
19982 | 'q' => 1, | |
19983 | 'qq' => 1, | |
19984 | 'qw' => 1, | |
19985 | 'qx' => 1, | |
19986 | ); | |
19987 | ||
19988 | sub tokenize_this_line { | |
19989 | ||
19990 | # This routine breaks a line of perl code into tokens which are of use in | |
19991 | # indentation and reformatting. One of my goals has been to define tokens | |
19992 | # such that a newline may be inserted between any pair of tokens without | |
19993 | # changing or invalidating the program. This version comes close to this, | |
19994 | # although there are necessarily a few exceptions which must be caught by | |
19995 | # the formatter. Many of these involve the treatment of bare words. | |
19996 | # | |
19997 | # The tokens and their types are returned in arrays. See previous | |
19998 | # routine for their names. | |
19999 | # | |
20000 | # See also the array "valid_token_types" in the BEGIN section for an | |
20001 | # up-to-date list. | |
20002 | # | |
20003 | # To simplify things, token types are either a single character, or they | |
20004 | # are identical to the tokens themselves. | |
20005 | # | |
20006 | # As a debugging aid, the -D flag creates a file containing a side-by-side | |
20007 | # comparison of the input string and its tokenization for each line of a file. | |
20008 | # This is an invaluable debugging aid. | |
20009 | # | |
20010 | # In addition to tokens, and some associated quantities, the tokenizer | |
20011 | # also returns flags indication any special line types. These include | |
20012 | # quotes, here_docs, formats. | |
20013 | # | |
20014 | # ----------------------------------------------------------------------- | |
20015 | # | |
20016 | # How to add NEW_TOKENS: | |
20017 | # | |
20018 | # New token types will undoubtedly be needed in the future both to keep up | |
20019 | # with changes in perl and to help adapt the tokenizer to other applications. | |
20020 | # | |
20021 | # Here are some notes on the minimal steps. I wrote these notes while | |
20022 | # adding the 'v' token type for v-strings, which are things like version | |
20023 | # numbers 5.6.0, and ip addresses, and will use that as an example. ( You | |
20024 | # can use your editor to search for the string "NEW_TOKENS" to find the | |
20025 | # appropriate sections to change): | |
20026 | # | |
20027 | # *. Try to talk somebody else into doing it! If not, .. | |
20028 | # | |
20029 | # *. Make a backup of your current version in case things don't work out! | |
20030 | # | |
20031 | # *. Think of a new, unused character for the token type, and add to | |
20032 | # the array @valid_token_types in the BEGIN section of this package. | |
20033 | # For example, I used 'v' for v-strings. | |
20034 | # | |
20035 | # *. Implement coding to recognize the $type of the token in this routine. | |
20036 | # This is the hardest part, and is best done by immitating or modifying | |
20037 | # some of the existing coding. For example, to recognize v-strings, I | |
20038 | # patched 'sub scan_bare_identifier' to recognize v-strings beginning with | |
20039 | # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. | |
20040 | # | |
20041 | # *. Update sub operator_expected. This update is critically important but | |
20042 | # the coding is trivial. Look at the comments in that routine for help. | |
20043 | # For v-strings, which should behave like numbers, I just added 'v' to the | |
20044 | # regex used to handle numbers and strings (types 'n' and 'Q'). | |
20045 | # | |
20046 | # *. Implement a 'bond strength' rule in sub set_bond_strengths in | |
20047 | # Perl::Tidy::Formatter for breaking lines around this token type. You can | |
20048 | # skip this step and take the default at first, then adjust later to get | |
20049 | # desired results. For adding type 'v', I looked at sub bond_strength and | |
20050 | # saw that number type 'n' was using default strengths, so I didn't do | |
20051 | # anything. I may tune it up someday if I don't like the way line | |
20052 | # breaks with v-strings look. | |
20053 | # | |
20054 | # *. Implement a 'whitespace' rule in sub set_white_space_flag in | |
20055 | # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine | |
20056 | # and saw that type 'n' used spaces on both sides, so I just added 'v' | |
20057 | # to the array @spaces_both_sides. | |
20058 | # | |
20059 | # *. Update HtmlWriter package so that users can colorize the token as | |
20060 | # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in | |
20061 | # that package. For v-strings, I initially chose to use a default color | |
20062 | # equal to the default for numbers, but it might be nice to change that | |
20063 | # eventually. | |
20064 | # | |
20065 | # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. | |
20066 | # | |
20067 | # *. Run lots and lots of debug tests. Start with special files designed | |
20068 | # to test the new token type. Run with the -D flag to create a .DEBUG | |
20069 | # file which shows the tokenization. When these work ok, test as many old | |
20070 | # scripts as possible. Start with all of the '.t' files in the 'test' | |
20071 | # directory of the distribution file. Compare .tdy output with previous | |
20072 | # version and updated version to see the differences. Then include as | |
20073 | # many more files as possible. My own technique has been to collect a huge | |
20074 | # number of perl scripts (thousands!) into one directory and run perltidy | |
20075 | # *, then run diff between the output of the previous version and the | |
20076 | # current version. | |
20077 | # | |
20078 | # ----------------------------------------------------------------------- | |
20079 | ||
20080 | my $line_of_tokens = shift; | |
20081 | my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; | |
20082 | ||
20083 | # patch while coding change is underway | |
20084 | # make callers private data to allow access | |
20085 | # $tokenizer_self = $caller_tokenizer_self; | |
20086 | ||
20087 | # extract line number for use in error messages | |
20088 | $input_line_number = $line_of_tokens->{_line_number}; | |
20089 | ||
20090 | # check for pod documentation | |
20091 | if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { | |
20092 | ||
20093 | # must not be in multi-line quote | |
20094 | # and must not be in an eqn | |
20095 | if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) | |
20096 | { | |
20097 | $tokenizer_self->{_in_pod} = 1; | |
20098 | return; | |
20099 | } | |
20100 | } | |
20101 | ||
20102 | $input_line = $untrimmed_input_line; | |
20103 | ||
20104 | chomp $input_line; | |
20105 | ||
20106 | # trim start of this line unless we are continuing a quoted line | |
20107 | # do not trim end because we might end in a quote (test: deken4.pl) | |
20108 | # Perl::Tidy::Formatter will delete needless trailing blanks | |
20109 | unless ( $in_quote && ( $quote_type eq 'Q' ) ) { | |
20110 | $input_line =~ s/^\s*//; # trim left end | |
20111 | } | |
20112 | ||
20113 | # re-initialize for the main loop | |
20114 | @output_token_list = (); # stack of output token indexes | |
20115 | @output_token_type = (); # token types | |
20116 | @output_block_type = (); # types of code block | |
20117 | @output_container_type = (); # paren types, such as if, elsif, .. | |
20118 | @output_type_sequence = (); # nesting sequential number | |
20119 | ||
20120 | $tok = $last_nonblank_token; | |
20121 | $type = $last_nonblank_type; | |
20122 | $prototype = $last_nonblank_prototype; | |
20123 | $last_nonblank_i = -1; | |
20124 | $block_type = $last_nonblank_block_type; | |
20125 | $container_type = $last_nonblank_container_type; | |
20126 | $type_sequence = $last_nonblank_type_sequence; | |
20127 | @here_target_list = (); # list of here-doc target strings | |
20128 | ||
20129 | $peeked_ahead = 0; | |
20130 | ||
20131 | # tokenization is done in two stages.. | |
20132 | # stage 1 is a very simple pre-tokenization | |
20133 | my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens | |
20134 | ||
20135 | # a little optimization for a full-line comment | |
20136 | if ( !$in_quote && ( $input_line =~ /^#/ ) ) { | |
20137 | $max_tokens_wanted = 1 # no use tokenizing a comment | |
20138 | } | |
20139 | ||
20140 | # start by breaking the line into pre-tokens | |
20141 | ( $rpretokens, $rpretoken_map, $rpretoken_type ) = | |
20142 | pre_tokenize( $input_line, $max_tokens_wanted ); | |
20143 | ||
20144 | $max_token_index = scalar(@$rpretokens) - 1; | |
20145 | push( @$rpretokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic | |
20146 | push( @$rpretoken_map, 0, 0, 0 ); # shouldn't be referenced | |
20147 | push( @$rpretoken_type, 'b', 'b', 'b' ); | |
20148 | ||
20149 | # temporary copies while coding change is underway | |
20150 | ( $rtokens, $rtoken_map, $rtoken_type ) = | |
20151 | ( $rpretokens, $rpretoken_map, $rpretoken_type ); | |
20152 | ||
20153 | # initialize for main loop | |
20154 | for $i ( 0 .. $max_token_index + 3 ) { | |
20155 | $output_token_type[$i] = ""; | |
20156 | $output_block_type[$i] = ""; | |
20157 | $output_container_type[$i] = ""; | |
20158 | $output_type_sequence[$i] = ""; | |
20159 | } | |
20160 | $i = -1; | |
20161 | $i_tok = -1; | |
20162 | ||
20163 | # ------------------------------------------------------------ | |
20164 | # begin main tokenization loop | |
20165 | # ------------------------------------------------------------ | |
20166 | ||
20167 | # we are looking at each pre-token of one line and combining them | |
20168 | # into tokens | |
20169 | while ( ++$i <= $max_token_index ) { | |
20170 | ||
20171 | if ($in_quote) { # continue looking for end of a quote | |
20172 | $type = $quote_type; | |
20173 | ||
20174 | unless (@output_token_list) { # initialize if continuation line | |
20175 | push( @output_token_list, $i ); | |
20176 | $output_token_type[$i] = $type; | |
20177 | ||
20178 | } | |
20179 | $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); | |
20180 | ||
20181 | # scan for the end of the quote or pattern | |
20182 | ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = | |
20183 | do_quote( $i, $in_quote, $quote_character, $quote_pos, | |
20184 | $quote_depth, $rtokens, $rtoken_map ); | |
20185 | ||
20186 | # all done if we didn't find it | |
20187 | last if ($in_quote); | |
20188 | ||
20189 | # re-initialize for next search | |
20190 | $quote_character = ''; | |
20191 | $quote_pos = 0; | |
20192 | $quote_type = 'Q'; | |
20193 | last if ( ++$i > $max_token_index ); | |
20194 | ||
20195 | # look for any modifiers | |
20196 | if ($allowed_quote_modifiers) { | |
20197 | ||
20198 | # check for exact quote modifiers | |
20199 | if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) { | |
20200 | my $str = $$rtokens[$i]; | |
20201 | while ( $str =~ /\G$allowed_quote_modifiers/gc ) { } | |
20202 | ||
20203 | if ( defined( pos($str) ) ) { | |
20204 | ||
20205 | # matched | |
20206 | if ( pos($str) == length($str) ) { | |
20207 | last if ( ++$i > $max_token_index ); | |
20208 | } | |
20209 | ||
20210 | # Looks like a joined quote modifier | |
20211 | # and keyword, maybe something like | |
20212 | # s/xxx/yyy/gefor @k=... | |
20213 | # Example is "galgen.pl". Would have to split | |
20214 | # the word and insert a new token in the | |
20215 | # pre-token list. This is so rare that I haven't | |
20216 | # done it. Will just issue a warning citation. | |
20217 | ||
20218 | # This error might also be triggered if my quote | |
20219 | # modifier characters are incomplete | |
20220 | else { | |
20221 | warning(<<EOM); | |
20222 | ||
20223 | Partial match to quote modifier $allowed_quote_modifiers at word: '$str' | |
20224 | Please put a space between quote modifiers and trailing keywords. | |
20225 | EOM | |
20226 | ||
20227 | # print "token $$rtokens[$i]\n"; | |
20228 | # my $num = length($str) - pos($str); | |
20229 | # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num); | |
20230 | # print "continuing with new token $$rtokens[$i]\n"; | |
20231 | ||
20232 | # skipping past this token does least damage | |
20233 | last if ( ++$i > $max_token_index ); | |
20234 | } | |
20235 | } | |
20236 | else { | |
20237 | ||
20238 | # example file: rokicki4.pl | |
20239 | # This error might also be triggered if my quote | |
20240 | # modifier characters are incomplete | |
20241 | write_logfile_entry( | |
20242 | "Note: found word $str at quote modifier location\n" | |
20243 | ); | |
20244 | } | |
20245 | } | |
20246 | ||
20247 | # re-initialize | |
20248 | $allowed_quote_modifiers = ""; | |
20249 | } | |
20250 | } | |
20251 | ||
20252 | unless ( $tok =~ /^\s*$/ ) { | |
20253 | ||
20254 | # try to catch some common errors | |
20255 | if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { | |
20256 | ||
20257 | if ( $last_nonblank_token eq 'eq' ) { | |
20258 | complain("Should 'eq' be '==' here ?\n"); | |
20259 | } | |
20260 | elsif ( $last_nonblank_token eq 'ne' ) { | |
20261 | complain("Should 'ne' be '!=' here ?\n"); | |
20262 | } | |
20263 | } | |
20264 | ||
20265 | $last_last_nonblank_token = $last_nonblank_token; | |
20266 | $last_last_nonblank_type = $last_nonblank_type; | |
20267 | $last_last_nonblank_block_type = $last_nonblank_block_type; | |
20268 | $last_last_nonblank_container_type = | |
20269 | $last_nonblank_container_type; | |
20270 | $last_last_nonblank_type_sequence = | |
20271 | $last_nonblank_type_sequence; | |
20272 | $last_nonblank_token = $tok; | |
20273 | $last_nonblank_type = $type; | |
20274 | $last_nonblank_prototype = $prototype; | |
20275 | $last_nonblank_block_type = $block_type; | |
20276 | $last_nonblank_container_type = $container_type; | |
20277 | $last_nonblank_type_sequence = $type_sequence; | |
20278 | $last_nonblank_i = $i_tok; | |
20279 | } | |
20280 | ||
20281 | # store previous token type | |
20282 | if ( $i_tok >= 0 ) { | |
20283 | $output_token_type[$i_tok] = $type; | |
20284 | $output_block_type[$i_tok] = $block_type; | |
20285 | $output_container_type[$i_tok] = $container_type; | |
20286 | $output_type_sequence[$i_tok] = $type_sequence; | |
20287 | } | |
20288 | my $pre_tok = $$rtokens[$i]; # get the next pre-token | |
20289 | my $pre_type = $$rtoken_type[$i]; # and type | |
20290 | $tok = $pre_tok; | |
20291 | $type = $pre_type; # to be modified as necessary | |
20292 | $block_type = ""; # blank for all tokens except code block braces | |
20293 | $container_type = ""; # blank for all tokens except some parens | |
20294 | $type_sequence = ""; # blank for all tokens except ?/: | |
20295 | $prototype = ""; # blank for all tokens except user defined subs | |
20296 | $i_tok = $i; | |
20297 | ||
20298 | # this pre-token will start an output token | |
20299 | push( @output_token_list, $i_tok ); | |
20300 | ||
20301 | # continue gathering identifier if necessary | |
20302 | # but do not start on blanks and comments | |
20303 | if ( $id_scan_state && $pre_type !~ /[b#]/ ) { | |
20304 | ||
20305 | if ( $id_scan_state =~ /^(sub|package)/ ) { | |
20306 | scan_id(); | |
20307 | } | |
20308 | else { | |
20309 | scan_identifier(); | |
20310 | } | |
20311 | ||
20312 | last if ($id_scan_state); | |
20313 | next if ( ( $i > 0 ) || $type ); | |
20314 | ||
20315 | # didn't find any token; start over | |
20316 | $type = $pre_type; | |
20317 | $tok = $pre_tok; | |
20318 | } | |
20319 | ||
20320 | # handle whitespace tokens.. | |
20321 | next if ( $type eq 'b' ); | |
20322 | my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' '; | |
20323 | my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b'; | |
20324 | ||
20325 | # Build larger tokens where possible, since we are not in a quote. | |
20326 | # | |
20327 | # First try to assemble digraphs. The following tokens are | |
20328 | # excluded and handled specially: | |
20329 | # '/=' is excluded because the / might start a pattern. | |
20330 | # 'x=' is excluded since it might be $x=, with $ on previous line | |
20331 | # '**' and *= might be typeglobs of punctuation variables | |
20332 | # I have allowed tokens starting with <, such as <=, | |
20333 | # because I don't think these could be valid angle operators. | |
20334 | # test file: storrs4.pl | |
20335 | my $test_tok = $tok . $$rtokens[ $i + 1 ]; | |
20336 | ||
20337 | if ( | |
20338 | $is_digraph{$test_tok} | |
20339 | && ( $test_tok ne '/=' ) # might be pattern | |
20340 | && ( $test_tok ne 'x=' ) # might be $x | |
20341 | && ( $test_tok ne '**' ) # typeglob? | |
20342 | && ( $test_tok ne '*=' ) # typeglob? | |
20343 | ) | |
20344 | { | |
20345 | $tok = $test_tok; | |
20346 | $i++; | |
20347 | ||
20348 | # Now try to assemble trigraphs. Note that all possible | |
20349 | # perl trigraphs can be constructed by appending a character | |
20350 | # to a digraph. | |
20351 | $test_tok = $tok . $$rtokens[ $i + 1 ]; | |
20352 | ||
20353 | if ( $is_trigraph{$test_tok} ) { | |
20354 | $tok = $test_tok; | |
20355 | $i++; | |
20356 | } | |
20357 | } | |
20358 | $type = $tok; | |
20359 | $next_tok = $$rtokens[ $i + 1 ]; | |
20360 | $next_type = $$rtoken_type[ $i + 1 ]; | |
20361 | ||
20362 | TOKENIZER_DEBUG_FLAG_TOKENIZE && do { | |
20363 | local $" = ')('; | |
20364 | my @debug_list = ( | |
20365 | $last_nonblank_token, $tok, | |
20366 | $next_tok, $brace_depth, | |
20367 | $brace_type[$brace_depth], $paren_depth, | |
20368 | $paren_type[$paren_depth] | |
20369 | ); | |
20370 | print "TOKENIZE:(@debug_list)\n"; | |
20371 | }; | |
20372 | ||
20373 | ############################################################### | |
20374 | # We have the next token, $tok. | |
20375 | # Now we have to examine this token and decide what it is | |
20376 | # and define its $type | |
20377 | # | |
20378 | # section 1: bare words | |
20379 | ############################################################### | |
20380 | ||
20381 | if ( $pre_type eq 'w' ) { | |
20382 | $expecting = operator_expected( $prev_type, $tok, $next_type ); | |
20383 | my ( $next_nonblank_token, $i_next ) = | |
20384 | find_next_nonblank_token( $i, $rtokens ); | |
20385 | ||
20386 | # quote a word followed by => operator | |
20387 | if ( $next_nonblank_token eq '=' ) { | |
20388 | ||
20389 | if ( $$rtokens[ $i_next + 1 ] eq '>' ) { | |
20390 | if ( $is_constant{$current_package}{$tok} ) { | |
20391 | $type = 'C'; | |
20392 | } | |
20393 | elsif ( $is_user_function{$current_package}{$tok} ) { | |
20394 | $type = 'U'; | |
20395 | $prototype = | |
20396 | $user_function_prototype{$current_package}{$tok}; | |
20397 | } | |
20398 | elsif ( $tok =~ /^v\d+$/ ) { | |
20399 | $type = 'v'; | |
20400 | unless ($saw_v_string) { report_v_string($tok) } | |
20401 | } | |
20402 | else { $type = 'w' } | |
20403 | ||
20404 | next; | |
20405 | } | |
20406 | } | |
20407 | ||
20408 | # quote a bare word within braces..like xxx->{s}; note that we | |
20409 | # must be sure this is not a structural brace, to avoid | |
20410 | # mistaking {s} in the following for a quoted bare word: | |
20411 | # for(@[){s}bla}BLA} | |
20412 | if ( ( $last_nonblank_type eq 'L' ) | |
20413 | && ( $next_nonblank_token eq '}' ) ) | |
20414 | { | |
20415 | $type = 'w'; | |
20416 | next; | |
20417 | } | |
20418 | ||
20419 | # a bare word immediately followed by :: is not a keyword; | |
20420 | # use $tok_kw when testing for keywords to avoid a mistake | |
20421 | my $tok_kw = $tok; | |
20422 | if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' ) | |
20423 | { | |
20424 | $tok_kw .= '::'; | |
20425 | } | |
20426 | ||
20427 | # handle operator x (now we know it isn't $x=) | |
20428 | if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { | |
20429 | if ( $tok eq 'x' ) { | |
20430 | ||
20431 | if ( $$rtokens[ $i + 1 ] eq '=' ) { # x= | |
20432 | $tok = 'x='; | |
20433 | $type = $tok; | |
20434 | $i++; | |
20435 | } | |
20436 | else { | |
20437 | $type = 'x'; | |
20438 | } | |
20439 | } | |
20440 | ||
20441 | # FIXME: Patch: mark something like x4 as an integer for now | |
20442 | # It gets fixed downstream. This is easier than | |
20443 | # splitting the pretoken. | |
20444 | else { | |
20445 | $type = 'n'; | |
20446 | } | |
20447 | } | |
20448 | ||
20449 | elsif ( ( $tok eq 'strict' ) | |
20450 | and ( $last_nonblank_token eq 'use' ) ) | |
20451 | { | |
20452 | $tokenizer_self->{_saw_use_strict} = 1; | |
20453 | scan_bare_identifier(); | |
20454 | } | |
20455 | ||
20456 | elsif ( ( $tok eq 'warnings' ) | |
20457 | and ( $last_nonblank_token eq 'use' ) ) | |
20458 | { | |
20459 | $tokenizer_self->{_saw_perl_dash_w} = 1; | |
20460 | ||
20461 | # scan as identifier, so that we pick up something like: | |
20462 | # use warnings::register | |
20463 | scan_bare_identifier(); | |
20464 | } | |
20465 | ||
20466 | elsif ( | |
20467 | $tok eq 'AutoLoader' | |
20468 | && $tokenizer_self->{_look_for_autoloader} | |
20469 | && ( | |
20470 | $last_nonblank_token eq 'use' | |
20471 | ||
20472 | # these regexes are from AutoSplit.pm, which we want | |
20473 | # to mimic | |
20474 | || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ | |
20475 | || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ | |
20476 | ) | |
20477 | ) | |
20478 | { | |
20479 | write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); | |
20480 | $tokenizer_self->{_saw_autoloader} = 1; | |
20481 | $tokenizer_self->{_look_for_autoloader} = 0; | |
20482 | scan_bare_identifier(); | |
20483 | } | |
20484 | ||
20485 | elsif ( | |
20486 | $tok eq 'SelfLoader' | |
20487 | && $tokenizer_self->{_look_for_selfloader} | |
20488 | && ( $last_nonblank_token eq 'use' | |
20489 | || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ | |
20490 | || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) | |
20491 | ) | |
20492 | { | |
20493 | write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); | |
20494 | $tokenizer_self->{_saw_selfloader} = 1; | |
20495 | $tokenizer_self->{_look_for_selfloader} = 0; | |
20496 | scan_bare_identifier(); | |
20497 | } | |
20498 | ||
20499 | elsif ( ( $tok eq 'constant' ) | |
20500 | and ( $last_nonblank_token eq 'use' ) ) | |
20501 | { | |
20502 | scan_bare_identifier(); | |
20503 | my ( $next_nonblank_token, $i_next ) = | |
20504 | find_next_nonblank_token( $i, $rtokens ); | |
20505 | ||
20506 | if ($next_nonblank_token) { | |
20507 | ||
20508 | if ( $is_keyword{$next_nonblank_token} ) { | |
20509 | warning( | |
20510 | "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" | |
20511 | ); | |
20512 | } | |
20513 | ||
20514 | # FIXME: could check for error in which next token is | |
20515 | # not a word (number, punctuation, ..) | |
20516 | else { | |
20517 | $is_constant{$current_package} | |
20518 | {$next_nonblank_token} = 1; | |
20519 | } | |
20520 | } | |
20521 | } | |
20522 | ||
20523 | # various quote operators | |
20524 | elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { | |
20525 | if ( $expecting == OPERATOR ) { | |
20526 | ||
20527 | # patch for paren-less for/foreach glitch, part 1 | |
20528 | # perl will accept this construct as valid: | |
20529 | # | |
20530 | # foreach my $key qw\Uno Due Tres Quadro\ { | |
20531 | # print "Set $key\n"; | |
20532 | # } | |
20533 | unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} ) | |
20534 | { | |
20535 | error_if_expecting_OPERATOR(); | |
20536 | } | |
20537 | } | |
20538 | $in_quote = $quote_items{$tok}; | |
20539 | $allowed_quote_modifiers = $quote_modifiers{$tok}; | |
20540 | ||
20541 | # All quote types are 'Q' except possibly qw quotes. | |
20542 | # qw quotes are special in that they may generally be trimmed | |
20543 | # of leading and trailing whitespace. So they are given a | |
20544 | # separate type, 'q', unless requested otherwise. | |
20545 | $type = | |
20546 | ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) | |
20547 | ? 'q' | |
20548 | : 'Q'; | |
20549 | $quote_type = $type; | |
20550 | } | |
20551 | ||
20552 | # check for a statement label | |
20553 | elsif ( | |
20554 | ( $next_nonblank_token eq ':' ) | |
20555 | && ( $$rtokens[ $i_next + 1 ] ne ':' ) | |
20556 | && ( $i_next <= $max_token_index ) # colon on same line | |
20557 | && label_ok() | |
20558 | ) | |
20559 | { | |
20560 | if ( $tok !~ /A-Z/ ) { | |
20561 | push @lower_case_labels_at, $input_line_number; | |
20562 | } | |
20563 | $type = 'J'; | |
20564 | $tok .= ':'; | |
20565 | $i = $i_next; | |
20566 | next; | |
20567 | } | |
20568 | ||
20569 | # 'sub' || 'package' | |
20570 | elsif ( $is_sub_package{$tok_kw} ) { | |
20571 | error_if_expecting_OPERATOR() | |
20572 | if ( $expecting == OPERATOR ); | |
20573 | scan_id(); | |
20574 | } | |
20575 | ||
20576 | # Note on token types for format, __DATA__, __END__: | |
20577 | # It simplifies things to give these type ';', so that when we | |
20578 | # start rescanning we will be expecting a token of type TERM. | |
20579 | # We will switch to type 'k' before outputting the tokens. | |
20580 | elsif ( $is_format_END_DATA{$tok_kw} ) { | |
20581 | $type = ';'; # make tokenizer look for TERM next | |
20582 | $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; | |
20583 | last; | |
20584 | } | |
20585 | ||
20586 | elsif ( $is_keyword{$tok_kw} ) { | |
20587 | $type = 'k'; | |
20588 | ||
20589 | # Since for and foreach may not be followed immediately | |
20590 | # by an opening paren, we have to remember which keyword | |
20591 | # is associated with the next '(' | |
20592 | if ( $is_for_foreach{$tok} ) { | |
20593 | if ( new_statement_ok() ) { | |
20594 | $want_paren = $tok; | |
20595 | } | |
20596 | } | |
20597 | ||
20598 | # recognize 'use' statements, which are special | |
20599 | elsif ( $is_use_require{$tok} ) { | |
20600 | $statement_type = $tok; | |
20601 | error_if_expecting_OPERATOR() | |
20602 | if ( $expecting == OPERATOR ); | |
20603 | } | |
20604 | ||
20605 | # remember my and our to check for trailing ": shared" | |
20606 | elsif ( $is_my_our{$tok} ) { | |
20607 | $statement_type = $tok; | |
20608 | } | |
20609 | ||
20610 | # Check for misplaced 'elsif' and 'else', but allow isolated | |
20611 | # else or elsif blocks to be formatted. This is indicated | |
20612 | # by a last noblank token of ';' | |
20613 | elsif ( $tok eq 'elsif' ) { | |
20614 | if ( $last_nonblank_token ne ';' | |
20615 | && $last_nonblank_block_type !~ | |
20616 | /^(if|elsif|unless)$/ ) | |
20617 | { | |
20618 | warning( | |
20619 | "expecting '$tok' to follow one of 'if|elsif|unless'\n" | |
20620 | ); | |
20621 | } | |
20622 | } | |
20623 | elsif ( $tok eq 'else' ) { | |
20624 | ||
20625 | # patched for SWITCH/CASE | |
20626 | if ( $last_nonblank_token ne ';' | |
20627 | && $last_nonblank_block_type !~ | |
20628 | /^(if|elsif|unless|case|when)$/ ) | |
20629 | { | |
20630 | warning( | |
20631 | "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" | |
20632 | ); | |
20633 | } | |
20634 | } | |
20635 | elsif ( $tok eq 'continue' ) { | |
20636 | if ( $last_nonblank_token ne ';' | |
20637 | && $last_nonblank_block_type !~ | |
20638 | /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) | |
20639 | { | |
20640 | ||
20641 | # note: ';' '{' and '}' in list above | |
20642 | # because continues can follow bare blocks; | |
20643 | # ':' is labeled block | |
20644 | warning("'$tok' should follow a block\n"); | |
20645 | } | |
20646 | } | |
20647 | ||
20648 | # patch for SWITCH/CASE if 'case' and 'when are | |
20649 | # treated as keywords. | |
20650 | elsif ( $tok eq 'when' || $tok eq 'case' ) { | |
20651 | $statement_type = $tok; # next '{' is block | |
20652 | } | |
20653 | } | |
20654 | ||
20655 | # check for inline label following | |
20656 | # /^(redo|last|next|goto)$/ | |
20657 | elsif (( $last_nonblank_type eq 'k' ) | |
20658 | && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) | |
20659 | { | |
20660 | $type = 'j'; | |
20661 | next; | |
20662 | } | |
20663 | ||
20664 | # something else -- | |
20665 | else { | |
20666 | ||
20667 | scan_bare_identifier(); | |
20668 | if ( $type eq 'w' ) { | |
20669 | ||
20670 | if ( $expecting == OPERATOR ) { | |
20671 | ||
20672 | # don't complain about possible indirect object | |
20673 | # notation. | |
20674 | # For example: | |
20675 | # package main; | |
20676 | # sub new($) { ... } | |
20677 | # $b = new A::; # calls A::new | |
20678 | # $c = new A; # same thing but suspicious | |
20679 | # This will call A::new but we have a 'new' in | |
20680 | # main:: which looks like a constant. | |
20681 | # | |
20682 | if ( $last_nonblank_type eq 'C' ) { | |
20683 | if ( $tok !~ /::$/ ) { | |
20684 | complain(<<EOM); | |
20685 | Expecting operator after '$last_nonblank_token' but found bare word '$tok' | |
20686 | Maybe indirectet object notation? | |
20687 | EOM | |
20688 | } | |
20689 | } | |
20690 | else { | |
20691 | error_if_expecting_OPERATOR("bareword"); | |
20692 | } | |
20693 | } | |
20694 | ||
20695 | # mark bare words immediately followed by a paren as | |
20696 | # functions | |
20697 | $next_tok = $$rtokens[ $i + 1 ]; | |
20698 | if ( $next_tok eq '(' ) { | |
20699 | $type = 'U'; | |
20700 | } | |
20701 | ||
20702 | # mark bare words following a file test operator as | |
20703 | # something that will expect an operator next. | |
20704 | # patch 072901: unless followed immediately by a paren, | |
20705 | # in which case it must be a function call (pid.t) | |
20706 | if ( $last_nonblank_type eq 'F' && $next_tok ne '(' ) { | |
20707 | $type = 'C'; | |
20708 | } | |
20709 | ||
20710 | # patch for SWITCH/CASE if 'case' and 'when are | |
20711 | # not treated as keywords: | |
20712 | if ( | |
20713 | ( | |
20714 | $tok eq 'case' | |
20715 | && $brace_type[$brace_depth] eq 'switch' | |
20716 | ) | |
20717 | || ( $tok eq 'when' | |
20718 | && $brace_type[$brace_depth] eq 'given' ) | |
20719 | ) | |
20720 | { | |
20721 | $statement_type = $tok; # next '{' is block | |
20722 | $type = 'k'; # for keyword syntax coloring | |
20723 | } | |
20724 | ||
20725 | # patch for SWITCH/CASE if switch and given not keywords | |
20726 | # Switch is not a perl 5 keyword, but we will gamble | |
20727 | # and mark switch followed by paren as a keyword. This | |
20728 | # is only necessary to get html syntax coloring nice, | |
20729 | # and does not commit this as being a switch/case. | |
20730 | if ( $next_nonblank_token eq '(' | |
20731 | && ( $tok eq 'switch' || $tok eq 'given' ) ) | |
20732 | { | |
20733 | $type = 'k'; # for keyword syntax coloring | |
20734 | } | |
20735 | } | |
20736 | } | |
20737 | } | |
20738 | ||
20739 | ############################################################### | |
20740 | # section 2: strings of digits | |
20741 | ############################################################### | |
20742 | elsif ( $pre_type eq 'd' ) { | |
20743 | $expecting = operator_expected( $prev_type, $tok, $next_type ); | |
20744 | error_if_expecting_OPERATOR("Number") | |
20745 | if ( $expecting == OPERATOR ); | |
20746 | scan_number(); | |
20747 | if ( !defined($number) ) { | |
20748 | ||
20749 | # shouldn't happen - we should always get a number | |
20750 | warning("non-number beginning with digit--program bug\n"); | |
20751 | report_definite_bug(); | |
20752 | } | |
20753 | } | |
20754 | ||
20755 | ############################################################### | |
20756 | # section 3: all other tokens | |
20757 | ############################################################### | |
20758 | ||
20759 | else { | |
20760 | last if ( $tok eq '#' ); | |
20761 | my $code = $tokenization_code->{$tok}; | |
20762 | if ($code) { | |
20763 | $expecting = | |
20764 | operator_expected( $prev_type, $tok, $next_type ); | |
20765 | $code->(); | |
20766 | redo if $in_quote; | |
20767 | } | |
20768 | } | |
20769 | } | |
20770 | ||
20771 | # ----------------------------- | |
20772 | # end of main tokenization loop | |
20773 | # ----------------------------- | |
20774 | ||
20775 | if ( $i_tok >= 0 ) { | |
20776 | $output_token_type[$i_tok] = $type; | |
20777 | $output_block_type[$i_tok] = $block_type; | |
20778 | $output_container_type[$i_tok] = $container_type; | |
20779 | $output_type_sequence[$i_tok] = $type_sequence; | |
20780 | } | |
20781 | ||
20782 | unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { | |
20783 | $last_last_nonblank_token = $last_nonblank_token; | |
20784 | $last_last_nonblank_type = $last_nonblank_type; | |
20785 | $last_last_nonblank_block_type = $last_nonblank_block_type; | |
20786 | $last_last_nonblank_container_type = $last_nonblank_container_type; | |
20787 | $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; | |
20788 | $last_nonblank_token = $tok; | |
20789 | $last_nonblank_type = $type; | |
20790 | $last_nonblank_block_type = $block_type; | |
20791 | $last_nonblank_container_type = $container_type; | |
20792 | $last_nonblank_type_sequence = $type_sequence; | |
20793 | $last_nonblank_prototype = $prototype; | |
20794 | } | |
20795 | ||
20796 | # reset indentation level if necessary at a sub or package | |
20797 | # in an attempt to recover from a nesting error | |
20798 | if ( $level_in_tokenizer < 0 ) { | |
20799 | if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { | |
20800 | reset_indentation_level(0); | |
20801 | brace_warning("resetting level to 0 at $1 $2\n"); | |
20802 | } | |
20803 | } | |
20804 | ||
20805 | # all done tokenizing this line ... | |
20806 | # now prepare the final list of tokens and types | |
20807 | ||
20808 | my @token_type = (); # stack of output token types | |
20809 | my @block_type = (); # stack of output code block types | |
20810 | my @container_type = (); # stack of output code container types | |
20811 | my @type_sequence = (); # stack of output type sequence numbers | |
20812 | my @tokens = (); # output tokens | |
20813 | my @levels = (); # structural brace levels of output tokens | |
20814 | my @slevels = (); # secondary nesting levels of output tokens | |
20815 | my @nesting_tokens = (); # string of tokens leading to this depth | |
20816 | my @nesting_types = (); # string of token types leading to this depth | |
20817 | my @nesting_blocks = (); # string of block types leading to this depth | |
20818 | my @nesting_lists = (); # string of list types leading to this depth | |
20819 | my @ci_string = (); # string needed to compute continuation indentation | |
20820 | my @container_environment = (); # BLOCK or LIST | |
20821 | my $container_environment = ''; | |
20822 | my $im = -1; # previous $i value | |
20823 | my $num; | |
20824 | my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/; | |
20825 | ||
20826 | # =head1 Computing Token Indentation | |
20827 | # | |
20828 | # The final section of the tokenizer forms tokens and also computes | |
20829 | # parameters needed to find indentation. It is much easier to do it | |
20830 | # in the tokenizer than elsewhere. Here is a brief description of how | |
20831 | # indentation is computed. Perl::Tidy computes indentation as the sum | |
20832 | # of 2 terms: | |
20833 | # | |
20834 | # (1) structural indentation, such as if/else/elsif blocks | |
20835 | # (2) continuation indentation, such as long parameter call lists. | |
20836 | # | |
20837 | # These are occasionally called primary and secondary indentation. | |
20838 | # | |
20839 | # Structural indentation is introduced by tokens of type '{', although | |
20840 | # the actual tokens might be '{', '(', or '['. Structural indentation | |
20841 | # is of two types: BLOCK and non-BLOCK. Default structural indentation | |
20842 | # is 4 characters if the standard indentation scheme is used. | |
20843 | # | |
20844 | # Continuation indentation is introduced whenever a line at BLOCK level | |
20845 | # is broken before its termination. Default continuation indentation | |
20846 | # is 2 characters in the standard indentation scheme. | |
20847 | # | |
20848 | # Both types of indentation may be nested arbitrarily deep and | |
20849 | # interlaced. The distinction between the two is somewhat arbitrary. | |
20850 | # | |
20851 | # For each token, we will define two variables which would apply if | |
20852 | # the current statement were broken just before that token, so that | |
20853 | # that token started a new line: | |
20854 | # | |
20855 | # $level = the structural indentation level, | |
20856 | # $ci_level = the continuation indentation level | |
20857 | # | |
20858 | # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), | |
20859 | # assuming defaults. However, in some special cases it is customary | |
20860 | # to modify $ci_level from this strict value. | |
20861 | # | |
20862 | # The total structural indentation is easy to compute by adding and | |
20863 | # subtracting 1 from a saved value as types '{' and '}' are seen. The | |
20864 | # running value of this variable is $level_in_tokenizer. | |
20865 | # | |
20866 | # The total continuation is much more difficult to compute, and requires | |
20867 | # several variables. These veriables are: | |
20868 | # | |
20869 | # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for | |
20870 | # each indentation level, if there are intervening open secondary | |
20871 | # structures just prior to that level. | |
20872 | # $continuation_string_in_tokenizer = a string of 1's and 0's indicating | |
20873 | # if the last token at that level is "continued", meaning that it | |
20874 | # is not the first token of an expression. | |
20875 | # $nesting_block_string = a string of 1's and 0's indicating, for each | |
20876 | # indentation level, if the level is of type BLOCK or not. | |
20877 | # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string | |
20878 | # $nesting_list_string = a string of 1's and 0's indicating, for each | |
20879 | # indentation level, if it is is appropriate for list formatting. | |
20880 | # If so, continuation indentation is used to indent long list items. | |
20881 | # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string | |
20882 | # @slevel_stack = a stack of total nesting depths at each | |
20883 | # structural indentation level, where "total nesting depth" means | |
20884 | # the nesting depth that would occur if every nesting token -- '{', '[', | |
20885 | # and '(' -- , regardless of context, is used to compute a nesting | |
20886 | # depth. | |
20887 | ||
20888 | #my $nesting_block_flag = ($nesting_block_string =~ /1$/); | |
20889 | #my $nesting_list_flag = ($nesting_list_string =~ /1$/); | |
20890 | ||
20891 | my ( $ci_string_i, $level_i, $nesting_block_string_i, | |
20892 | $nesting_list_string_i, $nesting_token_string_i, | |
20893 | $nesting_type_string_i, ); | |
20894 | ||
20895 | foreach $i (@output_token_list) { # scan the list of pre-tokens indexes | |
20896 | ||
20897 | # self-checking for valid token types | |
20898 | my $type = $output_token_type[$i]; | |
20899 | my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken | |
20900 | $level_i = $level_in_tokenizer; | |
20901 | ||
20902 | # This can happen by running perltidy on non-scripts | |
20903 | # although it could also be bug introduced by programming change. | |
20904 | # Perl silently accepts a 032 (^Z) and takes it as the end | |
20905 | if ( !$is_valid_token_type{$type} ) { | |
20906 | my $val = ord($type); | |
20907 | warning( | |
20908 | "unexpected character decimal $val ($type) in script\n"); | |
20909 | $tokenizer_self->{_in_error} = 1; | |
20910 | } | |
20911 | ||
20912 | # ---------------------------------------------------------------- | |
20913 | # TOKEN TYPE PATCHES | |
20914 | # output __END__, __DATA__, and format as type 'k' instead of ';' | |
20915 | # to make html colors correct, etc. | |
20916 | my $fix_type = $type; | |
20917 | if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } | |
20918 | ||
20919 | # output anonymous 'sub' as keyword | |
20920 | if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } | |
20921 | ||
20922 | # ----------------------------------------------------------------- | |
20923 | ||
20924 | $nesting_token_string_i = $nesting_token_string; | |
20925 | $nesting_type_string_i = $nesting_type_string; | |
20926 | $nesting_block_string_i = $nesting_block_string; | |
20927 | $nesting_list_string_i = $nesting_list_string; | |
20928 | ||
20929 | # set primary indentation levels based on structural braces | |
20930 | # Note: these are set so that the leading braces have a HIGHER | |
20931 | # level than their CONTENTS, which is convenient for indentation | |
20932 | # Also, define continuation indentation for each token. | |
20933 | if ( $type eq '{' || $type eq 'L' ) { | |
20934 | ||
20935 | # use environment before updating | |
20936 | $container_environment = | |
20937 | $nesting_block_flag ? 'BLOCK' | |
20938 | : $nesting_list_flag ? 'LIST' | |
20939 | : ""; | |
20940 | ||
20941 | # if the difference between total nesting levels is not 1, | |
20942 | # there are intervening non-structural nesting types between | |
20943 | # this '{' and the previous unclosed '{' | |
20944 | my $intervening_secondary_structure = 0; | |
20945 | if (@slevel_stack) { | |
20946 | $intervening_secondary_structure = | |
20947 | $slevel_in_tokenizer - $slevel_stack[-1]; | |
20948 | } | |
20949 | ||
20950 | # =head1 Continuation Indentation | |
20951 | # | |
20952 | # Having tried setting continuation indentation both in the formatter and | |
20953 | # in the tokenizer, I can say that setting it in the tokenizer is much, | |
20954 | # much easier. The formatter already has too much to do, and can't | |
20955 | # make decisions on line breaks without knowing what 'ci' will be at | |
20956 | # arbitrary locations. | |
20957 | # | |
20958 | # But a problem with setting the continuation indentation (ci) here | |
20959 | # in the tokenizer is that we do not know where line breaks will actually | |
20960 | # be. As a result, we don't know if we should propagate continuation | |
20961 | # indentation to higher levels of structure. | |
20962 | # | |
20963 | # For nesting of only structural indentation, we never need to do this. | |
20964 | # For example, in a long if statement, like this | |
20965 | # | |
20966 | # if ( !$output_block_type[$i] | |
20967 | # && ($in_statement_continuation) ) | |
20968 | # { <--outdented | |
20969 | # do_something(); | |
20970 | # } | |
20971 | # | |
20972 | # the second line has ci but we do normally give the lines within the BLOCK | |
20973 | # any ci. This would be true if we had blocks nested arbitrarily deeply. | |
20974 | # | |
20975 | # But consider something like this, where we have created a break after | |
20976 | # an opening paren on line 1, and the paren is not (currently) a | |
20977 | # structural indentation token: | |
20978 | # | |
20979 | # my $file = $menubar->Menubutton( | |
20980 | # qw/-text File -underline 0 -menuitems/ => [ | |
20981 | # [ | |
20982 | # Cascade => '~View', | |
20983 | # -menuitems => [ | |
20984 | # ... | |
20985 | # | |
20986 | # The second line has ci, so it would seem reasonable to propagate it | |
20987 | # down, giving the third line 1 ci + 1 indentation. This suggests the | |
20988 | # following rule, which is currently used to propagating ci down: if there | |
20989 | # are any non-structural opening parens (or brackets, or braces), before | |
20990 | # an opening structural brace, then ci is propagated down, and otherwise | |
20991 | # not. The variable $intervening_secondary_structure contains this | |
20992 | # information for the current token, and the string | |
20993 | # "$ci_string_in_tokenizer" is a stack of previous values of this | |
20994 | # variable. | |
20995 | ||
20996 | # save the current states | |
20997 | push( @slevel_stack, 1 + $slevel_in_tokenizer ); | |
20998 | $level_in_tokenizer++; | |
20999 | ||
21000 | if ( $output_block_type[$i] ) { | |
21001 | $nesting_block_flag = 1; | |
21002 | $nesting_block_string .= '1'; | |
21003 | } | |
21004 | else { | |
21005 | $nesting_block_flag = 0; | |
21006 | $nesting_block_string .= '0'; | |
21007 | } | |
21008 | ||
21009 | # we will use continuation indentation within containers | |
21010 | # which are not blocks and not logical expressions | |
21011 | my $bit = 0; | |
21012 | if ( !$output_block_type[$i] ) { | |
21013 | ||
21014 | # propagate flag down at nested open parens | |
21015 | if ( $output_container_type[$i] eq '(' ) { | |
21016 | $bit = 1 if $nesting_list_flag; | |
21017 | } | |
21018 | ||
21019 | # use list continuation if not a logical grouping | |
21020 | # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ | |
21021 | else { | |
21022 | $bit = 1 | |
21023 | unless | |
21024 | $is_logical_container{ $output_container_type[$i] }; | |
21025 | } | |
21026 | } | |
21027 | $nesting_list_string .= $bit; | |
21028 | $nesting_list_flag = $bit; | |
21029 | ||
21030 | $ci_string_in_tokenizer .= | |
21031 | ( $intervening_secondary_structure != 0 ) ? '1' : '0'; | |
21032 | $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/; | |
21033 | $continuation_string_in_tokenizer .= | |
21034 | ( $in_statement_continuation > 0 ) ? '1' : '0'; | |
21035 | ||
21036 | # Sometimes we want to give an opening brace continuation indentation, | |
21037 | # and sometimes not. For code blocks, we don't do it, so that the leading | |
21038 | # '{' gets outdented, like this: | |
21039 | # | |
21040 | # if ( !$output_block_type[$i] | |
21041 | # && ($in_statement_continuation) ) | |
21042 | # { <--outdented | |
21043 | # | |
21044 | # For other types, we will give them continuation indentation. For example, | |
21045 | # here is how a list looks with the opening paren indented: | |
21046 | # | |
21047 | # @LoL = | |
21048 | # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], | |
21049 | # [ "homer", "marge", "bart" ], ); | |
21050 | # | |
21051 | # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) | |
21052 | ||
21053 | my $total_ci = $ci_string_sum; | |
21054 | if ( | |
21055 | !$output_block_type[$i] # patch: skip for BLOCK | |
21056 | && ($in_statement_continuation) | |
21057 | ) | |
21058 | { | |
21059 | $total_ci += $in_statement_continuation | |
21060 | unless ( $ci_string_in_tokenizer =~ /1$/ ); | |
21061 | } | |
21062 | ||
21063 | $ci_string_i = $total_ci; | |
21064 | $in_statement_continuation = 0; | |
21065 | } | |
21066 | ||
21067 | elsif ( $type eq '}' || $type eq 'R' ) { | |
21068 | ||
21069 | # only a nesting error in the script would prevent popping here | |
21070 | if ( @slevel_stack > 1 ) { pop(@slevel_stack); } | |
21071 | ||
21072 | $level_i = --$level_in_tokenizer; | |
21073 | ||
21074 | # restore previous level values | |
21075 | if ( length($nesting_block_string) > 1 ) | |
21076 | { # true for valid script | |
21077 | chop $nesting_block_string; | |
21078 | $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); | |
21079 | chop $nesting_list_string; | |
21080 | $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); | |
21081 | ||
21082 | chop $ci_string_in_tokenizer; | |
21083 | $ci_string_sum = | |
21084 | ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/; | |
21085 | ||
21086 | $in_statement_continuation = | |
21087 | chop $continuation_string_in_tokenizer; | |
21088 | ||
21089 | # zero continuation flag at terminal BLOCK '}' which | |
21090 | # ends a statement. | |
21091 | if ( $output_block_type[$i] ) { | |
21092 | ||
21093 | # ...These include non-anonymous subs | |
21094 | # note: could be sub ::abc { or sub 'abc | |
21095 | if ( $output_block_type[$i] =~ m/^sub\s*/gc ) { | |
21096 | ||
21097 | # note: older versions of perl require the /gc modifier | |
21098 | # here or else the \G does not work. | |
21099 | if ( $output_block_type[$i] =~ /\G('|::|\w)/gc ) { | |
21100 | $in_statement_continuation = 0; | |
21101 | } | |
21102 | } | |
21103 | ||
21104 | # ...and include all block types except user subs with | |
21105 | # block prototypes and these: (sort|grep|map|do|eval) | |
21106 | # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ | |
21107 | elsif ( | |
21108 | $is_zero_continuation_block_type{ $output_block_type | |
21109 | [$i] } ) | |
21110 | { | |
21111 | $in_statement_continuation = 0; | |
21112 | } | |
21113 | ||
21114 | # ..but these are not terminal types: | |
21115 | # /^(sort|grep|map|do|eval)$/ ) | |
21116 | elsif ( | |
21117 | $is_not_zero_continuation_block_type{ | |
21118 | $output_block_type[$i] } ) | |
21119 | { | |
21120 | } | |
21121 | ||
21122 | # ..and a block introduced by a label | |
21123 | # /^\w+\s*:$/gc ) { | |
21124 | elsif ( $output_block_type[$i] =~ /:$/ ) { | |
21125 | $in_statement_continuation = 0; | |
21126 | } | |
21127 | ||
21128 | # ..nor user function with block prototype | |
21129 | else { | |
21130 | } | |
21131 | } | |
21132 | ||
21133 | # If we are in a list, then | |
21134 | # we must set continuatoin indentation at the closing | |
21135 | # paren of something like this (paren after $check): | |
21136 | # assert( | |
21137 | # __LINE__, | |
21138 | # ( not defined $check ) | |
21139 | # or ref $check | |
21140 | # or $check eq "new" | |
21141 | # or $check eq "old", | |
21142 | # ); | |
21143 | elsif ( $tok eq ')' ) { | |
21144 | $in_statement_continuation = 1 | |
21145 | if $output_container_type[$i] =~ /^[;,\{\}]$/; | |
21146 | } | |
21147 | } | |
21148 | ||
21149 | # use environment after updating | |
21150 | $container_environment = | |
21151 | $nesting_block_flag ? 'BLOCK' | |
21152 | : $nesting_list_flag ? 'LIST' | |
21153 | : ""; | |
21154 | $ci_string_i = $ci_string_sum + $in_statement_continuation; | |
21155 | $nesting_block_string_i = $nesting_block_string; | |
21156 | $nesting_list_string_i = $nesting_list_string; | |
21157 | } | |
21158 | ||
21159 | # not a structural indentation type.. | |
21160 | else { | |
21161 | ||
21162 | $container_environment = | |
21163 | $nesting_block_flag ? 'BLOCK' | |
21164 | : $nesting_list_flag ? 'LIST' | |
21165 | : ""; | |
21166 | ||
21167 | # zero the continuation indentation at certain tokens so | |
21168 | # that they will be at the same level as its container. For | |
21169 | # commas, this simplifies the -lp indentation logic, which | |
21170 | # counts commas. For ?: it makes them stand out. | |
21171 | if ($nesting_list_flag) { | |
21172 | if ( $type =~ /^[,\?\:]$/ ) { | |
21173 | $in_statement_continuation = 0; | |
21174 | } | |
21175 | } | |
21176 | ||
21177 | # be sure binary operators get continuation indentation | |
21178 | if ( | |
21179 | $container_environment | |
21180 | && ( $type eq 'k' && $is_binary_keyword{$tok} | |
21181 | || $is_binary_type{$type} ) | |
21182 | ) | |
21183 | { | |
21184 | $in_statement_continuation = 1; | |
21185 | } | |
21186 | ||
21187 | # continuation indentation is sum of any open ci from previous | |
21188 | # levels plus the current level | |
21189 | $ci_string_i = $ci_string_sum + $in_statement_continuation; | |
21190 | ||
21191 | # update continuation flag ... | |
21192 | # if this isn't a blank or comment.. | |
21193 | if ( $type ne 'b' && $type ne '#' ) { | |
21194 | ||
21195 | # and we are in a BLOCK | |
21196 | if ($nesting_block_flag) { | |
21197 | ||
21198 | # the next token after a ';' and label starts a new stmt | |
21199 | if ( $type eq ';' || $type eq 'J' ) { | |
21200 | $in_statement_continuation = 0; | |
21201 | } | |
21202 | ||
21203 | # otherwise, we are continuing the current statement | |
21204 | else { | |
21205 | $in_statement_continuation = 1; | |
21206 | } | |
21207 | } | |
21208 | ||
21209 | # if we are not in a BLOCK.. | |
21210 | else { | |
21211 | ||
21212 | # do not use continuation indentation if not list | |
21213 | # environment (could be within if/elsif clause) | |
21214 | if ( !$nesting_list_flag ) { | |
21215 | $in_statement_continuation = 0; | |
21216 | } | |
21217 | ||
21218 | # otherwise, the next token after a ',' starts a new term | |
21219 | elsif ( $type eq ',' ) { | |
21220 | $in_statement_continuation = 0; | |
21221 | } | |
21222 | ||
21223 | # otherwise, we are continuing the current term | |
21224 | else { | |
21225 | $in_statement_continuation = 1; | |
21226 | } | |
21227 | } | |
21228 | } | |
21229 | } | |
21230 | ||
21231 | if ( $level_in_tokenizer < 0 ) { | |
21232 | unless ($saw_negative_indentation) { | |
21233 | $saw_negative_indentation = 1; | |
21234 | warning("Starting negative indentation\n"); | |
21235 | } | |
21236 | } | |
21237 | ||
21238 | # set secondary nesting levels based on all continment token types | |
21239 | # Note: these are set so that the nesting depth is the depth | |
21240 | # of the PREVIOUS TOKEN, which is convenient for setting | |
21241 | # the stength of token bonds | |
21242 | my $slevel_i = $slevel_in_tokenizer; | |
21243 | ||
21244 | # /^[L\{\(\[]$/ | |
21245 | if ( $is_opening_type{$type} ) { | |
21246 | $slevel_in_tokenizer++; | |
21247 | $nesting_token_string .= $tok; | |
21248 | $nesting_type_string .= $type; | |
21249 | } | |
21250 | ||
21251 | # /^[R\}\)\]]$/ | |
21252 | elsif ( $is_closing_type{$type} ) { | |
21253 | $slevel_in_tokenizer--; | |
21254 | my $char = chop $nesting_token_string; | |
21255 | ||
21256 | if ( $char ne $matching_start_token{$tok} ) { | |
21257 | $nesting_token_string .= $char . $tok; | |
21258 | $nesting_type_string .= $type; | |
21259 | } | |
21260 | else { | |
21261 | chop $nesting_type_string; | |
21262 | } | |
21263 | } | |
21264 | ||
21265 | push( @block_type, $output_block_type[$i] ); | |
21266 | push( @ci_string, $ci_string_i ); | |
21267 | push( @container_environment, $container_environment ); | |
21268 | push( @container_type, $output_container_type[$i] ); | |
21269 | push( @levels, $level_i ); | |
21270 | push( @nesting_tokens, $nesting_token_string_i ); | |
21271 | push( @nesting_types, $nesting_type_string_i ); | |
21272 | push( @slevels, $slevel_i ); | |
21273 | push( @token_type, $fix_type ); | |
21274 | push( @type_sequence, $output_type_sequence[$i] ); | |
21275 | push( @nesting_blocks, $nesting_block_string ); | |
21276 | push( @nesting_lists, $nesting_list_string ); | |
21277 | ||
21278 | # now form the previous token | |
21279 | if ( $im >= 0 ) { | |
21280 | $num = | |
21281 | $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters | |
21282 | ||
21283 | if ( $num > 0 ) { | |
21284 | push( @tokens, | |
21285 | substr( $input_line, $$rtoken_map[$im], $num ) ); | |
21286 | } | |
21287 | } | |
21288 | $im = $i; | |
21289 | } | |
21290 | ||
21291 | $num = length($input_line) - $$rtoken_map[$im]; # make the last token | |
21292 | if ( $num > 0 ) { | |
21293 | push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) ); | |
21294 | } | |
21295 | ||
21296 | $tokenizer_self->{_in_quote} = $in_quote; | |
21297 | $tokenizer_self->{_rhere_target_list} = \@here_target_list; | |
21298 | ||
21299 | $line_of_tokens->{_rtoken_type} = \@token_type; | |
21300 | $line_of_tokens->{_rtokens} = \@tokens; | |
21301 | $line_of_tokens->{_rblock_type} = \@block_type; | |
21302 | $line_of_tokens->{_rcontainer_type} = \@container_type; | |
21303 | $line_of_tokens->{_rcontainer_environment} = \@container_environment; | |
21304 | $line_of_tokens->{_rtype_sequence} = \@type_sequence; | |
21305 | $line_of_tokens->{_rlevels} = \@levels; | |
21306 | $line_of_tokens->{_rslevels} = \@slevels; | |
21307 | $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; | |
21308 | $line_of_tokens->{_rci_levels} = \@ci_string; | |
21309 | $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; | |
21310 | ||
21311 | return; | |
21312 | } | |
21313 | } # end tokenize_this_line | |
21314 | ||
21315 | sub new_statement_ok { | |
21316 | ||
21317 | # return true if the current token can start a new statement | |
21318 | ||
21319 | return label_ok() # a label would be ok here | |
21320 | ||
21321 | || $last_nonblank_type eq 'J'; # or we follow a label | |
21322 | ||
21323 | } | |
21324 | ||
21325 | sub label_ok { | |
21326 | ||
21327 | # Decide if a bare word followed by a colon here is a label | |
21328 | ||
21329 | # if it follows an opening or closing code block curly brace.. | |
21330 | if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) | |
21331 | && $last_nonblank_type eq $last_nonblank_token ) | |
21332 | { | |
21333 | ||
21334 | # it is a label if and only if the curly encloses a code block | |
21335 | return $brace_type[$brace_depth]; | |
21336 | } | |
21337 | ||
21338 | # otherwise, it is a label if and only if it follows a ';' | |
21339 | # (real or fake) | |
21340 | else { | |
21341 | return ( $last_nonblank_type eq ';' ); | |
21342 | } | |
21343 | } | |
21344 | ||
21345 | sub code_block_type { | |
21346 | ||
21347 | # Decide if this is a block of code, and its type. | |
21348 | # Must be called only when $type = $token = '{' | |
21349 | # The problem is to distinguish between the start of a block of code | |
21350 | # and the start of an anonymous hash reference | |
21351 | # Returns "" if not code block, otherwise returns 'last_nonblank_token' | |
21352 | # to indicate the type of code block. (For example, 'last_nonblank_token' | |
21353 | # might be 'if' for an if block, 'else' for an else block, etc). | |
21354 | ||
21355 | # handle case of multiple '{'s | |
21356 | ||
21357 | # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; | |
21358 | ||
21359 | my ( $i, $rtokens, $rtoken_type ) = @_; | |
21360 | if ( $last_nonblank_token eq '{' | |
21361 | && $last_nonblank_type eq $last_nonblank_token ) | |
21362 | { | |
21363 | ||
21364 | # opening brace where a statement may appear is probably | |
21365 | # a code block but might be and anonymous hash reference | |
21366 | if ( $brace_type[$brace_depth] ) { | |
21367 | return decide_if_code_block( $i, $rtokens, $rtoken_type ); | |
21368 | } | |
21369 | ||
21370 | # cannot start a code block within an anonymous hash | |
21371 | else { | |
21372 | return ""; | |
21373 | } | |
21374 | } | |
21375 | ||
21376 | elsif ( $last_nonblank_token eq ';' ) { | |
21377 | ||
21378 | # an opening brace where a statement may appear is probably | |
21379 | # a code block but might be and anonymous hash reference | |
21380 | return decide_if_code_block( $i, $rtokens, $rtoken_type ); | |
21381 | } | |
21382 | ||
21383 | # handle case of '}{' | |
21384 | elsif ($last_nonblank_token eq '}' | |
21385 | && $last_nonblank_type eq $last_nonblank_token ) | |
21386 | { | |
21387 | ||
21388 | # a } { situation ... | |
21389 | # could be hash reference after code block..(blktype1.t) | |
21390 | if ($last_nonblank_block_type) { | |
21391 | return decide_if_code_block( $i, $rtokens, $rtoken_type ); | |
21392 | } | |
21393 | ||
21394 | # must be a block if it follows a closing hash reference | |
21395 | else { | |
21396 | return $last_nonblank_token; | |
21397 | } | |
21398 | } | |
21399 | ||
21400 | # NOTE: braces after type characters start code blocks, but for | |
21401 | # simplicity these are not identified as such. See also | |
21402 | # sub is_non_structural_brace. | |
21403 | # elsif ( $last_nonblank_type eq 't' ) { | |
21404 | # return $last_nonblank_token; | |
21405 | # } | |
21406 | ||
21407 | # brace after label: | |
21408 | elsif ( $last_nonblank_type eq 'J' ) { | |
21409 | return $last_nonblank_token; | |
21410 | } | |
21411 | ||
21412 | # otherwise, look at previous token. This must be a code block if | |
21413 | # it follows any of these: | |
21414 | # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ | |
21415 | elsif ( $is_code_block_token{$last_nonblank_token} ) { | |
21416 | return $last_nonblank_token; | |
21417 | } | |
21418 | ||
21419 | # or a sub definition | |
21420 | elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) | |
21421 | && $last_nonblank_token =~ /^sub\b/ ) | |
21422 | { | |
21423 | return $last_nonblank_token; | |
21424 | } | |
21425 | ||
21426 | # user-defined subs with block parameters (like grep/map/eval) | |
21427 | elsif ( $last_nonblank_type eq 'G' ) { | |
21428 | return $last_nonblank_token; | |
21429 | } | |
21430 | ||
21431 | # check bareword | |
21432 | elsif ( $last_nonblank_type eq 'w' ) { | |
21433 | return decide_if_code_block( $i, $rtokens, $rtoken_type ); | |
21434 | } | |
21435 | ||
21436 | # anything else must be anonymous hash reference | |
21437 | else { | |
21438 | return ""; | |
21439 | } | |
21440 | } | |
21441 | ||
21442 | sub decide_if_code_block { | |
21443 | ||
21444 | my ( $i, $rtokens, $rtoken_type ) = @_; | |
21445 | my ( $next_nonblank_token, $i_next ) = | |
21446 | find_next_nonblank_token( $i, $rtokens ); | |
21447 | ||
21448 | # we are at a '{' where a statement may appear. | |
21449 | # We must decide if this brace starts an anonymous hash or a code | |
21450 | # block. | |
21451 | # return "" if anonymous hash, and $last_nonblank_token otherwise | |
21452 | ||
21453 | # initialize to be code BLOCK | |
21454 | my $code_block_type = $last_nonblank_token; | |
21455 | ||
21456 | # Check for the common case of an empty anonymous hash reference: | |
21457 | # Maybe something like sub { { } } | |
21458 | if ( $next_nonblank_token eq '}' ) { | |
21459 | $code_block_type = ""; | |
21460 | } | |
21461 | ||
21462 | else { | |
21463 | ||
21464 | # To guess if this '{' is an anonymous hash reference, look ahead | |
21465 | # and test as follows: | |
21466 | # | |
21467 | # it is a hash reference if next come: | |
21468 | # - a string or digit followed by a comma or => | |
21469 | # - bareword followed by => | |
21470 | # otherwise it is a code block | |
21471 | # | |
21472 | # Examples of anonymous hash ref: | |
21473 | # {'aa',}; | |
21474 | # {1,2} | |
21475 | # | |
21476 | # Examples of code blocks: | |
21477 | # {1; print "hello\n", 1;} | |
21478 | # {$a,1}; | |
21479 | ||
21480 | # We are only going to look ahead one more (nonblank/comment) line. | |
21481 | # Strange formatting could cause a bad guess, but that's unlikely. | |
21482 | my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ]; | |
21483 | my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ]; | |
21484 | my ( $rpre_tokens, $rpre_types ) = | |
21485 | peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but | |
21486 | # generous, and prevents | |
21487 | # wasting lots of | |
21488 | # time in mangled files | |
21489 | if ( defined($rpre_types) && @$rpre_types ) { | |
21490 | push @pre_types, @$rpre_types; | |
21491 | push @pre_tokens, @$rpre_tokens; | |
21492 | } | |
21493 | ||
21494 | # put a sentinal token to simplify stopping the search | |
21495 | push @pre_types, '}'; | |
21496 | ||
21497 | my $jbeg = 0; | |
21498 | $jbeg = 1 if $pre_types[0] eq 'b'; | |
21499 | ||
21500 | # first look for one of these | |
21501 | # - bareword | |
21502 | # - bareword with leading - | |
21503 | # - digit | |
21504 | # - quoted string | |
21505 | my $j = $jbeg; | |
21506 | if ( $pre_types[$j] =~ /^[\'\"]/ ) { | |
21507 | ||
21508 | # find the closing quote; don't worry about escapes | |
21509 | my $quote_mark = $pre_types[$j]; | |
21510 | for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) { | |
21511 | if ( $pre_types[$k] eq $quote_mark ) { | |
21512 | $j = $k + 1; | |
21513 | my $next = $pre_types[$j]; | |
21514 | last; | |
21515 | } | |
21516 | } | |
21517 | } | |
21518 | elsif ( $pre_types[$j] eq 'd' ) { | |
21519 | $j++; | |
21520 | } | |
21521 | elsif ( $pre_types[$j] eq 'w' ) { | |
21522 | unless ( $is_keyword{ $pre_tokens[$j] } ) { | |
21523 | $j++; | |
21524 | } | |
21525 | } | |
21526 | elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { | |
21527 | $j++; | |
21528 | } | |
21529 | if ( $j > $jbeg ) { | |
21530 | ||
21531 | $j++ if $pre_types[$j] eq 'b'; | |
21532 | ||
21533 | # it's a hash ref if a comma or => follow next | |
21534 | if ( $pre_types[$j] eq ',' | |
21535 | || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) ) | |
21536 | { | |
21537 | $code_block_type = ""; | |
21538 | } | |
21539 | } | |
21540 | } | |
21541 | ||
21542 | return $code_block_type; | |
21543 | } | |
21544 | ||
21545 | sub unexpected { | |
21546 | ||
21547 | # report unexpected token type and show where it is | |
21548 | my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_; | |
21549 | $unexpected_error_count++; | |
21550 | if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) { | |
21551 | my $msg = "found $found where $expecting expected"; | |
21552 | my $pos = $$rpretoken_map[$i_tok]; | |
21553 | interrupt_logfile(); | |
21554 | my ( $offset, $numbered_line, $underline ) = | |
21555 | make_numbered_line( $input_line_number, $input_line, $pos ); | |
21556 | $underline = write_on_underline( $underline, $pos - $offset, '^' ); | |
21557 | ||
21558 | my $trailer = ""; | |
21559 | if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { | |
21560 | my $pos_prev = $$rpretoken_map[$last_nonblank_i]; | |
21561 | my $num; | |
21562 | if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) { | |
21563 | $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev; | |
21564 | } | |
21565 | else { | |
21566 | $num = $pos - $pos_prev; | |
21567 | } | |
21568 | if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } | |
21569 | ||
21570 | $underline = | |
21571 | write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); | |
21572 | $trailer = " (previous token underlined)"; | |
21573 | } | |
21574 | warning( $numbered_line . "\n" ); | |
21575 | warning( $underline . "\n" ); | |
21576 | warning( $msg . $trailer . "\n" ); | |
21577 | resume_logfile(); | |
21578 | } | |
21579 | } | |
21580 | ||
21581 | sub indicate_error { | |
21582 | my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; | |
21583 | interrupt_logfile(); | |
21584 | warning($msg); | |
21585 | write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); | |
21586 | resume_logfile(); | |
21587 | } | |
21588 | ||
21589 | sub write_error_indicator_pair { | |
21590 | my ( $line_number, $input_line, $pos, $carrat ) = @_; | |
21591 | my ( $offset, $numbered_line, $underline ) = | |
21592 | make_numbered_line( $line_number, $input_line, $pos ); | |
21593 | $underline = write_on_underline( $underline, $pos - $offset, $carrat ); | |
21594 | warning( $numbered_line . "\n" ); | |
21595 | $underline =~ s/\s*$//; | |
21596 | warning( $underline . "\n" ); | |
21597 | } | |
21598 | ||
21599 | sub make_numbered_line { | |
21600 | ||
21601 | # Given an input line, its line number, and a character position of | |
21602 | # interest, create a string not longer than 80 characters of the form | |
21603 | # $lineno: sub_string | |
21604 | # such that the sub_string of $str contains the position of interest | |
21605 | # | |
21606 | # Here is an example of what we want, in this case we add trailing | |
21607 | # '...' because the line is long. | |
21608 | # | |
21609 | # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... | |
21610 | # | |
21611 | # Here is another example, this time in which we used leading '...' | |
21612 | # because of excessive length: | |
21613 | # | |
21614 | # 2: ... er of the World Wide Web Consortium's | |
21615 | # | |
21616 | # input parameters are: | |
21617 | # $lineno = line number | |
21618 | # $str = the text of the line | |
21619 | # $pos = position of interest (the error) : 0 = first character | |
21620 | # | |
21621 | # We return : | |
21622 | # - $offset = an offset which corrects the position in case we only | |
21623 | # display part of a line, such that $pos-$offset is the effective | |
21624 | # position from the start of the displayed line. | |
21625 | # - $numbered_line = the numbered line as above, | |
21626 | # - $underline = a blank 'underline' which is all spaces with the same | |
21627 | # number of characters as the numbered line. | |
21628 | ||
21629 | my ( $lineno, $str, $pos ) = @_; | |
21630 | my $offset = ( $pos < 60 ) ? 0 : $pos - 40; | |
21631 | my $excess = length($str) - $offset - 68; | |
21632 | my $numc = ( $excess > 0 ) ? 68 : undef; | |
21633 | ||
21634 | if ( defined($numc) ) { | |
21635 | if ( $offset == 0 ) { | |
21636 | $str = substr( $str, $offset, $numc - 4 ) . " ..."; | |
21637 | } | |
21638 | else { | |
21639 | $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; | |
21640 | } | |
21641 | } | |
21642 | else { | |
21643 | ||
21644 | if ( $offset == 0 ) { | |
21645 | } | |
21646 | else { | |
21647 | $str = "... " . substr( $str, $offset + 4 ); | |
21648 | } | |
21649 | } | |
21650 | ||
21651 | my $numbered_line = sprintf( "%d: ", $lineno ); | |
21652 | $offset -= length($numbered_line); | |
21653 | $numbered_line .= $str; | |
21654 | my $underline = " " x length($numbered_line); | |
21655 | return ( $offset, $numbered_line, $underline ); | |
21656 | } | |
21657 | ||
21658 | sub write_on_underline { | |
21659 | ||
21660 | # The "underline" is a string that shows where an error is; it starts | |
21661 | # out as a string of blanks with the same length as the numbered line of | |
21662 | # code above it, and we have to add marking to show where an error is. | |
21663 | # In the example below, we want to write the string '--^' just below | |
21664 | # the line of bad code: | |
21665 | # | |
21666 | # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... | |
21667 | # ---^ | |
21668 | # We are given the current underline string, plus a position and a | |
21669 | # string to write on it. | |
21670 | # | |
21671 | # In the above example, there will be 2 calls to do this: | |
21672 | # First call: $pos=19, pos_chr=^ | |
21673 | # Second call: $pos=16, pos_chr=--- | |
21674 | # | |
21675 | # This is a trivial thing to do with substr, but there is some | |
21676 | # checking to do. | |
21677 | ||
21678 | my ( $underline, $pos, $pos_chr ) = @_; | |
21679 | ||
21680 | # check for error..shouldn't happen | |
21681 | unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { | |
21682 | return $underline; | |
21683 | } | |
21684 | my $excess = length($pos_chr) + $pos - length($underline); | |
21685 | if ( $excess > 0 ) { | |
21686 | $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); | |
21687 | } | |
21688 | substr( $underline, $pos, length($pos_chr) ) = $pos_chr; | |
21689 | return ($underline); | |
21690 | } | |
21691 | ||
21692 | sub is_non_structural_brace { | |
21693 | ||
21694 | # Decide if a brace or bracket is structural or non-structural | |
21695 | # by looking at the previous token and type | |
21696 | ||
21697 | # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. | |
21698 | # Tentatively deactivated because it caused the wrong operator expectation | |
21699 | # for this code: | |
21700 | # $user = @vars[1] / 100; | |
21701 | # Must update sub operator_expected before re-implementing. | |
21702 | # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { | |
21703 | # return 0; | |
21704 | # } | |
21705 | ||
21706 | # NOTE: braces after type characters start code blocks, but for | |
21707 | # simplicity these are not identified as such. See also | |
21708 | # sub code_block_type | |
21709 | # if ($last_nonblank_type eq 't') {return 0} | |
21710 | ||
21711 | # otherwise, it is non-structural if it is decorated | |
21712 | # by type information. | |
21713 | # For example, the '{' here is non-structural: ${xxx} | |
21714 | ( | |
21715 | $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ | |
21716 | ||
21717 | # or if we follow a hash or array closing curly brace or bracket | |
21718 | # For example, the second '{' in this is non-structural: $a{'x'}{'y'} | |
21719 | # because the first '}' would have been given type 'R' | |
21720 | || $last_nonblank_type =~ /^([R\]])$/ | |
21721 | ); | |
21722 | } | |
21723 | ||
21724 | sub operator_expected { | |
21725 | ||
21726 | # Many perl symbols have two or more meanings. For example, '<<' | |
21727 | # can be a shift operator or a here-doc operator. The | |
21728 | # interpretation of these symbols depends on the current state of | |
21729 | # the tokenizer, which may either be expecting a term or an | |
21730 | # operator. For this example, a << would be a shift if an operator | |
21731 | # is expected, and a here-doc if a term is expected. This routine | |
21732 | # is called to make this decision for any current token. It returns | |
21733 | # one of three possible values: | |
21734 | # | |
21735 | # OPERATOR - operator expected (or at least, not a term) | |
21736 | # UNKNOWN - can't tell | |
21737 | # TERM - a term is expected (or at least, not an operator) | |
21738 | # | |
21739 | # The decision is based on what has been seen so far. This | |
21740 | # information is stored in the "$last_nonblank_type" and | |
21741 | # "$last_nonblank_token" variables. For example, if the | |
21742 | # $last_nonblank_type is '=~', then we are expecting a TERM, whereas | |
21743 | # if $last_nonblank_type is 'n' (numeric), we are expecting an | |
21744 | # OPERATOR. | |
21745 | # | |
21746 | # If a UNKNOWN is returned, the calling routine must guess. A major | |
21747 | # goal of this tokenizer is to minimize the possiblity of returning | |
21748 | # UNKNOWN, because a wrong guess can spoil the formatting of a | |
21749 | # script. | |
21750 | # | |
21751 | # adding NEW_TOKENS: it is critically important that this routine be | |
21752 | # updated to allow it to determine if an operator or term is to be | |
21753 | # expected after the new token. Doing this simply involves adding | |
21754 | # the new token character to one of the regexes in this routine or | |
21755 | # to one of the hash lists | |
21756 | # that it uses, which are initialized in the BEGIN section. | |
21757 | ||
21758 | my ( $prev_type, $tok, $next_type ) = @_; | |
21759 | my $op_expected = UNKNOWN; | |
21760 | ||
21761 | # Note: function prototype is available for token type 'U' for future | |
21762 | # program development. It contains the leading and trailing parens, | |
21763 | # and no blanks. It might be used to eliminate token type 'C', for | |
21764 | # example (prototype = '()'). Thus: | |
21765 | # if ($last_nonblank_type eq 'U') { | |
21766 | # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; | |
21767 | # } | |
21768 | ||
21769 | # A possible filehandle (or object) requires some care... | |
21770 | if ( $last_nonblank_type eq 'Z' ) { | |
21771 | ||
21772 | # angle.t | |
21773 | if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { | |
21774 | $op_expected = UNKNOWN; | |
21775 | } | |
21776 | ||
21777 | # For possible file handle like "$a", Perl uses weird parsing rules. | |
21778 | # For example: | |
21779 | # print $a/2,"/hi"; - division | |
21780 | # print $a / 2,"/hi"; - division | |
21781 | # print $a/ 2,"/hi"; - division | |
21782 | # print $a /2,"/hi"; - pattern (and error)! | |
21783 | elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { | |
21784 | $op_expected = TERM; | |
21785 | } | |
21786 | ||
21787 | # Note when an operation is being done where a | |
21788 | # filehandle might be expected, since a change in whitespace | |
21789 | # could change the interpretation of the statement. | |
21790 | else { | |
21791 | if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { | |
21792 | complain("operator in print statement not recommended\n"); | |
21793 | $op_expected = OPERATOR; | |
21794 | } | |
21795 | } | |
21796 | } | |
21797 | ||
21798 | # handle something after 'do' and 'eval' | |
21799 | elsif ( $is_block_operator{$last_nonblank_token} ) { | |
21800 | ||
21801 | # something like $a = eval "expression"; | |
21802 | # ^ | |
21803 | if ( $last_nonblank_type eq 'k' ) { | |
21804 | $op_expected = TERM; # expression or list mode following keyword | |
21805 | } | |
21806 | ||
21807 | # something like $a = do { BLOCK } / 2; | |
21808 | # ^ | |
21809 | else { | |
21810 | $op_expected = OPERATOR; # block mode following } | |
21811 | } | |
21812 | } | |
21813 | ||
21814 | # handle bare word.. | |
21815 | elsif ( $last_nonblank_type eq 'w' ) { | |
21816 | ||
21817 | # unfortunately, we can't tell what type of token to expect next | |
21818 | # after most bare words | |
21819 | $op_expected = UNKNOWN; | |
21820 | } | |
21821 | ||
21822 | # operator, but not term possible after these types | |
21823 | # Note: moved ')' from type to token because parens in list context | |
21824 | # get marked as '{' '}' now. This is a minor glitch in the following: | |
21825 | # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); | |
21826 | # | |
21827 | elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ ) | |
21828 | || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) | |
21829 | { | |
21830 | $op_expected = OPERATOR; | |
21831 | ||
21832 | # in a 'use' statement, numbers and v-strings are not really | |
21833 | # numbers, so to avoid incorrect error messages, we will | |
21834 | # mark them as unknown for now (use.t) | |
21835 | if ( ( $statement_type eq 'use' ) | |
21836 | && ( $last_nonblank_type =~ /^[nv]$/ ) ) | |
21837 | { | |
21838 | $op_expected = UNKNOWN; | |
21839 | } | |
21840 | } | |
21841 | ||
21842 | # no operator after many keywords, such as "die", "warn", etc | |
21843 | elsif ( $expecting_term_token{$last_nonblank_token} ) { | |
21844 | $op_expected = TERM; | |
21845 | } | |
21846 | ||
21847 | # no operator after things like + - ** (i.e., other operators) | |
21848 | elsif ( $expecting_term_types{$last_nonblank_type} ) { | |
21849 | $op_expected = TERM; | |
21850 | } | |
21851 | ||
21852 | # a few operators, like "time", have an empty prototype () and so | |
21853 | # take no parameters but produce a value to operate on | |
21854 | elsif ( $expecting_operator_token{$last_nonblank_token} ) { | |
21855 | $op_expected = OPERATOR; | |
21856 | } | |
21857 | ||
21858 | # post-increment and decrement produce values to be operated on | |
21859 | elsif ( $expecting_operator_types{$last_nonblank_type} ) { | |
21860 | $op_expected = OPERATOR; | |
21861 | } | |
21862 | ||
21863 | # no value to operate on after sub block | |
21864 | elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } | |
21865 | ||
21866 | # a right brace here indicates the end of a simple block. | |
21867 | # all non-structural right braces have type 'R' | |
21868 | # all braces associated with block operator keywords have been given those | |
21869 | # keywords as "last_nonblank_token" and caught above. | |
21870 | # (This statement is order dependent, and must come after checking | |
21871 | # $last_nonblank_token). | |
21872 | elsif ( $last_nonblank_type eq '}' ) { | |
21873 | $op_expected = TERM; | |
21874 | } | |
21875 | ||
21876 | # something else..what did I forget? | |
21877 | else { | |
21878 | ||
21879 | # collecting diagnostics on unknown operator types..see what was missed | |
21880 | $op_expected = UNKNOWN; | |
21881 | write_diagnostics( | |
21882 | "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" | |
21883 | ); | |
21884 | } | |
21885 | ||
21886 | TOKENIZER_DEBUG_FLAG_EXPECT && do { | |
21887 | ||
21888 | "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; | |
21889 | }; | |
21890 | return $op_expected; | |
21891 | } | |
21892 | ||
21893 | # The following routines keep track of nesting depths of the nesting | |
21894 | # types, ( [ { and ?. This is necessary for determining the indentation | |
21895 | # level, and also for debugging programs. Not only do they keep track of | |
21896 | # nesting depths of the individual brace types, but they check that each | |
21897 | # of the other brace types is balanced within matching pairs. For | |
21898 | # example, if the program sees this sequence: | |
21899 | # | |
21900 | # { ( ( ) } | |
21901 | # | |
21902 | # then it can determine that there is an extra left paren somewhere | |
21903 | # between the { and the }. And so on with every other possible | |
21904 | # combination of outer and inner brace types. For another | |
21905 | # example: | |
21906 | # | |
21907 | # ( [ ..... ] ] ) | |
21908 | # | |
21909 | # which has an extra ] within the parens. | |
21910 | # | |
21911 | # The brace types have indexes 0 .. 3 which are indexes into | |
21912 | # the matrices. | |
21913 | # | |
21914 | # The pair ? : are treated as just another nesting type, with ? acting | |
21915 | # as the opening brace and : acting as the closing brace. | |
21916 | # | |
21917 | # The matrix | |
21918 | # | |
21919 | # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; | |
21920 | # | |
21921 | # saves the nesting depth of brace type $b (where $b is either of the other | |
21922 | # nesting types) when brace type $a enters a new depth. When this depth | |
21923 | # decreases, a check is made that the current depth of brace types $b is | |
21924 | # unchanged, or otherwise there must have been an error. This can | |
21925 | # be very useful for localizing errors, particularly when perl runs to | |
21926 | # the end of a large file (such as this one) and announces that there | |
21927 | # is a problem somewhere. | |
21928 | # | |
21929 | # A numerical sequence number is maintained for every nesting type, | |
21930 | # so that each matching pair can be uniquely identified in a simple | |
21931 | # way. | |
21932 | ||
21933 | sub increase_nesting_depth { | |
21934 | my ( $a, $i_tok ) = @_; | |
21935 | my $b; | |
21936 | $current_depth[$a]++; | |
21937 | ||
21938 | # Sequence numbers increment by number of items. This keeps | |
21939 | # a unique set of numbers but still allows the relative location | |
21940 | # of any type to be determined. | |
21941 | $nesting_sequence_number[$a] += scalar(@closing_brace_names); | |
21942 | my $seqno = $nesting_sequence_number[$a]; | |
21943 | $current_sequence_number[$a][ $current_depth[$a] ] = $seqno; | |
21944 | ||
21945 | my $pos = $$rpretoken_map[$i_tok]; | |
21946 | $starting_line_of_current_depth[$a][ $current_depth[$a] ] = | |
21947 | [ $input_line_number, $input_line, $pos ]; | |
21948 | ||
21949 | for $b ( 0 .. $#closing_brace_names ) { | |
21950 | next if ( $b == $a ); | |
21951 | $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; | |
21952 | } | |
21953 | return $seqno; | |
21954 | } | |
21955 | ||
21956 | sub decrease_nesting_depth { | |
21957 | ||
21958 | my ( $a, $i_tok ) = @_; | |
21959 | my $pos = $$rpretoken_map[$i_tok]; | |
21960 | my $b; | |
21961 | my $seqno = 0; | |
21962 | ||
21963 | if ( $current_depth[$a] > 0 ) { | |
21964 | ||
21965 | $seqno = $current_sequence_number[$a][ $current_depth[$a] ]; | |
21966 | ||
21967 | # check that any brace types $b contained within are balanced | |
21968 | for $b ( 0 .. $#closing_brace_names ) { | |
21969 | next if ( $b == $a ); | |
21970 | ||
21971 | unless ( $depth_array[$a][$b][ $current_depth[$a] ] == | |
21972 | $current_depth[$b] ) | |
21973 | { | |
21974 | my $diff = $current_depth[$b] - | |
21975 | $depth_array[$a][$b][ $current_depth[$a] ]; | |
21976 | ||
21977 | # don't whine too many times | |
21978 | my $saw_brace_error = get_saw_brace_error(); | |
21979 | if ( | |
21980 | $saw_brace_error <= MAX_NAG_MESSAGES | |
21981 | ||
21982 | # if too many closing types have occured, we probably | |
21983 | # already caught this error | |
21984 | && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) | |
21985 | ) | |
21986 | { | |
21987 | interrupt_logfile(); | |
21988 | my $rsl = | |
21989 | $starting_line_of_current_depth[$a][ $current_depth[$a] ]; | |
21990 | my $sl = $$rsl[0]; | |
21991 | my $rel = [ $input_line_number, $input_line, $pos ]; | |
21992 | my $el = $$rel[0]; | |
21993 | my ($ess); | |
21994 | ||
21995 | if ( $diff == 1 || $diff == -1 ) { | |
21996 | $ess = ''; | |
21997 | } | |
21998 | else { | |
21999 | $ess = 's'; | |
22000 | } | |
22001 | my $bname = | |
22002 | ( $diff > 0 ) | |
22003 | ? $opening_brace_names[$b] | |
22004 | : $closing_brace_names[$b]; | |
22005 | write_error_indicator_pair( @$rsl, '^' ); | |
22006 | my $msg = <<"EOM"; | |
22007 | Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el | |
22008 | EOM | |
22009 | ||
22010 | if ( $diff > 0 ) { | |
22011 | my $rml = | |
22012 | $starting_line_of_current_depth[$b] | |
22013 | [ $current_depth[$b] ]; | |
22014 | my $ml = $$rml[0]; | |
22015 | $msg .= | |
22016 | " The most recent un-matched $bname is on line $ml\n"; | |
22017 | write_error_indicator_pair( @$rml, '^' ); | |
22018 | } | |
22019 | write_error_indicator_pair( @$rel, '^' ); | |
22020 | warning($msg); | |
22021 | resume_logfile(); | |
22022 | } | |
22023 | increment_brace_error(); | |
22024 | } | |
22025 | } | |
22026 | $current_depth[$a]--; | |
22027 | } | |
22028 | else { | |
22029 | ||
22030 | my $saw_brace_error = get_saw_brace_error(); | |
22031 | if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { | |
22032 | my $msg = <<"EOM"; | |
22033 | There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number | |
22034 | EOM | |
22035 | indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); | |
22036 | } | |
22037 | increment_brace_error(); | |
22038 | } | |
22039 | return $seqno; | |
22040 | } | |
22041 | ||
22042 | sub check_final_nesting_depths { | |
22043 | my ($a); | |
22044 | ||
22045 | for $a ( 0 .. $#closing_brace_names ) { | |
22046 | ||
22047 | if ( $current_depth[$a] ) { | |
22048 | my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ]; | |
22049 | my $sl = $$rsl[0]; | |
22050 | my $msg = <<"EOM"; | |
22051 | Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a] | |
22052 | The most recent un-matched $opening_brace_names[$a] is on line $sl | |
22053 | EOM | |
22054 | indicate_error( $msg, @$rsl, '^' ); | |
22055 | increment_brace_error(); | |
22056 | } | |
22057 | } | |
22058 | } | |
22059 | ||
22060 | sub numerator_expected { | |
22061 | ||
22062 | # this is a filter for a possible numerator, in support of guessing | |
22063 | # for the / pattern delimiter token. | |
22064 | # returns - | |
22065 | # 1 - yes | |
22066 | # 0 - can't tell | |
22067 | # -1 - no | |
22068 | # Note: I am using the convention that variables ending in | |
22069 | # _expected have these 3 possible values. | |
22070 | my ( $i, $rtokens ) = @_; | |
22071 | my $next_token = $$rtokens[ $i + 1 ]; | |
22072 | if ( $next_token eq '=' ) { $i++; } # handle /= | |
22073 | my ( $next_nonblank_token, $i_next ) = | |
22074 | find_next_nonblank_token( $i, $rtokens ); | |
22075 | ||
22076 | if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { | |
22077 | 1; | |
22078 | } | |
22079 | else { | |
22080 | ||
22081 | if ( $next_nonblank_token =~ /^\s*$/ ) { | |
22082 | 0; | |
22083 | } | |
22084 | else { | |
22085 | -1; | |
22086 | } | |
22087 | } | |
22088 | } | |
22089 | ||
22090 | sub pattern_expected { | |
22091 | ||
22092 | # This is the start of a filter for a possible pattern. | |
22093 | # It looks at the token after a possbible pattern and tries to | |
22094 | # determine if that token could end a pattern. | |
22095 | # returns - | |
22096 | # 1 - yes | |
22097 | # 0 - can't tell | |
22098 | # -1 - no | |
22099 | my ( $i, $rtokens ) = @_; | |
22100 | my $next_token = $$rtokens[ $i + 1 ]; | |
22101 | if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier | |
22102 | my ( $next_nonblank_token, $i_next ) = | |
22103 | find_next_nonblank_token( $i, $rtokens ); | |
22104 | ||
22105 | # list of tokens which may follow a pattern | |
22106 | # (can probably be expanded) | |
22107 | if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) | |
22108 | { | |
22109 | 1; | |
22110 | } | |
22111 | else { | |
22112 | ||
22113 | if ( $next_nonblank_token =~ /^\s*$/ ) { | |
22114 | 0; | |
22115 | } | |
22116 | else { | |
22117 | -1; | |
22118 | } | |
22119 | } | |
22120 | } | |
22121 | ||
22122 | sub find_next_nonblank_token_on_this_line { | |
22123 | my ( $i, $rtokens ) = @_; | |
22124 | my $next_nonblank_token; | |
22125 | ||
22126 | if ( $i < $max_token_index ) { | |
22127 | $next_nonblank_token = $$rtokens[ ++$i ]; | |
22128 | ||
22129 | if ( $next_nonblank_token =~ /^\s*$/ ) { | |
22130 | ||
22131 | if ( $i < $max_token_index ) { | |
22132 | $next_nonblank_token = $$rtokens[ ++$i ]; | |
22133 | } | |
22134 | } | |
22135 | } | |
22136 | else { | |
22137 | $next_nonblank_token = ""; | |
22138 | } | |
22139 | return ( $next_nonblank_token, $i ); | |
22140 | } | |
22141 | ||
22142 | sub find_next_nonblank_token { | |
22143 | my ( $i, $rtokens ) = @_; | |
22144 | ||
22145 | if ( $i >= $max_token_index ) { | |
22146 | ||
22147 | if ( !$peeked_ahead ) { | |
22148 | $peeked_ahead = 1; | |
22149 | $rtokens = peek_ahead_for_nonblank_token($rtokens); | |
22150 | } | |
22151 | } | |
22152 | my $next_nonblank_token = $$rtokens[ ++$i ]; | |
22153 | ||
22154 | if ( $next_nonblank_token =~ /^\s*$/ ) { | |
22155 | $next_nonblank_token = $$rtokens[ ++$i ]; | |
22156 | } | |
22157 | return ( $next_nonblank_token, $i ); | |
22158 | } | |
22159 | ||
22160 | sub peek_ahead_for_n_nonblank_pre_tokens { | |
22161 | ||
22162 | # returns next n pretokens if they exist | |
22163 | # returns undef's if hits eof without seeing any pretokens | |
22164 | my $max_pretokens = shift; | |
22165 | my $line; | |
22166 | my $i = 0; | |
22167 | my ( $rpre_tokens, $rmap, $rpre_types ); | |
22168 | ||
22169 | while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) | |
22170 | { | |
22171 | $line =~ s/^\s*//; # trim leading blanks | |
22172 | next if ( length($line) <= 0 ); # skip blank | |
22173 | next if ( $line =~ /^#/ ); # skip comment | |
22174 | ( $rpre_tokens, $rmap, $rpre_types ) = | |
22175 | pre_tokenize( $line, $max_pretokens ); | |
22176 | last; | |
22177 | } | |
22178 | return ( $rpre_tokens, $rpre_types ); | |
22179 | } | |
22180 | ||
22181 | # look ahead for next non-blank, non-comment line of code | |
22182 | sub peek_ahead_for_nonblank_token { | |
22183 | my $rtokens = shift; | |
22184 | my $line; | |
22185 | my $i = 0; | |
22186 | ||
22187 | while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) | |
22188 | { | |
22189 | $line =~ s/^\s*//; # trim leading blanks | |
22190 | next if ( length($line) <= 0 ); # skip blank | |
22191 | next if ( $line =~ /^#/ ); # skip comment | |
22192 | my ( $rtok, $rmap, $rtype ) = | |
22193 | pre_tokenize( $line, 2 ); # only need 2 pre-tokens | |
22194 | my $j = $max_token_index + 1; | |
22195 | my $tok; | |
22196 | ||
22197 | foreach $tok (@$rtok) { | |
22198 | last if ( $tok =~ "\n" ); | |
22199 | $$rtokens[ ++$j ] = $tok; | |
22200 | } | |
22201 | last; | |
22202 | } | |
22203 | return $rtokens; | |
22204 | } | |
22205 | ||
22206 | sub pre_tokenize { | |
22207 | ||
22208 | # Break a string, $str, into a sequence of preliminary tokens. We | |
22209 | # are interested in these types of tokens: | |
22210 | # words (type='w'), example: 'max_tokens_wanted' | |
22211 | # digits (type = 'd'), example: '0755' | |
22212 | # whitespace (type = 'b'), example: ' ' | |
22213 | # any other single character (i.e. punct; type = the character itself). | |
22214 | # We cannot do better than this yet because we might be in a quoted | |
22215 | # string or pattern. Caller sets $max_tokens_wanted to 0 to get all | |
22216 | # tokens. | |
22217 | my ( $str, $max_tokens_wanted ) = @_; | |
22218 | ||
22219 | # we return references to these 3 arrays: | |
22220 | my @tokens = (); # array of the tokens themselves | |
22221 | my @token_map = (0); # string position of start of each token | |
22222 | my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct | |
22223 | ||
22224 | do { | |
22225 | ||
22226 | # whitespace | |
22227 | if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } | |
22228 | ||
22229 | # numbers | |
22230 | # note that this must come before words! | |
22231 | elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } | |
22232 | ||
22233 | # words | |
22234 | elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } | |
22235 | ||
22236 | # single-character punctuation | |
22237 | elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } | |
22238 | ||
22239 | # that's all.. | |
22240 | else { | |
22241 | return ( \@tokens, \@token_map, \@type ); | |
22242 | } | |
22243 | ||
22244 | push @tokens, $1; | |
22245 | push @token_map, pos($str); | |
22246 | ||
22247 | } while ( --$max_tokens_wanted != 0 ); | |
22248 | ||
22249 | return ( \@tokens, \@token_map, \@type ); | |
22250 | } | |
22251 | ||
22252 | sub show_tokens { | |
22253 | ||
22254 | # this is an old debug routine | |
22255 | my ( $rtokens, $rtoken_map ) = @_; | |
22256 | my $num = scalar(@$rtokens); | |
22257 | my $i; | |
22258 | ||
22259 | for ( $i = 0 ; $i < $num ; $i++ ) { | |
22260 | my $len = length( $$rtokens[$i] ); | |
22261 | print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n"; | |
22262 | } | |
22263 | } | |
22264 | ||
22265 | sub find_angle_operator_termination { | |
22266 | ||
22267 | # We are looking at a '<' and want to know if it is an angle operator. | |
22268 | # We are to return: | |
22269 | # $i = pretoken index of ending '>' if found, current $i otherwise | |
22270 | # $type = 'Q' if found, '>' otherwise | |
22271 | my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_; | |
22272 | my $i = $i_beg; | |
22273 | my $type = '<'; | |
22274 | pos($input_line) = 1 + $$rtoken_map[$i]; | |
22275 | ||
22276 | my $filter; | |
22277 | ||
22278 | # we just have to find the next '>' if a term is expected | |
22279 | if ( $expecting == TERM ) { $filter = '[\>]' } | |
22280 | ||
22281 | # we have to guess if we don't know what is expected | |
22282 | elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } | |
22283 | ||
22284 | # shouldn't happen - we shouldn't be here if operator is expected | |
22285 | else { warning("Program Bug in find_angle_operator_termination\n") } | |
22286 | ||
22287 | # To illustrate what we might be looking at, in case we are | |
22288 | # guessing, here are some examples of valid angle operators | |
22289 | # (or file globs): | |
22290 | # <tmp_imp/*> | |
22291 | # <FH> | |
22292 | # <$fh> | |
22293 | # <*.c *.h> | |
22294 | # <_> | |
22295 | # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) | |
22296 | # <${PREFIX}*img*.$IMAGE_TYPE> | |
22297 | # <img*.$IMAGE_TYPE> | |
22298 | # <Timg*.$IMAGE_TYPE> | |
22299 | # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> | |
22300 | # | |
22301 | # Here are some examples of lines which do not have angle operators: | |
22302 | # return undef unless $self->[2]++ < $#{$self->[1]}; | |
22303 | # < 2 || @$t > | |
22304 | # | |
22305 | # the following line from dlister.pl caused trouble: | |
22306 | # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; | |
22307 | # | |
22308 | # If the '<' starts an angle operator, it must end on this line and | |
22309 | # it must not have certain characters like ';' and '=' in it. I use | |
22310 | # this to limit the testing. This filter should be improved if | |
22311 | # possible. | |
22312 | ||
22313 | if ( $input_line =~ /($filter)/g ) { | |
22314 | ||
22315 | if ( $1 eq '>' ) { | |
22316 | ||
22317 | # We MAY have found an angle operator termination if we get | |
22318 | # here, but we need to do more to be sure we haven't been | |
22319 | # fooled. | |
22320 | my $pos = pos($input_line); | |
22321 | ||
22322 | my $pos_beg = $$rtoken_map[$i]; | |
22323 | my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); | |
22324 | ||
22325 | ######################################debug##### | |
22326 | #write_diagnostics( "ANGLE? :$str\n"); | |
22327 | #print "ANGLE: found $1 at pos=$pos\n"; | |
22328 | ######################################debug##### | |
22329 | $type = 'Q'; | |
22330 | my $error; | |
22331 | ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); | |
22332 | ||
22333 | # It may be possible that a quote ends midway in a pretoken. | |
22334 | # If this happens, it may be necessary to split the pretoken. | |
22335 | if ($error) { | |
22336 | warning( | |
22337 | "Possible tokinization error..please check this line\n"); | |
22338 | report_possible_bug(); | |
22339 | } | |
22340 | ||
22341 | # Now let's see where we stand.... | |
22342 | # OK if math op not possible | |
22343 | if ( $expecting == TERM ) { | |
22344 | } | |
22345 | ||
22346 | # OK if there are no more than 2 pre-tokens inside | |
22347 | # (not possible to write 2 token math between < and >) | |
22348 | # This catches most common cases | |
22349 | elsif ( $i <= $i_beg + 3 ) { | |
22350 | write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); | |
22351 | } | |
22352 | ||
22353 | # Not sure.. | |
22354 | else { | |
22355 | ||
22356 | # Let's try a Brace Test: any braces inside must balance | |
22357 | my $br = 0; | |
22358 | while ( $str =~ /\{/g ) { $br++ } | |
22359 | while ( $str =~ /\}/g ) { $br-- } | |
22360 | my $sb = 0; | |
22361 | while ( $str =~ /\[/g ) { $sb++ } | |
22362 | while ( $str =~ /\]/g ) { $sb-- } | |
22363 | my $pr = 0; | |
22364 | while ( $str =~ /\(/g ) { $pr++ } | |
22365 | while ( $str =~ /\)/g ) { $pr-- } | |
22366 | ||
22367 | # if braces do not balance - not angle operator | |
22368 | if ( $br || $sb || $pr ) { | |
22369 | $i = $i_beg; | |
22370 | $type = '<'; | |
22371 | write_diagnostics( | |
22372 | "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); | |
22373 | } | |
22374 | ||
22375 | # we should keep doing more checks here...to be continued | |
22376 | # Tentatively accepting this as a valid angle operator. | |
22377 | # There are lots more things that can be checked. | |
22378 | else { | |
22379 | write_diagnostics( | |
22380 | "ANGLE-Guessing yes: $str expecting=$expecting\n"); | |
22381 | write_logfile_entry("Guessing angle operator here: $str\n"); | |
22382 | } | |
22383 | } | |
22384 | } | |
22385 | ||
22386 | # didn't find ending > | |
22387 | else { | |
22388 | if ( $expecting == TERM ) { | |
22389 | warning("No ending > for angle operator\n"); | |
22390 | } | |
22391 | } | |
22392 | } | |
22393 | return ( $i, $type ); | |
22394 | } | |
22395 | ||
22396 | sub inverse_pretoken_map { | |
22397 | ||
22398 | # Starting with the current pre_token index $i, scan forward until | |
22399 | # finding the index of the next pre_token whose position is $pos. | |
22400 | my ( $i, $pos, $rtoken_map ) = @_; | |
22401 | my $error = 0; | |
22402 | ||
22403 | while ( ++$i <= $max_token_index ) { | |
22404 | ||
22405 | if ( $pos <= $$rtoken_map[$i] ) { | |
22406 | ||
22407 | # Let the calling routine handle errors in which we do not | |
22408 | # land on a pre-token boundary. It can happen by running | |
22409 | # perltidy on some non-perl scripts, for example. | |
22410 | if ( $pos < $$rtoken_map[$i] ) { $error = 1 } | |
22411 | $i--; | |
22412 | last; | |
22413 | } | |
22414 | } | |
22415 | return ( $i, $error ); | |
22416 | } | |
22417 | ||
22418 | sub guess_if_pattern_or_conditional { | |
22419 | ||
22420 | # this routine is called when we have encountered a ? following an | |
22421 | # unknown bareword, and we must decide if it starts a pattern or not | |
22422 | # input parameters: | |
22423 | # $i - token index of the ? starting possible pattern | |
22424 | # output parameters: | |
22425 | # $is_pattern = 0 if probably not pattern, =1 if probably a pattern | |
22426 | # msg = a warning or diagnostic message | |
22427 | my ( $i, $rtokens, $rtoken_map ) = @_; | |
22428 | my $is_pattern = 0; | |
22429 | my $msg = "guessing that ? after $last_nonblank_token starts a "; | |
22430 | ||
22431 | if ( $i >= $max_token_index ) { | |
22432 | $msg .= "conditional (no end to pattern found on the line)\n"; | |
22433 | } | |
22434 | else { | |
22435 | my $ibeg = $i; | |
22436 | $i = $ibeg + 1; | |
22437 | my $next_token = $$rtokens[$i]; # first token after ? | |
22438 | ||
22439 | # look for a possible ending ? on this line.. | |
22440 | my $in_quote = 1; | |
22441 | my $quote_depth = 0; | |
22442 | my $quote_character = ''; | |
22443 | my $quote_pos = 0; | |
22444 | ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = | |
22445 | follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, | |
22446 | $quote_pos, $quote_depth ); | |
22447 | ||
22448 | if ($in_quote) { | |
22449 | ||
22450 | # we didn't find an ending ? on this line, | |
22451 | # so we bias towards conditional | |
22452 | $is_pattern = 0; | |
22453 | $msg .= "conditional (no ending ? on this line)\n"; | |
22454 | ||
22455 | # we found an ending ?, so we bias towards a pattern | |
22456 | } | |
22457 | else { | |
22458 | ||
22459 | if ( pattern_expected( $i, $rtokens ) >= 0 ) { | |
22460 | $is_pattern = 1; | |
22461 | $msg .= "pattern (found ending ? and pattern expected)\n"; | |
22462 | } | |
22463 | else { | |
22464 | $msg .= "pattern (uncertain, but found ending ?)\n"; | |
22465 | } | |
22466 | } | |
22467 | } | |
22468 | return ( $is_pattern, $msg ); | |
22469 | } | |
22470 | ||
22471 | sub guess_if_pattern_or_division { | |
22472 | ||
22473 | # this routine is called when we have encountered a / following an | |
22474 | # unknown bareword, and we must decide if it starts a pattern or is a | |
22475 | # division | |
22476 | # input parameters: | |
22477 | # $i - token index of the / starting possible pattern | |
22478 | # output parameters: | |
22479 | # $is_pattern = 0 if probably division, =1 if probably a pattern | |
22480 | # msg = a warning or diagnostic message | |
22481 | my ( $i, $rtokens, $rtoken_map ) = @_; | |
22482 | my $is_pattern = 0; | |
22483 | my $msg = "guessing that / after $last_nonblank_token starts a "; | |
22484 | ||
22485 | if ( $i >= $max_token_index ) { | |
22486 | "division (no end to pattern found on the line)\n"; | |
22487 | } | |
22488 | else { | |
22489 | my $ibeg = $i; | |
22490 | my $divide_expected = numerator_expected( $i, $rtokens ); | |
22491 | $i = $ibeg + 1; | |
22492 | my $next_token = $$rtokens[$i]; # first token after slash | |
22493 | ||
22494 | # look for a possible ending / on this line.. | |
22495 | my $in_quote = 1; | |
22496 | my $quote_depth = 0; | |
22497 | my $quote_character = ''; | |
22498 | my $quote_pos = 0; | |
22499 | ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = | |
22500 | follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, | |
22501 | $quote_pos, $quote_depth ); | |
22502 | ||
22503 | if ($in_quote) { | |
22504 | ||
22505 | # we didn't find an ending / on this line, | |
22506 | # so we bias towards division | |
22507 | if ( $divide_expected >= 0 ) { | |
22508 | $is_pattern = 0; | |
22509 | $msg .= "division (no ending / on this line)\n"; | |
22510 | } | |
22511 | else { | |
22512 | $msg = "multi-line pattern (division not possible)\n"; | |
22513 | $is_pattern = 1; | |
22514 | } | |
22515 | ||
22516 | } | |
22517 | ||
22518 | # we found an ending /, so we bias towards a pattern | |
22519 | else { | |
22520 | ||
22521 | if ( pattern_expected( $i, $rtokens ) >= 0 ) { | |
22522 | ||
22523 | if ( $divide_expected >= 0 ) { | |
22524 | ||
22525 | if ( $i - $ibeg > 60 ) { | |
22526 | $msg .= "division (matching / too distant)\n"; | |
22527 | $is_pattern = 0; | |
22528 | } | |
22529 | else { | |
22530 | $msg .= "pattern (but division possible too)\n"; | |
22531 | $is_pattern = 1; | |
22532 | } | |
22533 | } | |
22534 | else { | |
22535 | $is_pattern = 1; | |
22536 | $msg .= "pattern (division not possible)\n"; | |
22537 | } | |
22538 | } | |
22539 | else { | |
22540 | ||
22541 | if ( $divide_expected >= 0 ) { | |
22542 | $is_pattern = 0; | |
22543 | $msg .= "division (pattern not possible)\n"; | |
22544 | } | |
22545 | else { | |
22546 | $is_pattern = 1; | |
22547 | $msg .= | |
22548 | "pattern (uncertain, but division would not work here)\n"; | |
22549 | } | |
22550 | } | |
22551 | } | |
22552 | } | |
22553 | return ( $is_pattern, $msg ); | |
22554 | } | |
22555 | ||
22556 | sub find_here_doc { | |
22557 | ||
22558 | # find the target of a here document, if any | |
22559 | # input parameters: | |
22560 | # $i - token index of the second < of << | |
22561 | # ($i must be less than the last token index if this is called) | |
22562 | # output parameters: | |
22563 | # $found_target = 0 didn't find target; =1 found target | |
22564 | # HERE_TARGET - the target string (may be empty string) | |
22565 | # $i - unchanged if not here doc, | |
22566 | # or index of the last token of the here target | |
22567 | my ( $expecting, $i, $rtokens, $rtoken_map ) = @_; | |
22568 | my $ibeg = $i; | |
22569 | my $found_target = 0; | |
22570 | my $here_doc_target = ''; | |
22571 | my $here_quote_character = ''; | |
22572 | my ( $next_nonblank_token, $i_next_nonblank, $next_token ); | |
22573 | $next_token = $$rtokens[ $i + 1 ]; | |
22574 | ||
22575 | # perl allows a backslash before the target string (heredoc.t) | |
22576 | my $backslash = 0; | |
22577 | if ( $next_token eq '\\' ) { | |
22578 | $backslash = 1; | |
22579 | $next_token = $$rtokens[ $i + 2 ]; | |
22580 | } | |
22581 | ||
22582 | ( $next_nonblank_token, $i_next_nonblank ) = | |
22583 | find_next_nonblank_token_on_this_line( $i, $rtokens ); | |
22584 | ||
22585 | if ( $next_nonblank_token =~ /[\'\"\`]/ ) { | |
22586 | ||
22587 | my $in_quote = 1; | |
22588 | my $quote_depth = 0; | |
22589 | my $quote_pos = 0; | |
22590 | ||
22591 | ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) = | |
22592 | follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, | |
22593 | $here_quote_character, $quote_pos, $quote_depth ); | |
22594 | ||
22595 | if ($in_quote) { # didn't find end of quote, so no target found | |
22596 | $i = $ibeg; | |
22597 | } | |
22598 | else { # found ending quote | |
22599 | my $j; | |
22600 | $found_target = 1; | |
22601 | ||
22602 | my $tokj; | |
22603 | for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) { | |
22604 | $tokj = $$rtokens[$j]; | |
22605 | ||
22606 | # we have to remove any backslash before the quote character | |
22607 | # so that the here-doc-target exactly matches this string | |
22608 | next | |
22609 | if ( $tokj eq "\\" | |
22610 | && $j < $i - 1 | |
22611 | && $$rtokens[ $j + 1 ] eq $here_quote_character ); | |
22612 | $here_doc_target .= $tokj; | |
22613 | } | |
22614 | } | |
22615 | } | |
22616 | ||
22617 | elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { | |
22618 | $found_target = 1; | |
22619 | write_logfile_entry( | |
22620 | "found blank here-target after <<; suggest using \"\"\n"); | |
22621 | $i = $ibeg; | |
22622 | } | |
22623 | elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << | |
22624 | ||
22625 | my $here_doc_expected; | |
22626 | if ( $expecting == UNKNOWN ) { | |
22627 | $here_doc_expected = guess_if_here_doc($next_token); | |
22628 | } | |
22629 | else { | |
22630 | $here_doc_expected = 1; | |
22631 | } | |
22632 | ||
22633 | if ($here_doc_expected) { | |
22634 | $found_target = 1; | |
22635 | $here_doc_target = $next_token; | |
22636 | $i = $ibeg + 1; | |
22637 | } | |
22638 | ||
22639 | } | |
22640 | else { | |
22641 | ||
22642 | if ( $expecting == TERM ) { | |
22643 | $found_target = 1; | |
22644 | write_logfile_entry("Note: bare here-doc operator <<\n"); | |
22645 | } | |
22646 | else { | |
22647 | $i = $ibeg; | |
22648 | } | |
22649 | } | |
22650 | ||
22651 | # patch to neglect any prepended backslash | |
22652 | if ( $found_target && $backslash ) { $i++ } | |
22653 | ||
22654 | return ( $found_target, $here_doc_target, $here_quote_character, $i ); | |
22655 | } | |
22656 | ||
22657 | # try to resolve here-doc vs. shift by looking ahead for | |
22658 | # non-code or the end token (currently only looks for end token) | |
22659 | # returns 1 if it is probably a here doc, 0 if not | |
22660 | sub guess_if_here_doc { | |
22661 | ||
22662 | # This is how many lines we will search for a target as part of the | |
22663 | # guessing strategy. It is a constant because there is probably | |
22664 | # little reason to change it. | |
22665 | use constant HERE_DOC_WINDOW => 40; | |
22666 | ||
22667 | my $next_token = shift; | |
22668 | my $here_doc_expected = 0; | |
22669 | my $line; | |
22670 | my $k = 0; | |
22671 | my $msg = "checking <<"; | |
22672 | ||
22673 | while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) | |
22674 | { | |
22675 | chomp $line; | |
22676 | ||
22677 | if ( $line =~ /^$next_token$/ ) { | |
22678 | $msg .= " -- found target $next_token ahead $k lines\n"; | |
22679 | $here_doc_expected = 1; # got it | |
22680 | last; | |
22681 | } | |
22682 | last if ( $k >= HERE_DOC_WINDOW ); | |
22683 | } | |
22684 | ||
22685 | unless ($here_doc_expected) { | |
22686 | ||
22687 | if ( !defined($line) ) { | |
22688 | $here_doc_expected = -1; # hit eof without seeing target | |
22689 | $msg .= " -- must be shift; target $next_token not in file\n"; | |
22690 | ||
22691 | } | |
22692 | else { # still unsure..taking a wild guess | |
22693 | ||
22694 | if ( !$is_constant{$current_package}{$next_token} ) { | |
22695 | $here_doc_expected = 1; | |
22696 | $msg .= | |
22697 | " -- guessing it's a here-doc ($next_token not a constant)\n"; | |
22698 | } | |
22699 | else { | |
22700 | $msg .= | |
22701 | " -- guessing it's a shift ($next_token is a constant)\n"; | |
22702 | } | |
22703 | } | |
22704 | } | |
22705 | write_logfile_entry($msg); | |
22706 | return $here_doc_expected; | |
22707 | } | |
22708 | ||
22709 | sub do_quote { | |
22710 | ||
22711 | # follow (or continue following) quoted string or pattern | |
22712 | # $in_quote return code: | |
22713 | # 0 - ok, found end | |
22714 | # 1 - still must find end of quote whose target is $quote_character | |
22715 | # 2 - still looking for end of first of two quotes | |
22716 | my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens, | |
22717 | $rtoken_map ) | |
22718 | = @_; | |
22719 | ||
22720 | if ( $in_quote == 2 ) { # two quotes/patterns to follow | |
22721 | my $ibeg = $i; | |
22722 | ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = | |
22723 | follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, | |
22724 | $quote_pos, $quote_depth ); | |
22725 | ||
22726 | if ( $in_quote == 1 ) { | |
22727 | if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } | |
22728 | $quote_character = ''; | |
22729 | } | |
22730 | } | |
22731 | ||
22732 | if ( $in_quote == 1 ) { # one (more) quote to follow | |
22733 | my $ibeg = $i; | |
22734 | ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) = | |
22735 | follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, | |
22736 | $quote_pos, $quote_depth ); | |
22737 | } | |
22738 | return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ); | |
22739 | } | |
22740 | ||
22741 | sub scan_number_do { | |
22742 | ||
22743 | # scan a number in any of the formats that Perl accepts | |
22744 | # Underbars (_) are allowed in decimal numbers. | |
22745 | # input parameters - | |
22746 | # $input_line - the string to scan | |
22747 | # $i - pre_token index to start scanning | |
22748 | # $rtoken_map - reference to the pre_token map giving starting | |
22749 | # character position in $input_line of token $i | |
22750 | # output parameters - | |
22751 | # $i - last pre_token index of the number just scanned | |
22752 | # number - the number (characters); or undef if not a number | |
22753 | ||
22754 | my ( $input_line, $i, $rtoken_map, $input_type ) = @_; | |
22755 | my $pos_beg = $$rtoken_map[$i]; | |
22756 | my $pos; | |
22757 | my $i_begin = $i; | |
22758 | my $number = undef; | |
22759 | my $type = $input_type; | |
22760 | ||
22761 | my $first_char = substr( $input_line, $pos_beg, 1 ); | |
22762 | ||
22763 | # Look for bad starting characters; Shouldn't happen.. | |
22764 | if ( $first_char !~ /[\d\.\+\-Ee]/ ) { | |
22765 | warning("Program bug - scan_number given character $first_char\n"); | |
22766 | report_definite_bug(); | |
22767 | return ( $i, $type, $number ); | |
22768 | } | |
22769 | ||
22770 | # handle v-string without leading 'v' character ('Two Dot' rule) | |
22771 | # (vstring.t) | |
22772 | pos($input_line) = $pos_beg; | |
22773 | if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { | |
22774 | $pos = pos($input_line); | |
22775 | my $numc = $pos - $pos_beg; | |
22776 | $number = substr( $input_line, $pos_beg, $numc ); | |
22777 | $type = 'v'; | |
22778 | unless ($saw_v_string) { report_v_string($number) } | |
22779 | } | |
22780 | ||
22781 | # handle octal, hex, binary | |
22782 | if ( !defined($number) ) { | |
22783 | pos($input_line) = $pos_beg; | |
22784 | if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g ) | |
22785 | { | |
22786 | $pos = pos($input_line); | |
22787 | my $numc = $pos - $pos_beg; | |
22788 | $number = substr( $input_line, $pos_beg, $numc ); | |
22789 | $type = 'n'; | |
22790 | } | |
22791 | } | |
22792 | ||
22793 | # handle decimal | |
22794 | if ( !defined($number) ) { | |
22795 | pos($input_line) = $pos_beg; | |
22796 | ||
22797 | if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { | |
22798 | $pos = pos($input_line); | |
22799 | ||
22800 | # watch out for things like 0..40 which would give 0. by this; | |
22801 | if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) | |
22802 | && ( substr( $input_line, $pos, 1 ) eq '.' ) ) | |
22803 | { | |
22804 | $pos--; | |
22805 | } | |
22806 | my $numc = $pos - $pos_beg; | |
22807 | $number = substr( $input_line, $pos_beg, $numc ); | |
22808 | $type = 'n'; | |
22809 | } | |
22810 | } | |
22811 | ||
22812 | # filter out non-numbers like e + - . e2 .e3 +e6 | |
22813 | # the rule: at least one digit, and any 'e' must be preceded by a digit | |
22814 | if ( | |
22815 | $number !~ /\d/ # no digits | |
22816 | || ( $number =~ /^(.*)[eE]/ | |
22817 | && $1 !~ /\d/ ) # or no digits before the 'e' | |
22818 | ) | |
22819 | { | |
22820 | $number = undef; | |
22821 | $type = $input_type; | |
22822 | return ( $i, $type, $number ); | |
22823 | } | |
22824 | ||
22825 | # Found a number; now we must convert back from character position | |
22826 | # to pre_token index. An error here implies user syntax error. | |
22827 | # An example would be an invalid octal number like '009'. | |
22828 | my $error; | |
22829 | ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); | |
22830 | if ($error) { warning("Possibly invalid number\n") } | |
22831 | ||
22832 | return ( $i, $type, $number ); | |
22833 | } | |
22834 | ||
22835 | sub scan_bare_identifier_do { | |
22836 | ||
22837 | # this routine is called to scan a token starting with an alphanumeric | |
22838 | # variable or package separator, :: or '. | |
22839 | ||
22840 | my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_; | |
22841 | my $i_begin = $i; | |
22842 | my $package = undef; | |
22843 | ||
22844 | my $i_beg = $i; | |
22845 | ||
22846 | # we have to back up one pretoken at a :: since each : is one pretoken | |
22847 | if ( $tok eq '::' ) { $i_beg-- } | |
22848 | if ( $tok eq '->' ) { $i_beg-- } | |
22849 | my $pos_beg = $$rtoken_map[$i_beg]; | |
22850 | pos($input_line) = $pos_beg; | |
22851 | ||
22852 | # Examples: | |
22853 | # A::B::C | |
22854 | # A:: | |
22855 | # ::A | |
22856 | # A'B | |
22857 | if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { | |
22858 | ||
22859 | my $pos = pos($input_line); | |
22860 | my $numc = $pos - $pos_beg; | |
22861 | $tok = substr( $input_line, $pos_beg, $numc ); | |
22862 | ||
22863 | # type 'w' includes anything without leading type info | |
22864 | # ($,%,@,*) including something like abc::def::ghi | |
22865 | $type = 'w'; | |
22866 | ||
22867 | my $sub_name = ""; | |
22868 | if ( defined($2) ) { $sub_name = $2; } | |
22869 | if ( defined($1) ) { | |
22870 | $package = $1; | |
22871 | ||
22872 | # patch: don't allow isolated package name which just ends | |
22873 | # in the old style package separator (single quote). Example: | |
22874 | # use CGI':all'; | |
22875 | if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { | |
22876 | $pos--; | |
22877 | } | |
22878 | ||
22879 | $package =~ s/\'/::/g; | |
22880 | if ( $package =~ /^\:/ ) { $package = 'main' . $package } | |
22881 | $package =~ s/::$//; | |
22882 | } | |
22883 | else { | |
22884 | $package = $current_package; | |
22885 | ||
22886 | if ( $is_keyword{$tok} ) { | |
22887 | $type = 'k'; | |
22888 | } | |
22889 | } | |
22890 | ||
22891 | # if it is a bareword.. | |
22892 | if ( $type eq 'w' ) { | |
22893 | ||
22894 | # check for v-string with leading 'v' type character | |
22895 | # (This seems to have presidence over filehandle, type 'Y') | |
22896 | if ( $tok =~ /^v\d+$/ ) { | |
22897 | ||
22898 | # we only have the first part - something like 'v101' - | |
22899 | # look for more | |
22900 | if ( $input_line =~ m/\G(\.\d+)+/gc ) { | |
22901 | $pos = pos($input_line); | |
22902 | $numc = $pos - $pos_beg; | |
22903 | $tok = substr( $input_line, $pos_beg, $numc ); | |
22904 | } | |
22905 | $type = 'v'; | |
22906 | ||
22907 | # warn if this version can't handle v-strings | |
22908 | unless ($saw_v_string) { report_v_string($tok) } | |
22909 | } | |
22910 | ||
22911 | elsif ( $is_constant{$package}{$sub_name} ) { | |
22912 | $type = 'C'; | |
22913 | } | |
22914 | ||
22915 | # bareword after sort has implied empty prototype; for example: | |
22916 | # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); | |
22917 | # This has priority over whatever the user has specified. | |
22918 | elsif ($last_nonblank_token eq 'sort' | |
22919 | && $last_nonblank_type eq 'k' ) | |
22920 | { | |
22921 | $type = 'Z'; | |
22922 | } | |
22923 | ||
22924 | # Note: strangely, perl does not seem to really let you create | |
22925 | # functions which act like eval and do, in the sense that eval | |
22926 | # and do may have operators following the final }, but any operators | |
22927 | # that you create with prototype (&) apparently do not allow | |
22928 | # trailing operators, only terms. This seems strange. | |
22929 | # If this ever changes, here is the update | |
22930 | # to make perltidy behave accordingly: | |
22931 | ||
22932 | # elsif ( $is_block_function{$package}{$tok} ) { | |
22933 | # $tok='eval'; # patch to do braces like eval - doesn't work | |
22934 | # $type = 'k'; | |
22935 | #} | |
22936 | # FIXME: This could become a separate type to allow for different | |
22937 | # future behavior: | |
22938 | elsif ( $is_block_function{$package}{$sub_name} ) { | |
22939 | $type = 'G'; | |
22940 | } | |
22941 | ||
22942 | elsif ( $is_block_list_function{$package}{$sub_name} ) { | |
22943 | $type = 'G'; | |
22944 | } | |
22945 | elsif ( $is_user_function{$package}{$sub_name} ) { | |
22946 | $type = 'U'; | |
22947 | $prototype = $user_function_prototype{$package}{$sub_name}; | |
22948 | } | |
22949 | ||
22950 | # check for indirect object | |
22951 | elsif ( | |
22952 | ||
22953 | # added 2001-03-27: must not be followed immediately by '(' | |
22954 | # see fhandle.t | |
22955 | ( $input_line !~ m/\G\(/gc ) | |
22956 | ||
22957 | # and | |
22958 | && ( | |
22959 | ||
22960 | # preceded by keyword like 'print', 'printf' and friends | |
22961 | $is_indirect_object_taker{$last_nonblank_token} | |
22962 | ||
22963 | # or preceded by something like 'print(' or 'printf(' | |
22964 | || ( | |
22965 | ( $last_nonblank_token eq '(' ) | |
22966 | && $is_indirect_object_taker{ $paren_type[$paren_depth] | |
22967 | } | |
22968 | ||
22969 | ) | |
22970 | ) | |
22971 | ) | |
22972 | { | |
22973 | ||
22974 | # may not be indirect object unless followed by a space | |
22975 | if ( $input_line =~ m/\G\s+/gc ) { | |
22976 | $type = 'Y'; | |
22977 | ||
22978 | # Abandon Hope ... | |
22979 | # Perl's indirect object notation is a very bad | |
22980 | # thing and can cause subtle bugs, especially for | |
22981 | # beginning programmers. And I haven't even been | |
22982 | # able to figure out a sane warning scheme which | |
22983 | # doesn't get in the way of good scripts. | |
22984 | ||
22985 | # Complain if a filehandle has any lower case | |
22986 | # letters. This is suggested good practice, but the | |
22987 | # main reason for this warning is that prior to | |
22988 | # release 20010328, perltidy incorrectly parsed a | |
22989 | # function call after a print/printf, with the | |
22990 | # result that a space got added before the opening | |
22991 | # paren, thereby converting the function name to a | |
22992 | # filehandle according to perl's weird rules. This | |
22993 | # will not usually generate a syntax error, so this | |
22994 | # is a potentially serious bug. By warning | |
22995 | # of filehandles with any lower case letters, | |
22996 | # followed by opening parens, we will help the user | |
22997 | # find almost all of these older errors. | |
22998 | # use 'sub_name' because something like | |
22999 | # main::MYHANDLE is ok for filehandle | |
23000 | if ( $sub_name =~ /[a-z]/ ) { | |
23001 | ||
23002 | # could be bug caused by older perltidy if | |
23003 | # followed by '(' | |
23004 | if ( $input_line =~ m/\G\s*\(/gc ) { | |
23005 | complain( | |
23006 | "Caution: unknown word '$tok' in indirect object slot\n" | |
23007 | ); | |
23008 | } | |
23009 | } | |
23010 | } | |
23011 | ||
23012 | # bareword not followed by a space -- may not be filehandle | |
23013 | # (may be function call defined in a 'use' statement) | |
23014 | else { | |
23015 | $type = 'Z'; | |
23016 | } | |
23017 | } | |
23018 | } | |
23019 | ||
23020 | # Now we must convert back from character position | |
23021 | # to pre_token index. | |
23022 | # I don't think an error flag can occur here ..but who knows | |
23023 | my $error; | |
23024 | ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); | |
23025 | if ($error) { | |
23026 | warning("scan_bare_identifier: Possibly invalid tokenization\n"); | |
23027 | } | |
23028 | } | |
23029 | ||
23030 | # no match but line not blank - could be syntax error | |
23031 | # perl will take '::' alone without complaint | |
23032 | else { | |
23033 | $type = 'w'; | |
23034 | ||
23035 | # change this warning to log message if it becomes annoying | |
23036 | warning("didn't find identifier after leading ::\n"); | |
23037 | } | |
23038 | return ( $i, $tok, $type, $prototype ); | |
23039 | } | |
23040 | ||
23041 | sub scan_id_do { | |
23042 | ||
23043 | # This is the new scanner and will eventually replace scan_identifier. | |
23044 | # Only type 'sub' and 'package' are implemented. | |
23045 | # Token types $ * % @ & -> are not yet implemented. | |
23046 | # | |
23047 | # Scan identifier following a type token. | |
23048 | # The type of call depends on $id_scan_state: $id_scan_state = '' | |
23049 | # for starting call, in which case $tok must be the token defining | |
23050 | # the type. | |
23051 | # | |
23052 | # If the type token is the last nonblank token on the line, a value | |
23053 | # of $id_scan_state = $tok is returned, indicating that further | |
23054 | # calls must be made to get the identifier. If the type token is | |
23055 | # not the last nonblank token on the line, the identifier is | |
23056 | # scanned and handled and a value of '' is returned. | |
23057 | ||
23058 | my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_; | |
23059 | my $type = ''; | |
23060 | my ( $i_beg, $pos_beg ); | |
23061 | ||
23062 | #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; | |
23063 | #my ($a,$b,$c) = caller; | |
23064 | #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; | |
23065 | ||
23066 | # on re-entry, start scanning at first token on the line | |
23067 | if ($id_scan_state) { | |
23068 | $i_beg = $i; | |
23069 | $type = ''; | |
23070 | } | |
23071 | ||
23072 | # on initial entry, start scanning just after type token | |
23073 | else { | |
23074 | $i_beg = $i + 1; | |
23075 | $id_scan_state = $tok; | |
23076 | $type = 't'; | |
23077 | } | |
23078 | ||
23079 | # find $i_beg = index of next nonblank token, | |
23080 | # and handle empty lines | |
23081 | my $blank_line = 0; | |
23082 | my $next_nonblank_token = $$rtokens[$i_beg]; | |
23083 | if ( $i_beg > $max_token_index ) { | |
23084 | $blank_line = 1; | |
23085 | } | |
23086 | else { | |
23087 | ||
23088 | # only a '#' immediately after a '$' is not a comment | |
23089 | if ( $next_nonblank_token eq '#' ) { | |
23090 | unless ( $tok eq '$' ) { | |
23091 | $blank_line = 1; | |
23092 | } | |
23093 | } | |
23094 | ||
23095 | if ( $next_nonblank_token =~ /^\s/ ) { | |
23096 | ( $next_nonblank_token, $i_beg ) = | |
23097 | find_next_nonblank_token_on_this_line( $i_beg, $rtokens ); | |
23098 | if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { | |
23099 | $blank_line = 1; | |
23100 | } | |
23101 | } | |
23102 | } | |
23103 | ||
23104 | # handle non-blank line; identifier, if any, must follow | |
23105 | unless ($blank_line) { | |
23106 | ||
23107 | if ( $id_scan_state eq 'sub' ) { | |
23108 | ( $i, $tok, $type, $id_scan_state ) = | |
23109 | do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens, | |
23110 | $rtoken_map, $id_scan_state ); | |
23111 | } | |
23112 | ||
23113 | elsif ( $id_scan_state eq 'package' ) { | |
23114 | ( $i, $tok, $type ) = | |
23115 | do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, | |
23116 | $rtoken_map ); | |
23117 | $id_scan_state = ''; | |
23118 | } | |
23119 | ||
23120 | else { | |
23121 | warning("invalid token in scan_id: $tok\n"); | |
23122 | $id_scan_state = ''; | |
23123 | } | |
23124 | } | |
23125 | ||
23126 | if ( $id_scan_state && ( !defined($type) || !$type ) ) { | |
23127 | ||
23128 | # shouldn't happen: | |
23129 | warning( | |
23130 | "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" | |
23131 | ); | |
23132 | report_definite_bug(); | |
23133 | } | |
23134 | ||
23135 | TOKENIZER_DEBUG_FLAG_NSCAN && do { | |
23136 | ||
23137 | "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; | |
23138 | }; | |
23139 | return ( $i, $tok, $type, $id_scan_state ); | |
23140 | } | |
23141 | ||
23142 | { | |
23143 | ||
23144 | # saved package and subnames in case prototype is on separate line | |
23145 | my ( $package_saved, $subname_saved ); | |
23146 | ||
23147 | sub do_scan_sub { | |
23148 | ||
23149 | # do_scan_sub parses a sub name and prototype | |
23150 | # it is called with $i_beg equal to the index of the first nonblank | |
23151 | # token following a 'sub' token. | |
23152 | ||
23153 | # TODO: add future error checks to be sure we have a valid | |
23154 | # sub name. For example, 'sub &doit' is wrong. Also, be sure | |
23155 | # a name is given if and only if a non-anonymous sub is | |
23156 | # appropriate. | |
23157 | ||
23158 | my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, | |
23159 | $id_scan_state ) | |
23160 | = @_; | |
23161 | $id_scan_state = ""; # normally we get everything in one call | |
23162 | my $subname = undef; | |
23163 | my $package = undef; | |
23164 | my $proto = undef; | |
23165 | my $attrs = undef; | |
23166 | my $match; | |
23167 | ||
23168 | my $pos_beg = $$rtoken_map[$i_beg]; | |
23169 | pos($input_line) = $pos_beg; | |
23170 | ||
23171 | # sub NAME PROTO ATTRS | |
23172 | if ( | |
23173 | $input_line =~ m/\G\s* | |
23174 | ((?:\w*(?:'|::))*) # package - something that ends in :: or ' | |
23175 | (\w+) # NAME - required | |
23176 | (\s*\([^){]*\))? # PROTO - something in parens | |
23177 | (\s*:)? # ATTRS - leading : of attribute list | |
23178 | /gcx | |
23179 | ) | |
23180 | { | |
23181 | $match = 1; | |
23182 | $subname = $2; | |
23183 | $proto = $3; | |
23184 | $attrs = $4; | |
23185 | ||
23186 | $package = ( defined($1) && $1 ) ? $1 : $current_package; | |
23187 | $package =~ s/\'/::/g; | |
23188 | if ( $package =~ /^\:/ ) { $package = 'main' . $package } | |
23189 | $package =~ s/::$//; | |
23190 | my $pos = pos($input_line); | |
23191 | my $numc = $pos - $pos_beg; | |
23192 | $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); | |
23193 | $type = 'i'; | |
23194 | } | |
23195 | ||
23196 | # Look for prototype/attributes not preceded on this line by subname; | |
23197 | # This might be an anonymous sub with attributes, | |
23198 | # or a prototype on a separate line from its sub name | |
23199 | elsif ( | |
23200 | $input_line =~ m/\G(\s*\([^){]*\))? # PROTO | |
23201 | (\s*:)? # ATTRS leading ':' | |
23202 | /gcx | |
23203 | && ( $1 || $2 ) | |
23204 | ) | |
23205 | { | |
23206 | $match = 1; | |
23207 | $proto = $1; | |
23208 | $attrs = $2; | |
23209 | ||
23210 | # Handle prototype on separate line from subname | |
23211 | if ($subname_saved) { | |
23212 | $package = $package_saved; | |
23213 | $subname = $subname_saved; | |
23214 | $tok = $last_nonblank_token; | |
23215 | } | |
23216 | $type = 'i'; | |
23217 | } | |
23218 | ||
23219 | if ($match) { | |
23220 | ||
23221 | # ATTRS: if there are attributes, back up and let the ':' be | |
23222 | # found later by the scanner. | |
23223 | my $pos = pos($input_line); | |
23224 | if ($attrs) { | |
23225 | $pos -= length($attrs); | |
23226 | } | |
23227 | ||
23228 | my $next_nonblank_token = $tok; | |
23229 | ||
23230 | # catch case of line with leading ATTR ':' after anonymous sub | |
23231 | if ( $pos == $pos_beg && $tok eq ':' ) { | |
23232 | $type = 'A'; | |
23233 | } | |
23234 | ||
23235 | # We must convert back from character position | |
23236 | # to pre_token index. | |
23237 | else { | |
23238 | ||
23239 | # I don't think an error flag can occur here ..but ? | |
23240 | my $error; | |
23241 | ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); | |
23242 | if ($error) { warning("Possibly invalid sub\n") } | |
23243 | ||
23244 | # check for multiple definitions of a sub | |
23245 | ( $next_nonblank_token, my $i_next ) = | |
23246 | find_next_nonblank_token_on_this_line( $i, $rtokens ); | |
23247 | } | |
23248 | ||
23249 | if ( $next_nonblank_token =~ /^(\s*|#)$/ ) | |
23250 | { # skip blank or side comment | |
23251 | my ( $rpre_tokens, $rpre_types ) = | |
23252 | peek_ahead_for_n_nonblank_pre_tokens(1); | |
23253 | if ( defined($rpre_tokens) && @$rpre_tokens ) { | |
23254 | $next_nonblank_token = $rpre_tokens->[0]; | |
23255 | } | |
23256 | else { | |
23257 | $next_nonblank_token = '}'; | |
23258 | } | |
23259 | } | |
23260 | $package_saved = ""; | |
23261 | $subname_saved = ""; | |
23262 | if ( $next_nonblank_token eq '{' ) { | |
23263 | if ($subname) { | |
23264 | if ( $saw_function_definition{$package}{$subname} ) { | |
23265 | my $lno = $saw_function_definition{$package}{$subname}; | |
23266 | warning( | |
23267 | "already saw definition of 'sub $subname' in package '$package' at line $lno\n" | |
23268 | ); | |
23269 | } | |
23270 | $saw_function_definition{$package}{$subname} = | |
23271 | $input_line_number; | |
23272 | } | |
23273 | } | |
23274 | elsif ( $next_nonblank_token eq ';' ) { | |
23275 | } | |
23276 | elsif ( $next_nonblank_token eq '}' ) { | |
23277 | } | |
23278 | ||
23279 | # ATTRS - if an attribute list follows, remember the name | |
23280 | # of the sub so the next opening brace can be labeled. | |
23281 | # Setting 'statement_type' causes any ':'s to introduce | |
23282 | # attributes. | |
23283 | elsif ( $next_nonblank_token eq ':' ) { | |
23284 | $statement_type = $tok; | |
23285 | } | |
23286 | ||
23287 | # see if PROTO follows on another line: | |
23288 | elsif ( $next_nonblank_token eq '(' ) { | |
23289 | if ( $attrs || $proto ) { | |
23290 | warning( | |
23291 | "unexpected '(' after definition or declaration of sub '$subname'\n" | |
23292 | ); | |
23293 | } | |
23294 | else { | |
23295 | $id_scan_state = 'sub'; # we must come back to get proto | |
23296 | $statement_type = $tok; | |
23297 | $package_saved = $package; | |
23298 | $subname_saved = $subname; | |
23299 | } | |
23300 | } | |
23301 | elsif ($next_nonblank_token) { # EOF technically ok | |
23302 | warning( | |
23303 | "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" | |
23304 | ); | |
23305 | } | |
23306 | check_prototype( $proto, $package, $subname ); | |
23307 | } | |
23308 | ||
23309 | # no match but line not blank | |
23310 | else { | |
23311 | } | |
23312 | return ( $i, $tok, $type, $id_scan_state ); | |
23313 | } | |
23314 | } | |
23315 | ||
23316 | sub check_prototype { | |
23317 | my ( $proto, $package, $subname ) = @_; | |
23318 | return unless ( defined($package) && defined($subname) ); | |
23319 | if ( defined($proto) ) { | |
23320 | $proto =~ s/^\s*\(\s*//; | |
23321 | $proto =~ s/\s*\)$//; | |
23322 | if ($proto) { | |
23323 | $is_user_function{$package}{$subname} = 1; | |
23324 | $user_function_prototype{$package}{$subname} = "($proto)"; | |
23325 | ||
23326 | # prototypes containing '&' must be treated specially.. | |
23327 | if ( $proto =~ /\&/ ) { | |
23328 | ||
23329 | # right curly braces of prototypes ending in | |
23330 | # '&' may be followed by an operator | |
23331 | if ( $proto =~ /\&$/ ) { | |
23332 | $is_block_function{$package}{$subname} = 1; | |
23333 | } | |
23334 | ||
23335 | # right curly braces of prototypes NOT ending in | |
23336 | # '&' may NOT be followed by an operator | |
23337 | elsif ( $proto !~ /\&$/ ) { | |
23338 | $is_block_list_function{$package}{$subname} = 1; | |
23339 | } | |
23340 | } | |
23341 | } | |
23342 | else { | |
23343 | $is_constant{$package}{$subname} = 1; | |
23344 | } | |
23345 | } | |
23346 | else { | |
23347 | $is_user_function{$package}{$subname} = 1; | |
23348 | } | |
23349 | } | |
23350 | ||
23351 | sub do_scan_package { | |
23352 | ||
23353 | # do_scan_package parses a package name | |
23354 | # it is called with $i_beg equal to the index of the first nonblank | |
23355 | # token following a 'package' token. | |
23356 | ||
23357 | my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_; | |
23358 | my $package = undef; | |
23359 | my $pos_beg = $$rtoken_map[$i_beg]; | |
23360 | pos($input_line) = $pos_beg; | |
23361 | ||
23362 | # handle non-blank line; package name, if any, must follow | |
23363 | if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { | |
23364 | $package = $1; | |
23365 | $package = ( defined($1) && $1 ) ? $1 : 'main'; | |
23366 | $package =~ s/\'/::/g; | |
23367 | if ( $package =~ /^\:/ ) { $package = 'main' . $package } | |
23368 | $package =~ s/::$//; | |
23369 | my $pos = pos($input_line); | |
23370 | my $numc = $pos - $pos_beg; | |
23371 | $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); | |
23372 | $type = 'i'; | |
23373 | ||
23374 | # Now we must convert back from character position | |
23375 | # to pre_token index. | |
23376 | # I don't think an error flag can occur here ..but ? | |
23377 | my $error; | |
23378 | ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map ); | |
23379 | if ($error) { warning("Possibly invalid package\n") } | |
23380 | $current_package = $package; | |
23381 | ||
23382 | # check for error | |
23383 | my ( $next_nonblank_token, $i_next ) = | |
23384 | find_next_nonblank_token( $i, $rtokens ); | |
23385 | if ( $next_nonblank_token !~ /^[;\}]$/ ) { | |
23386 | warning( | |
23387 | "Unexpected '$next_nonblank_token' after package name '$tok'\n" | |
23388 | ); | |
23389 | } | |
23390 | } | |
23391 | ||
23392 | # no match but line not blank -- | |
23393 | # could be a label with name package, like package: , for example. | |
23394 | else { | |
23395 | $type = 'k'; | |
23396 | } | |
23397 | ||
23398 | return ( $i, $tok, $type ); | |
23399 | } | |
23400 | ||
23401 | sub scan_identifier_do { | |
23402 | ||
23403 | # This routine assembles tokens into identifiers. It maintains a | |
23404 | # scan state, id_scan_state. It updates id_scan_state based upon | |
23405 | # current id_scan_state and token, and returns an updated | |
23406 | # id_scan_state and the next index after the identifier. | |
23407 | ||
23408 | my ( $i, $id_scan_state, $identifier, $rtokens ) = @_; | |
23409 | my $i_begin = $i; | |
23410 | my $type = ''; | |
23411 | my $tok_begin = $$rtokens[$i_begin]; | |
23412 | if ( $tok_begin eq ':' ) { $tok_begin = '::' } | |
23413 | my $id_scan_state_begin = $id_scan_state; | |
23414 | my $identifier_begin = $identifier; | |
23415 | my $tok = $tok_begin; | |
23416 | my $message = ""; | |
23417 | ||
23418 | # these flags will be used to help figure out the type: | |
23419 | my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); | |
23420 | my $saw_type; | |
23421 | ||
23422 | # allow old package separator (') except in 'use' statement | |
23423 | my $allow_tick = ( $last_nonblank_token ne 'use' ); | |
23424 | ||
23425 | # get started by defining a type and a state if necessary | |
23426 | unless ($id_scan_state) { | |
23427 | $context = UNKNOWN_CONTEXT; | |
23428 | ||
23429 | # fixup for digraph | |
23430 | if ( $tok eq '>' ) { | |
23431 | $tok = '->'; | |
23432 | $tok_begin = $tok; | |
23433 | } | |
23434 | $identifier = $tok; | |
23435 | ||
23436 | if ( $tok eq '$' || $tok eq '*' ) { | |
23437 | $id_scan_state = '$'; | |
23438 | $context = SCALAR_CONTEXT; | |
23439 | } | |
23440 | elsif ( $tok eq '%' || $tok eq '@' ) { | |
23441 | $id_scan_state = '$'; | |
23442 | $context = LIST_CONTEXT; | |
23443 | } | |
23444 | elsif ( $tok eq '&' ) { | |
23445 | $id_scan_state = '&'; | |
23446 | } | |
23447 | elsif ( $tok eq 'sub' or $tok eq 'package' ) { | |
23448 | $saw_alpha = 0; # 'sub' is considered type info here | |
23449 | $id_scan_state = '$'; | |
23450 | $identifier .= ' '; # need a space to separate sub from sub name | |
23451 | } | |
23452 | elsif ( $tok eq '::' ) { | |
23453 | $id_scan_state = 'A'; | |
23454 | } | |
23455 | elsif ( $tok =~ /^[A-Za-z_]/ ) { | |
23456 | $id_scan_state = ':'; | |
23457 | } | |
23458 | elsif ( $tok eq '->' ) { | |
23459 | $id_scan_state = '$'; | |
23460 | } | |
23461 | else { | |
23462 | ||
23463 | # shouldn't happen | |
23464 | my ( $a, $b, $c ) = caller; | |
23465 | warning("Program Bug: scan_identifier given bad token = $tok \n"); | |
23466 | warning(" called from sub $a line: $c\n"); | |
23467 | report_definite_bug(); | |
23468 | } | |
23469 | $saw_type = !$saw_alpha; | |
23470 | } | |
23471 | else { | |
23472 | $i--; | |
23473 | $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); | |
23474 | } | |
23475 | ||
23476 | # now loop to gather the identifier | |
23477 | my $i_save = $i; | |
23478 | ||
23479 | while ( $i < $max_token_index ) { | |
23480 | $i_save = $i unless ( $tok =~ /^\s*$/ ); | |
23481 | $tok = $$rtokens[ ++$i ]; | |
23482 | ||
23483 | if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) { | |
23484 | $tok = '::'; | |
23485 | $i++; | |
23486 | } | |
23487 | ||
23488 | if ( $id_scan_state eq '$' ) { # starting variable name | |
23489 | ||
23490 | if ( $tok eq '$' ) { | |
23491 | ||
23492 | $identifier .= $tok; | |
23493 | ||
23494 | # we've got a punctuation variable if end of line (punct.t) | |
23495 | if ( $i == $max_token_index ) { | |
23496 | $type = 'i'; | |
23497 | $id_scan_state = ''; | |
23498 | last; | |
23499 | } | |
23500 | } | |
23501 | elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. | |
23502 | $saw_alpha = 1; | |
23503 | $id_scan_state = ':'; # now need :: | |
23504 | $identifier .= $tok; | |
23505 | } | |
23506 | elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. | |
23507 | $saw_alpha = 1; | |
23508 | $id_scan_state = ':'; # now need :: | |
23509 | $identifier .= $tok; | |
23510 | ||
23511 | # Perl will accept leading digits in identifiers, | |
23512 | # although they may not always produce useful results. | |
23513 | # Something like $main::0 is ok. But this also works: | |
23514 | # | |
23515 | # sub howdy::123::bubba{ print "bubba $54321!\n" } | |
23516 | # howdy::123::bubba(); | |
23517 | # | |
23518 | } | |
23519 | elsif ( $tok =~ /^[0-9]/ ) { # numeric | |
23520 | $saw_alpha = 1; | |
23521 | $id_scan_state = ':'; # now need :: | |
23522 | $identifier .= $tok; | |
23523 | } | |
23524 | elsif ( $tok eq '::' ) { | |
23525 | $id_scan_state = 'A'; | |
23526 | $identifier .= $tok; | |
23527 | } | |
23528 | elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array | |
23529 | $identifier .= $tok; # keep same state, a $ could follow | |
23530 | } | |
23531 | elsif ( $tok eq '{' ) { | |
23532 | ||
23533 |