Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | #!/usr/bin/perl -w |
2 | # | |
3 | # `Diff' program in Perl | |
4 | # Copyright 1998 M-J. Dominus. (mjd-perl-diff@plover.com) | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it | |
7 | # under the same terms as Perl itself. | |
8 | # | |
9 | # Altered to output in `context diff' format (but without context) | |
10 | # September 1998 Christian Murphy (cpm@muc.de) | |
11 | # | |
12 | # Command-line arguments and context lines feature added | |
13 | # September 1998 Amir D. Karger (karger@bead.aecom.yu.edu) | |
14 | # | |
15 | # In this file, "item" usually means "line of text", and "item number" usually | |
16 | # means "line number". But theoretically the code could be used more generally | |
17 | use strict; | |
18 | ||
19 | use Algorithm::Diff qw(diff); | |
20 | use File::stat; | |
21 | use vars qw ($opt_C $opt_c $opt_u $opt_U); | |
22 | use Getopt::Std; | |
23 | ||
24 | my $usage = << "ENDUSAGE"; | |
25 | Usage: $0 [{-c | -u}] [{-C | -U} lines] oldfile newfile | |
26 | -c will do a context diff with 3 lines of context | |
27 | -C will do a context diff with 'lines' lines of context | |
28 | -u will do a unified diff with 3 lines of context | |
29 | -U will do a unified diff with 'lines' lines of context | |
30 | ENDUSAGE | |
31 | ||
32 | getopts('U:C:cu') or bag("$usage"); | |
33 | bag("$usage") unless @ARGV == 2; | |
34 | my ($file1, $file2) = @ARGV; | |
35 | if (defined $opt_C || defined $opt_c) { | |
36 | $opt_c = ""; # -c on if -C given on command line | |
37 | $opt_u = undef; | |
38 | } elsif (defined $opt_U || defined $opt_u) { | |
39 | $opt_u = ""; # -u on if -U given on command line | |
40 | $opt_c = undef; | |
41 | } else { | |
42 | $opt_c = ""; # by default, do context diff, not old diff | |
43 | } | |
44 | ||
45 | my ($char1, $char2); # string to print before file names | |
46 | my $Context_Lines; # lines of context to print | |
47 | if (defined $opt_c) { | |
48 | $Context_Lines = defined $opt_C ? $opt_C : 3; | |
49 | $char1 = '*' x 3; $char2 = '-' x 3; | |
50 | } elsif (defined $opt_u) { | |
51 | $Context_Lines = defined $opt_U ? $opt_U : 3; | |
52 | $char1 = '-' x 3; $char2 = '+' x 3; | |
53 | } | |
54 | ||
55 | # After we've read up to a certain point in each file, the number of items | |
56 | # we've read from each file will differ by $FLD (could be 0) | |
57 | my $File_Length_Difference = 0; | |
58 | ||
59 | open (F1, $file1) or bag("Couldn't open $file1: $!"); | |
60 | open (F2, $file2) or bag("Couldn't open $file2: $!"); | |
61 | my (@f1, @f2); | |
62 | chomp(@f1 = <F1>); | |
63 | close F1; | |
64 | chomp(@f2 = <F2>); | |
65 | close F2; | |
66 | ||
67 | # diff yields lots of pieces, each of which is basically a Block object | |
68 | my $diffs = diff(\@f1, \@f2); | |
69 | exit 0 unless @$diffs; | |
70 | ||
71 | my $st = stat($file1); | |
72 | print "$char1 $file1\t", scalar localtime($st->mtime), "\n"; | |
73 | $st = stat($file2); | |
74 | print "$char2 $file2\t", scalar localtime($st->mtime), "\n"; | |
75 | ||
76 | my ($hunk,$oldhunk); | |
77 | # Loop over hunks. If a hunk overlaps with the last hunk, join them. | |
78 | # Otherwise, print out the old one. | |
79 | foreach my $piece (@$diffs) { | |
80 | $hunk = new Hunk ($piece, $Context_Lines); | |
81 | next unless $oldhunk; | |
82 | ||
83 | if ($hunk->does_overlap($oldhunk)) { | |
84 | $hunk->prepend_hunk($oldhunk); | |
85 | } else { | |
86 | $oldhunk->output_diff(\@f1, \@f2); | |
87 | } | |
88 | ||
89 | } continue { | |
90 | $oldhunk = $hunk; | |
91 | } | |
92 | ||
93 | # print the last hunk | |
94 | $oldhunk->output_diff(\@f1, \@f2); | |
95 | exit 1; | |
96 | # END MAIN PROGRAM | |
97 | ||
98 | sub bag { | |
99 | my $msg = shift; | |
100 | $msg .= "\n"; | |
101 | warn $msg; | |
102 | exit 2; | |
103 | } | |
104 | ||
105 | # Package Hunk. A Hunk is a group of Blocks which overlap because of the | |
106 | # context surrounding each block. (So if we're not using context, every | |
107 | # hunk will contain one block.) | |
108 | { | |
109 | package Hunk; | |
110 | ||
111 | sub new { | |
112 | # Arg1 is output from &LCS::diff (which corresponds to one Block) | |
113 | # Arg2 is the number of items (lines, e.g.,) of context around each block | |
114 | # | |
115 | # This subroutine changes $File_Length_Difference | |
116 | # | |
117 | # Fields in a Hunk: | |
118 | # blocks - a list of Block objects | |
119 | # start - index in file 1 where first block of the hunk starts | |
120 | # end - index in file 1 where last block of the hunk ends | |
121 | # | |
122 | # Variables: | |
123 | # before_diff - how much longer file 2 is than file 1 due to all hunks | |
124 | # until but NOT including this one | |
125 | # after_diff - difference due to all hunks including this one | |
126 | my ($class, $piece, $context_items) = @_; | |
127 | ||
128 | my $block = new Block ($piece); # this modifies $FLD! | |
129 | ||
130 | my $before_diff = $File_Length_Difference; # BEFORE this hunk | |
131 | my $after_diff = $before_diff + $block->{"length_diff"}; | |
132 | $File_Length_Difference += $block->{"length_diff"}; | |
133 | ||
134 | # @remove_array and @insert_array hold the items to insert and remove | |
135 | # Save the start & beginning of each array. If the array doesn't exist | |
136 | # though (e.g., we're only adding items in this block), then figure | |
137 | # out the line number based on the line number of the other file and | |
138 | # the current difference in file lenghts | |
139 | my @remove_array = $block->remove; | |
140 | my @insert_array = $block->insert; | |
141 | my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2); | |
142 | $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1; | |
143 | $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1; | |
144 | $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1; | |
145 | $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1; | |
146 | ||
147 | $start1 = $a1 == -1 ? $b1 - $before_diff : $a1; | |
148 | $end1 = $a2 == -1 ? $b2 - $after_diff : $a2; | |
149 | $start2 = $b1 == -1 ? $a1 + $before_diff : $b1; | |
150 | $end2 = $b2 == -1 ? $a2 + $after_diff : $b2; | |
151 | ||
152 | # At first, a hunk will have just one Block in it | |
153 | my $hunk = { | |
154 | "start1" => $start1, | |
155 | "start2" => $start2, | |
156 | "end1" => $end1, | |
157 | "end2" => $end2, | |
158 | "blocks" => [$block], | |
159 | }; | |
160 | bless $hunk, $class; | |
161 | ||
162 | $hunk->flag_context($context_items); | |
163 | ||
164 | return $hunk; | |
165 | } | |
166 | ||
167 | # Change the "start" and "end" fields to note that context should be added | |
168 | # to this hunk | |
169 | sub flag_context { | |
170 | my ($hunk, $context_items) = @_; | |
171 | return unless $context_items; # no context | |
172 | ||
173 | # add context before | |
174 | my $start1 = $hunk->{"start1"}; | |
175 | my $num_added = $context_items > $start1 ? $start1 : $context_items; | |
176 | $hunk->{"start1"} -= $num_added; | |
177 | $hunk->{"start2"} -= $num_added; | |
178 | ||
179 | # context after | |
180 | my $end1 = $hunk->{"end1"}; | |
181 | $num_added = ($end1+$context_items > $#f1) ? | |
182 | $#f1 - $end1 : | |
183 | $context_items; | |
184 | $hunk->{"end1"} += $num_added; | |
185 | $hunk->{"end2"} += $num_added; | |
186 | } | |
187 | ||
188 | # Is there an overlap between hunk arg0 and old hunk arg1? | |
189 | # Note: if end of old hunk is one less than beginning of second, they overlap | |
190 | sub does_overlap { | |
191 | my ($hunk, $oldhunk) = @_; | |
192 | return "" unless $oldhunk; # first time through, $oldhunk is empty | |
193 | ||
194 | # Do I actually need to test both? | |
195 | return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 || | |
196 | $hunk->{"start2"} - $oldhunk->{"end2"} <= 1); | |
197 | } | |
198 | ||
199 | # Prepend hunk arg1 to hunk arg0 | |
200 | # Note that arg1 isn't updated! Only arg0 is. | |
201 | sub prepend_hunk { | |
202 | my ($hunk, $oldhunk) = @_; | |
203 | ||
204 | $hunk->{"start1"} = $oldhunk->{"start1"}; | |
205 | $hunk->{"start2"} = $oldhunk->{"start2"}; | |
206 | ||
207 | unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}}); | |
208 | } | |
209 | ||
210 | ||
211 | # DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO... | |
212 | sub output_diff { | |
213 | if (defined $main::opt_u) {&output_unified_diff(@_)} | |
214 | elsif (defined $main::opt_c) {&output_context_diff(@_)} | |
215 | else {die "unknown diff"} | |
216 | } | |
217 | ||
218 | sub output_unified_diff { | |
219 | my ($hunk, $fileref1, $fileref2) = @_; | |
220 | my @blocklist; | |
221 | ||
222 | # Calculate item number range. | |
223 | my $range1 = $hunk->unified_range(1); | |
224 | my $range2 = $hunk->unified_range(2); | |
225 | print "@@ -$range1 +$range2 @@\n"; | |
226 | ||
227 | # Outlist starts containing the hunk of file 1. | |
228 | # Removing an item just means putting a '-' in front of it. | |
229 | # Inserting an item requires getting it from file2 and splicing it in. | |
230 | # We splice in $num_added items. Remove blocks use $num_added because | |
231 | # splicing changed the length of outlist. | |
232 | # We remove $num_removed items. Insert blocks use $num_removed because | |
233 | # their item numbers---corresponding to positions in file *2*--- don't take | |
234 | # removed items into account. | |
235 | my $low = $hunk->{"start1"}; | |
236 | my $hi = $hunk->{"end1"}; | |
237 | my ($num_added, $num_removed) = (0,0); | |
238 | my @outlist = @$fileref1[$low..$hi]; | |
239 | map {s/^/ /} @outlist; # assume it's just context | |
240 | ||
241 | foreach my $block (@{$hunk->{"blocks"}}) { | |
242 | foreach my $item ($block->remove) { | |
243 | my $op = $item->{"sign"}; # - | |
244 | my $offset = $item->{"item_no"} - $low + $num_added; | |
245 | $outlist[$offset] =~ s/^ /$op/; | |
246 | $num_removed++; | |
247 | } | |
248 | foreach my $item ($block->insert) { | |
249 | my $op = $item->{"sign"}; # + | |
250 | my $i = $item->{"item_no"}; | |
251 | my $offset = $i - $hunk->{"start2"} + $num_removed; | |
252 | splice(@outlist,$offset,0,"$op$$fileref2[$i]"); | |
253 | $num_added++; | |
254 | } | |
255 | } | |
256 | ||
257 | map {s/$/\n/} @outlist; # add \n's | |
258 | print @outlist; | |
259 | ||
260 | } | |
261 | ||
262 | sub output_context_diff { | |
263 | my ($hunk, $fileref1, $fileref2) = @_; | |
264 | my @blocklist; | |
265 | ||
266 | print "***************\n"; | |
267 | # Calculate item number range. | |
268 | my $range1 = $hunk->context_range(1); | |
269 | my $range2 = $hunk->context_range(2); | |
270 | ||
271 | # Print out file 1 part for each block in context diff format if there are | |
272 | # any blocks that remove items | |
273 | print "*** $range1 ****\n"; | |
274 | my $low = $hunk->{"start1"}; | |
275 | my $hi = $hunk->{"end1"}; | |
276 | if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) { | |
277 | my @outlist = @$fileref1[$low..$hi]; | |
278 | map {s/^/ /} @outlist; # assume it's just context | |
279 | foreach my $block (@blocklist) { | |
280 | my $op = $block->op; # - or ! | |
281 | foreach my $item ($block->remove) { | |
282 | $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/; | |
283 | } | |
284 | } | |
285 | map {s/$/\n/} @outlist; # add \n's | |
286 | print @outlist; | |
287 | } | |
288 | ||
289 | print "--- $range2 ----\n"; | |
290 | $low = $hunk->{"start2"}; | |
291 | $hi = $hunk->{"end2"}; | |
292 | if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) { | |
293 | my @outlist = @$fileref2[$low..$hi]; | |
294 | map {s/^/ /} @outlist; # assume it's just context | |
295 | foreach my $block (@blocklist) { | |
296 | my $op = $block->op; # + or ! | |
297 | foreach my $item ($block->insert) { | |
298 | $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/; | |
299 | } | |
300 | } | |
301 | map {s/$/\n/} @outlist; # add \n's | |
302 | print @outlist; | |
303 | } | |
304 | } | |
305 | ||
306 | sub context_range { | |
307 | # Generate a range of item numbers to print. Only print 1 number if the range | |
308 | # has only one item in it. Otherwise, it's 'start,end' | |
309 | my ($hunk, $flag) = @_; | |
310 | my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"}); | |
311 | $start++; $end++; # index from 1, not zero | |
312 | my $range = ($start < $end) ? "$start,$end" : $end; | |
313 | return $range; | |
314 | } | |
315 | ||
316 | sub unified_range { | |
317 | # Generate a range of item numbers to print for unified diff | |
318 | # Print number where block starts, followed by number of lines in the block | |
319 | # (don't print number of lines if it's 1) | |
320 | my ($hunk, $flag) = @_; | |
321 | my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"}); | |
322 | $start++; $end++; # index from 1, not zero | |
323 | my $length = $end - $start + 1; | |
324 | my $first = $length < 2 ? $end : $start; # strange, but correct... | |
325 | my $range = $length== 1 ? $first : "$first,$length"; | |
326 | return $range; | |
327 | } | |
328 | } # end Package Hunk | |
329 | ||
330 | # Package Block. A block is an operation removing, adding, or changing | |
331 | # a group of items. Basically, this is just a list of changes, where each | |
332 | # change adds or deletes a single item. | |
333 | # (Change could be a separate class, but it didn't seem worth it) | |
334 | { | |
335 | package Block; | |
336 | sub new { | |
337 | # Input is a chunk from &Algorithm::LCS::diff | |
338 | # Fields in a block: | |
339 | # length_diff - how much longer file 2 is than file 1 due to this block | |
340 | # Each change has: | |
341 | # sign - '+' for insert, '-' for remove | |
342 | # item_no - number of the item in the file (e.g., line number) | |
343 | # We don't bother storing the text of the item | |
344 | # | |
345 | my ($class,$chunk) = @_; | |
346 | my @changes = (); | |
347 | ||
348 | # This just turns each change into a hash. | |
349 | foreach my $item (@$chunk) { | |
350 | my ($sign, $item_no, $text) = @$item; | |
351 | my $hashref = {"sign" => $sign, "item_no" => $item_no}; | |
352 | push @changes, $hashref; | |
353 | } | |
354 | ||
355 | my $block = { "changes" => \@changes }; | |
356 | bless $block, $class; | |
357 | ||
358 | $block->{"length_diff"} = $block->insert - $block->remove; | |
359 | return $block; | |
360 | } | |
361 | ||
362 | ||
363 | # LOW LEVEL FUNCTIONS | |
364 | sub op { | |
365 | # what kind of block is this? | |
366 | my $block = shift; | |
367 | my $insert = $block->insert; | |
368 | my $remove = $block->remove; | |
369 | ||
370 | $remove && $insert and return '!'; | |
371 | $remove and return '-'; | |
372 | $insert and return '+'; | |
373 | warn "unknown block type"; | |
374 | return '^'; # context block | |
375 | } | |
376 | ||
377 | # Returns a list of the changes in this block that remove items | |
378 | # (or the number of removals if called in scalar context) | |
379 | sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; } | |
380 | ||
381 | # Returns a list of the changes in this block that insert items | |
382 | sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; } | |
383 | ||
384 | } # end of package Block | |
385 |