git.subgeniuskitty.com - OpenSPARC-T2-DV/.git/blame_incremental - tools/perl-5.8.0/lib/site

... / ...

Commit	Line	Data
	1	package Algorithm::Diff;
	2	use strict;
	3	use vars qw($VERSION @EXPORT_OK @ISA @EXPORT);
	4	use integer; # see below in _replaceNextLargerWith() for mod to make
	5	# if you don't use this
	6	require Exporter;
	7	@ISA = qw(Exporter);
	8	@EXPORT = qw();
	9	@EXPORT_OK = qw(LCS diff traverse_sequences traverse_balanced sdiff);
	10	$VERSION = sprintf('%d.%02d', (q$Revision: 1.15 $ =~ /\d+/g));
	11
	12	# McIlroy-Hunt diff algorithm
	13	# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
	14	# by Ned Konz, perl@bike-nomad.com
	15
	16	=head1 NAME
	17
	18	Algorithm::Diff - Compute `intelligent' differences between two files / lists
	19
	20	=head1 SYNOPSIS
	21
	22	use Algorithm::Diff qw(diff sdiff LCS traverse_sequences
	23	traverse_balanced);
	24
	25	@lcs = LCS( \@seq1, \@seq2 );
	26
	27	@lcs = LCS( \@seq1, \@seq2, $key_generation_function );
	28
	29	$lcsref = LCS( \@seq1, \@seq2 );
	30
	31	$lcsref = LCS( \@seq1, \@seq2, $key_generation_function );
	32
	33	@diffs = diff( \@seq1, \@seq2 );
	34
	35	@diffs = diff( \@seq1, \@seq2, $key_generation_function );
	36
	37	@sdiffs = sdiff( \@seq1, \@seq2 );
	38
	39	@sdiffs = sdiff( \@seq1, \@seq2, $key_generation_function );
	40
	41	traverse_sequences( \@seq1, \@seq2,
	42	{ MATCH => $callback,
	43	DISCARD_A => $callback,
	44	DISCARD_B => $callback,
	45	} );
	46
	47	traverse_sequences( \@seq1, \@seq2,
	48	{ MATCH => $callback,
	49	DISCARD_A => $callback,
	50	DISCARD_B => $callback,
	51	},
	52	$key_generation_function );
	53
	54	traverse_balanced( \@seq1, \@seq2,
	55	{ MATCH => $callback,
	56	DISCARD_A => $callback,
	57	DISCARD_B => $callback,
	58	CHANGE => $callback,
	59	} );
	60
	61	=head1 INTRODUCTION
	62
	63	(by Mark-Jason Dominus)
	64
	65	I once read an article written by the authors of C<diff>; they said
	66	that they hard worked very hard on the algorithm until they found the
	67	right one.
	68
	69	I think what they ended up using (and I hope someone will correct me,
	70	because I am not very confident about this) was the `longest common
	71	subsequence' method. in the LCS problem, you have two sequences of
	72	items:
	73
	74	a b c d f g h j q z
	75
	76	a b c d e f g i j k r x y z
	77
	78	and you want to find the longest sequence of items that is present in
	79	both original sequences in the same order. That is, you want to find
	80	a new sequence I<S> which can be obtained from the first sequence by
	81	deleting some items, and from the secend sequence by deleting other
	82	items. You also want I<S> to be as long as possible. In this case
	83	I<S> is
	84
	85	a b c d f g j z
	86
	87	From there it's only a small step to get diff-like output:
	88
	89	e h i k q r x y
	90	+ - + + - + + +
	91
	92	This module solves the LCS problem. It also includes a canned
	93	function to generate C<diff>-like output.
	94
	95	It might seem from the example above that the LCS of two sequences is
	96	always pretty obvious, but that's not always the case, especially when
	97	the two sequences have many repeated elements. For example, consider
	98
	99	a x b y c z p d q
	100	a b c a x b y c z
	101
	102	A naive approach might start by matching up the C<a> and C<b> that
	103	appear at the beginning of each sequence, like this:
	104
	105	a x b y c z p d q
	106	a b c a b y c z
	107
	108	This finds the common subsequence C<a b c z>. But actually, the LCS
	109	is C<a x b y c z>:
	110
	111	a x b y c z p d q
	112	a b c a x b y c z
	113
	114	=head1 USAGE
	115
	116	This module provides three exportable functions, which we'll deal with in
	117	ascending order of difficulty: C<LCS>,
	118	C<diff>, C<sdiff>, C<traverse_sequences>, and C<traverse_balanced>.
	119
	120	=head2 C<LCS>
	121
	122	Given references to two lists of items, LCS returns an array containing their
	123	longest common subsequence. In scalar context, it returns a reference to
	124	such a list.
	125
	126	@lcs = LCS( \@seq1, \@seq2 );
	127	$lcsref = LCS( \@seq1, \@seq2 );
	128
	129	C<LCS> may be passed an optional third parameter; this is a CODE
	130	reference to a key generation function. See L</KEY GENERATION
	131	FUNCTIONS>.
	132
	133	@lcs = LCS( \@seq1, \@seq2, $keyGen );
	134	$lcsref = LCS( \@seq1, \@seq2, $keyGen );
	135
	136	Additional parameters, if any, will be passed to the key generation
	137	routine.
	138
	139	=head2 C<diff>
	140
	141	@diffs = diff( \@seq1, \@seq2 );
	142	$diffs_ref = diff( \@seq1, \@seq2 );
	143
	144	C<diff> computes the smallest set of additions and deletions necessary
	145	to turn the first sequence into the second, and returns a description
	146	of these changes. The description is a list of I<hunks>; each hunk
	147	represents a contiguous section of items which should be added,
	148	deleted, or replaced. The return value of C<diff> is a list of
	149	hunks, or, in scalar context, a reference to such a list.
	150
	151	Here is an example: The diff of the following two sequences:
	152
	153	a b c e h j l m n p
	154	b c d e f j k l m r s t
	155
	156	Result:
	157
	158	[
	159	[ [ '-', 0, 'a' ] ],
	160
	161	[ [ '+', 2, 'd' ] ],
	162
	163	[ [ '-', 4, 'h' ] ,
	164	[ '+', 4, 'f' ] ],
	165
	166	[ [ '+', 6, 'k' ] ],
	167
	168	[ [ '-', 8, 'n' ],
	169	[ '-', 9, 'p' ],
	170	[ '+', 9, 'r' ],
	171	[ '+', 10, 's' ],
	172	[ '+', 11, 't' ],
	173	]
	174	]
	175
	176	There are five hunks here. The first hunk says that the C<a> at
	177	position 0 of the first sequence should be deleted (C<->). The second
	178	hunk says that the C<d> at position 2 of the second sequence should
	179	be inserted (C<+>). The third hunk says that the C<h> at position 4
	180	of the first sequence should be removed and replaced with the C<f>
	181	from position 4 of the second sequence. The other two hunks similarly.
	182
	183	C<diff> may be passed an optional third parameter; this is a CODE
	184	reference to a key generation function. See L</KEY GENERATION
	185	FUNCTIONS>.
	186
	187	Additional parameters, if any, will be passed to the key generation
	188	routine.
	189
	190	=head2 C<sdiff>
	191
	192	@sdiffs = sdiff( \@seq1, \@seq2 );
	193	$sdiffs_ref = sdiff( \@seq1, \@seq2 );
	194
	195	C<sdiff> computes all necessary components to show two sequences
	196	and their minimized differences side by side, just like the
	197	Unix-utility I<sdiff> does:
	198
	199	same same
	200	before \| after
	201	old < -
	202	- > new
	203
	204	It returns a list of array refs, each pointing to an array of
	205	display instructions. In scalar context it returns a reference
	206	to such a list.
	207
	208	Display instructions consist of three elements: A modifier indicator
	209	(C<+>: Element added, C<->: Element removed, C<u>: Element unmodified,
	210	C<c>: Element changed) and the value of the old and new elements, to
	211	be displayed side by side.
	212
	213	An C<sdiff> of the following two sequences:
	214
	215	a b c e h j l m n p
	216	b c d e f j k l m r s t
	217
	218	results in
	219
	220	[ [ '-', 'a', '' ],
	221	[ 'u', 'b', 'b' ],
	222	[ 'u', 'c', 'c' ],
	223	[ '+', '', 'd' ],
	224	[ 'u', 'e', 'e' ],
	225	[ 'c', 'h', 'f' ],
	226	[ 'u', 'j', 'j' ],
	227	[ '+', '', 'k' ],
	228	[ 'u', 'l', 'l' ],
	229	[ 'u', 'm', 'm' ],
	230	[ 'c', 'n', 'r' ],
	231	[ 'c', 'p', 's' ],
	232	[ '+', '', 't' ] ]
	233
	234	C<sdiff> may be passed an optional third parameter; this is a CODE
	235	reference to a key generation function. See L</KEY GENERATION
	236	FUNCTIONS>.
	237
	238	Additional parameters, if any, will be passed to the key generation
	239	routine.
	240
	241	=head2 C<traverse_sequences>
	242
	243	C<traverse_sequences> is the most general facility provided by this
	244	module; C<diff> and C<LCS> are implemented as calls to it.
	245
	246	Imagine that there are two arrows. Arrow A points to an element of sequence A,
	247	and arrow B points to an element of the sequence B. Initially, the arrows
	248	point to the first elements of the respective sequences. C<traverse_sequences>
	249	will advance the arrows through the sequences one element at a time, calling an
	250	appropriate user-specified callback function before each advance. It
	251	willadvance the arrows in such a way that if there are equal elements C<$A[$i]>
	252	and C<$B[$j]> which are equal and which are part of the LCS, there will be
	253	some moment during the execution of C<traverse_sequences> when arrow A is
	254	pointing to C<$A[$i]> and arrow B is pointing to C<$B[$j]>. When this happens,
	255	C<traverse_sequences> will call the C<MATCH> callback function and then it will
	256	advance both arrows.
	257
	258	Otherwise, one of the arrows is pointing to an element of its sequence that is
	259	not part of the LCS. C<traverse_sequences> will advance that arrow and will
	260	call the C<DISCARD_A> or the C<DISCARD_B> callback, depending on which arrow it
	261	advanced. If both arrows point to elements that are not part of the LCS, then
	262	C<traverse_sequences> will advance one of them and call the appropriate
	263	callback, but it is not specified which it will call.
	264
	265	The arguments to C<traverse_sequences> are the two sequences to traverse, and a
	266	hash which specifies the callback functions, like this:
	267
	268	traverse_sequences( \@seq1, \@seq2,
	269	{ MATCH => $callback_1,
	270	DISCARD_A => $callback_2,
	271	DISCARD_B => $callback_3,
	272	} );
	273
	274	Callbacks for MATCH, DISCARD_A, and DISCARD_B are invoked with at least the
	275	indices of the two arrows as their arguments. They are not expected to return
	276	any values. If a callback is omitted from the table, it is not called.
	277
	278	Callbacks for A_FINISHED and B_FINISHED are invoked with at least the
	279	corresponding index in A or B.
	280
	281	If arrow A reaches the end of its sequence, before arrow B does,
	282	C<traverse_sequences> will call the C<A_FINISHED> callback when it advances
	283	arrow B, if there is such a function; if not it will call C<DISCARD_B> instead.
	284	Similarly if arrow B finishes first. C<traverse_sequences> returns when both
	285	arrows are at the ends of their respective sequences. It returns true on
	286	success and false on failure. At present there is no way to fail.
	287
	288	C<traverse_sequences> may be passed an optional fourth parameter; this is a
	289	CODE reference to a key generation function. See L</KEY GENERATION FUNCTIONS>.
	290
	291	Additional parameters, if any, will be passed to the key generation function.
	292
	293	=head2 C<traverse_balanced>
	294
	295	C<traverse_balanced> is an alternative to C<traverse_sequences>. It
	296	uses a different algorithm to iterate through the entries in the
	297	computed LCS. Instead of sticking to one side and showing element changes
	298	as insertions and deletions only, it will jump back and forth between
	299	the two sequences and report I<changes> occurring as deletions on one
	300	side followed immediatly by an insertion on the other side.
	301
	302	In addition to the
	303	C<DISCARD_A>,
	304	C<DISCARD_B>, and
	305	C<MATCH>
	306	callbacks supported by C<traverse_sequences>, C<traverse_balanced> supports
	307	a C<CHANGE> callback indicating that one element got C<replaced> by another:
	308
	309	traverse_sequences( \@seq1, \@seq2,
	310	{ MATCH => $callback_1,
	311	DISCARD_A => $callback_2,
	312	DISCARD_B => $callback_3,
	313	CHANGE => $callback_4,
	314	} );
	315
	316	If no C<CHANGE> callback is specified, C<traverse_balanced>
	317	will map C<CHANGE> events to C<DISCARD_A> and C<DISCARD_B> actions,
	318	therefore resulting in a similar behaviour as C<traverse_sequences>
	319	with different order of events.
	320
	321	C<traverse_balanced> might be a bit slower than C<traverse_sequences>,
	322	noticable only while processing huge amounts of data.
	323
	324	The C<sdiff> function of this module
	325	is implemented as call to C<traverse_balanced>.
	326
	327	=head1 KEY GENERATION FUNCTIONS
	328
	329	C<diff>, C<LCS>, and C<traverse_sequences> accept an optional last parameter.
	330	This is a CODE reference to a key generating (hashing) function that should
	331	return a string that uniquely identifies a given element. It should be the
	332	case that if two elements are to be considered equal, their keys should be the
	333	same (and the other way around). If no key generation function is provided,
	334	the key will be the element as a string.
	335
	336	By default, comparisons will use "eq" and elements will be turned into keys
	337	using the default stringizing operator '""'.
	338
	339	Where this is important is when you're comparing something other than strings.
	340	If it is the case that you have multiple different objects that should be
	341	considered to be equal, you should supply a key generation function. Otherwise,
	342	you have to make sure that your arrays contain unique references.
	343
	344	For instance, consider this example:
	345
	346	package Person;
	347
	348	sub new
	349	{
	350	my $package = shift;
	351	return bless { name => '', ssn => '', @_ }, $package;
	352	}
	353
	354	sub clone
	355	{
	356	my $old = shift;
	357	my $new = bless { %$old }, ref($old);
	358	}
	359
	360	sub hash
	361	{
	362	return shift()->{'ssn'};
	363	}
	364
	365	my $person1 = Person->new( name => 'Joe', ssn => '123-45-6789' );
	366	my $person2 = Person->new( name => 'Mary', ssn => '123-47-0000' );
	367	my $person3 = Person->new( name => 'Pete', ssn => '999-45-2222' );
	368	my $person4 = Person->new( name => 'Peggy', ssn => '123-45-9999' );
	369	my $person5 = Person->new( name => 'Frank', ssn => '000-45-9999' );
	370
	371	If you did this:
	372
	373	my $array1 = [ $person1, $person2, $person4 ];
	374	my $array2 = [ $person1, $person3, $person4, $person5 ];
	375	Algorithm::Diff::diff( $array1, $array2 );
	376
	377	everything would work out OK (each of the objects would be converted
	378	into a string like "Person=HASH(0x82425b0)" for comparison).
	379
	380	But if you did this:
	381
	382	my $array1 = [ $person1, $person2, $person4 ];
	383	my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
	384	Algorithm::Diff::diff( $array1, $array2 );
	385
	386	$person4 and $person4->clone() (which have the same name and SSN)
	387	would be seen as different objects. If you wanted them to be considered
	388	equivalent, you would have to pass in a key generation function:
	389
	390	my $array1 = [ $person1, $person2, $person4 ];
	391	my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
	392	Algorithm::Diff::diff( $array1, $array2, \&Person::hash );
	393
	394	This would use the 'ssn' field in each Person as a comparison key, and
	395	so would consider $person4 and $person4->clone() as equal.
	396
	397	You may also pass additional parameters to the key generation function
	398	if you wish.
	399
	400	=head1 AUTHOR
	401
	402	This version by Ned Konz, perl@bike-nomad.com
	403
	404	=head1 LICENSE
	405
	406	Copyright (c) 2000-2002 Ned Konz. All rights reserved.
	407	This program is free software;
	408	you can redistribute it and/or modify it under the same terms
	409	as Perl itself.
	410
	411	=head1 CREDITS
	412
	413	Versions through 0.59 (and much of this documentation) were written by:
	414
	415	Mark-Jason Dominus, mjd-perl-diff@plover.com
	416
	417	This version borrows the documentation and names of the routines
	418	from Mark-Jason's, but has all new code in Diff.pm.
	419
	420	This code was adapted from the Smalltalk code of
	421	Mario Wolczko <mario@wolczko.com>, which is available at
	422	ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
	423
	424	C<sdiff> and C<traverse_balanced> were written by Mike Schilli
	425	<m@perlmeister.com>.
	426
	427	The algorithm is that described in
	428	I<A Fast Algorithm for Computing Longest Common Subsequences>,
	429	CACM, vol.20, no.5, pp.350-353, May 1977, with a few
	430	minor improvements to improve the speed.
	431
	432	=cut
	433
	434	# Create a hash that maps each element of $aCollection to the set of positions
	435	# it occupies in $aCollection, restricted to the elements within the range of
	436	# indexes specified by $start and $end.
	437	# The fourth parameter is a subroutine reference that will be called to
	438	# generate a string to use as a key.
	439	# Additional parameters, if any, will be passed to this subroutine.
	440	#
	441	# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
	442
	443	sub _withPositionsOfInInterval
	444	{
	445	my $aCollection = shift; # array ref
	446	my $start = shift;
	447	my $end = shift;
	448	my $keyGen = shift;
	449	my %d;
	450	my $index;
	451	for ( $index = $start ; $index <= $end ; $index++ )
	452	{
	453	my $element = $aCollection->[$index];
	454	my $key = &$keyGen( $element, @_ );
	455	if ( exists( $d{$key} ) )
	456	{
	457	unshift ( @{ $d{$key} }, $index );
	458	}
	459	else
	460	{
	461	$d{$key} = [$index];
	462	}
	463	}
	464	return wantarray ? %d : \%d;
	465	}
	466
	467	# Find the place at which aValue would normally be inserted into the array. If
	468	# that place is already occupied by aValue, do nothing, and return undef. If
	469	# the place does not exist (i.e., it is off the end of the array), add it to
	470	# the end, otherwise replace the element at that point with aValue.
	471	# It is assumed that the array's values are numeric.
	472	# This is where the bulk (75%) of the time is spent in this module, so try to
	473	# make it fast!
	474
	475	sub _replaceNextLargerWith
	476	{
	477	my ( $array, $aValue, $high ) = @_;
	478	$high \|\|= $#$array;
	479
	480	# off the end?
	481	if ( $high == -1 \|\| $aValue > $array->[-1] )
	482	{
	483	push ( @$array, $aValue );
	484	return $high + 1;
	485	}
	486
	487	# binary search for insertion point...
	488	my $low = 0;
	489	my $index;
	490	my $found;
	491	while ( $low <= $high )
	492	{
	493	$index = ( $high + $low ) / 2;
	494
	495	# $index = int(( $high + $low ) / 2); # without 'use integer'
	496	$found = $array->[$index];
	497
	498	if ( $aValue == $found )
	499	{
	500	return undef;
	501	}
	502	elsif ( $aValue > $found )
	503	{
	504	$low = $index + 1;
	505	}
	506	else
	507	{
	508	$high = $index - 1;
	509	}
	510	}
	511
	512	# now insertion point is in $low.
	513	$array->[$low] = $aValue; # overwrite next larger
	514	return $low;
	515	}
	516
	517	# This method computes the longest common subsequence in $a and $b.
	518
	519	# Result is array or ref, whose contents is such that
	520	# $a->[ $i ] == $b->[ $result[ $i ] ]
	521	# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
	522
	523	# An additional argument may be passed; this is a hash or key generating
	524	# function that should return a string that uniquely identifies the given
	525	# element. It should be the case that if the key is the same, the elements
	526	# will compare the same. If this parameter is undef or missing, the key
	527	# will be the element as a string.
	528
	529	# By default, comparisons will use "eq" and elements will be turned into keys
	530	# using the default stringizing operator '""'.
	531
	532	# Additional parameters, if any, will be passed to the key generation routine.
	533
	534	sub _longestCommonSubsequence
	535	{
	536	my $a = shift; # array ref
	537	my $b = shift; # array ref
	538	my $keyGen = shift; # code ref
	539	my $compare; # code ref
	540
	541	# set up code refs
	542	# Note that these are optimized.
	543	if ( !defined($keyGen) ) # optimize for strings
	544	{
	545	$keyGen = sub { $_[0] };
	546	$compare = sub { my ( $a, $b ) = @_; $a eq $b };
	547	}
	548	else
	549	{
	550	$compare = sub {
	551	my $a = shift;
	552	my $b = shift;
	553	&$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
	554	};
	555	}
	556
	557	my ( $aStart, $aFinish, $bStart, $bFinish, $matchVector ) =
	558	( 0, $#$a, 0, $#$b, [] );
	559
	560	# First we prune off any common elements at the beginning
	561	while ( $aStart <= $aFinish
	562	and $bStart <= $bFinish
	563	and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
	564	{
	565	$matchVector->[ $aStart++ ] = $bStart++;
	566	}
	567
	568	# now the end
	569	while ( $aStart <= $aFinish
	570	and $bStart <= $bFinish
	571	and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
	572	{
	573	$matchVector->[ $aFinish-- ] = $bFinish--;
	574	}
	575
	576	# Now compute the equivalence classes of positions of elements
	577	my $bMatches =
	578	_withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
	579	my $thresh = [];
	580	my $links = [];
	581
	582	my ( $i, $ai, $j, $k );
	583	for ( $i = $aStart ; $i <= $aFinish ; $i++ )
	584	{
	585	$ai = &$keyGen( $a->[$i], @_ );
	586	if ( exists( $bMatches->{$ai} ) )
	587	{
	588	$k = 0;
	589	for $j ( @{ $bMatches->{$ai} } )
	590	{
	591
	592	# optimization: most of the time this will be true
	593	if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
	594	{
	595	$thresh->[$k] = $j;
	596	}
	597	else
	598	{
	599	$k = _replaceNextLargerWith( $thresh, $j, $k );
	600	}
	601
	602	# oddly, it's faster to always test this (CPU cache?).
	603	if ( defined($k) )
	604	{
	605	$links->[$k] =
	606	[ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
	607	}
	608	}
	609	}
	610	}
	611
	612	if (@$thresh)
	613	{
	614	for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
	615	{
	616	$matchVector->[ $link->[1] ] = $link->[2];
	617	}
	618	}
	619
	620	return wantarray ? @$matchVector : $matchVector;
	621	}
	622
	623	sub traverse_sequences
	624	{
	625	my $a = shift; # array ref
	626	my $b = shift; # array ref
	627	my $callbacks = shift \|\| {};
	628	my $keyGen = shift;
	629	my $matchCallback = $callbacks->{'MATCH'} \|\| sub { };
	630	my $discardACallback = $callbacks->{'DISCARD_A'} \|\| sub { };
	631	my $finishedACallback = $callbacks->{'A_FINISHED'};
	632	my $discardBCallback = $callbacks->{'DISCARD_B'} \|\| sub { };
	633	my $finishedBCallback = $callbacks->{'B_FINISHED'};
	634	my $matchVector = _longestCommonSubsequence( $a, $b, $keyGen, @_ );
	635
	636	# Process all the lines in @$matchVector
	637	my $lastA = $#$a;
	638	my $lastB = $#$b;
	639	my $bi = 0;
	640	my $ai;
	641
	642	for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
	643	{
	644	my $bLine = $matchVector->[$ai];
	645	if ( defined($bLine) ) # matched
	646	{
	647	&$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
	648	&$matchCallback( $ai, $bi++, @_ );
	649	}
	650	else
	651	{
	652	&$discardACallback( $ai, $bi, @_ );
	653	}
	654	}
	655
	656	# The last entry (if any) processed was a match.
	657	# $ai and $bi point just past the last matching lines in their sequences.
	658
	659	while ( $ai <= $lastA or $bi <= $lastB )
	660	{
	661
	662	# last A?
	663	if ( $ai == $lastA + 1 and $bi <= $lastB )
	664	{
	665	if ( defined($finishedACallback) )
	666	{
	667	&$finishedACallback( $lastA, @_ );
	668	$finishedACallback = undef;
	669	}
	670	else
	671	{
	672	&$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
	673	}
	674	}
	675
	676	# last B?
	677	if ( $bi == $lastB + 1 and $ai <= $lastA )
	678	{
	679	if ( defined($finishedBCallback) )
	680	{
	681	&$finishedBCallback( $lastB, @_ );
	682	$finishedBCallback = undef;
	683	}
	684	else
	685	{
	686	&$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
	687	}
	688	}
	689
	690	&$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
	691	&$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
	692	}
	693
	694	return 1;
	695	}
	696
	697	sub traverse_balanced
	698	{
	699	my $a = shift; # array ref
	700	my $b = shift; # array ref
	701	my $callbacks = shift \|\| {};
	702	my $keyGen = shift;
	703	my $matchCallback = $callbacks->{'MATCH'} \|\| sub { };
	704	my $discardACallback = $callbacks->{'DISCARD_A'} \|\| sub { };
	705	my $discardBCallback = $callbacks->{'DISCARD_B'} \|\| sub { };
	706	my $changeCallback = $callbacks->{'CHANGE'};
	707	my $matchVector = _longestCommonSubsequence( $a, $b, $keyGen, @_ );
	708
	709	# Process all the lines in match vector
	710	my $lastA = $#$a;
	711	my $lastB = $#$b;
	712	my $bi = 0;
	713	my $ai = 0;
	714	my $ma = -1;
	715	my $mb;
	716
	717	while (1)
	718	{
	719
	720	# Find next match indices $ma and $mb
	721	do { $ma++ } while ( $ma <= $#$matchVector && !defined $matchVector->[$ma] );
	722
	723	last if $ma > $#$matchVector; # end of matchVector?
	724	$mb = $matchVector->[$ma];
	725
	726	# Proceed with discard a/b or change events until
	727	# next match
	728	while ( $ai < $ma \|\| $bi < $mb )
	729	{
	730
	731	if ( $ai < $ma && $bi < $mb )
	732	{
	733
	734	# Change
	735	if ( defined $changeCallback )
	736	{
	737	&$changeCallback( $ai++, $bi++, @_ );
	738	}
	739	else
	740	{
	741	&$discardACallback( $ai++, $bi, @_ );
	742	&$discardBCallback( $ai, $bi++, @_ );
	743	}
	744	}
	745	elsif ( $ai < $ma )
	746	{
	747	&$discardACallback( $ai++, $bi, @_ );
	748	}
	749	else
	750	{
	751
	752	# $bi < $mb
	753	&$discardBCallback( $ai, $bi++, @_ );
	754	}
	755	}
	756
	757	# Match
	758	&$matchCallback( $ai++, $bi++, @_ );
	759	}
	760
	761	while ( $ai <= $lastA \|\| $bi <= $lastB )
	762	{
	763	if ( $ai <= $lastA && $bi <= $lastB )
	764	{
	765
	766	# Change
	767	if ( defined $changeCallback )
	768	{
	769	&$changeCallback( $ai++, $bi++, @_ );
	770	}
	771	else
	772	{
	773	&$discardACallback( $ai++, $bi, @_ );
	774	&$discardBCallback( $ai, $bi++, @_ );
	775	}
	776	}
	777	elsif ( $ai <= $lastA )
	778	{
	779	&$discardACallback( $ai++, $bi, @_ );
	780	}
	781	else
	782	{
	783
	784	# $bi <= $lastB
	785	&$discardBCallback( $ai, $bi++, @_ );
	786	}
	787	}
	788
	789	return 1;
	790	}
	791
	792	sub LCS
	793	{
	794	my $a = shift; # array ref
	795	my $matchVector = _longestCommonSubsequence( $a, @_ );
	796	my @retval;
	797	my $i;
	798	for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
	799	{
	800	if ( defined( $matchVector->[$i] ) )
	801	{
	802	push ( @retval, $a->[$i] );
	803	}
	804	}
	805	return wantarray ? @retval : \@retval;
	806	}
	807
	808	sub diff
	809	{
	810	my $a = shift; # array ref
	811	my $b = shift; # array ref
	812	my $retval = [];
	813	my $hunk = [];
	814	my $discard = sub { push ( @$hunk, [ '-', $_[0], $a->[ $_[0] ] ] ) };
	815	my $add = sub { push ( @$hunk, [ '+', $_[1], $b->[ $_[1] ] ] ) };
	816	my $match = sub { push ( @$retval, $hunk ) if scalar(@$hunk); $hunk = [] };
	817	traverse_sequences( $a, $b,
	818	{ MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
	819	&$match();
	820	return wantarray ? @$retval : $retval;
	821	}
	822
	823	sub sdiff
	824	{
	825	my $a = shift; # array ref
	826	my $b = shift; # array ref
	827	my $retval = [];
	828	my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
	829	my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
	830	my $change = sub {
	831	push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
	832	};
	833	my $match = sub {
	834	push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
	835	};
	836	traverse_balanced(
	837	$a,
	838	$b,
	839	{
	840	MATCH => $match,
	841	DISCARD_A => $discard,
	842	DISCARD_B => $add,
	843	CHANGE => $change,
	844	},
	845	@_
	846	);
	847	return wantarray ? @$retval : $retval;
	848	}
	849
	850	1;