git.subgeniuskitty.com - OpenSPARC-T2-DV/.git/blame - tools/perl-5.8.0/man/man3/Algorithm::Diff.3

Commit	Line	Data
86530b38 AT	1	.\" Automatically generated by Pod::Man v1.34, Pod::Parser v1.13
	2	.\"
	3	.\" Standard preamble:
	4	.\" ========================================================================
	5	.de Sh \" Subsection heading
	6	.br
	7	.if t .Sp
	8	.ne 5
	9	.PP
	10	\fB\\$1\fR
	11	.PP
	12	..
	13	.de Sp \" Vertical space (when we can't use .PP)
	14	.if t .sp .5v
	15	.if n .sp
	16	..
	17	.de Vb \" Begin verbatim text
	18	.ft CW
	19	.nf
	20	.ne \\$1
	21	..
	22	.de Ve \" End verbatim text
	23	.ft R
	24	.fi
	25	..
	26	.\" Set up some character translations and predefined strings. \*(-- will
	27	.\" give an unbreakable dash, \(PI will give pi, \(L" will give a left
	28	.\" double quote, and \*(R" will give a right double quote. \| will give a
	29	.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
	30	.\" do unbreakable dashes and therefore won't be available. \(C` and \(C'
	31	.\" expand to `' in nroff, nothing in troff, for use with C<>.
	32	.tr \(W-\|\(bv\(Tr
	33	.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
	34	.ie n \{\
	35	. ds -- \(*W-
	36	. ds PI pi
	37	. if (\n(.H=4u)&(1m=24u) .ds -- \(W\h'-12u'\(W\h'-12u'-\" diablo 10 pitch
	38	. if (\n(.H=4u)&(1m=20u) .ds -- \(W\h'-12u'\(W\h'-8u'-\" diablo 12 pitch
	39	. ds L" ""
	40	. ds R" ""
	41	. ds C` ""
	42	. ds C' ""
	43	'br\}
	44	.el\{\
	45	. ds -- \\|\(em\\|
	46	. ds PI \(*p
	47	. ds L" ``
	48	. ds R" ''
	49	'br\}
	50	.\"
	51	.\" If the F register is turned on, we'll generate index entries on stderr for
	52	.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
	53	.\" entries marked with X<> in POD. Of course, you'll have to process the
	54	.\" output yourself in some meaningful fashion.
	55	.if \nF \{\
	56	. de IX
	57	. tm Index:\\$1\t\\n%\t"\\$2"
	58	..
	59	. nr % 0
	60	. rr F
	61	.\}
	62	.\"
	63	.\" For nroff, turn off justification. Always turn off hyphenation; it makes
	64	.\" way too many mistakes in technical documents.
65	.hy 0
66	.if n .na
67	.\"
68	.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69	.\" Fear. Run. Save yourself. No user-serviceable parts.
70	. \" fudge factors for nroff and troff
71	.if n \{\
72	. ds #H 0
73	. ds #V .8m
74	. ds #F .3m
75	. ds #[ \f1
76	. ds #] \fP
77	.\}
78	.if t \{\
79	. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80	. ds #V .6m
81	. ds #F 0
82	. ds #[ \&
83	. ds #] \&
84	.\}
85	. \" simple accents for nroff and troff
86	.if n \{\
87	. ds ' \&
88	. ds ` \&
89	. ds ^ \&
90	. ds , \&
91	. ds ~ ~
92	. ds /
93	.\}
94	.if t \{\
95	. ds ' \\k:\h'-(\\n(.wu8/10-\(#H)'\'\h"\|\\n:u"
96	. ds ` \\k:\h'-(\\n(.wu8/10-\(#H)'\`\h'\|\\n:u'
97	. ds ^ \\k:\h'-(\\n(.wu10/11-\(#H)'^\h'\|\\n:u'
98	. ds , \\k:\h'-(\\n(.wu*8/10)',\h'\|\\n:u'
99	. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'\|\\n:u'
100	. ds / \\k:\h'-(\\n(.wu8/10-\(#H)'\z\(sl\h'\|\\n:u'
101	.\}
102	. \" troff and (daisy-wheel) nroff accents
103	.ds : \\k:\h'-(\\n(.wu8/10-\(#H+.1m+\(#F)'\v'-\(#V'\z.\h'.2m+\(#F'.\h'\|\\n:u'\v'\(#V'
104	.ds 8 \h'\(#H'\(b\h'-\*(#H'
105	.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\(#H)/2u'\v'-.3n'\(#[\z\(de\v'.3n'\h'\|\\n:u'\*(#]
106	.ds d- \h'\(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\(#H'
107	.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'\|\\n:u'
108	.ds th \(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u2/3)'\s-1o\s+1\*(#]
109	.ds Th \(#[\s+2I\s-2\h'-\w'I'u3/5'\v'-.3m'o\v'.3m'\*(#]
110	.ds ae a\h'-(\w'a'u*4/10)'e
111	.ds Ae A\h'-(\w'A'u*4/10)'E
112	. \" corrections for vroff
113	.if v .ds ~ \\k:\h'-(\\n(.wu9/10-\(#H)'\s-2\u~\d\s+2\h'\|\\n:u'
114	.if v .ds ^ \\k:\h'-(\\n(.wu10/11-\(#H)'\v'-.4m'^\v'.4m'\h'\|\\n:u'
115	. \" for low resolution devices (crt and lpr)
116	.if \n(.H>23 .if \n(.V>19 \
117	\{\
118	. ds : e
119	. ds 8 ss
120	. ds o a
121	. ds d- d\h'-1'\(ga
122	. ds D- D\h'-1'\(hy
123	. ds th \o'bp'
124	. ds Th \o'LP'
125	. ds ae ae
126	. ds Ae AE
127	.\}
128	.rm #[ #] #H #V #F C
129	.\" ========================================================================
130	.\"
131	.IX Title "Algorithm::Diff 3"
132	.TH Algorithm::Diff 3 "2002-03-24" "perl v5.8.0" "User Contributed Perl Documentation"
133	.SH "NAME"
134	Algorithm::Diff \- Compute `intelligent' differences between two files / lists
135	.SH "SYNOPSIS"
136	.IX Header "SYNOPSIS"
137	.Vb 2
138	\& use Algorithm::Diff qw(diff sdiff LCS traverse_sequences
139	\& traverse_balanced);
140	.Ve
141	.PP
142	.Vb 1
143	\& @lcs = LCS( \e@seq1, \e@seq2 );
144	.Ve
145	.PP
146	.Vb 1
147	\& @lcs = LCS( \e@seq1, \e@seq2, $key_generation_function );
148	.Ve
149	.PP
150	.Vb 1
151	\& $lcsref = LCS( \e@seq1, \e@seq2 );
152	.Ve
153	.PP
154	.Vb 1
155	\& $lcsref = LCS( \e@seq1, \e@seq2, $key_generation_function );
156	.Ve
157	.PP
158	.Vb 1
159	\& @diffs = diff( \e@seq1, \e@seq2 );
160	.Ve
161	.PP
162	.Vb 1
163	\& @diffs = diff( \e@seq1, \e@seq2, $key_generation_function );
164	.Ve
165	.PP
166	.Vb 1
167	\& @sdiffs = sdiff( \e@seq1, \e@seq2 );
168	.Ve
169	.PP
170	.Vb 1
171	\& @sdiffs = sdiff( \e@seq1, \e@seq2, $key_generation_function );
172	.Ve
173	.PP
174	.Vb 5
175	\& traverse_sequences( \e@seq1, \e@seq2,
176	\& { MATCH => $callback,
177	\& DISCARD_A => $callback,
178	\& DISCARD_B => $callback,
179	\& } );
180	.Ve
181	.PP
182	.Vb 6
183	\& traverse_sequences( \e@seq1, \e@seq2,
184	\& { MATCH => $callback,
185	\& DISCARD_A => $callback,
186	\& DISCARD_B => $callback,
187	\& },
188	\& $key_generation_function );
189	.Ve
190	.PP
191	.Vb 6
192	\& traverse_balanced( \e@seq1, \e@seq2,
193	\& { MATCH => $callback,
194	\& DISCARD_A => $callback,
195	\& DISCARD_B => $callback,
196	\& CHANGE => $callback,
197	\& } );
198	.Ve
199	.SH "INTRODUCTION"
200	.IX Header "INTRODUCTION"
201	(by Mark-Jason Dominus)
202	.PP
203	I once read an article written by the authors of \f(CW\(C`diff\(C'\fR; they said
204	that they hard worked very hard on the algorithm until they found the
205	right one.
206	.PP
207	I think what they ended up using (and I hope someone will correct me,
208	because I am not very confident about this) was the `longest common
209	subsequence' method. in the \s-1LCS\s0 problem, you have two sequences of
210	items:
211	.PP
212	.Vb 1
213	\& a b c d f g h j q z
214	.Ve
215	.PP
216	.Vb 1
217	\& a b c d e f g i j k r x y z
218	.Ve
219	.PP
220	and you want to find the longest sequence of items that is present in
221	both original sequences in the same order. That is, you want to find
222	a new sequence \fIS\fR which can be obtained from the first sequence by
223	deleting some items, and from the secend sequence by deleting other
224	items. You also want \fIS\fR to be as long as possible. In this case
225	\&\fIS\fR is
226	.PP
227	.Vb 1
228	\& a b c d f g j z
229	.Ve
230	.PP
231	From there it's only a small step to get diff-like output:
232	.PP
233	.Vb 2
234	\& e h i k q r x y
235	\& + - + + - + + +
236	.Ve
237	.PP
238	This module solves the \s-1LCS\s0 problem. It also includes a canned
239	function to generate \f(CW\(C`diff\(C'\fR\-like output.
240	.PP
241	It might seem from the example above that the \s-1LCS\s0 of two sequences is
242	always pretty obvious, but that's not always the case, especially when
243	the two sequences have many repeated elements. For example, consider
244	.PP
245	.Vb 2
246	\& a x b y c z p d q
247	\& a b c a x b y c z
248	.Ve
249	.PP
250	A naive approach might start by matching up the \f(CW\(C`a\(C'\fR and \f(CW\(C`b\(C'\fR that
251	appear at the beginning of each sequence, like this:
252	.PP
253	.Vb 2
254	\& a x b y c z p d q
255	\& a b c a b y c z
256	.Ve
257	.PP
258	This finds the common subsequence \f(CW\(C`a b c z\(C'\fR. But actually, the \s-1LCS\s0
259	is \f(CW\(C`a x b y c z\(C'\fR:
260	.PP
261	.Vb 2
262	\& a x b y c z p d q
263	\& a b c a x b y c z
264	.Ve
265	.SH "USAGE"
266	.IX Header "USAGE"
267	This module provides three exportable functions, which we'll deal with in
268	ascending order of difficulty: \f(CW\(C`LCS\(C'\fR,
269	\&\f(CW\(C`diff\(C'\fR, \f(CW\(C`sdiff\(C'\fR, \f(CW\(C`traverse_sequences\(C'\fR, and \f(CW\(C`traverse_balanced\(C'\fR.
270	.ie n .Sh """LCS"""
271	.el .Sh "\f(CWLCS\fP"
272	.IX Subsection "LCS"
273	Given references to two lists of items, \s-1LCS\s0 returns an array containing their
274	longest common subsequence. In scalar context, it returns a reference to
275	such a list.
276	.PP
277	.Vb 2
278	\& @lcs = LCS( \e@seq1, \e@seq2 );
279	\& $lcsref = LCS( \e@seq1, \e@seq2 );
280	.Ve
281	.PP
282	\&\f(CW\(C`LCS\(C'\fR may be passed an optional third parameter; this is a \s-1CODE\s0
283	reference to a key generation function. See \(L"\s-1KEY\s0 \s-1GENERATION\s0 \s-1FUNCTIONS\s0\(R".
284	.PP
285	.Vb 2
286	\& @lcs = LCS( \e@seq1, \e@seq2, $keyGen );
287	\& $lcsref = LCS( \e@seq1, \e@seq2, $keyGen );
288	.Ve
289	.PP
290	Additional parameters, if any, will be passed to the key generation
291	routine.
292	.ie n .Sh """diff"""
293	.el .Sh "\f(CWdiff\fP"
294	.IX Subsection "diff"
295	.Vb 2
296	\& @diffs = diff( \e@seq1, \e@seq2 );
297	\& $diffs_ref = diff( \e@seq1, \e@seq2 );
298	.Ve
299	.PP
300	\&\f(CW\(C`diff\(C'\fR computes the smallest set of additions and deletions necessary
301	to turn the first sequence into the second, and returns a description
302	of these changes. The description is a list of \fIhunks\fR; each hunk
303	represents a contiguous section of items which should be added,
304	deleted, or replaced. The return value of \f(CW\(C`diff\(C'\fR is a list of
305	hunks, or, in scalar context, a reference to such a list.
306	.PP
307	Here is an example: The diff of the following two sequences:
308	.PP
309	.Vb 2
310	\& a b c e h j l m n p
311	\& b c d e f j k l m r s t
312	.Ve
313	.PP
314	Result:
315	.PP
316	.Vb 2
317	\& [
318	\& [ [ '-', 0, 'a' ] ],
319	.Ve
320	.PP
321	.Vb 1
322	\& [ [ '+', 2, 'd' ] ],
323	.Ve
324	.PP
325	.Vb 2
326	\& [ [ '-', 4, 'h' ] ,
327	\& [ '+', 4, 'f' ] ],
328	.Ve
329	.PP
330	.Vb 1
331	\& [ [ '+', 6, 'k' ] ],
332	.Ve
333	.PP
334	.Vb 7
335	\& [ [ '-', 8, 'n' ],
336	\& [ '-', 9, 'p' ],
337	\& [ '+', 9, 'r' ],
338	\& [ '+', 10, 's' ],
339	\& [ '+', 11, 't' ],
340	\& ]
341	\& ]
342	.Ve
343	.PP
344	There are five hunks here. The first hunk says that the \f(CW\(C`a\(C'\fR at
345	position 0 of the first sequence should be deleted (\f(CW\(C`\-\(C'\fR). The second
346	hunk says that the \f(CW\(C`d\(C'\fR at position 2 of the second sequence should
347	be inserted (\f(CW\(C`+\(C'\fR). The third hunk says that the \f(CW\(C`h\(C'\fR at position 4
348	of the first sequence should be removed and replaced with the \f(CW\(C`f\(C'\fR
349	from position 4 of the second sequence. The other two hunks similarly.
350	.PP
351	\&\f(CW\(C`diff\(C'\fR may be passed an optional third parameter; this is a \s-1CODE\s0
352	reference to a key generation function. See \(L"\s-1KEY\s0 \s-1GENERATION\s0 \s-1FUNCTIONS\s0\(R".
353	.PP
354	Additional parameters, if any, will be passed to the key generation
355	routine.
356	.ie n .Sh """sdiff"""
357	.el .Sh "\f(CWsdiff\fP"
358	.IX Subsection "sdiff"
359	.Vb 2
360	\& @sdiffs = sdiff( \e@seq1, \e@seq2 );
361	\& $sdiffs_ref = sdiff( \e@seq1, \e@seq2 );
362	.Ve
363	.PP
364	\&\f(CW\(C`sdiff\(C'\fR computes all necessary components to show two sequences
365	and their minimized differences side by side, just like the
366	Unix-utility \fIsdiff\fR does:
367	.PP
368	.Vb 4
369	\& same same
370	\& before \| after
371	\& old < -
372	\& - > new
373	.Ve
374	.PP
375	It returns a list of array refs, each pointing to an array of
376	display instructions. In scalar context it returns a reference
377	to such a list.
378	.PP
379	Display instructions consist of three elements: A modifier indicator
380	(\f(CW\(C`+\(C'\fR: Element added, \f(CW\(C`\-\(C'\fR: Element removed, \f(CW\(C`u\(C'\fR: Element unmodified,
381	\&\f(CW\(C`c\(C'\fR: Element changed) and the value of the old and new elements, to
382	be displayed side by side.
383	.PP
384	An \f(CW\(C`sdiff\(C'\fR of the following two sequences:
385	.PP
386	.Vb 2
387	\& a b c e h j l m n p
388	\& b c d e f j k l m r s t
389	.Ve
390	.PP
391	results in
392	.PP
393	[ [ '\-', 'a', '' ],
394	[ 'u', 'b', 'b' ],
395	[ 'u', 'c', 'c' ],
396	[ '+', '', 'd' ],
397	[ 'u', 'e', 'e' ],
398	[ 'c', 'h', 'f' ],
399	[ 'u', 'j', 'j' ],
400	[ '+', '', 'k' ],
401	[ 'u', 'l', 'l' ],
402	[ 'u', 'm', 'm' ],
403	[ 'c', 'n', 'r' ],
404	[ 'c', 'p', 's' ],
405	[ '+', '', 't' ] ]
406	.PP
407	\&\f(CW\(C`sdiff\(C'\fR may be passed an optional third parameter; this is a \s-1CODE\s0
408	reference to a key generation function. See \(L"\s-1KEY\s0 \s-1GENERATION\s0 \s-1FUNCTIONS\s0\(R".
409	.PP
410	Additional parameters, if any, will be passed to the key generation
411	routine.
412	.ie n .Sh """traverse_sequences"""
413	.el .Sh "\f(CWtraverse_sequences\fP"
414	.IX Subsection "traverse_sequences"
415	\&\f(CW\(C`traverse_sequences\(C'\fR is the most general facility provided by this
416	module; \f(CW\(C`diff\(C'\fR and \f(CW\(C`LCS\(C'\fR are implemented as calls to it.
417	.PP
418	Imagine that there are two arrows. Arrow A points to an element of sequence A,
419	and arrow B points to an element of the sequence B. Initially, the arrows
420	point to the first elements of the respective sequences. \f(CW\(C`traverse_sequences\(C'\fR
421	will advance the arrows through the sequences one element at a time, calling an
422	appropriate user-specified callback function before each advance. It
423	willadvance the arrows in such a way that if there are equal elements \f(CW$A[$i]\fR
424	and \f(CW$B[$j]\fR which are equal and which are part of the \s-1LCS\s0, there will be
425	some moment during the execution of \f(CW\(C`traverse_sequences\(C'\fR when arrow A is
426	pointing to \f(CW$A[$i]\fR and arrow B is pointing to \f(CW$B[$j]\fR. When this happens,
427	\&\f(CW\(C`traverse_sequences\(C'\fR will call the \f(CW\(C`MATCH\(C'\fR callback function and then it will
428	advance both arrows.
429	.PP
430	Otherwise, one of the arrows is pointing to an element of its sequence that is
431	not part of the \s-1LCS\s0. \f(CW\(C`traverse_sequences\(C'\fR will advance that arrow and will
432	call the \f(CW\(C`DISCARD_A\(C'\fR or the \f(CW\(C`DISCARD_B\(C'\fR callback, depending on which arrow it
433	advanced. If both arrows point to elements that are not part of the \s-1LCS\s0, then
434	\&\f(CW\(C`traverse_sequences\(C'\fR will advance one of them and call the appropriate
435	callback, but it is not specified which it will call.
436	.PP
437	The arguments to \f(CW\(C`traverse_sequences\(C'\fR are the two sequences to traverse, and a
438	hash which specifies the callback functions, like this:
439	.PP
440	.Vb 5
441	\& traverse_sequences( \e@seq1, \e@seq2,
442	\& { MATCH => $callback_1,
443	\& DISCARD_A => $callback_2,
444	\& DISCARD_B => $callback_3,
445	\& } );
446	.Ve
447	.PP
448	Callbacks for \s-1MATCH\s0, \s-1DISCARD_A\s0, and \s-1DISCARD_B\s0 are invoked with at least the
449	indices of the two arrows as their arguments. They are not expected to return
450	any values. If a callback is omitted from the table, it is not called.
451	.PP
452	Callbacks for A_FINISHED and B_FINISHED are invoked with at least the
453	corresponding index in A or B.
454	.PP
455	If arrow A reaches the end of its sequence, before arrow B does,
456	\&\f(CW\(C`traverse_sequences\(C'\fR will call the \f(CW\(C`A_FINISHED\(C'\fR callback when it advances
457	arrow B, if there is such a function; if not it will call \f(CW\(C`DISCARD_B\(C'\fR instead.
458	Similarly if arrow B finishes first. \f(CW\(C`traverse_sequences\(C'\fR returns when both
459	arrows are at the ends of their respective sequences. It returns true on
460	success and false on failure. At present there is no way to fail.
461	.PP
462	\&\f(CW\(C`traverse_sequences\(C'\fR may be passed an optional fourth parameter; this is a
463	\&\s-1CODE\s0 reference to a key generation function. See \(L"\s-1KEY\s0 \s-1GENERATION\s0 \s-1FUNCTIONS\s0\(R".
464	.PP
465	Additional parameters, if any, will be passed to the key generation function.
466	.ie n .Sh """traverse_balanced"""
467	.el .Sh "\f(CWtraverse_balanced\fP"
468	.IX Subsection "traverse_balanced"
469	\&\f(CW\(C`traverse_balanced\(C'\fR is an alternative to \f(CW\(C`traverse_sequences\(C'\fR. It
470	uses a different algorithm to iterate through the entries in the
471	computed \s-1LCS\s0. Instead of sticking to one side and showing element changes
472	as insertions and deletions only, it will jump back and forth between
473	the two sequences and report \fIchanges\fR occurring as deletions on one
474	side followed immediatly by an insertion on the other side.
475	.PP
476	In addition to the
477	\&\f(CW\(C`DISCARD_A\(C'\fR,
478	\&\f(CW\(C`DISCARD_B\(C'\fR, and
479	\&\f(CW\(C`MATCH\(C'\fR
480	callbacks supported by \f(CW\(C`traverse_sequences\(C'\fR, \f(CW\(C`traverse_balanced\(C'\fR supports
481	a \f(CW\(C`CHANGE\(C'\fR callback indicating that one element got \f(CW\(C`replaced\(C'\fR by another:
482	.PP
483	.Vb 6
484	\& traverse_sequences( \e@seq1, \e@seq2,
485	\& { MATCH => $callback_1,
486	\& DISCARD_A => $callback_2,
487	\& DISCARD_B => $callback_3,
488	\& CHANGE => $callback_4,
489	\& } );
490	.Ve
491	.PP
492	If no \f(CW\(C`CHANGE\(C'\fR callback is specified, \f(CW\(C`traverse_balanced\(C'\fR
493	will map \f(CW\(C`CHANGE\(C'\fR events to \f(CW\(C`DISCARD_A\(C'\fR and \f(CW\(C`DISCARD_B\(C'\fR actions,
494	therefore resulting in a similar behaviour as \f(CW\(C`traverse_sequences\(C'\fR
495	with different order of events.
496	.PP
497	\&\f(CW\(C`traverse_balanced\(C'\fR might be a bit slower than \f(CW\(C`traverse_sequences\(C'\fR,
498	noticable only while processing huge amounts of data.
499	.PP
500	The \f(CW\(C`sdiff\(C'\fR function of this module
501	is implemented as call to \f(CW\(C`traverse_balanced\(C'\fR.
502	.SH "KEY GENERATION FUNCTIONS"
503	.IX Header "KEY GENERATION FUNCTIONS"
504	\&\f(CW\(C`diff\(C'\fR, \f(CW\(C`LCS\(C'\fR, and \f(CW\(C`traverse_sequences\(C'\fR accept an optional last parameter.
505	This is a \s-1CODE\s0 reference to a key generating (hashing) function that should
506	return a string that uniquely identifies a given element. It should be the
507	case that if two elements are to be considered equal, their keys should be the
508	same (and the other way around). If no key generation function is provided,
509	the key will be the element as a string.
510	.PP
511	By default, comparisons will use \(L"eq\(R" and elements will be turned into keys
512	using the default stringizing operator '""'.
513	.PP
514	Where this is important is when you're comparing something other than strings.
515	If it is the case that you have multiple different objects that should be
516	considered to be equal, you should supply a key generation function. Otherwise,
517	you have to make sure that your arrays contain unique references.
518	.PP
519	For instance, consider this example:
520	.PP
521	.Vb 1
522	\& package Person;
523	.Ve
524	.PP
525	.Vb 5
526	\& sub new
527	\& {
528	\& my $package = shift;
529	\& return bless { name => '', ssn => '', @_ }, $package;
530	\& }
531	.Ve
532	.PP
533	.Vb 5
534	\& sub clone
535	\& {
536	\& my $old = shift;
537	\& my $new = bless { %$old }, ref($old);
538	\& }
539	.Ve
540	.PP
541	.Vb 4
542	\& sub hash
543	\& {
544	\& return shift()->{'ssn'};
545	\& }
546	.Ve
547	.PP
548	.Vb 5
549	\& my $person1 = Person->new( name => 'Joe', ssn => '123-45-6789' );
550	\& my $person2 = Person->new( name => 'Mary', ssn => '123-47-0000' );
551	\& my $person3 = Person->new( name => 'Pete', ssn => '999-45-2222' );
552	\& my $person4 = Person->new( name => 'Peggy', ssn => '123-45-9999' );
553	\& my $person5 = Person->new( name => 'Frank', ssn => '000-45-9999' );
554	.Ve
555	.PP
556	If you did this:
557	.PP
558	.Vb 3
559	\& my $array1 = [ $person1, $person2, $person4 ];
560	\& my $array2 = [ $person1, $person3, $person4, $person5 ];
561	\& Algorithm::Diff::diff( $array1, $array2 );
562	.Ve
563	.PP
564	everything would work out \s-1OK\s0 (each of the objects would be converted
565	into a string like \(L"Person=HASH(0x82425b0)\(R" for comparison).
566	.PP
567	But if you did this:
568	.PP
569	.Vb 3
570	\& my $array1 = [ $person1, $person2, $person4 ];
571	\& my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
572	\& Algorithm::Diff::diff( $array1, $array2 );
573	.Ve
574	.PP
575	$person4 and \f(CW$person4\fR\->\fIclone()\fR (which have the same name and \s-1SSN\s0)
576	would be seen as different objects. If you wanted them to be considered
577	equivalent, you would have to pass in a key generation function:
578	.PP
579	.Vb 3
580	\& my $array1 = [ $person1, $person2, $person4 ];
581	\& my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
582	\& Algorithm::Diff::diff( $array1, $array2, \e&Person::hash );
583	.Ve
584	.PP
585	This would use the 'ssn' field in each Person as a comparison key, and
586	so would consider \f(CW$person4\fR and \f(CW$person4\fR\->\fIclone()\fR as equal.
587	.PP
588	You may also pass additional parameters to the key generation function
589	if you wish.
590	.SH "AUTHOR"
591	.IX Header "AUTHOR"
592	This version by Ned Konz, perl@bike\-nomad.com
593	.SH "LICENSE"
594	.IX Header "LICENSE"
595	Copyright (c) 2000\-2002 Ned Konz. All rights reserved.
596	This program is free software;
597	you can redistribute it and/or modify it under the same terms
598	as Perl itself.
599	.SH "CREDITS"
600	.IX Header "CREDITS"
601	Versions through 0.59 (and much of this documentation) were written by:
602	.PP
603	Mark-Jason Dominus, mjd\-perl\-diff@plover.com
604	.PP
605	This version borrows the documentation and names of the routines
606	from Mark\-Jason's, but has all new code in Diff.pm.
607	.PP
608	This code was adapted from the Smalltalk code of
609	Mario Wolczko <mario@wolczko.com>, which is available at
610	ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
611	.PP
612	\&\f(CW\(C`sdiff\(C'\fR and \f(CW\(C`traverse_balanced\(C'\fR were written by Mike Schilli
613	<m@perlmeister.com>.
614	.PP
615	The algorithm is that described in
616	\&\fIA Fast Algorithm for Computing Longest Common Subsequences\fR,
617	\&\s-1CACM\s0, vol.20, no.5, pp.350\-353, May 1977, with a few
618	minor improvements to improve the speed.