/* diff - differential file comparison
* Uses an algorithm due to Harold Stone, which finds
* a pair of longest identical subsequences in the two
* The major goal is to generate the match vector J.
* J[i] is the index of the line in file1 corresponding
* to line i file0. J[i] = 0 if there is no
* Lines are hashed so as to work in core. All potential
* matches are located by sorting the lines of each file
* on the hash (called value\b\b\b\b\b_____). In particular, this
* collects the equivalence classes in file1 together.
* Subroutine equiv\b\b\b\b____ replaces the value of each line in
* file0 by the index of the first element of its
* matching equivalence in (the reordered) file1.
* To save space equiv\b\b\b\b\b_____ squeezes file1 into a single
* array member\b\b\b\b\b\b______ in which the equivalence classes
* are simply concatenated, except that their first
* members are flagged by changing sign.
* Next the indices that point into member\b\b\b\b\b\b______ are unsorted\b\b\b\b\b\b\b\b_______ into
* array class\b\b\b\b\b_____ according to the original order of file0.
* The cleverness lies in routine stone\b\b\b\b\b______. This marches
* through the lines of file0, developing a vector klist\b\b\b\b\b_____
* of "k-candidates". At step i a k-candidate is a matched
* pair of lines x,y (x in file0 y in file1) such that
* there is a common subsequence of lenght k
* between the first i lines of file0 and the first y
* lines of file1, but there is no such subsequence for
* any smaller y. x is the earliest possible mate to y
* that occurs in such a subsequence.
* Whenever any of the members of the equivalence class of
* lines in file1 matable to a line in file0 has serial number
* less than the y of some k-candidate, that k-candidate
* with the smallest such y is replaced. The new
* k-candidate is chained (via pred\b\b\b\b____) to the current
* k-1 candidate so that the actual subsequence can
* be recovered. When a member has serial number greater
* that the y of all k-candidates, the klist is extended.
* At the end, the longest subsequence is pulled out
* and placed in the array J by unravel\b\b\b\b\b\b\b_______.
* With J in hand, the matches there recorded are
* check\b\b\b\b\b_____ed against reality to assure that no spurious
* matches have crept in due to hashing. If they have,
* they are broken, and "jackpot " is recorded--a harmless
* matter except that a true match for a spuriously
* mated line may now be unnecessarily reported as a change.
* Much of the complexity of the program comes simply
* from trying to minimize core utilization and
* maximize the range of doable problems by dynamically
* allocating what is needed and reusing what is not.
* The core requirements for problems larger than somewhat
* are (in words) 2*length(file0) + length(file1) +
* 3*(number of k-candidates installed), typically about
* 6n words for files of length n. There is also space for buf1
* used which could, by moving data underfoot and reallocating
* buf1 together with buf2, be completely overlaid.
int *class; /*will be overlaid on file[0]*/
int *member
; /*will be overlaid on file[1]*/
struct cand
**klist
; /*will be overlaid on file[0] after class*/
int *J
; /*will be overlaid on class*/
int *ixold
; /*will be overlaid on klist*/
int *ixnew
; /*will be overlaid on file[1]*/
sort(a
,n
) /*shellsort CACM #201*/
register struct line
*aim
;
for(ai
= &a
[j
]; ai
> a
; ai
=- m
) {
if(aim
->value
> ai
[0].value
||
aim
->value
== ai
[0].value
&&
aim
->serial
> ai
[0].serial
)
ai
[0].value
= aim
->value
;
ai
[0].serial
= aim
->serial
;
a
= alloc((l
+1)*sizeof(a
[0]));
a
[f
[i
].serial
] = f
[i
].value
;
len
[i
] = (area
- temp
)/sizeof(line
) - 1;
if(fopen(arg
,buf1
) == -1) {
for(i
=0; h
=readhash(buf1
);) {
if(a
[i
].value
<b
[j
].value
)
else if(a
[i
].value
== b
[j
].value
)
while(b
[j
+1].value
== b
[j
].value
) {
if(argc
>1 && *argv
[1]=='-') {
buf1
= alloc(sizeof(*buf1
));
equiv(file
[0], len
[0], file
[1], len
[1], member
);
unsort(file
[0], len
[0], class);
klist
= &class[len
[0]+2];
area
= &member
[len
[1]+2];
k
= stone(class, len
[0], member
, klist
);
buf2
= alloc(sizeof(*buf2
));
c
[k
+1] = newcand(i
,y
,c
[k
]);
else if(c
[l
]->y
> y
&& c
[l
]->x
< i
)
c
[l
] = newcand(i
,y
,c
[l
-1]);
/* check does double duty:
1. ferret out any fortuitous correspondences due
to counfounding by hashing (which result in "jackpot")
2. collect random access indexes to the two files */
while(getc(buf1
)!='\n') ctold
++;
while(getc(buf2
)!='\n') ctnew
++;
while((c
=getc(buf1
))==(d
=getc(buf2
))) {
while(getc(buf2
)!='\n') ctnew
++;
buf1
->fdes
= open(argv
[1],0);
buf2
->fdes
= open(argv
[2],0);
if(dir
==0) for(i0
=1;i0
<=m
;i0
=i1
+1) {
while(i0
<=m
&&J
[i0
]==J
[i0
-1]+1) i0
++;
while(i1
<m
&&J
[i1
+1]==0) i1
++;
} else for(i0
=m
;i0
>=1;i0
=i1
-1) {
while(i0
>=1&&J
[i0
]==J
[i0
+1]-1&&J
[i0
]!=0) i0
--;
while(i1
>1&&J
[i1
-1]==0) i1
--;
change(1,0,1,len
[1],dir
);
putchar(a
>b
?'a':c
>d
?'d':'c');
fetch(ixold
,a
,b
,buf1
,"* ");
if(a
<=b
&&c
<=d
) printf("---\n");
fetch(ixnew
,c
,d
,buf2
,dir
==0?". ":"");
if(dir
!=0&&c
<=d
) printf(".\n");
nc
= read(lb
->fdes
,lb
->data
,f
[i
]-f
[i
-1]);