static char sccsid
[] = "@(#)diffreg.c 4.8 %G%";
* diff - compare two files.
* Uses an algorithm due to Harold Stone, which finds
* a pair of longest identical subsequences in the two files.
* The major goal is to generate the match vector J.
* J[i] is the index of the line in file1 corresponding
* to line i of file0. J[i] = 0 if there is no
* such line in file1.
* Lines are hashed so as to work in core. All potential
* matches are located by sorting the lines of each file
* on the hash (called ``value''). In particular, this
* collects the equivalence classes in file1 together.
* Subroutine equiv replaces the value of each line in
* file0 by the index of the first element of its
* matching equivalence in (the reordered) file1.
* To save space equiv squeezes file1 into a single
* array member in which the equivalence classes
* are simply concatenated, except that their first
* members are flagged by changing sign.
* Next the indices that point into member are unsorted into
* array class according to the original order of file0.
* The cleverness lies in routine stone. This marches
* through the lines of file0, developing a vector klist
* of "k-candidates". At step i a k-candidate is a matched
* pair of lines x,y (x in file0, y in file1) such that
* there is a common subsequence of length k
* between the first i lines of file0 and the first y
* lines of file1, but there is no such subsequence for
* any smaller y. x is the earliest possible mate to y
* that occurs in such a subsequence.
* Whenever any of the members of the equivalence class of
* lines in file1 matable to a line in file0 has serial number
* less than the y of some k-candidate, that k-candidate
* with the smallest such y is replaced. The new
* k-candidate is chained (via pred) to the current
* k-1 candidate so that the actual subsequence can
* be recovered. When a member has serial number greater
* than the y of all k-candidates, the klist is extended.
* At the end, the longest subsequence is pulled out
* and placed in the array J by unravel.
* With J in hand, the matches there recorded are
* checked against reality to assure that no spurious
* matches have crept in due to hashing. If they have,
* they are broken, and "jackpot" is recorded--a harmless
* matter except that a true match for a spuriously
* mated line may now be unnecessarily reported as a change.
* Much of the complexity of the program comes simply
* from trying to minimize core utilization and
* maximize the range of doable problems by dynamically
* allocating what is needed and reusing what is not.
* The core requirements for problems larger than somewhat
* are (in words) 2*length(file0) + length(file1) +
* 3*(number of k-candidates installed), typically about
* 6n words for files of length n.
/* Emit the string s on standard output via fputs. */
#define prints(s)	fputs((s), stdout)
/*
 * File-scope working storage for the comparison.
 * The "will be overlaid on ..." notes below record which earlier array's
 * storage each pointer is intended to reuse once that earlier array is
 * no longer needed.
 */
struct line
*sfile
[2]; /* shortened by pruning common prefix and suffix */
int pref
, suff
; /* length of prefix and suffix */
int *class; /* will be overlaid on file[0] */
int *member
; /* will be overlaid on file[1] */
int *klist
; /* will be overlaid on file[0] after class */
struct cand
*clist
; /* merely a free storage pot for candidates */
int *J
; /* will be overlaid on class */
long *ixold
; /* will be overlaid on klist */
long *ixnew
; /* will be overlaid on file[1] */
char buf1
[BUFSIZ
], buf2
[BUFSIZ
];
fprintf(stderr
, "diff: ");
if ((stb1
.st_mode
& S_IFMT
) == S_IFDIR
)
file1
= splice(file1
, file2
);
else if ((stb2
.st_mode
& S_IFMT
) == S_IFDIR
)
file2
= splice(file2
, file1
);
else if (!strcmp(file1
, "-")) {
if (!strcmp(file2
, "-")) {
fprintf(stderr
, "diff: can't specify - -\n");
} else if (!strcmp(file2
, "-"))
if ((f1
= fopen(file1
, "r")) == NULL
) {
fprintf(stderr
, "diff: ");
if ((f2
= fopen(file2
, "r")) == NULL
) {
fprintf(stderr
, "diff: ");
if (stb1
.st_size
!= stb2
.st_size
)
i
= fread(buf1
, 1, BUFSIZ
, f1
);
j
= fread(buf2
, 1, BUFSIZ
, f2
);
if (i
< 0 || j
< 0 || i
!= j
)
status
= 0; /* files don't differ */
* Files certainly differ at this point; set status accordingly
if (!asciifile(f1
) || !asciifile(f2
)) {
printf("Binary files %s and %s differ\n", file1
, file2
);
equiv(sfile
[0], slen
[0], sfile
[1], slen
[1], member
);
member
= (int *)ralloc((char *)member
,(slen
[1]+2)*sizeof(int));
unsort(sfile
[0], slen
[0], class);
class = (int *)ralloc((char *)class,(slen
[0]+2)*sizeof(int));
klist
= (int *)talloc((slen
[0]+2)*sizeof(int));
clist
= (struct cand
*)talloc(sizeof(cand
));
i
= stone(class, slen
[0], member
, klist
);
J
= (int *)talloc((len
[0]+2)*sizeof(int));
ixold
= (long *)talloc((len
[0]+2)*sizeof(long));
ixnew
= (long *)talloc((len
[1]+2)*sizeof(long));
if (opt
== D_CONTEXT
&& anychange
== 0)
printf("No differences encountered\n");
tempfile
= mktemp("/tmp/dXXXXX");
f
= creat(tempfile
,0600);
fprintf(stderr
, "diff: ");
while ((i
= read(0,buf
,BUFSIZ
)) > 0)
if (write(f
,buf
,i
) != i
) {
fprintf(stderr
, "diff: ");
if (!strcmp(file
, "-")) {
fprintf(stderr
, "diff: can't specify - with other arg directory\n");
tail
= rindex(file
, '/');
sprintf(buf
, "%s/%s", dir
, tail
);
p
= (struct line
*)talloc(3*sizeof(line
));
for(j
=0; h
=readhash(fd
);) {
p
= (struct line
*)ralloc((char *)p
,(++j
+3)*sizeof(line
));
for(pref
=0;pref
<len
[0]&&pref
<len
[1]&&
file
[0][pref
+1].value
==file
[1][pref
+1].value
;
for(suff
=0;suff
<len
[0]-pref
&&suff
<len
[1]-pref
&&
file
[0][len
[0]-suff
].value
==file
[1][len
[1]-suff
].value
;
slen
[j
] = len
[j
]-pref
-suff
;
if(a
[i
].value
<b
[j
].value
)
else if(a
[i
].value
== b
[j
].value
)
while(b
[j
+1].value
== b
[j
].value
) {
c
[l
] = newcand(i
,y
,oldc
);
c
[l
] = newcand(i
,y
,oldc
);
clist
= (struct cand
*)ralloc((char *)clist
,++clen
*sizeof(cand
));
if(clist
[c
[k
]].y
<y
) /*quick look for typical case*/
i
>len
[0]-suff
? i
+len
[1]-len
[0]:
for(q
=clist
+p
;q
->y
!=0;q
=clist
+q
->pred
)
J
[q
->x
+pref
] = q
->y
+pref
;
/* check does double duty:
1. ferret out any fortuitous correspondences due
to confounding by hashing (which result in "jackpot")
2. collect random access indexes to the two files */
if ((input
[0] = fopen(file1
,"r")) == NULL
) {
if ((input
[1] = fopen(file2
,"r")) == NULL
) {
ixold
[i
] = ctold
+= skipline(0);
ixnew
[j
] = ctnew
+= skipline(1);
if(bflag
&& isspace(c
) && isspace(d
)) {
} while(isspace(c
=getc(input
[0])));
} while(isspace(d
=getc(input
[1])));
ixnew
[j
] = ctnew
+= skipline(1);
fprintf(stderr, "jackpot\n");
sort(a
,n
) /*shellsort CACM #201*/
register struct line
*aim
;
for(ai
= &a
[j
]; ai
> a
; ai
-= m
) {
if(aim
->value
> ai
[0].value
||
aim
->value
== ai
[0].value
&&
aim
->serial
> ai
[0].serial
)
ai
[0].value
= aim
->value
;
ai
[0].serial
= aim
->serial
;
a
= (int *)talloc((l
+1)*sizeof(int));
a
[f
[i
].serial
] = f
[i
].value
;
for(i
=1;(c
=getc(input
[f
]))!='\n';i
++)
input
[0] = fopen(file1
,"r");
input
[1] = fopen(file2
,"r");
if(opt
!=D_EDIT
) for(i0
=1;i0
<=m
;i0
=i1
+1) {
while(i0
<=m
&&J
[i0
]==J
[i0
-1]+1) i0
++;
while(i1
<m
&&J
[i1
+1]==0) i1
++;
} else for(i0
=m
;i0
>=1;i0
=i1
-1) {
while(i0
>=1&&J
[i0
]==J
[i0
+1]-1&&J
[i0
]!=0) i0
--;
while(i1
>1&&J
[i1
-1]==0) i1
--;
/* indicate that there is a difference between lines a and b of the from file
to get to lines c to d of the to file.
If a is greater than b then there are no lines in the from file involved
and this means that there were lines appended (beginning at b).
If c is greater than d then there are lines missing from the to file.
if (opt
!= D_IFDEF
&& a
>b
&& c
>d
)
printf("*** %s ", file1
);
ctime(&stbuf
.st_mtime
), file2
);
printf("%s", ctime(&stbuf
.st_mtime
));
ch
= (a
<= b
) ? 'd' : 'a';
lowa
= max(1, a
-context
);
upb
= min(len
[0], b
+context
);
lowc
= max(1, c
-context
);
upd
= min(len
[1], d
+context
);
/* print out from file */
printf("***************\n*** ");
fetch(ixold
,lowa
,upb
,input
[0]," ");
fetch(ixold
,lowa
,a
-1,input
[0]," ");
fetch(ixold
,a
,b
,input
[0],ch
== 'c' ? "! " : "- ");
fetch(ixold
,b
+1,upb
,input
[0]," ");
fetch(ixnew
,lowc
,upd
,input
[1]," ");
fetch(ixnew
,lowc
,c
-1,input
[1]," ");
fetch(ixnew
,c
,d
,input
[1],ch
== 'c' ? "! " : "+ ");
fetch(ixnew
,d
+1,upd
,input
[1]," ");
putchar(a
>b
?'a':c
>d
?'d':'c');
putchar(a
>b
?'a':c
>d
?'d':'c');
if(opt
== D_NORMAL
|| opt
== D_IFDEF
) {
fetch(ixold
,a
,b
,input
[0],"< ", 1);
if(a
<=b
&&c
<=d
&& opt
== D_NORMAL
)
fetch(ixnew
,c
,d
,input
[1],opt
==D_NORMAL
?"> ":"", 0);
if ((opt
==D_EDIT
|| opt
== D_REVERSE
) && c
<=d
)
fprintf(stdout
, "#endif %s\n", endifname
);
printf("%s%d", separator
, b
);
fetch(f
,a
,b
,lb
,s
,oldfile
)
int oneflag
= (*ifdef1
!='\0') != (*ifdef2
!='\0');
* When doing #ifdef's, copy down to current line
* if this is the first file, so that stuff makes it to output.
if (opt
== D_IFDEF
&& oldfile
){
/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
nc
= f
[a
>b
? b
: a
-1 ] - curpos
;
fprintf(stdout
, "#else %s%s\n", oneflag
&& oldfile
==1 ? "!" : "", ifdef2
);
/* There was only one ifdef given */
fprintf(stdout
, "#ifndef %s\n", endifname
);
fprintf(stdout
, "#ifdef %s\n", endifname
);
endifname
= oldfile
? ifdef1
: ifdef2
;
fprintf(stdout
, "#ifdef %s\n", endifname
);
if (inifdef
&& !wantelses
) {
fprintf(stdout
, "#endif %s\n", endifname
);
/*
 * Split a long into two halves of HALFLONG bits:
 *   low(x)  keeps only the bottom HALFLONG bits of x;
 *   high(x) shifts x right by HALFLONG bits.
 * The argument is parenthesized so that an expression argument
 * (e.g. a | b) is evaluated as a whole before the mask or shift is
 * applied, instead of binding to & or >> by operator precedence.
 */
#define low(x)	((x) & ((1L << HALFLONG) - 1))
#define high(x)	((x) >> HALFLONG)
* hashing has the effect of
* arranging line in 7-bit bytes and then
* summing ones' complement in 16-bit hunks
if(!bflag
) for(shift
=0;(t
=getc(f
))!='\n';shift
+=7) {
sum
+= (long)t
<< (shift
&= HALFLONG
- 1);
sum
+= (long)t
<< (shift
&= HALFLONG
- 1);
sum
= low(sum
) + high(sum
);
return((short)low(sum
) + (short)high(sum
));
cnt
= fread(buf
, 1, BUFSIZ
, f
);
if (cnt
>= sizeof (struct exec
)) {
hdr
= *(struct exec
*)buf
;