static char sccsid
[] = "@(#)diffreg.c 4.19 %G%";
* diff - compare two files.
* Uses an algorithm due to Harold Stone, which finds
* a pair of longest identical subsequences in the two files.
* The major goal is to generate the match vector J.
* J[i] is the index of the line in file1 corresponding
* to line i of file0. J[i] = 0 if there is no
* such line in file1.
* Lines are hashed so as to work in core. All potential
* matches are located by sorting the lines of each file
* on the hash (called ``value''). In particular, this
* collects the equivalence classes in file1 together.
* Subroutine equiv replaces the value of each line in
* file0 by the index of the first element of its
* matching equivalence in (the reordered) file1.
* To save space equiv squeezes file1 into a single
* array member in which the equivalence classes
* are simply concatenated, except that their first
* members are flagged by changing sign.
* Next the indices that point into member are unsorted into
* array class according to the original order of file0.
* The cleverness lies in routine stone. This marches
* through the lines of file0, developing a vector klist
* of "k-candidates". At step i a k-candidate is a matched
* pair of lines x,y (x in file0 y in file1) such that
* there is a common subsequence of length k
* between the first i lines of file0 and the first y
* lines of file1, but there is no such subsequence for
* any smaller y. x is the earliest possible mate to y
* that occurs in such a subsequence.
* Whenever any of the members of the equivalence class of
* lines in file1 matable to a line in file0 has serial number
* less than the y of some k-candidate, that k-candidate
* with the smallest such y is replaced. The new
* k-candidate is chained (via pred) to the current
* k-1 candidate so that the actual subsequence can
* be recovered. When a member has serial number greater
* than the y of all k-candidates, the klist is extended.
* At the end, the longest subsequence is pulled out
* and placed in the array J by unravel.
* With J in hand, the matches there recorded are
* check'ed against reality to assure that no spurious
* matches have crept in due to hashing. If they have,
* they are broken, and "jackpot" is recorded--a harmless
* matter except that a true match for a spuriously
* mated line may now be unnecessarily reported as a change.
* Much of the complexity of the program comes simply
* from trying to minimize core utilization and
* maximize the range of doable problems by dynamically
* allocating what is needed and reusing what is not.
* The core requirements for problems larger than somewhat
* are (in words) 2*length(file0) + length(file1) +
* 3*(number of k-candidates installed), typically about
* 6n words for files of length n.
/* prints(s): write the string s to stdout via fputs (no newline is appended). */
#define prints(s)	fputs((s), stdout)
/*
 * File-scope working storage.  Several of these arrays are overlaid on
 * one another's storage; the overlay relationship is noted per variable.
 */
struct line *sfile[2];	/* the two files, shortened by pruning common prefix and suffix */
int pref, suff;		/* lengths of that common prefix and suffix */
int *class;		/* will be overlaid on file[0] */
int *member;		/* will be overlaid on file[1] */
int *klist;		/* will be overlaid on file[0] after class */
struct cand *clist;	/* merely a free storage pot for candidates */
int *J;			/* will be overlaid on class */
long *ixold;		/* will be overlaid on klist */
long *ixnew;		/* will be overlaid on file[1] */
char *chrtran;		/* translation table for case-folding */
/* chrtran points to one of 2 translation tables:
* cup2low if folding upper to lower case
* clow2low if not folding case
*/
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
/*
 * Upper-to-lower folding table, indexed by byte value.  Only 'A'-'Z'
 * (0x41-0x5a) map to 'a'-'z' (0x61-0x7a); every other byte maps to
 * itself.  The rows for 0x40-0x5f used to be a wholesale copy of the
 * 0x60-0x7f rows, which additionally folded '@' into '`', '[' into '{',
 * '\' into '|', ']' into '}', '^' into '~' and '_' into DEL, so lines
 * differing only in those characters compared equal when folding case.
 */
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
char buf1
[BUFSIZ
], buf2
[BUFSIZ
];
fprintf(stderr
, "diff: ");
chrtran
= (iflag
? cup2low
: clow2low
);
if ((stb1
.st_mode
& S_IFMT
) == S_IFDIR
) {
file1
= splice(file1
, file2
);
if (stat(file1
, &stb1
) < 0) {
fprintf(stderr
, "diff: ");
} else if ((stb2
.st_mode
& S_IFMT
) == S_IFDIR
) {
file2
= splice(file2
, file1
);
if (stat(file2
, &stb2
) < 0) {
fprintf(stderr
, "diff: ");
} else if (!strcmp(file1
, "-")) {
if (!strcmp(file2
, "-")) {
fprintf(stderr
, "diff: can't specify - -\n");
if (stat(file1
, &stb1
) < 0) {
fprintf(stderr
, "diff: ");
} else if (!strcmp(file2
, "-")) {
if (stat(file2
, &stb2
) < 0) {
fprintf(stderr
, "diff: ");
if ((f1
= fopen(file1
, "r")) == NULL
) {
fprintf(stderr
, "diff: ");
if ((f2
= fopen(file2
, "r")) == NULL
) {
fprintf(stderr
, "diff: ");
if (stb1
.st_size
!= stb2
.st_size
)
i
= fread(buf1
, 1, BUFSIZ
, f1
);
j
= fread(buf2
, 1, BUFSIZ
, f2
);
if (i
< 0 || j
< 0 || i
!= j
)
status
= 0; /* files don't differ */
* Files certainly differ at this point; set status accordingly
if (!asciifile(f1
) || !asciifile(f2
)) {
printf("Binary files %s and %s differ\n", file1
, file2
);
equiv(sfile
[0], slen
[0], sfile
[1], slen
[1], member
);
member
= (int *)ralloc((char *)member
,(slen
[1]+2)*sizeof(int));
unsort(sfile
[0], slen
[0], class);
class = (int *)ralloc((char *)class,(slen
[0]+2)*sizeof(int));
klist
= (int *)talloc((slen
[0]+2)*sizeof(int));
clist
= (struct cand
*)talloc(sizeof(cand
));
i
= stone(class, slen
[0], member
, klist
);
J
= (int *)talloc((len
[0]+2)*sizeof(int));
ixold
= (long *)talloc((len
[0]+2)*sizeof(long));
ixnew
= (long *)talloc((len
[1]+2)*sizeof(long));
if (opt
== D_CONTEXT
&& anychange
== 0)
printf("No differences encountered\n");
char *tempfile
= "/tmp/dXXXXX";
f
= creat(tempfile
,0600);
fprintf(stderr
, "diff: ");
while ((i
= read(0,buf
,BUFSIZ
)) > 0)
if (write(f
,buf
,i
) != i
) {
fprintf(stderr
, "diff: ");
if (!strcmp(file
, "-")) {
fprintf(stderr
, "diff: can't specify - with other arg directory\n");
tail
= rindex(file
, '/');
(void)sprintf(buf
, "%s/%s", dir
, tail
);
p
= (struct line
*)talloc(3*sizeof(line
));
for(j
=0; h
=readhash(fd
);) {
p
= (struct line
*)ralloc((char *)p
,(++j
+3)*sizeof(line
));
for(pref
=0;pref
<len
[0]&&pref
<len
[1]&&
file
[0][pref
+1].value
==file
[1][pref
+1].value
;
for(suff
=0;suff
<len
[0]-pref
&&suff
<len
[1]-pref
&&
file
[0][len
[0]-suff
].value
==file
[1][len
[1]-suff
].value
;
slen
[j
] = len
[j
]-pref
-suff
;
if(a
[i
].value
<b
[j
].value
)
else if(a
[i
].value
== b
[j
].value
)
while(b
[j
+1].value
== b
[j
].value
) {
c
[l
] = newcand(i
,y
,oldc
);
c
[l
] = newcand(i
,y
,oldc
);
clist
= (struct cand
*)ralloc((char *)clist
,++clen
*sizeof(cand
));
if(clist
[c
[k
]].y
<y
) /*quick look for typical case*/
i
>len
[0]-suff
? i
+len
[1]-len
[0]:
for(q
=clist
+p
;q
->y
!=0;q
=clist
+q
->pred
)
J
[q
->x
+pref
] = q
->y
+pref
;
/* check does double duty:
1. ferret out any fortuitous correspondences due
to confounding by hashing (which result in "jackpot")
2. collect random access indexes to the two files */
if ((input
[0] = fopen(file1
,"r")) == NULL
) {
if ((input
[1] = fopen(file2
,"r")) == NULL
) {
ixold
[i
] = ctold
+= skipline(0);
ixnew
[j
] = ctnew
+= skipline(1);
if(bflag
|| wflag
|| iflag
) {
if(bflag
&& isspace(c
) && isspace(d
)) {
} while(isspace(c
=getc(input
[0])));
} while(isspace(d
=getc(input
[1])));
while( isspace(c
) && c
!='\n' ) {
while( isspace(d
) && d
!='\n' ) {
if(chrtran
[c
] != chrtran
[d
]) {
if((c
=getc(input
[0])) != (d
=getc(input
[1]))) {
ixnew
[j
] = ctnew
+= skipline(1);
fprintf(stderr, "jackpot\n");
sort(a
,n
) /*shellsort CACM #201*/
register struct line
*aim
;
for(ai
= &a
[j
]; ai
> a
; ai
-= m
) {
if(aim
->value
> ai
[0].value
||
aim
->value
== ai
[0].value
&&
aim
->serial
> ai
[0].serial
)
ai
[0].value
= aim
->value
;
ai
[0].serial
= aim
->serial
;
a
= (int *)talloc((l
+1)*sizeof(int));
a
[f
[i
].serial
] = f
[i
].value
;
for(i
=1;(c
=getc(input
[f
]))!='\n';i
++)
input
[0] = fopen(file1
,"r");
input
[1] = fopen(file2
,"r");
if(opt
!=D_EDIT
) for(i0
=1;i0
<=m
;i0
=i1
+1) {
while(i0
<=m
&&J
[i0
]==J
[i0
-1]+1) i0
++;
while(i1
<m
&&J
[i1
+1]==0) i1
++;
} else for(i0
=m
;i0
>=1;i0
=i1
-1) {
while(i0
>=1&&J
[i0
]==J
[i0
+1]-1&&J
[i0
]!=0) i0
--;
while(i1
>1&&J
[i1
-1]==0) i1
--;
if (anychange
&& opt
== D_CONTEXT
)
* The following struct is used to record change information when
* doing a "context" diff. (See routine "change" to understand the
* highly mnemonic field names.)
int a
; /* start line in old file */
int b
; /* end line in old file */
int c
; /* start line in new file */
int d
; /* end line in new file */
struct context_vec
*context_vec_start
,
/* Indicate that there is a difference between lines a and b of the from file
to get to lines c to d of the to file.
If a is greater than b then there are no lines in the from file involved
and this means that there were lines appended (beginning at b).
If c is greater than d then there are lines missing from the to file.
*/
if (opt
!= D_IFDEF
&& a
>b
&& c
>d
)
printf("*** %s ", file1
);
ctime(&stbuf
.st_mtime
), file2
);
printf("%s", ctime(&stbuf
.st_mtime
));
context_vec_start
= (struct context_vec
*)
sizeof(struct context_vec
));
context_vec_end
= context_vec_start
+ MAX_CONTEXT
;
context_vec_ptr
= context_vec_start
- 1;
ch
= (a
<= b
) ? 'd' : 'a';
* if this new change is within 'context' lines of
* the previous change, just add it to the change
* record. If the record is full or if this
* change is more than 'context' lines from the previous
* change, dump the record, reset it & add the new change.
if ( context_vec_ptr
>= context_vec_end
||
( context_vec_ptr
>= context_vec_start
&&
a
> (context_vec_ptr
->b
+ 2*context
) &&
c
> (context_vec_ptr
->d
+ 2*context
) ) )
putchar(a
>b
?'a':c
>d
?'d':'c');
putchar(a
>b
?'a':c
>d
?'d':'c');
printf("a%d %d\n",b
,d
-c
+1);
printf("d%d %d\n",a
,b
-a
+1);
printf("a%d %d\n",b
, d
-c
+1);
if(opt
== D_NORMAL
|| opt
== D_IFDEF
) {
fetch(ixold
,a
,b
,input
[0],"< ", 1);
if(a
<=b
&&c
<=d
&& opt
== D_NORMAL
)
fetch(ixnew
,c
,d
,input
[1],opt
==D_NORMAL
?"> ":"", 0);
if ((opt
==D_EDIT
|| opt
== D_REVERSE
) && c
<=d
)
fprintf(stdout
, "#endif /* %s */\n", endifname
);
printf("%s%d", separator
, b
);
fetch(f
,a
,b
,lb
,s
,oldfile
)
int oneflag
= (*ifdef1
!='\0') != (*ifdef2
!='\0');
* When doing #ifdef's, copy down to current line
* if this is the first file, so that stuff makes it to output.
if (opt
== D_IFDEF
&& oldfile
){
/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
nc
= f
[a
>b
? b
: a
-1 ] - curpos
;
fprintf(stdout
, "#else /* %s%s */\n", oneflag
&& oldfile
==1 ? "!" : "", ifdef2
);
/* There was only one ifdef given */
fprintf(stdout
, "#ifndef %s\n", endifname
);
fprintf(stdout
, "#ifdef %s\n", endifname
);
endifname
= oldfile
? ifdef1
: ifdef2
;
fprintf(stdout
, "#ifdef %s\n", endifname
);
if (inifdef
&& !wantelses
) {
fprintf(stdout
, "#endif /* %s */\n", endifname
);
#define POW2 /* define only if HALFLONG is 2**n */
/*
 * Split a long into two halves.
 *   low(x)  - x masked down to its bottom HALFLONG bits
 *   high(x) - x shifted right by HALFLONG bits
 * The parameter is parenthesized so that an expression argument such as
 * low(a|b) or high(a+b) is treated as a single operand rather than being
 * regrouped by the precedence of & and >>.
 */
#define low(x) ((x)&((1L<<HALFLONG)-1))
#define high(x) ((x)>>HALFLONG)
* hashing has the effect of
* arranging line in 7-bit bytes and then
* summing 1's complement in 16-bit hunks
for(shift
=0;(t
=getc(f
))!='\n';shift
+=7) {
sum
+= (long)chrtran
[t
] << (shift
for(shift
=0;(t
=getc(f
))!='\n';shift
+=7) {
sum
+= (long)chrtran
[t
] << (shift
sum
= low(sum
) + high(sum
);
return((short)low(sum
) + (short)high(sum
));
cnt
= fread(buf
, 1, BUFSIZ
, f
);
if (cnt
>= sizeof (struct exec
)) {
hdr
= *(struct exec
*)buf
;
/* dump accumulated "context" diff changes */
register struct context_vec
*cvp
= context_vec_start
;
register int lowa
, upb
, lowc
, upd
;
if ( cvp
> context_vec_ptr
)
lowa
= max(1, cvp
->a
- context
);
upb
= min(len
[0], context_vec_ptr
->b
+ context
);
lowc
= max(1, cvp
->c
- context
);
upd
= min(len
[1], context_vec_ptr
->d
+ context
);
printf("***************\n*** ");
* output changes to the "old" file. The first loop suppresses
* output if there were no changes to the "old" file (we'll see
* the "old" lines as context in the "new" list).
for ( ; cvp
<= context_vec_ptr
; cvp
++)
while (cvp
<= context_vec_ptr
) {
a
= cvp
->a
; b
= cvp
->b
; c
= cvp
->c
; d
= cvp
->d
;
ch
= (a
<= b
) ? 'd' : 'a';
fetch(ixold
,lowa
,b
,input
[0]," ");
fetch(ixold
,lowa
,a
-1,input
[0]," ");
fetch(ixold
,a
,b
,input
[0],ch
== 'c' ? "! " : "- ");
fetch(ixold
, b
+1, upb
, input
[0], " ");
/* output changes to the "new" file */
for (cvp
= context_vec_start
; cvp
<= context_vec_ptr
; cvp
++)
while (cvp
<= context_vec_ptr
) {
a
= cvp
->a
; b
= cvp
->b
; c
= cvp
->c
; d
= cvp
->d
;
ch
= (a
<= b
) ? 'd' : 'a';
fetch(ixnew
,lowc
,d
,input
[1]," ");
fetch(ixnew
,lowc
,c
-1,input
[1]," ");
fetch(ixnew
,c
,d
,input
[1],ch
== 'c' ? "! " : "+ ");
fetch(ixnew
, d
+1, upd
, input
[1], " ");
context_vec_ptr
= context_vec_start
- 1;