do not remove .s files so that .o files need not always be remade
[unix-history] / usr / src / usr.bin / diff / diff / diffreg.c
CommitLineData
87f863b7 1static char sccsid[] = "@(#)diffreg.c 4.3 %G%";
43454f97
BJ
2
3#include "diff.h"
4/*
5 * diff - compare two files.
6 */
7
8/*
9 * Uses an algorithm due to Harold Stone, which finds
10 * a pair of longest identical subsequences in the two
11 * files.
12 *
13 * The major goal is to generate the match vector J.
14 * J[i] is the index of the line in file1 corresponding
15 * to line i file0. J[i] = 0 if there is no
16 * such line in file1.
17 *
18 * Lines are hashed so as to work in core. All potential
19 * matches are located by sorting the lines of each file
20 * on the hash (called ``value''). In particular, this
21 * collects the equivalence classes in file1 together.
22 * Subroutine equiv replaces the value of each line in
23 * file0 by the index of the first element of its
24 * matching equivalence in (the reordered) file1.
25 * To save space equiv squeezes file1 into a single
26 * array member in which the equivalence classes
27 * are simply concatenated, except that their first
28 * members are flagged by changing sign.
29 *
30 * Next the indices that point into member are unsorted into
31 * array class according to the original order of file0.
32 *
33 * The cleverness lies in routine stone. This marches
34 * through the lines of file0, developing a vector klist
35 * of "k-candidates". At step i a k-candidate is a matched
36 * pair of lines x,y (x in file0 y in file1) such that
37 * there is a common subsequence of length k
38 * between the first i lines of file0 and the first y
39 * lines of file1, but there is no such subsequence for
40 * any smaller y. x is the earliest possible mate to y
41 * that occurs in such a subsequence.
42 *
43 * Whenever any of the members of the equivalence class of
44 * lines in file1 matable to a line in file0 has serial number
45 * less than the y of some k-candidate, that k-candidate
46 * with the smallest such y is replaced. The new
47 * k-candidate is chained (via pred) to the current
48 * k-1 candidate so that the actual subsequence can
49 * be recovered. When a member has serial number greater
50 * that the y of all k-candidates, the klist is extended.
51 * At the end, the longest subsequence is pulled out
52 * and placed in the array J by unravel
53 *
54 * With J in hand, the matches there recorded are
55 * check'ed against reality to assure that no spurious
56 * matches have crept in due to hashing. If they have,
57 * they are broken, and "jackpot" is recorded--a harmless
58 * matter except that a true match for a spuriously
59 * mated line may now be unnecessarily reported as a change.
60 *
61 * Much of the complexity of the program comes simply
62 * from trying to minimize core utilization and
63 * maximize the range of doable problems by dynamically
64 * allocating what is needed and reusing what is not.
65 * The core requirements for problems larger than somewhat
66 * are (in words) 2*length(file0) + length(file1) +
67 * 3*(number of k-candidates installed), typically about
68 * 6n words for files of length n.
69 */
70
71#define prints(s) fputs(s,stdout)
72
73FILE *input[2];
74FILE *fopen();
75
76struct cand {
77 int x;
78 int y;
79 int pred;
80} cand;
81struct line {
82 int serial;
83 int value;
84} *file[2], line;
85int len[2];
86struct line *sfile[2]; /* shortened by pruning common prefix and suffix */
87int slen[2];
88int pref, suff; /* length of prefix and suffix */
89int *class; /* will be overlaid on file[0] */
90int *member; /* will be overlaid on file[1] */
91int *klist; /* will be overlaid on file[0] after class */
92struct cand *clist; /* merely a free storage pot for candidates */
93int clen = 0;
94int *J; /* will be overlaid on class */
95long *ixold; /* will be overlaid on klist */
96long *ixnew; /* will be overlaid on file[1] */
97
98diffreg()
99{
87f863b7
KM
100 register int i, j;
101 FILE *f1, *f2;
102 char buf1[BUFSIZ], buf2[BUFSIZ];
43454f97
BJ
103
104 if (hflag) {
105 diffargv[0] = "diffh";
106 execv(diffh, diffargv);
107 fprintf(stderr, "diff: ");
108 perror(diffh);
109 done();
110 }
111 dummy = malloc(1);
112 if ((stb1.st_mode & S_IFMT) == S_IFDIR)
113 file1 = splice(file1, file2);
114 else if ((stb2.st_mode & S_IFMT) == S_IFDIR)
115 file2 = splice(file2, file1);
116 else if (!strcmp(file1, "-")) {
117 if (!strcmp(file2, "-")) {
118 fprintf(stderr, "diff: can't specify - -\n");
119 done();
120 }
121 file1 = copytemp();
122 } else if (!strcmp(file2, "-"))
123 file2 = copytemp();
87f863b7
KM
124 if ((f1 = fopen(file1, "r")) == NULL) {
125 fprintf(stderr, "diff: ");
126 perror(file1);
127 fclose(f1);
128 done();
129 }
130 if ((f2 = fopen(file2, "r")) == NULL) {
131 fprintf(stderr, "diff: ");
132 perror(file2);
133 fclose(f1);
134 fclose(f2);
135 done();
136 }
137 if (stb1.st_size != stb2.st_size)
138 goto notsame;
139 for (;;) {
140 i = fread(buf1, BUFSIZ, 1, f1);
141 j = fread(buf2, BUFSIZ, 1, f2);
142 if (i < 0 || j < 0 || i != j)
143 goto notsame;
144 if (i == 0 && j == 0) {
145 fclose(f1);
146 fclose(f2);
147 goto same;
148 }
149 for (j = 0; j < i; j++)
150 if (buf1[j] != buf2[j])
151 goto notsame;
152 }
153notsame:
154 if (!ascii(fileno(f1)) || !ascii(fileno(f2))) {
155 printf("Binary files %s and %s differ\n", file1, file2);
156 fclose(f1);
157 fclose(f2);
158 done();
159 }
160 prepare(0, f1);
161 prepare(1, f2);
162 fclose(f1);
163 fclose(f2);
43454f97
BJ
164 prune();
165 sort(sfile[0],slen[0]);
166 sort(sfile[1],slen[1]);
167
168 member = (int *)file[1];
169 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
170 member = (int *)ralloc((char *)member,(slen[1]+2)*sizeof(int));
171
172 class = (int *)file[0];
173 unsort(sfile[0], slen[0], class);
174 class = (int *)ralloc((char *)class,(slen[0]+2)*sizeof(int));
175
176 klist = (int *)talloc((slen[0]+2)*sizeof(int));
177 clist = (struct cand *)talloc(sizeof(cand));
87f863b7 178 i = stone(class, slen[0], member, klist);
43454f97
BJ
179 free((char *)member);
180 free((char *)class);
181
182 J = (int *)talloc((len[0]+2)*sizeof(int));
87f863b7 183 unravel(klist[i]);
43454f97
BJ
184 free((char *)clist);
185 free((char *)klist);
186
187 ixold = (long *)talloc((len[0]+2)*sizeof(long));
188 ixnew = (long *)talloc((len[1]+2)*sizeof(long));
189 check();
190 output();
191 status = anychange;
87f863b7 192same:
43454f97
BJ
193 if (opt == D_CONTEXT && anychange == 0)
194 printf("No differences encountered\n");
195 done();
196}
197
198char *
199copytemp()
200{
201 char buf[BUFSIZ];
202 register int i, f;
203
204 signal(SIGHUP,done);
205 signal(SIGINT,done);
206 signal(SIGPIPE,done);
207 signal(SIGTERM,done);
208 tempfile = mktemp("/tmp/dXXXXX");
209 f = creat(tempfile,0600);
210 if (f < 0) {
804c14e9 211 fprintf(stderr, "diff: ");
43454f97
BJ
212 perror(tempfile);
213 done();
214 }
215 while ((i = read(0,buf,BUFSIZ)) > 0)
216 if (write(f,buf,i) != i) {
217 fprintf(stderr, "diff: ");
218 perror(tempfile);
219 done();
220 }
221 close(f);
222 return (tempfile);
223}
224
225char *
226splice(dir, file)
227 char *dir, *file;
228{
229 char *tail;
230 char buf[BUFSIZ];
231
232 if (!strcmp(file, "-")) {
233 fprintf(stderr, "diff: can't specify - with other arg directory\n");
234 done();
235 }
236 tail = rindex(file, '/');
237 if (tail == 0)
238 tail = file;
239 else
240 tail++;
241 sprintf(buf, "%s/%s", dir, tail);
242 return (savestr(buf));
243}
244
87f863b7
KM
245prepare(i, fd)
246 int i;
247 FILE *fd;
43454f97
BJ
248{
249 register struct line *p;
250 register j,h;
87f863b7
KM
251
252 input[i] = fd;
253 fseek(fd, (long)0, 0);
43454f97 254 p = (struct line *)talloc(3*sizeof(line));
87f863b7 255 for(j=0; h=readhash(fd);) {
43454f97
BJ
256 p = (struct line *)ralloc((char *)p,(++j+3)*sizeof(line));
257 p[j].value = h;
258 }
259 len[i] = j;
260 file[i] = p;
43454f97
BJ
261}
262
263prune()
264{
265 register i,j;
266 for(pref=0;pref<len[0]&&pref<len[1]&&
267 file[0][pref+1].value==file[1][pref+1].value;
268 pref++ ) ;
269 for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
270 file[0][len[0]-suff].value==file[1][len[1]-suff].value;
271 suff++) ;
272 for(j=0;j<2;j++) {
273 sfile[j] = file[j]+pref;
274 slen[j] = len[j]-pref-suff;
275 for(i=0;i<=slen[j];i++)
276 sfile[j][i].serial = i;
277 }
278}
279
280equiv(a,n,b,m,c)
281struct line *a, *b;
282int *c;
283{
284 register int i, j;
285 i = j = 1;
286 while(i<=n && j<=m) {
287 if(a[i].value <b[j].value)
288 a[i++].value = 0;
289 else if(a[i].value == b[j].value)
290 a[i++].value = j;
291 else
292 j++;
293 }
294 while(i <= n)
295 a[i++].value = 0;
296 b[m+1].value = 0;
297 j = 0;
298 while(++j <= m) {
299 c[j] = -b[j].serial;
300 while(b[j+1].value == b[j].value) {
301 j++;
302 c[j] = b[j].serial;
303 }
304 }
305 c[j] = -1;
306}
307
308stone(a,n,b,c)
309int *a;
310int *b;
311int *c;
312{
313 register int i, k,y;
314 int j, l;
315 int oldc, tc;
316 int oldl;
317 k = 0;
318 c[0] = newcand(0,0,0);
319 for(i=1; i<=n; i++) {
320 j = a[i];
321 if(j==0)
322 continue;
323 y = -b[j];
324 oldl = 0;
325 oldc = c[0];
326 do {
327 if(y <= clist[oldc].y)
328 continue;
329 l = search(c, k, y);
330 if(l!=oldl+1)
331 oldc = c[l-1];
332 if(l<=k) {
333 if(clist[c[l]].y <= y)
334 continue;
335 tc = c[l];
336 c[l] = newcand(i,y,oldc);
337 oldc = tc;
338 oldl = l;
339 } else {
340 c[l] = newcand(i,y,oldc);
341 k++;
342 break;
343 }
344 } while((y=b[++j]) > 0);
345 }
346 return(k);
347}
348
349newcand(x,y,pred)
350{
351 register struct cand *q;
352 clist = (struct cand *)ralloc((char *)clist,++clen*sizeof(cand));
353 q = clist + clen -1;
354 q->x = x;
355 q->y = y;
356 q->pred = pred;
357 return(clen-1);
358}
359
360search(c, k, y)
361int *c;
362{
363 register int i, j, l;
364 int t;
365 if(clist[c[k]].y<y) /*quick look for typical case*/
366 return(k+1);
367 i = 0;
368 j = k+1;
369 while((l=(i+j)/2) > i) {
370 t = clist[c[l]].y;
371 if(t > y)
372 j = l;
373 else if(t < y)
374 i = l;
375 else
376 return(l);
377 }
378 return(l+1);
379}
380
381unravel(p)
382{
383 register int i;
384 register struct cand *q;
385 for(i=0; i<=len[0]; i++)
386 J[i] = i<=pref ? i:
387 i>len[0]-suff ? i+len[1]-len[0]:
388 0;
389 for(q=clist+p;q->y!=0;q=clist+q->pred)
390 J[q->x+pref] = q->y+pref;
391}
392
393/* check does double duty:
3941. ferret out any fortuitous correspondences due
395to confounding by hashing (which result in "jackpot")
3962. collect random access indexes to the two files */
397
398check()
399{
400 register int i, j;
401 int jackpot;
402 long ctold, ctnew;
403 char c,d;
404 input[0] = fopen(file1,"r");
405 input[1] = fopen(file2,"r");
406 j = 1;
407 ixold[0] = ixnew[0] = 0;
408 jackpot = 0;
409 ctold = ctnew = 0;
410 for(i=1;i<=len[0];i++) {
411 if(J[i]==0) {
412 ixold[i] = ctold += skipline(0);
413 continue;
414 }
415 while(j<J[i]) {
416 ixnew[j] = ctnew += skipline(1);
417 j++;
418 }
419 for(;;) {
420 c = getc(input[0]);
421 d = getc(input[1]);
422 ctold++;
423 ctnew++;
424 if(bflag && isspace(c) && isspace(d)) {
425 do {
426 if(c=='\n') break;
427 ctold++;
428 } while(isspace(c=getc(input[0])));
429 do {
430 if(d=='\n') break;
431 ctnew++;
432 } while(isspace(d=getc(input[1])));
433 }
434 if(c!=d) {
435 jackpot++;
436 J[i] = 0;
437 if(c!='\n')
438 ctold += skipline(0);
439 if(d!='\n')
440 ctnew += skipline(1);
441 break;
442 }
443 if(c=='\n')
444 break;
445 }
446 ixold[i] = ctold;
447 ixnew[j] = ctnew;
448 j++;
449 }
450 for(;j<=len[1];j++) {
451 ixnew[j] = ctnew += skipline(1);
452 }
453 fclose(input[0]);
454 fclose(input[1]);
455/*
456 if(jackpot)
457 fprintf(stderr, "jackpot\n");
458*/
459}
460
461sort(a,n) /*shellsort CACM #201*/
462struct line *a;
463{
464 struct line w;
465 register int j,m;
466 struct line *ai;
467 register struct line *aim;
468 int k;
469 for(j=1;j<=n;j*= 2)
470 m = 2*j - 1;
471 for(m/=2;m!=0;m/=2) {
472 k = n-m;
473 for(j=1;j<=k;j++) {
474 for(ai = &a[j]; ai > a; ai -= m) {
475 aim = &ai[m];
476 if(aim < ai)
477 break; /*wraparound*/
478 if(aim->value > ai[0].value ||
479 aim->value == ai[0].value &&
480 aim->serial > ai[0].serial)
481 break;
482 w.value = ai[0].value;
483 ai[0].value = aim->value;
484 aim->value = w.value;
485 w.serial = ai[0].serial;
486 ai[0].serial = aim->serial;
487 aim->serial = w.serial;
488 }
489 }
490 }
491}
492
493unsort(f, l, b)
494struct line *f;
495int *b;
496{
497 register int *a;
498 register int i;
499 a = (int *)talloc((l+1)*sizeof(int));
500 for(i=1;i<=l;i++)
501 a[f[i].serial] = f[i].value;
502 for(i=1;i<=l;i++)
503 b[i] = a[i];
504 free((char *)a);
505}
506
507skipline(f)
508{
509 register i;
510 for(i=1;getc(input[f])!='\n';i++) ;
511 return(i);
512}
513
514output()
515{
516 int m;
517 register int i0, i1, j1;
518 int j0;
519 input[0] = fopen(file1,"r");
520 input[1] = fopen(file2,"r");
521 m = len[0];
522 J[0] = 0;
523 J[m+1] = len[1]+1;
524 if(opt!=D_EDIT) for(i0=1;i0<=m;i0=i1+1) {
525 while(i0<=m&&J[i0]==J[i0-1]+1) i0++;
526 j0 = J[i0-1]+1;
527 i1 = i0-1;
528 while(i1<m&&J[i1+1]==0) i1++;
529 j1 = J[i1+1]-1;
530 J[i1] = j1;
531 change(i0,i1,j0,j1);
532 } else for(i0=m;i0>=1;i0=i1-1) {
533 while(i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--;
534 j0 = J[i0+1]-1;
535 i1 = i0+1;
536 while(i1>1&&J[i1-1]==0) i1--;
537 j1 = J[i1-1]+1;
538 J[i1] = j1;
539 change(i1,i0,j1,j0);
540 }
541 if(m==0)
542 change(1,0,1,len[1]);
543 if (opt==D_IFDEF) {
544 for (;;) {
545#define c i0
546 c = getc(input[0]);
547 if (c < 0)
548 return;
549 putchar(c);
550 }
551#undef c
552 }
553}
554
555/* indicate that there is a difference between lines a and b of the from file
556 to get to lines c to d of the to file.
557 If a is greater then b then there are no lines in the from file involved
558 and this means that there were lines appended (beginning at b).
559 If c is greater than d then there are lines missing from the to file.
560*/
561change(a,b,c,d)
562{
563 char ch;
564 int lowa,upb,lowc,upd;
565 struct stat stbuf;
566
567 if (opt != D_IFDEF && a>b && c>d)
568 return;
569 if (anychange == 0) {
570 anychange = 1;
571 if(opt == D_CONTEXT) {
572 printf("*** %s ", file1);
573 stat(file1, &stbuf);
574 printf("%s--- %s ",
575 ctime(&stbuf.st_mtime), file2);
576 stat(file2, &stbuf);
577 printf("%s", ctime(&stbuf.st_mtime));
578 }
579 }
580 if (a <= b && c <= d)
581 ch = 'c';
582 else
583 ch = (a <= b) ? 'd' : 'a';
584 if(opt == D_CONTEXT) {
585 lowa = max(1, a-context);
586 upb = min(len[0], b+context);
587 lowc = max(1, c-context);
588 upd = min(len[1], d+context);
589
590 /* print out from file */
591 printf("***************\n*** ");
592 range(lowa,upb,",");
593 printf("\n");
594 if (ch == 'a')
595 fetch(ixold,lowa,upb,input[0]," ");
596 else {
597 fetch(ixold,lowa,a-1,input[0]," ");
598 fetch(ixold,a,b,input[0],ch == 'c' ? "! " : "- ");
599 fetch(ixold,b+1,upb,input[0]," ");
600 }
601 putchar('\n');
602 printf("--- ");
603 range(lowc,upd,",");
604 printf(" -----\n");
605 if (ch == 'd')
606 fetch(ixnew,lowc,upd,input[1]," ");
607 else {
608 fetch(ixnew,lowc,c-1,input[1]," ");
609 fetch(ixnew,c,d,input[1],ch == 'c' ? "! " : "+ ");
610 fetch(ixnew,d+1,upd,input[1]," ");
611 }
612 return;
613 }
614 switch (opt) {
615
616 case D_NORMAL:
617 case D_EDIT:
618 range(a,b,",");
619 putchar(a>b?'a':c>d?'d':'c');
620 if(opt==D_NORMAL)
621 range(c,d,",");
622 putchar('\n');
623 break;
624 case D_REVERSE:
625 putchar(a>b?'a':c>d?'d':'c');
626 range(a,b," ");
627 putchar('\n');
628 break;
629 }
630 if(opt == D_NORMAL || opt == D_IFDEF) {
631 fetch(ixold,a,b,input[0],"< ", 1);
632 if(a<=b&&c<=d && opt == D_NORMAL)
633 prints("---\n");
634 }
635 fetch(ixnew,c,d,input[1],opt==D_NORMAL?"> ":"", 0);
636 if ((opt ==D_EDIT || opt == D_REVERSE) && c<=d)
637 prints(".\n");
638 if (inifdef) {
639 fprintf(stdout, "#endif %s\n", endifname);
640 inifdef = 0;
641 }
642}
643
644range(a,b,separator)
645char *separator;
646{
647 printf("%d", a>b?b:a);
648 if(a<b) {
649 printf("%s%d", separator, b);
650 }
651}
652
653fetch(f,a,b,lb,s,oldfile)
654long *f;
655FILE *lb;
656char *s;
657{
658 register int i, j;
659 register int nc;
660 int oneflag = (*ifdef1!='\0') != (*ifdef2!='\0');
661
662 /*
663 * When doing #ifdef's, copy down to current line
664 * if this is the first file, so that stuff makes it to output.
665 */
666 if (opt == D_IFDEF && oldfile){
667 long curpos = ftell(lb);
668 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
669 nc = f[a>b? b : a-1 ] - curpos;
670 for (i = 0; i < nc; i++)
671 putchar(getc(lb));
672 }
673 if (a > b)
674 return;
675 if (opt == D_IFDEF) {
676 if (inifdef)
677 fprintf(stdout, "#else %s%s\n", oneflag && oldfile==1 ? "!" : "", ifdef2);
678 else {
679 if (oneflag) {
680 /* There was only one ifdef given */
681 endifname = ifdef2;
682 if (oldfile)
683 fprintf(stdout, "#ifndef %s\n", endifname);
684 else
685 fprintf(stdout, "#ifdef %s\n", endifname);
686 }
687 else {
688 endifname = oldfile ? ifdef1 : ifdef2;
689 fprintf(stdout, "#ifdef %s\n", endifname);
690 }
691 }
692 inifdef = 1+oldfile;
693 }
694 for(i=a;i<=b;i++) {
695 fseek(lb,f[i-1],0);
696 nc = f[i]-f[i-1];
697 if (opt != D_IFDEF)
698 prints(s);
699 for(j=0;j<nc;j++)
700 putchar(getc(lb));
701 }
702 if (inifdef && !wantelses) {
703 fprintf(stdout, "#endif %s\n", endifname);
704 inifdef = 0;
705 }
706}
707
708#define HALFLONG 16
709#define low(x) (x&((1L<<HALFLONG)-1))
710#define high(x) (x>>HALFLONG)
711
712/*
713 * hashing has the effect of
714 * arranging line in 7-bit bytes and then
715 * summing 1-s complement in 16-bit hunks
716 */
717readhash(f)
718FILE *f;
719{
720 long sum;
721 register unsigned shift;
722 register space;
723 register t;
724 sum = 1;
725 space = 0;
726 if(!bflag) for(shift=0;(t=getc(f))!='\n';shift+=7) {
727 if(t==-1)
728 return(0);
729 sum += (long)t << (shift%=HALFLONG);
730 }
731 else for(shift=0;;) {
732 switch(t=getc(f)) {
733 case -1:
734 return(0);
735 case '\t':
736 case ' ':
737 space++;
738 continue;
739 default:
740 if(space) {
741 shift += 7;
742 space = 0;
743 }
744 sum += (long)t << (shift%=HALFLONG);
745 shift += 7;
746 continue;
747 case '\n':
748 break;
749 }
750 break;
751 }
752 sum = low(sum) + high(sum);
753 return((short)low(sum) + (short)high(sum));
754}