386BSD 0.1 development
[unix-history] / usr / src / usr.bin / diff / diff.c
CommitLineData
e3754cda
WJ
1/* GNU DIFF main routine.
2 Copyright (C) 1988, 1989 Free Software Foundation, Inc.
3
4This file is part of GNU DIFF.
5
6GNU DIFF is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 1, or (at your option)
9any later version.
10
11GNU DIFF is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU DIFF; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20/* GNU DIFF was written by Mike Haertel, David Hayes,
21 Richard Stallman and Len Tower. */
22
23#define GDIFF_MAIN
24#include "regex.h"
25#include "diff.h"
26#include "getopt.h"
27
28
29/* Nonzero for -r: if comparing two directories,
30 compare their common subdirectories recursively. */
31
32int recursive;
33
34/* For debugging: don't do discard_confusing_lines. */
35
36int no_discards;
37
/* Build one string holding the COUNT command-line words in OPTIONVEC.
   Each word is preceded by a single space, so the result starts with a
   space and has none at the end; with COUNT == 0 the result is "".

   The string is obtained from xmalloc and is returned to the caller.  */

static char *
option_list (optionvec, count)
     char **optionvec;  /* Was `vector', but that collides on Alliant. */
     int count;
{
  int k;
  int needed = 0;
  char *result;
  char *tail;

  /* One byte for the separating space plus the word itself.  */
  for (k = 0; k < count; k++)
    needed += strlen (optionvec[k]) + 1;

  /* Plus one byte for the terminating null.  */
  result = (char *) xmalloc (needed + 1);

  /* TAIL always points at the position where the next byte goes.  */
  tail = result;
  for (k = 0; k < count; k++)
    {
      *tail++ = ' ';
      strcpy (tail, optionvec[k]);
      tail += strlen (optionvec[k]);
    }
  *tail = 0;

  return result;
}
69\f
/* Long option table handed to getopt_long by `main'.

   Each entry is { name, has_arg, flag, val }.  has_arg is 0 when the
   option takes no argument, 1 when it requires one and 2 when the
   argument is optional.  Every flag field is 0, and `main' switches on
   the val field.

   Most entries reuse the letter of the equivalent short option as val.
   The context and unified entries use 129 and 130 instead, so the big
   switch in `main' can tell them apart from -C/-c and -u.  */

static struct option longopts[] =
{
  /* name                      has_arg  flag  val  */
  {"ignore-blank-lines",       0,       0,    'B'},
  {"context",                  2,       0,    129},
  {"ifdef",                    1,       0,    'D'},
  {"show-function-line",       1,       0,    'F'},
  {"speed-large-files",        0,       0,    'H'},
  {"ignore-matching-lines",    1,       0,    'I'},
  {"file-label",               1,       0,    'L'},
  {"entire-new-files",         0,       0,    'N'},
  {"new-files",                0,       0,    'N'},
  {"starting-file",            1,       0,    'S'},
  {"initial-tab",              0,       0,    'T'},
  {"text",                     0,       0,    'a'},
  {"all-text",                 0,       0,    'a'},
  {"ascii",                    0,       0,    'a'},
  {"ignore-space-change",      0,       0,    'b'},
  {"minimal",                  0,       0,    'd'},
  {"ed",                       0,       0,    'e'},
  {"reversed-ed",              0,       0,    'f'},
  {"ignore-case",              0,       0,    'i'},
  {"print",                    0,       0,    'l'},
  {"rcs",                      0,       0,    'n'},
  {"show-c-function",          0,       0,    'p'},
  {"binary",                   0,       0,    'q'},
  {"brief",                    0,       0,    'q'},
  {"recursive",                0,       0,    'r'},
  {"report-identical-files",   0,       0,    's'},
  {"expand-tabs",              0,       0,    't'},
  {"ignore-all-space",         0,       0,    'w'},
  {"unified",                  2,       0,    130},
  {"version",                  0,       0,    'v'},
  /* End-of-table marker.  */
  {0,                          0,       0,    0}
};
108
109main (argc, argv)
110 int argc;
111 char *argv[];
112{
113 int val;
114 int c;
115 int prev = -1;
116 int longind;
117 extern char *version_string;
118
119 program = argv[0];
120
121 /* Do our initializations. */
122 output_style = OUTPUT_NORMAL;
123 always_text_flag = FALSE;
124 ignore_space_change_flag = FALSE;
125 ignore_all_space_flag = FALSE;
126 length_varies = FALSE;
127 ignore_case_flag = FALSE;
128 ignore_blank_lines_flag = FALSE;
129 ignore_regexp = 0;
130 function_regexp = 0;
131 print_file_same_flag = FALSE;
132 entire_new_file_flag = FALSE;
133 no_details_flag = FALSE;
134 context = -1;
135 line_end_char = '\n';
136 tab_align_flag = FALSE;
137 tab_expand_flag = FALSE;
138 recursive = FALSE;
139 paginate_flag = FALSE;
140 ifdef_string = NULL;
141 heuristic = FALSE;
142 dir_start_file = NULL;
143 msg_chain = NULL;
144 msg_chain_end = NULL;
145 no_discards = 0;
146
147 /* Decode the options. */
148
149 while ((c = getopt_long (argc, argv,
150 "0123456789abBcC:dD:efF:hHiI:lL:nNpqrsS:tTuvw",
151 longopts, &longind)) != EOF)
152 {
153 if (c == 0) /* Long option. */
154 c = longopts[longind].val;
155 switch (c)
156 {
157 /* All digits combine in decimal to specify the context-size. */
158 case '1':
159 case '2':
160 case '3':
161 case '4':
162 case '5':
163 case '6':
164 case '7':
165 case '8':
166 case '9':
167 case '0':
168 if (context == -1)
169 context = 0;
170 /* If a context length has already been specified,
171 more digits allowed only if they follow right after the others.
172 Reject two separate runs of digits, or digits after -C. */
173 else if (prev < '0' || prev > '9')
174 fatal ("context length specified twice");
175
176 context = context * 10 + c - '0';
177 break;
178
179 case 'a':
180 /* Treat all files as text files; never treat as binary. */
181 always_text_flag = 1;
182 break;
183
184 case 'b':
185 /* Ignore changes in amount of whitespace. */
186 ignore_space_change_flag = 1;
187 length_varies = 1;
188 break;
189
190 case 'B':
191 /* Ignore changes affecting only blank lines. */
192 ignore_blank_lines_flag = 1;
193 break;
194
195 case 'C':
196 case 129: /* +context[=lines] */
197 case 130: /* +unified[=lines] */
198 if (optarg)
199 {
200 if (context >= 0)
201 fatal ("context length specified twice");
202 {
203 char *p;
204 for (p = optarg; *p; p++)
205 if (*p < '0' || *p > '9')
206 fatal ("invalid context length argument");
207 }
208 context = atoi (optarg);
209 }
210
211 /* Falls through. */
212 case 'c':
213 /* Make context-style output. */
214 specify_style (c == 130 ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
215 break;
216
217 case 'd':
218 /* Don't discard lines. This makes things slower (sometimes much
219 slower) but will find a guaranteed minimal set of changes. */
220 no_discards = 1;
221 break;
222
223 case 'D':
224 /* Make merged #ifdef output. */
225 specify_style (OUTPUT_IFDEF);
226 ifdef_string = optarg;
227 break;
228
229 case 'e':
230 /* Make output that is a valid `ed' script. */
231 specify_style (OUTPUT_ED);
232 break;
233
234 case 'f':
235 /* Make output that looks vaguely like an `ed' script
236 but has changes in the order they appear in the file. */
237 specify_style (OUTPUT_FORWARD_ED);
238 break;
239
240 case 'F':
241 /* Show, for each set of changes, the previous line that
242 matches the specified regexp. Currently affects only
243 context-style output. */
244 function_regexp = optarg;
245 break;
246
247 case 'h':
248 /* Split the files into chunks of around 1500 lines
249 for faster processing. Usually does not change the result.
250
251 This currently has no effect. */
252 break;
253
254 case 'H':
255 /* Turn on heuristics that speed processing of large files
256 with a small density of changes. */
257 heuristic = 1;
258 break;
259
260 case 'i':
261 /* Ignore changes in case. */
262 ignore_case_flag = 1;
263 break;
264
265 case 'I':
266 /* Ignore changes affecting only lines that match the
267 specified regexp. */
268 ignore_regexp = optarg;
269 break;
270
271 case 'l':
272 /* Pass the output through `pr' to paginate it. */
273 paginate_flag = 1;
274 break;
275
276 case 'L':
277 /* Specify file labels for `-c' output headers. */
278 if (!file_label[0])
279 file_label[0] = optarg;
280 else if (!file_label[1])
281 file_label[1] = optarg;
282 else
283 fatal ("too many file label options");
284 break;
285
286 case 'n':
287 /* Output RCS-style diffs, like `-f' except that each command
288 specifies the number of lines affected. */
289 specify_style (OUTPUT_RCS);
290 break;
291
292 case 'N':
293 /* When comparing directories, if a file appears only in one
294 directory, treat it as present but empty in the other. */
295 entire_new_file_flag = 1;
296 break;
297
298 case 'p':
299 /* Make context-style output and show name of last C function. */
300 specify_style (OUTPUT_CONTEXT);
301 function_regexp = "^[_a-zA-Z]";
302 break;
303
304 case 'q':
305 no_details_flag = 1;
306 break;
307
308 case 'r':
309 /* When comparing directories,
310 recursively compare any subdirectories found. */
311 recursive = 1;
312 break;
313
314 case 's':
315 /* Print a message if the files are the same. */
316 print_file_same_flag = 1;
317 break;
318
319 case 'S':
320 /* When comparing directories, start with the specified
321 file name. This is used for resuming an aborted comparison. */
322 dir_start_file = optarg;
323 break;
324
325 case 't':
326 /* Expand tabs to spaces in the output so that it preserves
327 the alignment of the input files. */
328 tab_expand_flag = 1;
329 break;
330
331 case 'T':
332 /* Use a tab in the output, rather than a space, before the
333 text of an input line, so as to keep the proper alignment
334 in the input line without changing the characters in it. */
335 tab_align_flag = 1;
336 break;
337
338 case 'v':
339 printf ("GNU diff version %s\n", version_string);
340 break;
341
342 case 'u':
343 /* Output the context diff in unidiff format. */
344 specify_style (OUTPUT_UNIFIED);
345 break;
346
347 case 'w':
348 /* Ignore horizontal whitespace when comparing lines. */
349 ignore_all_space_flag = 1;
350 length_varies = 1;
351 break;
352
353 default:
354 usage ();
355 }
356 prev = c;
357 }
358
359 if (optind != argc - 2)
360 usage ();
361
362 if (ignore_regexp)
363 {
364 char *val;
365 bzero (&ignore_regexp_compiled, sizeof ignore_regexp_compiled);
366 val = re_compile_pattern (ignore_regexp, strlen (ignore_regexp),
367 &ignore_regexp_compiled);
368 if (val != 0)
369 error ("%s: %s", ignore_regexp, val);
370 ignore_regexp_compiled.fastmap = (char *) xmalloc (256);
371 }
372
373 if (function_regexp)
374 {
375 char *val;
376 bzero (&function_regexp_compiled, sizeof function_regexp_compiled);
377 val = re_compile_pattern (function_regexp, strlen (function_regexp),
378 &function_regexp_compiled);
379 if (val != 0)
380 error ("%s: %s", function_regexp, val);
381 function_regexp_compiled.fastmap = (char *) xmalloc (256);
382 }
383
384 if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
385 context = 0;
386 else if (context == -1)
387 /* Default amount of context for -c. */
388 context = 3;
389
390 switch_string = option_list (argv + 1, optind - 1);
391
392 val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
393
394 /* Print any messages that were saved up for last. */
395 print_message_queue ();
396
397 if (ferror (stdout) || fclose (stdout) != 0)
398 fatal ("write error");
399 exit (val);
400}
401
/* Print a summary of the command-line syntax on stderr, then exit with
   status 2.  Does not return.

   The short-flag summary lists every argument-less letter that the
   getopt string in `main' accepts, and the long names are spelled as
   they appear in the longopts table.  */

usage ()
{
  fprintf (stderr, "\
Usage: diff [-#] [-abBcdefhHilnNpqrstTuvw] [-C lines] [-F regexp] [-I regexp]\n\
       [-L label [-L label]] [-S file] [-D symbol] [+ignore-blank-lines]\n\
       [+context[=lines]] [+unified[=lines]] [+ifdef=symbol]\n\
       [+show-function-line=regexp]\n");
  fprintf (stderr, "\
       [+speed-large-files] [+ignore-matching-lines=regexp] [+new-files]\n\
       [+initial-tab] [+starting-file=file] [+text] [+all-text] [+ascii]\n\
       [+minimal] [+ignore-space-change] [+ed] [+reversed-ed] [+ignore-case]\n");
  fprintf (stderr, "\
       [+print] [+rcs] [+show-c-function] [+binary] [+brief] [+recursive]\n\
       [+report-identical-files] [+expand-tabs] [+ignore-all-space]\n\
       [+file-label=label [+file-label=label]] [+version] path1 path2\n");
  exit (2);
}
419
420specify_style (style)
421 enum output_style style;
422{
423 if (output_style != OUTPUT_NORMAL
424 && output_style != style)
425 error ("conflicting specifications of output style");
426 output_style = style;
427}
428\f
429/* Compare two files (or dirs) with specified names
430 DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
431 (if DIR0 is 0, then the name is just NAME0, etc.)
432 This is self-contained; it opens the files and closes them.
433
434 Value is 0 if files are identical, 1 if different,
435 2 if there is a problem opening them. */
436
437int
438compare_files (dir0, name0, dir1, name1, depth)
439 char *dir0, *dir1;
440 char *name0, *name1;
441 int depth;
442{
443 static char Standard_Input[] = "Standard Input";
444 struct file_data inf[2];
445 register int i;
446 int val;
447 int errorcount = 0;
448 int stat_result[2];
449
450 /* If this is directory comparison, perhaps we have a file
451 that exists only in one of the directories.
452 If so, just print a message to that effect. */
453
454 if (! entire_new_file_flag && (name0 == 0 || name1 == 0))
455 {
456 char *name = name0 == 0 ? name1 : name0;
457 char *dir = name0 == 0 ? dir1 : dir0;
458 message ("Only in %s: %s\n", dir, name);
459 /* Return 1 so that diff_dirs will return 1 ("some files differ"). */
460 return 1;
461 }
462
463 /* Mark any nonexistent file with -1 in the desc field. */
464 /* Mark unopened files (i.e. directories) with -2. */
465
466 inf[0].desc = name0 == 0 ? -1 : -2;
467 inf[1].desc = name1 == 0 ? -1 : -2;
468
469 /* Now record the full name of each file, including nonexistent ones. */
470
471 if (name0 == 0)
472 name0 = name1;
473 if (name1 == 0)
474 name1 = name0;
475
476 inf[0].name = dir0 == 0 ? name0 : concat (dir0, "/", name0);
477 inf[1].name = dir1 == 0 ? name1 : concat (dir1, "/", name1);
478
479 /* Stat the files. Record whether they are directories.
480 Record in stat_result whether stat fails. */
481
482 for (i = 0; i <= 1; i++)
483 {
484 bzero (&inf[i].stat, sizeof(struct stat));
485 inf[i].dir_p = 0;
486 stat_result[i] = 0;
487
488 if (inf[i].desc != -1)
489 {
490 char *filename = inf[i].name;
491
492 stat_result[i] =
493 strcmp (filename, "-")
494 ? stat (filename, &inf[i].stat)
495 : fstat (0, &inf[i].stat);
496
497 if (stat_result[i] < 0)
498 {
499 perror_with_name (filename);
500 errorcount = 1;
501 }
502 else
503 inf[i].dir_p =
504 S_IFDIR == (inf[i].stat.st_mode & S_IFMT)
505 && strcmp (filename, "-");
506 }
507 }
508
509 /* See if the two named files are actually the same physical file.
510 If so, we know they are identical without actually reading them. */
511
512 if (output_style != OUTPUT_IFDEF
513 && inf[0].stat.st_ino == inf[1].stat.st_ino
514 && inf[0].stat.st_dev == inf[1].stat.st_dev
515 && stat_result[0] == 0
516 && stat_result[1] == 0)
517 {
518 val = 0;
519 goto done;
520 }
521
522 if (name0 == 0)
523 inf[0].dir_p = inf[1].dir_p;
524 if (name1 == 0)
525 inf[1].dir_p = inf[0].dir_p;
526
527 /* Open the files and record their descriptors. */
528
529 for (i = 0; i <= 1; i++)
530 {
531 if (inf[i].desc == -1)
532 ;
533 else if (!strcmp (inf[i].name, "-"))
534 {
535 inf[i].desc = 0;
536 inf[i].name = Standard_Input;
537 }
538 /* Don't bother opening if stat already failed. */
539 else if (stat_result[i] == 0 && ! inf[i].dir_p)
540 {
541 char *filename = inf[i].name;
542
543 inf[i].desc = open (filename, O_RDONLY, 0);
544 if (0 > inf[i].desc)
545 {
546 perror_with_name (filename);
547 errorcount = 1;
548 }
549 }
550 }
551
552 if (errorcount)
553 {
554
555 /* If either file should exist but fails to be opened, return 2. */
556
557 val = 2;
558
559 }
560 else if (inf[0].dir_p && inf[1].dir_p)
561 {
562 if (output_style == OUTPUT_IFDEF)
563 fatal ("-D option not supported with directories");
564
565 /* If both are directories, compare the files in them. */
566
567 if (depth > 0 && !recursive)
568 {
569 /* But don't compare dir contents one level down
570 unless -r was specified. */
571 message ("Common subdirectories: %s and %s\n",
572 inf[0].name, inf[1].name);
573 val = 0;
574 }
575 else
576 {
577 val = diff_dirs (inf[0].name, inf[1].name,
578 compare_files, depth, 0, 0);
579 }
580
581 }
582 else if (depth == 0 && (inf[0].dir_p || inf[1].dir_p))
583 {
584
585 /* If only one is a directory, and it was specified in the command line,
586 use the file in that dir whose basename matches the other file. */
587
588 int dir_arg = (inf[0].dir_p ? 0 : 1);
589 int fnm_arg = (inf[0].dir_p ? 1 : 0);
590 char *p = rindex (inf[fnm_arg].name, '/');
591 char *filename = concat (inf[dir_arg].name, "/",
592 (p ? p+1 : inf[fnm_arg].name));
593
594 if (inf[fnm_arg].name == Standard_Input)
595 fatal ("can't compare - to a directory");
596
597 inf[dir_arg].desc = open (filename, O_RDONLY, 0);
598
599 if (0 > inf[dir_arg].desc)
600 {
601 perror_with_name (filename);
602 val = 2;
603 }
604 else
605 {
606 /* JF: patch from the net to check and make sure we can really free
607 this. If it's from argv[], freeing it is a *really* bad idea */
608 if (0 != (dir_arg ? dir1 : dir0))
609 free (inf[dir_arg].name);
610 inf[dir_arg].name = filename;
611 if (fstat (inf[dir_arg].desc, &inf[dir_arg].stat) < 0)
612 pfatal_with_name (inf[dir_arg].name);
613
614 inf[dir_arg].dir_p
615 = (S_IFDIR == (inf[dir_arg].stat.st_mode & S_IFMT));
616 if (inf[dir_arg].dir_p)
617 {
618 error ("%s is a directory but %s is not",
619 inf[dir_arg].name, inf[fnm_arg].name);
620 val = 1;
621 }
622 else
623 val = diff_2_files (inf, depth);
624 }
625
626 }
627 else if (depth > 0 && (inf[0].dir_p || inf[1].dir_p))
628 {
629 /* Perhaps we have a subdirectory that exists only in one directory.
630 If so, just print a message to that effect. */
631
632 if (inf[0].desc == -1 || inf[1].desc == -1)
633 {
634 if (entire_new_file_flag && recursive)
635 val = diff_dirs (inf[0].name, inf[1].name, compare_files, depth,
636 inf[0].desc == -1, inf[1].desc == -1);
637 else
638 {
639 char *dir = (inf[0].desc == -1) ? dir1 : dir0;
640 message ("Only in %s: %s\n", dir, name0);
641 val = 1;
642 }
643 }
644 else
645 {
646 /* We have a subdirectory in one directory
647 and a file in the other. */
648
649 if (inf[0].dir_p)
650 message ("%s is a directory but %s is not\n",
651 inf[0].name, inf[1].name);
652 else
653 message ("%s is a directory but %s is not\n",
654 inf[1].name, inf[0].name);
655 /* This is a difference. */
656 val = 1;
657 }
658 }
659 else
660 {
661
662 /* Both exist and both are ordinary files. */
663
664 val = diff_2_files (inf, depth);
665
666 }
667
668 /* Now the comparison has been done, if no error prevented it,
669 and VAL is the value this function will return. */
670
671 if (inf[0].desc >= 0)
672 close (inf[0].desc);
673 if (inf[1].desc >= 0)
674 close (inf[1].desc);
675
676 done:
677 if (val == 0 && !inf[0].dir_p)
678 {
679 if (print_file_same_flag)
680 message ("Files %s and %s are identical\n",
681 inf[0].name, inf[1].name);
682 }
683 else
684 fflush (stdout);
685
686 if (dir0 != 0)
687 free (inf[0].name);
688 if (dir1 != 0)
689 free (inf[1].name);
690
691 return val;
692}