386BSD 0.1 development
[unix-history] / usr / src / usr.bin / diff / diff.c
/* GNU DIFF main routine.
Copyright (C) 1988, 1989 Free Software Foundation, Inc.
This file is part of GNU DIFF.
GNU DIFF is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.
GNU DIFF is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU DIFF; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
/* GNU DIFF was written by Mike Haertel, David Hayes,
Richard Stallman and Len Tower. */
#define GDIFF_MAIN
#include "regex.h"
#include "diff.h"
#include "getopt.h"
/* Nonzero for -r: if comparing two directories,
compare their common subdirectories recursively. */
int recursive;
/* For debugging: don't do discard_confusing_lines. */
int no_discards;
/* Return a string containing the command options with which diff was invoked.
Spaces appear between what were separate ARGV-elements.
There is a space at the beginning but none at the end.
If there were no options, the result is an empty string.
Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
the length of that vector. */
static char *
option_list (optionvec, count)
char **optionvec; /* Was `vector', but that collides on Alliant. */
int count;
{
int i;
int length = 0;
char *result;
for (i = 0; i < count; i++)
length += strlen (optionvec[i]) + 1;
result = (char *) xmalloc (length + 1);
result[0] = 0;
for (i = 0; i < count; i++)
{
strcat (result, " ");
strcat (result, optionvec[i]);
}
return result;
}
\f
/* The numbers 129 and 130 that appear in the fourth element
for the context and unidiff entries are used as a way of
telling the big switch in `main' how to process those options. */
static struct option longopts[] =
{
{"ignore-blank-lines", 0, 0, 'B'},
{"context", 2, 0, 129},
{"ifdef", 1, 0, 'D'},
{"show-function-line", 1, 0, 'F'},
{"speed-large-files", 0, 0, 'H'},
{"ignore-matching-lines", 1, 0, 'I'},
{"file-label", 1, 0, 'L'},
{"entire-new-files", 0, 0, 'N'},
{"new-files", 0, 0, 'N'},
{"starting-file", 1, 0, 'S'},
{"initial-tab", 0, 0, 'T'},
{"text", 0, 0, 'a'},
{"all-text", 0, 0, 'a'},
{"ascii", 0, 0, 'a'},
{"ignore-space-change", 0, 0, 'b'},
{"minimal", 0, 0, 'd'},
{"ed", 0, 0, 'e'},
{"reversed-ed", 0, 0, 'f'},
{"ignore-case", 0, 0, 'i'},
{"print", 0, 0, 'l'},
{"rcs", 0, 0, 'n'},
{"show-c-function", 0, 0, 'p'},
{"binary", 0, 0, 'q'},
{"brief", 0, 0, 'q'},
{"recursive", 0, 0, 'r'},
{"report-identical-files", 0, 0, 's'},
{"expand-tabs", 0, 0, 't'},
{"ignore-all-space", 0, 0, 'w'},
{"unified", 2, 0, 130},
{"version", 0, 0, 'v'},
{0, 0, 0, 0}
};
main (argc, argv)
int argc;
char *argv[];
{
int val;
int c;
int prev = -1;
int longind;
extern char *version_string;
program = argv[0];
/* Do our initializations. */
output_style = OUTPUT_NORMAL;
always_text_flag = FALSE;
ignore_space_change_flag = FALSE;
ignore_all_space_flag = FALSE;
length_varies = FALSE;
ignore_case_flag = FALSE;
ignore_blank_lines_flag = FALSE;
ignore_regexp = 0;
function_regexp = 0;
print_file_same_flag = FALSE;
entire_new_file_flag = FALSE;
no_details_flag = FALSE;
context = -1;
line_end_char = '\n';
tab_align_flag = FALSE;
tab_expand_flag = FALSE;
recursive = FALSE;
paginate_flag = FALSE;
ifdef_string = NULL;
heuristic = FALSE;
dir_start_file = NULL;
msg_chain = NULL;
msg_chain_end = NULL;
no_discards = 0;
/* Decode the options. */
while ((c = getopt_long (argc, argv,
"0123456789abBcC:dD:efF:hHiI:lL:nNpqrsS:tTuvw",
longopts, &longind)) != EOF)
{
if (c == 0) /* Long option. */
c = longopts[longind].val;
switch (c)
{
/* All digits combine in decimal to specify the context-size. */
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '0':
if (context == -1)
context = 0;
/* If a context length has already been specified,
more digits allowed only if they follow right after the others.
Reject two separate runs of digits, or digits after -C. */
else if (prev < '0' || prev > '9')
fatal ("context length specified twice");
context = context * 10 + c - '0';
break;
case 'a':
/* Treat all files as text files; never treat as binary. */
always_text_flag = 1;
break;
case 'b':
/* Ignore changes in amount of whitespace. */
ignore_space_change_flag = 1;
length_varies = 1;
break;
case 'B':
/* Ignore changes affecting only blank lines. */
ignore_blank_lines_flag = 1;
break;
case 'C':
case 129: /* +context[=lines] */
case 130: /* +unified[=lines] */
if (optarg)
{
if (context >= 0)
fatal ("context length specified twice");
{
char *p;
for (p = optarg; *p; p++)
if (*p < '0' || *p > '9')
fatal ("invalid context length argument");
}
context = atoi (optarg);
}
/* Falls through. */
case 'c':
/* Make context-style output. */
specify_style (c == 130 ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
break;
case 'd':
/* Don't discard lines. This makes things slower (sometimes much
slower) but will find a guaranteed minimal set of changes. */
no_discards = 1;
break;
case 'D':
/* Make merged #ifdef output. */
specify_style (OUTPUT_IFDEF);
ifdef_string = optarg;
break;
case 'e':
/* Make output that is a valid `ed' script. */
specify_style (OUTPUT_ED);
break;
case 'f':
/* Make output that looks vaguely like an `ed' script
but has changes in the order they appear in the file. */
specify_style (OUTPUT_FORWARD_ED);
break;
case 'F':
/* Show, for each set of changes, the previous line that
matches the specified regexp. Currently affects only
context-style output. */
function_regexp = optarg;
break;
case 'h':
/* Split the files into chunks of around 1500 lines
for faster processing. Usually does not change the result.
This currently has no effect. */
break;
case 'H':
/* Turn on heuristics that speed processing of large files
with a small density of changes. */
heuristic = 1;
break;
case 'i':
/* Ignore changes in case. */
ignore_case_flag = 1;
break;
case 'I':
/* Ignore changes affecting only lines that match the
specified regexp. */
ignore_regexp = optarg;
break;
case 'l':
/* Pass the output through `pr' to paginate it. */
paginate_flag = 1;
break;
case 'L':
/* Specify file labels for `-c' output headers. */
if (!file_label[0])
file_label[0] = optarg;
else if (!file_label[1])
file_label[1] = optarg;
else
fatal ("too many file label options");
break;
case 'n':
/* Output RCS-style diffs, like `-f' except that each command
specifies the number of lines affected. */
specify_style (OUTPUT_RCS);
break;
case 'N':
/* When comparing directories, if a file appears only in one
directory, treat it as present but empty in the other. */
entire_new_file_flag = 1;
break;
case 'p':
/* Make context-style output and show name of last C function. */
specify_style (OUTPUT_CONTEXT);
function_regexp = "^[_a-zA-Z]";
break;
case 'q':
no_details_flag = 1;
break;
case 'r':
/* When comparing directories,
recursively compare any subdirectories found. */
recursive = 1;
break;
case 's':
/* Print a message if the files are the same. */
print_file_same_flag = 1;
break;
case 'S':
/* When comparing directories, start with the specified
file name. This is used for resuming an aborted comparison. */
dir_start_file = optarg;
break;
case 't':
/* Expand tabs to spaces in the output so that it preserves
the alignment of the input files. */
tab_expand_flag = 1;
break;
case 'T':
/* Use a tab in the output, rather than a space, before the
text of an input line, so as to keep the proper alignment
in the input line without changing the characters in it. */
tab_align_flag = 1;
break;
case 'v':
printf ("GNU diff version %s\n", version_string);
break;
case 'u':
/* Output the context diff in unidiff format. */
specify_style (OUTPUT_UNIFIED);
break;
case 'w':
/* Ignore horizontal whitespace when comparing lines. */
ignore_all_space_flag = 1;
length_varies = 1;
break;
default:
usage ();
}
prev = c;
}
if (optind != argc - 2)
usage ();
if (ignore_regexp)
{
char *val;
bzero (&ignore_regexp_compiled, sizeof ignore_regexp_compiled);
val = re_compile_pattern (ignore_regexp, strlen (ignore_regexp),
&ignore_regexp_compiled);
if (val != 0)
error ("%s: %s", ignore_regexp, val);
ignore_regexp_compiled.fastmap = (char *) xmalloc (256);
}
if (function_regexp)
{
char *val;
bzero (&function_regexp_compiled, sizeof function_regexp_compiled);
val = re_compile_pattern (function_regexp, strlen (function_regexp),
&function_regexp_compiled);
if (val != 0)
error ("%s: %s", function_regexp, val);
function_regexp_compiled.fastmap = (char *) xmalloc (256);
}
if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
context = 0;
else if (context == -1)
/* Default amount of context for -c. */
context = 3;
switch_string = option_list (argv + 1, optind - 1);
val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
/* Print any messages that were saved up for last. */
print_message_queue ();
if (ferror (stdout) || fclose (stdout) != 0)
fatal ("write error");
exit (val);
}
usage ()
{
fprintf (stderr, "\
Usage: diff [-#] [-abBcdefhHilnNprstTuvw] [-C lines] [-F regexp] [-I regexp]\n\
[-L label [-L label]] [-S file] [-D symbol] [+ignore-blank-lines]\n\
[+context[=lines]] [+unified[=lines]] [+ifdef=symbol]\n\
[+show-function-line=regexp]\n");
fprintf (stderr, "\
[+speed-large-files] [+ignore-matching-lines=regexp] [+new-file]\n\
[+initial-tab] [+starting-file=file] [+text] [+all-text] [+ascii]\n\
[+minimal] [+ignore-space-change] [+ed] [+reversed-ed] [+ignore-case]\n");
fprintf (stderr, "\
[+print] [+rcs] [+show-c-function] [+binary] [+brief] [+recursive]\n\
[+report-identical-files] [+expand-tabs] [+ignore-all-space]\n\
[+file-label=label [+file-label=label]] [+version] path1 path2\n");
exit (2);
}
specify_style (style)
enum output_style style;
{
if (output_style != OUTPUT_NORMAL
&& output_style != style)
error ("conflicting specifications of output style");
output_style = style;
}
\f
/* Compare two files (or dirs) with specified names
DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
(if DIR0 is 0, then the name is just NAME0, etc.)
This is self-contained; it opens the files and closes them.
Value is 0 if files are identical, 1 if different,
2 if there is a problem opening them. */
int
compare_files (dir0, name0, dir1, name1, depth)
char *dir0, *dir1;
char *name0, *name1;
int depth;
{
static char Standard_Input[] = "Standard Input";
struct file_data inf[2];
register int i;
int val;
int errorcount = 0;
int stat_result[2];
/* If this is directory comparison, perhaps we have a file
that exists only in one of the directories.
If so, just print a message to that effect. */
if (! entire_new_file_flag && (name0 == 0 || name1 == 0))
{
char *name = name0 == 0 ? name1 : name0;
char *dir = name0 == 0 ? dir1 : dir0;
message ("Only in %s: %s\n", dir, name);
/* Return 1 so that diff_dirs will return 1 ("some files differ"). */
return 1;
}
/* Mark any nonexistent file with -1 in the desc field. */
/* Mark unopened files (i.e. directories) with -2. */
inf[0].desc = name0 == 0 ? -1 : -2;
inf[1].desc = name1 == 0 ? -1 : -2;
/* Now record the full name of each file, including nonexistent ones. */
if (name0 == 0)
name0 = name1;
if (name1 == 0)
name1 = name0;
inf[0].name = dir0 == 0 ? name0 : concat (dir0, "/", name0);
inf[1].name = dir1 == 0 ? name1 : concat (dir1, "/", name1);
/* Stat the files. Record whether they are directories.
Record in stat_result whether stat fails. */
for (i = 0; i <= 1; i++)
{
bzero (&inf[i].stat, sizeof(struct stat));
inf[i].dir_p = 0;
stat_result[i] = 0;
if (inf[i].desc != -1)
{
char *filename = inf[i].name;
stat_result[i] =
strcmp (filename, "-")
? stat (filename, &inf[i].stat)
: fstat (0, &inf[i].stat);
if (stat_result[i] < 0)
{
perror_with_name (filename);
errorcount = 1;
}
else
inf[i].dir_p =
S_IFDIR == (inf[i].stat.st_mode & S_IFMT)
&& strcmp (filename, "-");
}
}
/* See if the two named files are actually the same physical file.
If so, we know they are identical without actually reading them. */
if (output_style != OUTPUT_IFDEF
&& inf[0].stat.st_ino == inf[1].stat.st_ino
&& inf[0].stat.st_dev == inf[1].stat.st_dev
&& stat_result[0] == 0
&& stat_result[1] == 0)
{
val = 0;
goto done;
}
if (name0 == 0)
inf[0].dir_p = inf[1].dir_p;
if (name1 == 0)
inf[1].dir_p = inf[0].dir_p;
/* Open the files and record their descriptors. */
for (i = 0; i <= 1; i++)
{
if (inf[i].desc == -1)
;
else if (!strcmp (inf[i].name, "-"))
{
inf[i].desc = 0;
inf[i].name = Standard_Input;
}
/* Don't bother opening if stat already failed. */
else if (stat_result[i] == 0 && ! inf[i].dir_p)
{
char *filename = inf[i].name;
inf[i].desc = open (filename, O_RDONLY, 0);
if (0 > inf[i].desc)
{
perror_with_name (filename);
errorcount = 1;
}
}
}
if (errorcount)
{
/* If either file should exist but fails to be opened, return 2. */
val = 2;
}
else if (inf[0].dir_p && inf[1].dir_p)
{
if (output_style == OUTPUT_IFDEF)
fatal ("-D option not supported with directories");
/* If both are directories, compare the files in them. */
if (depth > 0 && !recursive)
{
/* But don't compare dir contents one level down
unless -r was specified. */
message ("Common subdirectories: %s and %s\n",
inf[0].name, inf[1].name);
val = 0;
}
else
{
val = diff_dirs (inf[0].name, inf[1].name,
compare_files, depth, 0, 0);
}
}
else if (depth == 0 && (inf[0].dir_p || inf[1].dir_p))
{
/* If only one is a directory, and it was specified in the command line,
use the file in that dir whose basename matches the other file. */
int dir_arg = (inf[0].dir_p ? 0 : 1);
int fnm_arg = (inf[0].dir_p ? 1 : 0);
char *p = rindex (inf[fnm_arg].name, '/');
char *filename = concat (inf[dir_arg].name, "/",
(p ? p+1 : inf[fnm_arg].name));
if (inf[fnm_arg].name == Standard_Input)
fatal ("can't compare - to a directory");
inf[dir_arg].desc = open (filename, O_RDONLY, 0);
if (0 > inf[dir_arg].desc)
{
perror_with_name (filename);
val = 2;
}
else
{
/* JF: patch from the net to check and make sure we can really free
this. If it's from argv[], freeing it is a *really* bad idea */
if (0 != (dir_arg ? dir1 : dir0))
free (inf[dir_arg].name);
inf[dir_arg].name = filename;
if (fstat (inf[dir_arg].desc, &inf[dir_arg].stat) < 0)
pfatal_with_name (inf[dir_arg].name);
inf[dir_arg].dir_p
= (S_IFDIR == (inf[dir_arg].stat.st_mode & S_IFMT));
if (inf[dir_arg].dir_p)
{
error ("%s is a directory but %s is not",
inf[dir_arg].name, inf[fnm_arg].name);
val = 1;
}
else
val = diff_2_files (inf, depth);
}
}
else if (depth > 0 && (inf[0].dir_p || inf[1].dir_p))
{
/* Perhaps we have a subdirectory that exists only in one directory.
If so, just print a message to that effect. */
if (inf[0].desc == -1 || inf[1].desc == -1)
{
if (entire_new_file_flag && recursive)
val = diff_dirs (inf[0].name, inf[1].name, compare_files, depth,
inf[0].desc == -1, inf[1].desc == -1);
else
{
char *dir = (inf[0].desc == -1) ? dir1 : dir0;
message ("Only in %s: %s\n", dir, name0);
val = 1;
}
}
else
{
/* We have a subdirectory in one directory
and a file in the other. */
if (inf[0].dir_p)
message ("%s is a directory but %s is not\n",
inf[0].name, inf[1].name);
else
message ("%s is a directory but %s is not\n",
inf[1].name, inf[0].name);
/* This is a difference. */
val = 1;
}
}
else
{
/* Both exist and both are ordinary files. */
val = diff_2_files (inf, depth);
}
/* Now the comparison has been done, if no error prevented it,
and VAL is the value this function will return. */
if (inf[0].desc >= 0)
close (inf[0].desc);
if (inf[1].desc >= 0)
close (inf[1].desc);
done:
if (val == 0 && !inf[0].dir_p)
{
if (print_file_same_flag)
message ("Files %s and %s are identical\n",
inf[0].name, inf[1].name);
}
else
fflush (stdout);
if (dir0 != 0)
free (inf[0].name);
if (dir1 != 0)
free (inf[1].name);
return val;
}