/* join - join lines of two files on a common field
Copyright (C) 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Written by Mike Haertel, mike@gnu.ai.mit.edu. */
/* Nonzero if C is a space or a horizontal tab.  C may be evaluated
   twice, so avoid arguments with side effects. */
#define isblank(c) ((c) == ' ' || (c) == '\t')
/* Character-class wrappers around isspace and isdigit.  This first pair
   also requires isascii(c) to hold before classifying the character.
   NOTE(review): ISSPACE and ISDIGIT are each defined twice here with
   different bodies; presumably a preprocessor conditional that is not
   visible in this view selects one pair -- confirm. */
#define ISSPACE(c) (isascii(c) && isspace(c))
#define ISDIGIT(c) (isascii(c) && isdigit(c))
/* Variants that call the classification functions directly. */
#define ISSPACE(c) isspace(c)
#define ISDIGIT(c) isdigit(c)
/* Error reporter.  Arguments are, in order: an exit status (the program
   exits only if it is nonzero), an errno value (its system message is
   printed only if it is nonzero), and a printf-style format string with
   optional arguments. */
void error (int, int, char *, ...);
/* Smaller of A and B.  Each argument may be evaluated twice, so avoid
   arguments with side effects. */
#define min(A, B) ((A) < (B) ? (A) : (B))
/* An element of the list describing the format of each output line. */
int file
; /* File to take field from (1 or 2). */
int field
; /* Field number to print. */
char *beg
; /* First character in field. */
char *lim
; /* Character after last character in field. */
/* A line read from an input file. Newlines are not stored. */
char *beg
; /* First character in line. */
char *lim
; /* Character after last character in line. */
int nfields
; /* Number of elements in `fields'. */
/* One or more consecutive lines read from a file that all have the
same join field value. */
int count
; /* Elements used in `lines'. */
int alloc
; /* Elements allocated in `lines'. */
/* If nonzero, print unpairable lines in file 1 or 2. */
static int print_unpairables_1
, print_unpairables_2
;
/* If nonzero, print pairable lines. */
static int print_pairables
;
/* Empty output field filler. */
static char *empty_filler
;
/* Zero-based index of the join field in file 1 and in file 2. */
static int join_field_1
, join_field_2
;
/* List of fields to print. */
/* Last element in `outlist', where a new element can be added. */
struct outlist
*outlist_end
;
/* Tab character separating fields; if this is NUL fields are separated
by any nonempty string of white space, otherwise by exactly one
tab character. */
/* The name this program was run with. */
/* Fill in the `fields' structure in LINE. */
register char *ptr
, *lim
;
line
->fields
= (struct field
*) malloc (nfields
* sizeof (struct field
));
for (i
= 0; ptr
< lim
; ++i
)
line
->fields
= (struct field
*)
realloc ((char *) line
->fields
, nfields
* sizeof (struct field
));
line
->fields
[i
].beg
= ptr
;
while (ptr
< lim
&& *ptr
!= tab
)
line
->fields
[i
].lim
= ptr
;
line
->fields
[i
].beg
= ptr
;
while (ptr
< lim
&& !ISSPACE (*ptr
))
line
->fields
[i
].lim
= ptr
;
while (ptr
< lim
&& ISSPACE (*ptr
))
/* Read a line from FP into LINE and split it into fields.
Return 0 if EOF, 1 otherwise. */
/* Size handed to realloc when the line buffer is resized.  It has static
   storage, so the value persists for the life of the program; it starts
   at 80. */
static int linesize
= 80;
for (i
= 0; (c
= getc (fp
)) != EOF
&& c
!= '\n'; ++i
)
ptr
= realloc (ptr
, linesize
);
line
->lim
= line
->beg
+ i
;
free ((char *) line
->fields
);
seq
->lines
= (struct line
*) malloc (seq
->alloc
* sizeof (struct line
));
/* Read a line from FP and add it to SEQ. Return 0 if EOF, 1 otherwise. */
if (seq
->count
== seq
->alloc
)
seq
->lines
= (struct line
*)
realloc ((char *) seq
->lines
, seq
->alloc
* sizeof (struct line
));
if (get_line (fp
, &seq
->lines
[seq
->count
]))
free ((char *) seq
->lines
);
/* Return <0 if the join field in LINE1 compares less than the one in LINE2;
>0 if it compares greater; 0 if it compares equal. */
char *beg1
, *beg2
; /* Start of field to compare in each file. */
int len1
, len2
; /* Length of fields to compare. */
if (join_field_1
< line1
->nfields
)
beg1
= line1
->fields
[join_field_1
].beg
;
len1
= line1
->fields
[join_field_1
].lim
- line1
->fields
[join_field_1
].beg
;
if (join_field_2
< line2
->nfields
)
beg2
= line2
->fields
[join_field_2
].beg
;
len2
= line2
->fields
[join_field_2
].lim
- line2
->fields
[join_field_2
].beg
;
return len2
== 0 ? 0 : -1;
diff
= memcmp (beg1
, beg2
, min (len1
, len2
));
/* Print field N of LINE if it exists and is nonempty, otherwise
`empty_filler' if it is nonempty. */
len
= line
->fields
[n
].lim
- line
->fields
[n
].beg
;
fwrite (line
->fields
[n
].beg
, 1, len
, stdout
);
fputs (empty_filler
, stdout
);
fputs (empty_filler
, stdout
);
/* Print LINE, with its fields separated by `tab'. */
for (i
= 0; i
< line
->nfields
; ++i
)
if (i
== line
->nfields
- 1)
putchar (tab
? tab
: ' ');
/* Print the join of LINE1 and LINE2. */
prfield (outlist
->field
- 1, outlist
->file
== 1 ? line1
: line2
);
for (o
= outlist
->next
; o
; o
= o
->next
)
putchar (tab
? tab
: ' ');
prfield (o
->field
- 1, o
->file
== 1 ? line1
: line2
);
prfield (join_field_1
, line1
);
for (i
= 0; i
< join_field_1
&& i
< line1
->nfields
; ++i
)
putchar (tab
? tab
: ' ');
for (i
= join_field_1
+ 1; i
< line1
->nfields
; ++i
)
putchar (tab
? tab
: ' ');
for (i
= 0; i
< join_field_2
&& i
< line2
->nfields
; ++i
)
putchar (tab
? tab
: ' ');
for (i
= join_field_2
+ 1; i
< line2
->nfields
; ++i
)
putchar (tab
? tab
: ' ');
/* Print the join of the files in FP1 and FP2. */
int diff
, i
, j
, eof1
, eof2
;
/* Read the first line of each file. */
while (seq1
.count
&& seq2
.count
)
diff
= keycmp (&seq1
.lines
[0], &seq2
.lines
[0]);
freeline (&seq1
.lines
[0]);
freeline (&seq2
.lines
[0]);
/* Keep reading lines from file1 as long as they continue to
match the current line from file2. */
if (!getseq (fp1
, &seq1
))
while (!keycmp (&seq1
.lines
[seq1
.count
- 1], &seq2
.lines
[0]));
/* Keep reading lines from file2 as long as they continue to
match the current line from file1. */
if (!getseq (fp2
, &seq2
))
while (!keycmp (&seq1
.lines
[0], &seq2
.lines
[seq2
.count
- 1]));
for (i
= 0; i
< seq1
.count
- 1; ++i
)
for (j
= 0; j
< seq2
.count
- 1; ++j
)
prjoin (&seq1
.lines
[i
], &seq2
.lines
[j
]);
for (i
= 0; i
< seq1
.count
- 1; ++i
)
freeline (&seq1
.lines
[i
]);
seq1
.lines
[0] = seq1
.lines
[seq1
.count
- 1];
for (i
= 0; i
< seq2
.count
- 1; ++i
)
freeline (&seq2
.lines
[i
]);
seq2
.lines
[0] = seq2
.lines
[seq2
.count
- 1];
if (print_unpairables_1
&& seq1
.count
)
freeline (&seq1
.lines
[0]);
while (get_line (fp1
, &line
))
if (print_unpairables_2
&& seq2
.count
)
freeline (&seq2
.lines
[0]);
while (get_line (fp2
, &line
))
/* Add a field spec for field FIELD of file FILE to `outlist' and return 1,
unless either argument is invalid; then just return 0. */
if (file
< 1 || file
> 2 || field
< 1)
o
= (struct outlist
*) malloc (sizeof (struct outlist
));
/* Add to the end of the list so the fields are in the right order. */
/* Add the comma or blank separated field spec(s) in STR to `outlist'.
Return the number of fields added. */
int file
= -1, field
= -1;
if (*str
== ',' || isblank (*str
))
added
+= add_field (file
, field
);
file
= file
* 10 + *str
- '0';
field
= field
* 10 + *str
- '0';
added
+= add_field (file
, field
);
/* When using getopt_long_only, no long option can start with
a character that is a short option. */
static struct option longopts
[] =
int optc
, prev_optc
= 0, nfiles
, val
;
while ((optc
= getopt_long_only (argc
, argv
, "-a:e:1:2:o:t:v:", longopts
,
error (2, 0, "invalid file number for `-a'");
error (2, 0, "invalid field number for `-1'");
error (2, 0, "invalid field number for `-2'");
error (2, 0, "invalid field number for `-j'");
join_field_1
= join_field_2
= val
- 1;
if (add_field_list (optarg
) == 0)
error (2, 0, "invalid field list for `-o'");
error (2, 0, "invalid file number for `-v'");
case 1: /* Non-option argument. */
/* Might be continuation of args to -o. */
if (add_field_list (optarg
) > 0)
continue; /* Don't change `prev_optc'. */
names
[nfiles
++] = optarg
;
fp1
= strcmp (names
[0], "-") ? fopen (names
[0], "r") : stdin
;
error (1, errno
, "%s", names
[0]);
fp2
= strcmp (names
[1], "-") ? fopen (names
[1], "r") : stdin
;
error (1, errno
, "%s", names
[1]);
error (1, errno
, "both files cannot be standard input");
if ((fp1
== stdin
|| fp2
== stdin
) && fclose (stdin
) == EOF
)
if (ferror (stdout
) || fclose (stdout
) == EOF
)
error (1, 0, "write error");
Usage: %s [-a 1|2] [-v 1|2] [-e empty-string] [-o field-list...] [-t char]\n\
[-j[1|2] field] [-1 field] [-2 field] file1 file2\n",
/* error.c -- error handler for noninteractive utilities
Copyright (C) 1990, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Begin traversal of the variable arguments that follow LASTARG; expands
   directly to va_start. */
#define VA_START(args, lastarg) va_start(args, lastarg)
/* Print the program name and error message MESSAGE, which is a printf-style
format string with optional args.
If ERRNUM is nonzero, print its corresponding system error message.
Exit with status STATUS if it is nonzero. */
error (int status
, int errnum
, char *message
, ...)
extern char *program_name
;
fprintf (stderr
, "%s: ", program_name
);
va_start (args
, message
);
vfprintf (stderr
, message
, args
);
fprintf (stderr
, ": %s", strerror (errnum
));