c7b6507cdc67aa76fa7b39d56732d0ff93c3018e
* Copyright (c) 1991 The Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* Steve Hayman of the Computer Science Department, Indiana University,
* Michiro Hikida and David Goodenough.
* %sccs.include.redist.c%
"@(#) Copyright (c) 1991 The Regents of the University of California.\n\
static char sccsid
[] = "@(#)join.c 5.2 (Berkeley) %G%";
* There's a structure per input file which encapsulates the state of the
* file. We repeatedly read lines from each file until we've read in all
* the consecutive lines from the file with a common join field. Then we
* compare the set of lines with an equivalent set from the other file.
u_long linealloc
; /* line allocated count */
char **fields
; /* line field(s) */
u_long fieldcnt
; /* line field(s) count */
u_long fieldalloc
; /* line field(s) allocated count */
FILE *fp
; /* file descriptor */
u_long joinf
; /* join field (-1, -2, -j) */
int unpair
; /* output unpairable lines (-a) */
int number
; /* 1 for file 1, 2 for file 2 */
LINE
*set
; /* set of lines with same field */
u_long pushback
; /* line on the stack */
u_long setcnt
; /* set count */
u_long setalloc
; /* set allocated count */
/*
 * Initial state for the two input files.  The initializers follow the
 * INPUT member order: fp, joinf, unpair, number, set, pushback, setcnt,
 * setalloc.  `number' has to be 1 for file 1 and 2 for file 2, because
 * output-list entries are matched against it by file number.
 */
INPUT input1 = { NULL, 0, 0, 1, NULL, -1, 0, 0, },
      input2 = { NULL, 0, 0, 2, NULL, -1, 0, 0, };
u_long fileno
; /* file number */
u_long fieldno
; /* field number */
OLIST
*olist
; /* output field list */
u_long olistcnt
; /* output field list count */
u_long olistalloc
; /* output field allocated count */
int joinout
= 1; /* show lines with matched join fields (-v) */
int needsep
; /* need separator character */
int showusage
= 1; /* show usage for usage err() calls */
int spans
= 1; /* span multiple delimiters (-t) */
char *empty
; /* empty field replacement string (-e) */
char *tabchar
= " \t"; /* delimiter characters (-t) */
int cmp
__P((LINE
*, u_long
, LINE
*, u_long
));
void err
__P((const char *, ...));
void fieldarg
__P((char *));
void joinlines
__P((INPUT
*, INPUT
*));
void obsolete
__P((char **));
void outfield
__P((LINE
*, u_long
));
void outoneline
__P((INPUT
*, LINE
*));
void outtwoline
__P((INPUT
*, LINE
*, INPUT
*, LINE
*));
void slurp
__P((INPUT
*));
int aflag
, ch
, cval
, vflag
;
while ((ch
= getopt(argc
, argv
, "\01a:e:j:1:2:o:t:v:")) != EOF
) {
F1
->unpair
= F2
->unpair
= 1;
if ((F1
->joinf
= strtol(optarg
, &end
, 10)) < 1)
err("-1 option field number less than 1");
err("illegal field number -- %s", optarg
);
if ((F2
->joinf
= strtol(optarg
, &end
, 10)) < 1)
err("-2 option field number less than 1");
err("illegal field number -- %s", optarg
);
switch(strtol(optarg
, &end
, 10)) {
err("-a option file number not 1 or 2");
err("illegal file number -- %s", optarg
);
if ((F1
->joinf
= F2
->joinf
=
strtol(optarg
, &end
, 10)) < 1)
err("-j option field number less than 1");
err("illegal field number -- %s", optarg
);
if (strlen(tabchar
= optarg
) != 1)
err("illegal tab character specification");
switch(strtol(optarg
, &end
, 10)) {
err("-v option file number not 1 or 2");
err("illegal file number -- %s", optarg
);
err("-a and -v options mutually exclusive");
/* Open the files; "-" means stdin. */
else if ((F1
->fp
= fopen(*argv
, "r")) == NULL
)
err("%s: %s", *argv
, strerror(errno
));
else if ((F2
->fp
= fopen(*argv
, "r")) == NULL
)
err("%s: %s", *argv
, strerror(errno
));
if (F1
->fp
== stdin
&& F2
->fp
== stdin
)
err("only one input file may be stdin");
while (F1
->setcnt
&& F2
->setcnt
) {
cval
= cmp(F1
->set
, F1
->joinf
, F2
->set
, F2
->joinf
);
/* Oh joy, oh rapture, oh beauty divine! */
/* File 1 takes the lead... */
/* File 2 takes the lead... */
* Now that one of the files is used up, optionally output any
* remaining lines from the other file.
register LINE
*lp
, *lastlp
;
char *bp
, *fieldp
, *token
;
* Read all of the lines from an input file that have the same
for (lastlp
= NULL
;; ++F
->setcnt
, lastlp
= lp
) {
* If we're out of space to hold line structures, allocate
* more. Initialize the structure so that we know that this
if (F
->setcnt
== F
->setalloc
) {
if ((F
->set
= realloc(F
->set
,
F
->setalloc
* sizeof(LINE
))) == NULL
)
/*
 * Zero the newly added LINE structures.  The byte count must be based
 * on the size of the structure itself, not of a pointer to it, or only
 * part of the new region is cleared.
 */
bzero(F->set + cnt, 100 * sizeof(LINE));
* Get any pushed back line, else get the next line. Allocate
* space as necessary. If taking the line from the stack swap
* the two structures so that we don't lose the allocated space.
* This could be avoided by doing another level of indirection,
* but it's probably okay as is.
F
->set
[F
->setcnt
] = F
->set
[F
->pushback
];
F
->set
[F
->pushback
] = tmp
;
if ((bp
= fgetline(F
->fp
, &len
)) == NULL
)
if (lp
->linealloc
<= len
) {
if ((lp
->line
= realloc(lp
->line
,
lp
->linealloc
* sizeof(char))) == NULL
)
bcopy(bp
, lp
->line
, len
);
/* Split the line into fields, allocate space as necessary. */
while ((fieldp
= strsep(&token
, tabchar
)) != NULL
) {
if (spans
&& *fieldp
== '\0')
if (lp
->fieldcnt
== lp
->fieldalloc
) {
if ((lp
->fields
= realloc(lp
->fields
,
lp
->fieldalloc
* sizeof(char *))) == NULL
)
lp
->fields
[lp
->fieldcnt
++] = fieldp
;
/* See if the join field value has changed. */
if (lastlp
!= NULL
&& cmp(lp
, F
->joinf
, lastlp
, F
->joinf
)) {
cmp(lp1
, fieldno1
, lp2
, fieldno2
)
u_long fieldno1
, fieldno2
;
/*
 * Field numbers are zero-based indexes into fields[], so a line has the
 * requested field only when fieldno < fieldcnt.  Compare with <= so that
 * a line with exactly fieldno fields counts as lacking the field instead
 * of being indexed one past its last stored entry.
 */
if (lp1->fieldcnt <= fieldno1)
	return (lp2->fieldcnt <= fieldno2 ? 0 : 1);
if (lp2->fieldcnt <= fieldno2)
return (strcmp(lp1
->fields
[fieldno1
], lp2
->fields
[fieldno2
]));
* Output the results of a join comparison. The output may be from
* either file 1 or file 2 (in which case the first argument is the
* file from which to output) or from both.
for (cnt1
= 0; cnt1
< F1
->setcnt
; ++cnt1
)
outoneline(F1
, &F1
->set
[cnt1
]);
for (cnt1
= 0; cnt1
< F1
->setcnt
; ++cnt1
)
for (cnt2
= 0; cnt2
< F2
->setcnt
; ++cnt2
)
outtwoline(F1
, &F1
->set
[cnt1
], F2
, &F2
->set
[cnt2
]);
* Output a single line from one of the files, according to the
* join rules. This happens when we are writing unmatched single
* lines. Output empty fields in the right places.
for (cnt
= 0; cnt
< olistcnt
; ++cnt
) {
if (olist
[cnt
].fileno
== F
->number
)
outfield(lp
, olist
[cnt
].fieldno
);
for (cnt
= 0; cnt
< lp
->fieldcnt
; ++cnt
)
err("stdout: %s", strerror(errno
));
outtwoline(F1
, lp1
, F2
, lp2
)
register LINE
*lp1
, *lp2
;
/* Output a pair of lines according to the join list (if any). */
for (cnt
= 0; cnt
< olistcnt
; ++cnt
)
if (olist
[cnt
].fileno
== 1)
outfield(lp1
, olist
[cnt
].fieldno
);
else /* if (olist[cnt].fileno == 2) */
outfield(lp2
, olist
[cnt
].fieldno
);
* Output the join field, then the remaining fields from F1
outfield(lp1
, F1
->joinf
);
for (cnt
= 0; cnt
< lp1
->fieldcnt
; ++cnt
)
for (cnt
= 0; cnt
< lp2
->fieldcnt
; ++cnt
)
err("stdout: %s", strerror(errno
));
(void)printf("%c", *tabchar
);
/*
 * fieldno is a zero-based index into lp->fields, so the field exists only
 * when fieldno < fieldcnt; treat fieldno == fieldcnt as a missing field.
 */
if (lp->fieldcnt <= fieldno) {
(void)printf("%s", empty
);
if (*lp
->fields
[fieldno
] == '\0')
(void)printf("%s", lp
->fields
[fieldno
]);
err("stdout: %s", strerror(errno
));
* Convert an output list argument "2.1, 1.3, 2.4" into an array of output
while ((token
= strsep(&option
, " \t")) != NULL
) {
if (token
[0] != '1' && token
[0] != '2' || token
[1] != '.')
err("malformed -o option field");
fieldno
= strtol(token
+ 2, &end
, 10);
err("malformed -o option field");
err("field numbers are 1 based");
if (olistcnt
== olistalloc
) {
if ((olist
= realloc(olist
,
olistalloc
* sizeof(OLIST
))) == NULL
)
olist
[olistcnt
].fileno
= token
[0] - '0';
olist
[olistcnt
].fieldno
= fieldno
- 1;
if (ap
[0] == '-' && ap
[1] == '-')
* The original join allowed "-a", which meant the
* same as -a1 plus -a2. POSIX 1003.2, Draft 11.2
* only specifies this as "-a 1" and "-a 2", so we
* have to use another option flag, one that is
* unlikely to ever be used or accidentally entered
* on the command line. (Well, we could reallocate
* the argv array, but that hardly seems worthwhile.)
* The original join allowed "-j[12] arg" and "-j arg".
* Convert the former to "-[12] arg". Don't convert
* the latter since getopt(3) can handle it.
jbad
: err("illegal option -- %s", ap
);
* The original join allowed "-o arg arg". Convert to
for (p
= argv
+ 2; *p
; ++p
) {
if (p
[0][0] != '1' && p
[0][0] != '2' ||
if (len
- 2 != strspn(*p
+ 2, "0123456789"))
if ((t
= malloc(len
+ 3)) == NULL
)
bcopy(*p
, t
+ 2, len
+ 1);
err("%s", strerror(errno
));
(void)fprintf(stderr
, "%s%s\n",
"usage: join [-a fileno | -v fileno ] [-e string] [-1 field] ",
"[-2 field]\n [-o list] [-t char] file1 file2");
err(const char *fmt
, ...)
(void)fprintf(stderr
, "join: ");
(void)vfprintf(stderr
, fmt
, ap
);
(void)fprintf(stderr
, "\n");