Commit | Line | Data |
---|---|---|
3922209e BJ |
/* SCCS identification string; not referenced by the code in this file. */
static char *sccsid = "@(#)join.c 4.1 (Berkeley) %G%";
/* join F1 F2 on stuff */
3 | ||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define F1 0		/* index of the first input file */
#define F2 1		/* index of the second input file */
#define NFLD 20		/* max field per line */
/* Compare the join fields of the current line of each file (see cmp()). */
#define comp() cmp(ppi[F1][j1],ppi[F2][j2])

FILE *f[2];		/* the two input streams, indexed by F1/F2 */
char buf[2][BUFSIZ];	/* input lines; input() splits them in place */
char *ppi[2][NFLD];	/* pointers to fields in lines */
char *s1, *s2;		/* assigned once in main(); not read in this file */
int j1 = 1;		/* join on this field of file 1 (made 0-origin in main) */
int j2 = 1;		/* join on this field of file 2 (made 0-origin in main) */
int olist[2*NFLD];	/* output these fields (0-origin after option parsing) */
int olistf[2*NFLD];	/* from these files (F1 or F2) */
int no;			/* number of entries in olist; 0 selects default output */
int sep1 = ' ';		/* default field separator; also used on output */
int sep2 = '\t';	/* second accepted input separator */
char *null = "";	/* printed for missing or empty -o fields (set by -e) */
int unpub1;		/* not referenced in this file */
int unpub2;		/* not referenced in this file */
int aflg;		/* -a bits: 1 = print unpaired file-1 lines, 2 = file-2 */
25 | ||
26 | main(argc, argv) | |
27 | char *argv[]; | |
28 | { | |
29 | int i; | |
30 | int n1, n2; | |
31 | long top2, bot2; | |
32 | long ftell(); | |
33 | ||
34 | while (argc > 1 && argv[1][0] == '-') { | |
35 | if (argv[1][1] == '\0') | |
36 | break; | |
37 | switch (argv[1][1]) { | |
38 | case 'a': | |
39 | switch(argv[1][2]) { | |
40 | case '1': | |
41 | aflg |= 1; | |
42 | break; | |
43 | case '2': | |
44 | aflg |= 2; | |
45 | break; | |
46 | default: | |
47 | aflg |= 3; | |
48 | } | |
49 | break; | |
50 | case 'e': | |
51 | null = argv[2]; | |
52 | argv++; | |
53 | argc--; | |
54 | break; | |
55 | case 't': | |
56 | sep1 = sep2 = argv[1][2]; | |
57 | break; | |
58 | case 'o': | |
59 | for (no = 0; no < 2*NFLD; no++) { | |
60 | if (argv[2][0] == '1' && argv[2][1] == '.') { | |
61 | olistf[no] = F1; | |
62 | olist[no] = atoi(&argv[2][2]); | |
63 | } else if (argv[2][0] == '2' && argv[2][1] == '.') { | |
64 | olist[no] = atoi(&argv[2][2]); | |
65 | olistf[no] = F2; | |
66 | } else | |
67 | break; | |
68 | argc--; | |
69 | argv++; | |
70 | } | |
71 | break; | |
72 | case 'j': | |
73 | if (argv[1][2] == '1') | |
74 | j1 = atoi(argv[2]); | |
75 | else if (argv[1][2] == '2') | |
76 | j2 = atoi(argv[2]); | |
77 | else | |
78 | j1 = j2 = atoi(argv[2]); | |
79 | argc--; | |
80 | argv++; | |
81 | break; | |
82 | } | |
83 | argc--; | |
84 | argv++; | |
85 | } | |
86 | for (i = 0; i < no; i++) | |
87 | olist[i]--; /* 0 origin */ | |
88 | if (argc != 3) | |
89 | error("usage: join [-j1 x -j2 y] [-o list] file1 file2"); | |
90 | j1--; | |
91 | j2--; /* everyone else believes in 0 origin */ | |
92 | s1 = ppi[F1][j1]; | |
93 | s2 = ppi[F2][j2]; | |
94 | if (argv[1][0] == '-') | |
95 | f[F1] = stdin; | |
96 | else if ((f[F1] = fopen(argv[1], "r")) == NULL) | |
97 | error("can't open %s", argv[1]); | |
98 | if ((f[F2] = fopen(argv[2], "r")) == NULL) | |
99 | error("can't open %s", argv[2]); | |
100 | ||
101 | #define get1() n1=input(F1) | |
102 | #define get2() n2=input(F2) | |
103 | get1(); | |
104 | bot2 = ftell(f[F2]); | |
105 | get2(); | |
106 | while(n1>0 && n2>0 || aflg!=0 && n1+n2>0) { | |
107 | if(n1>0 && n2>0 && comp()>0 || n1==0) { | |
108 | if(aflg&2) output(0, n2); | |
109 | bot2 = ftell(f[F2]); | |
110 | get2(); | |
111 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { | |
112 | if(aflg&1) output(n1, 0); | |
113 | get1(); | |
114 | } else /*(n1>0 && n2>0 && comp()==0)*/ { | |
115 | while(n2>0 && comp()==0) { | |
116 | output(n1, n2); | |
117 | top2 = ftell(f[F2]); | |
118 | get2(); | |
119 | } | |
120 | fseek(f[F2], bot2, 0); | |
121 | get2(); | |
122 | get1(); | |
123 | for(;;) { | |
124 | if(n1>0 && n2>0 && comp()==0) { | |
125 | output(n1, n2); | |
126 | get2(); | |
127 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { | |
128 | fseek(f[F2], bot2, 0); | |
129 | get2(); | |
130 | get1(); | |
131 | } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ | |
132 | fseek(f[F2], top2, 0); | |
133 | bot2 = top2; | |
134 | get2(); | |
135 | break; | |
136 | } | |
137 | } | |
138 | } | |
139 | } | |
140 | return(0); | |
141 | } | |
142 | ||
143 | input(n) /* get input line and split into fields */ | |
144 | { | |
145 | register int i, c; | |
146 | char *bp; | |
147 | char **pp; | |
148 | ||
149 | bp = buf[n]; | |
150 | pp = ppi[n]; | |
151 | if (fgets(bp, BUFSIZ, f[n]) == NULL) | |
152 | return(0); | |
153 | for (i = 0; ; i++) { | |
154 | if (sep1 == ' ') /* strip multiples */ | |
155 | while ((c = *bp) == sep1 || c == sep2) | |
156 | bp++; /* skip blanks */ | |
157 | else | |
158 | c = *bp; | |
159 | if (c == '\n' || c == '\0') | |
160 | break; | |
161 | *pp++ = bp; /* record beginning */ | |
162 | while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') | |
163 | bp++; | |
164 | *bp++ = '\0'; /* mark end by overwriting blank */ | |
165 | /* fails badly if string doesn't have \n at end */ | |
166 | } | |
167 | *pp = 0; | |
168 | return(i); | |
169 | } | |
170 | ||
171 | output(on1, on2) /* print items from olist */ | |
172 | int on1, on2; | |
173 | { | |
174 | int i; | |
175 | char *temp; | |
176 | ||
177 | if (no <= 0) { /* default case */ | |
178 | printf("%s", on1? ppi[F1][j1]: ppi[F2][j2]); | |
179 | for (i = 0; i < on1; i++) | |
180 | if (i != j1) | |
181 | printf("%c%s", sep1, ppi[F1][i]); | |
182 | for (i = 0; i < on2; i++) | |
183 | if (i != j2) | |
184 | printf("%c%s", sep1, ppi[F2][i]); | |
185 | printf("\n"); | |
186 | } else { | |
187 | for (i = 0; i < no; i++) { | |
188 | temp = ppi[olistf[i]][olist[i]]; | |
189 | if(olistf[i]==F1 && on1<=olist[i] || | |
190 | olistf[i]==F2 && on2<=olist[i] || | |
191 | *temp==0) | |
192 | temp = null; | |
193 | printf("%s", temp); | |
194 | if (i == no - 1) | |
195 | printf("\n"); | |
196 | else | |
197 | printf("%c", sep1); | |
198 | } | |
199 | } | |
200 | } | |
201 | ||
/*
 * Print "join: " followed by a printf-style message and a newline on
 * standard error, then exit with status 1.  Never returns.
 *
 * s1 is the format; s2..s5 are optional string arguments for it.  The
 * function keeps its old-style (non-prototype) definition because callers
 * in this file pass only as many arguments as their format uses.  All five
 * parameters are declared as pointers so that a string argument is not
 * narrowed to int on its way to fprintf().
 */
int
error(s1, s2, s3, s4, s5)
	char *s1, *s2, *s3, *s4, *s5;
{
	fprintf(stderr, "join: ");
	fprintf(stderr, s1, s2, s3, s4, s5);
	fprintf(stderr, "\n");
	exit(1);
}
210 | ||
/*
 * Compare two join fields as byte strings.
 *
 * Returns a value less than, equal to, or greater than zero when s1 sorts
 * before, the same as, or after s2 (the strcmp() result, so "10" sorts
 * before "9").
 */
int
cmp(const char *s1, const char *s2)
{
	return (strcmp(s1, s2));
}