Commit | Line | Data |
---|---|---|
6aa8a75b TL |
1 | /* join F1 F2 on stuff */ |
2 | ||
3 | #include <stdio.h> | |
4 | #define F1 0 /* index of the first input file in f[], buf[] and ppi[] */ | |
5 | #define F2 1 /* index of the second input file in f[], buf[] and ppi[] */ | |
6 | #define NFLD 20 /* max fields per line; second dimension of ppi[] */ | |
7 | #define comp() cmp(ppi[F1][j1],ppi[F2][j2]) /* compare the join fields of the two current lines */ | |
8 | ||
9 | FILE *f[2]; /* the two input streams, opened in main */ | |
10 | char buf[2][BUFSIZ]; /* current input line of each file; input() splits it in place */ | |
11 | char *ppi[2][NFLD]; /* pointers to the start of each field inside buf[] */ | |
12 | char *s1,*s2; /* assigned in main from ppi[] before any line is read; not read anywhere in the visible code */ | |
13 | int j1 = 1; /* join field of file 1; 1-origin from the command line, made 0-origin in main */ | |
14 | int j2 = 1; /* join field of file 2; 1-origin from the command line, made 0-origin in main */ | |
15 | int olist[2*NFLD]; /* field numbers given with -o; made 0-origin in main */ | |
16 | int olistf[2*NFLD]; /* file (F1 or F2) each olist[] entry is taken from */ | |
17 | int no; /* number of entries in olist; 0 selects the default output format */ | |
18 | int sep1 = ' '; /* default field separator; also printed between output fields */ | |
19 | int sep2 = '\t'; /* second input separator; -t sets sep1 and sep2 to the same character */ | |
20 | char* null = ""; /* printed by output() for empty or missing -o fields; replaced by -e */ | |
21 | int unpub1; /* not referenced in the visible code */ | |
22 | int unpub2; /* not referenced in the visible code */ | |
23 | int aflg; /* bit 1 set by -a1, bit 2 by -a2, both by plain -a: print unpaired lines of that file */ | |
24 | ||
25 | main(argc, argv) | |
26 | char *argv[]; | |
27 | { | |
28 | int i; | |
29 | int n1, n2; | |
30 | long top2, bot2; | |
31 | long ftell(); | |
32 | ||
33 | while (argc > 1 && argv[1][0] == '-') { | |
34 | if (argv[1][1] == '\0') | |
35 | break; | |
36 | switch (argv[1][1]) { | |
37 | case 'a': | |
38 | switch(argv[1][2]) { | |
39 | case '1': | |
40 | aflg |= 1; | |
41 | break; | |
42 | case '2': | |
43 | aflg |= 2; | |
44 | break; | |
45 | default: | |
46 | aflg |= 3; | |
47 | } | |
48 | break; | |
49 | case 'e': | |
50 | null = argv[2]; | |
51 | argv++; | |
52 | argc--; | |
53 | break; | |
54 | case 't': | |
55 | sep1 = sep2 = argv[1][2]; | |
56 | break; | |
57 | case 'o': | |
58 | for (no = 0; no < 2*NFLD; no++) { | |
59 | if (argv[2][0] == '1' && argv[2][1] == '.') { | |
60 | olistf[no] = F1; | |
61 | olist[no] = atoi(&argv[2][2]); | |
62 | } else if (argv[2][0] == '2' && argv[2][1] == '.') { | |
63 | olist[no] = atoi(&argv[2][2]); | |
64 | olistf[no] = F2; | |
65 | } else | |
66 | break; | |
67 | argc--; | |
68 | argv++; | |
69 | } | |
70 | break; | |
71 | case 'j': | |
72 | if (argv[1][2] == '1') | |
73 | j1 = atoi(argv[2]); | |
74 | else if (argv[1][2] == '2') | |
75 | j2 = atoi(argv[2]); | |
76 | else | |
77 | j1 = j2 = atoi(argv[2]); | |
78 | argc--; | |
79 | argv++; | |
80 | break; | |
81 | } | |
82 | argc--; | |
83 | argv++; | |
84 | } | |
85 | for (i = 0; i < no; i++) | |
86 | olist[i]--; /* 0 origin */ | |
87 | if (argc != 3) | |
88 | error("usage: join [-j1 x -j2 y] [-o list] file1 file2"); | |
89 | j1--; | |
90 | j2--; /* everyone else believes in 0 origin */ | |
91 | s1 = ppi[F1][j1]; | |
92 | s2 = ppi[F2][j2]; | |
93 | if (argv[1][0] == '-') | |
94 | f[F1] = stdin; | |
95 | else if ((f[F1] = fopen(argv[1], "r")) == NULL) | |
96 | error("can't open %s", argv[1]); | |
97 | if ((f[F2] = fopen(argv[2], "r")) == NULL) | |
98 | error("can't open %s", argv[2]); | |
99 | ||
100 | #define get1() n1=input(F1) | |
101 | #define get2() n2=input(F2) | |
102 | get1(); | |
103 | bot2 = ftell(f[F2]); | |
104 | get2(); | |
105 | while(n1>0 && n2>0 || aflg!=0 && n1+n2>0) { | |
106 | if(n1>0 && n2>0 && comp()>0 || n1==0) { | |
107 | if(aflg&2) output(0, n2); | |
108 | bot2 = ftell(f[F2]); | |
109 | get2(); | |
110 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { | |
111 | if(aflg&1) output(n1, 0); | |
112 | get1(); | |
113 | } else /*(n1>0 && n2>0 && comp()==0)*/ { | |
114 | while(n2>0 && comp()==0) { | |
115 | output(n1, n2); | |
116 | top2 = ftell(f[F2]); | |
117 | get2(); | |
118 | } | |
119 | fseek(f[F2], bot2, 0); | |
120 | get2(); | |
121 | get1(); | |
122 | for(;;) { | |
123 | if(n1>0 && n2>0 && comp()==0) { | |
124 | output(n1, n2); | |
125 | get2(); | |
126 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { | |
127 | fseek(f[F2], bot2, 0); | |
128 | get2(); | |
129 | get1(); | |
130 | } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ | |
131 | fseek(f[F2], top2, 0); | |
132 | bot2 = top2; | |
133 | get2(); | |
134 | break; | |
135 | } | |
136 | } | |
137 | } | |
138 | } | |
139 | return(0); | |
140 | } | |
141 | ||
142 | input(n) /* get input line and split into fields */ | |
143 | { | |
144 | register int i, c; | |
145 | char *bp; | |
146 | char **pp; | |
147 | ||
148 | bp = buf[n]; | |
149 | pp = ppi[n]; | |
150 | if (fgets(bp, BUFSIZ, f[n]) == NULL) | |
151 | return(0); | |
152 | for (i = 0; ; i++) { | |
153 | if (sep1 == ' ') /* strip multiples */ | |
154 | while ((c = *bp) == sep1 || c == sep2) | |
155 | bp++; /* skip blanks */ | |
156 | else | |
157 | c = *bp; | |
158 | if (c == '\n' || c == '\0') | |
159 | break; | |
160 | *pp++ = bp; /* record beginning */ | |
161 | while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') | |
162 | bp++; | |
163 | *bp++ = '\0'; /* mark end by overwriting blank */ | |
164 | /* fails badly if string doesn't have \n at end */ | |
165 | } | |
166 | *pp = 0; | |
167 | return(i); | |
168 | } | |
169 | ||
170 | output(on1, on2) /* print items from olist */ | |
171 | int on1, on2; | |
172 | { | |
173 | int i; | |
174 | char *temp; | |
175 | ||
176 | if (no <= 0) { /* default case */ | |
177 | printf("%s", on1? ppi[F1][j1]: ppi[F2][j2]); | |
178 | for (i = 0; i < on1; i++) | |
179 | if (i != j1) | |
180 | printf("%c%s", sep1, ppi[F1][i]); | |
181 | for (i = 0; i < on2; i++) | |
182 | if (i != j2) | |
183 | printf("%c%s", sep1, ppi[F2][i]); | |
184 | printf("\n"); | |
185 | } else { | |
186 | for (i = 0; i < no; i++) { | |
187 | temp = ppi[olistf[i]][olist[i]]; | |
188 | if(olistf[i]==F1 && on1<=olist[i] || | |
189 | olistf[i]==F2 && on2<=olist[i] || | |
190 | *temp==0) | |
191 | temp = null; | |
192 | printf("%s", temp); | |
193 | if (i == no - 1) | |
194 | printf("\n"); | |
195 | else | |
196 | printf("%c", sep1); | |
197 | } | |
198 | } | |
199 | } | |
200 | ||
/*
 * Print "join: " followed by a printf-style message and a newline on
 * stderr, then exit with status 1.  Does not return.
 *
 * s1 is the format string; s2..s5 are optional string arguments for
 * it.  They are declared as pointers so that a char * passed by a
 * caller is not squeezed through an int.  Callers may pass fewer
 * arguments than are declared; only as many as the format consumes
 * are looked at by fprintf.
 */
int
error(s1, s2, s3, s4, s5)
char *s1, *s2, *s3, *s4, *s5;
{
	fprintf(stderr, "join: ");
	fprintf(stderr, s1, s2, s3, s4, s5);
	fprintf(stderr, "\n");
	exit(1);
	return(1);	/* not reached */
}
209 | ||
/*
 * Compare two field strings; the result has the sign convention of
 * strcmp.  A null pointer (a line that does not have the field) is
 * compared as the empty string instead of being handed to strcmp.
 */
int
cmp(s1, s2)
char *s1, *s2;
{
	if (s1 == NULL)
		s1 = "";
	if (s2 == NULL)
		s2 = "";
	return(strcmp(s1, s2));
}