| 1 | static char *sccsid = "@(#)join.c 4.1 (Berkeley) %G%"; |
/* join: pair up lines of two files, F1 and F2, whose join fields compare equal */
| 3 | |
| 4 | #include <stdio.h> |
/* Selectors for the two input files; they index f[], buf[] and ppi[]. */
#define F1	0
#define F2	1

/* Number of slots in each per-file field-pointer table. */
#define NFLD	20

/* Compare the current join field of file 1 with that of file 2. */
#define comp()	cmp(ppi[F1][j1],ppi[F2][j2])

/* ---- per-file input state ---- */
FILE	*f[2];			/* the two input streams */
char	buf[2][BUFSIZ];		/* most recently read line of each file */
char	*ppi[2][NFLD];		/* start of every field inside buf[] */

/* Assigned once by main from the join-field slots; not read in this file. */
char	*s1;
char	*s2;

/* ---- join field selection (1-origin here, main converts to 0-origin) ---- */
int	j1 = 1;			/* join field of file 1 */
int	j2 = 1;			/* join field of file 2 */

/* ---- explicit output list built from -o ---- */
int	olist[2*NFLD];		/* which field to print */
int	olistf[2*NFLD];		/* which file (F1 or F2) that field comes from */
int	no;			/* entries in use; 0 selects the default format */

/* ---- separators ---- */
int	sep1 = ' ';		/* field separator, also used on output */
int	sep2 = '\t';		/* second separator accepted on input */

/* Text printed in place of a missing or empty field in -o output. */
char	*null = "";

/* Not referenced anywhere in this file. */
int	unpub1;
int	unpub2;

/* -a flags: bit 1 = print unpaired lines of file 1, bit 2 = of file 2. */
int	aflg;
| 25 | |
| 26 | main(argc, argv) |
| 27 | char *argv[]; |
| 28 | { |
| 29 | int i; |
| 30 | int n1, n2; |
| 31 | long top2, bot2; |
| 32 | long ftell(); |
| 33 | |
| 34 | while (argc > 1 && argv[1][0] == '-') { |
| 35 | if (argv[1][1] == '\0') |
| 36 | break; |
| 37 | switch (argv[1][1]) { |
| 38 | case 'a': |
| 39 | switch(argv[1][2]) { |
| 40 | case '1': |
| 41 | aflg |= 1; |
| 42 | break; |
| 43 | case '2': |
| 44 | aflg |= 2; |
| 45 | break; |
| 46 | default: |
| 47 | aflg |= 3; |
| 48 | } |
| 49 | break; |
| 50 | case 'e': |
| 51 | null = argv[2]; |
| 52 | argv++; |
| 53 | argc--; |
| 54 | break; |
| 55 | case 't': |
| 56 | sep1 = sep2 = argv[1][2]; |
| 57 | break; |
| 58 | case 'o': |
| 59 | for (no = 0; no < 2*NFLD; no++) { |
| 60 | if (argv[2][0] == '1' && argv[2][1] == '.') { |
| 61 | olistf[no] = F1; |
| 62 | olist[no] = atoi(&argv[2][2]); |
| 63 | } else if (argv[2][0] == '2' && argv[2][1] == '.') { |
| 64 | olist[no] = atoi(&argv[2][2]); |
| 65 | olistf[no] = F2; |
| 66 | } else |
| 67 | break; |
| 68 | argc--; |
| 69 | argv++; |
| 70 | } |
| 71 | break; |
| 72 | case 'j': |
| 73 | if (argv[1][2] == '1') |
| 74 | j1 = atoi(argv[2]); |
| 75 | else if (argv[1][2] == '2') |
| 76 | j2 = atoi(argv[2]); |
| 77 | else |
| 78 | j1 = j2 = atoi(argv[2]); |
| 79 | argc--; |
| 80 | argv++; |
| 81 | break; |
| 82 | } |
| 83 | argc--; |
| 84 | argv++; |
| 85 | } |
| 86 | for (i = 0; i < no; i++) |
| 87 | olist[i]--; /* 0 origin */ |
| 88 | if (argc != 3) |
| 89 | error("usage: join [-j1 x -j2 y] [-o list] file1 file2"); |
| 90 | j1--; |
| 91 | j2--; /* everyone else believes in 0 origin */ |
| 92 | s1 = ppi[F1][j1]; |
| 93 | s2 = ppi[F2][j2]; |
| 94 | if (argv[1][0] == '-') |
| 95 | f[F1] = stdin; |
| 96 | else if ((f[F1] = fopen(argv[1], "r")) == NULL) |
| 97 | error("can't open %s", argv[1]); |
| 98 | if ((f[F2] = fopen(argv[2], "r")) == NULL) |
| 99 | error("can't open %s", argv[2]); |
| 100 | |
| 101 | #define get1() n1=input(F1) |
| 102 | #define get2() n2=input(F2) |
| 103 | get1(); |
| 104 | bot2 = ftell(f[F2]); |
| 105 | get2(); |
| 106 | while(n1>0 && n2>0 || aflg!=0 && n1+n2>0) { |
| 107 | if(n1>0 && n2>0 && comp()>0 || n1==0) { |
| 108 | if(aflg&2) output(0, n2); |
| 109 | bot2 = ftell(f[F2]); |
| 110 | get2(); |
| 111 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| 112 | if(aflg&1) output(n1, 0); |
| 113 | get1(); |
| 114 | } else /*(n1>0 && n2>0 && comp()==0)*/ { |
| 115 | while(n2>0 && comp()==0) { |
| 116 | output(n1, n2); |
| 117 | top2 = ftell(f[F2]); |
| 118 | get2(); |
| 119 | } |
| 120 | fseek(f[F2], bot2, 0); |
| 121 | get2(); |
| 122 | get1(); |
| 123 | for(;;) { |
| 124 | if(n1>0 && n2>0 && comp()==0) { |
| 125 | output(n1, n2); |
| 126 | get2(); |
| 127 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| 128 | fseek(f[F2], bot2, 0); |
| 129 | get2(); |
| 130 | get1(); |
| 131 | } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ |
| 132 | fseek(f[F2], top2, 0); |
| 133 | bot2 = top2; |
| 134 | get2(); |
| 135 | break; |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | } |
| 140 | return(0); |
| 141 | } |
| 142 | |
| 143 | input(n) /* get input line and split into fields */ |
| 144 | { |
| 145 | register int i, c; |
| 146 | char *bp; |
| 147 | char **pp; |
| 148 | |
| 149 | bp = buf[n]; |
| 150 | pp = ppi[n]; |
| 151 | if (fgets(bp, BUFSIZ, f[n]) == NULL) |
| 152 | return(0); |
| 153 | for (i = 0; ; i++) { |
| 154 | if (sep1 == ' ') /* strip multiples */ |
| 155 | while ((c = *bp) == sep1 || c == sep2) |
| 156 | bp++; /* skip blanks */ |
| 157 | else |
| 158 | c = *bp; |
| 159 | if (c == '\n' || c == '\0') |
| 160 | break; |
| 161 | *pp++ = bp; /* record beginning */ |
| 162 | while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') |
| 163 | bp++; |
| 164 | *bp++ = '\0'; /* mark end by overwriting blank */ |
| 165 | /* fails badly if string doesn't have \n at end */ |
| 166 | } |
| 167 | *pp = 0; |
| 168 | return(i); |
| 169 | } |
| 170 | |
| 171 | output(on1, on2) /* print items from olist */ |
| 172 | int on1, on2; |
| 173 | { |
| 174 | int i; |
| 175 | char *temp; |
| 176 | |
| 177 | if (no <= 0) { /* default case */ |
| 178 | printf("%s", on1? ppi[F1][j1]: ppi[F2][j2]); |
| 179 | for (i = 0; i < on1; i++) |
| 180 | if (i != j1) |
| 181 | printf("%c%s", sep1, ppi[F1][i]); |
| 182 | for (i = 0; i < on2; i++) |
| 183 | if (i != j2) |
| 184 | printf("%c%s", sep1, ppi[F2][i]); |
| 185 | printf("\n"); |
| 186 | } else { |
| 187 | for (i = 0; i < no; i++) { |
| 188 | temp = ppi[olistf[i]][olist[i]]; |
| 189 | if(olistf[i]==F1 && on1<=olist[i] || |
| 190 | olistf[i]==F2 && on2<=olist[i] || |
| 191 | *temp==0) |
| 192 | temp = null; |
| 193 | printf("%s", temp); |
| 194 | if (i == no - 1) |
| 195 | printf("\n"); |
| 196 | else |
| 197 | printf("%c", sep1); |
| 198 | } |
| 199 | } |
| 200 | } |
| 201 | |
/*
 * error: print "join: ", then the message formatted from s1 with up to
 * four string arguments, then a newline, all on stderr; exit with
 * status 1.  Does not return.
 *
 * s2..s5 are now declared as pointers.  They used to default to int, so
 * the file name handed over for "can't open %s" was squeezed through an
 * int on its way to fprintf, which loses part of the address wherever
 * pointers are wider than int.
 *
 * NOTE(review): this stays an old-style (non-prototype) definition on
 * purpose.  The callers in this file appear before it, pass only one or
 * two arguments, and rely on an implicit declaration; a <stdarg.h>
 * version would be the cleaner fix but needs a prototype placed ahead of
 * those callers.  Arguments that were not passed are forwarded to
 * fprintf but never consumed, because the format has no conversion for
 * them.
 */
int
error(s1, s2, s3, s4, s5)
char *s1, *s2, *s3, *s4, *s5;
{
	fprintf(stderr, "join: ");
	fprintf(stderr, s1, s2, s3, s4, s5);
	fprintf(stderr, "\n");
	exit(1);
	return(1);	/* not reached */
}
| 210 | |
/*
 * cmp: compare two join keys.
 *
 * Returns a value less than, equal to, or greater than zero, as strcmp
 * does.  A null pointer is compared as if it were the empty string: the
 * comp() macro passes ppi[..][j] straight through, and that slot holds a
 * null pointer when the current line ends just before the join field, so
 * strcmp must never see it.
 */
int
cmp(const char *s1, const char *s2)
{
	if (s1 == NULL)
		s1 = "";
	if (s2 == NULL)
		s2 = "";
	return(strcmp(s1, s2));
}