/* join: relational join of files F1 and F2 on a common field */
| 2 | |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Indices selecting one of the two input files in f[], buf[] and ppi[]. */
#define	F1	0
#define	F2	1

#define	NFLD	20	/* maximum number of fields per input line */

/* Compare the join field of the current file-1 line with that of file 2. */
#define	comp()	cmp(ppi[F1][j1], ppi[F2][j2])
| 8 | |
| 9 | FILE *f[2]; |
| 10 | char buf[2][BUFSIZ]; /*input lines */ |
| 11 | char *ppi[2][NFLD]; /* pointers to fields in lines */ |
| 12 | char *s1,*s2; |
| 13 | int j1 = 1; /* join of this field of file 1 */ |
| 14 | int j2 = 1; /* join of this field of file 2 */ |
| 15 | int olist[2*NFLD]; /* output these fields */ |
| 16 | int olistf[2*NFLD]; /* from these files */ |
| 17 | int no; /* number of entries in olist */ |
| 18 | int sep1 = ' '; /* default field separator */ |
| 19 | int sep2 = '\t'; |
| 20 | char* null = ""; |
| 21 | int unpub1; |
| 22 | int unpub2; |
| 23 | int aflg; |
| 24 | |
| 25 | main(argc, argv) |
| 26 | char *argv[]; |
| 27 | { |
| 28 | int i; |
| 29 | int n1, n2; |
| 30 | long top2, bot2; |
| 31 | long ftell(); |
| 32 | |
| 33 | while (argc > 1 && argv[1][0] == '-') { |
| 34 | if (argv[1][1] == '\0') |
| 35 | break; |
| 36 | switch (argv[1][1]) { |
| 37 | case 'a': |
| 38 | switch(argv[1][2]) { |
| 39 | case '1': |
| 40 | aflg |= 1; |
| 41 | break; |
| 42 | case '2': |
| 43 | aflg |= 2; |
| 44 | break; |
| 45 | default: |
| 46 | aflg |= 3; |
| 47 | } |
| 48 | break; |
| 49 | case 'e': |
| 50 | null = argv[2]; |
| 51 | argv++; |
| 52 | argc--; |
| 53 | break; |
| 54 | case 't': |
| 55 | sep1 = sep2 = argv[1][2]; |
| 56 | break; |
| 57 | case 'o': |
| 58 | for (no = 0; no < 2*NFLD; no++) { |
| 59 | if (argv[2][0] == '1' && argv[2][1] == '.') { |
| 60 | olistf[no] = F1; |
| 61 | olist[no] = atoi(&argv[2][2]); |
| 62 | } else if (argv[2][0] == '2' && argv[2][1] == '.') { |
| 63 | olist[no] = atoi(&argv[2][2]); |
| 64 | olistf[no] = F2; |
| 65 | } else |
| 66 | break; |
| 67 | argc--; |
| 68 | argv++; |
| 69 | } |
| 70 | break; |
| 71 | case 'j': |
| 72 | if (argv[1][2] == '1') |
| 73 | j1 = atoi(argv[2]); |
| 74 | else if (argv[1][2] == '2') |
| 75 | j2 = atoi(argv[2]); |
| 76 | else |
| 77 | j1 = j2 = atoi(argv[2]); |
| 78 | argc--; |
| 79 | argv++; |
| 80 | break; |
| 81 | } |
| 82 | argc--; |
| 83 | argv++; |
| 84 | } |
| 85 | for (i = 0; i < no; i++) |
| 86 | olist[i]--; /* 0 origin */ |
| 87 | if (argc != 3) |
| 88 | error("usage: join [-j1 x -j2 y] [-o list] file1 file2"); |
| 89 | j1--; |
| 90 | j2--; /* everyone else believes in 0 origin */ |
| 91 | s1 = ppi[F1][j1]; |
| 92 | s2 = ppi[F2][j2]; |
| 93 | if (argv[1][0] == '-') |
| 94 | f[F1] = stdin; |
| 95 | else if ((f[F1] = fopen(argv[1], "r")) == NULL) |
| 96 | error("can't open %s", argv[1]); |
| 97 | if ((f[F2] = fopen(argv[2], "r")) == NULL) |
| 98 | error("can't open %s", argv[2]); |
| 99 | |
| 100 | #define get1() n1=input(F1) |
| 101 | #define get2() n2=input(F2) |
| 102 | get1(); |
| 103 | bot2 = ftell(f[F2]); |
| 104 | get2(); |
| 105 | while(n1>0 && n2>0 || aflg!=0 && n1+n2>0) { |
| 106 | if(n1>0 && n2>0 && comp()>0 || n1==0) { |
| 107 | if(aflg&2) output(0, n2); |
| 108 | bot2 = ftell(f[F2]); |
| 109 | get2(); |
| 110 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| 111 | if(aflg&1) output(n1, 0); |
| 112 | get1(); |
| 113 | } else /*(n1>0 && n2>0 && comp()==0)*/ { |
| 114 | while(n2>0 && comp()==0) { |
| 115 | output(n1, n2); |
| 116 | top2 = ftell(f[F2]); |
| 117 | get2(); |
| 118 | } |
| 119 | fseek(f[F2], bot2, 0); |
| 120 | get2(); |
| 121 | get1(); |
| 122 | for(;;) { |
| 123 | if(n1>0 && n2>0 && comp()==0) { |
| 124 | output(n1, n2); |
| 125 | get2(); |
| 126 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| 127 | fseek(f[F2], bot2, 0); |
| 128 | get2(); |
| 129 | get1(); |
| 130 | } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ |
| 131 | fseek(f[F2], top2, 0); |
| 132 | bot2 = top2; |
| 133 | get2(); |
| 134 | break; |
| 135 | } |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | return(0); |
| 140 | } |
| 141 | |
| 142 | input(n) /* get input line and split into fields */ |
| 143 | { |
| 144 | register int i, c; |
| 145 | char *bp; |
| 146 | char **pp; |
| 147 | |
| 148 | bp = buf[n]; |
| 149 | pp = ppi[n]; |
| 150 | if (fgets(bp, BUFSIZ, f[n]) == NULL) |
| 151 | return(0); |
| 152 | for (i = 0; ; i++) { |
| 153 | if (sep1 == ' ') /* strip multiples */ |
| 154 | while ((c = *bp) == sep1 || c == sep2) |
| 155 | bp++; /* skip blanks */ |
| 156 | else |
| 157 | c = *bp; |
| 158 | if (c == '\n' || c == '\0') |
| 159 | break; |
| 160 | *pp++ = bp; /* record beginning */ |
| 161 | while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') |
| 162 | bp++; |
| 163 | *bp++ = '\0'; /* mark end by overwriting blank */ |
| 164 | /* fails badly if string doesn't have \n at end */ |
| 165 | } |
| 166 | *pp = 0; |
| 167 | return(i); |
| 168 | } |
| 169 | |
| 170 | output(on1, on2) /* print items from olist */ |
| 171 | int on1, on2; |
| 172 | { |
| 173 | int i; |
| 174 | char *temp; |
| 175 | |
| 176 | if (no <= 0) { /* default case */ |
| 177 | printf("%s", on1? ppi[F1][j1]: ppi[F2][j2]); |
| 178 | for (i = 0; i < on1; i++) |
| 179 | if (i != j1) |
| 180 | printf("%c%s", sep1, ppi[F1][i]); |
| 181 | for (i = 0; i < on2; i++) |
| 182 | if (i != j2) |
| 183 | printf("%c%s", sep1, ppi[F2][i]); |
| 184 | printf("\n"); |
| 185 | } else { |
| 186 | for (i = 0; i < no; i++) { |
| 187 | temp = ppi[olistf[i]][olist[i]]; |
| 188 | if(olistf[i]==F1 && on1<=olist[i] || |
| 189 | olistf[i]==F2 && on2<=olist[i] || |
| 190 | *temp==0) |
| 191 | temp = null; |
| 192 | printf("%s", temp); |
| 193 | if (i == no - 1) |
| 194 | printf("\n"); |
| 195 | else |
| 196 | printf("%c", sep1); |
| 197 | } |
| 198 | } |
| 199 | } |
| 200 | |
/*
 * error - print "join: " followed by a formatted message and a newline on
 * standard error, then exit with status 1.  Does not return.
 *
 * s1 is a printf-style format; s2..s5 are up to four string arguments for
 * it.  They are declared as pointers (they used to default to int) so a
 * pointer passed for a %s conversion is not truncated where pointers are
 * wider than int.  The function stays non-variadic because the callers in
 * this file reach it without a prototype in scope; arguments the format
 * does not mention are ignored by fprintf.
 */
int
error(char *s1, char *s2, char *s3, char *s4, char *s5)
{
	fprintf(stderr, "join: ");
	fprintf(stderr, s1, s2, s3, s4, s5);
	fprintf(stderr, "\n");
	exit(1);
}
| 209 | |
/*
 * cmp - compare two join keys.
 *
 * Returns a value less than, equal to, or greater than zero as s1 sorts
 * before, the same as, or after s2 (strcmp ordering).  A null pointer,
 * which a field slot can hold when the line is shorter than the join
 * field, is treated as an empty key instead of being handed to strcmp.
 */
int
cmp(const char *s1, const char *s2)
{
	if (s1 == NULL)
		s1 = "";
	if (s2 == NULL)
		s2 = "";
	return (strcmp(s1, s2));
}