+#define L 512
+#define N 7
+#define C 20
+#define MEM (16*2048)
+#define NF 10
+
+int ibuf[259];
+int obuf[259];
+char *file;
+char *filep;
+int nfiles;
+int nlines;
+int ntext;
+int *lspace;
+char *tspace;
+int cmp();
+int term();
+int mflg;
+char *outfil;
+char tabchar;
+int eargc;
+char **eargv;
+
+char fold[128] {
+ 0000,0001,0002,0003,0004,0005,0006,0007,
+ 0010,0011,0012,0013,0014,0015,0016,0017,
+ 0020,0021,0022,0023,0024,0025,0026,0027,
+ 0030,0031,0032,0033,0034,0035,0036,0037,
+ 0040,0041,0042,0043,0044,0045,0046,0047,
+ 0050,0051,0052,0053,0054,0055,0056,0057,
+ 0060,0061,0062,0063,0064,0065,0066,0067,
+ 0070,0071,0072,0073,0074,0075,0076,0077,
+ 0100,0101,0102,0103,0104,0105,0106,0107,
+ 0110,0111,0112,0113,0114,0115,0116,0117,
+ 0120,0121,0122,0123,0124,0125,0126,0127,
+ 0130,0131,0132,0133,0134,0134,0136,0137,
+ 0140,0101,0102,0103,0104,0105,0106,0107,
+ 0110,0111,0112,0113,0114,0115,0116,0117,
+ 0120,0121,0122,0123,0124,0125,0126,0127,
+ 0130,0131,0132,0173,0174,0175,0176,0177
+};
+char nofold[128];
+char dict[128] {
+ 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
+ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,
+ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1
+};
+char nodict[128] { 1 };
+
+struct field {
+ char *code;
+ char *ignore;
+ int nflg;
+ int rflg;
+ int bflg;
+ char *m[2];
+ char *n[2];
+} fields[NF];
+int proto[9] {
+ &fold,
+ &nodict,
+ 0,
+ 1,
+ 0,
+ 0,-1,
+ 0,0
+};
+int nfields;
+int error 1;
+
+main(argc, argv)
+char **argv;
+{
+ register a, i;
+ char *arg;
+ register int *p;
+ int *q;
+
+ for(a=0; a<128; a++)
+ nofold[a] = a;
+ nodict[127] = 1;
+ copyproto();
+ eargv = argv;
+ while (--argc > 0) {
+ if(**++argv == '-') for(arg = *argv;;) {
+ switch(*++arg) {
+ case '\0':
+ if(arg[-1] == '-')
+ eargv[eargc++] = "-";
+ break;
+
+ case 'm':
+ mflg++;
+ continue;
+
+ case 'o':
+ if(--argc > 0)
+ outfil = *++argv;
+ continue;
+
+ default:
+ field(++*argv,1);
+ break;
+ }
+ break;
+ } else if (**argv == '+') {
+ if(++nfields>=NF) {
+ mess("Too many keys\n");
+ exit(1);
+ }
+ copyproto();
+ field(++*argv,0);
+ } else
+ eargv[eargc++] = *argv;
+ }
+ q = &fields[0];
+ for(a=1; a<=nfields; a++) {
+ p = &fields[a];
+ for(i=0; i<5; i++) /*sensitive to sizeof(proto)*/
+ if(p[i] != proto[i])
+ goto next;
+ for(i=0; i<5; i++)
+ p[i] = q[i];
+next: ;
+ }
+ if(eargc == 0)
+ eargv[eargc++] = "-";
+
+ a = MEM;
+ i = lspace = sbrk(0);
+ while(brk(a) == -1)
+ a =- 512;
+ brk(a =- 512); /* for recursion */
+ a =- i;
+ nlines = ((a-L)>>1) & 077777;
+ nlines =/ 5;
+ ntext = nlines*8;
+ tspace = lspace+nlines;
+ file = "/usr/tmp/stmXaa";
+loop:
+ filep = file;
+ while(*filep != 'X')
+ filep++;
+ for(*filep = 'a';;(*filep)++) {
+ if(stat(file, lspace) < 0) {
+ a = creat(file, 0600);
+ if(a >= 0)
+ break;
+ }
+ if(*filep == 'z') {
+ if(file[1] != 't') {
+ file = "/tmp/stmXaa";
+ goto loop;
+ }
+ mess("Cannot locate temp\n");
+ exit(1);
+ }
+ }
+ close(a);
+ filep++;
+ if ((signal(2, 1) & 01) == 0)
+ signal(2, term);
+ nfiles = eargc;
+ if(!mflg) {
+ ibuf[0] = -1;
+ sort();
+ close(0);
+ }
+ for(a = mflg?0:eargc; a+N < nfiles; a=+N) {
+ newfile();
+ merge(a, a+N);
+ }
+ if(a != nfiles) {
+ oldfile();
+ merge(a, nfiles);
+ }
+ error = 0;
+ term();
+}
+
+sort()
+{
+ register char *cp;
+ register *lp, c;
+ int done;
+ int i;
+ int f;
+
+ done = 0;
+ i = 0;
+ do {
+ cp = tspace;
+ lp = lspace;
+ while(lp < lspace+nlines && cp < tspace+ntext) {
+ *lp++ = cp;
+ while((*cp++ = c = getc(ibuf)) != '\n') {
+ if(c >= 0) continue;
+ cp--;
+ close(ibuf[0]);
+ if(i < eargc) {
+ if((f = setfil(i++)) == 0)
+ ibuf[0] = 0;
+ else if(fopen(f, ibuf) < 0)
+ cant(f);
+ } else
+ break;
+ }
+ if(c < 0) {
+ done++;
+ lp--;
+ break;
+ }
+ }
+ qsort(lspace, lp-lspace, 2, cmp);
+ if(done == 0 || nfiles != eargc)
+ newfile(); else
+ oldfile();
+ while(lp > lspace) {
+ cp = *--lp;
+ if(*cp)
+ do
+ putc(*cp, obuf);
+ while(*cp++ != '\n');
+ }
+ fflush(obuf);
+ close(obuf[0]);
+ } while(done == 0);
+}
+
+struct merg
+{
+ char l[L];
+ int b[259];
+};
+
+merge(a, b)
+{
+ register struct merg *p;
+ register char *cp;
+ register i;
+ struct { int *ip;};
+ int f;
+ int j;
+ int k,l,c;
+
+ p = lspace;
+ j = 0;
+ for(i=a; i<b; i++) {
+ f = setfil(i);
+ if(f == 0)
+ p->b[0] = dup(0);
+ else if(fopen(f, p->b) < 0)
+ cant(f);
+ ibuf[j] = p;
+ if(!rline(p)) j++;
+ p++;
+ }
+
+ do {
+ i = j;
+ qsort(ibuf, i, 2, cmp);
+ l = 0;
+ while(i--) {
+ cp = ibuf[i];
+ if(*cp == '\0') {
+ l = 1;
+ if(rline(ibuf[i])) {
+ k = i;
+ while(++k < j)
+ ibuf[k-1] = ibuf[k];
+ j--;
+ }
+ }
+ }
+ } while(l);
+
+ i = j;
+ if(i > 0) for(;;) {
+ cp = ibuf[i-1];
+ if(i == 1 || cmp(&ibuf[i-1], &ibuf[i-2]))
+ do
+ putc(*cp, obuf);
+ while(*cp++ != '\n');
+ if(rline(ibuf[i-1])) {
+ i--;
+ if(i == 0)
+ break;
+ }
+ cp = &ibuf[i];
+ while (--cp.ip > ibuf && cmp(cp.ip, cp.ip-1) < 0) {
+ p = *cp.ip;
+ *cp.ip = *(cp.ip-1);
+ *(cp.ip-1) = p;
+ }
+ }
+ p = lspace;
+ for(i=a; i<b; i++) {
+ close(p->b[0]);
+ p++;
+ if(i >= eargc)
+ close(creat(setfil(i)));
+ }
+ fflush(obuf);
+ close(obuf[0]);
+}
+
+rline(mp)
+struct merg *mp;
+{
+ register char *cp;
+ register *bp, c;
+
+ bp = mp->b;
+ cp = mp->l;
+ do {
+ c = getc(bp);
+ if(c < 0)
+ return(1);
+ *cp++ = c;
+ } while(c != '\n');
+ *cp = '\0';
+ return(0);
+}
+
+newfile()
+{
+
+ if(fcreat(setfil(nfiles), obuf) < 0) {
+ mess("Can't create temp\n");
+ term();
+ }
+ nfiles++;
+}
+
+char *
+setfil(i)
+{
+
+ if(i < eargc)
+ if(eargv[i][0] == '-' && eargv[i][1] == '\0')
+ return(0);
+ else
+ return(eargv[i]);
+ i =- eargc;
+ filep[0] = i/26 + 'a';
+ filep[1] = i%26 + 'a';
+ return(file);
+}
+
+oldfile()
+{
+
+ if(outfil) {
+ if(fcreat(outfil, obuf) < 0) {
+ mess("Can't create output\n");
+ term();
+ }
+ } else
+ obuf[0] = 1;
+}
+
+cant(f)
+{
+ mess("Can't open ");
+ mess(f);
+ mess("\n");
+ term();
+}
+
+term()
+{
+ register i;
+
+ if(nfiles == eargc)
+ nfiles++;
+ for(i=eargc; i<nfiles; i++) {
+ unlink(setfil(i));
+ }
+ exit(error);
+}
+
+cmp(a, b)
+int *a,*b;
+{
+ register char *ra, *rb;
+
+ ra = *a - 1;
+ rb = *b - 1;
+
+ while(*++ra == *++rb)
+ if(*ra == '\n')
+ return(0);
+
+ return(*rb - *ra);
+}
+
+
+skip(pp, fp, j)
+struct field *fp;
+char *pp;
+{
+ register i;
+ register char *p;
+
+ p = pp;
+ if( (i=fp->m[j]) < 0)
+ return(-1);
+ while(i-- > 0) {
+ if(tabchar != 0) {
+ while(*p != tabchar)
+ if(*p != '\n')
+ p++;
+ else goto ret;
+ p++;
+ } else {
+ while(blank(*p))
+ p++;
+ while(!blank(*p))
+ if(*p != '\n')
+ p++;
+ else goto ret;
+ }
+ }
+ if(fp->bflg)
+ while(blank(*p))
+ p++;
+ i = fp->n[j];
+ while(i-- > 0) {
+ if(*p != '\n')
+ p++;
+ else goto ret;
+ }
+ret:
+ return(p);
+}
+
+digit(c)
+{
+
+ return(c <= '9' && c >= '0');
+}
+
+mess(s)
+char *s;
+{
+ while(*s)
+ write(2, s++, 1);
+}
+
+copyproto()
+{
+ register int i, *p, *q;
+
+ p = proto;
+ q = &fields[nfields];
+ for(i=0; i<sizeof(proto)/2; i++)
+ *q++ = *p++;
+}
+
+field(s,k)
+char *s;
+{
+ register struct field *p;
+ p = &fields[nfields];
+ for(; *s!=0; s++) {
+ switch(*s) {
+ case '\0':
+ return;
+
+ case 'a':
+ p->code = nofold;
+ break;
+
+ case 'b':
+ p->bflg++;
+ break;
+
+ case 'd':
+ p->ignore = dict;
+ break;
+
+ case 'n':
+ p->nflg++;
+ break;
+ case 't':
+ tabchar = *++s;
+ if(tabchar == 0) s--;
+ break;
+
+ case 'r':
+ p->rflg = -1;
+ break;
+
+ default:
+ p->m[k] = number(&s);
+ if(*s == '.')
+ s++;
+ p->n[k] = number(&s);
+ s--;
+ }
+ }
+}
+
+number(ppa)
+char **ppa;
+{
+ int n;
+ register char *pa;
+ pa = *ppa;
+ n = 0;
+ while(digit(*pa))
+ n = n*10 + *pa++ - '0';
+ *ppa = pa;
+ return(n);
+}
+
+blank(c)
+{
+ if(c==' ' || c=='\t')
+ return(1);
+ return(0);
+}
+
+int (*qscmp)();
+int qses;
+
+qsort(a, n, es, fc)
+char *a;
+int n, es;
+int (*fc)();
+{
+ qscmp = fc;
+ qses = es;
+ qs1(a, a+n*es);
+}
+
+qs1(a, l)
+char *a, *l;
+{
+ register char *i, *j, *es;
+ char **k;
+ char *lp, *hp;
+ int n, c;
+
+
+ es = qses;
+
+start:
+ if((n=l-a) <= es)
+ return;
+
+
+ n = ((n/(2*es))*es) & 077777;
+ hp = lp = a+n;
+ i = a;
+ j = l-es;
+
+
+ for(;;) {
+ if(i < lp) {
+ if((c = (*qscmp)(i, lp)) == 0) {
+ qsexc(i, lp =- es);
+ continue;
+ }
+ if(c < 0) {
+ i =+ es;
+ continue;
+ }
+ }
+
+loop:
+ if(j > hp) {
+ if((c = (*qscmp)(hp, j)) == 0) {
+ qsexc(hp =+ es, j);
+ goto loop;
+ }
+ if(c > 0) {
+ if(i == lp) {
+ qstexc(i, hp =+ es, j);
+ i = lp =+ es;
+ goto loop;
+ }
+ qsexc(i, j);
+ j =- es;
+ i =+ es;
+ continue;
+ }
+ j =- es;
+ goto loop;
+ }
+
+
+ if(i == lp) {
+ for(k=lp+2; k<=hp;) *(*k++)='\0';
+ if(lp-a >= l-hp) {
+ qs1(hp+es, l);
+ l = lp;
+ } else {
+ qs1(a, lp);
+ a = hp+es;
+ }
+ goto start;
+ }
+
+
+ qstexc(j, lp =- es, i);
+ j = hp =- es;
+ }
+}
+
+qsexc(i, j)
+char *i, *j;
+{
+ register char *ri, *rj, c;
+ int n;
+
+ n = qses;
+ ri = i;
+ rj = j;
+ do {
+ c = *ri;
+ *ri++ = *rj;
+ *rj++ = c;
+ } while(--n);
+}
+
+qstexc(i, j, k)
+char *i, *j, *k;
+{
+ register char *ri, *rj, *rk;
+ char c;
+ int n;
+
+ n = qses;
+ ri = i;
+ rj = j;
+ rk = k;
+ do {
+ c = *ri;
+ *ri++ = *rk;
+ *rk++ = *rj;
+ *rj++ = c;
+ } while(--n);
+}