From eacb3a7e321eed3e4a7ed97c4241e672fd613619 Mon Sep 17 00:00:00 2001 From: Ken Thompson Date: Wed, 10 Jan 1979 15:02:17 -0500 Subject: [PATCH] Research V7 development Work on file usr/src/cmd/sort.c Co-Authored-By: Doug McIlroy Synthesized-from: v7 --- usr/src/cmd/sort.c | 901 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 901 insertions(+) create mode 100644 usr/src/cmd/sort.c diff --git a/usr/src/cmd/sort.c b/usr/src/cmd/sort.c new file mode 100644 index 0000000000..8da6251eeb --- /dev/null +++ b/usr/src/cmd/sort.c @@ -0,0 +1,901 @@ +#include +#include +#include +#include +#include + +#define L 512 +#define N 7 +#define C 20 +#define MEM (16*2048) +#define NF 10 + +FILE *is, *os; +char *dirtry[] = {"/usr/tmp", "/tmp", NULL}; +char **dirs; +char file1[30]; +char *file = file1; +char *filep; +int nfiles; +unsigned nlines; +unsigned ntext; +int *lspace; +char *tspace; +int cmp(), cmpa(); +int (*compare)() = cmpa; +char *eol(); +int term(); +int mflg; +int cflg; +int uflg; +char *outfil; +int unsafeout; /*kludge to assure -m -o works*/ +char tabchar; +int eargc; +char **eargv; + +char zero[256]; + +char fold[256] = { + 0200,0201,0202,0203,0204,0205,0206,0207, + 0210,0211,0212,0213,0214,0215,0216,0217, + 0220,0221,0222,0223,0224,0225,0226,0227, + 0230,0231,0232,0233,0234,0235,0236,0237, + 0240,0241,0242,0243,0244,0245,0246,0247, + 0250,0251,0252,0253,0254,0255,0256,0257, + 0260,0261,0262,0263,0264,0265,0266,0267, + 0270,0271,0272,0273,0274,0275,0276,0277, + 0300,0301,0302,0303,0304,0305,0306,0307, + 0310,0311,0312,0313,0314,0315,0316,0317, + 0320,0321,0322,0323,0324,0325,0326,0327, + 0330,0331,0332,0333,0334,0335,0336,0337, + 0340,0341,0342,0343,0344,0345,0346,0347, + 0350,0351,0352,0353,0354,0355,0356,0357, + 0360,0361,0362,0363,0364,0365,0366,0367, + 0370,0371,0372,0373,0374,0375,0376,0377, + 0000,0001,0002,0003,0004,0005,0006,0007, + 0010,0011,0012,0013,0014,0015,0016,0017, + 0020,0021,0022,0023,0024,0025,0026,0027, + 0030,0031,0032,0033,0034,0035,0036,0037, + 0040,0041,0042,0043,0044,0045,0046,0047, + 0050,0051,0052,0053,0054,0055,0056,0057, + 0060,0061,0062,0063,0064,0065,0066,0067, + 0070,0071,0072,0073,0074,0075,0076,0077, + 0100,0101,0102,0103,0104,0105,0106,0107, + 0110,0111,0112,0113,0114,0115,0116,0117, + 0120,0121,0122,0123,0124,0125,0126,0127, + 0130,0131,0132,0133,0134,0134,0136,0137, + 0140,0101,0102,0103,0104,0105,0106,0107, + 0110,0111,0112,0113,0114,0115,0116,0117, + 0120,0121,0122,0123,0124,0125,0126,0127, + 0130,0131,0132,0173,0174,0175,0176,0177 +}; +char nofold[256] = { + 0200,0201,0202,0203,0204,0205,0206,0207, + 0210,0211,0212,0213,0214,0215,0216,0217, + 0220,0221,0222,0223,0224,0225,0226,0227, + 0230,0231,0232,0233,0234,0235,0236,0237, + 0240,0241,0242,0243,0244,0245,0246,0247, + 0250,0251,0252,0253,0254,0255,0256,0257, + 0260,0261,0262,0263,0264,0265,0266,0267, + 0270,0271,0272,0273,0274,0275,0276,0277, + 0300,0301,0302,0303,0304,0305,0306,0307, + 0310,0311,0312,0313,0314,0315,0316,0317, + 0320,0321,0322,0323,0324,0325,0326,0327, + 0330,0331,0332,0333,0334,0335,0336,0337, + 0340,0341,0342,0343,0344,0345,0346,0347, + 0350,0351,0352,0353,0354,0355,0356,0357, + 0360,0361,0362,0363,0364,0365,0366,0367, + 0370,0371,0372,0373,0374,0375,0376,0377, + 0000,0001,0002,0003,0004,0005,0006,0007, + 0010,0011,0012,0013,0014,0015,0016,0017, + 0020,0021,0022,0023,0024,0025,0026,0027, + 0030,0031,0032,0033,0034,0035,0036,0037, + 0040,0041,0042,0043,0044,0045,0046,0047, + 0050,0051,0052,0053,0054,0055,0056,0057, + 0060,0061,0062,0063,0064,0065,0066,0067, + 0070,0071,0072,0073,0074,0075,0076,0077, + 0100,0101,0102,0103,0104,0105,0106,0107, + 0110,0111,0112,0113,0114,0115,0116,0117, + 0120,0121,0122,0123,0124,0125,0126,0127, + 0130,0131,0132,0133,0134,0135,0136,0137, + 0140,0141,0142,0143,0144,0145,0146,0147, + 0150,0151,0152,0153,0154,0155,0156,0157, + 0160,0161,0162,0163,0164,0165,0166,0167, + 0170,0171,0172,0173,0174,0175,0176,0177 +}; + +char nonprint[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +}; + +char dict[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1 +}; + +struct field { + char *code; + char *ignore; + int nflg; + int rflg; + int bflg[2]; + int m[2]; + int n[2]; +} fields[NF]; +struct field proto = { + nofold+128, + zero+128, + 0, + 1, + 0,0, + 0,-1, + 0,0 +}; +int nfields; +int error = 1; +char *setfil(); +char *sbrk(); +char *brk(); + +main(argc, argv) +char **argv; +{ + register a; + extern char end[1]; + char *ep; + char *arg; + struct field *p, *q; + int i; + + copyproto(); + eargv = argv; + while (--argc > 0) { + if(**++argv == '-') for(arg = *argv;;) { + switch(*++arg) { + case '\0': + if(arg[-1] == '-') + eargv[eargc++] = "-"; + break; + + case 'o': + if(--argc > 0) + outfil = *++argv; + continue; + + case 'T': + if (--argc > 0) + dirtry[0] = *++argv; + continue; + + default: + field(++*argv,nfields>0); + break; + } + break; + } else if (**argv == '+') { + if(++nfields>=NF) { + diag("too many keys",""); + exit(1); + } + copyproto(); + field(++*argv,0); + } else + eargv[eargc++] = *argv; + } + q = &fields[0]; + for(a=1; a<=nfields; a++) { + p = &fields[a]; + if(p->code != proto.code) continue; + if(p->ignore != proto.ignore) continue; + if(p->nflg != proto.nflg) continue; + if(p->rflg != proto.rflg) continue; + if(p->bflg[0] != proto.bflg[0]) continue; + if(p->bflg[1] != proto.bflg[1]) continue; + p->code = q->code; + p->ignore = q->ignore; + p->nflg = q->nflg; + p->rflg = q->rflg; + p->bflg[0] = p->bflg[1] = q->bflg[0]; + } + if(eargc == 0) + eargv[eargc++] = "-"; + if(cflg && eargc>1) { + diag("can check only 1 file",""); + exit(1); + } + safeoutfil(); + + ep = end + MEM; + lspace = (int *)sbrk(0); + while((int)brk(ep) == -1) + ep -= 512; + brk(ep -= 512); /* for recursion */ + a = ep - (char*)lspace; + nlines = (a-L); + nlines /= (5*(sizeof(char *)/sizeof(char))); + ntext = nlines*8; + tspace = (char *)(lspace + nlines); + a = -1; + for(dirs=dirtry; *dirs; dirs++) { + sprintf(filep=file1, "%s/stm%05uaa", *dirs, getpid()); + while (*filep) + filep++; + filep -= 2; + if ( (a=creat(file, 0600)) >=0) + break; + } + if(a < 0) { + diag("can't locate temp",""); + exit(1); + } + close(a); + signal(SIGHUP, term); + if (signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, term); + signal(SIGPIPE,term); + signal(SIGTERM,term); + nfiles = eargc; + if(!mflg && !cflg) { + sort(); + fclose(stdin); + } + for(a = mflg|cflg?0:eargc; a+N=nfiles) + i = nfiles; + newfile(); + merge(a, i); + } + if(a != nfiles) { + oldfile(); + merge(a, nfiles); + } + error = 0; + term(); +} + +sort() +{ + register char *cp; + register char **lp; + register c; + int done; + int i; + char *f; + + done = 0; + i = 0; + c = EOF; + do { + cp = tspace; + lp = (char **)lspace; + while(lp < (char **)lspace+nlines && cp < tspace+ntext) { + *lp++ = cp; + while(c != '\n') { + if(c != EOF) { + *cp++ = c; + c = getc(is); + continue; + } else if(is) + fclose(is); + if(i < eargc) { + if((f = setfil(i++)) == 0) + is = stdin; + else if((is = fopen(f, "r")) == NULL) + cant(f); + c = getc(is); + } else + break; + } + *cp++ = '\n'; + if(c == EOF) { + done++; + lp--; + break; + } + c = getc(is); + } + qsort((char **)lspace, lp); + if(done == 0 || nfiles != eargc) + newfile(); + else + oldfile(); + while(lp > (char **)lspace) { + cp = *--lp; + if(*cp) + do + putc(*cp, os); + while(*cp++ != '\n'); + } + fclose(os); + } while(done == 0); +} + +struct merg +{ + char l[L]; + FILE *b; +} *ibuf[256]; + +merge(a,b) +{ + struct merg *p; + register char *cp, *dp; + register i; + struct merg **ip, *jp; + char *f; + int j; + int k, l; + int muflg; + + p = (struct merg *)lspace; + j = 0; + for(i=a; i < b; i++) { + f = setfil(i); + if(f == 0) + p->b = stdin; + else if((p->b = fopen(f, "r")) == NULL) + cant(f); + ibuf[j] = p; + if(!rline(p)) j++; + p++; + } + + do { + i = j; + qsort((char **)ibuf, (char **)(ibuf+i)); + l = 0; + while(i--) { + cp = ibuf[i]->l; + if(*cp == '\0') { + l = 1; + if(rline(ibuf[i])) { + k = i; + while(++k < j) + ibuf[k-1] = ibuf[k]; + j--; + } + } + } + } while(l); + + muflg = mflg & uflg | cflg; + i = j; + while(i > 0) { + cp = ibuf[i-1]->l; + if(!cflg && (uflg == 0 || muflg || + (*compare)(ibuf[i-1]->l,ibuf[i-2]->l))) + do + putc(*cp, os); + while(*cp++ != '\n'); + if(muflg){ + cp = ibuf[i-1]->l; + dp = p->l; + do { + } while((*dp++ = *cp++) != '\n'); + } + for(;;) { + if(rline(ibuf[i-1])) { + i--; + if(i == 0) + break; + if(i == 1) + muflg = uflg; + } + ip = &ibuf[i]; + while(--ip>ibuf&&(*compare)(ip[0]->l,ip[-1]->l)<0){ + jp = *ip; + *ip = *(ip-1); + *(ip-1) = jp; + } + if(!muflg) + break; + j = (*compare)(ibuf[i-1]->l,p->l); + if(cflg) { + if(j > 0) + disorder("disorder:",ibuf[i-1]->l); + else if(uflg && j==0) + disorder("nonunique:",ibuf[i-1]->l); + } else if(j == 0) + continue; + break; + } + } + p = (struct merg *)lspace; + for(i=a; ib); + p++; + if(i >= eargc) + unlink(setfil(i)); + } + fclose(os); +} + +rline(mp) +struct merg *mp; +{ + register char *cp; + register char *ce; + FILE *bp; + register c; + + bp = mp->b; + cp = mp->l; + ce = cp+L; + do { + c = getc(bp); + if(c == EOF) + return(1); + if(cp>=ce) + cp--; + *cp++ = c; + } while(c!='\n'); + return(0); +} + +disorder(s,t) +char *s, *t; +{ + register char *u; + for(u=t; *u!='\n';u++) ; + *u = 0; + diag(s,t); + term(); +} + +newfile() +{ + register char *f; + + f = setfil(nfiles); + if((os=fopen(f, "w")) == NULL) { + diag("can't create ",f); + term(); + } + nfiles++; +} + +char * +setfil(i) +{ + + if(i < eargc) + if(eargv[i][0] == '-' && eargv[i][1] == '\0') + return(0); + else + return(eargv[i]); + i -= eargc; + filep[0] = i/26 + 'a'; + filep[1] = i%26 + 'a'; + return(file); +} + +oldfile() +{ + + if(outfil) { + if((os=fopen(outfil, "w")) == NULL) { + diag("can't create ",outfil); + term(); + } + } else + os = stdout; +} + +safeoutfil() +{ + register int i; + struct stat obuf,ibuf; + + if(!mflg||outfil==0) + return; + if(stat(outfil,&obuf)==-1) + return; + for(i=eargc-N;i0; k<=nfields; k++) { + fp = &fields[k]; + pa = i; + pb = j; + if(k) { + la = skip(pa, fp, 1); + pa = skip(pa, fp, 0); + lb = skip(pb, fp, 1); + pb = skip(pb, fp, 0); + } else { + la = eol(pa); + lb = eol(pb); + } + if(fp->nflg) { + while(blank(*pa)) + pa++; + while(blank(*pb)) + pb++; + sa = sb = fp->rflg; + if(*pa == '-') { + pa++; + sa = -sa; + } + if(*pb == '-') { + pb++; + sb = -sb; + } + for(ipa = pa; ipa pa && ipb > pb) + if(b = *--ipb - *--ipa) + a = b; + while(ipa > pa) + if(*--ipa != '0') + return(-sa); + while(ipb > pb) + if(*--ipb != '0') + return(sb); + if(a) return(a*sa); + if(*(pa=jpa) == '.') + pa++; + if(*(pb=jpb) == '.') + pb++; + if(sa==sb) + while(pacode; + ignore = fp->ignore; +loop: + while(ignore[*pa]) + pa++; + while(ignore[*pb]) + pb++; + if(pa>=la || *pa=='\n') + if(pbrflg); + else continue; + if(pb>=lb || *pb=='\n') + return(-fp->rflg); + if((sa = code[*pb++]-code[*pa++]) == 0) + goto loop; + return(sa*fp->rflg); + } + if(uflg) + return(0); + return(cmpa(i, j)); +} + +cmpa(pa, pb) +register char *pa, *pb; +{ + while(*pa == *pb) { + if(*pa++ == '\n') + return(0); + pb++; + } + return( + *pa == '\n' ? fields[0].rflg: + *pb == '\n' ?-fields[0].rflg: + *pb > *pa ? fields[0].rflg: + -fields[0].rflg + ); +} + +char * +skip(pp, fp, j) +struct field *fp; +char *pp; +{ + register i; + register char *p; + + p = pp; + if( (i=fp->m[j]) < 0) + return(eol(p)); + while(i-- > 0) { + if(tabchar != 0) { + while(*p != tabchar) + if(*p != '\n') + p++; + else goto ret; + p++; + } else { + while(blank(*p)) + p++; + while(!blank(*p)) + if(*p != '\n') + p++; + else goto ret; + } + } + if(fp->bflg[j]) + while(blank(*p)) + p++; + i = fp->n[j]; + while(i-- > 0) { + if(*p != '\n') + p++; + else goto ret; + } +ret: + return(p); +} + +char * +eol(p) +register char *p; +{ + while(*p != '\n') p++; + return(p); +} + +copyproto() +{ + register i; + register int *p, *q; + + p = (int *)&proto; + q = (int *)&fields[nfields]; + for(i=0; ibflg[k]++; + break; + + case 'd': + p->ignore = dict+128; + break; + + case 'f': + p->code = fold+128; + break; + case 'i': + p->ignore = nonprint+128; + break; + + case 'c': + cflg = 1; + continue; + + case 'm': + mflg = 1; + continue; + + case 'n': + p->nflg++; + break; + case 't': + tabchar = *++s; + if(tabchar == 0) s--; + continue; + + case 'r': + p->rflg = -1; + continue; + case 'u': + uflg = 1; + break; + + case '.': + if(p->m[k] == -1) /* -m.n with m missing */ + p->m[k] = 0; + d = &fields[0].n[0]-&fields[0].m[0]; + + default: + p->m[k+d] = number(&s); + } + compare = cmp; + } +} + +number(ppa) +char **ppa; +{ + int n; + register char *pa; + pa = *ppa; + n = 0; + while(isdigit(*pa)) { + n = n*10 + *pa - '0'; + *ppa = pa++; + } + return(n); +} + +blank(c) +{ + if(c==' ' || c=='\t') + return(1); + return(0); +} + +#define qsexc(p,q) t= *p;*p= *q;*q=t +#define qstexc(p,q,r) t= *p;*p= *r;*r= *q;*q=t + +qsort(a,l) +char **a, **l; +{ + register char **i, **j; + char **k; + char **lp, **hp; + int c; + char *t; + unsigned n; + + + +start: + if((n=l-a) <= 1) + return; + + + n /= 2; + hp = lp = a+n; + i = a; + j = l-1; + + + for(;;) { + if(i < lp) { + if((c = (*compare)(*i, *lp)) == 0) { + --lp; + qsexc(i, lp); + continue; + } + if(c < 0) { + ++i; + continue; + } + } + +loop: + if(j > hp) { + if((c = (*compare)(*hp, *j)) == 0) { + ++hp; + qsexc(hp, j); + goto loop; + } + if(c > 0) { + if(i == lp) { + ++hp; + qstexc(i, hp, j); + i = ++lp; + goto loop; + } + qsexc(i, j); + --j; + ++i; + continue; + } + --j; + goto loop; + } + + + if(i == lp) { + if(uflg) + for(k=lp+1; k<=hp;) **k++ = '\0'; + if(lp-a >= l-hp) { + qsort(hp+1, l); + l = lp; + } else { + qsort(a, lp); + a = hp+1; + } + goto start; + } + + + --lp; + qstexc(j, lp, i); + j = --hp; + } +} + -- 2.20.1