Bell 32V development
[unix-history] / usr / src / cmd / spell / spell.h
#include <stdio.h>
#include <ctype.h>
#ifndef unix
#define SHIFT 5
#define TABSIZE (int)(400000/(1<<SHIFT))
int *tab; /*honeywell loader deficiency*/
#else
#define Tolower(c) (isupper(c)?tolower(c):c) /* ugh!!! */
#define SHIFT 4
#define TABSIZE 25000 /*(int)(400000/(1<<shift))--pdp11 compiler deficiency*/
short tab[TABSIZE];
#endif
long p[] = {
399871,
399887,
399899,
399911,
399913,
399937,
399941,
399953,
399979,
399983,
399989,
};
#define NP (sizeof(p)/sizeof(p[0]))
#define NW 30
/*
* Hash table for spelling checker has n bits.
* Each word w is hashed by k different (modular) hash functions, hi.
* The bits hi(w), i=1..k, are set for words in the dictionary.
* Assuming independence, the probability that no word of a d-word
* dictionary sets a particular bit is given by the Poisson formula
* P = exp(-y)*y**0/0!, where y=d*k/n.
* The probability that a random string is recognized as a word is then
* (1-P)**k. For given n and d this is minimum when y=log(2), P=1/2,
* whence one finds, for example, that a 25000-word dictionary in a
* 400000-bit table works best with k=11.
*/
long pow2[NP][NW];
prime(argc, argv) register char **argv;
{
int i, j;
long h;
register long *lp;
#ifndef unix
if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL)
return(0);
#endif
if (argc > 1) {
FILE *f;
if ((f = fopen(argv[1], "ri")) == NULL)
return(0);
if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE)
return(0);
fclose(f);
}
for (i=0; i<NP; i++) {
h = *(lp = pow2[i]) = 1<<14;
for (j=1; j<NW; j++)
h = *++lp = (h<<7) % p[i];
}
return(1);
}
#define get(h) (tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1))))
#define set(h) tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1))