Research V7 development
[unix-history] / usr / src / cmd / spell / spell.h
CommitLineData
6b525888
SJ
1#include <stdio.h>
2#include <ctype.h>
3
4#ifndef unix
5#define SHIFT 5
6#define TABSIZE (int)(400000/(1<<SHIFT))
7int *tab; /*honeywell loader deficiency*/
8#else
9#define Tolower(c) (isupper(c)?tolower(c):c) /* ugh!!! */
10#define SHIFT 4
11#define TABSIZE 25000 /*(int)(400000/(1<<shift))--pdp11 compiler deficiency*/
12short tab[TABSIZE];
13#endif
14long p[] = {
15 399871,
16 399887,
17 399899,
18 399911,
19 399913,
20 399937,
21 399941,
22 399953,
23 399979,
24 399983,
25 399989,
26};
27#define NP (sizeof(p)/sizeof(p[0]))
28#define NW 30
29
30/*
31* Hash table for spelling checker has n bits.
32* Each word w is hashed by k different (modular) hash functions, hi.
33* The bits hi(w), i=1..k, are set for words in the dictionary.
34* Assuming independence, the probability that no word of a d-word
35* dictionary sets a particular bit is given by the Poisson formula
36* P = exp(-y)*y**0/0!, where y=d*k/n.
37* The probability that a random string is recognized as a word is then
38* (1-P)**k. For given n and d this is minimum when y=log(2), P=1/2,
39* whence one finds, for example, that a 25000-word dictionary in a
40* 400000-bit table works best with k=11.
41*/
42
43long pow2[NP][NW];
44
45prime(argc, argv) register char **argv;
46{
47 int i, j;
48 long h;
49 register long *lp;
50
51#ifndef unix
52 if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL)
53 return(0);
54#endif
55 if (argc > 1) {
56 FILE *f;
57 if ((f = fopen(argv[1], "ri")) == NULL)
58 return(0);
59 if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE)
60 return(0);
61 fclose(f);
62 }
63 for (i=0; i<NP; i++) {
64 h = *(lp = pow2[i]) = 1<<14;
65 for (j=1; j<NW; j++)
66 h = *++lp = (h<<7) % p[i];
67 }
68 return(1);
69}
70
71#define get(h) (tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1))))
72#define set(h) tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1))