fixed an obscure bug that doesn't show up in English
[unix-history] / usr / src / usr.bin / spell / spell.h
CommitLineData
ac1b4ce6
SL
1/* @(#)spell.h 4.1 %G% */
2
3#include <stdio.h>
4#include <ctype.h>
5
6#ifndef unix
7#define SHIFT 5
8#define TABSIZE (int)(400000/(1<<SHIFT))
9int *tab; /*honeywell loader deficiency*/
10#else
11#define Tolower(c) (isupper(c)?tolower(c):c) /* ugh!!! */
12#define SHIFT 4
13#define TABSIZE 25000 /*(int)(400000/(1<<shift))--pdp11 compiler deficiency*/
14short tab[TABSIZE];
15#endif
16long p[] = {
17 399871,
18 399887,
19 399899,
20 399911,
21 399913,
22 399937,
23 399941,
24 399953,
25 399979,
26 399983,
27 399989,
28};
29#define NP (sizeof(p)/sizeof(p[0]))
30#define NW 30
31
32/*
33* Hash table for spelling checker has n bits.
34* Each word w is hashed by k different (modular) hash functions, hi.
35* The bits hi(w), i=1..k, are set for words in the dictionary.
36* Assuming independence, the probability that no word of a d-word
37* dictionary sets a particular bit is given by the Poisson formula
38* P = exp(-y)*y**0/0!, where y=d*k/n.
39* The probability that a random string is recognized as a word is then
40* (1-P)**k. For given n and d this is minimum when y=log(2), P=1/2,
41* whence one finds, for example, that a 25000-word dictionary in a
42* 400000-bit table works best with k=11.
43*/
44
45long pow2[NP][NW];
46
47prime(argc, argv) register char **argv;
48{
49 int i, j;
50 long h;
51 register long *lp;
52
53#ifndef unix
54 if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL)
55 return(0);
56#endif
57 if (argc > 1) {
58 FILE *f;
59 if ((f = fopen(argv[1], "ri")) == NULL)
60 return(0);
61 if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE)
62 return(0);
63 fclose(f);
64 }
65 for (i=0; i<NP; i++) {
66 h = *(lp = pow2[i]) = 1<<14;
67 for (j=1; j<NW; j++)
68 h = *++lp = (h<<7) % p[i];
69 }
70 return(1);
71}
72
73#define get(h) (tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1))))
74#define set(h) tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1))