static char sccsid
[] = "@(#)locate.code.c 4.2 (Berkeley) %G%";
* PURPOSE: sorted list compressor (works with a modified 'find'
* to encode/decode a filename database)
* USAGE: bigram < list > bigrams
* process bigrams (see updatedb) > common_bigrams
* code common_bigrams < list > squozen_list
* METHOD: Uses 'front compression' (see ";login:", March 1983, p. 8 ).
* Output format is, per line, an offset differential count byte
* followed by a partially bigram-encoded ascii residue.
* 0-28 likeliest differential counts + offset to make nonnegative
* 30 escape code for out-of-range count to follow in next word
* 128-255 bigram codes, (128 most common, as determined by 'updatedb')
* 32-127 single character (printable) ascii residue
* SEE ALSO: updatedb.csh, bigram.c, find.c
* AUTHOR: James A. Woods, Informatics General Corp.,
* NASA Ames Research Center, 10/82
#define MAXPATH 1024 /* maximum pathname length */
#define RESET 30 /* switch code */
char oldpath
[MAXPATH
] = " ";
char bigrams
[257] = { 0 };
int count
, oldcount
, diffcount
;
if ((fp
= fopen(argv
[1], "r")) == NULL
) {
printf("Usage: code common_bigrams < list > coded_list\n");
fgets ( bigrams
, 257, fp
);
fwrite ( bigrams
, 1, 256, stdout
);
while ( gets ( path
) != NULL
) {
squelch unprintable chars so as not to botch decoding
for ( j
= 0; path
[j
] != NULL
; j
++ ) {
if ( path
[j
] < 040 || path
[j
] == 0177 )
count
= prefix_length ( oldpath
, path
);
diffcount
= count
- oldcount
;
if ( (diffcount
< -14) || (diffcount
> 14) ) {
putw ( diffcount
+ 14, stdout
);
putc ( diffcount
+ 14, stdout
);
for ( j
= count
; path
[j
] != NULL
; j
+= 2 ) {
if ( path
[j
+ 1] == NULL
) {
linear search for specific bigram in string table
if ( (code
= strindex ( bigrams
, bigram
)) % 2 == 0 )
putchar ( (code
/ 2) | 0200 );
fputs ( bigram
, stdout
);
strcpy ( oldpath
, path
);
strindex ( string
, pattern
) /* return location of pattern in string or -1 */
register char *s
, *p
, *q
;
for ( s
= string
; *s
!= NULL
; s
++ )
if ( *s
== *pattern
) { /* fast first char check */
for ( p
= pattern
+ 1, q
= s
+ 1; *p
!= NULL
; p
++, q
++ )
return ( q
- strlen ( pattern
) - string
);
prefix_length ( s1
, s2
) /* return length of longest common prefix */
char *s1
, *s2
; /* ... of strings s1 and s2 */
for ( start
= s1
; *s1
== *s2
; s1
++, s2
++ )