* Copyright (c) 1989 The Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
"@(#) Copyright (c) 1989 The Regents of the University of California.\n\
/* SCCS identification string for this source file. */
static char sccsid[] = "@(#)strfile.c 5.12 (Berkeley) 4/8/91";
# include <machine/endian.h>
* This program takes a file composed of strings separated by
* lines starting with two consecutive delimiting characters (default
* character is '%') and creates another file which consists of a table
* describing the file (structure from "strfile.h"), a table of seek
* pointers to the start of the strings, and the strings, each terminated
* % strfile [-iorsx] [ -cC ] sourcefile [ datafile ]
* c - Change delimiting character from '%' to 'C'
* s - Silent. Give no summary of data processed at the end of
* o - order the strings in alphabetic order
* i - if ordering, ignore case
* r - randomize the order of the strings
* Ken Arnold Sept. 7, 1978 --
* Added ordering options.
/* True when either the ordering flag or the randomize flag is set. */
# define STORING_PTRS (Oflag || Rflag)
/*
 * Evaluates to 1 through a run-time call to atoi().
 * NOTE(review): presumably used so the condition is not a compile-time
 * constant (e.g. to quiet lint) -- confirm before simplifying.
 */
# define ALWAYS atoi("1")
# define ALLOC(ptr,sz) if (ALWAYS) { \
ptr = malloc((unsigned int) (CHUNKSIZE * sizeof *ptr)); \
else if (((sz) + 1) % CHUNKSIZE == 0) \
ptr = realloc((void *) ptr, ((unsigned int) ((sz) + CHUNKSIZE) * sizeof *ptr)); \
fprintf(stderr, "out of space\n"); \
/*
 * File-scope program state: file names, option flags, and the tables
 * built while processing the input.
 */
char *Infile = NULL;		/* name of the input file */
char Outfile[MAXPATHLEN] = "";	/* name of the output file */
char Delimch = '%';		/* character that delimits strings */

int Sflag = FALSE;		/* run silently */
int Oflag = FALSE;		/* order the strings */
int Iflag = FALSE;		/* ignore case when ordering */
int Rflag = FALSE;		/* randomize the string order */
int Xflag = FALSE;		/* set the rotated bit */

long Num_pts = 0;		/* count of pointers/strings */

FILE *Sort_1;			/* first stream used for sorting */
FILE *Sort_2;			/* second stream used for sorting */

STRFILE Tbl;			/* statistics table */

STR *Firstch;			/* first characters of each string */
/*
 * Old-style (unprototyped) declarations of library routines.
 * NOTE(review): these are normally supplied by <stdio.h>, <string.h> and
 * <stdlib.h>; confirm those headers are included and then drop these lines.
 */
char *fgets(), *strcpy(), *strcat();
void *malloc(), *realloc();
* Drive the sucker. There are two main modes -- either we store
* the seek pointers, if the table is to be sorted or randomized,
* or we write the pointer directly to the file, if we are to stay
* in file order. If the former, we allocate and re-allocate in
* CHUNKSIZE blocks; if the latter, we just write each pointer,
* and then seek back to the beginning to write in the table.
register FILE *inf
, *outf
;
register off_t last_off
, length
, pos
, *p
;
getargs(ac
, av
); /* evaluate arguments */
if ((inf
= fopen(Infile
, "r")) == NULL
) {
if ((outf
= fopen(Outfile
, "w")) == NULL
) {
(void) fseek(outf
, sizeof Tbl
, 0);
* Write the strings onto the file
Tbl
.str_shortlen
= (unsigned int) 0xffffffff;
Tbl
.str_version
= VERSION
;
add_offset(outf
, ftell(inf
));
sp
= fgets(string
, 256, inf
);
if (sp
== NULL
|| sp
[0] == dc
&& sp
[1] == '\n') {
length
= pos
- last_off
- (sp
? strlen(sp
) : 0);
if (Tbl
.str_longlen
< length
)
Tbl
.str_longlen
= length
;
if (Tbl
.str_shortlen
> length
)
Tbl
.str_shortlen
= length
;
for (nsp
= sp
; !isalnum(*nsp
); nsp
++)
fp
= &Firstch
[Num_pts
- 1];
if (Iflag
&& isupper(*nsp
))
fp
->first
= tolower(*nsp
);
fp
->pos
= Seekpts
[Num_pts
- 1];
Tbl
.str_flags
|= STR_ROTATED
;
printf("\"%s\" created\n", Outfile
);
puts("There was 1 string");
/* Num_pts is a long, so the count must be printed with %ld, not %d. */
printf("There were %ld strings\n", Num_pts - 1);
printf("Longest string: %lu byte%s\n", Tbl
.str_longlen
,
Tbl
.str_longlen
== 1 ? "" : "s");
printf("Shortest string: %lu byte%s\n", Tbl
.str_shortlen
,
Tbl
.str_shortlen
== 1 ? "" : "s");
(void) fseek(outf
, (off_t
) 0, 0);
Tbl
.str_version
= htonl(Tbl
.str_version
);
Tbl
.str_numstr
= htonl(Num_pts
- 1);
Tbl
.str_longlen
= htonl(Tbl
.str_longlen
);
Tbl
.str_shortlen
= htonl(Tbl
.str_shortlen
);
Tbl
.str_flags
= htonl(Tbl
.str_flags
);
(void) fwrite((char *) &Tbl
, sizeof Tbl
, 1, outf
);
for (p
= Seekpts
, cnt
= Num_pts
; cnt
--; ++p
)
(void) fwrite((char *) Seekpts
, sizeof *Seekpts
, (int) Num_pts
, outf
);
* This routine evaluates arguments from the command line
while ((ch
= getopt(argc
, argv
, "c:iorsx")) != EOF
)
case 'c': /* new delimiting char */
printf("bad delimiting character: '\\%o\n'",
case 'i': /* ignore case in ordering */
case 'o': /* order strings */
case 'r': /* randomize pointers */
case 'x': /* set the rotated bit */
(void) strcpy(Outfile
, *argv
);
puts("No input file name");
(void) strcpy(Outfile
, Infile
);
(void) strcat(Outfile
, ".dat");
"strfile [-iorsx] [-c char] sourcefile [datafile]\n");
* Add an offset to the list, or write it out, as appropriate.
fwrite(&net
, 1, sizeof net
, fp
);
ALLOC(Seekpts
, Num_pts
+ 1);
* Order the strings alphabetically (possibly ignoring case).
Sort_1
= fopen(Infile
, "r");
Sort_2
= fopen(Infile
, "r");
qsort((char *) Firstch
, (int) Tbl
.str_numstr
, sizeof *Firstch
, cmp_str
);
Tbl
.str_flags
|= STR_ORDERED
;
* Compare two strings in the file
/*
 * SET_N(nf, ch): store into nf whether ch is a newline (1) or not (0).
 * IS_END(ch, nf): true when ch is the delimiting character (Delimch)
 * and nf is non-zero.
 * Every macro argument is parenthesized so that compound expressions
 * passed as arguments keep their intended grouping.
 */
# define SET_N(nf,ch) ((nf) = ((ch) == '\n'))
# define IS_END(ch,nf) ((ch) == Delimch && (nf))
(void) fseek(Sort_1
, p1
->pos
, 0);
(void) fseek(Sort_2
, p2
->pos
, 0);
while (!isalnum(c1
= getc(Sort_1
)) && c1
!= '\0')
while (!isalnum(c2
= getc(Sort_2
)) && c2
!= '\0')
while (!IS_END(c1
, n1
) && !IS_END(c2
, n2
)) {
* Randomize the order of the string table. We must be careful
* not to randomize across delimiter boundaries. All
* randomization is done within each block.
srandom((int)(time((time_t *) NULL
) + getpid()));
Tbl
.str_flags
|= STR_RANDOM
;
* move things around randomly
for (sp
= Seekpts
; cnt
> 0; cnt
--, sp
++) {