* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
* This code is derived from software contributed to Berkeley by
* Vern Paxson of Lawrence Berkeley Laboratory.
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
static char sccsid
[] = "@(#)ecs.c 8.1 (Berkeley) 6/6/93";
/* ecs - equivalence class routines */
/* ccl2ecl - convert character classes to sets of equivalence classes */
int i
, ich
, newlen
, cclp
, ccls
, cclmec
;
for ( i
= 1; i
<= lastccl
; ++i
)
/* We loop through each character class and, for each character
* in the class, add the character's equivalence class to the
* new "character" class we are creating. Thus, when we are all
* done, character classes will really consist of collections
* of equivalence classes rather than collections of characters.
*/
for ( ccls
= 0; ccls
< ccllen
[i
]; ++ccls
)
ich
= ccltbl
[cclp
+ ccls
];
if ( xlation
&& cclmec
< 0 )
/* special hack--if we're doing %t tables then it's
* possible that no representative of this character's
* equivalence class is in the ccl. So waiting till
* we see the representative would be disastrous. Instead,
* we add this character's equivalence class anyway, if it's
* not already present.
*/
/* this loop makes this whole process n^2, but we don't
* really care about %t performance anyway
*/
for ( j
= 0; j
< newlen
; ++j
)
if ( ccltbl
[cclp
+ j
] == -cclmec
)
{ /* no representative yet, add this one in */
ccltbl
[cclp
+ newlen
] = -cclmec
;
ccltbl
[cclp
+ newlen
] = cclmec
;
/* cre8ecs - associate equivalence class numbers with class members
* number of classes = cre8ecs( fwd, bck, num );
* fwd is the forward linked-list of equivalence class members, bck
* is the backward linked-list, and num is the number of class members.
* Returns the number of classes.
*/
int cre8ecs( fwd
, bck
, num
)
/* create equivalence class numbers. From now on, abs( bck(x) )
* is the equivalence class number for object x. If bck(x)
* is positive, then x is the representative of its equivalence
* class.
*/
for ( i
= 1; i
<= num
; ++i
)
for ( j
= fwd
[i
]; j
!= NIL
; j
= fwd
[j
] )
/* ecs_from_xlation - associate equivalence class numbers using %t table
* numecs = ecs_from_xlation( ecmap );
* Upon return, ecmap will map each character code to its equivalence
* class. The mapping will be positive if the character is the representative
* of its class, negative otherwise.
* Returns the number of equivalence classes used.
*/
int ecs_from_xlation( ecmap
)
int nul_is_alone
= false;
int did_default_xlation_class
= false;
/* if NUL shares its translation with other characters, choose one
* of the other characters as the representative for the equivalence
* class. This allows a cheap test later to see whether we can
* do away with NUL's equivalence class.
*/
for ( i
= 1; i
< csize
; ++i
)
if ( xlation
[i
] == -xlation
[0] )
/* didn't find a companion character--remember this fact */
for ( i
= 1; i
< csize
; ++i
)
if ( did_default_xlation_class
)
ecmap
[i
] = -num_xlations
;
/* make an equivalence class for those characters not
* specified in the %t table
*/
did_default_xlation_class
= true;
/* force NUL's equivalence class to be the last one */
/* there's actually a bug here: if someone is fanatic enough to
* put every character in its own translation class, then right
* now we just promoted NUL's equivalence class to be csize + 1;
* we can handle NUL's class number being == csize (by instead
* putting it in its own table), but we can't handle some *other*
* character having to be put in its own table, too. So in
* this case we bail out.
*/
if ( num_xlations
> csize
)
flexfatal( "too many %t classes!" );
/* mkeccl - update equivalence classes based on character class transitions
* int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
* mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping );
* where ccls contains the elements of the character class, lenccl is the
* number of elements in the ccl, fwd is the forward link-list of equivalent
* characters, bck is the backward link-list, and llsiz is the size of the
* link-lists. NUL_mapping is the value which NUL (0) should be mapped to.
*/
void mkeccl( ccls
, lenccl
, fwd
, bck
, llsiz
, NUL_mapping
)
int lenccl
, fwd
[], bck
[], llsiz
, NUL_mapping
;
static unsigned char cclflags
[CSIZE
]; /* initialized to all '\0' */
/* note that it doesn't matter whether or not the character class is
* negated. The same results will be obtained in either case.
*/
if ( NUL_mapping
&& cclm
== 0 )
for ( i
= fwd
[cclm
]; i
!= NIL
&& i
<= llsiz
; i
= fwd
[i
] )
{ /* look for the symbol in the character class */
for ( ; j
< lenccl
; ++j
)
if ( NUL_mapping
&& ccls
[j
] == 0 )
if ( ccl_char
== i
&& ! cclflags
[j
] )
/* we found an old companion of cclm in the ccl.
* link it into the new equivalence class and flag it as
* having been processed.
*/
cclflags
[j
] = 1; /* set flag so we don't reprocess */
/* get next equivalence class member */
/* symbol isn't in character class. Put it in the old equivalence
* class.
*/
if ( bck
[cclm
] != NIL
|| oldec
!= bck
[cclm
] )
/* find next ccl member to process */
for ( ++cclp
; cclflags
[cclp
] && cclp
< lenccl
; ++cclp
)
/* reset "doesn't need processing" flag */
/* mkechar - create equivalence class for single character
* mkechar( tch, fwd, bck );
*/
void mkechar( tch
, fwd
, bck
)
/* if until now the character has been a proper subset of
* an equivalence class, break it away to create a new ec
*/
bck
[fwd
[tch
]] = bck
[tch
];
fwd
[bck
[tch
]] = fwd
[tch
];