Commit | Line | Data |
---|---|---|
1c15e888 | 1 | /*- |
ad787160 C |
2 | * Copyright (c) 1990, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
1c15e888 C |
4 | * |
5 | * This code is derived from software contributed to Berkeley by | |
ad787160 | 6 | * Vern Paxson of Lawrence Berkeley Laboratory. |
1c15e888 C |
7 | * |
8 | * The United States Government has rights in this work pursuant | |
9 | * to contract no. DE-AC03-76SF00098 between the United States | |
10 | * Department of Energy and the University of California. | |
11 | * | |
ad787160 C |
12 | * Redistribution and use in source and binary forms, with or without |
13 | * modification, are permitted provided that the following conditions | |
14 | * are met: | |
15 | * 1. Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * 2. Redistributions in binary form must reproduce the above copyright | |
18 | * notice, this list of conditions and the following disclaimer in the | |
19 | * documentation and/or other materials provided with the distribution. | |
20 | * 3. All advertising materials mentioning features or use of this software | |
21 | * must display the following acknowledgement: | |
22 | * This product includes software developed by the University of | |
23 | * California, Berkeley and its contributors. | |
24 | * 4. Neither the name of the University nor the names of its contributors | |
25 | * may be used to endorse or promote products derived from this software | |
26 | * without specific prior written permission. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
29 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
32 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
33 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
34 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
35 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
36 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
37 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
38 | * SUCH DAMAGE. | |
1c15e888 C |
39 | */ |
40 | ||
41 | #ifndef lint | |
ad787160 | 42 | static char sccsid[] = "@(#)ecs.c 8.1 (Berkeley) 6/6/93"; |
1c15e888 C |
43 | #endif /* not lint */ |
44 | ||
45 | /* ecs - equivalence class routines */ | |
46 | ||
47 | #include "flexdef.h" | |
48 | ||
49 | /* ccl2ecl - convert character classes to set of equivalence classes | |
50 | * | |
51 | * synopsis | |
52 | * ccl2ecl(); | |
53 | */ | |
54 | ||
55 | void ccl2ecl() | |
56 | ||
57 | { | |
58 | int i, ich, newlen, cclp, ccls, cclmec; | |
59 | ||
60 | for ( i = 1; i <= lastccl; ++i ) | |
61 | { | |
62 | /* we loop through each character class, and for each character | |
63 | * in the class, add the character's equivalence class to the | |
64 | * new "character" class we are creating. Thus when we are all | |
65 | * done, character classes will really consist of collections | |
66 | * of equivalence classes | |
67 | */ | |
68 | ||
69 | newlen = 0; | |
70 | cclp = cclmap[i]; | |
71 | ||
72 | for ( ccls = 0; ccls < ccllen[i]; ++ccls ) | |
73 | { | |
74 | ich = ccltbl[cclp + ccls]; | |
75 | cclmec = ecgroup[ich]; | |
76 | ||
77 | if ( xlation && cclmec < 0 ) | |
78 | { | |
79 | /* special hack--if we're doing %t tables then it's | |
80 | * possible that no representative of this character's | |
81 | * equivalence class is in the ccl. So waiting till | |
82 | * we see the representative would be disastrous. Instead, | |
83 | * we add this character's equivalence class anyway, if it's | |
84 | * not already present. | |
85 | */ | |
86 | int j; | |
87 | ||
88 | /* this loop makes this whole process n^2; but we don't | |
89 | * really care about %t performance anyway | |
90 | */ | |
91 | for ( j = 0; j < newlen; ++j ) | |
92 | if ( ccltbl[cclp + j] == -cclmec ) | |
93 | break; | |
94 | ||
95 | if ( j >= newlen ) | |
96 | { /* no representative yet, add this one in */ | |
97 | ccltbl[cclp + newlen] = -cclmec; | |
98 | ++newlen; | |
99 | } | |
100 | } | |
101 | ||
102 | else if ( cclmec > 0 ) | |
103 | { | |
104 | ccltbl[cclp + newlen] = cclmec; | |
105 | ++newlen; | |
106 | } | |
107 | } | |
108 | ||
109 | ccllen[i] = newlen; | |
110 | } | |
111 | } | |
112 | ||
113 | ||
114 | /* cre8ecs - associate equivalence class numbers with class members | |
115 | * | |
116 | * synopsis | |
117 | * int cre8ecs(); | |
118 | * number of classes = cre8ecs( fwd, bck, num ); | |
119 | * | |
120 | * fwd is the forward linked-list of equivalence class members. bck | |
121 | * is the backward linked-list, and num is the number of class members. | |
122 | * | |
123 | * Returned is the number of classes. | |
124 | */ | |
125 | ||
126 | int cre8ecs( fwd, bck, num ) | |
127 | int fwd[], bck[], num; | |
128 | ||
129 | { | |
130 | int i, j, numcl; | |
131 | ||
132 | numcl = 0; | |
133 | ||
134 | /* create equivalence class numbers. From now on, abs( bck(x) ) | |
135 | * is the equivalence class number for object x. If bck(x) | |
136 | * is positive, then x is the representative of its equivalence | |
137 | * class. | |
138 | */ | |
139 | for ( i = 1; i <= num; ++i ) | |
140 | if ( bck[i] == NIL ) | |
141 | { | |
142 | bck[i] = ++numcl; | |
143 | for ( j = fwd[i]; j != NIL; j = fwd[j] ) | |
144 | bck[j] = -numcl; | |
145 | } | |
146 | ||
147 | return ( numcl ); | |
148 | } | |
149 | ||
150 | ||
151 | /* ecs_from_xlation - associate equivalence class numbers using %t table | |
152 | * | |
153 | * synopsis | |
154 | * numecs = ecs_from_xlation( ecmap ); | |
155 | * | |
156 | * Upon return, ecmap will map each character code to its equivalence | |
157 | * class. The mapping will be positive if the character is the representative | |
158 | * of its class, negative otherwise. | |
159 | * | |
160 | * Returns the number of equivalence classes used. | |
161 | */ | |
162 | ||
163 | int ecs_from_xlation( ecmap ) | |
164 | int ecmap[]; | |
165 | ||
166 | { | |
167 | int i; | |
168 | int nul_is_alone = false; | |
169 | int did_default_xlation_class = false; | |
170 | ||
171 | if ( xlation[0] != 0 ) | |
172 | { | |
173 | /* if NUL shares its translation with other characters, choose one | |
174 | * of the other characters as the representative for the equivalence | |
175 | * class. This allows a cheap test later to see whether we can | |
176 | * do away with NUL's equivalence class. | |
177 | */ | |
178 | for ( i = 1; i < csize; ++i ) | |
179 | if ( xlation[i] == -xlation[0] ) | |
180 | { | |
181 | xlation[i] = xlation[0]; | |
182 | ecmap[0] = -xlation[0]; | |
183 | break; | |
184 | } | |
185 | ||
186 | if ( i >= csize ) | |
187 | /* didn't find a companion character--remember this fact */ | |
188 | nul_is_alone = true; | |
189 | } | |
190 | ||
191 | for ( i = 1; i < csize; ++i ) | |
192 | if ( xlation[i] == 0 ) | |
193 | { | |
194 | if ( did_default_xlation_class ) | |
195 | ecmap[i] = -num_xlations; | |
196 | ||
197 | else | |
198 | { | |
199 | /* make an equivalence class for those characters not | |
200 | * specified in the %t table | |
201 | */ | |
202 | ++num_xlations; | |
203 | ecmap[i] = num_xlations; | |
204 | did_default_xlation_class = true; | |
205 | } | |
206 | } | |
207 | ||
208 | else | |
209 | ecmap[i] = xlation[i]; | |
210 | ||
211 | if ( nul_is_alone ) | |
212 | /* force NUL's equivalence class to be the last one */ | |
213 | { | |
214 | ++num_xlations; | |
215 | ecmap[0] = num_xlations; | |
216 | ||
217 | /* there's actually a bug here: if someone is fanatic enough to | |
218 | * put every character in its own translation class, then right | |
219 | * now we just promoted NUL's equivalence class to be csize + 1; | |
220 | * we can handle NUL's class number being == csize (by instead | |
221 | * putting it in its own table), but we can't handle some *other* | |
222 | * character having to be put in its own table, too. So in | |
223 | * this case we bail out. | |
224 | */ | |
225 | if ( num_xlations > csize ) | |
226 | flexfatal( "too many %t classes!" ); | |
227 | } | |
228 | ||
229 | return num_xlations; | |
230 | } | |
231 | ||
232 | ||
233 | /* mkeccl - update equivalence classes based on character class xtions | |
234 | * | |
235 | * synopsis | |
236 | * Char ccls[]; | |
237 | * int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping; | |
238 | * mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ); | |
239 | * | |
240 | * where ccls contains the elements of the character class, lenccl is the | |
241 | * number of elements in the ccl, fwd is the forward link-list of equivalent | |
242 | * characters, bck is the backward link-list, and llsiz size of the link-list | |
243 | * | |
244 | * NUL_mapping is the value which NUL (0) should be mapped to. | |
245 | */ | |
246 | ||
247 | void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ) | |
248 | Char ccls[]; | |
249 | int lenccl, fwd[], bck[], llsiz, NUL_mapping; | |
250 | ||
251 | { | |
252 | int cclp, oldec, newec; | |
253 | int cclm, i, j; | |
254 | static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */ | |
255 | ||
256 | /* note that it doesn't matter whether or not the character class is | |
257 | * negated. The same results will be obtained in either case. | |
258 | */ | |
259 | ||
260 | cclp = 0; | |
261 | ||
262 | while ( cclp < lenccl ) | |
263 | { | |
264 | cclm = ccls[cclp]; | |
265 | ||
266 | if ( NUL_mapping && cclm == 0 ) | |
267 | cclm = NUL_mapping; | |
268 | ||
269 | oldec = bck[cclm]; | |
270 | newec = cclm; | |
271 | ||
272 | j = cclp + 1; | |
273 | ||
274 | for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) | |
275 | { /* look for the symbol in the character class */ | |
276 | for ( ; j < lenccl; ++j ) | |
277 | { | |
278 | register int ccl_char; | |
279 | ||
280 | if ( NUL_mapping && ccls[j] == 0 ) | |
281 | ccl_char = NUL_mapping; | |
282 | else | |
283 | ccl_char = ccls[j]; | |
284 | ||
285 | if ( ccl_char > i ) | |
286 | break; | |
287 | ||
288 | if ( ccl_char == i && ! cclflags[j] ) | |
289 | { | |
290 | /* we found an old companion of cclm in the ccl. | |
291 | * link it into the new equivalence class and flag it as | |
292 | * having been processed | |
293 | */ | |
294 | ||
295 | bck[i] = newec; | |
296 | fwd[newec] = i; | |
297 | newec = i; | |
298 | cclflags[j] = 1; /* set flag so we don't reprocess */ | |
299 | ||
300 | /* get next equivalence class member */ | |
301 | /* continue 2 */ | |
302 | goto next_pt; | |
303 | } | |
304 | } | |
305 | ||
306 | /* symbol isn't in character class. Put it in the old equivalence | |
307 | * class | |
308 | */ | |
309 | ||
310 | bck[i] = oldec; | |
311 | ||
312 | if ( oldec != NIL ) | |
313 | fwd[oldec] = i; | |
314 | ||
315 | oldec = i; | |
316 | next_pt: | |
317 | ; | |
318 | } | |
319 | ||
320 | if ( bck[cclm] != NIL || oldec != bck[cclm] ) | |
321 | { | |
322 | bck[cclm] = NIL; | |
323 | fwd[oldec] = NIL; | |
324 | } | |
325 | ||
326 | fwd[newec] = NIL; | |
327 | ||
328 | /* find next ccl member to process */ | |
329 | ||
330 | for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp ) | |
331 | { | |
332 | /* reset "doesn't need processing" flag */ | |
333 | cclflags[cclp] = 0; | |
334 | } | |
335 | } | |
336 | } | |
337 | ||
338 | ||
339 | /* mkechar - create equivalence class for single character | |
340 | * | |
341 | * synopsis | |
342 | * int tch, fwd[], bck[]; | |
343 | * mkechar( tch, fwd, bck ); | |
344 | */ | |
345 | ||
346 | void mkechar( tch, fwd, bck ) | |
347 | int tch, fwd[], bck[]; | |
348 | ||
349 | { | |
350 | /* if until now the character has been a proper subset of | |
351 | * an equivalence class, break it away to create a new ec | |
352 | */ | |
353 | ||
354 | if ( fwd[tch] != NIL ) | |
355 | bck[fwd[tch]] = bck[tch]; | |
356 | ||
357 | if ( bck[tch] != NIL ) | |
358 | fwd[bck[tch]] = fwd[tch]; | |
359 | ||
360 | fwd[tch] = NIL; | |
361 | bck[tch] = NIL; | |
362 | } |