Commit | Line | Data |
---|---|---|
/* SCCS identification string; left out when the file is run through lint. */
#ifndef lint
static char sccsid[] = "@(#)n8.c 2.1 (CWI) 85/07/18";
#endif /* lint */
4546690a JA |
4 | #include <ctype.h> |
5 | #include "tdef.h" | |
655e9788 JA |
6 | #include <sgtty.h> |
7 | #include "ext.h" | |
4546690a JA |
8 | #define HY_BIT 0200 /* stuff in here only works for ascii */ |
9 | ||
10 | /* | |
655e9788 JA |
11 | * troff8.c |
12 | * | |
13 | * hyphenation | |
14 | */ | |
4546690a | 15 | |
4546690a JA |
char	hbuf[NHEX];	/* exception words stored by casehw(), searched by exword() */
char	*nexth = hbuf;	/* where casehw() starts storing the next exception word */
tchar	*hyend;		/* last character of the part of the word still being
			   examined; set by hyphen(), moved by suffix()/digram() */
4546690a JA |
19 | |
20 | hyphen(wp) | |
21 | tchar *wp; | |
22 | { | |
23 | register j; | |
24 | register tchar *i; | |
25 | ||
26 | i = wp; | |
27 | while (punct(cbits(*i++))) | |
28 | ; | |
29 | if (!alph(cbits(*--i))) | |
30 | return; | |
31 | wdstart = i++; | |
32 | while (alph(cbits(*i++))) | |
33 | ; | |
34 | hyend = wdend = --i - 1; | |
35 | while (punct(cbits(*i++))) | |
36 | ; | |
37 | if (*--i) | |
38 | return; | |
39 | if ((wdend - wdstart - 4) < 0) | |
40 | return; | |
41 | hyp = hyptr; | |
42 | *hyp = 0; | |
43 | hyoff = 2; | |
44 | /* | |
45 | if (!exword() && !suffix()) | |
46 | digram(); | |
47 | */ | |
48 | if (!exword()) { | |
49 | if (hyalg == ORIGINAL && !suffix()) | |
50 | digram(); | |
51 | if (hyalg == DUTCH) | |
52 | split(wdstart, wdend); | |
53 | } | |
54 | *hyp++ = 0; | |
55 | if (*hyptr) | |
56 | for (j = 1; j; ) { | |
57 | j = 0; | |
58 | for (hyp = hyptr + 1; *hyp != 0; hyp++) { | |
59 | if (*(hyp - 1) > *hyp) { | |
60 | j++; | |
61 | i = *hyp; | |
62 | *hyp = *(hyp - 1); | |
63 | *(hyp - 1) = i; | |
64 | } | |
65 | } | |
66 | } | |
67 | } | |
68 | ||
69 | ||
/*
 * punct - return 1 if i is neither 0 nor a letter (see alph()), else 0.
 */
int punct(i)
int i;
{
	return(i != 0 && !alph(i));
}
77 | ||
78 | ||
/*
 * alph - return 1 if i is an ASCII letter (a-z or A-Z), else 0.
 */
int alph(i)
int i;
{
	if (i >= 'a' && i <= 'z')
		return(1);
	if (i >= 'A' && i <= 'Z')
		return(1);
	return(0);
}
86 | ||
4546690a JA |
87 | /* |
88 | * set the hyphenation algorithm | |
89 | * | |
90 | * jna | |
91 | */ | |
92 | ||
93 | caseha() | |
94 | { register i; | |
95 | ||
96 | if ( skip()) | |
97 | i = hyalg1; | |
98 | else { | |
99 | noscale++; | |
100 | noscale = 0; | |
655e9788 | 101 | i = max(atoi(), 0); |
4546690a JA |
102 | if (nonumb) |
103 | return; | |
104 | if (i > MAXDIALECTS) { | |
655e9788 | 105 | errprint("Unknown dialect %d", i); |
4546690a JA |
106 | return; |
107 | } | |
108 | } | |
109 | hyalg1 = hyalg; | |
110 | hyalg = i; | |
111 | if( hyalg == DUTCH) | |
112 | thresh = DUTCH_THRESH; | |
113 | } | |
114 | ||
4546690a JA |
115 | caseht() |
116 | { | |
117 | switch(hyalg) { | |
118 | case ORIGINAL: | |
119 | thresh = THRESH; | |
120 | break; | |
121 | case DUTCH: | |
122 | thresh = DUTCH_THRESH; | |
123 | break; | |
124 | } | |
655e9788 | 125 | if (skip()) |
4546690a JA |
126 | return; |
127 | noscale++; | |
655e9788 JA |
128 | if (hyalg == DUTCH) |
129 | thresh = max(atoi(), 1); | |
4546690a JA |
130 | else |
131 | thresh = atoi(); | |
132 | noscale = 0; | |
133 | } | |
134 | ||
135 | ||
136 | casehw() | |
137 | { | |
138 | register i, k; | |
139 | register char *j; | |
140 | tchar t; | |
141 | ||
142 | k = 0; | |
143 | while (!skip()) { | |
144 | if ((j = nexth) >= (hbuf + NHEX - 2)) | |
145 | goto full; | |
146 | for (; ; ) { | |
147 | if (ismot(t = getch())) | |
148 | continue; | |
149 | i = cbits(t); | |
150 | if (i == ' ' || i == '\n') { | |
151 | *j++ = 0; | |
152 | nexth = j; | |
153 | *j = 0; | |
154 | if (i == ' ') | |
155 | break; | |
156 | else | |
157 | return; | |
158 | } | |
159 | if (i == '-') { | |
160 | k = HY_BIT; | |
161 | continue; | |
162 | } | |
163 | *j++ = maplow(i) | k; | |
164 | k = 0; | |
165 | if (j >= (hbuf + NHEX - 2)) | |
166 | goto full; | |
167 | } | |
168 | } | |
169 | return; | |
170 | full: | |
655e9788 | 171 | errprint("exception word list full."); |
4546690a JA |
172 | *nexth = 0; |
173 | } | |
174 | ||
175 | ||
176 | exword() | |
177 | { | |
178 | register tchar *w; | |
179 | register char *e; | |
180 | char *save; | |
181 | ||
182 | e = hbuf; | |
183 | while (1) { | |
184 | save = e; | |
185 | if (*e == 0) | |
186 | return(0); | |
187 | w = wdstart; | |
188 | while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) { | |
189 | e++; | |
190 | w++; | |
191 | }; | |
192 | if (!*e) { | |
193 | if (w-1 == hyend || | |
194 | (hyalg == ORIGINAL /* s-extension only in original */ | |
655e9788 | 195 | && (w == wdend && maplow(cbits(*w)) == 's'))) { |
4546690a JA |
196 | w = wdstart; |
197 | for (e = save; *e; e++) { | |
198 | if (*e & HY_BIT) | |
199 | *hyp++ = w; | |
200 | if (hyp > (hyptr + NHYP - 1)) | |
201 | hyp = hyptr + NHYP - 1; | |
202 | w++; | |
203 | } | |
204 | return(1); | |
205 | } else { | |
206 | e++; | |
207 | continue; | |
208 | } | |
209 | } else | |
210 | while (*e++) | |
211 | ; | |
212 | } | |
213 | } | |
214 | ||
215 | ||
216 | suffix() | |
217 | { | |
218 | register tchar *w; | |
219 | register char *s, *s0; | |
220 | tchar i; | |
221 | extern char *suftab[]; | |
222 | extern tchar *chkvow(); | |
223 | ||
224 | again: | |
225 | if (!alph(cbits(i = cbits(*hyend)))) | |
226 | return(0); | |
227 | if (i < 'a') | |
228 | i -= 'A' - 'a'; | |
229 | if ((s0 = suftab[i-'a']) == 0) | |
230 | return(0); | |
231 | for (; ; ) { | |
232 | if ((i = *s0 & 017) == 0) | |
233 | return(0); | |
234 | s = s0 + i - 1; | |
235 | w = hyend - 1; | |
236 | while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) { | |
237 | s--; | |
238 | w--; | |
239 | } | |
240 | if (s == s0) | |
241 | break; | |
242 | s0 += i; | |
243 | } | |
244 | s = s0 + i - 1; | |
245 | w = hyend; | |
246 | if (*s0 & HY_BIT) | |
247 | goto mark; | |
248 | while (s > s0) { | |
249 | w--; | |
250 | if (*s-- & HY_BIT) { | |
251 | mark: | |
252 | hyend = w - 1; | |
253 | if (*s0 & 0100) | |
254 | continue; | |
255 | if (!chkvow(w)) | |
256 | return(0); | |
257 | *hyp++ = w; | |
258 | } | |
259 | } | |
260 | if (*s0 & 040) | |
261 | return(0); | |
262 | if (exword()) | |
263 | return(1); | |
264 | goto again; | |
265 | } | |
266 | ||
267 | ||
/*
 * maplow - map an ASCII upper-case letter to lower case; every other
 * value, including codes outside the character range, is returned
 * unchanged.
 *
 * A plain range test is used instead of isupper()/tolower(), which are
 * only defined for values representable as unsigned char (or EOF).
 * Like the rest of this file it assumes ASCII.
 */
int maplow(i)
register int i;
{
	if (i >= 'A' && i <= 'Z')
		i += 'a' - 'A';
	return(i);
}
275 | ||
276 | ||
/*
 * vowel - return 1 if i, ignoring case, is one of a e i o u y, else 0.
 */
int vowel(i)
int i;
{
	register c;

	c = maplow(i);
	if (c == 'a' || c == 'e' || c == 'i')
		return(1);
	if (c == 'o' || c == 'u' || c == 'y')
		return(1);
	return(0);
}
292 | ||
293 | ||
294 | tchar *chkvow(w) | |
295 | tchar *w; | |
296 | { | |
297 | while (--w >= wdstart) | |
298 | if (vowel(cbits(*w))) | |
299 | return(w); | |
300 | return(0); | |
301 | } | |
302 | ||
303 | ||
304 | digram() | |
305 | { | |
306 | register tchar *w; | |
307 | register val; | |
308 | tchar * nhyend, *maxw; | |
309 | int maxval; | |
310 | extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13]; | |
311 | ||
312 | again: | |
313 | if (!(w = chkvow(hyend + 1))) | |
314 | return; | |
315 | hyend = w; | |
316 | if (!(w = chkvow(hyend))) | |
317 | return; | |
318 | nhyend = w; | |
319 | maxval = 0; | |
320 | w--; | |
321 | while ((++w < hyend) && (w < (wdend - 1))) { | |
322 | val = 1; | |
323 | if (w == wdstart) | |
324 | val *= dilook('a', cbits(*w), bxh); | |
325 | else if (w == wdstart + 1) | |
326 | val *= dilook(cbits(*(w-1)), cbits(*w), bxxh); | |
327 | else | |
328 | val *= dilook(cbits(*(w-1)), cbits(*w), xxh); | |
329 | val *= dilook(cbits(*w), cbits(*(w+1)), xhx); | |
330 | val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx); | |
331 | if (val > maxval) { | |
332 | maxval = val; | |
333 | maxw = w + 1; | |
334 | } | |
335 | } | |
336 | hyend = nhyend; | |
337 | if (maxval > thresh) | |
338 | *hyp++ = maxw; | |
339 | goto again; | |
340 | } | |
341 | ||
342 | ||
/*
 * dilook - fetch the 4-bit table value for the letter pair (a, b).
 *
 * The row is selected by a, the column by b, both lower-cased and taken
 * relative to 'a'.  Each byte of t packs two columns: the even column
 * in the high four bits, the odd column in the low four bits.
 */
int dilook(a, b, t)
int a, b;
char t[26][13];
{
	register row, col, packed;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	packed = t[row][col / 2];
	if ((col & 01) == 0)
		packed >>= 4;
	return(packed & 017);
}
354 | ||
355 | ||
/*
 * All this jazz is here to have the Dutch dialect hyphenated.
 * It first appeared in the Dutch version of troff (nltroff), due to
 * teus hagen.
 * The original program was converted from Algol60 to C by, I think,
 * bert ijsselstein.
 * It's a mess, anyway.
 *
 * Planted in this version of troff by jaap akkerhuis (jna).
 *
 * Note that this is licensed software!
 *
 */
369 | ||
#ifndef NULL
#define NULL 0
#endif
#define MAXLETT 50	/* at most the first MAXLETT characters of a word
			   will be processed */
#define MAXSYLL 20	/* at most the first MAXSYLL syllables of a word
			   will be processed */

/*
 * Letter codes as stored in woord[] by bestsplit1() (values taken from
 * translate[] or comprimation[]); they are only tested in its third phase.
 */
#define LETTEREE 27	/* compared with the code at the vowel position */
#define LETTERJ 41	/* these four are compared with the code that */
#define LETTERV 55	/* follows the vowel position */
#define LETTERX 57
#define LETTERZ 58
383 | ||
655e9788 JA |
384 | /* |
385 | * split(..) needs to be cleaned up, could install hjt's version... | |
386 | */ | |
387 | ||
4546690a JA |
388 | split( aword, anend ) register tchar *aword, *anend; |
389 | { register tchar *place; | |
390 | extern tchar *bestsplit1(); | |
391 | ||
392 | place = bestsplit1( aword, anend ); | |
393 | if( place != (tchar *) NULL ) | |
394 | { *hyp++ = place; | |
395 | if( place - aword > thresh && anend - place > thresh ) | |
396 | split( aword, place+1 ); | |
397 | if( anend - place > thresh && place - aword > thresh ) | |
398 | split( place, anend ); | |
399 | } | |
400 | } | |
401 | ||
402 | tchar * | |
403 | bestsplit1( tosplit , aend ) | |
404 | tchar *tosplit, *aend; | |
405 | { | |
406 | /* This function determines the "best" place to split into two parts the | |
407 | * Dutch word contained in a string of <size> characters which starts at | |
408 | * the address <tosplit> . | |
409 | * The input characters should be in ASCII code . | |
410 | * The function returns as value the number of characters of the first | |
411 | * of the two parts . | |
412 | * If the returned value exceeds the character count of the line the | |
413 | * user may try to invoke bestsplit1 again but now with <size> equal to | |
414 | * the returned value plus one . | |
415 | * The algorithm is adapted from the Mathematical Centre report NR 28/72, | |
416 | * "BESTESPLITS1, EEN PROCEDURE VOOR HET AUTOMATISCH AFBREKEN VAN NEDER- | |
417 | * LANDSE WOORDEN" , which has been written by J.C. VAN VLIET. | |
418 | */ | |
419 | extern char translate[], comprimation[][14], consonant[][23], | |
420 | prefix[][3] ; | |
655e9788 | 421 | short woord[ MAXLETT +1], reference[ MAXLETT +1], vowel[ MAXSYLL ], |
4546690a JA |
422 | turn[ MAXSYLL ] , letter, nextlett, vowel1, vowel2, |
423 | l0, l1, l2 ; | |
424 | short numlett, numsyll, turnindex, differ, start1, start2, stop, | |
425 | level, bp ; | |
426 | register int i, j, help ; | |
427 | short size = aend - tosplit + 1; | |
428 | ||
429 | /* translate into bestsplit code : */ | |
655e9788 | 430 | woord[0] = 0 ; |
4546690a JA |
431 | i = 1 ; |
432 | help = -1 ; | |
433 | while ( (++help < size) && (i < MAXLETT ) ) { | |
434 | reference[i] = i; | |
655e9788 | 435 | woord[i++] = translate[maplow(cbits(tosplit[help])) - 'a'] ; |
4546690a JA |
436 | } |
437 | /* end of translation : */ | |
438 | ||
439 | numlett = i ; | |
440 | if ( numlett < 4 ) goto nosplit ; | |
441 | i = j = 1 ; | |
442 | help = 0 ; | |
443 | while ( i < numlett ) { | |
655e9788 | 444 | letter = woord[i] ; |
4546690a JA |
445 | /* comprimation of vowels : */ |
446 | if ( (25 < letter) && (letter < 41) ) { | |
655e9788 | 447 | nextlett = woord[i+1] ; |
4546690a JA |
448 | if ( (28 < nextlett) && (nextlett < 43) ) { |
449 | letter = comprimation[letter-26][nextlett-29] ; | |
450 | if (letter > 0) { | |
451 | i++ ; | |
452 | help++ ; | |
655e9788 | 453 | woord[i] = letter ; |
4546690a JA |
454 | continue ; |
455 | } | |
456 | } | |
457 | } /* end of comprimation */ | |
458 | ||
655e9788 | 459 | woord[j] = woord[i] ; |
4546690a JA |
460 | j++ ; |
461 | i++ ; | |
462 | reference[j] += help ; | |
463 | } | |
655e9788 | 464 | woord[j] = woord[numlett] ; |
4546690a JA |
465 | numlett = j ; |
466 | ||
467 | ||
468 | /* determination of the number of syllables */ | |
469 | j = -1 ; | |
470 | i = 0 ; | |
471 | while ( ( ++i <= numlett ) && ( j < MAXSYLL ) ) { | |
655e9788 | 472 | if (woord[i] < 39) { |
4546690a JA |
473 | j++ ; |
474 | vowel[j] = i ; | |
475 | } | |
476 | } | |
477 | numsyll = j+1 ; | |
478 | ||
479 | if ( numsyll < 2 ) goto nosplit ; | |
480 | turnindex = 0 ; | |
481 | differ = 1 ; | |
482 | start1 = 0 ; | |
483 | start2 = numsyll - 1 ; | |
484 | stop = start2 ; | |
485 | ||
486 | while ( turnindex < stop ) { | |
487 | vowel1 = vowel[stop] ; | |
488 | for ( i = stop - 1 ; i >= 0 ; i-- ) { | |
489 | vowel2 = vowel[i] ; | |
490 | if ( vowel1 - vowel2 == differ) { | |
491 | turn[turnindex] = i ; | |
492 | turnindex++ ; | |
493 | } | |
494 | vowel1 = vowel2 ; | |
495 | } | |
496 | if ( differ == 1 ) start1 = turnindex ; | |
497 | else if ( differ == 2 ) start2 = turnindex ; | |
498 | differ++ ; | |
499 | } | |
500 | ||
501 | turnindex = start2 - 1 ; | |
502 | stop = numsyll - 1 ; | |
503 | level = 1 ; | |
504 | ||
505 | next : | |
506 | turnindex++ ; | |
507 | if ( turnindex >= stop ) { | |
508 | if ( level == 1 ) turnindex = start2 ; | |
509 | else if ( level == 2 ) { | |
510 | turnindex = start1 ; | |
511 | stop = start2 ; | |
512 | } | |
513 | else goto nosplit ; | |
514 | level++ ; | |
515 | if ( turnindex >= stop ) goto next ; | |
516 | } | |
517 | j = turn[turnindex] ; | |
518 | vowel1 = vowel[j] ; | |
519 | vowel2 = vowel[j+1] ; | |
520 | ||
521 | switch ( level ) { | |
522 | case 1 : | |
523 | for ( j = vowel2-2 ; j >= vowel1+1 ; j-- ) { | |
655e9788 | 524 | help = consonant[woord[j]-39][woord[j+1]-39] ; |
4546690a JA |
525 | if ( abs(help) == 1 ) goto splitafterj ; |
526 | if ( help < 0 ) goto next ; | |
527 | } | |
528 | break ; /* end of first phase */ | |
529 | ||
530 | case 2 : | |
531 | for ( i = vowel2-2 ; i >= vowel1+1 ; i-- ) { | |
655e9788 | 532 | help = consonant[woord[i]-39][woord[i+1]-39] ; |
4546690a JA |
533 | if ( abs(help) == 2 ) { |
534 | j = i ; | |
535 | goto splitafterj ; | |
536 | } | |
537 | if ( abs(help) == 3 ) { | |
538 | if ( i == vowel1+1 ) { | |
539 | j = vowel1 ; | |
540 | goto splitafterj ; | |
541 | } | |
655e9788 | 542 | help = abs(consonant[woord[i-1]-39][woord[i]-39]) ; |
4546690a JA |
543 | if ( help == 2 ) { |
544 | j = i - 1 ; | |
545 | goto splitafterj ; | |
546 | } | |
547 | if ( help == 3 ) { | |
548 | j = i - 2 ; | |
549 | goto splitafterj ; | |
550 | } | |
551 | } | |
552 | else if ( ( abs(help) == 4 ) && | |
553 | ( i == vowel2-2 ) ) { | |
554 | j = i ; | |
555 | goto splitafterj ; | |
556 | } | |
557 | if ( help < 0 ) goto next ; | |
558 | } | |
559 | break ; /* end of second phase */ | |
560 | ||
561 | case 3 : | |
562 | j = vowel1 ; | |
655e9788 | 563 | help = woord[j+1] ; |
4546690a JA |
564 | if ( (help == LETTERJ) || (help == LETTERV) || |
565 | (help == LETTERZ) ) goto splitafterj ; | |
566 | if ( help == LETTERX ) goto next ; | |
655e9788 | 567 | l1 = woord[j] ; |
4546690a JA |
568 | if ( l1 == LETTEREE ) goto next ; |
569 | if ( ( l1 > 24 ) && ( l1 < 29 ) ) { | |
570 | j++ ; | |
571 | goto splitafterj ; | |
572 | } | |
655e9788 JA |
573 | l0 = woord[j-1] ; |
574 | l2 = woord[j+1] ; | |
4546690a JA |
575 | for ( i = 0 ; i < 7 ; i++ ) |
576 | if ( ( l0 == prefix[i][0] ) && | |
577 | ( l1 == prefix[i][1] ) && | |
578 | ( l2 == prefix[i][2] ) ) goto next ; | |
579 | goto splitafterj ; | |
580 | break ; /* end of third phase */ | |
581 | ||
582 | } | |
583 | ||
584 | ||
585 | goto next ; | |
586 | ||
587 | splitafterj : | |
588 | bp = reference[j+1] - 1 ; | |
589 | if((bp < size-1) && (bp > 0)) | |
590 | goto away; | |
591 | else | |
592 | goto next; | |
593 | ||
594 | nosplit : | |
595 | bp = 0 ; | |
596 | level = 4 ; | |
597 | away : | |
598 | return(bp == 0? (tchar *) NULL : tosplit+bp) ; | |
599 | } |