/*
 * n8.c -- [unix-history] usr/src/local/ditroff/ditroff.okeeffe/n8.c
 * Date and time created: 87/03/31 16:32:28, by jaap.
 *
 * Listing columns: Commit, Line, Data.
 */
#ifndef lint
/* SCCS version string for this file; left out when lint is defined. */
static char sccsid[] = "@(#)n8.c 2.1 (CWI) 85/07/18";
#endif /* lint */
4546690a
JA
4#include <ctype.h>
5#include "tdef.h"
655e9788
JA
6#include <sgtty.h>
7#include "ext.h"
4546690a
JA
8#define HY_BIT 0200 /* stuff in here only works for ascii */
9
10/*
655e9788
JA
11 * troff8.c
12 *
13 * hyphenation
14 */
4546690a 15
4546690a
JA
16char hbuf[NHEX];
17char *nexth = hbuf;
655e9788 18tchar *hyend;
4546690a
JA
19
20hyphen(wp)
21 tchar *wp;
22{
23 register j;
24 register tchar *i;
25
26 i = wp;
27 while (punct(cbits(*i++)))
28 ;
29 if (!alph(cbits(*--i)))
30 return;
31 wdstart = i++;
32 while (alph(cbits(*i++)))
33 ;
34 hyend = wdend = --i - 1;
35 while (punct(cbits(*i++)))
36 ;
37 if (*--i)
38 return;
39 if ((wdend - wdstart - 4) < 0)
40 return;
41 hyp = hyptr;
42 *hyp = 0;
43 hyoff = 2;
44/*
45 if (!exword() && !suffix())
46 digram();
47*/
48 if (!exword()) {
49 if (hyalg == ORIGINAL && !suffix())
50 digram();
51 if (hyalg == DUTCH)
52 split(wdstart, wdend);
53 }
54 *hyp++ = 0;
55 if (*hyptr)
56 for (j = 1; j; ) {
57 j = 0;
58 for (hyp = hyptr + 1; *hyp != 0; hyp++) {
59 if (*(hyp - 1) > *hyp) {
60 j++;
61 i = *hyp;
62 *hyp = *(hyp - 1);
63 *(hyp - 1) = i;
64 }
65 }
66 }
67}
68
69
/*
 * Return 1 when i is neither 0 nor an ASCII letter, 0 otherwise.
 * The letter test is the same range check that alph() makes.
 */
int
punct(i)
int i;
{
	if (i == 0)
		return(0);
	if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z'))
		return(0);
	return(1);
}
77
78
/*
 * Return 1 when i is an ASCII letter of either case, 0 otherwise.
 */
int
alph(i)
int i;
{
	return((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z'));
}
86
4546690a
JA
87/*
88 * set the hyphenation algorithm
89 *
90 * jna
91 */
92
93caseha()
94{ register i;
95
96 if ( skip())
97 i = hyalg1;
98 else {
99 noscale++;
100 noscale = 0;
655e9788 101 i = max(atoi(), 0);
4546690a
JA
102 if (nonumb)
103 return;
104 if (i > MAXDIALECTS) {
655e9788 105 errprint("Unknown dialect %d", i);
4546690a
JA
106 return;
107 }
108 }
109 hyalg1 = hyalg;
110 hyalg = i;
111 if( hyalg == DUTCH)
112 thresh = DUTCH_THRESH;
113}
114
4546690a
JA
115caseht()
116{
117 switch(hyalg) {
118 case ORIGINAL:
119 thresh = THRESH;
120 break;
121 case DUTCH:
122 thresh = DUTCH_THRESH;
123 break;
124 }
655e9788 125 if (skip())
4546690a
JA
126 return;
127 noscale++;
655e9788
JA
128 if (hyalg == DUTCH)
129 thresh = max(atoi(), 1);
4546690a
JA
130 else
131 thresh = atoi();
132 noscale = 0;
133}
134
135
136casehw()
137{
138 register i, k;
139 register char *j;
140 tchar t;
141
142 k = 0;
143 while (!skip()) {
144 if ((j = nexth) >= (hbuf + NHEX - 2))
145 goto full;
146 for (; ; ) {
147 if (ismot(t = getch()))
148 continue;
149 i = cbits(t);
150 if (i == ' ' || i == '\n') {
151 *j++ = 0;
152 nexth = j;
153 *j = 0;
154 if (i == ' ')
155 break;
156 else
157 return;
158 }
159 if (i == '-') {
160 k = HY_BIT;
161 continue;
162 }
163 *j++ = maplow(i) | k;
164 k = 0;
165 if (j >= (hbuf + NHEX - 2))
166 goto full;
167 }
168 }
169 return;
170full:
655e9788 171 errprint("exception word list full.");
4546690a
JA
172 *nexth = 0;
173}
174
175
176exword()
177{
178 register tchar *w;
179 register char *e;
180 char *save;
181
182 e = hbuf;
183 while (1) {
184 save = e;
185 if (*e == 0)
186 return(0);
187 w = wdstart;
188 while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
189 e++;
190 w++;
191 };
192 if (!*e) {
193 if (w-1 == hyend ||
194 (hyalg == ORIGINAL /* s-extension only in original */
655e9788 195 && (w == wdend && maplow(cbits(*w)) == 's'))) {
4546690a
JA
196 w = wdstart;
197 for (e = save; *e; e++) {
198 if (*e & HY_BIT)
199 *hyp++ = w;
200 if (hyp > (hyptr + NHYP - 1))
201 hyp = hyptr + NHYP - 1;
202 w++;
203 }
204 return(1);
205 } else {
206 e++;
207 continue;
208 }
209 } else
210 while (*e++)
211 ;
212 }
213}
214
215
216suffix()
217{
218 register tchar *w;
219 register char *s, *s0;
220 tchar i;
221 extern char *suftab[];
222 extern tchar *chkvow();
223
224again:
225 if (!alph(cbits(i = cbits(*hyend))))
226 return(0);
227 if (i < 'a')
228 i -= 'A' - 'a';
229 if ((s0 = suftab[i-'a']) == 0)
230 return(0);
231 for (; ; ) {
232 if ((i = *s0 & 017) == 0)
233 return(0);
234 s = s0 + i - 1;
235 w = hyend - 1;
236 while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
237 s--;
238 w--;
239 }
240 if (s == s0)
241 break;
242 s0 += i;
243 }
244 s = s0 + i - 1;
245 w = hyend;
246 if (*s0 & HY_BIT)
247 goto mark;
248 while (s > s0) {
249 w--;
250 if (*s-- & HY_BIT) {
251mark:
252 hyend = w - 1;
253 if (*s0 & 0100)
254 continue;
255 if (!chkvow(w))
256 return(0);
257 *hyp++ = w;
258 }
259 }
260 if (*s0 & 040)
261 return(0);
262 if (exword())
263 return(1);
264 goto again;
265}
266
267
/*
 * Map an ASCII upper case letter to lower case; any other value,
 * including one outside the character range, is returned unchanged.
 * The range is tested directly, so the result does not depend on the
 * locale and no out-of-range value is handed to a <ctype.h> macro.
 */
int
maplow(i)
register int i;
{
	if (i >= 'A' && i <= 'Z')
		i += 'a' - 'A';
	return(i);
}
275
276
/*
 * Return 1 when i is one of a, e, i, o, u or y in either case,
 * 0 otherwise.
 */
int
vowel(i)
int i;
{
	register int c;

	c = maplow(i);
	if (c == 'a' || c == 'e' || c == 'i' ||
	    c == 'o' || c == 'u' || c == 'y')
		return(1);
	return(0);
}
292
293
294tchar *chkvow(w)
295tchar *w;
296{
297 while (--w >= wdstart)
298 if (vowel(cbits(*w)))
299 return(w);
300 return(0);
301}
302
303
304digram()
305{
306 register tchar *w;
307 register val;
308 tchar * nhyend, *maxw;
309 int maxval;
310 extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
311
312again:
313 if (!(w = chkvow(hyend + 1)))
314 return;
315 hyend = w;
316 if (!(w = chkvow(hyend)))
317 return;
318 nhyend = w;
319 maxval = 0;
320 w--;
321 while ((++w < hyend) && (w < (wdend - 1))) {
322 val = 1;
323 if (w == wdstart)
324 val *= dilook('a', cbits(*w), bxh);
325 else if (w == wdstart + 1)
326 val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
327 else
328 val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
329 val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
330 val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
331 if (val > maxval) {
332 maxval = val;
333 maxw = w + 1;
334 }
335 }
336 hyend = nhyend;
337 if (maxval > thresh)
338 *hyp++ = maxw;
339 goto again;
340}
341
342
/*
 * Give the position of an ASCII letter of either case in the alphabet
 * (0 .. 25), or -1 for anything else.
 */
static int
diindex(c)
int c;
{
	if (c >= 'A' && c <= 'Z')
		return(c - 'A');
	if (c >= 'a' && c <= 'z')
		return(c - 'a');
	return(-1);
}

/*
 * Look up the weight of the letter pair (a, b) in digram table t.
 *
 * A row of t belongs to the first letter and packs the weights for two
 * second letters into each byte: the high four bits for the even
 * numbered letter (a, c, e, ...), the low four bits for the odd one.
 * The result lies in 0 .. 15.  A pair containing a non-letter gives 0
 * instead of indexing outside the table.
 */
int
dilook(a, b, t)
int a, b;
char t[26][13];
{
	register int row, col, val;

	row = diindex(a);
	col = diindex(b);
	if (row < 0 || col < 0)
		return(0);
	val = t[row][col / 2];
	if (!(col & 01))
		val >>= 4;
	return(val & 017);
}
354
355
/*
 * All this jazz is here so that the dialect dutch can be hyphenated.
 * It first appeared in the dutch version of troff (nltroff), due to
 * teus hagen.
 * The original program was converted from Algol60 to C by, I think,
 * bert ijsselstein.
 * It's a mess, anyway.
 *
 * Planted in this version of troff by jaap akkerhuis (jna).
 *
 * Note that this is licensed software!
 *
 */
369
#ifndef NULL
#define NULL 0
#endif

/* Limits on how much of a word bestsplit1() looks at. */
#define MAXLETT 50	/* at most the first MAXLETT characters of a word
			   will be processed */
#define MAXSYLL 20	/* at most the first MAXSYLL syllables of a word
			   will be processed */

/* Letter codes that bestsplit1() compares entries of woord[] against. */
#define LETTEREE 27
#define LETTERJ 41
#define LETTERV 55
#define LETTERX 57
#define LETTERZ 58
383
655e9788
JA
384/*
385 * split(..) needs to be cleaned up, could install hjt's version...
386 */
387
4546690a
JA
388split( aword, anend ) register tchar *aword, *anend;
389{ register tchar *place;
390 extern tchar *bestsplit1();
391
392 place = bestsplit1( aword, anend );
393 if( place != (tchar *) NULL )
394 { *hyp++ = place;
395 if( place - aword > thresh && anend - place > thresh )
396 split( aword, place+1 );
397 if( anend - place > thresh && place - aword > thresh )
398 split( place, anend );
399 }
400}
401
402tchar *
403bestsplit1( tosplit , aend )
404tchar *tosplit, *aend;
405{
406/* This function determines the "best" place to split into two parts the
407 * Dutch word contained in a string of <size> characters which starts at
408 * the address <tosplit> .
409 * The input characters should be in ASCII code .
410 * The function returns as value the number of characters of the first
411 * of the two parts .
412 * If the returned value exceeds the character count of the line the
413 * user may try to invoke bestsplit1 again but now with <size> equal to
414 * the returned value plus one .
415 * The algorithm is adapted from the Mathematical Centre report NR 28/72,
416 * "BESTESPLITS1, EEN PROCEDURE VOOR HET AUTOMATISCH AFBREKEN VAN NEDER-
417 * LANDSE WOORDEN" , which has been written by J.C. VAN VLIET.
418 */
419 extern char translate[], comprimation[][14], consonant[][23],
420 prefix[][3] ;
655e9788 421 short woord[ MAXLETT +1], reference[ MAXLETT +1], vowel[ MAXSYLL ],
4546690a
JA
422 turn[ MAXSYLL ] , letter, nextlett, vowel1, vowel2,
423 l0, l1, l2 ;
424 short numlett, numsyll, turnindex, differ, start1, start2, stop,
425 level, bp ;
426 register int i, j, help ;
427 short size = aend - tosplit + 1;
428
429 /* translate into bestsplit code : */
655e9788 430 woord[0] = 0 ;
4546690a
JA
431 i = 1 ;
432 help = -1 ;
433 while ( (++help < size) && (i < MAXLETT ) ) {
434 reference[i] = i;
655e9788 435 woord[i++] = translate[maplow(cbits(tosplit[help])) - 'a'] ;
4546690a
JA
436 }
437 /* end of translation : */
438
439 numlett = i ;
440 if ( numlett < 4 ) goto nosplit ;
441 i = j = 1 ;
442 help = 0 ;
443 while ( i < numlett ) {
655e9788 444 letter = woord[i] ;
4546690a
JA
445 /* comprimation of vowels : */
446 if ( (25 < letter) && (letter < 41) ) {
655e9788 447 nextlett = woord[i+1] ;
4546690a
JA
448 if ( (28 < nextlett) && (nextlett < 43) ) {
449 letter = comprimation[letter-26][nextlett-29] ;
450 if (letter > 0) {
451 i++ ;
452 help++ ;
655e9788 453 woord[i] = letter ;
4546690a
JA
454 continue ;
455 }
456 }
457 } /* end of comprimation */
458
655e9788 459 woord[j] = woord[i] ;
4546690a
JA
460 j++ ;
461 i++ ;
462 reference[j] += help ;
463 }
655e9788 464 woord[j] = woord[numlett] ;
4546690a
JA
465 numlett = j ;
466
467
468 /* determination of the number of syllables */
469 j = -1 ;
470 i = 0 ;
471 while ( ( ++i <= numlett ) && ( j < MAXSYLL ) ) {
655e9788 472 if (woord[i] < 39) {
4546690a
JA
473 j++ ;
474 vowel[j] = i ;
475 }
476 }
477 numsyll = j+1 ;
478
479 if ( numsyll < 2 ) goto nosplit ;
480 turnindex = 0 ;
481 differ = 1 ;
482 start1 = 0 ;
483 start2 = numsyll - 1 ;
484 stop = start2 ;
485
486 while ( turnindex < stop ) {
487 vowel1 = vowel[stop] ;
488 for ( i = stop - 1 ; i >= 0 ; i-- ) {
489 vowel2 = vowel[i] ;
490 if ( vowel1 - vowel2 == differ) {
491 turn[turnindex] = i ;
492 turnindex++ ;
493 }
494 vowel1 = vowel2 ;
495 }
496 if ( differ == 1 ) start1 = turnindex ;
497 else if ( differ == 2 ) start2 = turnindex ;
498 differ++ ;
499 }
500
501 turnindex = start2 - 1 ;
502 stop = numsyll - 1 ;
503 level = 1 ;
504
505next :
506 turnindex++ ;
507 if ( turnindex >= stop ) {
508 if ( level == 1 ) turnindex = start2 ;
509 else if ( level == 2 ) {
510 turnindex = start1 ;
511 stop = start2 ;
512 }
513 else goto nosplit ;
514 level++ ;
515 if ( turnindex >= stop ) goto next ;
516 }
517 j = turn[turnindex] ;
518 vowel1 = vowel[j] ;
519 vowel2 = vowel[j+1] ;
520
521 switch ( level ) {
522 case 1 :
523 for ( j = vowel2-2 ; j >= vowel1+1 ; j-- ) {
655e9788 524 help = consonant[woord[j]-39][woord[j+1]-39] ;
4546690a
JA
525 if ( abs(help) == 1 ) goto splitafterj ;
526 if ( help < 0 ) goto next ;
527 }
528 break ; /* end of first phase */
529
530 case 2 :
531 for ( i = vowel2-2 ; i >= vowel1+1 ; i-- ) {
655e9788 532 help = consonant[woord[i]-39][woord[i+1]-39] ;
4546690a
JA
533 if ( abs(help) == 2 ) {
534 j = i ;
535 goto splitafterj ;
536 }
537 if ( abs(help) == 3 ) {
538 if ( i == vowel1+1 ) {
539 j = vowel1 ;
540 goto splitafterj ;
541 }
655e9788 542 help = abs(consonant[woord[i-1]-39][woord[i]-39]) ;
4546690a
JA
543 if ( help == 2 ) {
544 j = i - 1 ;
545 goto splitafterj ;
546 }
547 if ( help == 3 ) {
548 j = i - 2 ;
549 goto splitafterj ;
550 }
551 }
552 else if ( ( abs(help) == 4 ) &&
553 ( i == vowel2-2 ) ) {
554 j = i ;
555 goto splitafterj ;
556 }
557 if ( help < 0 ) goto next ;
558 }
559 break ; /* end of second phase */
560
561 case 3 :
562 j = vowel1 ;
655e9788 563 help = woord[j+1] ;
4546690a
JA
564 if ( (help == LETTERJ) || (help == LETTERV) ||
565 (help == LETTERZ) ) goto splitafterj ;
566 if ( help == LETTERX ) goto next ;
655e9788 567 l1 = woord[j] ;
4546690a
JA
568 if ( l1 == LETTEREE ) goto next ;
569 if ( ( l1 > 24 ) && ( l1 < 29 ) ) {
570 j++ ;
571 goto splitafterj ;
572 }
655e9788
JA
573 l0 = woord[j-1] ;
574 l2 = woord[j+1] ;
4546690a
JA
575 for ( i = 0 ; i < 7 ; i++ )
576 if ( ( l0 == prefix[i][0] ) &&
577 ( l1 == prefix[i][1] ) &&
578 ( l2 == prefix[i][2] ) ) goto next ;
579 goto splitafterj ;
580 break ; /* end of third phase */
581
582 }
583
584
585 goto next ;
586
587splitafterj :
588 bp = reference[j+1] - 1 ;
589 if((bp < size-1) && (bp > 0))
590 goto away;
591 else
592 goto next;
593
594nosplit :
595 bp = 0 ;
596 level = 4 ;
597away :
598 return(bp == 0? (tchar *) NULL : tosplit+bp) ;
599}