/*
 * date and time created 85/07/17 17:54:41 by jaap
 * [unix-history] /usr/src/local/ditroff/ditroff.okeeffe/n8.c
 */
#ifndef lint
static char sccsid[] = "@(#)n8.c 1.1 (CWI) 85/07/17";
#endif lint
#include <ctype.h>
#include "tdef.h"
#define HY_BIT 0200 /* stuff in here only works for ascii */
/*
troff8.c
hyphenation
*/
#include <sgtty.h>
#include "ext.h"
/*
 * Exception word list: casehw() appends zero-terminated, lower-cased
 * words here (HY_BIT set on a character that followed a '-'), and
 * exword() scans it.  A zero first byte of an entry ends the list.
 */
char hbuf[NHEX];
/* next free position in hbuf; advanced by casehw() after each stored word */
char *nexth = hbuf;
/*
 * Last character of the part of the current word still being examined;
 * set by hyphen() and moved towards the word start by suffix() and digram().
 */
tchar *hyend;
/*
 * hyphen - find the hyphenation points of the word starting at wp.
 *
 * The word may be wrapped in punctuation but must otherwise consist of
 * one run of letters followed directly by the terminating zero.
 * wdstart/wdend are set to the first/last letter, hyend starts out equal
 * to wdend.  Words with fewer than five letters are left alone.
 *
 * The exception list is consulted first; failing that, the algorithm
 * selected by hyalg is run (suffix stripping + digram tables for
 * ORIGINAL, split() for DUTCH).  The resulting zero-terminated list in
 * hyptr is finally sorted into ascending address order.
 */
hyphen(wp)
tchar *wp;
{
	register int swapped;
	register tchar *p;

	/* step over leading punctuation; a letter has to follow */
	for (p = wp; punct(cbits(*p)); p++)
		;
	if (!alph(cbits(*p)))
		return;
	wdstart = p;

	/* advance to the first non-letter after the word */
	for (p = wdstart + 1; alph(cbits(*p)); p++)
		;
	hyend = wdend = p - 1;

	/* trailing punctuation is fine, anything else disqualifies the word */
	while (punct(cbits(*p)))
		p++;
	if (*p)
		return;

	if (wdend - wdstart < 4)
		return;

	hyp = hyptr;
	*hyp = 0;
	hyoff = 2;

	/* formerly simply: if (!exword() && !suffix()) digram(); */
	if (!exword()) {
		if (hyalg == ORIGINAL) {
			if (!suffix())
				digram();
		}
		if (hyalg == DUTCH)
			split(wdstart, wdend);
	}
	*hyp++ = 0;

	if (!*hyptr)
		return;

	/* bubble sort the recorded break points, lowest address first */
	do {
		swapped = 0;
		for (hyp = hyptr + 1; *hyp != 0; hyp++) {
			if (*(hyp - 1) > *hyp) {
				swapped++;
				p = *hyp;
				*hyp = *(hyp - 1);
				*(hyp - 1) = p;
			}
		}
	} while (swapped);
}
/*
 * punct - return 1 if i is a non-zero character code that is not a
 * letter according to alph(), 0 otherwise.
 */
int
punct(i)
int i;
{
	return(i != 0 && !alph(i));
}
/*
 * alph - return 1 if i is an ASCII letter (a-z or A-Z), 0 otherwise.
 */
int
alph(i)
int i;
{
	if (i >= 'a' && i <= 'z')
		return(1);
	if (i >= 'A' && i <= 'Z')
		return(1);
	return(0);
}
/*
 * caseha - set the hyphenation algorithm (dialect).
 *
 * Without an argument the previously active algorithm (hyalg1) is
 * reinstated.  With an argument, the number (negative values are
 * raised to 0) selects the algorithm; a non-numeric argument or a
 * value above MAXDIALECTS leaves everything unchanged.  The algorithm
 * being replaced is remembered in hyalg1, and selecting DUTCH also
 * loads the Dutch default threshold.
 *
 * jna
 */
caseha()
{
	register i;

	if (skip())
		i = hyalg1;
	else {
		/*
		 * noscale has to stay set while the number is being read;
		 * it used to be cleared before atoi() was called.
		 */
		noscale++;
		i = max(atoi(), 0);
		noscale = 0;
		if (nonumb)
			return;
		if (i > MAXDIALECTS) {
			fprintf(stderr, "Unknown dialect %d.\n", i);
			return;
		}
	}
	hyalg1 = hyalg;
	hyalg = i;
	/*
	 * NOTE(review): thresh is not put back to THRESH when switching
	 * to ORIGINAL here; confirm whether that is intended.
	 */
	if (hyalg == DUTCH)
		thresh = DUTCH_THRESH;
}
/*
 * caseht - set the hyphenation threshold.
 *
 * The threshold is first reset to the default of the active algorithm.
 * If an argument is present it replaces that default; for DUTCH the
 * value is never allowed below 1.
 */
caseht()
{
	if (hyalg == ORIGINAL)
		thresh = THRESH;
	else if (hyalg == DUTCH)
		thresh = DUTCH_THRESH;

	if (!skip()) {
		noscale++;
		thresh = (hyalg == DUTCH) ? max(atoi(), 1) : atoi();
		noscale = 0;
	}
}
/*
 * casehw - add the words on the request line to the exception list.
 *
 * Each word is stored in hbuf lower-cased and zero-terminated; a '-'
 * is not stored but sets HY_BIT on the character that follows it.
 * Motion characters are ignored.  A blank ends the current word, a
 * newline ends the request.  When hbuf is about to overflow a message
 * is printed and the word in progress is dropped.
 */
casehw()
{
	register int c, hybit;
	register char *dst;
	tchar t;

	hybit = 0;
	while (!skip()) {
		dst = nexth;
		if (dst >= (hbuf + NHEX - 2))
			goto full;
		for (;;) {
			if (ismot(t = getch()))
				continue;
			c = cbits(t);
			if (c == '-') {
				/* mark the next stored character */
				hybit = HY_BIT;
				continue;
			}
			if (c == ' ' || c == '\n') {
				/* close this word and keep the list terminated */
				*dst++ = 0;
				nexth = dst;
				*dst = 0;
				if (c == '\n')
					return;
				break;
			}
			*dst++ = maplow(c) | hybit;
			hybit = 0;
			if (dst >= (hbuf + NHEX - 2))
				goto full;
		}
	}
	return;

full:
	fprintf(stderr, "troff: exception word list full.\n");
	*nexth = 0;
}
/*
 * exword - look the current word (wdstart..hyend) up in the exception
 * list.
 *
 * An entry matches when all of its characters equal the lower-cased
 * word and the word ends there; under the ORIGINAL algorithm an entry
 * that stops just before a final 's' matches too.  For a match, every
 * entry character carrying HY_BIT records the corresponding word
 * position in the hyp list (never running past hyptr + NHYP - 1).
 *
 * Returns 1 if an entry matched, 0 if the list was exhausted.
 */
exword()
{
	register tchar *w;
	register char *e;
	char *entry;

	for (e = hbuf; *e != 0; ) {
		entry = e;
		w = wdstart;
		while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
			e++;
			w++;
		}

		if (*e) {
			/* mismatch inside the entry: skip to the next one */
			while (*e++)
				;
			continue;
		}

		/* entry consumed: the word must end here (or with one more 's') */
		if (w - 1 != hyend &&
		    !(hyalg == ORIGINAL	/* s-extension only in original */
		      && w == wdend && maplow(cbits(*w)) == 's')) {
			e++;
			continue;
		}

		w = wdstart;
		for (e = entry; *e; e++, w++) {
			if (*e & HY_BIT)
				*hyp++ = w;
			if (hyp > (hyptr + NHYP - 1))
				hyp = hyptr + NHYP - 1;
		}
		return(1);
	}
	return(0);
}
/*
 * suffix - strip known suffixes from the end of the word (at hyend),
 * recording hyphenation points inside them.
 *
 * suftab is indexed by the (lower-cased) last letter.  Each table is a
 * sequence of records; the low four bits of a record's first byte give
 * the record length and a zero length ends the table.  The remaining
 * bytes are compared (low seven bits) backwards against the letters in
 * front of hyend.  Flag bits used from the first byte: HY_BIT, 0100 and
 * 040; HY_BIT on a later byte marks a break point at that letter.
 *
 * Returns 0 when no (further) suffix applies or processing must stop,
 * 1 when the remaining stem was found in the exception list.
 */
suffix()
{
register tchar *w;
register char *s, *s0;
tchar i;
extern char *suftab[];
extern tchar *chkvow();
again:
/* the character at hyend selects the suffix table */
if (!alph(cbits(i = cbits(*hyend))))
return(0);
/* fold upper case to lower case */
if (i < 'a')
i -= 'A' - 'a';
if ((s0 = suftab[i-'a']) == 0)
return(0);
/* search the records of this table for one matching the word's tail */
for (; ; ) {
if ((i = *s0 & 017) == 0)
return(0);
s = s0 + i - 1;
w = hyend - 1;
/* compare record bytes, last to first, with the letters before hyend */
while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
s--;
w--;
}
/* every byte after the first one matched */
if (s == s0)
break;
s0 += i;
}
/* walk the matched record again, this time acting on its flag bits */
s = s0 + i - 1;
w = hyend;
/* HY_BIT on the first byte: enter the marking code right away, with w == hyend */
if (*s0 & HY_BIT)
goto mark;
while (s > s0) {
w--;
if (*s-- & HY_BIT) {
mark:
/* the word now ends just before this position */
hyend = w - 1;
/* 0100 on the first byte: shorten the word but record nothing */
if (*s0 & 0100)
continue;
/* a break is only recorded if a vowel precedes it */
if (!chkvow(w))
return(0);
*hyp++ = w;
}
}
/* 040 on the first byte: stop here and report 0 */
if (*s0 & 040)
return(0);
/* the shortened word may itself be an exception word */
if (exword())
return(1);
/* otherwise look for another suffix on what is left */
goto again;
}
/*
 * maplow - return the lower-case equivalent of i if it is an upper-case
 * letter, otherwise i itself.
 */
int
maplow(i)
int i;
{
	return(isupper(i) ? tolower(i) : i);
}
/*
 * vowel - return 1 if i is one of a, e, i, o, u, y in either case,
 * 0 otherwise.
 */
int
vowel(i)
int i;
{
	register int c;

	c = maplow(i);
	if (c == 'a' || c == 'e' || c == 'i' ||
	    c == 'o' || c == 'u' || c == 'y')
		return(1);
	return(0);
}
tchar *chkvow(w)
tchar *w;
{
while (--w >= wdstart)
if (vowel(cbits(*w)))
return(w);
return(0);
}
digram()
{
register tchar *w;
register val;
tchar * nhyend, *maxw;
int maxval;
extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
again:
if (!(w = chkvow(hyend + 1)))
return;
hyend = w;
if (!(w = chkvow(hyend)))
return;
nhyend = w;
maxval = 0;
w--;
while ((++w < hyend) && (w < (wdend - 1))) {
val = 1;
if (w == wdstart)
val *= dilook('a', cbits(*w), bxh);
else if (w == wdstart + 1)
val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
else
val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
if (val > maxval) {
maxval = val;
maxw = w + 1;
}
}
hyend = nhyend;
if (maxval > thresh)
*hyp++ = maxw;
goto again;
}
/*
 * dilook - fetch the 4-bit weight for the letter pair (a, b) from the
 * digram table t.
 *
 * The row is selected by a, and each byte of a row holds the weights
 * for two consecutive values of b: the high nibble for the even one,
 * the low nibble for the odd one.
 */
int
dilook(a, b, t)
int a, b;
char t[26][13];
{
	register int packed, col;

	col = maplow(b) - 'a';
	packed = t[maplow(a) - 'a'][col / 2];
	if ((col & 01) == 0)
		packed >>= 4;
	return(packed & 017);
}
/*
* All this jazz is here so that the Dutch dialect can be hyphenated.
* It first appeared in the Dutch version of troff (nltroff), due to
* teus hagen.
* The original program was converted from Algol60 to C by, I think,
* bert ijsselstein.
* It's a mess, anyway.
*
* Planted in this version of troff by jaap akkerhuis (jna).
*
* Note that this is licensed software!
*
*/
#ifndef NULL
#define NULL 0
#endif
/* limits on the local work arrays of bestsplit1() */
#define MAXLETT 50 /* at most the first MAXLETT characters of a word
will be processed */
#define MAXSYLL 20 /* at most the first MAXSYLL syllables of a word
will be processed */
/*
 * Values of the internal letter code (as found in bestsplit1's word[]
 * array) that the third phase of bestsplit1() tests for explicitly.
 * NOTE(review): presumably the codes for "ee", "j", "v", "x" and "z" --
 * confirm against the translate/comprimation tables.
 */
#define LETTEREE 27
#define LETTERJ 41
#define LETTERV 55
#define LETTERX 57
#define LETTERZ 58
split( aword, anend ) register tchar *aword, *anend;
{ register tchar *place;
extern tchar *bestsplit1();
place = bestsplit1( aword, anend );
if( place != (tchar *) NULL )
{ *hyp++ = place;
if( place - aword > thresh && anend - place > thresh )
split( aword, place+1 );
if( anend - place > thresh && place - aword > thresh )
split( place, anend );
}
}
/*
 * bestsplit1 - determine the "best" place to split a Dutch word in two.
 *
 * tosplit points at the first and aend at the last character of the
 * word; the characters should be ASCII letters.  At most the first
 * MAXLETT - 1 characters and MAXSYLL syllables are taken into account.
 *
 * Returns a pointer into the word (tosplit + offset) marking the split,
 * or a null pointer if no acceptable split was found.  Only offsets
 * strictly between 0 and the last character are ever returned.
 *
 * The algorithm is adapted from the Mathematical Centre report NR 28/72,
 * "BESTESPLITS1, EEN PROCEDURE VOOR HET AUTOMATISCH AFBREKEN VAN NEDER-
 * LANDSE WOORDEN", which has been written by J.C. VAN VLIET.
 *
 * Outline:
 *   1. translate the letters into the internal code (translate[]);
 *   2. merge vowel combinations into single codes (comprimation[][]),
 *      keeping reference[] as the map back to original positions;
 *   3. locate the syllables (codes below 39 count as vowels);
 *   4. order the gaps between neighbouring vowels by their width;
 *   5. try the gaps in up to three phases (level 1..3) of decreasing
 *      strictness until one yields a split inside the word.
 */
tchar *
bestsplit1( tosplit , aend )
tchar *tosplit, *aend;
{
	extern char translate[], comprimation[][14], consonant[][23],
		prefix[][3] ;
	short word[ MAXLETT +1], reference[ MAXLETT +1], vowel[ MAXSYLL ],
		turn[ MAXSYLL ] , letter, nextlett, vowel1, vowel2,
		l0, l1, l2 ;
	short numlett, numsyll, turnindex, differ, start1, start2, stop,
		level, bp ;
	register int i, j, help ;
	short size = aend - tosplit + 1;

	/* translate into bestsplit code : */
	word[0] = 0 ;
	i = 1 ;
	help = -1 ;
	while ( (++help < size) && (i < MAXLETT ) ) {
		reference[i] = i;
		word[i++] = translate[maplow(cbits(tosplit[help])) - 'a'] ;
	}
	/* end of translation : */

	/*
	 * numlett is one more than the number of translated letters.
	 * NOTE(review): word[numlett] and reference[numlett] are never
	 * assigned, yet both are read below (as nextlett for the last
	 * letter, in the copy after the loop, and by the syllable scan).
	 * The intended end marker is not evident from this file -- confirm
	 * against the original report before changing it.
	 */
	numlett = i ;
	if ( numlett < 4 ) goto nosplit ;

	i = j = 1 ;
	help = 0 ;		/* number of letters merged away so far */
	while ( i < numlett ) {
		letter = word[i] ;
		/* comprimation of vowels : */
		if ( (25 < letter) && (letter < 41) ) {
			nextlett = word[i+1] ;
			if ( (28 < nextlett) && (nextlett < 43) ) {
				letter = comprimation[letter-26][nextlett-29] ;
				if (letter > 0) {
					/* replace the pair by one code and look again */
					i++ ;
					help++ ;
					word[i] = letter ;
					continue ;
				}
			}
		} /* end of comprimation */
		word[j] = word[i] ;
		j++ ;
		i++ ;
		reference[j] += help ;
	}
	word[j] = word[numlett] ;
	numlett = j ;

	/* determination of the number of syllables */
	j = -1 ;
	i = 0 ;
	/*
	 * j is advanced before it is used as an index, so scanning has to
	 * stop once j reaches MAXSYLL - 1; testing against MAXSYLL allowed
	 * a store into vowel[MAXSYLL], one past the end of the array.
	 */
	while ( ( ++i <= numlett ) && ( j < MAXSYLL - 1 ) ) {
		if (word[i] < 39) {
			j++ ;
			vowel[j] = i ;
		}
	}
	numsyll = j+1 ;
	if ( numsyll < 2 ) goto nosplit ;

	/*
	 * Fill turn[] with the indices of the vowel gaps, grouped by the
	 * distance between the two vowels (1, then 2, then larger).
	 * start1 and start2 mark where the groups for distance 2 and for
	 * distance 3 and up begin.
	 */
	turnindex = 0 ;
	differ = 1 ;
	start1 = 0 ;
	start2 = numsyll - 1 ;
	stop = start2 ;
	while ( turnindex < stop ) {
		vowel1 = vowel[stop] ;
		for ( i = stop - 1 ; i >= 0 ; i-- ) {
			vowel2 = vowel[i] ;
			if ( vowel1 - vowel2 == differ) {
				turn[turnindex] = i ;
				turnindex++ ;
			}
			vowel1 = vowel2 ;
		}
		if ( differ == 1 ) start1 = turnindex ;
		else if ( differ == 2 ) start2 = turnindex ;
		differ++ ;
	}

	/* levels 1 and 2 both walk turn[start2 .. numsyll-2] */
	turnindex = start2 - 1 ;
	stop = numsyll - 1 ;
	level = 1 ;
next :
	turnindex++ ;
	if ( turnindex >= stop ) {
		/* this level is exhausted: set up the next one */
		if ( level == 1 ) turnindex = start2 ;
		else if ( level == 2 ) {
			/* level 3 walks turn[start1 .. start2-1] */
			turnindex = start1 ;
			stop = start2 ;
		}
		else goto nosplit ;
		level++ ;
		if ( turnindex >= stop ) goto next ;
	}
	j = turn[turnindex] ;
	vowel1 = vowel[j] ;
	vowel2 = vowel[j+1] ;
	switch ( level ) {
	case 1 :
		/* scan the consonant pairs in the gap from right to left */
		for ( j = vowel2-2 ; j >= vowel1+1 ; j-- ) {
			help = consonant[word[j]-39][word[j+1]-39] ;
			if ( abs(help) == 1 ) goto splitafterj ;
			if ( help < 0 ) goto next ;
		}
		break ; /* end of first phase */
	case 2 :
		for ( i = vowel2-2 ; i >= vowel1+1 ; i-- ) {
			help = consonant[word[i]-39][word[i+1]-39] ;
			if ( abs(help) == 2 ) {
				j = i ;
				goto splitafterj ;
			}
			if ( abs(help) == 3 ) {
				if ( i == vowel1+1 ) {
					j = vowel1 ;
					goto splitafterj ;
				}
				/* note: help is non-negative from here on */
				help = abs(consonant[word[i-1]-39][word[i]-39]) ;
				if ( help == 2 ) {
					j = i - 1 ;
					goto splitafterj ;
				}
				if ( help == 3 ) {
					j = i - 2 ;
					goto splitafterj ;
				}
			}
			else if ( ( abs(help) == 4 ) &&
				( i == vowel2-2 ) ) {
				j = i ;
				goto splitafterj ;
			}
			if ( help < 0 ) goto next ;
		}
		break ; /* end of second phase */
	case 3 :
		j = vowel1 ;
		help = word[j+1] ;
		if ( (help == LETTERJ) || (help == LETTERV) ||
			(help == LETTERZ) ) goto splitafterj ;
		if ( help == LETTERX ) goto next ;
		l1 = word[j] ;
		if ( l1 == LETTEREE ) goto next ;
		if ( ( l1 > 24 ) && ( l1 < 29 ) ) {
			j++ ;
			goto splitafterj ;
		}
		/* no split if the three codes around the vowel form a listed prefix */
		l0 = word[j-1] ;
		l2 = word[j+1] ;
		for ( i = 0 ; i < 7 ; i++ )
			if ( ( l0 == prefix[i][0] ) &&
				( l1 == prefix[i][1] ) &&
				( l2 == prefix[i][2] ) ) goto next ;
		goto splitafterj ;
		break ; /* end of third phase */
	}
	goto next ;
splitafterj :
	/* map the position in the compressed word back to the original one */
	bp = reference[j+1] - 1 ;
	if((bp < size-1) && (bp > 0))
		goto away;
	else
		goto next;
nosplit :
	bp = 0 ;
	level = 4 ;
away :
	return(bp == 0? (tchar *) NULL : tosplit+bp) ;
}