usr.bin/vi/nvi/v_word.c

/*-
 * Copyright (c) 1992, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* SCCS identification string for this file; left out when lint is defined. */
#ifndef lint
static char sccsid[] = "@(#)v_word.c    8.10 (Berkeley) 10/26/93";
#endif /* not lint */

#include <sys/types.h>

#include <ctype.h>

#include "vi.h"
#include "vcmd.h"

/*
 * There are two types of "words".  Bigwords are easy -- groups of anything
 * delimited by whitespace.  Normal words are trickier.  They are either a
 * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in
 * whitespace, it's easy, just remove the whitespace and go to the beginning
 * or end of the word.  Otherwise, figure out if the next character is in a
 * different group.
 * If it is, go to the beginning or end of that group, otherwise, go to the
 * beginning or end of the current group.  The historic version of vi didn't
 * get this right, so, for example, there were cases where "4e" was not the
 * same as "eeee".  To get it right you have to resolve the cursor after each
 * search so that the look-ahead to figure out what type of "word" the cursor
 * is in will be correct.
 *
 * Empty lines, and lines that consist of only white-space characters count
 * as a single word, and the beginning and end of the file counts as an
 * infinite number of words.
 *
 * Movements associated with commands are different than movement commands.
 * For example, in "abc  def", with the cursor on the 'a', "cw" is from
 * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
 * space is discarded from the change movement.  Another example is that,
 * in the same string, a "cw" on any white space character replaces that
 * single character, and nothing else.  Ain't nothin' in here that's easy.
 *
 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
 * would treat groups of empty lines as individual words, i.e. the command
 * would move the cursor to each new empty line.  The 'e' and 'E' commands
 * would treat groups of empty lines as a single word, i.e. the first use
 * would move past the group of lines.  The 'b' command would just beep at
 * you.  If the lines contained only white-space characters, the 'w' and 'W'
 * commands will just beep at you, and the 'B', 'b', 'E' and 'e' commands
 * will treat the group as a single word, and the 'B' and 'b' commands will
 * treat the lines as individual words.  This implementation treats both
 * cases as a single white-space word.
 */

/*
 * Line scanners used by bword() and eword().  Both rely on the caller having
 * a character pointer named `p' and a remaining-character count named `len'
 * in scope.  Each step decrements len and moves p forward (FW) or backward
 * (BW), for as long as len is non-zero and `test' holds.  The expansion is a
 * for statement without a body, so every use must be followed by a semicolon.
 */
#define FW(test)        for (; len && (test); --len, ++p)
#define BW(test)        for (; len && (test); --len, --p)

/* Selects which word definition fword() applies. */
enum which {BIGWORD, LITTLEWORD};

/*
 * File-local workers: bword() moves backward, eword() moves to the end of a
 * word, fword() moves forward.  The final argument of bword() and eword() is
 * their `spaceonly' flag; the final argument of fword() is the word type.
 */
static int bword __P((SCR *, EXF *, VICMDARG *, MARK *, MARK *, int));
static int eword __P((SCR *, EXF *, VICMDARG *, MARK *, MARK *, int));
static int fword __P((SCR *, EXF *, VICMDARG *, MARK *, MARK *, enum which));

/*
 * v_wordw -- [count]w
 *      Forward motion by normal words.  All of the work is done by
 *      fword() with LITTLEWORD rules; the tm argument is not used.
 */
int
v_wordw(sp, ep, vp, fm, tm, rp)
        SCR *sp;
        EXF *ep;
        VICMDARG *vp;
        MARK *fm, *tm, *rp;
{
        int rval;

        rval = fword(sp, ep, vp, fm, rp, LITTLEWORD);
        return (rval);
}

/*
 * v_wordW -- [count]W
 *      Forward motion by bigwords.  All of the work is done by
 *      fword() with BIGWORD rules; the tm argument is not used.
 */
int
v_wordW(sp, ep, vp, fm, tm, rp)
        SCR *sp;
        EXF *ep;
        VICMDARG *vp;
        MARK *fm, *tm, *rp;
{
        int rval;

        rval = fword(sp, ep, vp, fm, rp, BIGWORD);
        return (rval);
}

/*
 * fword --
 *      Common code for the 'w' and 'W' commands.
 *
 *      Beginning at fm, step forward over count words (a single word when
 *      VC_C1SET is not set in vp) and store the resulting position in rp.
 *      type chooses between bigword and normal word rules.  Returns 0 when
 *      rp has been filled in, and 1 if one of the cursor routines failed or
 *      the cursor did not move.
 */
static int
fword(sp, ep, vp, fm, rp, type)
        SCR *sp;
        EXF *ep;
        VICMDARG *vp;
        MARK *fm, *rp;
        enum which type;
{
        enum { INWORD, NOTWORD } group;
        VCS cur;
        u_long left;

        cur.cs_lno = fm->lno;
        cur.cs_cno = fm->cno;
        if (cs_init(sp, ep, &cur))
                return (1);

        /* Number of words still to be crossed. */
        if (F_ISSET(vp, VC_C1SET))
                left = vp->count;
        else
                left = 1;

        /*
         * Starting in white-space (or on an empty line):
         *      With a count of 1, a change command covers just the one
         *      character, and delete/yank go through cs_fspace().
         *      Otherwise, moving to the first non-blank character is
         *      charged as one word.
         */
        if (cur.cs_flags == CS_EMP ||
            (cur.cs_flags == 0 && isblank(cur.cs_ch))) {
                if (cur.cs_flags != CS_EMP && left == 1) {
                        if (F_ISSET(vp, VC_C)) {
                                ++cur.cs_cno;
                                goto ret3;
                        }
                        if (F_ISSET(vp, VC_D | VC_Y)) {
                                if (cs_fspace(sp, ep, &cur))
                                        return (1);
                                goto ret1;
                        }
                }
                if (cs_fblank(sp, ep, &cur))
                        return (1);
                --left;
        }

        /*
         * One pass per remaining word: walk to the end of the current
         * group of characters, then over whatever blanks follow it.  For
         * normal words the group is re-evaluated at the start of every
         * pass, because the kind of word under the cursor keeps changing.
         */
        if (type == BIGWORD) {
                while (left--) {
                        /* Advance until a blank or a flagged position. */
                        do {
                                if (cs_next(sp, ep, &cur))
                                        return (1);
                                if (cur.cs_flags == CS_EOF)
                                        goto ret2;
                        } while (cur.cs_flags == 0 && !isblank(cur.cs_ch));

                        /*
                         * For c, d and y the last word ends the motion
                         * here.  Delete and yank also take the trailing
                         * blanks, but never leave the line.
                         */
                        if (left == 0 && F_ISSET(vp, VC_C | VC_D | VC_Y)) {
                                if (F_ISSET(vp, VC_D | VC_Y) &&
                                    cs_fspace(sp, ep, &cur))
                                        return (1);
                                break;
                        }

                        /* Step over the white-space after the word. */
                        if (cs_fblank(sp, ep, &cur))
                                return (1);
                        if (cur.cs_flags == CS_EOF)
                                goto ret2;
                }
        } else {
                while (left--) {
                        /* Which group is the cursor in right now? */
                        if (cur.cs_flags == 0 && inword(cur.cs_ch))
                                group = INWORD;
                        else
                                group = NOTWORD;

                        /*
                         * Advance until a blank, a flagged position, or
                         * a character belonging to the other group.
                         */
                        for (;;) {
                                if (cs_next(sp, ep, &cur))
                                        return (1);
                                if (cur.cs_flags == CS_EOF)
                                        goto ret2;
                                if (cur.cs_flags != 0 || isblank(cur.cs_ch))
                                        break;
                                if ((inword(cur.cs_ch) ?
                                    INWORD : NOTWORD) != group)
                                        break;
                        }

                        /* Same end-of-motion rule as for bigwords. */
                        if (left == 0 && F_ISSET(vp, VC_C | VC_D | VC_Y)) {
                                if (F_ISSET(vp, VC_D | VC_Y) &&
                                    cs_fspace(sp, ep, &cur))
                                        return (1);
                                break;
                        }

                        /* Step over white-space, if that is what stopped us. */
                        if (cur.cs_flags != 0 || isblank(cur.cs_ch)) {
                                if (cs_fblank(sp, ep, &cur))
                                        return (1);
                        }
                        if (cur.cs_flags == CS_EOF)
                                goto ret2;
                }
        }

        /*
         * For c, d and y, a motion that stopped at the end of the line
         * is reported as one column past the end of that line.
         */
ret1:   if (F_ISSET(vp, VC_C | VC_D | VC_Y) && cur.cs_flags == CS_EOL)
                ++cur.cs_cno;

        /* No movement at all means we were already at EOF. */
ret2:   if (cur.cs_lno == fm->lno && cur.cs_cno == fm->cno) {
                v_eof(sp, ep, fm);
                return (1);
        }

        /* For c, d and y at EOF, report one column past the EOF. */
        if (F_ISSET(vp, VC_C | VC_D | VC_Y) && cur.cs_flags == CS_EOF)
                ++cur.cs_cno;
ret3:   rp->lno = cur.cs_lno;
        rp->cno = cur.cs_cno;
        return (0);
}

/*
 * v_wordb -- [count]b
 *      Backward motion by normal words.  All of the work is done by
 *      bword() with spaceonly clear; the tm argument is not used.
 */
int
v_wordb(sp, ep, vp, fm, tm, rp)
        SCR *sp;
        EXF *ep;
        VICMDARG *vp;
        MARK *fm, *tm, *rp;
{
        int rval;

        rval = bword(sp, ep, vp, fm, rp, 0);
        return (rval);
}

/*
 * v_wordB -- [count]B
 *      Backward motion by bigwords.  All of the work is done by
 *      bword() with spaceonly set; the tm argument is not used.
 */
int
v_wordB(sp, ep, vp, fm, tm, rp)
        SCR *sp;
        EXF *ep;
        VICMDARG *vp;
        MARK *fm, *tm, *rp;
{
        int rval;

        rval = bword(sp, ep, vp, fm, rp, 1);
        return (rval);
}

/*
 * bword --
 *      Move backward by words; shared by the 'b' and 'B' commands.
 *
 *      sp, ep:    screen and file handles, handed on to the line access
 *                 and message routines.
 *      vp:        command argument; supplies the count when VC_C1SET is
 *                 set, otherwise a count of 1 is used.
 *      fm:        starting cursor position.
 *      rp:        receives the new cursor position on success.
 *      spaceonly: non-zero for bigwords (only blanks separate words), zero
 *                 for normal words (word characters and other non-blank
 *                 characters form separate groups).
 *
 *      Returns 0 with rp filled in, or 1 if the cursor is already at line
 *      1, column 0 or a line could not be retrieved.
 *
 *      Line bytes are converted to unsigned char before they are classified.
 *      The line is held in a plain char pointer, and passing a negative char
 *      value to a <ctype.h> classification routine is undefined behavior.
 *      isblank() and inword() are not defined in this file; the conversion
 *      presumes they are, or are built on, <ctype.h> routines, and is
 *      harmless if they are simple comparisons.
 */
static int
bword(sp, ep, vp, fm, rp, spaceonly)
        SCR *sp;
        EXF *ep;
        VICMDARG *vp;
        MARK *fm, *rp;
        int spaceonly;
{
        register char *p;
        recno_t lno;
        size_t len;
        u_long cno, cnt;
        char *startp;

        lno = fm->lno;
        cno = fm->cno;

        /* Check for start of file. */
        if (lno == 1 && cno == 0) {
                v_sof(sp, NULL);
                return (1);
        }

        if ((p = file_gline(sp, ep, lno, &len)) == NULL) {
                if (file_lline(sp, ep, &lno))
                        return (1);
                if (lno == 0)
                        v_sof(sp, NULL);
                else
                        GETLINE_ERR(sp, lno);
                return (1);
        }

        cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;

        /*
         * Reset the length to the number of characters from the start of
         * the line up to and including the cursor.  With the cursor in
         * column 0 there is nothing to search on this line, so start with
         * the previous one.  (That jump enters the loop body without
         * running the loop's initialization; startp is assigned at the
         * label before it is ever read.)
         */
        len = cno ? cno + 1 : 0;
        if (len == 0)
                goto line;
        for (startp = p, p += cno; cnt--;) {
                /*
                 * On a non-blank character, step back one so that a cursor
                 * sitting on the first character of a word leaves that
                 * word.  With no character to step back to, continue on
                 * the previous line.
                 */
                if (!isblank((unsigned char)*p)) {
                        if (len < 2)
                                goto line;
                        --p;
                        --len;
                }

                /* Skip blanks; running out of line means no word here. */
                BW(isblank((unsigned char)*p));
                if (len == 0)
                        goto line;

                /* Skip backward over the group the cursor landed in. */
                if (spaceonly)
                        BW(!isblank((unsigned char)*p));
                else if (inword((unsigned char)*p))
                        BW(inword((unsigned char)*p));
                else
                        BW(!isblank((unsigned char)*p) &&
                            !inword((unsigned char)*p));

                if (cnt && len == 0) {
                        /* If we hit SOF, stay there (historic practice). */
line:                   if (lno == 1) {
                                rp->lno = 1;
                                rp->cno = 0;
                                return (0);
                        }

                        /*
                         * Get the line.  If the line is empty, decrement
                         * count and get another one.
                         */
                        if ((p = file_gline(sp, ep, --lno, &len)) == NULL) {
                                GETLINE_ERR(sp, lno);
                                return (1);
                        }
                        if (len == 0) {
                                if (cnt == 0 || --cnt == 0) {
                                        rp->lno = lno;
                                        rp->cno = 0;
                                        return (0);
                                }
                                goto line;
                        }

                        /*
                         * Set the cursor to the end of the line (len is
                         * known to be non-zero here).  If the word at the
                         * end of this line has only a single character,
                         * we've already skipped over it.
                         *
                         * NOTE(review): when the last character is a
                         * non-word character, the count is decremented if
                         * the character before it is in the same non-word
                         * group, which looks inverted next to the inword
                         * case; spaceonly is not consulted either.  Left
                         * as found -- confirm the intended behavior.
                         */
                        startp = p;
                        p += len - 1;
                        if (cnt && len > 1 && !isblank((unsigned char)p[0])) {
                                if (inword((unsigned char)p[0])) {
                                        if (!inword((unsigned char)p[-1]))
                                                --cnt;
                                } else if (!isblank((unsigned char)p[-1]) &&
                                    !inword((unsigned char)p[-1]))
                                        --cnt;
                        }
                } else {
                        /* BW() stopped one before the word; step onto it. */
                        ++p;
                        ++len;
                }
        }
        rp->lno = lno;
        rp->cno = p - startp;
        return (0);
}

/*
 * v_worde -- [count]e
 *      Motion to the end of a normal word.  All of the work is done by
 *      eword() with spaceonly clear; the tm argument is not used.
 */
int
v_worde(sp, ep, vp, fm, tm, rp)
        SCR *sp;
        EXF *ep;
        VICMDARG *vp;
        MARK *fm, *tm, *rp;
{
        int rval;

        rval = eword(sp, ep, vp, fm, rp, 0);
        return (rval);
}

/*
 * v_wordE -- [count]E
 *      Motion to the end of a bigword.  All of the work is done by
 *      eword() with spaceonly set; the tm argument is not used.
 */
int
v_wordE(sp, ep, vp, fm, tm, rp)
        SCR *sp;
        EXF *ep;
        VICMDARG *vp;
        MARK *fm, *tm, *rp;
{
        int rval;

        rval = eword(sp, ep, vp, fm, rp, 1);
        return (rval);
}

/*
 * eword --
 *      Move forward to the end of a word; shared by the 'e' and 'E'
 *      commands.
 *
 *      sp, ep:    screen and file handles, handed on to the line access
 *                 and message routines.
 *      vp:        command argument; supplies the count when VC_C1SET is
 *                 set, otherwise a count of 1 is used.  Its VC_C, VC_D and
 *                 VC_Y flags make the reported column one larger.
 *      fm:        starting cursor position.
 *      rp:        receives the new cursor position on success.
 *      spaceonly: non-zero for bigwords (only blanks separate words), zero
 *                 for normal words (word characters and other non-blank
 *                 characters form separate groups).
 *
 *      Returns 0 with rp filled in, or 1 if a line could not be retrieved
 *      or the cursor was already at the end of the file with nothing for
 *      the command to act on.
 *
 *      Line bytes are converted to unsigned char before they are classified.
 *      The line is held in a plain char pointer, and passing a negative char
 *      value to a <ctype.h> classification routine is undefined behavior.
 *      isblank() and inword() are not defined in this file; the conversion
 *      presumes they are, or are built on, <ctype.h> routines, and is
 *      harmless if they are simple comparisons.
 */
static int
eword(sp, ep, vp, fm, rp, spaceonly)
        SCR *sp;
        EXF *ep;
        VICMDARG *vp;
        MARK *fm, *rp;
        int spaceonly;
{
        register char *p;
        recno_t lno;
        size_t len, llen;
        u_long cno, cnt;
        int empty;
        char *startp;

        lno = fm->lno;
        cno = fm->cno;

        if ((p = file_gline(sp, ep, lno, &llen)) == NULL) {
                if (file_lline(sp, ep, &lno))
                        return (1);
                if (lno == 0)
                        v_eof(sp, ep, NULL);
                else
                        GETLINE_ERR(sp, lno);
                return (1);
        }

        cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;

        /*
         * Reset the length; the first character is the current cursor
         * position.  If no more characters in this line, may already
         * be at EOF.  `empty' records that the starting line had nothing
         * after the cursor; it is cleared once a loop pass completes.
         * (The jump enters the loop body without running the loop's
         * initialization; startp is assigned at the label before it is
         * ever read.)
         */
        len = llen - cno;
        empty = (llen == 0 || llen == cno + 1);
        if (empty)
                goto line;

        for (startp = p += cno; cnt--; empty = 0) {
                /*
                 * On a non-blank character, step forward one so that a
                 * cursor sitting on the last character of a word leaves
                 * that word.  With no character to step to, continue on
                 * the next line.
                 *
                 * NOTE(review): after an empty line has been fetched at
                 * the label below, len is 0 and *p is still examined here
                 * -- confirm file_gline() returns readable storage for
                 * empty lines.
                 */
                if (!isblank((unsigned char)*p)) {
                        if (len < 2)
                                goto line;
                        ++p;
                        --len;
                }

                /*
                 * Skip blanks.  If that used up the line, no word was
                 * found, so give the count back; otherwise skip forward
                 * over the group the cursor landed in.
                 */
                FW(isblank((unsigned char)*p));
                if (len == 0)
                        ++cnt;
                else if (spaceonly)
                        FW(!isblank((unsigned char)*p));
                else if (inword((unsigned char)*p))
                        FW(inword((unsigned char)*p));
                else
                        FW(!isblank((unsigned char)*p) &&
                            !inword((unsigned char)*p));

                if (cnt && len == 0) {
                        /* If we hit EOF, stay there (historic practice). */
line:                   if ((p = file_gline(sp, ep, ++lno, &llen)) == NULL) {
                                /*
                                 * If already at eof, complain, unless it's
                                 * a change command or a delete command and
                                 * there's something to delete.
                                 *
                                 * NOTE(review): llen is read after the
                                 * failed call, which relies on file_gline()
                                 * leaving it untouched on failure --
                                 * confirm.
                                 */
                                if (empty) {
                                        if (F_ISSET(vp, VC_C) ||
                                            (F_ISSET(vp, VC_D) && llen != 0)) {
                                                rp->lno = lno - 1;
                                                rp->cno = llen ? llen : 1;
                                                return (0);
                                        }
                                        v_eof(sp, ep, NULL);
                                        return (1);
                                }

                                /* Otherwise stop on the last line's end. */
                                if ((p =
                                    file_gline(sp, ep, --lno, &llen)) == NULL) {
                                        GETLINE_ERR(sp, lno);
                                        return (1);
                                }
                                rp->lno = lno;
                                rp->cno = llen ? llen - 1 : 0;
                                /* The 'c', 'd' and 'y' need one more space. */
                                if (F_ISSET(vp, VC_C | VC_D | VC_Y))
                                        ++rp->cno;
                                return (0);
                        }

                        /* Carry on from column 0 of the new line. */
                        len = llen;
                        cno = 0;
                        startp = p;
                } else {
                        /* FW() stopped one past the word; step back onto it. */
                        --p;
                        ++len;
                }
        }
        rp->lno = lno;
        rp->cno = cno + (p - startp);

        /* The 'c', 'd' and 'y' need one more space. */
        if (F_ISSET(vp, VC_C | VC_D | VC_Y))
                ++rp->cno;
        return (0);
}