* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
/* Version identification string for this file, in "@(#)" what-string form. */
static char sccsid[] = "@(#)v_word.c 8.18 (Berkeley) 3/15/94";
* There are two types of "words". Bigwords are easy -- groups of anything
* delimited by whitespace. Normal words are trickier. They are either a
* group of characters, numbers and underscores, or a group of anything but,
* delimited by whitespace. When searching for a word, if you're in whitespace, it's
* easy, just remove the whitespace and go to the beginning or end of the
* word. Otherwise, figure out if the next character is in a different group.
* If it is, go to the beginning or end of that group, otherwise, go to the
* beginning or end of the current group. The historic version of vi didn't
* get this right, so, for example, there were cases where "4e" was not the
* same as "eeee" -- in particular, single character words, and commands that
* began in whitespace were almost always handled incorrectly. To get it right
* you have to resolve the cursor after each search so that the look-ahead to
* figure out what type of "word" the cursor is in will be correct.
* Empty lines, and lines that consist of only white-space characters count
* as a single word, and the beginning and end of the file count as an
* infinite number of words.
* Movements associated with commands are different than movement commands.
* For example, in "abc def", with the cursor on the 'a', "cw" is from
* 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white
* space is discarded from the change movement. Another example is that,
* in the same string, a "cw" on any white space character replaces that
* single character, and nothing else. Ain't nothin' in here that's easy.
* One historic note -- in the original vi, the 'w', 'W' and 'B' commands
* would treat groups of empty lines as individual words, i.e. the command
* would move the cursor to each new empty line. The 'e' and 'E' commands
* would treat groups of empty lines as a single word, i.e. the first use
* would move past the group of lines. The 'b' command would just beep at
* you, or, if you did it from the start of the line as part of a motion
* command, go absolutely nuts. If the lines contained only white-space
* characters, the 'w' and 'W' commands would just beep at you, and the 'B',
* 'b', 'E' and 'e' commands would treat the group as a single word, and
* the 'B' and 'b' commands will treat the lines as individual words. This
* implementation treats all of these cases as a single white-space word.
/*
 * Selects which definition of "word" a word motion uses.
 */
enum which {
	BIGWORD = 0,	/* Group of anything delimited by whitespace. */
	LITTLEWORD = 1	/* Group of letters, digits and underscores, or a
			 * group of anything else, delimited by whitespace. */
};
/*
 * Forward declarations for the file-local word-motion workers.  Each one
 * takes the screen, the file, the vi command argument structure and the
 * kind of word (BIGWORD or LITTLEWORD) to move by.
 */
static int fword __P((SCR *sp, EXF *ep, VICMDARG *vp, enum which type));
static int eword __P((SCR *sp, EXF *ep, VICMDARG *vp, enum which type));
static int bword __P((SCR *sp, EXF *ep, VICMDARG *vp, enum which type));
* Move forward a bigword at a time.
return (fword(sp
, ep
, vp
, BIGWORD
));
* Move forward a word at a time.
return (fword(sp
, ep
, vp
, LITTLEWORD
));
enum { INWORD
, NOTWORD
} state
;
cnt
= F_ISSET(vp
, VC_C1SET
) ? vp
->count
: 1;
cs
.cs_lno
= vp
->m_start
.lno
;
cs
.cs_cno
= vp
->m_start
.cno
;
if (cs_init(sp
, ep
, &cs
))
* If the count is 1, and it's a change command, we're done.
* Else, move to the first non-white-space character, which
* counts as a single word move. If it's a motion command,
* don't move off the end of the line.
if (cs
.cs_flags
== CS_EMP
|| cs
.cs_flags
== 0 && isblank(cs
.cs_ch
)) {
if (cs
.cs_flags
!= CS_EMP
&& cnt
== 1) {
if (F_ISSET(vp
, VC_D
| VC_Y
)) {
if (cs_fspace(sp
, ep
, &cs
))
if (cs_fblank(sp
, ep
, &cs
))
* Cyclically move to the next word -- this involves skipping
* over word characters and then any trailing non-word characters.
* Note, for the 'w' command, the definition of a word keeps
* switching.
if (cs_next(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_EOF
)
if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
* If a motion command and we're at the end of the
* last word, we're done. Delete and yank eat any
* trailing blanks, but we don't move off the end
* of the line regardless.
if (cnt
== 0 && ISMOTION(vp
)) {
if (F_ISSET(vp
, VC_D
| VC_Y
) &&
/* Eat whitespace characters. */
if (cs_fblank(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_EOF
)
state
= cs
.cs_flags
== 0 &&
inword(cs
.cs_ch
) ? INWORD
: NOTWORD
;
if (cs_next(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_EOF
)
if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
if (cnt
== 0 && ISMOTION(vp
)) {
if (F_ISSET(vp
, VC_D
| VC_Y
) &&
/* Eat whitespace characters. */
if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
if (cs_fblank(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_EOF
)
* If we didn't move, we must be at EOF.
* That's okay for motion commands, however.
ret
: if (!ISMOTION(vp
) &&
cs
.cs_lno
== vp
->m_start
.lno
&& cs
.cs_cno
== vp
->m_start
.cno
) {
v_eof(sp
, ep
, &vp
->m_start
);
/* Adjust the end of the range for motion commands. */
vp
->m_stop
.lno
= cs
.cs_lno
;
vp
->m_stop
.cno
= cs
.cs_cno
;
if (ISMOTION(vp
) && cs
.cs_flags
== 0)
* Non-motion commands move to the end of the range. VC_D and
* VC_Y stay at the start. Ignore VC_C and VC_S.
vp
->m_final
= ISMOTION(vp
) ? vp
->m_start
: vp
->m_stop
;
* Move forward to the end of the bigword.
return (eword(sp
, ep
, vp
, BIGWORD
));
* Move forward to the end of the word.
return (eword(sp
, ep
, vp
, LITTLEWORD
));
* Move forward to the end of the word.
enum { INWORD
, NOTWORD
} state
;
cnt
= F_ISSET(vp
, VC_C1SET
) ? vp
->count
: 1;
cs
.cs_lno
= vp
->m_start
.lno
;
cs
.cs_cno
= vp
->m_start
.cno
;
if (cs_init(sp
, ep
, &cs
))
* If in whitespace, or the next character is whitespace, move past
* it. (This doesn't count as a word move.) Stay at the character
* past the current one, it sets word "state" for the 'e' command.
if (cs
.cs_flags
== 0 && !isblank(cs
.cs_ch
)) {
if (cs_next(sp
, ep
, &cs
))
if (cs
.cs_flags
== 0 && !isblank(cs
.cs_ch
))
if (cs_fblank(sp
, ep
, &cs
))
* Cyclically move to the next word -- this involves skipping
* over word characters and then any trailing non-word characters.
* Note, for the 'e' command, the definition of a word keeps
* switching.
start
: if (type
== BIGWORD
)
if (cs_next(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_EOF
)
if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
* When we reach the start of the word after the last
* word, we're done. If we changed state, back up one
* to the end of the previous word.
if (cs
.cs_flags
== 0 && cs_prev(sp
, ep
, &cs
))
/* Eat whitespace characters. */
if (cs_fblank(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_EOF
)
state
= cs
.cs_flags
== 0 &&
inword(cs
.cs_ch
) ? INWORD
: NOTWORD
;
if (cs_next(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_EOF
)
if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
if (cs
.cs_flags
== 0 && cs_prev(sp
, ep
, &cs
))
/* Eat whitespace characters. */
if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
if (cs_fblank(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_EOF
)
* If we didn't move, we must be at EOF.
* That's okay for motion commands, however.
ret
: if (!ISMOTION(vp
) &&
cs
.cs_lno
== vp
->m_start
.lno
&& cs
.cs_cno
== vp
->m_start
.cno
) {
v_eof(sp
, ep
, &vp
->m_start
);
/* Set the end of the range for motion commands. */
vp
->m_stop
.lno
= cs
.cs_lno
;
vp
->m_stop
.cno
= cs
.cs_cno
;
* Non-motion commands move to the end of the range. VC_D and
* VC_Y stay at the start. Ignore VC_C and VC_S.
vp
->m_final
= ISMOTION(vp
) ? vp
->m_start
: vp
->m_stop
;
* Move backward a bigword at a time.
return (bword(sp
, ep
, vp
, BIGWORD
));
* Move backward a word at a time.
return (bword(sp
, ep
, vp
, LITTLEWORD
));
* Move backward by words.
enum { INWORD
, NOTWORD
} state
;
cnt
= F_ISSET(vp
, VC_C1SET
) ? vp
->count
: 1;
cs
.cs_lno
= vp
->m_start
.lno
;
cs
.cs_cno
= vp
->m_start
.cno
;
if (cs_init(sp
, ep
, &cs
))
* If in whitespace, or the previous character is whitespace, move
* past it. (This doesn't count as a word move.) Stay at the
* character before the current one, it sets word "state" for the
* 'b' command.
if (cs
.cs_flags
== 0 && !isblank(cs
.cs_ch
)) {
if (cs_prev(sp
, ep
, &cs
))
if (cs
.cs_flags
== 0 && !isblank(cs
.cs_ch
))
if (cs_bblank(sp
, ep
, &cs
))
* Cyclically move to the beginning of the previous word -- this
* involves skipping over word characters and then any trailing
* non-word characters. Note, for the 'b' command, the definition
* of a word keeps switching.
start
: if (type
== BIGWORD
)
if (cs_prev(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_SOF
)
if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
* When we reach the end of the word before the last
* word, we're done. If we changed state, move forward
* one to the end of the next word.
if (cs
.cs_flags
== 0 && cs_next(sp
, ep
, &cs
))
/* Eat whitespace characters. */
if (cs_bblank(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_SOF
)
state
= cs
.cs_flags
== 0 &&
inword(cs
.cs_ch
) ? INWORD
: NOTWORD
;
if (cs_prev(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_SOF
)
if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
if (cs
.cs_flags
== 0 && cs_next(sp
, ep
, &cs
))
/* Eat whitespace characters. */
if (cs
.cs_flags
!= 0 || isblank(cs
.cs_ch
))
if (cs_bblank(sp
, ep
, &cs
))
if (cs
.cs_flags
== CS_SOF
)
/* If we didn't move, we must be at SOF. */
ret
: if (cs
.cs_lno
== vp
->m_start
.lno
&& cs
.cs_cno
== vp
->m_start
.cno
) {
/* Set the end of the range for motion commands. */
vp
->m_stop
.lno
= cs
.cs_lno
;
vp
->m_stop
.cno
= cs
.cs_cno
;
* Non-motion commands move to the end of the range. VC_D commands
* move to the end of the range. VC_Y stays at the start unless the
* end of the range is on a different line, when it moves to the end
* of the range. Ignore VC_C and VC_S. Motion commands adjust the
* starting point to the character before the current one.
vp
->m_final
= vp
->m_stop
;
if (F_ISSET(vp
, VC_Y
) && vp
->m_start
.lno
== vp
->m_stop
.lno
)
vp
->m_final
= vp
->m_start
;