[unix-history] / usr.bin / elvis / regsub.c

/* regsub.c */

/* This file contains the regsub() function, which performs substitutions
 * after a regexp match has been found.
 */

#include "config.h"
#include "ctype.h"
#include "vi.h"
#ifdef REGEX
# include <regex.h>
#else
# include "regexp.h"
#endif

/* Replacement text remembered by regsub() so that a later "~" (or "\~")
 * in a replacement string can be expanded to it.  regsub() treats NULL as
 * "no previous text", and (unless built with CRUNCH) stores a malloc()ed
 * string here, releasing the old one with _free_() when it is replaced.
 */
char	*last_repl;	/* a copy of the text from the previous subst */

/* Perform substitutions after a regexp match.
 *
 * The replacement text "src" is expanded into "dst" in two stages:
 *
 *  1. (not in CRUNCH builds) Every expandable tilde is replaced by the
 *     previous replacement text, last_repl.  When the magic option is on
 *     a bare "~" is expandable and "\~" is a literal tilde; when magic is
 *     off (or the build is NO_MAGIC) only "\~" is expandable.  The result
 *     is kept in freshly malloc()ed memory and becomes the new last_repl.
 *
 *  2. The text is copied to dst while interpreting:
 *       &        the whole matched text (when magic is on)
 *       \&       a literal & when magic is on, else the whole match
 *       \0 - \9  the text matched by that subexpression (not NO_MAGIC)
 *       \U \L    convert copied match text to upper/lowercase until \E
 *       \u \l    convert only the next copied character of match text
 *       \E \e    end a \U or \L conversion
 *       \x       any other x is copied literally
 *       ^M       becomes a newline (not in CRUNCH builds)
 *
 * Parameters:
 *   rm            (REGEX builds) match offsets; rm[0] is the whole match
 *   startp, endp  (REGEX builds) start and end of the whole matched text
 *   re            (other builds) regexp whose startp[]/endp[] point into
 *                 the matched text
 *   src           the replacement string
 *   dst           receives the NUL-terminated result; no size is passed
 *                 in, so the caller must supply a large enough buffer
 *
 * Returns 0 on success, or -1 after calling regerr() when there is no
 * previous text for "~" or when memory cannot be allocated.
 */
#ifdef REGEX
int regsub(rm, startp, endp, src, dst)
	regmatch_t	*rm;	/* the regexp with pointers into matched text */
	char		*startp, *endp;
	REG char	*src;	/* the replacement string */
	REG char	*dst;	/* where to put the result of the subst */
#else
int regsub(re, src, dst)
	regexp		*re;	/* the regexp with pointers into matched text */
	REG char	*src;	/* the replacement string */
	REG char	*dst;	/* where to put the result of the subst */
#endif
{
	REG char	*cpy;	/* pointer to start of text to copy */
	REG char	*end;	/* pointer to end of text to copy */
	REG char	c;
#ifndef CRUNCH
	char		*start;	/* start of the ~-expanded copy of src */
	char		lead;	/* what must precede an expandable ~ */
	int		mod = 0;/* used to track \U, \L, \u, \l, and \E */
	int		len;	/* used to calculate length of subst string */

	/* replace \~ (or maybe ~) by previous substitution text */

	/* An expandable ~ is preceded by an unquoted backslash when magic
	 * is off, and by no quoting backslash at all when magic is on.
	 * "lead" holds the quoting state that marks a ~ as expandable, so
	 * both passes below apply exactly the same rule.
	 */
# ifdef NO_MAGIC
	lead = '\\';
# else
	lead = (*o_magic ? '\0' : '\\');
# endif

	/* step 1: calculate the length of the new substitution text */
	for (len = strlen(src), c = '\0', cpy = src; *cpy; cpy++)
	{
		if (c == lead && *cpy == '~')
		{
			if (!last_repl)
			{
				regerr("No prev text to substitute for ~");

				return -1;
			}
			len += strlen(last_repl) - 1;
			if (lead)
				len -= 1; /* because we lose the \ too */
		}

		/* watch backslash quoting */
		if (c != '\\' && *cpy == '\\')
			c = '\\';
		else
			c = '\0';
	}

	/* allocate memory for the ~ed version of src */
	checkmem();
	start = cpy = (char *)malloc((unsigned)(len + 1));
	if (!cpy)
	{
		regerr("Not enough memory for ~ expansion");
		return -1;
	}

	/* step 2: copy src into start, replacing the ~s by the previous
	 * text.  The quoting state is tracked exactly as in step 1, so the
	 * number of bytes written always equals the length predicted there
	 * and a ~ is only expanded after step 1 has seen that last_repl
	 * is not NULL.
	 */
	for (c = '\0'; *src; src++)
	{
		if (c == lead && *src == '~')
		{
			/* when a backslash introduced this ~, it has already
			 * been copied; back up so the expansion overwrites it
			 */
			if (lead)
				cpy--;
			strcpy(cpy, last_repl);
			cpy += strlen(last_repl);
		}
		else
		{
			*cpy++ = *src;
		}

		/* watch backslash quoting */
		if (c != '\\' && *src == '\\')
			c = '\\';
		else
			c = '\0';
	}
	*cpy = '\0';
#ifdef DEBUG
	if ((int)(cpy - start) != len)
	{
		msg("Bug in regsub.c! Predicted length = %d, Actual length = %d", len, (int)(cpy - start));
	}
#endif
	checkmem();

	/* remember this as the "previous" for next time */
	if (last_repl)
		_free_(last_repl);
	last_repl = src = start;

#endif /* undef CRUNCH */

	/* build dst from the (possibly ~-expanded) replacement text */
	while ((c = *src++) != '\0')
	{
#ifndef NO_MAGIC
		/* recognize any meta characters */
		if (c == '&' && *o_magic)
		{
#ifdef REGEX
			cpy = startp;
			end = endp;
#else
			cpy = re->startp[0];
			end = re->endp[0];
#endif
		}
		else
#endif /* not NO_MAGIC */
		if (c == '\\')
		{
			c = *src++;
			if (c == '\0')
			{
				/* A lone backslash ends the replacement text.
				 * Stop here: src has already stepped over the
				 * terminating NUL, so looping again would
				 * read beyond the end of the string.
				 */
				break;
			}
			switch (c)
			{
#ifndef NO_MAGIC
			  case '0':
			  case '1':
			  case '2':
			  case '3':
			  case '4':
			  case '5':
			  case '6':
			  case '7':
			  case '8':
			  case '9':
				/* \0 thru \9 mean "copy subexpression" */
				c -= '0';
#ifdef REGEX
				/* a subexpression that took no part in the
				 * match has offsets of -1: nothing to copy
				 */
				if (rm[c].rm_so < 0 || rm[c].rm_eo < 0)
					continue;
				cpy = startp + (rm[c].rm_so - rm[0].rm_so);
				end = endp + (rm[c].rm_eo - rm[0].rm_eo);
#else
				cpy = re->startp[c];
				end = re->endp[c];
#endif
				break;
# ifndef CRUNCH
			  case 'U':
			  case 'u':
			  case 'L':
			  case 'l':
				/* \U and \L mean "convert to upper/lowercase" */
				mod = c;
				continue;

			  case 'E':
			  case 'e':
				/* \E ends the \U or \L */
				mod = 0;
				continue;
# endif /* not CRUNCH */
			  case '&':
				/* "\&" means "original text" */
				if (*o_magic)
				{
					*dst++ = c;
					continue;
				}
#ifdef REGEX
				cpy = startp;
				end = endp;
#else
				cpy = re->startp[0];
				end = re->endp[0];
#endif
				break;

#else /* NO_MAGIC */
			  case '&':
				/* "\&" means "original text" */
#ifdef REGEX
				cpy = startp;
				end = endp;
#else
				cpy = re->startp[0];
				end = re->endp[0];
#endif
				break;
#endif /* NO_MAGIC */
			  default:
				/* ordinary char preceded by backslash */
				*dst++ = c;
				continue;
			}
		}
#ifndef CRUNCH
# if OSK
		else if (c == '\l')
# else
		else if (c == '\r')
# endif
		{
			/* transliterate ^M into newline */
			*dst++ = '\n';
			continue;
		}
#endif /* !CRUNCH */
		else
		{
			/* ordinary character, so just copy it */
			*dst++ = c;
			continue;
		}

		/* Note: to reach this point in the code, we must have evaded
		 * all "continue" statements.  To do that, we must have hit
		 * a metacharacter that involves copying.
		 */

		/* if there is nothing to copy, loop */
		if (!cpy)
			continue;

		/* copy over a portion of the original */
		while (cpy < end)
		{
#ifndef NO_MAGIC
# ifndef CRUNCH
			switch (mod)
			{
			  case 'U':
			  case 'u':
				/* convert to uppercase */
				*dst++ = toupper(*cpy++);
				break;

			  case 'L':
			  case 'l':
				/* convert to lowercase */
				*dst++ = tolower(*cpy++);
				break;

			  default:
				/* copy without any conversion */
				*dst++ = *cpy++;
			}

			/* \u and \l end automatically after the first char */
			if (mod == 'u' || mod == 'l')
			{
				mod = 0;
			}
# else /* CRUNCH */
			*dst++ = *cpy++;
# endif /* CRUNCH */
#else /* NO_MAGIC */
			*dst++ = *cpy++;
#endif /* NO_MAGIC */
		}
	}
	*dst = '\0';
	return 0;
}
Commit	Line	Data
15637ed4 RG	1	/* regsub.c */
	2
	3	/* This file contains the regsub() function, which performs substitutions
	4	* after a regexp match has been found.
	5	*/
	6
	7	#include "config.h"
	8	#include "ctype.h"
	9	#include "vi.h"
6e657cf2 AM	10	#ifdef REGEX
	11	# include <regex.h>
	12	#else
	13	# include "regexp.h"
	14	#endif
15637ed4	15
99668b43	16	char last_repl; / a copy of the text from the previous subst */
15637ed4 RG	17
15637ed4 RG	18	/* perform substitutions after a regexp match */
6e657cf2	19	#ifdef REGEX
99668b43	20	int regsub(rm, startp, endp, src, dst)
6e657cf2 AM	21	regmatch_t rm; / the regexp with pointers into matched text */
	22	char startp, endp;
	23	REG char src; / the replacement string */
	24	REG char dst; / where to put the result of the subst */
	25	#else
99668b43	26	int regsub(re, src, dst)
15637ed4 RG	27	regexp re; / the regexp with pointers into matched text */
	28	REG char src; / the replacement string */
	29	REG char dst; / where to put the result of the subst */
6e657cf2	30	#endif
15637ed4 RG	31	{
	32	REG char cpy; / pointer to start of text to copy */
	33	REG char end; / pointer to end of text to copy */
	34	REG char c;
	35	char *start;
	36	#ifndef CRUNCH
	37	int mod = 0;/* used to track \U, \L, \u, \l, and \E */
	38	int len; /* used to calculate length of subst string */
15637ed4 RG	39
	40	/* replace \~ (or maybe ~) by previous substitution text */
	41
	42	/* step 1: calculate the length of the new substitution text */
	43	for (len = strlen(src), c = '\0', cpy = src; *cpy; cpy++)
	44	{
	45	# ifdef NO_MAGIC
	46	if (c == '\\' && *cpy == '~')
	47	# else
	48	if (c == (o_magic ? '\0' : '\\') && cpy == '~')
	49	# endif
	50	{
99668b43	51	if (!last_repl)
15637ed4	52	{
6e657cf2 AM	53	regerr("No prev text to substitute for ~");
6e657cf2 AM	54
99668b43	55	return -1;
15637ed4	56	}
99668b43	57	len += strlen(last_repl) - 1;
15637ed4 RG	58	# ifndef NO_MAGIC
	59	if (!*o_magic)
	60	# endif
	61	len -= 1; /* because we lose the \ too */
	62	}
	63
	64	/* watch backslash quoting */
	65	if (c != '\\' && *cpy == '\\')
	66	c = '\\';
	67	else
	68	c = '\0';
	69	}
	70
	71	/* allocate memory for the ~ed version of src */
08746e8b	72	checkmem();
15637ed4 RG	73	start = cpy = (char *)malloc((unsigned)(len + 1));
	74	if (!cpy)
	75	{
6e657cf2	76	regerr("Not enough memory for ~ expansion");
99668b43	77	return -1;
15637ed4 RG	78	}
	79
	80	/* copy src into start, replacing the ~s by the previous text */
	81	while (*src)
	82	{
	83	# ifndef NO_MAGIC
	84	if (o_magic && src == '~')
	85	{
99668b43 AM	86	strcpy(cpy, last_repl);
99668b43 AM	87	cpy += strlen(last_repl);
15637ed4 RG	88	src++;
	89	}
	90	else if (!o_magic && src == '\\' && *(src + 1) == '~')
	91	# else /* NO_MAGIC */
	92	if (src == '\\' && (src + 1) == '~')
	93	# endif /* NO_MAGIC */
	94	{
99668b43 AM	95	strcpy(cpy, last_repl);
99668b43 AM	96	cpy += strlen(last_repl);
15637ed4 RG	97	src += 2;
15637ed4 RG	98	}
99668b43 AM	99	else if (o_magic && src == '\\' && *(src + 1) == '~') {
	100	cpy++ = src++;
	101	cpy++ = src++;
	102	}
15637ed4 RG	103	else
	104	{
	105	cpy++ = src++;
	106	}
	107	}
	108	*cpy = '\0';
	109	#ifdef DEBUG
	110	if ((int)(cpy - start) != len)
	111	{
	112	msg("Bug in regsub.c! Predicted length = %d, Actual length = %d", len, (int)(cpy - start));
	113	}
	114	#endif
08746e8b	115	checkmem();
15637ed4 RG	116
15637ed4 RG	117	/* remember this as the "previous" for next time */
99668b43 AM	118	if (last_repl)
	119	_free_(last_repl);
	120	last_repl = src = start;
15637ed4 RG	121
	122	#endif /* undef CRUNCH */
	123
	124	start = src;
	125	while ((c = *src++) != '\0')
	126	{
	127	#ifndef NO_MAGIC
	128	/* recognize any meta characters */
	129	if (c == '&' && *o_magic)
	130	{
6e657cf2 AM	131	#ifdef REGEX
	132	cpy = startp;
	133	end = endp;
	134	#else
15637ed4 RG	135	cpy = re->startp[0];
15637ed4 RG	136	end = re->endp[0];
6e657cf2	137	#endif
15637ed4 RG	138	}
	139	else
	140	#endif /* not NO_MAGIC */
	141	if (c == '\\')
	142	{
	143	c = *src++;
	144	switch (c)
	145	{
	146	#ifndef NO_MAGIC
	147	case '0':
	148	case '1':
	149	case '2':
	150	case '3':
	151	case '4':
	152	case '5':
	153	case '6':
	154	case '7':
	155	case '8':
	156	case '9':
	157	/* \0 thru \9 mean "copy subexpression" */
	158	c -= '0';
6e657cf2 AM	159	#ifdef REGEX
	160	cpy = startp + (rm[c].rm_so - rm[0].rm_so);
	161	end = endp + (rm[c].rm_eo - rm[0].rm_eo);
	162	#else
15637ed4 RG	163	cpy = re->startp[c];
15637ed4 RG	164	end = re->endp[c];
6e657cf2	165	#endif
15637ed4 RG	166	break;
	167	# ifndef CRUNCH
	168	case 'U':
	169	case 'u':
	170	case 'L':
	171	case 'l':
	172	/* \U and \L mean "convert to upper/lowercase" */
	173	mod = c;
	174	continue;
	175
	176	case 'E':
	177	case 'e':
	178	/* \E ends the \U or \L */
	179	mod = 0;
	180	continue;
	181	# endif /* not CRUNCH */
	182	case '&':
	183	/* "\&" means "original text" */
	184	if (*o_magic)
	185	{
	186	*dst++ = c;
	187	continue;
	188	}
6e657cf2 AM	189	#ifdef REGEX
	190	cpy = startp;
	191	end = endp;
	192	#else
15637ed4 RG	193	cpy = re->startp[0];
15637ed4 RG	194	end = re->endp[0];
6e657cf2	195	#endif
15637ed4 RG	196	break;
	197
	198	#else /* NO_MAGIC */
	199	case '&':
	200	/* "\&" means "original text" */
6e657cf2 AM	201	#ifdef REGEX
	202	cpy = startp;
	203	end = endp;
	204	#else
15637ed4 RG	205	cpy = re->startp[0];
15637ed4 RG	206	end = re->endp[0];
6e657cf2	207	#endif
15637ed4 RG	208	break;
	209	#endif /* NO_MAGIC */
	210	default:
	211	/* ordinary char preceded by backslash */
	212	*dst++ = c;
	213	continue;
	214	}
	215	}
	216	#ifndef CRUNCH
	217	# if OSK
	218	else if (c == '\l')
	219	# else
	220	else if (c == '\r')
	221	# endif
	222	{
	223	/* transliterate ^M into newline */
	224	*dst++ = '\n';
	225	continue;
	226	}
	227	#endif /* !CRUNCH */
	228	else
	229	{
	230	/* ordinary character, so just copy it */
	231	*dst++ = c;
	232	continue;
	233	}
	234
	235	/* Note: to reach this point in the code, we must have evaded
	236	* all "continue" statements. To do that, we must have hit
	237	* a metacharacter that involves copying.
	238	*/
	239
	240	/* if there is nothing to copy, loop */
	241	if (!cpy)
	242	continue;
	243
	244	/* copy over a portion of the original */
	245	while (cpy < end)
	246	{
	247	#ifndef NO_MAGIC
	248	# ifndef CRUNCH
	249	switch (mod)
	250	{
	251	case 'U':
	252	case 'u':
	253	/* convert to uppercase */
	254	dst++ = toupper(cpy++);
	255	break;
	256
	257	case 'L':
	258	case 'l':
	259	/* convert to lowercase */
	260	dst++ = tolower(cpy++);
	261	break;
	262
	263	default:
	264	/* copy without any conversion */
	265	dst++ = cpy++;
	266	}
	267
	268	/* \u and \l end automatically after the first char */
	269	if (mod && (mod == 'u' \|\| mod == 'l'))
	270	{
	271	mod = 0;
272	}
273	# else /* CRUNCH */
274	dst++ = cpy++;
275	# endif /* CRUNCH */
276	#else /* NO_MAGIC */
277	dst++ = cpy++;
278	#endif /* NO_MAGIC */
279	}
280	}
281	*dst = '\0';
99668b43	282	return 0;
15637ed4	283	}