8-bit characters are not ignored
[unix-history] / usr / src / contrib / ed / re.c
CommitLineData
95136f9d
KB
1/*-
2 * Copyright (c) 1992 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rodney Ruddock of the University of Guelph.
7 *
8 * %sccs.include.redist.c%
9 */
10
11#ifndef lint
e692f66f 12static char sccsid[] = "@(#)re.c 5.3 (Berkeley) %G%";
95136f9d
KB
13#endif /* not lint */
14
ecbf4ad0
KB
15#include <sys/types.h>
16
ecbf4ad0
KB
17#include <regex.h>
18#include <setjmp.h>
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
95136f9d 22
e692f66f
KB
23#ifdef DBI
24#include <db.h>
25#endif
26
ecbf4ad0
KB
27#include "ed.h"
28#include "extern.h"
95136f9d
KB
29
30/*
31 * This finds the n-th occurrence of an RE in a line. If '^' was at the start
ecbf4ad0
KB
32 * of the RE then look once (in case n=1). There is no standard RE interface
33 * to do this. Returns 0 for success. NOTE: the #ifdef REG_STARTEND is if
34 * the regex package has the BSD extensions to it.
95136f9d 35 */
95136f9d
KB
36int
37#ifdef REG_STARTEND
38regexec_n(reprecomp, strg, num_subexp, reprematch, flags, n, len, pass)
39#else
40regexec_n(reprecomp, strg, num_subexp, reprematch, flags, n, offset, pass)
41#endif
ecbf4ad0
KB
42 regex_t *reprecomp;
43 char *strg;
44 size_t num_subexp;
45 regmatch_t reprematch[];
46 int flags, n;
95136f9d 47#ifdef REG_STARTEND
ecbf4ad0 48 size_t len;
95136f9d 49#else
ecbf4ad0 50 size_t *offset;
95136f9d 51#endif
ecbf4ad0 52 int pass; /* if pass == 0 .rm_so user set, else set default */
95136f9d 53{
ecbf4ad0 54 int l_cnt;
95136f9d 55#ifndef REG_STARTEND
ecbf4ad0 56 char *l_offset = strg;
95136f9d
KB
57#endif
58
ecbf4ad0
KB
59 if (n <= 0)
60 return (REG_NOMATCH);
95136f9d 61#ifdef REG_STARTEND
ecbf4ad0
KB
62 flags = (flags | REG_STARTEND);
63 if (pass)
64 reprematch[0].rm_so = 0;
65 reprematch[0].rm_eo = len;
95136f9d 66#else
ecbf4ad0 67 strg = &strg[offset];
95136f9d 68#endif
ecbf4ad0
KB
69 for (l_cnt = 0;;) {
70 if (regexec(reprecomp,
71 strg, num_subexp, reprematch, flags) == 0)
72 l_cnt++;
73 else
74 return (REG_NOMATCH);
75 if (l_cnt >= n)
76 break;
95136f9d 77#ifdef REG_STARTEND
ecbf4ad0
KB
78 reprematch[0].rm_so = reprematch[0].rm_eo;
79 reprematch[0].rm_eo = len;
95136f9d 80#else
ecbf4ad0 81 strg = &strg[reprematch[0].rm_eo];
95136f9d 82#endif
ecbf4ad0
KB
83 /* if a "^" started the current RE we only loop once */
84 if (RE_sol)
85 return (REG_NOMATCH);
86 }
95136f9d 87#ifndef REG_STARTEND
ecbf4ad0 88 *offset = (size_t) (strg - l_offset);
95136f9d 89#endif
ecbf4ad0
KB
90 return (0); /* success */
91}
95136f9d
KB
92
93/*
94 * Replace in the line specified at the found locations with the
95 * specified replacement. There is no standard RE interface to do
96 * this.
97 */
ecbf4ad0 98char *
95136f9d 99#ifdef REG_STARTEND
ecbf4ad0 100re_replace(line, num_subexp, repmatch, replacer)
95136f9d 101#else
ecbf4ad0 102re_replace(line, num_subexp, repmatch, replacer, offset)
95136f9d 103#endif
ecbf4ad0
KB
104 char *line;
105 size_t num_subexp;
106 regmatch_t repmatch[];
107 char *replacer;
95136f9d 108#ifndef REG_STARTEND
ecbf4ad0 109 size_t offset;
95136f9d 110#endif
95136f9d 111{
ecbf4ad0
KB
112 static char *l_prev_r = NULL;
113 static int l_prev_r_flag = 0;
114 regoff_t l_len_before, l_len_whole, l_slen[RE_SEC];
115 int l_cnt, l_len_new = 0, l_new_rm_eo = 0;
116 char *l_string, *l_head;
95136f9d 117
ecbf4ad0
KB
118 if (l_prev_r_flag == 0) {
119 l_prev_r_flag = 1;
120 l_prev_r = NULL;
121 }
122 l_head = replacer;
123 /* Length of what stays the same before. */
124 l_len_before = (repmatch[0].rm_so);
125 l_len_whole = strlen(line);
126 if (num_subexp > RE_SEC - 1)
127 num_subexp = RE_SEC - 1;
128 for (l_cnt = 0; l_cnt <= num_subexp; l_cnt++)
129 l_slen[l_cnt] =
130 (repmatch[l_cnt].rm_eo) - (repmatch[l_cnt].rm_so);
95136f9d 131
ecbf4ad0
KB
132 /*
133 * l_slen[0] == len of what is to be replaced.
134 * l_slen[1-9] == len of each backref.
135 */
136 if ((*replacer == '%') && (replacer[1] == 1)) {
137 l_string = calloc(l_len_whole - l_slen[0] +
138 (strlen(l_prev_r)) + 2, sizeof(char));
139 if (l_string == NULL) {
140 /* *errnum = -1; */
141 strcpy(help_msg, "out of memory error");
142 return (NULL);
143 }
95136f9d 144#ifdef REG_STARTEND
ecbf4ad0 145 bcopy(line, l_string, (int) l_len_before);
95136f9d 146#else
ecbf4ad0 147 bcopy(line, l_string, (int) l_len_before + offset);
95136f9d
KB
148#endif
149#ifdef REG_STARTEND
ecbf4ad0 150 l_string[l_len_before] = '\0';
95136f9d 151#else
ecbf4ad0 152 l_string[l_len_before + offset] = '\0';
95136f9d 153#endif
ecbf4ad0 154 strcat(l_string, l_prev_r);
95136f9d 155#ifdef REG_STARTEND
ecbf4ad0 156 strcat(l_string, &line[repmatch[0].rm_eo]);
95136f9d 157#else
ecbf4ad0 158 strcat(l_string, &line[repmatch[0].rm_eo + offset]);
95136f9d 159#endif
ecbf4ad0
KB
160 return (l_string);
161 }
95136f9d 162
ecbf4ad0
KB
163 /* Figure out length of new line first. */
164 while (*replacer != '\0') {
165 /* Add in the length of the RE match. */
166 if (*replacer == '&')
167 l_len_new = l_len_new + l_slen[0];
168 /* Add in the length of a backref. */
169 else if (*replacer == '\\') {
170 replacer++;
171 if ((*replacer > '0') &&
172 (*replacer < ('9' + 1)) &&
173 (repmatch[*replacer - '0'].rm_so > -1))
174 /* -1 - -1 = 0 */
175 l_len_new = l_len_new + l_slen[*replacer - '0'];
176 else
177 l_len_new++;
178 } else
179 l_len_new++;
180 replacer++;
181 }
95136f9d 182
ecbf4ad0
KB
183 /* Create the line of an appropriate length. */
184 l_string =
185 calloc(l_len_whole - l_slen[0] + l_len_new + 2, sizeof(char));
186 if (l_string == NULL) {
187 strcpy(help_msg, "out of memory error");
188 return (NULL);
189 }
190 if (l_prev_r != NULL)
191 free(l_prev_r);
192 l_prev_r = calloc(l_len_new + 2, sizeof(char));
193 if (l_prev_r == NULL) {
194 strcpy(help_msg, "out of memory error");
195 return (NULL);
196 }
197 /* Copy over what doesn't change before the chars to be replaced. */
95136f9d 198#ifdef REG_STARTEND
ecbf4ad0 199 bcopy(line, l_string, (int) l_len_before);
95136f9d 200#else
ecbf4ad0 201 bcopy(line, l_string, l_len_before + offset);
95136f9d
KB
202#endif
203#ifdef REG_STARTEND
ecbf4ad0 204 l_string[l_len_before] = '\0';
95136f9d 205#else
ecbf4ad0 206 l_string[l_len_before + offset] = '\0';
95136f9d 207#endif
ecbf4ad0 208 l_prev_r[0] = '\0';
95136f9d 209
ecbf4ad0
KB
210 /* Make the replacement. */
211 replacer = l_head;
212 while (*replacer != '\0') {
213 /* Put what matched the RE into the replacement. */
214 if (*replacer == '&') {
95136f9d 215#ifdef REG_STARTEND
ecbf4ad0
KB
216 strncat(l_string,
217 &line[repmatch[0].rm_so], (int)l_slen[0]);
218 strncat(l_prev_r,
219 &line[repmatch[0].rm_so], (int) l_slen[0]);
95136f9d 220#else
ecbf4ad0
KB
221 strncat(l_string,
222 &line[repmatch[0].rm_so + offset], (int) l_slen[0]);
223 strncat(l_prev_r,
224 &line[repmatch[0].rm_so + offset], (int) l_slen[0]);
95136f9d 225#endif
ecbf4ad0
KB
226 } else if (*replacer == '\\') {
227 /* Likely a backref to be included. */
228 replacer++;
229 if ((*replacer > '0') && (*replacer < ('9' + 1)) &&
230 (repmatch[*replacer - '0'].rm_so > -1)) {
95136f9d 231#ifdef REG_STARTEND
ecbf4ad0
KB
232 strncat(l_string,
233 &line[repmatch[*replacer - '0'].rm_so],
234 (int) l_slen[*replacer - '0']);
235 strncat(l_prev_r,
236 &line[repmatch[*replacer - '0'].rm_so],
237 (int) l_slen[*replacer - '0']);
95136f9d 238#else
ecbf4ad0
KB
239 strncat(l_string,
240 &line[repmatch[*replacer - '0'].rm_so +
241 offset], (int) l_slen[*replacer - '0']);
242 strncat(l_prev_r,
243 &line[repmatch[*replacer - '0'].rm_so +
244 offset], (int) l_slen[*replacer - '0']);
95136f9d 245#endif
ecbf4ad0
KB
246 }
247 /* Put the replacement in. */
248 else {
249 strncat(l_string, replacer, 1);
250 strncat(l_prev_r, replacer, 1);
251 }
252 }
253 /* Put the replacement in. */
254 else {
255 strncat(l_string, replacer, 1);
256 strncat(l_prev_r, replacer, 1);
257 }
258 replacer++;
259 }
95136f9d 260
ecbf4ad0 261 l_new_rm_eo = strlen(l_string);
95136f9d 262
ecbf4ad0 263 /* Copy over what was after the chars to be replaced to the new line. */
95136f9d 264#ifdef REG_STARTEND
ecbf4ad0 265 strcat(l_string, &line[repmatch[0].rm_eo]);
95136f9d 266#else
ecbf4ad0 267 strcat(l_string, &line[repmatch[0].rm_eo + offset]);
95136f9d
KB
268#endif
269
ecbf4ad0 270 repmatch[0].rm_eo = l_new_rm_eo; /* Update rm_eo. */
95136f9d 271#ifndef REG_STARTEND
ecbf4ad0 272 offset += l_new_rm_eo; /* Update offset. */
95136f9d 273#endif
ecbf4ad0
KB
274 return (l_string); /* Return the new line. */
275}