Commit | Line | Data |
---|---|---|
a7e60862 WJ |
1 | |
2 | /******************************************** | |
3 | split.c | |
4 | copyright 1991, Michael D. Brennan | |
5 | ||
6 | This is a source file for mawk, an implementation of | |
7 | the AWK programming language. | |
8 | ||
9 | Mawk is distributed without warranty under the terms of | |
10 | the GNU General Public License, version 2, 1991. | |
11 | ********************************************/ | |
12 | ||
13 | /* $Log: split.c,v $ | |
14 | * Revision 5.1 91/12/05 07:56:31 brennan | |
15 | * 1.1 pre-release | |
16 | * | |
17 | */ | |
18 | ||
19 | /* split.c */ | |
20 | ||
21 | #define TEMPBUFF_GOES_HERE | |
22 | ||
23 | #include "mawk.h" | |
24 | #include "symtype.h" | |
25 | #include "bi_vars.h" | |
26 | #include "bi_funct.h" | |
27 | #include "memory.h" | |
28 | #include "scan.h" | |
29 | #include "regexp.h" | |
30 | #include "field.h" | |
31 | ||
32 | SPLIT_OV *split_ov_list ; | |
33 | ||
34 | static int PROTO(re_ov_split, (char *, PTR) ) ; | |
35 | static int PROTO(space_ov_split, (char *, char *) ) ; | |
36 | ||
37 | /* split string s of length slen on SPACE without changing s. | |
38 | load the pieces into STRINGS and ptrs into | |
39 | split_buff[] | |
40 | return the number of pieces */ | |
41 | ||
42 | int space_split( s , slen) | |
43 | register char *s ; | |
44 | unsigned slen ; | |
45 | { char *back = s + slen ; | |
46 | int i = 0 ; | |
47 | int len ; | |
48 | char *q ; | |
49 | STRING *sval ; | |
50 | int lcnt = MAX_SPLIT/3 ; | |
51 | ||
52 | #define EAT_SPACE() while ( scan_code[*(unsigned char*)s] ==\ | |
53 | SC_SPACE ) s++ | |
54 | #define EAT_NON_SPACE() \ | |
55 | *back = ' ' ; /* sentinel */\ | |
56 | while ( scan_code[*(unsigned char*)s] != SC_SPACE ) s++ ;\ | |
57 | *back = 0 | |
58 | ||
59 | ||
60 | while ( lcnt-- ) | |
61 | { | |
62 | EAT_SPACE() ; | |
63 | if ( *s == 0 ) goto done ; | |
64 | /* mark the front with q */ | |
65 | q = s++ ; | |
66 | EAT_NON_SPACE() ; | |
67 | sval = split_buff[i++] = new_STRING((char *) 0, len = s - q ) ; | |
68 | (void) memcpy(sval->str, q, SIZE_T(len)) ; | |
69 | ||
70 | EAT_SPACE() ; | |
71 | if ( *s == 0 ) goto done ; | |
72 | q = s++ ; | |
73 | EAT_NON_SPACE() ; | |
74 | sval = split_buff[i++] = new_STRING((char *) 0, len = s - q ) ; | |
75 | (void) memcpy(sval->str, q, SIZE_T(len)) ; | |
76 | ||
77 | EAT_SPACE() ; | |
78 | if ( *s == 0 ) goto done ; | |
79 | q = s++ ; | |
80 | EAT_NON_SPACE() ; | |
81 | sval = split_buff[i++] = new_STRING((char *) 0, len = s - q ) ; | |
82 | (void) memcpy(sval->str, q, SIZE_T(len)) ; | |
83 | ||
84 | } | |
85 | /* we've overflowed */ | |
86 | return i + space_ov_split(s, back) ; | |
87 | ||
88 | done: | |
89 | return i ; | |
90 | } | |
91 | ||
92 | static int space_ov_split(s, back) | |
93 | register char *s ; | |
94 | char *back ; | |
95 | ||
96 | { | |
97 | SPLIT_OV dummy ; | |
98 | register SPLIT_OV *tail = &dummy ; | |
99 | char *q ; | |
100 | int cnt = 0 ; | |
101 | unsigned len ; | |
102 | ||
103 | while ( 1 ) | |
104 | { | |
105 | EAT_SPACE() ; | |
106 | if ( *s == 0 ) break ; /* done */ | |
107 | q = s++ ; | |
108 | EAT_NON_SPACE() ; | |
109 | ||
110 | tail = tail->link = (SPLIT_OV*) zmalloc(sizeof(SPLIT_OV)) ; | |
111 | tail->sval = new_STRING((char *) 0 , len = s-q) ; | |
112 | (void) memcpy(tail->sval->str, q, SIZE_T(len)) ; | |
113 | cnt++ ; | |
114 | } | |
115 | ||
116 | tail->link = (SPLIT_OV*) 0 ; | |
117 | split_ov_list = dummy.link ; | |
118 | return cnt ; | |
119 | } | |
120 | ||
121 | ||
122 | char *re_pos_match(s, re, lenp) | |
123 | register char *s ; | |
124 | PTR re ; unsigned *lenp ; | |
125 | { | |
126 | while ( s = REmatch(s, re, lenp) ) | |
127 | if ( *lenp ) return s ; | |
128 | else | |
129 | if ( *s == 0 ) break ; | |
130 | else s++ ; | |
131 | ||
132 | return (char *) 0 ; | |
133 | } | |
134 | ||
135 | int re_split(s, re) | |
136 | char *s ; | |
137 | PTR re ; | |
138 | { register char *t ; | |
139 | int i = 0 ; | |
140 | unsigned mlen, len ; | |
141 | STRING *sval ; | |
142 | int lcnt = MAX_SPLIT / 3 ; | |
143 | ||
144 | while ( lcnt-- ) | |
145 | { | |
146 | if ( !(t = re_pos_match(s, re, &mlen)) ) goto done ; | |
147 | sval = split_buff[i++] = new_STRING( (char *)0, len = t-s) ; | |
148 | (void) memcpy(sval->str, s, SIZE_T(len)) ; | |
149 | s = t + mlen ; | |
150 | ||
151 | if ( !(t = re_pos_match(s, re, &mlen)) ) goto done ; | |
152 | sval = split_buff[i++] = new_STRING( (char *)0, len = t-s) ; | |
153 | (void) memcpy(sval->str, s, SIZE_T(len)) ; | |
154 | s = t + mlen ; | |
155 | ||
156 | if ( !(t = re_pos_match(s, re, &mlen)) ) goto done ; | |
157 | sval = split_buff[i++] = new_STRING( (char *)0, len = t-s) ; | |
158 | (void) memcpy(sval->str, s, SIZE_T(len)) ; | |
159 | s = t + mlen ; | |
160 | } | |
161 | /* we've overflowed */ | |
162 | return i + re_ov_split(s, re) ; | |
163 | ||
164 | done: | |
165 | split_buff[i++] = new_STRING(s) ; | |
166 | return i ; | |
167 | } | |
168 | ||
169 | /* | |
170 | we've overflowed split_buff[] , put | |
171 | the rest on the split_ov_list | |
172 | return number of pieces | |
173 | */ | |
174 | ||
175 | static int re_ov_split(s, re) | |
176 | char *s ; | |
177 | PTR re ; | |
178 | { | |
179 | SPLIT_OV dummy ; | |
180 | register SPLIT_OV *tail = &dummy ; | |
181 | int cnt = 1 ; | |
182 | char *t ; | |
183 | unsigned len, mlen ; | |
184 | ||
185 | while ( t = re_pos_match(s, re, &mlen) ) | |
186 | { | |
187 | tail = tail->link = (SPLIT_OV *) zmalloc(sizeof(SPLIT_OV)) ; | |
188 | tail->sval = new_STRING( (char *)0, len = t-s) ; | |
189 | (void) memcpy(tail->sval->str, s, SIZE_T(len)) ; | |
190 | s = t + mlen ; | |
191 | cnt++ ; | |
192 | } | |
193 | /* and one more */ | |
194 | tail = tail->link = (SPLIT_OV *) zmalloc(sizeof(SPLIT_OV)) ; | |
195 | tail->sval = new_STRING(s) ; | |
196 | tail->link = (SPLIT_OV*) 0 ; | |
197 | split_ov_list = dummy.link ; | |
198 | ||
199 | return cnt ; | |
200 | } | |
201 | ||
202 | /* split(s, X, r) | |
203 | split s into array X on r | |
204 | ||
205 | entry: sp[0] holds r | |
206 | sp[-1] pts at X | |
207 | sp[-2] holds s | |
208 | */ | |
209 | CELL *bi_split(sp) | |
210 | register CELL *sp ; | |
211 | { | |
212 | int cnt ; /* the number of pieces */ | |
213 | ||
214 | ||
215 | if ( sp->type < C_RE ) cast_for_split(sp) ; | |
216 | /* can be C_RE, C_SPACE or C_SNULL */ | |
217 | sp -= 2 ; | |
218 | if ( sp->type < C_STRING ) cast1_to_s(sp) ; | |
219 | ||
220 | if ( string(sp)->len == 0 ) /* nothing to split */ | |
221 | { free_STRING( string(sp) ) ; | |
222 | sp->type = C_DOUBLE ; sp->dval = 0.0 ; | |
223 | return sp ; | |
224 | } | |
225 | ||
226 | switch ( (sp+2)->type ) | |
227 | { | |
228 | case C_RE : | |
229 | cnt = re_split(string(sp)->str, (sp+2)->ptr) ; | |
230 | break ; | |
231 | ||
232 | case C_SPACE : | |
233 | cnt = space_split(string(sp)->str, string(sp)->len) ; | |
234 | break ; | |
235 | ||
236 | /* this case could be done by C_RE, but very slowly. | |
237 | Since it is the common way to eliminate fields, | |
238 | we'll treat the special case for speed */ | |
239 | case C_SNULL : /* split on empty string */ | |
240 | cnt = 1 ; | |
241 | split_buff[0] = (STRING *) sp->ptr ; | |
242 | string(sp)->ref_cnt++ ; | |
243 | break ; | |
244 | ||
245 | default : bozo("bad splitting cell in bi_split") ; | |
246 | } | |
247 | ||
248 | ||
249 | free_STRING( string(sp) ) ; | |
250 | sp->type = C_DOUBLE ; | |
251 | sp->dval = (double) cnt ; | |
252 | ||
253 | load_array((ARRAY)(sp+1)->ptr, cnt) ; | |
254 | ||
255 | return sp ; | |
256 | } | |
257 |