386BSD 0.1 development
[unix-history] / usr / src / usr.bin / awk / split.c
CommitLineData
a7e60862
WJ
1
2/********************************************
3split.c
4copyright 1991, Michael D. Brennan
5
6This is a source file for mawk, an implementation of
7the AWK programming language.
8
9Mawk is distributed without warranty under the terms of
10the GNU General Public License, version 2, 1991.
11********************************************/
12
13/* $Log: split.c,v $
14 * Revision 5.1 91/12/05 07:56:31 brennan
15 * 1.1 pre-release
16 *
17*/
18
19/* split.c */
20
21#define TEMPBUFF_GOES_HERE
22
23#include "mawk.h"
24#include "symtype.h"
25#include "bi_vars.h"
26#include "bi_funct.h"
27#include "memory.h"
28#include "scan.h"
29#include "regexp.h"
30#include "field.h"
31
/* Head of the linked list of pieces that did not fit in split_buff[];
   it is (re)assigned by space_ov_split() and re_ov_split() below. */
32SPLIT_OV *split_ov_list ;
33
/* Forward declarations of the file-local overflow splitters, which are
   called by re_split() and space_split() before they are defined. */
34static int PROTO(re_ov_split, (char *, PTR) ) ;
35static int PROTO(space_ov_split, (char *, char *) ) ;
36
37/* split string s of length slen on SPACE without changing s.
38 load the pieces into STRINGS and ptrs into
39 split_buff[]
40 return the number of pieces */
41
42int space_split( s , slen)
43 register char *s ;
44 unsigned slen ;
45{ char *back = s + slen ;
46 int i = 0 ;
47 int len ;
48 char *q ;
49 STRING *sval ;
50 int lcnt = MAX_SPLIT/3 ;
51
52#define EAT_SPACE() while ( scan_code[*(unsigned char*)s] ==\
53 SC_SPACE ) s++
54#define EAT_NON_SPACE() \
55 *back = ' ' ; /* sentinel */\
56 while ( scan_code[*(unsigned char*)s] != SC_SPACE ) s++ ;\
57 *back = 0
58
59
60 while ( lcnt-- )
61 {
62 EAT_SPACE() ;
63 if ( *s == 0 ) goto done ;
64 /* mark the front with q */
65 q = s++ ;
66 EAT_NON_SPACE() ;
67 sval = split_buff[i++] = new_STRING((char *) 0, len = s - q ) ;
68 (void) memcpy(sval->str, q, SIZE_T(len)) ;
69
70 EAT_SPACE() ;
71 if ( *s == 0 ) goto done ;
72 q = s++ ;
73 EAT_NON_SPACE() ;
74 sval = split_buff[i++] = new_STRING((char *) 0, len = s - q ) ;
75 (void) memcpy(sval->str, q, SIZE_T(len)) ;
76
77 EAT_SPACE() ;
78 if ( *s == 0 ) goto done ;
79 q = s++ ;
80 EAT_NON_SPACE() ;
81 sval = split_buff[i++] = new_STRING((char *) 0, len = s - q ) ;
82 (void) memcpy(sval->str, q, SIZE_T(len)) ;
83
84 }
85 /* we've overflowed */
86 return i + space_ov_split(s, back) ;
87
88done:
89 return i ;
90}
91
92static int space_ov_split(s, back)
93 register char *s ;
94 char *back ;
95
96{
97 SPLIT_OV dummy ;
98 register SPLIT_OV *tail = &dummy ;
99 char *q ;
100 int cnt = 0 ;
101 unsigned len ;
102
103 while ( 1 )
104 {
105 EAT_SPACE() ;
106 if ( *s == 0 ) break ; /* done */
107 q = s++ ;
108 EAT_NON_SPACE() ;
109
110 tail = tail->link = (SPLIT_OV*) zmalloc(sizeof(SPLIT_OV)) ;
111 tail->sval = new_STRING((char *) 0 , len = s-q) ;
112 (void) memcpy(tail->sval->str, q, SIZE_T(len)) ;
113 cnt++ ;
114 }
115
116 tail->link = (SPLIT_OV*) 0 ;
117 split_ov_list = dummy.link ;
118 return cnt ;
119}
120
121
122char *re_pos_match(s, re, lenp)
123 register char *s ;
124 PTR re ; unsigned *lenp ;
125{
126 while ( s = REmatch(s, re, lenp) )
127 if ( *lenp ) return s ;
128 else
129 if ( *s == 0 ) break ;
130 else s++ ;
131
132 return (char *) 0 ;
133}
134
135int re_split(s, re)
136 char *s ;
137 PTR re ;
138{ register char *t ;
139 int i = 0 ;
140 unsigned mlen, len ;
141 STRING *sval ;
142 int lcnt = MAX_SPLIT / 3 ;
143
144 while ( lcnt-- )
145 {
146 if ( !(t = re_pos_match(s, re, &mlen)) ) goto done ;
147 sval = split_buff[i++] = new_STRING( (char *)0, len = t-s) ;
148 (void) memcpy(sval->str, s, SIZE_T(len)) ;
149 s = t + mlen ;
150
151 if ( !(t = re_pos_match(s, re, &mlen)) ) goto done ;
152 sval = split_buff[i++] = new_STRING( (char *)0, len = t-s) ;
153 (void) memcpy(sval->str, s, SIZE_T(len)) ;
154 s = t + mlen ;
155
156 if ( !(t = re_pos_match(s, re, &mlen)) ) goto done ;
157 sval = split_buff[i++] = new_STRING( (char *)0, len = t-s) ;
158 (void) memcpy(sval->str, s, SIZE_T(len)) ;
159 s = t + mlen ;
160 }
161 /* we've overflowed */
162 return i + re_ov_split(s, re) ;
163
164done:
165 split_buff[i++] = new_STRING(s) ;
166 return i ;
167}
168
169/*
170 we've overflowed split_buff[] , put
171 the rest on the split_ov_list
172 return number of pieces
173*/
174
175static int re_ov_split(s, re)
176 char *s ;
177 PTR re ;
178{
179 SPLIT_OV dummy ;
180 register SPLIT_OV *tail = &dummy ;
181 int cnt = 1 ;
182 char *t ;
183 unsigned len, mlen ;
184
185 while ( t = re_pos_match(s, re, &mlen) )
186 {
187 tail = tail->link = (SPLIT_OV *) zmalloc(sizeof(SPLIT_OV)) ;
188 tail->sval = new_STRING( (char *)0, len = t-s) ;
189 (void) memcpy(tail->sval->str, s, SIZE_T(len)) ;
190 s = t + mlen ;
191 cnt++ ;
192 }
193 /* and one more */
194 tail = tail->link = (SPLIT_OV *) zmalloc(sizeof(SPLIT_OV)) ;
195 tail->sval = new_STRING(s) ;
196 tail->link = (SPLIT_OV*) 0 ;
197 split_ov_list = dummy.link ;
198
199 return cnt ;
200}
201
202/* split(s, X, r)
203 split s into array X on r
204
205 entry: sp[0] holds r
206 sp[-1] pts at X
207 sp[-2] holds s
208*/
209CELL *bi_split(sp)
210 register CELL *sp ;
211{
212 int cnt ; /* the number of pieces */
213
214
215 if ( sp->type < C_RE ) cast_for_split(sp) ;
216 /* can be C_RE, C_SPACE or C_SNULL */
217 sp -= 2 ;
218 if ( sp->type < C_STRING ) cast1_to_s(sp) ;
219
220 if ( string(sp)->len == 0 ) /* nothing to split */
221 { free_STRING( string(sp) ) ;
222 sp->type = C_DOUBLE ; sp->dval = 0.0 ;
223 return sp ;
224 }
225
226 switch ( (sp+2)->type )
227 {
228 case C_RE :
229 cnt = re_split(string(sp)->str, (sp+2)->ptr) ;
230 break ;
231
232 case C_SPACE :
233 cnt = space_split(string(sp)->str, string(sp)->len) ;
234 break ;
235
236 /* this case could be done by C_RE, but very slowly.
237 Since it is the common way to eliminate fields,
238 we'll treat the special case for speed */
239 case C_SNULL : /* split on empty string */
240 cnt = 1 ;
241 split_buff[0] = (STRING *) sp->ptr ;
242 string(sp)->ref_cnt++ ;
243 break ;
244
245 default : bozo("bad splitting cell in bi_split") ;
246 }
247
248
249 free_STRING( string(sp) ) ;
250 sp->type = C_DOUBLE ;
251 sp->dval = (double) cnt ;
252
253 load_array((ARRAY)(sp+1)->ptr, cnt) ;
254
255 return sp ;
256}
257