Commit | Line | Data |
---|---|---|
9e7b18d5 KB |
1 | /*- |
2 | * Copyright (c) 1992 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Christos Zoulas of Cornell University. | |
7 | * | |
8 | * %sccs.include.redist.c% | |
9 | */ | |
10 | ||
b6dd18ed CZ |
11 | #if !defined(lint) && !defined(SCCSID) |
12 | static char sccsid[] = "@(#)tokenizer.c 5.2 (Berkeley) %G%"; | |
13 | #endif /* not lint && not SCCSID */ | |
9e7b18d5 KB |
14 | |
15 | /* | |
16 | * tokenizer.c: Bourne shell like tokenizer | |
17 | */ | |
18 | #include "sys.h" | |
19 | #include <string.h> | |
20 | #include <stdlib.h> | |
21 | #include "tokenizer.h" | |
22 | ||
/*
 * Quoting state of the tokenizer while it scans a line.
 */
typedef enum {
    Q_none,		/* Not inside any quote */
    Q_single,		/* Inside '...' */
    Q_double,		/* Inside "..." */
    Q_one,		/* Last character was a backslash outside quotes */
    Q_doubleone		/* Last character was a backslash inside "..." */
} quote_t;
24 | ||
/* Default word separators, used when tok_init() is called with a NULL ifs */
#define IFS			"\t \n"

/* Bits kept in the tokenizer's flags member */
#define TOK_KEEP		1	/* Keep the current word even if empty */
#define TOK_EAT			2	/* A backslash-newline was just swallowed */

#define WINCR			20	/* Word buffer: initial size and growth step */
#define AINCR			10	/* Arg vector: initial size and growth step */

/* Allocation hooks; each one maps directly onto the C library routine */
#define tok_malloc(a)		malloc(a)
#define tok_free(a)		free(a)
#define tok_realloc(a, b)	realloc(a, b)
36 | ||
37 | ||
38 | struct tokenizer { | |
39 | char *ifs; /* In field separator */ | |
40 | int argc, amax; /* Current and maximum number of args */ | |
41 | char **argv; /* Argument list */ | |
42 | char *wptr, *wmax; /* Space and limit on the word buffer */ | |
43 | char *wstart; /* Beginning of next word */ | |
44 | char *wspace; /* Space of word buffer */ | |
45 | quote_t quote; /* Quoting state */ | |
46 | int flags; /* flags; */ | |
47 | }; | |
48 | ||
49 | ||
50 | private void tok_finish __P((Tokenizer *)); | |
51 | ||
52 | ||
53 | /* tok_finish(): | |
54 | * Finish a word in the tokenizer. | |
55 | */ | |
56 | private void | |
57 | tok_finish(tok) | |
58 | Tokenizer *tok; | |
59 | { | |
60 | *tok->wptr = '\0'; | |
61 | if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) { | |
62 | tok->argv[tok->argc++] = tok->wstart; | |
63 | tok->argv[tok->argc] = NULL; | |
64 | tok->wstart = ++tok->wptr; | |
65 | } | |
66 | tok->flags &= ~TOK_KEEP; | |
67 | } | |
68 | ||
69 | ||
70 | /* tok_init(): | |
71 | * Initialize the tokenizer | |
72 | */ | |
73 | public Tokenizer * | |
74 | tok_init(ifs) | |
75 | const char *ifs; | |
76 | { | |
77 | Tokenizer* tok = (Tokenizer*) tok_malloc(sizeof(Tokenizer)); | |
78 | ||
79 | tok->ifs = strdup(ifs ? ifs : IFS); | |
80 | tok->argc = 0; | |
81 | tok->amax = AINCR; | |
82 | tok->argv = (char **) tok_malloc(sizeof(char *) * tok->amax); | |
83 | tok->argv[0] = NULL; | |
84 | tok->wspace = (char *) tok_malloc(WINCR); | |
85 | tok->wmax = tok->wspace + WINCR; | |
86 | tok->wstart = tok->wspace; | |
87 | tok->wptr = tok->wspace; | |
88 | tok->flags = 0; | |
89 | tok->quote = Q_none; | |
90 | ||
91 | return tok; | |
92 | } | |
93 | ||
94 | ||
95 | /* tok_reset(): | |
96 | * Reset the tokenizer | |
97 | */ | |
98 | public void | |
99 | tok_reset(tok) | |
100 | Tokenizer *tok; | |
101 | { | |
102 | tok->argc = 0; | |
103 | tok->wstart = tok->wspace; | |
104 | tok->wptr = tok->wspace; | |
105 | tok->flags = 0; | |
106 | tok->quote = Q_none; | |
107 | } | |
108 | ||
109 | ||
110 | /* tok_end(): | |
111 | * Clean up | |
112 | */ | |
113 | public void | |
114 | tok_end(tok) | |
115 | Tokenizer *tok; | |
116 | { | |
117 | tok_free((ptr_t) tok->ifs); | |
118 | tok_free((ptr_t) tok->wspace); | |
119 | tok_free((ptr_t) tok->argv); | |
120 | tok_free((ptr_t) tok); | |
121 | } | |
122 | ||
123 | ||
124 | ||
125 | /* tok_line(): | |
126 | * Bourne shell like tokenizing | |
127 | * Return: | |
128 | * -1: Internal error | |
129 | * 3: Quoted return | |
130 | * 2: Unmatched double quote | |
131 | * 1: Unmatched single quote | |
132 | * 0: Ok | |
133 | */ | |
134 | public int | |
135 | tok_line(tok, line, argc, argv) | |
136 | Tokenizer *tok; | |
137 | const char* line; | |
138 | int *argc; | |
139 | char ***argv; | |
140 | { | |
141 | const char *ptr; | |
142 | ||
143 | while (1) { | |
144 | switch (*(ptr = line++)) { | |
145 | case '\'': | |
146 | tok->flags |= TOK_KEEP; | |
147 | tok->flags &= ~TOK_EAT; | |
148 | switch (tok->quote) { | |
149 | case Q_none: | |
150 | tok->quote = Q_single; /* Enter single quote mode */ | |
151 | break; | |
152 | ||
153 | case Q_single: /* Exit single quote mode */ | |
154 | tok->quote = Q_none; | |
155 | break; | |
156 | ||
157 | case Q_one: /* Quote this ' */ | |
158 | tok->quote = Q_none; | |
159 | *tok->wptr++ = *ptr; | |
160 | break; | |
161 | ||
162 | case Q_double: /* Stay in double quote mode */ | |
163 | *tok->wptr++ = *ptr; | |
164 | break; | |
165 | ||
166 | case Q_doubleone: /* Quote this ' */ | |
167 | tok->quote = Q_double; | |
168 | *tok->wptr++ = *ptr; | |
169 | break; | |
170 | ||
171 | default: | |
172 | return(-1); | |
173 | } | |
174 | break; | |
175 | ||
176 | case '"': | |
177 | tok->flags &= ~TOK_EAT; | |
178 | tok->flags |= TOK_KEEP; | |
179 | switch (tok->quote) { | |
180 | case Q_none: /* Enter double quote mode */ | |
181 | tok->quote = Q_double; | |
182 | break; | |
183 | ||
184 | case Q_double: | |
185 | tok->quote = Q_none; /* Exit double quote mode */ | |
186 | break; | |
187 | ||
188 | case Q_one: /* Quote this " */ | |
189 | tok->quote = Q_none; | |
190 | *tok->wptr++ = *ptr; | |
191 | break; | |
192 | ||
193 | case Q_single: /* Stay in single quote mode */ | |
194 | *tok->wptr++ = *ptr; | |
195 | break; | |
196 | ||
197 | case Q_doubleone: /* Quote this " */ | |
198 | tok->quote = Q_double; | |
199 | *tok->wptr++ = *ptr; | |
200 | break; | |
201 | ||
202 | default: | |
203 | return(-1); | |
204 | } | |
205 | break; | |
206 | ||
207 | case '\\': | |
208 | tok->flags |= TOK_KEEP; | |
209 | tok->flags &= ~TOK_EAT; | |
210 | switch (tok->quote) { | |
211 | case Q_none: /* Quote next character */ | |
212 | tok->quote = Q_one; | |
213 | break; | |
214 | ||
215 | case Q_double: | |
216 | tok->quote = Q_doubleone;/* Quote next character */ | |
217 | break; | |
218 | ||
219 | case Q_one: | |
220 | *tok->wptr++ = *ptr; | |
221 | tok->quote = Q_none; /* Quote this, restore state */ | |
222 | break; | |
223 | ||
224 | case Q_single: /* Stay in single quote mode */ | |
225 | *tok->wptr++ = *ptr; | |
226 | break; | |
227 | ||
228 | case Q_doubleone: /* Quote this \ */ | |
229 | tok->quote = Q_double; | |
230 | *tok->wptr++ = *ptr; | |
231 | break; | |
232 | ||
233 | default: | |
234 | return(-1); | |
235 | } | |
236 | break; | |
237 | ||
238 | case '\n': | |
239 | tok->flags &= ~TOK_EAT; | |
240 | switch (tok->quote) { | |
241 | case Q_none: | |
242 | tok_finish(tok); | |
243 | *argv = tok->argv; | |
244 | *argc = tok->argc; | |
245 | return(0); | |
246 | ||
247 | case Q_single: | |
248 | case Q_double: | |
249 | *tok->wptr++ = *ptr; /* Add the return */ | |
250 | break; | |
251 | ||
252 | case Q_doubleone: | |
253 | tok->flags |= TOK_EAT; | |
254 | tok->quote = Q_double; /* Back to double, eat the '\n' */ | |
255 | break; | |
256 | ||
257 | case Q_one: | |
258 | tok->flags |= TOK_EAT; | |
259 | tok->quote = Q_none; /* No quote, more eat the '\n' */ | |
260 | break; | |
261 | ||
262 | default: | |
263 | return(0); | |
264 | } | |
265 | break; | |
266 | ||
267 | case '\0': | |
268 | switch (tok->quote) { | |
269 | case Q_none: | |
270 | /* Finish word and return */ | |
271 | if (tok->flags & TOK_EAT) { | |
272 | tok->flags &= ~TOK_EAT; | |
273 | return 3; | |
274 | } | |
275 | tok_finish(tok); | |
276 | *argv = tok->argv; | |
277 | *argc = tok->argc; | |
278 | return(0); | |
279 | ||
280 | case Q_single: | |
281 | return(1); | |
282 | ||
283 | case Q_double: | |
284 | return(2); | |
285 | ||
286 | case Q_doubleone: | |
287 | tok->quote = Q_double; | |
288 | *tok->wptr++ = *ptr; | |
289 | break; | |
290 | ||
291 | case Q_one: | |
292 | tok->quote = Q_none; | |
293 | *tok->wptr++ = *ptr; | |
294 | break; | |
295 | ||
296 | default: | |
297 | return(-1); | |
298 | } | |
299 | break; | |
300 | ||
301 | default: | |
302 | tok->flags &= ~TOK_EAT; | |
303 | switch (tok->quote) { | |
304 | case Q_none: | |
305 | if (strchr(tok->ifs, *ptr) != NULL) | |
306 | tok_finish(tok); | |
307 | else | |
308 | *tok->wptr++ = *ptr; | |
309 | break; | |
310 | ||
311 | case Q_single: | |
312 | case Q_double: | |
313 | *tok->wptr++ = *ptr; | |
314 | break; | |
315 | ||
316 | ||
317 | case Q_doubleone: | |
318 | *tok->wptr++ = '\\'; | |
319 | tok->quote = Q_double; | |
320 | *tok->wptr++ = *ptr; | |
321 | break; | |
322 | ||
323 | case Q_one: | |
324 | tok->quote = Q_none; | |
325 | *tok->wptr++ = *ptr; | |
326 | break; | |
327 | ||
328 | default: | |
329 | return(-1); | |
330 | ||
331 | } | |
332 | break; | |
333 | } | |
334 | ||
335 | if (tok->wptr >= tok->wmax - 4) { | |
336 | size_t size = tok->wmax - tok->wspace + WINCR; | |
337 | char *s = (char *) tok_realloc(tok->wspace, size); | |
338 | /*SUPPRESS 22*/ | |
339 | int offs = s - tok->wspace; | |
340 | ||
341 | if (offs != 0) { | |
342 | int i; | |
343 | for (i = 0; i < tok->argc; i++) | |
344 | tok->argv[i] = tok->argv[i] + offs; | |
345 | tok->wptr = tok->wptr + offs; | |
346 | tok->wstart = tok->wstart + offs; | |
347 | tok->wmax = s + size; | |
348 | tok->wspace = s; | |
349 | } | |
350 | } | |
351 | ||
352 | if (tok->argc >= tok->amax - 4) { | |
353 | tok->amax += AINCR; | |
354 | tok->argv = (char **) tok_realloc(tok->argv, | |
355 | tok->amax * sizeof(char*)); | |
356 | } | |
357 | ||
358 | } | |
359 | } |