BSD 4_4_Lite2 development
[unix-history] / usr / src / contrib / gawk-2.15.2 / field.c
CommitLineData
e60563be
C
1/*
2 * field.c - routines for dealing with fields and record parsing
3 */
4
5/*
6 * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with GAWK; see the file COPYING. If not, write to
23 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 */
25
26#include "awk.h"
27
28static int (*parse_field) P((int, char **, int, NODE *,
29 Regexp *, void (*)(), NODE *));
30static void rebuild_record P((void));
31static int re_parse_field P((int, char **, int, NODE *,
32 Regexp *, void (*)(), NODE *));
33static int def_parse_field P((int, char **, int, NODE *,
34 Regexp *, void (*)(), NODE *));
35static int sc_parse_field P((int, char **, int, NODE *,
36 Regexp *, void (*)(), NODE *));
37static int fw_parse_field P((int, char **, int, NODE *,
38 Regexp *, void (*)(), NODE *));
39static void set_element P((int, char *, int, NODE *));
40static void grow_fields_arr P((int num));
41static void set_field P((int num, char *str, int len, NODE *dummy));
42
43
44static Regexp *FS_regexp = NULL;
45static char *parse_extent; /* marks where to restart parse of record */
46static int parse_high_water=0; /* field number that we have parsed so far */
47static int nf_high_water = 0; /* size of fields_arr */
48static int resave_fs;
49static NODE *save_FS; /* save current value of FS when line is read,
50 * to be used in deferred parsing
51 */
52
53NODE **fields_arr; /* array of pointers to the field nodes */
54int field0_valid; /* $(>0) has not been changed yet */
55int default_FS;
56static NODE **nodes; /* permanent repository of field nodes */
57static int *FIELDWIDTHS = NULL;
58
59void
60init_fields()
61{
62 NODE *n;
63
64 emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
65 emalloc(nodes, NODE **, sizeof(NODE *), "init_fields");
66 getnode(n);
67 *n = *Nnull_string;
68 fields_arr[0] = nodes[0] = n;
69 parse_extent = fields_arr[0]->stptr;
70 save_FS = dupnode(FS_node->var_value);
71 field0_valid = 1;
72}
73
74
75static void
76grow_fields_arr(num)
77int num;
78{
79 register int t;
80 register NODE *n;
81
82 erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
83 erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "set_field");
84 for (t = nf_high_water+1; t <= num; t++) {
85 getnode(n);
86 *n = *Nnull_string;
87 fields_arr[t] = nodes[t] = n;
88 }
89 nf_high_water = num;
90}
91
92/*ARGSUSED*/
93static void
94set_field(num, str, len, dummy)
95int num;
96char *str;
97int len;
98NODE *dummy; /* not used -- just to make interface same as set_element */
99{
100 register NODE *n;
101
102 if (num > nf_high_water)
103 grow_fields_arr(num);
104 n = nodes[num];
105 n->stptr = str;
106 n->stlen = len;
107 n->flags = (PERM|STR|STRING|MAYBE_NUM);
108 fields_arr[num] = n;
109}
110
111/* Someone assigned a value to $(something). Fix up $0 to be right */
112static void
113rebuild_record()
114{
115 register int tlen;
116 register NODE *tmp;
117 NODE *ofs;
118 char *ops;
119 register char *cops;
120 register NODE **ptr;
121 register int ofslen;
122
123 tlen = 0;
124 ofs = force_string(OFS_node->var_value);
125 ofslen = ofs->stlen;
126 ptr = &fields_arr[NF];
127 while (ptr > &fields_arr[0]) {
128 tmp = force_string(*ptr);
129 tlen += tmp->stlen;
130 ptr--;
131 }
132 tlen += (NF - 1) * ofslen;
133 if (tlen < 0)
134 tlen = 0;
135 emalloc(ops, char *, tlen + 2, "fix_fields");
136 cops = ops;
137 ops[0] = '\0';
138 for (ptr = &fields_arr[1]; ptr <= &fields_arr[NF]; ptr++) {
139 tmp = *ptr;
140 if (tmp->stlen == 1)
141 *cops++ = tmp->stptr[0];
142 else if (tmp->stlen != 0) {
143 memcpy(cops, tmp->stptr, tmp->stlen);
144 cops += tmp->stlen;
145 }
146 if (ptr != &fields_arr[NF]) {
147 if (ofslen == 1)
148 *cops++ = ofs->stptr[0];
149 else if (ofslen != 0) {
150 memcpy(cops, ofs->stptr, ofslen);
151 cops += ofslen;
152 }
153 }
154 }
155 tmp = make_str_node(ops, tlen, ALREADY_MALLOCED);
156 unref(fields_arr[0]);
157 fields_arr[0] = tmp;
158 field0_valid = 1;
159}
160
161/*
162 * setup $0, but defer parsing rest of line until reference is made to $(>0)
163 * or to NF. At that point, parse only as much as necessary.
164 */
165void
166set_record(buf, cnt, freeold)
167char *buf;
168int cnt;
169int freeold;
170{
171 register int i;
172
173 NF = -1;
174 for (i = 1; i <= parse_high_water; i++) {
175 unref(fields_arr[i]);
176 }
177 parse_high_water = 0;
178 if (freeold) {
179 unref(fields_arr[0]);
180 if (resave_fs) {
181 resave_fs = 0;
182 unref(save_FS);
183 save_FS = dupnode(FS_node->var_value);
184 }
185 nodes[0]->stptr = buf;
186 nodes[0]->stlen = cnt;
187 nodes[0]->stref = 1;
188 nodes[0]->flags = (STRING|STR|PERM|MAYBE_NUM);
189 fields_arr[0] = nodes[0];
190 }
191 fields_arr[0]->flags |= MAYBE_NUM;
192 field0_valid = 1;
193}
194
195void
196reset_record()
197{
198 (void) force_string(fields_arr[0]);
199 set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, 0);
200}
201
202void
203set_NF()
204{
205 register int i;
206
207 NF = (int) force_number(NF_node->var_value);
208 if (NF > nf_high_water)
209 grow_fields_arr(NF);
210 for (i = parse_high_water + 1; i <= NF; i++) {
211 unref(fields_arr[i]);
212 fields_arr[i] = Nnull_string;
213 }
214 field0_valid = 0;
215}
216
217/*
218 * this is called both from get_field() and from do_split()
219 * via (*parse_field)(). This variation is for when FS is a regular
220 * expression -- either user-defined or because RS=="" and FS==" "
221 */
222static int
223re_parse_field(up_to, buf, len, fs, rp, set, n)
224int up_to; /* parse only up to this field number */
225char **buf; /* on input: string to parse; on output: point to start next */
226int len;
227NODE *fs;
228Regexp *rp;
229void (*set) (); /* routine to set the value of the parsed field */
230NODE *n;
231{
232 register char *scan = *buf;
233 register int nf = parse_high_water;
234 register char *field;
235 register char *end = scan + len;
236
237 if (up_to == HUGE)
238 nf = 0;
239 if (len == 0)
240 return nf;
241
242 if (*RS == 0 && default_FS)
243 while (scan < end && isspace(*scan))
244 scan++;
245 field = scan;
246 while (scan < end
247 && research(rp, scan, 0, (int)(end - scan), 1) != -1
248 && nf < up_to) {
249 if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
250 scan++;
251 if (scan == end) {
252 (*set)(++nf, field, scan - field, n);
253 up_to = nf;
254 break;
255 }
256 continue;
257 }
258 (*set)(++nf, field, scan + RESTART(rp, scan) - field, n);
259 scan += REEND(rp, scan);
260 field = scan;
261 if (scan == end) /* FS at end of record */
262 (*set)(++nf, field, 0, n);
263 }
264 if (nf != up_to && scan < end) {
265 (*set)(++nf, scan, (int)(end - scan), n);
266 scan = end;
267 }
268 *buf = scan;
269 return (nf);
270}
271
272/*
273 * this is called both from get_field() and from do_split()
274 * via (*parse_field)(). This variation is for when FS is a single space
275 * character.
276 */
277static int
278def_parse_field(up_to, buf, len, fs, rp, set, n)
279int up_to; /* parse only up to this field number */
280char **buf; /* on input: string to parse; on output: point to start next */
281int len;
282NODE *fs;
283Regexp *rp;
284void (*set) (); /* routine to set the value of the parsed field */
285NODE *n;
286{
287 register char *scan = *buf;
288 register int nf = parse_high_water;
289 register char *field;
290 register char *end = scan + len;
291 char sav;
292
293 if (up_to == HUGE)
294 nf = 0;
295 if (len == 0)
296 return nf;
297
298 /* before doing anything save the char at *end */
299 sav = *end;
300 /* because it will be destroyed now: */
301
302 *end = ' '; /* sentinel character */
303 for (; nf < up_to; scan++) {
304 /*
305 * special case: fs is single space, strip leading whitespace
306 */
307 while (scan < end && (*scan == ' ' || *scan == '\t'))
308 scan++;
309 if (scan >= end)
310 break;
311 field = scan;
312 while (*scan != ' ' && *scan != '\t')
313 scan++;
314 (*set)(++nf, field, (int)(scan - field), n);
315 if (scan == end)
316 break;
317 }
318
319 /* everything done, restore original char at *end */
320 *end = sav;
321
322 *buf = scan;
323 return nf;
324}
325
326/*
327 * this is called both from get_field() and from do_split()
328 * via (*parse_field)(). This variation is for when FS is a single character
329 * other than space.
330 */
331static int
332sc_parse_field(up_to, buf, len, fs, rp, set, n)
333int up_to; /* parse only up to this field number */
334char **buf; /* on input: string to parse; on output: point to start next */
335int len;
336NODE *fs;
337Regexp *rp;
338void (*set) (); /* routine to set the value of the parsed field */
339NODE *n;
340{
341 register char *scan = *buf;
342 register char fschar;
343 register int nf = parse_high_water;
344 register char *field;
345 register char *end = scan + len;
346 char sav;
347
348 if (up_to == HUGE)
349 nf = 0;
350 if (len == 0)
351 return nf;
352
353 if (*RS == 0 && fs->stlen == 0)
354 fschar = '\n';
355 else
356 fschar = fs->stptr[0];
357
358 /* before doing anything save the char at *end */
359 sav = *end;
360 /* because it will be destroyed now: */
361 *end = fschar; /* sentinel character */
362
363 for (; nf < up_to; scan++) {
364 field = scan;
365 while (*scan++ != fschar)
366 ;
367 scan--;
368 (*set)(++nf, field, (int)(scan - field), n);
369 if (scan == end)
370 break;
371 }
372
373 /* everything done, restore original char at *end */
374 *end = sav;
375
376 *buf = scan;
377 return nf;
378}
379
380/*
381 * this is called both from get_field() and from do_split()
382 * via (*parse_field)(). This variation is for fields are fixed widths.
383 */
384static int
385fw_parse_field(up_to, buf, len, fs, rp, set, n)
386int up_to; /* parse only up to this field number */
387char **buf; /* on input: string to parse; on output: point to start next */
388int len;
389NODE *fs;
390Regexp *rp;
391void (*set) (); /* routine to set the value of the parsed field */
392NODE *n;
393{
394 register char *scan = *buf;
395 register int nf = parse_high_water;
396 register char *end = scan + len;
397
398 if (up_to == HUGE)
399 nf = 0;
400 if (len == 0)
401 return nf;
402 for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) {
403 if (len > end - scan)
404 len = end - scan;
405 (*set)(++nf, scan, len, n);
406 scan += len;
407 }
408 if (len == -1)
409 *buf = end;
410 else
411 *buf = scan;
412 return nf;
413}
414
415NODE **
416get_field(requested, assign)
417register int requested;
418Func_ptr *assign; /* this field is on the LHS of an assign */
419{
420 /*
421 * if requesting whole line but some other field has been altered,
422 * then the whole line must be rebuilt
423 */
424 if (requested == 0) {
425 if (!field0_valid) {
426 /* first, parse remainder of input record */
427 if (NF == -1) {
428 NF = (*parse_field)(HUGE-1, &parse_extent,
429 fields_arr[0]->stlen -
430 (parse_extent - fields_arr[0]->stptr),
431 save_FS, FS_regexp, set_field,
432 (NODE *)NULL);
433 parse_high_water = NF;
434 }
435 rebuild_record();
436 }
437 if (assign)
438 *assign = reset_record;
439 return &fields_arr[0];
440 }
441
442 /* assert(requested > 0); */
443
444 if (assign)
445 field0_valid = 0; /* $0 needs reconstruction */
446
447 if (requested <= parse_high_water) /* already parsed this field */
448 return &fields_arr[requested];
449
450 if (NF == -1) { /* have not yet parsed to end of record */
451 /*
452 * parse up to requested fields, calling set_field() for each,
453 * saving in parse_extent the point where the parse left off
454 */
455 if (parse_high_water == 0) /* starting at the beginning */
456 parse_extent = fields_arr[0]->stptr;
457 parse_high_water = (*parse_field)(requested, &parse_extent,
458 fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr),
459 save_FS, FS_regexp, set_field, (NODE *)NULL);
460
461 /*
462 * if we reached the end of the record, set NF to the number of
463 * fields so far. Note that requested might actually refer to
464 * a field that is beyond the end of the record, but we won't
465 * set NF to that value at this point, since this is only a
466 * reference to the field and NF only gets set if the field
467 * is assigned to -- this case is handled below
468 */
469 if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
470 NF = parse_high_water;
471 if (requested == HUGE-1) /* HUGE-1 means set NF */
472 requested = parse_high_water;
473 }
474 if (parse_high_water < requested) { /* requested beyond end of record */
475 if (assign) { /* expand record */
476 register int i;
477
478 if (requested > nf_high_water)
479 grow_fields_arr(requested);
480
481 /* fill in fields that don't exist */
482 for (i = parse_high_water + 1; i <= requested; i++)
483 fields_arr[i] = Nnull_string;
484
485 NF = requested;
486 parse_high_water = requested;
487 } else
488 return &Nnull_string;
489 }
490
491 return &fields_arr[requested];
492}
493
494static void
495set_element(num, s, len, n)
496int num;
497char *s;
498int len;
499NODE *n;
500{
501 register NODE *it;
502
503 it = make_string(s, len);
504 it->flags |= MAYBE_NUM;
505 *assoc_lookup(n, tmp_number((AWKNUM) (num))) = it;
506}
507
508NODE *
509do_split(tree)
510NODE *tree;
511{
512 NODE *t1, *t2, *t3, *tmp;
513 NODE *fs;
514 char *s;
515 int (*parseit)P((int, char **, int, NODE *,
516 Regexp *, void (*)(), NODE *));
517 Regexp *rp = NULL;
518
519 t1 = tree_eval(tree->lnode);
520 t2 = tree->rnode->lnode;
521 t3 = tree->rnode->rnode->lnode;
522
523 (void) force_string(t1);
524
525 if (t2->type == Node_param_list)
526 t2 = stack_ptr[t2->param_cnt];
527 if (t2->type != Node_var && t2->type != Node_var_array)
528 fatal("second argument of split is not a variable");
529 assoc_clear(t2);
530
531 if (t3->re_flags & FS_DFLT) {
532 parseit = parse_field;
533 fs = force_string(FS_node->var_value);
534 rp = FS_regexp;
535 } else {
536 tmp = force_string(tree_eval(t3->re_exp));
537 if (tmp->stlen == 1) {
538 if (tmp->stptr[0] == ' ')
539 parseit = def_parse_field;
540 else
541 parseit = sc_parse_field;
542 } else {
543 parseit = re_parse_field;
544 rp = re_update(t3);
545 }
546 fs = tmp;
547 }
548
549 s = t1->stptr;
550 tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int)t1->stlen,
551 fs, rp, set_element, t2));
552 free_temp(t1);
553 free_temp(t3);
554 return tmp;
555}
556
557void
558set_FS()
559{
560 NODE *tmp = NULL;
561 char buf[10];
562 NODE *fs;
563
564 buf[0] = '\0';
565 default_FS = 0;
566 if (FS_regexp) {
567 refree(FS_regexp);
568 FS_regexp = NULL;
569 }
570 fs = force_string(FS_node->var_value);
571 if (fs->stlen > 1)
572 parse_field = re_parse_field;
573 else if (*RS == 0) {
574 parse_field = sc_parse_field;
575 if (fs->stlen == 1) {
576 if (fs->stptr[0] == ' ') {
577 default_FS = 1;
578 strcpy(buf, "[ \t\n]+");
579 } else if (fs->stptr[0] != '\n')
580 sprintf(buf, "[%c\n]", fs->stptr[0]);
581 }
582 } else {
583 parse_field = def_parse_field;
584 if (fs->stptr[0] == ' ' && fs->stlen == 1)
585 default_FS = 1;
586 else if (fs->stptr[0] != ' ' && fs->stlen == 1) {
587 if (IGNORECASE == 0)
588 parse_field = sc_parse_field;
589 else
590 sprintf(buf, "[%c]", fs->stptr[0]);
591 }
592 }
593 if (buf[0]) {
594 FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, 1);
595 parse_field = re_parse_field;
596 } else if (parse_field == re_parse_field) {
597 FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, 1);
598 } else
599 FS_regexp = NULL;
600 resave_fs = 1;
601}
602
603void
604set_RS()
605{
606 (void) force_string(RS_node->var_value);
607 RS = RS_node->var_value->stptr;
608 set_FS();
609}
610
611void
612set_FIELDWIDTHS()
613{
614 register char *scan;
615 char *end;
616 register int i;
617 static int fw_alloc = 1;
618 static int warned = 0;
619 extern double strtod();
620
621 if (do_lint && ! warned) {
622 warned = 1;
623 warning("use of FIELDWIDTHS is a gawk extension");
624 }
625 if (do_unix) /* quick and dirty, does the trick */
626 return;
627
628 parse_field = fw_parse_field;
629 scan = force_string(FIELDWIDTHS_node->var_value)->stptr;
630 end = scan + 1;
631 if (FIELDWIDTHS == NULL)
632 emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
633 FIELDWIDTHS[0] = 0;
634 for (i = 1; ; i++) {
635 if (i >= fw_alloc) {
636 fw_alloc *= 2;
637 erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
638 }
639 FIELDWIDTHS[i] = (int) strtod(scan, &end);
640 if (end == scan)
641 break;
642 scan = end;
643 }
644 FIELDWIDTHS[i] = -1;
645}