Start development on BSD 2
[unix-history] / .ref-BSD-1 / ashell / sh_lex.c
CommitLineData
1a5078b8
BJ
1#include "sh.h"
2/*
3 * Shell:
4 * Lexical and argument processing routines
5 *
6 * lex - driver for lexical analysis
7 * word - reads next word into line and pointer thereto via args
8 * getc - gets a character from the logical input stream
9 * readc - reads a character from the 'input device'
10 * setargs - sets up the parameter variables initially
11 * rewind - backs up the shell arguments to their original values
12 * setnargs - resets nargs variable after changes to arg list
13 * shift - manipulates the shell parameters
14 */
/*
 * Forward declaration of word(), needed because lex() calls it before
 * its definition.  The value it hands back is stored by lex() in a
 * node's value field and indexed like a character string.
 */
struct shvar2 *word();
16
17/*
18 * lex is the driver routine for the lexical input of the shell.
19 * Basic strategy is to read a logical line into linebuf
20 * with successive words pointed to by successive elements of args.
21 * Termination condition is a newline.
22 * Returns a pointer to a linked list of the words.
23 */
24lex(hp)
25 register struct shvar2 *hp;
26{
27 register struct shvar2 *wdp;
28
29 wdp = hp;
30 do {
31 wdp->next = calloc(1, sizeof *wdp);
32 wdp->next->prev = wdp;
33 wdp = wdp->next;
34 wdp->value = word();
35 } while (wdp->value[0] != '\n');
36 hp->prev = wdp;
37 wdp->next = hp;
38}
39
40freelex(vp)
41 register struct shvar *vp;
42{
43 register struct shvar *fp;
44
45 while (vp->next != vp) {
46 fp = vp->next;
47 vp->next = fp->next;
48 xfree(fp->value);
49 cfree(fp);
50 }
51 vp->prev = vp;
52}
53
/*
 * One-character pushback slots; zero means the slot is empty.
 *
 * peekc - set by word(); getc() hands it back untouched before doing
 *	   anything else.
 * peekx - set by dolvbl(); getc() takes it in place of a readc() call,
 *	   so it still passes through getc's backslash, ${...} and $
 *	   handling.
 */
/* static */ char peekc, peekx;
/*
 * word breaks the input character stream into words.
 * Blanks and tabs in the input are ignored, the characters
 *	&;<>()|^ and \n
 * are considered to be separators; a separator is returned as a
 * one character word of its own.
 * Characters may be escaped here by surrounding them with
 * 's or "s.  This causes the QUOTE (high order) bit of the
 * corresponding character to be set so the character will
 * fail subsequent comparisons.  The quoting is eventually
 * stripped off.  More quoting by QUOTE is also done in getc
 * (for characters preceded by a backslash).
 * Note importantly that quoted character strings do not undergo
 * parameter substitution: they are read with readc, not getc.
 *
 * The word is assembled in a local buffer and the result of
 * savestr on that buffer is returned.  (Presumably savestr makes
 * a saved copy of the string - lex stores the result in a node's
 * value field and indexes it like a string.)
 *
 * Errors reported through seterr:
 *	"Unmatched ' or \""	- newline met inside a quotation
 *	"Too many characters"	- the character budget ran out
 */
struct shvar2 *
word()
{
	register c;
	char c1;
	register char *lp;
	char lbuf[514];
	int i;

	lp = lbuf;
	/* Character budget: counted down before each store, 0 means overflow. */
	i = 512;
	/*
	 * Loop to get something solid
	 */
	for (;;) {
		c = getc();
		switch (c) {
		case ' ':
		case '\t':
			continue;
		case '\'':
		case '"':
			/* Remember which quote opened so only its twin closes. */
			c1 = c;
			while ((c = echo(readc())) != c1) {
				if (c == '\n') {
					seterr("Unmatched ' or \"");
					*lp++ = 0;
					/* Push the newline back so the next call sees it. */
					peekc = c;
					goto ret;
				}
				if (--i == 0)
					goto toochars;
				*lp++ = c | QUOTE;
			}
			break;
		case '&':
		case ';':
		case '<':
		case '>':
		case '(':
		case ')':
		case '|':
		case '^':
		case '\n':
			/* A separator forms a word by itself. */
			*lp++ = c;
			*lp++ = '\0';
			goto ret;
		default:
			/* Ordinary character: push it back for the gathering loop below. */
			peekc = c;
			break;
		}
		/*
		 * We have discovered something solid (not a separator).
		 * We want to gather in as many characters
		 * as possible but don't want to grab a separator.
		 * If we find another quotation in this word we go back to
		 * the top to take it.
		 */
		for (;;) {
			c = getc();
			if (any(c, " '\"\t;&<>()|^\n")) {
				/* The terminating character is always pushed back. */
				peekc = c;
				if (any(c, "\"'"))
					break;
				*lp++ = '\0';
				goto ret;
			}
			if (--i == 0)
				goto toochars;
			*lp++ = c;
		}
	}
toochars:
	seterr("Too many characters");
	/* Cut the overlong word down to its first character. */
	lbuf[1] = 0;
ret:
	return (savestr(lbuf));
}
147
/* Read position inside the string getc is currently expanding; 0 when no expansion is active. */
/* static */ char *dolp;
/*
 * paramhd - origin node of the argument list (returned by rgadrof for $0)
 * paramp  - node the positional parameters are currently counted from
 * dolnxt  - argument node being expanded while getc works through $*
 */
/* static */ struct shvar2 paramhd, *paramp, *dolnxt;
/* Number of nodes from paramp to the end of the list; computed by setnargs. */
/* static */ int dolc;
151/*
152 * setargs sets up the initial argument linked list.
153 * paramp is a working pointer to the front of the list (actually
154 * one before the front), paramhd the actual origin which contains
155 * the true value of $0.
156 *
157 * dolnxt is used in expanding $*.
158 * dolc is maintained by setnargs who also maintains the nargs variable
159 * dolp is the pointer into the expanding string in getc
160 */
161setargs(c, v)
162 int c;
163 char *v[];
164{
165 register struct shvar2 *vp, *lvp;
166
167 vp = &paramhd;
168 for (;;) {
169 vp->value = *v++;
170 c--;
171 if (c == 0)
172 break;
173 lvp = vp;
174 vp = calloc(1, sizeof *vp);
175 lvp->next = vp;
176 vp->prev = lvp;
177 }
178 rewind();
179}
180
/*
 * rewind the shell arguments:
 * point paramp back at paramhd, the origin of the argument list,
 * then let setnargs recompute dolc and the nargs variable.
 */
rewind()
{
	paramp = &paramhd;
	/* Must follow the assignment: setnargs counts from paramp. */
	setnargs();
}
189
190/*
191 * set up nargs variable after a parameter list change
192 */
193setnargs()
194{
195 register struct shvar2 *vp;
196
197 dolc = 0;
198 for (vp = paramp; vp != 0; vp = vp->next)
199 dolc++;
200 set(n_args, putn(dolc - 1));
201 if (dolc == 1)
202 unsetv(n_args);
203}
204
205/*
206 * shift the shell arguments
207 */
208shift(v)
209 register char *v[];
210{
211 register int n;
212 register struct shvar2 *vp;
213
214 v++;
215 n = *v == 0 ? 1 : getn(*v++);
216 for (vp = paramp; vp && n;)
217 if (n > 0) {
218 n--;
219 vp = vp->next;
220 } else {
221 n++;
222 vp = vp->prev;
223 }
224 if (n || vp == 0) {
225 bferr(": Count too large");
226 return;
227 }
228 paramp = vp;
229 setnargs();
230}
231
/*
 * State of a ${...} construct that is still open: set by dolvbl and
 * by getc, cleared by getc on the closing } or on a newline.
 * Zero when no such construct is being processed.
 */
/* static */ char dol2bra;
233/*
234 * getc gets a character from the logical input stream.
235 * It handles parameter expansion via $[0-9], all parameters
236 * via $*, shell variables via $[A-Za-z], and the process number via $$.
237 * Also handled is the trimming of the sufficies from expanded
238 * names via the . notation. For example if $1 is "foo.p" then
239 * $.1 will be "foo".
240 *
241 * The variable dol2bra's value has the following meaning:
242 *
243 * 2 echo characters to : or }, if : discard chars to }
244 * 1 echo characters to :
245 * -1 discard characters to }
246 * -2 discard characters to : or }, if : echo to }
247 *
248 * This handles the constructs
249 *
250 * ${name?str1:str2} name set -> str1 ; t -> str2
251 * ${name:default} name set -> $name ; t -> default
252 * ${name?string} name set -> strings ; t -> ""
253 */
254getc()
255{
256 register c;
257 static char doldot;
258
259 if (peekc) {
260 c = peekc;
261 peekc = 0;
262 return (c);
263 }
264 for (;;) {
265 if (dolp) {
266 c = *dolp++;
267 if (c && (c != '.' || !doldot || any('.', dolp)))
268 return (echo(c));
269 if (dolnxt && (dolnxt = dolnxt->next)) {
270 dolp = dolnxt->value;
271 return (echo(' '));
272 }
273 dolp = 0;
274 echo(']');
275 continue;
276 }
277 if (peekx) {
278 c = peekx;
279 peekx = 0;
280 } else
281 c = readc();
282 if (c == '\\') {
283 echo(c);
284 c = readc();
285 if (c == '\n')
286 c = ' ';
287 else
288 c =| QUOTE;
289 }
290 if (dol2bra) {
291 switch (c) {
292 case '}':
293 if (dol2bra > 0)
294 echo(']');
295 dol2bra = 0;
296 echo('}');
297 continue;
298 case '\n':
299 dol2bra = 0;
300 seterr("Missing }");
301 return (echo('\n'));
302 case ':':
303 switch (dol2bra) {
304 case 2:
305 dol2bra = -1;
306 echo(']');
307 echo(':');
308 continue;
309 case -2:
310 dol2bra = 1;
311 echo(':');
312 echo('[');
313 continue;
314 }
315 default:
316 echo(c);
317 }
318 if (dol2bra < 0)
319 continue;
320 return (c);
321 }
322 if (c == '$') {
323 echo(c);
324 doldot = 0;
325 c = readc();
326 echo(c);
327 if (c == '.') {
328 doldot = 1;
329 c = readc();
330 echo(c);
331 }
332 switch (c) {
333 default:
334 if (digit(c)) {
335 dolp = rgadrof(c);
336 if (dolp == 0)
337 continue;
338 dolp = dolp->value;
339 break;
340 }
341 if (c == '{' || letter(c)) {
342 dolp = dolvbl(c);
343 if (dolp || dol2bra > 0)
344 break;
345 continue;
346 }
347 return (c & 0177);
348 case '$':
349 dolp = value(pid);
350 break;
351 case '*':
352 if (dolc <= 1)
353 break;
354 dolnxt = paramp->next;
355 dolp = dolnxt->value;
356 break;
357 }
358 echo('[');
359 continue;
360 }
361 echo(c);
362 return (c);
363 }
364}
365
366dolvbl(sc)
367 char sc;
368{
369 register char *np;
370 register struct shvar *vp;
371 char name[20], c;
372
373 np = name;
374 if (sc != '{')
375 *np++ = sc;
376 for (c = readc(); letter(c); c = readc())
377 if (np < &name[sizeof name - 1]) {
378 echo(c);
379 *np++ = c;
380 }
381 *np++ = 0;
382 vp = adrof(name);
383 if (sc != '{')
384 peekx = c;
385 else {
386 switch (c) {
387 case ':':
388 if (vp)
389 dol2bra = -1;
390 else
391 dol2bra = -2;
392 peekx = ':';
393 break;
394 case '}':
395 peekx = c;
396 dol2bra = -1;
397 break;
398 case '?':
399 echo('?');
400 if (vp)
401 dol2bra = 2;
402 else
403 dol2bra = -2;
404 return (0);
405 default:
406 echo(c);
407 seterr("Variable syntax");
408 return (0);
409 }
410 }
411 if (vp == 0) {
412 seterr("Undefined variable");
413 return (0);
414 }
415 return (vp->value);
416}
417
418/*
419 * read a character from the input device.
420 * this may be an argument e.g. for sh -c.
421 * also for sh -t stop after one line.
422 */
423readc()
424{
425 char cc;
426 register c;
427 register char *cp;
428
429again:
430 if (arginp) {
431 if (arginp == 1)
432 exit(0);
433 else if ((c = *arginp++) == '\0') {
434 arginp = 1;
435 c = '\n';
436 }
437 } else if (onelflg == 1)
438 exit(0);
439 else if (read(0, &cc, 1) != 1) {
440 doneinp = 1;
441 reset();
442 } else if ((c = cc) == '\n' && onelflg)
443 onelflg--;
444 if (c == 0)
445 goto again;
446 return (c);
447}
448
449rgadrof(r)
450 register int r;
451{
452 register struct shvar *tp;
453
454 if (!digit(r) || (r =- '0') > dolc)
455 return (0);
456 if (r == 0)
457 return (&paramhd);
458 for (tp = paramp; r > 0; tp = tp->next)
459 r--;
460 return (tp);
461}
462
463rgvalue(r)
464 int r;
465{
466 register struct shvar *tp;
467
468 tp = rgadrof(r);
469 return (tp ? tp->value : "");
470}