Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Mach Operating System | |
3 | * Copyright (c) 1991,1990 Carnegie Mellon University | |
4 | * All Rights Reserved. | |
5 | * | |
6 | * Permission to use, copy, modify and distribute this software and its | |
7 | * documentation is hereby granted, provided that both the copyright | |
8 | * notice and this permission notice appear in all copies of the | |
9 | * software, derivative works or modified versions, and any portions | |
10 | * thereof, and that both notices appear in supporting documentation. | |
11 | * | |
12 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS | |
13 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
14 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
15 | * | |
16 | * Carnegie Mellon requests users of this software to return to | |
17 | * | |
18 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
19 | * School of Computer Science | |
20 | * Carnegie Mellon University | |
21 | * Pittsburgh PA 15213-3890 | |
22 | * | |
23 | * any improvements or extensions that they make and grant Carnegie the | |
24 | * rights to redistribute these changes. | |
15637ed4 | 25 | * |
4c45483e | 26 | * $Id: db_lex.c,v 1.2 1993/10/16 16:47:17 rgrimes Exp $ |
15637ed4 | 27 | */ |
cbeffc91 | 28 | |
15637ed4 RG |
29 | /* |
30 | * Author: David B. Golub, Carnegie Mellon University | |
31 | * Date: 7/90 | |
32 | */ | |
33 | /* | |
34 | * Lexical analyzer. | |
35 | */ | |
4c45483e GW |
36 | #include "param.h" |
37 | #include "systm.h" | |
38 | #include "ddb/ddb.h" | |
15637ed4 RG |
39 | #include <ddb/db_lex.h> |
40 | ||
/* Buffer holding the input line currently being scanned. */
char	db_line[120];
/* Next unread character within db_line. */
char	*db_lp;
/* One past the last valid character within db_line. */
char	*db_endlp;
43 | ||
44 | int | |
45 | db_read_line() | |
46 | { | |
47 | int i; | |
48 | ||
49 | i = db_readline(db_line, sizeof(db_line)); | |
50 | if (i == 0) | |
51 | return (0); /* EOI */ | |
52 | db_lp = db_line; | |
53 | db_endlp = db_lp + i; | |
54 | return (i); | |
55 | } | |
56 | ||
57 | void | |
58 | db_flush_line() | |
59 | { | |
60 | db_lp = db_line; | |
61 | db_endlp = db_line; | |
62 | } | |
63 | ||
64 | int db_look_char = 0; | |
65 | ||
66 | int | |
67 | db_read_char() | |
68 | { | |
69 | int c; | |
70 | ||
71 | if (db_look_char != 0) { | |
72 | c = db_look_char; | |
73 | db_look_char = 0; | |
74 | } | |
75 | else if (db_lp >= db_endlp) | |
76 | c = -1; | |
77 | else | |
78 | c = *db_lp++; | |
79 | return (c); | |
80 | } | |
81 | ||
82 | void | |
83 | db_unread_char(c) | |
4c45483e | 84 | int c; |
15637ed4 RG |
85 | { |
86 | db_look_char = c; | |
87 | } | |
88 | ||
/* One-token pushback slot; 0 means no token is pushed back. */
int	db_look_token = 0;

/*
 * Push token t back by storing it in db_look_token.  Only one token is
 * remembered; a second call overwrites the first.
 */
void
db_unread_token(t)
	int t;
{
	db_look_token = t;
}
97 | ||
98 | int | |
99 | db_read_token() | |
100 | { | |
101 | int t; | |
102 | ||
103 | if (db_look_token) { | |
104 | t = db_look_token; | |
105 | db_look_token = 0; | |
106 | } | |
107 | else | |
108 | t = db_lex(); | |
109 | return (t); | |
110 | } | |
111 | ||
112 | int db_tok_number; | |
113 | char db_tok_string[TOK_STRING_SIZE]; | |
114 | ||
115 | int db_radix = 16; | |
116 | ||
117 | void | |
118 | db_flush_lex() | |
119 | { | |
120 | db_flush_line(); | |
121 | db_look_char = 0; | |
122 | db_look_token = 0; | |
123 | } | |
124 | ||
125 | int | |
126 | db_lex() | |
127 | { | |
128 | int c; | |
129 | ||
130 | c = db_read_char(); | |
131 | while (c <= ' ' || c > '~') { | |
132 | if (c == '\n' || c == -1) | |
133 | return (tEOL); | |
134 | c = db_read_char(); | |
135 | } | |
136 | ||
137 | if (c >= '0' && c <= '9') { | |
138 | /* number */ | |
4c45483e | 139 | int r, digit = 0; |
15637ed4 RG |
140 | |
141 | if (c > '0') | |
142 | r = db_radix; | |
143 | else { | |
144 | c = db_read_char(); | |
145 | if (c == 'O' || c == 'o') | |
146 | r = 8; | |
147 | else if (c == 'T' || c == 't') | |
148 | r = 10; | |
149 | else if (c == 'X' || c == 'x') | |
150 | r = 16; | |
151 | else { | |
152 | r = db_radix; | |
153 | db_unread_char(c); | |
154 | } | |
155 | c = db_read_char(); | |
156 | } | |
157 | db_tok_number = 0; | |
158 | for (;;) { | |
159 | if (c >= '0' && c <= ((r == 8) ? '7' : '9')) | |
160 | digit = c - '0'; | |
161 | else if (r == 16 && ((c >= 'A' && c <= 'F') || | |
162 | (c >= 'a' && c <= 'f'))) { | |
163 | if (c >= 'a') | |
164 | digit = c - 'a' + 10; | |
165 | else if (c >= 'A') | |
166 | digit = c - 'A' + 10; | |
167 | } | |
168 | else | |
169 | break; | |
170 | db_tok_number = db_tok_number * r + digit; | |
171 | c = db_read_char(); | |
172 | } | |
173 | if ((c >= '0' && c <= '9') || | |
174 | (c >= 'A' && c <= 'Z') || | |
175 | (c >= 'a' && c <= 'z') || | |
176 | (c == '_')) | |
177 | { | |
178 | db_error("Bad character in number\n"); | |
179 | db_flush_lex(); | |
180 | return (tEOF); | |
181 | } | |
182 | db_unread_char(c); | |
183 | return (tNUMBER); | |
184 | } | |
185 | if ((c >= 'A' && c <= 'Z') || | |
186 | (c >= 'a' && c <= 'z') || | |
187 | c == '_' || c == '\\') | |
188 | { | |
189 | /* string */ | |
190 | char *cp; | |
191 | ||
192 | cp = db_tok_string; | |
193 | if (c == '\\') { | |
194 | c = db_read_char(); | |
195 | if (c == '\n' || c == -1) | |
196 | db_error("Bad escape\n"); | |
197 | } | |
198 | *cp++ = c; | |
199 | while (1) { | |
200 | c = db_read_char(); | |
201 | if ((c >= 'A' && c <= 'Z') || | |
202 | (c >= 'a' && c <= 'z') || | |
203 | (c >= '0' && c <= '9') || | |
204 | c == '_' || c == '\\' || c == ':') | |
205 | { | |
206 | if (c == '\\') { | |
207 | c = db_read_char(); | |
208 | if (c == '\n' || c == -1) | |
209 | db_error("Bad escape\n"); | |
210 | } | |
211 | *cp++ = c; | |
212 | if (cp == db_tok_string+sizeof(db_tok_string)) { | |
213 | db_error("String too long\n"); | |
214 | db_flush_lex(); | |
215 | return (tEOF); | |
216 | } | |
217 | continue; | |
218 | } | |
219 | else { | |
220 | *cp = '\0'; | |
221 | break; | |
222 | } | |
223 | } | |
224 | db_unread_char(c); | |
225 | return (tIDENT); | |
226 | } | |
227 | ||
228 | switch (c) { | |
229 | case '+': | |
230 | return (tPLUS); | |
231 | case '-': | |
232 | return (tMINUS); | |
233 | case '.': | |
234 | c = db_read_char(); | |
235 | if (c == '.') | |
236 | return (tDOTDOT); | |
237 | db_unread_char(c); | |
238 | return (tDOT); | |
239 | case '*': | |
240 | return (tSTAR); | |
241 | case '/': | |
242 | return (tSLASH); | |
243 | case '=': | |
244 | return (tEQ); | |
245 | case '%': | |
246 | return (tPCT); | |
247 | case '#': | |
248 | return (tHASH); | |
249 | case '(': | |
250 | return (tLPAREN); | |
251 | case ')': | |
252 | return (tRPAREN); | |
253 | case ',': | |
254 | return (tCOMMA); | |
255 | case '"': | |
256 | return (tDITTO); | |
257 | case '$': | |
258 | return (tDOLLAR); | |
259 | case '!': | |
260 | return (tEXCL); | |
261 | case '<': | |
262 | c = db_read_char(); | |
263 | if (c == '<') | |
264 | return (tSHIFT_L); | |
265 | db_unread_char(c); | |
266 | break; | |
267 | case '>': | |
268 | c = db_read_char(); | |
269 | if (c == '>') | |
270 | return (tSHIFT_R); | |
271 | db_unread_char(c); | |
272 | break; | |
273 | case -1: | |
274 | return (tEOF); | |
275 | } | |
276 | db_printf("Bad character\n"); | |
277 | db_flush_lex(); | |
278 | return (tEOF); | |
279 | } |