Commit | Line | Data |
---|---|---|
c1cfdb7a WJ |
1 | /* |
2 | * Mach Operating System | |
3 | * Copyright (c) 1991,1990 Carnegie Mellon University | |
4 | * All Rights Reserved. | |
5 | * | |
6 | * Permission to use, copy, modify and distribute this software and its | |
7 | * documentation is hereby granted, provided that both the copyright | |
8 | * notice and this permission notice appear in all copies of the | |
9 | * software, derivative works or modified versions, and any portions | |
10 | * thereof, and that both notices appear in supporting documentation. | |
11 | * | |
12 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS | |
13 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
14 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
15 | * | |
16 | * Carnegie Mellon requests users of this software to return to | |
17 | * | |
18 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
19 | * School of Computer Science | |
20 | * Carnegie Mellon University | |
21 | * Pittsburgh PA 15213-3890 | |
22 | * | |
23 | * any improvements or extensions that they make and grant Carnegie the | |
24 | * rights to redistribute these changes. | |
25 | */ | |
26 | /* | |
27 | * HISTORY | |
28 | * $Log: db_lex.c,v $ | |
29 | * Revision 1.1 1992/03/25 21:45:13 pace | |
30 | * Initial revision | |
31 | * | |
32 | * Revision 2.3 91/02/05 17:06:36 mrt | |
33 | * Changed to new Mach copyright | |
34 | * [91/01/31 16:18:20 mrt] | |
35 | * | |
36 | * Revision 2.2 90/08/27 21:51:10 dbg | |
37 | * Add 'dotdot' token. | |
38 | * [90/08/22 dbg] | |
39 | * | |
40 | * Allow backslash to quote any character into an identifier. | |
41 | * Allow colon in identifier for symbol table qualification. | |
42 | * [90/08/16 dbg] | |
43 | * Reduce lint. | |
44 | * [90/08/07 dbg] | |
45 | * Created. | |
46 | * [90/07/25 dbg] | |
47 | * | |
48 | */ | |
49 | /* | |
50 | * Author: David B. Golub, Carnegie Mellon University | |
51 | * Date: 7/90 | |
52 | */ | |
53 | /* | |
54 | * Lexical analyzer. | |
55 | */ | |
56 | #include <ddb/db_lex.h> | |
57 | ||
/* Buffer holding the input line currently being scanned. */
char	db_line[120];

/*
 * Scan window into db_line: db_lp is the next character to hand out,
 * db_endlp is one past the last character of the line.
 */
char	*db_lp;
char	*db_endlp;
60 | ||
61 | int | |
62 | db_read_line() | |
63 | { | |
64 | int i; | |
65 | ||
66 | i = db_readline(db_line, sizeof(db_line)); | |
67 | if (i == 0) | |
68 | return (0); /* EOI */ | |
69 | db_lp = db_line; | |
70 | db_endlp = db_lp + i; | |
71 | return (i); | |
72 | } | |
73 | ||
74 | void | |
75 | db_flush_line() | |
76 | { | |
77 | db_lp = db_line; | |
78 | db_endlp = db_line; | |
79 | } | |
80 | ||
81 | int db_look_char = 0; | |
82 | ||
83 | int | |
84 | db_read_char() | |
85 | { | |
86 | int c; | |
87 | ||
88 | if (db_look_char != 0) { | |
89 | c = db_look_char; | |
90 | db_look_char = 0; | |
91 | } | |
92 | else if (db_lp >= db_endlp) | |
93 | c = -1; | |
94 | else | |
95 | c = *db_lp++; | |
96 | return (c); | |
97 | } | |
98 | ||
99 | void | |
100 | db_unread_char(c) | |
101 | { | |
102 | db_look_char = c; | |
103 | } | |
104 | ||
/* One-token pushback slot; zero means no token is pending. */
int	db_look_token = 0;

/*
 * Push token `t' back so that the next db_read_token() returns it
 * instead of scanning new input.  Only one token can be pending.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
}
113 | ||
114 | int | |
115 | db_read_token() | |
116 | { | |
117 | int t; | |
118 | ||
119 | if (db_look_token) { | |
120 | t = db_look_token; | |
121 | db_look_token = 0; | |
122 | } | |
123 | else | |
124 | t = db_lex(); | |
125 | return (t); | |
126 | } | |
127 | ||
128 | int db_tok_number; | |
129 | char db_tok_string[TOK_STRING_SIZE]; | |
130 | ||
131 | int db_radix = 16; | |
132 | ||
133 | void | |
134 | db_flush_lex() | |
135 | { | |
136 | db_flush_line(); | |
137 | db_look_char = 0; | |
138 | db_look_token = 0; | |
139 | } | |
140 | ||
141 | int | |
142 | db_lex() | |
143 | { | |
144 | int c; | |
145 | ||
146 | c = db_read_char(); | |
147 | while (c <= ' ' || c > '~') { | |
148 | if (c == '\n' || c == -1) | |
149 | return (tEOL); | |
150 | c = db_read_char(); | |
151 | } | |
152 | ||
153 | if (c >= '0' && c <= '9') { | |
154 | /* number */ | |
155 | int r, digit; | |
156 | ||
157 | if (c > '0') | |
158 | r = db_radix; | |
159 | else { | |
160 | c = db_read_char(); | |
161 | if (c == 'O' || c == 'o') | |
162 | r = 8; | |
163 | else if (c == 'T' || c == 't') | |
164 | r = 10; | |
165 | else if (c == 'X' || c == 'x') | |
166 | r = 16; | |
167 | else { | |
168 | r = db_radix; | |
169 | db_unread_char(c); | |
170 | } | |
171 | c = db_read_char(); | |
172 | } | |
173 | db_tok_number = 0; | |
174 | for (;;) { | |
175 | if (c >= '0' && c <= ((r == 8) ? '7' : '9')) | |
176 | digit = c - '0'; | |
177 | else if (r == 16 && ((c >= 'A' && c <= 'F') || | |
178 | (c >= 'a' && c <= 'f'))) { | |
179 | if (c >= 'a') | |
180 | digit = c - 'a' + 10; | |
181 | else if (c >= 'A') | |
182 | digit = c - 'A' + 10; | |
183 | } | |
184 | else | |
185 | break; | |
186 | db_tok_number = db_tok_number * r + digit; | |
187 | c = db_read_char(); | |
188 | } | |
189 | if ((c >= '0' && c <= '9') || | |
190 | (c >= 'A' && c <= 'Z') || | |
191 | (c >= 'a' && c <= 'z') || | |
192 | (c == '_')) | |
193 | { | |
194 | db_error("Bad character in number\n"); | |
195 | db_flush_lex(); | |
196 | return (tEOF); | |
197 | } | |
198 | db_unread_char(c); | |
199 | return (tNUMBER); | |
200 | } | |
201 | if ((c >= 'A' && c <= 'Z') || | |
202 | (c >= 'a' && c <= 'z') || | |
203 | c == '_' || c == '\\') | |
204 | { | |
205 | /* string */ | |
206 | char *cp; | |
207 | ||
208 | cp = db_tok_string; | |
209 | if (c == '\\') { | |
210 | c = db_read_char(); | |
211 | if (c == '\n' || c == -1) | |
212 | db_error("Bad escape\n"); | |
213 | } | |
214 | *cp++ = c; | |
215 | while (1) { | |
216 | c = db_read_char(); | |
217 | if ((c >= 'A' && c <= 'Z') || | |
218 | (c >= 'a' && c <= 'z') || | |
219 | (c >= '0' && c <= '9') || | |
220 | c == '_' || c == '\\' || c == ':') | |
221 | { | |
222 | if (c == '\\') { | |
223 | c = db_read_char(); | |
224 | if (c == '\n' || c == -1) | |
225 | db_error("Bad escape\n"); | |
226 | } | |
227 | *cp++ = c; | |
228 | if (cp == db_tok_string+sizeof(db_tok_string)) { | |
229 | db_error("String too long\n"); | |
230 | db_flush_lex(); | |
231 | return (tEOF); | |
232 | } | |
233 | continue; | |
234 | } | |
235 | else { | |
236 | *cp = '\0'; | |
237 | break; | |
238 | } | |
239 | } | |
240 | db_unread_char(c); | |
241 | return (tIDENT); | |
242 | } | |
243 | ||
244 | switch (c) { | |
245 | case '+': | |
246 | return (tPLUS); | |
247 | case '-': | |
248 | return (tMINUS); | |
249 | case '.': | |
250 | c = db_read_char(); | |
251 | if (c == '.') | |
252 | return (tDOTDOT); | |
253 | db_unread_char(c); | |
254 | return (tDOT); | |
255 | case '*': | |
256 | return (tSTAR); | |
257 | case '/': | |
258 | return (tSLASH); | |
259 | case '=': | |
260 | return (tEQ); | |
261 | case '%': | |
262 | return (tPCT); | |
263 | case '#': | |
264 | return (tHASH); | |
265 | case '(': | |
266 | return (tLPAREN); | |
267 | case ')': | |
268 | return (tRPAREN); | |
269 | case ',': | |
270 | return (tCOMMA); | |
271 | case '"': | |
272 | return (tDITTO); | |
273 | case '$': | |
274 | return (tDOLLAR); | |
275 | case '!': | |
276 | return (tEXCL); | |
277 | case '<': | |
278 | c = db_read_char(); | |
279 | if (c == '<') | |
280 | return (tSHIFT_L); | |
281 | db_unread_char(c); | |
282 | break; | |
283 | case '>': | |
284 | c = db_read_char(); | |
285 | if (c == '>') | |
286 | return (tSHIFT_R); | |
287 | db_unread_char(c); | |
288 | break; | |
289 | case -1: | |
290 | return (tEOF); | |
291 | } | |
292 | db_printf("Bad character\n"); | |
293 | db_flush_lex(); | |
294 | return (tEOF); | |
295 | } |