Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Mach Operating System | |
3 | * Copyright (c) 1991,1990 Carnegie Mellon University | |
4 | * All Rights Reserved. | |
5 | * | |
6 | * Permission to use, copy, modify and distribute this software and its | |
7 | * documentation is hereby granted, provided that both the copyright | |
8 | * notice and this permission notice appear in all copies of the | |
9 | * software, derivative works or modified versions, and any portions | |
10 | * thereof, and that both notices appear in supporting documentation. | |
11 | * | |
12 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS | |
13 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
14 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
15 | * | |
16 | * Carnegie Mellon requests users of this software to return to | |
17 | * | |
18 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
19 | * School of Computer Science | |
20 | * Carnegie Mellon University | |
21 | * Pittsburgh PA 15213-3890 | |
22 | * | |
23 | * any improvements or extensions that they make and grant Carnegie the | |
24 | * rights to redistribute these changes. | |
15637ed4 | 25 | * |
cbeffc91 | 26 | * $Id$ |
15637ed4 | 27 | */ |
cbeffc91 | 28 | |
15637ed4 RG |
29 | /* |
30 | * Author: David B. Golub, Carnegie Mellon University | |
31 | * Date: 7/90 | |
32 | */ | |
33 | /* | |
34 | * Lexical analyzer. | |
35 | */ | |
36 | #include <ddb/db_lex.h> | |
37 | ||
/* Buffer that holds the current debugger input line. */
char	db_line[120];
/* db_lp: next unread byte in db_line; db_endlp: end of the valid data. */
char	*db_lp, *db_endlp;

/*
 * Fetch a new input line into db_line via db_readline() and, when the
 * result is non-zero, aim the read cursor at the start of the buffer with
 * db_endlp placed that many bytes further on.
 *
 * Returns db_readline()'s result unchanged; 0 means end of input, and in
 * that case the cursor pointers are left as they were.
 */
int
db_read_line()
{
	int	nread = db_readline(db_line, sizeof(db_line));

	if (nread != 0) {
		db_lp = db_line;
		db_endlp = db_line + nread;
	}
	return (nread);
}
53 | ||
54 | void | |
55 | db_flush_line() | |
56 | { | |
57 | db_lp = db_line; | |
58 | db_endlp = db_line; | |
59 | } | |
60 | ||
61 | int db_look_char = 0; | |
62 | ||
63 | int | |
64 | db_read_char() | |
65 | { | |
66 | int c; | |
67 | ||
68 | if (db_look_char != 0) { | |
69 | c = db_look_char; | |
70 | db_look_char = 0; | |
71 | } | |
72 | else if (db_lp >= db_endlp) | |
73 | c = -1; | |
74 | else | |
75 | c = *db_lp++; | |
76 | return (c); | |
77 | } | |
78 | ||
79 | void | |
80 | db_unread_char(c) | |
81 | { | |
82 | db_look_char = c; | |
83 | } | |
84 | ||
/* One-token pushback slot consulted by db_read_token(); 0 means empty. */
int	db_look_token = 0;

/*
 * Push a token back onto the input: the value is stored in the pushback
 * slot, replacing whatever was stored there before.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
}
93 | ||
94 | int | |
95 | db_read_token() | |
96 | { | |
97 | int t; | |
98 | ||
99 | if (db_look_token) { | |
100 | t = db_look_token; | |
101 | db_look_token = 0; | |
102 | } | |
103 | else | |
104 | t = db_lex(); | |
105 | return (t); | |
106 | } | |
107 | ||
108 | int db_tok_number; | |
109 | char db_tok_string[TOK_STRING_SIZE]; | |
110 | ||
111 | int db_radix = 16; | |
112 | ||
113 | void | |
114 | db_flush_lex() | |
115 | { | |
116 | db_flush_line(); | |
117 | db_look_char = 0; | |
118 | db_look_token = 0; | |
119 | } | |
120 | ||
121 | int | |
122 | db_lex() | |
123 | { | |
124 | int c; | |
125 | ||
126 | c = db_read_char(); | |
127 | while (c <= ' ' || c > '~') { | |
128 | if (c == '\n' || c == -1) | |
129 | return (tEOL); | |
130 | c = db_read_char(); | |
131 | } | |
132 | ||
133 | if (c >= '0' && c <= '9') { | |
134 | /* number */ | |
135 | int r, digit; | |
136 | ||
137 | if (c > '0') | |
138 | r = db_radix; | |
139 | else { | |
140 | c = db_read_char(); | |
141 | if (c == 'O' || c == 'o') | |
142 | r = 8; | |
143 | else if (c == 'T' || c == 't') | |
144 | r = 10; | |
145 | else if (c == 'X' || c == 'x') | |
146 | r = 16; | |
147 | else { | |
148 | r = db_radix; | |
149 | db_unread_char(c); | |
150 | } | |
151 | c = db_read_char(); | |
152 | } | |
153 | db_tok_number = 0; | |
154 | for (;;) { | |
155 | if (c >= '0' && c <= ((r == 8) ? '7' : '9')) | |
156 | digit = c - '0'; | |
157 | else if (r == 16 && ((c >= 'A' && c <= 'F') || | |
158 | (c >= 'a' && c <= 'f'))) { | |
159 | if (c >= 'a') | |
160 | digit = c - 'a' + 10; | |
161 | else if (c >= 'A') | |
162 | digit = c - 'A' + 10; | |
163 | } | |
164 | else | |
165 | break; | |
166 | db_tok_number = db_tok_number * r + digit; | |
167 | c = db_read_char(); | |
168 | } | |
169 | if ((c >= '0' && c <= '9') || | |
170 | (c >= 'A' && c <= 'Z') || | |
171 | (c >= 'a' && c <= 'z') || | |
172 | (c == '_')) | |
173 | { | |
174 | db_error("Bad character in number\n"); | |
175 | db_flush_lex(); | |
176 | return (tEOF); | |
177 | } | |
178 | db_unread_char(c); | |
179 | return (tNUMBER); | |
180 | } | |
181 | if ((c >= 'A' && c <= 'Z') || | |
182 | (c >= 'a' && c <= 'z') || | |
183 | c == '_' || c == '\\') | |
184 | { | |
185 | /* string */ | |
186 | char *cp; | |
187 | ||
188 | cp = db_tok_string; | |
189 | if (c == '\\') { | |
190 | c = db_read_char(); | |
191 | if (c == '\n' || c == -1) | |
192 | db_error("Bad escape\n"); | |
193 | } | |
194 | *cp++ = c; | |
195 | while (1) { | |
196 | c = db_read_char(); | |
197 | if ((c >= 'A' && c <= 'Z') || | |
198 | (c >= 'a' && c <= 'z') || | |
199 | (c >= '0' && c <= '9') || | |
200 | c == '_' || c == '\\' || c == ':') | |
201 | { | |
202 | if (c == '\\') { | |
203 | c = db_read_char(); | |
204 | if (c == '\n' || c == -1) | |
205 | db_error("Bad escape\n"); | |
206 | } | |
207 | *cp++ = c; | |
208 | if (cp == db_tok_string+sizeof(db_tok_string)) { | |
209 | db_error("String too long\n"); | |
210 | db_flush_lex(); | |
211 | return (tEOF); | |
212 | } | |
213 | continue; | |
214 | } | |
215 | else { | |
216 | *cp = '\0'; | |
217 | break; | |
218 | } | |
219 | } | |
220 | db_unread_char(c); | |
221 | return (tIDENT); | |
222 | } | |
223 | ||
224 | switch (c) { | |
225 | case '+': | |
226 | return (tPLUS); | |
227 | case '-': | |
228 | return (tMINUS); | |
229 | case '.': | |
230 | c = db_read_char(); | |
231 | if (c == '.') | |
232 | return (tDOTDOT); | |
233 | db_unread_char(c); | |
234 | return (tDOT); | |
235 | case '*': | |
236 | return (tSTAR); | |
237 | case '/': | |
238 | return (tSLASH); | |
239 | case '=': | |
240 | return (tEQ); | |
241 | case '%': | |
242 | return (tPCT); | |
243 | case '#': | |
244 | return (tHASH); | |
245 | case '(': | |
246 | return (tLPAREN); | |
247 | case ')': | |
248 | return (tRPAREN); | |
249 | case ',': | |
250 | return (tCOMMA); | |
251 | case '"': | |
252 | return (tDITTO); | |
253 | case '$': | |
254 | return (tDOLLAR); | |
255 | case '!': | |
256 | return (tEXCL); | |
257 | case '<': | |
258 | c = db_read_char(); | |
259 | if (c == '<') | |
260 | return (tSHIFT_L); | |
261 | db_unread_char(c); | |
262 | break; | |
263 | case '>': | |
264 | c = db_read_char(); | |
265 | if (c == '>') | |
266 | return (tSHIFT_R); | |
267 | db_unread_char(c); | |
268 | break; | |
269 | case -1: | |
270 | return (tEOF); | |
271 | } | |
272 | db_printf("Bad character\n"); | |
273 | db_flush_lex(); | |
274 | return (tEOF); | |
275 | } |