Commit | Line | Data |
---|---|---|
1adfc4f4 AT |
1 | /* |
2 | * (c) 2019 Aaron Taylor <ataylor at subgeniuskitty dot com> | |
3 | * All rights reserved. | |
4 | */ | |
5 | ||
6 | #include <stdio.h> | |
7 | #include <stdlib.h> | |
8 | #include <unistd.h> | |
9 | #include <string.h> | |
10 | #include <errno.h> | |
11 | #include <stdint.h> | |
12 | #include <sys/select.h> | |
13 | #include <getopt.h> | |
a2664738 | 14 | #include <termios.h> |
1adfc4f4 AT |
15 | |
16 | #define VERSION 1 | |
17 | ||
eea0db3f AT |
18 | #define HEAPSIZE 1024 /* Size of heap in words */ |
19 | #define DATASTACKSIZE 1024 /* Size of stack in words */ | |
20 | #define RETURNSTACKSIZE 1024 /* Max subroutine call depth */ | |
1adfc4f4 AT |
21 | |
22 | void | |
23 | print_usage(char ** argv) | |
24 | { | |
bf43fa3f | 25 | printf( "VVhitespace Interpreter v%d (www.subgeniuskitty.com)\n" |
1adfc4f4 AT |
26 | "Usage: %s -i <file>\n" |
27 | " -h Help (prints this message)\n" | |
bf43fa3f | 28 | " -i <file> Specify a VVhitespace source file to interpret.\n" |
1adfc4f4 AT |
29 | , VERSION, argv[0] |
30 | ); | |
31 | } | |
32 | ||
33 | int | |
34 | stdin_empty(void) | |
35 | { | |
36 | fd_set read_fds; | |
37 | FD_ZERO(&read_fds); | |
38 | FD_SET(STDIN_FILENO, &read_fds); | |
39 | ||
40 | struct timeval timeout; | |
41 | timeout.tv_sec = 0; | |
42 | timeout.tv_usec = 0; | |
43 | ||
44 | int retval = select(1, &read_fds, NULL, NULL, &timeout); | |
45 | /* retval could be -1. Ignoring that for now. */ | |
46 | if (retval > 0) return 0; | |
47 | return 1; | |
48 | } | |
49 | ||
a2664738 AT |
50 | void |
51 | set_terminal_mode(void) | |
52 | { | |
53 | struct termios options; | |
54 | tcgetattr(STDIN_FILENO, &options); | |
55 | /* Create a cbreak-like environment through the following options. */ | |
56 | options.c_lflag &= ~ECHO; /* Disable echoing of input characters. */ | |
57 | options.c_lflag &= ~ICANON; /* Disable cooked/line-oriented mode. */ | |
58 | options.c_cc[VMIN] = 1; | |
59 | options.c_cc[VTIME] = 0; | |
60 | tcsetattr(STDIN_FILENO, TCSANOW, &options); | |
61 | } | |
62 | ||
63 | void | |
64 | unset_terminal_mode(void) | |
65 | { | |
66 | struct termios options; | |
67 | tcgetattr(STDIN_FILENO, &options); | |
68 | /* Undo the changes made in set_terminal_mode(). */ | |
69 | options.c_lflag |= ECHO; /* Enable echoing of input characters. */ | |
70 | options.c_lflag |= ICANON; /* Enable cooked/line-oriented mode. */ | |
71 | options.c_cc[VMIN] = 1; /* Default value from /usr/src/sys/sys/ttydefaults.h */ | |
72 | options.c_cc[VTIME] = 0; /* Default value from /usr/src/sys/sys/ttydefaults.h */ | |
73 | tcsetattr(STDIN_FILENO, TCSANOW, &options); | |
74 | } | |
75 | ||
1adfc4f4 AT |
76 | void |
77 | ws_die(size_t * pc, char * msg) | |
78 | { | |
1adfc4f4 AT |
79 | printf("SIM_ERROR @ PC %lu: %s\n", *pc, msg); |
80 | fflush(stdout); | |
a2664738 | 81 | unset_terminal_mode(); |
1adfc4f4 AT |
82 | exit(EXIT_FAILURE); |
83 | } | |
84 | ||
85 | void | |
eea0db3f | 86 | stack_push(int64_t ** sp, int64_t word) |
1adfc4f4 AT |
87 | { |
88 | *((*sp)++) = word; | |
89 | } | |
90 | ||
eea0db3f AT |
91 | int64_t |
92 | stack_pop(int64_t ** sp) | |
1adfc4f4 AT |
93 | { |
94 | return *(--(*sp)); | |
95 | } | |
96 | ||
eea0db3f AT |
97 | int64_t |
98 | stack_peek(int64_t ** sp, size_t offset) | |
1adfc4f4 AT |
99 | /* offset=0 peeks TOS, offset=1 peeks NOS, etc. */ |
100 | { | |
101 | return *((*sp)-offset-1); | |
102 | } | |
103 | ||
104 | uint8_t | |
105 | next_code_byte(uint8_t * code, size_t * pc) | |
106 | { | |
107 | return code[(*pc)++]; | |
108 | } | |
109 | ||
9686c901 AT |
110 | /* |
111 | * In addition to returning the parsed label, this function advances the PC to | |
112 | * the next instruction. | |
113 | */ | |
1adfc4f4 AT |
114 | uint16_t |
115 | parse_label(uint8_t * code, size_t * pc) | |
116 | { | |
117 | uint16_t label = 0; | |
118 | uint8_t c; | |
119 | while ((c = code[(*pc)++]) != '\n') { | |
120 | label = label << 1; | |
84c650ed | 121 | if (c == '\t') label++; |
1adfc4f4 | 122 | } |
8d17aa41 AT |
123 | return label; |
124 | } | |
125 | ||
126 | uint16_t | |
127 | check_label(size_t * labels, uint16_t label, size_t * pc) | |
128 | { | |
439d5659 AT |
129 | if (!labels[label]) { |
130 | fprintf(stderr, "Trying to process label 0x%X.\n", label); | |
131 | ws_die(pc, "uninitialized label (forgot an include?)"); | |
132 | } | |
1adfc4f4 AT |
133 | return label; |
134 | } | |
135 | ||
bf43fa3f | 136 | void |
eea0db3f | 137 | populate_labels(size_t * labels, uint8_t * code, size_t code_size) |
bf43fa3f | 138 | { |
9686c901 AT |
139 | size_t cp = 0; |
140 | while (cp <= code_size) { | |
84c650ed | 141 | if (code[cp++] == '\v') { |
9686c901 AT |
142 | uint16_t temp_label = parse_label(code, &cp); |
143 | labels[temp_label] = cp; | |
144 | } | |
9686c901 | 145 | } |
bf43fa3f AT |
146 | } |
147 | ||
1adfc4f4 | 148 | void |
eea0db3f | 149 | process_imp_stack(uint8_t * code, size_t * pc, int64_t ** sp) |
1adfc4f4 AT |
150 | { |
151 | switch (next_code_byte(code,pc)) { | |
152 | case ' ': | |
153 | /* Push number onto TOS. */ | |
154 | { | |
155 | /* First, pick off the sign */ | |
eea0db3f | 156 | int64_t sign = 0; |
1adfc4f4 AT |
157 | switch (next_code_byte(code,pc)) { |
158 | case ' ' : sign = 1; break; | |
159 | case '\t': sign = -1; break; | |
bf43fa3f | 160 | default : ws_die(pc, "expected sign"); break; |
1adfc4f4 AT |
161 | } |
162 | ||
163 | /* Now, construct the number and push to TOS. */ | |
164 | /* I'm assuming the numbers are read MSb first. */ | |
6b5e4b38 | 165 | uint64_t number = 0; /* Unsigned to accomodate magnitude of most negative number. */ |
eea0db3f | 166 | uint8_t temp; |
1adfc4f4 | 167 | while ((temp = next_code_byte(code,pc)) != '\n') { |
bf43fa3f | 168 | if (temp == '\v') ws_die(pc, "non-binary digit in number"); |
1adfc4f4 AT |
169 | number <<= 1; |
170 | if (temp == '\t') number++; | |
171 | } | |
6b5e4b38 AT |
172 | /* Without temporarily casting to something >64-bit, the most negative */ |
173 | /* number will overflow when performing 'number*sign'. Instead, we */ | |
174 | /* pick off the most negative number as a special case. */ | |
175 | if (number == (1ULL << 63) && sign == -1) { | |
176 | /* C parses negative integer literals first as signed positive */ | |
177 | /* integer literals, then applying a unary negation operator. */ | |
178 | /* Thus, the most negative value is unreachable directly. */ | |
179 | int64_t number_temp = -9223372036854775807LL; /* First store -((2^63)-1) */ | |
180 | number_temp--; /* Now turn it into -(2^63) */ | |
181 | stack_push(sp, number_temp); | |
182 | } else { | |
183 | stack_push(sp, number*sign); | |
184 | } | |
1adfc4f4 AT |
185 | } |
186 | break; | |
187 | case '\n': | |
188 | /* Stack sub-command */ | |
189 | { | |
190 | switch (next_code_byte(code,pc)) { | |
191 | /* Duplicate the TOS. */ | |
192 | case ' ': | |
193 | stack_push(sp, stack_peek(sp,0)); | |
194 | break; | |
195 | /* Swap TOS and NOS. */ | |
196 | case '\t': | |
197 | { | |
eea0db3f AT |
198 | int64_t t1 = stack_pop(sp); |
199 | int64_t t2 = stack_pop(sp); | |
1adfc4f4 AT |
200 | stack_push(sp, t1); |
201 | stack_push(sp, t2); | |
202 | } | |
203 | break; | |
204 | /* Discard TOS. */ | |
205 | case '\n': | |
206 | stack_pop(sp); | |
207 | break; | |
bf43fa3f AT |
208 | default: |
209 | ws_die(pc, "malformed stack IMP"); | |
210 | break; | |
1adfc4f4 AT |
211 | } |
212 | } | |
213 | break; | |
bf43fa3f | 214 | default: ws_die(pc, "malformed stack IMP"); break; |
1adfc4f4 AT |
215 | } |
216 | } | |
217 | ||
218 | void | |
eea0db3f | 219 | process_imp_arithmetic(uint8_t * code, size_t * pc, int64_t ** sp) |
1adfc4f4 | 220 | { |
eea0db3f | 221 | int64_t temp; |
1adfc4f4 AT |
222 | switch (next_code_byte(code,pc)) { |
223 | case ' ': | |
224 | { | |
225 | switch (next_code_byte(code,pc)) { | |
226 | case ' ': | |
227 | /* Addition */ | |
228 | stack_push(sp, stack_pop(sp)+stack_pop(sp)); | |
229 | break; | |
230 | case '\t': | |
231 | /* Subtraction */ | |
232 | temp = stack_pop(sp); | |
233 | stack_push(sp, stack_pop(sp)-temp); | |
234 | break; | |
235 | case '\n': | |
236 | /* Multiplication */ | |
237 | stack_push(sp, stack_pop(sp)*stack_pop(sp)); | |
238 | break; | |
bf43fa3f AT |
239 | default: |
240 | ws_die(pc, "malformed arithmetic IMP"); | |
241 | break; | |
1adfc4f4 AT |
242 | } |
243 | } | |
244 | break; | |
245 | case '\t': | |
246 | { | |
247 | switch (next_code_byte(code,pc)) { | |
248 | case ' ': | |
249 | /* Division */ | |
250 | temp = stack_pop(sp); | |
251 | stack_push(sp, stack_pop(sp)/temp); | |
252 | break; | |
253 | case '\t': | |
254 | /* Modulo */ | |
255 | temp = stack_pop(sp); | |
256 | stack_push(sp, stack_pop(sp)%temp); | |
257 | break; | |
bf43fa3f | 258 | default: ws_die(pc, "malformed arithmetic IMP"); break; |
1adfc4f4 AT |
259 | } |
260 | } | |
261 | break; | |
bf43fa3f | 262 | default: ws_die(pc, "malformed arithmetic IMP"); break; |
1adfc4f4 AT |
263 | } |
264 | } | |
265 | ||
266 | void | |
eea0db3f AT |
267 | process_imp_flowcontrol(uint8_t * code, size_t * pc, int64_t ** sp, size_t * labels, |
268 | size_t ** rsp) | |
1adfc4f4 | 269 | { |
84c650ed | 270 | size_t temp_pc; |
1adfc4f4 AT |
271 | switch (next_code_byte(code,pc)) { |
272 | case '\n': | |
273 | /* Technically another LF is required but we ignore it. */ | |
1adfc4f4 | 274 | fflush(stdout); |
a2664738 | 275 | unset_terminal_mode(); |
1adfc4f4 AT |
276 | exit(EXIT_SUCCESS); |
277 | case ' ': | |
278 | { | |
279 | switch (next_code_byte(code,pc)) { | |
280 | case ' ': | |
281 | /* Mark a location in the program. */ | |
9686c901 AT |
282 | if (next_code_byte(code,pc) != '\v') ws_die(pc,"expected vtab, " |
283 | "perhaps a whitespace program, rather than vvhitespace?"); | |
284 | /* Jump to next instruction since labels were parsed during startup. */ | |
1e574f5b | 285 | parse_label(code, pc); |
1adfc4f4 AT |
286 | break; |
287 | case '\t': | |
288 | /* Call a subroutine. */ | |
8d17aa41 | 289 | temp_pc = labels[check_label(labels, parse_label(code, pc), pc)]; |
84c650ed AT |
290 | *((*rsp)++) = *pc; |
291 | *pc = temp_pc; | |
1adfc4f4 AT |
292 | break; |
293 | case '\n': | |
294 | /* Jump unconditionally to a label. */ | |
8d17aa41 | 295 | *pc = labels[check_label(labels, parse_label(code, pc), pc)]; |
1adfc4f4 | 296 | break; |
bf43fa3f AT |
297 | default: |
298 | ws_die(pc, "malformed flow control IMP"); | |
299 | break; | |
1adfc4f4 AT |
300 | } |
301 | } | |
302 | break; | |
303 | case '\t': | |
304 | { | |
305 | switch (next_code_byte(code,pc)) { | |
306 | case ' ': | |
307 | /* Jump to a label if TOS == 0 */ | |
8d17aa41 | 308 | temp_pc = labels[check_label(labels, parse_label(code, pc), pc)]; |
84c650ed | 309 | if (stack_pop(sp) == 0) *pc = temp_pc; |
1adfc4f4 AT |
310 | break; |
311 | case '\t': | |
312 | /* Jump to a label if TOS < 0. */ | |
8d17aa41 | 313 | temp_pc = labels[check_label(labels, parse_label(code, pc), pc)]; |
84c650ed | 314 | if (stack_pop(sp) < 0) *pc = temp_pc; |
1adfc4f4 AT |
315 | break; |
316 | case '\n': | |
317 | /* Return from subroutine. */ | |
318 | *pc = *(--(*rsp)); | |
319 | break; | |
bf43fa3f AT |
320 | default: |
321 | ws_die(pc, "malformed flow control IMP"); | |
322 | break; | |
1adfc4f4 AT |
323 | } |
324 | } | |
325 | break; | |
bf43fa3f AT |
326 | default: |
327 | ws_die(pc, "malformed flow control IMP"); | |
328 | break; | |
1adfc4f4 AT |
329 | } |
330 | } | |
331 | ||
332 | void | |
eea0db3f | 333 | process_imp_heap(uint8_t * code, size_t * pc, int64_t ** sp, int64_t ** hp) |
1adfc4f4 AT |
334 | { |
335 | switch (next_code_byte(code,pc)) { | |
aea85838 AT |
336 | case ' ' : |
337 | /* Store to heap */ | |
338 | { | |
eea0db3f AT |
339 | int64_t value = stack_pop(sp); |
340 | int64_t addr = stack_pop(sp); | |
aea85838 AT |
341 | *(*hp + addr) = value; |
342 | } | |
343 | break; | |
344 | case '\t': | |
345 | /* Retrieve from heap */ | |
346 | stack_push(sp, *(*hp + stack_pop(sp))); | |
347 | break; | |
348 | default: | |
349 | ws_die(pc, "malformed heap IMP"); | |
350 | break; | |
1adfc4f4 AT |
351 | } |
352 | } | |
353 | ||
354 | void | |
eea0db3f | 355 | process_imp_io(uint8_t * code, size_t * pc, int64_t ** sp, int64_t ** hp) |
1adfc4f4 AT |
356 | { |
357 | switch (next_code_byte(code,pc)) { | |
358 | case ' ': | |
359 | /* Output */ | |
360 | { | |
361 | switch (next_code_byte(code,pc)) { | |
eea0db3f AT |
362 | case ' ' : /* Output char from TOS */ printf("%c", (uint8_t) stack_pop(sp)); break; |
363 | case '\t': /* Output digit from TOS */ printf("%c", (uint8_t) stack_pop(sp)+'0'); break; | |
9686c901 | 364 | default : ws_die(pc, "malformed output IMP"); break; |
1adfc4f4 AT |
365 | } |
366 | fflush(stdout); | |
367 | } | |
368 | break; | |
369 | case '\t': | |
370 | /* Input */ | |
371 | { | |
372 | while (stdin_empty()) continue; | |
373 | char c = getchar(); | |
374 | switch (next_code_byte(code,pc)) { | |
375 | case '\t': /* Input digit */ c -= '0'; /* fallthrough */ | |
a2664738 | 376 | case ' ' : /* Input character */ *(*hp + *(--(*sp))) = c; break; |
9686c901 | 377 | default : ws_die(pc, "malformed input IMP"); break; |
1adfc4f4 AT |
378 | } |
379 | } | |
380 | break; | |
bf43fa3f | 381 | default: ws_die(pc, "malformed i/o IMP"); break; |
1adfc4f4 AT |
382 | } |
383 | } | |
384 | ||
1adfc4f4 AT |
385 | int |
386 | main(int argc, char ** argv) | |
387 | { | |
388 | /* | |
389 | * Process command line arguments | |
390 | */ | |
391 | int c; | |
392 | FILE * input = NULL; | |
393 | while ((c = getopt(argc,argv,"i:h")) != -1) { | |
394 | switch (c) { | |
395 | case 'i': | |
396 | if ((input = fopen(optarg, "r")) == NULL) { | |
397 | fprintf(stderr, "ERROR: %s: %s\n", optarg, strerror(errno)); | |
398 | } | |
399 | break; | |
400 | case 'h': | |
401 | print_usage(argv); | |
402 | exit(EXIT_SUCCESS); | |
403 | break; | |
404 | default: | |
405 | break; | |
406 | } | |
407 | } | |
408 | if (input == NULL) { | |
bf43fa3f | 409 | fprintf(stderr, "ERROR: Must specify a VVhitespace source file with -f flag.\n"); |
1adfc4f4 AT |
410 | print_usage(argv); |
411 | exit(EXIT_FAILURE); | |
412 | } | |
413 | ||
414 | /* | |
eea0db3f | 415 | * Read just the VVhitespace source code into memory, stripping comment characters. |
1adfc4f4 AT |
416 | * We will use the array indices as addresses for the virtual PC when jumping to labels. |
417 | */ | |
418 | size_t ws_code_size = 0; | |
419 | uint8_t temp_byte; | |
420 | while (fread(&temp_byte, 1, 1, input)) { | |
bf43fa3f AT |
421 | if (temp_byte == ' ' || temp_byte == '\t' || temp_byte == '\n' || temp_byte == '\v') { |
422 | ws_code_size++; | |
423 | } | |
1adfc4f4 AT |
424 | } |
425 | rewind(input); | |
426 | uint8_t * ws_code_space = malloc(ws_code_size); | |
427 | ws_code_size = 0; | |
428 | while (fread(&temp_byte, 1, 1, input)) { | |
bf43fa3f AT |
429 | if (temp_byte == ' ' || temp_byte == '\t' || temp_byte == '\n' || temp_byte == '\v') { |
430 | ws_code_space[ws_code_size++] = temp_byte; | |
431 | } | |
1adfc4f4 AT |
432 | } |
433 | fclose(input); | |
434 | ||
435 | /* | |
436 | * Setup a stack and heap. | |
1adfc4f4 | 437 | */ |
eea0db3f AT |
438 | int64_t * hp = malloc(HEAPSIZE*sizeof(int64_t)); |
439 | int64_t * sp = malloc(DATASTACKSIZE*sizeof(int64_t)); | |
1adfc4f4 AT |
440 | |
441 | /* | |
442 | * Setup the return stack and the label array. | |
443 | */ | |
eea0db3f AT |
444 | size_t * rsp = malloc(RETURNSTACKSIZE*sizeof(size_t)); |
445 | size_t labels[65536] = {0}; /* 65536 = 2^16 => Holds all possible labels. */ | |
446 | /* Default value of zero indicates an uninitialized label. */ | |
bf43fa3f | 447 | populate_labels(labels, ws_code_space, ws_code_size); |
1adfc4f4 AT |
448 | |
449 | /* | |
450 | * Main Loop | |
451 | */ | |
a2664738 | 452 | set_terminal_mode(); |
1adfc4f4 AT |
453 | size_t pc = 0; /* Virtual program counter. Operates in the ws_code_space[] address space. */ |
454 | while (1) { | |
455 | if (pc >= ws_code_size) { | |
bf43fa3f AT |
456 | fprintf(stderr, "SIM_ERROR: PC Overrun\n Requested PC: %lu\n Max Address: %lu\n", |
457 | pc, ws_code_size-1); | |
1adfc4f4 | 458 | exit(EXIT_FAILURE); |
a2664738 | 459 | unset_terminal_mode(); |
1adfc4f4 AT |
460 | } |
461 | ||
462 | /* Decode the IMPs */ | |
463 | switch (ws_code_space[pc++]) { | |
464 | case ' ': | |
465 | /* Stack Manipulation */ | |
466 | process_imp_stack(ws_code_space, &pc, &sp); | |
467 | break; | |
468 | case '\n': | |
469 | /* Flow Control */ | |
470 | process_imp_flowcontrol(ws_code_space, &pc, &sp, labels, &rsp); | |
471 | break; | |
472 | case '\t': | |
473 | /* Arithmetic, Heap Access, or I/O */ | |
474 | { | |
475 | switch (ws_code_space[pc++]) { | |
476 | case ' ': | |
477 | /* Arithmetic */ | |
478 | process_imp_arithmetic(ws_code_space, &pc, &sp); | |
479 | break; | |
480 | case '\t': | |
481 | /* Heap Access */ | |
482 | process_imp_heap(ws_code_space, &pc, &sp, &hp); | |
483 | break; | |
484 | case '\n': | |
485 | /* I/O */ | |
486 | process_imp_io(ws_code_space, &pc, &sp, &hp); | |
487 | break; | |
488 | } | |
489 | } | |
490 | break; | |
eea0db3f | 491 | default: ws_die(&pc, "unexpected byte"); break; |
1adfc4f4 AT |
492 | } |
493 | } | |
1adfc4f4 | 494 | } |