\ OpenSPARC T2 architecture model -- OBP tokenizer package
\ obp/obp/pkg/tokenizr/tokenize.fth
1\ ========== Copyright Header Begin ==========================================
2\
3\ Hypervisor Software File: tokenize.fth
4\
5\ Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
6\
\ - Do not alter or remove copyright notices
8\
9\ - Redistribution and use of this software in source and binary forms, with
10\ or without modification, are permitted provided that the following
11\ conditions are met:
12\
13\ - Redistribution of source code must retain the above copyright notice,
14\ this list of conditions and the following disclaimer.
15\
16\ - Redistribution in binary form must reproduce the above copyright notice,
17\ this list of conditions and the following disclaimer in the
18\ documentation and/or other materials provided with the distribution.
19\
20\ Neither the name of Sun Microsystems, Inc. or the names of contributors
21\ may be used to endorse or promote products derived from this software
22\ without specific prior written permission.
23\
24\ This software is provided "AS IS," without a warranty of any kind.
25\ ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
26\ INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
27\ PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN
28\ MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR
29\ ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
30\ DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN
31\ OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR
32\ FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE
33\ DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY,
34\ ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF
35\ SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
36\
37\ You acknowledge that this software is not designed, licensed or
38\ intended for use in the design, construction, operation or maintenance of
39\ any nuclear facility.
40\
41\ ========== Copyright Header End ============================================
42id: @(#)tokenize.fth 1.16 06/02/16
43purpose: Tokenizer program source - converts FCode source to byte codes
44copyright: Copyright 2006 Sun Microsystems, Inc. All Rights Reserved
45copyright: Use is subject to license terms.
46
\ TODO:
\   Add a means to define symbols for use with ifdef
\   Add a means to set the start code from the command line

only forth also definitions

\ ' $report-name is include-hook
\ ' noop is include-hook

\ Debugging/override switches.  Each defaults to off; to activate one,
\ set it "on" from the command-line (see note below).

\ Force externals, this is for debugging
variable force-external? force-external? off

\ Force headers, also for debugging
variable force-headers? force-headers? off

\ Force headerless. For debugging and also
\ for trimming down your binary without
\ changing your source-base.
variable force-headerless? force-headerless? off

\ To activate any of the above, set it "on" from the command-line

\ All tokenizer-internal support words live in this vocabulary.
vocabulary tokenizer
also tokenizer also definitions

decimal warning off caps on
fload ${BP}/fm/lib/split.fth \ 32>8,8,8,8; 16>8,8; 8,8>16 ...
74
\ Keep quiet about warnings and statistics.
variable silent silent off

\ when true, #, #s, #> uses single vs double stack values.
variable pre1275 pre1275 off

\ true to prepend an a.out header to the output file
variable aout-header? aout-header? off

\ Statistics variables used in final statistics word
variable #literals
variable #locals
variable #apps
variable #primitives

variable #constants
variable #values
variable #variables
variable #buffers
variable #defers

variable compiling \ True if in an FCode definition
variable #end0s #end0s off \ How many END0 tokens encountered
variable offset-8? offset-8? off \ Can be set to true by tokenize script

defer fcode-start-code \ start0, start1, start2, or start4

variable tokenization-error \ True if error encountered in FCode source

\ File header creation, fill in size later (see a.out(5) for format)
\ NOTE(review): h# 01030107 appears to be the a.out magic/type word and
\ h# 4000 the load address -- confirm against a.out(5) before changing.
create header h# 01030107 , 0 , 0 , 0 , 0 , h# 4000 , 0 , 0 ,
106
\ Monitor current output counters
\ 'bhere' returns the total # of byte-codes output so far.
\ 'fhere' returns the current position in the file. This will be
\ different, because of the file header (32 bytes), and sometimes more
\ because of debugging information being output as well as byte-codes.

variable bytes-emitted \ Total # of byte-codes output so far
: bhere ( -- #token-bytes-emitted ) bytes-emitted @ ;
: fhere ( -- cur-file-pos ) ofd @ ftell ;


\ Vectored output primitives
: .byte ( c -- )
    \ put byte to output file (ofd is the open output file descriptor)
    ofd @ fputc
;

\ : .word ( w -- )
\     wbsplit .byte .byte
\ ;
\
\ : .long ( l -- )
\     lbsplit .byte .byte .byte .byte
\ ;
131
132
133: inc ( adr -- ) 1 swap +! ; \ increment variable
134
variable checksum \ Running total of all emitted bytes
\ Counted emit: adds the byte to the running checksum, writes it,
\ and bumps bytes-emitted.
: emit-fbyte ( c -- )
    dup checksum +! .byte bytes-emitted inc
\   bytes-emitted @ .x checksum @ .x cr
;

\ The user-level "emit-byte" will *not* affect the running-total
\ length and checksum fields before "fcode-versionx" and after "end0"
\ This allows embedded binary before and after the fcode to work
\ correctly, i.e. leave the fcode-only image unaltered.
defer emit-byte
' .byte is emit-byte \ Will later be vectored to emit-fbyte

\ Multi-byte emitters, built on the (possibly revectored) emit-byte.
: emit-word ( w -- ) wbsplit emit-byte emit-byte ;
: emit-long ( l -- ) lwsplit emit-word emit-word ;
150
\ Emit the one-byte token stored at apf.
: emit-token ( apf -- )
    c@ emit-byte
;

\ Emit the escape byte stored at apf+1, but only if it is non-zero.
: emit-local-escape ( apf -- ) \ (adr+1)c@=0 then output 1 byte token
    ca1+ dup c@ if emit-token else drop then
;

\ Emit a (possibly escaped) local token: optional escape byte, then token#.
: emit-local ( apf -- )
    dup emit-local-escape emit-token
;

: pad-size ( -- ) \ Pad file to longword boundary
    ofd @ ftell ( size )
    dup /l round-up swap - 0 ?do 0 emit-byte loop
;
167
\ Compiling word to create primitive tokens
variable which-version \ bit variable - each bit represents a version's use

variable #version1s \ accumulator of version 1 tokens compiled
variable #version1+2s \ accumulator of version 1+2 tokens compiled
variable #version2s \ accumulator of version 2.x tokens compiled
variable #version2.1s \ accumulator of version 2.1 tokens compiled
variable #version2.2s \ accumulator of version 2.2 tokens compiled
variable #version2.3s \ accumulator of version 2.3 tokens compiled
variable #version3s \ accumulator of version 3 tokens compiled
variable #obsoletes \ accumulator of obsolete tokens compiled
179
180: or! ( adr bit$ -- ) over @ or swap ! ; \ or in bit string
181
\ Version designators: each sets one bit in which-version; a token's
\ byte-code: definition is prefixed by one or more of these.
: v1 ( -- ) which-version 1 or! ; \ FCode was/is versions 1
: v2 ( -- ) which-version 2 or! ; \ FCode was/is versions 2.0
: v2.1 ( -- ) which-version 4 or! ; \ FCode was/is versions 2.1
: v2.2 ( -- ) which-version 8 or! ; \ FCode was/is versions 2.2
: v2.3 ( -- ) which-version h# 10 or! ; \ FCode was/is versions 2.3
: v3 ( -- ) which-version h# 20 or! ; \ FCode was/is versions 3
: obs ( -- ) h# 40 which-version ! ; \ FCode is obsolete (plain ! overrides any other bits)
189
\ We need a convenient way to mark ALL the obsoleted FCodes with obs
\ but we can't change them in their source files, because their
\ version-designators may be used elsewhere...
\
vocabulary obs-fcoder also obs-fcoder definitions
    \ We're going to redefine, in this vocabulary, all of the
    \ version-designators (except v1 , since, if a token
    \ designated by v1 alone is used in an FCode source,
    \ that will invalidate the tokenization) that are used
    \ in the collection of obsoleted FCodes to designate
    \ obsolete versions.
    \
    \ Put this vocabulary at the top of the search-order before
    \ loading the obsoleted FCodes; it will act as a filter.
    \
    alias v2 obs
    alias v2.2 obs
    \
    \ The codes v2.1 v2.3 or v3 are not used in the collection
    \ of obsoleted FCodes, and might not even designate obsolete
    \ versions at all...
previous definitions

\ Largest legal value of the which-version flag (the obs bit).
h# 40 constant max-version
214
\ Fetch the accumulated version bits, reset the accumulator, and
\ sanity-check the value.  Aborts if no v1/v2/... prefix was given
\ or the flag exceeds max-version.
: check-version ( -- vers# )
    which-version dup @ swap off
    dup 0= abort" missing v1, v2 ... byte-code: prefix"
    dup max-version > abort" something wrong with ver-flag; (too big)"
;

\ Print the token's name in back-and-forward quotes, thus: `name'
: .`name' ( cfa -- )
    ." `" >name name>string type ." '" \ No space after
;
225
\ Print the source position (where) and a warning naming the token
\ whose pfa is given; the pfa stays on the stack for the caller.
: .token-warning ( pfa -- pfa )
    where ." *** Warning: " dup body> .`name'
;
229
\ Bump the statistics counter selected by vers# (a which-version bit
\ pattern: 1, 2, 3 = v1+v2, 4, 8, h# 10, h# 20, or h# 40 = obsolete),
\ warning the user about version-1-only and obsolete tokens.
\ The pfa passes through untouched.
: count-version ( pfa vers# -- pfa )
    case
        1 of
            .token-warning
            \ Fix: terminate the warning line with cr, matching the
            \ obsolete-token warning below.
            ." is an obsoleted version 1 token. ***" cr
            #version1s
        endof
        2 of #version2s endof
        3 of #version1+2s endof

        4 of #version2.1s endof
        8 of #version2.2s endof
        h# 10 of #version2.3s endof
        h# 20 of #version3s endof
        h# 40 of
            silent @ 0= if
                .token-warning
                ." is an obsolete FCode token. ***" cr
            then
            #obsoletes
        endof
    endcase ( pfa adr )
    inc
;
254
\ Common preface to a tokenization-error message:
\ sets the global error flag and prints the source position.
: .tokenization-error ( -- )
    tokenization-error on
    where ." Error: "
;

\ Include this in a definition of an obsolete word that will
\ have special redeeming behavior, but still should shout
\ a warning. Obtain the calling word's PFA from its
\ inevitable residue on the return stack.
: .obsolete ( -- )
    r@ find-cfa >body \ PFA of calling word
    h# 40 count-version drop
;

\ Incorporate this into a special definition where a sequence
\ will replace the named token. The replacement sequence
\ should be passed as a string which will be interpreted
\ in the tokenization context. The calling word's name
\ will also be obtained from the return stack to be printed.
: replace ( $adr,len -- )
    silent @ 0= if
        r@ find-cfa \ CFA of calling word
        where ." *** Replacing " ( $adr,len cfa )
        .`name' ." with: " ( $adr,len )
        2dup type cr
    then
    eval ( )
;
284
285
\ We're going to create the means to give an error message if
\ the target of ' ("tick") or ['] ("bracket-tick-bracket")
\ is not what the user expects it to be (e.g., a macro that
\ resolves into a multi-byte-code sequence).
\
\ Valid targets are words that will issue a direct byte-code.
\ Some of them can be identified by their defining-word;
\ the word-type function will yield that information.
\
\ We're going to create a linked-list of word-types that identify
\ valid "tick-targets". To do that, we'll create a modifying
\ word -- syntactically similar to immediate in that it
\ takes effect on the last definition made.
\
\ All the valid defining-words are create ... does> type words
\ and we're going to take advantage of that fact plus the fact
\ that the invocation of does> updates lastacf to point
\ at exactly the location that will be returned when word-type
\ is applied to a word defined by the defining-word just defined.
\ (Does this make your head spin? I'm positively giddy!)
\
\ First, we need a place to keep the start of the linked-list:

create tick-target-word-type-link null link,


\ Each entry in the linked list will consist of two items:
\ (1) The word-type token to be entered into the list
\ (2) A link pointing to the preceding link (null at end of list).

\ Support function. Enter the given CFA into the list whose
\ starter-pointer address is also given (new entries go at the head):
: (valid-tick-target) ( type-cfa link-start-addr -- )
    swap token,
    here \ Establish new start of linked-list
    swap ( here link-start-addr )
    dup link@ ( here link-start-addr prev-link )
    link, ( here link-start-addr )
    link! ( )
;
326
\ Enter the word-type of words that will be defined by a newly-defined
\ defining-word into the list of valid "tick-targets".
\
\ Use this immediately after defining the defining-word.
\ (Syntactically similar to immediate .)
: valid-tick-target-word-type ( -- )
    lastacf tick-target-word-type-link (valid-tick-target)
;

\ Scan through the linked-list, to find whether the word-type
\ of the function whose CFA is given is a valid tick-target.
\
\ Return a not-valid? flag, indicating whether further checking
\ is required, and under that, either the original CFA (if
\ it's not valid) or another false, to be passed on as the
\ not-valid? flag for the next customer...
\
\ Support function. Given a CFA (of a word-type or a function)
\ and the address of the link that starts the list, do the
\ actual search through the list (each entry's token sits
\ /token bytes before its link field):
: (bad-tick-target?) ( cfa link-start-addr -- cfa true | false false )
    over true 2swap begin ( cfa true cfa link-adr )
    another-link? while ( cfa true cfa next-link )
        2dup /token - token@ = if
            [ also hidden ] 4drop [ previous ] false false
            exit
        then ( cfa true cfa next-link )
    repeat ( cfa true cfa )
    drop
;

\ Rather than require awkward stack-dancing, we will not attempt to
\ retain the CFA on the stack, nor to return it. The calling
\ routine will have a much easier time handling that...
: bad-tick-target-word-type? ( cfa -- true | false false )
    word-type ( word-type )
    tick-target-word-type-link ( word-type link-start-addr )
    (bad-tick-target?) ( word-type true | false false )
    dup if nip then
;
367
368
\ We will create an "exceptions list" that is a linked-list of
\ tokens of colon-definitions that are valid "tick-targets".

\ Start with a pointer to the start of the list

create tick-target-:-link null link,

\ The entries in this linked list are structured similarly to
\ the one preceding. The first item is the token of the
\ exempted word. It is remarkably similar in form... ;-)

\ Enter the newly-defined function into the list of valid
\ "tick-targets" even though its word-type (mainly colon)
\ doesn't qualify it.
\
\ Use this immediately after defining the function to validate.
\ (Syntactically similar to immediate .)
: valid-tick-target ( -- )
    lastacf tick-target-:-link (valid-tick-target)
;


\ Scan through the linked-list, to find whether the function
\ whose CFA is given is a valid colon-defined tick-target.
: bad-:-tick-target? ( cfa -- cfa true | false false )
    tick-target-:-link
    (bad-tick-target?)
;

\ Chain together the categories we're testing.
\ At present, there are only two of 'em...
: bad-tick-target? ( cfa -- not-valid? )
    \ Do the word-type test first because:
    \ (A) It's faster (fewer entries in the table)
    \ and (B) it's -- by far -- more frequently encountered.
    dup bad-tick-target-word-type? if
        dup bad-:-tick-target? nip
    then nip
;
408
\ Give the error-message.
: .bad-tick-target ( cfa -- cfa )
    .tokenization-error
    dup .`name'
    ." is not a valid target for ' or [']" cr
;


\ Shout if the next token in the input stream
\ isn't a valid target for ' or [']
\
\ Incorporate this into the ' or ['] words...
\
: ?valid-tick-target ( -- ) \ <name> (but does not consume it.)
    >in @ ( old>in )
    parse-word ( old>in $adr,len )
    rot >in ! \ Rewind the input pointer so <name> is parsed again later

    $find if ( cfa )
        dup bad-tick-target? if ( cfa )
            .bad-tick-target
        then drop
    else ( $adr,len )
        \ Next token wasn't found; let normal ? mechanism handle it.
        2drop
    then ( )
;
436
437
\ Define a named FCode primitive.  The created word's body holds three
\ bytes: token#, table# (the local-escape byte; 0 means a plain
\ one-byte token), and the version flag from check-version.
: byte-code: \ name ( token# table# -- ) \ Compiling
    ( -- ) \ At execute time, sends proper token# to output stream
    check-version
    create rot c, swap c, c,
    does> dup 2 ca+ c@ \ fetch the stored version flag
    count-version
    emit-local #apps inc
; valid-tick-target-word-type

\ A v2-compat: definition need not necessarily be a valid "tick-target"
\ but we can detect if it is and mark it as such.

: v2-compat: \ old-name current-name ( -- )
    create ' ( current-name-cfa )
    dup bad-tick-target? 0= swap ( valid? cfa )
    token,
    if valid-tick-target then
    does>
    silent @ 0= if
        where
        ." Warning: Substituting " dup token@ .`name'
        ." for old name " dup body> .`name' cr
    then
    token@ execute \ run the current-name word in place of the old name
;
463
\ Define a word that substitutes a single-stack-item variant for a
\ double-stack-item word when pre1275 is on.  The body holds two
\ tokens: [0] the normal word, [1] the pre-1275 replacement.
: Pre-1275: \ new-name new-name old-name ( -- )
    create
    hide ' token, ' token, reveal
    does>
    silent @ 0= if
        pre1275 @ if
            where
            ." Warning: Substituting single stack item "
            dup ta1+ token@ .`name'
            ." for " dup token@ .`name' cr
        then
    then
    pre1275 @ if ta1+ then
    token@ execute
;

\ Conditionally include a file: only when 'feature-symbol' is defined.
\ With the DEPEND symbol defined, just print the file name instead.
: depend-load \ 'feature-symbol' file
    safe-parse-word safe-parse-word ( symbol$ file$ )
    2swap [get-symbol] drop if ( file$ )
        included ( )
    else ( file$ )
        " DEPEND" [get-symbol] drop if ( file$ )
            ." File: " type cr ( )
        else ( file$ )
            2drop ( )
        then ( )
    then ( )
;
492
fload ${BP}/fm/lib/message.fth

\ When executing, forth, tokens and 'reforth' words are allowed.
\ 'reforth' vocab. redefines certain words like : and constant.
\
\ When compiling, *only* prim. tokens, macros or new local tokens
\ are allowed. The file 'crosslis.fth' holds equivalent names
\ for missing primitives, e.g. : 2+ 2 + ;
\
\ Needed words that do not have primitive token equivalents
\ (e.g.: h# etc.) are handled with aliases to standard forth.
\
\ Control words (if, begin, loop, etc.) have custom definitions, as
\ do a limited set of string input words as well as words that
\ generate special literals, (e.g.: ascii control etc.).


\ Add allowed tokens into 'tokens' vocabulary
only forth definitions
vocabulary tokens
vocabulary reforth

\ don't search the tokens voc because it includes numbers (-1, 0, 1, ..)
only forth also tokenizer also tokens definitions tokenizer

fload ${BP}/pkg/tokenizr/primlist.fth \ Basic ("complete"?) set of FCodes

[ifdef] v2-compat
    \ Load the obsoleted FCodes with obs-fcoder on top of the search
    \ order so their v2/v2.2 designators are filtered into obs.
    also obs-fcoder
fload ${BP}/pkg/tokenizr/obsfcode.fth
    previous
[then]
only forth also tokenizer also

\ For the rest of this file, the search order is *always* either:
\
\ context: forth forth root current: forth - or -
\ context: tokens forth root current: tokens - or -
\ context: reforth forth root current: reforth

tokens definitions
alias \ \
alias 16\ \
alias ( (
alias (s (
alias .( .(
alias th th \ becoming obsolete
alias td td \ becoming obsolete
alias h# h#
alias d# d#
alias o# o#
alias b# b#

alias [define] [define] \ define a symbol
alias [undef] [undef] \ undefine
alias [defined] [defined] \ forward parsed symbol-value
alias [ifdef] [ifdef]
alias [ifndef] [ifndef]
alias [ifexist] [ifexist]
alias [ifnexist] [ifnexist]
alias [message] [message] \ For spitting out compile info
alias depend-load depend-load
alias [then] [then]
alias [else] [else]
alias recursive recursive
tokenizer definitions

\ Init search path during execution
: init-path ( ) \ Outside of definitions, allow 'tokens' and 'reforth'
    only root also reforth also tokens definitions
;

: tokens-only ( -- ) \ Allow only tokens within definitions
    only tokens also definitions
;

\ Restore the tokenizer's own load-time search order.
: restore-path ( )
    only forth also definitions
;
572
\ More output primitives
\ Emit a literal: the b(lit) token followed by the 32-bit value.
: emit-number ( n -- )
    [ also tokens ] b(lit) [ previous ] emit-long
    1 #literals +!
;
\ do-literal hook: 1 = single, 2 = double (emit high cell first).
: tokenize-literal ( n 1 | d 2 -- )
    2 = if swap emit-number then emit-number ;
580
\ Lookup table primitives
\ 'lookup' table contains 256 longword values, corresponding
\ to 256 possible local tokens. Entry #n contains the offset from the
\ beginning of the bytes-output file where the definition of local
\ token#n begins. For variables and constants, entry #n+1 contains
\ the actual value of the variable (0 initially) or constant.

8 constant first-local-escape

256 /l* constant 1tablesize \ Size of one 256-word table
1tablesize 8 * buffer: lookup \ 8 tables: escapes first-local-escape..+7

variable next-lookup# \ Next lookup# available
variable local-escape \ Current local escape-code

: advance-lookup# ( -- ) 1 next-lookup# +! ;
\ When the current table is full, move on to the next escape code
\ and restart numbering at 0.
: lookup#-range-check ( -- )
    next-lookup# @ d# 254 > if
        1 local-escape +! 0 next-lookup# !
    then
;

\ Address of the lookup entry for the next local token.
: next-lookup ( -- addr )
    next-lookup# @ /l*
    local-escape @ first-local-escape - 1tablesize * +
    lookup +
;

: set-lookup-pointer ( bhere -- ) \ Pnt cur lookup# to current byte-out
    lookup#-range-check
    next-lookup l!
;
613
614
\ Go back and patch previous output items
: patch-byte ( byte addr -- ) \ Go back to 'addr' and insert 'byte'
    fhere >r \ Save current file pointer
    ofd @ fseek \ Move back to 'addr' location
    \ emit-fbyte also folds the byte into the checksum; the dummy
    \ placeholder contributed 0, so the total comes out right.  Only
    \ the byte count must be un-done.
    emit-fbyte -1 bytes-emitted +! \ fix forward reference
    r> ofd @ fseek \ Restore current file pointer
;
: patch-word ( word addr -- ) >r wbsplit r@ patch-byte r> 1+ patch-byte ;
: patch-long ( long addr -- ) >r lwsplit r@ patch-word r> 2+ patch-word ;

variable dummy 0 dummy ! \ Use this form just so we can see
                         \ the name with emit-token during debug.
: emit-dummy ( -- )
    ['] dummy >body emit-token \ Emit 0-value dummy token for now
;

variable local-start \ Byte-count at start of current word
: save-local-start ( -- ) \ Save current bytes-emitted value
    bhere local-start !
;
635
636
\ Length & checksum creation
variable checksumloc \ Location of checksum and length fields

variable fcode-vers fcode-vers off \ True once an Fcode-version# word has run
: .mult-fcode-vers-error ( -- )
    silent @ 0= if
        ??cr where ." Warning: Multiple Fcode-version# commands encountered. " cr
    then
;
\ Re-vector do-literal and header to the tokenizer's versions.
: restore-header ( -- )
    ['] tokenize-literal is do-literal
    [ forth ] ['] (header) is header [ tokenizer ]
;
650
tokens definitions

\ The accepted plug-in format is:
\ fd - version1 fcode (1 byte) for first encountered fcode.
\ 0 - revision byte
\ checksum - 2 bytes containing the fcode PROM checksum.
\ length - 4 bytes specifying the total usable length of fcode data
\ (i.e. from 'version1' to 'end0' inclusive)
\
\ The checksum is calculated by summing all remaining bytes, from just after
\ the length field to the end of the usable fcode data (as indicated
\ by the length field).


\ Common first phase of every Fcode-versionN word: reset all the
\ per-image state and begin counted output (emit-fbyte).
: (fcode-version) ( -- )
    bhere abort" Fcode-version# should be the first FCode command!"
    restore-header
    [ tokenizer ]
    fcode-vers on
    which-version off \ init version flag to normal state
    #version1s off \ init version 1 code counter
    #version1+2s off \ init version 1 and 2 code counter
    #version2s off \ init version 2 code counter
    #version2.1s off \ init version 2.1 code counter
    #version2.2s off \ init version 2.2 code counter
    #version2.3s off \ init version 2.3 code counter
    #version3s off \ init version 3 code counter
    #obsoletes off \ init obsolete code counter
    checksum off \ Clear checksum bits set by version1
    pad-size
    ['] emit-fbyte is emit-byte
    [ tokens ]
;

\ Common second phase: emit the sub-version byte and the checksum and
\ length placeholders (patched later), optionally followed by offset16.
: ((fcode-version ( offset16? sub-ver# -- )
    [ tokenizer ]
    emit-byte \ sub-version
    fhere checksumloc !
    0 emit-word \ Filler for later checksum field
    0 emit-long \ Filler for later length field
    checksum off \ Needed again; we emitted bytes we aren't counting.
    if
        [ tokens ] offset16 [ tokenizer ] \ compile offset16 Fcode
    then
    [ tokens ]
;
697
\ Start a version-1 FCode image (0xfd start byte, pre-1275 semantics).
: Fcode-version1 ( -- )
    [ tokenizer ]
    fcode-vers @ if .mult-fcode-vers-error
    else
        pre1275 on

        [ tokens ]
        (fcode-version) version1 \ (0xfd token)
        [ tokenizer ]
        offset-8? @ 0= \ Compile offset16 Fcode?
        3 \ sub-version (2.2 with fixed checksum)
        [ tokens ]
        ((fcode-version
        [ tokenizer ]
    then
    [ tokens ]
;

\ Start a version-2 FCode image (start1 token, always 16-bit offsets).
: Fcode-version2 ( -- )
    [ tokenizer ]
    fcode-vers @ if .mult-fcode-vers-error
    else

        pre1275 on

        [ tokens ] (fcode-version) start1 [ tokenizer ]
        offset-8? off false \ Don't compile offset16 Fcode
        3 \ sub-version (2.2 with fixed checksum)
        [ tokens ]
        ((fcode-version
        [ tokenizer ]
    then
    [ tokens ]
;

' start1 is fcode-start-code

\ Start a version-3 (IEEE 1275) FCode image.
: Fcode-version3 ( -- )
    [ tokenizer ]
    fcode-vers @ if .mult-fcode-vers-error
    else

        pre1275 off

        [ tokens ] (fcode-version) fcode-start-code [ tokenizer ]
        offset-8? off false \ Don't compile offset16 Fcode
        8 \ sub-version (3. P1275-compliant).
        [ tokens ]
        ((fcode-version
        [ tokenizer ]
    then
    [ tokens ]
;
751
tokenizer definitions
\ Test for branch offsets greater than one byte
: test-span ( delta -- ) \ Test if offset is too great
    d# -128 d# 127 between 0= ( error? ) if
        .tokenization-error
        ." Branch interval of +-127 bytes exceeded." cr
        ." Use OFFSET16 or, better yet, use shorter dfns." cr
    then
;
761
762
\ Token control structure primitives
\ Number of bytes in a branch offset
: /branch-offset ( -- n ) 1 offset-8? @ 0= if 1+ then ;
variable Level \ Current control-structure nesting depth

: +Level ( -- ) 1 Level +! ;
: -Level ( -- ) -1 Level +! Level @ 0< abort" Bad conditional" ; \ XXX

\ Forward branch: remember where the offset placeholder goes.
: >Mark ( -- bhere fhere )
    bhere fhere emit-dummy
    offset-8? @ 0= if emit-dummy then \ Two bytes if offset-16 is true
;

\ Resolve a forward branch: patch the saved placeholder with the
\ byte-count delta from the mark to here.
: >Resolve ( oldb oldf -- )
    bhere rot - swap ( delta oldf )
    offset-8? @ if
        over test-span patch-byte
    else
        patch-word
    then
;

\ Backward branch: just remember the target position.
: <Mark ( -- bhere fhere ) bhere fhere ;
\ Resolve a backward branch: emit the (negative) delta in-line.
: <Resolve ( oldb oldf -- )
    drop bhere - ( delta )
    offset-8? @ if
        dup test-span emit-byte
    else
        emit-word
    then
;
794
\ Bypass the abort" built in to ?pairs and ?csp
: catch-tok.error ( XT -- error? )
    catch dup if
        \ We want to print the error using our format, (i.e., w/ leading " Error: ")
        \ not the default. To do that, we need to temporarily suppress the action
        \ of show-error and use our own instead...
        .tokenization-error
        ['] show-error behavior ( error# old-show-error )
        ['] noop is show-error
        over .error
        ( old-show-error ) is show-error ( error# )
    then
;
: tok?csp? ( -- error? ) ['] ?csp catch-tok.error ;
\ Like ?pairs but returns an error flag (dropping the unconsumed
\ check values on error) instead of aborting.
: tok?pairs? ( chk2 chk1 -- error? )
    ['] ?pairs catch-tok.error dup if nip nip then
;

\ Exchange two (bhere fhere check#) triples.
: but ( b1 f1 t1 b2 f2 t2 -- b2 f2 t2 b1 f1 t1 )
    >r rot >r 2swap ( b2 f2 b1 f1 ) ( r: t2 t1 )
    r> r> swap >r -rot r> ( b2 f2 t2 b1 f1 t1 )
;
: +>Mark ( -- bhere fhere ) +Level >Mark ;
: +<Mark ( -- bhere fhere 11 ) +Level <Mark 11 ;
: ->Resolve ( oldb oldf chk2 chk1 -- )
    tok?pairs? if 2drop else >Resolve -Level then
;
: -<Resolve ( oldb oldf chk -- )
    11 tok?pairs? if 2drop else <Resolve -Level then
;
825
tokens definitions

also forth

\ Use this to "Roll Your Own"
\ Defines a named word that emits the given 16-bit bytecode.
: token: ( bytecode -- ) \ Name
    also tokens definitions
    create w,
    previous definitions
    does> w@ emit-word
; valid-tick-target-word-type

previous

\ Take Forth/tokenizer commands only
: tokenizer[ ( -- )
\   ['] interpret-do-literal is do-literal
    [ forth ] ['] drop [ tokens ] is do-literal
    only forth also tokenizer definitions
;

tokenizer definitions

\ Restore normal FCode behavior
: ]tokenizer ( -- )
    ['] tokenize-literal is do-literal
    compiling @ if tokens-only else init-path then
;
854
855
tokens definitions
\ Token control structure words
\ !! Any word followed by ( T) is an executable token, *not* forth!
\ Check codes on the stack: 11=begin 22=if/ahead 33=do 44=case 55=of 66=endof

: ['] ( -- ) ?valid-tick-target b(') ; valid-tick-target
: to ( -- ) b(to) ; valid-tick-target

: ahead ( -- bhere fhere 22 ) bbranch ( T) +>Mark h# 22 ;
: if ( -- bhere fhere 22 ) b?branch ( T) +>Mark h# 22 ;

: then ( oldb oldf 22 -- )
    b(>resolve) ( T)
    [ tokenizer ] h# 22 ->Resolve [ tokens ]
;

: else ( b1 f1 22 -- b2 f2 22 )
    ahead [ tokenizer ] but [ tokens ] ( b2 f2 22 b1 f1 22 )
    then ( T) ( )
;

: begin ( -- bhere fhere 11 ) b(<mark) ( T) +<Mark ;
: again ( oldb oldf 11 -- ) bbranch ( T) -<Resolve ;
: until ( oldb oldf 11 -- ) b?branch ( T) -<Resolve ;
: while ( bhere fhere 11 -- whileb whilef 22 bhere fhere 11 )
    if ( T)
    [ tokenizer ] but ( whileb whilef 22 oldb oldf 11 ) [ tokens ]
;

: repeat ( whileb whilef 22 beginb beginf 11 -- ) again ( T) then ( T) ;


: case ( -- 0 44 )
    +Level b(case) ( T) [ tokenizer ] 0 h# 44 [ tokens ]
;

: of ( 44 -- of-b of-f 55 )
    h# 44 tok?pairs? [ also forth ] 0= if [ previous ]
        b(of) ( T) >Mark h# 55
    [ also forth ] then [ previous ]
;

: endof ( of-b of-f 55 -- endof-b endof-f 66 44 )
    b(endof) ( T) >Mark h# 66 ( of-b of-f endof-b endof-f )
    [ also tokenizer ] but ( T)
    h# 55 tok?pairs? [ forth ] 0= if
        [ tokenizer ] >Resolve h# 44
    [ forth ] then [ previous ]
;

: endcase ( 0 [endof-b endof-f 66 ...] 44 -- )
    b(endcase) ( T)
    h# 44 tok?pairs? [ also forth ] 0= if
        begin h# 66 = while
            [ tokenizer ] >Resolve ( T) [ forth ]
        repeat
        [ tokenizer ] -Level ( T)
    [ forth ] then [ previous ]
;


: do ( -- >b >f 33 <b <f 11 ) b(do) ( T) +>Mark h# 33 +<Mark ;
: ?do ( -- >b >f 33 <b <f 11 ) b(?do) ( T) +>Mark h# 33 +<Mark ;

: loop ( >b >f 33 <b <f 11 -- )
    b(loop) ( T) [ tokenizer ] -<Resolve h# 33 ->Resolve [ tokens ]
;

: +loop ( >b >f 33 <b <f 11 -- )
    b(+loop) ( T) [ tokenizer ] -<Resolve h# 33 ->Resolve [ tokens ]
;

: leave ( ??? -- ??? ) b(leave) ( T) ;
: ?leave ( ??? -- ??? ) if ( T) leave ( T) then ( T) ;
929
930
\ Add cross-compiler macros for common non-tokens
fload ${BP}/pkg/tokenizr/crosslis.fth


\ Radix words: inside an FCode definition use the m-* macro form,
\ outside fall through to the normal Forth word.
: hex ( -- )
    [ also forth ]
    compiling @ if m-hex else hex then
    [ previous ]
;
: decimal ( -- )
    [ also forth ]
    compiling @ if m-decimal else decimal then
    [ previous ]
;
: octal ( -- )
    [ also forth ]
    compiling @ if m-octal else octal then
    [ previous ]
;
: binary ( -- )
    [ also forth ]
    compiling @ if m-binary else binary then
    [ previous ]
;
955
956
\ String compiling words

\ (Implementation word, will not be supported)
hidden definitions
    \ Emit a counted string: one length byte followed by the characters.
    : ", ( adr len -- ) \ compile the string into byte-codes
        [ tokenizer ]
        dup emit-byte ( adr len )
        bounds ?do
            i c@ emit-byte
        loop
        [ hidden ]
    ;

    \ (Implementation word, will not be supported)
    : ," \ name" ( -- )
        [ tokenizer ] get-string [ hidden ] ",
    ;

tokens definitions

: " \ text" ( -- ) \ Compiling ( -- adr len ) \ Executing
    b(") [ also hidden ] ," [ previous ]
;

: s" \ text" ( -- ) \ Compiling ( -- adr len ) \ Executing
    b(")
    [ tokenizer ] ascii " parse [ hidden ] ", [ tokens ]
;

: ." ( -- ) \ text"
    " type
;

: .( ( -- ) \ text)
    b(") [ tokenizer ] ascii ) parse [ hidden ] ", [ tokens ] type
;
993
\ Offset16 support
: offset16 ( -- ) \ Intentional redefinition
    offset16 \ compile token
    offset-8? [ tokenizer ] off \ Clear flag: branch offsets are 16-bit from now on
    [ tokens ]
;


\ New NAME shorthand form for "name" property
: name ( adr len -- )
    encode-string b(") [ tokenizer ] " name"
    [ hidden ] ", [ tokens ]
    property
;
1008
\ Emit the first character of the next word as a literal.
: ascii \ name ( -- n )
    [ tokenizer ] safe-parse-word drop c@ emit-number [ tokens ]
;

\ Same, masked to a control character (low 5 bits).
: control \ name ( -- n )
    [ tokenizer ] safe-parse-word drop c@ h# 1f and emit-number [ tokens ]
;

\ ANS char: interpretation-only; error inside a definition.
: char \ name ( -- n )
    [ also forth ]
    compiling @ if
        .tokenization-error ." 'char' is not permitted inside FCode definitions"
    exit then
    [ previous ]
    ascii ( T)
;

\ ANS [char]: compile-only; error outside a definition.
: [char] \ name ( -- n )
    [ also forth ]
    compiling @ 0= if
        .tokenization-error ." '[char]' is not permitted outside FCode definitions"
    exit then
    [ previous ]
    ascii ( T)
;
1034
1035\ Three ways of creating a new token's name:
1036\ Make it an external name,
1037\ make it headered only if fcode-debug? is true,
1038\ or don't make its name at all.
1039\
1040also forth also hidden definitions
\ Each word takes the token's name string; the headerless variant
\ discards it (2drop) and emits new-token with no name at all.
 1041 : make-external-token ( $adr,len -- )
 1042 external-token ",
 1043 ;
 1044 : make-headerless-token ( $adr,len -- )
 1045 2drop new-token
 1046 ;
 1047 : make-headered-token ( $adr,len -- )
 1048 named-token ",
 1049 ;
1050previous previous definitions
1051
1052
1053tokenizer definitions
1054
1055also forth also hidden
1056
\ make-token-name is the pluggable naming policy; headerless by default.
1057defer make-token-name ['] make-headerless-token to make-token-name
1058
1059\ Create word for newly-defined 'local' tokens
\ local: defines a host-side word for a newly assigned FCode token:
\ it records the escape/lookup byte pair, emits the token once, bumps
\ the lookup number and the #locals count; executing the created word
\ later re-emits the same local token (does> emit-local).
1060: local: \ name ( -- )
1061 safe-parse-word 2dup $create ( adr len )
1062 make-token-name
1063 here next-lookup# @ c,
1064 local-escape @ c, emit-local advance-lookup#
1065 1 #locals +!
1066 does> emit-local
1067; valid-tick-target-word-type
1068
1069previous previous
1070
\ Run local: with the tokens vocabulary as current, then restore.
1071: define-local: ( -- ) also tokens definitions local: previous ;
1072
1073\ End creation of new local token
\ Finishes a token definition started by the redefined ':' below.
\ Clears compile state, checks the saved stack pointer (tok?csp?
\ exits early on mismatch), and emits reveal plus the b(;) token.
\ Reports an error if invoked outside a definition.
1074: end-local-token ( -- )
1075
1076 compiling @ if
1077 compiling off
1078 init-path
1079 tok?csp? if exit then
1080 else
1081 .tokenization-error ." ';' only allowed within definitions."
1082 exit
1083 then
1084
1085 [ also tokens ] reveal b(;) [ previous ]
1086;
1087
1088tokens definitions
1089
1090: ; ( -- ) \ New version of ; to end new-token definitions
1091 end-local-token
1092;
1093
1094
1095tokenizer definitions
1096\ Create new local tokens
\ Points the lookup table at the current byte position (bhere), creates
\ the local word, then restricts the search order to tokens only.
1097: start-local-token \ name ( -- )
1098 bhere set-lookup-pointer define-local:
1099 tokens-only \ Restrict search within localword to tokens
1100;
1101
\ crash-site remembers where the 'crash unnest' sequence was emitted so
\ 'defer' (below) can point new deferred tokens at it.
1102variable crash-site
1103: emit-crash ( -- )
1104 bhere crash-site ! [ also tokens ] crash unnest [ previous ]
1105;
1106
1107\ The user may over-ride headerless directives in the source by
1108\ turning on the variable force-headers? or force-external?
1109\ before running the tokenize command. (This is most commonly
1110 done on the command-line.) Similarly, the user may over-ride
1111\ headers directives by turning on force-headerless?
1112\
1113\ In case of conflict, i.e., if more than one of these variables
1114\ are turned on, force-external? over-rides force-headers?
1115\ and force-headers? over-rides force-headerless?
1116\
1117\ Don't ever over-ride or down-grade the external directive!
1118\
\ Install the given naming policy ACF into make-token-name, but let the
\ user's force-* variables override it. Checks are ordered so that, per
\ the comment above, force-external? beats force-headers? which beats
\ force-headerless?.
1119: set-make-token-name ( ACF -- )
1120 [ also hidden ]
1121 force-headerless? @ if drop ['] make-headerless-token then
1122 force-headers? @ if drop ['] make-headered-token then
1123 force-external? @ if drop ['] make-external-token then
1124 [ previous ]
1125 to make-token-name
1126;
1127
1128only forth also tokenizer also reforth definitions
1129also hidden
\ Source-level directives. Note 'external' bypasses set-make-token-name
\ on purpose: the external directive must never be overridden (see the
\ comment block above).
1130: headers ( -- ) ['] make-headered-token set-make-token-name ;
1131: headerless ( -- ) ['] make-headerless-token set-make-token-name ;
1132: external ( -- ) ['] make-external-token to make-token-name ;
1133previous
1134
\ Re-export these host words into this vocabulary unchanged.
1135alias fload fload
1136alias id: id:
1137alias purpose: purpose:
1138alias copyright: copyright:
1139
\ Tokenizer versions of the defining words. Each creates a new local
\ token, bumps its statistics counter, and emits the matching b(...)
\ FCode token.
1140: defer \ name ( -- ) \ Compiling
1141 #defers inc
1142 crash-site @ set-lookup-pointer \ Deferred token points to 'crash'
1143 define-local: [ also tokens ] b(defer) [ previous ]
1144;
1145
1146: constant \ name ( -- ) \ Compiling ( -- n ) \ Executing
1147 start-local-token ( n ) #constants inc
1148 [ also tokens ] b(constant) [ previous ]
1149 \ advance-lookup#
1150 init-path
1151;
1152
1153: value \ name ( -- ) \ Compiling ( -- n ) \ Executing
1154 start-local-token ( n ) #values inc
1155 [ also tokens ] b(value) [ previous ]
1156 \ advance-lookup#
1157 init-path
1158;
1159
1160: variable \ name ( -- ) \ Compiling ( -- adr ) \ Executing
1161 start-local-token #variables inc
1162 [ also tokens ] b(variable) [ previous ]
1163 \ advance-lookup#
1164 init-path
1165;
1166
\ lvariable is treated identically to variable.
1167alias lvariable variable
1168alias alias alias
1169
1170\ Override certain Forth words in interpret state
1171
1172\ We only allow 'create' in interpret state, for creating data tables
1173\ using c, w, etc.
1174: create \ name ( -- ) \ This 'create' for interpreting only
1175 start-local-token
1176 [ also tokens ] b(create) [ previous ]
1177 init-path
1178;
1179
1180: buffer: \ name ( -- ) \ Tokenizing ( -- buff-adr ) \ Executing
1181 start-local-token #buffers inc
1182 [ also tokens ] b(buffer:) [ previous ]
1183 init-path
1184;
1185
1186\ Although ' is only allowed in interpret state, we can still make it a
1187\ valid target of itself because it results in the same thing as [']
1188: ' ( -- ) \ name
1189 ?valid-tick-target
1190 [ also tokens ] b(') [ previous ]
1191; valid-tick-target
\ colon-cf emits the b(:) token; used by the redefined ':' below.
1192: colon-cf ( -- )
1193 [ also tokens ] b(:) [ previous ]
1194;
1195
\ allot is disallowed in tokenizer source; report where it occurred.
1196: dict-msg ( -- ) ." Dictionary storage is restricted. " where ;
1197: allot ( #bytes -- ) ." ALLOT - " dict-msg ;
1198
1199
1200\ New STRUCT structure words
\ struct compiles a literal 0 (the initial field offset).
1201: struct ( -- ) [ also tokens ] 0 [ previous ] ;
1202
1203: field \ name ( -- ) \ Tokenizing ( struct-adr -- field-adr ) \ Executing
1204 start-local-token
1205 [ also tokens ] b(field) [ previous ]
1206 init-path
1207;
1208
\ Vocabulary-manipulation words are stubbed out to an error message so
\ tokenized source cannot change the search order. 'only' is left
\ alone deliberately (see commented line) as an escape hatch.
1209: vocab-msg ." Vocabulary changing is not allowed. " where ;
1210\ : only vocab-msg ; \ Escape below with 'only'
1211: also vocab-msg ;
1212: previous vocab-msg ;
1213: except vocab-msg ;
1214: seal vocab-msg ;
1215: definitions vocab-msg ;
1216: forth vocab-msg ;
1217: root vocab-msg ;
1218: hidden vocab-msg ;
1219: assembler vocab-msg ;
1220
1221
1222\ Save dangerous defining words for last
\ The redefined ':' records the stack pointer (checked by tok?csp? when
\ ';' runs), creates a new local token, emits b(:), hides the new name
\ until reveal, and enters compile state.
1223: : \ name ( -- ) \ New version of : to create new tokens
1224 !csp \ save stack so ";" can check it
1225 start-local-token colon-cf
1226 hide compiling on
1227;
1228
1229
1230only forth also tokenizer also definitions
1231\ Initialize prior to executing tokenize
 1232 \
 1233 \ Support routines: We want to be able to clear the dictionary
 1234 \ after we're done so that we can do multiple invocations of
 1235 \ tokenize from within a single invocation of the tokenizer.
 1236 \
\ set-tkz-marker evaluates "marker tkz-barrier"; clear-tkz-marker skips
\ the first 7 characters ("marker ") and evaluates just "tkz-barrier",
\ which executes the marker and discards everything defined since.
 1237 create tkz-marker-cmnd ," marker tkz-barrier"
 1238 : set-tkz-marker ( -- ) tkz-marker-cmnd count eval ;
 1239 : clear-tkz-marker ( -- ) tkz-marker-cmnd count 7 /string eval ;
1240
\ Reset all per-run tokenizer state before each 'tokenize' invocation:
\ statistics counters, emission/bookkeeping variables, base/naming
\ policy, and the checksum location; finally plant the dictionary
\ marker so tkz-cleanup can discard this run's definitions.
1241: init-vars ( -- )
1242 #literals off
1243 #apps off
1244 #locals off
1245 #primitives off
1246
1247 #values off
1248 #variables off
1249 #constants off
1250 #defers off
1251 #buffers off
1252
1253 #end0s off
1254
1255 [ reforth ] headers [ tokenizer ]
1256 bytes-emitted off
1257 next-lookup# off
1258 checksum off
1259 Level off
1260 compiling off
1261 first-local-escape local-escape !
\ NOTE(review): a second, redundant 'next-lookup# off' used to follow
\ here, exactly repeating the one above; it was a no-op and is removed.
1263 fcode-vers off
1264
1265 tokenization-error off
1266
1267 -1 checksumloc !
1268 set-tkz-marker
1269;
1270
1271\ Cleanup after executing tokenize
\ Executes the tkz-barrier marker, forgetting everything defined during
\ the tokenize run (see set-tkz-marker above).
1272: tkz-cleanup ( -- )
1273 clear-tkz-marker
1274;
1275
\ Debugging variant of the interpreter loop: prints the stack, the
\ current word, and the search order before compiling each word.
1276: debug-interpret ( -- )
1277 begin
1278 ?stack parse-word dup
1279 while
1280 cr ." stack is: " .s
1281 cr ." word is: " dup ".
1282 cr ." order is: " order
1283 cr $compile
1284 repeat
1285 2drop
1286;
1287
1288\ Show final compilation statistics
1289
1290\ Show one at a time:
\ Prints "<count> <label>" only when the counter is non-zero; otherwise
\ prints nothing and drops the label string.
1291: .statistic ( $adr,len vble-addr -- )
1292 @ ?dup if
1293 8 .r space 2dup type cr
1294 then 2drop
1295;
1296
\ Reports, in decimal, how many FCodes of each version level were
\ compiled; zero counters are silently skipped by .statistic.
1297: .statistics ( -- )
1298 push-decimal
1299
1300 " :Version 1 FCodes compiled (obsolete FCodes)"
1301 #version1s .statistic
1302
1303 " :Version 1 FCodes compiled"
1304 #version1+2s .statistic
1305
1306 " :Version 2.0 FCodes compiled (may require version 2 bootprom)"
1307 #version2s .statistic
1308
1309 " :Version 2.1 FCodes compiled (may require version 2.3 bootprom)"
1310 #version2.1s .statistic
1311
1312 " :Version 2.2 FCodes compiled (may require version 2.4 bootprom)"
1313 #version2.2s .statistic
1314
1315 " :Version 2.3 FCodes compiled (may require version 2.6 bootprom)"
1316 #version2.3s .statistic
1317
1318 " :Version 3 FCodes compiled (may require version 3 bootprom)"
1319 #version3s .statistic
1320
1321 " :Obsolete FCodes compiled (may not work on version 3 bootproms)"
1322 #obsoletes .statistic
1323
1324 pop-base
1325;
1326
1327tokens definitions
1328
1329also forth
\ Emits the underlying end0 token, then (unless silent) announces it
\ and prints statistics if no error occurred. Counts end0s seen in
\ interpret state; an end0 inside a definition resets the count.
1330: end0 ( -- ) \ Intentional redefinition
1331 end0 silent @ 0= if
1332 cr ." END0 encountered." cr
1333 tokenization-error @ 0= if .statistics then
1334 then
1335 compiling @ 0= if 1 #end0s +! else #end0s off then
1336; valid-tick-target
1337previous
1338
1339tokenizer definitions
1340
\ Unconditional single-line statistic: "<count> <label>" (unlike
\ .statistic, zero counts are printed too).
1341: .stat ( $adr,len vble-addr -- )
1342 @ 8 .r space type cr
1343;
\ Dump all the per-category counters in decimal.
1344: .stats ( -- )
1345 push-decimal
1346 " Literals " #literals .stat
1347 " Non-(lit) Primitives" #primitives .stat
1348 " Application Codes" #apps .stat
1349 " Local Codes Created" #locals .stat
1350 " Variables" #variables .stat
1351 " Values" #values .stat
1352 " Constants" #constants .stat
1353 " Buffer:s" #buffers .stat
1354 " Defers" #defers .stat
1355 pop-base
1356;
1357
\ Writes the 32-byte a.out header straight to the output file.
1358: write-header ( -- ) header d# 32 ofd @ fputs ; \ don't affect checksum
1359
1360: full-size ( -- size ) \ Entire file, except a.out header
1361 ofd @ ftell aout-header? @ if d# 32 - then
1362;
1363: fcode-size ( -- size ) \ fcode-versionx thru end0 ONLY
1364 bytes-emitted @
1365;
1366
\ If no end0 was seen, warn (unless silent) and append one; then patch
\ the FCode length field at checksumloc+2.
1367: fix-length ( -- size )
1368 #end0s @ 0= if
1369 silent @ 0= if
1370 ??cr ." *** Warning: FCode token END0 is missing at the end of the file. ***" cr
1371 then
1372 0 emit-byte \ END0
1373 then
1374 fcode-size checksumloc @ 2+ patch-long
1375;
\ Fold the 32-bit running checksum down to 16 bits (add halves twice)
\ and patch it into the header at checksumloc.
1376: fix-checksum ( -- )
1377 checksum @ checksum off
1378 lwsplit + lwsplit +
1379 h# ffff and checksumloc @ patch-word
1380;
\ When an a.out header is present, patch the total size at offset 4.
1381: fix-header ( -- )
1382 aout-header? @ if full-size 4 patch-long then
1383;
1384
\ Template symbol-table entry for the a.out label (see fix-symtab);
\ 15 bytes total.
1385create symtab 4 , 5 c, 0 c, 0 w, 0 , 0 w, 0 c,
1386d# 15 constant /symtab
1387
\ Forward the three Fcode-version words to their tokens-vocabulary
\ implementations, restoring the tokenizer vocabulary afterwards.
1388: Fcode-version1 ( -- )
1389 [ tokens ] Fcode-version1 [ tokenizer ]
1390;
1391: Fcode-version2 ( -- )
1392 [ tokens ] Fcode-version2 [ tokenizer ]
1393;
1394: Fcode-version3 ( -- )
1395 [ tokens ] Fcode-version3 [ tokenizer ]
1396;
1397
1398\ a.out(5) symbol buffer
1399128 buffer: label-string
1400
1401variable append-label? append-label? off
1402
1403
\ If a label was requested, patch the header and append the symtab
\ entry plus the counted label string to the output file.
\ NOTE(review): 'bl word' here parses the label from the current input
\ stream at fixup time — verify against how append-label? is enabled.
1404: fix-symtab ( -- )
1405 append-label? @ if
1406 h# 0c h# 10 patch-long
1407 symtab /symtab ofd @ fputs ( )
1408 bl word label-string "copy ( )
1409 label-string dup c@ dup >r ( adr len )
1410 1+ 4 + swap c! ( )
1411 label-string r> 2+ ofd @ fputs ( )
1412 then
1413;
1414
\ Installed (in $tokenize) as the behavior of header/do-literal so any
\ token emitted before Fcode-version# aborts with this message.
1415: .fcode-vers-sb-first ( -- )
1416 true abort" Fcode-version# should be the first FCode command!"
1417;
1418
1419only forth also tokenizer also forth definitions
1420
14210 value pci-prom?
1422
\ Patch one byte at the given offset in the output file, adjusting for
\ the 32-byte a.out header if present; file position is preserved.
1423: patch-pci-byte ( byte offset -- )
1424 aout-header? @ if d# 32 + then
1425 fhere >r \ Save current file pointer
1426 ofd @ fseek \ Move back to 'addr' location
1427 .byte
1428 r> ofd @ fseek \ Restore current file pointer
1429;
\ Patch a 16-bit value little-endian (low byte first) at offset.
1430: patch-pci-word-le ( w offset -- )
1431 >r wbsplit swap r@ patch-pci-byte
1432 r> ca1+ patch-pci-byte
1433;
\ Store the image size in 512-byte units at offset 2c of the PCI header.
1434: set-pci-fcode-size ( -- )
1435 fcode-size h# 200 round-up h# 200 / ( size%512 )
1436 h# 2c patch-pci-word-le
1437;
1438
\ Patch the code-revision word at offset 2e; values wider than 16 bits
\ are truncated with a warning. No-op unless a PCI header was emitted.
1439: pci-code-revision ( w -- )
1440 pci-prom? 0= if drop exit then
1441 lwsplit if
1442 ." PCI Code Revision value too large. Must be a 2-byte value, "
1443 ." truncating to h#" dup .x cr
1444 then
1445 h# 2e patch-pci-word-le
1446;
\ Same treatment for the Vital Product Data offset at offset 24.
1447: pci-vpd-offset ( w -- )
1448 pci-prom? 0= if drop exit then
1449 lwsplit if
1450 ." PCI VPD Offset value too large. Must be a 2-byte value, "
1451 ." truncating to h#" dup .x cr
1452 then
1453 h# 24 patch-pci-word-le
1454;
1455
1456d# 100 buffer: output-name-buf
1457
1458\ Remove the filename extension if there is one in the last pathname component
\ Scans backwards from the end; a '.' shortens the string to exclude
\ the extension, while '/' or '\' means the last component has no
\ extension and the string is returned unchanged.
1459: ?shorten-name ( input-file$ -- file$ )
1460 2dup ( adr len adr len )
1461 begin dup while ( adr len adr len' )
1462 1- 2dup + c@ case ( adr len adr len' )
1463 \ Stop if we encounter "/" or "\" before a "."
1464 ascii / of 2drop exit endof
1465 ascii \ of 2drop exit endof
1466 ascii . of 2swap 2drop exit endof
1467 endcase
1468 repeat ( adr len adr len' )
1469 2drop
1470;
\ Build a default output name: input name minus extension, plus ".fc".
1471: synthesize-name ( input-file$ -- input-file$ output-file$ )
1472 0 output-name-buf c!
1473 2dup ?shorten-name output-name-buf $cat
1474 " .fc" output-name-buf $cat
1475 output-name-buf count
1476;
1477
\ Abort with a usage message when an argument is missing; tokenizer?
\ selects "tokenize"-vs-"detokenize" wording.
1478: ?arg ( $len tokenizer? -- )
1479 swap 0= if
1480 ." Usage: "
1481 dup 0= if ." de" then
1482 ." tokenize input-filename"
1483 if ." [ output-filename ]" then
1484 cr abort
1485 then drop
1486;
1487
\ Require an input name; synthesize the output name when it is absent.
1488: check-args ( input-file$ output-file$ -- )
1489 2 pick true ?arg
1490 dup 0= if 2drop synthesize-name then
1491;
1492
\ string3 holds the output filename (packed) so a failed run can delete
\ the partial output file (see .not-generated).
1493128 buffer: string3
1494: save$3 ( adr len -- pstr ) string3 pack ;
1495
1496: .not-generated ( -- )
1497 string3 _delete drop
1498 cr ." Output file not generated"
1499 cr cr
1500;
1501
1502\ Generate common text for Tokenizer and De-Tokenizer Version display.
1503
1504string-array tkz-version-string
 1505 ," Version 3.3"
 1506 ," Copyright 1996-2006 Sun Microsystems, Inc. All Rights Reserved"
 1507 ," Use is subject to license terms."
1508end-string-array
1509
1510: .inp-fil ( $adr,len -- )
1511 ." Input file: " type
1512;
1513
1514: .outp-fil ( $adr,len -- )
1515 ." Output file: " type
1516;
1517
1518
\ Banner for the tokenizer: title, all version strings, then the input
\ and output filenames (both left on the stack for the caller).
1519: .tokenizer-version ( inp-flnm$ outp-flnm$ -- inp-flnm$ outp-flnm$ )
1520 ." FCode Tokenizer"
1521 ['] tkz-version-string /string-array
1522 0 do i tkz-version-string ". cr loop
1523 cr
1524 2over .inp-fil
1525 2dup .outp-fil cr
1526;
1527
\ Banner for the detokenizer, emitted as Forth comments ("\ " prefix).
\ Peeks at the input filename without consuming it (>in is restored).
1528: .detokenizer-version ( -- ) \ <input-filename> (but does not consume it.)
1529 >in @ >r
1530 parse-word ( inp-flnm$ )
1531 r> >in !
1532 dup false ?arg
1533 ." \ FCode Detokenizer" 0 tkz-version-string ". cr
1534 ['] tkz-version-string /string-array
1535 1 do ." \ " i tkz-version-string ". cr loop
1536 cr
1537 ." \ " .inp-fil cr
1538;
1539
\ Establish the standard search order used while tokenizing.
1540: tokenizer-order
1541 only forth also tokenizer also forth definitions
1542;
1543
1544\ The real nitty-gritty work of the tokenizer.
1545\ In order to get messages for all our errors,
1546\ we need a substitute for $do-undefined
1547\ Of course, we'll put it back when we're done...
1548
\ Undefined-word handler for tokenizing: record the error and report.
1549: $tok-interpret-do-undefined
1550 tokenization-error on
1551 .not-found
1552;
1553
\ Run 'included' on the input file with the substitute undefined-word
\ handler installed; always restores the original handler and the path.
\ Returns true on any caught error or recorded tokenization error.
1554: (tokenize) ( input-filename$adr,len -- error? )
1555 ['] $do-undefined behavior >r
1556 ['] $tok-interpret-do-undefined
1557 to $do-undefined
1558
1559 init-path
1560
1561 ['] included catch ( n1 n2 error# | false )
1562
1563 restore-path
1564
1565 r> to $do-undefined
1566 ?dup if .error 2drop true
1567 else tokenization-error @
1568 then ( error? )
1569
1570;
1571
\ Top-level tokenize driver: validate arguments, initialize state, open
\ the output file, run (tokenize) on the input, check for stack-depth
\ changes and obsolete version-1 tokens, and on success patch the
\ checksum, length, PCI size, header, and symbol table before closing.
\ On any error the partial output file is deleted via .not-generated.
1572: $tokenize ( input-filename$ output-filename$ -- )
1573
1574 check-args
1575
1576 tokenizer-order
1577
1578 init-vars
1579 \ warning on
1580
1581[ifexist] xref-on
1582 xref-init if 2swap xref-push-file 2swap xref-on then
1583[then]
1584 \ Define the TOKENIZER? symbol so code can check against it.
1585 0 0 " TOKENIZER?" [set-symbol]
1586
 \ Until Fcode-version# runs, any token emission aborts.
1587 ['] .fcode-vers-sb-first is header
1588 ['] .fcode-vers-sb-first is do-literal
1589 silent @ if warning off
1590 else
1591 .tokenizer-version
1592 then
1593
1594 save$3 new-file ( input$ )
1595 aout-header? @ if write-header then ( input$ )
1596
1597 \ Save the current stack depth
1598 \ (now counting input$ ; later counting error? and the old depth)
1599 depth >r
1600
1601 (tokenize) ( error? )
1602
1603 \ Compare the current SP against the previous one
1604 r> depth <>
1605 dup if ." Error: Stack depth changed" cr
1606 then or ( error?' )
1607
1608 #version1s @
1609 dup if
1610 cr ." Fatal error: Obsolete version 1 tokens used"
1611 then or ( error?'' )
1612
1613 silent @ 0= if cr then
1614 ( error?'' )
1615 if
 \ Delete the partial output and report, via the common helper
 \ defined above (was duplicated inline here).
 .not-generated
1619 exit
1620 then
1621
1622 pad-size
1623 fix-checksum
1624 fix-length
1625 pci-prom? if set-pci-fcode-size false to pci-prom? then
1626 fix-header \ !!! fix header last so checksum is correct.
1627
1628 fix-symtab
1629
1630 ofd @ fclose
1631
1632[ifexist] xref-off
1633 xref-off
1634[then]
1635
1636 tkz-cleanup
1637
1638;
1639
\ Command-line form: parse both filenames from the input stream.
1640: tokenize ( -- ) \ input-filename output-filename
1641 parse-word parse-word $tokenize
1642;
1643
1644only forth also tokenizer also forth definitions
1645
1646\ Make the following variables available in vocabulary forth
\ 'silent?' is an alternate spelling for the same variable as 'silent'.
1647alias silent silent
1648alias silent? silent
1649alias pre1275 pre1275
1650alias append-label? append-label?
1651alias aout-header? aout-header?
1652alias offset-8? offset-8?
1653
\ Emits a PCI expansion-ROM header followed by the PCIR data structure
\ directly into the output byte stream. Validates that vendor-id and
\ device-id fit in 16 bits and class-code in 24 bits; on any violation
\ all three arguments are dropped and nothing is emitted. Sets
\ pci-prom? so $tokenize patches the image size later, and rejects a
\ second invocation.
1654: pci-header ( vendor-id device-id class-code -- ) \ Generate ROM header
1655
1656 pci-prom? if
1657 ." Only one PCI Header is allowed"
1658 3drop exit
1659 then
1660
1661 true to pci-prom?
1662 \ The device-id and vendor-id should be 2-byte (word) values.
1663 \ The class-code is a 3-byte value. This method will check that
1664 \ the values passed in are not too big, but will not report a
1665 \ problem if they are too small...
1666
 \ error flag kept on the return stack across the three range checks
1667 false >r
1668 ( vendor-id device-id class-code )
1669 dup h# ff00.0000 and 0<> if ( vendor-id device-id class-code )
1670 ." Class-code value too large. Must be a 3-byte value!" cr
1671 r> drop true >r ( vendor-id device-id class-code )
1672 then ( vendor-id device-id class-code )
1673
1674 ( vendor-id device-id class-code )
1675 over h# ffff.0000 and 0<> if
1676 ." Device-id value too large. Must be a 2-byte value!" cr
1677 r> drop true >r ( vendor-id device-id class-code )
1678 then
1679
1680 ( vendor-id device-id class-code )
1681 rot dup h# ffff.0000 and 0<> if ( device-id class-code vendor-id )
1682 ." Vendor-id value too large. Must be a 2-byte value!" cr
1683 r> drop true >r ( device-id class-code vendor-id )
1684 then ( device-id class-code vendor-id )
1685
1686 r> if 3drop exit then ( device-id class-code vendor-id )
1687
1688 rot swap ( class-code device-id vendor-id )
1689
1690
1691 \ Preliminaries out of the way, now to build the header...
1692
1693 ( class-code device-id vendor-id )
1694
 \ NOTE(review): the bare numbers below (55, aa, 34, 1c, ...) rely on
 \ the current base being hexadecimal here — confirm against callers.
1695 55 emit-byte aa emit-byte ( class-code device-id vendor-id ) \ PCI magic number
1696 34 emit-byte 00 emit-byte ( class-code device-id vendor-id ) \ Start of FCode
1697
 \ pad the remainder of the ROM header with zero bytes
1698 14 0 do 0 emit-byte loop ( class-code device-id vendor-id )
1699
1700 1c emit-byte 00 emit-byte ( class-code device-id vendor-id )
1701 00 emit-byte 00 emit-byte ( class-code device-id vendor-id ) \ Start of PCI Data Structure:
1702
1703 ascii P emit-byte ( class-code device-id vendor-id ) \ PCIR string
1704 ascii C emit-byte ( class-code device-id vendor-id )
1705 ascii I emit-byte ( class-code device-id vendor-id )
1706 ascii R emit-byte ( class-code device-id vendor-id )
1707
1708 \ Now we consume the vendor-id
1709 wbsplit swap ( class-code device-id vend-hi vend-lo )
1710 emit-byte emit-byte ( class-code device-id )
1711
1712 \ Now we consume the device-id
1713 wbsplit swap ( class-code dev-hi dev-lo )
1714 emit-byte emit-byte ( class-code )
1715
1716 00 emit-byte 00 emit-byte ( class-code ) \ 2 VPD
1717 18 emit-byte 00 emit-byte ( class-code ) \ 2 DS len
1718 00 emit-byte ( class-code ) \ 1 rev
1719
1720 \ Now we consume the class-code
1721 lwsplit swap ( class-up class-lo )
1722 wbsplit swap ( class-up class-lohi class-lolo )
1723 emit-byte emit-byte ( class-up )
1724 wbsplit swap ( class-uphi class-uplo )
1725 emit-byte drop ( )
1726
1727 \ Now finish off the header
1728 10 emit-byte 00 emit-byte ( ) \ 2 image len XXX - We can't know this yet
1729 01 emit-byte 00 emit-byte ( ) \ 2 rev of code
1730 01 emit-byte ( ) \ 1 code type
1731 80 emit-byte ( ) \ 1 indicator
1732 00 emit-byte 00 emit-byte ( ) \ 2 reserved
1733;
1734
1735only forth definitions
1736
\ Pull in the detokenizer, then leave the dictionary in a known state.
1737fload ${BP}/pkg/fcode/detokeni.fth \ Detokenizer
1738only forth definitions
1739" No new words defined yet." $create
1740
1741warning on \ Set the default state before saving forth image.