\ ========== Copyright Header Begin ==========================================
\ Hypervisor Software File: kerncode.fth
\ Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
\ - Do no alter or remove copyright notices
\ - Redistribution and use of this software in source and binary forms, with
\ or without modification, are permitted provided that the following
\ - Redistribution of source code must retain the above copyright notice,
\ this list of conditions and the following disclaimer.
\ - Redistribution in binary form must reproduce the above copyright notice,
\ this list of conditions and the following disclaimer in the
\ documentation and/or other materials provided with the distribution.
\ Neither the name of Sun Microsystems, Inc. or the names of contributors
\ may be used to endorse or promote products derived from this software
\ without specific prior written permission.
\ This software is provided "AS IS," without a warranty of any kind.
\ ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
\ INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
\ PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN
\ MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR
\ ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
\ DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN
\ OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR
\ FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE
\ DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY,
\ ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF
\ SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
\ You acknowledge that this software is not designed, licensed or
\ intended for use in the design, construction, operation or maintenance of
\ ========== Copyright Header End ============================================
id: @(#)kerncode.fth 2.43 07/06/05 10:54:47
copyright: Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
copyright: Copyright 1985-1990 Bradley Forthware
copyright: Use is subject to license terms.
\ ident "@(#)kerncode.fth 2.43 07/06/05 SMI"
\ Meta compiler source for the Forth 83 kernel code words.
\ Change code-field: so that when compiled into a metacompiler definition,
\ that word would return the 0-relative address. When compiled into a
\ target definition, the word would return the absolute address. Essentially,
\ we need to define "dolabel" very early in the kernel source.
\ Allocate and clear the initial user area image
\ ---- Assembler macros that reside in the host environment
\ and assemble code for the target environment
\ Forth Virtual Machine registers
\ Note that the Forth Stack Pointer (%g7) is NOT the same register that
\ C uses for the stack pointer (%o6). The hardware does all sorts of
\ funny things with the C stack pointer when you do save and restore
\ instructions, and when the register windows overflow.
:-h sp %i5 ;-h :-h base %g2 ;-h :-h up %g3 ;-h
:-h tos %g4 ;-h :-h ip %i3 ;-h :-h rp %i4 ;-h
:-h scr %l0 ;-h :-h sc1 %l1 ;-h :-h sc2 %l2 ;-h :-h sc3 %l3 ;-h
:-h sc4 %l4 ;-h :-h sc5 %l5 ;-h :-h sc6 %l6 ;-h :-h sc7 %l7 ;-h
:-h spc %o7 ;-h \ Saved Program Counter - set by the CALL instruction
\ Parameter Field Address
\t32-t \dtc-t :-h apf ( -- ) spc 8 ;-h
\t32-t \itc-t :-h apf ( -- ) sc1 4 ;-h
\t16-t :-h apf ( -- ) sc1 2 ;-h
\ Put a bubble in the pipeline to patch the load interlock bug
32\ :-h slln ( rs1 rs2 rd -- ) sll ;-h
32\ :-h srln ( rs1 rs2 rd -- ) srl ;-h
32\ :-h sran ( rs1 rs2 rd -- ) sra ;-h
32\ :-h nget ( ptr off dst -- ) ld ;-h
32\ :-h nput ( src off ptr -- ) st ;-h
64\ :-h slln ( rs1 rs2 rd -- ) sllx ;-h
64\ :-h srln ( rs1 rs2 rd -- ) srlx ;-h
64\ :-h sran ( rs1 rs2 rd -- ) srax ;-h
64\ :-h nget ( ptr off dst -- ) ldx ;-h
64\ :-h nput ( src off ptr -- ) stx ;-h
:-h lget ( ptr dst -- ) 0 swap ld ;-h
:-h lput ( src ptr -- ) 0 swap st ;-h
:-h get ( ptr dst -- ) 0 swap nget ;-h
:-h put ( src ptr -- ) 0 swap nput ;-h
:-h move ( src dst -- ) %g0 -rot add ;-h
:-h ainc ( ptr -- ) dup /n swap add ;-h
:-h adec ( ptr -- ) dup /n swap sub ;-h
:-h push ( src ptr -- ) dup adec put ;-h
:-h pop ( ptr dst -- ) over -rot get ainc ;-h
:-h test ( src -- ) %g0 %g0 addcc ;-h
:-h cmp ( s1 s2 -- ) %g0 subcc ;-h
:-h rtget ( srca srcb dst -- )
\t16-t dup >r lduh r> ( dst )
\t32-t \ We could increment a counter here to gather statistics with
\t32-t \ no speed penalty in the 32-bit !
\t8-t 0 swap ldsb \ Is the limited range a problem?
64\ \t32-t tuck 0 swap lduw
:-h 'user# \ name ( -- user# )
' ( acf-of-user-variable ) >body-t
:-h 'user \ name ( -- user-addressing-mode )
meta-asm[ up 'user# ]meta-asm
:-h 'body \ name ( -- variable-apf )
' ( acf-of-user-variable ) >body-t
:-h 'acf \ name ( -- variable-apf )
' ( acf-of-user-variable ) >body-t
2dup sethi swap h# 3ff land swap tuck add
\ There are a few places in the code where moving the previous instruction
\ to the delay slot of the "next jmp" instruction won't work. Generally
\ these are places where a control structure ends just before "next".
\ inhibit-delay assembles a nop instruction in cases where that is needed.
\ This ought to be done by the assembler, but it is hard to figure out.
\t16-t meta-asm[ nop ]meta-asm
\ assembler macro to assemble next
\t8-t byte-next always branchif
\t8-t nop \ XXX should be token-table sc2 sethi
\t16-t here-t 4 - l@-t here-t l!-t \ Advance previous instruction
\t16-t h# 81c0.e000 here-t 4 - l!-t 4 allot-t \ up 0 %g0 jmpl instr.
\t32-t ip /token-t ip add
\t16-t \itc :-h tld ( src offset dst -- )
\t16-t \itc r@ tshift-t r> sll
\ Create the code for "next" in the user area
\t16-t compile-in-user-area
mlabel (next) \ Shared code for next; will be copied into user area
\t16-t restore-dictionary
\itc-t d# 64 equ #user-init \ Leaves space for the shared "next"
\ ---- Action code for target words classes.
\ "docode" eliminates the need to separately acf-align both the code field
\ and the body of a code definition, thus saving 12 bytes per code definition
\t16-t code-field: docode
\dtc \ The label's code field contains dolabel call sp adec
tos sp put \ Push the apf of the variable
\itc tos 3 tos add \ Align to a longword boundary
\dtc \ The colon definition's code field contains docolon call rp adec
ip rp put \ Save the ip on the return stack
apf ip add \ Reload ip with apf of colon definition
\dtc \ The word's code field contains docreate call sp adec
tos sp put \ Push the apf of the variable
\ In-dictionary variables are a leftover from the earliest FORTH
\ implementations. They have no place in a ROMable target-system
\ and we are deprecating support for them; but Just In Case you
\ ever want to restore support for them, define the command-line
\ symbol: in-dictionary-variables
[ifdef] in-dictionary-variables
\ Support for in-dictionary variables, i.e., where the variable's
\ storage location is in the dictionary rather than in user-space.
\dtc \ The variable's code field contains dovariable call sp adec
tos sp put \ Push the apf of the variable
\ Hey, waidaminit! This is the same as docreate just above!
\ An in-dictionary variable could be as simple as create 0 , ...
\dtc \ The user variable's code field contains douser call sp adec
\t16 apf scr lduh \ Get the user number
\t32 apf scr ld \ Get the user number
scr up tos add \ Add the base address of the user area
\dtc \ The value's code field contains dovalue call sp adec
\t16 apf scr lduh \ Get the user number
\t32 apf scr ld \ Get the user number
scr up tos nget \ Get the contents of the user area location
\ Defers could run faster by compiling the defer offset into the instruction
\ as in up user# scr ld scr base %g0 jmpl nop
\ But it would be harder to compile, metacompile, decompile, and set
\dtc \ The user variable's code field contains dodefer call apf scr ld
\t32 scr up scr ld \ Get the acf stored in that user location
\t16 scr up sc1 tld \ Get the acf stored in that user location
\t16 sc1 base scr rtget \ Read the token
scr base %g0 jmpl \ Execute that word
\dtc \ The constant's code field contains doconstant call sp adec
\dtc apf tos ld \ Get the constant's value
\itc apf tos lduh \ Get the high halfword of the constant's value
\itc tos 10 tos slln \ Shift into high halfword
\itc apf 2 + scr lduh \ Get the low halfword of the constant's value
\itc scr tos tos add \ Merge the two halves
64\ \itc apf 4 + scr lduh
64\ \itc apf 6 + scr lduh
\dtc \ The constant's code field contains do2constant call sp adec
sp adec \ Make room on the stack
tos sp /n nput \ Save the old tos on the memory stack
\dtc apf tos ld \ Get the bottom constant's value
64\ \dtc tos th 20 tos sllx
\dtc tos sp put \ Put it on the memory stack
\dtc apf /n + tos ld \ Get the top constant's value
64\ \dtc tos th 20 tos sllx
64\ \dtc apf /n 4 + + scr ld
\itc apf tos lduh \ Get the high halfword of the bottom value
\itc tos sp 0 sth \ Store on stack
\itc apf /w + tos lduh \ Get the low halfword of the bottom value
\itc tos sp 2 sth \ Store on stack
\itc apf /n + tos lduh \ Get the high halfword of the top value
\itc tos 10 tos sll \ Shift into high halfword
\itc apf /n /w + + scr lduh \ Get the low halfword of the top value
\itc scr tos tos add \ Merge the two halves
\itc \ The child word's code field contains a pointer to the doesclause
\dtc \ The child word's code field contains doesclause call apf scr add
\ The doesclause's code field contains dodoes call sp adec
\ ---- Define the format of target code fields by creating host
\ words that will create target code fields.
:-h place-cf-t ( action-apf -- )
\dtc-t meta-asm[ ( action-adr ) call sp adec ]meta-asm
\itc-t \t32-t here /token-t + aligned
\itc-t \t16-t [ tshift-t 4 <> ]-h [if] here /token-t + aligned [else] docode [then]
\itc-t place-cf-t align-t
:-h colon-cf ( -- ) ( 'body-t ) docolon place-cf-t
\dtc-t -4 allot-t meta-asm[ rp adec ]meta-asm
( 'body-t ) dodefer place-cf-t
\dtc-t -4 allot-t meta-asm[ apf scr ld ]meta-asm
:-h label-cf ( -- ) ( 'body-t ) dolabel place-cf-t align-t ;-h
:-h constant-cf ( -- ) ( 'body-t ) doconstant place-cf-t ;-h
:-h create-cf ( -- ) ( 'body-t ) docreate place-cf-t ;-h
[ifdef] in-dictionary-variables
:-h variable-cf ( -- ) ( 'body-t ) dovariable place-cf-t ;-h
:-h user-cf ( -- ) ( 'body-t ) douser place-cf-t ;-h
:-h value-cf ( -- ) ( 'body-t ) dovalue place-cf-t ;-h
\dtc-t ( 'body-t ) dodoes place-cf-t
\itc-t meta-asm[ dodoes call sp adec ]meta-asm
:-h start;code ( -- ) ;-h
\ The forward reference will be resolved later by fix-vocabularies
\dtc-t meta-asm[ apf scr add ]meta-asm \ Address of parameter field
\ ---- Run-time words compiled by compiling words.
\ We can do better; combine the incrementing in ip ainc with that in next
\t16 ip 0 scr lduh scr 10 scr slln ip 2 tos lduh scr tos tos add
64\ \t16 tos 10 tos slln ip 4 scr lduh
64\ \t16 tos scr tos add tos 10 tos slln ip 6 scr lduh scr tos tos add
64\ \t32 ip 0 scr lduw scr 20 scr sllx ip 4 tos lduw scr tos tos add
\t16 ip 0 tos lduh ip 2 ip add tos 1 tos sub
\ High level branch. The branch offset is compiled in-line.
\ High level conditional branch.
code ?branch ( f -- ) \ Takes the branch if the flag is false
scr 1 scr addcc \ increment loop index
( 0 B: ) bran1 vc brif \ branch if not done
scr rp put \ Write back the loop index (delay slot)
rp 3 /n* rp add \ done; remove loop params from stack
ip /branch ip add \ Skip the branch offset
\ Run time word for +loop
code (+loop) ( increment -- )
scr tos scr addcc \ increment loop index
scr rp put \ Write back the loop index
bran1 ( 0 B: ) vc brif \ branch if not done
rp 3 /n* rp add \ done; remove loop params from stack
ip /branch ip add \ Skip the branch offset
( 1 L: ) mloclabel pd0 ( -- r: loop-end-offset l+0x8000 i-l-0x8000 )
ip rp push \ remember the do offset address
ip /branch ip add \ skip the do offset
\ Loop index for current do loop
\ Loop index for next enclosing do loop
( 2 L: ) mloclabel pleave
rp 2 /n* ip nget \ Get the address of the ending offset
rp 3 /n* rp add \ get rid of the loop indices
code unloop ( -- ) rp 3 /n* rp add c; \ Discard the loop indices
code (of) ( selector test -- [ selector ] )
sp scr pop \ Test in tos, Selector in scr
scr tos move \ Delay slot - Copy selector to tos
ip /branch ip add \ Skip the branch offset
ip scr ip add \ Take the branch
\ (endof) is the same as branch, and (endcase) is the same as drop,
\ but redefining them this way makes the decompiler much easier.
code (endof) ( -- ) ip scr bget ip scr ip add c;
code (endcase) ( n -- ) sp tos pop c;
\ ---- Ordinary Forth words.
\ Execute a Forth word given a code field address
assembler ( 3 L: ) mlabel dofalse 0 tos move next meta
\ Convert a character to a digit according to the current base
code digit ( char base -- digit true | char false )
tos scr move \ base in scr
tos ascii 0 tos subcc \ convert to number
( 3 B: ) dofalse < brif \ Anything less than ascii 0 isn't a digit
tos h# 0a cmp \ test for >= 10
>= if annul \ Try for a letter representing a digit
tos scr cmp \ Compare digit to base
tos ascii A ascii 0 - cmp
( 3 B: ) dofalse < brif \ bad if > '9' and < 'A'
tos ascii a ascii 0 - cmp
tos ascii A ascii 0 - d# 10 - tos sub \ Delay
tos ascii a ascii A - tos sub
tos scr cmp \ Compare digit to base
( 3 B: ) dofalse >= brif \ Not a digit
tos sp put \ Replace the char on the stack with the digit
-1 tos move \ True to indicate success
\ Copy cnt characters starting at from-addr to to-addr. Copying is done
\ strictly from low to high addresses, so be careful of overlap between the
code cmove ( src dst cnt -- ) \ Copy from bottom to top
sp 1 /n* scr nget \ Src into scr
sp 0 /n* sc1 nget \ Dst into sc1
scr tos scr add \ Src = src+cnt (optimize for low-to-high copy)
sc1 tos sc1 add \ Dst = dst+cnt
sc1 1 sc1 sub \ Account for the position of the addcc instruction
%g0 tos tos subcc \ Negate cnt
scr tos sc2 ldub \ (delay) Load byte
tos 1 tos addcc \ (delay) Increment cnt
sc2 sc1 tos stb \ Store byte
sp 2 /n* tos nget \ Delete 3 stack items
code cmove> ( src dst cnt -- ) \ Copy from top to bottom
sp 1 /n* scr nget \ Src into scr
sp 0 /n* sc1 nget \ Dst into sc1
sc1 1 sc1 add \ Account for the position of the subcc instruction
tos 0 cmp \ Don't do anything if the count is 0.
tos 1 tos sub \ Decrement cnt (startup loop)
scr tos sc2 ldub \ (delay) Load byte
tos 1 tos subcc \ (delay) Decrement cnt
sc2 sc1 tos stb \ Store byte
sp 2 /n* tos nget \ Delete 3 stack items
code and ( n1 n2 -- n3 ) sp scr pop tos scr tos and c;
code or ( n1 n2 -- n3 ) sp scr pop tos scr tos or c;
code xor ( n1 n2 -- n3 ) sp scr pop tos scr tos xor c;
code << ( n1 cnt -- n2 ) sp scr pop scr tos tos slln c;
code >> ( n1 cnt -- n2 ) sp scr pop scr tos tos srln c;
code >>a ( n1 cnt -- n2 ) sp scr pop scr tos tos sran c;
code lshift ( n1 cnt -- n2 ) sp scr pop scr tos tos slln c;
code rshift ( n1 cnt -- n2 ) sp scr pop scr tos tos srln c;
code + ( n1 n2 -- n3 ) sp scr pop tos scr tos add c;
code - ( n1 n2 -- n3 ) sp scr pop scr tos tos sub c;
code invert ( n1 -- n2 ) tos -1 tos xor c;
code negate ( n1 -- n2 ) %g0 tos tos sub c;
\ Mark the first code-definition in the dictionary;
\ we will need it later...
\ XXX We might be able to make this low-dictionary-adr
\ XXX and move that from debugm.fth (or debugm16.fth )
: first-code-word ( -- acf ) (') (lit) ;
: abs ( n1 -- n2 ) dup 0< if negate then ;
: min ( n1 n2 -- n3 ) 2dup > if swap then drop ;
: max ( n1 n2 -- n3 ) 2dup < if swap then drop ;
: umin ( u1 u2 -- u3 ) 2dup u> if swap then drop ;
: umax ( u1 u2 -- u3 ) 2dup u< if swap then drop ;
code up@ ( -- addr ) tos sp push up tos move c;
code sp@ ( -- addr ) tos sp push sp tos move c;
code rp@ ( -- addr ) tos sp push rp tos move c;
code up! ( addr -- ) tos up move sp tos pop c;
code sp! ( addr -- ) tos sp move sp tos pop c;
code rp! ( addr -- ) tos rp move sp tos pop c;
code >r ( n -- ) tos rp push sp tos pop c;
code r> ( -- n ) tos sp push rp tos pop c;
code r@ ( -- n ) tos sp push rp tos get c;
code >user ( pfa -- addr )
code >ip ( n -- ) tos rp push sp tos pop c;
code ip> ( -- n ) tos sp push rp tos pop c;
code ip@ ( -- n ) tos sp push rp tos get c;
: ip>token ( ip -- token-adr ) /token - ;
code exit ( -- ) rp ip pop c;
code unnest ( -- ) rp ip pop c;
code tuck ( n1 n2 -- n2 n1 n2 )
code flip ( w1 -- w2 ) \ byte-swap the low two bytes; clear the rest.
tos 0ff scr and \ lowest byte into scr
scr 8 scr slln \ lowest byte into second byte of scr
tos 8 tos srln \ second byte into lowest byte of tos
tos 0ff tos and \ clear the rest of tos
:-h leaveflag ( condition -- )
\ macro to assemble code to leave a flag on the stack
code 0= ( n -- f ) tos test 0= leaveflag c;
code 0<> ( n -- f ) tos test 0<> leaveflag c;
code 0< ( n -- f ) tos test 0< leaveflag c;
code 0<= ( n -- f ) tos test <= leaveflag c;
code 0> ( n -- f ) tos test > leaveflag c;
code 0>= ( n -- f ) tos test 0>= leaveflag c;
code < ( n1 n2 -- f ) compare < leaveflag c;
code > ( n1 n2 -- f ) compare > leaveflag c;
code = ( n1 n2 -- f ) compare 0= leaveflag c;
code <> ( n1 n2 -- f ) compare <> leaveflag c;
code u> ( n1 n2 -- f ) compare u> leaveflag c;
code u<= ( n1 n2 -- f ) compare u<= leaveflag c;
code u< ( n1 n2 -- f ) compare u< leaveflag c;
code u>= ( n1 n2 -- f ) compare u>= leaveflag c;
code >= ( n1 n2 -- f ) compare >= leaveflag c;
code <= ( n1 n2 -- f ) compare <= leaveflag c;
code drop ( n -- ) sp tos pop c;
code dup ( n -- n n ) tos sp push c;
code over ( n1 n2 -- n1 n2 n1 ) tos sp push sp /n tos nget c;
code swap ( n1 n2 -- n2 n1 )
code rot ( n1 n2 n3 -- n2 n3 n1 )
code -rot ( n1 n2 n3 -- n3 n1 n2 )
code 2drop ( d -- ) sp ainc sp tos pop c;
code 2over ( d1 d2 -- d1 d2 d1 )
code 2swap ( d1 d2 -- d2 d1 )
code 3drop ( n1 n2 n3 -- )
code 3dup ( n1 n2 n3 -- n1 n2 n3 n1 n2 n3 )
code pick ( nm ... n1 n0 k -- nm ... n2 n0 nk )
32\ tos 2 tos sll \ Multiply by /n
64\ tos 3 tos sllx \ Multiply by /n
sp tos tos nget \ Index into stack
code 1+ ( n1 -- n2 ) tos 1 tos add c;
code 2+ ( n1 -- n2 ) tos 2 tos add c;
code 1- ( n1 -- n2 ) tos 1 tos sub c;
code 2- ( n1 -- n2 ) tos 2 tos sub c;
code 2/ ( n1 -- n2 ) tos 1 tos sran c;
code u2/ ( n1 -- n2 ) tos 1 tos srln c;
code 2* ( n1 -- n2 ) tos 1 tos slln c;
code 4* ( n1 -- n2 ) tos 2 tos slln c;
code 8* ( n1 -- n2 ) tos 3 tos slln c;
code d@ ( addr -- nlow nhigh )
64\ code x@ ( addr -- x ) \ doubleword aligned
code l@ ( addr -- l ) \ longword aligned
32\ code <l@ ( addr -- l ) tos 0 tos ld c;
code w@ ( addr -- w ) \ 16-bit word aligned
32\ code <w@ ( addr -- w ) tos 0 tos ldsh c; \ with sign extension
64\ code <w@ ( addr -- w )
64\ code <l@ ( addr -- l )
code unaligned-@ ( addr -- l )
tos 1 sc1 ldub scr 8 scr slln scr sc1 scr add
tos 2 sc1 ldub scr 8 scr slln scr sc1 scr add
tos 3 sc1 ldub scr 8 scr slln
64\ tos 4 sc1 ldub scr 8 scr slln scr sc1 scr add
64\ tos 5 sc1 ldub scr 8 scr slln scr sc1 scr add
64\ tos 6 sc1 ldub scr 8 scr slln scr sc1 scr add
64\ tos 7 sc1 ldub scr 8 scr slln
tos 1 sc1 ldub scr 8 scr slln scr sc1 scr add
tos 2 sc1 ldub scr 8 scr slln scr sc1 scr add
tos 3 sc1 ldub scr 8 scr slln scr sc1 tos add
code unaligned-l@ ( addr -- l )
tos 1 sc1 ldub scr 8 scr slln scr sc1 scr add
tos 2 sc1 ldub scr 8 scr slln scr sc1 scr add
tos 3 sc1 ldub scr 8 scr slln scr sc1 tos add
code unaligned-w@ ( addr -- w )
tos 1 sc1 ldub scr 8 scr slln scr sc1 tos add
\ 16-bit token version doesn't require alignment on a word boundary
( 4 L: ) mloclabel start-of-!
\ These two words are sufficient to implement a very fast IS
\ The first will be applied to USER definitions (primarily VALUEs
\ but also VARIABLEs) and the second to DEFER words.
\ Their actions are the same as the obsolete (is) used to be;
\ the main difference is that the determination of the word-type
\ of the target of the IS is made at compile-time rather than
tos sp push \ Do the (') in-line
ip 0 tos rtget \ Next token in caller
tos base tos add \ TOS <= ACF-of-next-token-in-caller
ip /token ip add \ Complete the (')
tos %g0 scr \ Do the >user in-line
( 4 B: ) start-of-! bra \ Go to the !
up scr tos add \ TOS <= user-addr of IS-target
code (is-defer) ( acf -- )
tos base scr sub \ Start the token!
\t16 scr tshift-t scr srl \ SCR <= token to store
ip 0 tos rtget \ Next token in caller
ip /token ip add \ Bump past next token in caller
tos base tos add \ TOS <= ACF of next token
tos %g0 sc1 \ Do the >user in-line
up sc1 tos add \ TOS <= user-addr of IS-target
scr tos \ Complete the token!
\t32 lput ( ???XXX tput )
code d! ( n-low n-high addr -- )
64\ code x! ( x addr -- )
code unaligned-d! ( d addr -- )
64\ scr tos 1 /n* 7 + stb
64\ scr 8 scr srln scr tos 1 /n* 6 + stb
64\ scr 8 scr srln scr tos 1 /n* 5 + stb
64\ scr 8 scr srln scr tos 1 /n* 4 + stb
scr 8 scr srln scr tos 1 /n* 2 + stb
scr 8 scr srln scr tos 1 /n* 1 + stb
scr 8 scr srln scr tos 1 /n* 0 + stb
64\ scr 8 scr srln scr tos 6 stb
64\ scr 8 scr srln scr tos 5 stb
64\ scr 8 scr srln scr tos 4 stb
scr 8 scr srln scr tos 2 stb
scr 8 scr srln scr tos 1 stb
scr 8 scr srln scr tos 0 stb
code unaligned-! ( n addr -- )
64\ scr 8 scr srln scr tos 6 stb
64\ scr 8 scr srln scr tos 5 stb
64\ scr 8 scr srln scr tos 4 stb
scr 8 scr srln scr tos 2 stb
scr 8 scr srln scr tos 1 stb
scr 8 scr srln scr tos 0 stb
scr 8 scr srln scr tos 2 stb
scr 8 scr srln scr tos 1 stb
scr 8 scr srln scr tos 0 stb
\ In some versions, be-l, needs to set a swap bit
: be-l, ( l -- ) here /l allot be-l! ;
code unaligned-l! ( n addr -- )
scr 8 scr srln scr tos 2 stb
scr 8 scr srln scr tos 1 stb
scr 8 scr srln scr tos 0 stb
code unaligned-w! ( w addr -- )
tos /n sc1 lduh tos /n 2 + scr lduh sc1 10 sc1 slln
64\ scr sc1 sc1 add tos /n 4 + scr lduh sc1 10 sc1 slln
64\ scr sc1 sc1 add tos /n 6 + scr lduh sc1 10 sc1 slln
tos 0 sc1 lduh tos 2 scr lduh sc1 10 sc1 slln
64\ scr sc1 sc1 add tos 4 scr lduh sc1 10 sc1 slln
64\ scr sc1 sc1 add tos 6 scr lduh sc1 10 sc1 slln
64\ scr tos 6 sth scr 10 scr srln
64\ scr tos 4 sth scr 10 scr srln
scr tos 2 sth scr 10 scr srln
64\ scr tos /n 6 + sth scr 10 scr srln
64\ scr tos /n 4 + sth scr 10 scr srln
scr tos /n 2 + sth scr 10 scr srln
\ code fill ( start-addr count char -- )
\ sp 0 /n* scr nget \ count in scr
\ sp 1 /n* sc1 nget \ start in sc1
code fill ( start-addr count char -- )
sp 0 /n* scr nget \ scr = count
>= if \ Enough to bother optimizing?
sp 1 /n* sc1 nget \ ( delay) sc1 = addr
\ Store stray bytes at top of range
scr sc1 sc2 add \ Last+1 byte location in range
sc2 3 sc3 andcc \ Count - # extra bytes at top of range (0-3)
scr sc3 scr sub \ Adjust main counter for later
0 F: bra \ Jump to the until branch
sc2 3 sc2 andn \ (delay) Starting adr at top (X X X 0|4)
tos sc2 sc3 stb \ Store data byte
sc3 1 sc3 subcc \ (delay)
\ Fill sc4-sc5 pair with repeated data bytes
tos ff sc4 and \ Mask all but desired byte
sc4 sc2 sc4 or \ sc4 = 0000abab
sc4 sc2 sc4 or \ sc4 = abababab
\ Store bulk of data, as 32-bit words (4 bytes at a time)
\ Guaranteed to execute at least once
scr 4 scr subcc \ Pre-subtract count
0 F: bra \ Jump to the until branch
sc1 4 sc3 add \ (delay) Pre-add starting address
sc4 sc3 scr st \ Store sc4 data (4 bytes)
scr 4 scr subcc \ (delay)
scr 8 scr add \ Restore correct remaining count
\ Store the few remaining bytes at bottom of range
0 F: bra \ Jump to the until branch
scr 0 %g0 subcc \ (delay)
tos sc1 scr stb \ Store data byte
scr 1 scr subcc \ (delay)
sp 2 /n* tos nget \ Remove 3 items off of stack
code noop ( -- ) inhibit-delay c;
32\ code n->l ( n.unsigned -- l ) inhibit-delay c;
64\ code n->l ( n.unsigned -- l ) tos 0 tos srl c;
: s>d ( n -- d ) dup 0< ; \ Depends on true=-1, false=0
code wbsplit ( l -- b.low b.high )
code bwjoin ( b.low b.high -- w )
code lwsplit ( l -- w.low w.high ) \ split a long into two words
code wljoin ( w.low w.high -- l )
scr 10 scr sll \ Throw away any high order bits in w.low
64\ code xlsplit ( x -- l.lo l.hi )
64\ tos 0 scr srl \ Clear high order 32 bits
64\ code lxjoin ( l.lo l.hi -- x )
64\ scr 0 scr srl \ Clear high order 32 bits
code ca+ ( addr index -- addr+index*/c )
code wa+ ( addr index -- addr+index*/w )
code la+ ( addr index -- addr+index*/l )
64\ code xa+ ( addr index -- addr+index*/x )
code na+ ( addr index -- addr+index*/n )
16\ tos 1 tos slln \ Multiply by /n
32\ tos 2 tos slln \ Multiply by /n
64\ tos 3 tos slln \ Multiply by /n
code ta+ ( addr index -- addr+index*/t )
code ca1+ ( addr -- addr+/w ) tos /c tos add c;
code char+ ( addr -- addr+/w ) tos /c tos add c;
code wa1+ ( addr -- addr+/w ) tos /w tos add c;
code la1+ ( addr -- addr+/l ) tos /l tos add c;
64\ code xa1+ ( addr -- addr+/x ) tos /x tos add c;
code na1+ ( addr -- addr+/n ) tos /n tos add c;
code cell+ ( addr -- addr+/n ) tos /n tos add c;
code ta1+ ( addr -- addr+/token ) tos /token tos add c;
code /c* ( n -- n*/c ) inhibit-delay c;
code chars ( n -- n*/c ) inhibit-delay c;
code /w* ( n -- n*/w ) tos 1 tos slln c;
code /l* ( n -- n*/l ) tos 2 tos slln c;
code /x* ( n -- n*/x ) tos 3 tos slln c;
16\ code /n* ( n -- n*/n ) tos 1 tos slln c; \ Multiply by /n
32\ code /n* ( n -- n*/n ) tos 2 tos slln c; \ Multiply by /n
64\ code /n* ( n -- n*/n ) tos 3 tos slln c; \ Multiply by /n
16\ code cells ( n -- n*/n ) tos 1 tos slln c; \ Multiply by /n
32\ code cells ( n -- n*/n ) tos 2 tos slln c; \ Multiply by /n
64\ code cells ( n -- n*/n ) tos 3 tos slln c; \ Multiply by /n
code upc ( char -- upper-case-char )
tos ascii A ascii a - tos add
code lcc ( char -- lower-case-char )
tos ascii a ascii A - tos add
\ string compare - case sensitive
code comp ( addr1 addr2 len -- -1 | 0 | 1 )
sp 0 /n* scr nget \ addr2 in scr
sp 1 /n* sc1 nget \ addr1 is sc1
0 F: bra \ jump to the subcc instruction
sc1 0 sc2 ldub \ Delay slot
\ string compare - case insensitive
code caps-comp ( addr1 addr2 len -- -1 | 0 | 1 )
sp 0 /n* scr nget \ addr2 in scr
sp 1 /n* sc1 nget \ addr1 is sc1
0 F: bra \ jump to the subcc instruction
sc2 ascii z cmp \ Delay slot
sc2 ascii A ascii a - sc2 add
sc3 ascii z cmp \ Delay slot
sc3 ascii A ascii a - sc3 add
sc1 0 sc2 ldub \ Delay slot
code pack ( str-addr len to -- to )
sp sc1 pop \ sc1 is "from"; tos is "to"
scr ff scr and \ Never store more than 257 bytes
scr tos 0 stb \ Place length byte
tos 1 tos add \ Offset "to" by 1 to skip past the length byte
%g0 tos scr stb \ Put a null byte at the end
0 F: bra \ jump to the until branch
scr 1 scr subcc \ Delay slot
sc1 scr sc2 ldub \ Delay slot
tos 1 tos sub \ Fix "to" to point to the length byte
\ Modifies caller's ip to skip over an in-line string
code skipstr ( -- addr len)
rp 0 scr nget \ Get string address in scr
scr 0 tos ldub \ Get length byte in tos
scr 1 scr add \ Address of data bytes
scr sp 0 /n* nput \ Put addr on stack
\ Now we have to skip the string
scr tos scr add \ Scr now points past the last data byte
scr #talign scr add \ Round up to token boundary + null byte
scr rp 0 nput \ Put the modified ip back
ip 0 tos ldub \ Get length byte in tos
ip 1 ip add \ Address of data bytes
ip sp 0 nput \ Put addr on stack
\ Now we have to skip the string
ip tos ip add \ ip now points past the last data byte
ip #talign ip add \ Round up to a token boundary, plus null byte
code count ( addr -- addr+1 len )
code between ( n min max -- f )
code within ( n1 min max+1 -- f )
code bounds ( adr len -- adr+len adr )
sc1 tos sc2 add \ adr+len
code origin+ ( n -- adr )
code origin- ( n -- adr )
tos 0 iflush \ This may cause a trap on MP machines
\ : instruction! ( bits adr -- )
code instruction! ( bits adr -- )
tos 0 iflush \ This may cause a trap on MP machines
: instruction, ( opcode -- )
here /l allot instruction!
\ ---- Support words for the incremental compiler
\ Create constants to represent the instructions that go into the
\ delay-slots of the code-fields of various definition-types.
\ We can use the assembler itself to construct the instruction.
\ This is more efficient and accurate than using literal numerics,
\ and will also be handy in determining definition-types.
\ Because constant is not yet properly defined, we have to use the
\ assembler to create the code-field of a constant definition-type.
\ This turns out to be not too bad, because we need the assembler anyway...
\ Integer value of the instruction that goes into the delay-slot
\ after the call in: create variable user value constant
\ and in the doesclause of a defining word that uses does>
\ The instruction itself:
\ Decrements the Stack Pointer.
\dtc sp adec \ Execute this in the delay slot
\dtc 64\ 0 l, \ High-half of longword constant for 64-bit platforms
\dtc sp adec \ This is the constant! = 8e21e00 /n or
\itc label dec-sp-instr #align-t negate allot-t \ Kind of suckey,
\ \itc code-field: dec-sp-instr \ Tried this instead; it failed BIG TIME!
\itc 64\ 0 l, \ High-half of longword constant for 64-bit platforms
\itc sp adec \ This is the constant! = 8e21e00 /n or
\dtc \ Integer value of the instruction that goes into the delay-slot
\dtc \ after the call in the CF of a word defined by : (colon).
\dtc \ The instruction itself:
\dtc \ Decrements the Return-Stack Pointer.
\dtc sp adec \ Execute this in the delay slot
\dtc 64\ 0 l, \ High-half of longword constant for 64-bit platforms
\dtc rp adec \ This is the constant! = 8c21a000 /n or
\dtc \ Integer value of the instruction that goes into the delay-slot
\dtc \ after the call in the CF of a child word of a does> definer
\dtc \ or in the CF of an action: of a word defined with used .
\dtc \ The instruction itself:
\dtc \ Adds 8 to the PC in %o7, yielding the PFA, which goes into scr
\dtc sp adec \ Execute this in the delay slot
\dtc 64\ 0 l, \ High-half of longword constant for 64-bit platforms
\dtc apf scr add \ This is the constant! = a003e008
\dtc \ Integer value of the instruction that goes into the delay-slot
\dtc \ after the call in the CF of a defer word.
\dtc \ The instruction itself:
\dtc \ Adds 8 to the PC in %o7, yielding the PFA, and loads the
\dtc \ contents of that location (i.e., the first Parameter) into scr
\dtc code param>scr-instr
\dtc sp adec \ Execute this in the delay slot
\dtc 64\ 0 l, \ High-half of longword constant for 64-bit platforms
\dtc apf scr ld \ This is the constant! = e003e008
\ Prepare the 30-bit-wide longword-offset for a call or branch instruction
: >offset-30 ( target-addr where -- longword-offset )
\ Put a call instruction to target-addr at where
: put-call ( target-addr where -- )
tuck >offset-30 ( where longword-offset )
4000.0000 or ( where call-instruction )
\ Put a branch instruction to target-addr at where
: put-branch ( target-addr where -- )
tuck >offset-30 ( where longword-offset )
3f.ffff and ( where branch-offset )
3080.0000 or ( where branch-instruction )
\ Replace the delay slot of the previous code field
: set-delay-slot ( delay-instruction -- ) here /l - instruction! ;
: place-call ( action-adr -- )
origin+ acf-align here /l 2* allot put-call
dec-sp-instr set-delay-slot \ sp adec
\ Place the "standard" code field, with a "sp /n sp sub" instruction
: place-cf ( action-adr -- )
\itc origin+ acf-align token,
\itc \t32 here ta1+ aligned origin -
\itc \t16 [ tshift-t 4 <> ] [if] here ta1+ aligned origin - [else] docode [then]
: >code ( acf-of-code-word -- address-of-start-of-machine-code )
\dtc : code? ( acf -- f ) \ True if the acf is for a code word
\dtc c@ h# c0 and h# 40 <> \ Most non-code words start with a call instr.
\itc \t16 tshift-t 4 <> [if]
\itc \t16 : code? ( acf -- f )
\itc \t16 dup token@ swap 2dup 2 + = >r 4 + = r> or
\itc \t16 : code? ( acf -- f )
\itc \t16 token@ origin- docode =
\t32 e006e000 instruction, \ ld [%i3], %l0
\t32 81c40002 instruction, \ jmp %l0, %g2, %g0
\t32 [ b606e000 /token or ]
\t32 literal instruction, \ add %i3, /token, %i3
\t16 81c0.e000 instruction, \ jmp %g3, 0, %g0
\t16 8000.0000 instruction, \ add %g0, %g0, %g0
\ The "word type" is a number that distinguishes one type of
\ word from another. This is highly implementation-dependent.
\ For the SPARC implementation, the magic number returned by
\ word-type is the offset of the action code from the origin
\itc \ Indicate whether the given location is a call instruction
\itc \ and, if so, return the target address
\itc : call-placed? ( acf -- addr true | false )
\itc dup l@ dup c000.0000 and 4000.0000 = tuck if
\itc 2 << l->n rot + swap
: word-type ( acf -- word-type )
: create-cf ( -- ) docreate place-cf ;
[ifdef] in-dictionary-variables
: variable-cf ( -- ) dovariable place-cf ;
: place-does ( -- ) dodoes place-call ;
\ Ip is assumed to point to (;code . flag is true if
\ the code at ip is a does> clause as opposed to a ;code clause.
: does-ip? ( ip -- ip' flag )
dup token@ ['] (does>) = ( ip flag )
if ta1+ acf-aligned la1+ la1+ true else ta1+ acf-aligned false then
: put-cf ( action-clause-addr where -- )
\dtc tuck put-call ( where )
\dtc pfa>scr-instr swap la1+ instruction! \ apf scr add
\ used sets the code field of the most-recently-defined word
\ so that it executes the code at action-clause-addr
: used ( action-clause-addr -- ) lastacf put-cf ;
\ Indicate whether the given address has the code-field of a does-clause.
\ (I.e., the call to dodoes).
\ Leave the address, return a flag.
: does-clause? ( addr -- addr flag )
dup la1+ l@ dec-sp-instr = if
dup \ Delay-slot instruction is right...
\ Indicate whether given ACF is of a word that was defined with
\ does> . If so, return the does-cfa under the true.
: does-cf? ( possible-acf -- does-cfa true | false )
\dtc \ Possible valid child word of a does> definer?
\dtc dup la1+ l@ pfa>scr-instr = if \ apf scr add
\dtc \ Delay-slot instruction is right...
word-type \ Possible address of the does-clause
does-clause? ?dup nip exit
\ Need this to make headerless work
\dtc dec-rp-instr set-delay-slot \ rp adec
: colon-cf? ( possible-acf -- flag )
\dtc dup word-type docolon origin+ = swap
\dtc la1+ l@ dec-rp-instr = and \ rp adec
\itc token@ ['] here token@ =
: user-cf ( -- ) douser place-cf ;
: value-cf ( -- ) dovalue place-cf ;
: constant-cf ( -- ) doconstant place-cf ;
\dtc param>scr-instr set-delay-slot \ apf scr ld
\ Indicate whether the word whose ACF is given
\ was defined with defer .
word-type dodefer origin+ =
\dtc swap la1+ l@ param>scr-instr = and \ apf scr ld
: 2constant-cf ( -- ) do2constant place-cf ;
: branch! ( offset where -- )
: branch@ ( where -- offset )
\ >target depends on the way that branches are compiled
: >target ( ip-of-branch-instruction -- target ) ta1+ dup branch@ + ;
\ ---- More ordinary Forth words.
: a@ ( adr -- adr' ) @ origin+ ;
: a! ( adr1 adr2 -- ) swap origin- swap ! ;
\t16 tos 0 tos lduh tos tshift-t tos sll
\ XX 64\ \t32 tos /l scr ld
\ XX 64\ \t32 tos tos lget
\ XX 64\ \t32 tos h# 20 tos sllx
\ XX 64\ \t32 tos scr tos or
\t16 scr tshift-t scr srl
\ XX 64\ \t32 scr tos /l st
\ XX 64\ \t32 scr h# 20 scr srlx
: a, ( adr -- ) here /a allot a! ;
code token@ ( addr -- cfa )
code token! ( cfa addr -- )
\t16 scr tshift-t scr srl
\t32 scr tos lput ( ???XXX tput )
: token, ( cfa -- ) here /token allot token! ;
: !null-link ( adr -- ) null swap link! ;
: !null-token ( adr -- ) null swap token! ;
code non-null? ( link -- false | link true )
false scr move \ Delay slot
: get-token? ( adr -- false | acf true ) token@ non-null? ;
: another-link? ( adr -- false | link true ) link@ non-null? ;
code body> ( pfa -- cfa )
code >body ( cfa -- pfa )
\ Move to a machine alignment boundary.
\ SPARC requires alignment on 32-bit boundaries, but we only require
\ 16-bit alignment in the 16-bit token version, using halfword memory
\ accesses to make this work.
: round-down ( adr granularity -- adr' ) 1- invert and ;
: round-up ( adr granularity -- adr' ) 1- tuck + swap invert and ;
: (align) ( size granularity -- )
1- begin dup here and while 0 c, repeat drop
: aligned ( adr -- adr' ) 3 + -4 and ;
code acf-aligned ( adr -- adr' )
\t16 1 tshift-t << 1 - scr move
: acf-align ( -- ) #acf-align (align) here 'lastacf token! ;
: /mod ( dividend divisor -- remainder quotient )
\ Check if either factor is negative
\ Both factors not non-negative do division by:
\ Take absolute value and do unsigned division
\ Convert to truncated signed divide by:
\ if dividend is negative then negate the remainder
\ if dividend and divisor have opposite signs then negate the quotient
\ Then convert to floored signed divide by:
\ if quotient is negative and remainder is non-zero
\ add divisor to remainder and decrement quotient
2dup swap abs swap abs ( n1 n2 u1 u2) \ Absolute values
u/mod ( n1 n2 urem uqout) \ Unsigned divide
>r >r ( n1 n2) ( uquot urem)
over 0< if ( n1 n2) ( uquot urem)
r> negate >r \ Negative dividend; negate remainder
then ( n1 n2) ( uquot trem)
swap over ( n2 n1 n2) ( uquot trem)
xor 0< if ( n2) ( uquot trem)
negate ( n2 trem tquot) \ Opposite signs; negate quotient
+ ( tquot rem) \ Negative quotient & non-zero remainder
swap 1- ( rem quot) \ add divisor to rem. & decrement quot.
else \ Both factors non-negative
: / ( n1 n2 -- quot ) /mod nip ;
: mod ( n1 n2 -- rem ) /mod drop ;
\ SPARC version is dynamically relocated, so we don't need a bitmap
: clear-relocation-bits ( adr len -- ) 2drop ;