[OpenSPARC-T2-SAM] / obp / obp / pkg / fcode / sparc / fcode32.fth

\ ========== Copyright Header Begin ==========================================
\ 
\ Hypervisor Software File: fcode32.fth
\ 
\ Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
\ 
\  - Do no alter or remove copyright notices
\ 
\  - Redistribution and use of this software in source and binary forms, with 
\    or without modification, are permitted provided that the following 
\    conditions are met: 
\ 
\  - Redistribution of source code must retain the above copyright notice, 
\    this list of conditions and the following disclaimer.
\ 
\  - Redistribution in binary form must reproduce the above copyright notice,
\    this list of conditions and the following disclaimer in the
\    documentation and/or other materials provided with the distribution. 
\ 
\    Neither the name of Sun Microsystems, Inc. or the names of contributors 
\ may be used to endorse or promote products derived from this software 
\ without specific prior written permission. 
\ 
\     This software is provided "AS IS," without a warranty of any kind. 
\ ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, 
\ INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A 
\ PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN 
\ MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR 
\ ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR 
\ DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN 
\ OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR 
\ FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE 
\ DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, 
\ ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF 
\ SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
\ 
\ You acknowledge that this software is not designed, licensed or
\ intended for use in the design, construction, operation or maintenance of
\ any nuclear facility. 
\ 
\ ========== Copyright Header End ============================================
id: @(#)fcode32.fth 1.15 06/10/26
purpose: 
copyright: Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
copyright: Use is subject to license terms.

headers

code 2l->n ( l1 l2 -- x1 x2 )
   tos 0  tos   sra
   sp  0  scr   ldx
   scr 0  scr   sra
   scr    sp 0  stx
c;

: l-$number ( adr len -- true | n false )  swap n->l swap  $number ;

: l-move ( adr1 adr2 cnt -- )  rot n->l rot n->l rot n->l  move  ;
: l-fill ( adr cnt byte -- )  rot n->l rot n->l rot  fill  ;

: lcpeek ( adr -- { byte true } | false )  n->l  cpeek  ;
: lwpeek ( adr -- { byte true } | false )  n->l  wpeek  ;
: llpeek ( adr -- { byte true } | false )  n->l  lpeek  ;

: lcpoke ( byte adr -- ok? )  n->l  cpoke  ;
: lwpoke ( byte adr -- ok? )  n->l  wpoke  ;
: llpoke ( byte adr -- ok? )  n->l  lpoke  ;

: lb?branch  ( [ <mark ] -- [ >mark ] )

   \ New feature of IEEE 1275
   state @ 0=  if  ( flag )
      l->n  if  get-offset drop  else  skip-bytes  then
      exit
   then

   compile l->n

   get-offset 0<  if  ( )
      \ The get-backward-mark is needed in case of the following valid
      \ ANS Forth construct:    BEGIN  .. WHILE .. UNTIL .. THEN
      get-backward-mark  [compile] until
   else
      [compile] if
   then
; immediate

code l+! ( n addr -- )
      tos 0    tos  srl
      sp       scr  get
\dtc  tos      sc1  lget

\itc  tos 0    sc1  lduh
\itc  tos 2    sc2  lduh
\itc  sc1 10   sc1  sll
\itc  sc1 sc2  sc1  add
      bubble
      sc1 scr  sc1  add
\dtc   sc1      tos   lput
\itc   sc1  tos 2     sth
\itc   sc1 10   sc1   srl
\itc   sc1  tos 0     sth
   sp  1 /n*  tos  nget
   sp  2 /n*  sp   add
c;

code l>>a     ( n1 cnt -- n2 )
   sp       scr  pop
   scr tos  tos  sra
c;
code lrshift  ( n1 cnt -- n2 )
   sp  scr       pop
   scr tos  tos  srl
c;

: lb(of)     ( marks -- marks )
   drop-offset  +level compile 2l->n -level  [compile] of
; immediate
: lb(do)     ( -- )
   drop-offset  +level compile 2l->n -level  [compile]  do
; immediate
: lb(?do)    ( -- )
   drop-offset  +level compile 2l->n -level  [compile]  ?do
; immediate
: lb(+loop)  ( -- )
   drop-offset  +level compile l->n -level  [compile] +loop
; immediate

transient
also assembler definitions
: compare
   sp  scr  pop
   scr tos  cmp
;
: leaveflag  ( condition -- )
\ macro to assemble code to leave a flag on the stack
   if  ,%icc
   0  tos  move   \ Delay slot
      -1 tos move
   then
;
previous definitions
resident
warning @ warning off
\ Note: l0= and l= clash with the link defs in kernport.fth
code l0=  ( n -- f )  tos test  0=  leaveflag c;
code l0<> ( n -- f )  tos test  0<> leaveflag c;
code l0<  ( n -- f )  tos test  0<  leaveflag c;
code l0<= ( n -- f )  tos test  <=  leaveflag c;
code l0>  ( n -- f )  tos test  >   leaveflag c;
code l0>= ( n -- f )  tos test  0>= leaveflag c;

code l<   ( n1 n2 -- f )  compare <   leaveflag c;
code l>   ( n1 n2 -- f )  compare >   leaveflag c;
code l=   ( n1 n2 -- f )  compare 0=  leaveflag c;
code l<>  ( n1 n2 -- f )  compare <>  leaveflag c;
code lu>  ( n1 n2 -- f )  compare u>  leaveflag c;
code lu<= ( n1 n2 -- f )  compare u<= leaveflag c;
code lu<  ( n1 n2 -- f )  compare u<  leaveflag c;
code lu>= ( n1 n2 -- f )  compare u>= leaveflag c;
code l>=  ( n1 n2 -- f )  compare >=  leaveflag c;
code l<=  ( n1 n2 -- f )  compare <=  leaveflag c;
warning !

: l-between ( n1 n2 n3 -- flag )  >r over l<= swap r> l<= and  ;
: l-within  ( n1 n2 n3 -- flag )  over - >r - r> lu<  ;
: l-max     ( n1 n2 -- max )  2dup l< if  swap  then  drop  ;
: l-min     ( n1 n2 -- min )  2dup l> if  swap  then  drop  ;
: l-abs     ( n -- |n| )  dup l0< if  negate  then  ;

code l-@   ( addr -- n )
      tos 0    tos  srl
\dtc  tos 0     tos  ld
\itc  tos 2     scr  lduh   tos 0   tos  lduh
\itc  tos h# 10 tos  sll    scr tos tos  add
c;

code l-!   ( n addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
\dtc   scr     tos 0   st
\itc   scr     tos 2  sth
\itc   scr 10  scr    srl
\itc   scr     tos 0  sth

   sp 1 /n*  tos  nget
   sp 2 /n*  sp   add
c;

code l2@  ( addr -- d )
    tos 0    tos  srl
    tos /n   sc1  lduh
    tos /n 2 +   scr  lduh  sc1 10   sc1  sll   scr sc1  sc1  add
    tos /n 4 +   scr  lduh  sc1 10   sc1  sllx  scr sc1  sc1  add
    tos /n 6 +   scr  lduh  sc1 10   sc1  sllx  scr sc1  scr  add

    scr      sp   push

    tos  0  sc1  lduh
    tos 2   scr  lduh  sc1 10   sc1  sll  scr sc1  sc1  add
    tos 4   scr  lduh  sc1 10   sc1  sllx  scr sc1  sc1  add
    tos 6   scr  lduh  sc1 10   sc1  sllx

    scr  sc1  tos  add
c;
code l2!  ( d addr -- )
    tos 0    tos  srl
    sp  0   scr    nget
    bubble

    scr   tos 6  sth  scr 10  scr  srlx
    scr   tos 4  sth  scr 10  scr  srlx
    scr   tos 2  sth  scr 10  scr  srl
    scr   tos 0  sth

    sp  /n  scr    nget

    bubble

    scr   tos /n 6 + sth  scr 10  scr  srlx
    scr   tos /n 4 + sth  scr 10  scr  srlx
    scr   tos /n 2 + sth  scr 10  scr  srl
    scr   tos /n 0 + sth

    sp  2 /n*   tos    nget
    sp  3 /n*   sp     add
c;

code ll@  ( addr -- l ) \ longword aligned
    tos 0    tos  srl
    tos tos lget
c;
code ll!  ( n addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
    bubble
    scr   tos 0 st
    sp  1 /n*  tos  nget
    sp  2 /n*  sp   add
c;

code l<w@ ( addr -- w )
    tos 0  tos  srl
    tos 0  tos  ldsh
    tos 0  tos  sra
c;

code lw@  ( addr -- w ) \ 16-bit word aligned
    tos 0    tos  srl
    tos 0  tos  lduh
c;

code lw!  ( w addr -- )
   tos 0    tos  srl
   sp  0  scr  nget
   bubble
   scr   tos 0 sth
   sp 1 /n*  tos  nget
   sp 2 /n*  sp   add
c;

code lc@  ( addr -- c )
    tos 0    tos  srl
    tos 0  tos  ldub
c;
code lc!  ( c addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
    bubble
    scr   tos 0 stb
    sp  1 /n*  tos  nget
    sp  2 /n*  sp   add
c;

code lon ( addr -- )
      tos  0 tos   srl
      -1   scr     move
\dtc  scr  tos 0   st
\itc  scr  tos 0   sth
\itc  scr  tos 2   sth
       sp  tos     pop
c;
code loff ( addr -- )
       tos  0 tos  srl
\dtc   %g0  tos 0  st
\itc   %g0  tos 0  sth
\itc   %g0  tos 2  sth
        sp  tos    pop
c;

: lbase  ( -- adr ) +level  compile base   compile la1+  -level  ; immediate
: l#out  ( -- adr ) +level  compile #out   compile la1+  -level  ; immediate
: l#line ( -- adr ) +level  compile #line  compile la1+  -level  ; immediate
: lspan  ( -- adr ) +level  compile span   compile la1+  -level  ; immediate

code lrl@  ( addr -- l ) \ longword aligned
    tos 0    tos  srl
    tos tos lget
c;
code lrl!  ( n addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
    bubble
    scr   tos 0 st
    sp  1 /n*  tos  nget
    sp  2 /n*  sp   add
c;

code lrw@  ( addr -- w ) \ 16-bit word aligned
    tos 0    tos  srl
    tos 0  tos  lduh
c;

code lrw!  ( w addr -- )
   tos 0    tos  srl
   sp  0  scr  nget
   bubble
   scr   tos 0 sth
   sp 1 /n*  tos  nget
   sp 2 /n*  sp   add
c;

code lrb@  ( addr -- c )
    tos 0    tos  srl
    tos 0  tos  ldub
c;
code lrb!  ( c addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
    bubble
    scr   tos 0 stb
    sp  1 /n*  tos  nget
    sp  2 /n*  sp   add
c;

\ Double word stuff we need to implement with 32-bit only math.
code ld+  ( x1 x2 -- x3 )
   sp 0 /n*  sc1   nget		\ x2.low
   sp 2 /n*  sc3   nget		\ x1.low
   sp 1 /n*  sc2   nget		\ x1.high
   sp 2 /n*  sp    add		\ Pop args
   sc3 sc1  sc1    addcc	\ x3.low
   sc2 tos  tos    addc		\ x3.high
   sc1      sp 0   nput		\ Push result (x3.high already in tos)
c;
code ld-  ( x1 x2 -- x3 )
   sp 0 /n*  sc1   nget		\ x2.low
   sp 2 /n*  sc3   nget		\ x1.low
   sp 1 /n*  sc2   nget		\ x1.high
   sp 2 /n*  sp    add		\ Pop args
   sc3 sc1  sc1    subcc	\ x3.low
   sc2 tos  tos    subc		\ x3.high
   sc1      sp 0   nput		\ Push result (x3.high already in tos)
c;

: l-u/mod ( u1 u2 -- u.rem u.quot )	n->l swap n->l swap u/mod ;
: l-/mod  ( n1 n2 -- n.rem n.quot )	l->n swap l->n swap /mod ;

headerless

\ The following is from obp/fm/kernel/dmuldiv.fth and obp/fm/kernel/dmul.fth:

/l 4 * constant bits/half-l
: l-scale-up    ( n -- h )  bits/half-l <<  ;
: l-scale-down  ( n -- h )  bits/half-l >>  ;
alias l-split-halves lwsplit
: l-half*  ( h1 h2 -- low<< high )  * l-split-halves  swap l-scale-up swap  ;

headers

\ Implement:
\
\ AB * CD = BD + (BC + DA)<<bits/half-cell + AC<<bits/cell
\
\ Where A, B, C, D are half "l" (or 16 bit in this case) values.

: lum*	( n1 n2 -- xlo xhi )
   l-split-halves   rot l-split-halves	( b a d c )

   \ Easy case - high halves are both 0, so result is just BD
   2 pick over or  0=  if  drop nip * 0	 exit  then

   3 pick 2 pick  * 0 2>r		( b a d c )  ( r: d.low )

   \ Check for C = 0 and optimize if so
   dup	if				( b a d c )  ( r: d.low )
      \ C is not zero, so compute and add BC<<
      3 pick  over l-half*
      2r> ld+ 2>r			( b a d c )  ( r: d.intermed )

      \ We are done with B
      2swap nip				( d c a )
      \ Check for A = 0 and optimize if so
      dup  if				( d c a )
	 \ A is not zero, so compute and add DA<< and AC<<<
	 rot over l-half*		( c a da.low da.high )
	 2r> ld+ 2>r			( c a ) ( r: d.intermed' )
	 * 0 swap 2r> ld+
      else
	 \ A is zero, so we are finished
	 3drop	2r>
      then
   else
      \ C is zero, so all we have to do is compute and add DA<<
      drop rot drop			( a d )	 ( r: d.low )
      l-half*				( low1 high1 )
      2r> ld+
   then
   n->l
;
\ This is the elementary school long-division algorithm, base 2^^16 (on a
\ 32-bit system) or 2^32 (on a 64-bit system).
\ It depends on the assumption that "/" can accurately divide a single-cell
\ (i.e. 32 or 64 bit) number by a half-cell (i.e. 16 or 32 bit) number.
\ Each "digit" is a half-cell number; thus the dividend is a 4-digit
\ number "ABCD" and the divisor is a 2-digit number "EF".

\ It would be interesting to compare the performance of this to a
\ "bit-banging" non-restoring division loop.
: l-um/mod  ( ud u -- urem uquot )
   2dup u>=  if                    \ Overflow; return max-uint for quotient
      0=  if  0 /  then            \ Force divide by zero trap
      2drop   0 -1  n->l exit      ( 0 max-u )
   then                            ( ud u )

   \ Split the divisor into two 16-bit "digits"
   dup l-split-halves		   ( ud u ulow uhigh )

   \ If the high "digit" of the divisor is zero, we can skip a lot
   \ of the steps.  In this case, we only have to worry about the
   \ middle two digits of the dividend in developing the quotient.
   ?dup 0=  if                     ( ud u ulow )

      \ Approximate the high digit of the quotient by dividing the "BC"
      \ digits by the "F" digit.  The answer could by low by one, but if
      \ so it will be fixed in the next step.
      2over swap l-scale-down swap l-scale-up + over /  l-scale-up
				   ( ud u ulow guess<< )

      \ Multiply the trial quotient by the divisor
      rot over lum*                ( ud ulow guess<< udtemp )

      \ Subtract the trial product from the dividend, giving the remainder
      2swap >r >r  ld-  drop       ( error )  ( r: guess<< ulow )

      \ Divide the remainder by the divisor, giving the rest of the
      \ quotient.
      dup r@ l-u/mod nip           ( error guess1 )
      r> r>                        ( error guess1 ulow guess<< )

      \ Merge the two halves of the quotient
      2 pick + >r                  ( error guess1 ulow ) ( r: uquot )

      \ Calculate the remainder
      * -  r>                      ( urem uquot )
      exit
   then                            ( ud u ulow uhigh )

   \ The high divisor digit is non-zero, so we have to deal with
   \ both digits, dividing "ABCD" by "EF".

   \ Approximate the high digit of the quotient.
   3 pick over l-u/mod nip          ( ud u ulow uhigh guess )

   \ Reduce guess by one if "E" = "A"
   dup 1 l-scale-up =  if  1-  then  ( ud u ulow uhigh guess' )

   \ Multiply the trial quotient by the divisor
   3 pick over l-scale-up lum*       ( ud u ulow uhigh guess' ud.temp )

   \ Subtract the trial product from the dividend, giving the remainder
   >r >r 2rot r> r> ld-             ( u ulow uhigh guess' d.resid )

   \ If the remainder is negative, add the divisor and reduce the trial
   \ quotient by one.  The following loop executes at most twice.
   begin  dup 0<  while            ( u ulow uhigh guess' d.resid )
      rot 1- -rot                  ( u ulow uhigh guess+ d.resid )
      4 pick l-scale-up 4 pick ld+ ( u ulow uhigh guess+ d.resid' )
   repeat                          ( u ulow uhigh guess+ +d.resid )

   \ Now we have the correct high quotient digit; save it for later
   rot l-scale-up >r                 ( u ulow uhigh +d.resid ) ( r: q.high )

   \ Repeat the above process, using the partial remainder as the
   \ dividend.  Ulow is no longer needed
   3 roll drop                     ( u uhigh +d.resid )

   \ Trial quotient digit...
   2dup l-scale-up swap l-scale-down + 3 roll l-u/mod nip
                                   ( u +d.resid guess1 ) ( r: q.high )
   dup 1 l-scale-up =  if  1-  then  ( u +d.resid guess1' )

   \ Trial product
   3 pick over lum*                ( u +d.resid guess1' d.err )

   \ New partial remainder
   rot >r ld-                       ( u d.resid' )  ( r: q.high guess1' )

   \ Adjust quotient digit until partial remainder is positive
   begin  dup 0<  while            ( u d.resid' )  ( r: q.high guess1' )
     r> 1- >r                      ( u d.resid' )  ( r: q.high guess1' )
	 \ There is no l-m+, so use "0< ld+" instead 
     2 pick dup 0< ld+		   ( u d.resid'' ) ( r: q.high guess1' )
   repeat                          ( u +d.resid )  ( r: q.high guess1' )

   \ Discard divisior and high cell of quotient (which must be zero)
   rot 2drop                       ( u.rem )

   \ Merge quotient digits
   r> r> +                         ( u.rem u.quot )
;

\ Need to fix # and #s

: l-mu/mod (s d n1 -- rem d.quot )
   >r  0  r@  l-um/mod  r>  swap  >r  l-um/mod  r>
;
: l-#   (s ud1 -- ud2 )
   base @ l-mu/mod			( nrem ud2 )
   rot     >digit  hold			( ud2 )
;
: l-#s  (s ud -- 0 0 )  begin   l-#  2dup or  0=  until  ;

transient
vocabulary fcode32
also fcode32 definitions

alias ,     l,
alias /n    /l
alias na+   la+
alias cell+ la1+
alias cells /l*
alias b?branch lb?branch
alias +!       l+!
alias >>a      l>>a
alias rshift   lrshift
alias b(of)    lb(of)
alias b(do)    lb(do)
alias b(?do)   lb(?do)
alias b(+loop) lb(+loop)
alias 0=       l0=
alias 0<>      l0<>
alias 0<       l0<
alias 0<=      l0<=
alias 0>       l0>
alias 0>=      l0>=
alias <        l<
alias >        l>
alias =        l=
alias <>       l<>
alias u>       lu>
alias u<=      lu<=
alias u<       lu<
alias u>=      lu>=
alias >=       l>=
alias <=       l<=
alias between  l-between
alias within   l-within
alias max      l-max
alias min      l-min
alias abs      l-abs
alias @        l-@
alias !        l-!
alias 2@       l2@
alias 2!       l2!
alias l@       ll@
alias l!       ll!
alias <w@      l<w@
alias w@       lw@
alias w!       lw!
alias c@       lc@
alias c!       lc!
alias on       lon
alias off      loff
alias base     lbase
alias #out     l#out
alias #line    l#line
alias span     lspan

alias rl!      lrl!
alias rl@      lrl@
alias rw!      lrw!
alias rw@      lrw@
alias rb!      lrb!
alias rb@      lrb@

alias $number  l-$number
alias move     l-move
alias fill     l-fill
alias cpeek    lcpeek
alias wpeek    lwpeek
alias lpeek    llpeek
alias cpoke    lcpoke
alias wpoke    lwpoke
alias lpoke    llpoke

alias d+       ld+
alias d-       ld-
alias um*      lum*
alias um/mod   l-um/mod
alias u/mod    l-u/mod
alias /mod     l-/mod

alias #        l-#
alias #s       l-#s

previous definitions
resident

headerless
variable token-table0-64
variable token-table2-64
variable token-table0-32
variable token-table2-32

token-tables 0 ta+ token@ token-table0-64 token!
token-tables 0 ta+  !null-token

token-tables 2 ta+ token@ token-table2-64 token!
token-tables 2 ta+  !null-token

headers
also fcode32 definitions
fload ${BP}/pkg/fcode/primlist.fth		\ Codes for kernel primitives
fload ${BP}/pkg/fcode/sysprims-nofb.fth	\ Codes for system primitives
fload ${BP}/pkg/fcode/obsfcod2.fth
fload ${BP}/pkg/fcode/sysprm64.fth
fload ${BP}/pkg/fcode/regcodes.fth
previous definitions

token-tables 0 ta+ token@ token-table0-32 token!
token-tables 2 ta+ token@ token-table2-32 token!

headers
: fcode-32 ( -- )
   token-table0-32 token@ token-tables 0 ta+ token!
   token-table2-32 token@ token-tables 2 ta+ token!
;

: fcode-64 ( -- )
   token-table0-64 token@ token-tables 0 ta+ token!
   token-table2-64 token@ token-tables 2 ta+ token!
;

fcode-64

stand-init: Chose fcode32 mode
   fcode-32
;
Commit	Line	Data
920dae64 AT	1	\ ========== Copyright Header Begin ==========================================
	2	\
	3	\ Hypervisor Software File: fcode32.fth
	4	\
	5	\ Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
	6	\
	7	\ - Do no alter or remove copyright notices
	8	\
	9	\ - Redistribution and use of this software in source and binary forms, with
	10	\ or without modification, are permitted provided that the following
	11	\ conditions are met:
	12	\
	13	\ - Redistribution of source code must retain the above copyright notice,
	14	\ this list of conditions and the following disclaimer.
	15	\
	16	\ - Redistribution in binary form must reproduce the above copyright notice,
	17	\ this list of conditions and the following disclaimer in the
	18	\ documentation and/or other materials provided with the distribution.
	19	\
	20	\ Neither the name of Sun Microsystems, Inc. or the names of contributors
	21	\ may be used to endorse or promote products derived from this software
	22	\ without specific prior written permission.
	23	\
	24	\ This software is provided "AS IS," without a warranty of any kind.
	25	\ ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
	26	\ INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
	27	\ PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN
	28	\ MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR
	29	\ ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
	30	\ DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN
	31	\ OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR
	32	\ FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE
	33	\ DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY,
	34	\ ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF
	35	\ SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
	36	\
	37	\ You acknowledge that this software is not designed, licensed or
	38	\ intended for use in the design, construction, operation or maintenance of
	39	\ any nuclear facility.
	40	\
	41	\ ========== Copyright Header End ============================================
	42	id: @(#)fcode32.fth 1.15 06/10/26
	43	purpose:
	44	copyright: Copyright 2006 Sun Microsystems, Inc. All rights reserved.
	45	copyright: Use is subject to license terms.
	46
	47	headers
	48
	49	code 2l->n ( l1 l2 -- x1 x2 )
	50	tos 0 tos sra
	51	sp 0 scr ldx
	52	scr 0 scr sra
	53	scr sp 0 stx
	54	c;
	55
	56	: l-$number ( adr len -- true \| n false ) swap n->l swap $number ;
	57
	58	: l-move ( adr1 adr2 cnt -- ) rot n->l rot n->l rot n->l move ;
	59	: l-fill ( adr cnt byte -- ) rot n->l rot n->l rot fill ;
	60
	61	: lcpeek ( adr -- { byte true } \| false ) n->l cpeek ;
	62	: lwpeek ( adr -- { byte true } \| false ) n->l wpeek ;
	63	: llpeek ( adr -- { byte true } \| false ) n->l lpeek ;
	64
65	: lcpoke ( byte adr -- ok? ) n->l cpoke ;
66	: lwpoke ( byte adr -- ok? ) n->l wpoke ;
67	: llpoke ( byte adr -- ok? ) n->l lpoke ;
68
69	: lb?branch ( [ <mark ] -- [ >mark ] )
70
71	\ New feature of IEEE 1275
72	state @ 0= if ( flag )
73	l->n if get-offset drop else skip-bytes then
74	exit
75	then
76
77	compile l->n
78
79	get-offset 0< if ( )
80	\ The get-backward-mark is needed in case of the following valid
81	\ ANS Forth construct: BEGIN .. WHILE .. UNTIL .. THEN
82	get-backward-mark [compile] until
83	else
84	[compile] if
85	then
86	; immediate
87
88	code l+! ( n addr -- )
89	tos 0 tos srl
90	sp scr get
91	\dtc tos sc1 lget
92
93	\itc tos 0 sc1 lduh
94	\itc tos 2 sc2 lduh
95	\itc sc1 10 sc1 sll
96	\itc sc1 sc2 sc1 add
97	bubble
98	sc1 scr sc1 add
99	\dtc sc1 tos lput
100	\itc sc1 tos 2 sth
101	\itc sc1 10 sc1 srl
102	\itc sc1 tos 0 sth
103	sp 1 /n* tos nget
104	sp 2 /n* sp add
105	c;
106
107	code l>>a ( n1 cnt -- n2 )
108	sp scr pop
109	scr tos tos sra
110	c;
111	code lrshift ( n1 cnt -- n2 )
112	sp scr pop
113	scr tos tos srl
114	c;
115
116	: lb(of) ( marks -- marks )
117	drop-offset +level compile 2l->n -level [compile] of
118	; immediate
119	: lb(do) ( -- )
120	drop-offset +level compile 2l->n -level [compile] do
121	; immediate
122	: lb(?do) ( -- )
123	drop-offset +level compile 2l->n -level [compile] ?do
124	; immediate
125	: lb(+loop) ( -- )
126	drop-offset +level compile l->n -level [compile] +loop
127	; immediate
128
129	transient
130	also assembler definitions
131	: compare
132	sp scr pop
133	scr tos cmp
134	;
135	: leaveflag ( condition -- )
136	\ macro to assemble code to leave a flag on the stack
137	if ,%icc
138	0 tos move \ Delay slot
139	-1 tos move
140	then
141	;
142	previous definitions
143	resident
144	warning @ warning off
145	\ Note: l0= and l= clash with the link defs in kernport.fth
146	code l0= ( n -- f ) tos test 0= leaveflag c;
147	code l0<> ( n -- f ) tos test 0<> leaveflag c;
148	code l0< ( n -- f ) tos test 0< leaveflag c;
149	code l0<= ( n -- f ) tos test <= leaveflag c;
150	code l0> ( n -- f ) tos test > leaveflag c;
151	code l0>= ( n -- f ) tos test 0>= leaveflag c;
152
153	code l< ( n1 n2 -- f ) compare < leaveflag c;
154	code l> ( n1 n2 -- f ) compare > leaveflag c;
155	code l= ( n1 n2 -- f ) compare 0= leaveflag c;
156	code l<> ( n1 n2 -- f ) compare <> leaveflag c;
157	code lu> ( n1 n2 -- f ) compare u> leaveflag c;
158	code lu<= ( n1 n2 -- f ) compare u<= leaveflag c;
159	code lu< ( n1 n2 -- f ) compare u< leaveflag c;
160	code lu>= ( n1 n2 -- f ) compare u>= leaveflag c;
161	code l>= ( n1 n2 -- f ) compare >= leaveflag c;
162	code l<= ( n1 n2 -- f ) compare <= leaveflag c;
163	warning !
164
165	: l-between ( n1 n2 n3 -- flag ) >r over l<= swap r> l<= and ;
166	: l-within ( n1 n2 n3 -- flag ) over - >r - r> lu< ;
167	: l-max ( n1 n2 -- max ) 2dup l< if swap then drop ;
168	: l-min ( n1 n2 -- min ) 2dup l> if swap then drop ;
169	: l-abs ( n -- \|n\| ) dup l0< if negate then ;
170
171	code l-@ ( addr -- n )
172	tos 0 tos srl
173	\dtc tos 0 tos ld
174	\itc tos 2 scr lduh tos 0 tos lduh
175	\itc tos h# 10 tos sll scr tos tos add
176	c;
177
178	code l-! ( n addr -- )
179	tos 0 tos srl
180	sp 0 scr nget
181	\dtc scr tos 0 st
182	\itc scr tos 2 sth
183	\itc scr 10 scr srl
184	\itc scr tos 0 sth
185
186	sp 1 /n* tos nget
187	sp 2 /n* sp add
188	c;
189
190	code l2@ ( addr -- d )
191	tos 0 tos srl
192	tos /n sc1 lduh
193	tos /n 2 + scr lduh sc1 10 sc1 sll scr sc1 sc1 add
194	tos /n 4 + scr lduh sc1 10 sc1 sllx scr sc1 sc1 add
195	tos /n 6 + scr lduh sc1 10 sc1 sllx scr sc1 scr add
196
197	scr sp push
198
199	tos 0 sc1 lduh
200	tos 2 scr lduh sc1 10 sc1 sll scr sc1 sc1 add
201	tos 4 scr lduh sc1 10 sc1 sllx scr sc1 sc1 add
202	tos 6 scr lduh sc1 10 sc1 sllx
203
204	scr sc1 tos add
205	c;
206	code l2! ( d addr -- )
207	tos 0 tos srl
208	sp 0 scr nget
209	bubble
210
211	scr tos 6 sth scr 10 scr srlx
212	scr tos 4 sth scr 10 scr srlx
213	scr tos 2 sth scr 10 scr srl
214	scr tos 0 sth
215
216	sp /n scr nget
217
218	bubble
219
220	scr tos /n 6 + sth scr 10 scr srlx
221	scr tos /n 4 + sth scr 10 scr srlx
222	scr tos /n 2 + sth scr 10 scr srl
223	scr tos /n 0 + sth
224
225	sp 2 /n* tos nget
226	sp 3 /n* sp add
227	c;
228
229	code ll@ ( addr -- l ) \ longword aligned
230	tos 0 tos srl
231	tos tos lget
232	c;
233	code ll! ( n addr -- )
234	tos 0 tos srl
235	sp 0 scr nget
236	bubble
237	scr tos 0 st
238	sp 1 /n* tos nget
239	sp 2 /n* sp add
240	c;
241
242	code l<w@ ( addr -- w )
243	tos 0 tos srl
244	tos 0 tos ldsh
245	tos 0 tos sra
246	c;
247
248	code lw@ ( addr -- w ) \ 16-bit word aligned
249	tos 0 tos srl
250	tos 0 tos lduh
251	c;
252
253	code lw! ( w addr -- )
254	tos 0 tos srl
255	sp 0 scr nget
256	bubble
257	scr tos 0 sth
258	sp 1 /n* tos nget
259	sp 2 /n* sp add
260	c;
261
262	code lc@ ( addr -- c )
263	tos 0 tos srl
264	tos 0 tos ldub
265	c;
266	code lc! ( c addr -- )
267	tos 0 tos srl
268	sp 0 scr nget
269	bubble
270	scr tos 0 stb
271	sp 1 /n* tos nget
272	sp 2 /n* sp add
273	c;
274
275	code lon ( addr -- )
276	tos 0 tos srl
277	-1 scr move
278	\dtc scr tos 0 st
279	\itc scr tos 0 sth
280	\itc scr tos 2 sth
281	sp tos pop
282	c;
283	code loff ( addr -- )
284	tos 0 tos srl
285	\dtc %g0 tos 0 st
286	\itc %g0 tos 0 sth
287	\itc %g0 tos 2 sth
288	sp tos pop
289	c;
290
291	: lbase ( -- adr ) +level compile base compile la1+ -level ; immediate
292	: l#out ( -- adr ) +level compile #out compile la1+ -level ; immediate
293	: l#line ( -- adr ) +level compile #line compile la1+ -level ; immediate
294	: lspan ( -- adr ) +level compile span compile la1+ -level ; immediate
295
296	code lrl@ ( addr -- l ) \ longword aligned
297	tos 0 tos srl
298	tos tos lget
299	c;
300	code lrl! ( n addr -- )
301	tos 0 tos srl
302	sp 0 scr nget
303	bubble
304	scr tos 0 st
305	sp 1 /n* tos nget
306	sp 2 /n* sp add
307	c;
308
309	code lrw@ ( addr -- w ) \ 16-bit word aligned
310	tos 0 tos srl
311	tos 0 tos lduh
312	c;
313
314	code lrw! ( w addr -- )
315	tos 0 tos srl
316	sp 0 scr nget
317	bubble
318	scr tos 0 sth
319	sp 1 /n* tos nget
320	sp 2 /n* sp add
321	c;
322
323	code lrb@ ( addr -- c )
324	tos 0 tos srl
325	tos 0 tos ldub
326	c;
327	code lrb! ( c addr -- )
328	tos 0 tos srl
329	sp 0 scr nget
330	bubble
331	scr tos 0 stb
332	sp 1 /n* tos nget
333	sp 2 /n* sp add
334	c;
335
336	\ Double word stuff we need to implement with 32-bit only math.
337	code ld+ ( x1 x2 -- x3 )
338	sp 0 /n* sc1 nget \ x2.low
339	sp 2 /n* sc3 nget \ x1.low
340	sp 1 /n* sc2 nget \ x1.high
341	sp 2 /n* sp add \ Pop args
342	sc3 sc1 sc1 addcc \ x3.low
343	sc2 tos tos addc \ x3.high
344	sc1 sp 0 nput \ Push result (x3.high already in tos)
345	c;
346	code ld- ( x1 x2 -- x3 )
347	sp 0 /n* sc1 nget \ x2.low
348	sp 2 /n* sc3 nget \ x1.low
349	sp 1 /n* sc2 nget \ x1.high
350	sp 2 /n* sp add \ Pop args
351	sc3 sc1 sc1 subcc \ x3.low
352	sc2 tos tos subc \ x3.high
353	sc1 sp 0 nput \ Push result (x3.high already in tos)
354	c;
355
356	: l-u/mod ( u1 u2 -- u.rem u.quot ) n->l swap n->l swap u/mod ;
357	: l-/mod ( n1 n2 -- n.rem n.quot ) l->n swap l->n swap /mod ;
358
359	headerless
360
361	\ The following is from obp/fm/kernel/dmuldiv.fth and obp/fm/kernel/dmul.fth:
362
363	/l 4 * constant bits/half-l
364	: l-scale-up ( n -- h ) bits/half-l << ;
365	: l-scale-down ( n -- h ) bits/half-l >> ;
366	alias l-split-halves lwsplit
367	: l-half* ( h1 h2 -- low<< high ) * l-split-halves swap l-scale-up swap ;
368
369	headers
370
371	\ Implement:
372	\
373	\ AB * CD = BD + (BC + DA)<<bits/half-cell + AC<<bits/cell
374	\
375	\ Where A, B, C, D are half "l" (or 16 bit in this case) values.
376
377	: lum* ( n1 n2 -- xlo xhi )
378	l-split-halves rot l-split-halves ( b a d c )
379
380	\ Easy case - high halves are both 0, so result is just BD
381	2 pick over or 0= if drop nip * 0 exit then
382
383	3 pick 2 pick * 0 2>r ( b a d c ) ( r: d.low )
384
385	\ Check for C = 0 and optimize if so
386	dup if ( b a d c ) ( r: d.low )
387	\ C is not zero, so compute and add BC<<
388	3 pick over l-half*
389	2r> ld+ 2>r ( b a d c ) ( r: d.intermed )
390
391	\ We are done with B
392	2swap nip ( d c a )
393	\ Check for A = 0 and optimize if so
394	dup if ( d c a )
395	\ A is not zero, so compute and add DA<< and AC<<<
396	rot over l-half* ( c a da.low da.high )
397	2r> ld+ 2>r ( c a ) ( r: d.intermed' )
398	* 0 swap 2r> ld+
399	else
400	\ A is zero, so we are finished
401	3drop 2r>
402	then
403	else
404	\ C is zero, so all we have to do is compute and add DA<<
405	drop rot drop ( a d ) ( r: d.low )
406	l-half* ( low1 high1 )
407	2r> ld+
408	then
409	n->l
410	;
411	\ This is the elementary school long-division algorithm, base 2^^16 (on a
412	\ 32-bit system) or 2^32 (on a 64-bit system).
413	\ It depends on the assumption that "/" can accurately divide a single-cell
414	\ (i.e. 32 or 64 bit) number by a half-cell (i.e. 16 or 32 bit) number.
415	\ Each "digit" is a half-cell number; thus the dividend is a 4-digit
416	\ number "ABCD" and the divisor is a 2-digit number "EF".
417
418	\ It would be interesting to compare the performance of this to a
419	\ "bit-banging" non-restoring division loop.
420	: l-um/mod ( ud u -- urem uquot )
421	2dup u>= if \ Overflow; return max-uint for quotient
422	0= if 0 / then \ Force divide by zero trap
423	2drop 0 -1 n->l exit ( 0 max-u )
424	then ( ud u )
425
426	\ Split the divisor into two 16-bit "digits"
427	dup l-split-halves ( ud u ulow uhigh )
428
429	\ If the high "digit" of the divisor is zero, we can skip a lot
430	\ of the steps. In this case, we only have to worry about the
431	\ middle two digits of the dividend in developing the quotient.
432	?dup 0= if ( ud u ulow )
433
434	\ Approximate the high digit of the quotient by dividing the "BC"
435	\ digits by the "F" digit. The answer could by low by one, but if
436	\ so it will be fixed in the next step.
437	2over swap l-scale-down swap l-scale-up + over / l-scale-up
438	( ud u ulow guess<< )
439
440	\ Multiply the trial quotient by the divisor
441	rot over lum* ( ud ulow guess<< udtemp )
442
443	\ Subtract the trial product from the dividend, giving the remainder
444	2swap >r >r ld- drop ( error ) ( r: guess<< ulow )
445
446	\ Divide the remainder by the divisor, giving the rest of the
447	\ quotient.
448	dup r@ l-u/mod nip ( error guess1 )
449	r> r> ( error guess1 ulow guess<< )
450
451	\ Merge the two halves of the quotient
452	2 pick + >r ( error guess1 ulow ) ( r: uquot )
453
454	\ Calculate the remainder
455	* - r> ( urem uquot )
456	exit
457	then ( ud u ulow uhigh )
458
459	\ The high divisor digit is non-zero, so we have to deal with
460	\ both digits, dividing "ABCD" by "EF".
461
462	\ Approximate the high digit of the quotient.
463	3 pick over l-u/mod nip ( ud u ulow uhigh guess )
464
465	\ Reduce guess by one if "E" = "A"
466	dup 1 l-scale-up = if 1- then ( ud u ulow uhigh guess' )
467
468	\ Multiply the trial quotient by the divisor
469	3 pick over l-scale-up lum* ( ud u ulow uhigh guess' ud.temp )
470
471	\ Subtract the trial product from the dividend, giving the remainder
472	>r >r 2rot r> r> ld- ( u ulow uhigh guess' d.resid )
473
474	\ If the remainder is negative, add the divisor and reduce the trial
475	\ quotient by one. The following loop executes at most twice.
476	begin dup 0< while ( u ulow uhigh guess' d.resid )
477	rot 1- -rot ( u ulow uhigh guess+ d.resid )
478	4 pick l-scale-up 4 pick ld+ ( u ulow uhigh guess+ d.resid' )
479	repeat ( u ulow uhigh guess+ +d.resid )
480
481	\ Now we have the correct high quotient digit; save it for later
482	rot l-scale-up >r ( u ulow uhigh +d.resid ) ( r: q.high )
483
484	\ Repeat the above process, using the partial remainder as the
485	\ dividend. Ulow is no longer needed
486	3 roll drop ( u uhigh +d.resid )
487
488	\ Trial quotient digit...
489	2dup l-scale-up swap l-scale-down + 3 roll l-u/mod nip
490	( u +d.resid guess1 ) ( r: q.high )
491	dup 1 l-scale-up = if 1- then ( u +d.resid guess1' )
492
493	\ Trial product
494	3 pick over lum* ( u +d.resid guess1' d.err )
495
496	\ New partial remainder
497	rot >r ld- ( u d.resid' ) ( r: q.high guess1' )
498
499	\ Adjust quotient digit until partial remainder is positive
500	begin dup 0< while ( u d.resid' ) ( r: q.high guess1' )
501	r> 1- >r ( u d.resid' ) ( r: q.high guess1' )
502	\ There is no l-m+, so use "0< ld+" instead
503	2 pick dup 0< ld+ ( u d.resid'' ) ( r: q.high guess1' )
504	repeat ( u +d.resid ) ( r: q.high guess1' )
505
506	\ Discard divisior and high cell of quotient (which must be zero)
507	rot 2drop ( u.rem )
508
509	\ Merge quotient digits
510	r> r> + ( u.rem u.quot )
511	;
512
513	\ Need to fix # and #s
514
515	: l-mu/mod (s d n1 -- rem d.quot )
516	>r 0 r@ l-um/mod r> swap >r l-um/mod r>
517	;
518	: l-# (s ud1 -- ud2 )
519	base @ l-mu/mod ( nrem ud2 )
520	rot >digit hold ( ud2 )
521	;
522	: l-#s (s ud -- 0 0 ) begin l-# 2dup or 0= until ;
523
524	transient
525	vocabulary fcode32
526	also fcode32 definitions
527
528	alias , l,
529	alias /n /l
530	alias na+ la+
531	alias cell+ la1+
532	alias cells /l*
533	alias b?branch lb?branch
534	alias +! l+!
535	alias >>a l>>a
536	alias rshift lrshift
537	alias b(of) lb(of)
538	alias b(do) lb(do)
539	alias b(?do) lb(?do)
540	alias b(+loop) lb(+loop)
541	alias 0= l0=
542	alias 0<> l0<>
543	alias 0< l0<
544	alias 0<= l0<=
545	alias 0> l0>
546	alias 0>= l0>=
547	alias < l<
548	alias > l>
549	alias = l=
550	alias <> l<>
551	alias u> lu>
552	alias u<= lu<=
553	alias u< lu<
554	alias u>= lu>=
555	alias >= l>=
556	alias <= l<=
557	alias between l-between
558	alias within l-within
559	alias max l-max
560	alias min l-min
561	alias abs l-abs
562	alias @ l-@
563	alias ! l-!
564	alias 2@ l2@
565	alias 2! l2!
566	alias l@ ll@
567	alias l! ll!
568	alias <w@ l<w@
569	alias w@ lw@
570	alias w! lw!
571	alias c@ lc@
572	alias c! lc!
573	alias on lon
574	alias off loff
575	alias base lbase
576	alias #out l#out
577	alias #line l#line
578	alias span lspan
579
580	alias rl! lrl!
581	alias rl@ lrl@
582	alias rw! lrw!
583	alias rw@ lrw@
584	alias rb! lrb!
585	alias rb@ lrb@
586
587	alias $number l-$number
588	alias move l-move
589	alias fill l-fill
590	alias cpeek lcpeek
591	alias wpeek lwpeek
592	alias lpeek llpeek
593	alias cpoke lcpoke
594	alias wpoke lwpoke
595	alias lpoke llpoke
596
597	alias d+ ld+
598	alias d- ld-
599	alias um* lum*
600	alias um/mod l-um/mod
601	alias u/mod l-u/mod
602	alias /mod l-/mod
603
604	alias # l-#
605	alias #s l-#s
606
607	previous definitions
608	resident
609
610	headerless
611	variable token-table0-64
612	variable token-table2-64
613	variable token-table0-32
614	variable token-table2-32
615
616	token-tables 0 ta+ token@ token-table0-64 token!
617	token-tables 0 ta+ !null-token
618
619	token-tables 2 ta+ token@ token-table2-64 token!
620	token-tables 2 ta+ !null-token
621
622	headers
623	also fcode32 definitions
624	fload ${BP}/pkg/fcode/primlist.fth \ Codes for kernel primitives
625	fload ${BP}/pkg/fcode/sysprims-nofb.fth \ Codes for system primitives
626	fload ${BP}/pkg/fcode/obsfcod2.fth
627	fload ${BP}/pkg/fcode/sysprm64.fth
628	fload ${BP}/pkg/fcode/regcodes.fth
629	previous definitions
630
631	token-tables 0 ta+ token@ token-table0-32 token!
632	token-tables 2 ta+ token@ token-table2-32 token!
633
634	headers
635	: fcode-32 ( -- )
636	token-table0-32 token@ token-tables 0 ta+ token!
637	token-table2-32 token@ token-tables 2 ta+ token!
638	;
639
640	: fcode-64 ( -- )
641	token-table0-64 token@ token-tables 0 ta+ token!
642	token-table2-64 token@ token-tables 2 ta+ token!
643	;
644
645	fcode-64
646
647	stand-init: Chose fcode32 mode
648	fcode-32
649	;