obp/obp/pkg/fcode/sparc/fcode32.fth

\ ========== Copyright Header Begin ==========================================
\
\ Hypervisor Software File: fcode32.fth
\
\ Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
\
\  - Do no alter or remove copyright notices
\
\  - Redistribution and use of this software in source and binary forms, with
\    or without modification, are permitted provided that the following
\    conditions are met:
\
\  - Redistribution of source code must retain the above copyright notice,
\    this list of conditions and the following disclaimer.
\
\  - Redistribution in binary form must reproduce the above copyright notice,
\    this list of conditions and the following disclaimer in the
\    documentation and/or other materials provided with the distribution.
\
\    Neither the name of Sun Microsystems, Inc. or the names of contributors
\ may be used to endorse or promote products derived from this software
\ without specific prior written permission.
\
\     This software is provided "AS IS," without a warranty of any kind.
\ ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
\ INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
\ PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN
\ MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR
\ ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
\ DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN
\ OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR
\ FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE
\ DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY,
\ ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF
\ SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
\
\ You acknowledge that this software is not designed, licensed or
\ intended for use in the design, construction, operation or maintenance of
\ any nuclear facility.
\
\ ========== Copyright Header End ============================================
id: @(#)fcode32.fth 1.15 06/10/26
purpose:
copyright: Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
copyright: Use is subject to license terms.

headers

code 2l->n ( l1 l2 -- x1 x2 )
   tos 0  tos   sra
   sp  0  scr   ldx
   scr 0  scr   sra
   scr    sp 0  stx
c;

: l-$number ( adr len -- true | n false )  swap n->l swap  $number ;

: l-move ( adr1 adr2 cnt -- )  rot n->l rot n->l rot n->l  move  ;
: l-fill ( adr cnt byte -- )  rot n->l rot n->l rot  fill  ;

: lcpeek ( adr -- { byte true } | false )  n->l  cpeek  ;
: lwpeek ( adr -- { byte true } | false )  n->l  wpeek  ;
: llpeek ( adr -- { byte true } | false )  n->l  lpeek  ;

: lcpoke ( byte adr -- ok? )  n->l  cpoke  ;
: lwpoke ( byte adr -- ok? )  n->l  wpoke  ;
: llpoke ( byte adr -- ok? )  n->l  lpoke  ;

: lb?branch  ( [ <mark ] -- [ >mark ] )

   \ New feature of IEEE 1275
   state @ 0=  if  ( flag )
      l->n  if  get-offset drop  else  skip-bytes  then
      exit
   then

   compile l->n

   get-offset 0<  if  ( )
      \ The get-backward-mark is needed in case of the following valid
      \ ANS Forth construct:    BEGIN  .. WHILE .. UNTIL .. THEN
      get-backward-mark  [compile] until
   else
      [compile] if
   then
; immediate

code l+! ( n addr -- )
      tos 0    tos  srl
      sp       scr  get
\dtc  tos      sc1  lget

\itc  tos 0    sc1  lduh
\itc  tos 2    sc2  lduh
\itc  sc1 10   sc1  sll
\itc  sc1 sc2  sc1  add
      bubble
      sc1 scr  sc1  add
\dtc   sc1      tos   lput
\itc   sc1  tos 2     sth
\itc   sc1 10   sc1   srl
\itc   sc1  tos 0     sth
   sp  1 /n*  tos  nget
   sp  2 /n*  sp   add
c;

code l>>a     ( n1 cnt -- n2 )
   sp       scr  pop
   scr tos  tos  sra
c;
code lrshift  ( n1 cnt -- n2 )
   sp  scr       pop
   scr tos  tos  srl
c;

: lb(of)     ( marks -- marks )
   drop-offset  +level compile 2l->n -level  [compile] of
; immediate
: lb(do)     ( -- )
   drop-offset  +level compile 2l->n -level  [compile]  do
; immediate
: lb(?do)    ( -- )
   drop-offset  +level compile 2l->n -level  [compile]  ?do
; immediate
: lb(+loop)  ( -- )
   drop-offset  +level compile l->n -level  [compile] +loop
; immediate

transient
also assembler definitions
: compare
   sp  scr  pop
   scr tos  cmp
;
: leaveflag  ( condition -- )
\ macro to assemble code to leave a flag on the stack
   if  ,%icc
   0  tos  move   \ Delay slot
      -1 tos move
   then
;
previous definitions
resident
warning @ warning off
\ Note: l0= and l= clash with the link defs in kernport.fth
code l0=  ( n -- f )  tos test  0=  leaveflag c;
code l0<> ( n -- f )  tos test  0<> leaveflag c;
code l0<  ( n -- f )  tos test  0<  leaveflag c;
code l0<= ( n -- f )  tos test  <=  leaveflag c;
code l0>  ( n -- f )  tos test  >   leaveflag c;
code l0>= ( n -- f )  tos test  0>= leaveflag c;

code l<   ( n1 n2 -- f )  compare <   leaveflag c;
code l>   ( n1 n2 -- f )  compare >   leaveflag c;
code l=   ( n1 n2 -- f )  compare 0=  leaveflag c;
code l<>  ( n1 n2 -- f )  compare <>  leaveflag c;
code lu>  ( n1 n2 -- f )  compare u>  leaveflag c;
code lu<= ( n1 n2 -- f )  compare u<= leaveflag c;
code lu<  ( n1 n2 -- f )  compare u<  leaveflag c;
code lu>= ( n1 n2 -- f )  compare u>= leaveflag c;
code l>=  ( n1 n2 -- f )  compare >=  leaveflag c;
code l<=  ( n1 n2 -- f )  compare <=  leaveflag c;
warning !

: l-between ( n1 n2 n3 -- flag )  >r over l<= swap r> l<= and  ;
: l-within  ( n1 n2 n3 -- flag )  over - >r - r> lu<  ;
: l-max     ( n1 n2 -- max )  2dup l< if  swap  then  drop  ;
: l-min     ( n1 n2 -- min )  2dup l> if  swap  then  drop  ;
: l-abs     ( n -- |n| )  dup l0< if  negate  then  ;

code l-@   ( addr -- n )
      tos 0    tos  srl
\dtc  tos 0     tos  ld
\itc  tos 2     scr  lduh   tos 0   tos  lduh
\itc  tos h# 10 tos  sll    scr tos tos  add
c;

code l-!   ( n addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
\dtc   scr     tos 0   st
\itc   scr     tos 2  sth
\itc   scr 10  scr    srl
\itc   scr     tos 0  sth

   sp 1 /n*  tos  nget
   sp 2 /n*  sp   add
c;

code l2@  ( addr -- d )
    tos 0    tos  srl
    tos /n   sc1  lduh
    tos /n 2 +   scr  lduh  sc1 10   sc1  sll   scr sc1  sc1  add
    tos /n 4 +   scr  lduh  sc1 10   sc1  sllx  scr sc1  sc1  add
    tos /n 6 +   scr  lduh  sc1 10   sc1  sllx  scr sc1  scr  add

    scr      sp   push

    tos  0  sc1  lduh
    tos 2   scr  lduh  sc1 10   sc1  sll  scr sc1  sc1  add
    tos 4   scr  lduh  sc1 10   sc1  sllx  scr sc1  sc1  add
    tos 6   scr  lduh  sc1 10   sc1  sllx

    scr  sc1  tos  add
c;
code l2!  ( d addr -- )
    tos 0    tos  srl
    sp  0   scr    nget
    bubble

    scr   tos 6  sth  scr 10  scr  srlx
    scr   tos 4  sth  scr 10  scr  srlx
    scr   tos 2  sth  scr 10  scr  srl
    scr   tos 0  sth

    sp  /n  scr    nget

    bubble

    scr   tos /n 6 + sth  scr 10  scr  srlx
    scr   tos /n 4 + sth  scr 10  scr  srlx
    scr   tos /n 2 + sth  scr 10  scr  srl
    scr   tos /n 0 + sth

    sp  2 /n*   tos    nget
    sp  3 /n*   sp     add
c;

code ll@  ( addr -- l ) \ longword aligned
    tos 0    tos  srl
    tos tos lget
c;
code ll!  ( n addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
    bubble
    scr   tos 0 st
    sp  1 /n*  tos  nget
    sp  2 /n*  sp   add
c;

code l<w@ ( addr -- w )
    tos 0  tos  srl
    tos 0  tos  ldsh
    tos 0  tos  sra
c;

code lw@  ( addr -- w ) \ 16-bit word aligned
    tos 0    tos  srl
    tos 0  tos  lduh
c;

code lw!  ( w addr -- )
   tos 0    tos  srl
   sp  0  scr  nget
   bubble
   scr   tos 0 sth
   sp 1 /n*  tos  nget
   sp 2 /n*  sp   add
c;

code lc@  ( addr -- c )
    tos 0    tos  srl
    tos 0  tos  ldub
c;
code lc!  ( c addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
    bubble
    scr   tos 0 stb
    sp  1 /n*  tos  nget
    sp  2 /n*  sp   add
c;

code lon ( addr -- )
      tos  0 tos   srl
      -1   scr     move
\dtc  scr  tos 0   st
\itc  scr  tos 0   sth
\itc  scr  tos 2   sth
       sp  tos     pop
c;
code loff ( addr -- )
       tos  0 tos  srl
\dtc   %g0  tos 0  st
\itc   %g0  tos 0  sth
\itc   %g0  tos 2  sth
        sp  tos    pop
c;

: lbase  ( -- adr ) +level  compile base   compile la1+  -level  ; immediate
: l#out  ( -- adr ) +level  compile #out   compile la1+  -level  ; immediate
: l#line ( -- adr ) +level  compile #line  compile la1+  -level  ; immediate
: lspan  ( -- adr ) +level  compile span   compile la1+  -level  ; immediate

code lrl@  ( addr -- l ) \ longword aligned
    tos 0    tos  srl
    tos tos lget
c;
code lrl!  ( n addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
    bubble
    scr   tos 0 st
    sp  1 /n*  tos  nget
    sp  2 /n*  sp   add
c;

code lrw@  ( addr -- w ) \ 16-bit word aligned
    tos 0    tos  srl
    tos 0  tos  lduh
c;

code lrw!  ( w addr -- )
   tos 0    tos  srl
   sp  0  scr  nget
   bubble
   scr   tos 0 sth
   sp 1 /n*  tos  nget
   sp 2 /n*  sp   add
c;

code lrb@  ( addr -- c )
    tos 0    tos  srl
    tos 0  tos  ldub
c;
code lrb!  ( c addr -- )
    tos 0    tos  srl
    sp  0  scr  nget
    bubble
    scr   tos 0 stb
    sp  1 /n*  tos  nget
    sp  2 /n*  sp   add
c;

\ Double word stuff we need to implement with 32-bit only math.
code ld+  ( x1 x2 -- x3 )
   sp 0 /n*  sc1   nget         \ x2.low
   sp 2 /n*  sc3   nget         \ x1.low
   sp 1 /n*  sc2   nget         \ x1.high
   sp 2 /n*  sp    add          \ Pop args
   sc3 sc1  sc1    addcc        \ x3.low
   sc2 tos  tos    addc         \ x3.high
   sc1      sp 0   nput         \ Push result (x3.high already in tos)
c;
code ld-  ( x1 x2 -- x3 )
   sp 0 /n*  sc1   nget         \ x2.low
   sp 2 /n*  sc3   nget         \ x1.low
   sp 1 /n*  sc2   nget         \ x1.high
   sp 2 /n*  sp    add          \ Pop args
   sc3 sc1  sc1    subcc        \ x3.low
   sc2 tos  tos    subc         \ x3.high
   sc1      sp 0   nput         \ Push result (x3.high already in tos)
c;

: l-u/mod ( u1 u2 -- u.rem u.quot )     n->l swap n->l swap u/mod ;
: l-/mod  ( n1 n2 -- n.rem n.quot )     l->n swap l->n swap /mod ;

headerless

\ The following is from obp/fm/kernel/dmuldiv.fth and obp/fm/kernel/dmul.fth:

/l 4 * constant bits/half-l
: l-scale-up    ( n -- h )  bits/half-l <<  ;
: l-scale-down  ( n -- h )  bits/half-l >>  ;
alias l-split-halves lwsplit
: l-half*  ( h1 h2 -- low<< high )  * l-split-halves  swap l-scale-up swap  ;

headers

\ Implement:
\
\ AB * CD = BD + (BC + DA)<<bits/half-cell + AC<<bits/cell
\
\ Where A, B, C, D are half "l" (or 16 bit in this case) values.

: lum*  ( n1 n2 -- xlo xhi )
   l-split-halves   rot l-split-halves  ( b a d c )

   \ Easy case - high halves are both 0, so result is just BD
   2 pick over or  0=  if  drop nip * 0  exit  then

   3 pick 2 pick  * 0 2>r               ( b a d c )  ( r: d.low )

   \ Check for C = 0 and optimize if so
   dup  if                              ( b a d c )  ( r: d.low )
      \ C is not zero, so compute and add BC<<
      3 pick  over l-half*
      2r> ld+ 2>r                       ( b a d c )  ( r: d.intermed )

      \ We are done with B
      2swap nip                         ( d c a )
      \ Check for A = 0 and optimize if so
      dup  if                           ( d c a )
         \ A is not zero, so compute and add DA<< and AC<<<
         rot over l-half*               ( c a da.low da.high )
         2r> ld+ 2>r                    ( c a ) ( r: d.intermed' )
         * 0 swap 2r> ld+
      else
         \ A is zero, so we are finished
         3drop  2r>
      then
   else
      \ C is zero, so all we have to do is compute and add DA<<
      drop rot drop                     ( a d )  ( r: d.low )
      l-half*                           ( low1 high1 )
      2r> ld+
   then
   n->l
;
\ This is the elementary school long-division algorithm, base 2^^16 (on a
\ 32-bit system) or 2^32 (on a 64-bit system).
\ It depends on the assumption that "/" can accurately divide a single-cell
\ (i.e. 32 or 64 bit) number by a half-cell (i.e. 16 or 32 bit) number.
\ Each "digit" is a half-cell number; thus the dividend is a 4-digit
\ number "ABCD" and the divisor is a 2-digit number "EF".

\ It would be interesting to compare the performance of this to a
\ "bit-banging" non-restoring division loop.
: l-um/mod  ( ud u -- urem uquot )
   2dup u>=  if                    \ Overflow; return max-uint for quotient
      0=  if  0 /  then            \ Force divide by zero trap
      2drop   0 -1  n->l exit      ( 0 max-u )
   then                            ( ud u )

   \ Split the divisor into two 16-bit "digits"
   dup l-split-halves              ( ud u ulow uhigh )

   \ If the high "digit" of the divisor is zero, we can skip a lot
   \ of the steps.  In this case, we only have to worry about the
   \ middle two digits of the dividend in developing the quotient.
   ?dup 0=  if                     ( ud u ulow )

      \ Approximate the high digit of the quotient by dividing the "BC"
      \ digits by the "F" digit.  The answer could by low by one, but if
      \ so it will be fixed in the next step.
      2over swap l-scale-down swap l-scale-up + over /  l-scale-up
                                   ( ud u ulow guess<< )

      \ Multiply the trial quotient by the divisor
      rot over lum*                ( ud ulow guess<< udtemp )

      \ Subtract the trial product from the dividend, giving the remainder
      2swap >r >r  ld-  drop       ( error )  ( r: guess<< ulow )

      \ Divide the remainder by the divisor, giving the rest of the
      \ quotient.
      dup r@ l-u/mod nip           ( error guess1 )
      r> r>                        ( error guess1 ulow guess<< )

      \ Merge the two halves of the quotient
      2 pick + >r                  ( error guess1 ulow ) ( r: uquot )

      \ Calculate the remainder
      * -  r>                      ( urem uquot )
      exit
   then                            ( ud u ulow uhigh )

   \ The high divisor digit is non-zero, so we have to deal with
   \ both digits, dividing "ABCD" by "EF".

   \ Approximate the high digit of the quotient.
   3 pick over l-u/mod nip          ( ud u ulow uhigh guess )

   \ Reduce guess by one if "E" = "A"
   dup 1 l-scale-up =  if  1-  then  ( ud u ulow uhigh guess' )

   \ Multiply the trial quotient by the divisor
   3 pick over l-scale-up lum*       ( ud u ulow uhigh guess' ud.temp )

   \ Subtract the trial product from the dividend, giving the remainder
   >r >r 2rot r> r> ld-             ( u ulow uhigh guess' d.resid )

   \ If the remainder is negative, add the divisor and reduce the trial
   \ quotient by one.  The following loop executes at most twice.
   begin  dup 0<  while            ( u ulow uhigh guess' d.resid )
      rot 1- -rot                  ( u ulow uhigh guess+ d.resid )
      4 pick l-scale-up 4 pick ld+ ( u ulow uhigh guess+ d.resid' )
   repeat                          ( u ulow uhigh guess+ +d.resid )

   \ Now we have the correct high quotient digit; save it for later
   rot l-scale-up >r                 ( u ulow uhigh +d.resid ) ( r: q.high )

   \ Repeat the above process, using the partial remainder as the
   \ dividend.  Ulow is no longer needed
   3 roll drop                     ( u uhigh +d.resid )

   \ Trial quotient digit...
   2dup l-scale-up swap l-scale-down + 3 roll l-u/mod nip
                                   ( u +d.resid guess1 ) ( r: q.high )
   dup 1 l-scale-up =  if  1-  then  ( u +d.resid guess1' )

   \ Trial product
   3 pick over lum*                ( u +d.resid guess1' d.err )

   \ New partial remainder
   rot >r ld-                       ( u d.resid' )  ( r: q.high guess1' )

   \ Adjust quotient digit until partial remainder is positive
   begin  dup 0<  while            ( u d.resid' )  ( r: q.high guess1' )
     r> 1- >r                      ( u d.resid' )  ( r: q.high guess1' )
         \ There is no l-m+, so use "0< ld+" instead
     2 pick dup 0< ld+             ( u d.resid'' ) ( r: q.high guess1' )
   repeat                          ( u +d.resid )  ( r: q.high guess1' )

   \ Discard divisior and high cell of quotient (which must be zero)
   rot 2drop                       ( u.rem )

   \ Merge quotient digits
   r> r> +                         ( u.rem u.quot )
;

\ Need to fix # and #s

: l-mu/mod (s d n1 -- rem d.quot )
   >r  0  r@  l-um/mod  r>  swap  >r  l-um/mod  r>
;
: l-#   (s ud1 -- ud2 )
   base @ l-mu/mod                      ( nrem ud2 )
   rot     >digit  hold                 ( ud2 )
;
: l-#s  (s ud -- 0 0 )  begin   l-#  2dup or  0=  until  ;

transient
vocabulary fcode32
also fcode32 definitions

alias ,     l,
alias /n    /l
alias na+   la+
alias cell+ la1+
alias cells /l*
alias b?branch lb?branch
alias +!       l+!
alias >>a      l>>a
alias rshift   lrshift
alias b(of)    lb(of)
alias b(do)    lb(do)
alias b(?do)   lb(?do)
alias b(+loop) lb(+loop)
alias 0=       l0=
alias 0<>      l0<>
alias 0<       l0<
alias 0<=      l0<=
alias 0>       l0>
alias 0>=      l0>=
alias <        l<
alias >        l>
alias =        l=
alias <>       l<>
alias u>       lu>
alias u<=      lu<=
alias u<       lu<
alias u>=      lu>=
alias >=       l>=
alias <=       l<=
alias between  l-between
alias within   l-within
alias max      l-max
alias min      l-min
alias abs      l-abs
alias @        l-@
alias !        l-!
alias 2@       l2@
alias 2!       l2!
alias l@       ll@
alias l!       ll!
alias <w@      l<w@
alias w@       lw@
alias w!       lw!
alias c@       lc@
alias c!       lc!
alias on       lon
alias off      loff
alias base     lbase
alias #out     l#out
alias #line    l#line
alias span     lspan

alias rl!      lrl!
alias rl@      lrl@
alias rw!      lrw!
alias rw@      lrw@
alias rb!      lrb!
alias rb@      lrb@

alias $number  l-$number
alias move     l-move
alias fill     l-fill
alias cpeek    lcpeek
alias wpeek    lwpeek
alias lpeek    llpeek
alias cpoke    lcpoke
alias wpoke    lwpoke
alias lpoke    llpoke

alias d+       ld+
alias d-       ld-
alias um*      lum*
alias um/mod   l-um/mod
alias u/mod    l-u/mod
alias /mod     l-/mod

alias #        l-#
alias #s       l-#s

previous definitions
resident

headerless
variable token-table0-64
variable token-table2-64
variable token-table0-32
variable token-table2-32

token-tables 0 ta+ token@ token-table0-64 token!
token-tables 0 ta+  !null-token

token-tables 2 ta+ token@ token-table2-64 token!
token-tables 2 ta+  !null-token

headers
also fcode32 definitions
fload ${BP}/pkg/fcode/primlist.fth              \ Codes for kernel primitives
fload ${BP}/pkg/fcode/sysprims-nofb.fth \ Codes for system primitives
fload ${BP}/pkg/fcode/obsfcod2.fth
fload ${BP}/pkg/fcode/sysprm64.fth
fload ${BP}/pkg/fcode/regcodes.fth
previous definitions

token-tables 0 ta+ token@ token-table0-32 token!
token-tables 2 ta+ token@ token-table2-32 token!

headers
: fcode-32 ( -- )
   token-table0-32 token@ token-tables 0 ta+ token!
   token-table2-32 token@ token-tables 2 ta+ token!
;

: fcode-64 ( -- )
   token-table0-64 token@ token-tables 0 ta+ token!
   token-table2-64 token@ token-tables 2 ta+ token!
;

fcode-64

stand-init: Chose fcode32 mode
   fcode-32
;