obp/obp/fm/kernel/sparc/move.fth

\ ========== Copyright Header Begin ==========================================
\
\ Hypervisor Software File: move.fth
\
\ Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
\
\  - Do no alter or remove copyright notices
\
\  - Redistribution and use of this software in source and binary forms, with
\    or without modification, are permitted provided that the following
\    conditions are met:
\
\  - Redistribution of source code must retain the above copyright notice,
\    this list of conditions and the following disclaimer.
\
\  - Redistribution in binary form must reproduce the above copyright notice,
\    this list of conditions and the following disclaimer in the
\    documentation and/or other materials provided with the distribution.
\
\    Neither the name of Sun Microsystems, Inc. or the names of contributors
\ may be used to endorse or promote products derived from this software
\ without specific prior written permission.
\
\     This software is provided "AS IS," without a warranty of any kind.
\ ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
\ INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
\ PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN
\ MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR
\ ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
\ DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN
\ OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR
\ FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE
\ DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY,
\ ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF
\ SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
\
\ You acknowledge that this software is not designed, licensed or
\ intended for use in the design, construction, operation or maintenance of
\ any nuclear facility.
\
\ ========== Copyright Header End ============================================
\ move.fth 2.6 93/10/20
\ Copyright 1985-1990 Bradley Forthware

\ Mike Saari's blazing `move' ...
\ This implements the MOVE instruction.  It is optimized
\ for speed, particularly when longword stores may be used.

code (move)  ( src dst cnt -- )
   \ Copies cnt bytes from src to dst and removes all three arguments.
   \ If src > dst the copy proceeds from low addresses to high; otherwise
   \ (including src = dst) it proceeds from high addresses to low --
   \ presumably so that overlapping regions copy correctly.
   \ For cnt >= h# 10, the widest unit (8, 4 or 2 bytes) on which src and dst
   \ can be aligned at the same time carries the bulk of the copy; leftover
   \ bytes, and all shorter copies, are handled by a byte loop.
   \ Instructions commented "(delay)" sit in the delay slot of the branch
   \ assembled by the preceding if / while / else / repeat.

                        \ tos = Count
   sp 1 /n*  scr   nget \ scr = Src address
   sp 0 /n*  sc1   nget \ sc1 = Dst address
                        \ sc2 = Temp. data being transferred
                        \ sc3 = src xor dst, low bits=0 indicates compatible
                        \ sc4 = Working src in loops
                        \       (also temp last+1 address)
                        \ sc5 = Working dst in loops
                        \ sc6 = Loop index

   \ NOTE(review): if `>` assembles a signed condition, src/dst pairs that
   \ straddle the sign boundary pick the opposite direction -- confirm this
   \ cannot matter for overlapping regions.
   scr sc1  %g0  subcc  \ Src > dst?
   > if                 \ Then copy low-to-high
      \ sc3 is also consumed by the high-to-low path below, so this
      \ delay-slot instruction has to take effect on both paths.
      scr sc1  sc3  xor         \ (delay) sc3 low bits=0 indicates compatible

      tos h# 10   %g0  subcc    \ Enough bytes to bother optimizing?
      >= if                     \ Otherwise, just skip to byte move
         sc3 1    %g0  andcc    \ (delay) =0 if at least shortword aligned
         0= if                  \ Otherwise, just skip to byte move

            \ Alignment steps on this path advance scr and sc1 themselves
            \ and shrink tos by the same amount.
            scr 1   %g0  andcc          \ (delay) Not on halfword boundary?
            0<> if              \ Ensure halfword alignment (lower)
               scr 0  sc2  ldub         \ (delay) Load bottom byte
               sc2  sc1 0  stb          \ Store byte
               scr 1  scr  add          \ Advance by one byte
               sc1 1  sc1  add          \  "
               tos 1  tos  sub          \ Decrement count
            then

            sc3 2   %g0  andcc          \ =0 if at least longword aligned
            0= if               \ Otherwise, skip to halfword case

               scr 2  %g0  andcc        \ (delay) Not on longword boundary?
               0<> if           \ Ensure longword alignment (lower)
                  scr 0  sc2  lduh      \ (delay) Load bottom halfword
                  sc2  sc1 0  sth       \ Store halfword
                  scr 2  scr  add       \ Advance by one halfword
                  sc1 2  sc1  add       \  "
                  tos 2  tos  sub       \ Decrement count
               then

               sc3 4  %g0  andcc        \ =0 if doublelong aligned
               0= if            \ Otherwise, skip to longword case

                  scr 4  %g0  andcc     \ (delay) Not on doublelong boundary?
                  0<> if        \ Ensure doublelong alignment (lower)
                     scr 0  sc2  ld     \ (delay) Load bottom longword
                     sc2  sc1 0  st     \ Store longword
                     scr 4  scr  add    \ Advance by one longword
                     sc1 4  sc1  add    \  "
                     tos 4  tos  sub    \ Decrement count
                  then
                                \ Doublelong Copy Loop (low-to-high)
                  \ scr/sc1 are moved past the region this loop copies, and
                  \ sc6 counts up from -index towards 0.  The working pointers
                  \ are biased by -8 because the index is incremented in the
                  \ delay slot before the load and store use it.
                  tos 7  sc6  andn      \ Index = cnt rounded down to multiple of 8
                  scr sc6  scr  add     \ src = src+index
                  scr   8  sc4  sub     \ Working src = src+index-8
                  sc1 sc6  sc1  add     \ dst = dst+index
                  sc1   8  sc5  sub     \ Working dst = dst+index-8
                  %g0 sc6  sc6  subcc   \ Negate index
                  begin
                  < while
                     sc6 8     sc6  addcc       \ (delay) Increment index
32\                  sc4 sc6   sc2  ldd         \ Load doublelong
64\                  sc4 sc6   sc2  ldx         \ Load 64-bit
                  repeat
32\                  sc2   sc5 sc6  std         \ (delay) Store doublelong
64\                  sc2   sc5 sc6  stx         \ (delay) Store 64-bit

                  tos 7   tos   and     \ At end, adjust cnt for few remaining

               else             \ Longword Copy Loop (low-to-high)
                  nop                   \ (delay)
                  tos 3  sc6  andn      \ Index = cnt rounded down to multiple of 4
                  scr sc6  scr  add     \ src = src+index
                  scr   4  sc4  sub     \ Working src = src+index-4
                  sc1 sc6  sc1  add     \ dst = dst+index
                  sc1   4  sc5  sub     \ Working dst = dst+index-4
                  %g0 sc6  sc6  subcc   \ Negate index
                  begin
                  < while
                     sc6 4     sc6  addcc       \ (delay) Increment index
                     sc4 sc6   sc2  ld          \ Load longword
                  repeat
                     sc2   sc5 sc6  st          \ (delay) Store longword

                  tos 3   tos   and     \ At end, adjust cnt for few remaining
               then

            else                \ Halfword Copy Loop (low-to-high)
               nop                      \ (delay)
               tos 1  sc6  andn         \ Index = cnt rounded down to multiple of 2
               scr sc6  scr  add        \ src = src+index
               scr   2  sc4  sub        \ Working src = src+index-2
               sc1 sc6  sc1  add        \ dst = dst+index
               sc1   2  sc5  sub        \ Working dst = dst+index-2
               %g0 sc6  sc6  subcc      \ Negate index
               begin
               < while
                  sc6 2     sc6  addcc          \ (delay) Increment index
                  sc4 sc6   sc2  lduh           \ Load halfword
               repeat
                  sc2   sc5 sc6  sth            \ (delay) Store halfword

               tos 1   tos   and        \ At end, adjust cnt for few remaining
            then
         then
      then      \ Now do a normal byte move for all remaining bytes (at top)

      \ Byte Copy Loop (low-to-high)
      \ Same scheme as the wider loops, with a unit of one byte.
                                \ (tos = bytes remaining)
      scr tos  scr  add
      scr   1  sc4  sub         \ Working src = src+cnt-1
      sc1 tos  sc1  add
      sc1   1  sc5  sub         \ Working dst = dst+cnt-1
      %g0 tos  sc6  subcc       \ Index = -cnt
      begin
      < while
         sc6 1     sc6  addcc   \ (delay) Increment index
         sc4 sc6   sc2  ldub    \ Load byte
      repeat
         sc2   sc5 sc6  stb     \ (delay) Store byte

   else                 \ Copy high-to-low case
      nop                       \ (delay)
      tos h# 10   %g0  subcc    \ Enough bytes to bother optimizing?
      >= if                     \ Otherwise, just skip to byte move
         sc3 1    %g0  andcc    \ (delay) =0 if at least shortword aligned
         0= if                  \ Otherwise, just skip to byte move

            \ On this path scr and sc1 are never advanced; the alignment
            \ steps peel units off the top of the region by shrinking tos.
            scr tos  sc4  add           \ (delay) Calculate last+1 address

            sc4 1    %g0  andcc         \ Not on halfword boundary? (at top)
            0<> if              \ Ensure halfword alignment (at top)
               sc4 -1   sc2  ldub       \ (delay) Load top byte
               tos 1    tos  sub        \ Decrement count
               sc2  sc1 tos  stb        \ Store byte at dst+cnt (new cnt)
               sc4 1    sc4  sub        \ Recalculate last+1 address
            then

            sc3 2   %g0  andcc          \ =0 if at least longword aligned
            0= if               \ Otherwise, skip to halfword case

               sc4 2    %g0  andcc      \ (delay) Not on longword boundary? (at top)
               0<> if           \ Ensure longword alignment (at top)
                  sc4 -2   sc2  lduh    \ (delay) Load top halfword
                  tos 2    tos  sub     \ Decrement count
                  sc2  sc1 tos  sth     \ Store halfword at dst+cnt (new cnt)
                  sc4 2    sc4  sub     \ Recalculate last+1 address
               then

               sc3 4  %g0  andcc        \ =0 if doublelong aligned
               0= if            \ Otherwise, skip to longword case

                  sc4 4  %g0  andcc     \ (delay) Not on doublelong boundary? (top)
                  0<> if        \ Ensure doublelong alignment (at top)
                     sc4 -4   sc2  ld   \ (delay) Load top longword
                     tos 4    tos  sub  \ Decrement count
                     sc2  sc1 tos  st   \ Store longword at dst+cnt (new cnt)
                     \ sc4 is not recalculated here: the doublelong loop
                     \ that follows reloads it from scr.
                  then
                                \ Doublelong Copy Loop (high-to-low)
                  \ sc6 counts down from cnt-8.  The working pointers are
                  \ biased by +8 because the index is decremented in the
                  \ delay slot before the load and store use it.
                  scr 8   sc4   add     \ Working src = src+8
                  sc1 8   sc5   add     \ Working dst = dst+8
                  tos 8   sc6   subcc   \ Loop index = cnt-8
                  begin
                  >= while
                     sc6 8     sc6  subcc       \ (delay) Decrement index
32\                  sc4 sc6   sc2  ldd         \ Load doublelong
64\                  sc4 sc6   sc2  ldx         \ Load 64-bit
                  repeat
32\                  sc2   sc5 sc6  std         \ (delay) Store doublelong
64\                  sc2   sc5 sc6  stx         \ (delay) Store 64-bit

                  tos 7   tos   and     \ At end, adjust cnt for few remaining

               else             \ Longword Copy Loop (high-to-low)
                  nop                   \ (delay)
                  scr 4   sc4   add     \ Working src = src+4
                  sc1 4   sc5   add     \ Working dst = dst+4
                  tos 4   sc6   subcc   \ Loop index = cnt-4
                  begin
                  >= while
                     sc6 4     sc6  subcc       \ (delay) Decrement index
                     sc4 sc6   sc2  ld          \ Load longword
                  repeat
                     sc2   sc5 sc6  st          \ (delay) Store longword

                  tos 3   tos   and     \ At end, adjust cnt for few remaining
               then

            else                \ Halfword Copy Loop (high-to-low)
               nop                      \ (delay)
               scr 2   sc4   add        \ Working src = src+2
               sc1 2   sc5   add        \ Working dst = dst+2
               tos 2   sc6   subcc      \ Loop index = cnt-2
               begin
               >= while
                  sc6 2     sc6  subcc  \ (delay) Decrement index
                  sc4 sc6   sc2  lduh   \ Load halfword
               repeat
                  sc2   sc5 sc6  sth    \ (delay) Store halfword

               tos 1   tos   and        \ At end, adjust cnt for few remaining
            then
         then
      then      \ Now do a normal byte move for all remaining bytes (at bottom)

      \ Byte Copy Loop (high-to-low)
      \ tos itself serves as the loop index here; it is reloaded from the
      \ stack after the loop.
      scr 1     sc4  add        \ Working src = src+1
      sc1 1     sc5  add        \ Working dst = dst+1
      tos 1     tos  subcc      \ Loop index = cnt-1
      begin
      >= while
         tos 1     tos  subcc   \ (delay) Decrement index
         sc4 tos   sc2  ldub    \ Load byte
      repeat
         sc2   sc5 tos  stb     \ (delay) Store byte
   then

   \ cnt was in tos; dst and src occupy the cells at sp+0 and sp+1.  The cell
   \ at sp+2 becomes the new tos, and all three memory cells are released.
   sp 2 /n*  tos nget   \ Delete 3 stack items
   sp 3 /n*  sp  add    \   "
c;
\ Define move as a deferred word and install (move) as its action.
defer move
' (move) is move