/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: sparcv9native.S
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "@(#)sparcv9native.S 1.25 07/02/20 SMI"
#include <sys/asm_linkage.h>
#include "assembly.h"
/*
* Assembly support functions required by the simulator
*/
.section ".text"
ENTRY(sim_atomic_add_32)
ALTENTRY(sim_atomic_add_32_nv)
ALTENTRY(sim_atomic_add_long)
ALTENTRY(sim_atomic_add_long_nv)
ld [%o0], %o2
1:
add %o2, %o1, %o3
cas [%o0], %o2, %o3
cmp %o2, %o3
bne,a,pn %icc, 1b
ld [%o0], %o2
retl
add %o2, %o1, %o0 ! return new value
SET_SIZE(sim_atomic_add_32_nv)
SET_SIZE(sim_atomic_add_32)
SET_SIZE(sim_atomic_add_long_nv)
SET_SIZE(sim_atomic_add_long)
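/*
* The routine above is the classic compare-and-swap retry loop: read the
* word, add, and retry the cas until the value observed by cas matches the
* value we read.  All four entry points alias the same code and return the
* new value.  A rough C equivalent (sketch only, using a GCC builtin for
* illustration):
*
*	uint32_t
*	sim_atomic_add_32_nv_sketch(volatile uint32_t *p, uint32_t delta)
*	{
*		uint32_t oldv, newv;
*		do {
*			oldv = *p;
*			newv = oldv + delta;
*		} while (!__sync_bool_compare_and_swap(p, oldv, newv));
*		return newv;
*	}
*/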
/*
* o0 = pointer to memory location (byte)
* returns:
* o0 = original contents of memory location (0xff is written to the location)
*/
.global host_ldstub
host_ldstub:
ldstub [%o0], %o2
retl
mov %o2, %o0
/*
* o0 = pointer to memory location
* o1 = value to compare with
* o2 = value to swap in if equal
* returns:
* o0 = original contents of memory location
*/
.global host_cas32
host_cas32:
cas [%o0], %o1, %o2
retl
mov %o2, %o0
.global host_cas64
host_cas64:
casx [%o0], %o1, %o2
retl
mov %o2, %o0
.global host_swap
host_swap:
swap [%o0], %o1
retl
mov %o1, %o0
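/*
* Rough C-level semantics of the host atomic primitives above (sketch only;
* the prototypes are assumptions for illustration, the behaviour follows the
* usual SPARC definitions of ldstub/cas/casx/swap):
*
*	uint8_t  host_ldstub(uint8_t *p);     - stores 0xff to *p, returns the old byte
*	uint32_t host_cas32(uint32_t *p, uint32_t cmp, uint32_t swp);
*	uint64_t host_cas64(uint64_t *p, uint64_t cmp, uint64_t swp);
*	                                       - if *p == cmp then *p = swp; returns the old *p
*	uint32_t host_swap(uint32_t *p, uint32_t val);  - stores val to *p, returns the old *p
*/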
/*
* Atomic load of 128 bits (big endian) into two 64-bit result locations.
* Have to do this kludge because SPARC doesn't provide a 128-bit atomic
* fetch that executes at user level.
* I just hope the 64-byte block load is atomic on all architectures.
* %o0 points to the memory location (128 bit aligned).
* %o1 points to the high 64 bits of the result (big endian)
* %o2 points to the low 64 bits of the result (big endian)
*/
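/*
* Conceptually the routine below does no more than this C sketch (the VIS
* block load is what makes the 16-byte read atomic; the _sketch name is
* illustrative only):
*
*	void
*	host_atomic_get128be_sketch(const uint64_t *src, uint64_t *hi, uint64_t *lo)
*	{
*		*hi = src[0];
*		*lo = src[1];
*	}
*
* The assembly reads the whole 64-byte line containing %o0 with a block load
* into %f0-%f14 and then jumps to the store pair for the 16-byte chunk that
* %o0 addresses.
*/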
#define ASI_BLK_P 0xf0 /* VIS 1.0 block load from primary AS */
.global host_atomic_get128be
host_atomic_get128be:
/* align the memory address for a block load */
andn %o0, 0x3f, %o3
membar #Sync
ldda [%o3]ASI_BLK_P, %f0
membar #Sync /* ensure the data is present */
and %o0, 0x30, %o4 /* figure out which of the 4 128-bit blocks we want */
_base:
rd %pc, %o0
add %o4, %o0, %o0
jmpl %o0 + (_xxword0 - _base), %g0
_xxword0:
std %f0, [ %o1 ]
retl
std %f2, [ %o2 ]
nop
_xxword1:
std %f4, [ %o1 ]
retl
std %f6, [ %o2 ]
nop
_xxword2:
std %f8, [ %o1 ]
retl
std %f10, [ %o2 ]
nop
_xxword3:
std %f12, [ %o1 ]
retl
std %f14, [ %o2 ]
nop
/*
* Assembly version of certain simulator instruction implementations.
*/
#define ld_simm16( _Rxip, _offset, _Rdest ) \
ldsh [ _Rxip + _offset ], _Rdest /* 1 instn only !! */
#define ld_simm32( _Rxip, _offset, _Rdest ) \
ldsw [ _Rxip + _offset ], _Rdest /* 1 instn only !! */
#define ldx_ireg( _Rcpup, _Rxip, _offset, _Rdest, _Rscratch ) \
lduh [ _Rxip + _offset ], _Rscratch NL\
ldx [ _Rcpup + _Rscratch ], _Rdest
#define stx_ireg( _Rcpup, _Rxip, _offset, _Rval, _Rscratch ) \
lduh [ _Rxip + _offset ], _Rscratch NL\
stx _Rval, [ _Rcpup + _Rscratch ]
#define ld_fpreg( _ldt, _Rcpup, _Rxip, _Rfprp, _offset, _Rdest, _Rscratch ) \
lduh [ _Rxip + _offset ], _Rscratch NL\
_ldt [ _Rfprp + _Rscratch ], _Rdest
#define st_fpreg( _stt, _Rcpup, _Rxip, _Rfprp, _offset, _Rdest, _Rscratch ) \
lduh [ _Rxip + _offset ], _Rscratch NL\
_stt _Rdest, [ _Rfprp + _Rscratch ]
/*
* %o0 = simcpu_t *
* %o1 = xicache_instn_t *
* %g1 is used as a scratch register by these macros
*/
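/*
* In C terms (sketch only; field names are illustrative) the integer register
* macros amount to:
*
*	off  = *(uint16_t *)((char *)xip + XIC_INTREG_SRC1_OFFSET);
*	src1 = *(uint64_t *)((char *)cpu + off);		ldx_Rsrc1
*
*	off  = *(uint16_t *)((char *)xip + XIC_INTREG_DEST_OFFSET);
*	*(uint64_t *)((char *)cpu + off) = result;		stx_Rdest
*
* i.e. the 16-bit field in the decoded instruction is a byte offset from the
* simcpu_t to the storage for that operand register.
*/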
#define ldx_Rsrc1( _r ) ldx_ireg( %o0, %o1, XIC_INTREG_SRC1_OFFSET, _r, %g1 )
#define ldx_Rsrc2( _r ) ldx_ireg( %o0, %o1, XIC_INTREG_SRC2_OFFSET, _r, %g1 )
#define ld_Simm16( _r ) ld_simm16( %o1, XIC_SIMM16_OFFSET, _r )
#define ld_BrOff32( _r ) ld_simm32( %o1, XIC_BROFF32_OFFSET, _r )
#define ld_BrRegOff32( _r ) ld_simm32( %o1, XIC_BREGOFF32_OFFSET, _r )
#define stx_Rdest( _r ) stx_ireg( %o0, %o1, XIC_INTREG_DEST_OFFSET, _r, %g1 )
#define ldx_Rccr( _r ) ldx [ %o0 + SIMCPU_v9CCR_OFFSET ], _r
#define stx_Rccr( _r ) stx _r, [ %o0 + SIMCPU_v9CCR_OFFSET ]
#define ldx_Rpc( _r ) ldx [ %o0 + SIMCPU_PC_OFFSET ], _r
#define stx_Rpc( _r ) stx _r, [ %o0 + SIMCPU_PC_OFFSET ]
#define ldx_Rnpc( _r ) ldx [ %o0 + SIMCPU_NPC_OFFSET ], _r
#define stx_Rnpc( _r ) stx _r, [ %o0 + SIMCPU_NPC_OFFSET ]
/* FP support */
#define ld_FPsrc1( _ldt, _r ) ld_fpreg( _ldt, %o0, %o1, %o0, XIC_FPREG_SRC1_OFFSET, _r, %g1 )
#define ld_FPsrc2( _ldt, _r ) ld_fpreg( _ldt, %o0, %o1, %o0, XIC_FPREG_SRC2_OFFSET, _r, %g1 )
#define IMPL( _name ) \
NL\
.global decoded_impl_##_name NL\
.align 8 NL\
decoded_impl_##_name:
#define ENDI \
ldx [ %o0 + SIMCPU_NPC_OFFSET ], %o1 NL\
add %o1, 4, %o2 NL\
stx %o1, [ %o0 + SIMCPU_PC_OFFSET ] NL\
retl NL\
stx %o2, [ %o0 + SIMCPU_NPC_OFFSET ] NL\
ENDINSTN
#define ENDINSTN /* nada */
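/*
* Each IMPL( name ) below defines a leaf routine decoded_impl_<name> that the
* simulator calls with %o0 = simcpu_t * and %o1 = xicache_instn_t *.  ENDI is
* the shared epilogue: advance the simulated pc/npc and return.  Roughly, in
* C (sketch only; pc/npc stand for the fields at SIMCPU_PC_OFFSET and
* SIMCPU_NPC_OFFSET):
*
*	void decoded_impl_name(simcpu_t *cpu, xicache_instn_t *xip)
*	{
*		... operate on the simulated state ...
*		cpu->pc  = cpu->npc;
*		cpu->npc = cpu->npc + 4;
*	}
*/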
/*
* For executing floating point operations on SPARC ..
* .. specifically SPARC on SPARC, we use the FSR for the
* cpu being emulated, but disable all floating point traps.
* Then we test whether an exception has occurred after the
* instruction execution, update the simulated FSR
* accordingly, and finally signal a trap if the simulated
* machine actually desired one.
*
* The simcpu_t simulated registers are used as follows:
* v9_fsr_ctrl holds the SPARC fsr control bits ..
* for condition codes, rounding etc. The execution results
* (errors and accumulated errors) are held in the
* v9_fsr_exc pseudo register, and the trap enable bits
* (TEM) are held in the v9_fsr_tem pseudo reg.
*
* Note: we have to use the simcpu_t scratch64 value because we
* can't get the fsr value out of the cpu without using a
* store instruction. This probably should be per exec_thread, but
* it's harder to get at than per simcpu in here.
*/
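/*
* Sketch of the simulated FSR split (bit positions are the architectural V9
* FSR ones; the pseudo-register names are the simcpu_t fields referenced via
* the offsets in assembly.h):
*
*	v9_fsr_ctrl	rounding direction, fcc0-fcc3 and other control/status bits
*	v9_fsr_tem	trap enable mask (TEM, FSR bits 27:23)
*	v9_fsr_exc	current exceptions (cexc, bits 4:0) and accrued
*			exceptions (aexc, bits 9:5)
*
* The host %fsr is loaded with the ctrl bits only, so TEM is zero and the
* host never traps; cexc is inspected after the operation instead.
*/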
#define FPOP_setup_fsr \
ldx [ %o0 + SIMCPU_v9FSR_CTRL_OFFSET ], %o4 NL\
set 0xff, %o3 NL\
sllx %o3, 30, %o3 /* FCC[321] + RD mask */ NL\
or %o3, (3 << 10), %o3 /* | FCC0 mask */ NL\
and %o4, %o3, %o4 NL\
stx %o4, [ %o0 + SIMCPU_SCRATCH64_OFFSET ] NL\
ldx [ %o0 + SIMCPU_SCRATCH64_OFFSET ], %fsr NL\
ldx [ %o0 + SIMCPU_v9GSR_CTRL_OFFSET ], %o4 NL\
wr %o4, %gsr
#ifdef FP_DECODE_DISABLED
#define FPOP_fpu_on_check
#else /* FP_DECODE_DISABLED */
#define FPOP_fpu_on_check \
ldx [ %o0 + SIMCPU_SPECIFICP_OFFSET ], %o4 NL\
ld [ %o4 + SPARCV9_FPU_ON_OFFSET ], %o4 NL\
brz %o4, sparcv9_deliver_fp_disabled_exception NL\
nop
#endif /* FP_DECODE_DISABLED */
#define FPOP_setup \
FPOP_fpu_on_check NL\
FPOP_setup_fsr
#define FPOP_cmp( _ldt, _fpop, _fcc ) \
FPOP_setup NL\
ld_FPsrc1( _ldt, %f0 ) NL\
ld_FPsrc2( _ldt, %f4 ) NL\
_fpop %_fcc, %f0, %f4 NL\
FPOP_cleanup NL\
FPOP_save_fcc NL\
FPOP_ENDI
/* FPOP_save_fcc assumes FPOP_cleanup stored %fsr in scratch */
#define FPOP_save_fcc \
ldx [ %o0 + SIMCPU_SCRATCH64_OFFSET ], %o4 NL\
set 0x3f, %o3 NL\
sllx %o3, 32, %o3 /* FCC[321] mask */ NL\
or %o3, (3 << 10), %o3 /* | FCC0 mask */ NL\
ldx [ %o0 + SIMCPU_v9FSR_CTRL_OFFSET ], %o2 NL\
andn %o2, %o3, %o2 NL\
and %o4, %o3, %o4 NL\
or %o2, %o4, %o2 NL\
stx %o2, [ %o0 + SIMCPU_v9FSR_CTRL_OFFSET ]
/*
* Since we're running as a user process, we're not
* going to see anything here other than ieee754 exceptions.
*
* But these have to be handled carefully, since the simulated FSR
* configuration may require that a proper exception is generated.
*
* Annoyingly, we have to save the fsr somewhere in order to get access to
* the execution results - we use a per-cpu scratch area so we avoid MT conflicts.
*
* So retrieve the FSR, stash it back into ctrl sans error bits
* (tem bits should still be zero).
* Then look for errors from the last executed instruction .. if none, then
* do nothing. If there are some, accumulate them or generate a trap as necessary.
*/
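/*
* A rough C rendering of the cleanup (sketch only; scratch64 is how the
* assembly gets the host %fsr value back into an integer register, and the
* field names are illustrative):
*
*	hostfsr = cpu->scratch64;
*	cpu->v9_fsr_exc &= 0x1f << 5;
*	cexc = hostfsr & 0x1f;
*	if (cexc != 0)
*		sparcv9_fsr_exception_update();
*
* i.e. keep only the previously accrued (aexc) bits, then hand any new cexc
* bits to the update routine; otherwise fall through to the ENDI epilogue.
*/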
/* Must not modify %o0 or %o1 */
#define FPOP_cleanup \
stx %fsr, [ %o0 + SIMCPU_SCRATCH64_OFFSET ] NL\
ldx [ %o0 + SIMCPU_SCRATCH64_OFFSET ], %o4 NL\
/* must clear cexec field if no exceptions */ NL\
ldx [ %o0 + SIMCPU_v9FSR_EXC_OFFSET ], %o3 NL\
and %o3, 0x1f<<5, %o3 NL\
stx %o3, [ %o0 + SIMCPU_v9FSR_EXC_OFFSET ] NL\
andcc %o4, 0x1f, %o2 NL\
bne,a,pt %xcc, sparcv9_fsr_exception_update NL\
rd %pc, %o5 NL\
/* fall through to the update part of the instruction */
#define FPOP_ENDI \
ENDI
/*
* Hand off routine for floating point closure
* If any IEEE exception occurred, we need now to check and see if the simulated
* FSR required a trap to be generated, or the error to be accumulated.
* NOTE: error is not accumulated if a trap is to be delivered.
*/
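/*
* Roughly, in C (sketch only; masks follow the V9 FSR layout and cexc is the
* value handed over in %o2):
*
*	tem = cpu->v9_fsr_tem;
*	ftt = (cpu->v9_fsr_ctrl >> 14) & 7;
*	if (ftt != 0)
*		tem = 0;
*	trap = (cexc & tem) != 0;
*	aexc = trap ? 0 : (cexc << 5);
*	cpu->v9_fsr_exc = cexc | aexc | (cpu->v9_fsr_exc & (0x1f << 5));
*	if (trap)
*		sparcv9_deliver_ieee_exception();
*/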
.section ".text"
.align 8
sparcv9_fsr_exception_update:
ldx [ %o0 + SIMCPU_v9FSR_TEM_OFFSET ], %o3
ldx [ %o0 + SIMCPU_v9FSR_CTRL_OFFSET ], %o4
srlx %o4, 14, %o4 /* FTT field - no trap if non-zero */
and %o4, 7, %o4
movrnz %o4, %g0, %o3
andcc %o2, %o3, %g0
/* OK build the EXC group ... */
sllx %o2, 5, %o3
/* clear the accumulation if trap to be delivered */
movne %xcc, %g0, %o3
ldx [ %o0 + SIMCPU_v9FSR_EXC_OFFSET ], %o4
/* build error and accum bits */
or %o2, %o3, %o2
/* mask out previous accum bits */
and %o4, 0x1f<<5, %o4
/* combine new error and old accum bits */
or %o2, %o4, %o2
/* update the execution FSR state */
stx %o2, [ %o0 + SIMCPU_v9FSR_EXC_OFFSET ]
/* now that the status is updated, branch into the
* C function to deliver the IEEE trap if appropriate
*/
bne,pn %xcc, sparcv9_deliver_ieee_exception
nop
jmp %o5 + 4 /* finish instruction */
nop
/*
* Instruction targets implemented in assembly language to improve
* performance on certain host machines.
*
* This file is for a SPARC V9 host.
*/
.section ".text"
.align 8
/* Args are: %o0 = simcpu_t*, %o1 = xicache_instn_t * */
/*
* SPARC V9 add and subtract instructions
*/
IMPL( sparcv9_add_co_imm )
ba internal_add_co;
ld_Simm16(%o3)
IMPL( sparcv9_add_co_rrr )
ldx_Rsrc2(%o3)
internal_add_co:
ldx_Rsrc1(%o2)
addcc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_add_co_imm_rd0 )
ba internal_add_co_rd0;
ld_Simm16(%o3)
IMPL( sparcv9_add_co_rrr_rd0 )
ldx_Rsrc2(%o3)
internal_add_co_rd0:
ldx_Rsrc1(%o2)
addcc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_add_ci_imm )
ba internal_add_ci;
ld_Simm16(%o3)
IMPL( sparcv9_add_ci_rrr )
ldx_Rsrc2(%o3)
internal_add_ci:
ldx_Rccr(%o4)
wr %o4, %ccr
ldx_Rsrc1(%o2)
addc %o2, %o3, %o2
stx_Rdest(%o2)
ENDI
IMPL( sparcv9_add_cico_imm )
ba internal_add_cico;
ld_Simm16(%o3)
IMPL( sparcv9_add_cico_rrr )
ldx_Rsrc2(%o3)
internal_add_cico:
ldx_Rccr(%o4)
wr %o4, %ccr
ldx_Rsrc1(%o2)
addccc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_add_cico_imm_rd0 )
ba internal_add_cico_rd0;
ld_Simm16(%o3)
IMPL( sparcv9_add_cico_rrr_rd0 )
ldx_Rsrc2(%o3)
internal_add_cico_rd0:
ldx_Rccr(%o4)
wr %o4, %ccr
ldx_Rsrc1(%o2)
addccc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_sub_co_imm )
ba internal_sub_co;
ld_Simm16(%o3)
IMPL( sparcv9_sub_co_rrr )
ldx_Rsrc2(%o3)
internal_sub_co:
ldx_Rsrc1(%o2)
subcc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_sub_co_imm_rd0 )
ba internal_sub_co_rd0;
ld_Simm16(%o3)
IMPL( sparcv9_sub_co_rrr_rd0 )
ldx_Rsrc2(%o3)
internal_sub_co_rd0:
ldx_Rsrc1(%o2)
subcc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_sub_ci_imm )
ba internal_sub_ci;
ld_Simm16(%o3)
IMPL( sparcv9_sub_ci_rrr )
ldx_Rsrc2(%o3)
internal_sub_ci:
ldx_Rccr(%o4)
wr %o4, %ccr
ldx_Rsrc1(%o2)
subc %o2, %o3, %o2
stx_Rdest(%o2)
ENDI
IMPL( sparcv9_sub_cico_imm )
ba internal_sub_cico;
ld_Simm16(%o3)
IMPL( sparcv9_sub_cico_rrr )
ldx_Rsrc2(%o3)
internal_sub_cico:
ldx_Rccr(%o4)
wr %o4, %ccr
ldx_Rsrc1(%o2)
subccc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_sub_cico_imm_rd0 )
ba internal_sub_cico_rd0;
ld_Simm16(%o3)
IMPL( sparcv9_sub_cico_rrr_rd0 )
ldx_Rsrc2(%o3)
internal_sub_cico_rd0:
ldx_Rccr(%o4)
wr %o4, %ccr
ldx_Rsrc1(%o2)
subccc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
/*
* Logic CC instructions ...
*/
IMPL( sparcv9_and_cc_imm )
ba internal_and_cc;
ld_Simm16(%o3)
IMPL( sparcv9_and_cc_rrr )
ldx_Rsrc2(%o3)
internal_and_cc:
ldx_Rsrc1(%o2)
andcc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_and_cc_imm_rd0 )
ba internal_and_cc_rd0;
ld_Simm16(%o3)
IMPL( sparcv9_and_cc_rrr_rd0 )
ldx_Rsrc2(%o3)
internal_and_cc_rd0:
ldx_Rsrc1(%o2)
andcc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
/* sparcv9_andn_cc_imm - synthesised by inverting imm for andcc */
IMPL( sparcv9_andn_cc_rrr )
ldx_Rsrc2(%o3)
ldx_Rsrc1(%o2)
andncc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
/* sparcv9_andn_cc_imm_rd0 - synthesised by inverting imm for andcc */
IMPL( sparcv9_andn_cc_rrr_rd0 )
ldx_Rsrc2(%o3)
ldx_Rsrc1(%o2)
andncc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_or_cc_imm )
ba internal_or_cc;
ld_Simm16(%o3)
IMPL( sparcv9_or_cc_rrr )
ldx_Rsrc2(%o3)
internal_or_cc:
ldx_Rsrc1(%o2)
orcc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_or_cc_imm_rd0 )
ba internal_or_cc_rd0;
ld_Simm16(%o3)
IMPL( sparcv9_or_cc_rrr_rd0 )
ldx_Rsrc2(%o3)
internal_or_cc_rd0:
ldx_Rsrc1(%o2)
orcc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
/* sparcv9_orn_cc_imm - synth by inverting imm field for orcc */
IMPL( sparcv9_orn_cc_rrr )
ldx_Rsrc2(%o3)
ldx_Rsrc1(%o2)
orncc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
/* sparcv9_orn_cc_imm_rd0 - synth by inverting imm field for orcc */
IMPL( sparcv9_orn_cc_rrr_rd0 )
ldx_Rsrc2(%o3)
ldx_Rsrc1(%o2)
orncc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_xor_cc_imm )
ba internal_xor_cc;
ld_Simm16(%o3)
IMPL( sparcv9_xor_cc_rrr )
ldx_Rsrc2(%o3)
internal_xor_cc:
ldx_Rsrc1(%o2)
xorcc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
IMPL( sparcv9_xor_cc_imm_rd0 )
ba internal_xor_cc_rd0;
ld_Simm16(%o3)
IMPL( sparcv9_xor_cc_rrr_rd0 )
ldx_Rsrc2(%o3)
internal_xor_cc_rd0:
ldx_Rsrc1(%o2)
xorcc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
/* sparcv9_xnor_cc_imm - can synth by ~imm using xorcc imm */
IMPL( sparcv9_xnor_cc_rrr )
ldx_Rsrc2(%o3)
ldx_Rsrc1(%o2)
xnorcc %o2, %o3, %o2
stx_Rdest(%o2)
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
/* sparcv9_xnor_cc_imm_rd0 - can synth by ~imm using xorcc imm */
IMPL( sparcv9_xnor_cc_rrr_rd0 )
ldx_Rsrc2(%o3)
ldx_Rsrc1(%o2)
xnorcc %o2, %o3, %g0
rd %ccr, %o3
stx_Rccr(%o3)
ENDI
/*
* Branch instructions change the value of npc.
* We could encode a mask into the xi immediate, but that would
* be slow to extract, and would leave us with a less than useful
* immediate field.
*/
/* delay slot:
* always executed - IF a branch IS taken
* annulled - if the annul bit is set and the branch IS NOT taken
*/
/* Sparc branches are bloody awful - delay slots plus multiple
* condition variants ...
*
* X:	br	Y
*	br	Z
*
* Y:	slot instn
*
* Z:
*
* ... instn @X executes instn @Y, but then X+4 in the DS of X causes a branch to Z
*/
/*
* Policy
*	annul:
*		not taken:		taken:
*		pc = oldnpc + 4		pc = oldnpc
*		npc = oldnpc + 8;	npc = target
*	no annul:
*		pc = npc
*		npc = target | npc + 4;
*/
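/*
* In C the two branch macros below come out as (sketch; "taken" is the
* condition evaluated against the freshly restored %ccr, pc/npc are the old
* simulated values and "target" is pc plus the sign-extended 32-bit offset
* from the decoded instruction):
*
*	BRANCH (no annul):
*		cpu->pc  = npc;
*		cpu->npc = taken ? target : npc + 4;
*
*	BRANCH_an (annul):
*		cpu->pc  = taken ? npc : npc + 4;
*		cpu->npc = taken ? target : npc + 8;
*/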
#define BRANCH( _opc, _cc ) \
ldx_Rccr (%o2) NL\
wr %o2, %ccr NL\
ldx_Rpc (%o3) NL\
ldx_Rnpc(%o4) NL\
ld_BrOff32(%o5) NL\
stx_Rpc (%o4) NL\
add %o3, %o5, %o5 /* branch target */ NL\
add %o4, 4, %o4 /* npc + 4 */ NL\
mov##_opc _cc, %o5, %o4 /* overwrite npc if branch taken */ NL\
retl NL\
stx_Rnpc(%o4)
#define BRANCH_an( _opc, _cc ) \
ldx_Rccr (%o2) NL\
wr %o2, %ccr NL\
ldx_Rpc (%o3) NL\
ldx_Rnpc(%o4) NL\
ld_BrOff32(%o2) NL\
add %o3, %o2, %o3 /* branch target */ NL\
add %o4, 4, %o5 /* oldnpc + 4 */ NL\
add %o4, 8, %g1 /* oldnpc + 8 */ NL\
mov##_opc _cc, %o4, %o5 /* overwrite pc if branch taken */ NL\
mov##_opc _cc, %o3, %g1 /* overwrite npc if branch taken */ NL\
stx_Rpc (%o5) /* no annul ds not squashed */ NL\
retl NL\
stx_Rnpc(%g1)
/* There has to be a better way than to enunciate every instruction form !! */
/* icc version */
IMPL( sparcv9_bne_icc )
BRANCH( ne, %icc )
ENDINSTN
IMPL( sparcv9_be_icc )
BRANCH( e, %icc )
ENDINSTN
IMPL( sparcv9_bg_icc )
BRANCH( g, %icc )
ENDINSTN
IMPL( sparcv9_ble_icc )
BRANCH( le, %icc )
ENDINSTN
IMPL( sparcv9_bge_icc )
BRANCH( ge, %icc )
ENDINSTN
IMPL( sparcv9_bl_icc )
BRANCH( l, %icc )
ENDINSTN
IMPL( sparcv9_bgu_icc )
BRANCH( gu, %icc )
ENDINSTN
IMPL( sparcv9_bleu_icc )
BRANCH( leu, %icc )
ENDINSTN
IMPL( sparcv9_bcc_icc )
BRANCH( cc, %icc )
ENDINSTN
IMPL( sparcv9_bcs_icc )
BRANCH( cs, %icc )
ENDINSTN
IMPL( sparcv9_bpos_icc )
BRANCH( pos, %icc )
ENDINSTN
IMPL( sparcv9_bneg_icc )
BRANCH( neg, %icc )
ENDINSTN
IMPL( sparcv9_bvc_icc )
BRANCH( vc, %icc )
ENDINSTN
IMPL( sparcv9_bvs_icc )
BRANCH( vs, %icc )
ENDINSTN
/* xcc versions */
IMPL( sparcv9_bne_xcc )
BRANCH( ne, %xcc )
ENDINSTN
IMPL( sparcv9_be_xcc )
BRANCH( e, %xcc )
ENDINSTN
IMPL( sparcv9_bg_xcc )
BRANCH( g, %xcc )
ENDINSTN
IMPL( sparcv9_ble_xcc )
BRANCH( le, %xcc )
ENDINSTN
IMPL( sparcv9_bge_xcc )
BRANCH( ge, %xcc )
ENDINSTN
IMPL( sparcv9_bl_xcc )
BRANCH( l, %xcc )
ENDINSTN
IMPL( sparcv9_bgu_xcc )
BRANCH( gu, %xcc )
ENDINSTN
IMPL( sparcv9_bleu_xcc )
BRANCH( leu, %xcc )
ENDINSTN
IMPL( sparcv9_bcc_xcc )
BRANCH( cc, %xcc )
ENDINSTN
IMPL( sparcv9_bcs_xcc )
BRANCH( cs, %xcc )
ENDINSTN
IMPL( sparcv9_bpos_xcc )
BRANCH( pos, %xcc )
ENDINSTN
IMPL( sparcv9_bneg_xcc )
BRANCH( neg, %xcc )
ENDINSTN
IMPL( sparcv9_bvc_xcc )
BRANCH( vc, %xcc )
ENDINSTN
IMPL( sparcv9_bvs_xcc )
BRANCH( vs, %xcc )
ENDINSTN
/*
* Annulled delay slot versions !!
*/
/* icc version */
IMPL( sparcv9_bne_icc_an )
BRANCH_an( ne, %icc )
ENDINSTN
IMPL( sparcv9_be_icc_an )
BRANCH_an( e, %icc )
ENDINSTN
IMPL( sparcv9_bg_icc_an )
BRANCH_an( g, %icc )
ENDINSTN
IMPL( sparcv9_ble_icc_an )
BRANCH_an( le, %icc )
ENDINSTN
IMPL( sparcv9_bge_icc_an )
BRANCH_an( ge, %icc )
ENDINSTN
IMPL( sparcv9_bl_icc_an )
BRANCH_an( l, %icc )
ENDINSTN
IMPL( sparcv9_bgu_icc_an )
BRANCH_an( gu, %icc )
ENDINSTN
IMPL( sparcv9_bleu_icc_an )
BRANCH_an( leu, %icc )
ENDINSTN
IMPL( sparcv9_bcc_icc_an )
BRANCH_an( cc, %icc )
ENDINSTN
IMPL( sparcv9_bcs_icc_an )
BRANCH_an( cs, %icc )
ENDINSTN
IMPL( sparcv9_bpos_icc_an )
BRANCH_an( pos, %icc )
ENDINSTN
IMPL( sparcv9_bneg_icc_an )
BRANCH_an( neg, %icc )
ENDINSTN
IMPL( sparcv9_bvc_icc_an )
BRANCH_an( vc, %icc )
ENDINSTN
IMPL( sparcv9_bvs_icc_an )
BRANCH_an( vs, %icc )
ENDINSTN
/* xcc versions */
IMPL( sparcv9_bne_xcc_an )
BRANCH_an( ne, %xcc )
ENDINSTN
IMPL( sparcv9_be_xcc_an )
BRANCH_an( e, %xcc )
ENDINSTN
IMPL( sparcv9_bg_xcc_an )
BRANCH_an( g, %xcc )
ENDINSTN
IMPL( sparcv9_ble_xcc_an )
BRANCH_an( le, %xcc )
ENDINSTN
IMPL( sparcv9_bge_xcc_an )
BRANCH_an( ge, %xcc )
ENDINSTN
IMPL( sparcv9_bl_xcc_an )
BRANCH_an( l, %xcc )
ENDINSTN
IMPL( sparcv9_bgu_xcc_an )
BRANCH_an( gu, %xcc )
ENDINSTN
IMPL( sparcv9_bleu_xcc_an )
BRANCH_an( leu, %xcc )
ENDINSTN
IMPL( sparcv9_bcc_xcc_an )
BRANCH_an( cc, %xcc )
ENDINSTN
IMPL( sparcv9_bcs_xcc_an )
BRANCH_an( cs, %xcc )
ENDINSTN
IMPL( sparcv9_bpos_xcc_an )
BRANCH_an( pos, %xcc )
ENDINSTN
IMPL( sparcv9_bneg_xcc_an )
BRANCH_an( neg, %xcc )
ENDINSTN
IMPL( sparcv9_bvc_xcc_an )
BRANCH_an( vc, %xcc )
ENDINSTN
IMPL( sparcv9_bvs_xcc_an )
BRANCH_an( vs, %xcc )
ENDINSTN
#undef BRANCH
#undef BRANCH_an
/*
* versions for the branch on register value operations
*/
/*
* Policy
*	no annul:
*		pc = npc
*		npc = target | npc + 4;
*	annul:
*		not taken:		taken:
*		pc = oldnpc + 4		pc = oldnpc
*		npc = oldnpc + 8;	npc = target
*/
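/*
* Same pc/npc policy as the %ccr branches above; the only difference is that
* "taken" is computed directly from the 64-bit value of Rsrc1 via movr, e.g.
* (sketch) taken = (src1 == 0) for brz, taken = ((int64_t)src1 > 0) for brgz.
*/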
#define BRANCH( _opc ) \
ldx_Rsrc1(%o2) NL\
ldx_Rpc (%o3) NL\
ldx_Rnpc(%o4) NL\
ld_BrRegOff32(%o5) NL\
stx_Rpc (%o4) /* pc = npc */ NL\
add %o3, %o5, %o5 /* branch target */ NL\
add %o4, 4, %o4 /* npc + 4 */ NL\
movr##_opc %o2, %o5, %o4 /* overwrite npc if branch taken */ NL\
retl NL\
stx_Rnpc(%o4)
#define BRANCH_an( _opc ) \
ldx_Rsrc1(%o2) NL\
ldx_Rpc (%o3) NL\
ldx_Rnpc(%o4) NL\
add %o4, 4, %o5 /* oldnpc + 4 */ NL\
movr##_opc %o2, %o4, %o5 /* overwrite pc if branch taken */ NL\
stx_Rpc (%o5) /* no annul ds not squashed */ NL\
ld_BrRegOff32(%o5) NL\
add %o3, %o5, %o3 /* branch target */ NL\
add %o4, 8, %o5 /* oldnpc + 8 */ NL\
movr##_opc %o2, %o3, %o5 /* overwrite npc if branch taken */ NL\
retl NL\
stx_Rnpc(%o5)
IMPL( sparcv9_brz )
BRANCH( z )
ENDINSTN
IMPL( sparcv9_brlez )
BRANCH( lez )
ENDINSTN
IMPL( sparcv9_brlz )
BRANCH( lz )
ENDINSTN
IMPL( sparcv9_brnz )
BRANCH( nz )
ENDINSTN
IMPL( sparcv9_brgz )
BRANCH( gz )
ENDINSTN
IMPL( sparcv9_brgez )
BRANCH( gez )
ENDINSTN
IMPL( sparcv9_brz_an )
BRANCH_an( z )
ENDINSTN
IMPL( sparcv9_brlez_an )
BRANCH_an( lez )
ENDINSTN
IMPL( sparcv9_brlz_an )
BRANCH_an( lz )
ENDINSTN
IMPL( sparcv9_brnz_an )
BRANCH_an( nz )
ENDINSTN
IMPL( sparcv9_brgz_an )
BRANCH_an( gz )
ENDINSTN
IMPL( sparcv9_brgez_an )
BRANCH_an( gez )
ENDINSTN
/*
* SPARC floating point compares
*/
IMPL( sparcv9_fcmps_fcc0 )
FPOP_cmp( ld, fcmps, fcc0 )
ENDINSTN
IMPL( sparcv9_fcmps_fcc1 )
FPOP_cmp( ld, fcmps, fcc1 )
ENDINSTN
IMPL( sparcv9_fcmps_fcc2 )
FPOP_cmp( ld, fcmps, fcc2 )
ENDINSTN
IMPL( sparcv9_fcmps_fcc3 )
FPOP_cmp( ld, fcmps, fcc3 )
ENDINSTN
IMPL( sparcv9_fcmpd_fcc0 )
FPOP_cmp( ldd, fcmpd, fcc0 )
ENDINSTN
IMPL( sparcv9_fcmpd_fcc1 )
FPOP_cmp( ldd, fcmpd, fcc1 )
ENDINSTN
IMPL( sparcv9_fcmpd_fcc2 )
FPOP_cmp( ldd, fcmpd, fcc2 )
ENDINSTN
IMPL( sparcv9_fcmpd_fcc3 )
FPOP_cmp( ldd, fcmpd, fcc3 )
ENDINSTN
IMPL( sparcv9_fcmpes_fcc0 )
FPOP_cmp( ld, fcmpes, fcc0 )
ENDINSTN
IMPL( sparcv9_fcmpes_fcc1 )
FPOP_cmp( ld, fcmpes, fcc1 )
ENDINSTN
IMPL( sparcv9_fcmpes_fcc2 )
FPOP_cmp( ld, fcmpes, fcc2 )
ENDINSTN
IMPL( sparcv9_fcmpes_fcc3 )
FPOP_cmp( ld, fcmpes, fcc3 )
ENDINSTN
IMPL( sparcv9_fcmped_fcc0 )
FPOP_cmp( ldd, fcmped, fcc0 )
ENDINSTN
IMPL( sparcv9_fcmped_fcc1 )
FPOP_cmp( ldd, fcmped, fcc1 )
ENDINSTN
IMPL( sparcv9_fcmped_fcc2 )
FPOP_cmp( ldd, fcmped, fcc2 )
ENDINSTN
IMPL( sparcv9_fcmped_fcc3 )
FPOP_cmp( ldd, fcmped, fcc3 )
ENDINSTN