// ========== Copyright Header Begin ==========================================
// OpenSPARC T2 Processor File: SS_Strand.cc
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
// ========== Copyright Header End ============================================
// Plain decode-and-execute step used when no instruction breakpoints are
// armed: look the raw opcode up in the strand's decode table and tail-call
// the decoded handler.
//
// @param pc   current program counter (virtual address)
// @param npc  next program counter
// @param s    strand executing the instruction
// @param i    decode-cache entry holding the fetched opcode
// @return the next pc to execute, as returned by the decoded handler
extern "C" SS_Vaddr
ss_run_dec( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* i )/*{{{*/
{
  SS_Decode d = s->dec_table->decode(i->opc);
  return (d)(pc,npc,s,i,i->opc());
}
/*}}}*/
// Decode-and-execute step used when instruction breakpoints are enabled:
// the opcode is matched against the armed breakpoints BEFORE decode, and a
// hit raises INSTRUCTION_BREAKPOINT through the strand's trap function
// instead of executing the instruction.
//
// @param pc   current program counter (virtual address)
// @param npc  next program counter
// @param s    strand executing the instruction
// @param i    decode-cache entry holding the fetched opcode
// @return the next pc to execute (trap vector pc on breakpoint hit)
extern "C" SS_Vaddr
ss_ibe_dec( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* i )/*{{{*/
{
  if (s->inst_breakpoint_hit(i->opc()))
    return (s->trap)(pc,npc,s,i,SS_Trap::INSTRUCTION_BREAKPOINT);

  SS_Decode d = s->dec_table->decode(i->opc);
  return (d)(pc,npc,s,i,i->opc());
}
/*}}}*/
// Decode step installed on a cache line that has a virtual-address
// breakpoint on it. When the breakpoint does not trigger (or is being
// skipped once to resume past it), it forwards to the normal decoder
// (s->inst_dec, i.e. ss_run_dec or ss_ibe_dec) and then re-installs
// itself so the breakpoint is re-checked on the next visit.
// NOTE(review): this extraction is missing the function's braces and its
// final return statement — do not assume the control flow shown here is
// complete.
extern "C" SS_Vaddr
ss_break_inst_va_dec( SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Instr
* i
)/*{{{*/
// Either we were told to skip the breakpoint this one time (resume),
// or the pc does not match an armed inst-va breakpoint.
if (s
->skip_break_inst_va
|| !s
->test_break_inst_va(pc
))
// The skip flag is one-shot: consume it.
s
->skip_break_inst_va
= false;
// If the breakpoint did not trigger then decode: ss_run_dec or
// ss_ibe_dec ... and execute the instruction.
pc
= (s
->inst_dec
)(pc
,npc
,s
,i
);
// Undo the decode execute caching to make sure resume after
// breakpoint hits the breakpoint again.
i
->exe
= ss_break_inst_va_dec
;
// File-local TTE singletons and store-partial lookup tables.
// NOTE(review): the class bodies and the array initializers were elided
// from this extraction; only the declarations remain visible.
// SS_FailTte: a TTE that always fails translation, used to force the next
// fetch through the MMU (see fail_tte / inst_tte usage elsewhere).
class SS_FailTte
: public SS_Tte
/*{{{*/
// SS_PhysTte: a TTE used for pa2pa (bypass) translations.
class SS_PhysTte
: public SS_Tte
/*{{{*/
tte_flags
= VALID_BIT
; // Make sure we can use valid_bit() test on tte
static SS_FailTte fail_tte
;
// junk_tte: placeholder TTE installed in decode-cache tags that hold no
// valid cached translation.
static SS_FailTte junk_tte
;
// For pa2pa translations we have two areas, memory and i/o. For some
// product we use a third one which is only used when pstate.am masking
// is applied in pa2pa mode.
static SS_PhysTte phys_tte_mem
;
static SS_PhysTte phys_tte_io
;
static SS_PhysTte phys_tte_mem_am
;
// Byte-enable masks for partial-store ASIs (16- and 32-bit flavors);
// initializer data elided from this view.
static uint8_t ss_stpartial16
[] = /*{{{*/
static uint8_t ss_stpartial32
[] = /*{{{*/
// Strand constructor: wires the strand to its parent node, binds the
// decode/memop tables, allocates the per-mode decode caches and the
// windowed/global register files (with ECC shadows), and installs the
// default v9->v8plus conversion hooks.
// NOTE(review): this extraction dropped the constructor's body braces and
// parts of the member-initializer list; the lines below are a partial view.
SS_Strand::SS_Strand( SS_Node
& _parent
, const char* _name
, /*{{{*/
SS_Execute run_exe_table_init
[],
SS_Memop mem_run_table_init
[][4],
SS_Memop mem_trc_table_init
[][4],
SS_MemErrDetector
& _mem_err_detector
)
run_exe_table_ref(run_exe_table_init
),
mem_run_table_ref(mem_run_table_init
),
mem_trc_table_ref(mem_trc_table_init
),
mem_err_detector(_mem_err_detector
),
memory(&SS_Memory::memory
),
inst_wp_va_addr(1), // bit0 == 1 is disabled
trc_inst_tte(&::junk_tte
),
phys_tte_mem(&::phys_tte_mem
),
phys_tte_io(&::phys_tte_io
),
phys_tte_mem_am(&::phys_tte_mem_am
),
sim_update(ss_sim_update
),
trap((SS_TrapFun
)ss_trap
),
invalid_asi(0), // ToDo provide default routine
change_running_from_snapshot(false),
skip_break_inst_va(false),
inst_cache_va_pri_priv(0),
inst_cache_va_nuc_nuc_nuc_priv(0),
inst_cache_va_nuc_nuc_sec_priv(0),
inst_cache_va_nuc_pri_sec_priv(0),
inst_cache_va_pri_user(0),
inst_cache_va_nuc_user(0),
inst_cache_ra_pri_user(0),
inst_cache_ra_nuc_user(0),
inst_cache_ra_pri_priv(0),
inst_cache_ra_nuc_priv(0),
ras_rs1(0), // Ras Irf hooks
ras_frs1(0), // Single precison FP RAS hooks
ras_drs1(0), // Double precison FP RAS hooks
model(0), // derived strand sets pointer model
, data_tlb_read_skip(false)
// One 1KB-aligned decode cache per translation-mode/privilege/context
// combination (va/ra/pa x user/priv x primary/nucleus variants).
inst_cache_va_pri_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_nuc_nuc_nuc_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_nuc_nuc_sec_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_nuc_pri_sec_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_pri_user
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_nuc_user
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_ra_pri_user
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_ra_nuc_user
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_ra_pri_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_ra_nuc_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_pa
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
// Tag each cache with a short id and seed every line with junk_tte
// (no valid cached translation yet).
inst_cache_va_pri_priv
->init ("vpp", &::junk_tte
);
inst_cache_va_nuc_nuc_nuc_priv
->init("vnp-00",&::junk_tte
);
inst_cache_va_nuc_nuc_sec_priv
->init("vnp-0s",&::junk_tte
);
inst_cache_va_nuc_pri_sec_priv
->init("vnp-ps",&::junk_tte
);
inst_cache_va_pri_user
->init ("vpu", &::junk_tte
);
inst_cache_va_nuc_user
->init ("vnu", &::junk_tte
);
inst_cache_ra_pri_user
->init ("rpu", &::junk_tte
);
inst_cache_ra_nuc_user
->init ("rnu", &::junk_tte
);
inst_cache_ra_pri_priv
->init ("rpp", &::junk_tte
);
inst_cache_ra_nuc_priv
->init ("rnp", &::junk_tte
);
inst_cache_pa
->init ("p--", &::junk_tte
);
// Windowed register file: (MAX_WP+1) windows of 16 regs, plus ECC shadow.
wrf
= (uint64_t*)ss_memalign(64,sizeof(uint64_t) * (MAX_WP
+ 1) * 16);
wrf_ecc
= (BL_EccBits
*)ss_malloc( sizeof(BL_EccBits
) * (MAX_WP
+ 1) * 16);
// Global register file: (MAX_GL+1) levels of 8 globals, plus ECC shadow.
grf
= (uint64_t*)ss_memalign(64,sizeof(uint64_t) * (MAX_GL
+ 1) * 8);
grf_ecc
= (BL_EccBits
*)ss_malloc( sizeof(BL_EccBits
) * (MAX_GL
+ 1) * 8);
memset(irf
,0,32 * sizeof(uint64_t));
memset(drf
,0,32 * sizeof(uint64_t));
memset(wrf
,0,(MAX_WP
+ 1) * 16 * sizeof(uint64_t));
memset(wrf_ecc
,0,(MAX_WP
+ 1) * 16 * sizeof(BL_EccBits
));
memset(grf
,0,(MAX_GL
+ 1) * 8 * sizeof(uint64_t));
memset(grf_ecc
,0,(MAX_GL
+ 1) * 8 * sizeof(BL_EccBits
));
rstv_addr
= 0xfffffffff0000000;
sim_state
.fp_disabled(1);
// Copy the file-static partial-store mask tables into the strand.
stpartial16
[h
] = ss_stpartial16
[h
];
stpartial32
[w
] = ss_stpartial32
[w
];
inst_iw
[0].mask_data
= 0;
inst_iw
[1].mask_data
= 0;
ras_enable
= default_ras_enable
;
// Default v9 -> v8plus conversion hooks; a derived product strand may
// override these.
v8_trap
= trap_v9_to_v8plus
;
v8_inst_trap
= inst_trap_v9_to_v8plus
;
v8_data_trap
= data_trap_v9_to_v8plus
;
v8_data_mmu
= data_mmu_v9_to_v8plus
;
v8_invalid_asi
= invalid_asi_v9_to_v8plus
;
v8_inst_dec
= inst_dec_v9_to_v8plus
;
// Destructor. NOTE(review): the body was elided from this extraction —
// do not assume it is empty (the constructor allocates caches and
// register files that presumably get released here; verify against the
// full source).
SS_Strand::~SS_Strand()/*{{{*/
void SS_Strand::hard_reset()/*{{{*/
ss_trap(0,0,this,0,SS_Trap::POWER_ON_RESET
);
void SS_Strand::warm_reset(bool intp
)/*{{{*/
// ToDo we need a better trap type for this and handle the
// common warm reset there like hard and xtrn reset
ss_trap(0,0,this,0,SS_Trap::POWER_ON_RESET
);
void SS_Strand::xtrn_reset()/*{{{*/
ss_trap(0,0,this,0,SS_Trap::EXTERNALLY_INITIATED_RESET
);
// Map a register index to its printable name, dispatching on register
// class (ASR / PR / HPR / SIM / irf / drf / frf).
// NOTE(review): this extraction dropped the switch statements, braces
// and default/return-on-miss paths; only the case bodies remain visible.
const char* SS_Strand::ss_get_state_name( SS_Strand
* s
, SS_Registers::Index index
)/*{{{*/
// Ancillary state registers.
if (SS_Registers::is_asr(index
))
case SS_Registers::ASR_Y
: return s
->y
.name();
case SS_Registers::ASR_CCR
: return s
->ccr
.name();
case SS_Registers::ASR_ASI
: return "asi"; // Internal name is asi_reg ...
case SS_Registers::ASR_TICK
: return s
->tick
.name();
case SS_Registers::ASR_PC
: return s
->pc
.name();
case SS_Registers::ASR_FPRS
: return s
->fprs
.name();
case SS_Registers::ASR_GSR
: return s
->gsr
.name();
case SS_Registers::ASR_SOFTINT_SET
: return SS_SoftintSet().name();
case SS_Registers::ASR_SOFTINT_CLR
: return SS_SoftintClr().name();
case SS_Registers::ASR_SOFTINT
: return s
->softint
.name();
case SS_Registers::ASR_TICK_CMPR
: return s
->tick_cmpr
.name();
case SS_Registers::ASR_STICK
: return s
->stick
.name();
case SS_Registers::ASR_STICK_CMPR
: return s
->stick_cmpr
.name();
// Privileged registers.
else if (SS_Registers::is_pr(index
))
case SS_Registers::PR_TPC
: return s
->tpc
.name();
case SS_Registers::PR_TNPC
: return s
->tnpc
.name();
case SS_Registers::PR_TSTATE
: return s
->tstate
.name();
case SS_Registers::PR_TT
: return s
->tt
.name();
case SS_Registers::PR_TICK
: return "pr_tick"; // Avoid cases with same name
case SS_Registers::PR_TBA
: return s
->tba
.name();
case SS_Registers::PR_PSTATE
: return s
->pstate
.name();
case SS_Registers::PR_TL
: return s
->tl
.name();
case SS_Registers::PR_PIL
: return s
->pil
.name();
case SS_Registers::PR_CWP
: return s
->cwp
.name();
case SS_Registers::PR_CANSAVE
: return s
->cansave
.name();
case SS_Registers::PR_CANRESTORE
: return s
->canrestore
.name();
case SS_Registers::PR_CLEANWIN
: return s
->cleanwin
.name();
case SS_Registers::PR_OTHERWIN
: return s
->otherwin
.name();
case SS_Registers::PR_WSTATE
: return s
->wstate
.name();
case SS_Registers::PR_GL
: return s
->gl
.name();
// Hyperprivileged registers.
else if (SS_Registers::is_hpr(index
))
case SS_Registers::HPR_HPSTATE
: return s
->hpstate
.name();
case SS_Registers::HPR_HTSTATE
: return s
->htstate
.name();
case SS_Registers::HPR_HINTP
: return s
->hintp
.name();
case SS_Registers::HPR_HTBA
: return s
->htba
.name();
case SS_Registers::HPR_HVER
: return s
->hver
.name();
case SS_Registers::HPR_HSTICK_CMPR
: return s
->hstick_cmpr
.name();
// Simulator pseudo-registers.
else if (SS_Registers::is_sim(index
))
case SS_Registers::SIM_MAX_WP
: return s
->max_wp
.name();
case SS_Registers::SIM_MAX_TL
: return s
->max_tl
.name();
case SS_Registers::SIM_MAX_PTL
: return s
->max_ptl
.name();
case SS_Registers::SIM_MAX_GL
: return s
->max_gl
.name();
case SS_Registers::SIM_MAX_PGL
: return s
->max_pgl
.name();
case SS_Registers::SIM_RSTV_ADDR
: return s
->rstv_addr
.name();
case SS_Registers::SIM_NPC
: return s
->npc
.name();
case SS_Registers::SIM_FSR
: return s
->fsr
.name();
case SS_Registers::SIM_STATE
: return s
->sim_state
.name();
case SS_Registers::SIM_STRAND_ID
: return s
->strand_id
.name();
case SS_Registers::SIM_PA_BITS
: return s
->pa_bits
.name();
case SS_Registers::SIM_VA_BITS
: return s
->va_bits
.name();
case SS_Registers::SIM_INST_COUNT
: return s
->inst_count
.name();
// Register-file names come from static name tables, indexed by offset.
else if (SS_Registers::is_irf(index
))
return SS_Registers::irf_name
[index
- SS_Registers::IRF_OFS
];
else if (SS_Registers::is_drf(index
))
return SS_Registers::drf_name
[index
- SS_Registers::DRF_OFS
];
else if (SS_Registers::is_frf(index
))
return SS_Registers::frf_name
[index
- SS_Registers::FRF_OFS
];
// Read a register value from strand s into *value. Register files are
// handled first; everything else goes through a dispatch on the index.
// Virtual-address registers (pc, npc, tpc, tnpc) are sign-extended from
// bit va_bits()-1 to the full 64 bits.
// NOTE(review): this extraction dropped the switch skeleton, braces and
// several guard conditions (e.g. the tl()-related checks that decide the
// NOT_AVAILABLE returns near the end); only case bodies remain visible.
SS_Registers::Error
SS_Strand::ss_get_state( SS_Strand
* s
, SS_Registers::Index index
, uint64_t* value
)/*{{{*/
if (SS_Registers::is_irf(index
))
*value
= s
->irf
[index
- SS_Registers::IRF_OFS
];
else if (SS_Registers::is_drf(index
))
*value
= s
->drf
[index
- SS_Registers::DRF_OFS
];
else if (SS_Registers::is_frf(index
))
*value
= s
->get_frf(SS_Strand::freg_idx2off(index
- SS_Registers::FRF_OFS
));
// For processors that do not implement the full 64bit for virtual address
// we need to make sure that the unimplemented bits are sign extended based
// on bit[va_bits() - 1]. This is done for pc, npc, tpc, tnpc
uint_t sft
= 64 - s
->va_bits();
case SS_Registers::ASR_Y
: *value
= s
->y(); break;
case SS_Registers::ASR_CCR
: *value
= s
->ccr(); break;
case SS_Registers::ASR_ASI
: *value
= s
->asi(); break;
case SS_Registers::ASR_TICK
: *value
= s
->tick(); break;
// Shift up then arithmetic-shift down to sign extend the va.
case SS_Registers::ASR_PC
: *value
= int64_t(s
->pc() << sft
) >> sft
; break;
case SS_Registers::ASR_FPRS
: *value
= s
->fprs(); break;
case SS_Registers::ASR_GSR
: *value
= s
->gsr(); break;
// SOFTINT_SET/CLR are write-only views; reading either returns softint.
case SS_Registers::ASR_SOFTINT
: *value
= s
->softint(); break;
case SS_Registers::ASR_SOFTINT_CLR
: *value
= s
->softint(); break;
case SS_Registers::ASR_SOFTINT_SET
: *value
= s
->softint(); break;
case SS_Registers::ASR_TICK_CMPR
: *value
= s
->tick_cmpr(); break;
case SS_Registers::ASR_STICK
: *value
= s
->stick(); break;
case SS_Registers::ASR_STICK_CMPR
: *value
= s
->stick_cmpr(); break;
case SS_Registers::PR_TICK
: *value
= s
->tick(); break;
case SS_Registers::PR_TBA
: *value
= s
->tba(); break;
case SS_Registers::PR_PSTATE
: *value
= s
->pstate(); break;
case SS_Registers::PR_TL
: *value
= s
->tl(); break;
case SS_Registers::PR_PIL
: *value
= s
->pil(); break;
case SS_Registers::PR_CWP
: *value
= s
->cwp(); break;
case SS_Registers::PR_CANSAVE
: *value
= s
->cansave(); break;
case SS_Registers::PR_CANRESTORE
: *value
= s
->canrestore(); break;
case SS_Registers::PR_CLEANWIN
: *value
= s
->cleanwin(); break;
case SS_Registers::PR_OTHERWIN
: *value
= s
->otherwin(); break;
case SS_Registers::PR_WSTATE
: *value
= s
->wstate(); break;
case SS_Registers::PR_GL
: *value
= s
->gl(); break;
case SS_Registers::HPR_HPSTATE
: *value
= s
->hpstate(); break;
case SS_Registers::HPR_HINTP
: *value
= s
->hintp(); break;
case SS_Registers::HPR_HTBA
: *value
= s
->htba(); break;
case SS_Registers::HPR_HVER
: *value
= s
->hver(); break;
case SS_Registers::HPR_HSTICK_CMPR
: *value
= s
->hstick_cmpr(); break;
case SS_Registers::SIM_MAX_WP
: *value
= s
->max_wp(); break;
case SS_Registers::SIM_MAX_TL
: *value
= s
->max_tl(); break;
case SS_Registers::SIM_MAX_PTL
: *value
= s
->max_ptl(); break;
case SS_Registers::SIM_MAX_GL
: *value
= s
->max_gl(); break;
case SS_Registers::SIM_MAX_PGL
: *value
= s
->max_pgl(); break;
case SS_Registers::SIM_RSTV_ADDR
: *value
= s
->rstv_addr(); break;
case SS_Registers::SIM_NPC
: *value
= int64_t(s
->npc() << sft
) >> sft
; break;
case SS_Registers::SIM_STATE
: *value
= s
->sim_state(); break;
case SS_Registers::SIM_STRAND_ID
: *value
= s
->strand_id(); break;
case SS_Registers::SIM_PA_BITS
: *value
= s
->pa_bits(); break;
case SS_Registers::SIM_VA_BITS
: *value
= s
->va_bits(); break;
case SS_Registers::SIM_INST_COUNT
: *value
= s
->inst_count(); break;
case SS_Registers::SIM_FSR
:
// Trap-stack registers: a guard (elided in this view, presumably a
// tl()==0 check — verify against full source) yields NOT_AVAILABLE.
case SS_Registers::PR_TPC
:
return SS_Registers::NOT_AVAILABLE
;
*value
= int64_t(s
->tpc() << sft
) >> sft
;
case SS_Registers::PR_TNPC
:
return SS_Registers::NOT_AVAILABLE
;
*value
= int64_t(s
->tnpc() << sft
) >> sft
;
case SS_Registers::PR_TSTATE
:
return SS_Registers::NOT_AVAILABLE
;
case SS_Registers::PR_TT
:
return SS_Registers::NOT_AVAILABLE
;
case SS_Registers::HPR_HTSTATE
:
return SS_Registers::NOT_AVAILABLE
;
return SS_Registers::NOT_AVAILABLE
;
// Write value into a register of strand s. Register files first, then a
// dispatch on the index. Virtual-address registers (pc, npc, tpc, tnpc,
// tba, htba, rstv_addr) are sign-extended from bit va_bits()-1.
// NOTE(review): this extraction dropped the switch skeleton, braces and
// the range/guard conditions in front of the VALUE_OUT_OF_RANGE /
// NOT_AVAILABLE returns; only the case bodies remain visible.
SS_Registers::Error
SS_Strand::ss_set_state( SS_Strand
* s
, SS_Registers::Index index
, uint64_t value
)/*{{{*/
// %g0 is hardwired to zero and silently ignores writes.
if (SS_Registers::is_irf(index
))
if (index
!= SS_Registers::G0
)
s
->irf
[index
- SS_Registers::IRF_OFS
] = value
;
else if (SS_Registers::is_drf(index
))
s
->drf
[index
- SS_Registers::DRF_OFS
] = value
;
else if (SS_Registers::is_frf(index
))
s
->get_frf(SS_Strand::freg_idx2off(index
- SS_Registers::FRF_OFS
)) = value
;
// For processors that do not implement the full 64bit for virtual address
// we need to make sure that the unimplemented bits are sign extended based
// on bit[va_bits() - 1]. This is done for pc, npc, tpc, tnpc, and also
// tba, htba and rstv_addr.
uint_t sft
= 64 - s
->va_bits();
case SS_Registers::ASR_Y
: s
->y
.set(value
); break;
case SS_Registers::ASR_CCR
: s
->ccr
.set(value
); break;
case SS_Registers::ASR_ASI
: s
->asi
.set(value
); break;
case SS_Registers::ASR_TICK
: s
->tick
.set(value
); break;
case SS_Registers::ASR_FPRS
: s
->fprs
.set(value
); break;
case SS_Registers::ASR_GSR
: s
->gsr
.set(value
); break;
case SS_Registers::ASR_TICK_CMPR
: s
->tick_cmpr
.set(value
); break;
case SS_Registers::ASR_STICK
: s
->stick
.set(value
); break;
case SS_Registers::ASR_STICK_CMPR
: s
->stick_cmpr
.set(value
); break;
case SS_Registers::PR_TICK
: s
->tick
.set(value
); break;
// Sign extend trap-vector and reset-vector base addresses.
case SS_Registers::PR_TBA
: s
->tba
.set(int64_t(value
<< sft
) >> sft
); break;
case SS_Registers::PR_PSTATE
: s
->pstate
.set(value
); break;
case SS_Registers::PR_CANSAVE
: s
->cansave
.set(value
); break;
case SS_Registers::PR_CANRESTORE
: s
->canrestore
.set(value
); break;
case SS_Registers::PR_CLEANWIN
: s
->cleanwin
.set(value
); break;
case SS_Registers::PR_OTHERWIN
: s
->otherwin
.set(value
); break;
case SS_Registers::PR_WSTATE
: s
->wstate
.set(value
); break;
case SS_Registers::HPR_HPSTATE
: s
->hpstate
.set(value
); break;
case SS_Registers::HPR_HTBA
: s
->htba
.set(int64_t(value
<< sft
) >> sft
); break;
case SS_Registers::HPR_HVER
: s
->hver
.set(value
); break;
case SS_Registers::HPR_HSTICK_CMPR
: s
->hstick_cmpr
.set(value
); break;
case SS_Registers::SIM_RSTV_ADDR
: s
->rstv_addr
.set(int64_t(value
<< sft
) >> sft
); break;
case SS_Registers::SIM_NPC
: s
->npc
.set(int64_t(value
<< sft
) >> sft
); break;
case SS_Registers::SIM_STATE
: s
->sim_state
.set(value
); break;
case SS_Registers::SIM_INST_COUNT
: s
->inst_count
.set(value
); break;
// When the PC is set from the front end then we clean all the
// breakpoint related information that might be pending. This
// means that when we do sim.s0.pc = sim.s0.pc then we will hit
// the breakpoint on pc again if we just hit it.
case SS_Registers::ASR_PC
:
s
->skip_break_inst_va
= false;
s
->break_hit
->triggered
= false;
s
->pc
.set(int64_t(value
<< sft
) >> sft
);
// Window/trap/global level writes are range checked (guard elided in
// this view — presumably value vs max_*; verify against full source).
case SS_Registers::PR_CWP
:
return SS_Registers::VALUE_OUT_OF_RANGE
;
case SS_Registers::PR_TL
:
return SS_Registers::VALUE_OUT_OF_RANGE
;
case SS_Registers::PR_GL
:
return SS_Registers::VALUE_OUT_OF_RANGE
;
case SS_Registers::SIM_FSR
:
s
->set_fsr(); // ToDo: Why do I keep fsr if I have to do get/set_fsr ?
case SS_Registers::PR_PIL
:
// softint writes also re-evaluate pending interrupts.
case SS_Registers::ASR_SOFTINT_SET
:
s
->softint
.set(s
->softint() | value
);
s
->irq
.update_softint(s
);
case SS_Registers::ASR_SOFTINT_CLR
:
s
->softint
.set(s
->softint() &~ value
);
s
->irq
.update_softint(s
);
case SS_Registers::ASR_SOFTINT
:
s
->irq
.update_softint(s
);
// hintp raises/retracts the hstick-match interrupt (condition elided).
case SS_Registers::HPR_HINTP
:
s
->irq
.raise(s
,SS_Interrupt::BIT_HSTICK_MATCH
);
s
->irq
.retract(SS_Interrupt::BIT_HSTICK_MATCH
);
// Trap-stack registers: guard (elided) yields NOT_AVAILABLE.
case SS_Registers::PR_TPC
:
return SS_Registers::NOT_AVAILABLE
;
s
->tpc
.set(int64_t(value
<< sft
) >> sft
);
case SS_Registers::PR_TNPC
:
return SS_Registers::NOT_AVAILABLE
;
s
->tnpc
.set(int64_t(value
<< sft
) >> sft
);
case SS_Registers::PR_TSTATE
:
return SS_Registers::NOT_AVAILABLE
;
case SS_Registers::PR_TT
:
return SS_Registers::NOT_AVAILABLE
;
case SS_Registers::HPR_HTSTATE
:
return SS_Registers::NOT_AVAILABLE
;
return SS_Registers::NOT_AVAILABLE
;
// Write this strand's hierarchical name into dst.
// NOTE(review): the body was elided from this extraction; verify the
// buffer-size contract for dst against the full source.
void SS_Strand::get_name( char* dst
)/*{{{*/
// Save (or restore — SS_SnapShot drives direction) the complete strand
// architectural state: register windows, globals, trap stack, control
// registers, scratchpads, and misc simulator state. Each value is tagged
// with "<prefix>.<name>" via sprintf into ss.tag before ss.val()/snapshot().
// NOTE(review): loop braces and several loop headers were elided from
// this extraction; the visible lines are a partial view of the bodies.
void SS_Strand::snapshot( SS_SnapShot
& ss
)/*{{{*/
// Before we dump the strand state we save all the
// duplicate state to get one coherent view of the strand.
// Save all the registers windows,
for (int wp
=0; wp
<= max_wp(); wp
++)
sprintf(ss
.tag
,"%s.wp.%d.l%d",prefix
,wp
,i
);
ss
.val(&wrf
[wp
* 16 + i
]);
sprintf(ss
.tag
,"%s.wp.%d.i%d",prefix
,wp
,i
);
ss
.val(&wrf
[wp
* 16 + 8 + i
]);
// Save the global register levels (g0 is hardwired, skip it).
for (int gp
=0; gp
<= max_gl(); gp
++)
for (i
=1; i
<8; i
++) // skip %g0
sprintf(ss
.tag
,"%s.gl.%d.g%d",prefix
,gp
,i
);
ss
.val(&grf
[gp
* 8 + i
]);
sprintf(ss
.tag
,"%s.d%d",prefix
,i
);
// Save the trap stack (tl=0 is never used)
for (int tp
=1; tp
<= max_tl(); tp
++)
sprintf(ss
.tag
,"%s.tl.%d.pc",prefix
,tp
); ss
.val(&trap_state
[tp
].pc
);
sprintf(ss
.tag
,"%s.tl.%d.npc",prefix
,tp
); ss
.val(&trap_state
[tp
].npc
);
sprintf(ss
.tag
,"%s.tl.%d.tstate",prefix
,tp
); ss
.val(&trap_state
[tp
].tstate
);
sprintf(ss
.tag
,"%s.tl.%d.htstate",prefix
,tp
); ss
.val(&trap_state
[tp
].htstate
);
sprintf(ss
.tag
,"%s.tl.%d.tt",prefix
,tp
); ss
.val(&trap_state
[tp
].tt
);
// tpc, tnpc, tstat, htstate, and tt get saved/restored by tl_save()/tl_load()
// respectively an that got save above. So we don't have to handle those here.
sprintf(ss
.tag
,"%s.%s",prefix
,pc
.name()); pc
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,npc
.name()); npc
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,gsr
.name()); gsr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,tick
.name()); tick
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,stick
.name()); stick
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,tick_cmpr
.name()); tick_cmpr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,stick_cmpr
.name()); stick_cmpr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,hstick_cmpr
.name()); hstick_cmpr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,softint
.name()); softint
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,tba
.name()); tba
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,htba
.name()); htba
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,rstv_addr
.name()); rstv_addr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,hver
.name()); hver
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,y
.name()); y
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,pstate
.name()); pstate
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,hpstate
.name()); hpstate
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,sim_state
.name()); sim_state
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,ccr
.name()); ccr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,asi
.name()); asi
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,fprs
.name()); fprs
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,tl
.name()); tl
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,gl
.name()); gl
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,cwp
.name()); cwp
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,cansave
.name()); cansave
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,canrestore
.name()); canrestore
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,cleanwin
.name()); cleanwin
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,otherwin
.name()); otherwin
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,wstate
.name()); wstate
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,pil
.name()); pil
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,hintp
.name()); hintp
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,fsr
.name()); fsr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.halted",prefix
); ss
.val(&halted
);
sprintf(ss
.tag
,"%s.%s",prefix
,"inst_dft_asi"); inst_dft_asi
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,"data_dft_asi"); data_dft_asi
.snapshot(ss
);
// Privileged and hyperprivileged scratchpad registers.
for (int ps
=0; ps
< 8; ps
++)
sprintf(ss
.tag
,"%s.scratch.%d",prefix
,ps
);
for (int hs
=0; hs
< 8; hs
++)
sprintf(ss
.tag
,"%s.hscratch.%d",prefix
,hs
);
ss
.val(&hscratchpad
[hs
]);
// Now restore the state that didn't get saved back to
// sensible values. Flush the decode caches, and update
// the simulator state (which decode cache to use etc)
// We flush the decode caches on snapshot, to make running
// from saved snapshot behave identical to run after dump.
// Delete the breakpoint with the given id: unlink it from the master
// break_points list and from its per-kind list (trap / red-mode / inst-va).
// Returns OK on success, ID_UNKNOWN if no breakpoint matches.
// NOTE(review): the id-match test, prev tracking, switch skeleton and
// braces were elided from this extraction.
SS_BreakPoint::Error
SS_Strand::break_delete( SS_BreakPoint::Ident id
)/*{{{*/
for (SS_BreakPoint
* self
= break_points
; self
; self
= self
->next
)
// Unlink self from the master list (prev is the previously visited node).
(prev
? prev
->next
: break_points
) = self
->next
;
// Also unlink from the kind-specific chain.
case SS_BreakPoint::ON_TRAP
:
self
->unlink((SS_BreakPoint
**)&break_trap
[((SS_BreakTrap
*)self
)->tt
]);
case SS_BreakPoint::ON_RED_MODE
:
self
->unlink((SS_BreakPoint
**)&break_red_mode
);
case SS_BreakPoint::ON_INST_VA
:
self
->unlink((SS_BreakPoint
**)&break_inst_va
);
return SS_BreakPoint::OK
;
return SS_BreakPoint::ID_UNKNOWN
;
// Enable the breakpoint with the given id. Returns OK when found,
// ID_UNKNOWN otherwise.
// NOTE(review): the id-match / enable statement inside the loop was
// elided from this extraction.
SS_BreakPoint::Error
SS_Strand::break_enable( SS_BreakPoint::Ident id
)/*{{{*/
for (SS_BreakPoint
* self
= break_points
; self
; self
= self
->next
)
return SS_BreakPoint::OK
;
return SS_BreakPoint::ID_UNKNOWN
;
// Disable the breakpoint with the given id. Returns OK when found,
// ID_UNKNOWN otherwise.
// NOTE(review): the id-match / disable statement inside the loop was
// elided from this extraction.
SS_BreakPoint::Error
SS_Strand::break_disable( SS_BreakPoint::Ident id
)/*{{{*/
for (SS_BreakPoint
* self
= break_points
; self
; self
= self
->next
)
return SS_BreakPoint::OK
;
return SS_BreakPoint::ID_UNKNOWN
;
// Create a breakpoint that fires when trap type _tt is taken. The new
// breakpoint is pushed on the master break_points list and chained into
// the per-trap-type break_trap[] list.
// NOTE(review): the statements updating break_points/break_trap[_tt] and
// the return of the new id were elided from this extraction.
SS_BreakPoint::Ident
SS_Strand::break_on_trap( uint_t _tt
)/*{{{*/
SS_BreakTrap
* bp
= new SS_BreakTrap(SS_Trap::Type(_tt
),break_points
);
bp
->link
= break_trap
[_tt
];
// Create a breakpoint that fires on entry to red mode and chain it into
// the break_red_mode list.
// NOTE(review): the list-head updates and the return of the new id were
// elided from this extraction.
SS_BreakPoint::Ident
SS_Strand::break_on_red_mode()/*{{{*/
SS_BreakRedMode
* bp
= new SS_BreakRedMode(break_points
);
bp
->link
= break_red_mode
;
// Create a breakpoint on instruction fetch from virtual address va and
// chain it into the break_inst_va list.
// NOTE(review): the list-head updates and the return of the new id were
// elided from this extraction.
SS_BreakPoint::Ident
SS_Strand::break_on_inst_va( SS_Vaddr va
)/*{{{*/
SS_BreakInstVa
* bp
= new SS_BreakInstVa(va
,break_points
);
bp
->link
= break_inst_va
;
// Copy the live trap registers (tpc/tnpc/tstate/htstate/tt) into the
// trap_state[] entry for the current trap level.
// NOTE(review): the copy statements were elided from this extraction;
// only the trap-stack pointer setup is visible.
void SS_Strand::tl_save()/*{{{*/
TrapState
* p
= &trap_state
[tl()];
// Load the live trap registers from the trap_state[] entry for the
// current trap level (inverse of tl_save()).
// NOTE(review): the copy statements were elided from this extraction;
// only the trap-stack pointer setup is visible.
void SS_Strand::tl_load()/*{{{*/
TrapState
* p
= &trap_state
[tl()];
void SS_Strand::merge_asi_map()/*{{{*/
parent
->merge_asi_map(asi_map
);
// Re-derive all cached simulator configuration for strand s after a state
// change (pstate/hpstate/tl/fprs/softint/...): privilege level, pending
// tlz interrupt, which decode cache and mmu/ctx to use, default ASIs,
// cache-flush triggers (cle/am/tct/ibe changes), the main decoder
// (ss_run_dec vs ss_ibe_dec), fp-disabled state, the pstate.am address
// mask, and finally forces a run-loop re-entry.
// NOTE(review): most if/else skeleton, braces, and the flush calls were
// elided from this extraction; the lines below are a partial view.
void SS_Strand::ss_sim_update( SS_Strand
* s
)/*{{{*/
// This routine is the main routine to keep all our fancy caches and
// things in sync and do special things only when they need to be done.
// (sim_update)(me) should get called whenever a state change happens
// that requires switching decode caches ets. E.g we track changes to
// pstate, hpstate, tl, fprs.fef (fpr fpu enabled), softint in
// combination with pstate.ie, and lsu_ctr (if used), etc.
bool inst_cache_flush
= false;
uint64_t prev_priv
= s
->sim_state
.priv();
// First figure out which privileged level we're at. Additionally,
// to safe tests, we check whether interrupts are enabled and allowed
// and whether trap level zero traps should be thrown.
s
->sim_state
.priv(SS_HPRV
);
// Some processor implement crosscall with interrupt_vector_trap
// that can be blocked in hypervisor by pstate.ie ... so if we're
// here and ca can deal with them ... just checking.
s
->sim_state
.priv(SS_PRIV
);
s
->sim_state
.priv(SS_USER
);
// Check for trap level zero condition here first. Launch a disrupting
// tlz trap when the tlz condition arises: note TLZ has high priority!
if (!s
->irq
.is_pending(SS_Interrupt::BIT_TRAP_LEVEL_ZERO
) && s
->hpstate
.tlz() && (s
->tl() == 0))
s
->irq
.raise(s
,SS_Interrupt::BIT_TRAP_LEVEL_ZERO
);
// Figure out which decode cache to use. We use multiple caches to
// allow a better hit rate and to safe on privileged checks in critical
// code (mainly memory ops). For ra2pa and va2pa we add extra decode
// caches to differentiate the used default data asi.
// We set the inst_mmu to the one with the correct translation mode.
// ToDo: perhaps we should keep the default data asi in the instruction
// cache so that the primary/nuclues can be folded into one and the
// little/big endian default asi test can use that too. Additionally
// we should do some performance analisys to see which modes are mainly
// used under solaris so that we can proper optimize for those, and
// use flushing for others. 9 decode caches is a little much ...
// Alternatively we might want to 'cache' a few context's ... so
// that we can keep a few users alive ... need to investigate context
// switch patters on s10 boot and application run
if ((s
->sim_state
.priv() == SS_HPRV
) || s
->hpstate
.red())
s
->inst_mmu
= s
->inst_mmu_pa
;
s
->inst_ctx
= s
->inst_ctx_pa
;
s
->inst_cache
= s
->inst_cache_pa
;
// We don't detect context switches in hyperprivileged mode.
// This means that we have to be carefull when caching data
// TTEs. In general when we are in hyperprivileged mode and
// the data mmu is not bypassing, the we don't cache the TTE.
if (!s
->sim_state
.inst_mmu())
s
->inst_mmu
= s
->inst_mmu_ra
;
s
->inst_ctx
= s
->inst_ctx_ra
;
if (s
->sim_state
.priv() == SS_PRIV
)
s
->inst_cache
= s
->inst_cache_ra_nuc_priv
;
s
->inst_cache
= s
->inst_cache_ra_pri_priv
;
s
->inst_cache
= s
->inst_cache_ra_nuc_user
;
s
->inst_cache
= s
->inst_cache_ra_pri_user
;
s
->inst_mmu
= s
->inst_mmu_va
;
s
->inst_ctx
= s
->inst_ctx_va
;
if (s
->sim_state
.priv() == SS_PRIV
)
if (s
->tl() || (s
->inst_ctx
.get_pri() == 0))
if (s
->data_ctx
.get() == 0)
s
->inst_cache
= s
->inst_cache_va_nuc_nuc_nuc_priv
;
else if (s
->data_ctx
.get_pri() == 0)
s
->inst_cache
= s
->inst_cache_va_nuc_nuc_sec_priv
;
s
->inst_cache
= s
->inst_cache_va_nuc_pri_sec_priv
;
s
->inst_cache
= s
->inst_cache_va_pri_priv
;
s
->inst_cache
= s
->inst_cache_va_nuc_user
;
s
->inst_cache
= s
->inst_cache_va_pri_user
;
// Track data-context changes in the selected cache.
if (s
->inst_cache
->data_ctx
.get() != s
->data_ctx
.get())
//fprintf(stderr,"CSD: %6s %d %d %016llx\n",s->inst_cache->id,s->sim_state.mode(),s->tl(),s->data_ctx.get());
s
->inst_cache
->data_ctx
= s
->data_ctx
;
// In red state we only use one decode cache as it's a rare state,
// the same one as in hyper privileged mode (for pa->pa immu).
// Note that we can be at user, privileged or hyper privileged level
// and be in red state. Thus when we enter or leave the red state
// or when privilege level changes whilst in red state, or when TL
// changes between zero and non-zero the decode cache is flushed.
// Additionally keep track of whether we entered red mode or not so
// that we can check breakpoints on red mode entry.
for (SS_BreakPoint
* bp
= s
->break_red_mode
; bp
; bp
= bp
->link
)
if (bp
->enabled
&& bp
->trigger(s
))
else if ((prev_priv
!= s
->sim_state
.priv())
|| ((s
->tl() == 0) && (s
->sim_state
.red_tl() != 0))
|| ((s
->tl() != 0) && (s
->sim_state
.red_tl() == 0)))
s
->sim_state
.red_tl(s
->tl());
// Set the default asi used for fetch and load/store operations
s
->inst_dft_asi
= SS_Asi::ASI_NUCLEUS
;
s
->data_dft_asi
= s
->pstate
.cle() ? SS_Asi::ASI_NUCLEUS_LITTLE
: SS_Asi::ASI_NUCLEUS
;
s
->inst_dft_asi
= SS_Asi::ASI_PRIMARY
;
s
->data_dft_asi
= s
->pstate
.cle() ? SS_Asi::ASI_PRIMARY_LITTLE
: SS_Asi::ASI_PRIMARY
;
// If the default data asi used in the instr cache changed from big to little
// or from little to big we flush the inst cache so that all load and stores
// with default asi pick up the correct asi value.
if (s
->inst_cache
->pstate_cle_flag
!= s
->pstate
.cle())
s
->inst_cache
->pstate_cle_flag
= s
->pstate
.cle();
// Keep track of whether we used 32bit mode or 64bit mode when
// fetching and excuting instructions. If the mode changed compared
// to the previous time the cache was used mode then we flush the cache.
// Note we have to do this mainly because we cache inst and data TTEs.
if (s
->inst_cache
->pstate_am_flag
!= s
->pstate
.am())
s
->inst_cache
->pstate_am_flag
= s
->pstate
.am();
// Keep track of whether pstate.tct was set or not, and if there
// is a change then flush the decode cache in question. The new
// decoded instruction will chech the pstate.tct bit and launch
// transfer control traps when apropriate.
if (s
->inst_cache
->pstate_tct_flag
!= s
->pstate
.tct())
s
->inst_cache
->pstate_tct_flag
= s
->pstate
.tct();
// Keep track of whether hpstate.ibe was set or not, and if there
// is a change then flush the decode cache in question and use a
// decoder that checks the opcode for match before decoding.
if (s
->hpstate
.ibe() || s
->sim_state
.ibe_sig())
s
->sim_state
.ib_enabled(1);
if (!s
->inst_cache
->hpstate_ibe_flag
)
s
->inst_cache
->hpstate_ibe_flag
= true;
s
->sim_state
.ib_enabled(0);
if (s
->inst_cache
->hpstate_ibe_flag
)
s
->inst_cache
->hpstate_ibe_flag
= false;
// Switch the decoder if ib enabled
if (s
->sim_state
.ib_enabled())
// Some products, like N2 for example reversed the instruction
// breakpoint and illegal instruction trap priority. For those
// products the decode functions do the checks all over the place.
// For product that are as SunSparc specifies we can simply flip
// the main decoder, e.g. check before you really decode. The
// others require a decode cache flush so that we go through the
// decode routines again. Some processors have no hpstate.ibe.
// These will have to use the SS_Signal::SET_INST_BRKPT method.
if (SS_Trap::table
[SS_Trap::INSTRUCTION_BREAKPOINT
].priority
<
SS_Trap::table
[SS_Trap::ILLEGAL_INSTRUCTION
].priority
)
s
->inst_dec
= ss_ibe_dec
;
s
->save_dec
= ss_ibe_dec
;
s
->inst_dec
= ss_run_dec
;
s
->save_dec
= ss_run_dec
;
// Nuke the tte pointers in the current decode cache so that we
// start from fresh, as previous cached contents is likely invalid.
for (int l
=0; l
< SS_InstrCache::SIZE
; l
++)
s
->inst_cache
->tag
[l
].tte
= &::junk_tte
;
// Check for fpu enabled or not. We throw the two flags into
// a single flag to make check for enabled fpu easier and faster.
// ToDo the disabled check should be part of decode and we
// should flush the decode cache on enable/disable. This so that
// we can finish a proper ill_ibe implementation.
if (s
->sim_state
.fp_disabled())
if (s
->fprs
.fef() && s
->pstate
.pef())
s
->sim_state
.fp_disabled(0);
if (!(s
->fprs
.fef() && s
->pstate
.pef()))
s
->sim_state
.fp_disabled(1);
// Set the current address mask value to be used (v8 compatible mode)
s
->mask_pstate_am
= ~uint32_t(0);
s
->mask_pstate_am
= ~uint64_t(0);
// Make sure the first executed instruction does check the cached TTE or
// does a lookup in the TLB.
s
->inst_tte
= s
->fail_tte
;
// Now send a message that we need to exit the inner run_loop, and
// reenter if we need to execute more code. This is to propagate
// the configuration that was setup above.
s
->msg
.set_reenter_loop();
void SS_Strand::set_fsr()/*{{{*/
// Split the architectural %fsr into the working copies (fsr_run, fsr_tem,
// fsr_exc) used during hardware-assisted FP emulation; get_fsr() performs
// the inverse recombination.
// For hardware floating point emulation we keep more then one version of the
// frs around. The fsr_run is the %fsr being used for the hardware floating
// point instruction. It has the tem=0 so traps don't occur during execution of
// the hardware floating point instruction, and aexc and cexc are cleared so
// we can find out if a trap occured. The simulated fsr.tem bits are kept in
// fsr_tem and the simulated fsr.eaxc and fsr.cexc are kept in fsr_exc.
// The final simulated fsr is composed by get_fsr().
fsr_run
.tem(0); // clear tem field so we don't take traps
fsr_tem
.cexc(fsr
.tem()); // put tem field in cexc so we don;t have to shift it
fsr_exc
.cexc(fsr
.cexc());
fsr_exc
.aexc(fsr
.aexc());
void SS_Strand::get_fsr()/*{{{*/
// Recompose the simulated %fsr accrued/current exception fields from the
// fsr_exc copy maintained by set_fsr() and the FP emulation path.
fsr
.aexc(fsr_exc
.aexc());
fsr
.cexc(fsr_exc
.cexc());
void SS_Strand::setup_tte_link_tables()/*{{{*/
// Allocate one SS_Chain list head per TLB entry for both the inst and data
// TLBs. These chains link decode-cache tags to the TTE they cache so that
// flush_tte() can invalidate only the affected decode-cache lines.
// NOTE(review): allocated with new[] and no matching delete[] is visible in
// this chunk — confirm ownership/teardown elsewhere in the file.
inst_tte_link
= new SS_Chain
[inst_tlb
->size()];
inst_tte_link_size
= inst_tlb
->size();
data_tte_link
= new SS_Chain
[data_tlb
->size()];
data_tte_link_size
= data_tlb
->size();
void SS_Strand::flush_tte( SS_Tlb
* tlb
, SS_Tte
* tte
)/*{{{*/
// Invalidate every decode-cache tag that currently caches this TTE by
// walking the data- and inst-side link chains for the TTE's index and
// pointing each tag's tte at ::junk_tte (guaranteed mismatch on next use).
// Note a tlb can be both inst and data tlb. So when we flush a
// tte from the decode cache we have to be carefull and check
// both inst and data flavors.
SS_Chain
* head
= &data_tte_link
[tte
->index
];
for (SS_Chain
* next
= head
->next
; head
!= next
; next
= next
->next
)
// Recover the enclosing record from the embedded chain link (container-of
// style pointer arithmetic via ptr_ofs), then poison its tte pointer.
// NOTE(review): the data-side arithmetic uses SS_Instr offsets while the
// cast below is to SS_InstrCache::Tag — confirm against the original source.
char* ptr_tag
= (char*)next
- ptr_ofs(SS_Instr
,lnk
) + ptr_ofs(SS_Instr
,tte
);
((SS_InstrCache::Tag
*)ptr_tag
)->tte
= &::junk_tte
;
SS_Chain
* head
= &inst_tte_link
[tte
->index
];
for (SS_Chain
* next
= head
->next
; head
!= next
; next
= next
->next
)
char* ptr_tag
= (char*)next
- ptr_ofs(SS_InstrCache::Tag
,lnk
);
((SS_InstrCache::Tag
*)ptr_tag
)->tte
= &::junk_tte
;
// Make sure we also clear the current used TTE by the inst_mmu.
// Not doing so will create a timing window in the inst_mmu in which
// a TTE can be reinserted after flush.
// Give the tte back when we are not in cosim mode
void SS_Strand::inst_tlb_set( SS_Tlb
* tlb
)/*{{{*/
// Install a new inst TLB for this strand. The assert shows this path is
// only expected in cosim mode (TLB-copy/sync support); the remainder of
// the body is not visible in this chunk — confirm against the original.
assert(sim_state
.cosim());
void SS_Strand::flush_tte_all()/*{{{*/
// Flushes all used inst TTEs from the decode caches.
// This automatically invalidates all cached data TTEs.
// First wipe out all va2pa and ra2pa TTE's
for (uint_t i
=0; i
< inst_tte_link_size
; i
++)
SS_Chain
* head
= &inst_tte_link
[i
];
SS_Chain
* next
= head
->next
;
// container-of recovery: step back from the embedded link to the Tag,
// then poison its tte so the next lookup misses.
char* ptr_tag
= (char*)next
- ptr_ofs(SS_InstrCache::Tag
,lnk
);
((SS_InstrCache::Tag
*)ptr_tag
)->tte
= &::junk_tte
;
// For the pa2pa decode cache we keep a single link, just
// for the pedantic flush instruction's purpose.
SS_Chain
* next
= phys_tte_link
.next
;
if (&phys_tte_link
!= next
)
while (&phys_tte_link
!= next
)
char* ptr_tag
= (char*)next
- ptr_ofs(SS_InstrCache::Tag
,lnk
);
((SS_InstrCache::Tag
*)ptr_tag
)->tte
= &::junk_tte
;
// In case of separate inst and data tlb we can remove all data TTE links
// without looking as we have removed all instructions from the decode
// cache, e.g. all cached instructions are now invalid.
if (inst_tlb
!= data_tlb
)
for (uint_t i
=0; i
< data_tte_link_size
; i
++)
SS_Chain
* head
= &data_tte_link
[i
];
SS_Chain
* next
= head
->next
;
// Drop the whole per-entry chain: unlink the head and reset it.
data_tte_link
[i
].unlink();
data_tte_link
[i
].clean();
void SS_Strand::flush_va( SS_Vaddr ea
)/*{{{*/
// This routine implements flush for va watchpoints and breakpoints.
// Compute the decode-cache line index for ea, then invalidate that line's
// tag in every flavor of the inst decode cache (va/ra x user/priv, and pa).
// NOTE(review): some cache member names below (e.g. *_nuc_nuc_nuc_priv)
// look garbled in this extraction — confirm against the original source.
uint_t line
= (ea
>> (SS_InstrCache::LINE_BITS
+ 2)) & SS_InstrCache::MASK
;
inst_cache_va_pri_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_pri_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_nuc_nuc_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_nuc_sec_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_pri_sec_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_pri_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_nuc_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_pri_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_nuc_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_pa
->tag
[line
].tte
= &::junk_tte
;
void SS_Strand::flush( SS_Paddr pa
, bool for_ras
)/*{{{*/
// Flush decode-cache state for a physical address. The pa cache line is
// poisoned/invalidated directly; the virtually-indexed caches are swept in
// 8KB strides to hit every possible alias of the PA.
// This routine implements flush as sun sparc specifies. E.g. the
// flush instruction flushes 8 bytes from the caches at the given
// effective address ea. However, currently no product uses this, we just
// flush the whole decode cache instead.
// The routine is mainly used from the frontend when user write code to
assert(!for_ras
|| sim_state
.ras_enabled());
pa
>>= (SS_InstrCache::LINE_BITS
+ 2);
uint_t line
= pa
& SS_InstrCache::MASK
;
inst_cache_pa
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_pa
->tag
[line
].tte
= &::junk_tte
;
// The smallest pages size is 8KB so we loop through the caches
// in 8KB increments and invalidate the virtual TTEs. Note that we
// take the cacheline size into account. Since we flush a PA we
// need to make sure we hit all the aliases.
// Note, if flush causes more decode then we can consider keeping the
// PA of the cacheline as well and do a check before we flush.
// Also we do a lot of storing here, that's bound to be bad for L2.
// Is there a better way of doing this flush ...
const uint_t size8k
= 8192 >> (SS_InstrCache::LINE_BITS
+ 2);
const uint_t mask8k
= size8k
- 1;
// First sweep: set the RAS poison bit in every aliasing line's tag.
// NOTE(review): the loop headers below both run `line` to SIZE; how `line`
// is re-initialized between the two sweeps is not visible in this
// extraction — confirm against the original source.
for (; line
< SS_InstrCache::SIZE
; line
+= size8k
)
inst_cache_va_pri_user
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_nuc_user
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_pri_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_nuc_nuc_nuc_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_nuc_nuc_sec_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_nuc_pri_sec_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_ra_pri_user
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_ra_nuc_user
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_ra_pri_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_ra_nuc_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
// Second sweep: invalidate the tags outright by pointing them at junk_tte.
for (; line
< SS_InstrCache::SIZE
; line
+= size8k
)
inst_cache_va_pri_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_pri_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_nuc_nuc_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_nuc_sec_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_pri_sec_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_pri_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_nuc_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_pri_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_nuc_priv
->tag
[line
].tte
= &::junk_tte
;
void SS_Strand::do_retry()/*{{{*/
// Retry from a trap: restore pstate/hpstate from the trap-stack entries
// (tstate/htstate). Window-pointer and global-level consistency checks
// follow; their bodies are not visible in this extraction — confirm the
// actions taken on cwp/gl mismatch against the original source.
pstate
= tstate
.pstate();
hpstate
= htstate
.hpstate();
if (cwp() != tstate
.cwp())
if (!hpstate
.hpriv() && (gl() > max_pgl()))
bool SS_Strand::peek_signal()/*{{{*/
// Drain the strand's message queue: launch a pending handle-trap if one is
// queued, then dispatch each queued SS_Signal (breakpoints, decode-cache
// flush requests, instruction-breakpoint enables, external interrupts).
// The enclosing switch statement is not visible in this extraction; the
// case labels below imply one — confirm fall-through/break structure
// against the original source.
SS_Trap::Type trap_type
= msg
.get_handle_trap();
if (trap_type
!= SS_Trap::RESERVED
)
assert(!halted
); // An interrupt should wake us up.
(trap
)(pc(),npc(),this,0,trap_type
);
while (msg
.test_signal())
SS_Signal
* sgn
= msg
.get_signal();
case SS_Signal::BREAKPOINT
:
break_hit
= sgn
->breakpoint
;
case SS_Signal::FLUSH_TTE
:
flush_tte(sgn
->tlb
,sgn
->tte
);
case SS_Signal::FLUSH_VA
:
// Called on say inst va watchpoint enable, need to make sure
// we will hit the watchpoint, so flush the decode cache and
// ensure we go through the mmu lookup again to check watchpoint.
case SS_Signal::FLUSH_8B
:
// FLUSH_PA flushes the decode cache lines that could map the pa
case SS_Signal::FLUSH_8K
:
case SS_Signal::FLUSH_ALL
:
// FLUSH_8K flushes the whole decode cache
case SS_Signal::SET_INST_BRKPT
:
// Processors that have no hpstate.ibe bit switch to instruction
// breakpointing this way. Every time it gets enabled we flush the
// decode cache, to make sure we redecode all teh instructions.
sim_state
.ibe_sig(sgn
->ib_enable
);
// In cosim we always have all strands registered with the TLB as
// we can have tlb syncing (copy TLB) going on which complicates
// management of the active strands that use the TLB. It's an
// optiomisation, in cosim we don;t care about performance.
// When not in cosim we optimise the number of strands that receive
// FLUSH_TTE messages to the strands that are running. All strands
// that are not running don;t cache code so don't have to flush ...
inst_tlb
->add_strand(this);
if (inst_tlb
!= data_tlb
)
data_tlb
->add_strand(this);
inst_tlb
->rem_strand(this);
if (inst_tlb
!= data_tlb
)
data_tlb
->rem_strand(this);
// When in halted mode we transition to running or parked and hence
// get out of halted mode.
case SS_Signal::EXTERNAL_INTERRUPT
:
unhalt(); // Force wakeup on external interrupt receive
(internal_interrupt
)(this,sgn
->irq_type
,sgn
->irq_raise
);
fprintf(stderr
,"SS_Strand: Free signal got onto the wrong list\n");
// Now enable the next irq if any. The EXTERNAL_INTERRUPT above
// needs to be prevented from raising an interrupt when one is
if (trap_type
!= SS_Trap::RESERVED
)
sim_state
.irq_pending(0);
void SS_Strand::irq_launch( SS_Trap::Type trap_type
, bool do_time_out
)/*{{{*/
// Queue a disrupting trap for later launch: clear the pending flag, hand
// the trap to the irq_store callback (interrupt sync buffer), and post a
// handle-trap message so the run loop picks it up.
// In cosim mode we can not throw a disrupting trap directly as it
// will cause a PC mismatch for sure. RTL usually tells us when it
// is time to take a trap. So it's here that we store the disrupting
// traps into an interrupt sync buffer and it there that we wait for
// RTL to tell us when it's time to actually launch it.
sim_state
.irq_pending(false);
(irq_store
)(irq_sync
,trap_type
,do_time_out
);
msg
.set_handle_trap(trap_type
);
SS_Vaddr
SS_Strand::ss_trap( SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Instr
* i
, SS_Trap::Type tt
)/*{{{*/
// Central trap delivery routine: normalizes the trap type (tcc corner
// cases, reset aliases, red-state/watchdog promotion), checks trap
// breakpoints and tracer hooks, saves state to the trap stack, updates
// pstate/hpstate/tl/gl/cwp, and computes the new trap-vector pc.
// NOTE(review): several control-flow tokens (braces, a switch header for
// the case labels below, return statements) are not visible in this
// extraction — confirm structure against the original source.
assert(tt
< SS_Trap::MAX_TT
);
// In case the mmu is near an inst va breakpoint and the mmu traps then we
// end up taking this route back to step. So we need to reset the saved
// decoder back to the instruction decoder.
s
->inst_dec
= s
->save_dec
;
// RESET_GEN_WMR & RESET_GEN_DBR are not real trap, they are similar to
// POR, used for warm_reset or dbx_reset, respectively
if ((tt
!= SS_Trap::RESET_GEN_WMR
) && (tt
!= SS_Trap::RESET_GEN_DBR
))
if ((SS_Trap::MAX_TT
> tt
) && (tt
>= SS_Trap::TCC_INSTRUCTION_HPRV
))
// The code generated for the tcc instruction does not make a special case for
// user mode; when only 7 bits of the software trap number are valid iso 8 bits.
// We'll take care of that corner case here.
if (s
->sim_state
.priv() == SS_Strand::SS_USER
)
tt
= SS_Trap::Type(int(tt
) - 0x80);
else if (tt
>= SS_Trap::TCC_INSTRUCTION
)
// Legion hacks the solaris binary and inserts tcc with tt=0x175 that get called
// in hprv mode. They use that to shortcut some expensive bcopy I believe to speed
// up solaris boot on Legion. Since we don't seem to be able to build our own binary for
// solaris we get them from the Legion fooks and hence have to support this hack (bleh!)
if ((s
->sim_state
.priv() == SS_Strand::SS_HPRV
) && !s
->sim_state
.cosim() && (int(tt
) == 0x175))
// Check the breakpoints and if one or more triggered we leave ss_trap()
// before handling the trap. ToDo ... do we want this or do we want
// watchpoint or do we want both.
for (SS_BreakPoint
* bp
= s
->break_trap
[tt
]; bp
; bp
= bp
->link
)
if (bp
->enabled
&& bp
->trigger(s
))
// For breakpoints on disrupting traps we have to reinject the trap
// into the system else it will get dropped.
if (SS_Trap::table
[tt
].disrupting
)
s
->msg
.set_handle_trap(tt
);
// Check the trace callback to see if we are tracing traps
s
->trc_hook
->trap(SS_Trap::Type(tt
));
// Handle the trap and set the state accordingly
if (tt
== SS_Trap::POWER_ON_RESET
)
s
->trap_state
.memset(0); // Clear the whole trap stack
s
->stick_cmpr
.int_dis(1);
s
->hstick_cmpr
.int_dis(1);
s
->pstate
.tle(0).mm(0).pef(1).am(0).priv(1).ie(0).cle(0).tct(0); // priv(1) comes from N2
s
->hpstate
.red(1).hpriv(1).ibe(0).tlz(0);
pc
= s
->rstv_addr() + (SS_Vaddr(tt
) << 5);
s
->cansave
= s
->max_wp() - 1;
s
->cleanwin
= s
->max_wp();
else if ((tt
== SS_Trap::RESET_GEN_WMR
) || (tt
== SS_Trap::RESET_GEN_DBR
))
// wmr and dbr are triggered by reset_gen (0x89_0000_0808)
// RESET_GEN_WMR and RESET_GEN_DBR are not real trap type, they are used
// to distinguish them from POWER_ON_RESET
tt
= SS_Trap::POWER_ON_RESET
;
// The PRM (Chapter 13, June 2006) is in error with respect to TPC and TNPC
// on warm reset. The PC and NPC registers are not protected throughout
// warm reset (specifically during the scan flush) so they go to 0 before
// the trap is taken, and therefore TPC and TNPC at MAXTL get set to 0.
s
->tstate
.gl(s
->gl()).ccr(s
->ccr()).asi(s
->asi()).pstate(0).cwp(s
->cwp());
s
->stick_cmpr
.int_dis(1);
s
->hstick_cmpr
.int_dis(1);
s
->pstate
.tle(0).mm(0).pef(1).am(0).priv(1).ie(0).cle(0).tct(0); // priv(1) comes from N2
s
->hpstate
.red(1).hpriv(1).ibe(0).tlz(0);
pc
= s
->rstv_addr() + (SS_Vaddr(tt
) << 5);
// For processors that don't implement the full 64bits of address space we
// don't expect to keep the full 64bit of tpc and tnpc: sign extend
// the tpn and tnpc on read and write to hide the upper bits. We keep the
// upper bits so that we can properly detect when we fall into a va-hole,
// regardless of whether the pstate.am bit was on for a while or not.
s
->tpc
= pc
& (s
->mask_pstate_am
| (s
->mask_pstate_am
<< s
->va_bits()));
s
->tnpc
= npc
& (s
->mask_pstate_am
| (s
->mask_pstate_am
<< s
->va_bits()));
s
->tstate
.pstate(s
->pstate()).cwp(s
->cwp()).asi(s
->asi()).ccr(s
->ccr()).gl(s
->gl());
s
->htstate
.hpstate(s
->hpstate());
case SS_Trap::WATCHDOG_RESET
:
case SS_Trap::EXTERNALLY_INITIATED_RESET
:
// XIR on N2 does not go through error state
// when tl maxes out. On those product the xir_error_state() flag
// is false. Products like do circulate through the error state.
// So for those the bit is set.
if (s
->sim_state
.xir_error_state() && (s
->tl() == s
->max_tl()))
tt
= SS_Trap::WATCHDOG_RESET
;
case SS_Trap::SOFTWARE_INITIATED_RESET
:
if (s
->tl() == s
->max_tl())
tt
= SS_Trap::WATCHDOG_RESET
;
if ((tt
== SS_Trap::HSTICK_MATCH
) && (s
->sim_state
.hintp_hsp_clear()))
// Promote to watchdog at max trap level, or to red-state exception when
// already in red state or one level below max.
if (s
->tl() == s
->max_tl())
tt
= SS_Trap::WATCHDOG_RESET
;
else if (s
->hpstate
.red() || (s
->tl() == (s
->max_tl() - 1)))
tt
= SS_Trap::RED_STATE_EXCEPTION
;
// Bump trap level and global level, saturating at their maxima.
s
->tl
= (s
->tl() == s
->max_tl()) ? s
->max_tl() : (s
->tl() + 1);
s
->gl
= (s
->gl() == s
->max_gl()) ? s
->max_gl() : (s
->gl() + 1);
if (tt
<= SS_Trap::RED_STATE_EXCEPTION
)
s
->pstate
.mm(0).pef(1).priv(1).am(0).ie(0).tct(0).cle(0).tle(0); // priv(1).tle(0) comes from N2
s
->hpstate
.red(1).hpriv(1).ibe(0).tlz(0);
s
->lsu_ctr
= 0; // from N2 this probably has to move out ... does not have lsu_ctr.
s
->sim_state
.inst_mmu(0);
s
->sim_state
.data_mmu(0);
pc
= s
->rstv_addr() + (SS_Vaddr(tt
) << 5);
else if (!SS_Trap::is_trap_to_priv(tt
) || s
->hpstate
.hpriv())
// Trap delivered to hyperprivileged mode: vector through htba.
s
->pstate
.priv(0).cle(0).pef(1).am(0).ie(0).tct(0);
s
->hpstate
.red(0).hpriv(1).ibe(0);
pc
= s
->htba() + (SS_Vaddr(tt
) << 5);
else if (s
->tl() > s
->max_ptl())
// Privileged trap above max privileged TL escalates to a hypervisor
// watchdog vector.
s
->pstate
.priv(0).cle(0).pef(1).am(0).ie(0).tct(0);
s
->hpstate
.red(0).hpriv(1).ibe(0);
pc
= s
->htba() + (SS_Vaddr(SS_Trap::WATCHDOG_RESET
) << 5);
// Trap delivered to privileged mode: clamp gl and vector through tba.
s
->gl
= (s
->gl() > s
->max_pgl()) ? s
->max_pgl() : s
->gl();
s
->pstate
.priv(1).cle(s
->pstate
.tle()).pef(1).am(0).ie(0).tct(0);
pc
= s
->tba() + ((s
->tl() > 1) ? (SS_Vaddr(1) << 14) : 0) + (SS_Vaddr(tt
) << 5);
// Register-window adjustment for clean-window, spill and fill traps.
if (s
->tt() == SS_Trap::CLEAN_WINDOW
)
s
->cwp
= (s
->cwp() == s
->max_wp()) ? 0 : (s
->cwp() + 1);
else if (SS_Trap::is_spill(SS_Trap::Type(s
->tt())))
s
->cwp
= (s
->cwp() + s
->cansave() + 2) % (s
->max_wp() + 1);
else if (SS_Trap::is_fill(SS_Trap::Type(s
->tt())))
s
->cwp
= s
->cwp() ? (s
->cwp() - 1) : s
->max_wp();
// In trace mode we use the trap_taken flag and only in that
// mode does it make sense as there is a corresponding trap_taken(0).
s
->sim_state
.trap_taken(1);
bool SS_Strand::trap_launch_ok( SS_Trap::Type _tt
)/*{{{*/
// For the outside world (cosim) this function returns true
// when a particular trap can be thrown through (s->trap)(...).
// Gate per trap class: resets/HV interrupts need !hpriv or pstate.ie;
// mondo/resumable traps need ie and !hpriv; interrupt levels additionally
// respect pil. The switch header is not visible in this extraction.
case SS_Trap::POWER_ON_RESET
:
case SS_Trap::EXTERNALLY_INITIATED_RESET
:
// for cosim "INTP 00 00"
case SS_Trap::RESET_GEN_WMR
:
case SS_Trap::CTRL_WORD_QUEUE_INT
:
case SS_Trap::MODULAR_ARITH_INT
:
case SS_Trap::SW_RECOVERABLE_ERROR
:
case SS_Trap::HSTICK_MATCH
:
case SS_Trap::INTERRUPT_VECTOR
:
case SS_Trap::HW_CORRECTED_ERROR
:
return !hpstate
.hpriv() || pstate
.ie();
case SS_Trap::CPU_MONDO_TRAP
:
case SS_Trap::DEV_MONDO_TRAP
:
case SS_Trap::RESUMABLE_ERROR
:
return pstate
.ie() && !hpstate
.hpriv();
case SS_Trap::INTERRUPT_LEVEL_1
:
case SS_Trap::INTERRUPT_LEVEL_2
:
case SS_Trap::INTERRUPT_LEVEL_3
:
case SS_Trap::INTERRUPT_LEVEL_4
:
case SS_Trap::INTERRUPT_LEVEL_5
:
case SS_Trap::INTERRUPT_LEVEL_6
:
case SS_Trap::INTERRUPT_LEVEL_7
:
case SS_Trap::INTERRUPT_LEVEL_8
:
case SS_Trap::INTERRUPT_LEVEL_9
:
case SS_Trap::INTERRUPT_LEVEL_10
:
case SS_Trap::INTERRUPT_LEVEL_11
:
case SS_Trap::INTERRUPT_LEVEL_12
:
case SS_Trap::INTERRUPT_LEVEL_13
:
case SS_Trap::INTERRUPT_LEVEL_14
:
case SS_Trap::INTERRUPT_LEVEL_15
:
return !hpstate
.hpriv() && (pil() <= (_tt
- SS_Trap::INTERRUPT_LEVEL_1
));
// I don;t expect any other traps but lets assume we can take then and complain silently
fprintf(stderr
,"SS_Strand::trap_launch_ok called with tt=%x, update switch\n",_tt
);
SS_AsiSpace::Error
SS_Strand::scratchpad_ld64( SS_Node
*, void* _reg
, SS_Strand
*, SS_Vaddr va
, uint64_t* data
)/*{{{*/
// ASI scratchpad load handler: reads one of eight 64-bit scratchpad
// registers, selected by va bits [5:3]. _reg points at the register array.
uint64_t* reg
= (uint64_t*)_reg
;
*data
= reg
[(va
>> 3) & 7];
SS_AsiSpace::Error
SS_Strand::scratchpad_st64( SS_Node
*, void* _reg
, SS_Strand
*, SS_Vaddr va
, uint64_t data
)/*{{{*/
// ASI scratchpad store handler: writes one of eight 64-bit scratchpad
// registers, selected by va bits [5:3]. _reg points at the register array.
uint64_t* reg
= (uint64_t*)_reg
;
reg
[(va
>> 3) & 7] = data
;
SS_AsiSpace::Error
SS_Strand::lsu_ctr_st64( SS_Node
*, void* _reg
, SS_Strand
* s
, SS_Vaddr
, uint64_t data
)/*{{{*/
// ASI store handler for the LSU control register. Only the cast of the
// register pointer is visible in this extraction; the actual field updates
// are not — confirm against the original source.
SS_LsuCtr
* lc
= (SS_LsuCtr
*)_reg
;
inline uint64_t run_loop( SS_Strand
* s
, SS_Instr
* opc
, SS_InstrCache::Tag
* tag
, uint64_t n
)/*{{{*/
// Hot inner execution loop: locate the decode-cache line for pc, call the
// inst mmu on tag/tte mismatch, then execute cached instructions until n
// runs out, pc leaves the line, or a signal is pending. Returns the
// remaining count. NOTE(review): the enclosing loop over cache lines and
// declarations of pc/line_idx/line_ptr/line_pc/inst_idx/inst_ptr/inst_exe
// are not visible in this extraction.
SS_InstrCache::Tag
* info_ptr
;
// Preload s->inst_mmu so that compiler does not do load followed by call
// Keep s->signal in a local var so that compiler loads well before use
// Keep the instruction to execute in local variable to break load followed by call
SS_InstMmu inst_mmu
= s
->inst_mmu
;
uint64_t sgn
= s
->msg
.is_pending();
// Start the most critical loop of the simulator. Every line of code
// here is overhead ... really. So don't add more code. Loop while we
// still have n instructions to execute and we don;t have to deal with
// an exceptional case (sgn == 0), e.g. have not received a signal.
assert(sizeof(SS_InstrCache::Tag
) == 32); // line_idx << 4 ... below
line_idx
= ((pc
>> (SS_InstrCache::LINE_BITS
+ 2)) & SS_InstrCache::MASK
);
line_ptr
= (SS_Instr
*)((char*)opc
+ (line_idx
<< (SS_Instr::BITS
+ SS_InstrCache::LINE_BITS
)));
info_ptr
= (SS_InstrCache::Tag
*)((char*)tag
+ (line_idx
<< 5)); // see assert above
line_pc
= pc
&~ (SS_InstrCache::LINE_MASK
<< 2);
inst_idx
= (pc
& (SS_InstrCache::LINE_MASK
<< 2)) << (SS_Instr::BITS
- 2 - SS_Instr::SKEW
);
inst_ptr
= (SS_Instr
*)((char*)line_ptr
+ inst_idx
);
inst_exe
= inst_ptr
->exe
;
// Look ahead and prefetch the L2 ... this makes as perform better
ss_prefetch((char*)inst_ptr
+ 1024);
// Check line tte and tag: the instruction cache is smaller then the
// largest page size. If either of them mismatches then call the mmu
// to do an tlb lookup. The current instuction tte is set to the fail_tte
// in case a trap occured in the mmu; bail out as we most certainly
// will have to handle a reenterloop signal due to trap handling.
if ((info_ptr
->tte
!= s
->inst_tte
) || (info_ptr
->tag
!= line_pc
))
pc
= (inst_mmu
)(pc
,s
->npc(),s
,line_ptr
,info_ptr
);
// In case we hit a breakpoint in inst_mmu then we will not execute the
// while loop, but we will account n wrongly by 1 after that, fix it.
// Note sgn != 0 when a breakpoint is hit. Also when inst_tte == fail_tte
// then we will exit here below, so correct by 1 there too.
if (s
->inst_tte
== s
->fail_tte
)
return s
->break_hit
? n
: (n
- 1);
inst_exe
= inst_ptr
->exe
;
sgn
= s
->msg
.is_pending();
// Now execute the instructions in the cache line, until the loop count
// (n) becomes zero, or the pc falls of the line, or we have to
// deal with a signal due to priviledge mode switches, breakpoints, etc.
while (n
&& ((pc
&~ (SS_InstrCache::LINE_MASK
<< 2)) == line_pc
) && (sgn
== 0))
pc
= (inst_exe
)(pc
,s
->npc(),s
,inst_ptr
);
inst_idx
= (pc
& (SS_InstrCache::LINE_MASK
<< 2)) << (SS_Instr::BITS
- 2 - SS_Instr::SKEW
);
inst_ptr
= (SS_Instr
*)((char*)line_ptr
+ inst_idx
);
inst_exe
= inst_ptr
->exe
;
sgn
= s
->msg
.is_pending();
// Breakpoints can be hit during inst_exe. This means that we
// have to break out and not execute the instruction. The code
// above does one n-- too many in case of a breakpoint hit, fix that.
inline uint64_t trc_loop( SS_Strand
* s
, SS_Instr
* opc
, SS_InstrCache::Tag
* tag
, uint64_t n
)/*{{{*/
// Single-step variant of run_loop used for tracing (n is asserted to be 1):
// performs the same line lookup and mmu check, additionally handles
// RAS_TTE_POISON re-validation of the line against the I$ error detector,
// and locks the TTE so the tracer can use it after execution.
assert(n
== 1); // Assert the obvious, execute one instruction, output trace for it if we executed, etc.
// @@ha144505, for now this code is a copy of the run_loop ... bad style
// but I have to fix a tracing bug first. We'll clean this up later.
SS_InstrCache::Tag
* info_ptr
;
// Preload s->inst_mmu so that compiler does not do load followed by call
// Keep s->signal in a local var so that compiler loads well before use
// Keep the instruction to execute in local variable to break load followed by call
SS_InstMmu inst_mmu
= s
->inst_mmu
;
bool sgn
= s
->msg
.is_pending();
// Start the most critical loop of the simulator. Every line of code
// here is overhead ... really. So don't add more code. Loop while we
// still have n instructions to execute and we don;t have to deal with
// an exceptional case (sgn == 0), e.g. have not received a signal.
assert(sizeof(SS_InstrCache::Tag
) == 32); // line_idx << 4 ... below
line_idx
= ((pc
>> (SS_InstrCache::LINE_BITS
+ 2)) & SS_InstrCache::MASK
);
line_ptr
= (SS_Instr
*)((char*)opc
+ (line_idx
<< (SS_Instr::BITS
+ SS_InstrCache::LINE_BITS
)));
info_ptr
= (SS_InstrCache::Tag
*)((char*)tag
+ (line_idx
<< 5)); // see assert above
line_pc
= pc
&~ (SS_InstrCache::LINE_MASK
<< 2);
inst_idx
= (pc
& (SS_InstrCache::LINE_MASK
<< 2)) << (SS_Instr::BITS
- 2 - SS_Instr::SKEW
);
inst_ptr
= (SS_Instr
*)((char*)line_ptr
+ inst_idx
);
inst_exe
= inst_ptr
->exe
;
// Look ahead and prefetch the L2 ... this makes as perform better
ss_prefetch((char*)inst_ptr
+ 1024);
// Check line tte and tag: the instruction cache is smaller then the
// largest page size. If either of them mismatches then call the mmu
// to do an tlb lookup. The current instuction tte is set to the fail_tte
// in case a trap occured in the mmu; bail out as we most certainly
// will have to handle a reenterloop signal due to trap handling.
// Decode cache and RAS I$ cache flushing are now decoupled somewhat to
// reduce the cost of instruction decoding.
// The I$ is now a subset of the decode cache. If a line is cast out of
// the I$, the corresponding decode tte entry is marked RAS_TTE_POISON
// (which just sets the least significant bit), while preserving the tte
// and its decode cache entry.
// The predicate is that:
// 1) Every decode cache entry without RAS_TTE_POISON is in I$ cache.
// 2) Every decode cache entry *with* RAS_TTE_POISON corresponds to an
// I$ line that has a RAS error or has been flushed from the I$.
if (info_ptr
->tte_bits
& SS_Strand::RAS_TTE_POISON
)
assert(s
->sim_state
.ras_enabled());
info_ptr
->tte_bits
&= ~SS_Strand::RAS_TTE_POISON
;
if ((info_ptr
->tte
== s
->inst_tte
) && (info_ptr
->tag
== line_pc
))
// Line otherwise matches: run the fetch-error detector on its PA; on a
// detected error take the trap, fail the TTE and re-poison the line.
SS_Paddr pa
= s
->inst_tte
->trans(line_pc
);
SS_Trap::Type trap_type
=
s
->mem_err_detector
.detect_fetch_err(SS_MemErrDetector::L1_CACHE_AND_STB
,
if (trap_type
!= SS_Trap::NO_TRAP
)
pc
= (s
->trap
)(pc
,s
->npc(),s
,line_ptr
,trap_type
);
s
->inst_tte
= s
->fail_tte
;
info_ptr
->tte_bits
|= SS_Strand::RAS_TTE_POISON
;
if ((info_ptr
->tte
!= s
->inst_tte
) || (info_ptr
->tag
!= line_pc
))
pc
= (inst_mmu
)(pc
,s
->npc(),s
,line_ptr
,info_ptr
);
if (s
->inst_tte
== s
->fail_tte
)
// ToDo: The if cosim below is to make regression pass. The tte used
// for tracing is whatever was used last for tracing. This is not correct
// but it makes the regession pas to get this bug fix out.
// We should rerun all the cosims with this if taken out and the
// tte set to zero. The trace should report a non number for the pa and
// opcode field, say dashes.
if (!s
->sim_state
.cosim())
s
->trc_inst_tte
= s
->inst_tte
;
// return 0 means trace instruction, hence lock the tte for reuse
// as we need it. After outputting the trace the tte is given up.
s
->inst_tlb
->lock_reuse_tte(s
->trc_inst_tte
);
inst_exe
= inst_ptr
->exe
;
sgn
= s
->msg
.is_pending();
// In tracing mode we need to hold on to the TTE until we have traced
// the instruction ... we do this because demap could invalidate the TTE
// in the decode cache of the very instruction we are tracing.
s
->trc_inst_tte
= s
->inst_tte
;
s
->inst_tlb
->lock_reuse_tte(s
->trc_inst_tte
);
// Now execute the instructions in the cache line, until the loop count
// (n) becomes zero, or the pc falls of the line, or we have to
// deal with a signal due to priviledge mode switches, breakpoints, etc.
pc
= (inst_exe
)(pc
,s
->npc(),s
,inst_ptr
);
inst_idx
= (pc
& (SS_InstrCache::LINE_MASK
<< 2)) << (SS_Instr::BITS
- 2 - SS_Instr::SKEW
);
inst_ptr
= (SS_Instr
*)((char*)line_ptr
+ inst_idx
);
inst_exe
= inst_ptr
->exe
;
sgn
= s
->msg
.is_pending();
// Breakpoints can be hit during inst_exe. This means that we
// have to break out and not execute the instruction. The code
// above does one n-- too many in case of a breakpoint hit, fix that.
void SS_Strand::run_tick( uint64_t incr
)/*{{{*/
// run_tick() updates the (s)tick value and compares it
// against the tick_cmpr, stick_cmp and hstick_cmpr
// and raises traps if tick matches occur.
// In cosim mode we don't change the softint stick and tick
// match bits (sm and tm) ourselves: rtl doesn't trust our
// our administration of time ... and rightfully so.
// In other simulation modes the tick and stick value can
// increase by more then 1 between steps, so compare for >=
// i.s.o. the exact match. Additionally to prevent multiple
// interrupts we only check the interrupt when the sm or
// tm bits are getting set (rising edge).
// NOTE(review): the compare conditions feeding the else-if ladder below
// are not visible in this extraction — confirm against the original.
uint64_t t
= stick
.counter() + incr
;
if (sim_state
.ras_enabled())
if(mem_err_detector
.tick_err_detector
)
err_found
= mem_err_detector
.tick_err_detector(this);
// Check tick against hstick_cmpr, stick_cmpr and
// tck_cmpr in that order. The order is important for
// trap prioprity reasons.
// The comparison operation is suppressed, if an error is detected in any of
// tick compare registers
// just drop them, follow me ??
else if (hintp
.hsp() == 0)
unhalt(); // Force wakeup in case the irq is blocked
irq
.raise(this,SS_Interrupt::BIT_HSTICK_MATCH
);
else if (softint
.sm() == 0)
unhalt(); // Force wakeup in case the irq is blocked
irq
.update_softint(this);
else if (softint
.tm() == 0)
unhalt(); // Forece wakeup in case the irq is blocked
irq
.update_softint(this);
uint64_t SS_Strand::run_step( uint64_t count
)/*{{{*/
// Execute up to count instructions through run_loop(), keeping inst_count
// in sync and restoring the running state on exit. Returns instructions
// executed (per the early-exit convention documented below).
uint64_t save_count
= count
;
running
= sim_state
.running();
// If step is called and we're not in running mode, and we
// have some signal in the queue that requires us to leave
// step (breakpoint) then we exit .. and return the number of
// instructions executed .... which in case we got out of
// running mode is 0 and in case of a breakpoint is count.
if ((running
== 0) && peek_signal())
return running
? count
: 0;
inst_count
= inst_count() + save_count
- count
;
return running
? count
: 0;
count
= run_loop(this,inst_cache
->opc
,inst_cache
->tag
,count
);
inst_count
= inst_count() + save_count
- count
;
sim_state
.running(running
);
uint64_t SS_Strand::trc_step( uint64_t count
)/*{{{*/
// Tracing variant of run_step: single-steps through trc_loop(), then emits
// the executed instruction and a full architectural-state comparison dump
// through the tracer hook, releases the traced TTE, and runs the RAS
// per-step error hook.
uint64_t save_count
= count
;
running
= sim_state
.running();
if (running
== 0 && peek_signal())
return running
? count
: 0;
inst_count
= inst_count() + save_count
- count
;
return running
? count
: 0;
SS_InstrCache
* trc_cache
= inst_cache
;
// Now step one instruction in trace mode ...
if (!trc_loop(this,inst_cache
->opc
,inst_cache
->tag
,1))
// Sign-extend the traced pc down to the implemented va width before
// reporting it.
uint_t sft
= 64 - va_bits();
trc_hook
->exe_instr((trc_pc
<< sft
) >> sft
,trc_inst_tte
,trc_cache
->pc_inst(trc_pc
));
trc_hook
->cmp_state(SS_Registers::ASR_PC
,pc());
trc_hook
->cmp_state(SS_Registers::SIM_NPC
,npc());
for (i
=SS_Registers::IRF_OFS
; i
< SS_Registers::IRF_END
; i
= i
+1)
trc_hook
->cmp_state(i
,irf
[i
- SS_Registers::IRF_OFS
]);
for (i
=SS_Registers::DRF_OFS
; i
< SS_Registers::DRF_END
; i
= i
+1)
trc_hook
->cmp_state(i
,drf
[i
- SS_Registers::DRF_OFS
]);
trc_hook
->cmp_state(SS_Registers::ASR_Y
,y());
trc_hook
->cmp_state(SS_Registers::ASR_CCR
,ccr());
trc_hook
->cmp_state(SS_Registers::ASR_ASI
,asi());
trc_hook
->cmp_state(SS_Registers::ASR_FPRS
,fprs());
trc_hook
->cmp_state(SS_Registers::ASR_GSR
,gsr());
trc_hook
->cmp_state(SS_Registers::SIM_FSR
,fsr());
trc_hook
->cmp_state(SS_Registers::PR_PSTATE
,pstate());
trc_hook
->cmp_state(SS_Registers::HPR_HPSTATE
,hpstate());
trc_hook
->cmp_state(SS_Registers::PR_GL
,gl());
trc_hook
->cmp_state(SS_Registers::PR_TBA
,tba());
trc_hook
->cmp_state(SS_Registers::HPR_HTBA
,htba());
trc_hook
->cmp_state(SS_Registers::PR_TL
,tl());
trc_hook
->cmp_state(SS_Registers::PR_TT
,tt());
trc_hook
->cmp_state(SS_Registers::PR_TPC
,SS_Vaddr(tpc() << (64 - va_bits())) >> (64 - va_bits()));
trc_hook
->cmp_state(SS_Registers::PR_TNPC
,SS_Vaddr(tnpc() << (64 - va_bits())) >> (64 - va_bits()));
trc_hook
->cmp_state(SS_Registers::PR_TSTATE
,tstate());
trc_hook
->cmp_state(SS_Registers::HPR_HTSTATE
,htstate());
trc_hook
->cmp_state(SS_Registers::PR_CWP
,cwp());
trc_hook
->cmp_state(SS_Registers::PR_CANSAVE
,cansave());
trc_hook
->cmp_state(SS_Registers::PR_CANRESTORE
,canrestore());
trc_hook
->cmp_state(SS_Registers::PR_CLEANWIN
,cleanwin());
trc_hook
->cmp_state(SS_Registers::PR_OTHERWIN
,otherwin());
trc_hook
->cmp_state(SS_Registers::ASR_TICK_CMPR
,tick_cmpr());
trc_hook
->cmp_state(SS_Registers::ASR_STICK_CMPR
,stick_cmpr());
trc_hook
->cmp_state(SS_Registers::HPR_HSTICK_CMPR
,hstick_cmpr());
trc_hook
->cmp_state(SS_Registers::HPR_HINTP
,hintp());
trc_hook
->cmp_state(SS_Registers::PR_PIL
,pil());
trc_hook
->cmp_state(SS_Registers::ASR_SOFTINT
,softint());
// Now release the trc_tte as we are not using it any longer.
inst_tlb
->reuse_tte(trc_inst_tte
);
if(mem_err_detector
.step_hook
)
//traps taken here have priorities that don't follow the trap priority hierarchy
SS_Trap::Type trap_type
= mem_err_detector
.step_hook(this);
if(trap_type
!= SS_Trap::NO_TRAP
)
(trap
)(pc(),npc(),this,0,trap_type
);
inst_count
= inst_count() + save_count
- count
;
sim_state
.running(running
);
void SS_Strand::add_tracer( SS_Tracer
* trc
)/*{{{*/
// Attach a tracer; when the hook chain requires memory tracing (and RAS
// mode is off) switch the memory dispatch table to the tracing variant.
// The list-insertion itself is not visible in this extraction.
if (trc_hook
->need_mem_trc() && !sim_state
.ras_enabled())
mem_table
= mem_trc_table_ref
;
void SS_Strand::del_tracer( SS_Tracer
* trc
)/*{{{*/
// Detach a tracer from the singly linked trc_hook chain; if no remaining
// tracer needs memory tracing (and RAS is off) restore the non-tracing
// memory dispatch table. The stderr message reports a request to delete a
// tracer that was never connected.
trc_hook
= trc_hook
->next
;
SS_Tracer
* head
= trc_hook
;
for (SS_Tracer
* next
= trc_hook
->next
; trc
&& next
; head
= next
, next
= next
->next
)
if (((trc_hook
== 0) || !trc_hook
->need_mem_trc()) && !sim_state
.ras_enabled())
mem_table
= mem_run_table_ref
;
fprintf(stderr
,"ERROR: SS_Tracer: There is no tracer connected\n");
static inline SS_Instr
* line_index( SS_Instr
* line
, uint_t n
)/*{{{*/
// Return a pointer to the n-th instruction slot within a decode-cache
// line; slots are spaced 1 << (SS_Instr::BITS - SS_Instr::SKEW) bytes
// apart, hence the raw char* arithmetic.
return (SS_Instr
*)((char*)line
+ (n
<< (SS_Instr::BITS
- SS_Instr::SKEW
)));
SS_Vaddr
mem_run_fetch512( SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Instr
* line
, SS_Vaddr va
, SS_Tte
* tte
)/*{{{*/
// Refill one decode-cache line (16 x 32-bit opcodes = 64 bytes) from
// memory at the PA translated from va, then set every slot's exe hook to
// the strand's current decoder so each instruction is decoded on first
// execution.
SS_Paddr pa
= tte
->trans(va
);
#if defined(MEMORY_MSYNC)
((SS_MsyncMemory
*)(s
->memory
))->msync_info(s
->strand_id(),va
);
#elif defined(MEMORY_EXTERNAL)
((SS_ExternalMemory
*)(s
->memory
))->set_strand_id(s
->strand_id());
((SS_Memory
*)(s
->memory
))->SS_Memory::fetch512(pa
,s
->mem_data
);
uint64_t d0
= s
->mem_data
[0];
uint64_t d1
= s
->mem_data
[1];
uint64_t d2
= s
->mem_data
[2];
uint64_t d3
= s
->mem_data
[3];
uint64_t d4
= s
->mem_data
[4];
uint64_t d5
= s
->mem_data
[5];
uint64_t d6
= s
->mem_data
[6];
uint64_t d7
= s
->mem_data
[7];
// Each 64-bit word holds two big-endian opcodes: high half first.
line
->line_index(0)->opc
= d0
>> 32;
line
->line_index(1)->opc
= d0
;
line
->line_index(2)->opc
= d1
>> 32;
line
->line_index(3)->opc
= d1
;
line
->line_index(4)->opc
= d2
>> 32;
line
->line_index(5)->opc
= d2
;
line
->line_index(6)->opc
= d3
>> 32;
line
->line_index(7)->opc
= d3
;
line
->line_index(8)->opc
= d4
>> 32;
line
->line_index(9)->opc
= d4
;
line
->line_index(10)->opc
= d5
>> 32;
line
->line_index(11)->opc
= d5
;
line
->line_index(12)->opc
= d6
>> 32;
line
->line_index(13)->opc
= d6
;
line
->line_index(14)->opc
= d7
>> 32;
line
->line_index(15)->opc
= d7
;
SS_Execute dec
= s
->inst_dec
;
line
->line_index(0)->exe
= dec
;
line
->line_index(1)->exe
= dec
;
line
->line_index(2)->exe
= dec
;
line
->line_index(3)->exe
= dec
;
line
->line_index(4)->exe
= dec
;
line
->line_index(5)->exe
= dec
;
line
->line_index(6)->exe
= dec
;
line
->line_index(7)->exe
= dec
;
line
->line_index(8)->exe
= dec
;
line
->line_index(9)->exe
= dec
;
line
->line_index(10)->exe
= dec
;
line
->line_index(11)->exe
= dec
;
line
->line_index(12)->exe
= dec
;
line
->line_index(13)->exe
= dec
;
line
->line_index(14)->exe
= dec
;
line
->line_index(15)->exe
= dec
;
// Reset the decoder back to the normal decoder. In some cases
// (inst va breakpoint) the inst_mmu can set the decoder to a
// special decoder that is for just one cache line
s
->inst_dec
= s
->save_dec
;
// Refill one decode-cache line (sixteen 4-byte opcodes, 64 bytes) from I/O
// space: identical to mem_run_fetch512, but the 64-byte read goes through the
// strand's I/O interface instead of plain memory. No tracing, no RAS checks.
//
// NOTE(review): this copy of the file appears to have lost lines during
// extraction: the braces and the final dispatch/return of an SS_Vaddr are
// not visible. Verify against the upstream OpenSPARC T2 sources.
SS_Vaddr
io_run_fetch512( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Vaddr va, SS_Tte* tte )/*{{{*/
  // Translate the virtual fetch address to a physical address via the TTE.
  SS_Paddr pa = tte->trans(va);
  // 64-byte code fetch through the I/O bridge, tagged with the strand id.
  s->io->fetch512(s->strand_id(),pa,s->mem_data);
  uint64_t d0 = s->mem_data[0];
  uint64_t d1 = s->mem_data[1];
  uint64_t d2 = s->mem_data[2];
  uint64_t d3 = s->mem_data[3];
  uint64_t d4 = s->mem_data[4];
  uint64_t d5 = s->mem_data[5];
  uint64_t d6 = s->mem_data[6];
  uint64_t d7 = s->mem_data[7];
  // Split each 64-bit word into two 32-bit opcodes: the high half is the
  // instruction at the lower (even) slot; the full word is assigned for the
  // odd slot (presumably opc keeps only the low 32 bits — verify opc width).
  line_index(line,0)->opc = d0 >> 32;
  line_index(line,1)->opc = d0;
  line_index(line,2)->opc = d1 >> 32;
  line_index(line,3)->opc = d1;
  line_index(line,4)->opc = d2 >> 32;
  line_index(line,5)->opc = d2;
  line_index(line,6)->opc = d3 >> 32;
  line_index(line,7)->opc = d3;
  line_index(line,8)->opc = d4 >> 32;
  line_index(line,9)->opc = d4;
  line_index(line,10)->opc = d5 >> 32;
  line_index(line,11)->opc = d5;
  line_index(line,12)->opc = d6 >> 32;
  line_index(line,13)->opc = d6;
  line_index(line,14)->opc = d7 >> 32;
  line_index(line,15)->opc = d7;
  // Point every slot's execute hook at the current decoder, so the first
  // execution of each instruction decodes itself (and then caches the result).
  SS_Execute dec = s->inst_dec;
  line_index(line,0)->exe = dec;
  line_index(line,1)->exe = dec;
  line_index(line,2)->exe = dec;
  line_index(line,3)->exe = dec;
  line_index(line,4)->exe = dec;
  line_index(line,5)->exe = dec;
  line_index(line,6)->exe = dec;
  line_index(line,7)->exe = dec;
  line_index(line,8)->exe = dec;
  line_index(line,9)->exe = dec;
  line_index(line,10)->exe = dec;
  line_index(line,11)->exe = dec;
  line_index(line,12)->exe = dec;
  line_index(line,13)->exe = dec;
  line_index(line,14)->exe = dec;
  line_index(line,15)->exe = dec;
  // Reset the decoder back to the normal decoder. In some cases
  // (inst va breakpoint) the inst_mmu can set the decoder to a
  // special decoder that is for just one cache line
  s->inst_dec = s->save_dec;
// Tracing variant of the memory fetch-512: performs the same decode-cache
// line refill as mem_run_fetch512, but (a) when RAS is enabled, runs the
// fetch through the memory-error detector first and delivers a trap instead
// of instructions when an error is found, and (b) reports the 64-byte code
// read to the tracer hook.
//
// NOTE(review): this copy of the file appears to have lost lines during
// extraction: the braces, the remainder of the detect_fetch_err() argument
// list (cut off after its first argument), the #endif matching the
// #if/#elif below, and the final dispatch/return are not visible. Verify
// against the upstream OpenSPARC T2 sources.
SS_Vaddr
mem_trc_fetch512( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Vaddr va, SS_Tte* tte )/*{{{*/
  // Translate the virtual fetch address to a physical address via the TTE.
  SS_Paddr pa = tte->trans(va);
  // RAS mode: probe L1-cache/store-buffer error state for this fetch.
  if (s->sim_state.ras_enabled())
    SS_Trap::Type trap_type =
      s->mem_err_detector.detect_fetch_err(SS_MemErrDetector::L1_CACHE_AND_STB,
    // A detected error is delivered as a trap instead of the fetched code.
    if (trap_type != SS_Trap::NO_TRAP)
      return (s->trap)(pc,npc,s,line,trap_type);
#if defined(MEMORY_MSYNC)
  // Msync-aware memory model: record which strand touched this VA.
  ((SS_MsyncMemory*)(s->memory))->msync_info(s->strand_id(),va);
#elif defined(MEMORY_EXTERNAL)
  // External memory model: tag the access with the requesting strand.
  ((SS_ExternalMemory*)(s->memory))->set_strand_id(s->strand_id());
  // Read 64 aligned bytes (8 x uint64_t) into the strand's staging buffer.
  // Note: virtual dispatch here, unlike mem_run_fetch512's qualified call.
  s->memory->fetch512(pa,s->mem_data);
  uint64_t d0 = s->mem_data[0];
  uint64_t d1 = s->mem_data[1];
  uint64_t d2 = s->mem_data[2];
  uint64_t d3 = s->mem_data[3];
  uint64_t d4 = s->mem_data[4];
  uint64_t d5 = s->mem_data[5];
  uint64_t d6 = s->mem_data[6];
  uint64_t d7 = s->mem_data[7];
  // Report the 64-byte instruction fetch to the head tracer.
  s->trc_hook->mem_access(SS_Tracer::LD_CODE,va,tte,64,s->mem_data);
  // Split each 64-bit word into two 32-bit opcodes: the high half is the
  // instruction at the lower (even) slot; the full word is assigned for the
  // odd slot (presumably opc keeps only the low 32 bits — verify opc width).
  line_index(line,0)->opc = d0 >> 32;
  line_index(line,1)->opc = d0;
  line_index(line,2)->opc = d1 >> 32;
  line_index(line,3)->opc = d1;
  line_index(line,4)->opc = d2 >> 32;
  line_index(line,5)->opc = d2;
  line_index(line,6)->opc = d3 >> 32;
  line_index(line,7)->opc = d3;
  line_index(line,8)->opc = d4 >> 32;
  line_index(line,9)->opc = d4;
  line_index(line,10)->opc = d5 >> 32;
  line_index(line,11)->opc = d5;
  line_index(line,12)->opc = d6 >> 32;
  line_index(line,13)->opc = d6;
  line_index(line,14)->opc = d7 >> 32;
  line_index(line,15)->opc = d7;
  // Point every slot's execute hook at the current decoder, so the first
  // execution of each instruction decodes itself (and then caches the result).
  SS_Execute dec = s->inst_dec;
  line_index(line,0)->exe = dec;
  line_index(line,1)->exe = dec;
  line_index(line,2)->exe = dec;
  line_index(line,3)->exe = dec;
  line_index(line,4)->exe = dec;
  line_index(line,5)->exe = dec;
  line_index(line,6)->exe = dec;
  line_index(line,7)->exe = dec;
  line_index(line,8)->exe = dec;
  line_index(line,9)->exe = dec;
  line_index(line,10)->exe = dec;
  line_index(line,11)->exe = dec;
  line_index(line,12)->exe = dec;
  line_index(line,13)->exe = dec;
  line_index(line,14)->exe = dec;
  line_index(line,15)->exe = dec;
  // Reset the decoder back to the normal decoder. In some cases
  // (inst va breakpoint) the inst_mmu can set the decoder to a
  // special decoder that is for just one cache line
  s->inst_dec = s->save_dec;
// Tracing variant of the I/O fetch-512: same decode-cache line refill as
// io_run_fetch512, but the 64-byte code read is reported to the tracer hook.
// Note: unlike mem_trc_fetch512, the trace callback fires before the staging
// buffer is copied into locals; no RAS check is performed on the I/O path.
//
// NOTE(review): this copy of the file appears to have lost lines during
// extraction: the braces and the final dispatch/return of an SS_Vaddr are
// not visible. Verify against the upstream OpenSPARC T2 sources.
SS_Vaddr
io_trc_fetch512( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Vaddr va, SS_Tte* tte )/*{{{*/
  // Translate the virtual fetch address to a physical address via the TTE.
  SS_Paddr pa = tte->trans(va);
  // 64-byte code fetch through the I/O bridge, tagged with the strand id.
  s->io->fetch512(s->strand_id(),pa,s->mem_data);
  // Report the 64-byte instruction fetch to the head tracer.
  s->trc_hook->mem_access(SS_Tracer::LD_CODE,va,tte,64,s->mem_data);
  uint64_t d0 = s->mem_data[0];
  uint64_t d1 = s->mem_data[1];
  uint64_t d2 = s->mem_data[2];
  uint64_t d3 = s->mem_data[3];
  uint64_t d4 = s->mem_data[4];
  uint64_t d5 = s->mem_data[5];
  uint64_t d6 = s->mem_data[6];
  uint64_t d7 = s->mem_data[7];
  // Split each 64-bit word into two 32-bit opcodes: the high half is the
  // instruction at the lower (even) slot; the full word is assigned for the
  // odd slot (presumably opc keeps only the low 32 bits — verify opc width).
  line_index(line,0)->opc = d0 >> 32;
  line_index(line,1)->opc = d0;
  line_index(line,2)->opc = d1 >> 32;
  line_index(line,3)->opc = d1;
  line_index(line,4)->opc = d2 >> 32;
  line_index(line,5)->opc = d2;
  line_index(line,6)->opc = d3 >> 32;
  line_index(line,7)->opc = d3;
  line_index(line,8)->opc = d4 >> 32;
  line_index(line,9)->opc = d4;
  line_index(line,10)->opc = d5 >> 32;
  line_index(line,11)->opc = d5;
  line_index(line,12)->opc = d6 >> 32;
  line_index(line,13)->opc = d6;
  line_index(line,14)->opc = d7 >> 32;
  line_index(line,15)->opc = d7;
  // Point every slot's execute hook at the current decoder, so the first
  // execution of each instruction decodes itself (and then caches the result).
  SS_Execute dec = s->inst_dec;
  line_index(line,0)->exe = dec;
  line_index(line,1)->exe = dec;
  line_index(line,2)->exe = dec;
  line_index(line,3)->exe = dec;
  line_index(line,4)->exe = dec;
  line_index(line,5)->exe = dec;
  line_index(line,6)->exe = dec;
  line_index(line,7)->exe = dec;
  line_index(line,8)->exe = dec;
  line_index(line,9)->exe = dec;
  line_index(line,10)->exe = dec;
  line_index(line,11)->exe = dec;
  line_index(line,12)->exe = dec;
  line_index(line,13)->exe = dec;
  line_index(line,14)->exe = dec;
  line_index(line,15)->exe = dec;
  // Reset the decoder back to the normal decoder. In some cases
  // (inst va breakpoint) the inst_mmu can set the decoder to a
  // special decoder that is for just one cache line
  s->inst_dec = s->save_dec;
// Trampoline for 64-bit ASI loads: forwards every argument, unchanged, to
// the externally installed handler (asi_ext_ld64_fp) on the target strand
// and hands back the handler's result directly.
SS_AsiSpace::Error
SS_Strand::asi_ext_ld64( SS_Node* a, void* b, SS_Strand* s, SS_Vaddr va, uint64_t* data )/*{{{*/
{
  return (*(s->asi_ext_ld64_fp))( a, b, s, va, data );
}
// Trampoline for 64-bit ASI stores: forwards every argument, unchanged, to
// the externally installed handler (asi_ext_st64_fp) on the target strand
// and hands back the handler's result directly.
SS_AsiSpace::Error
SS_Strand::asi_ext_st64( SS_Node* a, void* b, SS_Strand* s, SS_Vaddr va, uint64_t data )/*{{{*/
{
  return (*(s->asi_ext_st64_fp))( a, b, s, va, data );
}
// Default RAS-enable hook: this CPU model ships without RAS (error
// injection) support, so the stub only emits a warning on stderr.
void SS_Strand::default_ras_enable( SS_Strand* s, char* )/*{{{*/
{
  fputs("RAS Un-implemented\n", stderr);
}
// Stub for the performance-counter hook: warns that performance
// instrumentation counters are not implemented for this CPU model.
//
// NOTE(review): the parameter list appears truncated in this copy of the
// file (it breaks off after `which,`) — verify the full signature against
// the upstream OpenSPARC T2 sources.
/*static*/ void SS_Strand::ss_run_perf( SS_Strand* s, Sam::Vcpu::perfcntr which,
  fprintf (stderr, "Performance Instrumentation Counters "
                   "not yet implemented for this CPU\n");