// ========== Copyright Header Begin ==========================================
// OpenSPARC T2 Processor File: SS_Strand.cc
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
// ========== Copyright Header End ============================================
// Plain decode-and-execute step used when no instruction breakpoints are
// armed: look the raw opcode up in the strand's decode table and tail-call
// the decoded handler.
//
// @param pc   current program counter (virtual address)
// @param npc  next program counter
// @param s    strand executing the instruction
// @param i    decode-cache entry holding the fetched opcode
// @return the next pc to execute, as returned by the decoded handler
extern "C" SS_Vaddr
ss_run_dec( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* i )/*{{{*/
{
  SS_Decode d = s->dec_table->decode(i->opc);
  return (d)(pc,npc,s,i,i->opc());
}
/*}}}*/
// Decode-and-execute step used when instruction breakpoints are enabled:
// the opcode is matched against the armed breakpoints BEFORE decode, and a
// hit raises INSTRUCTION_BREAKPOINT through the strand's trap function
// instead of executing the instruction.
//
// @param pc   current program counter (virtual address)
// @param npc  next program counter
// @param s    strand executing the instruction
// @param i    decode-cache entry holding the fetched opcode
// @return the next pc to execute (trap vector pc on breakpoint hit)
extern "C" SS_Vaddr
ss_ibe_dec( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* i )/*{{{*/
{
  if (s->inst_breakpoint_hit(i->opc()))
    return (s->trap)(pc,npc,s,i,SS_Trap::INSTRUCTION_BREAKPOINT);

  SS_Decode d = s->dec_table->decode(i->opc);
  return (d)(pc,npc,s,i,i->opc());
}
/*}}}*/
// Decode step installed on a cache line that has a virtual-address
// breakpoint on it. When the breakpoint does not trigger (or is being
// skipped once to resume past it), it forwards to the normal decoder
// (s->inst_dec, i.e. ss_run_dec or ss_ibe_dec) and then re-installs
// itself so the breakpoint is re-checked on the next visit.
// NOTE(review): this extraction is missing the function's braces and its
// final return statement — do not assume the control flow shown here is
// complete.
extern "C" SS_Vaddr
ss_break_inst_va_dec( SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Instr
* i
)/*{{{*/
// Either we were told to skip the breakpoint this one time (resume),
// or the pc does not match an armed inst-va breakpoint.
if (s
->skip_break_inst_va
|| !s
->test_break_inst_va(pc
))
// The skip flag is one-shot: consume it.
s
->skip_break_inst_va
= false;
// If the breakpoint did not trigger then decode: ss_run_dec or
// ss_ibe_dec ... and execute the instruction.
pc
= (s
->inst_dec
)(pc
,npc
,s
,i
);
// Undo the decode execute caching to make sure resume after
// breakpoint hits the breakpoint again.
i
->exe
= ss_break_inst_va_dec
;
// File-local TTE singletons and store-partial lookup tables.
// NOTE(review): the class bodies and the array initializers were elided
// from this extraction; only the declarations remain visible.
// SS_FailTte: a TTE that always fails translation, used to force the next
// fetch through the MMU (see fail_tte / inst_tte usage elsewhere).
class SS_FailTte
: public SS_Tte
/*{{{*/
// SS_PhysTte: a TTE used for pa2pa (bypass) translations.
class SS_PhysTte
: public SS_Tte
/*{{{*/
tte_flags
= VALID_BIT
; // Make sure we can use valid_bit() test on tte
static SS_FailTte fail_tte
;
// junk_tte: placeholder TTE installed in decode-cache tags that hold no
// valid cached translation.
static SS_FailTte junk_tte
;
// For pa2pa translations we have two areas, memory and i/o. For some
// product we use a third one which is only used when pstate.am masking
// is applied in pa2pa mode.
static SS_PhysTte phys_tte_mem
;
static SS_PhysTte phys_tte_io
;
static SS_PhysTte phys_tte_mem_am
;
// Byte-enable masks for partial-store ASIs (16- and 32-bit flavors);
// initializer data elided from this view.
static uint8_t ss_stpartial16
[] = /*{{{*/
static uint8_t ss_stpartial32
[] = /*{{{*/
// Strand constructor: wires the strand to its parent node, binds the
// decode/memop tables, allocates the per-mode decode caches and the
// windowed/global register files (with ECC shadows), and installs the
// default v9->v8plus conversion hooks.
// NOTE(review): this extraction dropped the constructor's body braces and
// parts of the member-initializer list; the lines below are a partial view.
SS_Strand::SS_Strand( SS_Node
& _parent
, const char* _name
, /*{{{*/
SS_Execute run_exe_table_init
[],
SS_Memop mem_run_table_init
[][4],
SS_Memop mem_trc_table_init
[][4],
SS_MemErrDetector
& _mem_err_detector
)
run_exe_table_ref(run_exe_table_init
),
mem_run_table_ref(mem_run_table_init
),
mem_trc_table_ref(mem_trc_table_init
),
mem_err_detector(_mem_err_detector
),
memory(&SS_Memory::memory
),
inst_wp_va_addr(1), // bit0 == 1 is disabled
trc_inst_tte(&::junk_tte
),
phys_tte_mem(&::phys_tte_mem
),
phys_tte_io(&::phys_tte_io
),
phys_tte_mem_am(&::phys_tte_mem_am
),
sim_update(ss_sim_update
),
trap((SS_TrapFun
)ss_trap
),
invalid_asi(0), // ToDo provide default routine
change_running_from_snapshot(false),
skip_break_inst_va(false),
inst_cache_va_pri_priv(0),
inst_cache_va_nuc_nuc_nuc_priv(0),
inst_cache_va_nuc_nuc_sec_priv(0),
inst_cache_va_nuc_pri_sec_priv(0),
inst_cache_va_pri_user(0),
inst_cache_va_nuc_user(0),
inst_cache_ra_pri_user(0),
inst_cache_ra_nuc_user(0),
inst_cache_ra_pri_priv(0),
inst_cache_ra_nuc_priv(0),
ras_rs1(0), // Ras Irf hooks
ras_frs1(0), // Single precison FP RAS hooks
ras_drs1(0), // Double precison FP RAS hooks
model(0), // derived strand sets pointer model
, data_tlb_read_skip(false)
// One 1KB-aligned decode cache per translation-mode/privilege/context
// combination (va/ra/pa x user/priv x primary/nucleus variants).
inst_cache_va_pri_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_nuc_nuc_nuc_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_nuc_nuc_sec_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_nuc_pri_sec_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_pri_user
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_va_nuc_user
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_ra_pri_user
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_ra_nuc_user
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_ra_pri_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_ra_nuc_priv
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
inst_cache_pa
= (SS_InstrCache
*)ss_memalign(1024,sizeof(SS_InstrCache
));
// Tag each cache with a short id and seed every line with junk_tte
// (no valid cached translation yet).
inst_cache_va_pri_priv
->init ("vpp", &::junk_tte
);
inst_cache_va_nuc_nuc_nuc_priv
->init("vnp-00",&::junk_tte
);
inst_cache_va_nuc_nuc_sec_priv
->init("vnp-0s",&::junk_tte
);
inst_cache_va_nuc_pri_sec_priv
->init("vnp-ps",&::junk_tte
);
inst_cache_va_pri_user
->init ("vpu", &::junk_tte
);
inst_cache_va_nuc_user
->init ("vnu", &::junk_tte
);
inst_cache_ra_pri_user
->init ("rpu", &::junk_tte
);
inst_cache_ra_nuc_user
->init ("rnu", &::junk_tte
);
inst_cache_ra_pri_priv
->init ("rpp", &::junk_tte
);
inst_cache_ra_nuc_priv
->init ("rnp", &::junk_tte
);
inst_cache_pa
->init ("p--", &::junk_tte
);
// Windowed register file: (MAX_WP+1) windows of 16 regs, plus ECC shadow.
wrf
= (uint64_t*)ss_memalign(64,sizeof(uint64_t) * (MAX_WP
+ 1) * 16);
wrf_ecc
= (BL_EccBits
*)ss_malloc( sizeof(BL_EccBits
) * (MAX_WP
+ 1) * 16);
// Global register file: (MAX_GL+1) levels of 8 globals, plus ECC shadow.
grf
= (uint64_t*)ss_memalign(64,sizeof(uint64_t) * (MAX_GL
+ 1) * 8);
grf_ecc
= (BL_EccBits
*)ss_malloc( sizeof(BL_EccBits
) * (MAX_GL
+ 1) * 8);
memset(irf
,0,32 * sizeof(uint64_t));
memset(drf
,0,32 * sizeof(uint64_t));
memset(wrf
,0,(MAX_WP
+ 1) * 16 * sizeof(uint64_t));
memset(wrf_ecc
,0,(MAX_WP
+ 1) * 16 * sizeof(BL_EccBits
));
memset(grf
,0,(MAX_GL
+ 1) * 8 * sizeof(uint64_t));
memset(grf_ecc
,0,(MAX_GL
+ 1) * 8 * sizeof(BL_EccBits
));
rstv_addr
= 0xfffffffff0000000;
sim_state
.fp_disabled(1);
// Copy the file-static partial-store mask tables into the strand.
stpartial16
[h
] = ss_stpartial16
[h
];
stpartial32
[w
] = ss_stpartial32
[w
];
inst_iw
[0].mask_data
= 0;
inst_iw
[1].mask_data
= 0;
ras_enable
= default_ras_enable
;
// Default v9 -> v8plus conversion hooks; a derived product strand may
// override these.
v8_trap
= trap_v9_to_v8plus
;
v8_inst_trap
= inst_trap_v9_to_v8plus
;
v8_data_trap
= data_trap_v9_to_v8plus
;
v8_data_mmu
= data_mmu_v9_to_v8plus
;
v8_invalid_asi
= invalid_asi_v9_to_v8plus
;
v8_inst_dec
= inst_dec_v9_to_v8plus
;
// Destructor. NOTE(review): the body was elided from this extraction —
// do not assume it is empty (the constructor allocates caches and
// register files that presumably get released here; verify against the
// full source).
SS_Strand::~SS_Strand()/*{{{*/
void SS_Strand::hard_reset()/*{{{*/
ss_trap(0,0,this,0,SS_Trap::POWER_ON_RESET
);
void SS_Strand::warm_reset(bool intp
)/*{{{*/
// ToDo we need a better trap type for this and handle the
// common warm reset there like hard and xtrn reset
ss_trap(0,0,this,0,SS_Trap::POWER_ON_RESET
);
void SS_Strand::xtrn_reset()/*{{{*/
ss_trap(0,0,this,0,SS_Trap::EXTERNALLY_INITIATED_RESET
);
// Map a register index to its printable name, dispatching on register
// class (ASR / PR / HPR / SIM / irf / drf / frf).
// NOTE(review): this extraction dropped the switch statements, braces
// and default/return-on-miss paths; only the case bodies remain visible.
const char* SS_Strand::ss_get_state_name( SS_Strand
* s
, SS_Registers::Index index
)/*{{{*/
// Ancillary state registers.
if (SS_Registers::is_asr(index
))
case SS_Registers::ASR_Y
: return s
->y
.name();
case SS_Registers::ASR_CCR
: return s
->ccr
.name();
case SS_Registers::ASR_ASI
: return "asi"; // Internal name is asi_reg ...
case SS_Registers::ASR_TICK
: return s
->tick
.name();
case SS_Registers::ASR_PC
: return s
->pc
.name();
case SS_Registers::ASR_FPRS
: return s
->fprs
.name();
case SS_Registers::ASR_GSR
: return s
->gsr
.name();
case SS_Registers::ASR_SOFTINT_SET
: return SS_SoftintSet().name();
case SS_Registers::ASR_SOFTINT_CLR
: return SS_SoftintClr().name();
case SS_Registers::ASR_SOFTINT
: return s
->softint
.name();
case SS_Registers::ASR_TICK_CMPR
: return s
->tick_cmpr
.name();
case SS_Registers::ASR_STICK
: return s
->stick
.name();
case SS_Registers::ASR_STICK_CMPR
: return s
->stick_cmpr
.name();
// Privileged registers.
else if (SS_Registers::is_pr(index
))
case SS_Registers::PR_TPC
: return s
->tpc
.name();
case SS_Registers::PR_TNPC
: return s
->tnpc
.name();
case SS_Registers::PR_TSTATE
: return s
->tstate
.name();
case SS_Registers::PR_TT
: return s
->tt
.name();
case SS_Registers::PR_TICK
: return "pr_tick"; // Avoid cases with same name
case SS_Registers::PR_TBA
: return s
->tba
.name();
case SS_Registers::PR_PSTATE
: return s
->pstate
.name();
case SS_Registers::PR_TL
: return s
->tl
.name();
case SS_Registers::PR_PIL
: return s
->pil
.name();
case SS_Registers::PR_CWP
: return s
->cwp
.name();
case SS_Registers::PR_CANSAVE
: return s
->cansave
.name();
case SS_Registers::PR_CANRESTORE
: return s
->canrestore
.name();
case SS_Registers::PR_CLEANWIN
: return s
->cleanwin
.name();
case SS_Registers::PR_OTHERWIN
: return s
->otherwin
.name();
case SS_Registers::PR_WSTATE
: return s
->wstate
.name();
case SS_Registers::PR_GL
: return s
->gl
.name();
// Hyperprivileged registers.
else if (SS_Registers::is_hpr(index
))
case SS_Registers::HPR_HPSTATE
: return s
->hpstate
.name();
case SS_Registers::HPR_HTSTATE
: return s
->htstate
.name();
case SS_Registers::HPR_HINTP
: return s
->hintp
.name();
case SS_Registers::HPR_HTBA
: return s
->htba
.name();
case SS_Registers::HPR_HVER
: return s
->hver
.name();
case SS_Registers::HPR_HSTICK_CMPR
: return s
->hstick_cmpr
.name();
// Simulator pseudo-registers.
else if (SS_Registers::is_sim(index
))
case SS_Registers::SIM_MAX_WP
: return s
->max_wp
.name();
case SS_Registers::SIM_MAX_TL
: return s
->max_tl
.name();
case SS_Registers::SIM_MAX_PTL
: return s
->max_ptl
.name();
case SS_Registers::SIM_MAX_GL
: return s
->max_gl
.name();
case SS_Registers::SIM_MAX_PGL
: return s
->max_pgl
.name();
case SS_Registers::SIM_RSTV_ADDR
: return s
->rstv_addr
.name();
case SS_Registers::SIM_NPC
: return s
->npc
.name();
case SS_Registers::SIM_FSR
: return s
->fsr
.name();
case SS_Registers::SIM_STATE
: return s
->sim_state
.name();
case SS_Registers::SIM_STRAND_ID
: return s
->strand_id
.name();
case SS_Registers::SIM_PA_BITS
: return s
->pa_bits
.name();
case SS_Registers::SIM_VA_BITS
: return s
->va_bits
.name();
case SS_Registers::SIM_INST_COUNT
: return s
->inst_count
.name();
// Register-file names come from static name tables, indexed by offset.
else if (SS_Registers::is_irf(index
))
return SS_Registers::irf_name
[index
- SS_Registers::IRF_OFS
];
else if (SS_Registers::is_drf(index
))
return SS_Registers::drf_name
[index
- SS_Registers::DRF_OFS
];
else if (SS_Registers::is_frf(index
))
return SS_Registers::frf_name
[index
- SS_Registers::FRF_OFS
];
// Read a register value from strand s into *value. Register files are
// handled first; everything else goes through a dispatch on the index.
// Virtual-address registers (pc, npc, tpc, tnpc) are sign-extended from
// bit va_bits()-1 to the full 64 bits.
// NOTE(review): this extraction dropped the switch skeleton, braces and
// several guard conditions (e.g. the tl()-related checks that decide the
// NOT_AVAILABLE returns near the end); only case bodies remain visible.
SS_Registers::Error
SS_Strand::ss_get_state( SS_Strand
* s
, SS_Registers::Index index
, uint64_t* value
)/*{{{*/
if (SS_Registers::is_irf(index
))
*value
= s
->irf
[index
- SS_Registers::IRF_OFS
];
else if (SS_Registers::is_drf(index
))
*value
= s
->drf
[index
- SS_Registers::DRF_OFS
];
else if (SS_Registers::is_frf(index
))
*value
= s
->get_frf(SS_Strand::freg_idx2off(index
- SS_Registers::FRF_OFS
));
// For processors that do not implement the full 64bit for virtual address
// we need to make sure that the unimplemented bits are sign extended based
// on bit[va_bits() - 1]. This is done for pc, npc, tpc, tnpc
uint_t sft
= 64 - s
->va_bits();
case SS_Registers::ASR_Y
: *value
= s
->y(); break;
case SS_Registers::ASR_CCR
: *value
= s
->ccr(); break;
case SS_Registers::ASR_ASI
: *value
= s
->asi(); break;
case SS_Registers::ASR_TICK
: *value
= s
->tick(); break;
// Shift up then arithmetic-shift down to sign extend the va.
case SS_Registers::ASR_PC
: *value
= int64_t(s
->pc() << sft
) >> sft
; break;
case SS_Registers::ASR_FPRS
: *value
= s
->fprs(); break;
case SS_Registers::ASR_GSR
: *value
= s
->gsr(); break;
// SOFTINT_SET/CLR are write-only views; reading either returns softint.
case SS_Registers::ASR_SOFTINT
: *value
= s
->softint(); break;
case SS_Registers::ASR_SOFTINT_CLR
: *value
= s
->softint(); break;
case SS_Registers::ASR_SOFTINT_SET
: *value
= s
->softint(); break;
case SS_Registers::ASR_TICK_CMPR
: *value
= s
->tick_cmpr(); break;
case SS_Registers::ASR_STICK
: *value
= s
->stick(); break;
case SS_Registers::ASR_STICK_CMPR
: *value
= s
->stick_cmpr(); break;
case SS_Registers::PR_TICK
: *value
= s
->tick(); break;
case SS_Registers::PR_TBA
: *value
= s
->tba(); break;
case SS_Registers::PR_PSTATE
: *value
= s
->pstate(); break;
case SS_Registers::PR_TL
: *value
= s
->tl(); break;
case SS_Registers::PR_PIL
: *value
= s
->pil(); break;
case SS_Registers::PR_CWP
: *value
= s
->cwp(); break;
case SS_Registers::PR_CANSAVE
: *value
= s
->cansave(); break;
case SS_Registers::PR_CANRESTORE
: *value
= s
->canrestore(); break;
case SS_Registers::PR_CLEANWIN
: *value
= s
->cleanwin(); break;
case SS_Registers::PR_OTHERWIN
: *value
= s
->otherwin(); break;
case SS_Registers::PR_WSTATE
: *value
= s
->wstate(); break;
case SS_Registers::PR_GL
: *value
= s
->gl(); break;
case SS_Registers::HPR_HPSTATE
: *value
= s
->hpstate(); break;
case SS_Registers::HPR_HINTP
: *value
= s
->hintp(); break;
case SS_Registers::HPR_HTBA
: *value
= s
->htba(); break;
case SS_Registers::HPR_HVER
: *value
= s
->hver(); break;
case SS_Registers::HPR_HSTICK_CMPR
: *value
= s
->hstick_cmpr(); break;
case SS_Registers::SIM_MAX_WP
: *value
= s
->max_wp(); break;
case SS_Registers::SIM_MAX_TL
: *value
= s
->max_tl(); break;
case SS_Registers::SIM_MAX_PTL
: *value
= s
->max_ptl(); break;
case SS_Registers::SIM_MAX_GL
: *value
= s
->max_gl(); break;
case SS_Registers::SIM_MAX_PGL
: *value
= s
->max_pgl(); break;
case SS_Registers::SIM_RSTV_ADDR
: *value
= s
->rstv_addr(); break;
case SS_Registers::SIM_NPC
: *value
= int64_t(s
->npc() << sft
) >> sft
; break;
case SS_Registers::SIM_STATE
: *value
= s
->sim_state(); break;
case SS_Registers::SIM_STRAND_ID
: *value
= s
->strand_id(); break;
case SS_Registers::SIM_PA_BITS
: *value
= s
->pa_bits(); break;
case SS_Registers::SIM_VA_BITS
: *value
= s
->va_bits(); break;
case SS_Registers::SIM_INST_COUNT
: *value
= s
->inst_count(); break;
case SS_Registers::SIM_FSR
:
// Trap-stack registers: a guard (elided in this view, presumably a
// tl()==0 check — verify against full source) yields NOT_AVAILABLE.
case SS_Registers::PR_TPC
:
return SS_Registers::NOT_AVAILABLE
;
*value
= int64_t(s
->tpc() << sft
) >> sft
;
case SS_Registers::PR_TNPC
:
return SS_Registers::NOT_AVAILABLE
;
*value
= int64_t(s
->tnpc() << sft
) >> sft
;
case SS_Registers::PR_TSTATE
:
return SS_Registers::NOT_AVAILABLE
;
case SS_Registers::PR_TT
:
return SS_Registers::NOT_AVAILABLE
;
case SS_Registers::HPR_HTSTATE
:
return SS_Registers::NOT_AVAILABLE
;
return SS_Registers::NOT_AVAILABLE
;
// Write value into a register of strand s. Register files first, then a
// dispatch on the index. Virtual-address registers (pc, npc, tpc, tnpc,
// tba, htba, rstv_addr) are sign-extended from bit va_bits()-1.
// NOTE(review): this extraction dropped the switch skeleton, braces and
// the range/guard conditions in front of the VALUE_OUT_OF_RANGE /
// NOT_AVAILABLE returns; only the case bodies remain visible.
SS_Registers::Error
SS_Strand::ss_set_state( SS_Strand
* s
, SS_Registers::Index index
, uint64_t value
)/*{{{*/
// %g0 is hardwired to zero and silently ignores writes.
if (SS_Registers::is_irf(index
))
if (index
!= SS_Registers::G0
)
s
->irf
[index
- SS_Registers::IRF_OFS
] = value
;
else if (SS_Registers::is_drf(index
))
s
->drf
[index
- SS_Registers::DRF_OFS
] = value
;
else if (SS_Registers::is_frf(index
))
s
->get_frf(SS_Strand::freg_idx2off(index
- SS_Registers::FRF_OFS
)) = value
;
// For processors that do not implement the full 64bit for virtual address
// we need to make sure that the unimplemented bits are sign extended based
// on bit[va_bits() - 1]. This is done for pc, npc, tpc, tnpc, and also
// tba, htba and rstv_addr.
uint_t sft
= 64 - s
->va_bits();
case SS_Registers::ASR_Y
: s
->y
.set(value
); break;
case SS_Registers::ASR_CCR
: s
->ccr
.set(value
); break;
case SS_Registers::ASR_ASI
: s
->asi
.set(value
); break;
case SS_Registers::ASR_TICK
: s
->tick
.set(value
); break;
case SS_Registers::ASR_FPRS
: s
->fprs
.set(value
); break;
case SS_Registers::ASR_GSR
: s
->gsr
.set(value
); break;
case SS_Registers::ASR_TICK_CMPR
: s
->tick_cmpr
.set(value
); break;
case SS_Registers::ASR_STICK
: s
->stick
.set(value
); break;
case SS_Registers::ASR_STICK_CMPR
: s
->stick_cmpr
.set(value
); break;
case SS_Registers::PR_TICK
: s
->tick
.set(value
); break;
// Sign extend trap-vector and reset-vector base addresses.
case SS_Registers::PR_TBA
: s
->tba
.set(int64_t(value
<< sft
) >> sft
); break;
case SS_Registers::PR_PSTATE
: s
->pstate
.set(value
); break;
case SS_Registers::PR_CANSAVE
: s
->cansave
.set(value
); break;
case SS_Registers::PR_CANRESTORE
: s
->canrestore
.set(value
); break;
case SS_Registers::PR_CLEANWIN
: s
->cleanwin
.set(value
); break;
case SS_Registers::PR_OTHERWIN
: s
->otherwin
.set(value
); break;
case SS_Registers::PR_WSTATE
: s
->wstate
.set(value
); break;
case SS_Registers::HPR_HPSTATE
: s
->hpstate
.set(value
); break;
case SS_Registers::HPR_HTBA
: s
->htba
.set(int64_t(value
<< sft
) >> sft
); break;
case SS_Registers::HPR_HVER
: s
->hver
.set(value
); break;
case SS_Registers::HPR_HSTICK_CMPR
: s
->hstick_cmpr
.set(value
); break;
case SS_Registers::SIM_RSTV_ADDR
: s
->rstv_addr
.set(int64_t(value
<< sft
) >> sft
); break;
case SS_Registers::SIM_NPC
: s
->npc
.set(int64_t(value
<< sft
) >> sft
); break;
case SS_Registers::SIM_STATE
: s
->sim_state
.set(value
); break;
case SS_Registers::SIM_INST_COUNT
: s
->inst_count
.set(value
); break;
// When the PC is set from the front end then we clean all the
// breakpoint related information that might be pending. This
// means that when we do sim.s0.pc = sim.s0.pc then we will hit
// the breakpoint on pc again if we just hit it.
case SS_Registers::ASR_PC
:
s
->skip_break_inst_va
= false;
s
->break_hit
->triggered
= false;
s
->pc
.set(int64_t(value
<< sft
) >> sft
);
// Window/trap/global level writes are range checked (guard elided in
// this view — presumably value vs max_*; verify against full source).
case SS_Registers::PR_CWP
:
return SS_Registers::VALUE_OUT_OF_RANGE
;
case SS_Registers::PR_TL
:
return SS_Registers::VALUE_OUT_OF_RANGE
;
case SS_Registers::PR_GL
:
return SS_Registers::VALUE_OUT_OF_RANGE
;
case SS_Registers::SIM_FSR
:
s
->set_fsr(); // ToDo: Why do I keep fsr if I have to do get/set_fsr ?
case SS_Registers::PR_PIL
:
// softint writes also re-evaluate pending interrupts.
case SS_Registers::ASR_SOFTINT_SET
:
s
->softint
.set(s
->softint() | value
);
s
->irq
.update_softint(s
);
case SS_Registers::ASR_SOFTINT_CLR
:
s
->softint
.set(s
->softint() &~ value
);
s
->irq
.update_softint(s
);
case SS_Registers::ASR_SOFTINT
:
s
->irq
.update_softint(s
);
// hintp raises/retracts the hstick-match interrupt (condition elided).
case SS_Registers::HPR_HINTP
:
s
->irq
.raise(s
,SS_Interrupt::BIT_HSTICK_MATCH
);
s
->irq
.retract(SS_Interrupt::BIT_HSTICK_MATCH
);
// Trap-stack registers: guard (elided) yields NOT_AVAILABLE.
case SS_Registers::PR_TPC
:
return SS_Registers::NOT_AVAILABLE
;
s
->tpc
.set(int64_t(value
<< sft
) >> sft
);
case SS_Registers::PR_TNPC
:
return SS_Registers::NOT_AVAILABLE
;
s
->tnpc
.set(int64_t(value
<< sft
) >> sft
);
case SS_Registers::PR_TSTATE
:
return SS_Registers::NOT_AVAILABLE
;
case SS_Registers::PR_TT
:
return SS_Registers::NOT_AVAILABLE
;
case SS_Registers::HPR_HTSTATE
:
return SS_Registers::NOT_AVAILABLE
;
return SS_Registers::NOT_AVAILABLE
;
// Write this strand's hierarchical name into dst.
// NOTE(review): the body was elided from this extraction; verify the
// buffer-size contract for dst against the full source.
void SS_Strand::get_name( char* dst
)/*{{{*/
// Save (or restore — SS_SnapShot drives direction) the complete strand
// architectural state: register windows, globals, trap stack, control
// registers, scratchpads, and misc simulator state. Each value is tagged
// with "<prefix>.<name>" via sprintf into ss.tag before ss.val()/snapshot().
// NOTE(review): loop braces and several loop headers were elided from
// this extraction; the visible lines are a partial view of the bodies.
void SS_Strand::snapshot( SS_SnapShot
& ss
)/*{{{*/
// Before we dump the strand state we save all the
// duplicate state to get one coherent view of the strand.
// Save all the registers windows,
for (int wp
=0; wp
<= max_wp(); wp
++)
sprintf(ss
.tag
,"%s.wp.%d.l%d",prefix
,wp
,i
);
ss
.val(&wrf
[wp
* 16 + i
]);
sprintf(ss
.tag
,"%s.wp.%d.i%d",prefix
,wp
,i
);
ss
.val(&wrf
[wp
* 16 + 8 + i
]);
// Save the global register levels (g0 is hardwired, skip it).
for (int gp
=0; gp
<= max_gl(); gp
++)
for (i
=1; i
<8; i
++) // skip %g0
sprintf(ss
.tag
,"%s.gl.%d.g%d",prefix
,gp
,i
);
ss
.val(&grf
[gp
* 8 + i
]);
sprintf(ss
.tag
,"%s.d%d",prefix
,i
);
// Save the trap stack (tl=0 is never used)
for (int tp
=1; tp
<= max_tl(); tp
++)
sprintf(ss
.tag
,"%s.tl.%d.pc",prefix
,tp
); ss
.val(&trap_state
[tp
].pc
);
sprintf(ss
.tag
,"%s.tl.%d.npc",prefix
,tp
); ss
.val(&trap_state
[tp
].npc
);
sprintf(ss
.tag
,"%s.tl.%d.tstate",prefix
,tp
); ss
.val(&trap_state
[tp
].tstate
);
sprintf(ss
.tag
,"%s.tl.%d.htstate",prefix
,tp
); ss
.val(&trap_state
[tp
].htstate
);
sprintf(ss
.tag
,"%s.tl.%d.tt",prefix
,tp
); ss
.val(&trap_state
[tp
].tt
);
// tpc, tnpc, tstat, htstate, and tt get saved/restored by tl_save()/tl_load()
// respectively an that got save above. So we don't have to handle those here.
sprintf(ss
.tag
,"%s.%s",prefix
,pc
.name()); pc
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,npc
.name()); npc
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,gsr
.name()); gsr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,tick
.name()); tick
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,stick
.name()); stick
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,tick_cmpr
.name()); tick_cmpr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,stick_cmpr
.name()); stick_cmpr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,hstick_cmpr
.name()); hstick_cmpr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,softint
.name()); softint
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,tba
.name()); tba
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,htba
.name()); htba
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,rstv_addr
.name()); rstv_addr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,hver
.name()); hver
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,y
.name()); y
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,pstate
.name()); pstate
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,hpstate
.name()); hpstate
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,sim_state
.name()); sim_state
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,ccr
.name()); ccr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,asi
.name()); asi
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,fprs
.name()); fprs
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,tl
.name()); tl
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,gl
.name()); gl
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,cwp
.name()); cwp
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,cansave
.name()); cansave
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,canrestore
.name()); canrestore
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,cleanwin
.name()); cleanwin
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,otherwin
.name()); otherwin
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,wstate
.name()); wstate
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,pil
.name()); pil
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,hintp
.name()); hintp
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,fsr
.name()); fsr
.snapshot(ss
);
sprintf(ss
.tag
,"%s.halted",prefix
); ss
.val(&halted
);
sprintf(ss
.tag
,"%s.%s",prefix
,"inst_dft_asi"); inst_dft_asi
.snapshot(ss
);
sprintf(ss
.tag
,"%s.%s",prefix
,"data_dft_asi"); data_dft_asi
.snapshot(ss
);
// Privileged and hyperprivileged scratchpad registers.
for (int ps
=0; ps
< 8; ps
++)
sprintf(ss
.tag
,"%s.scratch.%d",prefix
,ps
);
for (int hs
=0; hs
< 8; hs
++)
sprintf(ss
.tag
,"%s.hscratch.%d",prefix
,hs
);
ss
.val(&hscratchpad
[hs
]);
// Now restore the state that didn't get saved back to
// sensible values. Flush the decode caches, and update
// the simulator state (which decode cache to use etc)
// We flush the decode caches on snapshot, to make running
// from saved snapshot behave identical to run after dump.
// Delete the breakpoint with the given id: unlink it from the master
// break_points list and from its per-kind list (trap / red-mode / inst-va).
// Returns OK on success, ID_UNKNOWN if no breakpoint matches.
// NOTE(review): the id-match test, prev tracking, switch skeleton and
// braces were elided from this extraction.
SS_BreakPoint::Error
SS_Strand::break_delete( SS_BreakPoint::Ident id
)/*{{{*/
for (SS_BreakPoint
* self
= break_points
; self
; self
= self
->next
)
// Unlink self from the master list (prev is the previously visited node).
(prev
? prev
->next
: break_points
) = self
->next
;
// Also unlink from the kind-specific chain.
case SS_BreakPoint::ON_TRAP
:
self
->unlink((SS_BreakPoint
**)&break_trap
[((SS_BreakTrap
*)self
)->tt
]);
case SS_BreakPoint::ON_RED_MODE
:
self
->unlink((SS_BreakPoint
**)&break_red_mode
);
case SS_BreakPoint::ON_INST_VA
:
self
->unlink((SS_BreakPoint
**)&break_inst_va
);
return SS_BreakPoint::OK
;
return SS_BreakPoint::ID_UNKNOWN
;
// Enable the breakpoint with the given id. Returns OK when found,
// ID_UNKNOWN otherwise.
// NOTE(review): the id-match / enable statement inside the loop was
// elided from this extraction.
SS_BreakPoint::Error
SS_Strand::break_enable( SS_BreakPoint::Ident id
)/*{{{*/
for (SS_BreakPoint
* self
= break_points
; self
; self
= self
->next
)
return SS_BreakPoint::OK
;
return SS_BreakPoint::ID_UNKNOWN
;
// Disable the breakpoint with the given id. Returns OK when found,
// ID_UNKNOWN otherwise.
// NOTE(review): the id-match / disable statement inside the loop was
// elided from this extraction.
SS_BreakPoint::Error
SS_Strand::break_disable( SS_BreakPoint::Ident id
)/*{{{*/
for (SS_BreakPoint
* self
= break_points
; self
; self
= self
->next
)
return SS_BreakPoint::OK
;
return SS_BreakPoint::ID_UNKNOWN
;
// Create a breakpoint that fires when trap type _tt is taken. The new
// breakpoint is pushed on the master break_points list and chained into
// the per-trap-type break_trap[] list.
// NOTE(review): the statements updating break_points/break_trap[_tt] and
// the return of the new id were elided from this extraction.
SS_BreakPoint::Ident
SS_Strand::break_on_trap( uint_t _tt
)/*{{{*/
SS_BreakTrap
* bp
= new SS_BreakTrap(SS_Trap::Type(_tt
),break_points
);
bp
->link
= break_trap
[_tt
];
// Create a breakpoint that fires on entry to red mode and chain it into
// the break_red_mode list.
// NOTE(review): the list-head updates and the return of the new id were
// elided from this extraction.
SS_BreakPoint::Ident
SS_Strand::break_on_red_mode()/*{{{*/
SS_BreakRedMode
* bp
= new SS_BreakRedMode(break_points
);
bp
->link
= break_red_mode
;
// Create a breakpoint on instruction fetch from virtual address va and
// chain it into the break_inst_va list.
// NOTE(review): the list-head updates and the return of the new id were
// elided from this extraction.
SS_BreakPoint::Ident
SS_Strand::break_on_inst_va( SS_Vaddr va
)/*{{{*/
SS_BreakInstVa
* bp
= new SS_BreakInstVa(va
,break_points
);
bp
->link
= break_inst_va
;
// Copy the live trap registers (tpc/tnpc/tstate/htstate/tt) into the
// trap_state[] entry for the current trap level.
// NOTE(review): the copy statements were elided from this extraction;
// only the trap-stack pointer setup is visible.
void SS_Strand::tl_save()/*{{{*/
TrapState
* p
= &trap_state
[tl()];
// Load the live trap registers from the trap_state[] entry for the
// current trap level (inverse of tl_save()).
// NOTE(review): the copy statements were elided from this extraction;
// only the trap-stack pointer setup is visible.
void SS_Strand::tl_load()/*{{{*/
TrapState
* p
= &trap_state
[tl()];
void SS_Strand::merge_asi_map()/*{{{*/
parent
->merge_asi_map(asi_map
);
// Re-derive all cached simulator configuration for strand s after a state
// change (pstate/hpstate/tl/fprs/softint/...): privilege level, pending
// tlz interrupt, which decode cache and mmu/ctx to use, default ASIs,
// cache-flush triggers (cle/am/tct/ibe changes), the main decoder
// (ss_run_dec vs ss_ibe_dec), fp-disabled state, the pstate.am address
// mask, and finally forces a run-loop re-entry.
// NOTE(review): most if/else skeleton, braces, and the flush calls were
// elided from this extraction; the lines below are a partial view.
void SS_Strand::ss_sim_update( SS_Strand
* s
)/*{{{*/
// This routine is the main routine to keep all our fancy caches and
// things in sync and do special things only when they need to be done.
// (sim_update)(me) should get called whenever a state change happens
// that requires switching decode caches ets. E.g we track changes to
// pstate, hpstate, tl, fprs.fef (fpr fpu enabled), softint in
// combination with pstate.ie, and lsu_ctr (if used), etc.
bool inst_cache_flush
= false;
uint64_t prev_priv
= s
->sim_state
.priv();
// First figure out which privileged level we're at. Additionally,
// to safe tests, we check whether interrupts are enabled and allowed
// and whether trap level zero traps should be thrown.
s
->sim_state
.priv(SS_HPRV
);
// Some processor implement crosscall with interrupt_vector_trap
// that can be blocked in hypervisor by pstate.ie ... so if we're
// here and ca can deal with them ... just checking.
s
->sim_state
.priv(SS_PRIV
);
s
->sim_state
.priv(SS_USER
);
// Check for trap level zero condition here first. Launch a disrupting
// tlz trap when the tlz condition arises: note TLZ has high priority!
if (!s
->irq
.is_pending(SS_Interrupt::BIT_TRAP_LEVEL_ZERO
) && s
->hpstate
.tlz() && (s
->tl() == 0))
s
->irq
.raise(s
,SS_Interrupt::BIT_TRAP_LEVEL_ZERO
);
// Figure out which decode cache to use. We use multiple caches to
// allow a better hit rate and to safe on privileged checks in critical
// code (mainly memory ops). For ra2pa and va2pa we add extra decode
// caches to differentiate the used default data asi.
// We set the inst_mmu to the one with the correct translation mode.
// ToDo: perhaps we should keep the default data asi in the instruction
// cache so that the primary/nuclues can be folded into one and the
// little/big endian default asi test can use that too. Additionally
// we should do some performance analisys to see which modes are mainly
// used under solaris so that we can proper optimize for those, and
// use flushing for others. 9 decode caches is a little much ...
// Alternatively we might want to 'cache' a few context's ... so
// that we can keep a few users alive ... need to investigate context
// switch patters on s10 boot and application run
if ((s
->sim_state
.priv() == SS_HPRV
) || s
->hpstate
.red())
s
->inst_mmu
= s
->inst_mmu_pa
;
s
->inst_ctx
= s
->inst_ctx_pa
;
s
->inst_cache
= s
->inst_cache_pa
;
// We don't detect context switches in hyperprivileged mode.
// This means that we have to be carefull when caching data
// TTEs. In general when we are in hyperprivileged mode and
// the data mmu is not bypassing, the we don't cache the TTE.
if (!s
->sim_state
.inst_mmu())
s
->inst_mmu
= s
->inst_mmu_ra
;
s
->inst_ctx
= s
->inst_ctx_ra
;
if (s
->sim_state
.priv() == SS_PRIV
)
s
->inst_cache
= s
->inst_cache_ra_nuc_priv
;
s
->inst_cache
= s
->inst_cache_ra_pri_priv
;
s
->inst_cache
= s
->inst_cache_ra_nuc_user
;
s
->inst_cache
= s
->inst_cache_ra_pri_user
;
s
->inst_mmu
= s
->inst_mmu_va
;
s
->inst_ctx
= s
->inst_ctx_va
;
if (s
->sim_state
.priv() == SS_PRIV
)
if (s
->tl() || (s
->inst_ctx
.get_pri() == 0))
if (s
->data_ctx
.get() == 0)
s
->inst_cache
= s
->inst_cache_va_nuc_nuc_nuc_priv
;
else if (s
->data_ctx
.get_pri() == 0)
s
->inst_cache
= s
->inst_cache_va_nuc_nuc_sec_priv
;
s
->inst_cache
= s
->inst_cache_va_nuc_pri_sec_priv
;
s
->inst_cache
= s
->inst_cache_va_pri_priv
;
s
->inst_cache
= s
->inst_cache_va_nuc_user
;
s
->inst_cache
= s
->inst_cache_va_pri_user
;
// Track data-context changes in the selected cache.
if (s
->inst_cache
->data_ctx
.get() != s
->data_ctx
.get())
//fprintf(stderr,"CSD: %6s %d %d %016llx\n",s->inst_cache->id,s->sim_state.mode(),s->tl(),s->data_ctx.get());
s
->inst_cache
->data_ctx
= s
->data_ctx
;
// In red state we only use one decode cache as it's a rare state,
// the same one as in hyper privileged mode (for pa->pa immu).
// Note that we can be at user, privileged or hyper privileged level
// and be in red state. Thus when we enter or leave the red state
// or when privilege level changes whilst in red state, or when TL
// changes between zero and non-zero the decode cache is flushed.
// Additionally keep track of whether we entered red mode or not so
// that we can check breakpoints on red mode entry.
for (SS_BreakPoint
* bp
= s
->break_red_mode
; bp
; bp
= bp
->link
)
if (bp
->enabled
&& bp
->trigger(s
))
else if ((prev_priv
!= s
->sim_state
.priv())
|| ((s
->tl() == 0) && (s
->sim_state
.red_tl() != 0))
|| ((s
->tl() != 0) && (s
->sim_state
.red_tl() == 0)))
s
->sim_state
.red_tl(s
->tl());
// Set the default asi used for fetch and load/store operations
s
->inst_dft_asi
= SS_Asi::ASI_NUCLEUS
;
s
->data_dft_asi
= s
->pstate
.cle() ? SS_Asi::ASI_NUCLEUS_LITTLE
: SS_Asi::ASI_NUCLEUS
;
s
->inst_dft_asi
= SS_Asi::ASI_PRIMARY
;
s
->data_dft_asi
= s
->pstate
.cle() ? SS_Asi::ASI_PRIMARY_LITTLE
: SS_Asi::ASI_PRIMARY
;
// If the default data asi used in the instr cache changed from big to little
// or from little to big we flush the inst cache so that all load and stores
// with default asi pick up the correct asi value.
if (s
->inst_cache
->pstate_cle_flag
!= s
->pstate
.cle())
s
->inst_cache
->pstate_cle_flag
= s
->pstate
.cle();
// Keep track of whether we used 32bit mode or 64bit mode when
// fetching and excuting instructions. If the mode changed compared
// to the previous time the cache was used mode then we flush the cache.
// Note we have to do this mainly because we cache inst and data TTEs.
if (s
->inst_cache
->pstate_am_flag
!= s
->pstate
.am())
s
->inst_cache
->pstate_am_flag
= s
->pstate
.am();
// Keep track of whether pstate.tct was set or not, and if there
// is a change then flush the decode cache in question. The new
// decoded instruction will chech the pstate.tct bit and launch
// transfer control traps when apropriate.
if (s
->inst_cache
->pstate_tct_flag
!= s
->pstate
.tct())
s
->inst_cache
->pstate_tct_flag
= s
->pstate
.tct();
// Keep track of whether hpstate.ibe was set or not, and if there
// is a change then flush the decode cache in question and use a
// decoder that checks the opcode for match before decoding.
if (s
->hpstate
.ibe() || s
->sim_state
.ibe_sig())
s
->sim_state
.ib_enabled(1);
if (!s
->inst_cache
->hpstate_ibe_flag
)
s
->inst_cache
->hpstate_ibe_flag
= true;
s
->sim_state
.ib_enabled(0);
if (s
->inst_cache
->hpstate_ibe_flag
)
s
->inst_cache
->hpstate_ibe_flag
= false;
// Switch the decoder if ib enabled
if (s
->sim_state
.ib_enabled())
// Some products, like N2 for example reversed the instruction
// breakpoint and illegal instruction trap priority. For those
// products the decode functions do the checks all over the place.
// For product that are as SunSparc specifies we can simply flip
// the main decoder, e.g. check before you really decode. The
// others require a decode cache flush so that we go through the
// decode routines again. Some processors have no hpstate.ibe.
// These will have to use the SS_Signal::SET_INST_BRKPT method.
if (SS_Trap::table
[SS_Trap::INSTRUCTION_BREAKPOINT
].priority
<
SS_Trap::table
[SS_Trap::ILLEGAL_INSTRUCTION
].priority
)
s
->inst_dec
= ss_ibe_dec
;
s
->save_dec
= ss_ibe_dec
;
s
->inst_dec
= ss_run_dec
;
s
->save_dec
= ss_run_dec
;
// Nuke the tte pointers in the current decode cache so that we
// start from fresh, as previous cached contents is likely invalid.
for (int l
=0; l
< SS_InstrCache::SIZE
; l
++)
s
->inst_cache
->tag
[l
].tte
= &::junk_tte
;
// Check for fpu enabled or not. We throw the two flags into
// a single flag to make check for enabled fpu easier and faster.
// ToDo the disabled check should be part of decode and we
// should flush the decode cache on enable/disable. This so that
// we can finish a proper ill_ibe implementation.
if (s
->sim_state
.fp_disabled())
if (s
->fprs
.fef() && s
->pstate
.pef())
s
->sim_state
.fp_disabled(0);
if (!(s
->fprs
.fef() && s
->pstate
.pef()))
s
->sim_state
.fp_disabled(1);
// Set the current address mask value to be used (v8 compatible mode)
s
->mask_pstate_am
= ~uint32_t(0);
s
->mask_pstate_am
= ~uint64_t(0);
// Make sure the first executed instruction does check the cached TTE or
// does a lookup in the TLB.
s
->inst_tte
= s
->fail_tte
;
// Now send a message that we need to exit the inner run_loop, and
// reenter if we need to execute more code. This is to propagate
// the configuration that was setup above.
s
->msg
.set_reenter_loop();
void SS_Strand::set_fsr()/*{{{*/
// Split the architectural %fsr into the working copies (fsr_run, fsr_tem,
// fsr_exc) used during hardware-assisted FP emulation; get_fsr() performs
// the inverse recombination.
// For hardware floating point emulation we keep more then one version of the
// frs around. The fsr_run is the %fsr being used for the hardware floating
// point instruction. It has the tem=0 so traps don't occur during execution of
// the hardware floating point instruction, and aexc and cexc are cleared so
// we can find out if a trap occured. The simulated fsr.tem bits are kept in
// fsr_tem and the simulated fsr.eaxc and fsr.cexc are kept in fsr_exc.
// The final simulated fsr is composed by get_fsr().
fsr_run
.tem(0); // clear tem field so we don't take traps
fsr_tem
.cexc(fsr
.tem()); // put tem field in cexc so we don;t have to shift it
fsr_exc
.cexc(fsr
.cexc());
fsr_exc
.aexc(fsr
.aexc());
void SS_Strand::get_fsr()/*{{{*/
// Recompose the simulated %fsr accrued/current exception fields from the
// fsr_exc copy maintained by set_fsr() and the FP emulation path.
fsr
.aexc(fsr_exc
.aexc());
fsr
.cexc(fsr_exc
.cexc());
void SS_Strand::setup_tte_link_tables()/*{{{*/
// Allocate one SS_Chain list head per TLB entry for both the inst and data
// TLBs. These chains link decode-cache tags to the TTE they cache so that
// flush_tte() can invalidate only the affected decode-cache lines.
// NOTE(review): allocated with new[] and no matching delete[] is visible in
// this chunk — confirm ownership/teardown elsewhere in the file.
inst_tte_link
= new SS_Chain
[inst_tlb
->size()];
inst_tte_link_size
= inst_tlb
->size();
data_tte_link
= new SS_Chain
[data_tlb
->size()];
data_tte_link_size
= data_tlb
->size();
void SS_Strand::flush_tte( SS_Tlb
* tlb
, SS_Tte
* tte
)/*{{{*/
// Invalidate every decode-cache tag that currently caches this TTE by
// walking the data- and inst-side link chains for the TTE's index and
// pointing each tag's tte at ::junk_tte (guaranteed mismatch on next use).
// Note a tlb can be both inst and data tlb. So when we flush a
// tte from the decode cache we have to be carefull and check
// both inst and data flavors.
SS_Chain
* head
= &data_tte_link
[tte
->index
];
for (SS_Chain
* next
= head
->next
; head
!= next
; next
= next
->next
)
// Recover the enclosing record from the embedded chain link (container-of
// style pointer arithmetic via ptr_ofs), then poison its tte pointer.
// NOTE(review): the data-side arithmetic uses SS_Instr offsets while the
// cast below is to SS_InstrCache::Tag — confirm against the original source.
char* ptr_tag
= (char*)next
- ptr_ofs(SS_Instr
,lnk
) + ptr_ofs(SS_Instr
,tte
);
((SS_InstrCache::Tag
*)ptr_tag
)->tte
= &::junk_tte
;
SS_Chain
* head
= &inst_tte_link
[tte
->index
];
for (SS_Chain
* next
= head
->next
; head
!= next
; next
= next
->next
)
char* ptr_tag
= (char*)next
- ptr_ofs(SS_InstrCache::Tag
,lnk
);
((SS_InstrCache::Tag
*)ptr_tag
)->tte
= &::junk_tte
;
// Make sure we also clear the current used TTE by the inst_mmu.
// Not doing so will create a timing window in the inst_mmu in which
// a TTE can be reinserted after flush.
// Give the tte back when we are not in cosim mode
void SS_Strand::inst_tlb_set( SS_Tlb
* tlb
)/*{{{*/
// Install a new inst TLB for this strand. The assert shows this path is
// only expected in cosim mode (TLB-copy/sync support); the remainder of
// the body is not visible in this chunk — confirm against the original.
assert(sim_state
.cosim());
void SS_Strand::flush_tte_all()/*{{{*/
// Flushes all used inst TTEs from the decode caches.
// This automatically invalidates all cached data TTEs.
// First wipe out all va2pa and ra2pa TTE's
for (uint_t i
=0; i
< inst_tte_link_size
; i
++)
SS_Chain
* head
= &inst_tte_link
[i
];
SS_Chain
* next
= head
->next
;
// container-of recovery: step back from the embedded link to the Tag,
// then poison its tte so the next lookup misses.
char* ptr_tag
= (char*)next
- ptr_ofs(SS_InstrCache::Tag
,lnk
);
((SS_InstrCache::Tag
*)ptr_tag
)->tte
= &::junk_tte
;
// For the pa2pa decode cache we keep a single link, just
// for the pedantic flush instruction's purpose.
SS_Chain
* next
= phys_tte_link
.next
;
if (&phys_tte_link
!= next
)
while (&phys_tte_link
!= next
)
char* ptr_tag
= (char*)next
- ptr_ofs(SS_InstrCache::Tag
,lnk
);
((SS_InstrCache::Tag
*)ptr_tag
)->tte
= &::junk_tte
;
// In case of separate inst and data tlb we can remove all data TTE links
// without looking as we have removed all instructions from the decode
// cache, e.g. all cached instructions are now invalid.
if (inst_tlb
!= data_tlb
)
for (uint_t i
=0; i
< data_tte_link_size
; i
++)
SS_Chain
* head
= &data_tte_link
[i
];
SS_Chain
* next
= head
->next
;
// Drop the whole per-entry chain: unlink the head and reset it.
data_tte_link
[i
].unlink();
data_tte_link
[i
].clean();
void SS_Strand::flush_va( SS_Vaddr ea
)/*{{{*/
// This routine implements flush for va watchpoints and breakpoints.
// Compute the decode-cache line index for ea, then invalidate that line's
// tag in every flavor of the inst decode cache (va/ra x user/priv, and pa).
// NOTE(review): some cache member names below (e.g. *_nuc_nuc_nuc_priv)
// look garbled in this extraction — confirm against the original source.
uint_t line
= (ea
>> (SS_InstrCache::LINE_BITS
+ 2)) & SS_InstrCache::MASK
;
inst_cache_va_pri_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_pri_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_nuc_nuc_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_nuc_sec_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_pri_sec_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_pri_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_nuc_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_pri_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_nuc_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_pa
->tag
[line
].tte
= &::junk_tte
;
void SS_Strand::flush( SS_Paddr pa
, bool for_ras
)/*{{{*/
// Flush decode-cache state for a physical address. The pa cache line is
// poisoned/invalidated directly; the virtually-indexed caches are swept in
// 8KB strides to hit every possible alias of the PA.
// This routine implements flush as sun sparc specifies. E.g. the
// flush instruction flushes 8 bytes from the caches at the given
// effective address ea. However, currently no product uses this, we just
// flush the whole decode cache instead.
// The routine is mainly used from the frontend when user write code to
assert(!for_ras
|| sim_state
.ras_enabled());
pa
>>= (SS_InstrCache::LINE_BITS
+ 2);
uint_t line
= pa
& SS_InstrCache::MASK
;
inst_cache_pa
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_pa
->tag
[line
].tte
= &::junk_tte
;
// The smallest pages size is 8KB so we loop through the caches
// in 8KB increments and invalidate the virtual TTEs. Note that we
// take the cacheline size into account. Since we flush a PA we
// need to make sure we hit all the aliases.
// Note, if flush causes more decode then we can consider keeping the
// PA of the cacheline as well and do a check before we flush.
// Also we do a lot of storing here, that's bound to be bad for L2.
// Is there a better way of doing this flush ...
const uint_t size8k
= 8192 >> (SS_InstrCache::LINE_BITS
+ 2);
const uint_t mask8k
= size8k
- 1;
// First sweep: set the RAS poison bit in every aliasing line's tag.
// NOTE(review): the loop headers below both run `line` to SIZE; how `line`
// is re-initialized between the two sweeps is not visible in this
// extraction — confirm against the original source.
for (; line
< SS_InstrCache::SIZE
; line
+= size8k
)
inst_cache_va_pri_user
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_nuc_user
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_pri_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_nuc_nuc_nuc_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_nuc_nuc_sec_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_va_nuc_pri_sec_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_ra_pri_user
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_ra_nuc_user
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_ra_pri_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
inst_cache_ra_nuc_priv
->tag
[line
].tte_bits
|= RAS_TTE_POISON
;
// Second sweep: invalidate the tags outright by pointing them at junk_tte.
for (; line
< SS_InstrCache::SIZE
; line
+= size8k
)
inst_cache_va_pri_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_pri_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_nuc_nuc_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_nuc_sec_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_va_nuc_pri_sec_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_pri_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_nuc_user
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_pri_priv
->tag
[line
].tte
= &::junk_tte
;
inst_cache_ra_nuc_priv
->tag
[line
].tte
= &::junk_tte
;
void SS_Strand::do_retry()/*{{{*/
// Retry from a trap: restore pstate/hpstate from the trap-stack entries
// (tstate/htstate). Window-pointer and global-level consistency checks
// follow; their bodies are not visible in this extraction — confirm the
// actions taken on cwp/gl mismatch against the original source.
pstate
= tstate
.pstate();
hpstate
= htstate
.hpstate();
if (cwp() != tstate
.cwp())
if (!hpstate
.hpriv() && (gl() > max_pgl()))
bool SS_Strand::peek_signal()/*{{{*/
// Drain the strand's message queue: launch a pending handle-trap if one is
// queued, then dispatch each queued SS_Signal (breakpoints, decode-cache
// flush requests, instruction-breakpoint enables, external interrupts).
// The enclosing switch statement is not visible in this extraction; the
// case labels below imply one — confirm fall-through/break structure
// against the original source.
SS_Trap::Type trap_type
= msg
.get_handle_trap();
if (trap_type
!= SS_Trap::RESERVED
)
assert(!halted
); // An interrupt should wake us up.
(trap
)(pc(),npc(),this,0,trap_type
);
while (msg
.test_signal())
SS_Signal
* sgn
= msg
.get_signal();
case SS_Signal::BREAKPOINT
:
break_hit
= sgn
->breakpoint
;
case SS_Signal::FLUSH_TTE
:
flush_tte(sgn
->tlb
,sgn
->tte
);
case SS_Signal::FLUSH_VA
:
// Called on say inst va watchpoint enable, need to make sure
// we will hit the watchpoint, so flush the decode cache and
// ensure we go through the mmu lookup again to check watchpoint.
case SS_Signal::FLUSH_8B
:
// FLUSH_PA flushes the decode cache lines that could map the pa
case SS_Signal::FLUSH_8K
:
case SS_Signal::FLUSH_ALL
:
// FLUSH_8K flushes the whole decode cache
case SS_Signal::SET_INST_BRKPT
:
// Processors that have no hpstate.ibe bit switch to instruction
// breakpointing this way. Every time it gets enabled we flush the
// decode cache, to make sure we redecode all teh instructions.
sim_state
.ibe_sig(sgn
->ib_enable
);
// In cosim we always have all strands registered with the TLB as
// we can have tlb syncing (copy TLB) going on which complicates
// management of the active strands that use the TLB. It's an
// optiomisation, in cosim we don;t care about performance.
// When not in cosim we optimise the number of strands that receive
// FLUSH_TTE messages to the strands that are running. All strands
// that are not running don;t cache code so don't have to flush ...
inst_tlb
->add_strand(this);
if (inst_tlb
!= data_tlb
)
data_tlb
->add_strand(this);
inst_tlb
->rem_strand(this);
if (inst_tlb
!= data_tlb
)
data_tlb
->rem_strand(this);
// When in halted mode we transition to running or parked and hence
// get out of halted mode.
case SS_Signal::EXTERNAL_INTERRUPT
:
unhalt(); // Force wakeup on external interrupt receive
(internal_interrupt
)(this,sgn
->irq_type
,sgn
->irq_raise
);
fprintf(stderr
,"SS_Strand: Free signal got onto the wrong list\n");
// Now enable the next irq if any. The EXTERNAL_INTERRUPT above
// needs to be prevented from raising an interrupt when one is
if (trap_type
!= SS_Trap::RESERVED
)
sim_state
.irq_pending(0);
void SS_Strand::irq_launch( SS_Trap::Type trap_type
, bool do_time_out
)/*{{{*/
// Queue a disrupting trap for later launch: clear the pending flag, hand
// the trap to the irq_store callback (interrupt sync buffer), and post a
// handle-trap message so the run loop picks it up.
// In cosim mode we can not throw a disrupting trap directly as it
// will cause a PC mismatch for sure. RTL usually tells us when it
// is time to take a trap. So it's here that we store the disrupting
// traps into an interrupt sync buffer and it there that we wait for
// RTL to tell us when it's time to actually launch it.
sim_state
.irq_pending(false);
(irq_store
)(irq_sync
,trap_type
,do_time_out
);
msg
.set_handle_trap(trap_type
);
SS_Vaddr
SS_Strand::ss_trap( SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Instr
* i
, SS_Trap::Type tt
)/*{{{*/
// Central trap delivery routine: normalizes the trap type (tcc corner
// cases, reset aliases, red-state/watchdog promotion), checks trap
// breakpoints and tracer hooks, saves state to the trap stack, updates
// pstate/hpstate/tl/gl/cwp, and computes the new trap-vector pc.
// NOTE(review): several control-flow tokens (braces, a switch header for
// the case labels below, return statements) are not visible in this
// extraction — confirm structure against the original source.
assert(tt
< SS_Trap::MAX_TT
);
// In case the mmu is near an inst va breakpoint and the mmu traps then we
// end up taking this route back to step. So we need to reset the saved
// decoder back to the instruction decoder.
s
->inst_dec
= s
->save_dec
;
// RESET_GEN_WMR & RESET_GEN_DBR are not real trap, they are similar to
// POR, used for warm_reset or dbx_reset, respectively
if ((tt
!= SS_Trap::RESET_GEN_WMR
) && (tt
!= SS_Trap::RESET_GEN_DBR
))
if ((SS_Trap::MAX_TT
> tt
) && (tt
>= SS_Trap::TCC_INSTRUCTION_HPRV
))
// The code generated for the tcc instruction does not make a special case for
// user mode; when only 7 bits of the software trap number are valid iso 8 bits.
// We'll take care of that corner case here.
if (s
->sim_state
.priv() == SS_Strand::SS_USER
)
tt
= SS_Trap::Type(int(tt
) - 0x80);
else if (tt
>= SS_Trap::TCC_INSTRUCTION
)
// Legion hacks the solaris binary and inserts tcc with tt=0x175 that get called
// in hprv mode. They use that to shortcut some expensive bcopy I believe to speed
// up solaris boot on Legion. Since we don't seem to be able to build our own binary for
// solaris we get them from the Legion fooks and hence have to support this hack (bleh!)
if ((s
->sim_state
.priv() == SS_Strand::SS_HPRV
) && !s
->sim_state
.cosim() && (int(tt
) == 0x175))
// Check the breakpoints and if one or more triggered we leave ss_trap()
// before handling the trap. ToDo ... do we want this or do we want
// watchpoint or do we want both.
for (SS_BreakPoint
* bp
= s
->break_trap
[tt
]; bp
; bp
= bp
->link
)
if (bp
->enabled
&& bp
->trigger(s
))
// For breakpoints on disrupting traps we have to reinject the trap
// into the system else it will get dropped.
if (SS_Trap::table
[tt
].disrupting
)
s
->msg
.set_handle_trap(tt
);
// Check the trace callback to see if we are tracing traps
s
->trc_hook
->trap(SS_Trap::Type(tt
));
// Handle the trap and set the state accordingly
if (tt
== SS_Trap::POWER_ON_RESET
)
s
->trap_state
.memset(0); // Clear the whole trap stack
s
->stick_cmpr
.int_dis(1);
s
->hstick_cmpr
.int_dis(1);
s
->pstate
.tle(0).mm(0).pef(1).am(0).priv(1).ie(0).cle(0).tct(0); // priv(1) comes from N2
s
->hpstate
.red(1).hpriv(1).ibe(0).tlz(0);
pc
= s
->rstv_addr() + (SS_Vaddr(tt
) << 5);
s
->cansave
= s
->max_wp() - 1;
s
->cleanwin
= s
->max_wp();
else if ((tt
== SS_Trap::RESET_GEN_WMR
) || (tt
== SS_Trap::RESET_GEN_DBR
))
// wmr and dbr are triggered by reset_gen (0x89_0000_0808)
// RESET_GEN_WMR and RESET_GEN_DBR are not real trap type, they are used
// to distinguish them from POWER_ON_RESET
tt
= SS_Trap::POWER_ON_RESET
;
// The PRM (Chapter 13, June 2006) is in error with respect to TPC and TNPC
// on warm reset. The PC and NPC registers are not protected throughout
// warm reset (specifically during the scan flush) so they go to 0 before
// the trap is taken, and therefore TPC and TNPC at MAXTL get set to 0.
s
->tstate
.gl(s
->gl()).ccr(s
->ccr()).asi(s
->asi()).pstate(0).cwp(s
->cwp());
s
->stick_cmpr
.int_dis(1);
s
->hstick_cmpr
.int_dis(1);
s
->pstate
.tle(0).mm(0).pef(1).am(0).priv(1).ie(0).cle(0).tct(0); // priv(1) comes from N2
s
->hpstate
.red(1).hpriv(1).ibe(0).tlz(0);
pc
= s
->rstv_addr() + (SS_Vaddr(tt
) << 5);
// For processors that don't implement the full 64bits of address space we
// don't expect to keep the full 64bit of tpc and tnpc: sign extend
// the tpn and tnpc on read and write to hide the upper bits. We keep the
// upper bits so that we can properly detect when we fall into a va-hole,
// regardless of whether the pstate.am bit was on for a while or not.
s
->tpc
= pc
& (s
->mask_pstate_am
| (s
->mask_pstate_am
<< s
->va_bits()));
s
->tnpc
= npc
& (s
->mask_pstate_am
| (s
->mask_pstate_am
<< s
->va_bits()));
s
->tstate
.pstate(s
->pstate()).cwp(s
->cwp()).asi(s
->asi()).ccr(s
->ccr()).gl(s
->gl());
s
->htstate
.hpstate(s
->hpstate());
case SS_Trap::WATCHDOG_RESET
:
case SS_Trap::EXTERNALLY_INITIATED_RESET
:
// XIR on N2 does not go through error state
// when tl maxes out. On those product the xir_error_state() flag
// is false. Products like do circulate through the error state.
// So for those the bit is set.
if (s
->sim_state
.xir_error_state() && (s
->tl() == s
->max_tl()))
tt
= SS_Trap::WATCHDOG_RESET
;
case SS_Trap::SOFTWARE_INITIATED_RESET
:
if (s
->tl() == s
->max_tl())
tt
= SS_Trap::WATCHDOG_RESET
;
if ((tt
== SS_Trap::HSTICK_MATCH
) && (s
->sim_state
.hintp_hsp_clear()))
// Promote to watchdog at max trap level, or to red-state exception when
// already in red state or one level below max.
if (s
->tl() == s
->max_tl())
tt
= SS_Trap::WATCHDOG_RESET
;
else if (s
->hpstate
.red() || (s
->tl() == (s
->max_tl() - 1)))
tt
= SS_Trap::RED_STATE_EXCEPTION
;
// Bump trap level and global level, saturating at their maxima.
s
->tl
= (s
->tl() == s
->max_tl()) ? s
->max_tl() : (s
->tl() + 1);
s
->gl
= (s
->gl() == s
->max_gl()) ? s
->max_gl() : (s
->gl() + 1);
if (tt
<= SS_Trap::RED_STATE_EXCEPTION
)
s
->pstate
.mm(0).pef(1).priv(1).am(0).ie(0).tct(0).cle(0).tle(0); // priv(1).tle(0) comes from N2
s
->hpstate
.red(1).hpriv(1).ibe(0).tlz(0);
s
->lsu_ctr
= 0; // from N2 this probably has to move out ... does not have lsu_ctr.
s
->sim_state
.inst_mmu(0);
s
->sim_state
.data_mmu(0);
pc
= s
->rstv_addr() + (SS_Vaddr(tt
) << 5);
else if (!SS_Trap::is_trap_to_priv(tt
) || s
->hpstate
.hpriv())
// Trap delivered to hyperprivileged mode: vector through htba.
s
->pstate
.priv(0).cle(0).pef(1).am(0).ie(0).tct(0);
s
->hpstate
.red(0).hpriv(1).ibe(0);
pc
= s
->htba() + (SS_Vaddr(tt
) << 5);
else if (s
->tl() > s
->max_ptl())
// Privileged trap above max privileged TL escalates to a hypervisor
// watchdog vector.
s
->pstate
.priv(0).cle(0).pef(1).am(0).ie(0).tct(0);
s
->hpstate
.red(0).hpriv(1).ibe(0);
pc
= s
->htba() + (SS_Vaddr(SS_Trap::WATCHDOG_RESET
) << 5);
// Trap delivered to privileged mode: clamp gl and vector through tba.
s
->gl
= (s
->gl() > s
->max_pgl()) ? s
->max_pgl() : s
->gl();
s
->pstate
.priv(1).cle(s
->pstate
.tle()).pef(1).am(0).ie(0).tct(0);
pc
= s
->tba() + ((s
->tl() > 1) ? (SS_Vaddr(1) << 14) : 0) + (SS_Vaddr(tt
) << 5);
// Register-window adjustment for clean-window, spill and fill traps.
if (s
->tt() == SS_Trap::CLEAN_WINDOW
)
s
->cwp
= (s
->cwp() == s
->max_wp()) ? 0 : (s
->cwp() + 1);
else if (SS_Trap::is_spill(SS_Trap::Type(s
->tt())))
s
->cwp
= (s
->cwp() + s
->cansave() + 2) % (s
->max_wp() + 1);
else if (SS_Trap::is_fill(SS_Trap::Type(s
->tt())))
s
->cwp
= s
->cwp() ? (s
->cwp() - 1) : s
->max_wp();
// In trace mode we use the trap_taken flag and only in that
// mode does it make sense as there is a corresponding trap_taken(0).
s
->sim_state
.trap_taken(1);
bool SS_Strand::trap_launch_ok( SS_Trap::Type _tt
)/*{{{*/
// For the outside world (cosim) this function returns true
// when a particular trap can be thrown through (s->trap)(...).
// Gate per trap class: resets/HV interrupts need !hpriv or pstate.ie;
// mondo/resumable traps need ie and !hpriv; interrupt levels additionally
// respect pil. The switch header is not visible in this extraction.
case SS_Trap::POWER_ON_RESET
:
case SS_Trap::EXTERNALLY_INITIATED_RESET
:
// for cosim "INTP 00 00"
case SS_Trap::RESET_GEN_WMR
:
case SS_Trap::CTRL_WORD_QUEUE_INT
:
case SS_Trap::MODULAR_ARITH_INT
:
case SS_Trap::SW_RECOVERABLE_ERROR
:
case SS_Trap::HSTICK_MATCH
:
case SS_Trap::INTERRUPT_VECTOR
:
case SS_Trap::HW_CORRECTED_ERROR
:
return !hpstate
.hpriv() || pstate
.ie();
case SS_Trap::CPU_MONDO_TRAP
:
case SS_Trap::DEV_MONDO_TRAP
:
case SS_Trap::RESUMABLE_ERROR
:
return pstate
.ie() && !hpstate
.hpriv();
case SS_Trap::INTERRUPT_LEVEL_1
:
case SS_Trap::INTERRUPT_LEVEL_2
:
case SS_Trap::INTERRUPT_LEVEL_3
:
case SS_Trap::INTERRUPT_LEVEL_4
:
case SS_Trap::INTERRUPT_LEVEL_5
:
case SS_Trap::INTERRUPT_LEVEL_6
:
case SS_Trap::INTERRUPT_LEVEL_7
:
case SS_Trap::INTERRUPT_LEVEL_8
:
case SS_Trap::INTERRUPT_LEVEL_9
:
case SS_Trap::INTERRUPT_LEVEL_10
:
case SS_Trap::INTERRUPT_LEVEL_11
:
case SS_Trap::INTERRUPT_LEVEL_12
:
case SS_Trap::INTERRUPT_LEVEL_13
:
case SS_Trap::INTERRUPT_LEVEL_14
:
case SS_Trap::INTERRUPT_LEVEL_15
:
return !hpstate
.hpriv() && (pil() <= (_tt
- SS_Trap::INTERRUPT_LEVEL_1
));
// I don;t expect any other traps but lets assume we can take then and complain silently
fprintf(stderr
,"SS_Strand::trap_launch_ok called with tt=%x, update switch\n",_tt
);
SS_AsiSpace::Error
SS_Strand::scratchpad_ld64( SS_Node
*, void* _reg
, SS_Strand
*, SS_Vaddr va
, uint64_t* data
)/*{{{*/
// ASI scratchpad load handler: reads one of eight 64-bit scratchpad
// registers, selected by va bits [5:3]. _reg points at the register array.
uint64_t* reg
= (uint64_t*)_reg
;
*data
= reg
[(va
>> 3) & 7];
SS_AsiSpace::Error
SS_Strand::scratchpad_st64( SS_Node
*, void* _reg
, SS_Strand
*, SS_Vaddr va
, uint64_t data
)/*{{{*/
// ASI scratchpad store handler: writes one of eight 64-bit scratchpad
// registers, selected by va bits [5:3]. _reg points at the register array.
uint64_t* reg
= (uint64_t*)_reg
;
reg
[(va
>> 3) & 7] = data
;
SS_AsiSpace::Error
SS_Strand::lsu_ctr_st64( SS_Node
*, void* _reg
, SS_Strand
* s
, SS_Vaddr
, uint64_t data
)/*{{{*/
// ASI store handler for the LSU control register. Only the cast of the
// register pointer is visible in this extraction; the actual field updates
// are not — confirm against the original source.
SS_LsuCtr
* lc
= (SS_LsuCtr
*)_reg
;
inline uint64_t run_loop( SS_Strand
* s
, SS_Instr
* opc
, SS_InstrCache::Tag
* tag
, uint64_t n
)/*{{{*/
// Hot inner execution loop: locate the decode-cache line for pc, call the
// inst mmu on tag/tte mismatch, then execute cached instructions until n
// runs out, pc leaves the line, or a signal is pending. Returns the
// remaining count. NOTE(review): the enclosing loop over cache lines and
// declarations of pc/line_idx/line_ptr/line_pc/inst_idx/inst_ptr/inst_exe
// are not visible in this extraction.
SS_InstrCache::Tag
* info_ptr
;
// Preload s->inst_mmu so that compiler does not do load followed by call
// Keep s->signal in a local var so that compiler loads well before use
// Keep the instruction to execute in local variable to break load followed by call
SS_InstMmu inst_mmu
= s
->inst_mmu
;
uint64_t sgn
= s
->msg
.is_pending();
// Start the most critical loop of the simulator. Every line of code
// here is overhead ... really. So don't add more code. Loop while we
// still have n instructions to execute and we don;t have to deal with
// an exceptional case (sgn == 0), e.g. have not received a signal.
assert(sizeof(SS_InstrCache::Tag
) == 32); // line_idx << 4 ... below
line_idx
= ((pc
>> (SS_InstrCache::LINE_BITS
+ 2)) & SS_InstrCache::MASK
);
line_ptr
= (SS_Instr
*)((char*)opc
+ (line_idx
<< (SS_Instr::BITS
+ SS_InstrCache::LINE_BITS
)));
info_ptr
= (SS_InstrCache::Tag
*)((char*)tag
+ (line_idx
<< 5)); // see assert above
line_pc
= pc
&~ (SS_InstrCache::LINE_MASK
<< 2);
inst_idx
= (pc
& (SS_InstrCache::LINE_MASK
<< 2)) << (SS_Instr::BITS
- 2 - SS_Instr::SKEW
);
inst_ptr
= (SS_Instr
*)((char*)line_ptr
+ inst_idx
);
inst_exe
= inst_ptr
->exe
;
// Look ahead and prefetch the L2 ... this makes as perform better
ss_prefetch((char*)inst_ptr
+ 1024);
// Check line tte and tag: the instruction cache is smaller then the
// largest page size. If either of them mismatches then call the mmu
// to do an tlb lookup. The current instuction tte is set to the fail_tte
// in case a trap occured in the mmu; bail out as we most certainly
// will have to handle a reenterloop signal due to trap handling.
if ((info_ptr
->tte
!= s
->inst_tte
) || (info_ptr
->tag
!= line_pc
))
pc
= (inst_mmu
)(pc
,s
->npc(),s
,line_ptr
,info_ptr
);
// In case we hit a breakpoint in inst_mmu then we will not execute the
// while loop, but we will account n wrongly by 1 after that, fix it.
// Note sgn != 0 when a breakpoint is hit. Also when inst_tte == fail_tte
// then we will exit here below, so correct by 1 there too.
if (s
->inst_tte
== s
->fail_tte
)
return s
->break_hit
? n
: (n
- 1);
inst_exe
= inst_ptr
->exe
;
sgn
= s
->msg
.is_pending();
// Now execute the instructions in the cache line, until the loop count
// (n) becomes zero, or the pc falls of the line, or we have to
// deal with a signal due to priviledge mode switches, breakpoints, etc.
while (n
&& ((pc
&~ (SS_InstrCache::LINE_MASK
<< 2)) == line_pc
) && (sgn
== 0))
pc
= (inst_exe
)(pc
,s
->npc(),s
,inst_ptr
);
inst_idx
= (pc
& (SS_InstrCache::LINE_MASK
<< 2)) << (SS_Instr::BITS
- 2 - SS_Instr::SKEW
);
inst_ptr
= (SS_Instr
*)((char*)line_ptr
+ inst_idx
);
inst_exe
= inst_ptr
->exe
;
sgn
= s
->msg
.is_pending();
// Breakpoints can be hit during inst_exe. This means that we
// have to break out and not execute the instruction. The code
// above does one n-- too many in case of a breakpoint hit, fix that.
inline uint64_t trc_loop( SS_Strand
* s
, SS_Instr
* opc
, SS_InstrCache::Tag
* tag
, uint64_t n
)/*{{{*/
// Single-step variant of run_loop used for tracing (n is asserted to be 1):
// performs the same line lookup and mmu check, additionally handles
// RAS_TTE_POISON re-validation of the line against the I$ error detector,
// and locks the TTE so the tracer can use it after execution.
assert(n
== 1); // Assert the obvious, execute one instruction, output trace for it if we executed, etc.
// @@ha144505, for now this code is a copy of the run_loop ... bad style
// but I have to fix a tracing bug first. We'll clean this up later.
SS_InstrCache::Tag
* info_ptr
;
// Preload s->inst_mmu so that compiler does not do load followed by call
// Keep s->signal in a local var so that compiler loads well before use
// Keep the instruction to execute in local variable to break load followed by call
SS_InstMmu inst_mmu
= s
->inst_mmu
;
bool sgn
= s
->msg
.is_pending();
// Start the most critical loop of the simulator. Every line of code
// here is overhead ... really. So don't add more code. Loop while we
// still have n instructions to execute and we don;t have to deal with
// an exceptional case (sgn == 0), e.g. have not received a signal.
assert(sizeof(SS_InstrCache::Tag
) == 32); // line_idx << 4 ... below
line_idx
= ((pc
>> (SS_InstrCache::LINE_BITS
+ 2)) & SS_InstrCache::MASK
);
line_ptr
= (SS_Instr
*)((char*)opc
+ (line_idx
<< (SS_Instr::BITS
+ SS_InstrCache::LINE_BITS
)));
info_ptr
= (SS_InstrCache::Tag
*)((char*)tag
+ (line_idx
<< 5)); // see assert above
line_pc
= pc
&~ (SS_InstrCache::LINE_MASK
<< 2);
inst_idx
= (pc
& (SS_InstrCache::LINE_MASK
<< 2)) << (SS_Instr::BITS
- 2 - SS_Instr::SKEW
);
inst_ptr
= (SS_Instr
*)((char*)line_ptr
+ inst_idx
);
inst_exe
= inst_ptr
->exe
;
// Look ahead and prefetch the L2 ... this makes as perform better
ss_prefetch((char*)inst_ptr
+ 1024);
// Check line tte and tag: the instruction cache is smaller then the
// largest page size. If either of them mismatches then call the mmu
// to do an tlb lookup. The current instuction tte is set to the fail_tte
// in case a trap occured in the mmu; bail out as we most certainly
// will have to handle a reenterloop signal due to trap handling.
// Decode cache and RAS I$ cache flushing are now decoupled somewhat to
// reduce the cost of instruction decoding.
// The I$ is now a subset of the decode cache. If a line is cast out of
// the I$, the corresponding decode tte entry is marked RAS_TTE_POISON
// (which just sets the least significant bit), while preserving the tte
// and its decode cache entry.
// The predicate is that:
// 1) Every decode cache entry without RAS_TTE_POISON is in I$ cache.
// 2) Every decode cache entry *with* RAS_TTE_POISON corresponds to an
// I$ line that has a RAS error or has been flushed from the I$.
if (info_ptr
->tte_bits
& SS_Strand::RAS_TTE_POISON
)
assert(s
->sim_state
.ras_enabled());
info_ptr
->tte_bits
&= ~SS_Strand::RAS_TTE_POISON
;
if ((info_ptr
->tte
== s
->inst_tte
) && (info_ptr
->tag
== line_pc
))
// Line otherwise matches: run the fetch-error detector on its PA; on a
// detected error take the trap, fail the TTE and re-poison the line.
SS_Paddr pa
= s
->inst_tte
->trans(line_pc
);
SS_Trap::Type trap_type
=
s
->mem_err_detector
.detect_fetch_err(SS_MemErrDetector::L1_CACHE_AND_STB
,
if (trap_type
!= SS_Trap::NO_TRAP
)
pc
= (s
->trap
)(pc
,s
->npc(),s
,line_ptr
,trap_type
);
s
->inst_tte
= s
->fail_tte
;
info_ptr
->tte_bits
|= SS_Strand::RAS_TTE_POISON
;
if ((info_ptr
->tte
!= s
->inst_tte
) || (info_ptr
->tag
!= line_pc
))
pc
= (inst_mmu
)(pc
,s
->npc(),s
,line_ptr
,info_ptr
);
if (s
->inst_tte
== s
->fail_tte
)
// ToDo: The if cosim below is to make regression pass. The tte used
// for tracing is whatever was used last for tracing. This is not correct
// but it makes the regession pas to get this bug fix out.
// We should rerun all the cosims with this if taken out and the
// tte set to zero. The trace should report a non number for the pa and
// opcode field, say dashes.
if (!s
->sim_state
.cosim())
s
->trc_inst_tte
= s
->inst_tte
;
// return 0 means trace instruction, hence lock the tte for reuse
// as we need it. After outputting the trace the tte is given up.
s
->inst_tlb
->lock_reuse_tte(s
->trc_inst_tte
);
inst_exe
= inst_ptr
->exe
;
sgn
= s
->msg
.is_pending();
// In tracing mode we need to hold on to the TTE until we have traced
// the instruction ... we do this because demap could invalidate the TTE
// in the decode cache of the very instruction we are tracing.
s
->trc_inst_tte
= s
->inst_tte
;
s
->inst_tlb
->lock_reuse_tte(s
->trc_inst_tte
);
// Now execute the instructions in the cache line, until the loop count
// (n) becomes zero, or the pc falls of the line, or we have to
// deal with a signal due to priviledge mode switches, breakpoints, etc.
pc
= (inst_exe
)(pc
,s
->npc(),s
,inst_ptr
);
inst_idx
= (pc
& (SS_InstrCache::LINE_MASK
<< 2)) << (SS_Instr::BITS
- 2 - SS_Instr::SKEW
);
inst_ptr
= (SS_Instr
*)((char*)line_ptr
+ inst_idx
);
inst_exe
= inst_ptr
->exe
;
sgn
= s
->msg
.is_pending();
// Breakpoints can be hit during inst_exe. This means that we
// have to break out and not execute the instruction. The code
// above does one n-- too many in case of a breakpoint hit, fix that.
void SS_Strand::run_tick( uint64_t incr
)/*{{{*/
// run_tick() updates the (s)tick value and compares it
// against the tick_cmpr, stick_cmp and hstick_cmpr
// and raises traps if tick matches occur.
// In cosim mode we don't change the softint stick and tick
// match bits (sm and tm) ourselves: rtl doesn't trust our
// our administration of time ... and rightfully so.
// In other simulation modes the tick and stick value can
// increase by more then 1 between steps, so compare for >=
// i.s.o. the exact match. Additionally to prevent multiple
// interrupts we only check the interrupt when the sm or
// tm bits are getting set (rising edge).
// NOTE(review): the compare conditions feeding the else-if ladder below
// are not visible in this extraction — confirm against the original.
uint64_t t
= stick
.counter() + incr
;
if (sim_state
.ras_enabled())
if(mem_err_detector
.tick_err_detector
)
err_found
= mem_err_detector
.tick_err_detector(this);
// Check tick against hstick_cmpr, stick_cmpr and
// tck_cmpr in that order. The order is important for
// trap prioprity reasons.
// The comparison operation is suppressed, if an error is detected in any of
// tick compare registers
// just drop them, follow me ??
else if (hintp
.hsp() == 0)
unhalt(); // Force wakeup in case the irq is blocked
irq
.raise(this,SS_Interrupt::BIT_HSTICK_MATCH
);
else if (softint
.sm() == 0)
unhalt(); // Force wakeup in case the irq is blocked
irq
.update_softint(this);
else if (softint
.tm() == 0)
unhalt(); // Forece wakeup in case the irq is blocked
irq
.update_softint(this);
uint64_t SS_Strand::run_step( uint64_t count
)/*{{{*/
// Execute up to count instructions through run_loop(), keeping inst_count
// in sync and restoring the running state on exit. Returns instructions
// executed (per the early-exit convention documented below).
uint64_t save_count
= count
;
running
= sim_state
.running();
// If step is called and we're not in running mode, and we
// have some signal in the queue that requires us to leave
// step (breakpoint) then we exit .. and return the number of
// instructions executed .... which in case we got out of
// running mode is 0 and in case of a breakpoint is count.
if ((running
== 0) && peek_signal())
return running
? count
: 0;
inst_count
= inst_count() + save_count
- count
;
return running
? count
: 0;
count
= run_loop(this,inst_cache
->opc
,inst_cache
->tag
,count
);
inst_count
= inst_count() + save_count
- count
;
sim_state
.running(running
);
uint64_t SS_Strand::trc_step( uint64_t count
)/*{{{*/
// Tracing variant of run_step: single-steps through trc_loop(), then emits
// the executed instruction and a full architectural-state comparison dump
// through the tracer hook, releases the traced TTE, and runs the RAS
// per-step error hook.
uint64_t save_count
= count
;
running
= sim_state
.running();
if (running
== 0 && peek_signal())
return running
? count
: 0;
inst_count
= inst_count() + save_count
- count
;
return running
? count
: 0;
SS_InstrCache
* trc_cache
= inst_cache
;
// Now step one instruction in trace mode ...
if (!trc_loop(this,inst_cache
->opc
,inst_cache
->tag
,1))
// Sign-extend the traced pc down to the implemented va width before
// reporting it.
uint_t sft
= 64 - va_bits();
trc_hook
->exe_instr((trc_pc
<< sft
) >> sft
,trc_inst_tte
,trc_cache
->pc_inst(trc_pc
));
trc_hook
->cmp_state(SS_Registers::ASR_PC
,pc());
trc_hook
->cmp_state(SS_Registers::SIM_NPC
,npc());
for (i
=SS_Registers::IRF_OFS
; i
< SS_Registers::IRF_END
; i
= i
+1)
trc_hook
->cmp_state(i
,irf
[i
- SS_Registers::IRF_OFS
]);
for (i
=SS_Registers::DRF_OFS
; i
< SS_Registers::DRF_END
; i
= i
+1)
trc_hook
->cmp_state(i
,drf
[i
- SS_Registers::DRF_OFS
]);
trc_hook
->cmp_state(SS_Registers::ASR_Y
,y());
trc_hook
->cmp_state(SS_Registers::ASR_CCR
,ccr());
trc_hook
->cmp_state(SS_Registers::ASR_ASI
,asi());
trc_hook
->cmp_state(SS_Registers::ASR_FPRS
,fprs());
trc_hook
->cmp_state(SS_Registers::ASR_GSR
,gsr());
trc_hook
->cmp_state(SS_Registers::SIM_FSR
,fsr());
trc_hook
->cmp_state(SS_Registers::PR_PSTATE
,pstate());
trc_hook
->cmp_state(SS_Registers::HPR_HPSTATE
,hpstate());
trc_hook
->cmp_state(SS_Registers::PR_GL
,gl());
trc_hook
->cmp_state(SS_Registers::PR_TBA
,tba());
trc_hook
->cmp_state(SS_Registers::HPR_HTBA
,htba());
trc_hook
->cmp_state(SS_Registers::PR_TL
,tl());
trc_hook
->cmp_state(SS_Registers::PR_TT
,tt());
trc_hook
->cmp_state(SS_Registers::PR_TPC
,SS_Vaddr(tpc() << (64 - va_bits())) >> (64 - va_bits()));
trc_hook
->cmp_state(SS_Registers::PR_TNPC
,SS_Vaddr(tnpc() << (64 - va_bits())) >> (64 - va_bits()));
trc_hook
->cmp_state(SS_Registers::PR_TSTATE
,tstate());
trc_hook
->cmp_state(SS_Registers::HPR_HTSTATE
,htstate());
trc_hook
->cmp_state(SS_Registers::PR_CWP
,cwp());
trc_hook
->cmp_state(SS_Registers::PR_CANSAVE
,cansave());
trc_hook
->cmp_state(SS_Registers::PR_CANRESTORE
,canrestore());
trc_hook
->cmp_state(SS_Registers::PR_CLEANWIN
,cleanwin());
trc_hook
->cmp_state(SS_Registers::PR_OTHERWIN
,otherwin());
trc_hook
->cmp_state(SS_Registers::ASR_TICK_CMPR
,tick_cmpr());
trc_hook
->cmp_state(SS_Registers::ASR_STICK_CMPR
,stick_cmpr());
trc_hook
->cmp_state(SS_Registers::HPR_HSTICK_CMPR
,hstick_cmpr());
trc_hook
->cmp_state(SS_Registers::HPR_HINTP
,hintp());
trc_hook
->cmp_state(SS_Registers::PR_PIL
,pil());
trc_hook
->cmp_state(SS_Registers::ASR_SOFTINT
,softint());
// Now release the trc_tte as we are not using it any longer.
inst_tlb
->reuse_tte(trc_inst_tte
);
if(mem_err_detector
.step_hook
)
//traps taken here have priorities that don't follow the trap priority hierarchy
SS_Trap::Type trap_type
= mem_err_detector
.step_hook(this);
if(trap_type
!= SS_Trap::NO_TRAP
)
(trap
)(pc(),npc(),this,0,trap_type
);
inst_count
= inst_count() + save_count
- count
;
sim_state
.running(running
);
void SS_Strand::add_tracer( SS_Tracer
* trc
)/*{{{*/
// Attach a tracer; when the hook chain requires memory tracing (and RAS
// mode is off) switch the memory dispatch table to the tracing variant.
// The list-insertion itself is not visible in this extraction.
if (trc_hook
->need_mem_trc() && !sim_state
.ras_enabled())
mem_table
= mem_trc_table_ref
;
void SS_Strand::del_tracer( SS_Tracer
* trc
)/*{{{*/
// Detach a tracer from the singly linked trc_hook chain; if no remaining
// tracer needs memory tracing (and RAS is off) restore the non-tracing
// memory dispatch table. The stderr message reports a request to delete a
// tracer that was never connected.
trc_hook
= trc_hook
->next
;
SS_Tracer
* head
= trc_hook
;
for (SS_Tracer
* next
= trc_hook
->next
; trc
&& next
; head
= next
, next
= next
->next
)
if (((trc_hook
== 0) || !trc_hook
->need_mem_trc()) && !sim_state
.ras_enabled())
mem_table
= mem_run_table_ref
;
fprintf(stderr
,"ERROR: SS_Tracer: There is no tracer connected\n");
static inline SS_Instr
* line_index( SS_Instr
* line
, uint_t n
)/*{{{*/
// Return a pointer to the n-th instruction slot within a decode-cache
// line; slots are spaced 1 << (SS_Instr::BITS - SS_Instr::SKEW) bytes
// apart, hence the raw char* arithmetic.
return (SS_Instr
*)((char*)line
+ (n
<< (SS_Instr::BITS
- SS_Instr::SKEW
)));
SS_Vaddr
mem_run_fetch512( SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Instr
* line
, SS_Vaddr va
, SS_Tte
* tte
)/*{{{*/
// Refill one decode-cache line (16 x 32-bit opcodes = 64 bytes) from
// memory at the PA translated from va, then set every slot's exe hook to
// the strand's current decoder so each instruction is decoded on first
// execution.
SS_Paddr pa
= tte
->trans(va
);
#if defined(MEMORY_MSYNC)
((SS_MsyncMemory
*)(s
->memory
))->msync_info(s
->strand_id(),va
);
#elif defined(MEMORY_EXTERNAL)
((SS_ExternalMemory
*)(s
->memory
))->set_strand_id(s
->strand_id());
((SS_Memory
*)(s
->memory
))->SS_Memory::fetch512(pa
,s
->mem_data
);
uint64_t d0
= s
->mem_data
[0];
uint64_t d1
= s
->mem_data
[1];
uint64_t d2
= s
->mem_data
[2];
uint64_t d3
= s
->mem_data
[3];
uint64_t d4
= s
->mem_data
[4];
uint64_t d5
= s
->mem_data
[5];
uint64_t d6
= s
->mem_data
[6];
uint64_t d7
= s
->mem_data
[7];
// Each 64-bit word holds two big-endian opcodes: high half first.
line
->line_index(0)->opc
= d0
>> 32;
line
->line_index(1)->opc
= d0
;
line
->line_index(2)->opc
= d1
>> 32;
line
->line_index(3)->opc
= d1
;
line
->line_index(4)->opc
= d2
>> 32;
line
->line_index(5)->opc
= d2
;
line
->line_index(6)->opc
= d3
>> 32;
line
->line_index(7)->opc
= d3
;
line
->line_index(8)->opc
= d4
>> 32;
line
->line_index(9)->opc
= d4
;
line
->line_index(10)->opc
= d5
>> 32;
line
->line_index(11)->opc
= d5
;
line
->line_index(12)->opc
= d6
>> 32;
line
->line_index(13)->opc
= d6
;
line
->line_index(14)->opc
= d7
>> 32;
line
->line_index(15)->opc
= d7
;
SS_Execute dec
= s
->inst_dec
;
line
->line_index(0)->exe
= dec
;
line
->line_index(1)->exe
= dec
;
line
->line_index(2)->exe
= dec
;
line
->line_index(3)->exe
= dec
;
line
->line_index(4)->exe
= dec
;
line
->line_index(5)->exe
= dec
;
line
->line_index(6)->exe
= dec
;
line
->line_index(7)->exe
= dec
;
line
->line_index(8)->exe
= dec
;
line
->line_index(9)->exe
= dec
;
line
->line_index(10)->exe
= dec
;
line
->line_index(11)->exe
= dec
;
line
->line_index(12)->exe
= dec
;
line
->line_index(13)->exe
= dec
;
line
->line_index(14)->exe
= dec
;
line
->line_index(15)->exe
= dec
;
// Reset the decoder back to the normal decoder. In some cases
// (inst va breakpoint) the inst_mmu can set the decoder to a
// special decoder that is for just one cache line
s
->inst_dec
= s
->save_dec
;
// Refill one decode-cache line (sixteen 4-byte opcodes, 64 bytes) from I/O
// space: identical to mem_run_fetch512, but the 64-byte read goes through the
// strand's I/O interface instead of plain memory. No tracing, no RAS checks.
//
// NOTE(review): this copy of the file appears to have lost lines during
// extraction: the braces and the final dispatch/return of an SS_Vaddr are
// not visible. Verify against the upstream OpenSPARC T2 sources.
SS_Vaddr
io_run_fetch512( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Vaddr va, SS_Tte* tte )/*{{{*/
  // Translate the virtual fetch address to a physical address via the TTE.
  SS_Paddr pa = tte->trans(va);
  // 64-byte code fetch through the I/O bridge, tagged with the strand id.
  s->io->fetch512(s->strand_id(),pa,s->mem_data);
  uint64_t d0 = s->mem_data[0];
  uint64_t d1 = s->mem_data[1];
  uint64_t d2 = s->mem_data[2];
  uint64_t d3 = s->mem_data[3];
  uint64_t d4 = s->mem_data[4];
  uint64_t d5 = s->mem_data[5];
  uint64_t d6 = s->mem_data[6];
  uint64_t d7 = s->mem_data[7];
  // Split each 64-bit word into two 32-bit opcodes: the high half is the
  // instruction at the lower (even) slot; the full word is assigned for the
  // odd slot (presumably opc keeps only the low 32 bits — verify opc width).
  line_index(line,0)->opc = d0 >> 32;
  line_index(line,1)->opc = d0;
  line_index(line,2)->opc = d1 >> 32;
  line_index(line,3)->opc = d1;
  line_index(line,4)->opc = d2 >> 32;
  line_index(line,5)->opc = d2;
  line_index(line,6)->opc = d3 >> 32;
  line_index(line,7)->opc = d3;
  line_index(line,8)->opc = d4 >> 32;
  line_index(line,9)->opc = d4;
  line_index(line,10)->opc = d5 >> 32;
  line_index(line,11)->opc = d5;
  line_index(line,12)->opc = d6 >> 32;
  line_index(line,13)->opc = d6;
  line_index(line,14)->opc = d7 >> 32;
  line_index(line,15)->opc = d7;
  // Point every slot's execute hook at the current decoder, so the first
  // execution of each instruction decodes itself (and then caches the result).
  SS_Execute dec = s->inst_dec;
  line_index(line,0)->exe = dec;
  line_index(line,1)->exe = dec;
  line_index(line,2)->exe = dec;
  line_index(line,3)->exe = dec;
  line_index(line,4)->exe = dec;
  line_index(line,5)->exe = dec;
  line_index(line,6)->exe = dec;
  line_index(line,7)->exe = dec;
  line_index(line,8)->exe = dec;
  line_index(line,9)->exe = dec;
  line_index(line,10)->exe = dec;
  line_index(line,11)->exe = dec;
  line_index(line,12)->exe = dec;
  line_index(line,13)->exe = dec;
  line_index(line,14)->exe = dec;
  line_index(line,15)->exe = dec;
  // Reset the decoder back to the normal decoder. In some cases
  // (inst va breakpoint) the inst_mmu can set the decoder to a
  // special decoder that is for just one cache line
  s->inst_dec = s->save_dec;
// Tracing variant of the memory fetch-512: performs the same decode-cache
// line refill as mem_run_fetch512, but (a) when RAS is enabled, runs the
// fetch through the memory-error detector first and delivers a trap instead
// of instructions when an error is found, and (b) reports the 64-byte code
// read to the tracer hook.
//
// NOTE(review): this copy of the file appears to have lost lines during
// extraction: the braces, the remainder of the detect_fetch_err() argument
// list (cut off after its first argument), the #endif matching the
// #if/#elif below, and the final dispatch/return are not visible. Verify
// against the upstream OpenSPARC T2 sources.
SS_Vaddr
mem_trc_fetch512( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Vaddr va, SS_Tte* tte )/*{{{*/
  // Translate the virtual fetch address to a physical address via the TTE.
  SS_Paddr pa = tte->trans(va);
  // RAS mode: probe L1-cache/store-buffer error state for this fetch.
  if (s->sim_state.ras_enabled())
    SS_Trap::Type trap_type =
      s->mem_err_detector.detect_fetch_err(SS_MemErrDetector::L1_CACHE_AND_STB,
    // A detected error is delivered as a trap instead of the fetched code.
    if (trap_type != SS_Trap::NO_TRAP)
      return (s->trap)(pc,npc,s,line,trap_type);
#if defined(MEMORY_MSYNC)
  // Msync-aware memory model: record which strand touched this VA.
  ((SS_MsyncMemory*)(s->memory))->msync_info(s->strand_id(),va);
#elif defined(MEMORY_EXTERNAL)
  // External memory model: tag the access with the requesting strand.
  ((SS_ExternalMemory*)(s->memory))->set_strand_id(s->strand_id());
  // Read 64 aligned bytes (8 x uint64_t) into the strand's staging buffer.
  // Note: virtual dispatch here, unlike mem_run_fetch512's qualified call.
  s->memory->fetch512(pa,s->mem_data);
  uint64_t d0 = s->mem_data[0];
  uint64_t d1 = s->mem_data[1];
  uint64_t d2 = s->mem_data[2];
  uint64_t d3 = s->mem_data[3];
  uint64_t d4 = s->mem_data[4];
  uint64_t d5 = s->mem_data[5];
  uint64_t d6 = s->mem_data[6];
  uint64_t d7 = s->mem_data[7];
  // Report the 64-byte instruction fetch to the head tracer.
  s->trc_hook->mem_access(SS_Tracer::LD_CODE,va,tte,64,s->mem_data);
  // Split each 64-bit word into two 32-bit opcodes: the high half is the
  // instruction at the lower (even) slot; the full word is assigned for the
  // odd slot (presumably opc keeps only the low 32 bits — verify opc width).
  line_index(line,0)->opc = d0 >> 32;
  line_index(line,1)->opc = d0;
  line_index(line,2)->opc = d1 >> 32;
  line_index(line,3)->opc = d1;
  line_index(line,4)->opc = d2 >> 32;
  line_index(line,5)->opc = d2;
  line_index(line,6)->opc = d3 >> 32;
  line_index(line,7)->opc = d3;
  line_index(line,8)->opc = d4 >> 32;
  line_index(line,9)->opc = d4;
  line_index(line,10)->opc = d5 >> 32;
  line_index(line,11)->opc = d5;
  line_index(line,12)->opc = d6 >> 32;
  line_index(line,13)->opc = d6;
  line_index(line,14)->opc = d7 >> 32;
  line_index(line,15)->opc = d7;
  // Point every slot's execute hook at the current decoder, so the first
  // execution of each instruction decodes itself (and then caches the result).
  SS_Execute dec = s->inst_dec;
  line_index(line,0)->exe = dec;
  line_index(line,1)->exe = dec;
  line_index(line,2)->exe = dec;
  line_index(line,3)->exe = dec;
  line_index(line,4)->exe = dec;
  line_index(line,5)->exe = dec;
  line_index(line,6)->exe = dec;
  line_index(line,7)->exe = dec;
  line_index(line,8)->exe = dec;
  line_index(line,9)->exe = dec;
  line_index(line,10)->exe = dec;
  line_index(line,11)->exe = dec;
  line_index(line,12)->exe = dec;
  line_index(line,13)->exe = dec;
  line_index(line,14)->exe = dec;
  line_index(line,15)->exe = dec;
  // Reset the decoder back to the normal decoder. In some cases
  // (inst va breakpoint) the inst_mmu can set the decoder to a
  // special decoder that is for just one cache line
  s->inst_dec = s->save_dec;
// Tracing variant of the I/O fetch-512: same decode-cache line refill as
// io_run_fetch512, but the 64-byte code read is reported to the tracer hook.
// Note: unlike mem_trc_fetch512, the trace callback fires before the staging
// buffer is copied into locals; no RAS check is performed on the I/O path.
//
// NOTE(review): this copy of the file appears to have lost lines during
// extraction: the braces and the final dispatch/return of an SS_Vaddr are
// not visible. Verify against the upstream OpenSPARC T2 sources.
SS_Vaddr
io_trc_fetch512( SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Vaddr va, SS_Tte* tte )/*{{{*/
  // Translate the virtual fetch address to a physical address via the TTE.
  SS_Paddr pa = tte->trans(va);
  // 64-byte code fetch through the I/O bridge, tagged with the strand id.
  s->io->fetch512(s->strand_id(),pa,s->mem_data);
  // Report the 64-byte instruction fetch to the head tracer.
  s->trc_hook->mem_access(SS_Tracer::LD_CODE,va,tte,64,s->mem_data);
  uint64_t d0 = s->mem_data[0];
  uint64_t d1 = s->mem_data[1];
  uint64_t d2 = s->mem_data[2];
  uint64_t d3 = s->mem_data[3];
  uint64_t d4 = s->mem_data[4];
  uint64_t d5 = s->mem_data[5];
  uint64_t d6 = s->mem_data[6];
  uint64_t d7 = s->mem_data[7];
  // Split each 64-bit word into two 32-bit opcodes: the high half is the
  // instruction at the lower (even) slot; the full word is assigned for the
  // odd slot (presumably opc keeps only the low 32 bits — verify opc width).
  line_index(line,0)->opc = d0 >> 32;
  line_index(line,1)->opc = d0;
  line_index(line,2)->opc = d1 >> 32;
  line_index(line,3)->opc = d1;
  line_index(line,4)->opc = d2 >> 32;
  line_index(line,5)->opc = d2;
  line_index(line,6)->opc = d3 >> 32;
  line_index(line,7)->opc = d3;
  line_index(line,8)->opc = d4 >> 32;
  line_index(line,9)->opc = d4;
  line_index(line,10)->opc = d5 >> 32;
  line_index(line,11)->opc = d5;
  line_index(line,12)->opc = d6 >> 32;
  line_index(line,13)->opc = d6;
  line_index(line,14)->opc = d7 >> 32;
  line_index(line,15)->opc = d7;
  // Point every slot's execute hook at the current decoder, so the first
  // execution of each instruction decodes itself (and then caches the result).
  SS_Execute dec = s->inst_dec;
  line_index(line,0)->exe = dec;
  line_index(line,1)->exe = dec;
  line_index(line,2)->exe = dec;
  line_index(line,3)->exe = dec;
  line_index(line,4)->exe = dec;
  line_index(line,5)->exe = dec;
  line_index(line,6)->exe = dec;
  line_index(line,7)->exe = dec;
  line_index(line,8)->exe = dec;
  line_index(line,9)->exe = dec;
  line_index(line,10)->exe = dec;
  line_index(line,11)->exe = dec;
  line_index(line,12)->exe = dec;
  line_index(line,13)->exe = dec;
  line_index(line,14)->exe = dec;
  line_index(line,15)->exe = dec;
  // Reset the decoder back to the normal decoder. In some cases
  // (inst va breakpoint) the inst_mmu can set the decoder to a
  // special decoder that is for just one cache line
  s->inst_dec = s->save_dec;
// Trampoline for 64-bit ASI loads: forwards every argument, unchanged, to
// the externally installed handler (asi_ext_ld64_fp) on the target strand
// and hands back the handler's result directly.
SS_AsiSpace::Error
SS_Strand::asi_ext_ld64( SS_Node* a, void* b, SS_Strand* s, SS_Vaddr va, uint64_t* data )/*{{{*/
{
  return (*(s->asi_ext_ld64_fp))( a, b, s, va, data );
}
// Trampoline for 64-bit ASI stores: forwards every argument, unchanged, to
// the externally installed handler (asi_ext_st64_fp) on the target strand
// and hands back the handler's result directly.
SS_AsiSpace::Error
SS_Strand::asi_ext_st64( SS_Node* a, void* b, SS_Strand* s, SS_Vaddr va, uint64_t data )/*{{{*/
{
  return (*(s->asi_ext_st64_fp))( a, b, s, va, data );
}
// Default RAS-enable hook: this CPU model ships without RAS (error
// injection) support, so the stub only emits a warning on stderr.
void SS_Strand::default_ras_enable( SS_Strand* s, char* )/*{{{*/
{
  fputs("RAS Un-implemented\n", stderr);
}
// Stub for the performance-counter hook: warns that performance
// instrumentation counters are not implemented for this CPU model.
//
// NOTE(review): the parameter list appears truncated in this copy of the
// file (it breaks off after `which,`) — verify the full signature against
// the upstream OpenSPARC T2 sources.
/*static*/ void SS_Strand::ss_run_perf( SS_Strand* s, Sam::Vcpu::perfcntr which,
  fprintf (stderr, "Performance Instrumentation Counters "
                   "not yet implemented for this CPU\n");