Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / legion / src / simcore / execkern.c
/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: execkern.c
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "@(#)execkern.c 1.34 07/02/08 SMI"
/*
 * Execution kernel for the simulator.
 *
 * Basic management of simcpu_t's and core simulator
 * instruction set.
 */
/*
* # #
* ## # #### ##### ######
* # # # # # # #
* # # # # # # #####
* # # # # # # #
* # ## # # # #
* # # #### # ######
*
* If you think you need to edit this file - in particular
* the exec_loop function - you are probably wrong. Please
* talk with Ash to discuss what you want to do before trying
* to hack in here. exec_loop is the most performance critical
* function in the simulator, and is fully processor
* independent. So hands off !
*/
#include <assert.h>
#include "basics.h"
#include "simcore.h"
#include "config.h"
#include "xicache.h"
#include "barrier.h"
#include "fatal.h"
#if EXEC_TIMING /* { */
#include "tsparcv9.h"
#endif /* } */
extern barrier_t stop_barrier;
extern barrier_t run_barrier;
extern barrier_busy_t sync_busy_barrier;
#ifdef DEBUG_HOOK_LOOP /* { */
#define exec_loop exec_loop_dh
#endif /* } DEBUG_HOOK_LOOP */
/*
* Some pre-amble has to set up the xicache pointer, but
* just execute from it ...
* ... assume the next function to execute has already been
* prefetched ... which in the case of a trap/alarm etc.
* is not the actual instruction ...
*
* Probably should replace this code with assembler for
* efficiency !
*
* This loop is called by each exec_thread and is responsible
* for scheduling all the simcpus for that exec_thread to
* ensure they make equal progress. We allow the first simcpu
* to run for an EXEC_QUANTUM of instructions. We use the
* target_match functionality to detect when the EXEC_QUANTUM
* is complete and then we move to the next simcpu in the list
* until we reach the end of the list.
*
* Once an exec_thread has executed each of its simcpus for
* an EXEC_QUANTUM, it will wait for all other exec_threads
* to reach that same point. Once all exec_threads have reached
* this point, they all continue scheduling their simcpus
* starting with the head of the list again.
*/
void
exec_loop(exec_thread_t *etp)
{
	simcycle_t local_cycle;		/* cached copy of sp->cycle (hot) */
	simcycle_t prev_cycle;		/* sp->cycle at start of this quantum */
	simcycle_t cycles_quant;	/* instns each simcpu runs per quantum */
	simcycle_t local_cycle_target;	/* min(sp->cycle_target, quantum_target) */
	simcycle_t quantum_target;	/* cycle value that ends this quantum */
	xicache_t *xicp;		/* current simcpu's xicache */
	uint64_t xic_line_tag;		/* tag of xicache line we believe we're on */
	simcpu_t *sp;			/* simcpu currently being executed */
	xicache_line_t *xic_linep;	/* current xicache line */
	uint64_t memoryoffset;		/* host-address bias for instn fetch */
	simcpu_t *headsp;		/* head of this exec_thread's simcpu list */
	void (*decodemep)(simcpu_t *, xicache_instn_t *); /* decode-me handler */
#if EXEC_TIMING /* { */
	hrtime_t hr_start;		/* wall-clock start of quantum */
	hrtime_t hr_exec;		/* time spent inside exec_funcp calls */
	uint64_t natt;			/* number of attention exits this quantum */
	/* [0] if decoding, [1] already decoded */
	hrtime_t ihrt_max[2];		/* longest single-instn execution time */
	uint32_t ihrt_rawi[2];		/* raw instn word of that longest instn */
	uint64_t ihrt_pc[2];		/* pc of that longest instn */
	char ibuf[64];			/* disassembly scratch buffer */
#endif /* } */
#ifdef DEBUG_HOOK_LOOP /* { */
	void (*debug_hookp)(simcpu_t *sp, uint32_t rawi);
#endif /* } DEBUG_HOOK_LOOP */

	ATTENTION_SANITY_CHECK;

	/*
	 * Start all exec threads at the same time.
	 */
	barrier_busy_wait(&sync_busy_barrier);

	cycles_quant = EXEC_QUANTUM;
	sp = headsp = etp->allp;

top:
	/*
	 * Per-simcpu quantum entry point.  A non-runnable simcpu
	 * (e.g. parked) still has its cycle count advanced by a full
	 * quantum so every simcpu appears to make equal progress.
	 */
	if (!RUNNABLE(sp)) {
		sp->cycle += cycles_quant;
		sp->cycle_quantum_start = sp->cycle;
		goto next_simcpu;
	}

#if EXEC_TIMING /* { */
	DBGEXECLOOP( lprintf(sp->gid, "start quantum attention=0x%llx "
	    "sync_pending=%d\n", sp->attention, sp->sync_pending); );
#endif /* } */

	prev_cycle = sp->cycle;
	quantum_target = sp->cycle + cycles_quant;
	xicp = sp->xicachep;

#ifdef DEBUG_HOOK_LOOP /* { */
	debug_hookp = (void(*)(simcpu_t *, uint32_t))
	    sp->config_procp->proc_typep->debug_hookp;
#endif /* } DEBUG_HOOK_LOOP */

#if EXEC_TIMING /* { */
	hr_start = gethrtime();
	hr_exec = 0;
	ihrt_max[0] = 0;
	ihrt_max[1] = 0;
	natt = 0;
#endif /* } */

	do { /* outer loop */
		/* cache some loads */
		decodemep = sp->decodemep;
		local_cycle = sp->cycle;
		/* Note cycle_target could be < cycle. */
		local_cycle_target = sp->cycle_target;
		if (quantum_target < local_cycle_target)
			local_cycle_target = quantum_target;

		if (sp->attention)
			goto pay_attention;

		/* force a miss of the current xic block */
		xic_line_tag = ~sp->pc;

		do {
			uint64_t xidx;
			uint32_t rawi;
			tvaddr_t xpc;
			xicache_instn_t *xip;
#if EXEC_TIMING /* { */
			hrtime_t ihrt;
			uint_t ihrt_which;
#endif /* } */
			/*
			 * This is the dumb way to do this, but we
			 * mirror the hardware, and check that
			 * we're still on the xic block we thought
			 * we were .. if not re-validate and try
			 * again.
			 */
			xpc = sp->pc;
			/* tag and align ok ? */
			if (((xpc ^ xic_line_tag) & XICACHE_TAG_MASK) != 0) {
xic_block_load:;
				/*
				 * Fell off XC block - maybe:
				 * a) moved to another line ...
				 * b) problem with pc alignment !
				 * c) page-fault (/cache miss)
				 * d) xic_cache_miss
				 *
				 * First check the xic cache as this is
				 * the fast case.
				 */
				/*
				 * assume we just moved to another line ...
				 * if not it's a miss of some kind
				 */
				xic_line_tag = xpc & XICACHE_TAG_PURE_MASK;
				xic_line_tag |= sp->tagstate;
				/*
				 * Need to stop using hard coded
				 * constants here ... and use
				 * variable values - but how to do
				 * without performance hit ...
				 */
				xic_linep = &(xicp->line[(xic_line_tag >>
				    XICACHE_LINE_SIZE_BITS)&XICACHE_LINE_MASK]);
				if (xic_linep->tag != xic_line_tag) {
					/* real miss: processor-specific fill */
					sp->xic_miss(sp, xic_linep, xpc);
					/*
					 * Commented out for speed:
					 * ASSERT(sp->pc == xpc);
					 */
					if (sp->attention) goto pay_attention;
					/*
					 * Go because XC may have changed.
					 */
					goto xic_block_load;
				}
				memoryoffset = xic_linep->memoryoffset;
			}

			/* Not using '==' as sync_pending can skip cycles. */
			if (local_cycle >= local_cycle_target)
				break;

			xidx = (xpc>>2) & XICACHE_NUM_INSTR_MASK;
			xip = &xicp->instn[xidx];
			/* fetch raw instn word directly from host memory */
			rawi = *(uint32_t *)(void*)(xpc + memoryoffset);
			if (xip->rawi != rawi) {
				/*
				 * First touch or self-modified code:
				 * point the slot at the decoder so the
				 * call below decodes then executes it.
				 */
				xip->rawi = rawi;
				xip->exec_funcp = decodemep;
			}
#ifdef DEBUG_HOOK_LOOP /* { */
			debug_hookp(sp, xip->rawi);
#endif /* } DEBUG_HOOK_LOOP */
#if EXEC_TIMING /* { */
			ihrt_which = (xip->exec_funcp != decodemep);
			ihrt = gethrtime();
#endif /* } */
			/* execute (or decode-then-execute) the instn */
			xip->exec_funcp(sp, xip);
#if EXEC_TIMING /* { */
			ihrt = gethrtime() - ihrt;
			hr_exec += ihrt;
			if (ihrt > ihrt_max[ihrt_which]) {
				ihrt_max[ihrt_which] = ihrt;
				ihrt_rawi[ihrt_which] = rawi;
				/* sp->pc has changed - use xpc */
				ihrt_pc[ihrt_which] = xpc;
			}
#endif /* } */
			/*
			 * Only get here after successful execution of an
			 * instn.
			 */
			sp->cycle = ++local_cycle;
		} while (!sp->attention);

pay_attention:;
#if EXEC_TIMING /* { */
		natt++;
#endif /* } */
		/*
		 * The attention flag is a composite and is cleared
		 * by clearing the individual flags.
		 */
		sp->exec_loop_reset = false;

		if (sp->xicache_trans_flush_pending) {
			sp->xicache_trans_flush_pending = false;
			xicache_trans_flush(sp);
		}
		if (sp->xdcache_trans_flush_pending) {
			sp->xdcache_trans_flush_pending = false;
			xdcache_flush(sp);
		}

		/* end of quantum: arrange to fall out of the outer loop */
		if (local_cycle >= quantum_target)
			sp->sync_pending = true;

		/* user-set cycle target reached: fire its callback */
		if (local_cycle >= sp->cycle_target)
			sp->cycle_target_match(sp);

		if (sp->async_event) {
			sp->config_procp->proc_typep->check_async_event(sp);
		}

		if (sp->exception_pending) {
			sp->config_procp->proc_typep->take_exception(sp);
		}
	} while (!sp->sync_pending);

	/*
	 * We fall out of the loop above when sp->sync_pending is set.
	 * This happens when the current simcpu has executed its
	 * EXEC_QUANTUM of instns and it's time for the exec_thread
	 * to schedule the next simcpu in the list -or- if we are
	 * at the end of the list, wait for all the other
	 * exec_threads to reach the same point.
	 */
	sp->sync_pending = false;

	if (sp->xicache_instn_flush_pending) {
		sp->xicache_instn_flush_pending = false;
		xicache_instn_flush(sp);
	}

	/*
	 * increment the total instruction count executed during this
	 * EXEC_QUANTUM cycle
	 */
	sp->total_instr += sp->cycle - prev_cycle;

#if EXEC_TIMING /* { */
	hr_start = gethrtime() - hr_start;
#define INSTS (sp->cycle - prev_cycle)
	DBGEXECLOOP( lprintf(sp->gid, "end quantum executed %llu "
	    "out of %llu, mips=%.2lf, attentions=%llu\n", INSTS, cycles_quant,
	    ((1.0e3*(double)INSTS)/(double)hr_start), natt); );
	DBGEXECLOOP(
	    lprintf(sp->gid, "end quantum exec+overhead=total nS: "
	    "%llu + %llu = %llu\n", hr_exec, hr_start - hr_exec, hr_start);
	    if (ihrt_max[0] != 0) {
		sparcv9_idis(ibuf, sizeof (ibuf), FE_INSTN(ihrt_rawi[0]),
		    ihrt_pc[0]);
		lprintf(sp->gid, "end quantum longest undecoded "
		    "instruction: pc=0x%llx nS=%llu: [0x%08x] %s\n", ihrt_pc[0],
		    ihrt_max[0], FE_INSTN(ihrt_rawi[0]), ibuf);
	    }
	    if (ihrt_max[1] != 0) {
		sparcv9_idis(ibuf, sizeof (ibuf), FE_INSTN(ihrt_rawi[1]),
		    ihrt_pc[1]);
		lprintf(sp->gid, "end quantum longest decoded "
		    "instruction: pc=0x%llx nS=%llu: [0x%08x] %s\n", ihrt_pc[1],
		    ihrt_max[1], FE_INSTN(ihrt_rawi[1]), ibuf);
	    }
	    );
#endif /* } */

	/*
	 * If sync_pending was set before the end of the quantum,
	 * cycle must be advanced - just set to the quantum target.
	 */
	sp->cycle = quantum_target;
	sp->cycle_quantum_start = sp->cycle;

next_simcpu:;
	/* Switch to next sp in scheduler list if we are not at the end */
	sp = sp->nextp;
	if (sp != NULL) {
		goto top;
	}

	/*
	 * We have reached the end of the scheduler list. All simcpus on
	 * this exec_thread have had a chance to execute an EXEC_QUANTUM
	 * of instructions so we wait here until all exec_threads reach
	 * this point before continuing.
	 */
	barrier_busy_wait(&sync_busy_barrier);

	/*
	 * We need to check if the debugger wants us to stop running.
	 * If so, we put hold all threads here. The ctrl_thread
	 * will handle the "stop" state housekeeping.
	 */
	if (simstatus.running == false) {
		/* indicate exec_thread in stop state */
		barrier_wait(&stop_barrier);
		/* wait to enter run state (via ctrl thread) */
		barrier_wait(&run_barrier);
		/*
		 * Give everyone a chance to check the value of "running"
		 * before we make another round. Otherwise can split up --
		 * some will go to "stop" state while others will run
		 * another EXEC_QUANTUM.
		 */
		barrier_busy_wait(&sync_busy_barrier);
	}

	/*
	 * All exec_threads have reached the same point so we start
	 * at the beginning of the list of simcpus and continue
	 * executing.
	 */
	sp = headsp;
	goto top;
}