* ========== Copyright Header Begin ==========================================
* OpenSPARC T2 Processor File: execkern.c
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
* ========== Copyright Header End ============================================
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
#pragma ident "@(#)execkern.c 1.34 07/02/08 SMI"
* Execution kernel for the simulator.
* Basic management of simcpu_t's and core simulator
* If you think you need to edit this file - in particular
* the exec_loop function - you are probably wrong. Please
* talk with Ash to discuss what you want to do before trying
* to hack in here. exec_loop is the most performance critical
* function in the simulator, and is fully processor
* independent. So hands off !
extern barrier_t stop_barrier
;
extern barrier_t run_barrier
;
extern barrier_busy_t sync_busy_barrier
;
#ifdef DEBUG_HOOK_LOOP /* { */
#define exec_loop exec_loop_dh
#endif /* } DEBUG_HOOK_LOOP */
* Some pre-amble has to set up the xicache pointer, but
* just execute from it ...
* ... assume the next function to execute has already been
* prefetched ... which in the case of a trap/alarm etc.
* is not the actual instruction ...
* Probably should replace this code with assembler for
* This loop is called by each exec_thread and is responsible
* for scheduling all the simcpus for that exec_thread to
* ensure they make equal progress. We allow the first simcpu
* to run for an EXEC_QUANTUM of instructions. We use the
* target_match functionality to detect when the EXEC_QUANTUM
* is complete and then we move to the next simcpu in the list
* until we reach the end of the list.
* Once an exec_thread has executed each of its simcpus for
* an EXEC_QUANTUM, it will wait for all other exec_threads
* to reach that same point. Once all exec_threads have reached
* this point, they all continue scheduling their simcpus
* starting with the head of the list again.
exec_loop(exec_thread_t
*etp
)
simcycle_t local_cycle_target
;
simcycle_t quantum_target
;
xicache_line_t
*xic_linep
;
void (*decodemep
)(simcpu_t
*, xicache_instn_t
*);
/* [0] if decoding, [1] already decoded */
#ifdef DEBUG_HOOK_LOOP /* { */
void (*debug_hookp
)(simcpu_t
*sp
, uint32_t rawi
);
#endif /* } DEBUG_HOOK_LOOP */
* Start all exec threads at the same time.
barrier_busy_wait(&sync_busy_barrier
);
cycles_quant
= EXEC_QUANTUM
;
sp
->cycle
+= cycles_quant
;
sp
->cycle_quantum_start
= sp
->cycle
;
DBGEXECLOOP( lprintf(sp
->gid
, "start quantum attention=0x%llx "
"sync_pending=%d\n", sp
->attention
, sp
->sync_pending
); );
quantum_target
= sp
->cycle
+ cycles_quant
;
#ifdef DEBUG_HOOK_LOOP /* { */
debug_hookp
= (void(*)(simcpu_t
*, uint32_t))
sp
->config_procp
->proc_typep
->debug_hookp
;
#endif /* } DEBUG_HOOK_LOOP */
decodemep
= sp
->decodemep
;
/* Note cycle_target could be < cycle. */
local_cycle_target
= sp
->cycle_target
;
if (quantum_target
< local_cycle_target
)
local_cycle_target
= quantum_target
;
/* force a miss of the current xic block */
* This is the dumb way to do this, but we
* mirror the hardware, and check that
* we're still on the xic block we thought
* we were .. if not re-validate and try
if (((xpc
^ xic_line_tag
) & XICACHE_TAG_MASK
) != 0) {
* Fell off XC block - maybe:
* a) moved to another line ...
* b) problem with pc alignment !
* c) page-fault (/cache miss)
* First check the xic cache as this is
* assume we just moved to another line ...
* if not it's a miss of some kind
xic_line_tag
= xpc
& XICACHE_TAG_PURE_MASK
;
xic_line_tag
|= sp
->tagstate
;
* Need to stop using hard coded
* constants here ... and use
* variable values - but how to do
* without performance hit ...
xic_linep
= &(xicp
->line
[(xic_line_tag
>>
XICACHE_LINE_SIZE_BITS
)&XICACHE_LINE_MASK
]);
if (xic_linep
->tag
!= xic_line_tag
) {
sp
->xic_miss(sp
, xic_linep
, xpc
);
* Commented out for speed:
if (sp
->attention
) goto pay_attention
;
* Go because XC may have changed.
memoryoffset
= xic_linep
->memoryoffset
;
/* Not using '==' as sync_pending can skip cycles. */
if (local_cycle
>= local_cycle_target
)
xidx
= (xpc
>>2) & XICACHE_NUM_INSTR_MASK
;
xip
= &xicp
->instn
[xidx
];
rawi
= *(uint32_t *)(void*)(xpc
+ memoryoffset
);
xip
->exec_funcp
= decodemep
;
#ifdef DEBUG_HOOK_LOOP /* { */
debug_hookp(sp
, xip
->rawi
);
#endif /* } DEBUG_HOOK_LOOP */
ihrt_which
= (xip
->exec_funcp
!= decodemep
);
xip
->exec_funcp(sp
, xip
);
ihrt
= gethrtime() - ihrt
;
if (ihrt
> ihrt_max
[ihrt_which
]) {
ihrt_max
[ihrt_which
] = ihrt
;
ihrt_rawi
[ihrt_which
] = rawi
;
/* sp->pc has changed - use xpc */
ihrt_pc
[ihrt_which
] = xpc
;
* Only get here after successful execution of an
sp
->cycle
= ++local_cycle
;
} while (!sp
->attention
);
* The attention flag is a composite and is cleared
* by clearing the individual flags.
sp
->exec_loop_reset
= false;
if (sp
->xicache_trans_flush_pending
) {
sp
->xicache_trans_flush_pending
= false;
if (sp
->xdcache_trans_flush_pending
) {
sp
->xdcache_trans_flush_pending
= false;
if (local_cycle
>= quantum_target
)
if (local_cycle
>= sp
->cycle_target
)
sp
->cycle_target_match(sp
);
sp
->config_procp
->proc_typep
->check_async_event(sp
);
if (sp
->exception_pending
) {
sp
->config_procp
->proc_typep
->take_exception(sp
);
} while (!sp
->sync_pending
);
* We fall out of the loop above when sp->sync_pending is set.
* This happens when the current simcpu has executed its
* EXEC_QUANTUM of instructions and it's time for the exec_thread
* to schedule the next simcpu in the list -or- if we are
* at the end of the list, wait for all the other
* exec_threads to reach the same point.
sp
->sync_pending
= false;
if (sp
->xicache_instn_flush_pending
) {
sp
->xicache_instn_flush_pending
= false;
* increment the total instruction count executed during this
sp
->total_instr
+= sp
->cycle
- prev_cycle
;
hr_start
= gethrtime() - hr_start
;
#define INSTS (sp->cycle - prev_cycle)
DBGEXECLOOP( lprintf(sp
->gid
, "end quantum executed %llu "
"out of %llu, mips=%.2lf, attentions=%llu\n", INSTS
, cycles_quant
,
((1.0e3
*(double)INSTS
)/(double)hr_start
), natt
); );
lprintf(sp
->gid
, "end quantum exec+overhead=total nS: "
"%llu + %llu = %llu\n", hr_exec
, hr_start
- hr_exec
, hr_start
);
sparcv9_idis(ibuf
, sizeof (ibuf
), FE_INSTN(ihrt_rawi
[0]),
lprintf(sp
->gid
, "end quantum longest undecoded "
"instruction: pc=0x%llx nS=%llu: [0x%08x] %s\n", ihrt_pc
[0],
ihrt_max
[0], FE_INSTN(ihrt_rawi
[0]), ibuf
);
sparcv9_idis(ibuf
, sizeof (ibuf
), FE_INSTN(ihrt_rawi
[1]),
lprintf(sp
->gid
, "end quantum longest decoded "
"instruction: pc=0x%llx nS=%llu: [0x%08x] %s\n", ihrt_pc
[1],
ihrt_max
[1], FE_INSTN(ihrt_rawi
[1]), ibuf
);
* If sync_pending was set before the end of the quantum,
* cycle must be advanced - just set to the quantum target.
sp
->cycle
= quantum_target
;
sp
->cycle_quantum_start
= sp
->cycle
;
/* Switch to next sp in scheduler list if we are not at the end */
* We have reached the end of the scheduler list. All simcpus on
* this exec_thread have had a chance to execute an EXEC_QUANTUM
* of instructions so we wait here until all exec_threads reach
* this point before continuing.
barrier_busy_wait(&sync_busy_barrier
);
* We need to check if the debugger wants us to stop running.
* If so, we put hold all threads here. The ctrl_thread
* will handle the "stop" state housekeeping.
if (simstatus
.running
== false) {
/* indicate exec_thread in stop state */
barrier_wait(&stop_barrier
);
/* wait to enter run state (via ctrl thread) */
barrier_wait(&run_barrier
);
* Give everyone a chance to check the value of "running"
* before we make another round. Otherwise can split up --
* some will go to "stop" state while others will run
barrier_busy_wait(&sync_busy_barrier
);
* All exec_threads have reached the same point so we start
* at the beginning of the list of simcpus and continue