Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] legion/src/simcore/execkern.c
/*
* ========== Copyright Header Begin ==========================================
*
* OpenSPARC T2 Processor File: execkern.c
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
*
* The above named program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* The above named program is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
* ========== Copyright Header End ============================================
*/
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
#pragma ident "@(#)execkern.c 1.34 07/02/08 SMI"

/*
 * Execution kernel for the simulator.
 *
 * Basic management of simcpu_t's and the core simulator
 * instruction set.
 */


/*
 *  #     #   #####   #######  #######
 *  ##    #  #     #     #     #
 *  # #   #  #     #     #     #
 *  #  #  #  #     #     #     #####
 *  #   # #  #     #     #     #
 *  #    ##  #     #     #     #
 *  #     #   #####      #     #######
 *
 * If you think you need to edit this file - in particular
 * the exec_loop function - you are probably wrong. Please
 * talk with Ash to discuss what you want to do before trying
 * to hack in here. exec_loop is the most performance critical
 * function in the simulator, and is fully processor
 * independent. So hands off !
 */


#include <assert.h>



#include "basics.h"
#include "simcore.h"
#include "config.h"
#include "xicache.h"
#include "barrier.h"
#include "fatal.h"
#if EXEC_TIMING /* { */
#include "tsparcv9.h"
#endif /* } */


extern barrier_t stop_barrier;
extern barrier_t run_barrier;
extern barrier_busy_t sync_busy_barrier;


#ifdef DEBUG_HOOK_LOOP /* { */
#define exec_loop exec_loop_dh
#endif /* } DEBUG_HOOK_LOOP */


/*
 * Some preamble has to set up the xicache pointer, but
 * after that we just execute from it ...
 * ... assume the next function to execute has already been
 * prefetched ... which in the case of a trap/alarm etc.
 * is not the actual instruction ...
 *
 * Probably should replace this code with assembler for
 * efficiency !
 *
 * This loop is called by each exec_thread and is responsible
 * for scheduling all the simcpus for that exec_thread to
 * ensure they make equal progress. We allow the first simcpu
 * to run for an EXEC_QUANTUM of instructions. We use the
 * target_match functionality to detect when the EXEC_QUANTUM
 * is complete and then we move to the next simcpu in the list
 * until we reach the end of the list.
 *
 * Once an exec_thread has executed each of its simcpus for
 * an EXEC_QUANTUM, it will wait for all other exec_threads
 * to reach that same point. Once all exec_threads have reached
 * this point, they all continue scheduling their simcpus
 * starting with the head of the list again.
 */
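
/*
 * In outline, and leaving aside the xicache lookup and the EXEC_TIMING
 * instrumentation, each exec_thread runs:
 *
 *      for (;;) {
 *              for (sp = etp->allp; sp != NULL; sp = sp->nextp) {
 *                      run sp until sp->cycle reaches its quantum target,
 *                      servicing sp->attention conditions as they arise;
 *              }
 *              barrier_busy_wait(&sync_busy_barrier);
 *              if (!simstatus.running)
 *                      park on stop_barrier / run_barrier until restarted;
 *      }
 */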

void
exec_loop(exec_thread_t *etp)
{
        simcycle_t local_cycle;
        simcycle_t prev_cycle;
        simcycle_t cycles_quant;
        simcycle_t local_cycle_target;
        simcycle_t quantum_target;
        xicache_t *xicp;
        uint64_t xic_line_tag;
        simcpu_t *sp;
        xicache_line_t *xic_linep;
        uint64_t memoryoffset;
        simcpu_t *headsp;
        void (*decodemep)(simcpu_t *, xicache_instn_t *);
#if EXEC_TIMING /* { */
        hrtime_t hr_start;
        hrtime_t hr_exec;
        uint64_t natt;
        /* [0] if decoding, [1] already decoded */
        hrtime_t ihrt_max[2];
        uint32_t ihrt_rawi[2];
        uint64_t ihrt_pc[2];
        char ibuf[64];
#endif /* } */
#ifdef DEBUG_HOOK_LOOP /* { */
        void (*debug_hookp)(simcpu_t *sp, uint32_t rawi);
#endif /* } DEBUG_HOOK_LOOP */

        ATTENTION_SANITY_CHECK;

        /*
         * Start all exec threads at the same time.
         */
        barrier_busy_wait(&sync_busy_barrier);

        cycles_quant = EXEC_QUANTUM;

        sp = headsp = etp->allp;

top:

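        /*
         * A simcpu that is not currently runnable still has its cycle
         * count advanced by a full quantum before we move on, so that it
         * stays in step with the other simcpus in the round-robin.
         */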
        if (!RUNNABLE(sp)) {
                sp->cycle += cycles_quant;
                sp->cycle_quantum_start = sp->cycle;
                goto next_simcpu;
        }

#if EXEC_TIMING /* { */
        DBGEXECLOOP( lprintf(sp->gid, "start quantum attention=0x%llx "
            "sync_pending=%d\n", sp->attention, sp->sync_pending); );
#endif /* } */

        prev_cycle = sp->cycle;
        quantum_target = sp->cycle + cycles_quant;

        xicp = sp->xicachep;

#ifdef DEBUG_HOOK_LOOP /* { */
        debug_hookp = (void(*)(simcpu_t *, uint32_t))
            sp->config_procp->proc_typep->debug_hookp;
#endif /* } DEBUG_HOOK_LOOP */

#if EXEC_TIMING /* { */
        hr_start = gethrtime();
        hr_exec = 0;
        ihrt_max[0] = 0;
        ihrt_max[1] = 0;
        natt = 0;
#endif /* } */

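        /*
         * The outer loop below runs until this simcpu's quantum is complete
         * (sync_pending); each pass services any attention conditions and
         * then re-enters the inner loop, which is the hot path that fetches
         * and executes one instruction per iteration until something needs
         * attention or the cycle target is reached.
         */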
        do { /* outer loop */
                /* cache some loads */
                decodemep = sp->decodemep;
                local_cycle = sp->cycle;
                /* Note cycle_target could be < cycle. */
                local_cycle_target = sp->cycle_target;

                if (quantum_target < local_cycle_target)
                        local_cycle_target = quantum_target;

                if (sp->attention)
                        goto pay_attention;

                /* force a miss of the current xic block */
                xic_line_tag = ~sp->pc;

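                /*
                 * Complementing the pc above guarantees that the first pass
                 * through the loop below takes the xic_block_load path, so
                 * the xicache line is (re)validated and memoryoffset is set
                 * before it is used.
                 */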
                do {
                        uint64_t xidx;
                        uint32_t rawi;
                        tvaddr_t xpc;
                        xicache_instn_t *xip;
#if EXEC_TIMING /* { */
                        hrtime_t ihrt;
                        uint_t ihrt_which;
#endif /* } */

                        /*
                         * This is the dumb way to do this, but we
                         * mirror the hardware, and check that
                         * we're still on the xic block we thought
                         * we were .. if not re-validate and try
                         * again.
                         */
                        xpc = sp->pc;

                        /* tag and align ok ? */
                        if (((xpc ^ xic_line_tag) & XICACHE_TAG_MASK) != 0) {

xic_block_load:;
                                /*
                                 * Fell off XC block - maybe:
                                 * a) moved to another line ...
                                 * b) problem with pc alignment !
                                 * c) page-fault (/cache miss)
                                 * d) xic_cache_miss
                                 *
                                 * First check the xic cache as this is
                                 * the fast case.
                                 */

                                /*
                                 * assume we just moved to another line ...
                                 * if not it's a miss of some kind
                                 */

                                xic_line_tag = xpc & XICACHE_TAG_PURE_MASK;
                                xic_line_tag |= sp->tagstate;

                                /*
                                 * Need to stop using hard coded
                                 * constants here ... and use
                                 * variable values - but how to do
                                 * without performance hit ...
                                 */

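                                /*
                                 * Direct-mapped lookup: the line index comes
                                 * from the tag's line-number bits; on a tag
                                 * mismatch, xic_miss() (re)fills the line.
                                 */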
                                xic_linep = &(xicp->line[(xic_line_tag >>
                                    XICACHE_LINE_SIZE_BITS)&XICACHE_LINE_MASK]);
                                if (xic_linep->tag != xic_line_tag) {
                                        sp->xic_miss(sp, xic_linep, xpc);
                                        /*
                                         * Commented out for speed:
                                         * ASSERT(sp->pc == xpc);
                                         */
                                        if (sp->attention) goto pay_attention;

                                        /*
                                         * Go because XC may have changed.
                                         */
                                        goto xic_block_load;
                                }
                                memoryoffset = xic_linep->memoryoffset;
                        }

                        /* Not using '==' as sync_pending can skip cycles. */
                        if (local_cycle >= local_cycle_target)
                                break;

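                        /*
                         * Re-read the instruction word from simulated memory
                         * and compare it with the cached copy; if it changed
                         * (e.g. self-modifying code), point the slot back at
                         * the decode function so it gets decoded afresh.
                         */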
                        xidx = (xpc>>2) & XICACHE_NUM_INSTR_MASK;
                        xip = &xicp->instn[xidx];
                        rawi = *(uint32_t *)(void*)(xpc + memoryoffset);
                        if (xip->rawi != rawi) {
                                xip->rawi = rawi;
                                xip->exec_funcp = decodemep;
                        }

#ifdef DEBUG_HOOK_LOOP /* { */
                        debug_hookp(sp, xip->rawi);
#endif /* } DEBUG_HOOK_LOOP */
#if EXEC_TIMING /* { */
                        ihrt_which = (xip->exec_funcp != decodemep);
                        ihrt = gethrtime();
#endif /* } */
                        xip->exec_funcp(sp, xip);
#if EXEC_TIMING /* { */
                        ihrt = gethrtime() - ihrt;
                        hr_exec += ihrt;
                        if (ihrt > ihrt_max[ihrt_which]) {
                                ihrt_max[ihrt_which] = ihrt;
                                ihrt_rawi[ihrt_which] = rawi;
                                /* sp->pc has changed - use xpc */
                                ihrt_pc[ihrt_which] = xpc;
                        }
#endif /* } */

                        /*
                         * Only get here after successful execution of an
                         * instn.
                         */
                        sp->cycle = ++local_cycle;

                } while (!sp->attention);

pay_attention:;
#if EXEC_TIMING /* { */
                natt++;
#endif /* } */
                /*
                 * The attention flag is a composite and is cleared
                 * by clearing the individual flags.
                 */

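                /*
                 * The conditions serviced below include pending translation
                 * cache flushes, the end of this simcpu's quantum, an expired
                 * cycle target, asynchronous events, and pending exceptions.
                 */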
                sp->exec_loop_reset = false;

                if (sp->xicache_trans_flush_pending) {
                        sp->xicache_trans_flush_pending = false;
                        xicache_trans_flush(sp);
                }
                if (sp->xdcache_trans_flush_pending) {
                        sp->xdcache_trans_flush_pending = false;
                        xdcache_flush(sp);
                }
                if (local_cycle >= quantum_target)
                        sp->sync_pending = true;

                if (local_cycle >= sp->cycle_target)
                        sp->cycle_target_match(sp);

                if (sp->async_event) {
                        sp->config_procp->proc_typep->check_async_event(sp);
                }

                if (sp->exception_pending) {
                        sp->config_procp->proc_typep->take_exception(sp);
                }

        } while (!sp->sync_pending);

        /*
         * We fall out of the loop above when sp->sync_pending is set.
         * This happens when the current simcpu has executed its
         * EXEC_QUANTUM of instns and it's time for the exec_thread
         * to schedule the next simcpu in the list -or- if we are
         * at the end of the list, wait for all the other
         * exec_threads to reach the same point.
         */

        sp->sync_pending = false;

        if (sp->xicache_instn_flush_pending) {
                sp->xicache_instn_flush_pending = false;
                xicache_instn_flush(sp);
        }

        /*
         * increment the total instruction count executed during this
         * EXEC_QUANTUM cycle
         */
        sp->total_instr += sp->cycle - prev_cycle;

#if EXEC_TIMING /* { */
        hr_start = gethrtime() - hr_start;

#define INSTS (sp->cycle - prev_cycle)
        DBGEXECLOOP( lprintf(sp->gid, "end quantum executed %llu "
            "out of %llu, mips=%.2lf, attentions=%llu\n", INSTS, cycles_quant,
            ((1.0e3*(double)INSTS)/(double)hr_start), natt); );

DBGEXECLOOP(
        lprintf(sp->gid, "end quantum exec+overhead=total nS: "
            "%llu + %llu = %llu\n", hr_exec, hr_start - hr_exec, hr_start);
        if (ihrt_max[0] != 0) {
                sparcv9_idis(ibuf, sizeof (ibuf), FE_INSTN(ihrt_rawi[0]),
                    ihrt_pc[0]);
                lprintf(sp->gid, "end quantum longest undecoded "
                    "instruction: pc=0x%llx nS=%llu: [0x%08x] %s\n", ihrt_pc[0],
                    ihrt_max[0], FE_INSTN(ihrt_rawi[0]), ibuf);
        }
        if (ihrt_max[1] != 0) {
                sparcv9_idis(ibuf, sizeof (ibuf), FE_INSTN(ihrt_rawi[1]),
                    ihrt_pc[1]);
                lprintf(sp->gid, "end quantum longest decoded "
                    "instruction: pc=0x%llx nS=%llu: [0x%08x] %s\n", ihrt_pc[1],
                    ihrt_max[1], FE_INSTN(ihrt_rawi[1]), ibuf);
        }
);
#endif /* } */

        /*
         * If sync_pending was set before the end of the quantum,
         * cycle must be advanced - just set to the quantum target.
         */
        sp->cycle = quantum_target;
        sp->cycle_quantum_start = sp->cycle;

next_simcpu:;

        /* Switch to next sp in scheduler list if we are not at the end */
        sp = sp->nextp;
        if (sp != NULL) {
                goto top;
        }

        /*
         * We have reached the end of the scheduler list. All simcpus on
         * this exec_thread have had a chance to execute an EXEC_QUANTUM
         * of instructions, so we wait here until all exec_threads reach
         * this point before continuing.
         */
        barrier_busy_wait(&sync_busy_barrier);

        /*
         * We need to check if the debugger wants us to stop running.
         * If so, we hold all threads here. The ctrl_thread
         * will handle the "stop" state housekeeping.
         */
        if (simstatus.running == false) {
                /* indicate exec_thread in stop state */
                barrier_wait(&stop_barrier);

                /* wait to enter run state (via ctrl thread) */
                barrier_wait(&run_barrier);

                /*
                 * Give everyone a chance to check the value of "running"
                 * before we make another round. Otherwise the threads can
                 * split up -- some will go to "stop" state while others
                 * will run another EXEC_QUANTUM.
                 */
                barrier_busy_wait(&sync_busy_barrier);
        }

        /*
         * All exec_threads have reached the same point so we start
         * at the beginning of the list of simcpus and continue
         * executing.
         */
        sp = headsp;
        goto top;
}