Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: execkern.c | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | /* | |
24 | * Copyright 2007 Sun Microsystems, Inc. All rights reserved. | |
25 | * Use is subject to license terms. | |
26 | */ | |
27 | #pragma ident "@(#)execkern.c 1.34 07/02/08 SMI" | |
28 | ||
29 | /* |
30 | * Execution kernel for the simulator. |
31 | * |
32 | * Basic management of simcpu_t's and core simulator |
33 | * instruction set. |
34 | */ |
35 | ||
36 | ||
37 | /* | |
38 | * # # | |
39 | * ## # #### ##### ###### | |
40 | * # # # # # # # | |
41 | * # # # # # # ##### | |
42 | * # # # # # # # | |
43 | * # ## # # # # | |
44 | * # # #### # ###### | |
45 | * | |
46 | * If you think you need to edit this file - in particular | |
47 | * the exec_loop function - you are probably wrong. Please | |
48 | * talk with Ash to discuss what you want to do before trying | |
49 | * to hack in here. exec_loop is the most performance critical | |
50 | * function in the simulator, and is fully processor | |
51 | * independent. So hands off ! | |
52 | */ | |
53 | ||
54 | ||
55 | #include <assert.h> | |
56 | ||
57 | ||
58 | ||
59 | #include "basics.h" | |
60 | #include "simcore.h" | |
61 | #include "config.h" | |
62 | #include "xicache.h" | |
63 | #include "barrier.h" | |
64 | #include "fatal.h" | |
65 | #if EXEC_TIMING /* { */ | |
66 | #include "tsparcv9.h" | |
67 | #endif /* } */ | |
68 | ||
69 | ||
70 | extern barrier_t stop_barrier; | |
71 | extern barrier_t run_barrier; | |
72 | extern barrier_busy_t sync_busy_barrier; | |
73 | ||
74 | ||
75 | #ifdef DEBUG_HOOK_LOOP /* { */ | |
76 | #define exec_loop exec_loop_dh | |
77 | #endif /* } DEBUG_HOOK_LOOP */ | |
78 | ||
79 | ||
80 | /* | |
81 | * Some pre-amble has to set up the xicache pointer, but | |
82 | * just execute from it ... | |
83 | * ... assume the next function to execute has already been | |
84 | * prefetched ... which in the case of a trap/alarm etc. | |
85 | * is not the actual instruction ... | |
86 | * | |
87 | * Probably should replace this code with assembler for | |
88 | * efficiency ! | |
89 | * | |
90 | * This loop is called by each exec_thread and is responsible | |
91 | * for scheduling all the simcpus for that exec_thread to | |
92 | * ensure they make equal progress. We allow the first simcpu | |
93 | * to run for an EXEC_QUANTUM of instructions. We use the | |
94 | * target_match functionality to detect when the EXEC_QUANTUM | |
95 | * is complete and then we move to the next simcpu in the list | |
96 | * until we reach the end of the list. | |
97 | * | |
98 | * Once an exec_thread has executed each of its simcpus |
99 | * an EXEC_QUANTUM, it will wait for all other exec_threads | |
100 | * to reach that same point. Once all exec_threads have reached | |
101 | * this point, they all continue scheduling their simcpus | |
102 | * starting with the head of the list again. | |
103 | */ | |
104 | ||
/*
 * exec_loop - per-exec_thread scheduler and instruction-dispatch loop.
 *
 * etp: the exec_thread whose simcpu list (etp->allp) is to be run.
 * Never returns: loops forever, round-robining each simcpu for an
 * EXEC_QUANTUM of instructions, then rendezvousing with the other
 * exec_threads at sync_busy_barrier before starting the next round.
 */
void
exec_loop(exec_thread_t *etp)
{
	simcycle_t local_cycle;		/* cached copy of sp->cycle for the inner loop */
	simcycle_t prev_cycle;		/* sp->cycle at start of this quantum */
	simcycle_t cycles_quant;	/* instructions per quantum (EXEC_QUANTUM) */
	simcycle_t local_cycle_target;	/* min(quantum_target, sp->cycle_target) */
	simcycle_t quantum_target;	/* cycle at which this quantum ends */
	xicache_t *xicp;		/* current simcpu's decoded-instruction cache */
	uint64_t xic_line_tag;		/* tag of the xicache line we believe we're on */
	simcpu_t *sp;			/* simcpu currently being executed */
	xicache_line_t *xic_linep;
	uint64_t memoryoffset;		/* host-memory offset for the current xic line */
	simcpu_t *headsp;		/* head of this exec_thread's simcpu list */
	void (*decodemep)(simcpu_t *, xicache_instn_t *);	/* decode-me trampoline */
#if EXEC_TIMING /* { */
	hrtime_t hr_start;		/* wall-clock time of whole quantum */
	hrtime_t hr_exec;		/* time spent inside exec_funcp calls */
	uint64_t natt;			/* number of times pay_attention was reached */
	/* [0] if decoding, [1] already decoded */
	hrtime_t ihrt_max[2];		/* longest single-instruction time, per category */
	uint32_t ihrt_rawi[2];		/* raw instruction bits of that longest instn */
	uint64_t ihrt_pc[2];		/* pc of that longest instn */
	char ibuf[64];			/* disassembly buffer for end-of-quantum report */
#endif /* } */
#ifdef DEBUG_HOOK_LOOP /* { */
	void (*debug_hookp)(simcpu_t *sp, uint32_t rawi);
#endif /* } DEBUG_HOOK_LOOP */

	ATTENTION_SANITY_CHECK;

	/*
	 * Start all exec threads at the same time.
	 */
	barrier_busy_wait(&sync_busy_barrier);

	cycles_quant = EXEC_QUANTUM;

	sp = headsp = etp->allp;

top:

	/*
	 * A non-runnable simcpu still has its cycle count advanced by a
	 * full quantum so it stays in lock-step with its siblings.
	 */
	if (!RUNNABLE(sp)) {
		sp->cycle += cycles_quant;
		sp->cycle_quantum_start = sp->cycle;
		goto next_simcpu;
	}

#if EXEC_TIMING /* { */
	DBGEXECLOOP( lprintf(sp->gid, "start quantum attention=0x%llx "
	    "sync_pending=%d\n", sp->attention, sp->sync_pending); );
#endif /* } */

	prev_cycle = sp->cycle;
	quantum_target = sp->cycle + cycles_quant;

	xicp = sp->xicachep;

#ifdef DEBUG_HOOK_LOOP /* { */
	debug_hookp = (void(*)(simcpu_t *, uint32_t))
	    sp->config_procp->proc_typep->debug_hookp;
#endif /* } DEBUG_HOOK_LOOP */

#if EXEC_TIMING /* { */
	hr_start = gethrtime();
	hr_exec = 0;
	ihrt_max[0] = 0;
	ihrt_max[1] = 0;
	natt = 0;
#endif /* } */

	do { /* outer loop */
		/* cache some loads */
		decodemep = sp->decodemep;
		local_cycle = sp->cycle;
		/* Note cycle_target could be < cycle. */
		local_cycle_target = sp->cycle_target;

		/* Run only to the nearer of the event target and quantum end. */
		if (quantum_target < local_cycle_target)
			local_cycle_target = quantum_target;

		if (sp->attention)
			goto pay_attention;

		/* force a miss of the current xic block */
		xic_line_tag = ~sp->pc;

		do {
			uint64_t xidx;
			uint32_t rawi;
			tvaddr_t xpc;
			xicache_instn_t *xip;
#if EXEC_TIMING /* { */
			hrtime_t ihrt;
			uint_t ihrt_which;
#endif /* } */

			/*
			 * This is the dumb way to do this, but we
			 * mirror the hardware, and check that
			 * we're still on the xic block we thought
			 * we were .. if not re-validate and try
			 * again.
			 */
			xpc = sp->pc;

			/* tag and align ok ? */
			if (((xpc ^ xic_line_tag) & XICACHE_TAG_MASK) != 0) {

xic_block_load:;
				/*
				 * Fell off XC block - maybe:
				 * a) moved to another line ...
				 * b) problem with pc alignment !
				 * c) page-fault (/cache miss)
				 * d) xic_cache_miss
				 *
				 * First check the xic cache as this is
				 * the fast case.
				 */

				/*
				 * assume we just moved to another line ...
				 * if not it's a miss of some kind
				 */

				xic_line_tag = xpc & XICACHE_TAG_PURE_MASK;
				xic_line_tag |= sp->tagstate;

				/*
				 * Need to stop using hard coded
				 * constants here ... and use
				 * variable values - but how to do
				 * without performance hit ...
				 */

				xic_linep = &(xicp->line[(xic_line_tag >>
				    XICACHE_LINE_SIZE_BITS)&XICACHE_LINE_MASK]);
				if (xic_linep->tag != xic_line_tag) {
					sp->xic_miss(sp, xic_linep, xpc);
					/*
					 * Commented out for speed:
					 * ASSERT(sp->pc == xpc);
					 */
					if (sp->attention) goto pay_attention;

					/*
					 * Go because XC may have changed.
					 */
					goto xic_block_load;
				}
				memoryoffset = xic_linep->memoryoffset;
			}

			/* Not using '==' as sync_pending can skip cycles. */
			if (local_cycle >= local_cycle_target)
				break;

			/*
			 * Fetch the raw instruction from host memory; if it
			 * no longer matches the cached copy (self-modifying
			 * code or stale line), reset the slot to the
			 * decode-me trampoline so it gets re-decoded.
			 */
			xidx = (xpc>>2) & XICACHE_NUM_INSTR_MASK;
			xip = &xicp->instn[xidx];
			rawi = *(uint32_t *)(void*)(xpc + memoryoffset);
			if (xip->rawi != rawi) {
				xip->rawi = rawi;
				xip->exec_funcp = decodemep;
			}

#ifdef DEBUG_HOOK_LOOP /* { */
			debug_hookp(sp, xip->rawi);
#endif /* } DEBUG_HOOK_LOOP */
#if EXEC_TIMING /* { */
			/* [0] = still the decode trampoline, [1] = decoded */
			ihrt_which = (xip->exec_funcp != decodemep);
			ihrt = gethrtime();
#endif /* } */
			xip->exec_funcp(sp, xip);
#if EXEC_TIMING /* { */
			ihrt = gethrtime() - ihrt;
			hr_exec += ihrt;
			if (ihrt > ihrt_max[ihrt_which]) {
				ihrt_max[ihrt_which] = ihrt;
				ihrt_rawi[ihrt_which] = rawi;
				/* sp->pc has changed - use xpc */
				ihrt_pc[ihrt_which] = xpc;
			}
#endif /* } */

			/*
			 * Only get here after successful execution of an
			 * instn.
			 */
			sp->cycle = ++local_cycle;

		} while (!sp->attention);

pay_attention:;
#if EXEC_TIMING /* { */
		natt++;
#endif /* } */
		/*
		 * The attention flag is a composite and is cleared
		 * by clearing the individual flags.
		 */

		sp->exec_loop_reset = false;

		if (sp->xicache_trans_flush_pending) {
			sp->xicache_trans_flush_pending = false;
			xicache_trans_flush(sp);
		}
		if (sp->xdcache_trans_flush_pending) {
			sp->xdcache_trans_flush_pending = false;
			xdcache_flush(sp);
		}
		/* Quantum exhausted: arrange to fall out of the outer loop. */
		if (local_cycle >= quantum_target)
			sp->sync_pending = true;

		if (local_cycle >= sp->cycle_target)
			sp->cycle_target_match(sp);

		if (sp->async_event) {
			sp->config_procp->proc_typep->check_async_event(sp);
		}

		if (sp->exception_pending) {
			sp->config_procp->proc_typep->take_exception(sp);
		}

	} while (!sp->sync_pending);

	/*
	 * We fall out of the loop above when sp->sync_pending is set.
	 * This happens when the current simcpu has executed its
	 * EXEC_QUANTUM of instns and it's time for the exec_thread
	 * to schedule the next simcpu in the list -or- if we are
	 * at the end of the list, wait for all the other
	 * exec_threads to reach the same point.
	 */

	sp->sync_pending = false;

	if (sp->xicache_instn_flush_pending) {
		sp->xicache_instn_flush_pending = false;
		xicache_instn_flush(sp);
	}

	/*
	 * increment the total instruction count executed during this
	 * EXEC_QUANTUM cycle
	 */
	sp->total_instr += sp->cycle - prev_cycle;

#if EXEC_TIMING /* { */
	hr_start = gethrtime() - hr_start;

#define INSTS (sp->cycle - prev_cycle)
	DBGEXECLOOP( lprintf(sp->gid, "end quantum executed %llu "
	    "out of %llu, mips=%.2lf, attentions=%llu\n", INSTS, cycles_quant,
	    ((1.0e3*(double)INSTS)/(double)hr_start), natt); );

	DBGEXECLOOP(
	    lprintf(sp->gid, "end quantum exec+overhead=total nS: "
	    "%llu + %llu = %llu\n", hr_exec, hr_start - hr_exec, hr_start);
	    if (ihrt_max[0] != 0) {
		sparcv9_idis(ibuf, sizeof (ibuf), FE_INSTN(ihrt_rawi[0]),
		    ihrt_pc[0]);
		lprintf(sp->gid, "end quantum longest undecoded "
		    "instruction: pc=0x%llx nS=%llu: [0x%08x] %s\n", ihrt_pc[0],
		    ihrt_max[0], FE_INSTN(ihrt_rawi[0]), ibuf);
	    }
	    if (ihrt_max[1] != 0) {
		sparcv9_idis(ibuf, sizeof (ibuf), FE_INSTN(ihrt_rawi[1]),
		    ihrt_pc[1]);
		lprintf(sp->gid, "end quantum longest decoded "
		    "instruction: pc=0x%llx nS=%llu: [0x%08x] %s\n", ihrt_pc[1],
		    ihrt_max[1], FE_INSTN(ihrt_rawi[1]), ibuf);
	    }
	);
#endif /* } */

	/*
	 * If sync_pending was set before the end of the quantum,
	 * cycle must be advanced - just set to the quantum target.
	 */
	sp->cycle = quantum_target;
	sp->cycle_quantum_start = sp->cycle;

next_simcpu:;

	/* Switch to next sp in scheduler list if we are not at the end */
	sp = sp->nextp;
	if (sp != NULL) {
		goto top;
	}

	/*
	 * We have reached the end of the scheduler list. All simcpus on
	 * this exec_thread have had a chance to execute an EXEC_QUANTUM
	 * of instructions so we wait here until all exec_threads reach
	 * this point before continuing.
	 */
	barrier_busy_wait(&sync_busy_barrier);

	/*
	 * We need to check if the debugger wants us to stop running.
	 * If so, we put hold all threads here. The ctrl_thread
	 * will handle the "stop" state housekeeping.
	 */
	if (simstatus.running == false) {
		/* indicate exec_thread in stop state */
		barrier_wait(&stop_barrier);

		/* wait to enter run state (via ctrl thread) */
		barrier_wait(&run_barrier);

		/*
		 * Give everyone a chance to check the value of "running"
		 * before we make another round. Otherwise can split up --
		 * some will go to "stop" state while others will run
		 * another EXEC_QUANTUM.
		 */
		barrier_busy_wait(&sync_busy_barrier);
	}

	/*
	 * All exec_threads have reached the same point so we start
	 * at the beginning of the list of simcpus and continue
	 * executing.
	 */
	sp = headsp;
	goto top;
}