// ========== Copyright Header Begin ==========================================
//
// OpenSPARC T2 Processor File: workerthreads.cc
// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
//
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
//
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
//
// ========== Copyright Header End ============================================
/*
 * Copyright (C) 2001,2005 Sun Microsystems, Inc.
 * All rights reserved.
 */
#pragma ident "%%1.27 06/12/14 %%"

/*
  "workerthreads.cc"

  Supports multiple cpu simulation threads that "system.cc" can signal
  and wait for, i.e. threads that live for the duration of blaze rather
  than being created/destroyed for each UI command.

  These threads can, however, be destroyed and re-created, because we
  support changing numthreads (which indirectly changes cpus-per-thread)
  as well as changing the enabled state of individual cpus (which changes
  the number of cpus to run, and hence the distribution of running cpus
  across the worker threads).
*/

#include "ui.h"
#include "workerthread.h"

extern int blaze_debug;    /* main/ui_cmds */
extern int blaze_option;

// Vcpu0 calls this doneftn_t with its void* arg once all the worker threads
// have completed their required execution.
doneftn_t volatile wrkthdCBFunc = NULL;   /* NB: one-shot callback ftn */
void *    volatile wrkthdCBArg  = NULL;   /* and its callback arg */

// system level event queue
// static EventQue *eventque;

sema_t wrkthdDONE;   // DONE semaphore. The Vcpu0 worker thread posts this
                     // semaphore when all the worker threads are done.
                     // UI threads typically block on this sema while
                     // waiting for a stepi/stept command to finish.

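// Typical UI-side use of the callback + semaphore pair (an illustrative
// sketch, not code from this file; the caller's behavior is assumed):
//
//     wrkthdCBArg  = my_arg;            // publish the arg before the ftn
//     wrkthdCBFunc = my_done_callback;  // one-shot: cleared before invocation
//     ... signal the worker threads to run ...
//     sema_wait (&wrkthdDONE);          // block until Vcpu0 posts completion
//
// Because wrkthdCBFunc is cleared before it is invoked (see stepi/stept
// below), a callback can safely re-arm itself for the next step.
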
extern void   write_scalar_64 (FILE *fp, const char * name, uint64_t v);
extern bool_t read_scalar_64  (FILE *fp, const char * name, uint64_t *v);

static inline void breakpoint_hit(int cpu_id)
{
    ui->output("cpu[%i] hit a breakpoint. stop.. \n", cpu_id);
}

volatile int WorkerThread::BarrierCount = 0;
volatile int WorkerThread::BarrierLock  = 0;
volatile int WorkerThread::BarrierTemp  = 0;
int64_t      WorkerThread::Nusecs       = 0;
int64_t      WorkerThread::Ninstrs      = 0;
int64_t      WorkerThread::Ncycles      = 0;

// step_remainder holds the number of instructions (in normal mode)
// or cycles (in execution-driven mode) left before the next stick_update
// needs to happen. It becomes nonzero when a simulation is stopped between
// two stick updates (which normally occur every 1 microsecond of simulated
// time).
// Implementation note: this was known as instrs_till_next_stick_update
// before it was also needed in execution-driven mode.
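// Illustrative example (numbers assumed, not from any config): with
// MIPS = 1000, stepi(2500) on an initially zero step_remainder executes
// two full 1-usec intervals of 1000 instructions (each followed by a
// stick update) plus 500 leftover instructions with no update; it then
// records step_remainder = 500, the count still owed before the next
// stick update when stepping resumes.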
volatile uint64_t WorkerThread::step_remainder = 0;

volatile int      WorkerThread::numThds = 0;
WorkerThread *    WorkerThread::wrkthds = 0;
pthread_key_t     WorkerThread::key = 0;
volatile uint64_t WorkerThread::GlobalTimeUsecs = 0;
volatile int64_t  WorkerThread::GlobalTicks = 0;

volatile uint64_t WorkerThread::stick_incr = 0;
volatile uint64_t WorkerThread::stick_remainder = 0;

/* static */ Vcpu * WorkerThread::first_cpu = 0;   // first cpu has some special work to do

volatile int64_t WorkerThread::u_intervals = 0;
volatile int64_t WorkerThread::k_intervals = 0;

volatile int64_t WorkerThread::u_instrs = 0;
volatile int64_t WorkerThread::k_instrs = 0;

void WorkerThread::kill_worker_threads ()
{
    if (numThds > 0) {

        while (sema_trywait (&wrkthdDONE) == 0) ;   // "reset" just in case

        for (int i = 0; i < numThds; i++)
            wrkthds[i].killThread();

        numThds = 0;
    }

    if (wrkthds) {
        delete[] wrkthds;
        wrkthds = 0;
    }
}

int WorkerThread::barrier()
{
    atomic_barrier(&numThds, &BarrierCount, &BarrierLock,
                   &blaze_stop_request, &BarrierTemp);

    GlobalTimeUsecs = ++simTime;

    doEventqueCallbacks();

    if (first_cpu->config.trace_on) {
        doTrace();
    }

    int ret = atomic_barrier(&numThds, &BarrierCount, &BarrierLock,
                             &blaze_stop_request, &BarrierTemp);

    if (cpus[0] == first_cpu) {   // VCPU0

        // this is *only* used for the "perf" UI command
        //@@@ and is now broken when umips != kmips @@@
        GlobalTicks += the_arch.mips;

        // all vcpus execute update_stick() with the same stick_incr because
        // of the first atomic_barrier at the entrance of this function
        stick_incr       = the_arch.stick_freq / 1000000ull;
        stick_remainder += the_arch.stick_freq % 1000000ull;
        if (stick_remainder >= 1000000ull) {
            // fold the accumulated drift back into stick_incr whenever
            // stick_remainder reaches 10^6
            stick_incr      += stick_remainder / 1000000ull;
            stick_remainder %= 1000000ull;
        }
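
        // Worked example (stick_freq value assumed, for illustration): with
        // stick_freq = 1,428,571,428 Hz, stick_incr is 1428 ticks per usec
        // and stick_remainder grows by 571,428 each usec; every second usec
        // the remainder crosses 10^6, stick_incr becomes 1429 for that
        // interval, and the long-run average stays at stick_freq / 10^6
        // ticks per simulated usec.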
    }

    return ret;
}


static inline int pstate_is_userp (Vcpu * vcpu)
{
    uint64_t tl, tt, tstate0;                      // special case: TL > 0
    vcpu->get_reg (VCPU_PR_TL, &tl);
    if (tl > 0) vcpu->get_reg (VCPU_PR_TT, &tt);
    if (tl > 0 && ((tt >= 0x064 && tt <= 0x06f)    // tlb miss
            || (tt >= 0x080 && tt <= 0x0ff))) {    // win spill/fill
        vcpu->get_reg (VCPU_PR_TSTATE, &tstate0);

        // TSTATE holds the trapped PSTATE starting at bit 8, so
        // (0x0004 << 8) tests the saved PSTATE.PRIV bit
        if (tstate0 & (0x0004 << 8)) return 0;
        else                         return 1;
    }

    uint64_t pstate;                               // regular case: TL == 0
    vcpu->get_reg (VCPU_PR_PSTATE, &pstate);
    return (pstate & 4) ? 0 : 1;
}

static inline int get_mmu_cntx (Vcpu * vcpu)
/* Hack for sw05b: the kcfd process runs in kernel mode with      */
/* primary mmu context 0, but can be identified by its            */
/* secondary mmu context, which is unique to kcfd (although       */
/* you do have to be clever to figure out what that value is!)    */
{
    uint64_t data;                                /* Cheetah (only?) */
    (void) vcpu->get_asi (0x58, 0x10ll, data);    /* I/D MMU Secondary cntx */
    return (int)data;
}


static const uint64_t pstate_priv_mask = 0x2ull;

void WorkerThread::stept (uint64_t usecs)
{
    int rslt;

    if (SYSTEM_in_execution_driven_mode()) {
        stepc(usecs * the_arch.cpu_freq / 1000000ull);
        return;
    }

    if (step_remainder) {
        // stepi and stept have been intermixed:
        // execute the leftover instructions
        // before simulating 'usecs'
        called_from_stept = true;
        stepi (step_remainder);
    }

    // If the conditional stepi() call above hit a breakpoint,
    // we should not enter the loop below.
    // @@@ this needs to be re-thought, should there be a barrier here @@@
    if (!IN_STOP_STATE(barrier ())) {
        uint64_t kmips = the_arch.kmips;   /* re-fetch volatiles every iter */
        uint64_t umips = the_arch.umips;
        uint64_t cmips = the_arch_cmips;
        int      ccntx = the_arch_ccntx;

        if (the_arch.roundrobin && (num_cpus > 1)) {

            // round-robin algorithm, with variable user/kernel mips:
            //
            // 1. derive user and kernel CPIs from the configured mips and
            //    cpu freq:  CPI = MHz/MIPS
            // 2. when a strand steps by 1 instruction, an associated cycle
            //    counter is incremented by the CPI
            // 3. round-robin over all strands, advancing each strand until
            //    it catches up with the other strands on this worker thread
            //
            // For accuracy, CPI cannot be limited to whole numbers: we use
            // fixed-point arithmetic with 10 bits of precision to avoid FP
            // ops in the critical loop. All quantities named *_x_1024
            // represent such fixed-point numbers.

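            // Worked example (numbers assumed, for illustration): with a
            // 1200 MHz cpu and kmips = 600, the kernel CPI is 2.0, stored
            // as kernel_cpi_x_1024 = (1200 << 10) / 600 = 2048. Each
            // kernel-mode instruction then advances that strand's cycle
            // counter by 2048, i.e. 2.0 cycles in x_1024 fixed point, and
            // one simulated usec is mhz_x_1024 = 1200 << 10 = 1228800.
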
            int64_t mhz = the_arch.cpu_freq/1e+6;   // NOTE: cpu freq must be a whole number of MHz for accuracy

            int64_t kernel_cpi_x_1024 = (mhz << 10) / kmips;
            int64_t user_cpi_x_1024   = (mhz << 10) / umips;

            int64_t mhz_x_1024 = mhz << 10;

            for (int usec = 0; usec < usecs; usec++) {
                int cpuid;
                for (cpuid = 0; cpuid < num_cpus; cpuid++) {
                    done_x_1024[cpuid] = 0;
                }

                int64_t cdone_x_1024 = 0;   // worker-thread cycles done

                // go round-robin while the cycle counter represents less than a usec
                while (cdone_x_1024 < mhz_x_1024) {

                    int64_t cmin_x_1024 = LLONG_MAX;   // min of cycles done across all cpus
                    for (cpuid = 0; cpuid < num_cpus; cpuid++) {

                        uint64_t pstate;
                        cpus[cpuid]->get_reg(VCPU_PR_PSTATE, &pstate);
                        int64_t cpi_x_1024 = (pstate & pstate_priv_mask) ?
                                             kernel_cpi_x_1024 : user_cpi_x_1024;

                        // step this cpu until its cycle counter passes the
                        // current round-robin threshold
                        while (done_x_1024[cpuid] <= cdone_x_1024) {
                            int rslt = cpus[cpuid]->stepi(1);
                            done_x_1024[cpuid] += cpi_x_1024;
                            if (rslt) {
                                BLAZE_STOP(blaze_stop_request);
                                breakpoint_hit (cpus[cpuid]->id());
                                goto STEPT_LAST_RR;
                            }
                        } // while this strand has not caught up with the rest on this wt

                        if (done_x_1024[cpuid] < cmin_x_1024) cmin_x_1024 = done_x_1024[cpuid];

                    } // for each cpuid

                    // move up the threshold for the next round-robin interval
                    cdone_x_1024 = cmin_x_1024;
                } // while <mhz> cycles not done

            STEPT_LAST_RR:

                for (cpuid = 0; cpuid < num_cpus; cpuid++) {
                    cpus[cpuid]->update_stick(stick_incr);
                }

                if (IN_STOP_STATE(barrier ()))
                    break;

            } // for usecs
        } else {   // chunky mode: not round-robin

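            // In chunky mode each strand executes a whole interval's worth
            // of instructions per simulated usec, with the interval size
            // picked by privilege mode. Illustrative example (numbers
            // assumed): with umips = 100 and kmips = 50, a strand found in
            // user mode steps 100 instructions for this usec while a strand
            // in kernel mode steps only 50, modeling kernel code as running
            // at half the user-mode rate.
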
            for (int i = 0; i < usecs; i++) {

                for (int j = 0; j < num_cpus; j++) {

                    Vcpu * vcpu = cpus[j];

                    /*---STEPI---*/
                    if (pstate_is_userp (vcpu)) {
                        rslt = vcpu->stepi (umips);
                        atomic_add_64 (&u_intervals, 1);
                        atomic_add_64 (&u_instrs, umips);
                    } else {
                        if (ccntx != 0 && get_mmu_cntx (vcpu) == ccntx) {
                            rslt = vcpu->stepi (cmips);
                            atomic_add_64 (&k_intervals, 1);
                            atomic_add_64 (&k_instrs, cmips);
                        } else {
                            rslt = vcpu->stepi (kmips);
                            atomic_add_64 (&k_intervals, 1);
                            atomic_add_64 (&k_instrs, kmips);
                        }
                    }
                    /*---STICK---*/
                    vcpu->update_stick(stick_incr);

                    if (rslt) {
                        BLAZE_STOP(blaze_stop_request);
                        breakpoint_hit (vcpu->id ());
                    }
                } // for cpus

                if (IN_STOP_STATE(barrier ()))
                    break;

            } // for usecs
        }
    } // not already stopped

    // @@@ we _should_ be able to eliminate this barrier, but then sam crashes,
    // and I haven't had time to figure out why @@@
    //
    atomic_barrier (&numThds, &BarrierCount, &BarrierLock,
                    &blaze_stop_request, &BarrierTemp);

    if (cpus[0] == first_cpu) {   // VCPU0 does some extra work...
        if (IN_STOP_STATE(blaze_stop_request)) {      // stop request -> stop
            BLAZE_STOP(blaze_run_state);
            BLAZE_CLEAR(blaze_stop_request);
        } else if (IN_GTSTEP_STATE(blaze_run_state))  // time sync -> wait
            BLAZE_GTWAIT(blaze_run_state);
        else                                          // anything else -> stop
            BLAZE_STOP(blaze_run_state);
        doneftn_t TmpFunc = wrkthdCBFunc;
        if (TmpFunc != NULL) {
            wrkthdCBFunc = NULL;       // reset first to avoid a race!
            (*TmpFunc)(wrkthdCBArg);   // then 'do' the callback!
        }
        sema_post (&wrkthdDONE);       // ------------------- DONE !!!
    }
}


void WorkerThread::stepi (uint64_t n)
{
    uint64_t MIPS = the_arch.mips;   // instructions before every sync

    uint64_t loops;                  // number of 1-usec loops to execute
    uint64_t leftover_to_execute;    // instructions left from n that must be
                                     // executed before returning, w/o
                                     // updating the STICK
    uint64_t leftovers;              // instructions that must be executed the
                                     // next time stepi is called, before
                                     // updating STICK; leftovers < MIPS

    bool update = false;             // should STICK be updated in this call?
                                     // (n could be smaller than the
                                     // outstanding step_remainder)

    // decomposition (for n >= step_remainder):
    //   loops * MIPS + leftover_to_execute + step_remainder == n
    if (step_remainder > n) {
        loops = 0;
        leftover_to_execute = n;
        leftovers = step_remainder - n;
        update = false;
    } else {
        n -= step_remainder;
        loops = n / MIPS;
        leftover_to_execute = n % MIPS;
        leftovers = MIPS - n % MIPS;
        if (step_remainder)
            // do not update if step_remainder was 0
            update = true;
    }
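
    // Worked example (numbers assumed): with MIPS = 1000, step_remainder =
    // 300 and stepi(2500), the 300 outstanding instructions run first and
    // STICK is updated; the remaining 2200 split into loops = 2 full
    // intervals (each followed by a STICK update) plus leftover_to_execute =
    // 200 instructions run without an update, and leftovers = 800 is stored
    // in step_remainder for the next call.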

    if (update) {
        // update STICK after executing 'step_remainder' instructions
        for (int j = 0; j < num_cpus; j++) {
            if (cpus[j]->stepi(step_remainder) != 0) {
                // hit a breakpoint on this strand or cpu, so
                BLAZE_STOP(blaze_stop_request);
                breakpoint_hit(cpus[j]->id());
            }
            cpus[j]->update_stick(stick_incr);
        }

        int sam_state = barrier();

        if (IN_STOP_STATE(sam_state))
            // a stop request has come in either through the UI, or some
            // strand has hit a breakpoint; do not wait to complete the
            // rest of the usecs
            goto STOPNOW;
    }

    for (int i = 0; i < loops; i++) {
        int sam_state;
        for (int j = 0; j < num_cpus; j++) {
            if (cpus[j]->stepi(MIPS) != 0) {
                // hit a breakpoint on this strand or cpu
                BLAZE_STOP(blaze_stop_request);
                breakpoint_hit(cpus[j]->id());
                // don't break; let the other cpus on this worker thread
                // complete their required number of instructions
            }
            cpus[j]->update_stick(stick_incr);
        }
        sam_state = barrier();

        if (IN_STOP_STATE(sam_state))
            // a stop request has come in either through the UI, or some
            // strand has hit a breakpoint; do not wait to complete the
            // rest of the usecs
            goto STOPNOW;
    }

    // execute the rest of the instructions w/o updating STICK
    for (int j = 0; j < num_cpus; j++) {
        if (cpus[j]->stepi(leftover_to_execute) != 0) {
            // hit a breakpoint on this strand or cpu
            BLAZE_STOP(blaze_stop_request);
            breakpoint_hit(cpus[j]->id());
            // don't break; let the other cpus on this worker thread
            // complete their required number of instructions
        }
    }

STOPNOW:

    atomic_barrier(&numThds, &BarrierCount, &BarrierLock,
                   &blaze_stop_request, &BarrierTemp);

    if (cpus[0] == first_cpu) {
        step_remainder = leftovers;

        if (called_from_stept) {
            called_from_stept = false;   // we do not want to stop here
            return;
        }

        if (IN_STOP_STATE(blaze_stop_request)) {      // stop request -> stop
            BLAZE_STOP(blaze_run_state);
            BLAZE_CLEAR(blaze_stop_request);
        } else if (IN_GTSTEP_STATE(blaze_run_state))  // time sync -> wait
            BLAZE_GTWAIT(blaze_run_state);
        else                                          // anything else -> stop
            BLAZE_STOP(blaze_run_state);

        if (wrkthdCBFunc) {
            doneftn_t TmpFunc = wrkthdCBFunc;
            if (TmpFunc != NULL) {
                wrkthdCBFunc = NULL;       // reset first to avoid a race!
                (*TmpFunc)(wrkthdCBArg);   // then 'do' the callback!
            }
        }
        sema_post (&wrkthdDONE);           // ---------- DONE !!!
    }
}


// stepc is called in exec-driven mode
void WorkerThread::stepc (int64_t ncycles)
{
    int64_t cycles_per_usec = the_arch.cpu_freq / 1000000ull;
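
    // Example (frequency assumed for illustration): at cpu_freq = 1.2 GHz
    // this is 1200 cycles per simulated usec, so the loop below updates
    // the STICK registers once every 1200 simulated cycles.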

    int64_t i;
    for (i = step_remainder; i <= ncycles; i++) {
        int rslt = g_cpu_ex_intf.cycle(1);
        step_remainder--;

        if (step_remainder == 0) {
            for (int j = 0; j < num_cpus; j++) {
                Vcpu * vcpu = cpus[j];
                vcpu->update_stick(stick_incr);
            }
            step_remainder = cycles_per_usec;
            if (IN_STOP_STATE(barrier())) {
                break;
            }
        }

        if (rslt) {
            BLAZE_STOP(blaze_stop_request);
            break;
        }

    }

    // FIXME FIXME FIXME
    // remember cycles left over until next sync
    if (IN_STOP_STATE(blaze_stop_request)) {      // stop request -> stop
        BLAZE_STOP(blaze_run_state);
        BLAZE_CLEAR(blaze_stop_request);
    } else if (IN_GTSTEP_STATE(blaze_run_state))  // time sync -> wait
        BLAZE_GTWAIT(blaze_run_state);
    else                                          // anything else -> stop
        BLAZE_STOP(blaze_run_state);
    doneftn_t TmpFunc = wrkthdCBFunc;
    if (TmpFunc != NULL) {
        wrkthdCBFunc = NULL;       // reset first to avoid a race!
        (*TmpFunc)(wrkthdCBArg);   // then 'do' the callback!
    }
    sema_post (&wrkthdDONE);       // ------------------- DONE !!!
} // void WorkerThread::stepc()


void WorkerThread::dump (FILE * fp)
{
    write_scalar_64 (fp, "GlobalTimeUsecs", GlobalTimeUsecs);
    write_scalar_64 (fp, "GlobalTicks",     GlobalTicks);
    write_scalar_64 (fp, "step_remainder",  step_remainder);
    write_scalar_64 (fp, "stick_remainder", stick_remainder);
}

int WorkerThread::restore (char * line)
{
    if      (sscanf (line, "GlobalTimeUsecs %lli", &GlobalTimeUsecs) == 1) ;
    else if (sscanf (line, "GlobalTicks %lli",     &GlobalTicks)     == 1) ;
    else if (sscanf (line, "stick_remainder %lli", &stick_remainder) == 1) ;

    // Note that the following two lines restore the same variable. This
    // reflects a rename from instr_till_next_stick_update to step_remainder,
    // done when the variable took on its broader purpose in execution-driven
    // runs. The second restore is kept for backward compatibility, so older
    // checkpoints can still be restored. Please do not remove that line
    // unless you are sure no checkpoint created before 12/13/2007 exists,
    // or they have all been patched to rename this variable. Only one of
    // the two lines will ever match, depending on when the checkpoint was
    // taken.
    else if (sscanf (line, "step_remainder %lli", &step_remainder) == 1) ;
    else if (sscanf (line, "instr_till_next_stick_update %lli", &step_remainder) == 1) ;

    else return FALSE;

    return TRUE;
}

void WorkerThread::create_worker_threads (int NumCpus, int cpusPerThread, int numThreads)
{
    static int FIRST_TIME = 1;   // init flag

    int NumEnabled   = 0;
    int NumPerThread = 0;
    int NumModThread = 0;

    int newnum = numThreads;

    if (newnum == -1) {
        if (cpusPerThread == -1) {
            newnum = 1;
        } else {
            newnum = NumCpus / cpusPerThread;
        }
    }

    if (newnum <= 0) {
        ui->warning("invalid conf numthds, using 1\n");
        newnum = 1;
    } else if (newnum > 64) {
        ui->warning("invalid conf numthds, using 64\n");
        newnum = 64;
    }

    NumEnabled = 0;
    for (int i = 0; i <= g_vcpu_id_max; i++)
        if (get_vcpu(i) && cpu_enabled[i])
            NumEnabled++;

    if (NumEnabled == 0) {
        ui->warning("Invalid: 0 enabled cpus, using 1\n");
        NumEnabled = 1;
    }

    NumPerThread = NumEnabled / newnum;
    NumModThread = NumEnabled % newnum;
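
    // Example of the resulting distribution (numbers assumed): with
    // NumEnabled = 7 and newnum = 3 worker threads, NumPerThread = 2 and
    // NumModThread = 1, so the threads get 3, 2 and 2 cpus respectively:
    // the first NumModThread threads each take one extra cpu (see the
    // "NumPerThread + (i < NumModThread)" assignment below).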

    if (FIRST_TIME) {
        sema_init (&wrkthdDONE, 0, USYNC_THREAD, NULL);
        // eventque = new EventQue();
        assert(pthread_key_create(&key, 0) == 0);
        FIRST_TIME = 0;
    }

    if (newnum != WorkerThread::numThds || cpu_enable_changed) {

        if (newnum > HOSTINFO_numcpus()) {
            ui->warning("not enough host-cpus(%d) for sim-threads(%d)\n",
                        HOSTINFO_numcpus(), newnum);
        }

        // retrieve any existing events on the current worker-thread eq's
        std::list<Event_t *> eventlist;
        for (int i = 0; i < numThds; i++) {
            while (!wrkthds[i].eq->empty()) {
                Event_t *event = new Event_t;
                wrkthds[i].eq->get_top (event);
                eventlist.push_front(event);
            }
        }

        kill_worker_threads ();
        wrkthds = new WorkerThread[newnum];
        for (int i = 0; i < newnum; i++)
            wrkthds[i].worker_id = i;

        // put all events in eventlist on the new wrkthd 0 event queue
        while (!eventlist.empty()) {
            Event_t * e = eventlist.front();
            wrkthds[0].eq->insert_callback(e->pq_priority,
                    e->pq_cbfunc, e->pq_cbarg1, e->pq_cbarg2,
                    e->pq_cbunload, e->dbgstring, e->worker_id);
            eventlist.pop_front();
            delete e;
        }

        // reassign vcpus to the worker threads
        int nextcpu = 0;
        first_cpu = 0;
        for (int i = 0; i < newnum; i++) {
            // number of vcpus for this worker thread
            wrkthds[i].num_cpus = NumPerThread + (i < NumModThread);

            for (int j = 0; j < wrkthds[i].num_cpus; j++)
            {
                // find the next enabled vcpu
                Vcpu* vcpu = get_vcpu(nextcpu);
                for (int k = nextcpu; k <= g_vcpu_id_max; k++)
                {
                    if (!vcpu || !cpu_enabled[nextcpu])   // skip disabled cpus
                        vcpu = get_vcpu(++nextcpu);
                    else
                        break;
                }

                if (first_cpu == 0)
                    first_cpu = vcpu;

                wrkthds[i].cpus[j] = vcpu;
                nextcpu++;
            }
        }

        for (int i = 0; i < newnum; i++) {
            ui->verbose("cpu-worker-thread[%d] ", i);
            wrkthds[i].info();
        }

        WorkerThread::numThds = newnum;
        cpu_enable_changed = false;
    }
}