| 1 | // ========== Copyright Header Begin ========================================== |
| 2 | // |
| 3 | // OpenSPARC T2 Processor File: workerthreads.cc |
| 4 | // Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. |
| 5 | // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. |
| 6 | // |
| 7 | // The above named program is free software; you can redistribute it and/or |
| 8 | // modify it under the terms of the GNU General Public |
| 9 | // License version 2 as published by the Free Software Foundation. |
| 10 | // |
| 11 | // The above named program is distributed in the hope that it will be |
| 12 | // useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | // General Public License for more details. |
| 15 | // |
| 16 | // You should have received a copy of the GNU General Public |
| 17 | // License along with this work; if not, write to the Free Software |
| 18 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | // |
| 20 | // ========== Copyright Header End ============================================ |
| 21 | /* |
| 22 | * Copyright (C) 2001,2005 Sun Microsystems, Inc. |
| 23 | * All rights reserved. |
| 24 | */ |
| 25 | #pragma ident "%%1.27 06/12/14 %%" |
| 26 | |
| 27 | /* |
| 28 | "threads.cc" |
| 29 | |
| 30 | Supports multiple cpu simulation threads that "system.cc" |
| 31 | can signal and wait for, ie that live for the duration of blaze rather |
| 32 | than being created/destroyed for each UI command. |
| 33 | |
| 34 | These threads can however be destroyed/re-created, because we support |
| 35 | changing numthreads (which indirectly changes cpus-per-thread), as |
| 36 | well as changing the enabled state of individual cpus (which changes |
| 37 | the number-of-cpus to run, and hence the distribution of running cpus |
| 38 | across the worker threads). |
| 39 | |
| 40 | */ |
| 41 | |
| 42 | #include "ui.h" |
| 43 | #include "workerthread.h" |
| 44 | |
| 45 | |
extern int blaze_debug;                 /* main/ui_cmds */
extern int blaze_option;




// Vcpu0 calls these doneftn_t with void * arg, when all the workerthreads have
// done their required execution.  The function pointer is one-shot: the
// worker that fires it clears wrkthdCBFunc *before* calling through the
// saved copy (see stept/stepi/stepc).
doneftn_t volatile wrkthdCBFunc = NULL; /* NB-one-shot callback ftn */
void * volatile wrkthdCBArg = NULL;     /* and its callback arg */


// system level event queue
// static EventQue *eventque;

sema_t wrkthdDONE; // DONE semaphore.  Vcpu0 workerthread signals on
                   // this semaphore when all the worker threads are done.
                   // UI threads typically hang on this sema while
                   // waiting for stepi, stept to finish.

// Checkpoint helpers implemented elsewhere; used by dump()/restore() below.
extern void write_scalar_64 (FILE *fp, const char * name, uint64_t v);
extern bool_t read_scalar_64 (FILE *fp, const char * name, uint64_t *v);
| 69 | |
| 70 | static void inline breakpoint_hit(int cpu_id) |
| 71 | { |
| 72 | ui->output("cpu[%i] hit a breakpoint. stop.. \n", cpu_id); |
| 73 | } |
| 74 | |
// Shared barrier state; touched only through atomic_barrier() in
// barrier()/stept()/stepi().
volatile int WorkerThread::BarrierCount = 0;
volatile int WorkerThread::BarrierLock = 0;
volatile int WorkerThread::BarrierTemp = 0;

// Aggregate simulation counters.
int64_t WorkerThread::Nusecs = 0;
int64_t WorkerThread::Ninstrs = 0;
int64_t WorkerThread::Ncycles = 0;

// step_remainder holds the number of instructions (in normal mode)
// or cycles (in execution-driven mode) before the next stick_update needs
// to happen. This usually happens when a simulation is stopped between two
// stick updates (usually 1 microsecond of simulated time).
// Implementation note: this used to be known as instrs_till_next_stick_update
// before it was needed in execution-driven mode.
volatile uint64_t WorkerThread::step_remainder = 0;

volatile int WorkerThread::numThds = 0;              // number of live worker threads
WorkerThread * WorkerThread::wrkthds = 0;            // array of numThds WorkerThreads
pthread_key_t WorkerThread::key = 0;                 // per-thread key (created once in create_worker_threads)
volatile uint64_t WorkerThread::GlobalTimeUsecs = 0; // simulated time, in usecs (advanced in barrier())
volatile int64_t WorkerThread::GlobalTicks = 0;      // only consumed by the "perf" UI command

// Per-usec stick increment, plus the sub-usec remainder carried between
// intervals; both recomputed by the vcpu0 worker inside barrier().
volatile uint64_t WorkerThread::stick_incr = 0;
volatile uint64_t WorkerThread::stick_remainder = 0;

/* static */ Vcpu * WorkerThread::first_cpu = 0; // first cpu has some special work to do



// user/kernel interval and instruction tallies, bumped with atomic_add_64
// from stept()'s chunky (non-round-robin) path.
volatile int64_t WorkerThread::u_intervals = 0;
volatile int64_t WorkerThread::k_intervals = 0;

volatile int64_t WorkerThread::u_instrs = 0;
volatile int64_t WorkerThread::k_instrs = 0;
| 109 | |
| 110 | |
| 111 | void WorkerThread::kill_worker_threads () |
| 112 | { |
| 113 | if (numThds > 0) { |
| 114 | |
| 115 | while (sema_trywait (&wrkthdDONE) == 0) ; // "reset" just in case |
| 116 | |
| 117 | for (int i=0; i < numThds; i++) |
| 118 | wrkthds[i].killThread(); |
| 119 | |
| 120 | numThds = 0; |
| 121 | } |
| 122 | |
| 123 | if (wrkthds) { |
| 124 | delete[] wrkthds; |
| 125 | wrkthds = 0; |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | |
| 130 | |
// Per-usec rendezvous executed by every worker thread.  Two atomic
// barriers bracket the global bookkeeping so that (a) all workers have
// finished the previous usec before time/events advance, and (b) all
// workers observe the same stick_incr before resuming.  Returns the
// run-state value produced by the second atomic_barrier (checked by
// callers with IN_STOP_STATE).
int WorkerThread::barrier()
{

    // Phase 1: wait for every worker to arrive.
    atomic_barrier(&numThds,&BarrierCount,&BarrierLock,
                   &blaze_stop_request,&BarrierTemp);

    // Advance this worker's simulated time and mirror it into the global.
    // NOTE(review): every worker executes these lines; presumably simTime is
    // a per-worker member kept in lock-step so the stores race benignly --
    // confirm against workerthread.h.
    GlobalTimeUsecs = ++simTime;

    doEventqueCallbacks();

    if (first_cpu->config.trace_on) {
        doTrace();
    }

    // Phase 2: hold everyone until the callbacks/trace work above is done.
    int ret = atomic_barrier(&numThds,&BarrierCount,&BarrierLock,
                             &blaze_stop_request,&BarrierTemp);


    if(cpus[0] == first_cpu) { // VCPU0

        // this is *only* used for the "perf" UI command
        //@@@ and is now broken when umips != kmips @@@
        GlobalTicks += the_arch.mips;

        // all vcpu's execute update_stick() with same stick_incr because of the
        // first atomic_barrier at entrance of this function
        stick_incr = the_arch.stick_freq/1000000ull;
        stick_remainder += the_arch.stick_freq % 1000000ull;
        if(stick_remainder >= 1000000ull){
            // adjust the drift in stick_incr when stick_remainder is more then
            // 10^6
            stick_incr += stick_remainder/1000000ull;
            stick_remainder %= 1000000ull;
        }
    }


    return ret;
}
| 171 | |
| 172 | |
| 173 | |
| 174 | |
| 175 | static inline int pstate_is_userp (Vcpu * vcpu) |
| 176 | { |
| 177 | uint64_t tl, tt, tstate0; // special case TL > 0 |
| 178 | vcpu->get_reg (VCPU_PR_TL, &tl); |
| 179 | if (tl > 0) vcpu->get_reg (VCPU_PR_TT, &tt); |
| 180 | if (tl > 0 && ((tt >= 0x064 && tt <= 0x06f) // tlb miss |
| 181 | || (tt >= 0x080 && tt <= 0x0ff))) { // win spill/fill |
| 182 | vcpu->get_reg (VCPU_PR_TSTATE, &tstate0); |
| 183 | |
| 184 | if (tstate0 & (0x0004 << 8)) return 0; |
| 185 | else return 1; |
| 186 | } |
| 187 | |
| 188 | uint64_t pstate; // regular case TL == 0 |
| 189 | vcpu->get_reg (VCPU_PR_PSTATE, &pstate); |
| 190 | return (pstate & 4) ? 0 : 1; |
| 191 | } |
| 192 | |
| 193 | static inline int get_mmu_cntx (Vcpu * vcpu) |
| 194 | /* Hack for sw05b, the kcfd process runs in kernel mode and with */ |
| 195 | /* primary mmu context 0, but can be identified by its' */ |
| 196 | /* secondary mmu context, which is unique the the kcfd (although */ |
| 197 | /* you do have to be clever to figure out what that value is!) */ |
| 198 | { |
| 199 | uint64_t data; /* Cheetah (only?) */ |
| 200 | (void) vcpu->get_asi (0x58, 0x10ll, data); /* I/D MMU Secondary cntx */ |
| 201 | return (int)data; |
| 202 | } |
| 203 | |
| 204 | |
| 205 | static const uint64_t pstate_priv_mask = 0x2ull; |
| 206 | |
// Run this worker's vcpus for 'usecs' simulated microseconds.
//
// Three execution strategies:
//   - execution-driven mode: delegate to stepc() (cycles, not instructions);
//   - round-robin mode (the_arch.roundrobin, >1 cpu): interleave strands
//     instruction-by-instruction using fixed-point CPI accounting;
//   - chunky mode: step each vcpu a whole usec worth of instructions at a
//     time, with separate user/kernel/kcfd MIPS rates.
//
// All workers rendezvous in barrier() once per simulated usec.  On exit,
// the vcpu0 worker publishes the run state, fires the one-shot completion
// callback, and posts wrkthdDONE for the waiting UI thread.
void WorkerThread::stept (uint64_t usecs)
{
    int rslt;

    if (SYSTEM_in_execution_driven_mode()) {
        // convert usecs to cycles at the configured cpu frequency
        stepc(usecs * the_arch.cpu_freq / 1000000ull);
        return;
    }

    if (step_remainder) {
        // stepi and stept have been intermixed.
        // execute the leftover instructions,
        // before simulating 'usecs'
        called_from_stept = true;
        stepi (step_remainder);
    }

    // When the conditional stepi() call above was called and we hit
    // a breakpoint point then we should not enter the for loop below.
    // @@@ this needs to be re-thought, should there be a barrier here @@@
    if (!IN_STOP_STATE(barrier ())) {
        uint64_t kmips = the_arch.kmips;   /* re-fetch volatiles every iter */
        uint64_t umips = the_arch.umips;
        uint64_t cmips = the_arch_cmips;   // kcfd-specific MIPS rate
        int ccntx = the_arch_ccntx;        // kcfd secondary mmu context (0 = disabled)

        if (the_arch.roundrobin && (num_cpus > 1)) {

            // round-robin algorithm, with variable user/kernel mips:
            //
            // 1. derive user and kernel CPIs from the configured mips and cpu freq:
            //    CPI = MHz/MIPS
            // 2. When a strand steps by 1 instruction, an associated cycle counter is
            //    incremented by the CPI
            // 3. We round-robin over all strands, advancing each strand until it catches
            //    up with the other strands on this worker-thread.

            // For accuracy, CPI cannot be limited to whole numbers.

            // we use fixed-point arithmetic with 10 bits of
            // precision, to avoid FP ops in the critical loop.
            // all quantities named *_x_1024 represent such
            // fixed-point numbers.

            int64_t mhz = the_arch.cpu_freq/1e+6; // NOTE: cpu freq must be in whole megahz for accuracy

            int64_t kernel_cpi_x_1024 = (mhz << 10)/kmips;
            int64_t user_cpi_x_1024 = (mhz << 10)/umips;

            int64_t mhz_x_1024 = mhz << 10;   // cycles per usec, fixed-point

            // NOTE(review): 'usec' is an int compared against uint64_t
            // 'usecs'; fine for realistic step counts but would misbehave
            // for usecs > INT_MAX.
            for (int usec=0; usec<usecs; usec++) {
                int cpuid;
                for (cpuid=0; cpuid<num_cpus; cpuid++) {
                    done_x_1024[cpuid] = 0;   // per-strand cycles done this usec
                }

                int64_t cdone_x_1024 = 0;     // workerthread cycles done

                // go round-robin, while cycle counter represents less than a usec
                while(cdone_x_1024 < mhz_x_1024) {

                    int64_t cmin_x_1024 = LLONG_MAX; // min of cycles done across all cpus
                    for (cpuid=0; cpuid<num_cpus; cpuid++) {

                        // pick kernel vs user CPI from the privilege mask
                        uint64_t pstate;
                        cpus[cpuid]->get_reg(VCPU_PR_PSTATE, &pstate);
                        int64_t cpi_x_1024 = (pstate & pstate_priv_mask)? kernel_cpi_x_1024 : user_cpi_x_1024;

                        // step this cpu as long as its cycle counter does not exceed all other cpus
                        while(done_x_1024[cpuid] <= cdone_x_1024) {
                            int rslt = cpus[cpuid]->stepi(1);   // shadows outer rslt (unused here)
                            done_x_1024[cpuid] += cpi_x_1024;
                            if (rslt) {
                                // breakpoint: request a global stop, finish the usec
                                BLAZE_STOP(blaze_stop_request);
                                breakpoint_hit (cpus[cpuid]->id());
                                goto STEPT_LAST_RR;
                            }
                        } // while cycle counter not caught up with rest of the strands on this wt

                        if (done_x_1024[cpuid] < cmin_x_1024) cmin_x_1024 = done_x_1024[cpuid];

                    } // for each cpuid

                    // move up the threshold for the next round-robin interval
                    cdone_x_1024 = cmin_x_1024;
                } // while <mhz> cycles not done

            STEPT_LAST_RR:

                // one usec elapsed (or a breakpoint fired): advance STICKs
                for (cpuid=0; cpuid<num_cpus; cpuid++) {
                    cpus[cpuid]->update_stick(stick_incr);
                }

                if (IN_STOP_STATE(barrier ()))
                    break;

            } // for usecs
        } else { // chunky mode: not round robin

            for (int i = 0; i < usecs; i++) {

                for(int j = 0; j < num_cpus; j++) {

                    Vcpu * vcpu = cpus[j];

                    /*---STEPI---*/
                    // choose the MIPS rate from the cpu's current mode:
                    // user, kcfd (kernel w/ matching secondary context), or kernel
                    if (pstate_is_userp (vcpu)) {
                        rslt = vcpu->stepi (umips); atomic_add_64 (&u_intervals, 1);
                        atomic_add_64 (&u_instrs, umips);
                    } else {
                        if (ccntx != 0 && get_mmu_cntx (vcpu) == ccntx) {
                            rslt = vcpu->stepi (cmips); atomic_add_64 (&k_intervals, 1);
                            atomic_add_64 (&k_instrs, cmips);
                        } else {
                            rslt = vcpu->stepi (kmips); atomic_add_64 (&k_intervals, 1);
                            atomic_add_64 (&k_instrs, kmips);
                        }
                    }
                    /*---STICK---*/
                    vcpu->update_stick(stick_incr);


                    if (rslt) {
                        // breakpoint: request stop, but let remaining cpus
                        // on this worker finish their interval first
                        BLAZE_STOP(blaze_stop_request);
                        breakpoint_hit (vcpu->id ());
                    }
                } // for cpus


                if (IN_STOP_STATE(barrier ()))
                    break;

            } // for usecs
        }
    } // not already stopped

    // @@@ we _should_ be able to eliminate this barrier, but then sam crashes,
    // and I haven't had time to figure out why @@@
    //
    atomic_barrier (&numThds, &BarrierCount, &BarrierLock,
                    &blaze_stop_request, &BarrierTemp);

    if (cpus[0] == first_cpu) { // VCPU 0 does some extra work...
        if (IN_STOP_STATE(blaze_stop_request)) {    // stop request -> stop
            BLAZE_STOP(blaze_run_state);
            BLAZE_CLEAR(blaze_stop_request);
        } else if (IN_GTSTEP_STATE(blaze_run_state))// time sync -> wait
            BLAZE_GTWAIT(blaze_run_state);
        else                                        // anything else -> stop
            BLAZE_STOP(blaze_run_state);
        // fire the one-shot completion callback (clear before calling,
        // so a re-registration from inside the callback is not lost)
        doneftn_t TmpFunc = wrkthdCBFunc;
        if (TmpFunc != NULL) {
            wrkthdCBFunc = NULL;     // reset first to avoid race !
            (*TmpFunc)(wrkthdCBArg); // then `do' callback !
        }
        sema_post (&wrkthdDONE);     // ------------------- DONE !!!
    }
}
| 366 | |
| 367 | |
| 368 | |
| 369 | |
| 370 | |
| 371 | |
| 372 | |
| 373 | |
// Execute 'n' instructions on every vcpu owned by this worker, updating
// each vcpu's STICK once per MIPS-sized interval (i.e. once per simulated
// usec) and rendezvousing with the other workers in barrier() after each
// interval.  step_remainder carries the instructions still owed before the
// next STICK update across calls, so stepi/stept can be intermixed.
void WorkerThread::stepi(uint64_t n){

    uint64_t MIPS = the_arch.mips;    // instructions before every sync

    uint64_t loops;                   // number of 1 usec loops to execute
    uint64_t leftover_to_execute;
                                      // number of instructions left from n
                                      // that have to be executed before return
                                      // w/o updating the STICK
    uint64_t leftovers;
                                      // number of instructions that have to be
                                      // executed next time stepi is called
                                      // before updating STICK. leftovers < MIPS

    bool update = false;              // should update STICK in this call ?
                                      // since n could be less then leftovers


    // Split the request so that:
    // loops * mips + leftover_to_execute + step_remainder == n
    if(step_remainder > n){
        // n doesn't even finish the pending interval: run n instructions,
        // carry the rest forward, no STICK update this call
        loops = 0;
        leftover_to_execute = n;
        leftovers = step_remainder - n;
        update = false;
    }else{
        n -= step_remainder;
        loops = n/MIPS;
        leftover_to_execute = n % MIPS;
        leftovers = MIPS - n % MIPS;
        if(step_remainder)
            // do not update if step_remainder was 0
            update = true;
    }


    if(update){
        // update STICK after executing 'step_remainder'
        for( int j = 0; j < num_cpus ; j++){
            if(cpus[j]->stepi(step_remainder) != 0){
                // have hit a breakpoint on this strand or cpu, so
                BLAZE_STOP(blaze_stop_request);
                breakpoint_hit(cpus[j]->id());
            }
            cpus[j]->update_stick(stick_incr);
        }

        int sam_state = barrier();

        if (IN_STOP_STATE(sam_state))
            // a stop request has come either through UI, or some strand has hit a
            // breakpoint. Do not wait to complete the rest of usecs.
            goto STOPNOW;
    }

    // full 1-usec intervals: MIPS instructions, STICK update, barrier
    for (int i = 0; i < loops; i++) {
        int sam_state;
        for (int j = 0; j < num_cpus; j++ ) {
            if(cpus[j]->stepi(MIPS) != 0){
                // have hit a breakpoint on this strand or cpu
                BLAZE_STOP(blaze_stop_request);
                breakpoint_hit(cpus[j]->id());
                // don't break, let other cpus on this workerthread complete
                // their required number of instructions.
            }
            cpus[j]->update_stick(stick_incr);
        }
        sam_state = barrier();

        if (IN_STOP_STATE(sam_state))
            // a stop request has come either through UI, or some strand has hit a
            // breakpoint. Do not wait to complete the rest of usecs.
            goto STOPNOW;
    }


    // execute the rest of the intructions w/o updating STICK
    for( int j = 0; j < num_cpus; j++ ) {
        if(cpus[j]->stepi(leftover_to_execute) != 0){
            // have hit a breakpoint on this strand or cpu
            BLAZE_STOP(blaze_stop_request);
            breakpoint_hit(cpus[j]->id());
            // don't break, let other cpus on this workerthread complete
            // their required number of instructions.
        }
    }


STOPNOW:

    // final rendezvous before vcpu0 publishes state and signals DONE
    atomic_barrier(&numThds,&BarrierCount,&BarrierLock,&blaze_stop_request,&BarrierTemp);

    if(cpus[0] == first_cpu){
        step_remainder = leftovers;

        if(called_from_stept){
            // stept() will do the run-state/callback/DONE bookkeeping itself
            called_from_stept = false; // do not want to stop here
            return;
        }

        if (IN_STOP_STATE(blaze_stop_request)) {    // stop request -> stop
            BLAZE_STOP(blaze_run_state);
            BLAZE_CLEAR(blaze_stop_request);
        } else if (IN_GTSTEP_STATE(blaze_run_state))// time sync -> wait
            BLAZE_GTWAIT(blaze_run_state);
        else                                        // anything else -> stop
            BLAZE_STOP(blaze_run_state);

        // fire the one-shot completion callback, clearing it first
        if(wrkthdCBFunc){
            doneftn_t TmpFunc = wrkthdCBFunc;
            if (TmpFunc != NULL) {
                wrkthdCBFunc = NULL;     // reset first to avoid race !
                (*TmpFunc)(wrkthdCBArg); // then `do' callback !
            }
        }
        sema_post (&wrkthdDONE);         // ---------- DONE !!!
    }
}
| 491 | |
| 492 | |
| 493 | // stepc is called in exec-driven mode |
| 494 | void WorkerThread::stepc(int64_t ncycles) |
| 495 | { |
| 496 | int64_t cycles_per_usec = the_arch.cpu_freq / 1000000ull; |
| 497 | |
| 498 | int64_t i; |
| 499 | for (i=step_remainder; i<=ncycles; i++) { |
| 500 | int rslt = g_cpu_ex_intf.cycle(1); |
| 501 | step_remainder--; |
| 502 | |
| 503 | if (step_remainder == 0) { |
| 504 | for (int j = 0; j < num_cpus; j++) { |
| 505 | Vcpu * vcpu = cpus[j]; |
| 506 | vcpu->update_stick(stick_incr); |
| 507 | } |
| 508 | step_remainder = cycles_per_usec; |
| 509 | if (IN_STOP_STATE(barrier())) { |
| 510 | break; |
| 511 | } |
| 512 | } |
| 513 | |
| 514 | if (rslt) { |
| 515 | BLAZE_STOP(blaze_stop_request); |
| 516 | break; |
| 517 | } |
| 518 | |
| 519 | } |
| 520 | |
| 521 | // FIXME FIXME FIXME |
| 522 | // remember cycles left over until next sync |
| 523 | if (IN_STOP_STATE(blaze_stop_request)) { // stop request -> stop |
| 524 | BLAZE_STOP(blaze_run_state); |
| 525 | BLAZE_CLEAR(blaze_stop_request); |
| 526 | } else if (IN_GTSTEP_STATE(blaze_run_state))// time sync -> wait |
| 527 | BLAZE_GTWAIT(blaze_run_state); |
| 528 | else // anything else -> stop |
| 529 | BLAZE_STOP(blaze_run_state); |
| 530 | doneftn_t TmpFunc = wrkthdCBFunc; |
| 531 | if (TmpFunc != NULL) { |
| 532 | wrkthdCBFunc = NULL; // reset first to avoid race ! |
| 533 | (*TmpFunc)(wrkthdCBArg); // then `do' callback ! |
| 534 | } |
| 535 | sema_post (&wrkthdDONE); // ------------------- DONE !!! |
| 536 | } // void WorkerThread::stepc() |
| 537 | |
| 538 | |
| 539 | |
| 540 | void WorkerThread::dump(FILE * fp){ |
| 541 | write_scalar_64 (fp, "GlobalTimeUsecs",GlobalTimeUsecs ); |
| 542 | write_scalar_64 (fp, "GlobalTicks",GlobalTicks ); |
| 543 | write_scalar_64 (fp, "step_remainder",step_remainder); |
| 544 | write_scalar_64 (fp, "stick_remainder",stick_remainder); |
| 545 | } |
| 546 | |
| 547 | int WorkerThread::restore (char * line) |
| 548 | { |
| 549 | if (sscanf (line, "GlobalTimeUsecs %lli", &GlobalTimeUsecs) == 1) ; |
| 550 | else if (sscanf (line, "GlobalTicks %lli", &GlobalTicks) == 1) ; |
| 551 | else if (sscanf (line, "stick_remainder %lli", &stick_remainder) == 1) ; |
| 552 | |
| 553 | // Note that the following two lines restore the same variable. This reflects |
| 554 | // a name change of the variable from instr_till_next_stick_update to step_remainder |
| 555 | // to reflect its broader purpose in execution-driven runs. The second restore has |
| 556 | // been left for backward compatibility to keep the ability to restore older checkpoints. |
| 557 | // Please do not remove that line unless you are sure no checkpoint created prior to |
| 558 | // 12/13/2007 exists or they have all been patched to rename this variable. |
| 559 | // Only one of those lines will ever get executed depending on when the checkpoint was |
| 560 | // taken. |
| 561 | else if (sscanf (line, "step_remainder %lli", &step_remainder) == 1) ; |
| 562 | else if (sscanf (line, "instr_till_next_stick_update %lli", &step_remainder) == 1) ; |
| 563 | |
| 564 | else return FALSE; |
| 565 | |
| 566 | return TRUE; |
| 567 | } |
| 568 | |
| 569 | |
// (Re)build the pool of cpu worker threads.
//
// numThreads == -1 means "derive it": from NumCpus/cpusPerThread when
// cpusPerThread is given, else 1.  The result is clamped to [1, 64].
// The enabled vcpus are then spread as evenly as possible across the
// workers.  If the thread count or the enabled-cpu set changed, the old
// workers are killed (after rescuing their pending events onto worker 0's
// queue) and a fresh array is created; otherwise nothing happens.
void WorkerThread::create_worker_threads (int NumCpus, int cpusPerThread, int numThreads)
{
    static int FIRST_TIME = 1;   // init flag

    int NumEnabled = 0;          // number of enabled vcpus
    int NumPerThread = 0;        // vcpus per worker (floor)
    int NumModThread = 0;        // first NumModThread workers take one extra vcpu


    int newnum = numThreads;

    // derive the worker count when not given explicitly
    if (newnum == -1) {
        if (cpusPerThread == -1) {
            newnum = 1;
        } else {
            newnum = NumCpus / cpusPerThread;
        }
    }

    // clamp to [1, 64]
    if (newnum <= 0) {
        ui->warning("invalid conf numthds, using 1\n");
        newnum = 1;
    } else if (newnum > 64) {
        ui->warning("invalid conf numthds, using 64\n");
        newnum = 64;
    }

    // count the enabled vcpus
    NumEnabled = 0;
    for (int i=0; i<=g_vcpu_id_max; i++)
        if (get_vcpu(i) && cpu_enabled[i])
            NumEnabled++;

    if (NumEnabled == 0) {
        ui->warning("Invalid: 0 enabled cpus, using 1\n");
        NumEnabled = 1;
    }

    NumPerThread = NumEnabled / newnum;
    NumModThread = NumEnabled % newnum;


    // one-time process-wide initialization
    if (FIRST_TIME) {
        sema_init (&wrkthdDONE, 0, USYNC_THREAD,NULL);
        // eventque = new EventQue();
        assert(pthread_key_create(&key,0) == 0);
        FIRST_TIME = 0;
    }

    // rebuild only when the worker count or the enabled set changed
    if (newnum != WorkerThread::numThds || cpu_enable_changed) {

        if (newnum > HOSTINFO_numcpus()) {
            ui->warning("not enough host-cpus(%d) for sim-threads(%d)\n",
                        HOSTINFO_numcpus(), newnum);
        }


        // retrieve any existing events on the current workerthread eq's
        std::list<Event_t *> eventlist;
        for( int i = 0 ; i < numThds; i++){
            while(!wrkthds[i].eq->empty()){
                Event_t *event = new Event_t;
                wrkthds[i].eq->get_top (event);
                eventlist.push_front(event);
            }
        }

        kill_worker_threads ();
        wrkthds = new WorkerThread[newnum];
        for (int i = 0; i < newnum; i++)
            wrkthds[i].worker_id = i;

        // put all events in eventlist on new wrkthd 0 event queue
        while( !eventlist.empty() ){
            Event_t * e = eventlist.front();
            wrkthds[0].eq->insert_callback(e->pq_priority,
                                           e->pq_cbfunc,e->pq_cbarg1,e->pq_cbarg2,
                                           e->pq_cbunload, e->dbgstring, e->worker_id);
            eventlist.pop_front();
            delete e;
        }


        // reassign vcpu's to the worker threads
        int nextcpu = 0;
        first_cpu = 0;
        for (int i = 0; i < newnum; i++) {
            // number of vcpu's for this worker thread
            wrkthds[i].num_cpus = NumPerThread + (i < NumModThread);

            for (int j = 0; j < wrkthds[i].num_cpus; j++)
            {
                // find next enabled vcpu
                // NOTE(review): the scan below relies on the NumEnabled
                // count computed above -- if fewer enabled vcpus exist than
                // expected, vcpu could remain NULL here; confirm get_vcpu()
                // returns NULL past g_vcpu_id_max.
                Vcpu* vcpu = get_vcpu(nextcpu);
                for (int k=nextcpu; k<=g_vcpu_id_max; k++)
                {
                    if(!vcpu || !cpu_enabled[nextcpu]) // skip not enabled
                        vcpu =get_vcpu(++nextcpu);
                    else
                        break;
                }

                if (first_cpu == 0)
                    first_cpu = vcpu;    // first assigned vcpu gets the special role

                wrkthds[i].cpus[j] = vcpu;
                nextcpu++;
            }
        }

        for (int i=0; i<newnum; i++) {
            ui->verbose("cpu-worker-thread[%d] ",i);
            wrkthds[i].info();
        }

        WorkerThread::numThds = newnum;
        cpu_enable_changed = false;
    }
}