| 1 | // ========== Copyright Header Begin ========================================== |
| 2 | // |
| 3 | // OpenSPARC T2 Processor File: compress_engine.C |
| 4 | // Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. |
| 5 | // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. |
| 6 | // |
| 7 | // The above named program is free software; you can redistribute it and/or |
| 8 | // modify it under the terms of the GNU General Public |
| 9 | // License version 2 as published by the Free Software Foundation. |
| 10 | // |
| 11 | // The above named program is distributed in the hope that it will be |
| 12 | // useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | // General Public License for more details. |
| 15 | // |
| 16 | // You should have received a copy of the GNU General Public |
| 17 | // License along with this work; if not, write to the Free Software |
| 18 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | // |
| 20 | // ========== Copyright Header End ============================================ |
| 21 | /* compress_engine.C */ |
| 22 | |
| 23 | #include <stdio.h> |
| 24 | #include <stdlib.h> |
| 25 | #include <string.h> |
| 26 | #include <assert.h> |
| 27 | |
| 28 | #include "rstf/rstf.h" |
| 29 | |
| 30 | #if defined(ARCH_AMD64) |
| 31 | #include "rstf/rstf_convert.h" |
| 32 | #endif |
| 33 | |
| 34 | #include "rstzip3.h" |
| 35 | #include "rz3_section.h" |
| 36 | |
| 37 | #include "rz3iu.h" |
| 38 | |
/* debug stuff */
// Compile-time debug switches (dead-code-eliminated when false):
// dbg_ras   - tracing for the return-address-stack predictor
// dbg_regid - tracing for regval register-id prediction
static const bool dbg_ras = false;
static const bool dbg_regid = false;
| 42 | |
| 43 | |
| 44 | // rstbufsize <= rz3_bufsize |
| 45 | int rstzip3::compress_buffer(rstf_unionT * rstbuf, int rstbufsize) |
| 46 | { |
| 47 | |
| 48 | shdr->clear(); |
| 49 | sdata->clear(); |
| 50 | |
| 51 | // set shdr->clearflag if records_since_prev_clear >= clear_interval |
| 52 | // clear predictor tables in tdata if shdr->clearflag is set |
| 53 | |
| 54 | // if (verbose) clear_stats(); |
| 55 | clear_stats(); |
| 56 | |
| 57 | // write record count to header |
| 58 | shdr->nrecords = rstbufsize; |
| 59 | |
| 60 | int i; |
| 61 | for (i=0; i<rstbufsize; i++) { |
| 62 | if (rfs_phase) { |
| 63 | if (rfs_cw_phase) { |
| 64 | if (rstbuf[i].proto.rtype == RFS_CW_T) { |
| 65 | sdata->bitarrays[rfs_rtype_pred_array]->Push(1); |
| 66 | rfs_records_seen++; |
| 67 | if (rfs_records_seen == rfs_nrecords) { |
| 68 | rfs_phase = rfs_cw_phase = false; |
| 69 | } |
| 70 | } else /* rfs cw rtype mispred */ { |
| 71 | sdata->bitarrays[rfs_rtype_pred_array]->Push(0); |
| 72 | sdata->bitarrays[rtype_array]->Push(rstbuf[i].proto.rtype); |
| 73 | rfs_phase = rfs_cw_phase = false; |
| 74 | } // rfs cw rtype pred |
| 75 | } else if (rfs_bt_phase) { |
| 76 | if (rstbuf[i].proto.rtype == RFS_BT_T) { |
| 77 | sdata->bitarrays[rfs_rtype_pred_array]->Push(1); |
| 78 | rfs_records_seen++; |
| 79 | if (rfs_records_seen == rfs_nrecords) { |
| 80 | rfs_phase = rfs_bt_phase = false; |
| 81 | } |
| 82 | } else /* rfs bt rtype mispred */ { |
| 83 | sdata->bitarrays[rfs_rtype_pred_array]->Push(0); |
| 84 | sdata->bitarrays[rtype_array]->Push(rstbuf[i].proto.rtype); |
| 85 | rfs_phase = rfs_bt_phase = false; |
| 86 | } // rfs bt rtype pred |
| 87 | } // which rfs phase? */ |
| 88 | } else /* regular rst phase */ { |
| 89 | // rtype compression |
| 90 | if (rstbuf[i].proto.rtype == INSTR_T) { |
| 91 | sdata->bitarrays[rtype_key_array]->Push(rtype_key_INSTR); |
| 92 | } else if (rstbuf[i].proto.rtype == REGVAL_T) { |
| 93 | sdata->bitarrays[rtype_key_array]->Push(rtype_key_REGVAL); |
| 94 | } else if (rstbuf[i].proto.rtype == PAVADIFF_T) { |
| 95 | sdata->bitarrays[rtype_key_array]->Push(rtype_key_PAVADIFF); |
| 96 | } else { |
| 97 | sdata->bitarrays[rtype_key_array]->Push(rtype_key_RAW); |
| 98 | sdata->bitarrays[rtype_array]->Push(rstbuf[i].proto.rtype); |
| 99 | } |
| 100 | } // phase: rfs cw, rfs bt or regular rst? |
| 101 | |
| 102 | switch(rstbuf[i].proto.rtype) { |
| 103 | case INSTR_T: |
| 104 | compress_inst(rstbuf, i); |
| 105 | break; |
| 106 | case REGVAL_T: |
| 107 | compress_regval(rstbuf, i); |
| 108 | break; |
| 109 | case PAVADIFF_T: |
| 110 | compress_pavadiff(rstbuf, i); |
| 111 | break; |
| 112 | case TLB_T: |
| 113 | compress_tlb(rstbuf, i); |
| 114 | break; |
| 115 | case PREG_T: |
| 116 | compress_preg(rstbuf, i); |
| 117 | break; |
| 118 | case TRAP_T: |
| 119 | compress_trap(rstbuf, i); |
| 120 | break; |
| 121 | case DMA_T: |
| 122 | compress_dma(rstbuf, i); |
| 123 | break; |
| 124 | case MEMVAL_T: |
| 125 | compress_memval(rstbuf, i); |
| 126 | break; |
| 127 | case RFS_CW_T: |
| 128 | if ((rfs_records_seen == 0) && ! rfs_cw_phase) { |
| 129 | // in case there was no rfs preamble, section header etc. |
| 130 | rfs_phase = rfs_cw_phase = true; |
| 131 | rfs_nrecords = rfs_unknown_nrecords; |
| 132 | rfs_records_seen = 1; |
| 133 | } |
| 134 | compress_rfs_cw(rstbuf, i); |
| 135 | break; |
| 136 | case RFS_BT_T: |
| 137 | if ((rfs_records_seen == 0) && ! rfs_bt_phase) { |
| 138 | // in case there was no rfs preamble, section header etc. |
| 139 | rfs_phase = rfs_bt_phase = true; |
| 140 | rfs_nrecords = rfs_unknown_nrecords; |
| 141 | rfs_records_seen = 1; |
| 142 | } |
| 143 | compress_rfs_bt(rstbuf, i); |
| 144 | break; |
| 145 | |
| 146 | case RSTHEADER_T: |
| 147 | // write raw records to output |
| 148 | #if defined(ARCH_AMD64) |
| 149 | { |
| 150 | rstf_unionT temp; |
| 151 | memcpy(&temp, &rstbuf[i], sizeof(rstf_unionT)); |
| 152 | rstf_convertT::l2b((rstf_uint8T*)&temp); |
| 153 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[0])); |
| 154 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[1])); |
| 155 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[2])); |
| 156 | } |
| 157 | #else |
| 158 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[0]); |
| 159 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[1]); |
| 160 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[2]); |
| 161 | #endif |
| 162 | if (rstbuf[i].header.majorVer*1000+rstbuf[i].header.minorVer <= 2011) { |
| 163 | rstf_pre212 = true; |
| 164 | } |
| 165 | |
| 166 | break; |
| 167 | |
| 168 | default: |
| 169 | // write raw records to output |
| 170 | #if defined(ARCH_AMD64) |
| 171 | { |
| 172 | rstf_unionT temp; |
| 173 | memcpy(&temp, &rstbuf[i], sizeof(rstf_unionT)); |
| 174 | rstf_convertT::l2b((rstf_uint8T*)&temp); |
| 175 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[0])); |
| 176 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[1])); |
| 177 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[2])); |
| 178 | } |
| 179 | #else |
| 180 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[0]); |
| 181 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[1]); |
| 182 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[2]); |
| 183 | #endif |
| 184 | |
| 185 | if (rstbuf[i].proto.rtype == RFS_SECTION_HEADER_T) { |
| 186 | if (rstbuf[i].rfs_section_header.section_type == RFS_CW_T) { |
| 187 | rfs_phase = rfs_cw_phase = true; |
| 188 | rfs_nrecords = rstbuf[i].rfs_section_header.n_records; |
| 189 | rfs_records_seen = 0; |
| 190 | } else if (rstbuf[i].rfs_section_header.section_type == RFS_BT_T) { |
| 191 | rfs_phase = rfs_bt_phase = true; |
| 192 | rfs_nrecords = rstbuf[i].rfs_section_header.n_records; |
| 193 | rfs_records_seen = 0; |
| 194 | } // else - do nothing |
| 195 | } // if rfs section header |
| 196 | |
| 197 | break; |
| 198 | } // what rtype? */ |
| 199 | |
| 200 | prev_rtype = rstbuf[i].proto.rtype; |
| 201 | } // for each record |
| 202 | |
| 203 | sdata->update_counts(); |
| 204 | |
| 205 | if (stats) update_stats(); |
| 206 | |
| 207 | if (! shdr->write(gzf)) { |
| 208 | perror("ERROR: rstzip3::compress_Buffer(): could not write section header to output file\n"); |
| 209 | return 0; |
| 210 | } |
| 211 | |
| 212 | if (! sdata->write(gzf)) { |
| 213 | perror("ERROR: rstzip3::compress_buffer(): could not write section data to output file\n"); |
| 214 | return 0; |
| 215 | } |
| 216 | |
| 217 | |
| 218 | if (verbose) { |
| 219 | fprintf(stderr, "Section %d\n", nsections); |
| 220 | sdata->print(); |
| 221 | } |
| 222 | |
| 223 | if (stats) print_stats(); |
| 224 | |
| 225 | nsections++; |
| 226 | |
| 227 | return rstbufsize; |
| 228 | } // rstzip3::compress_buffer |
| 229 | |
| 230 | |
| 231 | static bool ds_indicates_tail_call(uint32_t instr) { |
| 232 | return (instr == MOV_G1_G7_INSTR) || ((instr & RESTORE_OPCODE_MASK) == RESTORE_OPCODE_BITS); |
| 233 | } |
| 234 | |
| 235 | void rstzip3::compress_inst(rstf_unionT * rstbuf, int idx) |
| 236 | { |
| 237 | |
| 238 | rstf_instrT *ir = &(rstbuf[idx].instr); |
| 239 | |
| 240 | // check cpuid |
| 241 | uint16_t cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir); |
| 242 | if (pred_cpuid == cpuid) { |
| 243 | sdata->bitarrays[cpuid_pred_array]->Push(1); |
| 244 | } else { |
| 245 | sdata->bitarrays[cpuid_pred_array]->Push(0); |
| 246 | sdata->bitarrays[raw_cpuid_array]->Push(cpuid); |
| 247 | } |
| 248 | |
| 249 | // predict cpuid. assume round robin. FIXME: for now, assump uP traces |
| 250 | if (tdata[cpuid+1] == NULL) { |
| 251 | pred_cpuid = 0; |
| 252 | } else { |
| 253 | pred_cpuid = cpuid+1; |
| 254 | } |
| 255 | last_instr_cpuid = cpuid; |
| 256 | |
| 257 | if (tdata[cpuid] == NULL) { |
| 258 | tdata[cpuid] = new rz3_percpu_data(cpuid); |
| 259 | } |
| 260 | |
| 261 | instr_preds = instr_mispred_none; |
| 262 | |
| 263 | // amask bit: if amask is 0, all 64-bits of pred_pc are used. if not, only the lower 32-bits are used |
| 264 | // we check and set the amask bit on a pc misprediction. if the misprediction leaves the lower 32-bits unchanged |
| 265 | // but differs in the upper 32-bits, we set/clear amask accordingly |
| 266 | // check pc |
| 267 | uint64_t pc = ir->pc_va; |
| 268 | uint64_t pred_pc = tdata[cpuid]->pred_pc; |
| 269 | bool pc_pred = (pred_pc == ir->pc_va); |
| 270 | if (!pc_pred) { |
| 271 | instr_preds &= instr_mispred_pc; |
| 272 | |
| 273 | sdata->bitarrays[raw_value64_array]->Push(pc); |
| 274 | |
| 275 | // is our amask to blame? |
| 276 | if ((pc & rz3_amask_mask) == (pred_pc & rz3_amask_mask)) { |
| 277 | // lower 32 bits match |
| 278 | if ((pc >> 32) != 0) { |
| 279 | // if amask was 1, it should be 0. if it was already zero, amask is not to blame, but set it to 0 anyway |
| 280 | tdata[cpuid]->pred_amask = 0; |
| 281 | } else { |
| 282 | // if amask was 0, it should be 1. if it was already 1, we shouldn't be here. |
| 283 | if (0 && tdata[cpuid]->pred_amask) { |
| 284 | fprintf(stderr, "rz3: compress_inst: amask was set but predicted pc was > 32 bits: pred_pc %llx actual %llx\n", pred_pc, pc); |
| 285 | } |
| 286 | tdata[cpuid]->pred_amask = 1; |
| 287 | } |
| 288 | } |
| 289 | |
| 290 | tdata[cpuid]->pred_npc = pc+4; |
| 291 | } |
| 292 | |
| 293 | // (pc, npc) <= (npc, npc+4) |
| 294 | tdata[cpuid]->pred_pc = tdata[cpuid]->pred_npc; |
| 295 | tdata[cpuid]->pred_npc += 4; // this may be updated later in case of CTIs |
| 296 | |
| 297 | tdata[cpuid]->prev_pc = pc; |
| 298 | |
| 299 | // check annul bit |
| 300 | if (tdata[cpuid]->pred_an != ir->an) { |
| 301 | instr_preds &= instr_mispred_an; |
| 302 | perf_stats[ps_an_misses]++; |
| 303 | // sdata->an_mispred_count++; |
| 304 | } |
| 305 | |
| 306 | // predict and check instr |
| 307 | rz3iu_icache_data * icdata = tdata[cpuid]->icache->get(pc); |
| 308 | uint32_t instr = ir->instr; |
| 309 | if ((icdata == NULL) || (icdata->instr != ir->instr)) { |
| 310 | // ic miss |
| 311 | instr_preds &= instr_mispred_instr; |
| 312 | |
| 313 | sdata->bitarrays[raw_instr_array]->Push(instr); |
| 314 | |
| 315 | icdata = tdata[cpuid]->icache->set(pc, instr, rstzip3_major_version, rstzip3_minor_version); |
| 316 | |
| 317 | if ((!ir->an) && icdata->dinfo.flags.isdcti) { |
| 318 | icdata->gen_target(pc); |
| 319 | } |
| 320 | } |
| 321 | tdata[cpuid]->last_instr = ir->an ? 0x0 : instr; |
| 322 | |
| 323 | // if this is a delay slot of a call instr, we need to pop ras if "restore" or mov_g1_g7 instr |
| 324 | if (tdata[cpuid]->call_delay_slot) { |
| 325 | if ( ((instr & RESTORE_OPCODE_MASK) == RESTORE_OPCODE_BITS) || (instr == MOV_G1_G7_INSTR) ) { |
| 326 | tdata[cpuid]->ras->pop(); |
| 327 | } |
| 328 | tdata[cpuid]->call_delay_slot = false; |
| 329 | } |
| 330 | |
| 331 | |
| 332 | // tr and pr bits. |
| 333 | // predict and set tr BEFORE decompress_ea_va because ea_valid prediction depends on the tr bit |
| 334 | // tr is usually 0. we follow the convention of |
| 335 | // inserting all 1's where possible. so we *invert* the tr bit |
| 336 | if (ir->tr) { |
| 337 | instr_preds &= instr_mispred_tr; |
| 338 | } |
| 339 | |
| 340 | // for the hpriv bit, we predict it based on the previous instr |
| 341 | // this is new in v3.20 and up |
| 342 | uint32_t hpriv = rstf_pre212 ? 0 : ir->hpriv; |
| 343 | if (hpriv != tdata[cpuid]->pred_hpriv) { |
| 344 | instr_preds &= instr_mispred_hpriv; |
| 345 | tdata[cpuid]->pred_hpriv = hpriv; |
| 346 | if (hpriv) { |
| 347 | tdata[cpuid]->pred_pr = 0; |
| 348 | } |
| 349 | } |
| 350 | |
| 351 | // for the pr bit, we predict it based on the previous instr |
| 352 | if (ir->pr != tdata[cpuid]->pred_pr) { |
| 353 | instr_preds &= instr_mispred_pr; |
| 354 | tdata[cpuid]->pred_pr = ir->pr; |
| 355 | } |
| 356 | |
| 357 | // predict ea_valid, ea_va, bt, NEXT-instr an |
| 358 | |
| 359 | if (!ir->an) { |
| 360 | if (icdata->dinfo.flags.isdcti) { |
| 361 | |
| 362 | compress_dcti(rstbuf, idx, icdata); |
| 363 | |
| 364 | } else /* not dcti */ { |
| 365 | |
| 366 | // predict bt == 0 |
| 367 | int pred_bt = icdata->dinfo.flags.is_done_retry; |
| 368 | if (pred_bt != ir->bt) { |
| 369 | instr_preds &= instr_mispred_bt; |
| 370 | } |
| 371 | |
| 372 | // ea_valid=1 for ld/st/pf |
| 373 | int pred_ea_valid; |
| 374 | if (icdata->is_ldstpf) { |
| 375 | // FIXME: make sure this is not an internal ASI |
| 376 | pred_ea_valid = 1; |
| 377 | } else if (icdata->dinfo.flags.is_done_retry) { |
| 378 | pred_ea_valid = 1; |
| 379 | } else if (ir->tr) { |
| 380 | pred_ea_valid = 1; |
| 381 | } else { |
| 382 | pred_ea_valid = 0; |
| 383 | } |
| 384 | |
| 385 | if (pred_ea_valid != ir->ea_valid) { |
| 386 | instr_preds &= instr_mispred_ea_valid; |
| 387 | perf_stats[ps_ea_valid_misses]++; |
| 388 | } |
| 389 | |
| 390 | if (ir->ea_valid) { |
| 391 | compress_ea_va(rstbuf, idx); |
| 392 | } |
| 393 | |
| 394 | tdata[cpuid]->pred_an = 0; |
| 395 | } // dcti? |
| 396 | } // if not annulled |
| 397 | |
| 398 | if (instr_preds == instr_mispred_none) { |
| 399 | sdata->bitarrays[instr_pred_all_array]->Push(1); |
| 400 | } else { |
| 401 | sdata->bitarrays[instr_pred_all_array]->Push(0); |
| 402 | sdata->bitarrays[instr_pred_raw_array]->Push(instr_preds); |
| 403 | } |
| 404 | |
| 405 | } // rstzip3::compress_inst() |
| 406 | |
| 407 | |
| 408 | |
| 409 | void rstzip3::compress_ea_va(rstf_unionT * rstbuf, int idx) |
| 410 | { |
| 411 | rstf_instrT * ir = &(rstbuf[idx].instr); |
| 412 | uint16_t cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir); |
| 413 | |
| 414 | // if value trace: predict ea using known reg values |
| 415 | |
| 416 | // predict ea using the rz3 value cache |
| 417 | compress_value(cpuid, ir->ea_va); |
| 418 | } // rstzip3::compress_ea_va |
| 419 | |
// Compress a PAVADIFF record (pa/va translation info for pc and ea).
// Strategy: look ahead in the buffer for the next INSTR record on the
// same cpu to learn the va the diff applies to, then predict the
// pa-va diff through per-cpu software itlb/dtlb models. The
// decompressor replays the identical lookahead, so the order and
// conditions of every Push() below form a contract with
// decompress_pavadiff() and must not be reordered.
void rstzip3::compress_pavadiff(rstf_unionT * rstbuf, int idx)
{
  // disabled debug hook for a specific record index
  if (0 && idx == 102577) {
    printf("debug: decompress_pavadiff idx %d\n", idx);
  }

  rstf_pavadiffT * dr = &(rstbuf[idx].pavadiff);
  int cpuid = rstf_pre212 ? dr->cpuid : rstf_pavadiffT_get_cpuid(dr);

  // check and predict cpuid; a miss emits the raw cpuid
  if (pred_cpuid == cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }
  pred_cpuid = cpuid;

  // lazily allocate per-cpu predictor state
  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // predict icontext the same as prev icontext for this cpu
  if (tdata[cpuid]->pred_icontext == dr->icontext) {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_ictxt_array]->Push(dr->icontext);
    tdata[cpuid]->pred_icontext = dr->icontext;
  }

  // dcontext - predict same as prev dcontext for this cpu
  if (tdata[cpuid]->pred_dcontext == dr->dcontext) {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_dctxt_array]->Push(dr->dcontext);
    tdata[cpuid]->pred_dcontext = dr->dcontext;
  }

  // lookahead: find the next INSTR record on this cpu (within this
  // section) to learn the pc va and, if relevant, the ea va
  bool found_pc_va = false;
  uint64_t nextpc_va;
  bool found_ea_va = false;
  uint64_t nextea_va;

  int i;
  for (i=idx+1; i<shdr->nrecords; i++) {
    if (rstbuf[i].proto.rtype == INSTR_T) {
      rstf_instrT * ir = &(rstbuf[i].instr);
      uint16_t i_cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir);

      if (i_cpuid == cpuid) {
        nextpc_va = ir->pc_va;
        found_pc_va = true;
        if (dr->ea_valid && ir->ea_valid) { // we only care about ea_va if dr->ea_valid
          nextea_va = ir->ea_va;
          found_ea_va = true;
        }
      } // if cpuid match
      break;
    } else if (rstbuf[i].proto.rtype == PAVADIFF_T) {
      rstf_pavadiffT * pd = &(rstbuf[i].pavadiff);
      uint16_t pd_cpuid = rstf_pre212 ? pd->cpuid : rstf_pavadiffT_get_cpuid(pd);
      if (pd_cpuid == cpuid) {
        // We ran into a second pavadiff record before seeing an instr record.
        // flag this as a no-pred (hence no lookahead).
        // If we don't do this, the decompression algorithm will break
        // because we only have a 1 item limit on the number of pending
        // pavadiffs to patch, and patching this pavadiff will break the next one.
        break;
      }
    } // if instr or pavadiff
  } // for each subsequent record

  // ea_valid: always emitted raw (1 bit)
  sdata->bitarrays[pavadiff_ea_valid_array]->Push(dr->ea_valid);

  bool pc_pa_va_hit = false;
  bool ea_pa_va_hit = false;

  uint64_t pred_pa_va_diff;

  // pc_pa_va: predict via the software itlb, keyed by 8KB page number
  if (found_pc_va) {
    pred_pa_va_diff = tdata[cpuid]->itlb->get(nextpc_va >> 13);
    if (pred_pa_va_diff == (dr->pc_pa_va >> 13)) {
      pc_pa_va_hit = true;
    }
  }

  if (pc_pa_va_hit) {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(0);
    sdata->bitarrays[raw_value64_array]->Push(dr->pc_pa_va);


    // train the itlb only when we know which va this diff maps
    if (found_pc_va) {
      if (0) printf("%d: cpu%d itlb update: %llx => %llx\n", idx, cpuid, nextpc_va, dr->pc_pa_va);
      tdata[cpuid]->itlb->set(nextpc_va>>13, dr->pc_pa_va>>13);
    }
  }


  if (dr->ea_valid) {
    // ea_pa_va - use next instr (if available) and a tlb simulator
    if (found_ea_va) {
      // tlb lookup
      pred_pa_va_diff = tdata[cpuid]->dtlb->get(nextea_va >> 13);
      if (pred_pa_va_diff == (dr->ea_pa_va >> 13)) {
        ea_pa_va_hit = true;
      }
    }

    if (ea_pa_va_hit) {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(1);
    } else {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(0);
      sdata->bitarrays[raw_value64_array]->Push(dr->ea_pa_va);

      if (found_ea_va) {
        if (0) printf("%d: cpu%d dtlb update: %llx => %llx\n", idx, cpuid, nextea_va, dr->ea_pa_va);
        tdata[cpuid]->dtlb->set((nextea_va >> 13), (dr->ea_pa_va >> 13));
      }
    }
  }

  // the lookahead flag tells the decompressor to look for the next instr (to update the tlb)
  // if we predicted pc_pa_va and/or ea_pa_va correctly, the decompressor knows from the pred bit to lookahead.
  // we set the lookahead flag so that the decomprssor knows the difference between no prediction (could not find corresponding instr) and misprediction

  if ((found_pc_va && pc_pa_va_hit) || (dr->ea_valid && found_ea_va && ea_pa_va_hit)) {
    // dont need lookahead flag since the pc_pa_va_pred flag and/or the ea_pa_va_pred flag will indicate lookahead
  } else {
    // we need to indicate whether there was no prediction or misprediction(s)
    int lookahead_flag = (found_pc_va || found_ea_va);
    sdata->bitarrays[pavadiff_lookahead_array]->Push(lookahead_flag);
  }
} // void rstzip3::compress_pavadiff(rstf_unionT * rstbuf, int idx)
| 558 | |
| 559 | |
| 560 | // predict bt, ea_valid, ea_va, NEXT-instr an for a dcti instr. also set pred_npc |
| 561 | void rstzip3::compress_dcti(rstf_unionT * rstbuf, int idx, rz3iu_icache_data * icdata) |
| 562 | { |
| 563 | rstf_instrT * ir = &(rstbuf[idx].instr); |
| 564 | uint16_t cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir); |
| 565 | uint64_t pc = ir->pc_va; |
| 566 | |
| 567 | int bt_pred_hit; |
| 568 | |
| 569 | if (icdata->dinfo.flags.iscbranch) { |
| 570 | |
| 571 | // use branch predictor |
| 572 | bt_pred_hit = tdata[cpuid]->bp->pred_hit(pc, ir->bt); |
| 573 | perf_stats[ps_brpred_refs]++; |
| 574 | if (!bt_pred_hit) { |
| 575 | perf_stats[ps_brpred_misses]++; |
| 576 | } |
| 577 | |
| 578 | if (ir->bt) { |
| 579 | tdata[cpuid]->pred_npc = icdata->target; |
| 580 | if (tdata[cpuid]->pred_amask) { |
| 581 | tdata[cpuid]->pred_npc &= rz3_amask_mask; |
| 582 | } |
| 583 | } // else - pred_npc is already set to pc+8 |
| 584 | |
| 585 | } else if (icdata->dinfo.flags.isubranch && ! icdata->dinfo.flags.isubranch_nottaken) { |
| 586 | |
| 587 | // pred_npc is branch target |
| 588 | bt_pred_hit = ir->bt; // we predict taken. if not taken, we mispredict |
| 589 | tdata[cpuid]->pred_npc = icdata->target; |
| 590 | if (tdata[cpuid]->pred_amask) { |
| 591 | tdata[cpuid]->pred_npc &= rz3_amask_mask; |
| 592 | } |
| 593 | } else if (icdata->dinfo.flags.iscall) { |
| 594 | |
| 595 | bt_pred_hit = ir->bt; |
| 596 | tdata[cpuid]->pred_npc = icdata->target; |
| 597 | if (tdata[cpuid]->pred_amask) { |
| 598 | tdata[cpuid]->pred_npc &= rz3_amask_mask; |
| 599 | } |
| 600 | // push pc to ras unless following (delay slot) instr is restore |
| 601 | tdata[cpuid]->ras->push(pc); |
| 602 | tdata[cpuid]->call_delay_slot = true; |
| 603 | |
| 604 | } else if (icdata->dinfo.flags.isindirect) { |
| 605 | |
| 606 | bt_pred_hit = ir->bt; |
| 607 | // if jmpl, use prediction table |
| 608 | // if ret/retl, use RAS |
| 609 | if (icdata->dinfo.flags.is_ret|icdata->dinfo.flags.is_retl) { |
| 610 | |
| 611 | perf_stats[ps_ras_refs]++; |
| 612 | tdata[cpuid]->pred_npc = tdata[cpuid]->ras->pop() + 8; |
| 613 | |
| 614 | if (tdata[cpuid]->pred_amask) { |
| 615 | tdata[cpuid]->pred_npc &= rz3_amask_mask; |
| 616 | } |
| 617 | if (tdata[cpuid]->pred_npc == ir->ea_va) { |
| 618 | } else { |
| 619 | tdata[cpuid]->ras->clear(); |
| 620 | perf_stats[ps_ras_misses]++; |
| 621 | } |
| 622 | |
| 623 | } else if ( ((ir->instr >> 25) & 0x1f) == 15 ) { |
| 624 | |
| 625 | // push unless following (delay-slot) instr is restore |
| 626 | tdata[cpuid]->ras->push(pc); |
| 627 | tdata[cpuid]->call_delay_slot = true; |
| 628 | |
| 629 | tdata[cpuid]->pred_npc = tdata[cpuid]->jmpl_table->get(pc >> 2); |
| 630 | if (tdata[cpuid]->pred_amask) { |
| 631 | tdata[cpuid]->pred_npc &= rz3_amask_mask; |
| 632 | } |
| 633 | if (tdata[cpuid]->pred_npc != ir->ea_va) { // we are going to see an ea_va misprediction (pred_ea_va is set to pred_npc for dctis) |
| 634 | tdata[cpuid]->jmpl_table->set(pc>>2, ir->ea_va); |
| 635 | } |
| 636 | |
| 637 | } // is this a ret/retl or indirect call? |
| 638 | /* else do nothing */ |
| 639 | } else { |
| 640 | bt_pred_hit = ! ir->bt; |
| 641 | } // what type of dcti? |
| 642 | |
| 643 | // bt pred |
| 644 | if (!bt_pred_hit) { |
| 645 | instr_preds &= instr_mispred_bt; |
| 646 | } |
| 647 | |
| 648 | // ea_valid pred: predict ea_valid is true |
| 649 | if (!ir->ea_valid) { |
| 650 | instr_preds &= instr_mispred_ea_valid; |
| 651 | perf_stats[ps_ea_valid_misses]++; |
| 652 | } |
| 653 | |
| 654 | // ea_va: predict pred_npc is ea_va |
| 655 | if (tdata[cpuid]->pred_npc == ir->ea_va) { |
| 656 | sdata->bitarrays[dcti_ea_va_pred_array]->Push(1); |
| 657 | } else { |
| 658 | sdata->bitarrays[dcti_ea_va_pred_array]->Push(0); |
| 659 | sdata->bitarrays[raw_value64_array]->Push(ir->ea_va); |
| 660 | |
| 661 | // at this point we know the real ea_va. predict npc=ea_va |
| 662 | tdata[cpuid]->pred_npc = ir->ea_va; |
| 663 | } |
| 664 | |
| 665 | // annul flag for *next* instr |
| 666 | if (icdata->dinfo.flags.annul_flag) { |
| 667 | if ((icdata->dinfo.flags.iscbranch && !ir->bt) || icdata->dinfo.flags.isubranch) { |
| 668 | tdata[cpuid]->pred_an = 1; |
| 669 | } |
| 670 | } |
| 671 | |
| 672 | } // rstzip3::compress_dcti() |
| 673 | |
| 674 | |
| 675 | // theres not much room for architectural compression |
| 676 | // here, except in case of value traces. all we do here |
| 677 | // is not store rtype and unused fields. |
| 678 | void rstzip3::compress_tlb(rstf_unionT * rstbuf, int idx) |
| 679 | { |
| 680 | rstf_tlbT *tr = &(rstbuf[idx].tlb); |
| 681 | // pack demap(25), tlb_index(24:9), tlb_type(8), tlb_no(7:6), cpuid(5:0) into a single |
| 682 | // 26-bit field. we thus save only 38 bits/tlb record. |
| 683 | // pack demap(29), tlb_index(28:13), tlb_type(12), tlb_no(11:10), cpuid(9:0) into a single |
| 684 | // 30-bit field. we thus save only 34 bits/tlb record. |
| 685 | int cpuid = rstf_pre212 ? tr->cpuid : rstf_tlbT_get_cpuid(tr); |
| 686 | |
| 687 | uint32_t tlb_info = (tr->demap<<29) | (((uint32_t)tr->tlb_index) << 13) | (tr->tlb_type << 12) |
| 688 | | (tr->tlb_no << 10) | cpuid; |
| 689 | sdata->bitarrays[tlb_info_array]->Push(tlb_info); |
| 690 | |
| 691 | sdata->bitarrays[raw_value64_array]->Push(tr->tte_tag); |
| 692 | sdata->bitarrays[raw_value64_array]->Push(tr->tte_data); |
| 693 | |
| 694 | |
| 695 | } // void rstzip3::compress_tlb(rstf_unionT * rstbuf, int idx) |
| 696 | |
| 697 | |
| 698 | // try to predict pc and npc. |
| 699 | // at the time of this writing, trap records occur *before* the |
| 700 | // instr record at the time the trap occurred. |
| 701 | // For future RST versions, we will change this assumption if necessary |
| 702 | void rstzip3::compress_trap(rstf_unionT * rstbuf, int idx) |
| 703 | { |
| 704 | rstf_trapT * tr = &(rstbuf[idx].trap); |
| 705 | |
| 706 | // predict cpuid as the predicted cpuid of the next instr |
| 707 | int cpuid = rstf_pre212 ? tr->cpuid : rstf_trapT_get_cpuid(tr); |
| 708 | |
| 709 | if (cpuid == pred_cpuid) { |
| 710 | sdata->bitarrays[cpuid_pred_array]->Push(1); |
| 711 | } else { |
| 712 | sdata->bitarrays[cpuid_pred_array]->Push(0); |
| 713 | sdata->bitarrays[raw_cpuid_array]->Push(cpuid); |
| 714 | } |
| 715 | |
| 716 | if (tdata[cpuid] == NULL) { |
| 717 | tdata[cpuid] = new rz3_percpu_data(cpuid); |
| 718 | } |
| 719 | |
| 720 | // put is_async(48), tl(47:44), ttype(43:34), pstate(33:18), syscall(17:2), pc_pred(1), npc_pred(0) |
| 721 | // in one 49-bit field |
| 722 | uint64_t trap_info = (((uint64_t)tr->is_async) << 48) | (((uint64_t)tr->tl) << 44) | (((uint64_t)tr->ttype) << 34) | |
| 723 | (((uint64_t)tr->pstate) << 18) | (((uint64_t)tr->syscall) << 2); |
| 724 | |
| 725 | uint64_t pred_pc = tdata[cpuid]->pred_pc; |
| 726 | uint64_t pred_npc; |
| 727 | if (tr->pc == pred_pc) { |
| 728 | trap_info |= 2ull; |
| 729 | pred_npc = tdata[cpuid]->pred_npc; |
| 730 | } else { |
| 731 | sdata->bitarrays[raw_value64_array]->Push(tr->pc); |
| 732 | |
| 733 | pred_npc = tr->pc + 4; |
| 734 | } |
| 735 | |
| 736 | if (tr->npc == pred_npc) { |
| 737 | trap_info |= 1ull; |
| 738 | } else { |
| 739 | sdata->bitarrays[raw_value64_array]->Push(tr->npc); |
| 740 | |
| 741 | } |
| 742 | |
| 743 | sdata->bitarrays[trap_info_array]->Push(trap_info); |
| 744 | } // void rstzip3::compress_trap(rstf_unionT * rstbuf, int idx) |
| 745 | |
| 746 | |
| 747 | void rstzip3::compress_preg(rstf_unionT * rstbuf, int idx) |
| 748 | { |
| 749 | rstf_pregT * pr = &(rstbuf[idx].preg); |
| 750 | |
| 751 | // cpuid: predict same as previous instr cpuid |
| 752 | int cpuid = rstf_pre212 ? pr->cpuid : rstf_pregT_get_cpuid(pr); |
| 753 | int cpuid_pred = (cpuid==pred_cpuid) ? 1 : 0; |
| 754 | if (!cpuid_pred) { |
| 755 | sdata->bitarrays[raw_cpuid_array]->Push(cpuid); |
| 756 | } |
| 757 | |
| 758 | // pack cpuid_pred[61], primD[60:48], secD[47:35] asiReg{34:27], traplevel[26:24], traptype[23:16], pstate[15:0] in one 64-bit value |
| 759 | uint64_t preg_info = (((uint64_t)cpuid_pred) << 61) | (((uint64_t)pr->primD) << 48) | (((uint64_t)pr->secD) << 35) | |
| 760 | (((uint64_t)pr->asiReg) << 27) | (((uint64_t)pr->traplevel) << 24) | (((uint64_t)pr->traptype) << 16) | ((uint64_t)pr->pstate); |
| 761 | sdata->bitarrays[raw_value64_array]->Push(preg_info); |
| 762 | |
| 763 | |
| 764 | // primA and secA are not used - ignore |
| 765 | } // void rstzip3::compress_preg(rstf_unionT * rstbuf, int idx) |
| 766 | |
| 767 | void rstzip3::compress_dma(rstf_unionT * rstbuf, int idx) |
| 768 | { |
| 769 | rstf_dmaT * dr = &(rstbuf[idx].dma); |
| 770 | sdata->bitarrays[dma_iswrite_array]->Push(dr->iswrite); |
| 771 | sdata->bitarrays[dma_nbytes_array]->Push(dr->nbytes); |
| 772 | sdata->bitarrays[raw_value64_array]->Push(dr->start_pa); |
| 773 | sdata->bitarrays[raw_value64_array]->Push(dr->devid); |
| 774 | } // void rstzip3::compress_dma(rstf_unionT * rstbuf, int idx) |
| 775 | |
| 776 | void rstzip3::compress_regval(rstf_unionT * rstbuf, int idx) |
| 777 | { |
| 778 | // for now, try to compress the reg64 fields using the same mechanism as ea_va compression |
| 779 | rstf_regvalT * vr = &(rstbuf[idx].regval); |
| 780 | |
| 781 | // cpuid |
| 782 | int cpuid = rstf_pre212 ? vr->cpuid : rstf_regvalT_get_cpuid(vr); |
| 783 | |
| 784 | if (cpuid == last_instr_cpuid) { |
| 785 | sdata->bitarrays[cpuid_pred_array]->Push(1); |
| 786 | } else { |
| 787 | sdata->bitarrays[cpuid_pred_array]->Push(0); |
| 788 | sdata->bitarrays[raw_cpuid_array]->Push(cpuid); |
| 789 | } |
| 790 | |
| 791 | // tdata |
| 792 | if (tdata[cpuid] == NULL) { |
| 793 | tdata[cpuid] = new rz3_percpu_data(cpuid); |
| 794 | } |
| 795 | |
| 796 | // postInstr |
| 797 | sdata->bitarrays[regval_postInstr_array]->Push(vr->postInstr); |
| 798 | |
| 799 | #if 0 |
| 800 | // if prev instr can be emulated, regenerate values using emulation |
| 801 | if (regen_value(vr, idx)) return; // FIXME: testing |
| 802 | if (vr->regtype[0] == RSTREG_INT_RT) { |
| 803 | tdata[cpuid]->regs[vr->regid[0]] = vr->reg64[0]; |
| 804 | } |
| 805 | if (vr->regtype[1] == RSTREG_INT_RT) { |
| 806 | tdata[cpuid]->regs[vr->regid[1]] = vr->reg64[1]; |
| 807 | } |
| 808 | #endif |
| 809 | |
| 810 | // regtype, regid |
| 811 | uint64_t prev_pc = tdata[cpuid]->prev_pc; |
| 812 | int regtype_tbl_idx = (prev_pc >> 2) & (rz3_percpu_data::rz3_tdata_regval_regtype_tbl_size-1); |
| 813 | int regid_tbl_idx = (prev_pc >> 2) & (rz3_percpu_data::rz3_tdata_regval_regid_tbl_size-1); |
| 814 | |
| 815 | int k; |
| 816 | for (k=0; k<2; k++) { |
| 817 | |
| 818 | // predict regtype: use prev_instr |
| 819 | |
| 820 | uint8_t pred_regtype = tdata[cpuid]->regval_regtype_tbl[k][regtype_tbl_idx]; |
| 821 | |
| 822 | if (pred_regtype == vr->regtype[k]) { |
| 823 | sdata->bitarrays[regval_regtype_pred_array]->Push(1); |
| 824 | } else { |
| 825 | sdata->bitarrays[regval_regtype_pred_array]->Push(0); |
| 826 | sdata->bitarrays[regval_raw_regtype_array]->Push(vr->regtype[k]); |
| 827 | tdata[cpuid]->regval_regtype_tbl[k][regtype_tbl_idx] = vr->regtype[k]; |
| 828 | } |
| 829 | |
| 830 | if (vr->regtype[k] != RSTREG_UNUSED_RT) { |
| 831 | |
| 832 | // regid |
| 833 | uint8_t pred_regid = tdata[cpuid]->regval_regid_tbl[k][regid_tbl_idx]; |
| 834 | if (prev_rtype == REGVAL_T) { // probably in save/restore code: predict regid = prev_regid+2 |
| 835 | pred_regid += 2; |
| 836 | } |
| 837 | if (pred_regid == vr->regid[k]) { |
| 838 | sdata->bitarrays[regval_regid_pred_array]->Push(1); |
| 839 | } else { |
| 840 | sdata->bitarrays[regval_regid_pred_array]->Push(0); |
| 841 | sdata->bitarrays[regval_raw_regid_array]->Push(vr->regid[k]); |
| 842 | } |
| 843 | // we always update update the table. |
| 844 | // even if our prediction is correct, the predicted value is different from the value read from the table in case of save/restore |
| 845 | tdata[cpuid]->regval_regid_tbl[k][regid_tbl_idx] = vr->regid[k]; |
| 846 | |
| 847 | // reg64 |
| 848 | uint64_t v64 = vr->reg64[k]; |
| 849 | |
| 850 | if ((vr->regtype[k] == RSTREG_INT_RT) && (vr->regid[k] == 0)) { |
| 851 | if (v64 != 0x0) { |
| 852 | if (g0_nonzero_warn) { |
| 853 | fprintf(stderr, "warning: rz3: compress_regval: int reg %%g0 has non-zero value %llx. will be ignored\n", v64); |
| 854 | if (!verbose) { |
| 855 | fprintf(stderr, " (further %%g0!=0 warnings will be suppressed)\n"); |
| 856 | g0_nonzero_warn = false; |
| 857 | } |
| 858 | } |
| 859 | } |
| 860 | } |
| 861 | |
| 862 | if (v64 == 0) { |
| 863 | sdata->bitarrays[value_iszero_array]->Push(1); |
| 864 | } else { |
| 865 | static int regval_vc_refs = 0; |
| 866 | static int regval_vc_hits = 0; |
| 867 | sdata->bitarrays[value_iszero_array]->Push(0); |
| 868 | regval_vc_refs++; |
| 869 | if (compress_value(cpuid, v64)) { |
| 870 | regval_vc_hits++; |
| 871 | } else { |
| 872 | } |
| 873 | |
| 874 | if (regval_vc_refs % 1000 == 0) { |
| 875 | // printf("regval vc refs %d hits %d (%0.4f%%)\n", regval_vc_refs, regval_vc_hits, 100.0*regval_vc_hits/regval_vc_refs); |
| 876 | } |
| 877 | } |
| 878 | |
| 879 | } // if regtype != UNUSED |
| 880 | } // for reg field = 0,1 |
| 881 | } // rstzip3::compress_regval |
| 882 | |
| 883 | void rstzip3::compress_memval(rstf_unionT * rstbuf, int idx) |
| 884 | { |
| 885 | // rtype: in raw rtype array |
| 886 | // ismemval128: raw |
| 887 | |
| 888 | // addrisVA: raw |
| 889 | // isContRec: ignore for m64; raw for m128 |
| 890 | // cpuid: same as predicted cpuid for next instr |
| 891 | |
| 892 | // memval64.size: store raw size |
| 893 | // memval64.addr: use valuecache |
| 894 | // memval64.val: use valuecache |
| 895 | |
| 896 | // memval128.addr36_43: ignore if isContRec; raw otherwise |
| 897 | // memval128.addr04_35: ignore if isContReg; raw otherwise |
| 898 | |
| 899 | // memval128.val[]: use valuecache |
| 900 | |
| 901 | rstf_memval64T * m64 = & (rstbuf[idx].memval64); |
| 902 | rstf_memval128T * m128 = & (rstbuf[idx].memval128); |
| 903 | |
| 904 | sdata->bitarrays[memval_fields_array]->Push(m128->ismemval128); |
| 905 | sdata->bitarrays[memval_fields_array]->Push(! m128->addrisVA); |
| 906 | |
| 907 | // cpuid |
| 908 | int cpuid = rstf_pre212 ? m128->cpuid : rstf_memval128T_get_cpuid(m128); |
| 909 | if (cpuid == pred_cpuid) { |
| 910 | sdata->bitarrays[cpuid_pred_array]->Push(1); |
| 911 | } else { |
| 912 | sdata->bitarrays[cpuid_pred_array]->Push(0); |
| 913 | sdata->bitarrays[raw_cpuid_array]->Push(cpuid); |
| 914 | } |
| 915 | |
| 916 | if (tdata[cpuid] == NULL) { |
| 917 | tdata[cpuid] = new rz3_percpu_data(cpuid); |
| 918 | } |
| 919 | |
| 920 | if (m128->ismemval128) { |
| 921 | sdata->bitarrays[memval_fields_array]->Push(m128->isContRec); |
| 922 | if (! m128->isContRec) { |
| 923 | sdata->bitarrays[memval_addr36_43_array]->Push(m128->addr36_43); |
| 924 | sdata->bitarrays[memval_addr04_35_array]->Push(m128->addr04_35); |
| 925 | } |
| 926 | |
| 927 | // vals |
| 928 | |
| 929 | compress_value(cpuid, m128->val[0]); |
| 930 | |
| 931 | compress_value(cpuid, m128->val[1]); |
| 932 | |
| 933 | } else /* memval64 */ { |
| 934 | sdata->bitarrays[memval_size_array]->Push(m64->size-1); |
| 935 | |
| 936 | |
| 937 | // predict addr using valuecache |
| 938 | compress_value(cpuid, m64->addr); |
| 939 | compress_value(cpuid, m64->val); |
| 940 | |
| 941 | } |
| 942 | |
| 943 | } // compress_memval |
| 944 | |
| 945 | |
| 946 | void rstzip3::compress_rfs_cw(rstf_unionT * rstbuf, int idx) |
| 947 | { |
| 948 | rstf_cachewarmingT *cw = &(rstbuf[idx].cachewarming); |
| 949 | |
| 950 | // there is no architectural method to predict reftype. |
| 951 | sdata->bitarrays[rfs_cw_raw_reftype_array]->Push(cw->reftype); |
| 952 | |
| 953 | // dont predict cpuid |
| 954 | |
| 955 | int cpuid; |
| 956 | |
| 957 | if ((cw->reftype == cw_reftype_DMA_R) || (cw->reftype == cw_reftype_DMA_W)) { |
| 958 | cpuid = 0; |
| 959 | } else { |
| 960 | cpuid = rstf_cachewarmingT_get_cpuid(cw); |
| 961 | } |
| 962 | |
| 963 | if (tdata[cpuid] == NULL) { |
| 964 | // fprintf(stderr, "compress_rfs_cw: new cpuid %d\n", cpuid); |
| 965 | tdata[cpuid] = new rz3_percpu_data(cpuid); |
| 966 | } |
| 967 | |
| 968 | sdata->bitarrays[rfs_raw_cpuid_array]->Push(cpuid); |
| 969 | |
| 970 | if ((cw->reftype == cw_reftype_DMA_R)|| (cw->reftype == cw_reftype_DMA_W)) { |
| 971 | sdata->bitarrays[raw_value64_array]->Push(cw->pa); |
| 972 | |
| 973 | sdata->bitarrays[rfs_cw_dma_size_array]->Push(cw->refinfo.dma_size); |
| 974 | } else { |
| 975 | // asi |
| 976 | sdata->bitarrays[rfs_cw_asi_array]->Push(cw->refinfo.s.asi); |
| 977 | |
| 978 | // fcn |
| 979 | if (cw->reftype==cw_reftype_PF_D) { |
| 980 | sdata->bitarrays[rfs_cw_pf_fcn_array]->Push(cw->refinfo.s.fcn); |
| 981 | } |
| 982 | |
| 983 | // va_valid |
| 984 | sdata->bitarrays[rfs_cw_va_valid_array]->Push(cw->refinfo.s.va_valid); |
| 985 | |
| 986 | if (cw->refinfo.s.va_valid) { |
| 987 | |
| 988 | compress_value(cpuid, cw->va); |
| 989 | |
| 990 | // tlb hit/miss |
| 991 | uint64_t pred_pa; |
| 992 | if (cw->reftype == cw_reftype_I) { |
| 993 | pred_pa = tdata[cpuid]->itlb->get(cw->va>>13) << 13; |
| 994 | } else { |
| 995 | pred_pa = tdata[cpuid]->dtlb->get(cw->va>>13) << 13; |
| 996 | } |
| 997 | pred_pa |= (cw->va & 0x1fffull); |
| 998 | if (pred_pa != cw->pa) { |
| 999 | sdata->bitarrays[rfs_cw_pa_pred_array]->Push(0); |
| 1000 | sdata->bitarrays[raw_value64_array]->Push(cw->pa); |
| 1001 | |
| 1002 | if (cw->reftype == cw_reftype_I) { |
| 1003 | tdata[cpuid]->itlb->set(cw->va>>13, cw->pa>>13); |
| 1004 | } else { |
| 1005 | tdata[cpuid]->dtlb->set(cw->va>>13, cw->pa>>13); |
| 1006 | } |
| 1007 | } else { |
| 1008 | sdata->bitarrays[rfs_cw_pa_pred_array]->Push(1); |
| 1009 | } |
| 1010 | } else /* va invalid - no way to predict pa? */ { |
| 1011 | sdata->bitarrays[raw_value64_array]->Push(cw->pa); |
| 1012 | } |
| 1013 | } |
| 1014 | } // rstzip3::compress_rfs_cw(rstf_unionT * rstbuf, int idx) |
| 1015 | |
| 1016 | |
| 1017 | void rstzip3::compress_rfs_bt(rstf_unionT * rstbuf, int idx) |
| 1018 | { |
| 1019 | rstf_bpwarmingT * bt = &(rstbuf[idx].bpwarming); |
| 1020 | |
| 1021 | // a bt record consists of cpuid, taken, instr, pc_va, npc_va |
| 1022 | |
| 1023 | // no easy way to compress cpuid: store raw |
| 1024 | int cpuid = rstf_bpwarmingT_get_cpuid(bt); |
| 1025 | sdata->bitarrays[rfs_raw_cpuid_array]->Push(cpuid); |
| 1026 | if (tdata[cpuid] == NULL) { |
| 1027 | tdata[cpuid] = new rz3_percpu_data(cpuid); |
| 1028 | } |
| 1029 | |
| 1030 | // pc |
| 1031 | uint64_t pred_pc = tdata[cpuid]->rfs_pc_pred_table->get(tdata[cpuid]->rfs_prev_npc); |
| 1032 | if (pred_pc == bt->pc_va) { |
| 1033 | sdata->bitarrays[rfs_pc_pred_array]->Push(1); |
| 1034 | } else { |
| 1035 | sdata->bitarrays[rfs_pc_pred_array]->Push(0); |
| 1036 | sdata->bitarrays[raw_value64_array]->Push(bt->pc_va>>2); |
| 1037 | |
| 1038 | tdata[cpuid]->rfs_pc_pred_table->set(tdata[cpuid]->rfs_prev_npc, bt->pc_va); |
| 1039 | } |
| 1040 | |
| 1041 | // instr: use icache |
| 1042 | rz3iu_icache_data * icdata = tdata[cpuid]->icache->get(bt->pc_va); |
| 1043 | uint32_t instr = bt->instr; |
| 1044 | if ((icdata == NULL) || (icdata->instr != instr)) { |
| 1045 | // ic miss |
| 1046 | sdata->bitarrays[rfs_instr_pred_array]->Push(0); |
| 1047 | sdata->bitarrays[raw_instr_array]->Push(instr); |
| 1048 | icdata = tdata[cpuid]->icache->set(bt->pc_va, instr, rstzip3_major_version, rstzip3_minor_version); |
| 1049 | icdata->gen_target(bt->pc_va); |
| 1050 | } else { |
| 1051 | sdata->bitarrays[rfs_instr_pred_array]->Push(1); |
| 1052 | } |
| 1053 | |
| 1054 | // bt |
| 1055 | int bt_pred_hit; |
| 1056 | if (icdata->dinfo.flags.iscbranch) { |
| 1057 | bt_pred_hit = tdata[cpuid]->bp->pred_hit(bt->pc_va, bt->taken); |
| 1058 | if (!bt_pred_hit) perf_stats[ps_brpred_misses]++; |
| 1059 | } else if (icdata->dinfo.flags.isubranch && icdata->dinfo.flags.isubranch_nottaken) { |
| 1060 | bt_pred_hit = ! bt->taken; // in other words, we predict uncond nt branches as not taken. if the taken bit is 0, then our prediction is correct (1) and vice versa |
| 1061 | } else { |
| 1062 | bt_pred_hit = bt->taken; // in other words, we predict all other branches as taken |
| 1063 | } |
| 1064 | |
| 1065 | sdata->bitarrays[rfs_bt_pred_array]->Push(bt_pred_hit); |
| 1066 | |
| 1067 | // target |
| 1068 | uint64_t pred_npc_va; |
| 1069 | if (bt->taken) { |
| 1070 | pred_npc_va = icdata->target; |
| 1071 | } else { |
| 1072 | pred_npc_va = bt->pc_va + 8; |
| 1073 | } |
| 1074 | if (pred_npc_va == bt->npc_va) { |
| 1075 | sdata->bitarrays[dcti_ea_va_pred_array]->Push(1); |
| 1076 | } else { |
| 1077 | sdata->bitarrays[dcti_ea_va_pred_array]->Push(0); |
| 1078 | sdata->bitarrays[raw_value64_array]->Push(bt->npc_va); |
| 1079 | } |
| 1080 | |
| 1081 | tdata[cpuid]->rfs_prev_npc = bt->npc_va; |
| 1082 | |
| 1083 | tdata[cpuid]->pred_pc = tdata[cpuid]->rfs_pc_pred_table->get(bt->npc_va); |
| 1084 | |
| 1085 | } // rstzip3::compress_rstf_bt(rfs_unionT * rstbuf, int idx) |
| 1086 | |
| 1087 | |
| 1088 | |
| 1089 | // return true if could compress using valuecache |
| 1090 | bool rstzip3::compress_value(int cpuid, uint64_t v64) |
| 1091 | { |
| 1092 | if (tdata[cpuid] == NULL) { |
| 1093 | tdata[cpuid] = new rz3_percpu_data(cpuid); |
| 1094 | } |
| 1095 | |
| 1096 | uint64_t key; |
| 1097 | int level = tdata[cpuid]->valuecache->Ref(v64, key); |
| 1098 | sdata->bitarrays[valuecache_level_array]->Push(level); |
| 1099 | sdata->bitarrays[valuecache_data0_array+level]->Push(key); |
| 1100 | |
| 1101 | return (level < 7); |
| 1102 | } |
| 1103 | |
| 1104 | |
| 1105 | |
| 1106 | |
| 1107 | |
| 1108 | #if 0 // leave this obsolete code in here. it is useful for making sense of the decompress_pavadiff_v315 code in decompress_engine.C |
// OBSOLETE (compiled out by the surrounding #if 0): the v3.15 pavadiff
// compressor, kept only as a reference for decompress_pavadiff_v315 in
// decompress_engine.C. A pavadiff record carries pa-va translation
// information; pc_pa_va/ea_pa_va are predicted with per-cpu tlb simulators
// keyed by the va of the NEXT instr record on the same cpu.
void rstzip3::compress_pavadiff_v315(rstf_unionT * rstbuf, int idx)
{
  rstf_pavadiffT * dr = &(rstbuf[idx].pavadiff);
  int cpuid = rstf_pavadiffT_get_cpuid(dr);

  // check and predict cpuid
  if (pred_cpuid == cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }
  pred_cpuid = cpuid;

  // lazily allocate per-cpu predictor state
  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // predict icontext the same as prev icontext
  if (tdata[cpuid]->pred_icontext == dr->icontext) {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_ictxt_array]->Push(dr->icontext);
    tdata[cpuid]->pred_icontext = dr->icontext;
  }

  // dcontext - predict same as prev dcontext for this cpu
  if (tdata[cpuid]->pred_dcontext == dr->dcontext) {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_dctxt_array]->Push(dr->dcontext);
    tdata[cpuid]->pred_dcontext = dr->dcontext;
  }

  // scan ahead for the next instr record on this cpu: its pc_va/ea_va
  // are the keys used to query (and train) the tlb simulators below
  bool found_pc_va = false;
  uint64_t nextpc_va;
  bool found_ea_va = false;
  uint64_t nextea_va;

  int i;
  for (i=idx+1; i<shdr->nrecords; i++) {
    if (rstbuf[i].proto.rtype == INSTR_T) {
      if (rstf_instrT_get_cpuid(&rstbuf[i].instr) == cpuid) {
        nextpc_va = rstbuf[i].instr.pc_va;
        found_pc_va = (nextpc_va != 0x0);
        if (dr->ea_valid && rstbuf[i].instr.ea_valid) { // we only care about ea_va if dr->ea_valid
          nextea_va = rstbuf[i].instr.ea_va;
          found_ea_va = (nextea_va != 0x0);
        }
      } // if cpuid match
      // stop at the first instr record regardless of cpuid
      break;
    } // if instr
  } // for each subsequent record

  // ea_valid
  sdata->bitarrays[pavadiff_ea_valid_array]->Push(dr->ea_valid);

  // NOTE: pc_pa_va_hit (resp. ea_pa_va_hit) is left unset when the
  // corresponding va was not found AND the prediction misses; it is only
  // read below under a found_pc_va (resp. found_ea_va) guard, so the
  // uninitialized value is never observed.
  bool pc_pa_va_hit;
  bool ea_pa_va_hit;

  uint64_t pred_pa_va_diff;

  // pc_pa_va: tlb lookup keyed by the next instr's pc_va (8KB pages)
  if (found_pc_va) {
    pred_pa_va_diff = tdata[cpuid]->itlb->get(nextpc_va >> 13);
  } else {
    pred_pa_va_diff = 42; // some nonsensical value
  }

  if (pred_pa_va_diff == (dr->pc_pa_va>>13)) {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(1);
    pc_pa_va_hit = true;
  } else {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(0);
    sdata->bitarrays[raw_value64_array]->Push(dr->pc_pa_va);

    // train the itlb on the miss (only possible when we have a key)
    if (found_pc_va) {
      tdata[cpuid]->itlb->set(nextpc_va>>13, dr->pc_pa_va>>13);
      pc_pa_va_hit = false;
    }
  }


  if (dr->ea_valid) {

    // ea_pa_va - use next instr (if available) and a tlb simulator
    if (found_ea_va) {
      // tlb lookup
      pred_pa_va_diff = tdata[cpuid]->dtlb->get(nextea_va >> 13);
    } else {
      pred_pa_va_diff = 42; // some nonsensical value
    }

    if (pred_pa_va_diff == (dr->ea_pa_va >> 13)) {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(1);
      ea_pa_va_hit = true;
    } else {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(0);
      sdata->bitarrays[raw_value64_array]->Push(dr->ea_pa_va);

      // train the dtlb on the miss (only possible when we have a key)
      if (found_ea_va) {
        tdata[cpuid]->dtlb->set((nextea_va >> 13), (dr->ea_pa_va >> 13));
        ea_pa_va_hit = false;
      }
    }
  } else {
    ea_pa_va_hit = false;
  } // if ea_valid

  // the lookahead flag tells the decompressor to look for the next instr (to update the tlb)
  // if we predicted pc_pa_va and/or ea_pa_va correctly, the decompressor knows from the pred bit to lookahead.
  // we set the lookahead flag so that the decomprssor knows the difference between no prediction (could not find corresponding instr) and misprediction

  if ((found_pc_va && pc_pa_va_hit) || (dr->ea_valid && found_ea_va && ea_pa_va_hit)) {
    // dont need lookahead since the pc_pa_va_pred_array and/or the ea_pa_va_pred_array will indicate lookahead
  } else {
    // we need to indicate whether there was no prediction or misprediction(s)
    int lookahead_flag = (found_pc_va || found_ea_va);
    sdata->bitarrays[pavadiff_lookahead_array]->Push(lookahead_flag);
  }
} // rstzip3::compress_pavadiff_v315()
#endif // #if 0 (obsolete code - left here as a reference for the corresponding decompress code)
| 1232 | |
| 1233 | |
| 1234 | |