Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / rst / rstzip3 / rstzip_v3 / compress_engine.C
CommitLineData
920dae64
AT
1// ========== Copyright Header Begin ==========================================
2//
3// OpenSPARC T2 Processor File: compress_engine.C
4// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
5// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
6//
7// The above named program is free software; you can redistribute it and/or
8// modify it under the terms of the GNU General Public
9// License version 2 as published by the Free Software Foundation.
10//
11// The above named program is distributed in the hope that it will be
12// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15//
16// You should have received a copy of the GNU General Public
17// License along with this work; if not, write to the Free Software
18// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19//
20// ========== Copyright Header End ============================================
21/* compress_engine.C */
22
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <assert.h>
27
28#include "rstf/rstf.h"
29
30#if defined(ARCH_AMD64)
31#include "rstf/rstf_convert.h"
32#endif
33
34#include "rstzip3.h"
35#include "rz3_section.h"
36
37#include "rz3iu.h"
38
/* debug stuff */
// Compile-time debug switches for this translation unit: enable verbose
// tracing of the return-address-stack predictor (dbg_ras) and of the
// regval regid/regtype predictors (dbg_regid). Both are off by default.
static const bool dbg_ras = false;
static const bool dbg_regid = false;
42
43
44// rstbufsize <= rz3_bufsize
45int rstzip3::compress_buffer(rstf_unionT * rstbuf, int rstbufsize)
46{
47
48 shdr->clear();
49 sdata->clear();
50
51 // set shdr->clearflag if records_since_prev_clear >= clear_interval
52 // clear predictor tables in tdata if shdr->clearflag is set
53
54 // if (verbose) clear_stats();
55 clear_stats();
56
57 // write record count to header
58 shdr->nrecords = rstbufsize;
59
60 int i;
61 for (i=0; i<rstbufsize; i++) {
62 if (rfs_phase) {
63 if (rfs_cw_phase) {
64 if (rstbuf[i].proto.rtype == RFS_CW_T) {
65 sdata->bitarrays[rfs_rtype_pred_array]->Push(1);
66 rfs_records_seen++;
67 if (rfs_records_seen == rfs_nrecords) {
68 rfs_phase = rfs_cw_phase = false;
69 }
70 } else /* rfs cw rtype mispred */ {
71 sdata->bitarrays[rfs_rtype_pred_array]->Push(0);
72 sdata->bitarrays[rtype_array]->Push(rstbuf[i].proto.rtype);
73 rfs_phase = rfs_cw_phase = false;
74 } // rfs cw rtype pred
75 } else if (rfs_bt_phase) {
76 if (rstbuf[i].proto.rtype == RFS_BT_T) {
77 sdata->bitarrays[rfs_rtype_pred_array]->Push(1);
78 rfs_records_seen++;
79 if (rfs_records_seen == rfs_nrecords) {
80 rfs_phase = rfs_bt_phase = false;
81 }
82 } else /* rfs bt rtype mispred */ {
83 sdata->bitarrays[rfs_rtype_pred_array]->Push(0);
84 sdata->bitarrays[rtype_array]->Push(rstbuf[i].proto.rtype);
85 rfs_phase = rfs_bt_phase = false;
86 } // rfs bt rtype pred
87 } // which rfs phase? */
88 } else /* regular rst phase */ {
89 // rtype compression
90 if (rstbuf[i].proto.rtype == INSTR_T) {
91 sdata->bitarrays[rtype_key_array]->Push(rtype_key_INSTR);
92 } else if (rstbuf[i].proto.rtype == REGVAL_T) {
93 sdata->bitarrays[rtype_key_array]->Push(rtype_key_REGVAL);
94 } else if (rstbuf[i].proto.rtype == PAVADIFF_T) {
95 sdata->bitarrays[rtype_key_array]->Push(rtype_key_PAVADIFF);
96 } else {
97 sdata->bitarrays[rtype_key_array]->Push(rtype_key_RAW);
98 sdata->bitarrays[rtype_array]->Push(rstbuf[i].proto.rtype);
99 }
100 } // phase: rfs cw, rfs bt or regular rst?
101
102 switch(rstbuf[i].proto.rtype) {
103 case INSTR_T:
104 compress_inst(rstbuf, i);
105 break;
106 case REGVAL_T:
107 compress_regval(rstbuf, i);
108 break;
109 case PAVADIFF_T:
110 compress_pavadiff(rstbuf, i);
111 break;
112 case TLB_T:
113 compress_tlb(rstbuf, i);
114 break;
115 case PREG_T:
116 compress_preg(rstbuf, i);
117 break;
118 case TRAP_T:
119 compress_trap(rstbuf, i);
120 break;
121 case DMA_T:
122 compress_dma(rstbuf, i);
123 break;
124 case MEMVAL_T:
125 compress_memval(rstbuf, i);
126 break;
127 case RFS_CW_T:
128 if ((rfs_records_seen == 0) && ! rfs_cw_phase) {
129 // in case there was no rfs preamble, section header etc.
130 rfs_phase = rfs_cw_phase = true;
131 rfs_nrecords = rfs_unknown_nrecords;
132 rfs_records_seen = 1;
133 }
134 compress_rfs_cw(rstbuf, i);
135 break;
136 case RFS_BT_T:
137 if ((rfs_records_seen == 0) && ! rfs_bt_phase) {
138 // in case there was no rfs preamble, section header etc.
139 rfs_phase = rfs_bt_phase = true;
140 rfs_nrecords = rfs_unknown_nrecords;
141 rfs_records_seen = 1;
142 }
143 compress_rfs_bt(rstbuf, i);
144 break;
145
146 case RSTHEADER_T:
147 // write raw records to output
148#if defined(ARCH_AMD64)
149 {
150 rstf_unionT temp;
151 memcpy(&temp, &rstbuf[i], sizeof(rstf_unionT));
152 rstf_convertT::l2b((rstf_uint8T*)&temp);
153 sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[0]));
154 sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[1]));
155 sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[2]));
156 }
157#else
158 sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[0]);
159 sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[1]);
160 sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[2]);
161#endif
162 if (rstbuf[i].header.majorVer*1000+rstbuf[i].header.minorVer <= 2011) {
163 rstf_pre212 = true;
164 }
165
166 break;
167
168 default:
169 // write raw records to output
170#if defined(ARCH_AMD64)
171 {
172 rstf_unionT temp;
173 memcpy(&temp, &rstbuf[i], sizeof(rstf_unionT));
174 rstf_convertT::l2b((rstf_uint8T*)&temp);
175 sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[0]));
176 sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[1]));
177 sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[2]));
178 }
179#else
180 sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[0]);
181 sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[1]);
182 sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[2]);
183#endif
184
185 if (rstbuf[i].proto.rtype == RFS_SECTION_HEADER_T) {
186 if (rstbuf[i].rfs_section_header.section_type == RFS_CW_T) {
187 rfs_phase = rfs_cw_phase = true;
188 rfs_nrecords = rstbuf[i].rfs_section_header.n_records;
189 rfs_records_seen = 0;
190 } else if (rstbuf[i].rfs_section_header.section_type == RFS_BT_T) {
191 rfs_phase = rfs_bt_phase = true;
192 rfs_nrecords = rstbuf[i].rfs_section_header.n_records;
193 rfs_records_seen = 0;
194 } // else - do nothing
195 } // if rfs section header
196
197 break;
198 } // what rtype? */
199
200 prev_rtype = rstbuf[i].proto.rtype;
201 } // for each record
202
203 sdata->update_counts();
204
205 if (stats) update_stats();
206
207 if (! shdr->write(gzf)) {
208 perror("ERROR: rstzip3::compress_Buffer(): could not write section header to output file\n");
209 return 0;
210 }
211
212 if (! sdata->write(gzf)) {
213 perror("ERROR: rstzip3::compress_buffer(): could not write section data to output file\n");
214 return 0;
215 }
216
217
218 if (verbose) {
219 fprintf(stderr, "Section %d\n", nsections);
220 sdata->print();
221 }
222
223 if (stats) print_stats();
224
225 nsections++;
226
227 return rstbufsize;
228} // rstzip3::compress_buffer
229
230
231static bool ds_indicates_tail_call(uint32_t instr) {
232 return (instr == MOV_G1_G7_INSTR) || ((instr & RESTORE_OPCODE_MASK) == RESTORE_OPCODE_BITS);
233}
234
// Compress one INSTR_T record. Predicts every field (cpuid, pc, annul,
// instruction word, tr/pr/hpriv bits, bt, ea_valid, ea_va) from per-cpu
// predictor state in tdata[]; a single all-predicted bit is emitted when
// everything hit, otherwise the instr_preds mispredict bitmask follows.
// Mispredicted fields are written raw. The decompressor maintains the
// same predictor state, so every state update here must be mirrored there.
void rstzip3::compress_inst(rstf_unionT * rstbuf, int idx)
{

  rstf_instrT *ir = &(rstbuf[idx].instr);

  // check cpuid
  // (pre-2.12 traces keep cpuid in a plain field; newer ones use the accessor)
  uint16_t cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir);
  if (pred_cpuid == cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }

  // predict cpuid. assume round robin. FIXME: for now, assump uP traces
  if (tdata[cpuid+1] == NULL) {
    pred_cpuid = 0;
  } else {
    pred_cpuid = cpuid+1;
  }
  last_instr_cpuid = cpuid;

  // lazily allocate per-cpu predictor state on first sighting of a cpu
  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // start with "everything predicted"; bits are cleared on each miss
  instr_preds = instr_mispred_none;

  // amask bit: if amask is 0, all 64-bits of pred_pc are used. if not, only the lower 32-bits are used
  // we check and set the amask bit on a pc misprediction. if the misprediction leaves the lower 32-bits unchanged
  // but differs in the upper 32-bits, we set/clear amask accordingly
  // check pc
  uint64_t pc = ir->pc_va;
  uint64_t pred_pc = tdata[cpuid]->pred_pc;
  bool pc_pred = (pred_pc == ir->pc_va);
  if (!pc_pred) {
    instr_preds &= instr_mispred_pc;

    sdata->bitarrays[raw_value64_array]->Push(pc);

    // is our amask to blame?
    if ((pc & rz3_amask_mask) == (pred_pc & rz3_amask_mask)) {
      // lower 32 bits match
      if ((pc >> 32) != 0) {
        // if amask was 1, it should be 0. if it was already zero, amask is not to blame, but set it to 0 anyway
        tdata[cpuid]->pred_amask = 0;
      } else {
        // if amask was 0, it should be 1. if it was already 1, we shouldn't be here.
        // (disabled diagnostic; kept for debugging)
        if (0 && tdata[cpuid]->pred_amask) {
          fprintf(stderr, "rz3: compress_inst: amask was set but predicted pc was > 32 bits: pred_pc %llx actual %llx\n", pred_pc, pc);
        }
        tdata[cpuid]->pred_amask = 1;
      }
    }

    tdata[cpuid]->pred_npc = pc+4;
  }

  // (pc, npc) <= (npc, npc+4)
  tdata[cpuid]->pred_pc = tdata[cpuid]->pred_npc;
  tdata[cpuid]->pred_npc += 4; // this may be updated later in case of CTIs

  tdata[cpuid]->prev_pc = pc;

  // check annul bit
  if (tdata[cpuid]->pred_an != ir->an) {
    instr_preds &= instr_mispred_an;
    perf_stats[ps_an_misses]++;
    // sdata->an_mispred_count++;
  }

  // predict and check instr
  // (per-cpu icache predicts the instruction word at this pc)
  rz3iu_icache_data * icdata = tdata[cpuid]->icache->get(pc);
  uint32_t instr = ir->instr;
  if ((icdata == NULL) || (icdata->instr != ir->instr)) {
    // ic miss
    instr_preds &= instr_mispred_instr;

    sdata->bitarrays[raw_instr_array]->Push(instr);

    icdata = tdata[cpuid]->icache->set(pc, instr, rstzip3_major_version, rstzip3_minor_version);

    // precompute the branch target for delay-slot CTIs (skip if annulled)
    if ((!ir->an) && icdata->dinfo.flags.isdcti) {
      icdata->gen_target(pc);
    }
  }
  tdata[cpuid]->last_instr = ir->an ? 0x0 : instr;

  // if this is a delay slot of a call instr, we need to pop ras if "restore" or mov_g1_g7 instr
  // (tail-call idiom: the pushed return address will never be used)
  if (tdata[cpuid]->call_delay_slot) {
    if ( ((instr & RESTORE_OPCODE_MASK) == RESTORE_OPCODE_BITS) || (instr == MOV_G1_G7_INSTR) ) {
      tdata[cpuid]->ras->pop();
    }
    tdata[cpuid]->call_delay_slot = false;
  }


  // tr and pr bits.
  // predict and set tr BEFORE decompress_ea_va because ea_valid prediction depends on the tr bit
  // tr is usually 0. we follow the convention of
  // inserting all 1's where possible. so we *invert* the tr bit
  if (ir->tr) {
    instr_preds &= instr_mispred_tr;
  }

  // for the hpriv bit, we predict it based on the previous instr
  // this is new in v3.20 and up
  uint32_t hpriv = rstf_pre212 ? 0 : ir->hpriv;
  if (hpriv != tdata[cpuid]->pred_hpriv) {
    instr_preds &= instr_mispred_hpriv;
    tdata[cpuid]->pred_hpriv = hpriv;
    if (hpriv) {
      tdata[cpuid]->pred_pr = 0;
    }
  }

  // for the pr bit, we predict it based on the previous instr
  if (ir->pr != tdata[cpuid]->pred_pr) {
    instr_preds &= instr_mispred_pr;
    tdata[cpuid]->pred_pr = ir->pr;
  }

  // predict ea_valid, ea_va, bt, NEXT-instr an

  if (!ir->an) {
    if (icdata->dinfo.flags.isdcti) {

      // delay-slot CTIs get their own bt/ea/npc prediction path
      compress_dcti(rstbuf, idx, icdata);

    } else /* not dcti */ {

      // predict bt == 0
      // (done/retry instrs are the exception: they behave like taken CTIs)
      int pred_bt = icdata->dinfo.flags.is_done_retry;
      if (pred_bt != ir->bt) {
        instr_preds &= instr_mispred_bt;
      }

      // ea_valid=1 for ld/st/pf
      int pred_ea_valid;
      if (icdata->is_ldstpf) {
        // FIXME: make sure this is not an internal ASI
        pred_ea_valid = 1;
      } else if (icdata->dinfo.flags.is_done_retry) {
        pred_ea_valid = 1;
      } else if (ir->tr) {
        pred_ea_valid = 1;
      } else {
        pred_ea_valid = 0;
      }

      if (pred_ea_valid != ir->ea_valid) {
        instr_preds &= instr_mispred_ea_valid;
        perf_stats[ps_ea_valid_misses]++;
      }

      if (ir->ea_valid) {
        compress_ea_va(rstbuf, idx);
      }

      // a non-CTI never annuls its successor
      tdata[cpuid]->pred_an = 0;
    } // dcti?
  } // if not annulled

  // emit 1 bit if everything hit, else 0 followed by the mispredict mask
  if (instr_preds == instr_mispred_none) {
    sdata->bitarrays[instr_pred_all_array]->Push(1);
  } else {
    sdata->bitarrays[instr_pred_all_array]->Push(0);
    sdata->bitarrays[instr_pred_raw_array]->Push(instr_preds);
  }

} // rstzip3::compress_inst()
406
407
408
409void rstzip3::compress_ea_va(rstf_unionT * rstbuf, int idx)
410{
411 rstf_instrT * ir = &(rstbuf[idx].instr);
412 uint16_t cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir);
413
414 // if value trace: predict ea using known reg values
415
416 // predict ea using the rz3 value cache
417 compress_value(cpuid, ir->ea_va);
418} // rstzip3::compress_ea_va
419
// Compress a PAVADIFF_T record (PA<->VA mapping info). icontext/dcontext
// are predicted from the previous values for this cpu. pc_pa_va/ea_pa_va
// are predicted by looking AHEAD in the buffer for the next INSTR_T on
// the same cpu and consulting the per-cpu itlb/dtlb models; the
// decompressor performs the identical lookahead, so state updates here
// must match it exactly.
void rstzip3::compress_pavadiff(rstf_unionT * rstbuf, int idx)
{
  // disabled breakpoint hook for debugging a specific record index
  if (0 && idx == 102577) {
    printf("debug: decompress_pavadiff idx %d\n", idx);
  }

  rstf_pavadiffT * dr = &(rstbuf[idx].pavadiff);
  int cpuid = rstf_pre212 ? dr->cpuid : rstf_pavadiffT_get_cpuid(dr);

  // check and predict cpuid
  if (pred_cpuid == cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }
  pred_cpuid = cpuid;

  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // predict icontext the same as prev icontext
  if (tdata[cpuid]->pred_icontext == dr->icontext) {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_ictxt_array]->Push(dr->icontext);
    tdata[cpuid]->pred_icontext = dr->icontext;
  }

  // dcontext - predict same as prev dcontext for this cpu
  if (tdata[cpuid]->pred_dcontext == dr->dcontext) {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_dctxt_array]->Push(dr->dcontext);
    tdata[cpuid]->pred_dcontext = dr->dcontext;
  }

  // lookahead results: the next same-cpu instr's pc_va/ea_va, if found
  bool found_pc_va = false;
  uint64_t nextpc_va;
  bool found_ea_va = false;
  uint64_t nextea_va;

  int i;
  for (i=idx+1; i<shdr->nrecords; i++) {
    if (rstbuf[i].proto.rtype == INSTR_T) {
      rstf_instrT * ir = &(rstbuf[i].instr);
      uint16_t i_cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir);

      if (i_cpuid == cpuid) {
        nextpc_va = ir->pc_va;
        found_pc_va = true;
        if (dr->ea_valid && ir->ea_valid) { // we only care about ea_va if dr->ea_valid
          nextea_va = ir->ea_va;
          found_ea_va = true;
        }
      } // if cpuid match
      break;
    } else if (rstbuf[i].proto.rtype == PAVADIFF_T) {
      rstf_pavadiffT * pd = &(rstbuf[i].pavadiff);
      uint16_t pd_cpuid = rstf_pre212 ? pd->cpuid : rstf_pavadiffT_get_cpuid(pd);
      if (pd_cpuid == cpuid) {
        // We ran into a second pavadiff record before seeing an instr record.
        // flag this as a no-pred (hence no lookahead).
        // If we don't do this, the decompression algorithm will break
        // because we only have a 1 item limit on the number of pending
        // pavadiffs to patch, and patching this pavadiff will break the next one.
        break;
      }
    } // if instr or pavadiff
  } // for each subsequent record

  // ea_valid
  sdata->bitarrays[pavadiff_ea_valid_array]->Push(dr->ea_valid);

  bool pc_pa_va_hit = false;
  bool ea_pa_va_hit = false;

  uint64_t pred_pa_va_diff;

  // pc_pa_va: predict via the per-cpu itlb model keyed by 8KB page number
  if (found_pc_va) {
    pred_pa_va_diff = tdata[cpuid]->itlb->get(nextpc_va >> 13);
    if (pred_pa_va_diff == (dr->pc_pa_va >> 13)) {
      pc_pa_va_hit = true;
    }
  }

  if (pc_pa_va_hit) {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(0);
    sdata->bitarrays[raw_value64_array]->Push(dr->pc_pa_va);


    // update the itlb model only if the decompressor can do the same
    if (found_pc_va) {
      if (0) printf("%d: cpu%d itlb update: %llx => %llx\n", idx, cpuid, nextpc_va, dr->pc_pa_va);
      tdata[cpuid]->itlb->set(nextpc_va>>13, dr->pc_pa_va>>13);
    }
  }


  if (dr->ea_valid) {
    // ea_pa_va - use next instr (if available) and a tlb simulator
    if (found_ea_va) {
      // tlb lookup
      pred_pa_va_diff = tdata[cpuid]->dtlb->get(nextea_va >> 13);
      if (pred_pa_va_diff == (dr->ea_pa_va >> 13)) {
        ea_pa_va_hit = true;
      }
    }

    if (ea_pa_va_hit) {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(1);
    } else {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(0);
      sdata->bitarrays[raw_value64_array]->Push(dr->ea_pa_va);

      if (found_ea_va) {
        if (0) printf("%d: cpu%d dtlb update: %llx => %llx\n", idx, cpuid, nextea_va, dr->ea_pa_va);
        tdata[cpuid]->dtlb->set((nextea_va >> 13), (dr->ea_pa_va >> 13));
      }
    }
  }

  // the lookahead flag tells the decompressor to look for the next instr (to update the tlb)
  // if we predicted pc_pa_va and/or ea_pa_va correctly, the decompressor knows from the pred bit to lookahead.
  // we set the lookahead flag so that the decomprssor knows the difference between no prediction (could not find corresponding instr) and misprediction(s)

  if ((found_pc_va && pc_pa_va_hit) || (dr->ea_valid && found_ea_va && ea_pa_va_hit)) {
    // dont need lookahead flag since the pc_pa_va_pred flag and/or the ea_pa_va_pred flag will indicate lookahead
  } else {
    // we need to indicate whether there was no prediction or misprediction(s)
    int lookahead_flag = (found_pc_va || found_ea_va);
    sdata->bitarrays[pavadiff_lookahead_array]->Push(lookahead_flag);
  }
} // void rstzip3::compress_pavadiff(rstf_unionT * rstbuf, int idx)
558
559
// predict bt, ea_valid, ea_va, NEXT-instr an for a dcti instr. also set pred_npc
//
// Called from compress_inst for delay-slot CTIs only. Uses per-cpu
// predictors: a branch predictor (bp) for conditional branches, a
// return-address stack (ras) for call/ret/retl, and a jmpl target table
// for indirect calls. Misprediction outcomes are accumulated into the
// member instr_preds; the caller emits the final prediction bits.
void rstzip3::compress_dcti(rstf_unionT * rstbuf, int idx, rz3iu_icache_data * icdata)
{
  rstf_instrT * ir = &(rstbuf[idx].instr);
  uint16_t cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir);
  uint64_t pc = ir->pc_va;

  int bt_pred_hit;

  if (icdata->dinfo.flags.iscbranch) {

    // use branch predictor
    bt_pred_hit = tdata[cpuid]->bp->pred_hit(pc, ir->bt);
    perf_stats[ps_brpred_refs]++;
    if (!bt_pred_hit) {
      perf_stats[ps_brpred_misses]++;
    }

    if (ir->bt) {
      tdata[cpuid]->pred_npc = icdata->target;
      // amask set => only the low 32 bits of the address are meaningful
      if (tdata[cpuid]->pred_amask) {
        tdata[cpuid]->pred_npc &= rz3_amask_mask;
      }
    } // else - pred_npc is already set to pc+8

  } else if (icdata->dinfo.flags.isubranch && ! icdata->dinfo.flags.isubranch_nottaken) {

    // pred_npc is branch target
    bt_pred_hit = ir->bt; // we predict taken. if not taken, we mispredict
    tdata[cpuid]->pred_npc = icdata->target;
    if (tdata[cpuid]->pred_amask) {
      tdata[cpuid]->pred_npc &= rz3_amask_mask;
    }
  } else if (icdata->dinfo.flags.iscall) {

    bt_pred_hit = ir->bt;
    tdata[cpuid]->pred_npc = icdata->target;
    if (tdata[cpuid]->pred_amask) {
      tdata[cpuid]->pred_npc &= rz3_amask_mask;
    }
    // push pc to ras unless following (delay slot) instr is restore
    // (compress_inst pops again if the delay slot turns out to be a tail call)
    tdata[cpuid]->ras->push(pc);
    tdata[cpuid]->call_delay_slot = true;

  } else if (icdata->dinfo.flags.isindirect) {

    bt_pred_hit = ir->bt;
    // if jmpl, use prediction table
    // if ret/retl, use RAS
    if (icdata->dinfo.flags.is_ret|icdata->dinfo.flags.is_retl) {

      perf_stats[ps_ras_refs]++;
      tdata[cpuid]->pred_npc = tdata[cpuid]->ras->pop() + 8;

      if (tdata[cpuid]->pred_amask) {
        tdata[cpuid]->pred_npc &= rz3_amask_mask;
      }
      // on a RAS miss the stack is assumed corrupt and cleared
      if (tdata[cpuid]->pred_npc == ir->ea_va) {
      } else {
        tdata[cpuid]->ras->clear();
        perf_stats[ps_ras_misses]++;
      }

    } else if ( ((ir->instr >> 25) & 0x1f) == 15 ) {
      // jmpl with rd == %o7 (reg 15): an indirect call

      // push unless following (delay-slot) instr is restore
      tdata[cpuid]->ras->push(pc);
      tdata[cpuid]->call_delay_slot = true;

      tdata[cpuid]->pred_npc = tdata[cpuid]->jmpl_table->get(pc >> 2);
      if (tdata[cpuid]->pred_amask) {
        tdata[cpuid]->pred_npc &= rz3_amask_mask;
      }
      if (tdata[cpuid]->pred_npc != ir->ea_va) { // we are going to see an ea_va misprediction (pred_ea_va is set to pred_npc for dctis)
        tdata[cpuid]->jmpl_table->set(pc>>2, ir->ea_va);
      }

    } // is this a ret/retl or indirect call?
    /* else do nothing */
  } else {
    bt_pred_hit = ! ir->bt;
  } // what type of dcti?

  // bt pred
  if (!bt_pred_hit) {
    instr_preds &= instr_mispred_bt;
  }

  // ea_valid pred: predict ea_valid is true
  if (!ir->ea_valid) {
    instr_preds &= instr_mispred_ea_valid;
    perf_stats[ps_ea_valid_misses]++;
  }

  // ea_va: predict pred_npc is ea_va
  if (tdata[cpuid]->pred_npc == ir->ea_va) {
    sdata->bitarrays[dcti_ea_va_pred_array]->Push(1);
  } else {
    sdata->bitarrays[dcti_ea_va_pred_array]->Push(0);
    sdata->bitarrays[raw_value64_array]->Push(ir->ea_va);

    // at this point we know the real ea_va. predict npc=ea_va
    tdata[cpuid]->pred_npc = ir->ea_va;
  }

  // annul flag for *next* instr
  // (an annulling branch kills its delay slot when untaken-conditional
  // or unconditional)
  if (icdata->dinfo.flags.annul_flag) {
    if ((icdata->dinfo.flags.iscbranch && !ir->bt) || icdata->dinfo.flags.isubranch) {
      tdata[cpuid]->pred_an = 1;
    }
  }

} // rstzip3::compress_dcti()
673
674
675// theres not much room for architectural compression
676// here, except in case of value traces. all we do here
677// is not store rtype and unused fields.
678void rstzip3::compress_tlb(rstf_unionT * rstbuf, int idx)
679{
680 rstf_tlbT *tr = &(rstbuf[idx].tlb);
681 // pack demap(25), tlb_index(24:9), tlb_type(8), tlb_no(7:6), cpuid(5:0) into a single
682 // 26-bit field. we thus save only 38 bits/tlb record.
683 // pack demap(29), tlb_index(28:13), tlb_type(12), tlb_no(11:10), cpuid(9:0) into a single
684 // 30-bit field. we thus save only 34 bits/tlb record.
685 int cpuid = rstf_pre212 ? tr->cpuid : rstf_tlbT_get_cpuid(tr);
686
687 uint32_t tlb_info = (tr->demap<<29) | (((uint32_t)tr->tlb_index) << 13) | (tr->tlb_type << 12)
688 | (tr->tlb_no << 10) | cpuid;
689 sdata->bitarrays[tlb_info_array]->Push(tlb_info);
690
691 sdata->bitarrays[raw_value64_array]->Push(tr->tte_tag);
692 sdata->bitarrays[raw_value64_array]->Push(tr->tte_data);
693
694
695} // void rstzip3::compress_tlb(rstf_unionT * rstbuf, int idx)
696
697
698// try to predict pc and npc.
699// at the time of this writing, trap records occur *before* the
700// instr record at the time the trap occurred.
701// For future RST versions, we will change this assumption if necessary
702void rstzip3::compress_trap(rstf_unionT * rstbuf, int idx)
703{
704 rstf_trapT * tr = &(rstbuf[idx].trap);
705
706 // predict cpuid as the predicted cpuid of the next instr
707 int cpuid = rstf_pre212 ? tr->cpuid : rstf_trapT_get_cpuid(tr);
708
709 if (cpuid == pred_cpuid) {
710 sdata->bitarrays[cpuid_pred_array]->Push(1);
711 } else {
712 sdata->bitarrays[cpuid_pred_array]->Push(0);
713 sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
714 }
715
716 if (tdata[cpuid] == NULL) {
717 tdata[cpuid] = new rz3_percpu_data(cpuid);
718 }
719
720 // put is_async(48), tl(47:44), ttype(43:34), pstate(33:18), syscall(17:2), pc_pred(1), npc_pred(0)
721 // in one 49-bit field
722 uint64_t trap_info = (((uint64_t)tr->is_async) << 48) | (((uint64_t)tr->tl) << 44) | (((uint64_t)tr->ttype) << 34) |
723 (((uint64_t)tr->pstate) << 18) | (((uint64_t)tr->syscall) << 2);
724
725 uint64_t pred_pc = tdata[cpuid]->pred_pc;
726 uint64_t pred_npc;
727 if (tr->pc == pred_pc) {
728 trap_info |= 2ull;
729 pred_npc = tdata[cpuid]->pred_npc;
730 } else {
731 sdata->bitarrays[raw_value64_array]->Push(tr->pc);
732
733 pred_npc = tr->pc + 4;
734 }
735
736 if (tr->npc == pred_npc) {
737 trap_info |= 1ull;
738 } else {
739 sdata->bitarrays[raw_value64_array]->Push(tr->npc);
740
741 }
742
743 sdata->bitarrays[trap_info_array]->Push(trap_info);
744} // void rstzip3::compress_trap(rstf_unionT * rstbuf, int idx)
745
746
747void rstzip3::compress_preg(rstf_unionT * rstbuf, int idx)
748{
749 rstf_pregT * pr = &(rstbuf[idx].preg);
750
751 // cpuid: predict same as previous instr cpuid
752 int cpuid = rstf_pre212 ? pr->cpuid : rstf_pregT_get_cpuid(pr);
753 int cpuid_pred = (cpuid==pred_cpuid) ? 1 : 0;
754 if (!cpuid_pred) {
755 sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
756 }
757
758 // pack cpuid_pred[61], primD[60:48], secD[47:35] asiReg{34:27], traplevel[26:24], traptype[23:16], pstate[15:0] in one 64-bit value
759 uint64_t preg_info = (((uint64_t)cpuid_pred) << 61) | (((uint64_t)pr->primD) << 48) | (((uint64_t)pr->secD) << 35) |
760 (((uint64_t)pr->asiReg) << 27) | (((uint64_t)pr->traplevel) << 24) | (((uint64_t)pr->traptype) << 16) | ((uint64_t)pr->pstate);
761 sdata->bitarrays[raw_value64_array]->Push(preg_info);
762
763
764 // primA and secA are not used - ignore
765} // void rstzip3::compress_preg(rstf_unionT * rstbuf, int idx)
766
767void rstzip3::compress_dma(rstf_unionT * rstbuf, int idx)
768{
769 rstf_dmaT * dr = &(rstbuf[idx].dma);
770 sdata->bitarrays[dma_iswrite_array]->Push(dr->iswrite);
771 sdata->bitarrays[dma_nbytes_array]->Push(dr->nbytes);
772 sdata->bitarrays[raw_value64_array]->Push(dr->start_pa);
773 sdata->bitarrays[raw_value64_array]->Push(dr->devid);
774} // void rstzip3::compress_dma(rstf_unionT * rstbuf, int idx)
775
// Compress a REGVAL_T record. cpuid is predicted as the cpuid of the
// most recent instr. regtype/regid for each of the two register slots
// are predicted from per-cpu tables indexed by the previous instr's pc;
// register values are compressed through the value cache, with a
// dedicated "is zero" bit since zero values are common.
void rstzip3::compress_regval(rstf_unionT * rstbuf, int idx)
{
  // for now, try to compress the reg64 fields using the same mechanism as ea_va compression
  rstf_regvalT * vr = &(rstbuf[idx].regval);

  // cpuid
  int cpuid = rstf_pre212 ? vr->cpuid : rstf_regvalT_get_cpuid(vr);

  // regvals follow their instr, so predict the *last* instr cpuid
  // (not pred_cpuid, which already points at the next instr)
  if (cpuid == last_instr_cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }

  // tdata
  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // postInstr
  sdata->bitarrays[regval_postInstr_array]->Push(vr->postInstr);

#if 0
  // if prev instr can be emulated, regenerate values using emulation
  if (regen_value(vr, idx)) return; // FIXME: testing
  if (vr->regtype[0] == RSTREG_INT_RT) {
    tdata[cpuid]->regs[vr->regid[0]] = vr->reg64[0];
  }
  if (vr->regtype[1] == RSTREG_INT_RT) {
    tdata[cpuid]->regs[vr->regid[1]] = vr->reg64[1];
  }
#endif

  // regtype, regid
  // prediction tables are indexed by the previous instr's word-aligned pc
  uint64_t prev_pc = tdata[cpuid]->prev_pc;
  int regtype_tbl_idx = (prev_pc >> 2) & (rz3_percpu_data::rz3_tdata_regval_regtype_tbl_size-1);
  int regid_tbl_idx = (prev_pc >> 2) & (rz3_percpu_data::rz3_tdata_regval_regid_tbl_size-1);

  int k;
  for (k=0; k<2; k++) {

    // predict regtype: use prev_instr

    uint8_t pred_regtype = tdata[cpuid]->regval_regtype_tbl[k][regtype_tbl_idx];

    if (pred_regtype == vr->regtype[k]) {
      sdata->bitarrays[regval_regtype_pred_array]->Push(1);
    } else {
      sdata->bitarrays[regval_regtype_pred_array]->Push(0);
      sdata->bitarrays[regval_raw_regtype_array]->Push(vr->regtype[k]);
      tdata[cpuid]->regval_regtype_tbl[k][regtype_tbl_idx] = vr->regtype[k];
    }

    if (vr->regtype[k] != RSTREG_UNUSED_RT) {

      // regid
      uint8_t pred_regid = tdata[cpuid]->regval_regid_tbl[k][regid_tbl_idx];
      if (prev_rtype == REGVAL_T) { // probably in save/restore code: predict regid = prev_regid+2
        pred_regid += 2;
      }
      if (pred_regid == vr->regid[k]) {
        sdata->bitarrays[regval_regid_pred_array]->Push(1);
      } else {
        sdata->bitarrays[regval_regid_pred_array]->Push(0);
        sdata->bitarrays[regval_raw_regid_array]->Push(vr->regid[k]);
      }
      // we always update update the table.
      // even if our prediction is correct, the predicted value is different from the value read from the table in case of save/restore
      tdata[cpuid]->regval_regid_tbl[k][regid_tbl_idx] = vr->regid[k];

      // reg64
      uint64_t v64 = vr->reg64[k];

      // %g0 is architecturally zero; a nonzero value here is a trace
      // anomaly that we warn about (once) and otherwise ignore
      if ((vr->regtype[k] == RSTREG_INT_RT) && (vr->regid[k] == 0)) {
        if (v64 != 0x0) {
          if (g0_nonzero_warn) {
            fprintf(stderr, "warning: rz3: compress_regval: int reg %%g0 has non-zero value %llx. will be ignored\n", v64);
            if (!verbose) {
              fprintf(stderr, "  (further %%g0!=0 warnings will be suppressed)\n");
              g0_nonzero_warn = false;
            }
          }
        }
      }

      if (v64 == 0) {
        sdata->bitarrays[value_iszero_array]->Push(1);
      } else {
        // function-local counters for ad-hoc value-cache hit-rate debugging
        static int regval_vc_refs = 0;
        static int regval_vc_hits = 0;
        sdata->bitarrays[value_iszero_array]->Push(0);
        regval_vc_refs++;
        if (compress_value(cpuid, v64)) {
          regval_vc_hits++;
        } else {
        }

        if (regval_vc_refs % 1000 == 0) {
          // printf("regval vc refs %d hits %d (%0.4f%%)\n", regval_vc_refs, regval_vc_hits, 100.0*regval_vc_hits/regval_vc_refs);
        }
      }

    } // if regtype != UNUSED
  } // for reg field = 0,1
} // rstzip3::compress_regval
882
883void rstzip3::compress_memval(rstf_unionT * rstbuf, int idx)
884{
885 // rtype: in raw rtype array
886 // ismemval128: raw
887
888 // addrisVA: raw
889 // isContRec: ignore for m64; raw for m128
890 // cpuid: same as predicted cpuid for next instr
891
892 // memval64.size: store raw size
893 // memval64.addr: use valuecache
894 // memval64.val: use valuecache
895
896 // memval128.addr36_43: ignore if isContRec; raw otherwise
897 // memval128.addr04_35: ignore if isContReg; raw otherwise
898
899 // memval128.val[]: use valuecache
900
901 rstf_memval64T * m64 = & (rstbuf[idx].memval64);
902 rstf_memval128T * m128 = & (rstbuf[idx].memval128);
903
904 sdata->bitarrays[memval_fields_array]->Push(m128->ismemval128);
905 sdata->bitarrays[memval_fields_array]->Push(! m128->addrisVA);
906
907 // cpuid
908 int cpuid = rstf_pre212 ? m128->cpuid : rstf_memval128T_get_cpuid(m128);
909 if (cpuid == pred_cpuid) {
910 sdata->bitarrays[cpuid_pred_array]->Push(1);
911 } else {
912 sdata->bitarrays[cpuid_pred_array]->Push(0);
913 sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
914 }
915
916 if (tdata[cpuid] == NULL) {
917 tdata[cpuid] = new rz3_percpu_data(cpuid);
918 }
919
920 if (m128->ismemval128) {
921 sdata->bitarrays[memval_fields_array]->Push(m128->isContRec);
922 if (! m128->isContRec) {
923 sdata->bitarrays[memval_addr36_43_array]->Push(m128->addr36_43);
924 sdata->bitarrays[memval_addr04_35_array]->Push(m128->addr04_35);
925 }
926
927 // vals
928
929 compress_value(cpuid, m128->val[0]);
930
931 compress_value(cpuid, m128->val[1]);
932
933 } else /* memval64 */ {
934 sdata->bitarrays[memval_size_array]->Push(m64->size-1);
935
936
937 // predict addr using valuecache
938 compress_value(cpuid, m64->addr);
939 compress_value(cpuid, m64->val);
940
941 }
942
943} // compress_memval
944
945
// Compress an RFS_CW_T (cache-warming) record. reftype and cpuid are
// stored raw (no architectural prediction is attempted); for cpu
// references with a valid va, the va goes through the value cache and
// pa is predicted from the per-cpu itlb/dtlb models keyed by 8KB page.
// DMA references carry no cpuid and store pa raw.
void rstzip3::compress_rfs_cw(rstf_unionT * rstbuf, int idx)
{
  rstf_cachewarmingT *cw = &(rstbuf[idx].cachewarming);

  // there is no architectural method to predict reftype.
  sdata->bitarrays[rfs_cw_raw_reftype_array]->Push(cw->reftype);

  // dont predict cpuid

  int cpuid;

  // DMA records have no meaningful cpuid; use 0 as a placeholder
  if ((cw->reftype == cw_reftype_DMA_R) || (cw->reftype == cw_reftype_DMA_W)) {
    cpuid = 0;
  } else {
    cpuid = rstf_cachewarmingT_get_cpuid(cw);
  }

  if (tdata[cpuid] == NULL) {
    // fprintf(stderr, "compress_rfs_cw: new cpuid %d\n", cpuid);
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  sdata->bitarrays[rfs_raw_cpuid_array]->Push(cpuid);

  if ((cw->reftype == cw_reftype_DMA_R)|| (cw->reftype == cw_reftype_DMA_W)) {
    // DMA: raw pa plus the dma size field
    sdata->bitarrays[raw_value64_array]->Push(cw->pa);

    sdata->bitarrays[rfs_cw_dma_size_array]->Push(cw->refinfo.dma_size);
  } else {
    // asi
    sdata->bitarrays[rfs_cw_asi_array]->Push(cw->refinfo.s.asi);

    // fcn
    // (prefetch function code, present only for data-prefetch refs)
    if (cw->reftype==cw_reftype_PF_D) {
      sdata->bitarrays[rfs_cw_pf_fcn_array]->Push(cw->refinfo.s.fcn);
    }

    // va_valid
    sdata->bitarrays[rfs_cw_va_valid_array]->Push(cw->refinfo.s.va_valid);

    if (cw->refinfo.s.va_valid) {

      compress_value(cpuid, cw->va);

      // tlb hit/miss
      // instruction refs use the itlb model, everything else the dtlb
      uint64_t pred_pa;
      if (cw->reftype == cw_reftype_I) {
        pred_pa = tdata[cpuid]->itlb->get(cw->va>>13) << 13;
      } else {
        pred_pa = tdata[cpuid]->dtlb->get(cw->va>>13) << 13;
      }
      pred_pa |= (cw->va & 0x1fffull);
      if (pred_pa != cw->pa) {
        sdata->bitarrays[rfs_cw_pa_pred_array]->Push(0);
        sdata->bitarrays[raw_value64_array]->Push(cw->pa);

        // update the tlb model so the decompressor stays in sync
        if (cw->reftype == cw_reftype_I) {
          tdata[cpuid]->itlb->set(cw->va>>13, cw->pa>>13);
        } else {
          tdata[cpuid]->dtlb->set(cw->va>>13, cw->pa>>13);
        }
      } else {
        sdata->bitarrays[rfs_cw_pa_pred_array]->Push(1);
      }
    } else /* va invalid - no way to predict pa? */ {
      sdata->bitarrays[raw_value64_array]->Push(cw->pa);
    }
  }
} // rstzip3::compress_rfs_cw(rstf_unionT * rstbuf, int idx)
1015
1016
// Compress an RFS branch-predict-warming record {cpuid, taken, instr,
// pc_va, npc_va}.  Uses the per-cpu pc-prediction table, instruction
// cache and branch predictor so that the common case compresses to a
// few prediction-hit bits; mispredictions fall back to raw fields.
void rstzip3::compress_rfs_bt(rstf_unionT * rstbuf, int idx)
{
  rstf_bpwarmingT * bt = &(rstbuf[idx].bpwarming);

  // a bt record consists of cpuid, taken, instr, pc_va, npc_va

  // no easy way to compress cpuid: store raw
  int cpuid = rstf_bpwarmingT_get_cpuid(bt);
  sdata->bitarrays[rfs_raw_cpuid_array]->Push(cpuid);
  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // pc: predict from the previous bt record's npc via the pc-prediction table
  uint64_t pred_pc = tdata[cpuid]->rfs_pc_pred_table->get(tdata[cpuid]->rfs_prev_npc);
  if (pred_pc == bt->pc_va) {
    sdata->bitarrays[rfs_pc_pred_array]->Push(1);
  } else {
    sdata->bitarrays[rfs_pc_pred_array]->Push(0);
    // pc_va stored with the 2 low bits dropped (4-byte instruction alignment)
    sdata->bitarrays[raw_value64_array]->Push(bt->pc_va>>2);

    tdata[cpuid]->rfs_pc_pred_table->set(tdata[cpuid]->rfs_prev_npc, bt->pc_va);
  }

  // instr: use icache. On a miss store the instruction raw and install
  // it, pre-decoding its branch target for the npc prediction below.
  rz3iu_icache_data * icdata = tdata[cpuid]->icache->get(bt->pc_va);
  uint32_t instr = bt->instr;
  if ((icdata == NULL) || (icdata->instr != instr)) {
    // ic miss
    sdata->bitarrays[rfs_instr_pred_array]->Push(0);
    sdata->bitarrays[raw_instr_array]->Push(instr);
    icdata = tdata[cpuid]->icache->set(bt->pc_va, instr, rstzip3_major_version, rstzip3_minor_version);
    icdata->gen_target(bt->pc_va);
  } else {
    sdata->bitarrays[rfs_instr_pred_array]->Push(1);
  }

  // bt: single bit - did our prediction agree with the recorded taken bit?
  int bt_pred_hit;
  if (icdata->dinfo.flags.iscbranch) {
    // conditional branch: consult the per-cpu branch predictor
    bt_pred_hit = tdata[cpuid]->bp->pred_hit(bt->pc_va, bt->taken);
    if (!bt_pred_hit) perf_stats[ps_brpred_misses]++;
  } else if (icdata->dinfo.flags.isubranch && icdata->dinfo.flags.isubranch_nottaken) {
    bt_pred_hit = ! bt->taken; // in other words, we predict uncond nt branches as not taken. if the taken bit is 0, then our prediction is correct (1) and vice versa
  } else {
    bt_pred_hit = bt->taken; // in other words, we predict all other branches as taken
  }

  sdata->bitarrays[rfs_bt_pred_array]->Push(bt_pred_hit);

  // target: taken => the icache's precomputed target;
  // not taken => sequential pc+8 (pc plus branch and delay-slot instrs)
  uint64_t pred_npc_va;
  if (bt->taken) {
    pred_npc_va = icdata->target;
  } else {
    pred_npc_va = bt->pc_va + 8;
  }
  if (pred_npc_va == bt->npc_va) {
    sdata->bitarrays[dcti_ea_va_pred_array]->Push(1);
  } else {
    sdata->bitarrays[dcti_ea_va_pred_array]->Push(0);
    sdata->bitarrays[raw_value64_array]->Push(bt->npc_va);
  }

  // remember npc: it is the key for predicting the next bt record's pc
  tdata[cpuid]->rfs_prev_npc = bt->npc_va;

  tdata[cpuid]->pred_pc = tdata[cpuid]->rfs_pc_pred_table->get(bt->npc_va);

} // rstzip3::compress_rfs_bt(rstf_unionT * rstbuf, int idx)
1086
1087
1088
1089// return true if could compress using valuecache
1090bool rstzip3::compress_value(int cpuid, uint64_t v64)
1091{
1092 if (tdata[cpuid] == NULL) {
1093 tdata[cpuid] = new rz3_percpu_data(cpuid);
1094 }
1095
1096 uint64_t key;
1097 int level = tdata[cpuid]->valuecache->Ref(v64, key);
1098 sdata->bitarrays[valuecache_level_array]->Push(level);
1099 sdata->bitarrays[valuecache_data0_array+level]->Push(key);
1100
1101 return (level < 7);
1102}
1103
1104
1105
1106
1107
#if 0 // leave this obsolete code in here. it is useful for making sense of the decompress_pavadiff_v315 code in decompress_engine.C
// OBSOLETE (v3.15): compress a pavadiff record by predicting pa-va page
// diffs from the simulated tlbs, keyed on the va of the next INSTR_T
// record for the same cpu.  Kept only as documentation for the matching
// decompressor; do not re-enable without auditing against it.
void rstzip3::compress_pavadiff_v315(rstf_unionT * rstbuf, int idx)
{
  rstf_pavadiffT * dr = &(rstbuf[idx].pavadiff);
  int cpuid = rstf_pavadiffT_get_cpuid(dr);

  // check and predict cpuid
  if (pred_cpuid == cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }
  pred_cpuid = cpuid;

  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // predict icontext the same as prev icontext
  if (tdata[cpuid]->pred_icontext == dr->icontext) {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_ictxt_array]->Push(dr->icontext);
    tdata[cpuid]->pred_icontext = dr->icontext;
  }

  // dcontext - predict same as prev dcontext for this cpu
  if (tdata[cpuid]->pred_dcontext == dr->dcontext) {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_dctxt_array]->Push(dr->dcontext);
    tdata[cpuid]->pred_dcontext = dr->dcontext;
  }

  // look ahead for the next instr record from this cpu: its pc_va/ea_va
  // are the tlb-lookup keys (the decompressor performs the same scan)
  bool found_pc_va = false;
  uint64_t nextpc_va;
  bool found_ea_va = false;
  uint64_t nextea_va;

  int i;
  for (i=idx+1; i<shdr->nrecords; i++) {
    if (rstbuf[i].proto.rtype == INSTR_T) {
      if (rstf_instrT_get_cpuid(&rstbuf[i].instr) == cpuid) {
        nextpc_va = rstbuf[i].instr.pc_va;
        found_pc_va = (nextpc_va != 0x0);
        if (dr->ea_valid && rstbuf[i].instr.ea_valid) { // we only care about ea_va if dr->ea_valid
          nextea_va = rstbuf[i].instr.ea_va;
          found_ea_va = (nextea_va != 0x0);
        }
      } // if cpuid match
      break;
    } // if instr
  } // for each subsequent record

  // ea_valid
  sdata->bitarrays[pavadiff_ea_valid_array]->Push(dr->ea_valid);

  // NOTE: pc_pa_va_hit/ea_pa_va_hit may be left unset on a miss when the
  // corresponding va was not found, but the short-circuit tests at the end
  // only read them when found_pc_va/found_ea_va is true.
  bool pc_pa_va_hit;
  bool ea_pa_va_hit;

  uint64_t pred_pa_va_diff;

  if (found_pc_va) {
    pred_pa_va_diff = tdata[cpuid]->itlb->get(nextpc_va >> 13);
  } else {
    pred_pa_va_diff = 42; // some nonsensical value
  }

  if (pred_pa_va_diff == (dr->pc_pa_va>>13)) {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(1);
    pc_pa_va_hit = true;
  } else {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(0);
    sdata->bitarrays[raw_value64_array]->Push(dr->pc_pa_va);

    if (found_pc_va) {
      tdata[cpuid]->itlb->set(nextpc_va>>13, dr->pc_pa_va>>13);
      pc_pa_va_hit = false;
    }
  }


  if (dr->ea_valid) {

    // ea_pa_va - use next instr (if available) and a tlb simulator
    if (found_ea_va) {
      // tlb lookup
      pred_pa_va_diff = tdata[cpuid]->dtlb->get(nextea_va >> 13);
    } else {
      pred_pa_va_diff = 42; // some nonsensical value
    }

    if (pred_pa_va_diff == (dr->ea_pa_va >> 13)) {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(1);
      ea_pa_va_hit = true;
    } else {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(0);
      sdata->bitarrays[raw_value64_array]->Push(dr->ea_pa_va);

      if (found_ea_va) {
        tdata[cpuid]->dtlb->set((nextea_va >> 13), (dr->ea_pa_va >> 13));
        ea_pa_va_hit = false;
      }
    }
  } else {
    ea_pa_va_hit = false;
  } // if ea_valid

  // the lookahead flag tells the decompressor to look for the next instr (to update the tlb)
  // if we predicted pc_pa_va and/or ea_pa_va correctly, the decompressor knows from the pred bit to lookahead.
  // we set the lookahead flag so that the decomprssor knows the difference between no prediction (could not find corresponding instr) and misprediction

  if ((found_pc_va && pc_pa_va_hit) || (dr->ea_valid && found_ea_va && ea_pa_va_hit)) {
    // dont need lookahead since the pc_pa_va_pred_array and/or the ea_pa_va_pred_array will indicate lookahead
  } else {
    // we need to indicate whether there was no prediction or misprediction(s)
    int lookahead_flag = (found_pc_va || found_ea_va);
    sdata->bitarrays[pavadiff_lookahead_array]->Push(lookahead_flag);
  }
} // rstzip3::compress_pavadiff()
#endif // #if 0 (obsolete code - left here as a reference for the corresponding decompress code)
1232
1233
1234