Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / rst / rstzip3 / rstzip_v3 / decompress_engine.C
CommitLineData
920dae64
AT
1// ========== Copyright Header Begin ==========================================
2//
3// OpenSPARC T2 Processor File: decompress_engine.C
4// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
5// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
6//
7// The above named program is free software; you can redistribute it and/or
8// modify it under the terms of the GNU General Public
9// License version 2 as published by the Free Software Foundation.
10//
11// The above named program is distributed in the hope that it will be
12// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15//
16// You should have received a copy of the GNU General Public
17// License along with this work; if not, write to the Free Software
18// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19//
20// ========== Copyright Header End ============================================
21/* decompress_engine.C */
22
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <assert.h>
27
28#include "rstf/rstf.h"
29
30#if defined(ARCH_AMD64)
31#include "rstf/rstf_convert.h"
32#endif
33
34#include "rstzip3.h"
35#include "rz3_section.h"
36
37#include "rz3iu.h"
38
39
40
41int rstzip3::decompress_buffer(rstf_unionT * rstbuf, int rstbufsize)
42{
43 if (verbose) fprintf(stderr, "Section %d\n", nsections);
44
45 // read section header
46 if (!shdr->read(gzf)) {
47 return 0;
48 }
49
50 if (rstbufsize < shdr->nrecords) {
51 fprintf(stderr, "ERROR: rstzip3::decompress_buffer: caller buffer size (%d) smaller than section size (%d)\n", rstbufsize, shdr->nrecords);
52 return 0;
53 }
54
55 sdata->clear(); // clear all bitarrays
56
57 // FIXME: do not bzero rstbuf (cut corners) if fast decompression specified.
58 bzero(rstbuf, rstbufsize*sizeof(rstf_unionT));
59
60 // clear predictor tables in tdata if shdr->clearflag
61
62 if (!sdata->read(gzf)) {
63 perror("ERROR: rstzip3::decompress_buffer(): could not read section data from input file\n");
64 return 0;
65 }
66
67 int i;
68 uint64_t v;
69 for (i=0; i<shdr->nrecords; i++) {
70 if (rfs_phase) {
71 if (rfs_cw_phase) {
72 sdata->bitarrays[rfs_rtype_pred_array]->GetNext(v);
73 if (v) {
74 rstbuf[i].proto.rtype = RFS_CW_T;
75 rfs_records_seen++;
76 if (rfs_records_seen == rfs_nrecords) {
77 rfs_phase = rfs_cw_phase = false;
78 }
79 } else /* rfs cw rtype misprediction */ {
80 sdata->bitarrays[rtype_array]->GetNext(v);
81 rstbuf[i].proto.rtype = v;
82 rfs_phase = rfs_cw_phase = false;
83 } // rfs cw rtype pred
84 } else if (rfs_bt_phase) {
85 sdata->bitarrays[rfs_rtype_pred_array]->GetNext(v);
86 if (v) {
87 rstbuf[i].proto.rtype = RFS_BT_T;
88 rfs_records_seen++;
89 if (rfs_records_seen == rfs_nrecords) {
90 rfs_phase = rfs_bt_phase = false;
91 }
92 } else /* rfs cw rtype misprediction */ {
93 sdata->bitarrays[rtype_array]->GetNext(v);
94 rstbuf[i].proto.rtype = v;
95 rfs_phase = rfs_bt_phase = false;
96 } // rfs bt rtype pred
97 } // which rfs phase?
98 } else /* regular rst phase */ {
99 sdata->bitarrays[rtype_key_array]->GetNext(v);
100 switch(v) {
101 case rtype_key_INSTR:
102 rstbuf[i].proto.rtype = INSTR_T;
103 break;
104 case rtype_key_REGVAL:
105 rstbuf[i].proto.rtype = REGVAL_T;
106 break;
107 case rtype_key_PAVADIFF:
108 rstbuf[i].proto.rtype = PAVADIFF_T;
109 break;
110 default:
111 sdata->bitarrays[rtype_array]->GetNext(v);
112 rstbuf[i].proto.rtype = v;
113 }
114 }
115
116
117 switch(rstbuf[i].proto.rtype) {
118 case INSTR_T:
119 decompress_inst(rstbuf, i);
120 break;
121 case PAVADIFF_T:
122 decompress_pavadiff(rstbuf, i);
123 break;
124 case REGVAL_T:
125 decompress_regval(rstbuf, i);
126 break;
127 case MEMVAL_T:
128 decompress_memval(rstbuf, i);
129 break;
130 case TRAP_T:
131 decompress_trap(rstbuf, i);
132 break;
133 case TLB_T:
134 decompress_tlb(rstbuf, i);
135 break;
136 case PREG_T:
137 decompress_preg(rstbuf, i);
138 break;
139 case DMA_T:
140 decompress_dma(rstbuf, i);
141 break;
142 case RFS_CW_T:
143 if ((rfs_records_seen == 0) && ! rfs_cw_phase) {
144 // in case there was no rfs preamble, section header etc.
145 rfs_phase = rfs_cw_phase = true;
146 rfs_nrecords = rfs_unknown_nrecords;
147 rfs_records_seen = 1;
148 }
149 decompress_rfs_cw(rstbuf, i);
150 break;
151 case RFS_BT_T:
152 if ((rfs_records_seen == 0) && ! rfs_bt_phase) {
153 // in case there was no rfs preamble, section header etc.
154 rfs_phase = rfs_bt_phase = true;
155 rfs_nrecords = rfs_unknown_nrecords;
156 rfs_records_seen = 1;
157 }
158 decompress_rfs_bt(rstbuf, i);
159 break;
160
161 default:
162 sdata->bitarrays[raw_value64_array]->GetNext(rstbuf[i].arr64.arr64[0]);
163 sdata->bitarrays[raw_value64_array]->GetNext(rstbuf[i].arr64.arr64[1]);
164 sdata->bitarrays[raw_value64_array]->GetNext(rstbuf[i].arr64.arr64[2]);
165
166#if defined(ARCH_AMD64)
167 // turns into BE layout
168 rstbuf[i].arr64.arr64[0] = byteswap64(rstbuf[i].arr64.arr64[0]);
169 rstbuf[i].arr64.arr64[1] = byteswap64(rstbuf[i].arr64.arr64[1]);
170 rstbuf[i].arr64.arr64[2] = byteswap64(rstbuf[i].arr64.arr64[2]);
171#endif
172
173 if (rstbuf[i].proto.rtype == RFS_SECTION_HEADER_T) {
174 if (rstbuf[i].rfs_section_header.section_type == RFS_CW_T) {
175 rfs_phase = rfs_cw_phase = true;
176 rfs_nrecords = rstbuf[i].rfs_section_header.n_records;
177#if defined(ARCH_AMD64)
178 rfs_nrecords = byteswap64(rfs_nrecords);
179#endif
180 rfs_records_seen = 0;
181 } else if (rstbuf[i].rfs_section_header.section_type == RFS_BT_T) {
182 rfs_phase = rfs_bt_phase = true;
183 rfs_nrecords = rstbuf[i].rfs_section_header.n_records;
184#if defined(ARCH_AMD64)
185 rfs_nrecords = byteswap64(rfs_nrecords);
186#endif
187 rfs_records_seen = 0;
188 } // else - do nothing
189 } // if rfs section header
190
191 // fwrite(rstbuf+i, sizeof(rstf_unionT), 1, testfp); fflush(testfp);
192
193 break;
194 } // what rtype?
195
196 prev_rtype = rstbuf[i].proto.rtype;
197 } // for each record
198
199 nsections++;
200
201
202 return shdr->nrecords;
203} // int rstzip3::decompress_buffer(rstf_unionT * rstbuf, int nrec)
204
205
206
207
208void rstzip3::decompress_inst(rstf_unionT * rstbuf, int idx)
209{
210 uint64_t v;
211
212 rstf_instrT * ir = &(rstbuf[idx].instr);
213
214 // cpuid pred
215 uint16_t cpuid;
216 sdata->bitarrays[cpuid_pred_array]->GetNext(v);
217 if (v) {
218 cpuid = pred_cpuid;
219 } else {
220 sdata->bitarrays[raw_cpuid_array]->GetNext(v);
221 cpuid = v;
222 }
223
224 rstf_instrT_set_cpuid(ir, cpuid);
225
226 // predict cpuid. assume round robin FIXME: for now, assump uP traces
227 if (tdata[cpuid+1] == NULL) {
228 pred_cpuid = 0;
229 } else {
230 pred_cpuid = cpuid+1;
231 }
232 last_instr_cpuid = cpuid;
233
234 if (tdata[cpuid] == NULL) {
235 tdata[cpuid] = new rz3_percpu_data(cpuid);
236 }
237
238 // instr pred bits
239 sdata->bitarrays[instr_pred_all_array]->GetNext(v);
240 if (v) {
241 instr_preds = instr_pred_all;
242 } else {
243 sdata->bitarrays[instr_pred_raw_array]->GetNext(v);
244 instr_preds = v;
245 }
246
247 // amask bit: if amask is 0, all 64-bits of pred_pc are used. if not, only the lower 32-bits are used
248 // we check and set the amask bit on a pc misprediction. if the misprediction leaves the lower 32-bits unchanged
249 // but differs in the upper 32-bits, we set/clear amask accordingly
250 // check pc
251 uint64_t pc;
252 if (instr_preds & instr_pred_pc) {
253 ir->pc_va = tdata[cpuid]->pred_pc;
254 pc = tdata[cpuid]->pred_pc;
255 } else /* pc mispredicted */ {
256 sdata->bitarrays[raw_value64_array]->GetNext(v);
257 pc = v;
258 ir->pc_va = pc;
259
260 uint64_t pred_pc = tdata[cpuid]->pred_pc;
261
262 // is our amask to blame?
263 if ((pc & rz3_amask_mask) == (pred_pc & rz3_amask_mask)) {
264 // lower 32 bits match
265 if ((pc >> 32) != 0) {
266 // if amask was 1, it should be 0. if it was already zero, amask is not to blame, but set it to 0 anyway
267 tdata[cpuid]->pred_amask = 0;
268 } else {
269 // if amask was 0, it should be 1. if it was already 1, we shouldn't be here.
270 if (0 && tdata[cpuid]->pred_amask) {
271 fprintf(stderr, "rz3: decompress_inst: amask was set but predicted pc was > 32 bits: pred_pc %llx actual %llx\n", pred_pc, pc);
272 }
273 tdata[cpuid]->pred_amask = 1;
274 }
275 }
276
277 // we mispredicted the PC of the current instr
278 tdata[cpuid]->pred_npc = pc+4;
279 }
280
281 // pc, npc
282 tdata[cpuid]->pred_pc = tdata[cpuid]->pred_npc;
283 tdata[cpuid]->pred_npc += 4; // this may be modified later, in case of dctis
284
285 tdata[cpuid]->prev_pc = pc;
286
287 // annul bit
288 ir->an = (instr_preds & instr_pred_an) ? tdata[cpuid]->pred_an : !tdata[cpuid]->pred_an;
289
290 // instr
291 rz3iu_icache_data * icdata = tdata[cpuid]->icache->get(pc);
292 if (instr_preds & instr_pred_instr) {
293 ir->instr = icdata->instr;
294 } else {
295 sdata->bitarrays[raw_instr_array]->GetNext(v);
296 ir->instr = v;
297 icdata = tdata[cpuid]->icache->set(pc, ir->instr, header->major_version, header->minor_version);
298
299 if ((!ir->an) && (icdata->dinfo.flags.isdcti)) {
300 icdata->gen_target(pc);
301 }
302 }
303 uint32_t instr = ir->instr;
304
305 if (tdata[cpuid]->call_delay_slot) {
306 if ( ((instr & RESTORE_OPCODE_MASK) == RESTORE_OPCODE_BITS) || (instr == MOV_G1_G7_INSTR) ) {
307 tdata[cpuid]->ras->pop();
308 }
309 tdata[cpuid]->call_delay_slot = false;
310 }
311
312
313 // tr and pr bits. we predict tr=0 and pr=prev_pr
314 // predict and set tr BEFORE decompress_ea_va because ea_valid prediction depends on the tr bit
315 ir->tr = (instr_preds & instr_pred_tr) ? 0 : 1;
316
317 if (instr_preds & instr_pred_pr) {
318 ir->pr = tdata[cpuid]->pred_pr;
319 } else {
320 ir->pr = tdata[cpuid]->pred_pr ? 0 : 1;
321 tdata[cpuid]->pred_pr = ir->pr;
322 }
323
324 if (!pre320) {
325 if (instr_preds & instr_pred_hpriv) {
326 ir->hpriv = tdata[cpuid]->pred_hpriv;
327 } else {
328 ir->hpriv = tdata[cpuid]->pred_hpriv ? 0 : 1;
329 tdata[cpuid]->pred_hpriv = ir->hpriv;
330 }
331 if (ir->hpriv) {
332 tdata[cpuid]->pred_pr = 0;
333 }
334 } // else if pre320 = do nothing
335
336 // predict ea_valid, ea_va, bt, NEXT-instr an
337 if (!ir->an) {
338 if (icdata->dinfo.flags.isdcti) {
339
340 decompress_dcti(rstbuf, idx, icdata);
341
342 } else /* not dcti */ {
343
344 // bt: prediction is 0 unless done_retry. resolution: ir->bt = (v == is_done_retry)
345 if (instr_preds & instr_pred_bt) {
346 ir->bt = icdata->dinfo.flags.is_done_retry;
347 } else {
348 ir->bt = ! icdata->dinfo.flags.is_done_retry;
349 }
350
351 // ea_valid
352 bool ea_valid_pred = (instr_preds & instr_pred_ea_valid);
353 if (icdata->is_ldstpf) {
354 ir->ea_valid = ea_valid_pred; // predict ea_valid=1
355 } else if (icdata->dinfo.flags.is_done_retry) {
356 ir->ea_valid = ea_valid_pred; // predict ea_valid=1
357 } else if (ir->tr) {
358 ir->ea_valid = ea_valid_pred; // predict ea_valid = 1
359 } else {
360 ir->ea_valid = !ea_valid_pred; // predict ea_valid = 0;
361 }
362
363 if (ir->ea_valid) {
364 decompress_ea_va(rstbuf, idx);
365 }
366
367 tdata[cpuid]->pred_an = 0;
368
369 }
370 } // if not annulled
371
372 // pavadiff: pass 2
373 if (tdata[cpuid]->pending_pavadiff_idx != -1) {
374 decompress_pavadiff_pass2(rstbuf, idx); // pass the index of the instrution to the pavadiff decompressor
375 }
376
377 // fwrite(rstbuf+idx, sizeof(rstf_unionT), 1, testfp); fflush(testfp);
378
379#if defined(ARCH_AMD64)
380 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
381#endif
382} // void rstzip3::decompress_inst(rstf_unionT * rstbuf, int idx)
383
384
385void rstzip3::decompress_pavadiff(rstf_unionT * rstbuf, int idx)
386{
387 if (0 && idx == 102577) {
388 printf("debug: decompress_pavadiff idx %d\n", idx);
389 }
390
391 uint64_t v;
392
393 rstf_pavadiffT * dr = &(rstbuf[idx].pavadiff);
394
395 // cpuid
396 int cpuid;
397 sdata->bitarrays[cpuid_pred_array]->GetNext(v);
398 if (v) {
399 rstf_pavadiffT_set_cpuid(dr, pred_cpuid); // dr->cpuid = pred_cpuid;
400 cpuid = pred_cpuid;
401 } else {
402 sdata->bitarrays[raw_cpuid_array]->GetNext(v);
403 rstf_pavadiffT_set_cpuid(dr, v); // dr->cpuid = v;
404 cpuid = v;
405 }
406 pred_cpuid = cpuid; // for next instr
407
408 if (tdata[cpuid] == NULL) {
409 tdata[cpuid] = new rz3_percpu_data(cpuid);
410 }
411
412
413 // icontext
414 sdata->bitarrays[pavadiff_ictxt_pred_array]->GetNext(v);
415 if (v) {
416 dr->icontext = tdata[cpuid]->pred_icontext;
417 } else {
418 sdata->bitarrays[pavadiff_raw_ictxt_array]->GetNext(v);
419 dr->icontext = v;
420 tdata[cpuid]->pred_icontext = dr->icontext;
421 }
422
423 // dcontext
424
425 sdata->bitarrays[pavadiff_dctxt_pred_array]->GetNext(v);
426 if (v) {
427 dr->dcontext = tdata[cpuid]->pred_dcontext;
428 } else {
429 sdata->bitarrays[pavadiff_raw_dctxt_array]->GetNext(v);
430 dr->dcontext = v;
431 tdata[cpuid]->pred_dcontext = dr->dcontext;
432 }
433
434
435 // ea_valid
436 sdata->bitarrays[pavadiff_ea_valid_array]->GetNext(v);
437 dr->ea_valid = v;
438
439 // to predict pc_pa_va and ea_pa_va, we need the NEXT instr from this cpuid
440 // if the prediction was successful. Otherwise, we read those values from
441 // the raw arrays
442 sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->GetNext(v);
443 int pc_pa_va_hit = v;
444 if (pc_pa_va_hit) {
445 tdata[cpuid]->pending_pavadiff_pc_pa_va_pred = 1;
446 } else {
447 sdata->bitarrays[raw_value64_array]->GetNext(v);
448 dr->pc_pa_va = v;
449 }
450
451 int ea_pa_va_hit = 0;
452 if (dr->ea_valid) {
453 sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->GetNext(v);
454 ea_pa_va_hit = v;
455 if (ea_pa_va_hit) {
456 tdata[cpuid]->pending_pavadiff_ea_pa_va_pred = 1;
457 } else {
458 sdata->bitarrays[raw_value64_array]->GetNext(v);
459 dr->ea_pa_va = v;
460 }
461 }
462
463 if (tdata[cpuid]->pending_pavadiff_pc_pa_va_pred || tdata[cpuid]->pending_pavadiff_ea_pa_va_pred) {
464 tdata[cpuid]->pending_pavadiff_idx = idx;
465 } else /* neither pc_pa_va no ea_pa_va could be predicted */ {
466 // is there a next instr for this cpuid (do we need to update itlb and dtlb?
467 sdata->bitarrays[pavadiff_lookahead_array]->GetNext(v);
468 if (v) {
469 tdata[cpuid]->pending_pavadiff_idx = idx;
470 } else {
471 tdata[cpuid]->pending_pavadiff_idx = -1;
472 // fwrite(rstbuf+idx, sizeof(rstf_unionT), 1, testfp); fflush(testfp);
473 }
474 }
475
476
477#if defined(ARCH_AMD64)
478 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
479#endif
480} // rstzip3::decompress_pavadiff()
481
482
483void rstzip3::decompress_pavadiff_pass2(rstf_unionT * rstbuf, int instr_idx)
484{
485 if (header->minor_version <= 15) {
486 decompress_pavadiff_pass2_v315(rstbuf, instr_idx);
487 return;
488 }
489
490 rstf_instrT * ir = &(rstbuf[instr_idx].instr);
491
492 int cpuid = rstf_instrT_get_cpuid(ir);
493
494 int idx = tdata[cpuid]->pending_pavadiff_idx;
495 rstf_pavadiffT * dr = &(rstbuf[idx].pavadiff);
496
497 if (tdata[cpuid]->pending_pavadiff_pc_pa_va_pred) {
498 dr->pc_pa_va = tdata[cpuid]->itlb->get(ir->pc_va >> 13) << 13;
499 tdata[cpuid]->pending_pavadiff_pc_pa_va_pred = false;
500 } else /* there was an itlb miss */ {
501 if (0) printf("%d: cpu%d itlb update: %llx => %llx\n", idx, cpuid, ir->pc_va, dr->pc_pa_va);
502 tdata[cpuid]->itlb->set(ir->pc_va >> 13, dr->pc_pa_va >> 13);
503 }
504
505 if (tdata[cpuid]->pending_pavadiff_ea_pa_va_pred) {
506 dr->ea_pa_va = tdata[cpuid]->dtlb->get(ir->ea_va >> 13) << 13;
507 tdata[cpuid]->pending_pavadiff_ea_pa_va_pred = false;
508 } else if (ir->ea_valid && dr->ea_valid) /* there was a dtlb miss */ {
509 if (0) printf("%d: cpu%d dtlb update: %llx => %llx\n", idx, cpuid, ir->ea_va, dr->ea_pa_va);
510 tdata[cpuid]->dtlb->set(ir->ea_va >> 13, dr->ea_pa_va >> 13);
511 } // else - ea_valid = 0. do nothing
512
513 tdata[cpuid]->pending_pavadiff_idx = -1;
514
515 // fwrite(rstbuf+idx, sizeof(rstf_unionT), 1, testfp); fflush(testfp);
516
517}
518
519void rstzip3::decompress_pavadiff_pass2_v315(rstf_unionT * rstbuf, int instr_idx)
520{
521 rstf_instrT * ir = &(rstbuf[instr_idx].instr);
522
523 int cpuid = rstf_instrT_get_cpuid(ir);
524
525 int idx = tdata[cpuid]->pending_pavadiff_idx;
526 rstf_pavadiffT * dr = &(rstbuf[idx].pavadiff);
527
528 if (tdata[cpuid]->pending_pavadiff_pc_pa_va_pred) {
529 dr->pc_pa_va = tdata[cpuid]->itlb->get(ir->pc_va >> 13) << 13;
530 tdata[cpuid]->pending_pavadiff_pc_pa_va_pred = false;
531 } else /* there was an itlb miss */ {
532 if (ir->pc_va != 0x0) {
533 tdata[cpuid]->itlb->set(ir->pc_va >> 13, dr->pc_pa_va >> 13);
534 }
535 }
536
537 if (tdata[cpuid]->pending_pavadiff_ea_pa_va_pred) {
538 if (ir->ea_va == 0) {
539 dr->ea_pa_va = 42ull << 13;
540 } else {
541 dr->ea_pa_va = tdata[cpuid]->dtlb->get(ir->ea_va >> 13) << 13;
542 }
543 tdata[cpuid]->pending_pavadiff_ea_pa_va_pred = false;
544 } else if (dr->ea_valid) /* there was a dtlb miss */ {
545 if (ir->ea_va != 0x0) {
546 tdata[cpuid]->dtlb->set(ir->ea_va >> 13, dr->ea_pa_va >> 13);
547 }
548 } // else - ea_valid = 0. do nothing
549
550 tdata[cpuid]->pending_pavadiff_idx = -1;
551
552 // fwrite(rstbuf+idx, sizeof(rstf_unionT), 1, testfp); fflush(testfp);
553
554} // void decompress_pavadiff_pass2_v315(rstf_unionT * outbuf, int instr_idx)
555
556
557// predict bt, ea_valid, ea_va, NEXT-instr an for a dcti instr. also set pred_npc
558void rstzip3::decompress_dcti(rstf_unionT * rstbuf, int idx, rz3iu_icache_data * icdata)
559{
560 uint64_t v;
561
562 rstf_instrT * ir = &(rstbuf[idx].instr);
563 int cpuid = rstf_instrT_get_cpuid(ir);
564 uint64_t pc = ir->pc_va;
565
566 int bt_pred_hit = (instr_preds & instr_pred_bt) ? 1 : 0;
567
568 // ea_valid pred: predict ea_valid is true
569 ir->ea_valid = (instr_preds & instr_pred_ea_valid) ? 1 : 0;
570 if (!ir->ea_valid) {
571 perf_stats[ps_ea_valid_misses]++;
572 }
573
574 sdata->bitarrays[dcti_ea_va_pred_array]->GetNext(v);
575 int ea_pred_hit = v;
576 if (!ea_pred_hit) {
577 sdata->bitarrays[raw_value64_array]->GetNext(v);
578 ir->ea_va = v;
579 }
580
581 if (icdata->dinfo.flags.iscbranch) {
582
583 // use branch predictor
584 // pred_bt = tdata[cpuid]->bp->predict(pc, ir->bt);
585 ir->bt = tdata[cpuid]->bp->actual_outcome(pc, bt_pred_hit);
586
587 perf_stats[ps_brpred_refs]++;
588 if (!bt_pred_hit) {
589 perf_stats[ps_brpred_misses]++;
590 }
591
592 if (ir->bt) {
593 tdata[cpuid]->pred_npc = icdata->target;
594 if (tdata[cpuid]->pred_amask) {
595 tdata[cpuid]->pred_npc &= rz3_amask_mask;
596 }
597 } // else - pred_npc is already set to pc+4
598
599 } else if (icdata->dinfo.flags.isubranch && ! icdata->dinfo.flags.isubranch_nottaken) {
600
601 // pred_npc is branch target
602 ir->bt = bt_pred_hit; // pred_bt = 1;
603 tdata[cpuid]->pred_npc = icdata->target;
604 if (tdata[cpuid]->pred_amask) {
605 tdata[cpuid]->pred_npc &= rz3_amask_mask;
606 }
607 } else if (icdata->dinfo.flags.iscall) {
608
609 ir->bt = bt_pred_hit; // pred_bt = 1;
610 tdata[cpuid]->pred_npc = icdata->target;
611 if (tdata[cpuid]->pred_amask) {
612 tdata[cpuid]->pred_npc &= rz3_amask_mask;
613 }
614 // push pc to ras unless following (delay slot) instr is restore
615 tdata[cpuid]->ras->push(pc);
616 tdata[cpuid]->call_delay_slot = true;
617
618 } else if (icdata->dinfo.flags.isindirect) {
619
620 ir->bt = bt_pred_hit; // pred_bt = 1;
621 // if jmpl, use prediction table
622 // if ret/retl, use RAS
623 if (icdata->dinfo.flags.is_ret|icdata->dinfo.flags.is_retl) {
624
625 perf_stats[ps_ras_refs]++;
626 tdata[cpuid]->pred_npc = tdata[cpuid]->ras->pop() + 8;
627 if (tdata[cpuid]->pred_amask) {
628 tdata[cpuid]->pred_npc &= rz3_amask_mask;
629 }
630 if (ea_pred_hit) { // if (tdata[cpuid]->pred_npc == ir->ea_va) {
631 } else {
632 tdata[cpuid]->ras->clear();
633 // sdata->ras_miss_count++;
634 perf_stats[ps_ras_misses]++;
635 }
636
637 } else if ( ((ir->instr >> 25) & 0x1f) == 15 ) {
638
639 // push unless following (delay-slot) instr is restore
640 tdata[cpuid]->ras->push(pc);
641 tdata[cpuid]->call_delay_slot = true;
642
643 tdata[cpuid]->pred_npc = tdata[cpuid]->jmpl_table->get(pc >> 2);
644 if (tdata[cpuid]->pred_amask) {
645 tdata[cpuid]->pred_npc &= rz3_amask_mask;
646 }
647 if (! ea_pred_hit) { // if (tdata[cpuid]->pred_npc != ir->ea_va) {
648 // ea_va misprediction (pred_ea_va is set to pred_npc for dctis)
649 tdata[cpuid]->jmpl_table->set(pc>>2, ir->ea_va);
650 }
651 } // is this a ret/retl or indirect call?
652
653 /* else do nothing */
654 } else {
655 ir->bt = ! bt_pred_hit;
656 } // what type of dcti?
657
658 // ea_va: predict pred_npc is ea_va
659 if (ea_pred_hit) {
660 ir->ea_va = tdata[cpuid]->pred_npc;
661 } else {
662 // we got ea_va from the raw_value64_array
663 tdata[cpuid]->pred_npc = ir->ea_va;
664 }
665
666 // annul flag for *next* instr
667 if (icdata->dinfo.flags.annul_flag) {
668 if ((icdata->dinfo.flags.iscbranch && !ir->bt) || icdata->dinfo.flags.isubranch) {
669 tdata[cpuid]->pred_an = 1;
670 }
671 }
672
673} // rstzip3::compress_dcti()
674
675
676void rstzip3::decompress_ea_va(rstf_unionT * rstbuf, int idx)
677{
678 uint64_t v;
679 rstf_instrT * ir = &(rstbuf[idx].instr);
680 int cpuid = rstf_instrT_get_cpuid(ir);
681
682 decompress_value(cpuid, v);
683 ir->ea_va = v;
684} // void rstzip3::decompress_ea_va(rstf_unionT * rstbuf, int idx)
685
686
687
688
689void rstzip3::decompress_regval(rstf_unionT * rstbuf, int idx)
690{
691 uint64_t v;
692
693 rstf_regvalT * vr = &(rstbuf[idx].regval);
694
695 // cpuid
696 int cpuid;
697 sdata->bitarrays[cpuid_pred_array]->GetNext(v);
698 if (v) {
699 cpuid = last_instr_cpuid;
700 } else {
701 sdata->bitarrays[raw_cpuid_array]->GetNext(v);
702 cpuid = v;
703 }
704 rstf_regvalT_set_cpuid(vr, cpuid);
705
706 // tdata
707 if (tdata[cpuid] == NULL) {
708 tdata[cpuid] = new rz3_percpu_data(cpuid);
709 }
710
711 // postInstr
712 sdata->bitarrays[regval_postInstr_array]->GetNext(v);
713 vr->postInstr = v;
714
715 // regtype, regid
716 uint64_t prev_pc = tdata[cpuid]->prev_pc;
717 int regtype_tbl_idx = (prev_pc >> 2) & (rz3_percpu_data::rz3_tdata_regval_regtype_tbl_size-1);
718 int regid_tbl_idx = (prev_pc >> 2) & (rz3_percpu_data::rz3_tdata_regval_regid_tbl_size-1);
719
720 int k;
721 for (k=0; k<2; k++) {
722
723 // predict regtype: use prev_instr
724 uint8_t pred_regtype = tdata[cpuid]->regval_regtype_tbl[k][regtype_tbl_idx];
725
726 sdata->bitarrays[regval_regtype_pred_array]->GetNext(v);
727 if (v) {
728 vr->regtype[k] = pred_regtype;
729 } else {
730 sdata->bitarrays[regval_raw_regtype_array]->GetNext(v);
731 vr->regtype[k] = v;
732 tdata[cpuid]->regval_regtype_tbl[k][regtype_tbl_idx] = vr->regtype[k];
733 }
734
735 if (vr->regtype[k] != RSTREG_UNUSED_RT) {
736
737 // regid
738 uint8_t pred_regid = tdata[cpuid]->regval_regid_tbl[k][regid_tbl_idx];
739 if (prev_rtype == REGVAL_T) { // probably in save/restore code: predict regid = prev_regid+2
740 pred_regid += 2;
741 }
742 sdata->bitarrays[regval_regid_pred_array]->GetNext(v);
743 if (v) {
744 vr->regid[k] = pred_regid;
745 } else {
746 sdata->bitarrays[regval_raw_regid_array]->GetNext(v);
747 vr->regid[k] = v;
748 }
749
750 // we always update update the table.
751 // even if our prediction is correct, the predicted value is different from the value read from the table in case of save/restore
752 tdata[cpuid]->regval_regid_tbl[k][regid_tbl_idx] = vr->regid[k];
753
754 // is this reg %g0 ? if so, set value to zero
755 if ((vr->regtype[k] == RSTREG_INT_RT) && (vr->regid[k] == 0)) {
756 vr->reg64[k] = 0x0;
757 }
758
759 // reg64
760 sdata->bitarrays[value_iszero_array]->GetNext(v);
761 if (v) {
762 vr->reg64[k] = 0;
763 } else {
764 decompress_value(cpuid, v);
765 vr->reg64[k] = v;
766 }
767 } // if regtype != UNUSED
768 } // for reg field = 0,1
769
770 // fwrite(rstbuf+idx, sizeof(rstf_unionT), 1, testfp); fflush(testfp);
771
772#if defined(ARCH_AMD64)
773 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
774#endif
775} // void rstzip3::decompress_regval(rstf_unionT * rstbuf, int idx)
776
777
778void rstzip3::decompress_memval(rstf_unionT * rstbuf, int idx)
779{
780 uint64_t v;
781
782 rstf_memval64T * m64 = & (rstbuf[idx].memval64);
783 rstf_memval128T * m128 = & (rstbuf[idx].memval128);
784
785 sdata->bitarrays[memval_fields_array]->GetNext(v);
786 m128->ismemval128 = v;
787
788 sdata->bitarrays[memval_fields_array]->GetNext(v);
789 m128->addrisVA = ! v;
790
791 // cpuid
792 int cpuid;
793 sdata->bitarrays[cpuid_pred_array]->GetNext(v);
794 if (v) {
795 cpuid = pred_cpuid;
796 } else {
797 sdata->bitarrays[raw_cpuid_array]->GetNext(v);
798 cpuid = v;
799 }
800 rstf_memval128T_set_cpuid(m128, cpuid);
801 if (tdata[cpuid] == NULL) {
802 tdata[cpuid] = new rz3_percpu_data(cpuid);
803 }
804
805 if (m128->ismemval128) {
806 sdata->bitarrays[memval_fields_array]->GetNext(v);
807 m128->isContRec = v;
808 if (! m128->isContRec) {
809 sdata->bitarrays[memval_addr36_43_array]->GetNext(v);
810 m128->addr36_43 = v;
811 sdata->bitarrays[memval_addr04_35_array]->GetNext(v);
812 m128->addr04_35 = v;
813 }
814
815 // vals
816 decompress_value(cpuid, v);
817 m128->val[0] = v;
818 decompress_value(cpuid, v);
819 m128->val[1] = v;
820
821 } else {
822
823 // size
824 sdata->bitarrays[memval_size_array]->GetNext(v);
825 m64->size = v+1;
826
827 decompress_value(cpuid, v);
828 m64->addr = v;
829 decompress_value(cpuid, v);
830 m64->val = v;
831
832 }
833#if defined(ARCH_AMD64)
834 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
835#endif
836} // void rstzip3::decompress_memval(rstf_unionT * rstbuf, int idx)
837
838void rstzip3::decompress_trap(rstf_unionT * rstbuf, int idx)
839{
840 uint64_t v;
841 rstf_trapT * tr = &(rstbuf[idx].trap);
842 sdata->bitarrays[cpuid_pred_array]->GetNext(v);
843 int cpuid;
844 if (v) {
845 cpuid = pred_cpuid;
846 } else {
847 sdata->bitarrays[raw_cpuid_array]->GetNext(v);
848 cpuid = v;
849 }
850 rstf_trapT_set_cpuid(tr, cpuid);
851
852 sdata->bitarrays[trap_info_array]->GetNext(v);
853 tr->is_async = (v>>48) & 1;
854 tr->tl = (v>>44) & 0xf;
855 tr->ttype = (v>>34) & 0x3ff;
856 tr->pstate = (v>>18) & 0xffff;
857 tr->syscall = (v>>2) & 0xfff;
858 uint64_t pred_npc;
859 if ((v>>1) & 1) { // pred_pc = true
860 tr->pc = tdata[cpuid]->pred_pc;
861 pred_npc = tdata[cpuid]->pred_npc;
862 } else {
863 uint64_t pc;
864 sdata->bitarrays[raw_value64_array]->GetNext(pc);
865 tr->pc = pc;
866 pred_npc = pc+4;
867 }
868
869 if (v & 1) {
870 tr->npc = pred_npc;
871 } else {
872 uint64_t npc;
873 sdata->bitarrays[raw_value64_array]->GetNext(npc);
874 tr->npc = npc;
875 }
876#if defined(ARCH_AMD64)
877 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
878#endif
879} // void rstzip3::decompress_trap(rstf_unionT * rstbuf, int idx)
880
881
882void rstzip3::decompress_tlb(rstf_unionT * rstbuf, int idx)
883{
884 rstf_tlbT * tr = &(rstbuf[idx].tlb);
885 uint64_t tlb_info;
886 sdata->bitarrays[tlb_info_array]->GetNext(tlb_info);
887 if ((header->major_version == 3) && (header->minor_version <= 19)) {
888 tr->demap = (tlb_info>>25) & 0x1;
889 tr->tlb_index = (tlb_info >> 9) & 0xffff;
890 tr->tlb_type = (tlb_info >> 8) & 1;
891 tr->tlb_no = (tlb_info >> 6) & 3;
892 int cpuid = (tlb_info) & 0x3f;
893 rstf_tlbT_set_cpuid(tr, cpuid);
894 } else {
895 tr->demap = (tlb_info>>29) & 0x1;
896 tr->tlb_index = (tlb_info >> 13) & 0xffff;
897 tr->tlb_type = (tlb_info >> 12) & 1;
898 tr->tlb_no = (tlb_info >> 10) & 3;
899 int cpuid = (tlb_info) & 0x3ff;
900 rstf_tlbT_set_cpuid(tr, cpuid);
901 }
902
903 uint64_t v;
904 sdata->bitarrays[raw_value64_array]->GetNext(v);
905 tr->tte_tag = v;
906 sdata->bitarrays[raw_value64_array]->GetNext(v);
907 tr->tte_data = v;
908
909#if defined(ARCH_AMD64)
910 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
911#endif
912} // void rstzip3::decompress_tlb(rstf_unionT * rstbuf, int idx)
913
914
915void rstzip3::decompress_preg(rstf_unionT * rstbuf, int idx)
916{
917 rstf_pregT * pr = &(rstbuf[idx].preg);
918
919 uint64_t preg_info;
920 sdata->bitarrays[raw_value64_array]->GetNext(preg_info);
921
922 int cpuid;
923 if ((preg_info>>61) & 1) {
924 cpuid = pred_cpuid;
925 } else {
926 uint64_t v;
927 sdata->bitarrays[raw_cpuid_array]->GetNext(v);
928 cpuid = v;
929 }
930 rstf_pregT_set_cpuid(pr, cpuid);
931
932 pr->primD = (preg_info >> 48) & 0x1fff;
933 pr->primA = pr->primD;
934 pr->secD = (preg_info >> 35) & 0x1fff;
935 pr->secA = pr->secD;
936 pr->asiReg = (preg_info >> 27) & 0xff;
937 pr->traplevel = (preg_info >> 24) & 7;
938 pr->traptype = (preg_info >> 16) & 0xff;
939 pr->pstate = preg_info & 0xffff;
940
941#if defined(ARCH_AMD64)
942 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
943#endif
944} // void rstzip3::decompress_preg(rstf_unionT * rstbuf, int idx)
945
946
947void rstzip3::decompress_dma(rstf_unionT * rstbuf, int idx)
948{
949 uint64_t v;
950 rstf_dmaT * dr = &(rstbuf[idx].dma);
951
952 sdata->bitarrays[dma_iswrite_array]->GetNext(v);
953 dr->iswrite = v;
954
955 sdata->bitarrays[dma_nbytes_array]->GetNext(v);
956 dr->nbytes = v;
957
958 sdata->bitarrays[raw_value64_array]->GetNext(v);
959 dr->start_pa = v;
960
961 if (!pre323) {
962 sdata->bitarrays[raw_value64_array]->GetNext(v);
963 dr->devid = v;
964 }
965
966#if defined(ARCH_AMD64)
967 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
968#endif
969} // void rstzip3::decompress_dma(rstf_unionT * rstbuf, int idx)
970
971
972
973void rstzip3::decompress_rfs_cw(rstf_unionT * rstbuf, int idx)
974{
975 uint64_t v;
976
977 rstf_cachewarmingT *cw = &(rstbuf[idx].cachewarming);
978
979 sdata->bitarrays[rfs_cw_raw_reftype_array]->GetNext(v);
980 cw->reftype = v;
981
982 sdata->bitarrays[rfs_raw_cpuid_array]->GetNext(v);
983 int cpuid;
984 if ((cw->reftype != cw_reftype_DMA_R) && (cw->reftype != cw_reftype_DMA_W)) {
985 rstf_cachewarmingT_set_cpuid(cw, v);
986 cpuid = v;
987 } else {
988 // cw cpuid is already 0 because we had cleared the memory
989 cpuid = 0;
990 }
991
992 if (tdata[cpuid] == NULL) {
993 tdata[cpuid] = new rz3_percpu_data(cpuid);
994 }
995
996 if ((cw->reftype == cw_reftype_DMA_R) || (cw->reftype == cw_reftype_DMA_W)) {
997 sdata->bitarrays[raw_value64_array]->GetNext(v);
998 cw->pa = v;
999 sdata->bitarrays[rfs_cw_dma_size_array]->GetNext(v);
1000 cw->refinfo.dma_size = v;
1001 } else /* not DMA */ {
1002 // asi
1003 sdata->bitarrays[rfs_cw_asi_array]->GetNext(v); cw->refinfo.s.asi = v;
1004
1005 // fcn
1006 if (cw->reftype == cw_reftype_PF_D) {
1007 sdata->bitarrays[rfs_cw_pf_fcn_array]->GetNext(v); cw->refinfo.s.fcn = v;
1008 }
1009
1010 // va_valid
1011 sdata->bitarrays[rfs_cw_va_valid_array]->GetNext(v); cw->refinfo.s.va_valid = v;
1012
1013 if (cw->refinfo.s.va_valid) {
1014 // va
1015 decompress_value(cpuid, v); cw->va = v;
1016
1017 // tlb hit/miss
1018 sdata->bitarrays[rfs_cw_pa_pred_array]->GetNext(v);
1019 if (v) {
1020 uint64_t pred_pa;
1021 if (cw->reftype == cw_reftype_I) {
1022 pred_pa = tdata[cpuid]->itlb->get(cw->va>>13) << 13;
1023 } else {
1024 if (header->minor_version <= 20) {
1025 // backward compatibility: this was a bug in both compress & decompress fixed in 3.21
1026 pred_pa = tdata[cpuid]->itlb->get(cw->va>>13) << 13;
1027 } else {
1028 pred_pa = tdata[cpuid]->dtlb->get(cw->va>>13) << 13;
1029 }
1030 }
1031 pred_pa |= (cw->va & 0x1fffull);
1032 cw->pa = pred_pa;
1033 } else {
1034 sdata->bitarrays[raw_value64_array]->GetNext(v); cw->pa = v;
1035 if (cw->reftype == cw_reftype_I) {
1036 tdata[cpuid]->itlb->set(cw->va>>13, cw->pa>>13);
1037 } else {
1038 tdata[cpuid]->dtlb->set(cw->va>>13, cw->pa>>13);
1039 }
1040 }
1041
1042 } else {
1043 sdata->bitarrays[raw_value64_array]->GetNext(v); cw->pa = v;
1044 }
1045 } // DMA?
1046
1047#if defined(ARCH_AMD64)
1048 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
1049#endif
1050} // void rstzip3::decompress_rfs_cw(rstf_unionT * rstbuf, int idx)
1051
1052
1053
1054
1055void rstzip3::decompress_rfs_bt(rstf_unionT * rstbuf, int idx)
1056{
1057 uint64_t v;
1058
1059 rstf_bpwarmingT * bt = &(rstbuf[idx].bpwarming);
1060
1061 // cpuid
1062 sdata->bitarrays[rfs_raw_cpuid_array]->GetNext(v);
1063 int cpuid = v;
1064 rstf_bpwarmingT_set_cpuid(bt, cpuid);
1065 if (tdata[cpuid] == NULL) {
1066 tdata[cpuid] = new rz3_percpu_data(cpuid);
1067 }
1068
1069 // pc
1070 sdata->bitarrays[rfs_pc_pred_array]->GetNext(v);
1071 if (v) {
1072 bt->pc_va = tdata[cpuid]->rfs_pc_pred_table->get(tdata[cpuid]->rfs_prev_npc);
1073 } else {
1074 sdata->bitarrays[raw_value64_array]->GetNext(v); bt->pc_va = v;
1075 tdata[cpuid]->rfs_pc_pred_table->set(tdata[cpuid]->rfs_prev_npc, bt->pc_va);
1076 }
1077
1078 // instr: use icache
1079 sdata->bitarrays[rfs_instr_pred_array]->GetNext(v);
1080 rz3iu_icache_data * icdata;
1081 if (v) {
1082 icdata = tdata[cpuid]->icache->get(bt->pc_va);
1083 bt->instr = icdata->instr;
1084 } else {
1085 sdata->bitarrays[raw_instr_array]->GetNext(v);
1086 bt->instr = v;
1087 icdata = tdata[cpuid]->icache->set(bt->pc_va, bt->instr, header->major_version, header->minor_version);
1088 icdata->gen_target(bt->pc_va);
1089 }
1090
1091 // bt
1092 sdata->bitarrays[rfs_bt_pred_array]->GetNext(v);
1093 int bt_pred_hit = v;
1094 if (icdata->dinfo.flags.iscbranch) {
1095 bt->taken = tdata[cpuid]->bp->actual_outcome(bt->pc_va, bt_pred_hit);
1096 } else if (icdata->dinfo.flags.isubranch && icdata->dinfo.flags.isubranch_nottaken) {
1097 bt->taken = ! bt_pred_hit;
1098 } else {
1099 bt->taken = bt_pred_hit;
1100 }
1101
1102 // target
1103 sdata->bitarrays[dcti_ea_va_pred_array]->GetNext(v);
1104 if (v) {
1105 bt->npc_va = bt->taken ? icdata->target : (bt->pc_va+8);
1106 } else {
1107 sdata->bitarrays[raw_value64_array]->GetNext(v); bt->npc_va = v;
1108 }
1109
1110 tdata[cpuid]->rfs_prev_npc = bt->npc_va;
1111
1112 tdata[cpuid]->pred_pc = tdata[cpuid]->rfs_pc_pred_table->get(bt->npc_va);
1113
1114#if defined(ARCH_AMD64)
1115 rstf_convertT::l2b((rstf_uint8T*)&rstbuf[idx]);
1116#endif
1117} // void rstzip3::decompress_rfs_bt(rstf_unionT * rstbuf, int idx)
1118
1119
1120bool rstzip3::decompress_value(int cpuid, uint64_t & v64)
1121{
1122 uint64_t key;
1123 uint64_t level;
1124 sdata->bitarrays[valuecache_level_array]->GetNext(level);
1125 sdata->bitarrays[valuecache_data0_array+level]->GetNext(key);
1126 return tdata[cpuid]->valuecache->Retrieve(level, key, v64);
1127}