Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | // ========== Copyright Header Begin ========================================== |
2 | // | |
3 | // OpenSPARC T2 Processor File: compress_engine.C | |
4 | // Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
5 | // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
6 | // | |
7 | // The above named program is free software; you can redistribute it and/or | |
8 | // modify it under the terms of the GNU General Public | |
9 | // License version 2 as published by the Free Software Foundation. | |
10 | // | |
11 | // The above named program is distributed in the hope that it will be | |
12 | // useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | // | |
16 | // You should have received a copy of the GNU General Public | |
17 | // License along with this work; if not, write to the Free Software | |
18 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | // | |
20 | // ========== Copyright Header End ============================================ | |
21 | /* compress_engine.C */ | |
22 | ||
23 | #include <stdio.h> | |
24 | #include <stdlib.h> | |
25 | #include <string.h> | |
26 | #include <assert.h> | |
27 | ||
28 | #include "rstf/rstf.h" | |
29 | ||
30 | #if defined(ARCH_AMD64) | |
31 | #include "rstf/rstf_convert.h" | |
32 | #endif | |
33 | ||
34 | #include "rstzip3.h" | |
35 | #include "rz3_section.h" | |
36 | ||
37 | #include "rz3iu.h" | |
38 | ||
39 | /* debug stuff */ | |
40 | static const bool dbg_ras = false; | |
41 | static const bool dbg_regid = false; | |
42 | ||
43 | ||
44 | // rstbufsize <= rz3_bufsize | |
45 | int rstzip3::compress_buffer(rstf_unionT * rstbuf, int rstbufsize) | |
46 | { | |
47 | ||
48 | shdr->clear(); | |
49 | sdata->clear(); | |
50 | ||
51 | // set shdr->clearflag if records_since_prev_clear >= clear_interval | |
52 | // clear predictor tables in tdata if shdr->clearflag is set | |
53 | ||
54 | // if (verbose) clear_stats(); | |
55 | clear_stats(); | |
56 | ||
57 | // write record count to header | |
58 | shdr->nrecords = rstbufsize; | |
59 | ||
60 | int i; | |
61 | for (i=0; i<rstbufsize; i++) { | |
62 | if (rfs_phase) { | |
63 | if (rfs_cw_phase) { | |
64 | if (rstbuf[i].proto.rtype == RFS_CW_T) { | |
65 | sdata->bitarrays[rfs_rtype_pred_array]->Push(1); | |
66 | rfs_records_seen++; | |
67 | if (rfs_records_seen == rfs_nrecords) { | |
68 | rfs_phase = rfs_cw_phase = false; | |
69 | } | |
70 | } else /* rfs cw rtype mispred */ { | |
71 | sdata->bitarrays[rfs_rtype_pred_array]->Push(0); | |
72 | sdata->bitarrays[rtype_array]->Push(rstbuf[i].proto.rtype); | |
73 | rfs_phase = rfs_cw_phase = false; | |
74 | } // rfs cw rtype pred | |
75 | } else if (rfs_bt_phase) { | |
76 | if (rstbuf[i].proto.rtype == RFS_BT_T) { | |
77 | sdata->bitarrays[rfs_rtype_pred_array]->Push(1); | |
78 | rfs_records_seen++; | |
79 | if (rfs_records_seen == rfs_nrecords) { | |
80 | rfs_phase = rfs_bt_phase = false; | |
81 | } | |
82 | } else /* rfs bt rtype mispred */ { | |
83 | sdata->bitarrays[rfs_rtype_pred_array]->Push(0); | |
84 | sdata->bitarrays[rtype_array]->Push(rstbuf[i].proto.rtype); | |
85 | rfs_phase = rfs_bt_phase = false; | |
86 | } // rfs bt rtype pred | |
87 | } // which rfs phase? */ | |
88 | } else /* regular rst phase */ { | |
89 | // rtype compression | |
90 | if (rstbuf[i].proto.rtype == INSTR_T) { | |
91 | sdata->bitarrays[rtype_key_array]->Push(rtype_key_INSTR); | |
92 | } else if (rstbuf[i].proto.rtype == REGVAL_T) { | |
93 | sdata->bitarrays[rtype_key_array]->Push(rtype_key_REGVAL); | |
94 | } else if (rstbuf[i].proto.rtype == PAVADIFF_T) { | |
95 | sdata->bitarrays[rtype_key_array]->Push(rtype_key_PAVADIFF); | |
96 | } else { | |
97 | sdata->bitarrays[rtype_key_array]->Push(rtype_key_RAW); | |
98 | sdata->bitarrays[rtype_array]->Push(rstbuf[i].proto.rtype); | |
99 | } | |
100 | } // phase: rfs cw, rfs bt or regular rst? | |
101 | ||
102 | switch(rstbuf[i].proto.rtype) { | |
103 | case INSTR_T: | |
104 | compress_inst(rstbuf, i); | |
105 | break; | |
106 | case REGVAL_T: | |
107 | compress_regval(rstbuf, i); | |
108 | break; | |
109 | case PAVADIFF_T: | |
110 | compress_pavadiff(rstbuf, i); | |
111 | break; | |
112 | case TLB_T: | |
113 | compress_tlb(rstbuf, i); | |
114 | break; | |
115 | case PREG_T: | |
116 | compress_preg(rstbuf, i); | |
117 | break; | |
118 | case TRAP_T: | |
119 | compress_trap(rstbuf, i); | |
120 | break; | |
121 | case DMA_T: | |
122 | compress_dma(rstbuf, i); | |
123 | break; | |
124 | case MEMVAL_T: | |
125 | compress_memval(rstbuf, i); | |
126 | break; | |
127 | case RFS_CW_T: | |
128 | if ((rfs_records_seen == 0) && ! rfs_cw_phase) { | |
129 | // in case there was no rfs preamble, section header etc. | |
130 | rfs_phase = rfs_cw_phase = true; | |
131 | rfs_nrecords = rfs_unknown_nrecords; | |
132 | rfs_records_seen = 1; | |
133 | } | |
134 | compress_rfs_cw(rstbuf, i); | |
135 | break; | |
136 | case RFS_BT_T: | |
137 | if ((rfs_records_seen == 0) && ! rfs_bt_phase) { | |
138 | // in case there was no rfs preamble, section header etc. | |
139 | rfs_phase = rfs_bt_phase = true; | |
140 | rfs_nrecords = rfs_unknown_nrecords; | |
141 | rfs_records_seen = 1; | |
142 | } | |
143 | compress_rfs_bt(rstbuf, i); | |
144 | break; | |
145 | ||
146 | case RSTHEADER_T: | |
147 | // write raw records to output | |
148 | #if defined(ARCH_AMD64) | |
149 | { | |
150 | rstf_unionT temp; | |
151 | memcpy(&temp, &rstbuf[i], sizeof(rstf_unionT)); | |
152 | rstf_convertT::l2b((rstf_uint8T*)&temp); | |
153 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[0])); | |
154 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[1])); | |
155 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[2])); | |
156 | } | |
157 | #else | |
158 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[0]); | |
159 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[1]); | |
160 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[2]); | |
161 | #endif | |
162 | if (rstbuf[i].header.majorVer*1000+rstbuf[i].header.minorVer <= 2011) { | |
163 | rstf_pre212 = true; | |
164 | } | |
165 | ||
166 | break; | |
167 | ||
168 | default: | |
169 | // write raw records to output | |
170 | #if defined(ARCH_AMD64) | |
171 | { | |
172 | rstf_unionT temp; | |
173 | memcpy(&temp, &rstbuf[i], sizeof(rstf_unionT)); | |
174 | rstf_convertT::l2b((rstf_uint8T*)&temp); | |
175 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[0])); | |
176 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[1])); | |
177 | sdata->bitarrays[raw_value64_array]->Push(byteswap64(temp.arr64.arr64[2])); | |
178 | } | |
179 | #else | |
180 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[0]); | |
181 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[1]); | |
182 | sdata->bitarrays[raw_value64_array]->Push(rstbuf[i].arr64.arr64[2]); | |
183 | #endif | |
184 | ||
185 | if (rstbuf[i].proto.rtype == RFS_SECTION_HEADER_T) { | |
186 | if (rstbuf[i].rfs_section_header.section_type == RFS_CW_T) { | |
187 | rfs_phase = rfs_cw_phase = true; | |
188 | rfs_nrecords = rstbuf[i].rfs_section_header.n_records; | |
189 | rfs_records_seen = 0; | |
190 | } else if (rstbuf[i].rfs_section_header.section_type == RFS_BT_T) { | |
191 | rfs_phase = rfs_bt_phase = true; | |
192 | rfs_nrecords = rstbuf[i].rfs_section_header.n_records; | |
193 | rfs_records_seen = 0; | |
194 | } // else - do nothing | |
195 | } // if rfs section header | |
196 | ||
197 | break; | |
198 | } // what rtype? */ | |
199 | ||
200 | prev_rtype = rstbuf[i].proto.rtype; | |
201 | } // for each record | |
202 | ||
203 | sdata->update_counts(); | |
204 | ||
205 | if (stats) update_stats(); | |
206 | ||
207 | if (! shdr->write(gzf)) { | |
208 | perror("ERROR: rstzip3::compress_Buffer(): could not write section header to output file\n"); | |
209 | return 0; | |
210 | } | |
211 | ||
212 | if (! sdata->write(gzf)) { | |
213 | perror("ERROR: rstzip3::compress_buffer(): could not write section data to output file\n"); | |
214 | return 0; | |
215 | } | |
216 | ||
217 | ||
218 | if (verbose) { | |
219 | fprintf(stderr, "Section %d\n", nsections); | |
220 | sdata->print(); | |
221 | } | |
222 | ||
223 | if (stats) print_stats(); | |
224 | ||
225 | nsections++; | |
226 | ||
227 | return rstbufsize; | |
228 | } // rstzip3::compress_buffer | |
229 | ||
230 | ||
231 | static bool ds_indicates_tail_call(uint32_t instr) { | |
232 | return (instr == MOV_G1_G7_INSTR) || ((instr & RESTORE_OPCODE_MASK) == RESTORE_OPCODE_BITS); | |
233 | } | |
234 | ||
// Compress one INSTR_T record.
//
// Predicts (against per-cpu state in tdata[cpuid]) the cpuid, pc, annul
// bit, instruction word (via a software icache), tr/pr/hpriv bits,
// bt, ea_valid and ea_va. Each mispredict clears a bit in instr_preds
// (the instr_mispred_* values act as AND-masks); if everything was
// predicted, a single 1 bit is emitted, otherwise a 0 bit followed by
// the raw instr_preds bitmap. Mispredicted values are pushed to their
// raw arrays as they are discovered.
void rstzip3::compress_inst(rstf_unionT * rstbuf, int idx)
{

  rstf_instrT *ir = &(rstbuf[idx].instr);

  // check cpuid
  // pre-2.12 traces store cpuid in a plain field; newer traces need the
  // accessor (field layout changed)
  uint16_t cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir);
  if (pred_cpuid == cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }

  // predict cpuid. assume round robin. FIXME: for now, assump uP traces
  if (tdata[cpuid+1] == NULL) {
    pred_cpuid = 0;
  } else {
    pred_cpuid = cpuid+1;
  }
  last_instr_cpuid = cpuid;

  // lazily allocate per-cpu predictor state on first sight of a cpu
  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // start with "all predictions correct"; bits get cleared below
  instr_preds = instr_mispred_none;

  // amask bit: if amask is 0, all 64-bits of pred_pc are used. if not, only the lower 32-bits are used
  // we check and set the amask bit on a pc misprediction. if the misprediction leaves the lower 32-bits unchanged
  // but differs in the upper 32-bits, we set/clear amask accordingly
  // check pc
  uint64_t pc = ir->pc_va;
  uint64_t pred_pc = tdata[cpuid]->pred_pc;
  bool pc_pred = (pred_pc == ir->pc_va);
  if (!pc_pred) {
    instr_preds &= instr_mispred_pc;

    sdata->bitarrays[raw_value64_array]->Push(pc);

    // is our amask to blame?
    if ((pc & rz3_amask_mask) == (pred_pc & rz3_amask_mask)) {
      // lower 32 bits match
      if ((pc >> 32) != 0) {
        // if amask was 1, it should be 0. if it was already zero, amask is not to blame, but set it to 0 anyway
        tdata[cpuid]->pred_amask = 0;
      } else {
        // if amask was 0, it should be 1. if it was already 1, we shouldn't be here.
        // (diagnostic disabled by the "0 &&")
        if (0 && tdata[cpuid]->pred_amask) {
          fprintf(stderr, "rz3: compress_inst: amask was set but predicted pc was > 32 bits: pred_pc %llx actual %llx\n", pred_pc, pc);
        }
        tdata[cpuid]->pred_amask = 1;
      }
    }

    // resume straight-line prediction from the actual pc
    tdata[cpuid]->pred_npc = pc+4;
  }

  // (pc, npc) <= (npc, npc+4)
  tdata[cpuid]->pred_pc = tdata[cpuid]->pred_npc;
  tdata[cpuid]->pred_npc += 4; // this may be updated later in case of CTIs

  tdata[cpuid]->prev_pc = pc;

  // check annul bit
  if (tdata[cpuid]->pred_an != ir->an) {
    instr_preds &= instr_mispred_an;
    perf_stats[ps_an_misses]++;
    // sdata->an_mispred_count++;
  }

  // predict and check instr
  // icache lookup by pc; a hit with matching word means the instr need
  // not be emitted raw
  rz3iu_icache_data * icdata = tdata[cpuid]->icache->get(pc);
  uint32_t instr = ir->instr;
  if ((icdata == NULL) || (icdata->instr != ir->instr)) {
    // ic miss
    instr_preds &= instr_mispred_instr;

    sdata->bitarrays[raw_instr_array]->Push(instr);

    icdata = tdata[cpuid]->icache->set(pc, instr, rstzip3_major_version, rstzip3_minor_version);

    // for non-annulled delayed CTIs, precompute the branch target
    if ((!ir->an) && icdata->dinfo.flags.isdcti) {
      icdata->gen_target(pc);
    }
  }
  tdata[cpuid]->last_instr = ir->an ? 0x0 : instr;

  // if this is a delay slot of a call instr, we need to pop ras if "restore" or mov_g1_g7 instr
  // (tail-call idiom: the callee will return to the caller's caller)
  if (tdata[cpuid]->call_delay_slot) {
    if ( ((instr & RESTORE_OPCODE_MASK) == RESTORE_OPCODE_BITS) || (instr == MOV_G1_G7_INSTR) ) {
      tdata[cpuid]->ras->pop();
    }
    tdata[cpuid]->call_delay_slot = false;
  }


  // tr and pr bits.
  // predict and set tr BEFORE decompress_ea_va because ea_valid prediction depends on the tr bit
  // tr is usually 0. we follow the convention of
  // inserting all 1's where possible. so we *invert* the tr bit
  if (ir->tr) {
    instr_preds &= instr_mispred_tr;
  }

  // for the hpriv bit, we predict it based on the previous instr
  // this is new in v3.20 and up
  uint32_t hpriv = rstf_pre212 ? 0 : ir->hpriv;
  if (hpriv != tdata[cpuid]->pred_hpriv) {
    instr_preds &= instr_mispred_hpriv;
    tdata[cpuid]->pred_hpriv = hpriv;
    if (hpriv) {
      // entering hyperprivileged mode: also reset the pr prediction
      tdata[cpuid]->pred_pr = 0;
    }
  }

  // for the pr bit, we predict it based on the previous instr
  if (ir->pr != tdata[cpuid]->pred_pr) {
    instr_preds &= instr_mispred_pr;
    tdata[cpuid]->pred_pr = ir->pr;
  }

  // predict ea_valid, ea_va, bt, NEXT-instr an

  if (!ir->an) {
    if (icdata->dinfo.flags.isdcti) {

      // delayed CTI: bt/ea/next-an prediction handled separately
      compress_dcti(rstbuf, idx, icdata);

    } else /* not dcti */ {

      // predict bt == 0
      // (except done/retry, which report bt taken)
      int pred_bt = icdata->dinfo.flags.is_done_retry;
      if (pred_bt != ir->bt) {
        instr_preds &= instr_mispred_bt;
      }

      // ea_valid=1 for ld/st/pf
      int pred_ea_valid;
      if (icdata->is_ldstpf) {
        // FIXME: make sure this is not an internal ASI
        pred_ea_valid = 1;
      } else if (icdata->dinfo.flags.is_done_retry) {
        pred_ea_valid = 1;
      } else if (ir->tr) {
        pred_ea_valid = 1;
      } else {
        pred_ea_valid = 0;
      }

      if (pred_ea_valid != ir->ea_valid) {
        instr_preds &= instr_mispred_ea_valid;
        perf_stats[ps_ea_valid_misses]++;
      }

      if (ir->ea_valid) {
        compress_ea_va(rstbuf, idx);
      }

      // a non-dcti never annuls its successor
      tdata[cpuid]->pred_an = 0;
    } // dcti?
  } // if not annulled

  // emit the summary bit (all-predicted) or the raw prediction bitmap
  if (instr_preds == instr_mispred_none) {
    sdata->bitarrays[instr_pred_all_array]->Push(1);
  } else {
    sdata->bitarrays[instr_pred_all_array]->Push(0);
    sdata->bitarrays[instr_pred_raw_array]->Push(instr_preds);
  }

} // rstzip3::compress_inst()
406 | ||
407 | ||
408 | ||
409 | void rstzip3::compress_ea_va(rstf_unionT * rstbuf, int idx) | |
410 | { | |
411 | rstf_instrT * ir = &(rstbuf[idx].instr); | |
412 | uint16_t cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir); | |
413 | ||
414 | // if value trace: predict ea using known reg values | |
415 | ||
416 | // predict ea using the rz3 value cache | |
417 | compress_value(cpuid, ir->ea_va); | |
418 | } // rstzip3::compress_ea_va | |
419 | ||
// Compress one PAVADIFF_T record (pa-va translation deltas).
//
// Predicts cpuid, icontext and dcontext against per-cpu state, then
// looks AHEAD in the buffer for the next INSTR_T of the same cpu to
// obtain the va's being translated; pc_pa_va/ea_pa_va are then
// predicted with software itlb/dtlb models keyed on va>>13 (8KB pages).
// The decompressor performs the same lookahead, so the lookahead flag
// below distinguishes "no prediction possible" from "mispredicted".
void rstzip3::compress_pavadiff(rstf_unionT * rstbuf, int idx)
{
  // disabled debug hook (note: message says "decompress_pavadiff" but
  // this is the compressor; dead code either way)
  if (0 && idx == 102577) {
    printf("debug: decompress_pavadiff idx %d\n", idx);
  }

  rstf_pavadiffT * dr = &(rstbuf[idx].pavadiff);
  int cpuid = rstf_pre212 ? dr->cpuid : rstf_pavadiffT_get_cpuid(dr);

  // check and predict cpuid
  if (pred_cpuid == cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }
  pred_cpuid = cpuid;

  // lazily allocate per-cpu predictor state
  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // predict icontext the same as prev icontext
  if (tdata[cpuid]->pred_icontext == dr->icontext) {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_ictxt_array]->Push(dr->icontext);
    tdata[cpuid]->pred_icontext = dr->icontext;
  }

  // dcontext - predict same as prev dcontext for this cpu
  if (tdata[cpuid]->pred_dcontext == dr->dcontext) {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_dctxt_array]->Push(dr->dcontext);
    tdata[cpuid]->pred_dcontext = dr->dcontext;
  }

  // lookahead results: the next same-cpu instr's pc_va / ea_va, if found
  bool found_pc_va = false;
  uint64_t nextpc_va;
  bool found_ea_va = false;
  uint64_t nextea_va;

  int i;
  for (i=idx+1; i<shdr->nrecords; i++) {
    if (rstbuf[i].proto.rtype == INSTR_T) {
      rstf_instrT * ir = &(rstbuf[i].instr);
      uint16_t i_cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir);

      if (i_cpuid == cpuid) {
        nextpc_va = ir->pc_va;
        found_pc_va = true;
        if (dr->ea_valid && ir->ea_valid) { // we only care about ea_va if dr->ea_valid
          nextea_va = ir->ea_va;
          found_ea_va = true;
        }
      } // if cpuid match
      break;
    } else if (rstbuf[i].proto.rtype == PAVADIFF_T) {
      rstf_pavadiffT * pd = &(rstbuf[i].pavadiff);
      uint16_t pd_cpuid = rstf_pre212 ? pd->cpuid : rstf_pavadiffT_get_cpuid(pd);
      if (pd_cpuid == cpuid) {
        // We ran into a second pavadiff record before seeing an instr record.
        // flag this as a no-pred (hence no lookahead).
        // If we don't do this, the decompression algorithm will break
        // because we only have a 1 item limit on the number of pending
        // pavadiffs to patch, and patching this pavadiff will break the next one.
        break;
      }
    } // if instr or pavadiff
  } // for each subsequent record

  // ea_valid
  sdata->bitarrays[pavadiff_ea_valid_array]->Push(dr->ea_valid);

  bool pc_pa_va_hit = false;
  bool ea_pa_va_hit = false;

  uint64_t pred_pa_va_diff;

  // itlb prediction of pc_pa_va (page-granular: compare >>13)
  if (found_pc_va) {
    pred_pa_va_diff = tdata[cpuid]->itlb->get(nextpc_va >> 13);
    if (pred_pa_va_diff == (dr->pc_pa_va >> 13)) {
      pc_pa_va_hit = true;
    }
  }

  if (pc_pa_va_hit) {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(0);
    sdata->bitarrays[raw_value64_array]->Push(dr->pc_pa_va);


    // train the itlb on a miss (only possible if lookahead succeeded)
    if (found_pc_va) {
      if (0) printf("%d: cpu%d itlb update: %llx => %llx\n", idx, cpuid, nextpc_va, dr->pc_pa_va);
      tdata[cpuid]->itlb->set(nextpc_va>>13, dr->pc_pa_va>>13);
    }
  }


  if (dr->ea_valid) {
    // ea_pa_va - use next instr (if available) and a tlb simulator
    if (found_ea_va) {
      // tlb lookup
      pred_pa_va_diff = tdata[cpuid]->dtlb->get(nextea_va >> 13);
      if (pred_pa_va_diff == (dr->ea_pa_va >> 13)) {
        ea_pa_va_hit = true;
      }
    }

    if (ea_pa_va_hit) {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(1);
    } else {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(0);
      sdata->bitarrays[raw_value64_array]->Push(dr->ea_pa_va);

      // train the dtlb on a miss
      if (found_ea_va) {
        if (0) printf("%d: cpu%d dtlb update: %llx => %llx\n", idx, cpuid, nextea_va, dr->ea_pa_va);
        tdata[cpuid]->dtlb->set((nextea_va >> 13), (dr->ea_pa_va >> 13));
      }
    }
  }

  // the lookahead flag tells the decompressor to look for the next instr (to update the tlb)
  // if we predicted pc_pa_va and/or ea_pa_va correctly, the decompressor knows from the pred bit to lookahead.
  // we set the lookahead flag so that the decomprssor knows the difference between no prediction (could not find corresponding instr) and misprediction

  if ((found_pc_va && pc_pa_va_hit) || (dr->ea_valid && found_ea_va && ea_pa_va_hit)) {
    // dont need lookahead flag since the pc_pa_va_pred flag and/or the ea_pa_va_pred flag will indicate lookahead
  } else {
    // we need to indicate whether there was no prediction or misprediction(s)
    int lookahead_flag = (found_pc_va || found_ea_va);
    sdata->bitarrays[pavadiff_lookahead_array]->Push(lookahead_flag);
  }
} // void rstzip3::compress_pavadiff(rstf_unionT * rstbuf, int idx)
558 | ||
559 | ||
560 | // predict bt, ea_valid, ea_va, NEXT-instr an for a dcti instr. also set pred_npc | |
// predict bt, ea_valid, ea_va, NEXT-instr an for a dcti instr. also set pred_npc
//
// Dispatches on the decoded CTI class (conditional branch, unconditional
// branch, call, indirect) and uses the matching predictor: a branch
// predictor for conditional branches, the icache-stored target for
// direct branches/calls, the RAS for ret/retl, and a jmpl target table
// for indirect calls. Mispredicts clear bits in instr_preds (AND-mask
// convention); a mispredicted ea_va is emitted raw.
void rstzip3::compress_dcti(rstf_unionT * rstbuf, int idx, rz3iu_icache_data * icdata)
{
  rstf_instrT * ir = &(rstbuf[idx].instr);
  uint16_t cpuid = rstf_pre212 ? ir->cpuid : rstf_instrT_get_cpuid(ir);
  uint64_t pc = ir->pc_va;

  int bt_pred_hit;

  if (icdata->dinfo.flags.iscbranch) {

    // use branch predictor
    bt_pred_hit = tdata[cpuid]->bp->pred_hit(pc, ir->bt);
    perf_stats[ps_brpred_refs]++;
    if (!bt_pred_hit) {
      perf_stats[ps_brpred_misses]++;
    }

    if (ir->bt) {
      // taken: next pc is the precomputed target (masked if amask set)
      tdata[cpuid]->pred_npc = icdata->target;
      if (tdata[cpuid]->pred_amask) {
        tdata[cpuid]->pred_npc &= rz3_amask_mask;
      }
    } // else - pred_npc is already set to pc+8

  } else if (icdata->dinfo.flags.isubranch && ! icdata->dinfo.flags.isubranch_nottaken) {

    // pred_npc is branch target
    bt_pred_hit = ir->bt; // we predict taken. if not taken, we mispredict
    tdata[cpuid]->pred_npc = icdata->target;
    if (tdata[cpuid]->pred_amask) {
      tdata[cpuid]->pred_npc &= rz3_amask_mask;
    }
  } else if (icdata->dinfo.flags.iscall) {

    bt_pred_hit = ir->bt;
    tdata[cpuid]->pred_npc = icdata->target;
    if (tdata[cpuid]->pred_amask) {
      tdata[cpuid]->pred_npc &= rz3_amask_mask;
    }
    // push pc to ras unless following (delay slot) instr is restore
    // (the pop-on-restore is handled by compress_inst via call_delay_slot)
    tdata[cpuid]->ras->push(pc);
    tdata[cpuid]->call_delay_slot = true;

  } else if (icdata->dinfo.flags.isindirect) {

    bt_pred_hit = ir->bt;
    // if jmpl, use prediction table
    // if ret/retl, use RAS
    if (icdata->dinfo.flags.is_ret|icdata->dinfo.flags.is_retl) {

      perf_stats[ps_ras_refs]++;
      // return target = saved call pc + 8 (skip call + delay slot)
      tdata[cpuid]->pred_npc = tdata[cpuid]->ras->pop() + 8;

      if (tdata[cpuid]->pred_amask) {
        tdata[cpuid]->pred_npc &= rz3_amask_mask;
      }
      if (tdata[cpuid]->pred_npc == ir->ea_va) {
      } else {
        // RAS mispredict: assume the stack is out of sync and flush it
        tdata[cpuid]->ras->clear();
        perf_stats[ps_ras_misses]++;
      }

    } else if ( ((ir->instr >> 25) & 0x1f) == 15 ) {
      // jmpl with rd == %o7 (reg 15): an indirect call

      // push unless following (delay-slot) instr is restore
      tdata[cpuid]->ras->push(pc);
      tdata[cpuid]->call_delay_slot = true;

      // predict the target from the per-pc jmpl table
      tdata[cpuid]->pred_npc = tdata[cpuid]->jmpl_table->get(pc >> 2);
      if (tdata[cpuid]->pred_amask) {
        tdata[cpuid]->pred_npc &= rz3_amask_mask;
      }
      if (tdata[cpuid]->pred_npc != ir->ea_va) { // we are going to see an ea_va misprediction (pred_ea_va is set to pred_npc for dctis)
        tdata[cpuid]->jmpl_table->set(pc>>2, ir->ea_va);
      }

    } // is this a ret/retl or indirect call?
    /* else do nothing */
  } else {
    // not a recognized CTI class: predict not-taken
    bt_pred_hit = ! ir->bt;
  } // what type of dcti?

  // bt pred
  if (!bt_pred_hit) {
    instr_preds &= instr_mispred_bt;
  }

  // ea_valid pred: predict ea_valid is true
  if (!ir->ea_valid) {
    instr_preds &= instr_mispred_ea_valid;
    perf_stats[ps_ea_valid_misses]++;
  }

  // ea_va: predict pred_npc is ea_va
  if (tdata[cpuid]->pred_npc == ir->ea_va) {
    sdata->bitarrays[dcti_ea_va_pred_array]->Push(1);
  } else {
    sdata->bitarrays[dcti_ea_va_pred_array]->Push(0);
    sdata->bitarrays[raw_value64_array]->Push(ir->ea_va);

    // at this point we know the real ea_va. predict npc=ea_va
    tdata[cpuid]->pred_npc = ir->ea_va;
  }

  // annul flag for *next* instr
  // (annulling CTIs annul the delay slot when a cbranch is not taken,
  // or always for ubranches per the flags below)
  if (icdata->dinfo.flags.annul_flag) {
    if ((icdata->dinfo.flags.iscbranch && !ir->bt) || icdata->dinfo.flags.isubranch) {
      tdata[cpuid]->pred_an = 1;
    }
  }

} // rstzip3::compress_dcti()
673 | ||
674 | ||
675 | // theres not much room for architectural compression | |
676 | // here, except in case of value traces. all we do here | |
677 | // is not store rtype and unused fields. | |
678 | void rstzip3::compress_tlb(rstf_unionT * rstbuf, int idx) | |
679 | { | |
680 | rstf_tlbT *tr = &(rstbuf[idx].tlb); | |
681 | // pack demap(25), tlb_index(24:9), tlb_type(8), tlb_no(7:6), cpuid(5:0) into a single | |
682 | // 26-bit field. we thus save only 38 bits/tlb record. | |
683 | // pack demap(29), tlb_index(28:13), tlb_type(12), tlb_no(11:10), cpuid(9:0) into a single | |
684 | // 30-bit field. we thus save only 34 bits/tlb record. | |
685 | int cpuid = rstf_pre212 ? tr->cpuid : rstf_tlbT_get_cpuid(tr); | |
686 | ||
687 | uint32_t tlb_info = (tr->demap<<29) | (((uint32_t)tr->tlb_index) << 13) | (tr->tlb_type << 12) | |
688 | | (tr->tlb_no << 10) | cpuid; | |
689 | sdata->bitarrays[tlb_info_array]->Push(tlb_info); | |
690 | ||
691 | sdata->bitarrays[raw_value64_array]->Push(tr->tte_tag); | |
692 | sdata->bitarrays[raw_value64_array]->Push(tr->tte_data); | |
693 | ||
694 | ||
695 | } // void rstzip3::compress_tlb(rstf_unionT * rstbuf, int idx) | |
696 | ||
697 | ||
698 | // try to predict pc and npc. | |
699 | // at the time of this writing, trap records occur *before* the | |
700 | // instr record at the time the trap occurred. | |
701 | // For future RST versions, we will change this assumption if necessary | |
// Compress a TRAP_T record.
//
// Packs the scalar trap fields plus pc/npc prediction bits into a
// single value for the trap_info array; a mispredicted pc or npc is
// additionally emitted raw. pc is predicted as the cpu's pred_pc
// because (at the time of this writing) trap records occur *before*
// the instr record at which the trap occurred.
// For future RST versions, we will change this assumption if necessary
void rstzip3::compress_trap(rstf_unionT * rstbuf, int idx)
{
  rstf_trapT * tr = &(rstbuf[idx].trap);

  // predict cpuid as the predicted cpuid of the next instr
  int cpuid = rstf_pre212 ? tr->cpuid : rstf_trapT_get_cpuid(tr);

  if (cpuid == pred_cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }

  // lazily allocate per-cpu predictor state
  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // put is_async(48), tl(47:44), ttype(43:34), pstate(33:18), syscall(17:2), pc_pred(1), npc_pred(0)
  // in one 49-bit field
  uint64_t trap_info = (((uint64_t)tr->is_async) << 48) | (((uint64_t)tr->tl) << 44) | (((uint64_t)tr->ttype) << 34) |
    (((uint64_t)tr->pstate) << 18) | (((uint64_t)tr->syscall) << 2);

  uint64_t pred_pc = tdata[cpuid]->pred_pc;
  uint64_t pred_npc;
  if (tr->pc == pred_pc) {
    // pc predicted: set bit 1; npc is then predicted from pred_npc
    trap_info |= 2ull;
    pred_npc = tdata[cpuid]->pred_npc;
  } else {
    sdata->bitarrays[raw_value64_array]->Push(tr->pc);

    // fall back to straight-line prediction for npc
    pred_npc = tr->pc + 4;
  }

  if (tr->npc == pred_npc) {
    trap_info |= 1ull;
  } else {
    sdata->bitarrays[raw_value64_array]->Push(tr->npc);

  }

  sdata->bitarrays[trap_info_array]->Push(trap_info);
} // void rstzip3::compress_trap(rstf_unionT * rstbuf, int idx)
745 | ||
746 | ||
747 | void rstzip3::compress_preg(rstf_unionT * rstbuf, int idx) | |
748 | { | |
749 | rstf_pregT * pr = &(rstbuf[idx].preg); | |
750 | ||
751 | // cpuid: predict same as previous instr cpuid | |
752 | int cpuid = rstf_pre212 ? pr->cpuid : rstf_pregT_get_cpuid(pr); | |
753 | int cpuid_pred = (cpuid==pred_cpuid) ? 1 : 0; | |
754 | if (!cpuid_pred) { | |
755 | sdata->bitarrays[raw_cpuid_array]->Push(cpuid); | |
756 | } | |
757 | ||
758 | // pack cpuid_pred[61], primD[60:48], secD[47:35] asiReg{34:27], traplevel[26:24], traptype[23:16], pstate[15:0] in one 64-bit value | |
759 | uint64_t preg_info = (((uint64_t)cpuid_pred) << 61) | (((uint64_t)pr->primD) << 48) | (((uint64_t)pr->secD) << 35) | | |
760 | (((uint64_t)pr->asiReg) << 27) | (((uint64_t)pr->traplevel) << 24) | (((uint64_t)pr->traptype) << 16) | ((uint64_t)pr->pstate); | |
761 | sdata->bitarrays[raw_value64_array]->Push(preg_info); | |
762 | ||
763 | ||
764 | // primA and secA are not used - ignore | |
765 | } // void rstzip3::compress_preg(rstf_unionT * rstbuf, int idx) | |
766 | ||
767 | void rstzip3::compress_dma(rstf_unionT * rstbuf, int idx) | |
768 | { | |
769 | rstf_dmaT * dr = &(rstbuf[idx].dma); | |
770 | sdata->bitarrays[dma_iswrite_array]->Push(dr->iswrite); | |
771 | sdata->bitarrays[dma_nbytes_array]->Push(dr->nbytes); | |
772 | sdata->bitarrays[raw_value64_array]->Push(dr->start_pa); | |
773 | sdata->bitarrays[raw_value64_array]->Push(dr->devid); | |
774 | } // void rstzip3::compress_dma(rstf_unionT * rstbuf, int idx) | |
775 | ||
776 | void rstzip3::compress_regval(rstf_unionT * rstbuf, int idx) | |
777 | { | |
778 | // for now, try to compress the reg64 fields using the same mechanism as ea_va compression | |
779 | rstf_regvalT * vr = &(rstbuf[idx].regval); | |
780 | ||
781 | // cpuid | |
782 | int cpuid = rstf_pre212 ? vr->cpuid : rstf_regvalT_get_cpuid(vr); | |
783 | ||
784 | if (cpuid == last_instr_cpuid) { | |
785 | sdata->bitarrays[cpuid_pred_array]->Push(1); | |
786 | } else { | |
787 | sdata->bitarrays[cpuid_pred_array]->Push(0); | |
788 | sdata->bitarrays[raw_cpuid_array]->Push(cpuid); | |
789 | } | |
790 | ||
791 | // tdata | |
792 | if (tdata[cpuid] == NULL) { | |
793 | tdata[cpuid] = new rz3_percpu_data(cpuid); | |
794 | } | |
795 | ||
796 | // postInstr | |
797 | sdata->bitarrays[regval_postInstr_array]->Push(vr->postInstr); | |
798 | ||
799 | #if 0 | |
800 | // if prev instr can be emulated, regenerate values using emulation | |
801 | if (regen_value(vr, idx)) return; // FIXME: testing | |
802 | if (vr->regtype[0] == RSTREG_INT_RT) { | |
803 | tdata[cpuid]->regs[vr->regid[0]] = vr->reg64[0]; | |
804 | } | |
805 | if (vr->regtype[1] == RSTREG_INT_RT) { | |
806 | tdata[cpuid]->regs[vr->regid[1]] = vr->reg64[1]; | |
807 | } | |
808 | #endif | |
809 | ||
810 | // regtype, regid | |
811 | uint64_t prev_pc = tdata[cpuid]->prev_pc; | |
812 | int regtype_tbl_idx = (prev_pc >> 2) & (rz3_percpu_data::rz3_tdata_regval_regtype_tbl_size-1); | |
813 | int regid_tbl_idx = (prev_pc >> 2) & (rz3_percpu_data::rz3_tdata_regval_regid_tbl_size-1); | |
814 | ||
815 | int k; | |
816 | for (k=0; k<2; k++) { | |
817 | ||
818 | // predict regtype: use prev_instr | |
819 | ||
820 | uint8_t pred_regtype = tdata[cpuid]->regval_regtype_tbl[k][regtype_tbl_idx]; | |
821 | ||
822 | if (pred_regtype == vr->regtype[k]) { | |
823 | sdata->bitarrays[regval_regtype_pred_array]->Push(1); | |
824 | } else { | |
825 | sdata->bitarrays[regval_regtype_pred_array]->Push(0); | |
826 | sdata->bitarrays[regval_raw_regtype_array]->Push(vr->regtype[k]); | |
827 | tdata[cpuid]->regval_regtype_tbl[k][regtype_tbl_idx] = vr->regtype[k]; | |
828 | } | |
829 | ||
830 | if (vr->regtype[k] != RSTREG_UNUSED_RT) { | |
831 | ||
832 | // regid | |
833 | uint8_t pred_regid = tdata[cpuid]->regval_regid_tbl[k][regid_tbl_idx]; | |
834 | if (prev_rtype == REGVAL_T) { // probably in save/restore code: predict regid = prev_regid+2 | |
835 | pred_regid += 2; | |
836 | } | |
837 | if (pred_regid == vr->regid[k]) { | |
838 | sdata->bitarrays[regval_regid_pred_array]->Push(1); | |
839 | } else { | |
840 | sdata->bitarrays[regval_regid_pred_array]->Push(0); | |
841 | sdata->bitarrays[regval_raw_regid_array]->Push(vr->regid[k]); | |
842 | } | |
843 | // we always update update the table. | |
844 | // even if our prediction is correct, the predicted value is different from the value read from the table in case of save/restore | |
845 | tdata[cpuid]->regval_regid_tbl[k][regid_tbl_idx] = vr->regid[k]; | |
846 | ||
847 | // reg64 | |
848 | uint64_t v64 = vr->reg64[k]; | |
849 | ||
850 | if ((vr->regtype[k] == RSTREG_INT_RT) && (vr->regid[k] == 0)) { | |
851 | if (v64 != 0x0) { | |
852 | if (g0_nonzero_warn) { | |
853 | fprintf(stderr, "warning: rz3: compress_regval: int reg %%g0 has non-zero value %llx. will be ignored\n", v64); | |
854 | if (!verbose) { | |
855 | fprintf(stderr, " (further %%g0!=0 warnings will be suppressed)\n"); | |
856 | g0_nonzero_warn = false; | |
857 | } | |
858 | } | |
859 | } | |
860 | } | |
861 | ||
862 | if (v64 == 0) { | |
863 | sdata->bitarrays[value_iszero_array]->Push(1); | |
864 | } else { | |
865 | static int regval_vc_refs = 0; | |
866 | static int regval_vc_hits = 0; | |
867 | sdata->bitarrays[value_iszero_array]->Push(0); | |
868 | regval_vc_refs++; | |
869 | if (compress_value(cpuid, v64)) { | |
870 | regval_vc_hits++; | |
871 | } else { | |
872 | } | |
873 | ||
874 | if (regval_vc_refs % 1000 == 0) { | |
875 | // printf("regval vc refs %d hits %d (%0.4f%%)\n", regval_vc_refs, regval_vc_hits, 100.0*regval_vc_hits/regval_vc_refs); | |
876 | } | |
877 | } | |
878 | ||
879 | } // if regtype != UNUSED | |
880 | } // for reg field = 0,1 | |
881 | } // rstzip3::compress_regval | |
882 | ||
883 | void rstzip3::compress_memval(rstf_unionT * rstbuf, int idx) | |
884 | { | |
885 | // rtype: in raw rtype array | |
886 | // ismemval128: raw | |
887 | ||
888 | // addrisVA: raw | |
889 | // isContRec: ignore for m64; raw for m128 | |
890 | // cpuid: same as predicted cpuid for next instr | |
891 | ||
892 | // memval64.size: store raw size | |
893 | // memval64.addr: use valuecache | |
894 | // memval64.val: use valuecache | |
895 | ||
896 | // memval128.addr36_43: ignore if isContRec; raw otherwise | |
897 | // memval128.addr04_35: ignore if isContReg; raw otherwise | |
898 | ||
899 | // memval128.val[]: use valuecache | |
900 | ||
901 | rstf_memval64T * m64 = & (rstbuf[idx].memval64); | |
902 | rstf_memval128T * m128 = & (rstbuf[idx].memval128); | |
903 | ||
904 | sdata->bitarrays[memval_fields_array]->Push(m128->ismemval128); | |
905 | sdata->bitarrays[memval_fields_array]->Push(! m128->addrisVA); | |
906 | ||
907 | // cpuid | |
908 | int cpuid = rstf_pre212 ? m128->cpuid : rstf_memval128T_get_cpuid(m128); | |
909 | if (cpuid == pred_cpuid) { | |
910 | sdata->bitarrays[cpuid_pred_array]->Push(1); | |
911 | } else { | |
912 | sdata->bitarrays[cpuid_pred_array]->Push(0); | |
913 | sdata->bitarrays[raw_cpuid_array]->Push(cpuid); | |
914 | } | |
915 | ||
916 | if (tdata[cpuid] == NULL) { | |
917 | tdata[cpuid] = new rz3_percpu_data(cpuid); | |
918 | } | |
919 | ||
920 | if (m128->ismemval128) { | |
921 | sdata->bitarrays[memval_fields_array]->Push(m128->isContRec); | |
922 | if (! m128->isContRec) { | |
923 | sdata->bitarrays[memval_addr36_43_array]->Push(m128->addr36_43); | |
924 | sdata->bitarrays[memval_addr04_35_array]->Push(m128->addr04_35); | |
925 | } | |
926 | ||
927 | // vals | |
928 | ||
929 | compress_value(cpuid, m128->val[0]); | |
930 | ||
931 | compress_value(cpuid, m128->val[1]); | |
932 | ||
933 | } else /* memval64 */ { | |
934 | sdata->bitarrays[memval_size_array]->Push(m64->size-1); | |
935 | ||
936 | ||
937 | // predict addr using valuecache | |
938 | compress_value(cpuid, m64->addr); | |
939 | compress_value(cpuid, m64->val); | |
940 | ||
941 | } | |
942 | ||
943 | } // compress_memval | |
944 | ||
945 | ||
946 | void rstzip3::compress_rfs_cw(rstf_unionT * rstbuf, int idx) | |
947 | { | |
948 | rstf_cachewarmingT *cw = &(rstbuf[idx].cachewarming); | |
949 | ||
950 | // there is no architectural method to predict reftype. | |
951 | sdata->bitarrays[rfs_cw_raw_reftype_array]->Push(cw->reftype); | |
952 | ||
953 | // dont predict cpuid | |
954 | ||
955 | int cpuid; | |
956 | ||
957 | if ((cw->reftype == cw_reftype_DMA_R) || (cw->reftype == cw_reftype_DMA_W)) { | |
958 | cpuid = 0; | |
959 | } else { | |
960 | cpuid = rstf_cachewarmingT_get_cpuid(cw); | |
961 | } | |
962 | ||
963 | if (tdata[cpuid] == NULL) { | |
964 | // fprintf(stderr, "compress_rfs_cw: new cpuid %d\n", cpuid); | |
965 | tdata[cpuid] = new rz3_percpu_data(cpuid); | |
966 | } | |
967 | ||
968 | sdata->bitarrays[rfs_raw_cpuid_array]->Push(cpuid); | |
969 | ||
970 | if ((cw->reftype == cw_reftype_DMA_R)|| (cw->reftype == cw_reftype_DMA_W)) { | |
971 | sdata->bitarrays[raw_value64_array]->Push(cw->pa); | |
972 | ||
973 | sdata->bitarrays[rfs_cw_dma_size_array]->Push(cw->refinfo.dma_size); | |
974 | } else { | |
975 | // asi | |
976 | sdata->bitarrays[rfs_cw_asi_array]->Push(cw->refinfo.s.asi); | |
977 | ||
978 | // fcn | |
979 | if (cw->reftype==cw_reftype_PF_D) { | |
980 | sdata->bitarrays[rfs_cw_pf_fcn_array]->Push(cw->refinfo.s.fcn); | |
981 | } | |
982 | ||
983 | // va_valid | |
984 | sdata->bitarrays[rfs_cw_va_valid_array]->Push(cw->refinfo.s.va_valid); | |
985 | ||
986 | if (cw->refinfo.s.va_valid) { | |
987 | ||
988 | compress_value(cpuid, cw->va); | |
989 | ||
990 | // tlb hit/miss | |
991 | uint64_t pred_pa; | |
992 | if (cw->reftype == cw_reftype_I) { | |
993 | pred_pa = tdata[cpuid]->itlb->get(cw->va>>13) << 13; | |
994 | } else { | |
995 | pred_pa = tdata[cpuid]->dtlb->get(cw->va>>13) << 13; | |
996 | } | |
997 | pred_pa |= (cw->va & 0x1fffull); | |
998 | if (pred_pa != cw->pa) { | |
999 | sdata->bitarrays[rfs_cw_pa_pred_array]->Push(0); | |
1000 | sdata->bitarrays[raw_value64_array]->Push(cw->pa); | |
1001 | ||
1002 | if (cw->reftype == cw_reftype_I) { | |
1003 | tdata[cpuid]->itlb->set(cw->va>>13, cw->pa>>13); | |
1004 | } else { | |
1005 | tdata[cpuid]->dtlb->set(cw->va>>13, cw->pa>>13); | |
1006 | } | |
1007 | } else { | |
1008 | sdata->bitarrays[rfs_cw_pa_pred_array]->Push(1); | |
1009 | } | |
1010 | } else /* va invalid - no way to predict pa? */ { | |
1011 | sdata->bitarrays[raw_value64_array]->Push(cw->pa); | |
1012 | } | |
1013 | } | |
1014 | } // rstzip3::compress_rfs_cw(rstf_unionT * rstbuf, int idx) | |
1015 | ||
1016 | ||
1017 | void rstzip3::compress_rfs_bt(rstf_unionT * rstbuf, int idx) | |
1018 | { | |
1019 | rstf_bpwarmingT * bt = &(rstbuf[idx].bpwarming); | |
1020 | ||
1021 | // a bt record consists of cpuid, taken, instr, pc_va, npc_va | |
1022 | ||
1023 | // no easy way to compress cpuid: store raw | |
1024 | int cpuid = rstf_bpwarmingT_get_cpuid(bt); | |
1025 | sdata->bitarrays[rfs_raw_cpuid_array]->Push(cpuid); | |
1026 | if (tdata[cpuid] == NULL) { | |
1027 | tdata[cpuid] = new rz3_percpu_data(cpuid); | |
1028 | } | |
1029 | ||
1030 | // pc | |
1031 | uint64_t pred_pc = tdata[cpuid]->rfs_pc_pred_table->get(tdata[cpuid]->rfs_prev_npc); | |
1032 | if (pred_pc == bt->pc_va) { | |
1033 | sdata->bitarrays[rfs_pc_pred_array]->Push(1); | |
1034 | } else { | |
1035 | sdata->bitarrays[rfs_pc_pred_array]->Push(0); | |
1036 | sdata->bitarrays[raw_value64_array]->Push(bt->pc_va>>2); | |
1037 | ||
1038 | tdata[cpuid]->rfs_pc_pred_table->set(tdata[cpuid]->rfs_prev_npc, bt->pc_va); | |
1039 | } | |
1040 | ||
1041 | // instr: use icache | |
1042 | rz3iu_icache_data * icdata = tdata[cpuid]->icache->get(bt->pc_va); | |
1043 | uint32_t instr = bt->instr; | |
1044 | if ((icdata == NULL) || (icdata->instr != instr)) { | |
1045 | // ic miss | |
1046 | sdata->bitarrays[rfs_instr_pred_array]->Push(0); | |
1047 | sdata->bitarrays[raw_instr_array]->Push(instr); | |
1048 | icdata = tdata[cpuid]->icache->set(bt->pc_va, instr, rstzip3_major_version, rstzip3_minor_version); | |
1049 | icdata->gen_target(bt->pc_va); | |
1050 | } else { | |
1051 | sdata->bitarrays[rfs_instr_pred_array]->Push(1); | |
1052 | } | |
1053 | ||
1054 | // bt | |
1055 | int bt_pred_hit; | |
1056 | if (icdata->dinfo.flags.iscbranch) { | |
1057 | bt_pred_hit = tdata[cpuid]->bp->pred_hit(bt->pc_va, bt->taken); | |
1058 | if (!bt_pred_hit) perf_stats[ps_brpred_misses]++; | |
1059 | } else if (icdata->dinfo.flags.isubranch && icdata->dinfo.flags.isubranch_nottaken) { | |
1060 | bt_pred_hit = ! bt->taken; // in other words, we predict uncond nt branches as not taken. if the taken bit is 0, then our prediction is correct (1) and vice versa | |
1061 | } else { | |
1062 | bt_pred_hit = bt->taken; // in other words, we predict all other branches as taken | |
1063 | } | |
1064 | ||
1065 | sdata->bitarrays[rfs_bt_pred_array]->Push(bt_pred_hit); | |
1066 | ||
1067 | // target | |
1068 | uint64_t pred_npc_va; | |
1069 | if (bt->taken) { | |
1070 | pred_npc_va = icdata->target; | |
1071 | } else { | |
1072 | pred_npc_va = bt->pc_va + 8; | |
1073 | } | |
1074 | if (pred_npc_va == bt->npc_va) { | |
1075 | sdata->bitarrays[dcti_ea_va_pred_array]->Push(1); | |
1076 | } else { | |
1077 | sdata->bitarrays[dcti_ea_va_pred_array]->Push(0); | |
1078 | sdata->bitarrays[raw_value64_array]->Push(bt->npc_va); | |
1079 | } | |
1080 | ||
1081 | tdata[cpuid]->rfs_prev_npc = bt->npc_va; | |
1082 | ||
1083 | tdata[cpuid]->pred_pc = tdata[cpuid]->rfs_pc_pred_table->get(bt->npc_va); | |
1084 | ||
1085 | } // rstzip3::compress_rstf_bt(rfs_unionT * rstbuf, int idx) | |
1086 | ||
1087 | ||
1088 | ||
1089 | // return true if could compress using valuecache | |
1090 | bool rstzip3::compress_value(int cpuid, uint64_t v64) | |
1091 | { | |
1092 | if (tdata[cpuid] == NULL) { | |
1093 | tdata[cpuid] = new rz3_percpu_data(cpuid); | |
1094 | } | |
1095 | ||
1096 | uint64_t key; | |
1097 | int level = tdata[cpuid]->valuecache->Ref(v64, key); | |
1098 | sdata->bitarrays[valuecache_level_array]->Push(level); | |
1099 | sdata->bitarrays[valuecache_data0_array+level]->Push(key); | |
1100 | ||
1101 | return (level < 7); | |
1102 | } | |
1103 | ||
1104 | ||
1105 | ||
1106 | ||
1107 | ||
#if 0 // leave this obsolete code in here. it is useful for making sense of the decompress_pavadiff_v315 code in decompress_engine.C
// Obsolete v3.15 pavadiff compressor, kept (disabled) as documentation for
// the matching decompress path. A pavadiff record carries icontext/dcontext
// and the pc/ea physical-virtual address differences; the pa-va diffs are
// predicted via the simulated tlbs using the va of the next instr record.
void rstzip3::compress_pavadiff_v315(rstf_unionT * rstbuf, int idx)
{
  rstf_pavadiffT * dr = &(rstbuf[idx].pavadiff);
  int cpuid = rstf_pavadiffT_get_cpuid(dr);

  // check and predict cpuid
  if (pred_cpuid == cpuid) {
    sdata->bitarrays[cpuid_pred_array]->Push(1);
  } else {
    sdata->bitarrays[cpuid_pred_array]->Push(0);
    sdata->bitarrays[raw_cpuid_array]->Push(cpuid);
  }
  pred_cpuid = cpuid;

  // lazily allocate per-cpu prediction state
  if (tdata[cpuid] == NULL) {
    tdata[cpuid] = new rz3_percpu_data(cpuid);
  }

  // predict icontext the same as prev icontext
  if (tdata[cpuid]->pred_icontext == dr->icontext) {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_ictxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_ictxt_array]->Push(dr->icontext);
    tdata[cpuid]->pred_icontext = dr->icontext;
  }

  // dcontext - predict same as prev dcontext for this cpu
  if (tdata[cpuid]->pred_dcontext == dr->dcontext) {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(1);
  } else {
    sdata->bitarrays[pavadiff_dctxt_pred_array]->Push(0);
    sdata->bitarrays[pavadiff_raw_dctxt_array]->Push(dr->dcontext);
    tdata[cpuid]->pred_dcontext = dr->dcontext;
  }

  // look ahead for the next instr record on this cpu; its pc_va/ea_va are
  // the tlb-lookup keys used to predict pc_pa_va/ea_pa_va below
  bool found_pc_va = false;
  uint64_t nextpc_va;
  bool found_ea_va = false;
  uint64_t nextea_va;

  int i;
  for (i=idx+1; i<shdr->nrecords; i++) {
    if (rstbuf[i].proto.rtype == INSTR_T) {
      if (rstf_instrT_get_cpuid(&rstbuf[i].instr) == cpuid) {
        nextpc_va = rstbuf[i].instr.pc_va;
        found_pc_va = (nextpc_va != 0x0);
        if (dr->ea_valid && rstbuf[i].instr.ea_valid) { // we only care about ea_va if dr->ea_valid
          nextea_va = rstbuf[i].instr.ea_va;
          found_ea_va = (nextea_va != 0x0);
        }
      } // if cpuid match
      // stop at the first instr record regardless of whether the cpu matched
      break;
    } // if instr
  } // for each subsequent record

  // ea_valid
  sdata->bitarrays[pavadiff_ea_valid_array]->Push(dr->ea_valid);

  // NOTE: pc_pa_va_hit (and ea_pa_va_hit on the ea_valid path) can be left
  // unset when no prediction was possible and the compare misses; both are
  // only read below guarded by found_pc_va/found_ea_va, so the unset value
  // is never observed.
  bool pc_pa_va_hit;
  bool ea_pa_va_hit;

  uint64_t pred_pa_va_diff;

  if (found_pc_va) {
    pred_pa_va_diff = tdata[cpuid]->itlb->get(nextpc_va >> 13);
  } else {
    pred_pa_va_diff = 42; // some nonsensical value
  }

  if (pred_pa_va_diff == (dr->pc_pa_va>>13)) {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(1);
    pc_pa_va_hit = true;
  } else {
    sdata->bitarrays[pavadiff_pc_pa_va_pred_array]->Push(0);
    sdata->bitarrays[raw_value64_array]->Push(dr->pc_pa_va);

    // only update the tlb model when we actually had a va to key on
    if (found_pc_va) {
      tdata[cpuid]->itlb->set(nextpc_va>>13, dr->pc_pa_va>>13);
      pc_pa_va_hit = false;
    }
  }


  if (dr->ea_valid) {

    // ea_pa_va - use next instr (if available) and a tlb simulator
    if (found_ea_va) {
      // tlb lookup
      pred_pa_va_diff = tdata[cpuid]->dtlb->get(nextea_va >> 13);
    } else {
      pred_pa_va_diff = 42; // some nonsensical value
    }

    if (pred_pa_va_diff == (dr->ea_pa_va >> 13)) {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(1);
      ea_pa_va_hit = true;
    } else {
      sdata->bitarrays[pavadiff_ea_pa_va_pred_array]->Push(0);
      sdata->bitarrays[raw_value64_array]->Push(dr->ea_pa_va);

      // only update the tlb model when we actually had a va to key on
      if (found_ea_va) {
        tdata[cpuid]->dtlb->set((nextea_va >> 13), (dr->ea_pa_va >> 13));
        ea_pa_va_hit = false;
      }
    }
  } else {
    ea_pa_va_hit = false;
  } // if ea_valid

  // the lookahead flag tells the decompressor to look for the next instr (to update the tlb)
  // if we predicted pc_pa_va and/or ea_pa_va correctly, the decompressor knows from the pred bit to lookahead.
  // we set the lookahead flag so that the decomprssor knows the difference between no prediction (could not find corresponding instr) and misprediction(s)

  if ((found_pc_va && pc_pa_va_hit) || (dr->ea_valid && found_ea_va && ea_pa_va_hit)) {
    // dont need lookahead since the pc_pa_va_pred_array and/or the ea_pa_va_pred_array will indicate lookahead
  } else {
    // we need to indicate whether there was no prediction or misprediction(s)
    int lookahead_flag = (found_pc_va || found_ea_va);
    sdata->bitarrays[pavadiff_lookahead_array]->Push(lookahead_flag);
  }
} // rstzip3::compress_pavadiff()
#endif // #if 0 (obsolete code - left here as a reference for the corresponding decompress code
1232 | ||
1233 | ||
1234 |