Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: rz3iu.h | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | /* rz3iu.h | |
24 | * rz3 instr unit data structures | |
25 | */ | |
26 | ||
27 | #ifndef _rz3iu_h_ | |
28 | #define _rz3iu_h_ | |
29 | ||
30 | #include <stdio.h> | |
31 | #include <sys/types.h> | |
32 | #include <stdlib.h> | |
33 | #include <assert.h> | |
34 | #include <string.h> | |
35 | ||
36 | #include "rz3utils.h" | |
37 | ||
38 | // #include "spix_sparc.h" | |
39 | ||
40 | #include "rz_insttypes.h" | |
41 | ||
/* we use a simple fast branch predictor similar to the one on cheetah.
 * the bpa is a 16K-entry (1<<14) array of 2-bit up/down saturating counters,
 * stored one counter per byte.
 * the bpa is indexed by a combination of the branch PC and global branch history
 */
struct rz3iu_brpred {

  /* Branch predictor state: a table (bpa) of 2-bit up/down saturating
   * counters, one counter per byte, plus a branch history register (bhr).
   * A counter value of 2 or 3 predicts "taken", 0 or 1 predicts "not taken".
   *
   * NOTE(review): this struct owns bpa (new[] / delete[]) but declares no
   * copy constructor or assignment operator, so copying an instance would
   * end in a double delete. Pass it around by pointer or reference.
   */

  // table geometry: 1<<14 counters, addressed by a 14-bit index
  enum bpa_consts_e {
    bpa_index_bits = 14,
    bpa_entries = (1 << bpa_index_bits),
    bpa_index_mask = (bpa_entries - 1)
  };

  rz3iu_brpred() {
    bpa = new uint8_t [bpa_entries];
    clear();
    // next-state tables for the 2-bit counters; they saturate at
    // 3 (strongly taken) and 0 (strongly not-taken)
    for (int c = 0; c < 4; c++) {
      update_taken[c] = (uint8_t) ((c < 3) ? (c + 1) : 3);
      update_nottaken[c] = (uint8_t) ((c > 0) ? (c - 1) : 0);
    }
  }

  ~rz3iu_brpred() {
    delete [] bpa;
  }

  // reset every counter to 0 (strongly not-taken) and wipe the history
  void clear() {
    memset(bpa, 0, bpa_entries);
    bhr = 0x0;
  }

  enum brpred_feedback_e { bp_feedback_INV=0, bp_no_feedback, bp_feedback_hit, bp_feedback_miss };

  // Table index for a branch at pc: the low 14 bits of the word address
  // XORed with the history shifted left by two. The result is masked again
  // after the XOR so that a bhr wider than 12 bits can never index outside
  // bpa; for bhr <= 0xfff the second mask changes nothing.
  int bpa_index(uint64_t pc) const
  {
    uint64_t pc_bits = (pc >> 2) & bpa_index_mask;
    uint64_t hist_bits = ((uint64_t) bhr) << 2;
    return (int) ((pc_bits ^ hist_bits) & bpa_index_mask);
  }

  // there are two operating modes of the predictor:
  // 1. we know the actual outcome of the branch. generate a prediction and return the *accuracy* of the prediction
  // 2. we know the *accuracy* of the prediction. generate the prediction and return the *actual outcome* of the branch

  // Mode 1. actual_outcome is expected to be 0 (not taken) or 1 (taken).
  // Trains the counter with the outcome and returns 1 if the prediction made
  // before training equals actual_outcome, 0 otherwise.
  int pred_hit(uint64_t pc, int actual_outcome)
  {
    int index = bpa_index(pc);
    uint8_t countervalue = bpa[index];
    int pred = (countervalue >> 1);
    bpa[index] = actual_outcome ? update_taken[countervalue] : update_nottaken[countervalue];
    return (pred == actual_outcome);
  }

  // Mode 2. pred_hit is expected to be 0 (mispredicted) or 1 (predicted
  // correctly). Reconstructs the branch outcome from the prediction, trains
  // the counter with it and returns it (1 = taken, 0 = not taken).
  int actual_outcome(uint64_t pc, int pred_hit)
  {
    int index = bpa_index(pc);
    uint8_t countervalue = bpa[index];
    int pred = (countervalue >> 1);
    // a hit means the outcome equals the prediction, a miss means the opposite
    int outcome = (pred == pred_hit);
    bpa[index] = outcome ? update_taken[countervalue] : update_nottaken[countervalue];
    return outcome;
  }

  uint8_t * bpa;              // bpa_entries counters, each holding 0..3
  uint16_t bhr;               // 12-bit branch history; inside this struct only clear() writes it
  uint8_t update_taken[4];    // counter value after a taken branch
  uint8_t update_nottaken[4]; // counter value after a not-taken branch
}; // struct rz3iu_brpred
101 | ||
102 | ||
/* Per-instruction control-transfer flags. Values of this type are returned
 * by gen_dcti_info() / gen_dcti_info_v317() and stored in each icache entry.
 * The same storage can be read either flag-by-flag or as one word (u32).
 */
union dcti_info_u {
  // sixteen one-bit flags; the declaration order below fixes the bit layout
  struct dcti_flags {
    unsigned int isdcti : 1;
    unsigned int isbranch : 1;
    unsigned int iscbranch : 1;
    unsigned int isubranch : 1;
    unsigned int isubranch_nottaken : 1;
    unsigned int annul_flag : 1;
    unsigned int iscall : 1;
    unsigned int isindirect : 1; // jmpl
    unsigned int is_retl : 1;
    unsigned int is_ret : 1;
    unsigned int isBPcc : 1;
    unsigned int isBPR : 1;
    unsigned int isFBfcc : 1;
    unsigned int isFBPfcc : 1;
    unsigned int isBicc : 1;

    unsigned int is_done_retry : 1;
  } flags;

  // whole-word view of the flags
  unsigned int u32;
}; // union dcti_info_u
126 | ||
127 | ||
128 | ||
129 | ||
130 | // union dcti_info_u gen_dcti_info(uint32_t instr, spix_sparc_iop_t iop); | |
131 | union dcti_info_u gen_dcti_info(uint32_t instr); | |
132 | union dcti_info_u gen_dcti_info_v317(uint32_t instr); | |
133 | ||
134 | /* instructions that we emulate to regenerate value records: | |
135 | * AND, OR, ANDCC, ORCC, XOR | |
136 | * ADD, ADDCC, SUB, SUBCC | |
137 | * SETHI | |
138 | * SRLX, SLLX, SRA, SLL, SRA | |
139 | */ | |
140 | ||
141 | /* icache with predecode information for DCTIs */ | |
142 | struct rz3iu_icache_data { | |
143 | uint32_t instr; | |
144 | // spix_sparc_iop_t iop; | |
145 | union dcti_info_u dinfo; | |
146 | uint64_t target; | |
147 | ||
148 | bool is_ldstpf; | |
149 | ||
150 | void gen_target(uint64_t pc); | |
151 | }; // struct rz3iu_icache_data | |
152 | ||
153 | ||
154 | ||
155 | ||
// Instruction-cache geometry. Capacities are counted in instructions (one
// rz3iu_icache_data entry each), not in bytes.

// log2 of the number of instructions per block; 0 means one instruction per block
static const int rz3iu_icache_bshift = 0;

// instructions per block
static const uint64_t rz3iu_icache_bsize = (1 << rz3iu_icache_bshift);

// total number of instruction entries
static const uint64_t rz3iu_icache_size = (256 << 10);

// number of sets: 2-way set-assoc, hence the extra shift by one
static const uint64_t rz3iu_icache_sets = (rz3iu_icache_size >> (1 + rz3iu_icache_bshift));

// total number of blocks, i.e. of tags (both ways together)
static const uint64_t rz3iu_icache_blocks = (rz3iu_icache_size >> rz3iu_icache_bshift);
161 | ||
162 | struct rz3iu_icache { | |
163 | uint64_t * tags; | |
164 | rz3iu_icache_data * data; | |
165 | uint8_t * lru; | |
166 | ||
167 | rz3iu_icache() { | |
168 | tags = new uint64_t [rz3iu_icache_blocks]; | |
169 | data = new rz3iu_icache_data [rz3iu_icache_size]; | |
170 | lru = new uint8_t [rz3iu_icache_sets]; | |
171 | clear(); | |
172 | } | |
173 | ||
174 | ~rz3iu_icache() { | |
175 | delete [] tags; | |
176 | delete [] data; | |
177 | delete [] lru; | |
178 | } | |
179 | ||
180 | void clear() { | |
181 | memset(tags, 0, rz3iu_icache_blocks*sizeof(uint64_t)); | |
182 | memset(data, 0, rz3iu_icache_size * sizeof(rz3iu_icache_data)); | |
183 | memset(lru, 0, rz3iu_icache_sets * sizeof(uint8_t)); | |
184 | } | |
185 | ||
186 | rz3iu_icache_data * set(uint64_t pc, uint32_t instr, uint8_t rz3_major_version, uint8_t rz3_minor_version) { | |
187 | // replace lru | |
188 | uint64_t tag; | |
189 | int idx; | |
190 | int offset; | |
191 | tag_idx_ofs(pc, tag, idx, offset); | |
192 | int lruway; | |
193 | if (tags[idx] == tag) { | |
194 | lruway = 0; | |
195 | lru[idx] = 1; | |
196 | } else if (tags[idx+rz3iu_icache_sets] == tag) { | |
197 | lruway = 1; | |
198 | lru[idx] = 0; | |
199 | } else { | |
200 | lruway = lru[idx]; | |
201 | } | |
202 | ||
203 | int w = idx + (lruway ? rz3iu_icache_sets : 0); | |
204 | ||
205 | tags[w] = tag; | |
206 | int loc = (w << rz3iu_icache_bshift) | offset; | |
207 | rz3iu_icache_data * icdata = &(data[loc]); | |
208 | icdata->instr = instr; | |
209 | // icdata->iop = spix_sparc_iop(SPIX_SPARC_V9, &(instr)); | |
210 | // icdata->dinfo = gen_dcti_info(instr, icdata->iop); | |
211 | if (rz3_minor_version > 17) { | |
212 | icdata->dinfo = gen_dcti_info(instr); | |
213 | } else { | |
214 | icdata->dinfo = gen_dcti_info_v317(instr); | |
215 | } | |
216 | icdata->target = 0x0; // target of cti inst | |
217 | if (!icdata->dinfo.flags.isdcti && ! icdata->dinfo.flags.is_done_retry) { | |
218 | // icdata->is_ldstpf = (spix_sparc_iop_isload(icdata->iop) || spix_sparc_iop_isustore(icdata->iop) || spix_sparc_iop_iscstore(icdata->iop) || (icdata->iop == SPIX_SPARC_IOP_PREFETCH)); | |
219 | icdata->is_ldstpf = rz_is_ldstpf(instr); | |
220 | } else { | |
221 | icdata->is_ldstpf = false; | |
222 | } | |
223 | return icdata; | |
224 | } | |
225 | ||
226 | struct rz3iu_icache_data * get(uint64_t pc) { | |
227 | uint64_t tag; | |
228 | int idx; | |
229 | int offset; | |
230 | tag_idx_ofs(pc, tag, idx, offset); | |
231 | if (tags[idx] == tag) { | |
232 | int loc = (idx << rz3iu_icache_bshift); | |
233 | loc |= offset; | |
234 | lru[idx] = 1; | |
235 | return &(data[loc]); | |
236 | } else if (tags[idx+rz3iu_icache_sets] == tag) { | |
237 | int loc = (idx+rz3iu_icache_sets) << rz3iu_icache_bshift; | |
238 | loc |= offset; | |
239 | lru[idx] = 0; | |
240 | return & (data[loc]); | |
241 | } else { | |
242 | return NULL; | |
243 | } | |
244 | } | |
245 | ||
246 | void tag_idx_ofs(uint64_t pc, uint64_t & tag, int & idx, int & ofs) | |
247 | { | |
248 | tag = (pc >> (2+rz3iu_icache_bshift)); | |
249 | idx = (int) (tag & (rz3iu_icache_sets - 1)); | |
250 | ofs = (pc >> 2) & (rz3iu_icache_bsize - 1); | |
251 | } | |
252 | }; // struct rz3iu_icache | |
253 | ||
254 | ||
/* Fixed-size circular stack of 64-bit addresses.
 * It holds at most ras_sz entries; pushing onto a full stack overwrites the
 * oldest entry. Popping an empty stack yields 0x0.
 */
struct rz3_ras {

  enum consts_e { ras_sz = 16 };
  uint64_t arr[ras_sz]; // entry storage, used circularly
  int top;              // index of the most recently pushed entry
  int n;                // number of live entries, 0..ras_sz

  rz3_ras() {
    clear();
  }

  // drop all entries
  void clear() {
    top = 0;
    n = 0;
  }

  // advance top (wrapping around) and store pc there
  void push(uint64_t pc) {
    top = (top + 1) % ras_sz;
    arr[top] = pc;
    if (n < ras_sz) {
      ++n;
    }
  }

  // return the most recent entry and step top back (wrapping around);
  // returns 0x0 when the stack is empty
  uint64_t pop() {
    if (n == 0) {
      return 0x0;
    }
    const uint64_t rv = arr[top];
    top = (top == 0) ? (ras_sz - 1) : (top - 1);
    --n;
    return rv;
  }
}; // rz3_ras
290 | ||
#endif // _rz3iu_h_
292 |