Commit | Line | Data |
---|---|---|
86530b38 AT |
1 | // ========== Copyright Header Begin ========================================== |
2 | // | |
3 | // OpenSPARC T2 Processor File: MCUStub_class.vr | |
4 | // Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved | |
5 | // 4150 Network Circle, Santa Clara, California 95054, U.S.A. | |
6 | // | |
7 | // * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
8 | // | |
9 | // This program is free software; you can redistribute it and/or modify | |
10 | // it under the terms of the GNU General Public License as published by | |
11 | // the Free Software Foundation; version 2 of the License. | |
12 | // | |
13 | // This program is distributed in the hope that it will be useful, | |
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | // GNU General Public License for more details. | |
17 | // | |
18 | // You should have received a copy of the GNU General Public License | |
19 | // along with this program; if not, write to the Free Software | |
20 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
21 | // | |
22 | // For the avoidance of doubt, and except that if any non-GPL license | |
23 | // choice is available it will apply instead, Sun elects to use only | |
24 | // the General Public License version 2 (GPLv2) at this time for any | |
25 | // software where a choice of GPL license versions is made | |
26 | // available with the language indicating that GPLv2 or any later version | |
27 | // may be used, or where a choice of which version of the GPL is applied is | |
28 | // otherwise unspecified. | |
29 | // | |
30 | // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
31 | // CA 95054 USA or visit www.sun.com if you need additional information or | |
32 | // have any questions. | |
33 | // | |
34 | // ========== Copyright Header End ============================================ | |
35 | #include <vera_defines.vrh> | |
36 | ||
37 | #include <std_display_defines.vri> | |
38 | ||
39 | // L2/MCU interface | |
40 | #include <MCUStub.if.vrh> | |
41 | #include <MCUStub.bind.vrh> | |
42 | // all externs, etc | |
43 | #include <globals.vri> | |
44 | // std disp | |
45 | #include <std_display_class.vrh> | |
46 | // global memory array gMem | |
47 | #include <memArray.vrh> | |
48 | // bench parameters/knobs | |
49 | #include <baseParamsClass.vrh> | |
50 | #include <sparcParams.vrh> | |
51 | ||
52 | ||
53 | ||
54 | // print stuff | |
55 | //#define DEBUGMCU | |
56 | ||
57 | //#define IDLE_DATA {urandom(),urandom(),urandom(),urandom()} | |
58 | #define IDLE_DATA 128'hDEAD_BEEF_DEAD_BEEF_DEAD_BEEF_DEAD_BEEF | |
59 | #define IDLE_ECC 28'hBCD_ABCD | |
60 | #define READ 0 | |
61 | #define WRITE 1 | |
62 | ||
63 | ///////////////////////////////////////////////////////////////////////////////// | |
64 | // MCU Stub class | |
65 | ///////////////////////////////////////////////////////////////////////////////// | |
66 | ||
67 | ||
// ReadPacket: one L2 -> MCU read request together with the randomized
// response delays (in clocks) that the stub applies while servicing it.
class ReadPacket {

  reg [511:0] cacheline;   // line data returned for this request
  reg [39:0]  address;     // [39:7] and [5] come from the L2 request, [6] is
                           // filled in from the stub instance; other bits = 0
  integer     req_id;      // request id echoed back with the read data
  reg         dummy;       // 1 = dummy read (data is not fetched from memory)

  // Randomized response timing, bounded by the bench knobs in gParam.
  rand integer req_to_ack_delay;    // clocks from request to rd_ack
  rand integer ack_to_data_delay;   // clocks from rd_ack to start of response
  rand integer intra_data_delay;    // clocks between the two data pairs

  constraint delays {
    req_to_ack_delay  >= gParam.mcuReq2ackDelayMin;
    req_to_ack_delay  <= gParam.mcuReq2ackDelayMax;
    ack_to_data_delay >= gParam.mcuAck2dataDelayMin;
    ack_to_data_delay <= gParam.mcuAck2dataDelayMax;
    intra_data_delay  >= gParam.mcuIntraDataDelayMin;
    intra_data_delay  <= gParam.mcuIntraDataDelayMax;
  }

  // Picks the delays and clears address/dummy. cacheline is left for the
  // stub to fill in before it is driven.
  task new() {
    // A failed solve (e.g. a Min knob above its Max) leaves the delay fields
    // unsolved; report it instead of silently discarding the status.
    if (this.randomize() != OK)
      printf("%0d ReadPacket::new WARNING: randomize() failed, check the mcu*DelayMin/Max knobs\n", get_cycle());
    address = 0;
    dummy   = 0;
  }

}
95 | ||
96 | // If I have a rd and wr to the same block at the same time, the wr must always | |
97 | // go first. If a write is active, delay the read until it is done. Always do | |
98 | // mem array writes in zero time and always do mem array reads when driving the wires. | |
// Stub model of the MCU side of the L2/MCU interface. One object serves one
// L2 bank: it acknowledges read and write requests, returns read data from
// the global memory array gMem and stores write data into it.
class MCUStub_class {

  // ---- ports toward the L2 --------------------------------------------
  local mcu_port      L2port;        // request/ack/id/error signals
  local mcu_data_port L2dataPort;    // read data and ecc bus

  // ---- identity -------------------------------------------------------
  local reg [2:0] instance;          // stub number given to new()
  local reg [1:0] pair;              // what pair of L2 banks we are talking to

  // ---- state shared by all stub objects -------------------------------
  // One semaphore per shared data bus, indexed by pair.
  static local integer interface_lock [4];

  // ---- per-object bookkeeping -----------------------------------------
  local integer readRespBox;         // mailbox id allocated in new()
  local integer outstandingReads;    // reads acked but not yet fully returned
  local integer outstandingDummy;    // 1 while a dummy read is in flight
  //local reg [63:0] rdReqId [8];    // track the (up to) 8 read addresses that are active
  local reg [31:0] activeWrBlk;      // this address is being written NOW. Do not read it.

  // ---- methods --------------------------------------------------------
  task new(mcu_port portvar,
           mcu_data_port dataportvar,
           integer instance);
  local task CollectReadReq();
  local task CollectWriteReq();
  local task InjectReadData(ReadPacket read_pkt, integer dummy=0);
  local function reg [6:0] DECC(reg [31:0] d);

}
120 | ||
121 | ||
// Constructor: binds the stub to its L2 ports, parks every MCU -> L2 output
// at 0 and starts the read and write request collectors in the background.
//
//   portvar     - port carrying the request/ack/id/error signals
//   dataportvar - port carrying the read data and ecc bus
//   instance    - stub number; kept in a 3-bit field, and (instance >> 1)
//                 selects which shared data bus / lock this stub uses
//
// NOTE(review): interface_lock is static, yet all four semaphores are
// allocated again by every object that is constructed, replacing the ids
// stored by earlier objects. Presumably harmless while all stubs are built
// before any read takes a lock -- confirm before constructing a stub later
// in the simulation.
task MCUStub_class::new(mcu_port portvar,
                        mcu_data_port dataportvar,
                        integer instance)
{

  integer i;

  // lock for the 4 shared data buses
  for (i=0;i<4;i++) interface_lock[i] = alloc(SEMAPHORE,0,1,1);

  // idle marker: no write in progress
  activeWrBlk = 32'hffffffff;

  readRespBox      = alloc (MAILBOX,0,1);
  outstandingReads = 0;
  outstandingDummy = 0;
  L2port           = portvar;
  L2dataPort       = dataportvar;

  this.instance = instance;
  // what pair of L2 banks we are talking to on a common bus
  this.pair = this.instance >> 1;

  // read response path idle
  L2port.$mcu_l2t_rd_ack        = 0;
  L2port.$mcu_l2t_rd_req_id_r0  = 0;
  L2port.$mcu_l2t_data_vld_r0   = 0;
  L2dataPort.$mcu_l2b_data_r2   = 0;
  L2port.$mcu_l2t_qword_id_r0   = 0;
  L2dataPort.$mcu_l2b_ecc_r2    = 0;

  // no errors reported
  L2port.$mcu_l2t_secc_err_r2   = 0;
  L2port.$mcu_l2t_mecc_err_r2   = 0;
  L2port.$mcu_l2t_scb_secc_err  = 0;
  L2port.$mcu_l2t_scb_mecc_err  = 0;

  // write ack idle
  L2port.$mcu_l2t_wr_ack = 0;

  // both collectors loop forever; do not wait for them
  fork
  {
    CollectReadReq();
  }

  {
    CollectWriteReq();
  }
  join none

} //end MCUStub_class::new
172 | ||
173 | ||
// Endless read-request collector. For every rising edge of l2t_mcu_rd_req it
//   1. captures address, request id and the dummy flag into a ReadPacket,
//   2. holds off while 8 reads (or, for a dummy request, another dummy) are
//      still outstanding,
//   3. waits req_to_ack_delay clocks and pulses mcu_l2t_rd_ack for one clock,
//   4. hands the packet to InjectReadData, which returns the data in the
//      background.
task MCUStub_class::CollectReadReq() {

  ReadPacket read_pkt;
  integer dummy=0;   // 1 while the request being handled is a dummy read

  while (1) {

    @(posedge L2port.$l2t_mcu_rd_req);
    if (L2port.$l2t_mcu_rd_dummy_req) dummy = 1;

    read_pkt = new();
    read_pkt.address[39:7] = L2port.$l2t_mcu_addr;
    read_pkt.address[6]    = instance[0];
    read_pkt.address[5]    = L2port.$l2t_mcu_addr_5;
    read_pkt.req_id        = L2port.$l2t_mcu_rd_req_id;

    if (gParam.mcuMemPrint[READ]) printf("%0d MCUStub_class::CollectReadReq (MCU#%0d) ID%0d read req addr [39:0]=%h (%h) dummy=%0d\n", get_cycle(),instance,read_pkt.req_id,read_pkt.address[39:0],L2port.$l2t_mcu_addr,dummy);

    // do not respond if we have 8 requests in flight
    // or 1 dummy.
    if (outstandingReads >= 8) wait_var(outstandingReads);
    if (outstandingDummy && dummy) wait_var(outstandingDummy);

    repeat(read_pkt.req_to_ack_delay) @(posedge L2port.$clk);

    L2port.$mcu_l2t_rd_ack = 1;

#ifdef DEBUGMCU
    printf("%0d MCUStub_class::CollectReadReq (MCU#%0d) ID%0d %0h asserting mcu_l2t_rd_ack after delay of %0d. dummy=%0d\n",get_cycle(),instance,read_pkt.req_id,read_pkt.address[39:0],read_pkt.req_to_ack_delay,dummy);
#endif

    @(posedge L2port.$clk);
    L2port.$mcu_l2t_rd_ack = 0;

    if (dummy) {
      // a dummy read returns zero data; one outstanding dummy read permitted
      read_pkt.dummy     = 1;
      read_pkt.cacheline = 0;
      outstandingDummy   = 1;
    }
    outstandingReads++;
    InjectReadData(read_pkt, dummy);   // not blocking
    dummy = 0;

  }
}
224 | ||
225 | ||
226 | ||
// Returns the data for one acknowledged read. The task itself returns
// immediately; the response runs in a forked thread that
//   1. waits ack_to_data_delay clocks,
//   2. takes the lock of the data bus shared by this stub's L2 bank pair,
//   3. waits while a write to the same block is in progress,
//   4. reads the line from gMem (skipped for dummy reads, whose cacheline
//      was preset by the caller),
//   5. drives the line as two pairs of 16-byte beats with ecc, separated by
//      intra_data_delay clocks,
//   6. releases the lock and decrements the outstanding-read counters.
//
//   read_pkt - request to answer (address, id, delays, dummy flag)
//   dummy    - non-zero when this is the dummy read; clears outstandingDummy
//              once the response is done
task MCUStub_class::InjectReadData(ReadPacket read_pkt, integer dummy=0) {

  reg [63:0] temp_data[8];        // the line as eight 8-byte words
  integer i;
  integer qword_id, qword_idi;    // beat counters: data phase / id phase
  reg [31:0] tmp32;               // random filler for idle bus values

  // fork off a delayed response for this request
  fork {

#ifdef DEBUGMCU
    printf("%0d MCUStub_class::InjectReadData (MCU#%0d) ID%0d %0h delaying %0d before data drive\n", get_cycle(),instance,read_pkt.req_id,read_pkt.address[39:0],read_pkt.ack_to_data_delay);
#endif

    repeat (read_pkt.ack_to_data_delay) @(posedge L2port.$clk);

#ifdef DEBUGMCU
    printf("%0d MCUStub_class::InjectReadData (MCU#%0d) ID%0d %0h waiting for bus lock\n", get_cycle(),instance,read_pkt.req_id,read_pkt.address[39:0]);
#endif
    // lock the shared data/ecc bus
    semaphore_get(WAIT, interface_lock[pair], 1);


    // If a write is in progress to this block, wait before reading it!
    // All 32 bits are compared: an active write stores a 31-bit block number
    // (bit 31 = 0) while the idle value is 32'hffffffff, so bit 31 is what
    // keeps a read of the all-ones block from matching the idle value and
    // stalling with the bus lock held.
    while (activeWrBlk == {1'b0, read_pkt.address[39:9]}) {
#ifdef DEBUGMCU
      printf("%0d MCUStub_class::InjectReadData (MCU#%0d) ID%0d %0h waiting to avoid RAW hazard\n", get_cycle(),instance,read_pkt.req_id,read_pkt.address[39:0]);
#endif
      wait_var(activeWrBlk);
    }


    // read mem array now that we know a write is not in progress to this address.
    if (! read_pkt.dummy) read_pkt.cacheline = gMem.read512({read_pkt.address[39:6],6'b0});

    // make 8 8 byte words
    for (i=0; i<8; i++) {
      temp_data[i] = read_pkt.cacheline[(i*64)+63:i*64];
#ifdef DEBUGMCU
      printf("%0d MCUStub_class::InjectReadData (MCU#%0d) ID%0d %0h temp_data[%0d]=%h\n", get_cycle(),instance,read_pkt.req_id,read_pkt.address[39:0],i,temp_data[i]);
#endif
    }

    // always 0 or 2 (address[4] is never set by the request collector)
    qword_id  = read_pkt.address[5:4];
    qword_idi = read_pkt.address[5:4];

    // drive 4 16 byte words in pairs w/ delay between pairs
    for (i=0; i<2; i++) {

      // id phase: request id and data valid for two clocks, qword id
      // advancing each clock
      L2port.$mcu_l2t_rd_req_id_r0 = read_pkt.req_id;
      L2port.$mcu_l2t_data_vld_r0 = 1;
      repeat (2) {
        L2port.$mcu_l2t_qword_id_r0 = qword_idi;
        @(posedge L2port.$clk);
        qword_idi = (qword_idi+1)%4;
      }
      L2port.$mcu_l2t_data_vld_r0 = 0;

      // leave interface random
      tmp32 = urandom();
      L2port.$mcu_l2t_qword_id_r0 = tmp32[5:3];
      L2port.$mcu_l2t_rd_req_id_r0 = tmp32[2:0];
      @(posedge L2port.$clk);

      // data phase: two 16-byte beats, each with 4 x 7 bits of ecc
      repeat (2) {
        // drive 128 bits
        // 127:64 has low chunk, [63:0] has high chunk.
        L2dataPort.$mcu_l2b_data_r2 = {temp_data[(qword_id*2)], temp_data[(qword_id*2)+1]};
        L2dataPort.$mcu_l2b_ecc_r2 = {
          DECC(temp_data[(qword_id*2)][63:32]),
          DECC(temp_data[(qword_id*2)][31:0]),
          DECC(temp_data[(qword_id*2)+1][63:32]),
          DECC(temp_data[(qword_id*2)+1][31:0]) };

        if (gParam.mcuMemPrint[READ]) printf("%0d MCUStub_class::InjectReadData (MCU#%0d) ID%0d %0h pair %0d drive %h_%h\n",get_cycle(),instance,read_pkt.req_id,read_pkt.address[39:0],i,temp_data[(qword_id*2)], temp_data[(qword_id*2)+1]);

        @(posedge L2port.$clk);
        qword_id = (qword_id+1)%4;
      }

      // leave bus randomized when idle
      tmp32 = urandom();
      // @1 L2dataPort.$mcu_l2b_data_r2 <= IDLE_DATA;
      // @1 L2dataPort.$mcu_l2b_ecc_r2 <= tmp32[27:0];
      L2dataPort.$mcu_l2b_data_r2 = IDLE_DATA;
      L2dataPort.$mcu_l2b_ecc_r2 = tmp32[27:0];

      // delay before next data
      if (i == 0 && read_pkt.intra_data_delay) {
#ifdef DEBUGMCU
        printf("%0d MCUStub_class::InjectReadData (MCU#%0d) ID%0d %0h delaying %0d before next data pkt pair (%0d).\n",get_cycle(),instance,read_pkt.req_id,read_pkt.address[39:0],read_pkt.intra_data_delay,i);
#endif
        repeat (read_pkt.intra_data_delay) @(posedge L2port.$clk);
      }

    } // for

    // response complete: free the bus and the request slot
    semaphore_put(interface_lock[pair], 1);
    outstandingReads--;
    if (dummy) outstandingDummy = 0;

    if (gParam.mcuMemPrint[READ]) {
      printf("%0d MCUStub_class::InjectReadData (MCU#%0d) ID%0d %0h done injecting read %h %h_%h_%h_%h\n",get_cycle(),instance,read_pkt.req_id,read_pkt.address[39:0],read_pkt.address[39:0], read_pkt.cacheline[511:384], read_pkt.cacheline[383:256], read_pkt.cacheline[255:128], read_pkt.cacheline[127:0]);
      gMem.dumpMem(read_pkt.address[39:0] & 40'hFFFFFFFFC0, 8);
    }

  } join none

}
339 | ||
340 | ||
341 | // The L2 cache has the potential to exceed | |
342 | // the Write Request Queue limit of eight. If all eight entries in the | |
343 | // Write Request Queue are full when the ninth request comes in, the MCU | |
344 | // will hold the request and not send an acknowledge until an entry frees | |
345 | // up. | |
346 | // | |
347 | // Task has periodic ack holdoffs to emulate the previous blurb. | |
348 | // We don't have a queue since it would be pointless (the RTL wouldn't | |
349 | // know we had it, it only knows fast/slow ack delays so we will do that). | |
// Endless write-request collector. For every rising edge of l2t_mcu_wr_req:
//   - latch the target address and publish its block number in activeWrBlk
//     so that reads of the same block hold off,
//   - acknowledge after a random delay (plus a long extra hold-off on every
//     64th write),
//   - collect eight 64-bit data beats once l2b_mcu_data_vld_r5 rises,
//   - store the line into gMem and mark the write as finished.
task MCUStub_class::CollectWriteReq() {

  reg [511:0] wr_line;        // assembled write data
  reg [39:0]  wr_addr;        // address of the write being handled
  integer     beat;           // data beat index, 0..7
  reg [5:0]   wr_count = 0;   // wrapping count of writes seen

  while (1) {

    @(posedge L2port.$l2t_mcu_wr_req);

    wr_count++;

    // rebuild the address: [39:7] and [5] from the L2, [6] from the instance
    wr_addr = 0;
    wr_addr[39:7] = L2port.$l2t_mcu_addr;
    wr_addr[6]    = instance[0];
    wr_addr[5]    = L2port.$l2t_mcu_addr_5;

    // from now until the line is stored, reads of this block must wait
    activeWrBlk = wr_addr[39:9];

    if (gParam.mcuMemPrint[WRITE]) printf("%0d MCUStub_class::CollectWriteReq (MCU#%0d) write req addr [39:0]=%h (%h)\n",get_cycle(),instance,wr_addr[39:0],L2port.$l2t_mcu_addr);

    // normal ack delay
    repeat (urandom_range(gParam.mcuWrReq2ackDelayMax,gParam.mcuWrReq2ackDelayMin))
      @(posedge L2port.$clk);

    // every 64 writes, really extend the ack delay as if the 8 write buffers
    // are full. This should be good enough to test L2 back pressuring.
    if (wr_count == 6'h3f)
      repeat (gParam.mcuWrReq2ackFullDelay)
        @(posedge L2port.$clk);

    // one-clock ack pulse; write data must not be valid yet
    L2port.$mcu_l2t_wr_ack = 1;
    @0 L2port.$l2b_mcu_data_vld_r5 == 0;
    @(posedge L2port.$clk);
    L2port.$mcu_l2t_wr_ack = 0;

    @(posedge L2port.$l2b_mcu_data_vld_r5);

    // one 64-bit beat per clock, lowest word first
    for (beat=0; beat<8; beat++) {
      wr_line[(beat*64)+63:beat*64] = L2port.$l2b_mcu_wr_data_r5;

      if (gParam.mcuMemPrint[WRITE]) printf("%0d MCUStub_class::CollectWriteReq (MCU#%0d) %0h collecting write data[%0d] = %h\n",get_cycle(),instance,wr_addr[39:0], beat, L2port.$l2b_mcu_wr_data_r5);

      @(posedge L2port.$clk);
    }
    // data valid must have dropped again after the eighth beat
    @0 L2port.$l2b_mcu_data_vld_r5 == 0;

    // store the whole line at its 64-byte aligned address
    gMem.write512({wr_addr[39:6],6'b0}, wr_line);
    if (gParam.mcuMemPrint[WRITE]) gMem.dumpMem(wr_addr[39:0] & 40'hFFFFFFFFC0, 8);

    // back to the idle marker: no write in progress
    activeWrBlk = 32'hffffffff;

#ifdef DEBUGMCU
    printf("MCUStub_class::CollectWriteReq (MCU#%0d) finished write %h %h\n",instance,wr_addr[39:0],wr_line);
#endif

  } //end while (1)

}
410 | ||
411 | ||
412 | ///////////////////////////////////////////////////////////////////////////////// | |
413 | // This function generates the 7b ECC for a 32b data segment. | |
414 | // The input is a 32b data segment. | |
415 | // The output is {1b_parity, 6b_ecc}. | |
416 | ///////////////////////////////////////////////////////////////////////////////// | |
417 | /* function reg [6:0] DECC(reg [31:0] d) {{{1*/ | |
// Computes the 7 check bits for one 32-bit data word.
//   d       - data word
//   returns - {1b_parity, 6b_ecc}
// Every check bit is the XOR of a fixed subset of the data bits. Each subset
// is written as a mask: AND selects the bits, the reduction XOR folds them.
function reg [6:0] MCUStub_class::DECC(reg [31:0] d) {

  // parity bit: d[0,1,2,4,5,7,10,11,12,14,17,18,21,23,24,26,27,29]
  DECC[6] = ^(d & 32'h2DA65CB7);

  // ecc bits
  // d[31:26]
  DECC[5] = ^(d & 32'hFC000000);

  // d[25:11]
  DECC[4] = ^(d & 32'h03FFF800);

  // d[25:18], d[10:4]
  DECC[3] = ^(d & 32'h03FC07F0);

  // d[31:29], d[25:22], d[17:14], d[10:7], d[3:1]
  DECC[2] = ^(d & 32'hE3C3C78E);

  // d[0,2,3,5,6,9,10,12,13,16,17,20,21,24,25,27,28,31]
  DECC[1] = ^(d & 32'h9B33366D);

  // d[0,1,3,4,6,8,10,11,13,15,17,19,21,23,25,26,28,30]
  DECC[0] = ^(d & 32'h56AAAD5B);
}