Initial commit of OpenSPARC T2 architecture model.
[OpenSPARC-T2-SAM] / sam-t2 / sam / cpus / vonk / n2 / lib / ras / src / N2_MemErrDetector.cc
CommitLineData
920dae64
AT
1// ========== Copyright Header Begin ==========================================
2//
3// OpenSPARC T2 Processor File: N2_MemErrDetector.cc
4// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
5// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
6//
7// The above named program is free software; you can redistribute it and/or
8// modify it under the terms of the GNU General Public
9// License version 2 as published by the Free Software Foundation.
10//
11// The above named program is distributed in the hope that it will be
12// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15//
16// You should have received a copy of the GNU General Public
17// License along with this work; if not, write to the Free Software
18// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19//
20// ========== Copyright Header End ============================================
21/************************************************************************
22**
23** Copyright (C) 2005, Sun Microsystems, Inc.
24**
25** Sun considers its source code as an unpublished, proprietary
26** trade secret and it is available only under strict license provisions.
27** This copyright notice is placed here only to protect Sun in the event
28** the source is deemed a published work. Disassembly, decompilation,
29** or other means of reducing the object code to human readable form
30** is prohibited by the license agreement under which this code is
31** provided to the user or company in possession of this copy.
32**
33*************************************************************************/
34
35#include <stdlib.h>
36#include <sstream>
37#include "BL_Hamming_22_6_Synd.h"
38#include "BL_Hamming_64_8_Synd.h"
39#include "N2_Model.h"
40#include "N2_Core.h"
41#include "N2_Strand.h"
42#include "N2_State.h"
43#include "N2_MemErrDetector.h"
44#include "SS_CKMemory.h"
45#include "BL_Utils.h"
46
47
// Used to call a pointer to member function.
// Localizes the nasty syntax for this language feature: the macro expands
// to ((object).*(ptrToMember)), and the caller appends the argument list,
// e.g. CALL_MEMBER_FN(obj, memberFnPtr)(arg1, arg2).
#define CALL_MEMBER_FN(object,ptrToMember) ((object).*(ptrToMember))
51
52/**
53 * The N2_MemErrDetector class is used to detect injected RAS errors
54 * associated with the memory hierarchy. In particular, it models and
55 * detects errors in the primary and secondary caches and DRAM.
56 */
57
58using namespace std;
59
60
61SS_Trap::Type N2_MemErrDetector::detect_fetch_err( MemoryLevel level, SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Paddr pa) {
62
63
64 MemoryTransaction mem_xact;
65
66 mem_xact.setStrand(s->strand_id());
67 mem_xact.paddr(pa);
68 mem_xact.size(64);
69 mem_xact.access(MemoryTransaction::READ);
70 mem_xact.referenceType(MemoryTransaction::INSTR);
71
72 return detectErr(mem_xact);
73}
74
75
76SS_Trap::Type N2_MemErrDetector::detect_load_err( MemoryLevel level, SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Paddr pa) {
77
78
79 MemoryTransaction mem_xact;
80
81 mem_xact.setStrand(s->strand_id());
82 mem_xact.paddr(pa);
83 mem_xact.size(line->len);
84 mem_xact.access(MemoryTransaction::READ);
85 mem_xact.referenceType(MemoryTransaction::DATA);
86
87 return detectErr(mem_xact);
88}
89
90SS_Trap::Type N2_MemErrDetector::inject_store_err( MemoryLevel level, SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Paddr pa, uint64_t data) {
91
92
93 // Stores coherently update all the primary caches so we call
94 // SS_Model::ras_flush() to approximate this behavior.
95 s->model->ras_flush(s, pa, line->len, SS_MemErrDetector::INSTR_CACHE);
96 s->model->ras_flush(s, pa, line->len, SS_MemErrDetector::DATA_CACHE);
97
98 MemoryTransaction mem_xact;
99
100 mem_xact.setStrand(s->strand_id());
101 mem_xact.paddr(pa);
102 mem_xact.size(line->len);
103 mem_xact.access(MemoryTransaction::WRITE);
104 mem_xact.referenceType(MemoryTransaction::DATA);
105 mem_xact.setData(data);
106
107
108 return detectErr(mem_xact);
109}
110
111//Injecting errors into registers in the Tick Compare array
112BL_EccBits N2_MemErrDetector::n2_tick_cmpr_err_injector(SS_Strand* s, uint64_t data)
113{
114 N2_Strand* n2 = (N2_Strand*)s;
115 N2_Core& n2_core = n2->core;
116 // The INTDIS bit (bit 63) has to be flipped before sending the value
117 // for ecc calculation - 28.11 - N2 PRM rev 1.1
118 BL_EccBits ecc_obj = BL_Hamming_64_8_Synd::calc_check_bits((1ULL<<63)^data);
119 unsigned ecc = 0;
120 if(ecc_obj.valid())
121 {
122 ecc = ecc_obj.get();
123 }
124 // Check if ENB and TCCU bits are set in N2 Error Injection Register
125 if ((n2_core.error_inject.ene() == 1) && (n2_core.error_inject.tccu() == 1))
126 {
127 ecc ^= n2_core.error_inject.eccmask();
128 // Set back the corrputed ecc
129 ecc_obj.set(ecc);
130 }
131 return ecc_obj;
132}
133
134// (ASR) Reads to any reg in the Tick Compare Array (TCA) triggers this routine.
135// This routine scans for the presence of precise single bit or multi bit errors
136// and records the error information in DSFAR and throws an precise
137// internal_processor_error trap
138// Correctable errors are detected only if CERER.TCCP bit is set
139// Uncorrectable errors are detected only if CERER.TCUP bit is set
140// Errors are recorded only if the PSCCE bit is set in the SETER
141// The syndrome is stored in bits 2 thru 9 of DSFAR
142// The tick compare array index is stored in bits 0 and 1 of DSFAR
143// 00 - Tick Cmpr 01 - Stick Cmpr 10 - Hstick Cmpr
144SS_Trap::Type N2_MemErrDetector::n2_tick_cmpr_precise_err_detector(SS_Strand* s, N2_TickAccess::TickAccessIndex array_index)
145{
146#if 0
147 N2_Strand* n2 = (N2_Strand*)s;
148 N2_Core& n2_core = n2->core;
149 bool update_dsfar = false;
150
151 uint64_t val = 0;
152
153 if (array_index == N2_TickAccess::TICK_CMPR_INDX)
154 val = n2->tick_cmpr();
155 else if (array_index == N2_TickAccess::STICK_CMPR_INDX)
156 val = n2->stick_cmpr();
157 else if (array_index == N2_TickAccess::HSTICK_CMPR_INDX)
158 val = n2->hstick_cmpr();
159
160 BL_EccBits ecc_obj = n2->tick_cmpr_array_ecc[array_index];
161 if(!ecc_obj.valid())
162 {
163 return SS_Trap::NO_TRAP;
164 }
165 BL_Hamming_64_8_Synd syndrome = BL_Hamming_64_8_Synd(val,ecc_obj);
166
167 if (n2_core.cerer.tccp())
168 {
169 if (syndrome.isSingleBitError())
170 {
171 if (n2->seter.pscce())
172 {
173 n2->data_sfsr.error_type(N2_DataSfsr::TCCP);
174 update_dsfar = true;
175 }
176 }
177 }
178 else if (n2_core.cerer.tcup())
179 {
180 if (syndrome.isDoubleBitError() || syndrome.isMultipleBitError())
181 {
182 if (n2->seter.pscce())
183 {
184 n2->data_sfsr.error_type(N2_DataSfsr::TCUP);
185 update_dsfar = true;
186 }
187 }
188 }
189 if (update_dsfar)
190 {
191 uint64_t error_add = 0;
192 error_add = BL_BitUtility::set_subfield(error_add,syndrome.getSyndrome(),2,9);
193 error_add = BL_BitUtility::set_subfield(error_add,array_index,0,1);
194 n2->data_sfar.error_addr(error_add);
195 return SS_Trap::INTERNAL_PROCESSOR_ERROR;
196 }
197#endif
198 return SS_Trap::NO_TRAP;
199}
200
201// This routine checks for the presence of disrupting errors (for all the regs
202// in the Tick Compare Array). If there is an error,the information is recorded
203// in the DESR and a 'sw_recoverable_error' is thrown. Correctable errors are
204// detected only if CERER.TCCD bit is set.Uncorrectable errors are detected
205// only if CERER.TCUD bit is set.Errors are recorded only if the DE bit is set
206// in the SETER.The syndrome is stored in bits 2 thru 9 of DESR.The tick compare
207// array index is stored in bits 0 and 1 of DESR.
208// 00 - Tick Cmpr 01 - Stick Cmpr 10 - Hstick Cmpr
209bool N2_MemErrDetector::n2_tick_cmpr_disrupting_err_detector(SS_Strand* s)
210{
211 bool err_found = false;
212 bool update_desr = false;
213 int error_type = 0;
214
215 N2_Strand* n2 = (N2_Strand*)s;
216 N2_Core& n2_core = n2->core;
217
218 // Check for errors in all the three tick_cmpr registers
219 for (uint64_t array_index = 0; array_index < N2_TickAccess::TICK_ACCESS_MAX; array_index++)
220 {
221 uint64_t val = 0;
222
223 if (array_index == N2_TickAccess::TICK_CMPR_INDX)
224 val = n2->tick_cmpr();
225 else if (array_index == N2_TickAccess::STICK_CMPR_INDX)
226 val = n2->stick_cmpr();
227 else if (array_index == N2_TickAccess::HSTICK_CMPR_INDX)
228 val = n2->hstick_cmpr();
229
230 BL_EccBits ecc_obj = n2->tick_cmpr_array_ecc[array_index];
231 if(!ecc_obj.valid())
232 {
233 continue;
234 }
235 BL_Hamming_64_8_Synd syndrome = BL_Hamming_64_8_Synd(val,ecc_obj);
236
237 if (n2_core.cerer.tccd())
238 {
239 if (syndrome.isSingleBitError())
240 {
241 error_type = N2_Desr::RE_TCCD;
242 err_found = true;
243 }
244 }
245 else if (n2_core.cerer.tcud())
246 {
247 if (syndrome.isDoubleBitError() || syndrome.isMultipleBitError())
248 {
249 error_type = N2_Desr::RE_TCUD;
250 err_found = true;
251 }
252 }
253 if (err_found)
254 {
255 if (n2->desr.f())
256 {
257 if (n2->desr.s())
258 {
259 // If the DESR already has a pending sw_recoverable_error, the details
260 // about the current error is not recorded. The presence of muliple
261 // errors is denoted by setting the 'me' bit in the DESR
262 n2->desr.me(1);
263 update_desr = false;
264 }
265 else
266 {
267 // If the DESR already has a pending hw_corrected_error, the details
268 // about the previous error is flushed out and the details about the
269 // current sw_recoverable_error is recorded. 'sw_recoverable' errors
270 // have higher precedence than hw_corrected errors. The presence of
271 // muliple errors is denoted by setting the 'me' bit in the DESR
272 n2->desr.s(1);
273 n2->desr.me(1);
274 update_desr = true;
275 }
276 }
277 else
278 {
279 // No prior error.
280 n2->desr.f(1);
281 n2->desr.s(1);
282 update_desr = true;
283 }
284
285 if (update_desr)
286 {
287 if (n2->seter.de())
288 {
289 n2->desr.errtype(error_type);
290 uint64_t error_add = 0;
291 error_add = BL_BitUtility::set_subfield(error_add,syndrome.getSyndrome(),2,9);
292 error_add = BL_BitUtility::set_subfield(error_add,array_index,0,1);
293 n2->desr.erraddr(error_add);
294 n2->irq.raise(n2,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR);
295 break;
296 }
297 }
298 }
299 }
300 return err_found;
301}
302
303// Routine to flush L2 cache.
304// Checks that the "key" in the pa is correct and then calls
305// L2CacheFlush() to flush the correct lines.
306void N2_MemErrDetector::prefetchICE(SS_Paddr pa)
307{
308 N2_L2CacheFlushAddrFields l2CacheFlushAddr;
309 l2CacheFlushAddr.set_data(pa);
310
311 if (l2CacheFlushAddr.checkKey())
312 L2CacheFlush(l2CacheFlushAddr);
313 else
314 fprintf(stderr, "prefetchICE: bad key: %x\n", l2CacheFlushAddr.getKEY());
315}
316
317SS_Trap::Type N2_MemErrDetector::n2_step_hook(SS_Strand* s)
318{
319 N2_Strand *strand = (N2_Strand*)s;
320 return strand->flush_store_buffer();
321}
322
323// ras_flush() flushes part of a strand's I$ or D$. Which cache is
324// selected by the "type" argument and the range of the cache to
325// invalidate is selected by "pa" and "size".
326void N2_MemErrDetector::ras_flush( SS_Strand*_s, SS_Strand* requesting_strand,
327 SS_Paddr pa, uint64_t size,
328 CacheType type)
329{
330 if ((_s->strand_id() % N2_Model::NO_STRANDS_PER_CORE) != 0)
331 return;
332
333 if (type == SS_MemErrDetector::DATA_CACHE &&
334 requesting_strand != NULL &&
335 (_s->strand_id() / N2_Model::NO_STRANDS_PER_CORE) ==
336 (requesting_strand->strand_id() / N2_Model::NO_STRANDS_PER_CORE))
337 return;
338
339 N2_Strand* s = (N2_Strand*)_s;
340
341 const uint_t line_size = 1 << (N2_IcacheAddressingFields::WIDTH_RSVD0 +
342 N2_IcacheAddressingFields::WIDTH_INSTR);
343 SS_Paddr start_pa = round_down_to_power_of_two(pa, line_size);
344 SS_Paddr end_pa = round_down_to_power_of_two(pa + size, line_size);
345
346 // Clear all I$ lines matching the (pa, pa_size) address range.
347 while (start_pa <= end_pa)
348 {
349 if (type == SS_MemErrDetector::DATA_CACHE)
350 s->core.flush_dcache(start_pa);
351 else
352 s->core.flush_icache(start_pa);
353 start_pa += line_size;
354 }
355}
356
357// The central memory hierarchy error detector.
358//
359// Detects I- and D-cache, L2 cache, Dram, and SOC FBDIMM RAS errors
360// produced by a MemoryTransaction. Models the I- and D-cache, the
361// L2$ and their associated error detection mechanisms. Also, detects
362// RAS errors produced by Dram and the FBDIMM channels.
363//
364// If a trap is detected, detectErr() either throws a BasicTrap with
365// the correct trap number for precise traps or directs the trap to
366// the correct strand with setIntpTrap(). detectErr() checks various
367// control registers before throwing traps or changing state. If a
368// trap is thrown detectErr() sets error information the correct
369// status registers.
370//
371// Conventions: if multiple traps are generated by the same
372// instruction, no more than one is guaranteed to be thrown. Also,
373// error state may not be updated correctly for multiple errors
374// produced on different cycles. The multiple error bits will be set
375// correctly, but the detailed error information will not necessary
376// match the hardware's prioritization of information capture. The
377// error information will be consistent for one of the errors, but
378// detectErr() pick the wrong error's information to save.
379//
380// detectErr() ignores i/o space accesses. For memory accesses,
381// detectErr() dispatches the memory request to either the I-cache or
382// the D-cache RAS routines in N2_Core. These routines check for
383// primary cache errors and cache misses. If a there is a miss in a
384// primary cache, it calls the N2_MemErrDetector::L2CacheFill() method
385// to model loading the L2$ and detecting any errors at that level.
386// If the L2$ doesn't contain the line, it invokes ??? to model loading the
387// line from Dram and detecting any Chip-Kill or FBDIMM errors.
388
389SS_Trap::Type N2_MemErrDetector::detectErr(const MemoryTransaction &memXact)
390{
391 // skip accesses in i/o space or uncorrected data access for Chip-Kill
392 //if (memXact.paddr() >= 0x8000000000 || memXact.noDramErrorCorrect())
393 // return;
394
395 /* DOWNCAST */
396 assert(n2_model);
397 N2_Strand *strand = (N2_Strand*)n2_model->cpu[0]->strand[memXact.getStrand()];
398 MemoryTransaction::RefT refT = memXact.getReferenceType();
399
400 /* DOWNCAST */
401 if (strand != NULL)
402 {
403 N2_Core *core = &strand->core;
404
405 // I-cache fetch?
406 if (refT == MemoryTransaction::INSTR)
407 {
408
409 // check icache RAS errors
410 return core->icache_ifetch(memXact,
411 (!strand->hpstate.hpriv() ||
412 strand->pstate.ie()) &&
413 strand->seter.dhcce(),
414 memXact.getStrand(),
415 this);
416 }
417 // D-cache fetch?
418 else if (refT == MemoryTransaction::DATA)
419 {
420 // If the memory transaction is a read, check the store buffer for
421 // pending stores that alias the read's address.
422 if (memXact.readXact())
423 {
424 SS_Trap::Type tt = strand->check_store_buffer_RAWtrap(memXact);
425 if(tt != SS_Trap::NO_TRAP)
426 return tt;
427 }
428
429 return core->dcache_trans(memXact,
430 (!strand->hpstate.hpriv() ||
431 strand->pstate.ie()) &&
432 strand->seter.dhcce(),
433 memXact.getStrand(),
434 false,
435 this);
436 }
437 }
438}
439
440
441// L2 Cache Line Fill routine
442//
443// Given a MemoryTransaction, L2CacheFill() loads the corresponding
444// L2$ line. First, it checks all the tags in the line's way set,
445// then the line's VuaD entry for ECC errors. Then, it looks for a
446// tag match with the valid bit set. If the appropriate cache line is
447// present in the cache, it checks its ECC and, if the memory
448// transaction is a store, marks the line dirty. Otherwise,
449// L2CacheFill() picks a line to cast out of the cache, checks this
450// line's ECC, and then loads the new line into the cache, calculating
451// its ECC.
452//
453// Note that any ECC error will throw the appropriate trap.
454//
455// Returns trap number to throw if NotData is present in the cache.
456
457SS_Trap::Type N2_MemErrDetector::L2CacheFill(const MemoryTransaction &memXact)
458{
459 N2_Strand *strand = (N2_Strand*)n2_model->cpu[0]->strand[memXact.getStrand()];
460 N2_L2AddressingFields paddr;
461 paddr.set(memXact.getPaddr());
462
463 int way;
464
465 L2FixTagsAndTrap(strand, paddr, memXact);
466
467 N2_L2DiagVdMemWithECC diagVD = L2FixVUADAndTrap(strand, paddr, memXact);
468
469 int hit_way = L2FindWay(paddr, diagVD);
470
471 // update L2 cache tag, VauD, and data
472
473 // Was this a miss?
474 if (hit_way == NO_WAY)
475 {
476 return L2CacheMiss(strand, memXact, diagVD);
477 }
478 else
479 {
480 // hit. hit_way contains matching way
481 return L2CacheHit(strand, memXact, diagVD, hit_way);
482 }
483 return SS_Trap::NO_TRAP;
484}
485
486// L2 Cache Line Flush routine
487//
488// Given a way in the L$2 selected by diagAddr, L2CacheFlush() flushes
489// the corresponding L2$ line, writing it back to memory if it's
490// dirty. First, it checks all the tags in the line's way set, then
491// the line's VuaD entry for ECC errors. It corrects these errors
492// without trapping. Then it clears the valid and dirty VuaD bits for
493// the cache associated with the address.
494//
495// Note that the "real" L2$ must write the line back to memory;
496// however in this L2$ implementation, the correct data has already
497// been written to memory, so invalidation is all that's needed.
498
499void N2_MemErrDetector::L2CacheFlush(N2_L2CacheFlushAddrFields diagAddr)
500{
501 // create a physical address that matches the way and bank of diagAddr
502 N2_L2AddressingFields paddr;
503 paddr.setSET(diagAddr.getSET());
504 paddr.setBANK(diagAddr.getBANK());
505 L2FixTags(paddr);
506
507 uint_t way = diagAddr.getWAY();
508
509 uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way);
510
511 N2_L2DiagTagMem diagTag;
512 L2DiagTagMemAccess_.access(diagNdx, diagTag, true);
513 paddr.setTAG(diagTag.getTAG());
514
515 N2_L2DiagVdMemWithECC diagVD;
516 L2FixVUAD(paddr, diagVD);
517
518 // if the way is valid, invalidate it by clearing the way's dirty
519 // and valid bits in VuaD
520 if (diagVD.getVALID() & (1<<way)) {
521 // do we need to flush the cache line?
522 if (diagVD.getDIRTY() & (1<<way)) {
523 int i;
524 N2_L2AddressingFields ckPaddr;
525 ckPaddr.setNative(paddr.getNative() & ~(SS_CKMemory::DRAM_LINE_LENGTH - 1));
526 N2_L2CacheLineError lineError(ckPaddr.getNative());
527
528 for (i = 0; i < SS_CKMemory::DRAM_LINE_LENGTH/N2_L2_CACHE_LINE_SIZE; ++i) {
529 lineError = L2ProcessCacheLine(ckPaddr, way, false);
530 if (lineError.isUncorrectable()) {
531 break;
532 }
533 ckPaddr.setNative(ckPaddr.getNative() + N2_L2_CACHE_LINE_SIZE);
534 }
535 ckPaddr.setNative(ckPaddr.getNative() & ~(SS_CKMemory::DRAM_LINE_LENGTH - 1));
536 dramUpdateECC(ckPaddr, lineError.isUncorrectable());
537 }
538
539 diagVD.setVALID(diagVD.getVALID() & ~(1<<way));
540 diagVD.setDIRTY(diagVD.getDIRTY() & ~(1<<way));
541 diagVD.setVDECC(diagVD.calcECC());
542
543 L2DiagVdMemAccess_.access(paddrToSetBankNdx(paddr), diagVD, false);
544 }
545
546 // Now flush the primary caches for all the Cores
547
548 paddr.setWORD(0); // align L2$ address to beginning of L2$ line
549
550 N2_IcacheAddressingFields icacheAddr;
551 icacheAddr.set(paddr.getNative());
552
553 // Clear all decode cache lines matching the (pa, pa_size) address range.
554 assert(N2_L2_CACHE_LINE_SIZE == SS_InstrCache::LINE_SIZE *4);
555 for (uint_t cpu_ndx = 0; cpu_ndx < N2_Model::NO_CPUS;++cpu_ndx)
556 for (uint_t strand_ndx = 0; strand_ndx < N2_Model::NO_STRANDS_PER_CPU;
557 ++strand_ndx)
558 n2_model->cpu[cpu_ndx]->strand[strand_ndx]->flush(paddr.getNative(),true);
559
560 for(int i=0;i < N2_L2_CACHE_LINE_SIZE/N2_IcacheAddressingFields::N2_ICACHE_LINE_SIZE;i++){
561 //TODO verify size
562 n2_model->ras_flush(NULL, icacheAddr(), 8,
563 SS_MemErrDetector::INSTR_CACHE);
564 icacheAddr.sets(icacheAddr.sets() + 1);
565 }
566
567 N2_DcacheAddressingFields dcacheAddr;
568 dcacheAddr.set(paddr.getNative());
569 for(int i=0;i < N2_L2_CACHE_LINE_SIZE/N2_DcacheAddressingFields::N2_DCACHE_LINE_SIZE;i++){
570 //TODO verify size
571 n2_model->ras_flush(NULL, dcacheAddr(), 8,
572 SS_MemErrDetector::DATA_CACHE);
573 dcacheAddr.sets(dcacheAddr.sets() + 1);
574 }
575}
576
577// L2FixTags() checks for RAS errors in all the L2$ tags that match
578// the address in paddr. If there are any single-bit errors, it
579// corrects them without throwing a trap.
580
581void N2_MemErrDetector::L2FixTags(N2_L2AddressingFields paddr)
582{
583 // Check all ways for tag ECC error
584 for (int way = 0; way < (1<<N2_L2DiagDataAddressingFields::bitSizeWAY);
585 way++) {
586 uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way);
587
588 L2FixTag(diagNdx);
589 }
590}
591
592// L2 Cache Line Check Tags routine
593//
594// Given a MemoryTransaction, L2FixTagsAndTrap() checks all the tags
595// that match the address in paddr. If there are any single-bit
596// errors, it corrects and throws the appropriate trap.
597
598void N2_MemErrDetector::L2FixTagsAndTrap(N2_Strand *strand,
599 N2_L2AddressingFields paddr,
600 const MemoryTransaction &memXact)
601{
602 // Check all ways for tag ECC error
603 for (int way = 0;way < (1<<N2_L2DiagDataAddressingFields::bitSizeWAY);way++)
604 {
605 uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way);
606
607 if (L2FixTag(diagNdx))
608 {
609 // Set LTC in L2_ERROR_STATUS register and
610 // paddr[39:6] in L2_ERROR_ADDRESS register
611 setL2ErrorStatusReg(paddr.getBANK(), N2_L2ErrorStatusReg::setLTC,
612 true, 0, 0, paddr.getNative());
613
614 // Set error information in DESR
615 N2_CererWithBitMux cerer(strand->core.cerer);
616 // The N2 PRM Rev 1.1 is vague. Sect 12.9.5 refers to the
617 // L2C bit in the CERER, which doesn't exist. We
618 // interpret this to mean the family of L2C bits in Table
619 // 12-4 and select any of them.
620 if (getCEEN(paddr.getBANK()) && cerer.checkOneL2Cbit(memXact))
621 {
622 uint32_t strandId = getErrorSteer(paddr.getBANK());
623 setDESR(strandId,false, N2_Desr::CE_L2C,0);
624
625 // Throw trap to ERRORSTEER
626 trapToErrorSteer(strand, paddr.getBANK(),
627 SS_Interrupt::BIT_HW_CORRECTED_ERROR);
628
629 }
630 }
631 }
632}
633
634
635// L2FixTag() checks and fixes any L2$ tag RAS error at diagnostic
636// access index "diagNdx". It returns true if there is an tag error.
637
638bool N2_MemErrDetector::L2FixTag(uint32_t diagNdx)
639{
640 N2_L2DiagTagMem diagTag;
641
642 L2DiagTagMemAccess_.access(diagNdx, diagTag, true);
643
644 BL_Hamming_22_6_Synd tagSyndrome(diagTag.getTAG(),
645 diagTag.getECC());
646
647 if (!tagSyndrome.noError())
648 {
649 if (tagSyndrome.isDataBitError()) {
650 uint32_t dataBit = tagSyndrome.getDataBit();
651 diagTag.setTAG(diagTag.getTAG() ^ (1<<dataBit));
652 RAS_OSTR << "L2CacheFill: correcting data bit " <<
653 dataBit << endl;
654 } else if (tagSyndrome.isCheckBitError())
655 {
656 uint32_t checkBit = tagSyndrome.getCheckBit();
657 diagTag.setECC(diagTag.getECC() ^ (1<<checkBit));
658 RAS_OSTR << "L2CacheFill: correcting check bit " <<
659 checkBit << endl;
660 } else
661 {
662 fprintf(stderr,"L2CheckTags: double bit tag error");
663 exit(-1);
664 }
665 L2DiagTagMemAccess_.access(diagNdx, diagTag, false);
666
667 return true;
668 }
669 return false;
670}
671
672// L2FixVUAD() checks and fixes a RAS error for the VuaD bits associated
673// with the physical address, "paddr". It returns the value of VUAD
674// diagnostic register (with ECC).
675N2_MemErrDetector::N2_L2VaudSyndrome
676N2_MemErrDetector::L2FixVUAD(N2_L2AddressingFields paddr,
677 N2_L2DiagVdMemWithECC &diagVD)
678{
679
680 uint32_t setBankNdx = paddrToSetBankNdx(paddr);
681 // Get UA bits for this set
682 N2_L2DiagUaMemWithECC diagUA;
683 L2DiagUaMemAccess_.access(setBankNdx, diagUA, true);
684
685 // Get VD bits for this set
686 L2DiagVdMemAccess_.access(setBankNdx, diagVD, true);
687
688 // Check VD Ecc
689 BL_Hamming_32_7_Synd vdSyndrome = diagVD.getSyndrome();
690 BL_Hamming_32_7_Synd uaSyndrome = diagUA.getSyndrome();
691 N2_L2VaudSyndrome vuadSyndrome(vdSyndrome.getSyndrome(),uaSyndrome.getSyndrome()) ;
692
693 if (!vdSyndrome.noError()) {
694 RAS_OSTR << "L2FixVUAD: bad VD ECC expected 0x" <<
695 hex << diagVD.getVDECC() <<
696 " got 0x" << hex << vdSyndrome.getSyndrome() << endl;
697
698 if (vdSyndrome.isDataBitError()) {
699 uint32_t dataBit = vdSyndrome.getDataBit();
700 diagVD.setVD(diagVD.getVD() ^ (1<<dataBit));
701 } else if (vdSyndrome.isCheckBitError()) {
702 uint32_t checkBit = vdSyndrome.getCheckBit();
703 diagVD.setVDECC(diagVD.getVDECC() ^ (1<<checkBit));
704 } else {
705 fprintf(stderr,"L2FixVUAD: double bit "
706 "VuaD error");
707 exit(-1);
708 }
709 L2DiagVdMemAccess_.access(setBankNdx, diagVD, false);
710 }
711
712 if (!uaSyndrome.noError()) {
713 RAS_OSTR << "L2FixVUAD: bad UA ECC expected 0x" <<
714 hex << diagUA.getUAECC() <<
715 " got 0x" << hex << uaSyndrome.getSyndrome() << endl;
716
717 if (uaSyndrome.isDataBitError()) {
718 uint32_t dataBit = uaSyndrome.getDataBit();
719 diagUA.setUA(diagUA.getUA() ^ (1<<dataBit));
720 } else if (uaSyndrome.isCheckBitError()) {
721 uint32_t checkBit = uaSyndrome.getCheckBit();
722 diagUA.setUAECC(diagUA.getUAECC() ^ (1<<checkBit));
723 } else {
724 fprintf(stderr,"L2FixVUAD: double bit "
725 "VuaD error\n");
726 exit(-1);
727 }
728 L2DiagUaMemAccess_.access(setBankNdx, diagUA, false);
729 }
730
731 return vuadSyndrome;
732}
733
734
735// L2FixVUADAndTrap() corrects an error in the VuaD bits for a physical
736// address and throws any appropriate trap.
737
738N2_MemErrDetector::N2_L2DiagVdMemWithECC
739N2_MemErrDetector::L2FixVUADAndTrap(N2_Strand *strand,
740 N2_L2AddressingFields paddr,
741 const MemoryTransaction &memXact)
742{
743 N2_L2DiagVdMemWithECC diagVD;
744 N2_L2VaudSyndrome vuad_syndrome = L2FixVUAD(paddr, diagVD);
745 uint16_t vuadSyndrome = ((vuad_syndrome.vdSyndrome_.getSyndrome() << 0x7) | vuad_syndrome.uaSyndrome_.getSyndrome());
746
747 if (vuadSyndrome) {
748 // Set LVC in L2_ERROR_STATUS register
749 // See N2 PRM Rev 1.1 Tbl 12-22
750 // Set paddr[39:6] in L2_ERROR_ADDRESS register
751 setL2ErrorStatusReg(paddr.getBANK(),
752 N2_L2ErrorStatusReg::setLVC,
753 true,
754 getErrorSteer(paddr.getBANK()),
755 vuadSyndrome,
756 paddr.getNative());
757
758 N2_CererWithBitMux cerer(strand->core.cerer);
759 if (getCEEN(paddr.getBANK()) && cerer.checkOneL2Cbit(memXact)) {
760 // Set error information in DESR
761 uint32_t strandId = getErrorSteer(paddr.getBANK());
762 setDESR(strandId,false, N2_Desr::CE_L2C,0);
763
764 // Throw trap to ERRORSTEER
765 trapToErrorSteer(strand, paddr.getBANK(),
766 SS_Interrupt::BIT_HW_CORRECTED_ERROR);
767 }
768 }
769 return diagVD;
770}
771
772
773// L2FindWay() searches the L2$ to see if any cache lines match the
774// passed address. L2FindWay() also checks the valid bits in cache
775// set's VuaD information to make sure the line is valid.
776
777int
778N2_MemErrDetector::L2FindWay(N2_L2AddressingFields paddr,
779 N2_L2DiagVdMemWithECC diagVD)
780{
781 int hit_way = NO_WAY; // assume miss
782 for (int way = 0; way < (1<<N2_L2DiagDataAddressingFields::bitSizeWAY);
783 way++) {
784 uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way);
785 N2_L2DiagTagMem diagTag;
786
787 L2DiagTagMemAccess_.access(diagNdx, diagTag, true);
788
789 if (diagTag.getTAG() == paddr.getTAG()) {
790 if (diagVD.getVALID() & (1<<way)) {
791 hit_way = way;
792 }
793 }
794 }
795 return hit_way;
796}
797
798
799// L2CacheMiss() processes an L2$ miss. Selects a way to invalidate
800// (or victimize) using the per bank Not Recently Used pointer,
801// L2CacheWaysNRUPtr[]. If the way's cache line is dirty, update the
802// Chip-Kill ECC for line flushed to memory. If the memory transaction
803// is a write, mark the line as dirty.
804//
805// The L2$ cache line's data is read from memory (using Chip-Kill
806// correction) and its data ECC is calcuated and saved.
807//
808// Finally, dramProcessMemOp() is called to detect Chip-Kill errors.
809// If the memory Chip-Kill line is poisoned (NotData), then the current
810// L2$ line is poisoned as well.
811//
812// If the miss detects poison, L2CacheMiss() returns a trap number,
813// else 0.
814
815SS_Trap::Type
816N2_MemErrDetector::L2CacheMiss(N2_Strand *strand,
817 const MemoryTransaction &memXact,
818 N2_L2DiagVdMemWithECC &diagVD)
819{
820 N2_L2AddressingFields paddr;
821 paddr.setNative(memXact.getPaddr());
822 uint32_t bank = paddr.getBANK();
823 int way = L2CacheWaysNRUPtr_[bank];
824
825 // flush line if dirty, checking data ECC -- and clear dirty bit
826 // If ECC error, throw trap to ERRORSTEER.
827 uint32_t valid = diagVD.getVALID();
828 uint32_t dirty = diagVD.getDIRTY();
829 if (dirty & (1<<way)) {
830 // Update DRAM's ECC for this cache line
831 // Note that the flushed line has a different physical address
832 // than the original memory transaction.
833 N2_L2AddressingFields writeBackPaddr;
834
835 writeBackPaddr.setBANK(bank);
836 writeBackPaddr.setSET(paddr.getSET());
837 uint64_t writeBackDiagNdx =
838 paddrToWaySetBankNdx(writeBackPaddr, way);
839 N2_L2DiagTagMem writeBackDiagTag;
840
841 L2DiagTagMemAccess_.access(writeBackDiagNdx, writeBackDiagTag,
842 true);
843 writeBackPaddr.setTAG(writeBackDiagTag.getTAG());
844
845 // flush line if dirty, checking data ECC -- and clear dirty bit
846 // If ECC error, throw trap to ERRORSTEER.
847 N2_L2CacheLineError lineError = L2ProcessCacheLine(writeBackPaddr, way, false);
848
849 // Throw a trap on all bad ECC
850 if (lineError.isError() && !lineError.isNotData()) {
851 ThrowL2DataWriteBackTrap(memXact, strand, bank, lineError);
852 }
853 dirty &= ~(1<<way);
854
855 // Truncate to memory cache line alignemnt
856 writeBackPaddr.setNative(writeBackPaddr.getNative() &
857 ~(SS_CKMemory::DRAM_LINE_LENGTH - 1));
858 dramUpdateECC(writeBackPaddr, lineError.isNotData());
859 }
860 valid |= (1<<way); // line is valid
861 // if write or read-write, set dirty bit
862 if (memXact.writeXact()) {
863 dirty |= (1<<way);
864 // If debugging chip-kill, set ECC for every write
865 if (debugChipKill_) {
866 N2_L2AddressingFields tmpPaddr;
867 tmpPaddr.setNative(paddr.getNative() &
868 ~(SS_CKMemory::DRAM_LINE_LENGTH - 1));
869 dramUpdateECC(tmpPaddr, false);
870 }
871 }
872
873 diagVD.setVALID(valid);
874 diagVD.setDIRTY(dirty);
875 diagVD.setVDECC(diagVD.calcECC());
876 uint32_t setBankNdx = paddrToSetBankNdx(paddr);
877 L2DiagVdMemAccess_.access(setBankNdx, diagVD, false);
878
879 // fetch line from memory
880 (void)L2ProcessCacheLine(paddr, way, true);
881
882 // update tag with filled line
883 uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way);
884 N2_L2DiagTagMem diagTag;
885
886 diagTag.setTAG(paddr.getTAG());
887 diagTag.setECC((BL_Hamming_22_6_Synd::calc_check_bits(diagTag.getTAG())).get());
888
889 L2DiagTagMemAccess_.access(diagNdx, diagTag, false);
890
891 // advance bank's NRU pointer
892 L2CacheWaysNRUPtr_[bank] = ((way+1) %
893 (1<<N2_L2DiagDataAddressingFields::bitSizeWAY));
894
895 // Truncate to memory cache line alignemnt
896 paddr.setNative(paddr.getNative() & ~(SS_CKMemory::DRAM_LINE_LENGTH - 1));
897 // Check DRAM's ECC; poison L2$ line and primary $ line if uncorrectable
898 bool isUncorrectable = false;
899 SS_Trap::Type trap = dramProcessMemOp(strand, memXact, paddr,isUncorrectable);
900 if(trap != SS_Trap::NO_TRAP)
901 return trap;
902 if(isUncorrectable){
903 poisonL2Line(paddr, way);
904 }
905
906 return SS_Trap::NO_TRAP;
907}
908
909// L2CacheHit() processes an L2$ hit. If the memory transaction
910// is a write, marks the line as dirty.
911//
912// Verifies the cache line's ECC with L2ProcessCacheLine(). If line
913// has an ECC error, ThrowL2DataTrap() is called to cough up the
914// appropriate hairball (i.e. throw the correct ECC trap).
915//
916// If cache line contains NotData, L2CacheHit() returns trap number to
917// throw, else 0
918
919SS_Trap::Type
920N2_MemErrDetector::L2CacheHit(N2_Strand *strand,
921 const MemoryTransaction &memXact,
922 N2_L2DiagVdMemWithECC &diagVD,
923 int hit_way)
924{
925 N2_L2AddressingFields paddr;
926 paddr.setNative(memXact.getPaddr());
927
928 // if write or read-write, set dirty bit
929 if (memXact.writeXact()) {
930 uint32_t dirty = diagVD.getDIRTY();
931 dirty |= (1<<hit_way);
932 diagVD.setDIRTY(dirty);
933 diagVD.setVDECC(diagVD.calcECC());
934
935 L2DiagVdMemAccess_.access(paddrToSetBankNdx(paddr), diagVD, false);
936 /* update the data ecc for the complete cache line*/
937 L2ProcessCacheLine(paddr, hit_way, true);
938 }
939 else{
940 // verify data ECC
941 N2_L2CacheLineError lineError =
942 L2ProcessCacheLine(paddr, hit_way, false);
943 if (lineError.isError()) {
944 return ThrowL2DataTrap(memXact, strand, paddr.getBANK(), lineError);
945 }
946 }
947
948 // Do we need to access DRAM for READ_WRITE to check ECC? We
949 // don't need to actually write to memory here. The
950 // memXact.access() routine, which has called us, handles that.
951 return SS_Trap::NO_TRAP;
952}
953
954
955// L2ProcessCacheLine() either reads a cache line, calculating its ECC
956// or verifies a cache line's ECC.
957//
958// If verifying the ECC for a cache line and an ECC error is found, it
959// returns the ECC syndromes for the first quarterline where an error occurs.
960
961N2_MemErrDetector::N2_L2CacheLineError
962N2_MemErrDetector::L2ProcessCacheLine(N2_L2AddressingFields paddr,
963 uint32_t way,
964 bool isRead)
965{
966 // mask out LSB's to set paddr to the beginning of the cache
967 // quarterline
968 paddr.setNative(paddr.getNative() & ~(N2_L2_CACHE_LINE_SIZE/4-1));
969
970 N2_L2CacheLineError returnError(paddr.getNative());
971
972 for (int i = 0; i < 4; ++i) {
973 N2_L2AddressingFields quarterLinePaddr;
974 quarterLinePaddr.setNative(paddr.getNative() |
975 (i*N2_L2_CACHE_LINE_SIZE/4) % N2_L2_CACHE_LINE_SIZE);
976 N2_L2CacheLineError qLineError =
977 L2ProcessQuarterLine(quarterLinePaddr, way, isRead);
978 if(qLineError.isError()){
979 RAS_OSTR << "L2ProcessCacheLine: qline syndrome 0x" <<
980 hex << qLineError.qLineSyndrome() <<
981 " paddr 0x" << hex << qLineError.errorPaddr() << endl;
982 }
983 if (qLineError.isError() && !returnError.isError()) {
984 returnError = qLineError;
985 }
986 }
987 return returnError;
988}
989
990
991// L2ProcessQuarterLine() either reads a quarter of a cache line,
992// calculating its ECC or verifies a quarter cache line's ECC.
993//
994// If verifying the ECC for a cache line and an ECC error is found, it
995// returns the ECC syndromes for this quarterline.
996
997N2_MemErrDetector::N2_L2CacheLineError
998N2_MemErrDetector::L2ProcessQuarterLine(N2_L2AddressingFields paddr,
999 uint32_t way,
1000 bool isRead)
1001{
1002 if (paddr.getNative() % (N2_L2_CACHE_LINE_SIZE/4)){
1003 fprintf(stderr,"L2ProcessQuarterLine: bad paddr");
1004 exit(-1);
1005 }
1006
1007 int word = 0;
1008 N2_L2CacheLineError l2CacheLineError(paddr.getNative());
1009
1010 // words are 64-bits in N2 land -- at least in PRM Rev 1.1 Tbl 28-43.
1011 // There are two 64-bit "words" in a quarter cache line
1012 for (word = 0; word < (N2_L2_CACHE_LINE_SIZE/4)/sizeof(double); ++word) {
1013 N2_L2DiagDataAddressingFields diagAddr;
1014
1015 diagAddr.setBANK(paddr.getBANK());
1016 diagAddr.setWAY(way);
1017 diagAddr.setWORD(paddr.getWORD() + word);
1018 diagAddr.setSET(paddr.getSET());
1019 diagAddr.setODDEVEN(0);
1020
1021 uint32_t hi_data, lo_data;
1022
1023 if (isRead) {
1024 // get the cache line from memory
1025 uint64_t data = ((SS_CKMemory*)(n2_model->cpu[0]->strand[0]->memory))->peek8u(paddr.getNative());
1026 hi_data = (data >> 32) & 0xffffffff;
1027 lo_data= data & 0xffffffff;
1028 }
1029 N2_L2DiagDataMemWithECC diagData;
1030 uint32_t diagNdx = //
1031 diagAddr.getNative()/N2_L2DiagDataMemWithECC::Stride;
1032
1033 if (isRead) {
1034 diagData.setDATA(lo_data);
1035 diagData.setECC(diagData.calcECC());
1036 // set even half of 64-bit word
1037 L2DiagDataMemAccess_.access(diagNdx, diagData, false);
1038 } else {
1039 L2DiagDataMemAccess_.access(diagNdx, diagData, true);
1040 l2CacheLineError.addQuarterLine(diagData.getSyndrome());
1041 }
1042
1043 diagAddr.setODDEVEN(1);
1044 diagNdx = diagAddr.getNative()/N2_L2DiagDataMemWithECC::Stride;
1045 if (isRead) {
1046 diagData.setDATA(hi_data);
1047 diagData.setECC(diagData.calcECC());
1048 // set odd half of 64-bit word
1049 L2DiagDataMemAccess_.access(diagNdx, diagData, false);
1050 } else {
1051 L2DiagDataMemAccess_.access(diagNdx, diagData, true);
1052 l2CacheLineError.addQuarterLine(diagData.getSyndrome());
1053 }
1054 }
1055 return l2CacheLineError;
1056}
1057
1058
1059// ThrowL2DataTrap() handles the details of setting status registers
1060// and conditionally throwing the right trap.
1061//
1062// Returns trap number to throw if the cache line contains NotData
1063// (i.e. poison).
1064
1065SS_Trap::Type
1066N2_MemErrDetector::ThrowL2DataTrap(const MemoryTransaction &memXact,
1067 N2_Strand *strand,
1068 uint32_t bank,
1069 N2_L2CacheLineError lineError)
1070{
1071 if (lineError.isCorrectable()) {
1072 ThrowL2DataCorrectableTrap(memXact, strand, bank, lineError);
1073 return SS_Trap::NO_TRAP;
1074 } else {
1075 return ThrowL2DataUncorrectableTrap(memXact, strand, bank, lineError);
1076 }
1077}
1078
1079// ThrowL2DataCorrectableTrap() throws the correct disrupting trap
1080// after setting the correct bits in various error status registers.
1081//
1082// This routine is quite meticulous as the memory transaction, the
1083// error conditions, processor state, and, even it might seem, the
1084// phase of the moon, influence the behavior of the trap processing.
1085
1086void
1087N2_MemErrDetector::ThrowL2DataCorrectableTrap(const MemoryTransaction &memXact,
1088 N2_Strand *strand,
1089 uint32_t bank,
1090 N2_L2CacheLineError lineError)
1091{
1092 N2_CererWithBitMux cerer(strand->core.cerer);
1093 bool itablewalk = ((memXact.referenceType() == MemoryTransaction::INSTR) && memXact.tablewalk());
1094 // Handle L2$ data ECC errors during I-tlb or D-tlb tablewalk
1095 if (memXact.tablewalk()) {
1096 // Set in L2_ERROR_STATUS register and set paddr[39:6] for the bad
1097 // quarter line in L2_ERROR_ADDRESS register
1098 setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAC,
1099 true,
1100 strand->core_id(),
1101 lineError.qLineSyndrome(),
1102 lineError.errorPaddr());
1103
1104 // Set error information in DESR
1105 if (getCEEN(bank) && cerer.hwtwl2()) {
1106 N2_Strand *target_strand = (N2_Strand*)n2_model->cpu[0]->strand[getErrorSteer(bank)];
1107 setDESR(target_strand->strand_id(),true, itablewalk?N2_Desr::RE_ITL2C : N2_Desr::RE_DTL2C,0);
1108 // and throw disrupting SW_RECOVERABLE_ERROR trap
1109 if(strand->seter.de())
1110 strand->irq.raise(target_strand,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR);
1111
1112 }
1113 }
1114 // Handle L2$ data ECC errors during instruction fetch
1115 else if (memXact.readXact() &&
1116 memXact.referenceType() == MemoryTransaction::INSTR) {
1117 // Set in L2_ERROR_STATUS register and paddr[39:6] for the bad
1118 // quarter line in L2_ERROR_ADDRESS register
1119 setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAC,
1120 true,
1121 strand->core_id(),
1122 lineError.qLineSyndrome(),
1123 lineError.errorPaddr());
1124
1125 // Do we set error information in DESR and throw a trap?
1126 if (getCEEN(bank) && cerer.checkOneL2Cbit(memXact)) {
1127 N2_Strand *target_strand = (N2_Strand*)n2_model->cpu[0]->strand[getErrorSteer(bank)];
1128 setDESR(target_strand->strand_id(),true,N2_Desr::RE_ICL2C,0);
1129 if(strand->seter.de())
1130 strand->irq.raise(target_strand,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR);
1131 }
1132 }
1133 // Handle L2$ data ECC errors during data read or partial store(TODO)
1134 // This also covers Atomic Hits in Sect 12.9.1.6 because are
1135 // issued as a READ memXact followed by a WRITE memXact, with the
1136 // atomic bit set for both.
1137 else if (memXact.referenceType() == MemoryTransaction::DATA &&
1138 memXact.readXact()) {
1139 // Set in L2_ERROR_STATUS register and paddr[39:6] for the bad
1140 // quarter line in L2_ERROR_ADDRESS register
1141 setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAC,
1142 true,
1143 strand->core_id(),
1144 lineError.qLineSyndrome(),
1145 lineError.errorPaddr());
1146
1147 // If CEEN set in L2_ERROR_ENABLE, throw trap to ERRORSTEER
1148 if (getCEEN(bank) && cerer.checkOneL2Cbit(memXact)) {
1149 N2_Strand *target_strand = (N2_Strand*)n2_model->cpu[0]->strand[getErrorSteer(bank)];
1150
1151 if (memXact.writeXact()) {
1152 // partial store
1153 setDESR(target_strand->strand_id(),false,N2_Desr::CE_L2C,0);
1154 if(strand->seter.dhcce())
1155 strand->irq.raise(target_strand,SS_Interrupt::BIT_HW_CORRECTED_ERROR);
1156 }
1157 else {
1158 // data read
1159 setDESR(target_strand->strand_id(),true,N2_Desr::RE_DCL2C,0);
1160 if(strand->seter.de())
1161 strand->irq.raise(target_strand,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR);
1162 }
1163 }
1164 }
1165 // Writes just set the ECC for quarter line.
1166 // so they can't throw traps.
1167 else if (memXact.referenceType() == MemoryTransaction::DATA &&
1168 memXact.writeXact()) {
1169 return;
1170 }
1171 else {
1172 fprintf(stderr,"N2_MemErrDetector::"
1173 "ThrowL2DataCorrectableTrap(): "
1174 "unknown MemoryTranaction type");
1175 exit(-1);
1176 }
1177}
1178
1179
1180// ThrowL2DataUncorrectableTrap() throws the correct disrupting trap
1181// after setting the correct bits in various error status registers.
1182//
1183// As before, this routine is quite meticulous as each of the memory
1184// transaction, the error conditions, processor state, and, even it
1185// might seem, the phase of the moon, influence the behavior of the
1186// trap processing.
1187//
1188// Returns precise trap number to throw, if needed.
1189
1190SS_Trap::Type
1191N2_MemErrDetector::ThrowL2DataUncorrectableTrap(
1192 const MemoryTransaction &memXact,
1193 N2_Strand *strand,
1194 uint32_t bank,
1195 N2_L2CacheLineError lineError)
1196{
1197 N2_CererWithBitMux cerer(strand->core.cerer);
1198 N2_InstSfsr *isfsr = &(strand->inst_sfsr);
1199 N2_DataSfsr *dsfsr = &(strand->data_sfsr);
1200 N2_DataSfar *dsfar = &(strand->data_sfar);
1201 SS_Trap::Type trapNumber = SS_Trap::NO_TRAP;
1202 bool itablewalk = ((memXact.referenceType() == MemoryTransaction::INSTR) && memXact.tablewalk());
1203
1204 // Handle L2$ data ECC errors during I-tlb or D-tlb tablewalk
1205 if (memXact.tablewalk()) {
1206 // Is this error NotData?
1207 if (!lineError.isNotData()) {
1208 // Set the L2_ERROR_STATUS register and paddr[39:6] for the bad
1209 // quarter line in L2_ERROR_ADDRESS register
1210 setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAU,
1211 false,
1212 strand->core_id(),
1213 lineError.qLineSyndrome(),
1214 lineError.errorPaddr());
1215 }
1216 else {
1217 // Set the L2_NOTDATA_STATUS register
1218 setL2NotdataErrorReg(bank,strand->core_id(),
1219 lineError.errorPaddr());
1220
1221 }
1222 // Set error information in ISFAR or DSFAR and DSFAR
1223 if (getNCEEN(bank) && cerer.hwtwl2()) {
1224 if (!lineError.isNotData()) {
1225 // If PSCCE set, throw trap
1226 if (strand->seter.pscce()) {
1227 if (itablewalk) {
1228 isfsr->error_type(N2_InstSfsr::ITL2U);
1229 return SS_Trap::INSTRUCTION_ACCESS_MMU_ERROR;
1230 } else {
1231 dsfsr->error_type(N2_DataSfsr::DTL2U);
1232 dsfar->error_addr(memXact.getVaddr());
1233 return SS_Trap::DATA_ACCESS_MMU_ERROR;
1234 }
1235 }
1236 }
1237 else {
1238 // report NotData
1239 // If PSCCE set, throw trap
1240
1241 if (strand->seter.pscce()) {
1242 if (itablewalk) {
1243 isfsr->error_type(N2_InstSfsr::ITL2ND);
1244 trapNumber = SS_Trap::INSTRUCTION_ACCESS_MMU_ERROR;
1245 } else {
1246 dsfsr->error_type(N2_DataSfsr::DTL2ND);
1247 trapNumber = SS_Trap::DATA_ACCESS_MMU_ERROR;
1248 }
1249 }
1250 }
1251 }
1252 }
1253 // Handle L2$ data ECC errors during instruction fetch
1254 else if (memXact.readXact() &&
1255 memXact.referenceType() == MemoryTransaction::INSTR) {
1256 bool notData = lineError.isNotData();
1257 bool cererSet = notData ? cerer.icl2nd() : cerer.icl2u();
1258
1259 if (notData) {
1260 // Set the L2_NOTDATA_STATUS register
1261 setL2NotdataErrorReg(bank,
1262 strand->core_id(),
1263 lineError.errorPaddr());
1264 }
1265 else{
1266 // Set in L2_ERROR_STATUS registerand paddr[39:6] for the bad
1267 // quarter line in L2_ERROR_ADDRESS register
1268 setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAU,
1269 false,
1270 strand->core_id(),
1271 lineError.qLineSyndrome(),
1272 lineError.errorPaddr());
1273 }
1274
1275 if (cererSet) {
1276 if (notData) {
1277 if (getNCEEN(bank) &&
1278 strand->seter.pscce()) {
1279 // Set error in ISFSR
1280 isfsr->error_type(N2_InstSfsr::ICL2ND);
1281 trapNumber = SS_Trap::INSTRUCTION_ACCESS_ERROR;
1282 }
1283 }
1284 else {
1285 // If NCEEN and PSCCE set, throw trap
1286 if (getNCEEN(bank) && strand->seter.pscce()) {
1287 // Set error in ISFSR
1288 isfsr->error_type(N2_InstSfsr::ICL2U);
1289 return SS_Trap::INSTRUCTION_ACCESS_ERROR;
1290 }
1291 }
1292 }
1293 }
1294 // Handle L2$ data ECC errors during data read.
1295 //
1296 // This also covers Atomic Hits in Sect 12.9.7.6 because are
1297 // issued as a READ memXact followed by a WRITE memXact, with the
1298 // atomic bit set for both.
1299 // However, the write (following this read) updates memory (hard to stop,
1300 // given the current Riesling implementation, do we need to for RUST?
1301 else if (memXact.readXact() &&
1302 memXact.referenceType() == MemoryTransaction::DATA) {
1303 bool notData = lineError.isNotData();
1304 bool cererSet = notData ? cerer.dcl2u() : cerer.dcl2nd();
1305
1306 if (notData) {
1307 // Set the L2_NOTDATA_STATUS register
1308 setL2NotdataErrorReg(bank,
1309 strand->core_id(),
1310 lineError.errorPaddr());
1311 }
1312 else{
1313 // Set in L2_ERROR_STATUS register and paddr[39:6] for the bad
1314 // quarter line in L2_ERROR_ADDRESS register
1315 setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAU,
1316 false,
1317 strand->core_id(),
1318 lineError.qLineSyndrome(),
1319 lineError.errorPaddr());
1320 }
1321 if (cererSet) {
1322 if (notData) {
1323 // If NCEEN and PSCCE set, throw trap
1324 if (getNCEEN(bank) &&
1325 strand->seter.pscce()) {
1326 // Set error in DSFSR
1327 dsfsr->error_type(N2_DataSfsr::DCL2ND);
1328 trapNumber = SS_Trap::DATA_ACCESS_ERROR;
1329 }
1330 }
1331 else {
1332 // If NCEEN and PSCCE set, throw trap
1333 if (getNCEEN(bank) &&
1334 strand->seter.pscce()) {
1335 // Set error in DSFSR
1336 dsfsr->error_type(N2_DataSfsr::DCL2U);
1337 return SS_Trap::DATA_ACCESS_ERROR;
1338 }
1339 }
1340 }
1341 }
1342 // Handle L2$ partial stores TODO
1343#if 0
1344 else if (memXact.writeXact() &&
1345 memXact.referenceType() == MemoryTransaction::DATA
1346 ) {
1347 // Set in L2_ERROR_STATUS register
1348 // Don't set L2 Error Address reg, per N2 PRM Rev 1.1 12.9.7.7.
1349 setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAU,
1350 false,
1351 strand->core_id(),
1352 lineError.qLineSyndrome(), 0);
1353
1354 // Again, what about bad parity?
1355
1356 // If NCEEN set in L2_ERROR_ENABLE, etc., throw trap
1357 if (getNCEEN(bank) && cerer.dcl2u()) {
1358 // Set error in DESR
1359 // NB: this is dependent on NCEEN and DCL2U, different
1360 // from everywhere else
1361 N2_Strand *target_strand = (N2_Strand*)n2_model->cpu[0]->strand[getErrorSteer(bank)];
1362 setDESR(target_strand->strand_id(),false,N2_Desr::RE_L2U,0);
1363 return SS_Trap::DATA_ACCESS_ERROR;
1364 }
1365 }
1366#endif
1367 // Writes just set the ECC for quarter line.
1368 // so they can't throw traps.
1369 else if (memXact.referenceType() == MemoryTransaction::DATA &&
1370 memXact.writeXact()) {
1371 return SS_Trap::NO_TRAP;
1372 }
1373 else {
1374 fprintf(stderr,"N2_MemErrDetector::"
1375 "ThrowL2DataUncorrectableTrap(): "
1376 "unknown MemoryTranaction type");
1377 exit(-1);
1378 }
1379
1380 return trapNumber;
1381}
1382
1383
1384// poisonL2Line() sets the ECC values for all the words in an L2$ line
1385// to NotData. The line is selected by the bank and index (aka set)
1386// values in 'paddr' and the set's way in 'way'.
1387
1388void
1389N2_MemErrDetector::poisonL2Line(N2_L2AddressingFields paddr, int way)
1390{
1391
1392 for (int word = 0;
1393 word < N2_L2_CACHE_LINE_SIZE/sizeof(double);
1394 ++word) {
1395 N2_L2DiagDataAddressingFields diagAddr;
1396
1397 diagAddr.setBANK(paddr.getBANK());
1398 diagAddr.setWAY(way);
1399 diagAddr.setWORD(paddr.getWORD() + word);
1400 diagAddr.setSET(paddr.getSET());
1401 diagAddr.setODDEVEN(0);
1402
1403 uint32_t diagNdx =
1404 diagAddr.getNative()/N2_L2DiagDataMemWithECC::Stride;
1405 L2DiagDataMemAccess_.
1406 set(diagNdx, N2_L2DiagDataMem::setECC,
1407 N2_L2DiagDataMemWithECC::L2_NOT_DATA);
1408
1409 diagAddr.setODDEVEN(1);
1410 diagNdx = diagAddr.getNative()/N2_L2DiagDataMemWithECC::Stride;
1411 L2DiagDataMemAccess_.
1412 set(diagNdx, N2_L2DiagDataMem::setECC,
1413 N2_L2DiagDataMemWithECC::L2_NOT_DATA);
1414 }
1415}
1416
1417
1418// ThrowL2DataWriteBackTrap() sets the various error status registers
1419// and throws the appropriate disrupting trap to the ERRROSTEER strand
1420// for the bank number found in the original memory transaction that
1421// causes the cache line writeback.
1422
1423void
1424N2_MemErrDetector::ThrowL2DataWriteBackTrap(const MemoryTransaction &memXact,
1425 N2_Strand *strand,
1426 uint32_t bank,
1427 N2_L2CacheLineError lineError)
1428{
1429 // Don't trap on NotData N2 1.1 PRM Sect 12.9.16
1430 if (lineError.isNotData()) {
1431 return;
1432 }
1433
1434 uint32_t strandId = getErrorSteer(bank);
1435
1436 // Set in L2_ERROR_STATUS register and paddr[39:6] for the bad
1437 // quarter line in L2_ERROR_ADDRESS register
1438 ErrorStatusRegBitSetFn bitSetFcn = lineError.isCorrectable() ?
1439 N2_L2ErrorStatusReg::setLDWC : N2_L2ErrorStatusReg::setLDWU;
1440 setL2ErrorStatusReg(bank, bitSetFcn, lineError.isCorrectable(), 0, 0,
1441 lineError.errorPaddr());
1442
1443
1444 // Set error information in DESR
1445 N2_CererWithBitMux cerer(strand->core.cerer);
1446 if (lineError.isCorrectable()) { // is there hope?
1447 if(getCEEN(bank) && cerer.l2c_socc()/*cerer.checkOneL2Cbit(memXact)*/){
1448 setDESR(strandId,false, N2_Desr::CE_L2C,0);
1449 trapToErrorSteer(strand, bank, SS_Interrupt::BIT_HW_CORRECTED_ERROR);
1450 }
1451 }
1452 else { // nope...
1453 if (getNCEEN(bank) && cerer.l2u_socu() ) {
1454 setDESR(strandId,true, N2_Desr::RE_L2U,0);
1455 trapToErrorSteer(strand, bank, SS_Interrupt::BIT_SW_RECOVERABLE_ERROR);
1456 }
1457 }
1458}
1459
1460
1461// This routine implements DRAM RAS error detection and injection.
1462//
1463// Assumption: All errors are detected during critical data load only
1464// and reported prior to linefill. This routine does not model the
1465// scrubbing mechanism. Also does not detect errors that might arise
1466// in the FBDIMM channel.
1467//
1468// Returns true if the Dram read accesses a uncorrectable Chip-Kill error
1469//
1470// NB: This routine is missing the hooks for MBR*ECC and MBR*FBR SOC errors.
1471SS_Trap::Type
1472N2_MemErrDetector::dramProcessMemOp(N2_Strand *strand,
1473 const MemoryTransaction &memXact,
1474 N2_L2AddressingFields paddress,bool &isUncorrectable)
1475{
1476 isUncorrectable = false;
1477
1478 // Extracting necessary information from input
1479 uint64_t paddr = paddress.getNative();
1480
1481 // Verify if PA is 16B Aligned
1482 if ((paddr % SS_CKMemory::DRAM_LINE_LENGTH) != 0) {
1483 fprintf(stderr,"N2_MemErrDetector::dramProcessMemOp(): "
1484 "misaligned address.");
1485 exit(-1);
1486 }
1487
1488 // MCU ID is determined from bits 7:6 of the PA
1489 uint32_t mcuID = bit_shift(paddr, N2_DRAM_PADDR_MCU_SHIFT,
1490 N2_DRAM_PADDR_NR_MCU_LOG2);
1491 uint32_t bank = paddress.getBANK();
1492 N2_Cerer *cerer = &(strand->core.cerer);
1493 SS_CKMemory *ck_memory=((SS_CKMemory*)(n2_model->cpu[0]->strand[0]->memory));
1494
1495 // DRAM Error Detection and Handling
1496 // Detect ECC error
1497 // If the paddr has an entry in the ECC Map
1498 if (ck_memory->ecc_exists(paddr)) {
1499 BL_CKSyndrome ck_syndrome(ck_memory->read_raw_CK_line(paddr), ck_memory->fetch_ecc(paddr));
1500
1501 RAS_OSTR << "N2_MemErrDetector::dramProcessMemOp: " <<
1502 "CK syndrome 0x" << hex << ck_syndrome.getSyndrome() << "\n";
1503
1504 // Error Detection
1505 if (!ck_syndrome.noError()) {
1506 N2_L2ErrorStatusReg L2ErrorStatusReg;
1507 L2ErrorStatusRegAccess_.access(bank, L2ErrorStatusReg, true);
1508 N2_DramErrorStatusMem dramESR;
1509 DramErrorStatusMemAccess_.access(mcuID, dramESR, true);
1510 // Verify if the error is correctable or uncorrectable
1511 if (ck_syndrome.isUncorrectableError()) {
1512 isUncorrectable = true;
1513
1514 // Set DAU, R/W, VCID and MODA information in the
1515 // L2 Cache Error Status Register -> PRM 12.11.1.1
1516 // MODA(Modular Arithmatic) and R/W need not be
1517 // set.
1518 // Set paddr[39:6] in L2_ERROR_ADDRESS register -
1519 // All DRAM error address should be stored in L2
1520 // EAR. DRAM EAR stores address only for Scrub
1521 // errors. -> PRM 12.12.2
1522
1523 setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setDAU,
1524 false, getErrorSteer(bank),
1525 ck_syndrome.getSyndrome(),
1526 paddr);
1527
1528 // Check the ESR for the presence of multiple
1529 // errors (both correctable and uncorrectable)
1530
1531 // Check to see if an uncorrectable error is
1532 // already present If yes dont log details about
1533 // current error. Instead set the MEU bit in the
1534 // ESR
1535 if (dramESR.getDAU() == 1) {
1536 DramErrorStatusMemAccess_.
1537 set(mcuID,
1538 N2_DramErrorStatusMem::setMEU,
1539 1);
1540 }
1541 // Else check to see if a correctable error already exists
1542 else if (dramESR.getDAC() == 1) {
1543 // If yes overwrite the previous error (UE
1544 // has higher precedence over CE)
1545
1546 // Set the DAU bit
1547 DramErrorStatusMemAccess_.
1548 set(mcuID,
1549 N2_DramErrorStatusMem::setDAU,
1550 1);
1551 DramErrorStatusMemAccess_.
1552 set(mcuID,
1553 N2_DramErrorStatusMem::setSYND,
1554 ck_syndrome.getSyndrome());
1555 // Reset the DAC bit
1556 DramErrorStatusMemAccess_.
1557 set(mcuID,
1558 N2_DramErrorStatusMem::setDAC,
1559 0);
1560 DramErrorStatusMemAccess_.
1561 set(mcuID,
1562 N2_DramErrorStatusMem::setMEC,
1563 1);
1564 }
1565 else { // No error stored in Dram ESR
1566
1567 // Set DAU and Syndrome in DRAM ESR
1568 DramErrorStatusMemAccess_.
1569 set(mcuID,
1570 N2_DramErrorStatusMem::setDAU,
1571 1);
1572 DramErrorStatusMemAccess_.
1573 set(mcuID,
1574 N2_DramErrorStatusMem::setSYND,
1575 ck_syndrome.getSyndrome());
1576 }
1577 // if NCEEN bit is set, then signal an L2U error
1578 // to the requesting virtual core and throw a trap
1579 // to ERRORSTEER
1580 if (getNCEEN(bank)) {
1581 SS_Trap::Type trap = DramThrowUncorrectableTrap(strand, memXact, bank);
1582 if(trap != SS_Trap::NO_TRAP)
1583 return trap;
1584 }
1585 }
1586 // Verify if the error is a correctable data bit
1587 // or check bit error
1588 else if (ck_syndrome.isCorrectableDataBitError() ||
1589 ck_syndrome.isCorrectableCheckBitError()) {
1590 // Check the ESR for the presence of multiple
1591 // errors (both correctable and uncorrectable)
1592
1593 // Check to see if any error is already present
1594 if ((L2ErrorStatusReg.getVEU() == 1) ||
1595 (L2ErrorStatusReg.getVEC() == 1)) {
1596
1597 // If yes do not log info about current
1598 // error just set the MEC bit
1599 L2ErrorStatusReg.setMEC(1);
1600 L2ErrorStatusRegAccess_.access(bank,
1601 L2ErrorStatusReg,
1602 false);
1603 }
1604 else {
1605 // If no error is stored in the the Dram ESR,
1606 // then log the information Set DAC, R/W,
1607 // VCID and MODA information in the L2
1608 // Cache Error Status Register -> PRM
1609 // 12.11.1.1 MODA(Modular Arithmatic) and
1610 // R/W need not be set fore RUST.
1611 //
1612 // Set paddr[39:6] in L2_ERROR_ADDRESS
1613 // register - All DRAM error address should be
1614 // stored in L2 EAR. DRAM EAR stores address
1615 // only for Scrub errors. -> PRM 12.12.2
1616 setL2ErrorStatusReg(bank,
1617 N2_L2ErrorStatusReg::setDAC,
1618 true, getErrorSteer(bank),
1619 ck_syndrome.getSyndrome(),
1620 paddr);
1621
1622 // Check to see if any error is already present
1623 if (dramESR.getDAU() || dramESR.getDAC()) {
1624 // If yes do not log info about current
1625 // error just set the MEC bit
1626 DramErrorStatusMemAccess_.
1627 set(mcuID,
1628 N2_DramErrorStatusMem::setMEC,
1629 1);
1630 }
1631 // This is the first error, log the information
1632 else {
1633 // Set DAC and Syndrome in DRAM ESR
1634 DramErrorStatusMemAccess_.
1635 set(mcuID,
1636 N2_DramErrorStatusMem::setDAC,
1637 1);
1638 DramErrorStatusMemAccess_.
1639 set(mcuID,
1640 N2_DramErrorStatusMem::setSYND,
1641 ck_syndrome.getSyndrome());
1642 }
1643 // Add stuff for DRAM Error Counter and DRAM
1644 // Error Location Registers
1645
1646 // if CEEN is set, then signal an L2C error to
1647 // the requesting virtual core and throw a
1648 // trap to ERRORSTEER
1649 if (getCEEN(bank)) {
1650 DramThrowCorrectableTrap(strand, memXact, bank);
1651 }
1652 }
1653 }
1654 }
1655 }
1656
1657 N2_SocErrorReg::SocErrRegBitGetFn getFBR =
1658 N2_SocErrorReg::getSocErrRegMCUFBR(mcuID);
1659
1660 // If SOC FBDIMM error injection is enabled for this mcu
1661 if (socErrorInjectRegAccess_.get(getFBR)) {
1662 setL2ErrorStatusReg(bank,
1663 N2_L2ErrorStatusReg::setDAC,
1664 true, getErrorSteer(bank),
1665 0,
1666 paddr);
1667
1668 // Legal FBR errors are correctable
1669 DramErrorStatusMemAccess_.set(mcuID, N2_DramErrorStatusMem::setFBR);
1670
1671 N2_CererWithBitMux cerer(strand->core.cerer);
1672 if (getCEEN(bank) && cerer.dcl2c()) {
1673 setDESR(getErrorSteer(bank),false, N2_Desr::CE_L2C,0);
1674 trapToErrorSteer(strand,bank,
1675 SS_Interrupt::BIT_HW_CORRECTED_ERROR);
1676 }
1677
1678 // handle SOC FBR errors here
1679 processSocFbdError(*strand, paddress, getFBR);
1680 }
1681
1682 return SS_Trap::NO_TRAP;
1683}
1684
1685// dramUpdateECC() updates the ECC value associated with paddress.
1686//
1687// If dram error injection is enabled, the ECC for the physical
1688// address' Chip-Kill line calculated, xor'ed with the injection mask,
1689// and saved in the dram ECC map. If dram error injection is disabled
1690// (or there is no longer an ECC error), the ECC value will not be
1691// saved in the map (or the value will be deleted).
1692
1693void
1694N2_MemErrDetector::dramUpdateECC(N2_L2AddressingFields paddress,
1695 bool isNotData)
1696{
1697 // Extracting necessary information from input
1698 uint64_t paddr = paddress.get();
1699 SS_CKMemory *ck_memory=((SS_CKMemory*)(n2_model->cpu[0]->strand[0]->memory));
1700
1701 // Verify if PA is 16B Aligned
1702 if ((paddr % SS_CKMemory::DRAM_LINE_LENGTH) != 0) {
1703 fprintf(stderr,"N2_MemErrDetector::dramUpdateECC(): misaligned address.");
1704 exit(-1);
1705 }
1706
1707 // MCU ID is determined from bits 7:6 of the PA
1708 uint32_t mcuID = bit_shift(paddr, N2_DRAM_PADDR_MCU_SHIFT,
1709 N2_DRAM_PADDR_NR_MCU_LOG2);
1710
1711 // DRAM Error Injection
1712 // Case: DRAM Access - STORE (WRITE) (L2 Miss)
1713 if (debugChipKill_ ||
1714 isNotData ||
1715 ck_memory->ecc_exists(paddr) ||
1716 DramErrorInjectMemAccess_.get(mcuID, N2_DramErrorInjectMem::getENB)) {
1717
1718 uint64_t newDramECC;
1719 // If the L2$ line contains NotData, write special syndrome
1720 if (isNotData) {
1721 newDramECC = SS_CKMemory::DRAM_NOT_DATA;
1722 } else {
1723 newDramECC = ck_memory->calculate_dram_ecc(paddr);
1724 if (DramErrorInjectMemAccess_.get(mcuID,
1725 N2_DramErrorInjectMem::getENB)) {
1726 newDramECC ^= DramErrorInjectMemAccess_.
1727 get(mcuID, N2_DramErrorInjectMem::getECCMASK);
1728 }
1729 }
1730
1731 RAS_OSTR << "DRAM Error injected at paddr :0x" << std::hex << paddr << " newDRAMECC:0x" << std::hex << newDramECC << endl;
1732
1733 // Store PA,MASKED ECC in MAP The higher order PA is
1734 // (still) unique enough to be maintained as key The
1735 // value stored in the map will (eventually) be the
1736 // ECC value for 128 bits of data addressed by HO-PA
1737 // and LO-PA
1738 ck_memory->dram_update_ecc(paddr,newDramECC);
1739 // If Single Shot then disable error injection
1740 if (DramErrorInjectMemAccess_.
1741 get(mcuID, N2_DramErrorInjectMem::getSSHOT)) {
1742 DramErrorInjectMemAccess_.
1743 set(mcuID, N2_DramErrorInjectMem::setENB, 0);
1744 }
1745 }
1746}
1747
1748// DramThrowCorrectableTrap() throws an HW_CORRECTED_ERROR trap to the
1749// ERRORSTEER strand for bank.
1750
1751void
1752N2_MemErrDetector::DramThrowCorrectableTrap(N2_Strand *strand,
1753 const MemoryTransaction &memXact,
1754 uint32_t bank)
1755{
1756 N2_CererWithBitMux cerer(strand->core.cerer);
1757 bool itablewalk = ((memXact.referenceType() == MemoryTransaction::INSTR) && memXact.tablewalk());
1758
1759 // If the correct L2C bit is set in the CERER, throw a trap
1760 if (cerer.checkOneL2Cbit(memXact)) {
1761 int errorCode;
1762 if (itablewalk) {
1763 errorCode = N2_Desr::RE_ITL2C;
1764 } else if (memXact.tablewalk() && (memXact.referenceType() == MemoryTransaction::DATA)) {
1765 errorCode = N2_Desr::RE_DTL2C;
1766 } else if (memXact.referenceType() == MemoryTransaction::INSTR) {
1767 errorCode = N2_Desr::RE_ICL2C;
1768 } else if (memXact.referenceType() == MemoryTransaction::DATA) {
1769 errorCode = N2_Desr::RE_DCL2C;
1770 } else {
1771 fprintf(stderr,"N2_MemErrDetector::DramThrowCorrectableTrap: bad "
1772 "memXact ");
1773 exit(-1);
1774 }
1775
1776 // Set error information in DESR
1777 setDESR(getErrorSteer(bank),true, errorCode,0);
1778 trapToErrorSteer(strand,bank,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR);
1779 }
1780}
1781
1782// DramThrowUncorrectableTrap() throws the appropriate trap to the
1783// ERRORSTEER strand for bank, based on the kind of memory transaction.
1784
1785SS_Trap::Type
1786N2_MemErrDetector::DramThrowUncorrectableTrap(N2_Strand *strand,
1787 const MemoryTransaction &memXact,
1788 uint32_t bank)
1789{
1790 bool itablewalk = ((memXact.referenceType() == MemoryTransaction::INSTR) && memXact.tablewalk());
1791 N2_CererWithBitMux cerer(strand->core.cerer);
1792
1793 setDESR(getErrorSteer(bank),false, N2_Desr::RE_L2U,0);
1794 if (strand->seter.pscce()) {
1795 // Hardware Tablewalk
1796 if (memXact.tablewalk() && cerer.hwtwl2()) {
1797 if (itablewalk) {
1798 return SS_Trap::INSTRUCTION_ACCESS_MMU_ERROR;
1799 }
1800 else {
1801 return SS_Trap::DATA_ACCESS_MMU_ERROR;
1802 }
1803 }
1804 // Instruction Fetch
1805 else if (memXact.referenceType() == MemoryTransaction::INSTR &&
1806 cerer.icl2u()) {
1807 N2_InstSfsr *isfsr = &(strand->inst_sfsr);
1808 isfsr->error_type(N2_InstSfsr::ICL2U);
1809 N2_DataSfar *dsfar = &(strand->data_sfar);
1810 dsfar->error_addr(memXact.getVaddr());
1811 return SS_Trap::INSTRUCTION_ACCESS_ERROR;
1812 }
1813 // Data Fetch
1814 else if (memXact.referenceType() == MemoryTransaction::DATA &&
1815 memXact.readXact() &&
1816 cerer.dcl2u()) {
1817 N2_DataSfsr *dsfsr = &(strand->data_sfsr);
1818 dsfsr->error_type(N2_DataSfsr::DCL2U);
1819 return SS_Trap::DATA_ACCESS_ERROR;
1820 }
1821 // Data Store
1822 else if (memXact.referenceType() == MemoryTransaction::DATA &&
1823 memXact.writeXact() &&
1824 cerer.dcl2u()) {
1825
1826 setDESR(getErrorSteer(bank),true, N2_Desr::RE_L2U,0);
1827 trapToErrorSteer(strand,bank,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR);
1828 }
1829 else {
1830 fprintf(stderr,"N2_MemErrDetector::"
1831 "DramThrowUncorrectableTrap(): "
1832 "unknown MemoryTranaction type");
1833 exit(-1);
1834 }
1835 }
1836 return SS_Trap::NO_TRAP;
1837}
1838
1839// processSocFbdError() injects and detects SOC FBR RAS errors.
1840
1841void
1842N2_MemErrDetector::processSocFbdError(N2_Strand &strand,
1843 N2_L2AddressingFields paddress,
1844 N2_SocErrorReg::SocErrRegBitGetFn getFBR)
1845{
1846 uint32_t mcuID = bit_shift(paddress.getNative(),
1847 N2_DRAM_PADDR_MCU_SHIFT,
1848 N2_DRAM_PADDR_NR_MCU_LOG2);
1849
1850 // if the FBD error syndrome register is clear, then the error can
1851 // be logged
1852 if (DramFbdErrorSyndromeRegAccess_.
1853 get(N2_DramFbdErrorSyndromeReg::getVALID) == 0) {
1854 DramFbdErrorSyndromeRegAccess_.
1855 set(N2_DramFbdErrorSyndromeReg::setVALID);
1856
1857 // decode the error type and set the correct bit in the Dram
1858 // Error Syndrome Register
1859 uint64_t errSource = DramFbdInjectedErrSrcRegAccess_.
1860 get(N2_DramFbdInjectedErrSrcReg::getERRORSOURCE);
1861 switch (errSource) {
1862 case N2_DramFbdInjectedErrSrcReg::CRC_ERROR:
1863 DramFbdErrorSyndromeRegAccess_.
1864 set(N2_DramFbdErrorSyndromeReg::setSFPE);
1865 break;
1866 case N2_DramFbdInjectedErrSrcReg::ALERT_FRAME_ERROR:
1867 DramFbdErrorSyndromeRegAccess_.
1868 set(N2_DramFbdErrorSyndromeReg::setAA);
1869 break;
1870 case N2_DramFbdInjectedErrSrcReg::ALERT_ASSERTED:
1871 DramFbdErrorSyndromeRegAccess_.
1872 set(N2_DramFbdErrorSyndromeReg::setAFE);
1873 break;
1874 case N2_DramFbdInjectedErrSrcReg::STATUS_FRAME_PARITY_ERROR:
1875 DramFbdErrorSyndromeRegAccess_.
1876 set(N2_DramFbdErrorSyndromeReg::setC);
1877 break;
1878 default:
1879 fprintf(stderr,"N2_MemErrDetector::processSOCErrors: bad "
1880 "error source: %d", errSource);
1881 exit(-1);
1882 break;
1883 }
1884 // Legal FBR errors are correctable
1885 DramErrorStatusMemAccess_.set(mcuID, N2_DramErrorStatusMem::setFBR);
1886 }
1887
1888/*
1889 // decide whether to throw the trap.
1890 bool throwTrap = false;
1891 N2_DramFbdCountReg dramFbdCountReg;
1892 DramFbdCountRegAccess_.access(mcuID, dramFbdCountReg, true);
1893 // If the COUNTONE bit is set, then always try to throw the trap
1894 if (dramFbdCountReg.getCOUNTONE()) {
1895 throwTrap = true;
1896 }
1897 // Decrement the count in the Dram FBD register, saturating at 0.
1898 // If the register tranisitioned from 1 to 0, try to throw the
1899 // trap.
1900 else {
1901 uint64_t count = dramFbdCountReg.getCOUNT();
1902 if (count != 0 && --count == 0) {
1903 throwTrap = true;
1904 }
1905 dramFbdCountReg.setCOUNT(count);
1906 DramFbdCountRegAccess_.access(mcuID, dramFbdCountReg, false);
1907 }
1908
1909*/
1910
1911 // If error logging in enabled for this MCU
1912 if (socErrorLogEnableRegAccess_.get(getFBR)) {
1913 setL2ErrorStatusReg(paddress.getBANK(),
1914 N2_L2ErrorStatusReg::setDAC,
1915 true,
1916 strand.core_id(),
1917 0, paddress.getNative());
1918
1919 socErrorStatusRegAccess_.set(N2_SocErrorStatusReg::setV);
1920 N2_SocErrorReg::SocErrRegBitSetFn setFBR =
1921 N2_SocErrorReg::setSocErrRegMCUFBR(mcuID);
1922 socErrorStatusRegAccess_.set(setFBR);
1923 // We should try to throw the trap and there is no already
1924 // pending trap, toss it ...
1925 }
1926/*
1927 if (throwTrap &&
1928 socPendingErrStatusRegAccess_.get(N2_SocPendingErrStatusReg::getV)
1929 == 0) {
1930 socErrorStatusRegAccess_.set(N2_SocErrorStatusReg::setV);
1931 N2_SocErrorStatusReg socErrorStatusReg =
1932 socErrorStatusRegAccess_.set(setFBR);
1933 socPendingErrStatusRegAccess_.
1934 setNative(mcuID, socErrorStatusReg.getNative());
1935
1936 // If FBR errors are fatal, die...
1937 if (fatal) {
1938 RIESLING_THROW_RUNTIME_ERROR("N2_MemErrDetector::"
1939 "processSocRFbdError: don't "
1940 "support soft reset errors");
1941 }
1942 // otherwise, direct the HW_CORRECTED_ERROR to the correct strand
1943 else {
1944 uint_t vcID = socErrorSteeringRegAccess_.
1945 get(mcuID, N2_SocErrorSteeringReg::getVCID);
1946 strand.setIntpTaken(vcID, SS_Trap::HW_CORRECTED_ERROR);
1947 }
1948 }
1949*/
1950
1951 N2_CererWithBitMux cerer(strand.core.cerer);
1952 if (cerer.l2c_socc()) {
1953 setDESR(getErrorSteer(paddress.getBANK()),false, N2_Desr::CE_L2C,0);
1954
1955 trapToErrorSteer(&strand,paddress.getBANK(),
1956 SS_Interrupt::BIT_HW_CORRECTED_ERROR);
1957 }
1958}
1959
1960
1961// The access() method for the N2_CheckedL2ESRAccess class verifies
1962// writes to a bank's L2 ESR, as well as processing simple reads.
1963// This method offers some assurance that udpates to a bank's L2 ESR
1964// conform to N2's behavior.
1965//
1966// The L2 ESR records the presence of many different errors, but only
1967// has one field for error specific information. Also, there is no
1968// mechanism to count multiple errors.
1969//
1970// N2 deals with these constraints by providing a multiple correctable
1971// and uncorrectable bit (MEC & MEU). If the L2 ESR has a (un)correctable
1972// error already set, then the MEC (or MEU) bit is set instead of the
1973// bit corresponding to the error.
1974//
1975// This method mimics this behavior by checking whether the current
1976// value of the L2 ESR already contains error state and modifying how
1977// the register is updated if it does. See N2 1.1 PRM Tables 12-23
1978// and 12-24. The present implementation doesn't claim to precisely
1979// match the documented behavior, especially if multiple errors occur
1980// in one cycle.
1981//
1982// Because there is only one error address register per bank, N2 only
1983// allows more severe errors to overwrite this register once it is
1984// set. This method indicates that the error address register should
1985// be overridden by returning true.
1986//
1987// Reads are just passed through to the SS_CsrAccess template's
1988// access() method.
1989
1990bool
1991N2_MemErrDetector::N2_CheckedL2ESRAccess::access(uint64_t ndx,
1992 N2_L2ErrorStatusReg &csr,
1993 bool isRead)
1994{
1995 if (isRead) {
1996 SS_CsrAccess<N2_L2ErrorStatusReg>::access(ndx, csr, true);
1997 return true;
1998 }
1999
2000 bool updateErrorAddress = false;
2001 N2_L2ErrorStatusReg oldCsr;
2002 SS_CsrAccess<N2_L2ErrorStatusReg>::access(ndx, oldCsr, true);
2003
2004 // Check the ESR for the presence of multiple
2005 // errors (both correctable and uncorrectable)
2006 if (oldCsr.isVeryUncorrectable()) {
2007 if (csr.isVeryUncorrectable()) {
2008 csr.setMEU(1);
2009 }
2010 if (csr.isUncorrectable()) {
2011 csr.setMEU(1);
2012 }
2013 if (csr.isCorrectable()) {
2014 csr.setMEC(1);
2015 }
2016 }
2017 else if (oldCsr.isUncorrectable()) {
2018 if (csr.isVeryUncorrectable()) {
2019 csr = oldCsr;
2020 csr.setMEU(1);
2021 updateErrorAddress = true;
2022 }
2023 if (csr.isUncorrectable()) {
2024 csr.setMEU(1);
2025 }
2026 if (csr.isCorrectable()) {
2027 csr.setMEC(1);
2028 }
2029 }
2030 // Else check to see if a correctable error already exists
2031 else if (oldCsr.isCorrectable()) {
2032 if (csr.isVeryUncorrectable()) {
2033 csr.setMEC(1);
2034 updateErrorAddress = true;
2035 }
2036 else if (csr.isUncorrectable()) {
2037 csr.setMEC(1);
2038 updateErrorAddress = true;
2039 }
2040 else if (csr.isCorrectable()) {
2041 csr.setMEC(1);
2042 }
2043 }
2044 else {
2045 updateErrorAddress = true;
2046 }
2047
2048 SS_CsrAccess<N2_L2ErrorStatusReg>::access(ndx, csr, false);
2049
2050 return updateErrorAddress;
2051}
2052
2053
2054// setL2ErrorStatusReg() sets an error bit, the core/strand id, and the
2055// syndrome in the L2$ error status register for the given bank.
2056// The error bit is selected by passing the corresponding member
2057// function in bitSetFunction, e.g. N2_L2ErrorStatusReg::setLDAC.
2058
2059void N2_MemErrDetector::setL2ErrorStatusReg(uint32_t bank,
2060 ErrorStatusRegBitSetFn bitSetFunction,
2061 bool isCorrectable,
2062 uint32_t vcid,
2063 uint32_t syndrome,
2064 uint64_t errorAddress)
2065{
2066 N2_L2ErrorStatusReg L2ErrorStatusReg;
2067 L2ErrorStatusRegAccess_.access(bank, L2ErrorStatusReg, true);
2068 CALL_MEMBER_FN(L2ErrorStatusReg, bitSetFunction)(1);
2069 if (isCorrectable) {
2070 L2ErrorStatusReg.setVEC(1);
2071 } else {
2072 L2ErrorStatusReg.setVEU(1);
2073 }
2074 L2ErrorStatusReg.setVCID(vcid);
2075 L2ErrorStatusReg.setSYND(syndrome);
2076 if (L2ErrorStatusRegAccess_.access(bank, L2ErrorStatusReg, false)) {
2077 // setL2ErrorAddressReg() sets the address field in the L2$ error
2078 // address register for the given bank.
2079 N2_L2ErrorAddressReg L2ErrorAddressReg;
2080 L2ErrorAddressRegAccess_.access(bank, L2ErrorAddressReg, true);
2081 L2ErrorAddressReg.setADDRESS(errorAddress >>
2082 N2_L2ErrorAddressReg::bitSizeRSVD0);
2083 L2ErrorAddressRegAccess_.access(bank, L2ErrorAddressReg, false);
2084 }
2085 L2ErrorStatusRegAccess_.access(bank, L2ErrorStatusReg, true);
2086}
2087
2088
2089// setN2_L2NotdataErrorReg() sets the NDSP bit, the core/strand id, and the
2090// syndrome in the L2$ NotData error status register for the given bank.
2091// The MEND is set if either NDSP or NDDM is already set.
2092void
2093N2_MemErrDetector::setL2NotdataErrorReg(uint32_t bank,
2094 uint32_t vcid,
2095 uint64_t errorAddress)
2096{
2097 N2_L2NotdataErrorReg L2NotdataErrorReg;
2098 L2NotdataErrorRegAccess_.access(bank, L2NotdataErrorReg, true);
2099 if (!L2NotdataErrorReg.getNDSP() && !L2NotdataErrorReg.getNDDM()) {
2100 L2NotdataErrorReg.setVCID(vcid);
2101 L2NotdataErrorReg.setADDRESS(errorAddress >>
2102 N2_L2NotdataErrorReg::bitSizeRSVD0);
2103 L2NotdataErrorRegAccess_.access(bank, L2NotdataErrorReg, false);
2104 }
2105 else {
2106 L2NotdataErrorRegAccess_.set(bank,
2107 N2_L2NotdataErrorReg::setMEND);
2108 }
2109}