// ========== Copyright Header Begin ==========================================
// OpenSPARC T2 Processor File: N2_MemErrDetector.cc
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
// ========== Copyright Header End ============================================
/************************************************************************
** Copyright (C) 2005, Sun Microsystems, Inc.
** Sun considers its source code as an unpublished, proprietary
** trade secret and it is available only under strict license provisions.
** This copyright notice is placed here only to protect Sun in the event
** the source is deemed a published work. Disassembly, decompilation,
** or other means of reducing the object code to human readable form
** is prohibited by the license agreement under which this code is
** provided to the user or company in possession of this copy.
*************************************************************************/
#include "BL_Hamming_22_6_Synd.h"
#include "BL_Hamming_64_8_Synd.h"
#include "N2_MemErrDetector.h"
// Used to call a pointer to member function.
// Localizes the nasty syntax for this language feature: the macro expands
// to ((object).*(ptrToMember)), and the caller appends the argument list,
// e.g. CALL_MEMBER_FN(obj, ptr)(args).
#define CALL_MEMBER_FN(object,ptrToMember) ((object).*(ptrToMember))
// The N2_MemErrDetector class is used to detect injected RAS errors
// associated with the memory hierarchy. In particular, it models and
// detects errors in the primary and secondary caches and DRAM.
// detect_fetch_err() describes an instruction fetch issued by strand "s"
// as a MemoryTransaction (READ access, INSTR reference type, tagged with
// the strand id) and returns the trap type that detectErr() reports.
SS_Trap::Type
N2_MemErrDetector::detect_fetch_err( MemoryLevel level
, SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Paddr pa
) {
MemoryTransaction mem_xact
;
// Record which strand issued the fetch.
mem_xact
.setStrand(s
->strand_id());
mem_xact
.access(MemoryTransaction::READ
);
mem_xact
.referenceType(MemoryTransaction::INSTR
);
// Hand the transaction to the central detector and propagate its result.
return detectErr(mem_xact
);
// detect_load_err() describes a data load issued by strand "s" as a
// MemoryTransaction (READ access, DATA reference type, size taken from
// line->len) and returns the trap type that detectErr() reports.
SS_Trap::Type
N2_MemErrDetector::detect_load_err( MemoryLevel level
, SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Instr
* line
, SS_Paddr pa
) {
MemoryTransaction mem_xact
;
// Record which strand issued the load.
mem_xact
.setStrand(s
->strand_id());
// The access size comes from the decoded instruction.
mem_xact
.size(line
->len
);
mem_xact
.access(MemoryTransaction::READ
);
mem_xact
.referenceType(MemoryTransaction::DATA
);
// Hand the transaction to the central detector and propagate its result.
return detectErr(mem_xact
);
// inject_store_err() handles a data store issued by strand "s". It first
// asks the model to flush the instruction and data caches for the stored
// range (pa, line->len), then describes the store as a MemoryTransaction
// (WRITE access, DATA reference type) and returns the trap type that
// detectErr() reports.
SS_Trap::Type
N2_MemErrDetector::inject_store_err( MemoryLevel level
, SS_Vaddr pc
, SS_Vaddr npc
, SS_Strand
* s
, SS_Instr
* line
, SS_Paddr pa
, uint64_t data
) {
// Stores coherently update all the primary caches so we call
// SS_Model::ras_flush() to approximate this behavior.
s
->model
->ras_flush(s
, pa
, line
->len
, SS_MemErrDetector::INSTR_CACHE
);
s
->model
->ras_flush(s
, pa
, line
->len
, SS_MemErrDetector::DATA_CACHE
);
MemoryTransaction mem_xact
;
// Record which strand issued the store.
mem_xact
.setStrand(s
->strand_id());
// The access size comes from the decoded instruction.
mem_xact
.size(line
->len
);
mem_xact
.access(MemoryTransaction::WRITE
);
mem_xact
.referenceType(MemoryTransaction::DATA
);
// Hand the transaction to the central detector and propagate its result.
return detectErr(mem_xact
);
// Injects errors into registers in the Tick Compare array.
// n2_tick_cmpr_err_injector() computes the ECC check bits for "data" (with
// bit 63 flipped) and, when the core's error injection register has both
// ene() and tccu() equal to 1, XORs the register's eccmask() into the ECC.
BL_EccBits
N2_MemErrDetector::n2_tick_cmpr_err_injector(SS_Strand
* s
, uint64_t data
)
N2_Strand
* n2
= (N2_Strand
*)s
;
N2_Core
& n2_core
= n2
->core
;
// The INTDIS bit (bit 63) has to be flipped before sending the value
// for ecc calculation - 28.11 - N2 PRM rev 1.1
BL_EccBits ecc_obj
= BL_Hamming_64_8_Synd::calc_check_bits((1ULL<<63)^data
);
// Check if ENB and TCCU bits are set in N2 Error Injection Register
// NOTE(review): "ecc" below is not declared in the statements visible
// here (only "ecc_obj" is) -- confirm where it is defined.
if ((n2_core
.error_inject
.ene() == 1) && (n2_core
.error_inject
.tccu() == 1))
ecc
^= n2_core
.error_inject
.eccmask();
// Set back the corrupted ecc
// (ASR) Reads to any reg in the Tick Compare Array (TCA) trigger this routine.
// This routine scans for the presence of precise single bit or multi bit
// errors, records the error information in DSFAR and throws a precise
// internal_processor_error trap.
// Correctable errors are detected only if the CERER.TCCP bit is set.
// Uncorrectable errors are detected only if the CERER.TCUP bit is set.
// Errors are recorded only if the PSCCE bit is set in the SETER.
// The syndrome is stored in bits 2 thru 9 of DSFAR.
// The tick compare array index is stored in bits 0 and 1 of DSFAR:
// 00 - Tick Cmpr 01 - Stick Cmpr 10 - Hstick Cmpr
SS_Trap::Type
N2_MemErrDetector::n2_tick_cmpr_precise_err_detector(SS_Strand
* s
, N2_TickAccess::TickAccessIndex array_index
)
N2_Strand
* n2
= (N2_Strand
*)s
;
N2_Core
& n2_core
= n2
->core
;
bool update_dsfar
= false;
// Dispatch on which Tick Compare array register is being read.
if (array_index
== N2_TickAccess::TICK_CMPR_INDX
)
else if (array_index
== N2_TickAccess::STICK_CMPR_INDX
)
else if (array_index
== N2_TickAccess::HSTICK_CMPR_INDX
)
// Fetch the stored ECC for this register and build the syndrome from the
// register value and that ECC.
BL_EccBits ecc_obj
= n2
->tick_cmpr_array_ecc
[array_index
];
BL_Hamming_64_8_Synd syndrome
= BL_Hamming_64_8_Synd(val
,ecc_obj
);
// Correctable (single bit) case, gated by CERER.TCCP.
if (n2_core
.cerer
.tccp())
if (syndrome
.isSingleBitError())
n2
->data_sfsr
.error_type(N2_DataSfsr::TCCP
);
// Uncorrectable (double or multiple bit) case, gated by CERER.TCUP.
else if (n2_core
.cerer
.tcup())
if (syndrome
.isDoubleBitError() || syndrome
.isMultipleBitError())
n2
->data_sfsr
.error_type(N2_DataSfsr::TCUP
);
// Pack the syndrome into bits 2..9 and the array index into bits 0..1,
// then store the result as the DSFAR error address.
error_add
= BL_BitUtility::set_subfield(error_add
,syndrome
.getSyndrome(),2,9);
error_add
= BL_BitUtility::set_subfield(error_add
,array_index
,0,1);
n2
->data_sfar
.error_addr(error_add
);
return SS_Trap::INTERNAL_PROCESSOR_ERROR
;
// This routine checks for the presence of disrupting errors (for all the regs
// in the Tick Compare Array). If there is an error, the information is
// recorded in the DESR and a 'sw_recoverable_error' is thrown. Correctable
// errors are detected only if the CERER.TCCD bit is set. Uncorrectable errors
// are detected only if the CERER.TCUD bit is set. Errors are recorded only if
// the DE bit is set in the SETER. The syndrome is stored in bits 2 thru 9 of
// DESR. The tick compare array index is stored in bits 0 and 1 of DESR:
// 00 - Tick Cmpr 01 - Stick Cmpr 10 - Hstick Cmpr
bool N2_MemErrDetector::n2_tick_cmpr_disrupting_err_detector(SS_Strand
* s
)
bool update_desr
= false;
N2_Strand
* n2
= (N2_Strand
*)s
;
N2_Core
& n2_core
= n2
->core
;
// Check for errors in all the three tick_cmpr registers
for (uint64_t array_index
= 0; array_index
< N2_TickAccess::TICK_ACCESS_MAX
; array_index
++)
// Dispatch on which Tick Compare array register is being examined.
if (array_index
== N2_TickAccess::TICK_CMPR_INDX
)
else if (array_index
== N2_TickAccess::STICK_CMPR_INDX
)
else if (array_index
== N2_TickAccess::HSTICK_CMPR_INDX
)
// Fetch the stored ECC for this register and build the syndrome from the
// register value and that ECC.
BL_EccBits ecc_obj
= n2
->tick_cmpr_array_ecc
[array_index
];
BL_Hamming_64_8_Synd syndrome
= BL_Hamming_64_8_Synd(val
,ecc_obj
);
// Correctable (single bit) case, gated by CERER.TCCD.
if (n2_core
.cerer
.tccd())
if (syndrome
.isSingleBitError())
error_type
= N2_Desr::RE_TCCD
;
// Uncorrectable (double or multiple bit) case, gated by CERER.TCUD.
else if (n2_core
.cerer
.tcud())
if (syndrome
.isDoubleBitError() || syndrome
.isMultipleBitError())
error_type
= N2_Desr::RE_TCUD
;
// If the DESR already has a pending sw_recoverable_error, the details
// about the current error are not recorded. The presence of multiple
// errors is denoted by setting the 'me' bit in the DESR.
// If the DESR already has a pending hw_corrected_error, the details
// about the previous error are flushed out and the details about the
// current sw_recoverable_error are recorded. 'sw_recoverable' errors
// have higher precedence than hw_corrected errors. The presence of
// multiple errors is denoted by setting the 'me' bit in the DESR.
n2
->desr
.errtype(error_type
);
// Pack the syndrome into bits 2..9 and the array index into bits 0..1,
// then store the result as the DESR error address.
error_add
= BL_BitUtility::set_subfield(error_add
,syndrome
.getSyndrome(),2,9);
error_add
= BL_BitUtility::set_subfield(error_add
,array_index
,0,1);
n2
->desr
.erraddr(error_add
);
// Post the disrupting sw_recoverable_error interrupt on this strand.
n2
->irq
.raise(n2
,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR
);
// Routine to flush the L2 cache.
// Decodes "pa" as an N2_L2CacheFlushAddrFields value, checks that the
// "key" field in the pa is correct and then calls L2CacheFlush() to flush
// the selected line. A bad key is reported on stderr.
void N2_MemErrDetector::prefetchICE(SS_Paddr pa
)
N2_L2CacheFlushAddrFields l2CacheFlushAddr
;
l2CacheFlushAddr
.set_data(pa
);
// Only flush when the key embedded in the address validates.
if (l2CacheFlushAddr
.checkKey())
L2CacheFlush(l2CacheFlushAddr
);
fprintf(stderr
, "prefetchICE: bad key: %x\n", l2CacheFlushAddr
.getKEY());
// n2_step_hook() treats "s" as an N2_Strand and returns the trap type
// produced by flushing that strand's store buffer.
SS_Trap::Type
N2_MemErrDetector::n2_step_hook(SS_Strand
* s
)
N2_Strand
*strand
= (N2_Strand
*)s
;
return strand
->flush_store_buffer();
// ras_flush() flushes part of a strand's I$ or D$. Which cache is
// selected by the "type" argument and the range of the cache to
// invalidate is selected by "pa" and "size".
// "_s" is the strand whose core caches are flushed; "requesting_strand"
// (may be NULL) is compared against "_s" by core number for D$ flushes.
void N2_MemErrDetector::ras_flush( SS_Strand
*_s
, SS_Strand
* requesting_strand
,
SS_Paddr pa
, uint64_t size
,
// Test whether "_s" is something other than the first strand of its core.
if ((_s
->strand_id() % N2_Model::NO_STRANDS_PER_CORE
) != 0)
// Test for a D$ flush requested by a strand on the same core as "_s".
if (type
== SS_MemErrDetector::DATA_CACHE
&&
requesting_strand
!= NULL
&&
(_s
->strand_id() / N2_Model::NO_STRANDS_PER_CORE
) ==
(requesting_strand
->strand_id() / N2_Model::NO_STRANDS_PER_CORE
))
N2_Strand
* s
= (N2_Strand
*)_s
;
// The line size is derived from the I$ addressing field widths.
const uint_t line_size
= 1 << (N2_IcacheAddressingFields::WIDTH_RSVD0
+
N2_IcacheAddressingFields::WIDTH_INSTR
);
// Round both ends of the range down to a line boundary.
// NOTE(review): end_pa is derived from pa + size (one past the last byte)
// and the loop bound below is inclusive, so one extra line may be flushed
// when pa + size is line-aligned -- confirm this is intended.
SS_Paddr start_pa
= round_down_to_power_of_two(pa
, line_size
);
SS_Paddr end_pa
= round_down_to_power_of_two(pa
+ size
, line_size
);
// Clear all I$ lines matching the (pa, pa_size) address range.
while (start_pa
<= end_pa
)
if (type
== SS_MemErrDetector::DATA_CACHE
)
s
->core
.flush_dcache(start_pa
);
s
->core
.flush_icache(start_pa
);
// The central memory hierarchy error detector.
// Detects I- and D-cache, L2 cache, Dram, and SOC FBDIMM RAS errors
// produced by a MemoryTransaction. Models the I- and D-cache, the
// L2$ and their associated error detection mechanisms. Also detects
// RAS errors produced by Dram and the FBDIMM channels.
// If a trap is detected, detectErr() either throws a BasicTrap with
// the correct trap number for precise traps or directs the trap to
// the correct strand with setIntpTrap(). detectErr() checks various
// control registers before throwing traps or changing state. If a
// trap is thrown detectErr() sets error information the correct
// Conventions: if multiple traps are generated by the same
// instruction, no more than one is guaranteed to be thrown. Also,
// error state may not be updated correctly for multiple errors
// produced on different cycles. The multiple error bits will be set
// correctly, but the detailed error information will not necessarily
// match the hardware's prioritization of information capture. The
// error information will be consistent for one of the errors, but
// detectErr() may pick the wrong error's information to save.
// detectErr() ignores i/o space accesses. For memory accesses,
// detectErr() dispatches the memory request to either the I-cache or
// the D-cache RAS routines in N2_Core. These routines check for
// primary cache errors and cache misses. If there is a miss in a
// primary cache, it calls the N2_MemErrDetector::L2CacheFill() method
// to model loading the L2$ and detecting any errors at that level.
// If the L2$ doesn't contain the line, it invokes ??? to model loading the
// line from Dram and detecting any Chip-Kill or FBDIMM errors.
SS_Trap::Type
N2_MemErrDetector::detectErr(const MemoryTransaction
&memXact
)
// skip accesses in i/o space or uncorrected data access for Chip-Kill
//if (memXact.paddr() >= 0x8000000000 || memXact.noDramErrorCorrect())
// Look up the issuing strand (on cpu[0]) from the transaction's strand id.
N2_Strand
*strand
= (N2_Strand
*)n2_model
->cpu
[0]->strand
[memXact
.getStrand()];
MemoryTransaction::RefT refT
= memXact
.getReferenceType();
N2_Core
*core
= &strand
->core
;
// Instruction references go to the core's I-cache RAS routine.
if (refT
== MemoryTransaction::INSTR
)
// check icache RAS errors
return core
->icache_ifetch(memXact
,
(!strand
->hpstate
.hpriv() ||
// Data references go to the core's D-cache RAS routine.
else if (refT
== MemoryTransaction::DATA
)
// If the memory transaction is a read, check the store buffer for
// pending stores that alias the read's address.
SS_Trap::Type tt
= strand
->check_store_buffer_RAWtrap(memXact
);
if(tt
!= SS_Trap::NO_TRAP
)
return core
->dcache_trans(memXact
,
(!strand
->hpstate
.hpriv() ||
// L2 Cache Line Fill routine
// Given a MemoryTransaction, L2CacheFill() loads the corresponding
// L2$ line. First, it checks all the tags in the line's way set,
// then the line's VuaD entry for ECC errors. Then, it looks for a
// tag match with the valid bit set. If the appropriate cache line is
// present in the cache, it checks its ECC and, if the memory
// transaction is a store, marks the line dirty. Otherwise,
// L2CacheFill() picks a line to cast out of the cache, checks this
// line's ECC, and then loads the new line into the cache, calculating
// Note that any ECC error will throw the appropriate trap.
// Returns trap number to throw if NotData is present in the cache.
SS_Trap::Type
N2_MemErrDetector::L2CacheFill(const MemoryTransaction
&memXact
)
// Look up the issuing strand (on cpu[0]) from the transaction's strand id.
N2_Strand
*strand
= (N2_Strand
*)n2_model
->cpu
[0]->strand
[memXact
.getStrand()];
// Decode the transaction's physical address into L2 addressing fields.
N2_L2AddressingFields paddr
;
paddr
.set(memXact
.getPaddr());
// Check/fix the tags of the set, then the VuaD entry.
L2FixTagsAndTrap(strand
, paddr
, memXact
);
N2_L2DiagVdMemWithECC diagVD
= L2FixVUADAndTrap(strand
, paddr
, memXact
);
// Search the set for a valid way whose tag matches.
int hit_way
= L2FindWay(paddr
, diagVD
);
// update L2 cache tag, VuaD, and data
return L2CacheMiss(strand
, memXact
, diagVD
);
// hit. hit_way contains matching way
return L2CacheHit(strand
, memXact
, diagVD
, hit_way
);
// L2 Cache Line Flush routine
// Given a way in the L2$ selected by diagAddr, L2CacheFlush() flushes
// the corresponding L2$ line, writing it back to memory if it's
// dirty. First, it checks all the tags in the line's way set, then
// the line's VuaD entry for ECC errors. It corrects these errors
// without trapping. Then it clears the valid and dirty VuaD bits for
// the cache associated with the address.
// Note that the "real" L2$ must write the line back to memory;
// however in this L2$ implementation, the correct data has already
// been written to memory, so invalidation is all that's needed.
// Afterwards the primary caches are flushed for the same L2$ line.
void N2_MemErrDetector::L2CacheFlush(N2_L2CacheFlushAddrFields diagAddr
)
// create a physical address that matches the way and bank of diagAddr
N2_L2AddressingFields paddr
;
paddr
.setSET(diagAddr
.getSET());
paddr
.setBANK(diagAddr
.getBANK());
uint_t way
= diagAddr
.getWAY();
// Read the tag stored for (way, set, bank) and copy it into paddr so that
// paddr names the line currently held in that way.
uint32_t diagNdx
= paddrToWaySetBankNdx(paddr
, way
);
L2DiagTagMemAccess_
.access(diagNdx
, diagTag
, true);
paddr
.setTAG(diagTag
.getTAG());
// Check/fix the VuaD entry; diagVD receives the VD bits.
N2_L2DiagVdMemWithECC diagVD
;
L2FixVUAD(paddr
, diagVD
);
// if the way is valid, invalidate it by clearing the way's dirty
// and valid bits in VuaD
if (diagVD
.getVALID() & (1<<way
)) {
// do we need to flush the cache line?
if (diagVD
.getDIRTY() & (1<<way
)) {
// Start from the DRAM-line-aligned address containing this L2$ line.
N2_L2AddressingFields ckPaddr
;
ckPaddr
.setNative(paddr
.getNative() & ~(SS_CKMemory::DRAM_LINE_LENGTH
- 1));
N2_L2CacheLineError
lineError(ckPaddr
.getNative());
// Walk the L2$ lines that make up one DRAM line, verifying each one.
for (i
= 0; i
< SS_CKMemory::DRAM_LINE_LENGTH
/N2_L2_CACHE_LINE_SIZE
; ++i
) {
lineError
= L2ProcessCacheLine(ckPaddr
, way
, false);
if (lineError
.isUncorrectable()) {
ckPaddr
.setNative(ckPaddr
.getNative() + N2_L2_CACHE_LINE_SIZE
);
// Re-align to the DRAM line and update the DRAM ECC, passing along
// whether an uncorrectable error was seen.
ckPaddr
.setNative(ckPaddr
.getNative() & ~(SS_CKMemory::DRAM_LINE_LENGTH
- 1));
dramUpdateECC(ckPaddr
, lineError
.isUncorrectable());
// Clear this way's valid and dirty bits, recompute the VD ECC and
// write the entry back.
diagVD
.setVALID(diagVD
.getVALID() & ~(1<<way
));
diagVD
.setDIRTY(diagVD
.getDIRTY() & ~(1<<way
));
diagVD
.setVDECC(diagVD
.calcECC());
L2DiagVdMemAccess_
.access(paddrToSetBankNdx(paddr
), diagVD
, false);
// Now flush the primary caches for all the Cores
paddr
.setWORD(0); // align L2$ address to beginning of L2$ line
N2_IcacheAddressingFields icacheAddr
;
icacheAddr
.set(paddr
.getNative());
// Clear all decode cache lines matching the (pa, pa_size) address range.
assert(N2_L2_CACHE_LINE_SIZE
== SS_InstrCache::LINE_SIZE
*4);
for (uint_t cpu_ndx
= 0; cpu_ndx
< N2_Model::NO_CPUS
;++cpu_ndx
)
for (uint_t strand_ndx
= 0; strand_ndx
< N2_Model::NO_STRANDS_PER_CPU
;
n2_model
->cpu
[cpu_ndx
]->strand
[strand_ndx
]->flush(paddr
.getNative(),true);
// Flush each I$ line covered by the L2$ line, stepping the I$ set index.
for(int i
=0;i
< N2_L2_CACHE_LINE_SIZE
/N2_IcacheAddressingFields::N2_ICACHE_LINE_SIZE
;i
++){
n2_model
->ras_flush(NULL
, icacheAddr(), 8,
SS_MemErrDetector::INSTR_CACHE
);
icacheAddr
.sets(icacheAddr
.sets() + 1);
// Flush each D$ line covered by the L2$ line, stepping the D$ set index.
N2_DcacheAddressingFields dcacheAddr
;
dcacheAddr
.set(paddr
.getNative());
for(int i
=0;i
< N2_L2_CACHE_LINE_SIZE
/N2_DcacheAddressingFields::N2_DCACHE_LINE_SIZE
;i
++){
n2_model
->ras_flush(NULL
, dcacheAddr(), 8,
SS_MemErrDetector::DATA_CACHE
);
dcacheAddr
.sets(dcacheAddr
.sets() + 1);
// L2FixTags() checks for RAS errors in all the L2$ tags that match
// the address in paddr. If there are any single-bit errors, it
// corrects them without throwing a trap.
void N2_MemErrDetector::L2FixTags(N2_L2AddressingFields paddr
)
// Check all ways for tag ECC error
// (the way count is 1 << N2_L2DiagDataAddressingFields::bitSizeWAY)
for (int way
= 0; way
< (1<<N2_L2DiagDataAddressingFields::bitSizeWAY
);
// Diagnostic index of this way's tag within paddr's set and bank.
uint32_t diagNdx
= paddrToWaySetBankNdx(paddr
, way
);
// L2 Cache Line Check Tags routine
// Given a MemoryTransaction, L2FixTagsAndTrap() checks all the tags
// that match the address in paddr. If there are any single-bit
// errors, it corrects them and throws the appropriate trap.
void N2_MemErrDetector::L2FixTagsAndTrap(N2_Strand
*strand
,
N2_L2AddressingFields paddr
,
const MemoryTransaction
&memXact
)
// Check all ways for tag ECC error
for (int way
= 0;way
< (1<<N2_L2DiagDataAddressingFields::bitSizeWAY
);way
++)
// Diagnostic index of this way's tag within paddr's set and bank.
uint32_t diagNdx
= paddrToWaySetBankNdx(paddr
, way
);
// Set LTC in L2_ERROR_STATUS register and
// paddr[39:6] in L2_ERROR_ADDRESS register
setL2ErrorStatusReg(paddr
.getBANK(), N2_L2ErrorStatusReg::setLTC
,
true, 0, 0, paddr
.getNative());
// Set error information in DESR
N2_CererWithBitMux
cerer(strand
->core
.cerer
);
// The N2 PRM Rev 1.1 is vague. Sect 12.9.5 refers to the
// L2C bit in the CERER, which doesn't exist. We
// interpret this to mean the family of L2C bits in Table
// 12-4 and select any of them.
if (getCEEN(paddr
.getBANK()) && cerer
.checkOneL2Cbit(memXact
))
// The strand that receives the error is the bank's ERRORSTEER target.
uint32_t strandId
= getErrorSteer(paddr
.getBANK());
setDESR(strandId
,false, N2_Desr::CE_L2C
,0);
// Throw trap to ERRORSTEER
trapToErrorSteer(strand
, paddr
.getBANK(),
SS_Interrupt::BIT_HW_CORRECTED_ERROR
);
// L2FixTag() checks and fixes any L2$ tag RAS error at diagnostic
// access index "diagNdx". It returns true if there is a tag error.
// A single data-bit or check-bit error is corrected by flipping the
// offending bit; a double bit tag error is reported on stderr.
bool N2_MemErrDetector::L2FixTag(uint32_t diagNdx
)
// Read the tag entry at diagNdx.
L2DiagTagMemAccess_
.access(diagNdx
, diagTag
, true);
BL_Hamming_22_6_Synd
tagSyndrome(diagTag
.getTAG(),
if (!tagSyndrome
.noError())
if (tagSyndrome
.isDataBitError()) {
// Flip the faulty tag data bit reported by the syndrome.
uint32_t dataBit
= tagSyndrome
.getDataBit();
diagTag
.setTAG(diagTag
.getTAG() ^ (1<<dataBit
));
RAS_OSTR
<< "L2CacheFill: correcting data bit " <<
} else if (tagSyndrome
.isCheckBitError())
// Flip the faulty ECC check bit reported by the syndrome.
uint32_t checkBit
= tagSyndrome
.getCheckBit();
diagTag
.setECC(diagTag
.getECC() ^ (1<<checkBit
));
RAS_OSTR
<< "L2CacheFill: correcting check bit " <<
fprintf(stderr
,"L2CheckTags: double bit tag error");
// Write the tag entry back.
L2DiagTagMemAccess_
.access(diagNdx
, diagTag
, false);
// L2FixVUAD() checks and fixes a RAS error for the VuaD bits associated
// with the physical address, "paddr". It returns the value of VUAD
// diagnostic register (with ECC).
// The VD entry for the set is delivered to the caller through "diagVD".
// The VD and UA halves are checked separately: a single data-bit or
// check-bit error is corrected by flipping the offending bit, and a
// double bit error is reported on stderr.
N2_MemErrDetector::N2_L2VaudSyndrome
N2_MemErrDetector::L2FixVUAD(N2_L2AddressingFields paddr
,
N2_L2DiagVdMemWithECC
&diagVD
)
uint32_t setBankNdx
= paddrToSetBankNdx(paddr
);
// Get UA bits for this set
N2_L2DiagUaMemWithECC diagUA
;
L2DiagUaMemAccess_
.access(setBankNdx
, diagUA
, true);
// Get VD bits for this set
L2DiagVdMemAccess_
.access(setBankNdx
, diagVD
, true);
// Compute both syndromes and bundle them for the caller.
BL_Hamming_32_7_Synd vdSyndrome
= diagVD
.getSyndrome();
BL_Hamming_32_7_Synd uaSyndrome
= diagUA
.getSyndrome();
N2_L2VaudSyndrome
vuadSyndrome(vdSyndrome
.getSyndrome(),uaSyndrome
.getSyndrome()) ;
// --- VD half ---
if (!vdSyndrome
.noError()) {
RAS_OSTR
<< "L2FixVUAD: bad VD ECC expected 0x" <<
hex
<< diagVD
.getVDECC() <<
" got 0x" << hex
<< vdSyndrome
.getSyndrome() << endl
;
if (vdSyndrome
.isDataBitError()) {
// Flip the faulty VD data bit.
uint32_t dataBit
= vdSyndrome
.getDataBit();
diagVD
.setVD(diagVD
.getVD() ^ (1<<dataBit
));
} else if (vdSyndrome
.isCheckBitError()) {
// Flip the faulty VD check bit.
uint32_t checkBit
= vdSyndrome
.getCheckBit();
diagVD
.setVDECC(diagVD
.getVDECC() ^ (1<<checkBit
));
fprintf(stderr
,"L2FixVUAD: double bit "
// Write the VD entry back.
L2DiagVdMemAccess_
.access(setBankNdx
, diagVD
, false);
// --- UA half ---
if (!uaSyndrome
.noError()) {
RAS_OSTR
<< "L2FixVUAD: bad UA ECC expected 0x" <<
hex
<< diagUA
.getUAECC() <<
" got 0x" << hex
<< uaSyndrome
.getSyndrome() << endl
;
if (uaSyndrome
.isDataBitError()) {
// Flip the faulty UA data bit.
uint32_t dataBit
= uaSyndrome
.getDataBit();
diagUA
.setUA(diagUA
.getUA() ^ (1<<dataBit
));
} else if (uaSyndrome
.isCheckBitError()) {
// Flip the faulty UA check bit.
uint32_t checkBit
= uaSyndrome
.getCheckBit();
diagUA
.setUAECC(diagUA
.getUAECC() ^ (1<<checkBit
));
fprintf(stderr
,"L2FixVUAD: double bit "
// Write the UA entry back.
L2DiagUaMemAccess_
.access(setBankNdx
, diagUA
, false);
// L2FixVUADAndTrap() corrects an error in the VuaD bits for a physical
// address and throws any appropriate trap.
// It delegates the check/fix to L2FixVUAD(), records the error in the L2
// error status register and, when enabled, in the DESR of the bank's
// ERRORSTEER strand before raising a hw_corrected_error.
N2_MemErrDetector::N2_L2DiagVdMemWithECC
N2_MemErrDetector::L2FixVUADAndTrap(N2_Strand
*strand
,
N2_L2AddressingFields paddr
,
const MemoryTransaction
&memXact
)
N2_L2DiagVdMemWithECC diagVD
;
N2_L2VaudSyndrome vuad_syndrome
= L2FixVUAD(paddr
, diagVD
);
// Combine both syndromes into one value: the VD syndrome shifted left
// by 7 bits, OR'ed with the UA syndrome.
uint16_t vuadSyndrome
= ((vuad_syndrome
.vdSyndrome_
.getSyndrome() << 0x7) | vuad_syndrome
.uaSyndrome_
.getSyndrome());
// Set LVC in L2_ERROR_STATUS register
// See N2 PRM Rev 1.1 Tbl 12-22
// Set paddr[39:6] in L2_ERROR_ADDRESS register
setL2ErrorStatusReg(paddr
.getBANK(),
N2_L2ErrorStatusReg::setLVC
,
getErrorSteer(paddr
.getBANK()),
N2_CererWithBitMux
cerer(strand
->core
.cerer
);
if (getCEEN(paddr
.getBANK()) && cerer
.checkOneL2Cbit(memXact
)) {
// Set error information in DESR
uint32_t strandId
= getErrorSteer(paddr
.getBANK());
setDESR(strandId
,false, N2_Desr::CE_L2C
,0);
// Throw trap to ERRORSTEER
trapToErrorSteer(strand
, paddr
.getBANK(),
SS_Interrupt::BIT_HW_CORRECTED_ERROR
);
// L2FindWay() searches the L2$ to see if any cache lines match the
// passed address. L2FindWay() also checks the valid bits in the cache
// set's VuaD information to make sure the line is valid.
// The result starts out as NO_WAY (miss).
N2_MemErrDetector::L2FindWay(N2_L2AddressingFields paddr
,
N2_L2DiagVdMemWithECC diagVD
)
int hit_way
= NO_WAY
; // assume miss
for (int way
= 0; way
< (1<<N2_L2DiagDataAddressingFields::bitSizeWAY
);
// Read this way's tag for paddr's set and bank.
uint32_t diagNdx
= paddrToWaySetBankNdx(paddr
, way
);
L2DiagTagMemAccess_
.access(diagNdx
, diagTag
, true);
// A candidate must match the tag and have its valid bit set in VuaD.
if (diagTag
.getTAG() == paddr
.getTAG()) {
if (diagVD
.getVALID() & (1<<way
)) {
// L2CacheMiss() processes an L2$ miss. Selects a way to invalidate
// (or victimize) using the per bank Not Recently Used pointer,
// L2CacheWaysNRUPtr[]. If the way's cache line is dirty, update the
// Chip-Kill ECC for the line flushed to memory. If the memory transaction
// is a write, mark the line as dirty.
// The L2$ cache line's data is read from memory (using Chip-Kill
// correction) and its data ECC is calculated and saved.
// Finally, dramProcessMemOp() is called to detect Chip-Kill errors.
// If the memory Chip-Kill line is poisoned (NotData), then the current
// L2$ line is poisoned as well.
// If the miss detects poison, L2CacheMiss() returns a trap number,
N2_MemErrDetector::L2CacheMiss(N2_Strand
*strand
,
const MemoryTransaction
&memXact
,
N2_L2DiagVdMemWithECC
&diagVD
)
N2_L2AddressingFields paddr
;
paddr
.setNative(memXact
.getPaddr());
uint32_t bank
= paddr
.getBANK();
// The victim way is the one the bank's NRU pointer currently names.
int way
= L2CacheWaysNRUPtr_
[bank
];
// flush line if dirty, checking data ECC -- and clear dirty bit
// If ECC error, throw trap to ERRORSTEER.
uint32_t valid
= diagVD
.getVALID();
uint32_t dirty
= diagVD
.getDIRTY();
// Update DRAM's ECC for this cache line
// Note that the flushed line has a different physical address
// than the original memory transaction.
N2_L2AddressingFields writeBackPaddr
;
writeBackPaddr
.setBANK(bank
);
writeBackPaddr
.setSET(paddr
.getSET());
// Rebuild the victim's address from the tag stored in the victim way.
uint64_t writeBackDiagNdx
=
paddrToWaySetBankNdx(writeBackPaddr
, way
);
N2_L2DiagTagMem writeBackDiagTag
;
L2DiagTagMemAccess_
.access(writeBackDiagNdx
, writeBackDiagTag
,
writeBackPaddr
.setTAG(writeBackDiagTag
.getTAG());
// flush line if dirty, checking data ECC -- and clear dirty bit
// If ECC error, throw trap to ERRORSTEER.
N2_L2CacheLineError lineError
= L2ProcessCacheLine(writeBackPaddr
, way
, false);
// Throw a trap on all bad ECC
if (lineError
.isError() && !lineError
.isNotData()) {
ThrowL2DataWriteBackTrap(memXact
, strand
, bank
, lineError
);
// Truncate to memory cache line alignment
writeBackPaddr
.setNative(writeBackPaddr
.getNative() &
~(SS_CKMemory::DRAM_LINE_LENGTH
- 1));
dramUpdateECC(writeBackPaddr
, lineError
.isNotData());
valid
|= (1<<way
); // line is valid
// if write or read-write, set dirty bit
if (memXact
.writeXact()) {
// If debugging chip-kill, set ECC for every write
N2_L2AddressingFields tmpPaddr
;
tmpPaddr
.setNative(paddr
.getNative() &
~(SS_CKMemory::DRAM_LINE_LENGTH
- 1));
dramUpdateECC(tmpPaddr
, false);
// Recompute the VD ECC and write the VD entry back for this set/bank.
diagVD
.setVDECC(diagVD
.calcECC());
uint32_t setBankNdx
= paddrToSetBankNdx(paddr
);
L2DiagVdMemAccess_
.access(setBankNdx
, diagVD
, false);
// fetch line from memory
(void)L2ProcessCacheLine(paddr
, way
, true);
// update tag with filled line
uint32_t diagNdx
= paddrToWaySetBankNdx(paddr
, way
);
diagTag
.setTAG(paddr
.getTAG());
diagTag
.setECC((BL_Hamming_22_6_Synd::calc_check_bits(diagTag
.getTAG())).get());
L2DiagTagMemAccess_
.access(diagNdx
, diagTag
, false);
// advance bank's NRU pointer
L2CacheWaysNRUPtr_
[bank
] = ((way
+1) %
(1<<N2_L2DiagDataAddressingFields::bitSizeWAY
));
// Truncate to memory cache line alignment
paddr
.setNative(paddr
.getNative() & ~(SS_CKMemory::DRAM_LINE_LENGTH
- 1));
// Check DRAM's ECC; poison L2$ line and primary $ line if uncorrectable
bool isUncorrectable
= false;
SS_Trap::Type trap
= dramProcessMemOp(strand
, memXact
, paddr
,isUncorrectable
);
if(trap
!= SS_Trap::NO_TRAP
)
poisonL2Line(paddr
, way
);
// L2CacheHit() processes an L2$ hit. If the memory transaction
// is a write, marks the line as dirty.
// Verifies the cache line's ECC with L2ProcessCacheLine(). If the line
// has an ECC error, ThrowL2DataTrap() is called to cough up the
// appropriate hairball (i.e. throw the correct ECC trap).
// If cache line contains NotData, L2CacheHit() returns trap number to
N2_MemErrDetector::L2CacheHit(N2_Strand
*strand
,
const MemoryTransaction
&memXact
,
N2_L2DiagVdMemWithECC
&diagVD
,
N2_L2AddressingFields paddr
;
paddr
.setNative(memXact
.getPaddr());
// if write or read-write, set dirty bit
if (memXact
.writeXact()) {
uint32_t dirty
= diagVD
.getDIRTY();
// Recompute the VD ECC and write the VD entry back for this set/bank.
diagVD
.setVDECC(diagVD
.calcECC());
L2DiagVdMemAccess_
.access(paddrToSetBankNdx(paddr
), diagVD
, false);
/* update the data ecc for the complete cache line*/
L2ProcessCacheLine(paddr
, hit_way
, true);
// Verify the line's ECC and hand any error to ThrowL2DataTrap().
N2_L2CacheLineError lineError
=
L2ProcessCacheLine(paddr
, hit_way
, false);
if (lineError
.isError()) {
return ThrowL2DataTrap(memXact
, strand
, paddr
.getBANK(), lineError
);
// Do we need to access DRAM for READ_WRITE to check ECC? We
// don't need to actually write to memory here. The
// memXact.access() routine, which has called us, handles that.
// L2ProcessCacheLine() either reads a cache line, calculating its ECC
// or verifies a cache line's ECC.
// If verifying the ECC for a cache line and an ECC error is found, it
// returns the ECC syndromes for the first quarterline where an error occurs.
// The line is handled as four quarter lines, each passed to
// L2ProcessQuarterLine().
N2_MemErrDetector::N2_L2CacheLineError
N2_MemErrDetector::L2ProcessCacheLine(N2_L2AddressingFields paddr
,
// mask out LSB's to set paddr to the beginning of the cache
// NOTE(review): the mask below aligns to a quarter line
// (N2_L2_CACHE_LINE_SIZE/4), not to a full cache line; because the loop
// ORs quarter-line offsets into paddr, confirm that callers always pass
// a line-aligned address or that the mask is as intended.
paddr
.setNative(paddr
.getNative() & ~(N2_L2_CACHE_LINE_SIZE
/4-1));
N2_L2CacheLineError
returnError(paddr
.getNative());
for (int i
= 0; i
< 4; ++i
) {
// Address of the i-th quarter line: paddr OR'ed with i quarter-line sizes.
N2_L2AddressingFields quarterLinePaddr
;
quarterLinePaddr
.setNative(paddr
.getNative() |
(i
*N2_L2_CACHE_LINE_SIZE
/4) % N2_L2_CACHE_LINE_SIZE
);
N2_L2CacheLineError qLineError
=
L2ProcessQuarterLine(quarterLinePaddr
, way
, isRead
);
if(qLineError
.isError()){
RAS_OSTR
<< "L2ProcessCacheLine: qline syndrome 0x" <<
hex
<< qLineError
.qLineSyndrome() <<
" paddr 0x" << hex
<< qLineError
.errorPaddr() << endl
;
// Keep only the first quarter-line error encountered.
if (qLineError
.isError() && !returnError
.isError()) {
returnError
= qLineError
;
// L2ProcessQuarterLine() either reads a quarter of a cache line,
// calculating its ECC or verifies a quarter cache line's ECC.
// If verifying the ECC for a cache line and an ECC error is found, it
// returns the ECC syndromes for this quarterline.
// "paddr" must be quarter-line aligned; otherwise "bad paddr" is
// reported on stderr.
N2_MemErrDetector::N2_L2CacheLineError
N2_MemErrDetector::L2ProcessQuarterLine(N2_L2AddressingFields paddr
,
if (paddr
.getNative() % (N2_L2_CACHE_LINE_SIZE
/4)){
fprintf(stderr
,"L2ProcessQuarterLine: bad paddr");
N2_L2CacheLineError
l2CacheLineError(paddr
.getNative());
// words are 64-bits in N2 land -- at least in PRM Rev 1.1 Tbl 28-43.
// There are two 64-bit "words" in a quarter cache line
for (word
= 0; word
< (N2_L2_CACHE_LINE_SIZE
/4)/sizeof(double); ++word
) {
// Build the diagnostic data address for this word of the quarter line.
N2_L2DiagDataAddressingFields diagAddr
;
diagAddr
.setBANK(paddr
.getBANK());
diagAddr
.setWORD(paddr
.getWORD() + word
);
diagAddr
.setSET(paddr
.getSET());
uint32_t hi_data
, lo_data
;
// get the cache line from memory
uint64_t data
= ((SS_CKMemory
*)(n2_model
->cpu
[0]->strand
[0]->memory
))->peek8u(paddr
.getNative());
// Split the 64-bit word into its upper and lower 32-bit halves.
hi_data
= (data
>> 32) & 0xffffffff;
lo_data
= data
& 0xffffffff;
N2_L2DiagDataMemWithECC diagData
;
diagAddr
.getNative()/N2_L2DiagDataMemWithECC::Stride
;
diagData
.setDATA(lo_data
);
diagData
.setECC(diagData
.calcECC());
// set even half of 64-bit word
L2DiagDataMemAccess_
.access(diagNdx
, diagData
, false);
L2DiagDataMemAccess_
.access(diagNdx
, diagData
, true);
// Fold this half's syndrome into the quarter-line error record.
l2CacheLineError
.addQuarterLine(diagData
.getSyndrome());
diagNdx
= diagAddr
.getNative()/N2_L2DiagDataMemWithECC::Stride
;
diagData
.setDATA(hi_data
);
diagData
.setECC(diagData
.calcECC());
// set odd half of 64-bit word
L2DiagDataMemAccess_
.access(diagNdx
, diagData
, false);
L2DiagDataMemAccess_
.access(diagNdx
, diagData
, true);
// Fold this half's syndrome into the quarter-line error record.
l2CacheLineError
.addQuarterLine(diagData
.getSyndrome());
// ThrowL2DataTrap() handles the details of setting status registers
// and conditionally throwing the right trap.
// Correctable line errors are routed to ThrowL2DataCorrectableTrap();
// the result of ThrowL2DataUncorrectableTrap() is returned otherwise.
// Returns trap number to throw if the cache line contains NotData
N2_MemErrDetector::ThrowL2DataTrap(const MemoryTransaction
&memXact
,
N2_L2CacheLineError lineError
)
if (lineError
.isCorrectable()) {
ThrowL2DataCorrectableTrap(memXact
, strand
, bank
, lineError
);
return ThrowL2DataUncorrectableTrap(memXact
, strand
, bank
, lineError
);
// ThrowL2DataCorrectableTrap() throws the correct disrupting trap
// after setting the correct bits in various error status registers.
// This routine is quite meticulous as the memory transaction, the
// error conditions, processor state, and, even it might seem, the
// phase of the moon, influence the behavior of the trap processing.
// The cases handled are, in order: tablewalk accesses, instruction
// fetches, and data accesses. In each case the trap is steered to the
// strand selected by getErrorSteer(bank).
N2_MemErrDetector::ThrowL2DataCorrectableTrap(const MemoryTransaction
&memXact
,
N2_L2CacheLineError lineError
)
N2_CererWithBitMux
cerer(strand
->core
.cerer
);
// True for a tablewalk whose reference type is INSTR (I-tlb tablewalk).
bool itablewalk
= ((memXact
.referenceType() == MemoryTransaction::INSTR
) && memXact
.tablewalk());
// Handle L2$ data ECC errors during I-tlb or D-tlb tablewalk
if (memXact
.tablewalk()) {
// Set in L2_ERROR_STATUS register and set paddr[39:6] for the bad
// quarter line in L2_ERROR_ADDRESS register
setL2ErrorStatusReg(bank
, N2_L2ErrorStatusReg::setLDAC
,
lineError
.qLineSyndrome(),
// Set error information in DESR
if (getCEEN(bank
) && cerer
.hwtwl2()) {
N2_Strand
*target_strand
= (N2_Strand
*)n2_model
->cpu
[0]->strand
[getErrorSteer(bank
)];
// Record RE_ITL2C for an I-tlb tablewalk, RE_DTL2C otherwise.
setDESR(target_strand
->strand_id(),true, itablewalk
?N2_Desr::RE_ITL2C
: N2_Desr::RE_DTL2C
,0);
// and throw disrupting SW_RECOVERABLE_ERROR trap
strand
->irq
.raise(target_strand
,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR
);
// Handle L2$ data ECC errors during instruction fetch
else if (memXact
.readXact() &&
memXact
.referenceType() == MemoryTransaction::INSTR
) {
// Set in L2_ERROR_STATUS register and paddr[39:6] for the bad
// quarter line in L2_ERROR_ADDRESS register
setL2ErrorStatusReg(bank
, N2_L2ErrorStatusReg::setLDAC
,
lineError
.qLineSyndrome(),
// Do we set error information in DESR and throw a trap?
if (getCEEN(bank
) && cerer
.checkOneL2Cbit(memXact
)) {
N2_Strand
*target_strand
= (N2_Strand
*)n2_model
->cpu
[0]->strand
[getErrorSteer(bank
)];
setDESR(target_strand
->strand_id(),true,N2_Desr::RE_ICL2C
,0);
strand
->irq
.raise(target_strand
,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR
);
// Handle L2$ data ECC errors during data read or partial store(TODO)
// This also covers Atomic Hits in Sect 12.9.1.6 because they are
// issued as a READ memXact followed by a WRITE memXact, with the
// atomic bit set for both.
else if (memXact
.referenceType() == MemoryTransaction::DATA
&&
// Set in L2_ERROR_STATUS register and paddr[39:6] for the bad
// quarter line in L2_ERROR_ADDRESS register
setL2ErrorStatusReg(bank
, N2_L2ErrorStatusReg::setLDAC
,
lineError
.qLineSyndrome(),
// If CEEN set in L2_ERROR_ENABLE, throw trap to ERRORSTEER
if (getCEEN(bank
) && cerer
.checkOneL2Cbit(memXact
)) {
N2_Strand
*target_strand
= (N2_Strand
*)n2_model
->cpu
[0]->strand
[getErrorSteer(bank
)];
// Writes record CE_L2C and raise hw_corrected_error when SETER.dhcce()
// is set.
if (memXact
.writeXact()) {
setDESR(target_strand
->strand_id(),false,N2_Desr::CE_L2C
,0);
if(strand
->seter
.dhcce())
strand
->irq
.raise(target_strand
,SS_Interrupt::BIT_HW_CORRECTED_ERROR
);
setDESR(target_strand
->strand_id(),true,N2_Desr::RE_DCL2C
,0);
strand
->irq
.raise(target_strand
,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR
);
// Writes just set the ECC for quarter line.
// so they can't throw traps.
else if (memXact
.referenceType() == MemoryTransaction::DATA
&&
fprintf(stderr
,"N2_MemErrDetector::"
"ThrowL2DataCorrectableTrap(): "
"unknown MemoryTranaction type");
// ThrowL2DataUncorrectableTrap() throws the correct disrupting trap
// after setting the correct bits in various error status registers.
// As before, this routine is quite meticulous as each of the memory
// transaction, the error conditions, processor state, and, even it
// might seem, the phase of the moon, influence the behavior of the
// trap processing.
//
// The visible cases are: tablewalk, instruction-fetch read, data read,
// and data write (partial store).  Non-NotData errors are logged as LDAU
// via setL2ErrorStatusReg(); NotData errors go through
// setL2NotdataErrorReg().  The strand's ISFSR / DSFSR / DSFAR are updated
// before a precise trap number is produced.
// Returns precise trap number to throw, if needed.
// NOTE(review): 'strand' and 'bank' are used but not declared in the
// parameter list visible here -- confirm the full signature.
N2_MemErrDetector::ThrowL2DataUncorrectableTrap(
const MemoryTransaction
&memXact
,
N2_L2CacheLineError lineError
)
N2_CererWithBitMux
cerer(strand
->core
.cerer
);
// Shorthand pointers to the strand's fault status/address registers.
N2_InstSfsr
*isfsr
= &(strand
->inst_sfsr
);
N2_DataSfsr
*dsfsr
= &(strand
->data_sfsr
);
N2_DataSfar
*dsfar
= &(strand
->data_sfar
);
// Result for the paths that assign a trap rather than return directly.
SS_Trap::Type trapNumber
= SS_Trap::NO_TRAP
;
// True only for a tablewalk whose reference type is INSTR.
bool itablewalk
= ((memXact
.referenceType() == MemoryTransaction::INSTR
) && memXact
.tablewalk());
// Handle L2$ data ECC errors during I-tlb or D-tlb tablewalk
if (memXact
.tablewalk()) {
// Is this error NotData?
if (!lineError
.isNotData()) {
// Set the L2_ERROR_STATUS register and paddr[39:6] for the bad
// quarter line in L2_ERROR_ADDRESS register
setL2ErrorStatusReg(bank
, N2_L2ErrorStatusReg::setLDAU
,
lineError
.qLineSyndrome(),
// Set the L2_NOTDATA_STATUS register
setL2NotdataErrorReg(bank
,strand
->core_id(),
// Set error information in ISFSR or DSFSR and DSFAR
if (getNCEEN(bank
) && cerer
.hwtwl2()) {
if (!lineError
.isNotData()) {
// If PSCCE set, throw trap
if (strand
->seter
.pscce()) {
// NOTE(review): presumably the I-side lines apply when 'itablewalk' is
// true and the D-side lines otherwise; the selecting branch is not
// visible here -- confirm.
isfsr
->error_type(N2_InstSfsr::ITL2U
);
return SS_Trap::INSTRUCTION_ACCESS_MMU_ERROR
;
dsfsr
->error_type(N2_DataSfsr::DTL2U
);
dsfar
->error_addr(memXact
.getVaddr());
return SS_Trap::DATA_ACCESS_MMU_ERROR
;
// If PSCCE set, throw trap
if (strand
->seter
.pscce()) {
// NotData variant: records ITL2ND / DTL2ND and stores the trap in
// 'trapNumber' instead of returning immediately.
isfsr
->error_type(N2_InstSfsr::ITL2ND
);
trapNumber
= SS_Trap::INSTRUCTION_ACCESS_MMU_ERROR
;
dsfsr
->error_type(N2_DataSfsr::DTL2ND
);
trapNumber
= SS_Trap::DATA_ACCESS_MMU_ERROR
;
// Handle L2$ data ECC errors during instruction fetch
else if (memXact
.readXact() &&
memXact
.referenceType() == MemoryTransaction::INSTR
) {
bool notData
= lineError
.isNotData();
// Pick the CERER enable bit that matches the error flavour:
// ICL2ND for NotData, ICL2U otherwise.
bool cererSet
= notData
? cerer
.icl2nd() : cerer
.icl2u();
// Set the L2_NOTDATA_STATUS register
setL2NotdataErrorReg(bank
,
// Set in L2_ERROR_STATUS register and paddr[39:6] for the bad
// quarter line in L2_ERROR_ADDRESS register
setL2ErrorStatusReg(bank
, N2_L2ErrorStatusReg::setLDAU
,
lineError
.qLineSyndrome(),
isfsr
->error_type(N2_InstSfsr::ICL2ND
);
trapNumber
= SS_Trap::INSTRUCTION_ACCESS_ERROR
;
// If NCEEN and PSCCE set, throw trap
if (getNCEEN(bank
) && strand
->seter
.pscce()) {
isfsr
->error_type(N2_InstSfsr::ICL2U
);
return SS_Trap::INSTRUCTION_ACCESS_ERROR
;
// Handle L2$ data ECC errors during data read.
// This also covers Atomic Hits in Sect 12.9.7.6 because they are
// issued as a READ memXact followed by a WRITE memXact, with the
// atomic bit set for both.
// However, the write (following this read) updates memory (hard to stop,
// given the current Riesling implementation, do we need to for RUST?)
else if (memXact
.readXact() &&
memXact
.referenceType() == MemoryTransaction::DATA
) {
bool notData
= lineError
.isNotData();
// NOTE(review): this selection is the reverse of the instruction-fetch
// case above (there NotData picks the *ND bit).  Here NotData picks
// dcl2u() and non-NotData picks dcl2nd(), which looks inverted --
// confirm against the PRM before relying on it.
bool cererSet
= notData
? cerer
.dcl2u() : cerer
.dcl2nd();
// Set the L2_NOTDATA_STATUS register
setL2NotdataErrorReg(bank
,
// Set in L2_ERROR_STATUS register and paddr[39:6] for the bad
// quarter line in L2_ERROR_ADDRESS register
setL2ErrorStatusReg(bank
, N2_L2ErrorStatusReg::setLDAU
,
lineError
.qLineSyndrome(),
// If NCEEN and PSCCE set, throw trap
dsfsr
->error_type(N2_DataSfsr::DCL2ND
);
trapNumber
= SS_Trap::DATA_ACCESS_ERROR
;
// If NCEEN and PSCCE set, throw trap
dsfsr
->error_type(N2_DataSfsr::DCL2U
);
return SS_Trap::DATA_ACCESS_ERROR
;
// Handle L2$ partial stores TODO
else if (memXact
.writeXact() &&
memXact
.referenceType() == MemoryTransaction::DATA
// Set in L2_ERROR_STATUS register
// Don't set L2 Error Address reg, per N2 PRM Rev 1.1 12.9.7.7.
setL2ErrorStatusReg(bank
, N2_L2ErrorStatusReg::setLDAU
,
lineError
.qLineSyndrome(), 0);
// Again, what about bad parity?
// If NCEEN set in L2_ERROR_ENABLE, etc., throw trap
if (getNCEEN(bank
) && cerer
.dcl2u()) {
// NB: this is dependent on NCEEN and DCL2U, different
// DESR of the strand selected by getErrorSteer(bank) is written with
// RE_L2U before DATA_ACCESS_ERROR is returned.
N2_Strand
*target_strand
= (N2_Strand
*)n2_model
->cpu
[0]->strand
[getErrorSteer(bank
)];
setDESR(target_strand
->strand_id(),false,N2_Desr::RE_L2U
,0);
return SS_Trap::DATA_ACCESS_ERROR
;
// Writes just set the ECC for quarter line.
// so they can't throw traps.
// NOTE(review): the condition below is left open after '&&' in the text
// visible here; the diagnostic that follows looks like the final
// fall-through case -- confirm.
else if (memXact
.referenceType() == MemoryTransaction::DATA
&&
fprintf(stderr
,"N2_MemErrDetector::"
"ThrowL2DataUncorrectableTrap(): "
"unknown MemoryTranaction type");
// poisonL2Line() sets the ECC values for all the words in an L2$ line
// to NotData.  The line is selected by the bank and index (aka set)
// values in 'paddr' and the set's way in 'way'.
// For each word a diagnostic-data address is assembled from the BANK,
// WORD and SET fields of 'paddr', turned into an index by dividing its
// native value by N2_L2DiagDataMemWithECC::Stride, and the ECC at that
// index is set to L2_NOT_DATA.
// NOTE(review): only the loop bound of the per-word loop is visible here,
// and ECC is written at two indices per word; how the second index is
// derived (and where 'way' is applied) is not visible -- confirm.
N2_MemErrDetector::poisonL2Line(N2_L2AddressingFields paddr
, int way
)
// Loop bound: number of double-sized words in one L2 cache line.
word
< N2_L2_CACHE_LINE_SIZE
/sizeof(double);
N2_L2DiagDataAddressingFields diagAddr
;
diagAddr
.setBANK(paddr
.getBANK());
diagAddr
.setWORD(paddr
.getWORD() + word
);
diagAddr
.setSET(paddr
.getSET());
diagAddr
.getNative()/N2_L2DiagDataMemWithECC::Stride
;
set(diagNdx
, N2_L2DiagDataMem::setECC
,
N2_L2DiagDataMemWithECC::L2_NOT_DATA
);
diagNdx
= diagAddr
.getNative()/N2_L2DiagDataMemWithECC::Stride
;
set(diagNdx
, N2_L2DiagDataMem::setECC
,
N2_L2DiagDataMemWithECC::L2_NOT_DATA
);
// ThrowL2DataWriteBackTrap() sets the various error status registers
// and throws the appropriate disrupting trap to the ERRORSTEER strand
// for the bank number found in the original memory transaction that
// causes the cache line writeback.
// Correctable errors are logged as LDWC / CE_L2C and trapped with
// HW_CORRECTED_ERROR when CEEN and CERER.l2c_socc are set; the other
// path logs LDWU / RE_L2U and traps with SW_RECOVERABLE_ERROR when NCEEN
// and CERER.l2u_socu are set.
// NOTE(review): 'strand' and 'bank' are used but not declared in the
// parameter list visible here -- confirm the full signature.
N2_MemErrDetector::ThrowL2DataWriteBackTrap(const MemoryTransaction
&memXact
,
N2_L2CacheLineError lineError
)
// Don't trap on NotData N2 1.1 PRM Sect 12.9.16
// NOTE(review): the comment says NotData must not trap, yet the test
// below is not negated and its body is not visible here -- confirm that
// the NotData case returns early.
if (lineError
.isNotData()) {
// Strand that receives the DESR update for this bank.
uint32_t strandId
= getErrorSteer(bank
);
// Set in L2_ERROR_STATUS register and paddr[39:6] for the bad
// quarter line in L2_ERROR_ADDRESS register
ErrorStatusRegBitSetFn bitSetFcn
= lineError
.isCorrectable() ?
N2_L2ErrorStatusReg::setLDWC
: N2_L2ErrorStatusReg::setLDWU
;
setL2ErrorStatusReg(bank
, bitSetFcn
, lineError
.isCorrectable(), 0, 0,
// Set error information in DESR
N2_CererWithBitMux
cerer(strand
->core
.cerer
);
if (lineError
.isCorrectable()) { // is there hope?
if(getCEEN(bank
) && cerer
.l2c_socc()/*cerer.checkOneL2Cbit(memXact)*/){
setDESR(strandId
,false, N2_Desr::CE_L2C
,0);
trapToErrorSteer(strand
, bank
, SS_Interrupt::BIT_HW_CORRECTED_ERROR
);
// Uncorrectable write-back error: gated by NCEEN and CERER.l2u_socu.
if (getNCEEN(bank
) && cerer
.l2u_socu() ) {
setDESR(strandId
,true, N2_Desr::RE_L2U
,0);
trapToErrorSteer(strand
, bank
, SS_Interrupt::BIT_SW_RECOVERABLE_ERROR
);
// This routine implements DRAM RAS error detection and injection.
// Assumption: All errors are detected during critical data load only
// and reported prior to linefill.  This routine does not model the
// scrubbing mechanism.  Also does not detect errors that might arise
// in the FBDIMM channel.
//
// Outline of the visible logic: the physical address is checked for
// alignment, the MCU id and L2 bank are derived from it, and if an ECC
// entry exists for the address a chip-kill syndrome is computed from the
// raw line and the stored ECC.  A non-zero syndrome is logged in the L2
// and DRAM error status registers and handed to
// DramThrowUncorrectableTrap() or DramThrowCorrectableTrap().  Finally,
// SOC FBDIMM error injection for this MCU is checked.
//
// Returns true if the Dram read accesses an uncorrectable Chip-Kill error
// NB: This routine is missing the hooks for MBR*ECC and MBR*FBR SOC errors.
N2_MemErrDetector::dramProcessMemOp(N2_Strand
*strand
,
const MemoryTransaction
&memXact
,
N2_L2AddressingFields paddress
,bool &isUncorrectable
)
// Extracting necessary information from input
uint64_t paddr
= paddress
.getNative();
// Verify if PA is 16B Aligned
// (the test is against SS_CKMemory::DRAM_LINE_LENGTH)
if ((paddr
% SS_CKMemory::DRAM_LINE_LENGTH
) != 0) {
fprintf(stderr
,"N2_MemErrDetector::dramProcessMemOp(): "
// MCU ID is determined from bits 7:6 of the PA
uint32_t mcuID
= bit_shift(paddr
, N2_DRAM_PADDR_MCU_SHIFT
,
N2_DRAM_PADDR_NR_MCU_LOG2
);
uint32_t bank
= paddress
.getBANK();
N2_Cerer
*cerer
= &(strand
->core
.cerer
);
// The chip-kill memory model is reached through cpu[0]/strand[0].
SS_CKMemory
*ck_memory
=((SS_CKMemory
*)(n2_model
->cpu
[0]->strand
[0]->memory
));
// DRAM Error Detection and Handling
// If the paddr has an entry in the ECC Map
if (ck_memory
->ecc_exists(paddr
)) {
// Syndrome built from the raw chip-kill line and its stored ECC.
BL_CKSyndrome
ck_syndrome(ck_memory
->read_raw_CK_line(paddr
), ck_memory
->fetch_ecc(paddr
));
RAS_OSTR
<< "N2_MemErrDetector::dramProcessMemOp: " <<
"CK syndrome 0x" << hex
<< ck_syndrome
.getSyndrome() << "\n";
if (!ck_syndrome
.noError()) {
// Snapshot the current L2 ESR (per bank) and DRAM ESR (per MCU).
// NOTE(review): the trailing 'true' appears to select a read access --
// confirm against SS_CsrAccess.
N2_L2ErrorStatusReg L2ErrorStatusReg
;
L2ErrorStatusRegAccess_
.access(bank
, L2ErrorStatusReg
, true);
N2_DramErrorStatusMem dramESR
;
DramErrorStatusMemAccess_
.access(mcuID
, dramESR
, true);
// Verify if the error is correctable or uncorrectable
if (ck_syndrome
.isUncorrectableError()) {
// Set DAU, R/W, VCID and MODA information in the
// L2 Cache Error Status Register -> PRM 12.11.1.1
// MODA(Modular Arithmetic) and R/W need not be
// set for RUST.
// Set paddr[39:6] in L2_ERROR_ADDRESS register -
// All DRAM error address should be stored in L2
// EAR. DRAM EAR stores address only for Scrub
// errors. -> PRM 12.12.2
setL2ErrorStatusReg(bank
, N2_L2ErrorStatusReg::setDAU
,
false, getErrorSteer(bank
),
ck_syndrome
.getSyndrome(),
// Check the ESR for the presence of multiple
// errors (both correctable and uncorrectable)
// Check to see if an uncorrectable error is
// already present If yes don't log details about
// current error. Instead set the MEU bit in the
// DRAM ESR.
// NOTE(review): the DramErrorStatusMemAccess_ calls in this function are
// shown without their method name and leading arguments -- confirm.
if (dramESR
.getDAU() == 1) {
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setMEU
,
// Else check to see if a correctable error already exists
else if (dramESR
.getDAC() == 1) {
// If yes overwrite the previous error (UE
// has higher precedence over CE)
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setDAU
,
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setSYND
,
ck_syndrome
.getSyndrome());
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setDAC
,
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setMEC
,
else { // No error stored in Dram ESR
// Set DAU and Syndrome in DRAM ESR
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setDAU
,
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setSYND
,
ck_syndrome
.getSyndrome());
// if NCEEN bit is set, then signal an L2U error
// to the requesting virtual core and throw a trap
SS_Trap::Type trap
= DramThrowUncorrectableTrap(strand
, memXact
, bank
);
if(trap
!= SS_Trap::NO_TRAP
)
// Verify if the error is a correctable data bit
// or check bit error
else if (ck_syndrome
.isCorrectableDataBitError() ||
ck_syndrome
.isCorrectableCheckBitError()) {
// Check the ESR for the presence of multiple
// errors (both correctable and uncorrectable)
// Check to see if any error is already present
if ((L2ErrorStatusReg
.getVEU() == 1) ||
(L2ErrorStatusReg
.getVEC() == 1)) {
// If yes do not log info about current
// error just set the MEC bit
L2ErrorStatusReg
.setMEC(1);
L2ErrorStatusRegAccess_
.access(bank
,
// If no error is stored in the Dram ESR,
// then log the information Set DAC, R/W,
// VCID and MODA information in the L2
// Cache Error Status Register -> PRM
// 12.11.1.1 MODA(Modular Arithmetic) and
// R/W need not be set for RUST.
// Set paddr[39:6] in L2_ERROR_ADDRESS
// register - All DRAM error address should be
// stored in L2 EAR. DRAM EAR stores address
// only for Scrub errors. -> PRM 12.12.2
setL2ErrorStatusReg(bank
,
N2_L2ErrorStatusReg::setDAC
,
true, getErrorSteer(bank
),
ck_syndrome
.getSyndrome(),
// Check to see if any error is already present
if (dramESR
.getDAU() || dramESR
.getDAC()) {
// If yes do not log info about current
// error just set the MEC bit
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setMEC
,
// This is the first error, log the information
// Set DAC and Syndrome in DRAM ESR
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setDAC
,
DramErrorStatusMemAccess_
.
N2_DramErrorStatusMem::setSYND
,
ck_syndrome
.getSyndrome());
// Add stuff for DRAM Error Counter and DRAM
// Error Location Registers
// if CEEN is set, then signal an L2C error to
// the requesting virtual core and throw a
// trap
DramThrowCorrectableTrap(strand
, memXact
, bank
);
// Getter for this MCU's FBR bit in the SOC error registers.
N2_SocErrorReg::SocErrRegBitGetFn getFBR
=
N2_SocErrorReg::getSocErrRegMCUFBR(mcuID
);
// If SOC FBDIMM error injection is enabled for this mcu
if (socErrorInjectRegAccess_
.get(getFBR
)) {
setL2ErrorStatusReg(bank
,
N2_L2ErrorStatusReg::setDAC
,
true, getErrorSteer(bank
),
// Legal FBR errors are correctable
DramErrorStatusMemAccess_
.set(mcuID
, N2_DramErrorStatusMem::setFBR
);
// NOTE(review): this declares a second 'cerer' while an N2_Cerer*
// 'cerer' was declared earlier in the function; the scoping braces are
// not visible here -- confirm the two do not clash.
N2_CererWithBitMux
cerer(strand
->core
.cerer
);
if (getCEEN(bank
) && cerer
.dcl2c()) {
setDESR(getErrorSteer(bank
),false, N2_Desr::CE_L2C
,0);
trapToErrorSteer(strand
,bank
,
SS_Interrupt::BIT_HW_CORRECTED_ERROR
);
// handle SOC FBR errors here
processSocFbdError(*strand
, paddress
, getFBR
);
// dramUpdateECC() updates the ECC value associated with paddress.
// If dram error injection is enabled, the ECC for the physical
// address' Chip-Kill line is calculated, xor'ed with the injection mask,
// and saved in the dram ECC map.  If dram error injection is disabled
// (or there is no longer an ECC error), the ECC value will not be
// saved in the map (or the value will be deleted).
// NOTE(review): the remaining parameters and the declaration of
// 'newDramECC' are not visible here -- confirm the full signature.
N2_MemErrDetector::dramUpdateECC(N2_L2AddressingFields paddress
,
// Extracting necessary information from input
// NOTE(review): dramProcessMemOp() and processSocFbdError() read the
// address with getNative(); this function uses get() -- confirm the two
// are equivalent.
uint64_t paddr
= paddress
.get();
// The chip-kill memory model is reached through cpu[0]/strand[0].
SS_CKMemory
*ck_memory
=((SS_CKMemory
*)(n2_model
->cpu
[0]->strand
[0]->memory
));
// Verify if PA is 16B Aligned
// (the test is against SS_CKMemory::DRAM_LINE_LENGTH)
if ((paddr
% SS_CKMemory::DRAM_LINE_LENGTH
) != 0) {
fprintf(stderr
,"N2_MemErrDetector::dramUpdateECC(): misaligned address.");
// MCU ID is determined from bits 7:6 of the PA
uint32_t mcuID
= bit_shift(paddr
, N2_DRAM_PADDR_MCU_SHIFT
,
N2_DRAM_PADDR_NR_MCU_LOG2
);
// Case: DRAM Access - STORE (WRITE) (L2 Miss)
// Work is needed when the address already has an ECC entry or when
// error injection is enabled (ENB) for this MCU.
ck_memory
->ecc_exists(paddr
) ||
DramErrorInjectMemAccess_
.get(mcuID
, N2_DramErrorInjectMem::getENB
)) {
// If the L2$ line contains NotData, write special syndrome
newDramECC
= SS_CKMemory::DRAM_NOT_DATA
;
// Ordinary ECC computed by the memory model for this address.
// NOTE(review): presumably the non-NotData path; the separating 'else'
// is not visible here -- confirm.
newDramECC
= ck_memory
->calculate_dram_ecc(paddr
);
// With injection enabled, corrupt the ECC by xor-ing in ECCMASK.
if (DramErrorInjectMemAccess_
.get(mcuID
,
N2_DramErrorInjectMem::getENB
)) {
newDramECC
^= DramErrorInjectMemAccess_
.
get(mcuID
, N2_DramErrorInjectMem::getECCMASK
);
RAS_OSTR
<< "DRAM Error injected at paddr :0x" << std::hex
<< paddr
<< " newDRAMECC:0x" << std::hex
<< newDramECC
<< endl
;
// Store PA,MASKED ECC in MAP The higher order PA is
// (still) unique enough to be maintained as key The
// value stored in the map will (eventually) be the
// ECC value for 128 bits of data addressed by HO-PA
ck_memory
->dram_update_ecc(paddr
,newDramECC
);
// If Single Shot then disable error injection
if (DramErrorInjectMemAccess_
.
get(mcuID
, N2_DramErrorInjectMem::getSSHOT
)) {
DramErrorInjectMemAccess_
.
set(mcuID
, N2_DramErrorInjectMem::setENB
, 0);
// DramThrowCorrectableTrap() throws an HW_CORRECTED_ERROR trap to the
// ERRORSTEER strand for bank.
// NOTE(review): the code visible below writes the DESR with a recoverable
// RE_* code and raises BIT_SW_RECOVERABLE_ERROR, not HW_CORRECTED_ERROR
// as the sentence above states -- confirm which one is intended.
// NOTE(review): the 'bank' parameter and the declaration of 'errorCode'
// are not visible here -- confirm the full signature.
N2_MemErrDetector::DramThrowCorrectableTrap(N2_Strand
*strand
,
const MemoryTransaction
&memXact
,
N2_CererWithBitMux
cerer(strand
->core
.cerer
);
// True only for a tablewalk whose reference type is INSTR.
bool itablewalk
= ((memXact
.referenceType() == MemoryTransaction::INSTR
) && memXact
.tablewalk());
// If the correct L2C bit is set in the CERER, throw a trap
if (cerer
.checkOneL2Cbit(memXact
)) {
// Choose the DESR error code from the transaction kind:
// I-tablewalk, D-tablewalk, instruction access, or data access.
errorCode
= N2_Desr::RE_ITL2C
;
} else if (memXact
.tablewalk() && (memXact
.referenceType() == MemoryTransaction::DATA
)) {
errorCode
= N2_Desr::RE_DTL2C
;
} else if (memXact
.referenceType() == MemoryTransaction::INSTR
) {
errorCode
= N2_Desr::RE_ICL2C
;
} else if (memXact
.referenceType() == MemoryTransaction::DATA
) {
errorCode
= N2_Desr::RE_DCL2C
;
fprintf(stderr
,"N2_MemErrDetector::DramThrowCorrectableTrap: bad "
// Set error information in DESR
setDESR(getErrorSteer(bank
),true, errorCode
,0);
trapToErrorSteer(strand
,bank
,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR
);
// DramThrowUncorrectableTrap() throws the appropriate trap to the
// ERRORSTEER strand for bank, based on the kind of memory transaction.
// The visible paths either return a precise trap number
// (INSTRUCTION/DATA_ACCESS_MMU_ERROR, INSTRUCTION/DATA_ACCESS_ERROR)
// after updating the strand's fault status registers, or write the DESR
// with RE_L2U and raise SW_RECOVERABLE_ERROR via trapToErrorSteer().
// NOTE(review): the 'bank' parameter and several branch conditions are
// not visible here -- confirm the full signature and control flow.
N2_MemErrDetector::DramThrowUncorrectableTrap(N2_Strand
*strand
,
const MemoryTransaction
&memXact
,
// True only for a tablewalk whose reference type is INSTR.
bool itablewalk
= ((memXact
.referenceType() == MemoryTransaction::INSTR
) && memXact
.tablewalk());
N2_CererWithBitMux
cerer(strand
->core
.cerer
);
setDESR(getErrorSteer(bank
),false, N2_Desr::RE_L2U
,0);
// Precise traps are only considered when SETER.pscce is set.
if (strand
->seter
.pscce()) {
if (memXact
.tablewalk() && cerer
.hwtwl2()) {
// NOTE(review): presumably 'itablewalk' chooses between the two MMU
// error returns; the selecting branch is not visible here -- confirm.
return SS_Trap::INSTRUCTION_ACCESS_MMU_ERROR
;
return SS_Trap::DATA_ACCESS_MMU_ERROR
;
else if (memXact
.referenceType() == MemoryTransaction::INSTR
&&
N2_InstSfsr
*isfsr
= &(strand
->inst_sfsr
);
isfsr
->error_type(N2_InstSfsr::ICL2U
);
// NOTE(review): the data SFAR is written here, ahead of an
// INSTRUCTION_ACCESS_ERROR return; the data-access path below records
// only the DSFSR.  This may belong to the data path -- confirm.
N2_DataSfar
*dsfar
= &(strand
->data_sfar
);
dsfar
->error_addr(memXact
.getVaddr());
return SS_Trap::INSTRUCTION_ACCESS_ERROR
;
else if (memXact
.referenceType() == MemoryTransaction::DATA
&&
N2_DataSfsr
*dsfsr
= &(strand
->data_sfsr
);
dsfsr
->error_type(N2_DataSfsr::DCL2U
);
return SS_Trap::DATA_ACCESS_ERROR
;
// Remaining data case: report through the DESR and a disrupting trap to
// the ERRORSTEER strand instead of a precise trap.
else if (memXact
.referenceType() == MemoryTransaction::DATA
&&
setDESR(getErrorSteer(bank
),true, N2_Desr::RE_L2U
,0);
trapToErrorSteer(strand
,bank
,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR
);
fprintf(stderr
,"N2_MemErrDetector::"
"DramThrowUncorrectableTrap(): "
"unknown MemoryTranaction type");
// processSocFbdError() injects and detects SOC FBR RAS errors.
// Visible steps: derive the MCU id from the address; if the FBD error
// syndrome register is not yet VALID, mark it VALID and record the
// injected error source; set FBR in the DRAM ESR; consult the FBD count
// register to decide whether a trap is due; log to the L2 ESR and SOC
// error status register when logging is enabled for this MCU; finally
// steer a HW_CORRECTED_ERROR to the target strand.
N2_MemErrDetector::processSocFbdError(N2_Strand
&strand
,
N2_L2AddressingFields paddress
,
N2_SocErrorReg::SocErrRegBitGetFn getFBR
)
// NOTE(review): the other callers in this file pass
// N2_DRAM_PADDR_MCU_SHIFT as the second bit_shift() argument; it is not
// visible in this call -- confirm.
uint32_t mcuID
= bit_shift(paddress
.getNative(),
N2_DRAM_PADDR_NR_MCU_LOG2
);
// if the FBD error syndrome register is clear, then the error can
// be recorded in it.
if (DramFbdErrorSyndromeRegAccess_
.
get(N2_DramFbdErrorSyndromeReg::getVALID
) == 0) {
DramFbdErrorSyndromeRegAccess_
.
set(N2_DramFbdErrorSyndromeReg::setVALID
);
// decode the error type and set the correct bit in the Dram
// Error Syndrome Register
uint64_t errSource
= DramFbdInjectedErrSrcRegAccess_
.
get(N2_DramFbdInjectedErrSrcReg::getERRORSOURCE
);
// NOTE(review): going by the names alone, the label/setter pairing below
// looks crossed (CRC_ERROR -> setSFPE, ALERT_FRAME_ERROR -> setAA,
// ALERT_ASSERTED -> setAFE, STATUS_FRAME_PARITY_ERROR -> setC).  Verify
// against the register definitions.  The enclosing 'switch' and the
// 'break' statements are also not visible here.
case N2_DramFbdInjectedErrSrcReg::CRC_ERROR
:
DramFbdErrorSyndromeRegAccess_
.
set(N2_DramFbdErrorSyndromeReg::setSFPE
);
case N2_DramFbdInjectedErrSrcReg::ALERT_FRAME_ERROR
:
DramFbdErrorSyndromeRegAccess_
.
set(N2_DramFbdErrorSyndromeReg::setAA
);
case N2_DramFbdInjectedErrSrcReg::ALERT_ASSERTED
:
DramFbdErrorSyndromeRegAccess_
.
set(N2_DramFbdErrorSyndromeReg::setAFE
);
case N2_DramFbdInjectedErrSrcReg::STATUS_FRAME_PARITY_ERROR
:
DramFbdErrorSyndromeRegAccess_
.
set(N2_DramFbdErrorSyndromeReg::setC
);
// NOTE(review): 'errSource' is a uint64_t but is printed with "%d";
// the conversion specifier does not match the argument type.
fprintf(stderr
,"N2_MemErrDetector::processSOCErrors: bad "
"error source: %d", errSource
);
// Legal FBR errors are correctable
DramErrorStatusMemAccess_
.set(mcuID
, N2_DramErrorStatusMem::setFBR
);
// decide whether to throw the trap.
N2_DramFbdCountReg dramFbdCountReg;
DramFbdCountRegAccess_.access(mcuID, dramFbdCountReg, true);
// If the COUNTONE bit is set, then always try to throw the trap
if (dramFbdCountReg.getCOUNTONE()) {
// Decrement the count in the Dram FBD register, saturating at 0.
// If the register transitioned from 1 to 0, try to throw the
// trap.
uint64_t count = dramFbdCountReg.getCOUNT();
// True only when the count was exactly 1 and has just reached 0.
if (count != 0 && --count == 0) {
dramFbdCountReg.setCOUNT(count);
DramFbdCountRegAccess_.access(mcuID, dramFbdCountReg, false);
// If error logging is enabled for this MCU
if (socErrorLogEnableRegAccess_
.get(getFBR
)) {
setL2ErrorStatusReg(paddress
.getBANK(),
N2_L2ErrorStatusReg::setDAC
,
0, paddress
.getNative());
socErrorStatusRegAccess_
.set(N2_SocErrorStatusReg::setV
);
// Setter for this MCU's FBR bit in the SOC error status register.
N2_SocErrorReg::SocErrRegBitSetFn setFBR
=
N2_SocErrorReg::setSocErrRegMCUFBR(mcuID
);
socErrorStatusRegAccess_
.set(setFBR
);
// We should try to throw the trap and there is no already
// pending trap, toss it ...
socPendingErrStatusRegAccess_.get(N2_SocPendingErrStatusReg::getV)
socErrorStatusRegAccess_.set(N2_SocErrorStatusReg::setV);
N2_SocErrorStatusReg socErrorStatusReg =
socErrorStatusRegAccess_.set(setFBR);
socPendingErrStatusRegAccess_.
setNative(mcuID, socErrorStatusReg.getNative());
// If FBR errors are fatal, die...
RIESLING_THROW_RUNTIME_ERROR("N2_MemErrDetector::"
"processSocRFbdError: don't "
"support soft reset errors");
// otherwise, direct the HW_CORRECTED_ERROR to the correct strand
uint_t vcID = socErrorSteeringRegAccess_.
get(mcuID, N2_SocErrorSteeringReg::getVCID);
strand.setIntpTaken(vcID, SS_Trap::HW_CORRECTED_ERROR);
// NOTE(review): 'cerer' is constructed but not referenced in the lines
// visible below -- confirm whether a CERER check is expected here.
N2_CererWithBitMux
cerer(strand
.core
.cerer
);
setDESR(getErrorSteer(paddress
.getBANK()),false, N2_Desr::CE_L2C
,0);
trapToErrorSteer(&strand
,paddress
.getBANK(),
SS_Interrupt::BIT_HW_CORRECTED_ERROR
);
// The access() method for the N2_CheckedL2ESRAccess class verifies
// writes to a bank's L2 ESR, as well as processing simple reads.
// This method offers some assurance that updates to a bank's L2 ESR
// conform to N2's behavior.
// The L2 ESR records the presence of many different errors, but only
// has one field for error specific information.  Also, there is no
// mechanism to count multiple errors.
// N2 deals with these constraints by providing a multiple correctable
// and uncorrectable bit (MEC & MEU).  If the L2 ESR has a (un)correctable
// error already set, then the MEC (or MEU) bit is set instead of the
// bit corresponding to the error.
// This method mimics this behavior by checking whether the current
// value of the L2 ESR already contains error state and modifying how
// the register is updated if it does.  See N2 1.1 PRM Tables 12-23
// and 12-24.  The present implementation doesn't claim to precisely
// match the documented behavior, especially if multiple errors occur.
// Because there is only one error address register per bank, N2 only
// allows more severe errors to overwrite this register once it is
// set.  This method indicates that the error address register should
// be overridden by returning true.
// Reads are just passed through to the SS_CsrAccess template's
// access() method.
// NOTE(review): the remaining parameter(s) and the read/write test are
// not visible here; the bodies of most branches below (presumably the
// MEC/MEU adjustments) are not visible either -- confirm.
N2_MemErrDetector::N2_CheckedL2ESRAccess::access(uint64_t ndx
,
N2_L2ErrorStatusReg
&csr
,
SS_CsrAccess
<N2_L2ErrorStatusReg
>::access(ndx
, csr
, true);
bool updateErrorAddress
= false;
// Fetch the register's current contents to compare with the incoming
// value in 'csr'.
N2_L2ErrorStatusReg oldCsr
;
SS_CsrAccess
<N2_L2ErrorStatusReg
>::access(ndx
, oldCsr
, true);
// Check the ESR for the presence of multiple
// errors (both correctable and uncorrectable)
// Existing error is "very uncorrectable": no visible branch requests an
// address update.
if (oldCsr
.isVeryUncorrectable()) {
if (csr
.isVeryUncorrectable()) {
if (csr
.isUncorrectable()) {
if (csr
.isCorrectable()) {
// Existing error is uncorrectable: only a "very uncorrectable" new
// error requests an address update.
else if (oldCsr
.isUncorrectable()) {
if (csr
.isVeryUncorrectable()) {
updateErrorAddress
= true;
if (csr
.isUncorrectable()) {
if (csr
.isCorrectable()) {
// Else check to see if a correctable error already exists
else if (oldCsr
.isCorrectable()) {
// Either uncorrectable class of new error requests an address update.
if (csr
.isVeryUncorrectable()) {
updateErrorAddress
= true;
else if (csr
.isUncorrectable()) {
updateErrorAddress
= true;
// NOTE(review): this looks like the "no previous error" case, in which a
// correctable error also updates the address; the enclosing 'else' is
// not visible here -- confirm.
else if (csr
.isCorrectable()) {
updateErrorAddress
= true;
// Commit the (possibly adjusted) value to the register.
SS_CsrAccess
<N2_L2ErrorStatusReg
>::access(ndx
, csr
, false);
return updateErrorAddress
;
// setL2ErrorStatusReg() sets an error bit, the core/strand id, and the
// syndrome in the L2$ error status register for the given bank.
// The error bit is selected by passing the corresponding member
// function in bitSetFunction, e.g. N2_L2ErrorStatusReg::setLDAC.
// The register is read, modified, and written back through
// L2ErrorStatusRegAccess_; the result of the write-back access is
// tested below.
// NOTE(review): 'vcid', 'syndrome' and 'errorAddress' are used but not
// declared in the parameter list visible here -- confirm the signature.
void N2_MemErrDetector::setL2ErrorStatusReg(uint32_t bank
,
ErrorStatusRegBitSetFn bitSetFunction
,
N2_L2ErrorStatusReg L2ErrorStatusReg
;
L2ErrorStatusRegAccess_
.access(bank
, L2ErrorStatusReg
, true);
// Invoke the caller-selected setter to raise the specific error bit.
CALL_MEMBER_FN(L2ErrorStatusReg
, bitSetFunction
)(1);
// NOTE(review): VEC and VEU are both set unconditionally in the text
// visible here; presumably one of them is chosen by a correctable /
// uncorrectable test that is not visible -- confirm.
L2ErrorStatusReg
.setVEC(1);
L2ErrorStatusReg
.setVEU(1);
L2ErrorStatusReg
.setVCID(vcid
);
L2ErrorStatusReg
.setSYND(syndrome
);
if (L2ErrorStatusRegAccess_
.access(bank
, L2ErrorStatusReg
, false)) {
// setL2ErrorAddressReg() sets the address field in the L2$ error
// address register for the given bank.
// NOTE(review): the signature line of setL2ErrorAddressReg() is not
// visible here; the lines below read like its body -- confirm.
N2_L2ErrorAddressReg L2ErrorAddressReg
;
L2ErrorAddressRegAccess_
.access(bank
, L2ErrorAddressReg
, true);
// The address is shifted right by the width of the register's RSVD0
// field before being stored in the ADDRESS field.
L2ErrorAddressReg
.setADDRESS(errorAddress
>>
N2_L2ErrorAddressReg::bitSizeRSVD0
);
L2ErrorAddressRegAccess_
.access(bank
, L2ErrorAddressReg
, false);
L2ErrorStatusRegAccess_
.access(bank
, L2ErrorStatusReg
, true);
// setL2NotdataErrorReg() sets the NDSP bit, the core/strand id, and the
// syndrome in the L2$ NotData error status register for the given bank.
// The MEND is set if either NDSP or NDDM is already set.
// NOTE(review): 'vcid' and 'errorAddress' are used but not declared in
// the parameter list visible here, and the statement that sets NDSP is
// not visible either -- confirm.
N2_MemErrDetector::setL2NotdataErrorReg(uint32_t bank
,
N2_L2NotdataErrorReg L2NotdataErrorReg
;
L2NotdataErrorRegAccess_
.access(bank
, L2NotdataErrorReg
, true);
// First NotData error for this bank: record the VCID and address.
if (!L2NotdataErrorReg
.getNDSP() && !L2NotdataErrorReg
.getNDDM()) {
L2NotdataErrorReg
.setVCID(vcid
);
// The address is shifted right by the width of the register's RSVD0
// field before being stored in the ADDRESS field.
L2NotdataErrorReg
.setADDRESS(errorAddress
>>
N2_L2NotdataErrorReg::bitSizeRSVD0
);
L2NotdataErrorRegAccess_
.access(bank
, L2NotdataErrorReg
, false);
// NOTE(review): presumably the 'else' path (an error was already
// recorded); the separating 'else' is not visible here -- confirm.
L2NotdataErrorRegAccess_
.set(bank
,
N2_L2NotdataErrorReg::setMEND
);