| 1 | // ========== Copyright Header Begin ========================================== |
| 2 | // |
| 3 | // OpenSPARC T2 Processor File: N2_MemErrDetector.cc |
| 4 | // Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. |
| 5 | // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. |
| 6 | // |
| 7 | // The above named program is free software; you can redistribute it and/or |
| 8 | // modify it under the terms of the GNU General Public |
| 9 | // License version 2 as published by the Free Software Foundation. |
| 10 | // |
| 11 | // The above named program is distributed in the hope that it will be |
| 12 | // useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | // General Public License for more details. |
| 15 | // |
| 16 | // You should have received a copy of the GNU General Public |
| 17 | // License along with this work; if not, write to the Free Software |
| 18 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. |
| 19 | // |
| 20 | // ========== Copyright Header End ============================================ |
| 21 | /************************************************************************ |
| 22 | ** |
| 23 | ** Copyright (C) 2005, Sun Microsystems, Inc. |
| 24 | ** |
| 25 | ** Sun considers its source code as an unpublished, proprietary |
| 26 | ** trade secret and it is available only under strict license provisions. |
| 27 | ** This copyright notice is placed here only to protect Sun in the event |
| 28 | ** the source is deemed a published work. Disassembly, decompilation, |
| 29 | ** or other means of reducing the object code to human readable form |
| 30 | ** is prohibited by the license agreement under which this code is |
| 31 | ** provided to the user or company in possession of this copy. |
| 32 | ** |
| 33 | *************************************************************************/ |
| 34 | |
| 35 | #include <stdlib.h> |
| 36 | #include <sstream> |
| 37 | #include "BL_Hamming_22_6_Synd.h" |
| 38 | #include "BL_Hamming_64_8_Synd.h" |
| 39 | #include "N2_Model.h" |
| 40 | #include "N2_Core.h" |
| 41 | #include "N2_Strand.h" |
| 42 | #include "N2_State.h" |
| 43 | #include "N2_MemErrDetector.h" |
| 44 | #include "SS_CKMemory.h" |
| 45 | #include "BL_Utils.h" |
| 46 | |
| 47 | |
// Helper for invoking a pointer-to-member function.
// Expands to the bound member expression ((object).*(ptrToMember)); the
// caller appends the argument list, e.g. CALL_MEMBER_FN(obj, pmf)(args).
// Both macro arguments are parenthesized so arbitrary expressions may be
// passed.  Localizes the awkward syntax for this language feature.
#define CALL_MEMBER_FN(object,ptrToMember) ((object).*(ptrToMember))
| 51 | |
| 52 | /** |
| 53 | * The N2_MemErrDetector class is used to detect injected RAS errors |
| 54 | * associated with the memory hierarchy. In particular, it models and |
| 55 | * detects errors in the primary and secondary caches and DRAM. |
| 56 | */ |
| 57 | |
| 58 | using namespace std; |
| 59 | |
| 60 | |
| 61 | SS_Trap::Type N2_MemErrDetector::detect_fetch_err( MemoryLevel level, SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Paddr pa) { |
| 62 | |
| 63 | |
| 64 | MemoryTransaction mem_xact; |
| 65 | |
| 66 | mem_xact.setStrand(s->strand_id()); |
| 67 | mem_xact.paddr(pa); |
| 68 | mem_xact.size(64); |
| 69 | mem_xact.access(MemoryTransaction::READ); |
| 70 | mem_xact.referenceType(MemoryTransaction::INSTR); |
| 71 | |
| 72 | return detectErr(mem_xact); |
| 73 | } |
| 74 | |
| 75 | |
| 76 | SS_Trap::Type N2_MemErrDetector::detect_load_err( MemoryLevel level, SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Paddr pa) { |
| 77 | |
| 78 | |
| 79 | MemoryTransaction mem_xact; |
| 80 | |
| 81 | mem_xact.setStrand(s->strand_id()); |
| 82 | mem_xact.paddr(pa); |
| 83 | mem_xact.size(line->len); |
| 84 | mem_xact.access(MemoryTransaction::READ); |
| 85 | mem_xact.referenceType(MemoryTransaction::DATA); |
| 86 | |
| 87 | return detectErr(mem_xact); |
| 88 | } |
| 89 | |
| 90 | SS_Trap::Type N2_MemErrDetector::inject_store_err( MemoryLevel level, SS_Vaddr pc, SS_Vaddr npc, SS_Strand* s, SS_Instr* line, SS_Paddr pa, uint64_t data) { |
| 91 | |
| 92 | |
| 93 | // Stores coherently update all the primary caches so we call |
| 94 | // SS_Model::ras_flush() to approximate this behavior. |
| 95 | s->model->ras_flush(s, pa, line->len, SS_MemErrDetector::INSTR_CACHE); |
| 96 | s->model->ras_flush(s, pa, line->len, SS_MemErrDetector::DATA_CACHE); |
| 97 | |
| 98 | MemoryTransaction mem_xact; |
| 99 | |
| 100 | mem_xact.setStrand(s->strand_id()); |
| 101 | mem_xact.paddr(pa); |
| 102 | mem_xact.size(line->len); |
| 103 | mem_xact.access(MemoryTransaction::WRITE); |
| 104 | mem_xact.referenceType(MemoryTransaction::DATA); |
| 105 | mem_xact.setData(data); |
| 106 | |
| 107 | |
| 108 | return detectErr(mem_xact); |
| 109 | } |
| 110 | |
| 111 | //Injecting errors into registers in the Tick Compare array |
| 112 | BL_EccBits N2_MemErrDetector::n2_tick_cmpr_err_injector(SS_Strand* s, uint64_t data) |
| 113 | { |
| 114 | N2_Strand* n2 = (N2_Strand*)s; |
| 115 | N2_Core& n2_core = n2->core; |
| 116 | // The INTDIS bit (bit 63) has to be flipped before sending the value |
| 117 | // for ecc calculation - 28.11 - N2 PRM rev 1.1 |
| 118 | BL_EccBits ecc_obj = BL_Hamming_64_8_Synd::calc_check_bits((1ULL<<63)^data); |
| 119 | unsigned ecc = 0; |
| 120 | if(ecc_obj.valid()) |
| 121 | { |
| 122 | ecc = ecc_obj.get(); |
| 123 | } |
| 124 | // Check if ENB and TCCU bits are set in N2 Error Injection Register |
| 125 | if ((n2_core.error_inject.ene() == 1) && (n2_core.error_inject.tccu() == 1)) |
| 126 | { |
| 127 | ecc ^= n2_core.error_inject.eccmask(); |
| 128 | // Set back the corrputed ecc |
| 129 | ecc_obj.set(ecc); |
| 130 | } |
| 131 | return ecc_obj; |
| 132 | } |
| 133 | |
// (ASR) Reads of any register in the Tick Compare Array (TCA) trigger this
// routine.
//
// NOTE: the whole detection body below is compiled out with "#if 0", so as
// built this routine ignores its arguments and always returns
// SS_Trap::NO_TRAP.  The remainder of this comment documents the disabled
// implementation.
//
// The disabled code scans the selected register for precise single-bit or
// multi-bit errors, records the error information in the DSFSR/DSFAR and
// returns a precise internal_processor_error trap.
// Correctable errors are detected only if the CERER.TCCP bit is set.
// Uncorrectable errors are detected only if the CERER.TCUP bit is set.
// Errors are recorded only if the PSCCE bit is set in the SETER.
// The syndrome is stored in bits 2 through 9 of the DSFAR.
// The tick compare array index is stored in bits 0 and 1 of the DSFAR:
// 00 - Tick Cmpr 01 - Stick Cmpr 10 - Hstick Cmpr
SS_Trap::Type N2_MemErrDetector::n2_tick_cmpr_precise_err_detector(SS_Strand* s, N2_TickAccess::TickAccessIndex array_index)
{
#if 0
    N2_Strand* n2 = (N2_Strand*)s;
    N2_Core& n2_core = n2->core;
    bool update_dsfar = false;

    uint64_t val = 0;

    // Fetch the current value of the register selected by array_index.
    if (array_index == N2_TickAccess::TICK_CMPR_INDX)
        val = n2->tick_cmpr();
    else if (array_index == N2_TickAccess::STICK_CMPR_INDX)
        val = n2->stick_cmpr();
    else if (array_index == N2_TickAccess::HSTICK_CMPR_INDX)
        val = n2->hstick_cmpr();

    // No valid check bits stored for this register: nothing to check.
    BL_EccBits ecc_obj = n2->tick_cmpr_array_ecc[array_index];
    if(!ecc_obj.valid())
    {
        return SS_Trap::NO_TRAP;
    }
    BL_Hamming_64_8_Synd syndrome = BL_Hamming_64_8_Synd(val,ecc_obj);

    // NOTE(review): because of the else-if below, CERER.TCUP is consulted
    // only when CERER.TCCP is clear -- confirm that this is intended before
    // re-enabling this code.
    if (n2_core.cerer.tccp())
    {
        if (syndrome.isSingleBitError())
        {
            if (n2->seter.pscce())
            {
                n2->data_sfsr.error_type(N2_DataSfsr::TCCP);
                update_dsfar = true;
            }
        }
    }
    else if (n2_core.cerer.tcup())
    {
        if (syndrome.isDoubleBitError() || syndrome.isMultipleBitError())
        {
            if (n2->seter.pscce())
            {
                n2->data_sfsr.error_type(N2_DataSfsr::TCUP);
                update_dsfar = true;
            }
        }
    }
    if (update_dsfar)
    {
        // Pack the syndrome into bits 2..9 and the array index into bits 0..1.
        uint64_t error_add = 0;
        error_add = BL_BitUtility::set_subfield(error_add,syndrome.getSyndrome(),2,9);
        error_add = BL_BitUtility::set_subfield(error_add,array_index,0,1);
        n2->data_sfar.error_addr(error_add);
        return SS_Trap::INTERNAL_PROCESSOR_ERROR;
    }
#endif
    // Detection is currently disabled (see the "#if 0" above).
    return SS_Trap::NO_TRAP;
}
| 200 | |
| 201 | // This routine checks for the presence of disrupting errors (for all the regs |
| 202 | // in the Tick Compare Array). If there is an error,the information is recorded |
| 203 | // in the DESR and a 'sw_recoverable_error' is thrown. Correctable errors are |
| 204 | // detected only if CERER.TCCD bit is set.Uncorrectable errors are detected |
| 205 | // only if CERER.TCUD bit is set.Errors are recorded only if the DE bit is set |
| 206 | // in the SETER.The syndrome is stored in bits 2 thru 9 of DESR.The tick compare |
| 207 | // array index is stored in bits 0 and 1 of DESR. |
| 208 | // 00 - Tick Cmpr 01 - Stick Cmpr 10 - Hstick Cmpr |
| 209 | bool N2_MemErrDetector::n2_tick_cmpr_disrupting_err_detector(SS_Strand* s) |
| 210 | { |
| 211 | bool err_found = false; |
| 212 | bool update_desr = false; |
| 213 | int error_type = 0; |
| 214 | |
| 215 | N2_Strand* n2 = (N2_Strand*)s; |
| 216 | N2_Core& n2_core = n2->core; |
| 217 | |
| 218 | // Check for errors in all the three tick_cmpr registers |
| 219 | for (uint64_t array_index = 0; array_index < N2_TickAccess::TICK_ACCESS_MAX; array_index++) |
| 220 | { |
| 221 | uint64_t val = 0; |
| 222 | |
| 223 | if (array_index == N2_TickAccess::TICK_CMPR_INDX) |
| 224 | val = n2->tick_cmpr(); |
| 225 | else if (array_index == N2_TickAccess::STICK_CMPR_INDX) |
| 226 | val = n2->stick_cmpr(); |
| 227 | else if (array_index == N2_TickAccess::HSTICK_CMPR_INDX) |
| 228 | val = n2->hstick_cmpr(); |
| 229 | |
| 230 | BL_EccBits ecc_obj = n2->tick_cmpr_array_ecc[array_index]; |
| 231 | if(!ecc_obj.valid()) |
| 232 | { |
| 233 | continue; |
| 234 | } |
| 235 | BL_Hamming_64_8_Synd syndrome = BL_Hamming_64_8_Synd(val,ecc_obj); |
| 236 | |
| 237 | if (n2_core.cerer.tccd()) |
| 238 | { |
| 239 | if (syndrome.isSingleBitError()) |
| 240 | { |
| 241 | error_type = N2_Desr::RE_TCCD; |
| 242 | err_found = true; |
| 243 | } |
| 244 | } |
| 245 | else if (n2_core.cerer.tcud()) |
| 246 | { |
| 247 | if (syndrome.isDoubleBitError() || syndrome.isMultipleBitError()) |
| 248 | { |
| 249 | error_type = N2_Desr::RE_TCUD; |
| 250 | err_found = true; |
| 251 | } |
| 252 | } |
| 253 | if (err_found) |
| 254 | { |
| 255 | if (n2->desr.f()) |
| 256 | { |
| 257 | if (n2->desr.s()) |
| 258 | { |
| 259 | // If the DESR already has a pending sw_recoverable_error, the details |
| 260 | // about the current error is not recorded. The presence of muliple |
| 261 | // errors is denoted by setting the 'me' bit in the DESR |
| 262 | n2->desr.me(1); |
| 263 | update_desr = false; |
| 264 | } |
| 265 | else |
| 266 | { |
| 267 | // If the DESR already has a pending hw_corrected_error, the details |
| 268 | // about the previous error is flushed out and the details about the |
| 269 | // current sw_recoverable_error is recorded. 'sw_recoverable' errors |
| 270 | // have higher precedence than hw_corrected errors. The presence of |
| 271 | // muliple errors is denoted by setting the 'me' bit in the DESR |
| 272 | n2->desr.s(1); |
| 273 | n2->desr.me(1); |
| 274 | update_desr = true; |
| 275 | } |
| 276 | } |
| 277 | else |
| 278 | { |
| 279 | // No prior error. |
| 280 | n2->desr.f(1); |
| 281 | n2->desr.s(1); |
| 282 | update_desr = true; |
| 283 | } |
| 284 | |
| 285 | if (update_desr) |
| 286 | { |
| 287 | if (n2->seter.de()) |
| 288 | { |
| 289 | n2->desr.errtype(error_type); |
| 290 | uint64_t error_add = 0; |
| 291 | error_add = BL_BitUtility::set_subfield(error_add,syndrome.getSyndrome(),2,9); |
| 292 | error_add = BL_BitUtility::set_subfield(error_add,array_index,0,1); |
| 293 | n2->desr.erraddr(error_add); |
| 294 | n2->irq.raise(n2,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR); |
| 295 | break; |
| 296 | } |
| 297 | } |
| 298 | } |
| 299 | } |
| 300 | return err_found; |
| 301 | } |
| 302 | |
| 303 | // Routine to flush L2 cache. |
| 304 | // Checks that the "key" in the pa is correct and then calls |
| 305 | // L2CacheFlush() to flush the correct lines. |
| 306 | void N2_MemErrDetector::prefetchICE(SS_Paddr pa) |
| 307 | { |
| 308 | N2_L2CacheFlushAddrFields l2CacheFlushAddr; |
| 309 | l2CacheFlushAddr.set_data(pa); |
| 310 | |
| 311 | if (l2CacheFlushAddr.checkKey()) |
| 312 | L2CacheFlush(l2CacheFlushAddr); |
| 313 | else |
| 314 | fprintf(stderr, "prefetchICE: bad key: %x\n", l2CacheFlushAddr.getKEY()); |
| 315 | } |
| 316 | |
| 317 | SS_Trap::Type N2_MemErrDetector::n2_step_hook(SS_Strand* s) |
| 318 | { |
| 319 | N2_Strand *strand = (N2_Strand*)s; |
| 320 | return strand->flush_store_buffer(); |
| 321 | } |
| 322 | |
| 323 | // ras_flush() flushes part of a strand's I$ or D$. Which cache is |
| 324 | // selected by the "type" argument and the range of the cache to |
| 325 | // invalidate is selected by "pa" and "size". |
| 326 | void N2_MemErrDetector::ras_flush( SS_Strand*_s, SS_Strand* requesting_strand, |
| 327 | SS_Paddr pa, uint64_t size, |
| 328 | CacheType type) |
| 329 | { |
| 330 | if ((_s->strand_id() % N2_Model::NO_STRANDS_PER_CORE) != 0) |
| 331 | return; |
| 332 | |
| 333 | if (type == SS_MemErrDetector::DATA_CACHE && |
| 334 | requesting_strand != NULL && |
| 335 | (_s->strand_id() / N2_Model::NO_STRANDS_PER_CORE) == |
| 336 | (requesting_strand->strand_id() / N2_Model::NO_STRANDS_PER_CORE)) |
| 337 | return; |
| 338 | |
| 339 | N2_Strand* s = (N2_Strand*)_s; |
| 340 | |
| 341 | const uint_t line_size = 1 << (N2_IcacheAddressingFields::WIDTH_RSVD0 + |
| 342 | N2_IcacheAddressingFields::WIDTH_INSTR); |
| 343 | SS_Paddr start_pa = round_down_to_power_of_two(pa, line_size); |
| 344 | SS_Paddr end_pa = round_down_to_power_of_two(pa + size, line_size); |
| 345 | |
| 346 | // Clear all I$ lines matching the (pa, pa_size) address range. |
| 347 | while (start_pa <= end_pa) |
| 348 | { |
| 349 | if (type == SS_MemErrDetector::DATA_CACHE) |
| 350 | s->core.flush_dcache(start_pa); |
| 351 | else |
| 352 | s->core.flush_icache(start_pa); |
| 353 | start_pa += line_size; |
| 354 | } |
| 355 | } |
| 356 | |
| 357 | // The central memory hierarchy error detector. |
| 358 | // |
| 359 | // Detects I- and D-cache, L2 cache, Dram, and SOC FBDIMM RAS errors |
| 360 | // produced by a MemoryTransaction. Models the I- and D-cache, the |
| 361 | // L2$ and their associated error detection mechanisms. Also, detects |
| 362 | // RAS errors produced by Dram and the FBDIMM channels. |
| 363 | // |
| 364 | // If a trap is detected, detectErr() either throws a BasicTrap with |
| 365 | // the correct trap number for precise traps or directs the trap to |
| 366 | // the correct strand with setIntpTrap(). detectErr() checks various |
| 367 | // control registers before throwing traps or changing state. If a |
| 368 | // trap is thrown detectErr() sets error information the correct |
| 369 | // status registers. |
| 370 | // |
| 371 | // Conventions: if multiple traps are generated by the same |
| 372 | // instruction, no more than one is guaranteed to be thrown. Also, |
| 373 | // error state may not be updated correctly for multiple errors |
| 374 | // produced on different cycles. The multiple error bits will be set |
| 375 | // correctly, but the detailed error information will not necessary |
| 376 | // match the hardware's prioritization of information capture. The |
| 377 | // error information will be consistent for one of the errors, but |
| 378 | // detectErr() pick the wrong error's information to save. |
| 379 | // |
| 380 | // detectErr() ignores i/o space accesses. For memory accesses, |
| 381 | // detectErr() dispatches the memory request to either the I-cache or |
| 382 | // the D-cache RAS routines in N2_Core. These routines check for |
| 383 | // primary cache errors and cache misses. If a there is a miss in a |
| 384 | // primary cache, it calls the N2_MemErrDetector::L2CacheFill() method |
| 385 | // to model loading the L2$ and detecting any errors at that level. |
| 386 | // If the L2$ doesn't contain the line, it invokes ??? to model loading the |
| 387 | // line from Dram and detecting any Chip-Kill or FBDIMM errors. |
| 388 | |
| 389 | SS_Trap::Type N2_MemErrDetector::detectErr(const MemoryTransaction &memXact) |
| 390 | { |
| 391 | // skip accesses in i/o space or uncorrected data access for Chip-Kill |
| 392 | //if (memXact.paddr() >= 0x8000000000 || memXact.noDramErrorCorrect()) |
| 393 | // return; |
| 394 | |
| 395 | /* DOWNCAST */ |
| 396 | assert(n2_model); |
| 397 | N2_Strand *strand = (N2_Strand*)n2_model->cpu[0]->strand[memXact.getStrand()]; |
| 398 | MemoryTransaction::RefT refT = memXact.getReferenceType(); |
| 399 | |
| 400 | /* DOWNCAST */ |
| 401 | if (strand != NULL) |
| 402 | { |
| 403 | N2_Core *core = &strand->core; |
| 404 | |
| 405 | // I-cache fetch? |
| 406 | if (refT == MemoryTransaction::INSTR) |
| 407 | { |
| 408 | |
| 409 | // check icache RAS errors |
| 410 | return core->icache_ifetch(memXact, |
| 411 | (!strand->hpstate.hpriv() || |
| 412 | strand->pstate.ie()) && |
| 413 | strand->seter.dhcce(), |
| 414 | memXact.getStrand(), |
| 415 | this); |
| 416 | } |
| 417 | // D-cache fetch? |
| 418 | else if (refT == MemoryTransaction::DATA) |
| 419 | { |
| 420 | // If the memory transaction is a read, check the store buffer for |
| 421 | // pending stores that alias the read's address. |
| 422 | if (memXact.readXact()) |
| 423 | { |
| 424 | SS_Trap::Type tt = strand->check_store_buffer_RAWtrap(memXact); |
| 425 | if(tt != SS_Trap::NO_TRAP) |
| 426 | return tt; |
| 427 | } |
| 428 | |
| 429 | return core->dcache_trans(memXact, |
| 430 | (!strand->hpstate.hpriv() || |
| 431 | strand->pstate.ie()) && |
| 432 | strand->seter.dhcce(), |
| 433 | memXact.getStrand(), |
| 434 | false, |
| 435 | this); |
| 436 | } |
| 437 | } |
| 438 | } |
| 439 | |
| 440 | |
| 441 | // L2 Cache Line Fill routine |
| 442 | // |
| 443 | // Given a MemoryTransaction, L2CacheFill() loads the corresponding |
| 444 | // L2$ line. First, it checks all the tags in the line's way set, |
| 445 | // then the line's VuaD entry for ECC errors. Then, it looks for a |
| 446 | // tag match with the valid bit set. If the appropriate cache line is |
| 447 | // present in the cache, it checks its ECC and, if the memory |
| 448 | // transaction is a store, marks the line dirty. Otherwise, |
| 449 | // L2CacheFill() picks a line to cast out of the cache, checks this |
| 450 | // line's ECC, and then loads the new line into the cache, calculating |
| 451 | // its ECC. |
| 452 | // |
| 453 | // Note that any ECC error will throw the appropriate trap. |
| 454 | // |
| 455 | // Returns trap number to throw if NotData is present in the cache. |
| 456 | |
| 457 | SS_Trap::Type N2_MemErrDetector::L2CacheFill(const MemoryTransaction &memXact) |
| 458 | { |
| 459 | N2_Strand *strand = (N2_Strand*)n2_model->cpu[0]->strand[memXact.getStrand()]; |
| 460 | N2_L2AddressingFields paddr; |
| 461 | paddr.set(memXact.getPaddr()); |
| 462 | |
| 463 | int way; |
| 464 | |
| 465 | L2FixTagsAndTrap(strand, paddr, memXact); |
| 466 | |
| 467 | N2_L2DiagVdMemWithECC diagVD = L2FixVUADAndTrap(strand, paddr, memXact); |
| 468 | |
| 469 | int hit_way = L2FindWay(paddr, diagVD); |
| 470 | |
| 471 | // update L2 cache tag, VauD, and data |
| 472 | |
| 473 | // Was this a miss? |
| 474 | if (hit_way == NO_WAY) |
| 475 | { |
| 476 | return L2CacheMiss(strand, memXact, diagVD); |
| 477 | } |
| 478 | else |
| 479 | { |
| 480 | // hit. hit_way contains matching way |
| 481 | return L2CacheHit(strand, memXact, diagVD, hit_way); |
| 482 | } |
| 483 | return SS_Trap::NO_TRAP; |
| 484 | } |
| 485 | |
| 486 | // L2 Cache Line Flush routine |
| 487 | // |
| 488 | // Given a way in the L$2 selected by diagAddr, L2CacheFlush() flushes |
| 489 | // the corresponding L2$ line, writing it back to memory if it's |
| 490 | // dirty. First, it checks all the tags in the line's way set, then |
| 491 | // the line's VuaD entry for ECC errors. It corrects these errors |
| 492 | // without trapping. Then it clears the valid and dirty VuaD bits for |
| 493 | // the cache associated with the address. |
| 494 | // |
| 495 | // Note that the "real" L2$ must write the line back to memory; |
| 496 | // however in this L2$ implementation, the correct data has already |
| 497 | // been written to memory, so invalidation is all that's needed. |
| 498 | |
| 499 | void N2_MemErrDetector::L2CacheFlush(N2_L2CacheFlushAddrFields diagAddr) |
| 500 | { |
| 501 | // create a physical address that matches the way and bank of diagAddr |
| 502 | N2_L2AddressingFields paddr; |
| 503 | paddr.setSET(diagAddr.getSET()); |
| 504 | paddr.setBANK(diagAddr.getBANK()); |
| 505 | L2FixTags(paddr); |
| 506 | |
| 507 | uint_t way = diagAddr.getWAY(); |
| 508 | |
| 509 | uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way); |
| 510 | |
| 511 | N2_L2DiagTagMem diagTag; |
| 512 | L2DiagTagMemAccess_.access(diagNdx, diagTag, true); |
| 513 | paddr.setTAG(diagTag.getTAG()); |
| 514 | |
| 515 | N2_L2DiagVdMemWithECC diagVD; |
| 516 | L2FixVUAD(paddr, diagVD); |
| 517 | |
| 518 | // if the way is valid, invalidate it by clearing the way's dirty |
| 519 | // and valid bits in VuaD |
| 520 | if (diagVD.getVALID() & (1<<way)) { |
| 521 | // do we need to flush the cache line? |
| 522 | if (diagVD.getDIRTY() & (1<<way)) { |
| 523 | int i; |
| 524 | N2_L2AddressingFields ckPaddr; |
| 525 | ckPaddr.setNative(paddr.getNative() & ~(SS_CKMemory::DRAM_LINE_LENGTH - 1)); |
| 526 | N2_L2CacheLineError lineError(ckPaddr.getNative()); |
| 527 | |
| 528 | for (i = 0; i < SS_CKMemory::DRAM_LINE_LENGTH/N2_L2_CACHE_LINE_SIZE; ++i) { |
| 529 | lineError = L2ProcessCacheLine(ckPaddr, way, false); |
| 530 | if (lineError.isUncorrectable()) { |
| 531 | break; |
| 532 | } |
| 533 | ckPaddr.setNative(ckPaddr.getNative() + N2_L2_CACHE_LINE_SIZE); |
| 534 | } |
| 535 | ckPaddr.setNative(ckPaddr.getNative() & ~(SS_CKMemory::DRAM_LINE_LENGTH - 1)); |
| 536 | dramUpdateECC(ckPaddr, lineError.isUncorrectable()); |
| 537 | } |
| 538 | |
| 539 | diagVD.setVALID(diagVD.getVALID() & ~(1<<way)); |
| 540 | diagVD.setDIRTY(diagVD.getDIRTY() & ~(1<<way)); |
| 541 | diagVD.setVDECC(diagVD.calcECC()); |
| 542 | |
| 543 | L2DiagVdMemAccess_.access(paddrToSetBankNdx(paddr), diagVD, false); |
| 544 | } |
| 545 | |
| 546 | // Now flush the primary caches for all the Cores |
| 547 | |
| 548 | paddr.setWORD(0); // align L2$ address to beginning of L2$ line |
| 549 | |
| 550 | N2_IcacheAddressingFields icacheAddr; |
| 551 | icacheAddr.set(paddr.getNative()); |
| 552 | |
| 553 | // Clear all decode cache lines matching the (pa, pa_size) address range. |
| 554 | assert(N2_L2_CACHE_LINE_SIZE == SS_InstrCache::LINE_SIZE *4); |
| 555 | for (uint_t cpu_ndx = 0; cpu_ndx < N2_Model::NO_CPUS;++cpu_ndx) |
| 556 | for (uint_t strand_ndx = 0; strand_ndx < N2_Model::NO_STRANDS_PER_CPU; |
| 557 | ++strand_ndx) |
| 558 | n2_model->cpu[cpu_ndx]->strand[strand_ndx]->flush(paddr.getNative(),true); |
| 559 | |
| 560 | for(int i=0;i < N2_L2_CACHE_LINE_SIZE/N2_IcacheAddressingFields::N2_ICACHE_LINE_SIZE;i++){ |
| 561 | //TODO verify size |
| 562 | n2_model->ras_flush(NULL, icacheAddr(), 8, |
| 563 | SS_MemErrDetector::INSTR_CACHE); |
| 564 | icacheAddr.sets(icacheAddr.sets() + 1); |
| 565 | } |
| 566 | |
| 567 | N2_DcacheAddressingFields dcacheAddr; |
| 568 | dcacheAddr.set(paddr.getNative()); |
| 569 | for(int i=0;i < N2_L2_CACHE_LINE_SIZE/N2_DcacheAddressingFields::N2_DCACHE_LINE_SIZE;i++){ |
| 570 | //TODO verify size |
| 571 | n2_model->ras_flush(NULL, dcacheAddr(), 8, |
| 572 | SS_MemErrDetector::DATA_CACHE); |
| 573 | dcacheAddr.sets(dcacheAddr.sets() + 1); |
| 574 | } |
| 575 | } |
| 576 | |
| 577 | // L2FixTags() checks for RAS errors in all the L2$ tags that match |
| 578 | // the address in paddr. If there are any single-bit errors, it |
| 579 | // corrects them without throwing a trap. |
| 580 | |
| 581 | void N2_MemErrDetector::L2FixTags(N2_L2AddressingFields paddr) |
| 582 | { |
| 583 | // Check all ways for tag ECC error |
| 584 | for (int way = 0; way < (1<<N2_L2DiagDataAddressingFields::bitSizeWAY); |
| 585 | way++) { |
| 586 | uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way); |
| 587 | |
| 588 | L2FixTag(diagNdx); |
| 589 | } |
| 590 | } |
| 591 | |
| 592 | // L2 Cache Line Check Tags routine |
| 593 | // |
| 594 | // Given a MemoryTransaction, L2FixTagsAndTrap() checks all the tags |
| 595 | // that match the address in paddr. If there are any single-bit |
| 596 | // errors, it corrects and throws the appropriate trap. |
| 597 | |
| 598 | void N2_MemErrDetector::L2FixTagsAndTrap(N2_Strand *strand, |
| 599 | N2_L2AddressingFields paddr, |
| 600 | const MemoryTransaction &memXact) |
| 601 | { |
| 602 | // Check all ways for tag ECC error |
| 603 | for (int way = 0;way < (1<<N2_L2DiagDataAddressingFields::bitSizeWAY);way++) |
| 604 | { |
| 605 | uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way); |
| 606 | |
| 607 | if (L2FixTag(diagNdx)) |
| 608 | { |
| 609 | // Set LTC in L2_ERROR_STATUS register and |
| 610 | // paddr[39:6] in L2_ERROR_ADDRESS register |
| 611 | setL2ErrorStatusReg(paddr.getBANK(), N2_L2ErrorStatusReg::setLTC, |
| 612 | true, 0, 0, paddr.getNative()); |
| 613 | |
| 614 | // Set error information in DESR |
| 615 | N2_CererWithBitMux cerer(strand->core.cerer); |
| 616 | // The N2 PRM Rev 1.1 is vague. Sect 12.9.5 refers to the |
| 617 | // L2C bit in the CERER, which doesn't exist. We |
| 618 | // interpret this to mean the family of L2C bits in Table |
| 619 | // 12-4 and select any of them. |
| 620 | if (getCEEN(paddr.getBANK()) && cerer.checkOneL2Cbit(memXact)) |
| 621 | { |
| 622 | uint32_t strandId = getErrorSteer(paddr.getBANK()); |
| 623 | setDESR(strandId,false, N2_Desr::CE_L2C,0); |
| 624 | |
| 625 | // Throw trap to ERRORSTEER |
| 626 | trapToErrorSteer(strand, paddr.getBANK(), |
| 627 | SS_Interrupt::BIT_HW_CORRECTED_ERROR); |
| 628 | |
| 629 | } |
| 630 | } |
| 631 | } |
| 632 | } |
| 633 | |
| 634 | |
| 635 | // L2FixTag() checks and fixes any L2$ tag RAS error at diagnostic |
| 636 | // access index "diagNdx". It returns true if there is an tag error. |
| 637 | |
| 638 | bool N2_MemErrDetector::L2FixTag(uint32_t diagNdx) |
| 639 | { |
| 640 | N2_L2DiagTagMem diagTag; |
| 641 | |
| 642 | L2DiagTagMemAccess_.access(diagNdx, diagTag, true); |
| 643 | |
| 644 | BL_Hamming_22_6_Synd tagSyndrome(diagTag.getTAG(), |
| 645 | diagTag.getECC()); |
| 646 | |
| 647 | if (!tagSyndrome.noError()) |
| 648 | { |
| 649 | if (tagSyndrome.isDataBitError()) { |
| 650 | uint32_t dataBit = tagSyndrome.getDataBit(); |
| 651 | diagTag.setTAG(diagTag.getTAG() ^ (1<<dataBit)); |
| 652 | RAS_OSTR << "L2CacheFill: correcting data bit " << |
| 653 | dataBit << endl; |
| 654 | } else if (tagSyndrome.isCheckBitError()) |
| 655 | { |
| 656 | uint32_t checkBit = tagSyndrome.getCheckBit(); |
| 657 | diagTag.setECC(diagTag.getECC() ^ (1<<checkBit)); |
| 658 | RAS_OSTR << "L2CacheFill: correcting check bit " << |
| 659 | checkBit << endl; |
| 660 | } else |
| 661 | { |
| 662 | fprintf(stderr,"L2CheckTags: double bit tag error"); |
| 663 | exit(-1); |
| 664 | } |
| 665 | L2DiagTagMemAccess_.access(diagNdx, diagTag, false); |
| 666 | |
| 667 | return true; |
| 668 | } |
| 669 | return false; |
| 670 | } |
| 671 | |
| 672 | // L2FixVUAD() checks and fixes a RAS error for the VuaD bits associated |
| 673 | // with the physical address, "paddr". It returns the value of VUAD |
| 674 | // diagnostic register (with ECC). |
| 675 | N2_MemErrDetector::N2_L2VaudSyndrome |
| 676 | N2_MemErrDetector::L2FixVUAD(N2_L2AddressingFields paddr, |
| 677 | N2_L2DiagVdMemWithECC &diagVD) |
| 678 | { |
| 679 | |
| 680 | uint32_t setBankNdx = paddrToSetBankNdx(paddr); |
| 681 | // Get UA bits for this set |
| 682 | N2_L2DiagUaMemWithECC diagUA; |
| 683 | L2DiagUaMemAccess_.access(setBankNdx, diagUA, true); |
| 684 | |
| 685 | // Get VD bits for this set |
| 686 | L2DiagVdMemAccess_.access(setBankNdx, diagVD, true); |
| 687 | |
| 688 | // Check VD Ecc |
| 689 | BL_Hamming_32_7_Synd vdSyndrome = diagVD.getSyndrome(); |
| 690 | BL_Hamming_32_7_Synd uaSyndrome = diagUA.getSyndrome(); |
| 691 | N2_L2VaudSyndrome vuadSyndrome(vdSyndrome.getSyndrome(),uaSyndrome.getSyndrome()) ; |
| 692 | |
| 693 | if (!vdSyndrome.noError()) { |
| 694 | RAS_OSTR << "L2FixVUAD: bad VD ECC expected 0x" << |
| 695 | hex << diagVD.getVDECC() << |
| 696 | " got 0x" << hex << vdSyndrome.getSyndrome() << endl; |
| 697 | |
| 698 | if (vdSyndrome.isDataBitError()) { |
| 699 | uint32_t dataBit = vdSyndrome.getDataBit(); |
| 700 | diagVD.setVD(diagVD.getVD() ^ (1<<dataBit)); |
| 701 | } else if (vdSyndrome.isCheckBitError()) { |
| 702 | uint32_t checkBit = vdSyndrome.getCheckBit(); |
| 703 | diagVD.setVDECC(diagVD.getVDECC() ^ (1<<checkBit)); |
| 704 | } else { |
| 705 | fprintf(stderr,"L2FixVUAD: double bit " |
| 706 | "VuaD error"); |
| 707 | exit(-1); |
| 708 | } |
| 709 | L2DiagVdMemAccess_.access(setBankNdx, diagVD, false); |
| 710 | } |
| 711 | |
| 712 | if (!uaSyndrome.noError()) { |
| 713 | RAS_OSTR << "L2FixVUAD: bad UA ECC expected 0x" << |
| 714 | hex << diagUA.getUAECC() << |
| 715 | " got 0x" << hex << uaSyndrome.getSyndrome() << endl; |
| 716 | |
| 717 | if (uaSyndrome.isDataBitError()) { |
| 718 | uint32_t dataBit = uaSyndrome.getDataBit(); |
| 719 | diagUA.setUA(diagUA.getUA() ^ (1<<dataBit)); |
| 720 | } else if (uaSyndrome.isCheckBitError()) { |
| 721 | uint32_t checkBit = uaSyndrome.getCheckBit(); |
| 722 | diagUA.setUAECC(diagUA.getUAECC() ^ (1<<checkBit)); |
| 723 | } else { |
| 724 | fprintf(stderr,"L2FixVUAD: double bit " |
| 725 | "VuaD error\n"); |
| 726 | exit(-1); |
| 727 | } |
| 728 | L2DiagUaMemAccess_.access(setBankNdx, diagUA, false); |
| 729 | } |
| 730 | |
| 731 | return vuadSyndrome; |
| 732 | } |
| 733 | |
| 734 | |
| 735 | // L2FixVUADAndTrap() corrects an error in the VuaD bits for a physical |
| 736 | // address and throws any appropriate trap. |
| 737 | |
| 738 | N2_MemErrDetector::N2_L2DiagVdMemWithECC |
| 739 | N2_MemErrDetector::L2FixVUADAndTrap(N2_Strand *strand, |
| 740 | N2_L2AddressingFields paddr, |
| 741 | const MemoryTransaction &memXact) |
| 742 | { |
| 743 | N2_L2DiagVdMemWithECC diagVD; |
| 744 | N2_L2VaudSyndrome vuad_syndrome = L2FixVUAD(paddr, diagVD); |
| 745 | uint16_t vuadSyndrome = ((vuad_syndrome.vdSyndrome_.getSyndrome() << 0x7) | vuad_syndrome.uaSyndrome_.getSyndrome()); |
| 746 | |
| 747 | if (vuadSyndrome) { |
| 748 | // Set LVC in L2_ERROR_STATUS register |
| 749 | // See N2 PRM Rev 1.1 Tbl 12-22 |
| 750 | // Set paddr[39:6] in L2_ERROR_ADDRESS register |
| 751 | setL2ErrorStatusReg(paddr.getBANK(), |
| 752 | N2_L2ErrorStatusReg::setLVC, |
| 753 | true, |
| 754 | getErrorSteer(paddr.getBANK()), |
| 755 | vuadSyndrome, |
| 756 | paddr.getNative()); |
| 757 | |
| 758 | N2_CererWithBitMux cerer(strand->core.cerer); |
| 759 | if (getCEEN(paddr.getBANK()) && cerer.checkOneL2Cbit(memXact)) { |
| 760 | // Set error information in DESR |
| 761 | uint32_t strandId = getErrorSteer(paddr.getBANK()); |
| 762 | setDESR(strandId,false, N2_Desr::CE_L2C,0); |
| 763 | |
| 764 | // Throw trap to ERRORSTEER |
| 765 | trapToErrorSteer(strand, paddr.getBANK(), |
| 766 | SS_Interrupt::BIT_HW_CORRECTED_ERROR); |
| 767 | } |
| 768 | } |
| 769 | return diagVD; |
| 770 | } |
| 771 | |
| 772 | |
| 773 | // L2FindWay() searches the L2$ to see if any cache lines match the |
| 774 | // passed address. L2FindWay() also checks the valid bits in cache |
| 775 | // set's VuaD information to make sure the line is valid. |
| 776 | |
| 777 | int |
| 778 | N2_MemErrDetector::L2FindWay(N2_L2AddressingFields paddr, |
| 779 | N2_L2DiagVdMemWithECC diagVD) |
| 780 | { |
| 781 | int hit_way = NO_WAY; // assume miss |
| 782 | for (int way = 0; way < (1<<N2_L2DiagDataAddressingFields::bitSizeWAY); |
| 783 | way++) { |
| 784 | uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way); |
| 785 | N2_L2DiagTagMem diagTag; |
| 786 | |
| 787 | L2DiagTagMemAccess_.access(diagNdx, diagTag, true); |
| 788 | |
| 789 | if (diagTag.getTAG() == paddr.getTAG()) { |
| 790 | if (diagVD.getVALID() & (1<<way)) { |
| 791 | hit_way = way; |
| 792 | } |
| 793 | } |
| 794 | } |
| 795 | return hit_way; |
| 796 | } |
| 797 | |
| 798 | |
| 799 | // L2CacheMiss() processes an L2$ miss. Selects a way to invalidate |
| 800 | // (or victimize) using the per bank Not Recently Used pointer, |
| 801 | // L2CacheWaysNRUPtr[]. If the way's cache line is dirty, update the |
| 802 | // Chip-Kill ECC for line flushed to memory. If the memory transaction |
| 803 | // is a write, mark the line as dirty. |
| 804 | // |
| 805 | // The L2$ cache line's data is read from memory (using Chip-Kill |
| 806 | // correction) and its data ECC is calcuated and saved. |
| 807 | // |
| 808 | // Finally, dramProcessMemOp() is called to detect Chip-Kill errors. |
| 809 | // If the memory Chip-Kill line is poisoned (NotData), then the current |
| 810 | // L2$ line is poisoned as well. |
| 811 | // |
| 812 | // If the miss detects poison, L2CacheMiss() returns a trap number, |
| 813 | // else 0. |
| 814 | |
| 815 | SS_Trap::Type |
| 816 | N2_MemErrDetector::L2CacheMiss(N2_Strand *strand, |
| 817 | const MemoryTransaction &memXact, |
| 818 | N2_L2DiagVdMemWithECC &diagVD) |
| 819 | { |
| 820 | N2_L2AddressingFields paddr; |
| 821 | paddr.setNative(memXact.getPaddr()); |
| 822 | uint32_t bank = paddr.getBANK(); |
| 823 | int way = L2CacheWaysNRUPtr_[bank]; |
| 824 | |
| 825 | // flush line if dirty, checking data ECC -- and clear dirty bit |
| 826 | // If ECC error, throw trap to ERRORSTEER. |
| 827 | uint32_t valid = diagVD.getVALID(); |
| 828 | uint32_t dirty = diagVD.getDIRTY(); |
| 829 | if (dirty & (1<<way)) { |
| 830 | // Update DRAM's ECC for this cache line |
| 831 | // Note that the flushed line has a different physical address |
| 832 | // than the original memory transaction. |
| 833 | N2_L2AddressingFields writeBackPaddr; |
| 834 | |
| 835 | writeBackPaddr.setBANK(bank); |
| 836 | writeBackPaddr.setSET(paddr.getSET()); |
| 837 | uint64_t writeBackDiagNdx = |
| 838 | paddrToWaySetBankNdx(writeBackPaddr, way); |
| 839 | N2_L2DiagTagMem writeBackDiagTag; |
| 840 | |
| 841 | L2DiagTagMemAccess_.access(writeBackDiagNdx, writeBackDiagTag, |
| 842 | true); |
| 843 | writeBackPaddr.setTAG(writeBackDiagTag.getTAG()); |
| 844 | |
| 845 | // flush line if dirty, checking data ECC -- and clear dirty bit |
| 846 | // If ECC error, throw trap to ERRORSTEER. |
| 847 | N2_L2CacheLineError lineError = L2ProcessCacheLine(writeBackPaddr, way, false); |
| 848 | |
| 849 | // Throw a trap on all bad ECC |
| 850 | if (lineError.isError() && !lineError.isNotData()) { |
| 851 | ThrowL2DataWriteBackTrap(memXact, strand, bank, lineError); |
| 852 | } |
| 853 | dirty &= ~(1<<way); |
| 854 | |
| 855 | // Truncate to memory cache line alignemnt |
| 856 | writeBackPaddr.setNative(writeBackPaddr.getNative() & |
| 857 | ~(SS_CKMemory::DRAM_LINE_LENGTH - 1)); |
| 858 | dramUpdateECC(writeBackPaddr, lineError.isNotData()); |
| 859 | } |
| 860 | valid |= (1<<way); // line is valid |
| 861 | // if write or read-write, set dirty bit |
| 862 | if (memXact.writeXact()) { |
| 863 | dirty |= (1<<way); |
| 864 | // If debugging chip-kill, set ECC for every write |
| 865 | if (debugChipKill_) { |
| 866 | N2_L2AddressingFields tmpPaddr; |
| 867 | tmpPaddr.setNative(paddr.getNative() & |
| 868 | ~(SS_CKMemory::DRAM_LINE_LENGTH - 1)); |
| 869 | dramUpdateECC(tmpPaddr, false); |
| 870 | } |
| 871 | } |
| 872 | |
| 873 | diagVD.setVALID(valid); |
| 874 | diagVD.setDIRTY(dirty); |
| 875 | diagVD.setVDECC(diagVD.calcECC()); |
| 876 | uint32_t setBankNdx = paddrToSetBankNdx(paddr); |
| 877 | L2DiagVdMemAccess_.access(setBankNdx, diagVD, false); |
| 878 | |
| 879 | // fetch line from memory |
| 880 | (void)L2ProcessCacheLine(paddr, way, true); |
| 881 | |
| 882 | // update tag with filled line |
| 883 | uint32_t diagNdx = paddrToWaySetBankNdx(paddr, way); |
| 884 | N2_L2DiagTagMem diagTag; |
| 885 | |
| 886 | diagTag.setTAG(paddr.getTAG()); |
| 887 | diagTag.setECC((BL_Hamming_22_6_Synd::calc_check_bits(diagTag.getTAG())).get()); |
| 888 | |
| 889 | L2DiagTagMemAccess_.access(diagNdx, diagTag, false); |
| 890 | |
| 891 | // advance bank's NRU pointer |
| 892 | L2CacheWaysNRUPtr_[bank] = ((way+1) % |
| 893 | (1<<N2_L2DiagDataAddressingFields::bitSizeWAY)); |
| 894 | |
| 895 | // Truncate to memory cache line alignemnt |
| 896 | paddr.setNative(paddr.getNative() & ~(SS_CKMemory::DRAM_LINE_LENGTH - 1)); |
| 897 | // Check DRAM's ECC; poison L2$ line and primary $ line if uncorrectable |
| 898 | bool isUncorrectable = false; |
| 899 | SS_Trap::Type trap = dramProcessMemOp(strand, memXact, paddr,isUncorrectable); |
| 900 | if(trap != SS_Trap::NO_TRAP) |
| 901 | return trap; |
| 902 | if(isUncorrectable){ |
| 903 | poisonL2Line(paddr, way); |
| 904 | } |
| 905 | |
| 906 | return SS_Trap::NO_TRAP; |
| 907 | } |
| 908 | |
| 909 | // L2CacheHit() processes an L2$ hit. If the memory transaction |
| 910 | // is a write, marks the line as dirty. |
| 911 | // |
| 912 | // Verifies the cache line's ECC with L2ProcessCacheLine(). If line |
| 913 | // has an ECC error, ThrowL2DataTrap() is called to cough up the |
| 914 | // appropriate hairball (i.e. throw the correct ECC trap). |
| 915 | // |
| 916 | // If cache line contains NotData, L2CacheHit() returns trap number to |
| 917 | // throw, else 0 |
| 918 | |
| 919 | SS_Trap::Type |
| 920 | N2_MemErrDetector::L2CacheHit(N2_Strand *strand, |
| 921 | const MemoryTransaction &memXact, |
| 922 | N2_L2DiagVdMemWithECC &diagVD, |
| 923 | int hit_way) |
| 924 | { |
| 925 | N2_L2AddressingFields paddr; |
| 926 | paddr.setNative(memXact.getPaddr()); |
| 927 | |
| 928 | // if write or read-write, set dirty bit |
| 929 | if (memXact.writeXact()) { |
| 930 | uint32_t dirty = diagVD.getDIRTY(); |
| 931 | dirty |= (1<<hit_way); |
| 932 | diagVD.setDIRTY(dirty); |
| 933 | diagVD.setVDECC(diagVD.calcECC()); |
| 934 | |
| 935 | L2DiagVdMemAccess_.access(paddrToSetBankNdx(paddr), diagVD, false); |
| 936 | /* update the data ecc for the complete cache line*/ |
| 937 | L2ProcessCacheLine(paddr, hit_way, true); |
| 938 | } |
| 939 | else{ |
| 940 | // verify data ECC |
| 941 | N2_L2CacheLineError lineError = |
| 942 | L2ProcessCacheLine(paddr, hit_way, false); |
| 943 | if (lineError.isError()) { |
| 944 | return ThrowL2DataTrap(memXact, strand, paddr.getBANK(), lineError); |
| 945 | } |
| 946 | } |
| 947 | |
| 948 | // Do we need to access DRAM for READ_WRITE to check ECC? We |
| 949 | // don't need to actually write to memory here. The |
| 950 | // memXact.access() routine, which has called us, handles that. |
| 951 | return SS_Trap::NO_TRAP; |
| 952 | } |
| 953 | |
| 954 | |
| 955 | // L2ProcessCacheLine() either reads a cache line, calculating its ECC |
| 956 | // or verifies a cache line's ECC. |
| 957 | // |
| 958 | // If verifying the ECC for a cache line and an ECC error is found, it |
| 959 | // returns the ECC syndromes for the first quarterline where an error occurs. |
| 960 | |
| 961 | N2_MemErrDetector::N2_L2CacheLineError |
| 962 | N2_MemErrDetector::L2ProcessCacheLine(N2_L2AddressingFields paddr, |
| 963 | uint32_t way, |
| 964 | bool isRead) |
| 965 | { |
| 966 | // mask out LSB's to set paddr to the beginning of the cache |
| 967 | // quarterline |
| 968 | paddr.setNative(paddr.getNative() & ~(N2_L2_CACHE_LINE_SIZE/4-1)); |
| 969 | |
| 970 | N2_L2CacheLineError returnError(paddr.getNative()); |
| 971 | |
| 972 | for (int i = 0; i < 4; ++i) { |
| 973 | N2_L2AddressingFields quarterLinePaddr; |
| 974 | quarterLinePaddr.setNative(paddr.getNative() | |
| 975 | (i*N2_L2_CACHE_LINE_SIZE/4) % N2_L2_CACHE_LINE_SIZE); |
| 976 | N2_L2CacheLineError qLineError = |
| 977 | L2ProcessQuarterLine(quarterLinePaddr, way, isRead); |
| 978 | if(qLineError.isError()){ |
| 979 | RAS_OSTR << "L2ProcessCacheLine: qline syndrome 0x" << |
| 980 | hex << qLineError.qLineSyndrome() << |
| 981 | " paddr 0x" << hex << qLineError.errorPaddr() << endl; |
| 982 | } |
| 983 | if (qLineError.isError() && !returnError.isError()) { |
| 984 | returnError = qLineError; |
| 985 | } |
| 986 | } |
| 987 | return returnError; |
| 988 | } |
| 989 | |
| 990 | |
| 991 | // L2ProcessQuarterLine() either reads a quarter of a cache line, |
| 992 | // calculating its ECC or verifies a quarter cache line's ECC. |
| 993 | // |
| 994 | // If verifying the ECC for a cache line and an ECC error is found, it |
| 995 | // returns the ECC syndromes for this quarterline. |
| 996 | |
| 997 | N2_MemErrDetector::N2_L2CacheLineError |
| 998 | N2_MemErrDetector::L2ProcessQuarterLine(N2_L2AddressingFields paddr, |
| 999 | uint32_t way, |
| 1000 | bool isRead) |
| 1001 | { |
| 1002 | if (paddr.getNative() % (N2_L2_CACHE_LINE_SIZE/4)){ |
| 1003 | fprintf(stderr,"L2ProcessQuarterLine: bad paddr"); |
| 1004 | exit(-1); |
| 1005 | } |
| 1006 | |
| 1007 | int word = 0; |
| 1008 | N2_L2CacheLineError l2CacheLineError(paddr.getNative()); |
| 1009 | |
| 1010 | // words are 64-bits in N2 land -- at least in PRM Rev 1.1 Tbl 28-43. |
| 1011 | // There are two 64-bit "words" in a quarter cache line |
| 1012 | for (word = 0; word < (N2_L2_CACHE_LINE_SIZE/4)/sizeof(double); ++word) { |
| 1013 | N2_L2DiagDataAddressingFields diagAddr; |
| 1014 | |
| 1015 | diagAddr.setBANK(paddr.getBANK()); |
| 1016 | diagAddr.setWAY(way); |
| 1017 | diagAddr.setWORD(paddr.getWORD() + word); |
| 1018 | diagAddr.setSET(paddr.getSET()); |
| 1019 | diagAddr.setODDEVEN(0); |
| 1020 | |
| 1021 | uint32_t hi_data, lo_data; |
| 1022 | |
| 1023 | if (isRead) { |
| 1024 | // get the cache line from memory |
| 1025 | uint64_t data = ((SS_CKMemory*)(n2_model->cpu[0]->strand[0]->memory))->peek8u(paddr.getNative()); |
| 1026 | hi_data = (data >> 32) & 0xffffffff; |
| 1027 | lo_data= data & 0xffffffff; |
| 1028 | } |
| 1029 | N2_L2DiagDataMemWithECC diagData; |
| 1030 | uint32_t diagNdx = // |
| 1031 | diagAddr.getNative()/N2_L2DiagDataMemWithECC::Stride; |
| 1032 | |
| 1033 | if (isRead) { |
| 1034 | diagData.setDATA(lo_data); |
| 1035 | diagData.setECC(diagData.calcECC()); |
| 1036 | // set even half of 64-bit word |
| 1037 | L2DiagDataMemAccess_.access(diagNdx, diagData, false); |
| 1038 | } else { |
| 1039 | L2DiagDataMemAccess_.access(diagNdx, diagData, true); |
| 1040 | l2CacheLineError.addQuarterLine(diagData.getSyndrome()); |
| 1041 | } |
| 1042 | |
| 1043 | diagAddr.setODDEVEN(1); |
| 1044 | diagNdx = diagAddr.getNative()/N2_L2DiagDataMemWithECC::Stride; |
| 1045 | if (isRead) { |
| 1046 | diagData.setDATA(hi_data); |
| 1047 | diagData.setECC(diagData.calcECC()); |
| 1048 | // set odd half of 64-bit word |
| 1049 | L2DiagDataMemAccess_.access(diagNdx, diagData, false); |
| 1050 | } else { |
| 1051 | L2DiagDataMemAccess_.access(diagNdx, diagData, true); |
| 1052 | l2CacheLineError.addQuarterLine(diagData.getSyndrome()); |
| 1053 | } |
| 1054 | } |
| 1055 | return l2CacheLineError; |
| 1056 | } |
| 1057 | |
| 1058 | |
| 1059 | // ThrowL2DataTrap() handles the details of setting status registers |
| 1060 | // and conditionally throwing the right trap. |
| 1061 | // |
| 1062 | // Returns trap number to throw if the cache line contains NotData |
| 1063 | // (i.e. poison). |
| 1064 | |
| 1065 | SS_Trap::Type |
| 1066 | N2_MemErrDetector::ThrowL2DataTrap(const MemoryTransaction &memXact, |
| 1067 | N2_Strand *strand, |
| 1068 | uint32_t bank, |
| 1069 | N2_L2CacheLineError lineError) |
| 1070 | { |
| 1071 | if (lineError.isCorrectable()) { |
| 1072 | ThrowL2DataCorrectableTrap(memXact, strand, bank, lineError); |
| 1073 | return SS_Trap::NO_TRAP; |
| 1074 | } else { |
| 1075 | return ThrowL2DataUncorrectableTrap(memXact, strand, bank, lineError); |
| 1076 | } |
| 1077 | } |
| 1078 | |
| 1079 | // ThrowL2DataCorrectableTrap() throws the correct disrupting trap |
| 1080 | // after setting the correct bits in various error status registers. |
| 1081 | // |
| 1082 | // This routine is quite meticulous as the memory transaction, the |
| 1083 | // error conditions, processor state, and, even it might seem, the |
| 1084 | // phase of the moon, influence the behavior of the trap processing. |
| 1085 | |
| 1086 | void |
| 1087 | N2_MemErrDetector::ThrowL2DataCorrectableTrap(const MemoryTransaction &memXact, |
| 1088 | N2_Strand *strand, |
| 1089 | uint32_t bank, |
| 1090 | N2_L2CacheLineError lineError) |
| 1091 | { |
| 1092 | N2_CererWithBitMux cerer(strand->core.cerer); |
| 1093 | bool itablewalk = ((memXact.referenceType() == MemoryTransaction::INSTR) && memXact.tablewalk()); |
| 1094 | // Handle L2$ data ECC errors during I-tlb or D-tlb tablewalk |
| 1095 | if (memXact.tablewalk()) { |
| 1096 | // Set in L2_ERROR_STATUS register and set paddr[39:6] for the bad |
| 1097 | // quarter line in L2_ERROR_ADDRESS register |
| 1098 | setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAC, |
| 1099 | true, |
| 1100 | strand->core_id(), |
| 1101 | lineError.qLineSyndrome(), |
| 1102 | lineError.errorPaddr()); |
| 1103 | |
| 1104 | // Set error information in DESR |
| 1105 | if (getCEEN(bank) && cerer.hwtwl2()) { |
| 1106 | N2_Strand *target_strand = (N2_Strand*)n2_model->cpu[0]->strand[getErrorSteer(bank)]; |
| 1107 | setDESR(target_strand->strand_id(),true, itablewalk?N2_Desr::RE_ITL2C : N2_Desr::RE_DTL2C,0); |
| 1108 | // and throw disrupting SW_RECOVERABLE_ERROR trap |
| 1109 | if(strand->seter.de()) |
| 1110 | strand->irq.raise(target_strand,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR); |
| 1111 | |
| 1112 | } |
| 1113 | } |
| 1114 | // Handle L2$ data ECC errors during instruction fetch |
| 1115 | else if (memXact.readXact() && |
| 1116 | memXact.referenceType() == MemoryTransaction::INSTR) { |
| 1117 | // Set in L2_ERROR_STATUS register and paddr[39:6] for the bad |
| 1118 | // quarter line in L2_ERROR_ADDRESS register |
| 1119 | setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAC, |
| 1120 | true, |
| 1121 | strand->core_id(), |
| 1122 | lineError.qLineSyndrome(), |
| 1123 | lineError.errorPaddr()); |
| 1124 | |
| 1125 | // Do we set error information in DESR and throw a trap? |
| 1126 | if (getCEEN(bank) && cerer.checkOneL2Cbit(memXact)) { |
| 1127 | N2_Strand *target_strand = (N2_Strand*)n2_model->cpu[0]->strand[getErrorSteer(bank)]; |
| 1128 | setDESR(target_strand->strand_id(),true,N2_Desr::RE_ICL2C,0); |
| 1129 | if(strand->seter.de()) |
| 1130 | strand->irq.raise(target_strand,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR); |
| 1131 | } |
| 1132 | } |
| 1133 | // Handle L2$ data ECC errors during data read or partial store(TODO) |
| 1134 | // This also covers Atomic Hits in Sect 12.9.1.6 because are |
| 1135 | // issued as a READ memXact followed by a WRITE memXact, with the |
| 1136 | // atomic bit set for both. |
| 1137 | else if (memXact.referenceType() == MemoryTransaction::DATA && |
| 1138 | memXact.readXact()) { |
| 1139 | // Set in L2_ERROR_STATUS register and paddr[39:6] for the bad |
| 1140 | // quarter line in L2_ERROR_ADDRESS register |
| 1141 | setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAC, |
| 1142 | true, |
| 1143 | strand->core_id(), |
| 1144 | lineError.qLineSyndrome(), |
| 1145 | lineError.errorPaddr()); |
| 1146 | |
| 1147 | // If CEEN set in L2_ERROR_ENABLE, throw trap to ERRORSTEER |
| 1148 | if (getCEEN(bank) && cerer.checkOneL2Cbit(memXact)) { |
| 1149 | N2_Strand *target_strand = (N2_Strand*)n2_model->cpu[0]->strand[getErrorSteer(bank)]; |
| 1150 | |
| 1151 | if (memXact.writeXact()) { |
| 1152 | // partial store |
| 1153 | setDESR(target_strand->strand_id(),false,N2_Desr::CE_L2C,0); |
| 1154 | if(strand->seter.dhcce()) |
| 1155 | strand->irq.raise(target_strand,SS_Interrupt::BIT_HW_CORRECTED_ERROR); |
| 1156 | } |
| 1157 | else { |
| 1158 | // data read |
| 1159 | setDESR(target_strand->strand_id(),true,N2_Desr::RE_DCL2C,0); |
| 1160 | if(strand->seter.de()) |
| 1161 | strand->irq.raise(target_strand,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR); |
| 1162 | } |
| 1163 | } |
| 1164 | } |
| 1165 | // Writes just set the ECC for quarter line. |
| 1166 | // so they can't throw traps. |
| 1167 | else if (memXact.referenceType() == MemoryTransaction::DATA && |
| 1168 | memXact.writeXact()) { |
| 1169 | return; |
| 1170 | } |
| 1171 | else { |
| 1172 | fprintf(stderr,"N2_MemErrDetector::" |
| 1173 | "ThrowL2DataCorrectableTrap(): " |
| 1174 | "unknown MemoryTranaction type"); |
| 1175 | exit(-1); |
| 1176 | } |
| 1177 | } |
| 1178 | |
| 1179 | |
| 1180 | // ThrowL2DataUncorrectableTrap() throws the correct disrupting trap |
| 1181 | // after setting the correct bits in various error status registers. |
| 1182 | // |
| 1183 | // As before, this routine is quite meticulous as each of the memory |
| 1184 | // transaction, the error conditions, processor state, and, even it |
| 1185 | // might seem, the phase of the moon, influence the behavior of the |
| 1186 | // trap processing. |
| 1187 | // |
| 1188 | // Returns precise trap number to throw, if needed. |
| 1189 | |
| 1190 | SS_Trap::Type |
| 1191 | N2_MemErrDetector::ThrowL2DataUncorrectableTrap( |
| 1192 | const MemoryTransaction &memXact, |
| 1193 | N2_Strand *strand, |
| 1194 | uint32_t bank, |
| 1195 | N2_L2CacheLineError lineError) |
| 1196 | { |
| 1197 | N2_CererWithBitMux cerer(strand->core.cerer); |
| 1198 | N2_InstSfsr *isfsr = &(strand->inst_sfsr); |
| 1199 | N2_DataSfsr *dsfsr = &(strand->data_sfsr); |
| 1200 | N2_DataSfar *dsfar = &(strand->data_sfar); |
| 1201 | SS_Trap::Type trapNumber = SS_Trap::NO_TRAP; |
| 1202 | bool itablewalk = ((memXact.referenceType() == MemoryTransaction::INSTR) && memXact.tablewalk()); |
| 1203 | |
| 1204 | // Handle L2$ data ECC errors during I-tlb or D-tlb tablewalk |
| 1205 | if (memXact.tablewalk()) { |
| 1206 | // Is this error NotData? |
| 1207 | if (!lineError.isNotData()) { |
| 1208 | // Set the L2_ERROR_STATUS register and paddr[39:6] for the bad |
| 1209 | // quarter line in L2_ERROR_ADDRESS register |
| 1210 | setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAU, |
| 1211 | false, |
| 1212 | strand->core_id(), |
| 1213 | lineError.qLineSyndrome(), |
| 1214 | lineError.errorPaddr()); |
| 1215 | } |
| 1216 | else { |
| 1217 | // Set the L2_NOTDATA_STATUS register |
| 1218 | setL2NotdataErrorReg(bank,strand->core_id(), |
| 1219 | lineError.errorPaddr()); |
| 1220 | |
| 1221 | } |
| 1222 | // Set error information in ISFAR or DSFAR and DSFAR |
| 1223 | if (getNCEEN(bank) && cerer.hwtwl2()) { |
| 1224 | if (!lineError.isNotData()) { |
| 1225 | // If PSCCE set, throw trap |
| 1226 | if (strand->seter.pscce()) { |
| 1227 | if (itablewalk) { |
| 1228 | isfsr->error_type(N2_InstSfsr::ITL2U); |
| 1229 | return SS_Trap::INSTRUCTION_ACCESS_MMU_ERROR; |
| 1230 | } else { |
| 1231 | dsfsr->error_type(N2_DataSfsr::DTL2U); |
| 1232 | dsfar->error_addr(memXact.getVaddr()); |
| 1233 | return SS_Trap::DATA_ACCESS_MMU_ERROR; |
| 1234 | } |
| 1235 | } |
| 1236 | } |
| 1237 | else { |
| 1238 | // report NotData |
| 1239 | // If PSCCE set, throw trap |
| 1240 | |
| 1241 | if (strand->seter.pscce()) { |
| 1242 | if (itablewalk) { |
| 1243 | isfsr->error_type(N2_InstSfsr::ITL2ND); |
| 1244 | trapNumber = SS_Trap::INSTRUCTION_ACCESS_MMU_ERROR; |
| 1245 | } else { |
| 1246 | dsfsr->error_type(N2_DataSfsr::DTL2ND); |
| 1247 | trapNumber = SS_Trap::DATA_ACCESS_MMU_ERROR; |
| 1248 | } |
| 1249 | } |
| 1250 | } |
| 1251 | } |
| 1252 | } |
| 1253 | // Handle L2$ data ECC errors during instruction fetch |
| 1254 | else if (memXact.readXact() && |
| 1255 | memXact.referenceType() == MemoryTransaction::INSTR) { |
| 1256 | bool notData = lineError.isNotData(); |
| 1257 | bool cererSet = notData ? cerer.icl2nd() : cerer.icl2u(); |
| 1258 | |
| 1259 | if (notData) { |
| 1260 | // Set the L2_NOTDATA_STATUS register |
| 1261 | setL2NotdataErrorReg(bank, |
| 1262 | strand->core_id(), |
| 1263 | lineError.errorPaddr()); |
| 1264 | } |
| 1265 | else{ |
| 1266 | // Set in L2_ERROR_STATUS registerand paddr[39:6] for the bad |
| 1267 | // quarter line in L2_ERROR_ADDRESS register |
| 1268 | setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAU, |
| 1269 | false, |
| 1270 | strand->core_id(), |
| 1271 | lineError.qLineSyndrome(), |
| 1272 | lineError.errorPaddr()); |
| 1273 | } |
| 1274 | |
| 1275 | if (cererSet) { |
| 1276 | if (notData) { |
| 1277 | if (getNCEEN(bank) && |
| 1278 | strand->seter.pscce()) { |
| 1279 | // Set error in ISFSR |
| 1280 | isfsr->error_type(N2_InstSfsr::ICL2ND); |
| 1281 | trapNumber = SS_Trap::INSTRUCTION_ACCESS_ERROR; |
| 1282 | } |
| 1283 | } |
| 1284 | else { |
| 1285 | // If NCEEN and PSCCE set, throw trap |
| 1286 | if (getNCEEN(bank) && strand->seter.pscce()) { |
| 1287 | // Set error in ISFSR |
| 1288 | isfsr->error_type(N2_InstSfsr::ICL2U); |
| 1289 | return SS_Trap::INSTRUCTION_ACCESS_ERROR; |
| 1290 | } |
| 1291 | } |
| 1292 | } |
| 1293 | } |
| 1294 | // Handle L2$ data ECC errors during data read. |
| 1295 | // |
| 1296 | // This also covers Atomic Hits in Sect 12.9.7.6 because are |
| 1297 | // issued as a READ memXact followed by a WRITE memXact, with the |
| 1298 | // atomic bit set for both. |
| 1299 | // However, the write (following this read) updates memory (hard to stop, |
| 1300 | // given the current Riesling implementation, do we need to for RUST? |
| 1301 | else if (memXact.readXact() && |
| 1302 | memXact.referenceType() == MemoryTransaction::DATA) { |
| 1303 | bool notData = lineError.isNotData(); |
| 1304 | bool cererSet = notData ? cerer.dcl2u() : cerer.dcl2nd(); |
| 1305 | |
| 1306 | if (notData) { |
| 1307 | // Set the L2_NOTDATA_STATUS register |
| 1308 | setL2NotdataErrorReg(bank, |
| 1309 | strand->core_id(), |
| 1310 | lineError.errorPaddr()); |
| 1311 | } |
| 1312 | else{ |
| 1313 | // Set in L2_ERROR_STATUS register and paddr[39:6] for the bad |
| 1314 | // quarter line in L2_ERROR_ADDRESS register |
| 1315 | setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAU, |
| 1316 | false, |
| 1317 | strand->core_id(), |
| 1318 | lineError.qLineSyndrome(), |
| 1319 | lineError.errorPaddr()); |
| 1320 | } |
| 1321 | if (cererSet) { |
| 1322 | if (notData) { |
| 1323 | // If NCEEN and PSCCE set, throw trap |
| 1324 | if (getNCEEN(bank) && |
| 1325 | strand->seter.pscce()) { |
| 1326 | // Set error in DSFSR |
| 1327 | dsfsr->error_type(N2_DataSfsr::DCL2ND); |
| 1328 | trapNumber = SS_Trap::DATA_ACCESS_ERROR; |
| 1329 | } |
| 1330 | } |
| 1331 | else { |
| 1332 | // If NCEEN and PSCCE set, throw trap |
| 1333 | if (getNCEEN(bank) && |
| 1334 | strand->seter.pscce()) { |
| 1335 | // Set error in DSFSR |
| 1336 | dsfsr->error_type(N2_DataSfsr::DCL2U); |
| 1337 | return SS_Trap::DATA_ACCESS_ERROR; |
| 1338 | } |
| 1339 | } |
| 1340 | } |
| 1341 | } |
| 1342 | // Handle L2$ partial stores TODO |
| 1343 | #if 0 |
| 1344 | else if (memXact.writeXact() && |
| 1345 | memXact.referenceType() == MemoryTransaction::DATA |
| 1346 | ) { |
| 1347 | // Set in L2_ERROR_STATUS register |
| 1348 | // Don't set L2 Error Address reg, per N2 PRM Rev 1.1 12.9.7.7. |
| 1349 | setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setLDAU, |
| 1350 | false, |
| 1351 | strand->core_id(), |
| 1352 | lineError.qLineSyndrome(), 0); |
| 1353 | |
| 1354 | // Again, what about bad parity? |
| 1355 | |
| 1356 | // If NCEEN set in L2_ERROR_ENABLE, etc., throw trap |
| 1357 | if (getNCEEN(bank) && cerer.dcl2u()) { |
| 1358 | // Set error in DESR |
| 1359 | // NB: this is dependent on NCEEN and DCL2U, different |
| 1360 | // from everywhere else |
| 1361 | N2_Strand *target_strand = (N2_Strand*)n2_model->cpu[0]->strand[getErrorSteer(bank)]; |
| 1362 | setDESR(target_strand->strand_id(),false,N2_Desr::RE_L2U,0); |
| 1363 | return SS_Trap::DATA_ACCESS_ERROR; |
| 1364 | } |
| 1365 | } |
| 1366 | #endif |
| 1367 | // Writes just set the ECC for quarter line. |
| 1368 | // so they can't throw traps. |
| 1369 | else if (memXact.referenceType() == MemoryTransaction::DATA && |
| 1370 | memXact.writeXact()) { |
| 1371 | return SS_Trap::NO_TRAP; |
| 1372 | } |
| 1373 | else { |
| 1374 | fprintf(stderr,"N2_MemErrDetector::" |
| 1375 | "ThrowL2DataUncorrectableTrap(): " |
| 1376 | "unknown MemoryTranaction type"); |
| 1377 | exit(-1); |
| 1378 | } |
| 1379 | |
| 1380 | return trapNumber; |
| 1381 | } |
| 1382 | |
| 1383 | |
| 1384 | // poisonL2Line() sets the ECC values for all the words in an L2$ line |
| 1385 | // to NotData. The line is selected by the bank and index (aka set) |
| 1386 | // values in 'paddr' and the set's way in 'way'. |
| 1387 | |
| 1388 | void |
| 1389 | N2_MemErrDetector::poisonL2Line(N2_L2AddressingFields paddr, int way) |
| 1390 | { |
| 1391 | |
| 1392 | for (int word = 0; |
| 1393 | word < N2_L2_CACHE_LINE_SIZE/sizeof(double); |
| 1394 | ++word) { |
| 1395 | N2_L2DiagDataAddressingFields diagAddr; |
| 1396 | |
| 1397 | diagAddr.setBANK(paddr.getBANK()); |
| 1398 | diagAddr.setWAY(way); |
| 1399 | diagAddr.setWORD(paddr.getWORD() + word); |
| 1400 | diagAddr.setSET(paddr.getSET()); |
| 1401 | diagAddr.setODDEVEN(0); |
| 1402 | |
| 1403 | uint32_t diagNdx = |
| 1404 | diagAddr.getNative()/N2_L2DiagDataMemWithECC::Stride; |
| 1405 | L2DiagDataMemAccess_. |
| 1406 | set(diagNdx, N2_L2DiagDataMem::setECC, |
| 1407 | N2_L2DiagDataMemWithECC::L2_NOT_DATA); |
| 1408 | |
| 1409 | diagAddr.setODDEVEN(1); |
| 1410 | diagNdx = diagAddr.getNative()/N2_L2DiagDataMemWithECC::Stride; |
| 1411 | L2DiagDataMemAccess_. |
| 1412 | set(diagNdx, N2_L2DiagDataMem::setECC, |
| 1413 | N2_L2DiagDataMemWithECC::L2_NOT_DATA); |
| 1414 | } |
| 1415 | } |
| 1416 | |
| 1417 | |
| 1418 | // ThrowL2DataWriteBackTrap() sets the various error status registers |
| 1419 | // and throws the appropriate disrupting trap to the ERRROSTEER strand |
| 1420 | // for the bank number found in the original memory transaction that |
| 1421 | // causes the cache line writeback. |
| 1422 | |
| 1423 | void |
| 1424 | N2_MemErrDetector::ThrowL2DataWriteBackTrap(const MemoryTransaction &memXact, |
| 1425 | N2_Strand *strand, |
| 1426 | uint32_t bank, |
| 1427 | N2_L2CacheLineError lineError) |
| 1428 | { |
| 1429 | // Don't trap on NotData N2 1.1 PRM Sect 12.9.16 |
| 1430 | if (lineError.isNotData()) { |
| 1431 | return; |
| 1432 | } |
| 1433 | |
| 1434 | uint32_t strandId = getErrorSteer(bank); |
| 1435 | |
| 1436 | // Set in L2_ERROR_STATUS register and paddr[39:6] for the bad |
| 1437 | // quarter line in L2_ERROR_ADDRESS register |
| 1438 | ErrorStatusRegBitSetFn bitSetFcn = lineError.isCorrectable() ? |
| 1439 | N2_L2ErrorStatusReg::setLDWC : N2_L2ErrorStatusReg::setLDWU; |
| 1440 | setL2ErrorStatusReg(bank, bitSetFcn, lineError.isCorrectable(), 0, 0, |
| 1441 | lineError.errorPaddr()); |
| 1442 | |
| 1443 | |
| 1444 | // Set error information in DESR |
| 1445 | N2_CererWithBitMux cerer(strand->core.cerer); |
| 1446 | if (lineError.isCorrectable()) { // is there hope? |
| 1447 | if(getCEEN(bank) && cerer.l2c_socc()/*cerer.checkOneL2Cbit(memXact)*/){ |
| 1448 | setDESR(strandId,false, N2_Desr::CE_L2C,0); |
| 1449 | trapToErrorSteer(strand, bank, SS_Interrupt::BIT_HW_CORRECTED_ERROR); |
| 1450 | } |
| 1451 | } |
| 1452 | else { // nope... |
| 1453 | if (getNCEEN(bank) && cerer.l2u_socu() ) { |
| 1454 | setDESR(strandId,true, N2_Desr::RE_L2U,0); |
| 1455 | trapToErrorSteer(strand, bank, SS_Interrupt::BIT_SW_RECOVERABLE_ERROR); |
| 1456 | } |
| 1457 | } |
| 1458 | } |
| 1459 | |
| 1460 | |
| 1461 | // This routine implements DRAM RAS error detection and injection. |
| 1462 | // |
| 1463 | // Assumption: All errors are detected during critical data load only |
| 1464 | // and reported prior to linefill. This routine does not model the |
| 1465 | // scrubbing mechanism. Also does not detect errors that might arise |
| 1466 | // in the FBDIMM channel. |
| 1467 | // |
// Sets 'isUncorrectable' to true if the DRAM read accesses an
// uncorrectable Chip-Kill error; returns the trap to throw, if any.
| 1469 | // |
| 1470 | // NB: This routine is missing the hooks for MBR*ECC and MBR*FBR SOC errors. |
| 1471 | SS_Trap::Type |
| 1472 | N2_MemErrDetector::dramProcessMemOp(N2_Strand *strand, |
| 1473 | const MemoryTransaction &memXact, |
| 1474 | N2_L2AddressingFields paddress,bool &isUncorrectable) |
| 1475 | { |
| 1476 | isUncorrectable = false; |
| 1477 | |
| 1478 | // Extracting necessary information from input |
| 1479 | uint64_t paddr = paddress.getNative(); |
| 1480 | |
| 1481 | // Verify if PA is 16B Aligned |
| 1482 | if ((paddr % SS_CKMemory::DRAM_LINE_LENGTH) != 0) { |
| 1483 | fprintf(stderr,"N2_MemErrDetector::dramProcessMemOp(): " |
| 1484 | "misaligned address."); |
| 1485 | exit(-1); |
| 1486 | } |
| 1487 | |
| 1488 | // MCU ID is determined from bits 7:6 of the PA |
| 1489 | uint32_t mcuID = bit_shift(paddr, N2_DRAM_PADDR_MCU_SHIFT, |
| 1490 | N2_DRAM_PADDR_NR_MCU_LOG2); |
| 1491 | uint32_t bank = paddress.getBANK(); |
| 1492 | N2_Cerer *cerer = &(strand->core.cerer); |
| 1493 | SS_CKMemory *ck_memory=((SS_CKMemory*)(n2_model->cpu[0]->strand[0]->memory)); |
| 1494 | |
| 1495 | // DRAM Error Detection and Handling |
| 1496 | // Detect ECC error |
| 1497 | // If the paddr has an entry in the ECC Map |
| 1498 | if (ck_memory->ecc_exists(paddr)) { |
| 1499 | BL_CKSyndrome ck_syndrome(ck_memory->read_raw_CK_line(paddr), ck_memory->fetch_ecc(paddr)); |
| 1500 | |
| 1501 | RAS_OSTR << "N2_MemErrDetector::dramProcessMemOp: " << |
| 1502 | "CK syndrome 0x" << hex << ck_syndrome.getSyndrome() << "\n"; |
| 1503 | |
| 1504 | // Error Detection |
| 1505 | if (!ck_syndrome.noError()) { |
| 1506 | N2_L2ErrorStatusReg L2ErrorStatusReg; |
| 1507 | L2ErrorStatusRegAccess_.access(bank, L2ErrorStatusReg, true); |
| 1508 | N2_DramErrorStatusMem dramESR; |
| 1509 | DramErrorStatusMemAccess_.access(mcuID, dramESR, true); |
| 1510 | // Verify if the error is correctable or uncorrectable |
| 1511 | if (ck_syndrome.isUncorrectableError()) { |
| 1512 | isUncorrectable = true; |
| 1513 | |
| 1514 | // Set DAU, R/W, VCID and MODA information in the |
| 1515 | // L2 Cache Error Status Register -> PRM 12.11.1.1 |
| 1516 | // MODA(Modular Arithmatic) and R/W need not be |
| 1517 | // set. |
| 1518 | // Set paddr[39:6] in L2_ERROR_ADDRESS register - |
| 1519 | // All DRAM error address should be stored in L2 |
| 1520 | // EAR. DRAM EAR stores address only for Scrub |
| 1521 | // errors. -> PRM 12.12.2 |
| 1522 | |
| 1523 | setL2ErrorStatusReg(bank, N2_L2ErrorStatusReg::setDAU, |
| 1524 | false, getErrorSteer(bank), |
| 1525 | ck_syndrome.getSyndrome(), |
| 1526 | paddr); |
| 1527 | |
| 1528 | // Check the ESR for the presence of multiple |
| 1529 | // errors (both correctable and uncorrectable) |
| 1530 | |
| 1531 | // Check to see if an uncorrectable error is |
| 1532 | // already present If yes dont log details about |
| 1533 | // current error. Instead set the MEU bit in the |
| 1534 | // ESR |
| 1535 | if (dramESR.getDAU() == 1) { |
| 1536 | DramErrorStatusMemAccess_. |
| 1537 | set(mcuID, |
| 1538 | N2_DramErrorStatusMem::setMEU, |
| 1539 | 1); |
| 1540 | } |
| 1541 | // Else check to see if a correctable error already exists |
| 1542 | else if (dramESR.getDAC() == 1) { |
| 1543 | // If yes overwrite the previous error (UE |
| 1544 | // has higher precedence over CE) |
| 1545 | |
| 1546 | // Set the DAU bit |
| 1547 | DramErrorStatusMemAccess_. |
| 1548 | set(mcuID, |
| 1549 | N2_DramErrorStatusMem::setDAU, |
| 1550 | 1); |
| 1551 | DramErrorStatusMemAccess_. |
| 1552 | set(mcuID, |
| 1553 | N2_DramErrorStatusMem::setSYND, |
| 1554 | ck_syndrome.getSyndrome()); |
| 1555 | // Reset the DAC bit |
| 1556 | DramErrorStatusMemAccess_. |
| 1557 | set(mcuID, |
| 1558 | N2_DramErrorStatusMem::setDAC, |
| 1559 | 0); |
| 1560 | DramErrorStatusMemAccess_. |
| 1561 | set(mcuID, |
| 1562 | N2_DramErrorStatusMem::setMEC, |
| 1563 | 1); |
| 1564 | } |
| 1565 | else { // No error stored in Dram ESR |
| 1566 | |
| 1567 | // Set DAU and Syndrome in DRAM ESR |
| 1568 | DramErrorStatusMemAccess_. |
| 1569 | set(mcuID, |
| 1570 | N2_DramErrorStatusMem::setDAU, |
| 1571 | 1); |
| 1572 | DramErrorStatusMemAccess_. |
| 1573 | set(mcuID, |
| 1574 | N2_DramErrorStatusMem::setSYND, |
| 1575 | ck_syndrome.getSyndrome()); |
| 1576 | } |
| 1577 | // if NCEEN bit is set, then signal an L2U error |
| 1578 | // to the requesting virtual core and throw a trap |
| 1579 | // to ERRORSTEER |
| 1580 | if (getNCEEN(bank)) { |
| 1581 | SS_Trap::Type trap = DramThrowUncorrectableTrap(strand, memXact, bank); |
| 1582 | if(trap != SS_Trap::NO_TRAP) |
| 1583 | return trap; |
| 1584 | } |
| 1585 | } |
| 1586 | // Verify if the error is a correctable data bit |
| 1587 | // or check bit error |
| 1588 | else if (ck_syndrome.isCorrectableDataBitError() || |
| 1589 | ck_syndrome.isCorrectableCheckBitError()) { |
| 1590 | // Check the ESR for the presence of multiple |
| 1591 | // errors (both correctable and uncorrectable) |
| 1592 | |
| 1593 | // Check to see if any error is already present |
| 1594 | if ((L2ErrorStatusReg.getVEU() == 1) || |
| 1595 | (L2ErrorStatusReg.getVEC() == 1)) { |
| 1596 | |
| 1597 | // If yes do not log info about current |
| 1598 | // error just set the MEC bit |
| 1599 | L2ErrorStatusReg.setMEC(1); |
| 1600 | L2ErrorStatusRegAccess_.access(bank, |
| 1601 | L2ErrorStatusReg, |
| 1602 | false); |
| 1603 | } |
| 1604 | else { |
| 1605 | // If no error is stored in the the Dram ESR, |
| 1606 | // then log the information Set DAC, R/W, |
| 1607 | // VCID and MODA information in the L2 |
| 1608 | // Cache Error Status Register -> PRM |
| 1609 | // 12.11.1.1 MODA(Modular Arithmatic) and |
| 1610 | // R/W need not be set fore RUST. |
| 1611 | // |
| 1612 | // Set paddr[39:6] in L2_ERROR_ADDRESS |
| 1613 | // register - All DRAM error address should be |
| 1614 | // stored in L2 EAR. DRAM EAR stores address |
| 1615 | // only for Scrub errors. -> PRM 12.12.2 |
| 1616 | setL2ErrorStatusReg(bank, |
| 1617 | N2_L2ErrorStatusReg::setDAC, |
| 1618 | true, getErrorSteer(bank), |
| 1619 | ck_syndrome.getSyndrome(), |
| 1620 | paddr); |
| 1621 | |
| 1622 | // Check to see if any error is already present |
| 1623 | if (dramESR.getDAU() || dramESR.getDAC()) { |
| 1624 | // If yes do not log info about current |
| 1625 | // error just set the MEC bit |
| 1626 | DramErrorStatusMemAccess_. |
| 1627 | set(mcuID, |
| 1628 | N2_DramErrorStatusMem::setMEC, |
| 1629 | 1); |
| 1630 | } |
| 1631 | // This is the first error, log the information |
| 1632 | else { |
| 1633 | // Set DAC and Syndrome in DRAM ESR |
| 1634 | DramErrorStatusMemAccess_. |
| 1635 | set(mcuID, |
| 1636 | N2_DramErrorStatusMem::setDAC, |
| 1637 | 1); |
| 1638 | DramErrorStatusMemAccess_. |
| 1639 | set(mcuID, |
| 1640 | N2_DramErrorStatusMem::setSYND, |
| 1641 | ck_syndrome.getSyndrome()); |
| 1642 | } |
| 1643 | // Add stuff for DRAM Error Counter and DRAM |
| 1644 | // Error Location Registers |
| 1645 | |
| 1646 | // if CEEN is set, then signal an L2C error to |
| 1647 | // the requesting virtual core and throw a |
| 1648 | // trap to ERRORSTEER |
| 1649 | if (getCEEN(bank)) { |
| 1650 | DramThrowCorrectableTrap(strand, memXact, bank); |
| 1651 | } |
| 1652 | } |
| 1653 | } |
| 1654 | } |
| 1655 | } |
| 1656 | |
| 1657 | N2_SocErrorReg::SocErrRegBitGetFn getFBR = |
| 1658 | N2_SocErrorReg::getSocErrRegMCUFBR(mcuID); |
| 1659 | |
| 1660 | // If SOC FBDIMM error injection is enabled for this mcu |
| 1661 | if (socErrorInjectRegAccess_.get(getFBR)) { |
| 1662 | setL2ErrorStatusReg(bank, |
| 1663 | N2_L2ErrorStatusReg::setDAC, |
| 1664 | true, getErrorSteer(bank), |
| 1665 | 0, |
| 1666 | paddr); |
| 1667 | |
| 1668 | // Legal FBR errors are correctable |
| 1669 | DramErrorStatusMemAccess_.set(mcuID, N2_DramErrorStatusMem::setFBR); |
| 1670 | |
| 1671 | N2_CererWithBitMux cerer(strand->core.cerer); |
| 1672 | if (getCEEN(bank) && cerer.dcl2c()) { |
| 1673 | setDESR(getErrorSteer(bank),false, N2_Desr::CE_L2C,0); |
| 1674 | trapToErrorSteer(strand,bank, |
| 1675 | SS_Interrupt::BIT_HW_CORRECTED_ERROR); |
| 1676 | } |
| 1677 | |
| 1678 | // handle SOC FBR errors here |
| 1679 | processSocFbdError(*strand, paddress, getFBR); |
| 1680 | } |
| 1681 | |
| 1682 | return SS_Trap::NO_TRAP; |
| 1683 | } |
| 1684 | |
| 1685 | // dramUpdateECC() updates the ECC value associated with paddress. |
| 1686 | // |
| 1687 | // If dram error injection is enabled, the ECC for the physical |
| 1688 | // address' Chip-Kill line calculated, xor'ed with the injection mask, |
| 1689 | // and saved in the dram ECC map. If dram error injection is disabled |
| 1690 | // (or there is no longer an ECC error), the ECC value will not be |
| 1691 | // saved in the map (or the value will be deleted). |
| 1692 | |
| 1693 | void |
| 1694 | N2_MemErrDetector::dramUpdateECC(N2_L2AddressingFields paddress, |
| 1695 | bool isNotData) |
| 1696 | { |
| 1697 | // Extracting necessary information from input |
| 1698 | uint64_t paddr = paddress.get(); |
| 1699 | SS_CKMemory *ck_memory=((SS_CKMemory*)(n2_model->cpu[0]->strand[0]->memory)); |
| 1700 | |
| 1701 | // Verify if PA is 16B Aligned |
| 1702 | if ((paddr % SS_CKMemory::DRAM_LINE_LENGTH) != 0) { |
| 1703 | fprintf(stderr,"N2_MemErrDetector::dramUpdateECC(): misaligned address."); |
| 1704 | exit(-1); |
| 1705 | } |
| 1706 | |
| 1707 | // MCU ID is determined from bits 7:6 of the PA |
| 1708 | uint32_t mcuID = bit_shift(paddr, N2_DRAM_PADDR_MCU_SHIFT, |
| 1709 | N2_DRAM_PADDR_NR_MCU_LOG2); |
| 1710 | |
| 1711 | // DRAM Error Injection |
| 1712 | // Case: DRAM Access - STORE (WRITE) (L2 Miss) |
| 1713 | if (debugChipKill_ || |
| 1714 | isNotData || |
| 1715 | ck_memory->ecc_exists(paddr) || |
| 1716 | DramErrorInjectMemAccess_.get(mcuID, N2_DramErrorInjectMem::getENB)) { |
| 1717 | |
| 1718 | uint64_t newDramECC; |
| 1719 | // If the L2$ line contains NotData, write special syndrome |
| 1720 | if (isNotData) { |
| 1721 | newDramECC = SS_CKMemory::DRAM_NOT_DATA; |
| 1722 | } else { |
| 1723 | newDramECC = ck_memory->calculate_dram_ecc(paddr); |
| 1724 | if (DramErrorInjectMemAccess_.get(mcuID, |
| 1725 | N2_DramErrorInjectMem::getENB)) { |
| 1726 | newDramECC ^= DramErrorInjectMemAccess_. |
| 1727 | get(mcuID, N2_DramErrorInjectMem::getECCMASK); |
| 1728 | } |
| 1729 | } |
| 1730 | |
| 1731 | RAS_OSTR << "DRAM Error injected at paddr :0x" << std::hex << paddr << " newDRAMECC:0x" << std::hex << newDramECC << endl; |
| 1732 | |
| 1733 | // Store PA,MASKED ECC in MAP The higher order PA is |
| 1734 | // (still) unique enough to be maintained as key The |
| 1735 | // value stored in the map will (eventually) be the |
| 1736 | // ECC value for 128 bits of data addressed by HO-PA |
| 1737 | // and LO-PA |
| 1738 | ck_memory->dram_update_ecc(paddr,newDramECC); |
| 1739 | // If Single Shot then disable error injection |
| 1740 | if (DramErrorInjectMemAccess_. |
| 1741 | get(mcuID, N2_DramErrorInjectMem::getSSHOT)) { |
| 1742 | DramErrorInjectMemAccess_. |
| 1743 | set(mcuID, N2_DramErrorInjectMem::setENB, 0); |
| 1744 | } |
| 1745 | } |
| 1746 | } |
| 1747 | |
| 1748 | // DramThrowCorrectableTrap() throws an HW_CORRECTED_ERROR trap to the |
| 1749 | // ERRORSTEER strand for bank. |
| 1750 | |
| 1751 | void |
| 1752 | N2_MemErrDetector::DramThrowCorrectableTrap(N2_Strand *strand, |
| 1753 | const MemoryTransaction &memXact, |
| 1754 | uint32_t bank) |
| 1755 | { |
| 1756 | N2_CererWithBitMux cerer(strand->core.cerer); |
| 1757 | bool itablewalk = ((memXact.referenceType() == MemoryTransaction::INSTR) && memXact.tablewalk()); |
| 1758 | |
| 1759 | // If the correct L2C bit is set in the CERER, throw a trap |
| 1760 | if (cerer.checkOneL2Cbit(memXact)) { |
| 1761 | int errorCode; |
| 1762 | if (itablewalk) { |
| 1763 | errorCode = N2_Desr::RE_ITL2C; |
| 1764 | } else if (memXact.tablewalk() && (memXact.referenceType() == MemoryTransaction::DATA)) { |
| 1765 | errorCode = N2_Desr::RE_DTL2C; |
| 1766 | } else if (memXact.referenceType() == MemoryTransaction::INSTR) { |
| 1767 | errorCode = N2_Desr::RE_ICL2C; |
| 1768 | } else if (memXact.referenceType() == MemoryTransaction::DATA) { |
| 1769 | errorCode = N2_Desr::RE_DCL2C; |
| 1770 | } else { |
| 1771 | fprintf(stderr,"N2_MemErrDetector::DramThrowCorrectableTrap: bad " |
| 1772 | "memXact "); |
| 1773 | exit(-1); |
| 1774 | } |
| 1775 | |
| 1776 | // Set error information in DESR |
| 1777 | setDESR(getErrorSteer(bank),true, errorCode,0); |
| 1778 | trapToErrorSteer(strand,bank,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR); |
| 1779 | } |
| 1780 | } |
| 1781 | |
| 1782 | // DramThrowUncorrectableTrap() throws the appropriate trap to the |
| 1783 | // ERRORSTEER strand for bank, based on the kind of memory transaction. |
| 1784 | |
| 1785 | SS_Trap::Type |
| 1786 | N2_MemErrDetector::DramThrowUncorrectableTrap(N2_Strand *strand, |
| 1787 | const MemoryTransaction &memXact, |
| 1788 | uint32_t bank) |
| 1789 | { |
| 1790 | bool itablewalk = ((memXact.referenceType() == MemoryTransaction::INSTR) && memXact.tablewalk()); |
| 1791 | N2_CererWithBitMux cerer(strand->core.cerer); |
| 1792 | |
| 1793 | setDESR(getErrorSteer(bank),false, N2_Desr::RE_L2U,0); |
| 1794 | if (strand->seter.pscce()) { |
| 1795 | // Hardware Tablewalk |
| 1796 | if (memXact.tablewalk() && cerer.hwtwl2()) { |
| 1797 | if (itablewalk) { |
| 1798 | return SS_Trap::INSTRUCTION_ACCESS_MMU_ERROR; |
| 1799 | } |
| 1800 | else { |
| 1801 | return SS_Trap::DATA_ACCESS_MMU_ERROR; |
| 1802 | } |
| 1803 | } |
| 1804 | // Instruction Fetch |
| 1805 | else if (memXact.referenceType() == MemoryTransaction::INSTR && |
| 1806 | cerer.icl2u()) { |
| 1807 | N2_InstSfsr *isfsr = &(strand->inst_sfsr); |
| 1808 | isfsr->error_type(N2_InstSfsr::ICL2U); |
| 1809 | N2_DataSfar *dsfar = &(strand->data_sfar); |
| 1810 | dsfar->error_addr(memXact.getVaddr()); |
| 1811 | return SS_Trap::INSTRUCTION_ACCESS_ERROR; |
| 1812 | } |
| 1813 | // Data Fetch |
| 1814 | else if (memXact.referenceType() == MemoryTransaction::DATA && |
| 1815 | memXact.readXact() && |
| 1816 | cerer.dcl2u()) { |
| 1817 | N2_DataSfsr *dsfsr = &(strand->data_sfsr); |
| 1818 | dsfsr->error_type(N2_DataSfsr::DCL2U); |
| 1819 | return SS_Trap::DATA_ACCESS_ERROR; |
| 1820 | } |
| 1821 | // Data Store |
| 1822 | else if (memXact.referenceType() == MemoryTransaction::DATA && |
| 1823 | memXact.writeXact() && |
| 1824 | cerer.dcl2u()) { |
| 1825 | |
| 1826 | setDESR(getErrorSteer(bank),true, N2_Desr::RE_L2U,0); |
| 1827 | trapToErrorSteer(strand,bank,SS_Interrupt::BIT_SW_RECOVERABLE_ERROR); |
| 1828 | } |
| 1829 | else { |
| 1830 | fprintf(stderr,"N2_MemErrDetector::" |
| 1831 | "DramThrowUncorrectableTrap(): " |
| 1832 | "unknown MemoryTranaction type"); |
| 1833 | exit(-1); |
| 1834 | } |
| 1835 | } |
| 1836 | return SS_Trap::NO_TRAP; |
| 1837 | } |
| 1838 | |
| 1839 | // processSocFbdError() injects and detects SOC FBR RAS errors. |
| 1840 | |
| 1841 | void |
| 1842 | N2_MemErrDetector::processSocFbdError(N2_Strand &strand, |
| 1843 | N2_L2AddressingFields paddress, |
| 1844 | N2_SocErrorReg::SocErrRegBitGetFn getFBR) |
| 1845 | { |
| 1846 | uint32_t mcuID = bit_shift(paddress.getNative(), |
| 1847 | N2_DRAM_PADDR_MCU_SHIFT, |
| 1848 | N2_DRAM_PADDR_NR_MCU_LOG2); |
| 1849 | |
| 1850 | // if the FBD error syndrome register is clear, then the error can |
| 1851 | // be logged |
| 1852 | if (DramFbdErrorSyndromeRegAccess_. |
| 1853 | get(N2_DramFbdErrorSyndromeReg::getVALID) == 0) { |
| 1854 | DramFbdErrorSyndromeRegAccess_. |
| 1855 | set(N2_DramFbdErrorSyndromeReg::setVALID); |
| 1856 | |
| 1857 | // decode the error type and set the correct bit in the Dram |
| 1858 | // Error Syndrome Register |
| 1859 | uint64_t errSource = DramFbdInjectedErrSrcRegAccess_. |
| 1860 | get(N2_DramFbdInjectedErrSrcReg::getERRORSOURCE); |
| 1861 | switch (errSource) { |
| 1862 | case N2_DramFbdInjectedErrSrcReg::CRC_ERROR: |
| 1863 | DramFbdErrorSyndromeRegAccess_. |
| 1864 | set(N2_DramFbdErrorSyndromeReg::setSFPE); |
| 1865 | break; |
| 1866 | case N2_DramFbdInjectedErrSrcReg::ALERT_FRAME_ERROR: |
| 1867 | DramFbdErrorSyndromeRegAccess_. |
| 1868 | set(N2_DramFbdErrorSyndromeReg::setAA); |
| 1869 | break; |
| 1870 | case N2_DramFbdInjectedErrSrcReg::ALERT_ASSERTED: |
| 1871 | DramFbdErrorSyndromeRegAccess_. |
| 1872 | set(N2_DramFbdErrorSyndromeReg::setAFE); |
| 1873 | break; |
| 1874 | case N2_DramFbdInjectedErrSrcReg::STATUS_FRAME_PARITY_ERROR: |
| 1875 | DramFbdErrorSyndromeRegAccess_. |
| 1876 | set(N2_DramFbdErrorSyndromeReg::setC); |
| 1877 | break; |
| 1878 | default: |
| 1879 | fprintf(stderr,"N2_MemErrDetector::processSOCErrors: bad " |
| 1880 | "error source: %d", errSource); |
| 1881 | exit(-1); |
| 1882 | break; |
| 1883 | } |
| 1884 | // Legal FBR errors are correctable |
| 1885 | DramErrorStatusMemAccess_.set(mcuID, N2_DramErrorStatusMem::setFBR); |
| 1886 | } |
| 1887 | |
| 1888 | /* |
| 1889 | // decide whether to throw the trap. |
| 1890 | bool throwTrap = false; |
| 1891 | N2_DramFbdCountReg dramFbdCountReg; |
| 1892 | DramFbdCountRegAccess_.access(mcuID, dramFbdCountReg, true); |
| 1893 | // If the COUNTONE bit is set, then always try to throw the trap |
| 1894 | if (dramFbdCountReg.getCOUNTONE()) { |
| 1895 | throwTrap = true; |
| 1896 | } |
| 1897 | // Decrement the count in the Dram FBD register, saturating at 0. |
| 1898 | // If the register tranisitioned from 1 to 0, try to throw the |
| 1899 | // trap. |
| 1900 | else { |
| 1901 | uint64_t count = dramFbdCountReg.getCOUNT(); |
| 1902 | if (count != 0 && --count == 0) { |
| 1903 | throwTrap = true; |
| 1904 | } |
| 1905 | dramFbdCountReg.setCOUNT(count); |
| 1906 | DramFbdCountRegAccess_.access(mcuID, dramFbdCountReg, false); |
| 1907 | } |
| 1908 | |
| 1909 | */ |
| 1910 | |
| 1911 | // If error logging in enabled for this MCU |
| 1912 | if (socErrorLogEnableRegAccess_.get(getFBR)) { |
| 1913 | setL2ErrorStatusReg(paddress.getBANK(), |
| 1914 | N2_L2ErrorStatusReg::setDAC, |
| 1915 | true, |
| 1916 | strand.core_id(), |
| 1917 | 0, paddress.getNative()); |
| 1918 | |
| 1919 | socErrorStatusRegAccess_.set(N2_SocErrorStatusReg::setV); |
| 1920 | N2_SocErrorReg::SocErrRegBitSetFn setFBR = |
| 1921 | N2_SocErrorReg::setSocErrRegMCUFBR(mcuID); |
| 1922 | socErrorStatusRegAccess_.set(setFBR); |
| 1923 | // We should try to throw the trap and there is no already |
| 1924 | // pending trap, toss it ... |
| 1925 | } |
| 1926 | /* |
| 1927 | if (throwTrap && |
| 1928 | socPendingErrStatusRegAccess_.get(N2_SocPendingErrStatusReg::getV) |
| 1929 | == 0) { |
| 1930 | socErrorStatusRegAccess_.set(N2_SocErrorStatusReg::setV); |
| 1931 | N2_SocErrorStatusReg socErrorStatusReg = |
| 1932 | socErrorStatusRegAccess_.set(setFBR); |
| 1933 | socPendingErrStatusRegAccess_. |
| 1934 | setNative(mcuID, socErrorStatusReg.getNative()); |
| 1935 | |
| 1936 | // If FBR errors are fatal, die... |
| 1937 | if (fatal) { |
| 1938 | RIESLING_THROW_RUNTIME_ERROR("N2_MemErrDetector::" |
| 1939 | "processSocRFbdError: don't " |
| 1940 | "support soft reset errors"); |
| 1941 | } |
| 1942 | // otherwise, direct the HW_CORRECTED_ERROR to the correct strand |
| 1943 | else { |
| 1944 | uint_t vcID = socErrorSteeringRegAccess_. |
| 1945 | get(mcuID, N2_SocErrorSteeringReg::getVCID); |
| 1946 | strand.setIntpTaken(vcID, SS_Trap::HW_CORRECTED_ERROR); |
| 1947 | } |
| 1948 | } |
| 1949 | */ |
| 1950 | |
| 1951 | N2_CererWithBitMux cerer(strand.core.cerer); |
| 1952 | if (cerer.l2c_socc()) { |
| 1953 | setDESR(getErrorSteer(paddress.getBANK()),false, N2_Desr::CE_L2C,0); |
| 1954 | |
| 1955 | trapToErrorSteer(&strand,paddress.getBANK(), |
| 1956 | SS_Interrupt::BIT_HW_CORRECTED_ERROR); |
| 1957 | } |
| 1958 | } |
| 1959 | |
| 1960 | |
| 1961 | // The access() method for the N2_CheckedL2ESRAccess class verifies |
| 1962 | // writes to a bank's L2 ESR, as well as processing simple reads. |
| 1963 | // This method offers some assurance that udpates to a bank's L2 ESR |
| 1964 | // conform to N2's behavior. |
| 1965 | // |
| 1966 | // The L2 ESR records the presence of many different errors, but only |
| 1967 | // has one field for error specific information. Also, there is no |
| 1968 | // mechanism to count multiple errors. |
| 1969 | // |
| 1970 | // N2 deals with these constraints by providing a multiple correctable |
| 1971 | // and uncorrectable bit (MEC & MEU). If the L2 ESR has a (un)correctable |
| 1972 | // error already set, then the MEC (or MEU) bit is set instead of the |
| 1973 | // bit corresponding to the error. |
| 1974 | // |
| 1975 | // This method mimics this behavior by checking whether the current |
| 1976 | // value of the L2 ESR already contains error state and modifying how |
| 1977 | // the register is updated if it does. See N2 1.1 PRM Tables 12-23 |
| 1978 | // and 12-24. The present implementation doesn't claim to precisely |
| 1979 | // match the documented behavior, especially if multiple errors occur |
| 1980 | // in one cycle. |
| 1981 | // |
| 1982 | // Because there is only one error address register per bank, N2 only |
| 1983 | // allows more severe errors to overwrite this register once it is |
| 1984 | // set. This method indicates that the error address register should |
| 1985 | // be overridden by returning true. |
| 1986 | // |
| 1987 | // Reads are just passed through to the SS_CsrAccess template's |
| 1988 | // access() method. |
| 1989 | |
| 1990 | bool |
| 1991 | N2_MemErrDetector::N2_CheckedL2ESRAccess::access(uint64_t ndx, |
| 1992 | N2_L2ErrorStatusReg &csr, |
| 1993 | bool isRead) |
| 1994 | { |
| 1995 | if (isRead) { |
| 1996 | SS_CsrAccess<N2_L2ErrorStatusReg>::access(ndx, csr, true); |
| 1997 | return true; |
| 1998 | } |
| 1999 | |
| 2000 | bool updateErrorAddress = false; |
| 2001 | N2_L2ErrorStatusReg oldCsr; |
| 2002 | SS_CsrAccess<N2_L2ErrorStatusReg>::access(ndx, oldCsr, true); |
| 2003 | |
| 2004 | // Check the ESR for the presence of multiple |
| 2005 | // errors (both correctable and uncorrectable) |
| 2006 | if (oldCsr.isVeryUncorrectable()) { |
| 2007 | if (csr.isVeryUncorrectable()) { |
| 2008 | csr.setMEU(1); |
| 2009 | } |
| 2010 | if (csr.isUncorrectable()) { |
| 2011 | csr.setMEU(1); |
| 2012 | } |
| 2013 | if (csr.isCorrectable()) { |
| 2014 | csr.setMEC(1); |
| 2015 | } |
| 2016 | } |
| 2017 | else if (oldCsr.isUncorrectable()) { |
| 2018 | if (csr.isVeryUncorrectable()) { |
| 2019 | csr = oldCsr; |
| 2020 | csr.setMEU(1); |
| 2021 | updateErrorAddress = true; |
| 2022 | } |
| 2023 | if (csr.isUncorrectable()) { |
| 2024 | csr.setMEU(1); |
| 2025 | } |
| 2026 | if (csr.isCorrectable()) { |
| 2027 | csr.setMEC(1); |
| 2028 | } |
| 2029 | } |
| 2030 | // Else check to see if a correctable error already exists |
| 2031 | else if (oldCsr.isCorrectable()) { |
| 2032 | if (csr.isVeryUncorrectable()) { |
| 2033 | csr.setMEC(1); |
| 2034 | updateErrorAddress = true; |
| 2035 | } |
| 2036 | else if (csr.isUncorrectable()) { |
| 2037 | csr.setMEC(1); |
| 2038 | updateErrorAddress = true; |
| 2039 | } |
| 2040 | else if (csr.isCorrectable()) { |
| 2041 | csr.setMEC(1); |
| 2042 | } |
| 2043 | } |
| 2044 | else { |
| 2045 | updateErrorAddress = true; |
| 2046 | } |
| 2047 | |
| 2048 | SS_CsrAccess<N2_L2ErrorStatusReg>::access(ndx, csr, false); |
| 2049 | |
| 2050 | return updateErrorAddress; |
| 2051 | } |
| 2052 | |
| 2053 | |
| 2054 | // setL2ErrorStatusReg() sets an error bit, the core/strand id, and the |
| 2055 | // syndrome in the L2$ error status register for the given bank. |
| 2056 | // The error bit is selected by passing the corresponding member |
| 2057 | // function in bitSetFunction, e.g. N2_L2ErrorStatusReg::setLDAC. |
| 2058 | |
| 2059 | void N2_MemErrDetector::setL2ErrorStatusReg(uint32_t bank, |
| 2060 | ErrorStatusRegBitSetFn bitSetFunction, |
| 2061 | bool isCorrectable, |
| 2062 | uint32_t vcid, |
| 2063 | uint32_t syndrome, |
| 2064 | uint64_t errorAddress) |
| 2065 | { |
| 2066 | N2_L2ErrorStatusReg L2ErrorStatusReg; |
| 2067 | L2ErrorStatusRegAccess_.access(bank, L2ErrorStatusReg, true); |
| 2068 | CALL_MEMBER_FN(L2ErrorStatusReg, bitSetFunction)(1); |
| 2069 | if (isCorrectable) { |
| 2070 | L2ErrorStatusReg.setVEC(1); |
| 2071 | } else { |
| 2072 | L2ErrorStatusReg.setVEU(1); |
| 2073 | } |
| 2074 | L2ErrorStatusReg.setVCID(vcid); |
| 2075 | L2ErrorStatusReg.setSYND(syndrome); |
| 2076 | if (L2ErrorStatusRegAccess_.access(bank, L2ErrorStatusReg, false)) { |
| 2077 | // setL2ErrorAddressReg() sets the address field in the L2$ error |
| 2078 | // address register for the given bank. |
| 2079 | N2_L2ErrorAddressReg L2ErrorAddressReg; |
| 2080 | L2ErrorAddressRegAccess_.access(bank, L2ErrorAddressReg, true); |
| 2081 | L2ErrorAddressReg.setADDRESS(errorAddress >> |
| 2082 | N2_L2ErrorAddressReg::bitSizeRSVD0); |
| 2083 | L2ErrorAddressRegAccess_.access(bank, L2ErrorAddressReg, false); |
| 2084 | } |
| 2085 | L2ErrorStatusRegAccess_.access(bank, L2ErrorStatusReg, true); |
| 2086 | } |
| 2087 | |
| 2088 | |
| 2089 | // setN2_L2NotdataErrorReg() sets the NDSP bit, the core/strand id, and the |
| 2090 | // syndrome in the L2$ NotData error status register for the given bank. |
| 2091 | // The MEND is set if either NDSP or NDDM is already set. |
| 2092 | void |
| 2093 | N2_MemErrDetector::setL2NotdataErrorReg(uint32_t bank, |
| 2094 | uint32_t vcid, |
| 2095 | uint64_t errorAddress) |
| 2096 | { |
| 2097 | N2_L2NotdataErrorReg L2NotdataErrorReg; |
| 2098 | L2NotdataErrorRegAccess_.access(bank, L2NotdataErrorReg, true); |
| 2099 | if (!L2NotdataErrorReg.getNDSP() && !L2NotdataErrorReg.getNDDM()) { |
| 2100 | L2NotdataErrorReg.setVCID(vcid); |
| 2101 | L2NotdataErrorReg.setADDRESS(errorAddress >> |
| 2102 | N2_L2NotdataErrorReg::bitSizeRSVD0); |
| 2103 | L2NotdataErrorRegAccess_.access(bank, L2NotdataErrorReg, false); |
| 2104 | } |
| 2105 | else { |
| 2106 | L2NotdataErrorRegAccess_.set(bank, |
| 2107 | N2_L2NotdataErrorReg::setMEND); |
| 2108 | } |
| 2109 | } |