/*
 * Copyright 2010-2017 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Disclaimer: The codes contained in these modules may be specific to
 * the Intel Software Development Platform codenamed Knights Ferry,
 * and the Intel product codenamed Knights Corner, and are not backward
 * compatible with other Intel products. Additionally, Intel will NOT
 * support the codes or instruction set in future products.
 *
 * Intel offers no warranty of any kind regarding the code. This code is
 * licensed on an "AS IS" basis and Intel is not obligated to provide
 * any support, assistance, installation, training, or other services
 * of any kind. Intel is also not obligated to provide any updates,
 * enhancements or extensions. Intel specifically disclaims any warranty
 * of merchantability, non-infringement, fitness for any particular
 * purpose, and any other warranty.
 *
 * Further, Intel disclaims all liability of any kind, including but
 * not limited to liability for infringement of any proprietary rights,
 * relating to the use of the code, even if Intel is notified of the
 * possibility of such liability. Except as expressly stated in an Intel
 * license agreement provided with this code and agreed upon with Intel,
 * no license, express or implied, by estoppel or otherwise, to any
 * intellectual property rights is granted herein.
 */

/*
 * RAS handler for uncore MC events
 *
 * Contains code to intercept MC events, collect information
 * from uncore MCA banks and handle the situation.
 *
 * In case of a severe event, defined by corrupted context,
 * the handler will add a record of the event to the designated
 * EEPROM hanging off the Over Clocking I2C bus. After that,
 * a message is sent to the SMC (enabling IPMI notifications),
 * and finally a message is sent to the host via the MC SCIF
 * connection.
 *
 * Lesser events will also be sent to the host on an 'FYI' basis,
 * but no record will be stored in the event log.
 *
 * This is in most respects similar to the reaction to a severe
 * core MC event. The differences are the MC bank access method
 * (MMIO), and that the event is delivered via an interrupt instead
 * of an exception. Still, the handler cannot expect any support
 * from the OS.
 */

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/nmi.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/mic/mic_common.h>
#include <asm/mic/mic_knc/autobaseaddress.h>
#include <asm/mic/mic_knc/micsboxdefine.h>
#include "micras.h"


/*
 * Hooks placed in the native machine check handler
 * See file arch/x86/kernel/traps.c for placement
 *
 *  nmi		Entered NMI exception handler.
 *		Called before any other tests, which allows us
 *		to test for and handle un-core MCA events before
 *		the traditional NMI handling.
 *		Note that the mce-inject mechanism also uses
 *		NMIs to distribute calls to do_machine_check().
 */

extern int (*mca_nmi)(int);



/*
 * Table of un-core MCA banks.
 * Though there are differences in register count and sizes, un-core bank
 * registers are always spaced 8 bytes apart, so all we need to know is
 * the location of the first MCA bank register (CTL) to find them.
 * If a bank is present, the bank register offsets for ctl, status, addr,
 * and misc are thus 0, 8, 16, and 24 respectively.
 * Default CTL masks are pulled from the register documentation.
 * Some SKUs don't have support for all BOXes, but that will be handled
 * at runtime in the support code, not at compile time by this table.
 */


#ifdef CONFIG_ML1OM
#define SBOX_DEF	0x000e		/* All (7) */
#define DBOX_DEF	0x0003		/* All (2) */
#define GBOX_DEF	0x0003		/* All (2) */
#endif
#ifdef CONFIG_MK1OM
#define SBOX_DEF	0x03ce		/* All - PCIe errors (7) */
#define DBOX_DEF	0x000f		/* All (4) */
#define GBOX_DEF	0x3ffffffff	/* All (34) */
#define TBOX_DEF	0x001f		/* All (5) */
#endif

#define MCU_CTL_64	(1 << 0)	/* Bank has 64 bit CTL register */
#define MCU_NO_ADDR	(1 << 1)	/* Bank has no ADDR register */
#define MCU_ADDR_32	(1 << 2)	/* Bank has 32 bit ADDR register */
#define MCU_NO_MISC	(1 << 3)	/* Bank has no MISC register */
#define MCU_MISC_64	(1 << 4)	/* Bank has 64 bit MISC register */

#define MCU_CTRL	0
#define MCU_STAT	8
#define MCU_ADDR	16
#define MCU_MISC	24

typedef struct _mcu_rec {
  uint8_t	num;				/* 'BOX' count */
  uint8_t	org;				/* Origin code */
  uint8_t	qflg;				/* Quirk flags */
  uint16_t	ofs;				/* MCA bank base offset */
  uint64_t	ctl;				/* Initial CTL mask */
  uint32_t	(*rl)(int, uint32_t);		/* 32-bit MMIO read */
  void		(*wl)(int, uint32_t, uint32_t);	/* 32-bit MMIO write */
  uint64_t	(*rq)(int, uint32_t);		/* 64-bit MMIO read */
  void		(*wq)(int, uint32_t, uint64_t);	/* 64-bit MMIO write */
} McuRec;
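
/*
 * Example (illustration only): given the mcu_src[] table below,
 * reading the STATUS register of DBOX #1 amounts to
 *   mcu_src[1].rq(1, mcu_src[1].ofs + MCU_STAT);
 * i.e. a 64-bit MMIO read 8 bytes past that bank's CTL register.
 */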


static McuRec mcu_src[] = {
  { 1,        MC_ORG_SBOX, MCU_MISC_64, SBOX_MCX_CTL_LO,
	      SBOX_DEF, mr_sbox_rl, mr_sbox_wl, mr_sbox_rq, mr_sbox_wq },
  { DBOX_NUM, MC_ORG_DBOX, MCU_NO_MISC, DBOX_MC2_CTL,
	      DBOX_DEF, mr_dbox_rl, mr_dbox_wl, mr_dbox_rq, mr_dbox_wq },
  { GBOX_NUM, MC_ORG_GBOX, MCU_CTL_64, GBOX_FBOX_MCA_CTL_LO,
	      GBOX_DEF, mr_gbox_rl, mr_gbox_wl, mr_gbox_rq, mr_gbox_wq },
#ifdef CONFIG_MK1OM
  { TBOX_NUM, MC_ORG_TBOX, MCU_CTL_64 | MCU_NO_MISC | MCU_ADDR_32, TXS_MCX_CONTROL,
	      TBOX_DEF, mr_tbox_rl, mr_tbox_wl, mr_tbox_rq, mr_tbox_wq },
#endif
};

#define GBOX_BROKEN	1	/* Set if GBOX MCA bank is broken */

#if GBOX_BROKEN
/*
 * Si design managed to break the GBOX MCA bank concept
 * by not filling useful data into the ADDR and MISC registers.
 * Instead they use a bunch of registers in another part
 * of the GBOX (mbox to be specific) to hold this info.
 * In order to get at the right register it is necessary
 * to partially decode the STATUS register and from there
 * select a GBOX.MBOX register.
 * Since the new registers are all 32 bits wide, we'll stick
 * the value into the MISC register if the Misc_V bit of STATUS
 * is not set. The following table is used for register selection:
 *
 *  model code	base	width	Chan	Notes
 *	0	017c	32	0	26 bit address, CRC (retrain)
 *	1	097c	32	1	26 bit address, CRC (retrain)
 *	2	01e0	32	0	26 bit address, ECC
 *	3	09e0	32	1	26 bit address, ECC
 *	4	01dc	32	0	26 bit address, UC CAPE
 *	5	09dc	32	1	26 bit address, UC CAPE
 *	31	01a4	32	0	26 bit address, UC ECC
 *	32	09a4	32	1	26 bit address, UC ECC
 *
 * Note: the model code is simply the enable bit number in CTL.
 */

static struct liu {
  uint16_t	mcode;
  uint16_t	base;
} liu[] = {
  {  0, 0x17c },	/* Correctable CRC (retrain) ch 0 */
  {  1, 0x97c },	/* Correctable CRC (retrain) ch 1 */
  {  2, 0x1e0 },	/* Correctable ECC, ch 0 */
  {  3, 0x9e0 },	/* Correctable ECC, ch 1 */
  {  4, 0x1dc },	/* Uncorrectable CAPE, ch 0 */
  {  5, 0x9dc },	/* Uncorrectable CAPE, ch 1 */
  { 31, 0x1a4 },	/* Uncorrectable ECC, ch 0 */
  { 32, 0x9a4 }		/* Uncorrectable ECC, ch 1 */
};

static void
mcu_gbox_fixup(McuRec * mr, int num, MceInfo * mi)
{
  int		i;
  uint16_t	mcode;

  /*
   * Skip if Status.Misc_v is set
   */
  if (mi->status & (1ULL << 59))
    return;

  /*
   * Get the model code and, if it's in the array, read
   * the addressed register into MISC. We don't set the
   * Status.Misc_v bit because we want to distinguish
   * this hack from a real MCA bank register.
   */
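  /*
   * Example: a STATUS with bits 31:16 equal to 3 selects GBOX.MBOX
   * register 0x9e0, i.e. a correctable ECC event on channel 1.
   */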
  mcode = GET_BITS(31, 16, mi->status);
  for(i = 0; i < ARRAY_SIZE(liu); i++)
    if (liu[i].mcode == mcode) {
      mi->misc = (uint64_t) mr->rl(num, liu[i].base);
      break;
    }
}
#endif

/*
 * Read the Ctrl, Addr and Misc registers from an un-core MCA bank.
 * The Status register is read/cleared in mcu_scan().
 */

static void
mcu_read(McuRec * mr, int num, MceInfo * mi)
{
  if (mr->qflg & MCU_CTL_64)
    mi->ctl = mr->rq(num, mr->ofs + MCU_CTRL);
  else
    mi->ctl = (uint64_t) mr->rl(num, mr->ofs + MCU_CTRL);

  if (mr->qflg & MCU_NO_ADDR)
    mi->addr = 0;
  else {
    if (mr->qflg & MCU_ADDR_32)
      mi->addr = (uint64_t) mr->rl(num, mr->ofs + MCU_ADDR);
    else
      mi->addr = mr->rq(num, mr->ofs + MCU_ADDR);
  }

  if (mr->qflg & MCU_NO_MISC)
    mi->misc = 0;
  else {
    if (mr->qflg & MCU_MISC_64)
      mi->misc = mr->rq(num, mr->ofs + MCU_MISC);
    else
      mi->misc = (uint64_t) mr->rl(num, mr->ofs + MCU_MISC);
  }

#if GBOX_BROKEN
  if (mr->org == MC_ORG_GBOX)
    mcu_gbox_fixup(mr, num, mi);
#endif
}


/*
 * Reset one un-core MCA bank.
 * Any quirks go here.
 */

static void
mcu_reset(McuRec * mr, int num, int arm)
{
  uint64_t	ctl;

  mr->wq(num, mr->ofs + MCU_STAT, 0);

  if (! (mr->qflg & MCU_NO_ADDR)) {
    if (mr->qflg & MCU_ADDR_32)
      mr->wl(num, mr->ofs + MCU_ADDR, 0);
    else
      mr->wq(num, mr->ofs + MCU_ADDR, 0);
  }

  if (! (mr->qflg & MCU_NO_MISC)) {
    if (mr->qflg & MCU_MISC_64)
      mr->wq(num, mr->ofs + MCU_MISC, 0);
    else
      mr->wl(num, mr->ofs + MCU_MISC, 0);
  }

  ctl = arm ? mr->ctl : 0;

#ifdef CONFIG_MK1OM
  if (ctl && mr->org == MC_ORG_SBOX && mic_hw_stepping(0) == KNC_A_STEP)
    ctl &= ~PUT_BIT(3, 1);		/* A0 SBOX 'unclaimed address' bug */
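    /* e.g. the KnC A0 SBOX default of 0x03ce is then armed as 0x03c6 */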

  if (ctl && mr->org == MC_ORG_GBOX && mr_mch() != 16)
    ctl &= ~(uint64_t) PUT_BIT(6, 1);	/* B0 GBOX 'Invalid Channel' (SKU 3 & 4) */
#endif

  if (mr->qflg & MCU_CTL_64)
    mr->wq(num, mr->ofs + MCU_CTRL, ctl);
  else
    mr->wl(num, mr->ofs + MCU_CTRL, (uint32_t) ctl);
}


/*
 * Un-core MC bank pre-scan.
 * Walk through all un-core MC sources to see if any events are pending.
 * Stops on the 1st match where STATUS has the VAL bit set. On some
 * BOXes, like the GBOX, an interrupt may be signalled without the EN
 * bit being set. See HSD 4116374 for details.
 */

static int
mcu_prescan(void)
{
  int		i, j;
  uint64_t	status;
  struct _mcu_rec * mr;

  for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
    mr = mcu_src + i;

#ifdef CONFIG_MK1OM
    if (mr->org == MC_ORG_TBOX && !mr_txs())
      continue;
#endif

    for(j = 0; j < mr->num; j++) {
      status = mr->rq(j, mr->ofs + MCU_STAT);
      if (status & MCI_STATUS_VAL)
	return 1;
    }
  }

  return 0;
}


/*
 * Un-core MC bank scanner.
 * Walks through all un-core MC sources for new events.
 * If any are found, they are processed the same way as core events.
 */

static int
mcu_scan(void)
{
  MceInfo	mc, uc;
  int		gone, seen;
  int		i, j;
  struct _mcu_rec * mr;

  /*
   * Walk the list of known un-core MC sources
   */
  gone = seen = 0;
  memset(&uc, 0, sizeof(uc));
  for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
    mr = mcu_src + i;

#ifdef CONFIG_MK1OM
    if (mr->org == MC_ORG_TBOX && !mr_txs())
      continue;
#endif

    for(j = 0; j < mr->num; j++) {

      /*
       * Read status to see if we have something of interest.
       * As per HSD 4116374, the status register is cleared
       * after reading if it had valid content.
       *TBD: Clear unconditionally?
       */
      mc.status = mr->rq(j, mr->ofs + MCU_STAT);
      if (mc.status & MCI_STATUS_VAL)
	mr->wq(j, mr->ofs + MCU_STAT, 0);
      else
	continue;

      /*
       * Bank had valid content (VAL bit set).
       * Verify the event was subscribed to (EN bit set).
       * If not, the event is ignored.
       */
      if (! (mc.status & MCI_STATUS_EN))
	continue;

      /*
       * Valid and enabled event, read the remaining bank registers.
       */
      seen++;
      mcu_read(mr, j, &mc);

      /*
       * Fill out the blanks in the MceInfo record
       */
      mc.org = mr->org;
      mc.id = j;
      mc.stamp = get_seconds();
      mc.flags = (mc.status & MCI_STATUS_UC) ? MC_FLG_FATAL : 0;

      /*
       * If there is any way to detect injected errors, then this is
       * the place to do so and indicate it with the MC_FLG_FALSE flag.
       */

      if (mc.flags & MC_FLG_FATAL) {
#ifdef CONFIG_MK1OM
#if MC_VERBOSE
	ee_printk("Uncore fatal MC: org %d, id %d, status %llx\n", mc.org, mc.id, mc.status);
#endif

	/*
	 * Log UC events in the eeprom.
	 */
	micras_mc_log(&mc);
	mc.flags |= MC_FLG_LOG;

	/*
	 * Notify the SMC that we've had a serious machine check error.
	 */
	micras_mc_ipmi(&mc, 1);
#endif
	/*
	 * Remember the 1st fatal (UC) event
	 */
	if (! gone++)
	  uc = mc;
      }

      /*
       * Notify the host
       */
      micras_mc_send(&mc, 1);

      /*
       * Filter corrected errors. A non-zero mask from the filter
       * names the CTL enable bit(s) to turn off, which throttles
       * a flood of corrected errors from the same source.
       */
      if (! (mc.flags & MC_FLG_FATAL)) {
	uint64_t tsc, msk;

	tsc = rdtsc();
	msk = micras_mc_filter(&mc, tsc, 1);
	if (msk) {
#if MC_VERBOSE
	  ee_printk("Uncore filter: org %d, id %d, ctrl %llx, mask %llx\n", mc.org, mc.id, mc.ctl, msk);
#endif
	  if (mr->qflg & MCU_CTL_64)
	    mr->wq(j, mr->ofs + MCU_CTRL, mc.ctl & ~msk);
	  else
	    mr->wl(j, mr->ofs + MCU_CTRL, (uint32_t)(mc.ctl & ~msk));
	}
      }

      /*
       * Any event post-processing goes here.
       * This would be things like cache line refresh and such.
       * Actual algorithms are TBD.
       */
    }
  }

#if RAS_HALT
  if (gone) {
    atomic_inc(&mce_entry);
    panic("FATAL un-core machine check event:\n"
	  "bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n",
	  uc.org, uc.id, uc.ctl, uc.status, uc.addr, uc.misc);
  }
#endif

  return seen;
}


/*
 * NMI handler.
 *
 * Once we get control in the 1st interrupt (NMI or regular), we'll
 * use IPIs from the local APIC to force all active CPUs into
 * our RAS NMI handler, similar to the core MC handler.
 * After that, the same logic as for the generic MC handler is
 * applied to corral all CPUs through well-defined rendez-vous
 * points where only one CPU gets to run the un-core MC event
 * scan while everybody else is sitting in a holding pen.
 * If containment wasn't an issue, we could simply let the BP
 * run the scan without involving the other CPUs at all.
 */

#define SPINUNIT	50
#define SERIAL_MCU	0
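
/*
 * SPINUNIT is the number of nano-seconds spent per poll in
 * mcu_timed_out(); with the 1 second budget the callers pass in,
 * that is at most ~20 million polls per rendez-vous gate.
 * SERIAL_MCU, when non-zero, releases the CPUs from the 'hold'
 * rendez-vous one at a time instead of all at once.
 */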

struct cpumask	mcu_exc_mask;		/* NMI recipients */
static int	mcu_cpu = -1;		/* SBOX target CPU */
#if MCU_NMI
static uint64_t	mcu_redir;		/* SBOX I/O-APIC redirection entry */
static uint64_t	mcu_old_redir;		/* Restore value for redirection entry */
#else
unsigned int	mcu_eoi;		/* 1st interrupt from local APIC */
#endif
static atomic_t	mcu_callin;		/* Entry rendez-vous gate */
static atomic_t	mcu_leavin;		/* Hold rendez-vous gate */


static int
mcu_timed_out(int64_t * timeout)
{
  if (*timeout < SPINUNIT)
    return 1;

  *timeout -= SPINUNIT;
  touch_nmi_watchdog();
  ndelay(SPINUNIT);

  return 0;
}


static int
mcu_wait(void)
{
  int		cpus, order;
  int64_t	timeout;

  cpus = num_online_cpus();
  timeout = 1 * NSEC_PER_SEC;		/* 1 Second */

  /*
   * Flush all caches
   */

  /*
   * 'Entry' rendez-vous point.
   * Wait here until all CPUs have entered.
   */
  order = atomic_inc_return(&mcu_callin);
  while(atomic_read(&mcu_callin) != cpus) {
    if (mcu_timed_out(&timeout)) {
      /*
       * Timeout waiting for CPU enter rendez-vous
       */
      return -1;
    }
  }

  /*
   * 'Hold' rendez-vous point.
   * All CPUs drop by here 'simultaneously'.
   * The first CPU that 'enter'ed (order of 1) will
   * fall thru while the others wait until their
   * number comes up in the 'leavin' counter
   * (or until a timeout happens). This also has a
   * serializing effect, where one CPU leaves this
   * loop at a time.
   */
  if (order == 1) {
#if SERIAL_MCU
    atomic_set(&mcu_leavin, 1);
#endif
  }
  else {
    while(atomic_read(&mcu_leavin) < order) {
      if (mcu_timed_out(&timeout)) {
	/*
	 * Timeout waiting in CPU hold rendez-vous
	 */
	return -1;
      }
    }
  }

  return order;
}


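/*
 * Second half of the rendez-vous maze.
 * Returns 0 when the maze was traversed cleanly and -1 if 'order'
 * was bad or a timeout occurred, in which case the rendez-vous
 * counters are reset.
 */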
static int
mcu_go(int order)
{
  int		ret;
  int64_t	timeout;

  ret = -1;
  if (order < 0)
    goto mcu_reset;

#if SERIAL_MCU
  /*
   * If any 'per-CPU' activity is needed in isolation
   * (one CPU at a time) then that code needs to go here.
   */

  atomic_inc(&mcu_leavin);		/* Next CPU out of hold */
#endif

  timeout = NSEC_PER_SEC;		/* 1 Second */
  if (order == 1) {
    int cpus;

    /*
     * The first CPU that entered (order of 1) waits here
     * for the others to leave the 'hold' loop in mcu_wait()
     * and enter the 'exit' rendez-vous loop below.
     * Once they are there, it will run the uncore MCA bank
     * scan while the others are parked in the 'exit' loop below.
     */
    cpus = num_online_cpus();
#if SERIAL_MCU
    while(atomic_read(&mcu_leavin) <= cpus) {
      if (mcu_timed_out(&timeout)) {
	/*
	 * Timeout waiting for CPU exit rendez-vous
	 */
	goto mcu_reset;
      }
    }
#else
    atomic_set(&mcu_leavin, cpus);
#endif
    mcu_scan();
    ret = 0;
  }
  else {
    /*
     * Exit rendez-vous point.
     */
    while(atomic_read(&mcu_leavin) != 0) {
      if (mcu_timed_out(&timeout)) {
	/*
	 * Timeout waiting in CPU exit rendez-vous
	 */
	goto mcu_reset;
      }
    }
    return 0;
  }

  /*
   * Reset rendez-vous counters, letting all CPUs
   * leave this function 'simultaneously'.
   */
mcu_reset:
  atomic_set(&mcu_callin, 0);
  atomic_set(&mcu_leavin, 0);
  return ret;
}


/*
 * NMI exception handler.
 * Uncertain whether all cpumask_* functions imply barriers,
 * so erring on the safe side, explicit barriers are used.
 */

#if BEAM_TEST
static int
mcu_nmi(int cpu)
{
#ifdef CONFIG_MK1OM
  uint32_t	mcg_status_lo, mcg_status_hi;
#endif
  struct _mcu_rec * mr;
  MceInfo	mc;
  int		i, j;

  if (cpu != mcu_cpu)
    return 0;

  if (! mcu_prescan())
    return 0;

  wbinvd();

#ifdef CONFIG_MK1OM
  rdmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi);
  wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo | MCG_STATUS_MCIP, mcg_status_hi);
#endif

  for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
    mr = mcu_src + i;

#ifdef CONFIG_MK1OM
    if (mr->org == MC_ORG_TBOX && !mr_txs())
      continue;
#endif

    for(j = 0; j < mr->num; j++) {
      mc.status = mr->rq(j, mr->ofs + MCU_STAT);

      if (! (mc.status & MCI_STATUS_VAL))
	continue;

      if (! (mc.status & MCI_STATUS_EN)) {
	mr->wq(j, mr->ofs + MCU_STAT, 0);
	continue;
      }

      mcu_read(mr, j, &mc);
      mr->wq(j, mr->ofs + MCU_STAT, 0);

      mc.org = mr->org;
      mc.id = j;
      mc.stamp = get_seconds();
      mc.flags = (mc.status & MCI_STATUS_UC) ? MC_FLG_FATAL : 0;

      micras_mc_send(&mc, 1);
    }
  }

#ifdef CONFIG_MK1OM
  wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi);
#endif
  return 1;

  /*
   * Damn compiler options !!!!!!
   * Don't want more changes than this routine, so
   * added dummies to shut up gcc about unused code.
   */
  i = mcu_wait();
  mcu_go(i);
}
#else

static atomic_t	mcu_entry;	/* Handler nesting count, drained in mcu_exit() */

static int
mcu_nmi(int cpu)
{
#ifdef CONFIG_MK1OM
  uint32_t	mcg_status_lo, mcg_status_hi;
#endif
  int		order, eoi;

  atomic_inc(&mcu_entry);

  /*
   * Get MCA status from the SBOX.
   */
#if 0
  /*
   * If no source bits are set, this was not an un-core MCA.
   * This would work if SBOX_MCA_INT_STAT actually worked
   * as described both in the HAS and the register specification.
   * Unfortunately, it doesn't, as per tribal knowledge errata.
   */
  uint32_t	int_stat, int_en;

  int_en = mr_sbox_rl(0, SBOX_MCA_INT_EN);
  int_stat = mr_sbox_rl(0, SBOX_MCA_INT_STAT);
  if (! (int_en & int_stat)) {
    atomic_dec(&mcu_entry);
    return 0;
  }
#else
  /*
   * Instead of having a single source of pending un-core MCA events,
   * we now have to walk all BOXes to check if there is a valid event
   * pending in one of them. That is much more expensive, as we have
   * to check this on all NMIs, including our own cascade NMIs used
   * to corral all CPUs in their rendez-vous point(s). We try to avoid
   * this scan if there already is an un-core NMI in progress.
   * We know that:
   *  un-core MCA NMIs are sent to just one CPU, mcu_cpu
   *  CPUs targeted in the cascade are in mcu_exc_mask
   *  a non-zero atomic variable 'mcu_callin' tells a cascade is in progress
   */
  if (!cpumask_empty(&mcu_exc_mask))
    goto invited;
  if (cpu != mcu_cpu) {
    atomic_dec(&mcu_entry);
    return 0;
  }

  /*
   * On the SBOX target CPU with no un-core handling in progress!
   * Then scan all BOXes for valid pending events.
   * If there aren't any, this is a false alarm and
   * we simply return.
   */
  if (! mcu_prescan()) {
    atomic_dec(&mcu_entry);
    return 0;
  }

invited:
#endif

  /*
   * Flush all caches.
   * This is uncore, so it should not be necessary to
   * empty the internal (L1) caches, but it doesn't hurt either.
   */
  wbinvd();

  /*
   * We do not want to be interrupted by a core MC
   * exception while handling an NMI. We can block
   * core MC events by setting MCG_STATUS_MCIP.
   * This is an MSR, so it has to be done on all CPUs.
   * On KnC that is; KnF does not have that MSR.
   */
#ifdef CONFIG_MK1OM
  rdmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi);
  wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo | MCG_STATUS_MCIP, mcg_status_hi);
#endif

  /*
   * Special for the SBOX NMI target CPU:
   * - disconnect un-core MC lines from the SBOX I/O-APIC, such
   *   that we don't get stacked NMIs in the local APICs.
   * - simulate an NMI broadcast by sending NMIs to all _other_
   *   active CPUs via IPIs. The SBOX could do a broadcast,
   *   but that would send NMIs to sleeping CPUs too, which
   *   we prefer to avoid if possible.
   *TBD: should creating the mcu_exc_mask be protected by a
   *     lock, similar to core events? Who can interfere?
   */
  if (cpu == mcu_cpu) {
    mr_sbox_wl(0, SBOX_MCA_INT_EN, 0);
    cpumask_copy(&mcu_exc_mask, cpu_online_mask);
    cpumask_clear_cpu(cpu, &mcu_exc_mask);
    smp_wmb();
    // apic->send_IPI_mask(&mcu_exc_mask, NMI_VECTOR);
    apic->send_IPI_allbutself(NMI_VECTOR);
#if !MCU_NMI
    if (mcu_eoi) {
      smp_rmb();
      cpumask_set_cpu(cpu, &mcu_exc_mask);
      smp_wmb();
      mcu_eoi = 0;
    }
#endif
  }

  /*
   * Corral all CPUs through the rendez-vous point maze.
   * It guarantees that:
   * - No CPU leaves mcu_wait() until all have entered.
   * - One CPU leaves mcu_wait() at a time.
   * - No CPU leaves mcu_go() until all have entered.
   * - While one CPU is in transit between mcu_wait()
   *   and mcu_go(), all other CPUs are sitting in
   *   tight busy-wait loops in either function.
   * - All CPUs leave mcu_go() at the same time.
   * If there is any 'per-cpu' activity that needs to be
   * run in isolation, it must be placed between mcu_wait()
   * and mcu_go().
   */
  order = mcu_wait();
  if (mcu_go(order)) {
    /*
     * Timeout waiting at one of the rendez-vous points.
     * Scan the un-core MCA banks just in case.
     */
    mcu_scan();
  }

  /*
   * Special for the SBOX NMI target CPU:
   * - reconnect un-core MC lines through to the SBOX I/O-APIC.
   *   If new events are already pending, then this will
   *   result in a 'rising-edge' trigger to the I/O-APIC.
   */
  if (cpu == mcu_cpu)
    mr_sbox_wl(0, SBOX_MCA_INT_EN, mr_txs() ? 0x0fffff07 : 0xff07);

  /*
   * If this CPU got its NMI from an IPI, then it must
   * send an ACK to its local APIC (I think).
   */
  smp_rmb();
  eoi = cpumask_test_and_clear_cpu(cpu, &mcu_exc_mask);
  smp_wmb();
  if (eoi)
    ack_APIC_irq();

  /*
   * Restore core MCG status and return 1, indicating to the
   * kernel NMI handler that we've handled it.
   *TBD: reduce to one write per core instead of one per thread?
   */
#ifdef CONFIG_MK1OM
  wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi);
#endif
  atomic_dec(&mcu_entry);
  return 1;
}
#endif


#if !MCU_NMI
/*
 * MCA handler if using standard interrupts.
 * It's just a trampoline to convert a regular interrupt
 * into an NMI, which is only needed if the I/O-APIC can't
 * generate an NMI.
 *
 *TBD: remove all this? It is not used on KnC, and the KnFs
 *     I've tested on have all been OK sending NMIs.
 */

static irqreturn_t
sbox_handler(int irq, void * tag)
{
  /*
   * Convert this regular interrupt into an NMI.
   */
  mcu_cpu = smp_processor_id();
  mcu_eoi = 1;
  apic->send_IPI_self(NMI_VECTOR);
  return IRQ_HANDLED;
}
#endif


/*
 * Reset all uncore MCA banks to defaults
 */

void
box_reset(int arm)
{
  int		i, j;
  struct _mcu_rec * mr;

  for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
    mr = mcu_src + i;

#ifdef CONFIG_MK1OM
    if (mr->org == MC_ORG_TBOX && !mr_txs())
      continue;
#endif

    for(j = 0; j < mr->num; j++) {
      uint64_t	status;

      /*
       *TBD: Do we want to pick up existing MCA events or drop
       *     them because we don't know _when_ they occurred?
       *     Reporting them would require an internal buffer because
       *     it's unlikely the SCIF MC session is up at this point.
       *     For now we just enter the events into the system log.
       */
      status = mr->rq(j, mr->ofs + MCU_STAT);
      if (status & MCI_STATUS_VAL) {
	MceInfo mc;

	mcu_read(mr, j, &mc);
	printk("RAS.uncore: discard MC event:\n"
	       "bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n",
	       mr->org, j, mc.ctl, status, mc.addr, mc.misc);
      }

      /*
       * Reset MCA bank registers.
       */
      mcu_reset(mr, j, arm);
    }
  }
}


/*
 * Setup interrupt handlers by hooking into the SBOX's I/O-APIC.
 * For now, we send an NMI to a single CPU and let it process the
 * event. This may need to be expanded into a broadcast NMI, similar
 * to what the generic core MC event handler does, in order to keep
 * containment as high as we possibly can.
 *
 *TBD: code a dual rendez-vous mechanism on all active CPUs.
 */

int __init
mcu_init(void)
{
#if MC_VERBOSE
  int		i, j;
#endif

  if (mce_disabled) {
    printk("RAS.uncore: disabled\n");
  }
  else {
    /*
     * Clear the rendez-vous counters
     */
    atomic_set(&mcu_callin, 0);
    atomic_set(&mcu_leavin, 0);

#if MC_VERBOSE
    /*
     * For debug only:
     * Record all SBOX I/O-APIC registers to the kernel log
     */
    printk("SBOX_APICIDR: %x\n", mr_sbox_rl(0, SBOX_APICIDR));
    printk("SBOX_APICVER: %x\n", mr_sbox_rl(0, SBOX_APICVER));
    printk("SBOX_APICAPR: %x\n", mr_sbox_rl(0, SBOX_APICAPR));
    for(i = 0; i < 26 ; i++)
      printk("APICRT%d: %llx\n", i, mr_sbox_rq(0, SBOX_APICRT0 + (8 * i)));
    for(i = 0; i < 8 ; i++)
      printk("APICICR%d: %llx\n", i, mr_sbox_rq(0, SBOX_APICICR0 + (8 * i)));
    printk("SBOX_MCA_INT_EN: %x\n", mr_sbox_rl(0, SBOX_MCA_INT_EN));
    printk("SBOX_MCA_INT_STAT: %x\n", mr_sbox_rl(0, SBOX_MCA_INT_STAT));
#endif

    /*
     * Disconnect the un-core MC lines from the SBOX I/O-APIC, set up
     * the individual BOXes, and clear any un-core MC pending flags
     * from the SBOX I/O-APIC.
     */
    mr_sbox_wl(0, SBOX_MCA_INT_EN, 0);
    box_reset(1);
    mr_sbox_wl(0, SBOX_MCA_INT_STAT, 0);

    /*
     * Setup the SBOX I/O-APIC.
     * Un-core MC events are routed through a mask in register
     * SBOX_MCA_INT_EN into I/O-APIC redirection table entry #16.
     * Ideally we want all uncore MC events to be handled similarly
     * to core MCAs, which means we'd like an NMI on all CPUs.
     * On KnF the I/O-APIC may not trigger an NMI (PoC security),
     * and on KnC, where NMI delivery is possible, it appears not
     * to be ideal to broadcast it to all CPUs because it could
     * wake up cores put to sleep by power management rules.
     * See the MCA HAS, SBOX HAS Vol 4, and A0 Vol 2 for details.
     *
     * The redirection table entry has the following format:
     *  47:32	Destination ID field
     *  17	Interrupt set (testing: trigger an interrupt)
     *  16	Interrupt mask (0=enable, 1=disable)
     *  15	Trigger mode (0=edge, 1=level)
     *  14	Remote IRR (0=inactive, 1=accepted)
     *  13	Interrupt polarity (0=active_high, 1=active_low)
     *  12	Delivery status (0=idle, 1=send_pending)
     *  11	Destination mode (0=physical, 1=logical)
     *  10:8	Delivery mode (0=fixed, low, SMI, rsvd, NMI, INIT, rsvd, ext)
     *  7:0	Interrupt vector
     *
     * The I/O-APIC input is 'rising edge', so we need to select
     * it to be edge triggered, active high.
     */
#if MCU_NMI
    /*
     * If event delivery by NMI is preferred, we want it delivered on
     * the BP. There is already an NMI handler present, so we have to
     * tap into the existing NMI handler for the event notifications.
     *
     * The bit-fiddling below says:
     *   NMI delivery | Destination CPU APIC ID
     */
    mcu_cpu = 0;
    mcu_redir = PUT_BITS(10, 8, 4) | PUT_BITS(47, 32, (uint64_t) cpu_data(mcu_cpu).apicid);
    mcu_old_redir = mr_sbox_rq(0, SBOX_APICRT16);
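    /*
     * Program the entry with the mask bit (16) set first, then
     * write it again unmasked to arm it.
     */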
    mr_sbox_wq(0, SBOX_APICRT16, mcu_redir | PUT_BITS(16, 16, 1));
    mr_sbox_wq(0, SBOX_APICRT16, mcu_redir);
#else
    /*
     * If event delivery by regular interrupt is preferred, then all
     * I/O-APIC setup will be handled by calling request_irq(16,..).
     * There is no guarantee that the event will be sent to the BP
     * (though it's more than likely), so we'll defer identifying the
     * event handling CPU (mcu_cpu) till we receive the callback from
     * the interrupt handling sub-system.
     * The sbox_handler() function just converts the callback into an
     * NMI, because the only way containment can be achieved is to be
     * able to lock down the system completely, which is not realistic
     * using regular interrupts.
     */
    mcu_eoi = 0;
    (void) request_irq(16, sbox_handler, IRQF_TRIGGER_HIGH, "un-core mce", (void *) 42);
#endif

    /*
     * Finally, place the hook in the NMI handler in case there's
     * an un-core event pending, and connect the un-core MC lines
     * through to the SBOX I/O-APIC. From this point onwards we
     * can get uncore MC events at any time. The wider enable mask
     * presumably covers the TBOX event lines, which only exist on
     * SKUs with a TXS (cf. mr_txs()).
     */
    mca_nmi = mcu_nmi;
    mr_sbox_wl(0, SBOX_MCA_INT_EN, mr_txs() ? 0x0fffff07 : 0xff07);

#if MC_VERBOSE
    /*
     * For debug only:
     * Record the initial uncore MCA banks to the kernel log.
     */
    printk("RAS.uncore: dumping all banks\n");

    /*
     * Dump all MCA registers we set to the kernel log
     */
    for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
      char * boxname;
      struct _mcu_rec * mr;
      uint64_t ctl, stat, addr, misc;

      mr = mcu_src + i;
#ifdef CONFIG_MK1OM
      if (mr->org == MC_ORG_TBOX && !mr_txs())
	continue;
#endif
      switch(mr->org) {
	case MC_ORG_SBOX: boxname = "SBOX"; break;
	case MC_ORG_DBOX: boxname = "DBOX"; break;
	case MC_ORG_GBOX: boxname = "GBOX"; break;
	case MC_ORG_TBOX: boxname = "TBOX"; break;
	default:          boxname = "??";	/* Damn compiler */
      }

      for(j = 0; j < mr->num; j++) {

	if (mr->qflg & MCU_CTL_64)
	  ctl = mr->rq(j, mr->ofs + MCU_CTRL);
	else
	  ctl = (uint64_t) mr->rl(j, mr->ofs + MCU_CTRL);

	stat = mr->rq(j, mr->ofs + MCU_STAT);

	if (mr->qflg & MCU_NO_ADDR)
	  addr = 0;
	else {
	  if (mr->qflg & MCU_ADDR_32)
	    addr = (uint64_t) mr->rl(j, mr->ofs + MCU_ADDR);
	  else
	    addr = mr->rq(j, mr->ofs + MCU_ADDR);
	}

	if (mr->qflg & MCU_NO_MISC)
	  misc = 0;
	else {
	  if (mr->qflg & MCU_MISC_64)
	    misc = mr->rq(j, mr->ofs + MCU_MISC);
	  else
	    misc = (uint64_t) mr->rl(j, mr->ofs + MCU_MISC);
	}

	printk("RAS.uncore: %s[%d] = { %llx, %llx, %llx, %llx }\n",
	       boxname, j, ctl, stat, addr, misc);
      }
    }
    printk("RAS.uncore: MCA_INT_EN = %x\n", mr_sbox_rl(0, SBOX_MCA_INT_EN));
    printk("RAS.uncore: APICRT16 = %llx\n", mr_sbox_rq(0, SBOX_APICRT16));
#endif

    printk("RAS.uncore: init complete\n");
  }

  return 0;
}


/*
 * Cleanup for module unload.
 * Clear/restore hooks in the SBOX's I/O-APIC.
 */

int __exit
mcu_exit(void)
{
  if (! mce_disabled) {

    /*
     * Disconnect the uncore MC lines from the SBOX I/O-APIC.
     * No new uncore MC interrupts will be made.
     */
    mr_sbox_wl(0, SBOX_MCA_INT_EN, 0);

    /*
     * Disconnect the exception handler.
     */
#if MCU_NMI
    mcu_redir = 0;
    mr_sbox_wq(0, SBOX_APICRT16, mcu_old_redir);
#else
    mcu_eoi = 0;
    free_irq(16, (void *) 42);
#endif

    /*
     * Cut the link from the kernel's NMI handler and
     * wait for everybody in the handler to leave.
     */
    mca_nmi = 0;
    while(atomic_read(&mcu_entry))
      cpu_relax();
    mcu_cpu = -1;

    /*
     * No more events will be received; clear
     * MC reporting in all BOXes (just in case).
     */
    box_reset(0);
  }

  printk("RAS.uncore: exit complete\n");
  return 0;
}
