* Copyright 2010-2017 Intel Corporation.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2,
* as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
* Disclaimer: The codes contained in these modules may be specific to
* the Intel Software Development Platform codenamed Knights Ferry,
* and the Intel product codenamed Knights Corner, and are not backward
* compatible with other Intel products. Additionally, Intel will NOT
* support the codes or instruction set in future products.
* Intel offers no warranty of any kind regarding the code. This code is
* licensed on an "AS IS" basis and Intel is not obligated to provide
* any support, assistance, installation, training, or other services
* of any kind. Intel is also not obligated to provide any updates,
* enhancements or extensions. Intel specifically disclaims any warranty
* of merchantability, non-infringement, fitness for any particular
* purpose, and any other warranty.
* Further, Intel disclaims all liability of any kind, including but
* not limited to liability for infringement of any proprietary rights,
* relating to the use of the code, even if Intel is notified of the
* possibility of such liability. Except as expressly stated in an Intel
* license agreement provided with this code and agreed upon with Intel,
* no license, express or implied, by estoppel or otherwise, to any
* intellectual property rights is granted herein.
* RAS handler for uncore MC events
* Contains code to intercept MC events, collect information
* from uncore MCA banks and handle the situation.
* In case of a severe event, defined by corrupted context,
* the handler will add a record of the event in the designated
* EEPROM hanging off the Over Clocking I2C bus. After that
* a message will be sent to the SMC (enabling IPMI notifications)
* and at last a message is sent to the host via the MC SCIF
* Lesser events will also be sent to the host on a 'FYI' basis,
* but no record will be stored in the event log.
* This is in all aspects similar to the reaction to a severe
* core MC event. Differences are in the MC bank access (mmio),
* and that the event is delivered via an interrupt instead of
* an exception. Still, the handler cannot expect any support
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <asm/processor.h>
#include <asm/mic/mic_common.h>
#include <asm/mic/mic_knc/autobaseaddress.h>
#include <asm/mic/mic_knc/micsboxdefine.h>
* Hooks placed in the native machine check handler
* See file arch/x86/kernel/traps.c for placement
* nmi Entered NMI exception handler.
* Called before any other tests, which allow us
* to test for and handle un-core MCA events before
* the traditional NMI handling.
* Note that the mce-inject mechanism also uses
* NMI's to distribute calls to do_machine_check().
/* NMI hook pointer: takes an int, returns an int; only declared here. */
extern int (*mca_nmi)(int);
* Table of un-core MCA banks.
* Though there are differences in register count and sizes, un-core bank
* registers are always spaced 8 bytes apart, so all we need to know is
* the location of the first MCA bank register (CTL) to find them.
* If bank is present, the bank register offsets for ctl, status, addr,
* and misc are thus 0, 8, 16, and 24 respectively.
* Default CTL masks pulled from the register documentation
* Some SKUs don't have support for all BOXs but that will be handled
* at runtime in the support code, not at compile time by this table.
#define SBOX_DEF 0x000e /* All (7) */
#define DBOX_DEF 0x0003 /* All (2) */
#define GBOX_DEF 0x0003 /* All (2) */
#define SBOX_DEF 0x03ce /* All - PCIe errors (7) */
#define DBOX_DEF 0x000f /* All (4) */
#define GBOX_DEF 0x3ffffffff /* All (34) */
#define TBOX_DEF 0x001f /* All (5) */
/*
 * Quirk flags kept in the 'qflg' member of a bank descriptor.
 * Each flag occupies its own bit so they can be OR'ed together.
 */
#define MCU_CTL_64	0x01	/* CTL register is 64 bits wide */
#define MCU_NO_ADDR	0x02	/* No ADDR register in this bank */
#define MCU_ADDR_32	0x04	/* ADDR register is 32 bits wide */
#define MCU_NO_MISC	0x08	/* No MISC register in this bank */
#define MCU_MISC_64	0x10	/* MISC register is 64 bits wide */
typedef struct _mcu_rec
{
uint8_t num
; /* 'BOX' count */
uint8_t org
; /* Origin code */
uint8_t qflg
; /* Quirk flags */
uint16_t ofs
; /* MCA bank base offset */
uint64_t ctl
; /* Initial CTL mask */
uint32_t (*rl
)(int, uint32_t); /* 32-bit MMIO read */
void (*wl
)(int, uint32_t, uint32_t); /* 32-bit MMIO write */
uint64_t (*rq
)(int, uint32_t); /* 64-bit MMIO read */
void (*wq
)(int, uint32_t, uint64_t); /* 64-bit MMIO write */
static McuRec mcu_src
[] = {
{ 1, MC_ORG_SBOX
, MCU_MISC_64
, SBOX_MCX_CTL_LO
,
SBOX_DEF
, mr_sbox_rl
, mr_sbox_wl
, mr_sbox_rq
, mr_sbox_wq
},
{ DBOX_NUM
, MC_ORG_DBOX
, MCU_NO_MISC
, DBOX_MC2_CTL
,
DBOX_DEF
, mr_dbox_rl
, mr_dbox_wl
, mr_dbox_rq
, mr_dbox_wq
},
{ GBOX_NUM
, MC_ORG_GBOX
, MCU_CTL_64
, GBOX_FBOX_MCA_CTL_LO
,
GBOX_DEF
, mr_gbox_rl
, mr_gbox_wl
, mr_gbox_rq
, mr_gbox_wq
},
{ TBOX_NUM
, MC_ORG_TBOX
, MCU_CTL_64
| MCU_NO_MISC
| MCU_ADDR_32
, TXS_MCX_CONTROL
,
TBOX_DEF
, mr_tbox_rl
, mr_tbox_wl
, mr_tbox_rq
, mr_tbox_wq
},
#define GBOX_BROKEN 1 /* Set if GBOX MCA bank is borken */
* Si design managed to break the GBOX MCA bank concept
* by not filling useful data into ADDR and MISC registers.
* Instead they use a bunch of registers in another part
* of the GBOX (mbox to be specific) to hold this info.
* In order to get at the right register it is necessary
* to partially decode the STATUS register and from there
* select a GBOX.MBOX register.
* Since the new registers are all 32 bits wide, we'll stick
* the value into MISC register if Misc_V bit of STATUS is
* not set. The following table is used for register selection
* model code base width Chan Notes
* 0 017c 32 0 26 bit address, CRC (retrain)
* 1 097c 32 1 26 bit address, CRC (retrain)
* 2 01e0 32 0 26 bit address, ECC
* 3 09e0 32 1 26 bit address, ECC
* 4 01dc 32 0 26 bit address, UC CAPE
* 5 09dc 32 1 26 bit address, UC CAPE
* 31 01a4 32 0 26 bit address, UC ECC
* 32 09a4 32 1 26 bit address, UC ECC
* Note: model code is simply the enable bit number in CTL
{ 0, 0x17c }, /* Correctable CRC (retrain) ch 0 */
{ 1, 0x97c }, /* Correctable CRC (retrain) ch 1 */
{ 2, 0x1e0 }, /* Correctable ECC, ch 0 */
{ 3, 0x9e0 }, /* Correctable ECC, ch 1 */
{ 4, 0x1dc }, /* Uncorrectable CAPE, ch 0 */
{ 5, 0x9dc }, /* Uncorrectable CAPE, ch 1 */
{ 31, 0x1a4 }, /* Uncorrectable ECC, ch 0 */
{ 32, 0x9a4 } /* Uncorrectable ECC, ch 1 */
/*
 * mcu_gbox_fixup - fill mi->misc from a GBOX.MBOX register.
 *
 *   mr   bank descriptor; only its 32-bit reader mr->rl is used here
 *   num  index handed unchanged to mr->rl
 *   mi   event record; mi->status is inspected, mi->misc may be written
 *
 * Bit 59 of mi->status is tested first (Status.Misc_v per the note
 * below). The model code is then taken from bits 31:16 of mi->status
 * with GET_BITS(31, 16, ...) and looked up in liu[]. On a match the
 * register at liu[i].base is read through mr->rl and stored,
 * widened to 64 bits, in mi->misc. mi->status itself is never written.
 *
 * NOTE(review): the return type, the local declarations, the statement
 * guarded by the bit-59 test and whatever ends the loop after a match
 * are not visible in this excerpt - confirm against the full file.
 */
mcu_gbox_fixup(McuRec
* mr
, int num
, MceInfo
* mi
)
* Skip if Status.Misc_v set
if (mi
->status
& (1ULL << 59))
* Get model code and if it's in the array, then read
* the addressed register into MISC. We don't set the
* Status.Misc_v bit because we want to distinguish
* this hack from the real MCA bank register.
mcode
= GET_BITS(31, 16, mi
->status
);
for(i
= 0; i
< ARRAY_SIZE(liu
); i
++)
if (liu
[i
].mcode
== mcode
) {
mi
->misc
= (uint64_t) mr
->rl(num
, liu
[i
].base
);
* Read Ctrl, Addr and Misc registers from an un-core MCA bank.
* The Status register is read/cleared in mcu_scan().
/*
 * mcu_read - copy CTL, ADDR and MISC of one un-core bank into *mi.
 *
 *   mr   bank descriptor (register base mr->ofs, quirk flags mr->qflg,
 *        MMIO readers mr->rl / mr->rq)
 *   num  index handed to the MMIO readers
 *   mi   record receiving mi->ctl, mi->addr and mi->misc
 *
 * The access width of each register follows mr->qflg:
 *   CTL   read with mr->rq when MCU_CTL_64 is set, a widened mr->rl
 *         read is the other form present
 *   ADDR  MCU_NO_ADDR is tested first; a widened mr->rl read is used
 *         when MCU_ADDR_32 is set, mr->rq is the other form present
 *   MISC  MCU_NO_MISC is tested first; mr->rq is used when MCU_MISC_64
 *         is set, a widened mr->rl read is the other form present
 * For banks whose mr->org is MC_ORG_GBOX, mcu_gbox_fixup() is called
 * with the same three arguments once the registers have been read.
 *
 * NOTE(review): the return type, the 'else' keywords and the statements
 * guarded by the MCU_NO_ADDR / MCU_NO_MISC tests are not visible in
 * this excerpt - confirm against the full file.
 */
mcu_read(McuRec
* mr
, int num
, MceInfo
* mi
)
if (mr
->qflg
& MCU_CTL_64
)
mi
->ctl
= mr
->rq(num
, mr
->ofs
+ MCU_CTRL
);
mi
->ctl
= (uint64_t) mr
->rl(num
, mr
->ofs
+ MCU_CTRL
);
if (mr
->qflg
& MCU_NO_ADDR
)
if (mr
->qflg
& MCU_ADDR_32
)
mi
->addr
= (uint64_t) mr
->rl(num
, mr
->ofs
+ MCU_ADDR
);
mi
->addr
= mr
->rq(num
, mr
->ofs
+ MCU_ADDR
);
if (mr
->qflg
& MCU_NO_MISC
)
if (mr
->qflg
& MCU_MISC_64
)
mi
->misc
= mr
->rq(num
, mr
->ofs
+ MCU_MISC
);
mi
->misc
= (uint64_t) mr
->rl(num
, mr
->ofs
+ MCU_MISC
);
if (mr
->org
== MC_ORG_GBOX
)
mcu_gbox_fixup(mr
, num
, mi
);
* Reset one un-core MCA bank
/*
 * mcu_reset - clear one un-core bank and rewrite its CTL register.
 *
 *   mr   bank descriptor (register base mr->ofs, quirk flags mr->qflg,
 *        MMIO writers mr->wl / mr->wq)
 *   num  index handed to the MMIO writers
 *   arm  not referenced in the visible lines; presumably it selects the
 *        value of 'ctl' - TODO confirm
 *
 * Visible sequence:
 *   1. STATUS is written with 0 through mr->wq.
 *   2. Unless MCU_NO_ADDR is set, ADDR is written with 0 (mr->wl when
 *      MCU_ADDR_32 is set, mr->wq is the other form present).
 *   3. Unless MCU_NO_MISC is set, MISC is written with 0 (mr->wq when
 *      MCU_MISC_64 is set, mr->wl is the other form present).
 *   4. A non-zero 'ctl' has PUT_BIT(3, 1) masked out for MC_ORG_SBOX
 *      when mic_hw_stepping(0) == KNC_A_STEP, and PUT_BIT(6, 1) masked
 *      out for MC_ORG_GBOX when mr_mch() != 16.
 *   5. 'ctl' is written to CTL (mr->wq when MCU_CTL_64 is set, mr->wl
 *      is the other form present).
 *
 * NOTE(review): the return type, the declaration and initial assignment
 * of 'ctl', the 'else' keywords and the closing braces are not visible
 * in this excerpt - confirm against the full file.
 */
mcu_reset(McuRec
* mr
, int num
, int arm
)
mr
->wq(num
, mr
->ofs
+ MCU_STAT
, 0);
if (! (mr
->qflg
& MCU_NO_ADDR
)) {
if (mr
->qflg
& MCU_ADDR_32
)
mr
->wl(num
, mr
->ofs
+ MCU_ADDR
, 0);
mr
->wq(num
, mr
->ofs
+ MCU_ADDR
, 0);
if (! (mr
->qflg
& MCU_NO_MISC
)) {
if (mr
->qflg
& MCU_MISC_64
)
mr
->wq(num
, mr
->ofs
+ MCU_MISC
, 0);
mr
->wl(num
, mr
->ofs
+ MCU_MISC
, 0);
if (ctl
&& mr
->org
== MC_ORG_SBOX
&& mic_hw_stepping(0) == KNC_A_STEP
)
ctl
&= ~PUT_BIT(3, 1); /* A0 SBOX 'unclaimed address' bug */
if (ctl
&& mr
->org
== MC_ORG_GBOX
&& mr_mch() != 16)
ctl
&= ~(uint64_t) PUT_BIT(6, 1); /* B0 GBOX 'Invalid Channel' (SKU 3 & 4) */
if (mr
->qflg
& MCU_CTL_64
)
mr
->wq(num
, mr
->ofs
+ MCU_CTRL
, ctl
);
mr
->wl(num
, mr
->ofs
+ MCU_CTRL
, ctl
);
* Un-core MC bank pre-scan
* Walk through all un-core MC sources to see if any events are pending.
* Stops on 1st match where STATUS has the VAL bit set. On some BOXes,
* like GBOX, interrupt may be signalled without the EN bit being set.
* See HSD 4116374 for details.
for(i
= 0; i
< ARRAY_SIZE(mcu_src
); i
++) {
if (mr
->org
== MC_ORG_TBOX
&& !mr_txs())
for(j
= 0; j
< mr
->num
; j
++) {
status
= mr
->rq(j
, mr
->ofs
+ MCU_STAT
);
if (status
& MCI_STATUS_VAL
)
* Un-core MC bank scanner.
* Walks through all un-core MC sources for new events.
* If any found, then process them same way as core events.
* Walk list of known un-core MC sources
memset(&uc
, 0, sizeof(uc
));
for(i
= 0; i
< ARRAY_SIZE(mcu_src
); i
++) {
if (mr
->org
== MC_ORG_TBOX
&& !mr_txs())
for(j
= 0; j
< mr
->num
; j
++) {
* Read status to see if we have something of interest.
* As per HSD 4116374 the status register is cleared
* after read, if it had valid content.
*TBD: Clear unconditionally?
mc
.status
= mr
->rq(j
, mr
->ofs
+ MCU_STAT
);
if (mc
.status
& MCI_STATUS_VAL
)
mr
->wq(j
, mr
->ofs
+ MCU_STAT
, 0);
* Bank had valid content (VAL bit set).
* Verify the event was subscribed to (EN bit set).
* If not, the event is ignored.
if (! (mc
.status
& MCI_STATUS_EN
))
* Valid and enabled event, read remaining bank registers.
* Fill out blanks in the MceInfo record
mc
.stamp
= get_seconds();
mc
.flags
= (mc
.status
& MCI_STATUS_UC
) ? MC_FLG_FATAL
: 0;
* If any way to detect injected errors then this is
* the place to do so and indicate by MC_FLG_FALSE flag
if (mc
.flags
& MC_FLG_FATAL
) {
ee_printk("Uncore fatal MC: org %d, id %d, status %lx\n", mc
.org
, mc
.id
, mc
.status
);
* Log UC events in the eeprom.
* Notify SMC that we've had a serious machine check error.
* Remember 1st fatal (UC) event
* Filter corrected errors.
if (! (mc
.flags
& MC_FLG_FATAL
)) {
msk
= micras_mc_filter(&mc
, tsc
, 1);
ee_printk("Uncore filter: org %d, id %d, ctrl %lx, mask %lx\n", mc
.org
, mc
.id
, mc
.ctl
, msk
);
if (mr
->qflg
& MCU_CTL_64
)
mr
->wq(j
, mr
->ofs
+ MCU_CTRL
, mc
.ctl
& ~msk
);
mr
->wl(j
, mr
->ofs
+ MCU_CTRL
, (uint32_t)(mc
.ctl
& ~msk
));
* Any event post processing goes here.
* This would be things like cache line refresh and such.
* Actual algorithms are TBD.
panic("FATAL un-core machine check event:\n"
"bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n",
uc
.org
, uc
.id
, uc
.ctl
, uc
.status
, uc
.addr
, uc
.misc
);
* Once we get control in 1st interrupt (NMI or regular), we'll
* use IPIs from the local APIC to force all active CPU's into
* our RAS NMI handler, similar to the core MC handler.
* After that, the same logic as for the generic MC handler is
* applied to corral all CPU's through well defined rendez-vous
* points where only one cpu gets to run the un-core MC event
* scan while everybody else are sitting in a holding pen.
* If containment wasn't an issue we could simply let the BP
* run the scan without involving other CPUs at all.
/*
 * State shared by the un-core NMI rendez-vous code.
 * mcu_exc_mask and mcu_eoi have external linkage, the rest is file local.
 */
struct cpumask	mcu_exc_mask;		/* NMI recipients */
static int	mcu_cpu = -1;		/* SBOX target CPU */
static uint64_t	mcu_redir;		/* SBOX I/O-APIC redirection entry */
static uint64_t	mcu_old_redir;		/* Restore value for redirection entry */
unsigned int	mcu_eoi;		/* 1st interrupt from local APIC */
static atomic_t	mcu_callin;		/* Entry rendez-vous gate */
static atomic_t	mcu_leavin;		/* Hold rendez-vous gate */
mcu_timed_out(int64_t * timeout
)
cpus
= num_online_cpus();
timeout
= 1 * NSEC_PER_SEC
; /* 1 Second */
* 'Entry' rendez-vous point.
* Wait here until all CPUs have entered.
order
= atomic_inc_return(&mcu_callin
);
while(atomic_read(&mcu_callin
) != cpus
) {
if (mcu_timed_out(&timeout
)) {
* Timeout waiting for CPU enter rendez-vous
* 'Hold' rendez-vous point.
* All CPUs drop by here 'simultaneously'.
* The first CPU that 'enter'ed (order of 1) will
* fall thru while the others wait until their
* number comes up in the 'leavin' counter
* (or if a timeout happens). This also has a
* serializing effect, where one CPU leaves this
atomic_set(&mcu_leavin
, 1);
while(atomic_read(&mcu_leavin
) < order
) {
if (mcu_timed_out(&timeout
)) {
* Timeout waiting in CPU hold rendez-vous
* If any 'per-CPU' activity is needed in isolation
* (one CPU at a time) then that code needs to go here.
atomic_inc(&mcu_leavin
); /* Next CPU out of hold */
timeout
= NSEC_PER_SEC
; /* 1 Second */
* The first CPU that entered (order of 1) waits here
* for the others to leave the 'hold' loop in mcu_wait()
* and enter the 'exit' rendez-vous loop below.
* Once they are there, it will run the uncore MCA bank
* scan while the others are parked in 'exit' loop below.
cpus
= num_online_cpus();
while(atomic_read(&mcu_leavin
) <= cpus
) {
if (mcu_timed_out(&timeout
)) {
* Timeout waiting for CPU exit rendez-vous
atomic_set(&mcu_leavin
, cpus
);
* Exit rendez-vous point.
while(atomic_read(&mcu_leavin
) != 0) {
if (mcu_timed_out(&timeout
)) {
* Timeout waiting in CPU exit rendez-vous
* Reset rendez-vous counters, letting all CPUs
* leave this function 'simultaneously'.
atomic_set(&mcu_callin
, 0);
atomic_set(&mcu_leavin
, 0);
* Uncertain if all cpumask_* functions imply barriers,
* so erring on the safe side, explicit barriers are used.
uint32_t mcg_status_lo
, mcg_status_hi
;
rdmsr(MSR_IA32_MCG_STATUS
, mcg_status_lo
, mcg_status_hi
);
wrmsr(MSR_IA32_MCG_STATUS
, mcg_status_lo
| MCG_STATUS_MCIP
, mcg_status_hi
);
for(i
= 0; i
< ARRAY_SIZE(mcu_src
); i
++) {
if (mr
->org
== MC_ORG_TBOX
&& !mr_txs())
for(j
= 0; j
< mr
->num
; j
++) {
mc
.status
= mr
->rq(j
, mr
->ofs
+ MCU_STAT
);
if (! (mc
.status
& MCI_STATUS_VAL
))
if (! (mc
.status
& MCI_STATUS_EN
)) {
mr
->wq(j
, mr
->ofs
+ MCU_STAT
, 0);
mr
->wq(j
, mr
->ofs
+ MCU_STAT
, 0);
mc
.stamp
= get_seconds();
mc
.flags
= (mc
.status
& MCI_STATUS_UC
) ? MC_FLG_FATAL
: 0;
wrmsr(MSR_IA32_MCG_STATUS
, mcg_status_lo
, mcg_status_hi
);
* Damn compiler options !!!!!!
* Don't want more changes than this routine, so
* added dummies to shut up gcc about unused code.
static atomic_t mcu_entry
;
uint32_t mcg_status_lo
, mcg_status_hi
;
* Get MCA status from SBOX.
* If no source bits set, this was not an un-core MCA
* This would work if the SBOX_MCA_INT_STAT actually worked
* as described both in HAS and register specification.
* Unfortunately, it doesn't, as per tribal knowledge errata.
uint32_t int_stat
, int_en
;
int_en
= mr_sbox_rl(0, SBOX_MCA_INT_EN
);
int_stat
= mr_sbox_rl(0, SBOX_MCA_INT_STAT
);
if (! (int_en
& int_stat
)) {
* Instead of having a single source of pending un-core MCA events,
* we now have to walk all BOXes to check if there is a valid event
* pending in one of them. That is much more expensive as we have
* to check this on all NMIs, including our own cascade NMIs used
* to corral all CPUs in their rendezvous point(s). We try to avoid
* this scan if there already is an un-core NMI in progress.
* un-core MCA NMIs are sent to just one CPU, mcu_cpu
* CPUs targeted in the cascade are in mcu_exc_mask
* non-zero atomic variable 'mcu_callin' tells cascade is in progress
if (!cpumask_empty(&mcu_exc_mask
))
* On CPU 0 and no un-core handling in progress!
* Then scan all BOXes for valid events pending,
* If there wasn't any, this is a false alarm and
* we'll re-connect MC lines and return.
* This is uncore so it should not be necessary to
* empty internal (L1) caches, doesn't harm either.
* We do not want to be interrupted by a core MC
* exception while handling an NMI. We can block
* core MC events by setting the MCG_STATUS_MCIP.
* This is a MSR, so it has to be done on all CPUs.
* On KnC that is, KnF does not have that MSR.
rdmsr(MSR_IA32_MCG_STATUS
, mcg_status_lo
, mcg_status_hi
);
wrmsr(MSR_IA32_MCG_STATUS
, mcg_status_lo
| MCG_STATUS_MCIP
, mcg_status_hi
);
* Special for the SBOX NMI target CPU:
* - disconnect un-core MC lines from SBOX I/O-APIC, such
* that we don't get stacked NMIs in the Local APICs.
* - simulate a NMI broadcast by sending NMI to all _other_
* active CPUs via IPIs. The SBOX could do a broadcast,
* but that will send NMIs to sleeping CPUs too, which
* we prefer to avoid if possible.
*TBD: should creating the mcu_exc_mask be protected by
* lock, similar to core events? Who can interfere?
mr_sbox_wl(0, SBOX_MCA_INT_EN
, 0);
cpumask_copy(&mcu_exc_mask
, cpu_online_mask
);
cpumask_clear_cpu(cpu
, &mcu_exc_mask
);
// apic->send_IPI_mask(&mcu_exc_mask, NMI_VECTOR);
apic
->send_IPI_allbutself(NMI_VECTOR
);
cpumask_set_cpu(cpu
, &mcc_exc_mask
);
* Corral all CPUs through the rendez-vous point maze.
* - No CPU leaves mcu_wait() until all have entered.
* - One CPU leaves mcu_wait() at a time.
* - No CPU leaves mcu_go() until all have entered.
* - While one CPU is in transit between mcu_wait()
* and mcu_go(), all other CPUs are sitting in
* tight busy-wait loops in either function.
* - All CPUs leave mcu_go() at the same time.
* If there is any 'per-cpu' activity that needs to be
* run in isolation, it must be placed between mcu_wait()
* Timeout waiting at one of the rendez-vous points.
* Scan the un-core MCA banks just in case.
* Special for the SBOX NMI target CPU:
* - reconnect un-core MC lines through to SBOX I/O-APIC.
* If new events already are pending, then this will
* result in a 'rising-edge' trigger to the I/O-APIC.
mr_sbox_wl(0, SBOX_MCA_INT_EN
, mr_txs() ? 0x0fffff07 : 0xff07);
* If this CPU got its NMI from an IPI, then it must
* send an ACK to its local APIC (I think).
eoi
= cpumask_test_and_clear_cpu(cpu
, &mcu_exc_mask
);
* Restore core MCG status and return 1 indicating to the
* kernel NMI handler we've handled it.
*TBD: reduce to one write per core instead of one per thread?
wrmsr(MSR_IA32_MCG_STATUS
, mcg_status_lo
, mcg_status_hi
);
* MCA handler if using standard interrupts
* It's just a trampoline to convert a regular interrupt
* into an NMI, which is only needed if the I/O-APIC can't
*TBD: remove all this? It is not used on KnC, and the KnF's
* I've tested this on all have been OK sending NMIs.
/*
 * sbox_handler - interrupt callback passed to request_irq(16, ...).
 *
 *   irq  interrupt number; not referenced in the visible lines
 *   tag  cookie given at registration; not referenced in the visible lines
 *
 * Stores the id of the CPU running the callback in mcu_cpu and then
 * sends an NMI_VECTOR IPI to that same CPU via apic->send_IPI_self().
 *
 * NOTE(review): the return type and return statement are not visible in
 * this excerpt - confirm against the full file.
 */
sbox_handler(int irq
, void * tag
)
* Convert this regular interrupt into an NMI.
mcu_cpu
= smp_processor_id();
apic
->send_IPI_self(NMI_VECTOR
);
* Reset all uncore MCA banks to defaults
for(i
= 0; i
< ARRAY_SIZE(mcu_src
); i
++) {
if (mr
->org
== MC_ORG_TBOX
&& !mr_txs())
for(j
= 0; j
< mr
->num
; j
++) {
*TBD: Do we want to pick up existing MCA events or drop
* them because we don't know _when_ they occurred?
* Reporting them would require internal buffer because
* it's unlikely the SCIF MC session is up at this point.
* For now we just enter events into the system log.
status
= mr
->rq(j
, mr
->ofs
+ MCU_STAT
);
if (status
& MCI_STATUS_VAL
) {
printk("RAS.uncore: discard MC event:\n"
"bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n",
mr
->org
, j
, mc
.ctl
, status
, mc
.addr
, mc
.misc
);
* Reset MCA bank registers.
* Setup interrupt handlers by hooking into the SBOX's I/O-APIC.
* For now, we send an NMI to single CPU, and let it process the
* event. This may need to be expanded into a broadcast NMI similar
* to what the generic core MC event handler does in order to keep
* containment as high as we possibly can.
*TBD: code a dual rendez-vous mechanism on all active CPUs.
printk("RAS.uncore: disabled\n");
* Clear rendez-vous counters
atomic_set(&mcu_callin
, 0);
atomic_set(&mcu_leavin
, 0);
* Record all SBOX I/O-APIC registers to kernel log
printk("SBOX_APICIDR: %lx\n", mr_sbox_rl(0, SBOX_APICIDR
));
printk("SBOX_APICVER: %lx\n", mr_sbox_rl(0, SBOX_APICVER
));
printk("SBOX_APICAPR: %lx\n", mr_sbox_rl(0, SBOX_APICAPR
));
printk("APICCRT%d: %llx\n", i
, mr_sbox_rq(0, SBOX_APICRT0
+ (8 * i
)));
printk("APICICR%d: %llx\n", i
, mr_sbox_rq(0, SBOX_APICICR0
+ (8 * i
)));
printk("SBOX_MCA_INT_EN: %lx\n", mr_sbox_rl(0, SBOX_MCA_INT_EN
));
printk("SBOX_MCA_INT_STAT: %lx\n", mr_sbox_rl(0, SBOX_MCA_INT_STAT
));
* Disconnect un-core MC lines from SBOX I/O-APIC, setup the
* individual BOXes, and clear any un-core MC pending flags
mr_sbox_wl(0, SBOX_MCA_INT_EN
, 0);
mr_sbox_wl(0, SBOX_MCA_INT_STAT
, 0);
* Setup the SBOX I/O-APIC.
* Un-core MC events are routed through a mask in register
* SBOX_MCA_INT_EN into I/O APIC redirection table entry #16.
* Ideally we want all uncore MC events to be handled similar
* to core MCAs, which means we'd like an NMI on all CPUs.
* On KnF the I/O-APIC may not trigger an NMI (PoC security)
* and on KnC where NMI delivery is possible, it appears not
* to be ideal to broadcast it to all CPUs because it could
* wake up cores put to sleep by power management rules.
* See MCA HAS, SBOX HAS Vol 4, and A0 Vol 2 for details.
* The redirection table entry has the following format:
* 47:32 Destination ID field
* 17 Interrupt set (testing: trigger an interrupt)
* 16 Interrupt mask (0=enable, 1=disable)
* 15 Trigger mode (0=edge, 1=level)
* 14 Remote IRR (0=inactive, 1=accepted)
* 13 Interrupt polarity (0=active_high, 1=active_low)
* 12 Delivery status (0=idle, 1=send_pending)
* 11 Destination mode (0=physical, 1=logical)
* 10:8 Delivery mode (0=fixed, low, SMI, rsvd, NMI, INIT, rsvd, ext)
* The I/O-APIC input is 'rising edge', so we'd need to select
* it to be edge triggered, active high.
* If event delivery by NMI is preferred, we want it delivered on
* the BP. There is already an NMI handler present, so we have to
* tap into the existing NMI handler for the event notifications.
* The bit-fiddling below says:
* NMI delivery | Destination CPU APIC ID
mcu_redir
= PUT_BITS(10, 8, 4) | PUT_BITS(47, 32, (uint64_t) cpu_data(mcu_cpu
).apicid
);
mcu_old_redir
= mr_sbox_rq(0, SBOX_APICRT16
);
mr_sbox_wq(0, SBOX_APICRT16
, mcu_redir
| PUT_BITS(16, 16, 1));
mr_sbox_wq(0, SBOX_APICRT16
, mcu_redir
);
* If event delivery by regular interrupt is preferred, then all
* I/O-APIC setup will be handled by calling request_irq(16,..).
* There is no guarantee that the event will be sent to the BP
* (though it's more than likely) so we'll defer identifying the
* event handling CPU (mcu_cpu) till we receive the callback from
* the interrupt handling sub-system.
* The sbox_handler() function just converts the callback into an
* NMI because the only way containment can be achieved is to be
* able to lock down the system completely, which is not realistic
* using regular interrupts.
(void) request_irq(16, sbox_handler
, IRQF_TRIGGER_HIGH
, "un-core mce", (void *) 42);
* Finally, place hook in NMI handler in case there's
* an un-core event pending and connect un-core MC lines
* through to SBOX I/O-APIC. From this point onwards we
* can get uncore MC events at any time.
mr_sbox_wl(0, SBOX_MCA_INT_EN
, mr_txs() ? 0x0fffff07 : 0xff07);
* Record initial uncore MCA banks to kernel log.
printk("RAS.uncore: dumping all banks\n");
* Dump all MCA registers we set to kernel log
for(i
= 0; i
< ARRAY_SIZE(mcu_src
); i
++) {
uint64_t ctl
, stat
, addr
, misc
;
if (mr
->org
== MC_ORG_TBOX
&& !mr_txs())
case MC_ORG_SBOX
: boxname
= "SBOX"; break;
case MC_ORG_DBOX
: boxname
= "DBOX"; break;
case MC_ORG_GBOX
: boxname
= "GBOX"; break;
case MC_ORG_TBOX
: boxname
= "TBOX"; break;
default: boxname
= "??"; /* Damn compiler */
for(j
= 0; j
< mr
->num
; j
++) {
if (mr
->qflg
& MCU_CTL_64
)
ctl
= mr
->rq(j
, mr
->ofs
+ MCU_CTRL
);
ctl
= (uint64_t) mr
->rl(j
, mr
->ofs
+ MCU_CTRL
);
stat
= mr
->rq(j
, mr
->ofs
+ MCU_STAT
);
if (mr
->qflg
& MCU_NO_ADDR
)
if (mr
->qflg
& MCU_ADDR_32
)
addr
= (uint64_t) mr
->rl(j
, mr
->ofs
+ MCU_ADDR
);
addr
= mr
->rq(j
, mr
->ofs
+ MCU_ADDR
);
if (mr
->qflg
& MCU_NO_MISC
)
if (mr
->qflg
& MCU_MISC_64
)
misc
= mr
->rq(j
, mr
->ofs
+ MCU_MISC
);
misc
= (uint64_t) mr
->rl(j
, mr
->ofs
+ MCU_MISC
);
printk("RAS.uncore: %s[%d] = { %llx, %llx, %llx, %llx }\n",
boxname
, j
, ctl
, stat
, addr
, misc
);
printk("RAS.uncore: MCA_INT_EN = %x\n", mr_sbox_rl(0, SBOX_MCA_INT_EN
));
printk("RAS.uncore: APICRT16 = %llx\n", mr_sbox_rq(0, SBOX_APICRT16
));
printk("RAS.uncore: init complete\n");
* Cleanup for module unload.
* Clear/restore hooks in the SBOX's I/O-APIC.
* Disconnect uncore MC lines from SBOX I/O-APIC.
* No new uncore MC interrupts will be made.
mr_sbox_wl(0, SBOX_MCA_INT_EN
, 0);
* Disconnect exception handler.
mr_sbox_wq(0, SBOX_APICRT16
, mcu_old_redir
);
free_irq(16, (void *) 42);
* Cut link from kernel's NMI handler and
* wait for everybody in handler to leave.
while(atomic_read(&mcu_entry
))
* No more events will be received, clear
* MC reporting in all BOXes (just in case)
printk("RAS.uncore: exit complete\n");