/*
 * Copyright 2010-2017 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Disclaimer: The codes contained in these modules may be specific to
 * the Intel Software Development Platform codenamed Knights Ferry,
 * and the Intel product codenamed Knights Corner, and are not backward
 * compatible with other Intel products. Additionally, Intel will NOT
 * support the codes or instruction set in future products.
 *
 * Intel offers no warranty of any kind regarding the code. This code is
 * licensed on an "AS IS" basis and Intel is not obligated to provide
 * any support, assistance, installation, training, or other services
 * of any kind. Intel is also not obligated to provide any updates,
 * enhancements or extensions. Intel specifically disclaims any warranty
 * of merchantability, non-infringement, fitness for any particular
 * purpose, and any other warranty.
 *
 * Further, Intel disclaims all liability of any kind, including but
 * not limited to liability for infringement of any proprietary rights,
 * relating to the use of the code, even if Intel is notified of the
 * possibility of such liability. Except as expressly stated in an Intel
 * license agreement provided with this code and agreed upon with Intel,
 * no license, express or implied, by estoppel or otherwise, to any
 * intellectual property rights is granted herein.
 */

/*
 * RAS handler for uncore MC events
 *
 * Contains code to intercept MC events, collect information
 * from uncore MCA banks and handle the situation.
 *
 * In case of a severe event, defined by corrupted context,
 * the handler will add a record of the event to the designated
 * EEPROM hanging off the Over Clocking I2C bus. After that,
 * a message is sent to the SMC (enabling IPMI notifications),
 * and finally a message is sent to the host via the MC SCIF
 * connection.
 *
 * Lesser events will also be sent to the host on an 'FYI' basis,
 * but no record will be stored in the event log.
 *
 * This is in most respects similar to the reaction to a severe
 * core MC event. The differences are the MC bank access method
 * (MMIO), and that the event is delivered via an interrupt instead
 * of an exception. Still, the handler cannot expect any support
 * from the OS.
 */

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/nmi.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/mic/mic_common.h>
#include <asm/mic/mic_knc/autobaseaddress.h>
#include <asm/mic/mic_knc/micsboxdefine.h>
#include "micras.h"


/*
 * Hooks placed in the native machine check handler
 * See file arch/x86/kernel/traps.c for placement
 *
 *  nmi		Entered NMI exception handler.
 *		Called before any other tests, which allows us
 *		to test for and handle un-core MCA events before
 *		the traditional NMI handling.
 *		Note that the mce-inject mechanism also uses
 *		NMIs to distribute calls to do_machine_check().
 */

extern int (*mca_nmi)(int);



/*
 * Table of un-core MCA banks.
 * Though there are differences in register count and sizes, un-core bank
 * registers are always spaced 8 bytes apart, so all we need to know is
 * the location of the first MCA bank register (CTL) to find them.
 * If a bank is present, the bank register offsets for ctl, status, addr,
 * and misc are thus 0, 8, 16, and 24 respectively.
 * Default CTL masks are pulled from the register documentation.
 * Some SKUs don't have support for all BOXes, but that will be handled
 * at runtime in the support code, not at compile time by this table.
 */


#ifdef CONFIG_ML1OM
#define SBOX_DEF	0x000e		/* All (7) */
#define DBOX_DEF	0x0003		/* All (2) */
#define GBOX_DEF	0x0003		/* All (2) */
#endif
#ifdef CONFIG_MK1OM
#define SBOX_DEF	0x03ce		/* All - PCIe errors (7) */
#define DBOX_DEF	0x000f		/* All (4) */
#define GBOX_DEF	0x3ffffffff	/* All (34) */
#define TBOX_DEF	0x001f		/* All (5) */
#endif

#define MCU_CTL_64	(1 << 0)	/* Bank has 64 bit CTL register */
#define MCU_NO_ADDR	(1 << 1)	/* Bank has no ADDR register */
#define MCU_ADDR_32	(1 << 2)	/* Bank has 32 bit ADDR register */
#define MCU_NO_MISC	(1 << 3)	/* Bank has no MISC register */
#define MCU_MISC_64	(1 << 4)	/* Bank has 64 bit MISC register */

#define MCU_CTRL	0
#define MCU_STAT	8
#define MCU_ADDR	16
#define MCU_MISC	24

typedef struct _mcu_rec {
  uint8_t	num;				/* 'BOX' count */
  uint8_t	org;				/* Origin code */
  uint8_t	qflg;				/* Quirk flags */
  uint16_t	ofs;				/* MCA bank base offset */
  uint64_t	ctl;				/* Initial CTL mask */
  uint32_t	(*rl)(int, uint32_t);		/* 32-bit MMIO read */
  void		(*wl)(int, uint32_t, uint32_t);	/* 32-bit MMIO write */
  uint64_t	(*rq)(int, uint32_t);		/* 64-bit MMIO read */
  void		(*wq)(int, uint32_t, uint64_t);	/* 64-bit MMIO write */
} McuRec;
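
/*
 * Example (illustration only): given the mcu_src[] table below,
 * reading the STATUS register of DBOX #1 amounts to
 *   mcu_src[1].rq(1, mcu_src[1].ofs + MCU_STAT);
 * i.e. a 64-bit MMIO read 8 bytes past that bank's CTL register.
 */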


static McuRec mcu_src[] = {
  { 1,        MC_ORG_SBOX, MCU_MISC_64, SBOX_MCX_CTL_LO,
	      SBOX_DEF, mr_sbox_rl, mr_sbox_wl, mr_sbox_rq, mr_sbox_wq },
  { DBOX_NUM, MC_ORG_DBOX, MCU_NO_MISC, DBOX_MC2_CTL,
	      DBOX_DEF, mr_dbox_rl, mr_dbox_wl, mr_dbox_rq, mr_dbox_wq },
  { GBOX_NUM, MC_ORG_GBOX, MCU_CTL_64, GBOX_FBOX_MCA_CTL_LO,
	      GBOX_DEF, mr_gbox_rl, mr_gbox_wl, mr_gbox_rq, mr_gbox_wq },
#ifdef CONFIG_MK1OM
  { TBOX_NUM, MC_ORG_TBOX, MCU_CTL_64 | MCU_NO_MISC | MCU_ADDR_32, TXS_MCX_CONTROL,
	      TBOX_DEF, mr_tbox_rl, mr_tbox_wl, mr_tbox_rq, mr_tbox_wq },
#endif
};

#define GBOX_BROKEN	1	/* Set if GBOX MCA bank is broken */

#if GBOX_BROKEN
/*
 * Si design managed to break the GBOX MCA bank concept
 * by not filling useful data into the ADDR and MISC registers.
 * Instead they use a bunch of registers in another part
 * of the GBOX (mbox to be specific) to hold this info.
 * In order to get at the right register it is necessary
 * to partially decode the STATUS register and from there
 * select a GBOX.MBOX register.
 * Since the new registers are all 32 bits wide, we'll stick
 * the value into the MISC register if the Misc_V bit of STATUS
 * is not set. The following table is used for register selection:
 *
 *  model code	base	width	Chan	Notes
 *	0	017c	32	0	26 bit address, CRC (retrain)
 *	1	097c	32	1	26 bit address, CRC (retrain)
 *	2	01e0	32	0	26 bit address, ECC
 *	3	09e0	32	1	26 bit address, ECC
 *	4	01dc	32	0	26 bit address, UC CAPE
 *	5	09dc	32	1	26 bit address, UC CAPE
 *	31	01a4	32	0	26 bit address, UC ECC
 *	32	09a4	32	1	26 bit address, UC ECC
 *
 * Note: the model code is simply the enable bit number in CTL.
 */

static struct liu {
  uint16_t	mcode;
  uint16_t	base;
} liu[] = {
  {  0, 0x17c },	/* Correctable CRC (retrain) ch 0 */
  {  1, 0x97c },	/* Correctable CRC (retrain) ch 1 */
  {  2, 0x1e0 },	/* Correctable ECC, ch 0 */
  {  3, 0x9e0 },	/* Correctable ECC, ch 1 */
  {  4, 0x1dc },	/* Uncorrectable CAPE, ch 0 */
  {  5, 0x9dc },	/* Uncorrectable CAPE, ch 1 */
  { 31, 0x1a4 },	/* Uncorrectable ECC, ch 0 */
  { 32, 0x9a4 }		/* Uncorrectable ECC, ch 1 */
};

static void
mcu_gbox_fixup(McuRec * mr, int num, MceInfo * mi)
{
  int		i;
  uint16_t	mcode;

  /*
   * Skip if Status.Misc_v is set
   */
  if (mi->status & (1ULL << 59))
    return;

  /*
   * Get the model code and, if it's in the array, read
   * the addressed register into MISC. We don't set the
   * Status.Misc_v bit because we want to distinguish
   * this hack from a real MCA bank register.
   */
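  /*
   * Example: a STATUS with bits 31:16 equal to 3 selects GBOX.MBOX
   * register 0x9e0, i.e. a correctable ECC event on channel 1.
   */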
  mcode = GET_BITS(31, 16, mi->status);
  for(i = 0; i < ARRAY_SIZE(liu); i++)
    if (liu[i].mcode == mcode) {
      mi->misc = (uint64_t) mr->rl(num, liu[i].base);
      break;
    }
}
#endif

/*
 * Read the Ctrl, Addr and Misc registers from an un-core MCA bank.
 * The Status register is read/cleared in mcu_scan().
 */

static void
mcu_read(McuRec * mr, int num, MceInfo * mi)
{
  if (mr->qflg & MCU_CTL_64)
    mi->ctl = mr->rq(num, mr->ofs + MCU_CTRL);
  else
    mi->ctl = (uint64_t) mr->rl(num, mr->ofs + MCU_CTRL);

  if (mr->qflg & MCU_NO_ADDR)
    mi->addr = 0;
  else {
    if (mr->qflg & MCU_ADDR_32)
      mi->addr = (uint64_t) mr->rl(num, mr->ofs + MCU_ADDR);
    else
      mi->addr = mr->rq(num, mr->ofs + MCU_ADDR);
  }

  if (mr->qflg & MCU_NO_MISC)
    mi->misc = 0;
  else {
    if (mr->qflg & MCU_MISC_64)
      mi->misc = mr->rq(num, mr->ofs + MCU_MISC);
    else
      mi->misc = (uint64_t) mr->rl(num, mr->ofs + MCU_MISC);
  }

#if GBOX_BROKEN
  if (mr->org == MC_ORG_GBOX)
    mcu_gbox_fixup(mr, num, mi);
#endif
}


/*
 * Reset one un-core MCA bank.
 * Any quirks go here.
 */

static void
mcu_reset(McuRec * mr, int num, int arm)
{
  uint64_t	ctl;

  mr->wq(num, mr->ofs + MCU_STAT, 0);

  if (! (mr->qflg & MCU_NO_ADDR)) {
    if (mr->qflg & MCU_ADDR_32)
      mr->wl(num, mr->ofs + MCU_ADDR, 0);
    else
      mr->wq(num, mr->ofs + MCU_ADDR, 0);
  }

  if (! (mr->qflg & MCU_NO_MISC)) {
    if (mr->qflg & MCU_MISC_64)
      mr->wq(num, mr->ofs + MCU_MISC, 0);
    else
      mr->wl(num, mr->ofs + MCU_MISC, 0);
  }

  ctl = arm ? mr->ctl : 0;

#ifdef CONFIG_MK1OM
  if (ctl && mr->org == MC_ORG_SBOX && mic_hw_stepping(0) == KNC_A_STEP)
    ctl &= ~PUT_BIT(3, 1);		/* A0 SBOX 'unclaimed address' bug */
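    /* e.g. the KnC A0 SBOX default of 0x03ce is then armed as 0x03c6 */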

  if (ctl && mr->org == MC_ORG_GBOX && mr_mch() != 16)
    ctl &= ~(uint64_t) PUT_BIT(6, 1);	/* B0 GBOX 'Invalid Channel' (SKU 3 & 4) */
#endif

  if (mr->qflg & MCU_CTL_64)
    mr->wq(num, mr->ofs + MCU_CTRL, ctl);
  else
    mr->wl(num, mr->ofs + MCU_CTRL, (uint32_t) ctl);
}


/*
 * Un-core MC bank pre-scan.
 * Walk through all un-core MC sources to see if any events are pending.
 * Stops on the 1st match where STATUS has the VAL bit set. On some
 * BOXes, like the GBOX, an interrupt may be signalled without the EN
 * bit being set. See HSD 4116374 for details.
 */

static int
mcu_prescan(void)
{
  int		i, j;
  uint64_t	status;
  struct _mcu_rec * mr;

  for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
    mr = mcu_src + i;

#ifdef CONFIG_MK1OM
    if (mr->org == MC_ORG_TBOX && !mr_txs())
      continue;
#endif

    for(j = 0; j < mr->num; j++) {
      status = mr->rq(j, mr->ofs + MCU_STAT);
      if (status & MCI_STATUS_VAL)
	return 1;
    }
  }

  return 0;
}


/*
 * Un-core MC bank scanner.
 * Walks through all un-core MC sources for new events.
 * If any are found, they are processed the same way as core events.
 */

static int
mcu_scan(void)
{
  MceInfo	mc, uc;
  int		gone, seen;
  int		i, j;
  struct _mcu_rec * mr;

  /*
   * Walk the list of known un-core MC sources
   */
  gone = seen = 0;
  memset(&uc, 0, sizeof(uc));
  for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
    mr = mcu_src + i;

#ifdef CONFIG_MK1OM
    if (mr->org == MC_ORG_TBOX && !mr_txs())
      continue;
#endif

    for(j = 0; j < mr->num; j++) {

      /*
       * Read status to see if we have something of interest.
       * As per HSD 4116374, the status register is cleared
       * after reading if it had valid content.
       *TBD: Clear unconditionally?
       */
      mc.status = mr->rq(j, mr->ofs + MCU_STAT);
      if (mc.status & MCI_STATUS_VAL)
	mr->wq(j, mr->ofs + MCU_STAT, 0);
      else
	continue;

      /*
       * Bank had valid content (VAL bit set).
       * Verify the event was subscribed to (EN bit set).
       * If not, the event is ignored.
       */
      if (! (mc.status & MCI_STATUS_EN))
	continue;

      /*
       * Valid and enabled event, read the remaining bank registers.
       */
      seen++;
      mcu_read(mr, j, &mc);

      /*
       * Fill out the blanks in the MceInfo record
       */
      mc.org = mr->org;
      mc.id = j;
      mc.stamp = get_seconds();
      mc.flags = (mc.status & MCI_STATUS_UC) ? MC_FLG_FATAL : 0;

      /*
       * If there is any way to detect injected errors, then this is
       * the place to do so and indicate it with the MC_FLG_FALSE flag.
       */

      if (mc.flags & MC_FLG_FATAL) {
#ifdef CONFIG_MK1OM
#if MC_VERBOSE
	ee_printk("Uncore fatal MC: org %d, id %d, status %llx\n", mc.org, mc.id, mc.status);
#endif

	/*
	 * Log UC events in the eeprom.
	 */
	micras_mc_log(&mc);
	mc.flags |= MC_FLG_LOG;

	/*
	 * Notify the SMC that we've had a serious machine check error.
	 */
	micras_mc_ipmi(&mc, 1);
#endif
	/*
	 * Remember the 1st fatal (UC) event
	 */
	if (! gone++)
	  uc = mc;
      }

      /*
       * Notify the host
       */
      micras_mc_send(&mc, 1);

      /*
       * Filter corrected errors. A non-zero mask from the filter
       * names the CTL enable bit(s) to turn off, which throttles
       * a flood of corrected errors from the same source.
       */
      if (! (mc.flags & MC_FLG_FATAL)) {
	uint64_t tsc, msk;

	tsc = rdtsc();
	msk = micras_mc_filter(&mc, tsc, 1);
	if (msk) {
#if MC_VERBOSE
	  ee_printk("Uncore filter: org %d, id %d, ctrl %llx, mask %llx\n", mc.org, mc.id, mc.ctl, msk);
#endif
	  if (mr->qflg & MCU_CTL_64)
	    mr->wq(j, mr->ofs + MCU_CTRL, mc.ctl & ~msk);
	  else
	    mr->wl(j, mr->ofs + MCU_CTRL, (uint32_t)(mc.ctl & ~msk));
	}
      }

      /*
       * Any event post-processing goes here.
       * This would be things like cache line refresh and such.
       * Actual algorithms are TBD.
       */
    }
  }

#if RAS_HALT
  if (gone) {
    atomic_inc(&mce_entry);
    panic("FATAL un-core machine check event:\n"
	  "bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n",
	  uc.org, uc.id, uc.ctl, uc.status, uc.addr, uc.misc);
  }
#endif

  return seen;
}


/*
 * NMI handler.
 *
 * Once we get control in the 1st interrupt (NMI or regular), we'll
 * use IPIs from the local APIC to force all active CPUs into
 * our RAS NMI handler, similar to the core MC handler.
 * After that, the same logic as for the generic MC handler is
 * applied to corral all CPUs through well-defined rendez-vous
 * points where only one CPU gets to run the un-core MC event
 * scan while everybody else is sitting in a holding pen.
 * If containment wasn't an issue, we could simply let the BP
 * run the scan without involving the other CPUs at all.
 */

#define SPINUNIT	50
#define SERIAL_MCU	0
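
/*
 * SPINUNIT is the number of nano-seconds spent per poll in
 * mcu_timed_out(); with the 1 second budget the callers pass in,
 * that is at most ~20 million polls per rendez-vous gate.
 * SERIAL_MCU, when non-zero, releases the CPUs from the 'hold'
 * rendez-vous one at a time instead of all at once.
 */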

struct cpumask	mcu_exc_mask;		/* NMI recipients */
static int	mcu_cpu = -1;		/* SBOX target CPU */
#if MCU_NMI
static uint64_t	mcu_redir;		/* SBOX I/O-APIC redirection entry */
static uint64_t	mcu_old_redir;		/* Restore value for redirection entry */
#else
unsigned int	mcu_eoi;		/* 1st interrupt from local APIC */
#endif
static atomic_t	mcu_callin;		/* Entry rendez-vous gate */
static atomic_t	mcu_leavin;		/* Hold rendez-vous gate */


static int
mcu_timed_out(int64_t * timeout)
{
  if (*timeout < SPINUNIT)
    return 1;

  *timeout -= SPINUNIT;
  touch_nmi_watchdog();
  ndelay(SPINUNIT);

  return 0;
}


static int
mcu_wait(void)
{
  int		cpus, order;
  int64_t	timeout;

  cpus = num_online_cpus();
  timeout = 1 * NSEC_PER_SEC;		/* 1 Second */

  /*
   * Flush all caches
   */

  /*
   * 'Entry' rendez-vous point.
   * Wait here until all CPUs have entered.
   */
  order = atomic_inc_return(&mcu_callin);
  while(atomic_read(&mcu_callin) != cpus) {
    if (mcu_timed_out(&timeout)) {
      /*
       * Timeout waiting for CPU enter rendez-vous
       */
      return -1;
    }
  }

  /*
   * 'Hold' rendez-vous point.
   * All CPUs drop by here 'simultaneously'.
   * The first CPU that 'enter'ed (order of 1) will
   * fall thru while the others wait until their
   * number comes up in the 'leavin' counter
   * (or until a timeout happens). This also has a
   * serializing effect, where one CPU leaves this
   * loop at a time.
   */
  if (order == 1) {
#if SERIAL_MCU
    atomic_set(&mcu_leavin, 1);
#endif
  }
  else {
    while(atomic_read(&mcu_leavin) < order) {
      if (mcu_timed_out(&timeout)) {
	/*
	 * Timeout waiting in CPU hold rendez-vous
	 */
	return -1;
      }
    }
  }

  return order;
}


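/*
 * Second half of the rendez-vous maze.
 * Returns 0 when the maze was traversed cleanly and -1 if 'order'
 * was bad or a timeout occurred, in which case the rendez-vous
 * counters are reset.
 */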
static int
mcu_go(int order)
{
  int		ret;
  int64_t	timeout;

  ret = -1;
  if (order < 0)
    goto mcu_reset;

#if SERIAL_MCU
  /*
   * If any 'per-CPU' activity is needed in isolation
   * (one CPU at a time) then that code needs to go here.
   */

  atomic_inc(&mcu_leavin);		/* Next CPU out of hold */
#endif

  timeout = NSEC_PER_SEC;		/* 1 Second */
  if (order == 1) {
    int cpus;

    /*
     * The first CPU that entered (order of 1) waits here
     * for the others to leave the 'hold' loop in mcu_wait()
     * and enter the 'exit' rendez-vous loop below.
     * Once they are there, it will run the uncore MCA bank
     * scan while the others are parked in the 'exit' loop below.
     */
    cpus = num_online_cpus();
#if SERIAL_MCU
    while(atomic_read(&mcu_leavin) <= cpus) {
      if (mcu_timed_out(&timeout)) {
	/*
	 * Timeout waiting for CPU exit rendez-vous
	 */
	goto mcu_reset;
      }
    }
#else
    atomic_set(&mcu_leavin, cpus);
#endif
    mcu_scan();
    ret = 0;
  }
  else {
    /*
     * Exit rendez-vous point.
     */
    while(atomic_read(&mcu_leavin) != 0) {
      if (mcu_timed_out(&timeout)) {
	/*
	 * Timeout waiting in CPU exit rendez-vous
	 */
	goto mcu_reset;
      }
    }
    return 0;
  }

  /*
   * Reset rendez-vous counters, letting all CPUs
   * leave this function 'simultaneously'.
   */
mcu_reset:
  atomic_set(&mcu_callin, 0);
  atomic_set(&mcu_leavin, 0);
  return ret;
}


/*
 * NMI exception handler.
 * Uncertain whether all cpumask_* functions imply barriers,
 * so erring on the safe side, explicit barriers are used.
 */

#if BEAM_TEST
static int
mcu_nmi(int cpu)
{
#ifdef CONFIG_MK1OM
  uint32_t	mcg_status_lo, mcg_status_hi;
#endif
  struct _mcu_rec * mr;
  MceInfo	mc;
  int		i, j;

  if (cpu != mcu_cpu)
    return 0;

  if (! mcu_prescan())
    return 0;

  wbinvd();

#ifdef CONFIG_MK1OM
  rdmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi);
  wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo | MCG_STATUS_MCIP, mcg_status_hi);
#endif

  for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
    mr = mcu_src + i;

#ifdef CONFIG_MK1OM
    if (mr->org == MC_ORG_TBOX && !mr_txs())
      continue;
#endif

    for(j = 0; j < mr->num; j++) {
      mc.status = mr->rq(j, mr->ofs + MCU_STAT);

      if (! (mc.status & MCI_STATUS_VAL))
	continue;

      if (! (mc.status & MCI_STATUS_EN)) {
	mr->wq(j, mr->ofs + MCU_STAT, 0);
	continue;
      }

      mcu_read(mr, j, &mc);
      mr->wq(j, mr->ofs + MCU_STAT, 0);

      mc.org = mr->org;
      mc.id = j;
      mc.stamp = get_seconds();
      mc.flags = (mc.status & MCI_STATUS_UC) ? MC_FLG_FATAL : 0;

      micras_mc_send(&mc, 1);
    }
  }

#ifdef CONFIG_MK1OM
  wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi);
#endif
  return 1;

  /*
   * Damn compiler options !!!!!!
   * Don't want more changes than this routine, so
   * added dummies to shut up gcc about unused code.
   */
  i = mcu_wait();
  mcu_go(i);
}
#else

static atomic_t	mcu_entry;	/* Handler nesting count, drained in mcu_exit() */

static int
mcu_nmi(int cpu)
{
#ifdef CONFIG_MK1OM
  uint32_t	mcg_status_lo, mcg_status_hi;
#endif
  int		order, eoi;

  atomic_inc(&mcu_entry);

  /*
   * Get MCA status from the SBOX.
   */
#if 0
  /*
   * If no source bits are set, this was not an un-core MCA.
   * This would work if SBOX_MCA_INT_STAT actually worked
   * as described both in the HAS and the register specification.
   * Unfortunately, it doesn't, as per tribal knowledge errata.
   */
  uint32_t	int_stat, int_en;

  int_en = mr_sbox_rl(0, SBOX_MCA_INT_EN);
  int_stat = mr_sbox_rl(0, SBOX_MCA_INT_STAT);
  if (! (int_en & int_stat)) {
    atomic_dec(&mcu_entry);
    return 0;
  }
#else
  /*
   * Instead of having a single source of pending un-core MCA events,
   * we now have to walk all BOXes to check if there is a valid event
   * pending in one of them. That is much more expensive, as we have
   * to check this on all NMIs, including our own cascade NMIs used
   * to corral all CPUs in their rendez-vous point(s). We try to avoid
   * this scan if there already is an un-core NMI in progress.
   * We know that:
   *  un-core MCA NMIs are sent to just one CPU, mcu_cpu
   *  CPUs targeted in the cascade are in mcu_exc_mask
   *  a non-zero atomic variable 'mcu_callin' tells a cascade is in progress
   */
  if (!cpumask_empty(&mcu_exc_mask))
    goto invited;
  if (cpu != mcu_cpu) {
    atomic_dec(&mcu_entry);
    return 0;
  }

  /*
   * On the SBOX target CPU with no un-core handling in progress!
   * Then scan all BOXes for valid pending events.
   * If there aren't any, this is a false alarm and
   * we simply return.
   */
  if (! mcu_prescan()) {
    atomic_dec(&mcu_entry);
    return 0;
  }

invited:
#endif

  /*
   * Flush all caches.
   * This is uncore, so it should not be necessary to
   * empty the internal (L1) caches, but it doesn't hurt either.
   */
  wbinvd();

  /*
   * We do not want to be interrupted by a core MC
   * exception while handling an NMI. We can block
   * core MC events by setting MCG_STATUS_MCIP.
   * This is an MSR, so it has to be done on all CPUs.
   * On KnC that is; KnF does not have that MSR.
   */
#ifdef CONFIG_MK1OM
  rdmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi);
  wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo | MCG_STATUS_MCIP, mcg_status_hi);
#endif

  /*
   * Special for the SBOX NMI target CPU:
   * - disconnect un-core MC lines from the SBOX I/O-APIC, such
   *   that we don't get stacked NMIs in the local APICs.
   * - simulate an NMI broadcast by sending NMIs to all _other_
   *   active CPUs via IPIs. The SBOX could do a broadcast,
   *   but that would send NMIs to sleeping CPUs too, which
   *   we prefer to avoid if possible.
   *TBD: should creating the mcu_exc_mask be protected by a
   *     lock, similar to core events? Who can interfere?
   */
  if (cpu == mcu_cpu) {
    mr_sbox_wl(0, SBOX_MCA_INT_EN, 0);
    cpumask_copy(&mcu_exc_mask, cpu_online_mask);
    cpumask_clear_cpu(cpu, &mcu_exc_mask);
    smp_wmb();
    // apic->send_IPI_mask(&mcu_exc_mask, NMI_VECTOR);
    apic->send_IPI_allbutself(NMI_VECTOR);
#if !MCU_NMI
    if (mcu_eoi) {
      smp_rmb();
      cpumask_set_cpu(cpu, &mcu_exc_mask);
      smp_wmb();
      mcu_eoi = 0;
    }
#endif
  }

  /*
   * Corral all CPUs through the rendez-vous point maze.
   * It guarantees that:
   * - No CPU leaves mcu_wait() until all have entered.
   * - One CPU leaves mcu_wait() at a time.
   * - No CPU leaves mcu_go() until all have entered.
   * - While one CPU is in transit between mcu_wait()
   *   and mcu_go(), all other CPUs are sitting in
   *   tight busy-wait loops in either function.
   * - All CPUs leave mcu_go() at the same time.
   * If there is any 'per-cpu' activity that needs to be
   * run in isolation, it must be placed between mcu_wait()
   * and mcu_go().
   */
  order = mcu_wait();
  if (mcu_go(order)) {
    /*
     * Timeout waiting at one of the rendez-vous points.
     * Scan the un-core MCA banks just in case.
     */
    mcu_scan();
  }

  /*
   * Special for the SBOX NMI target CPU:
   * - reconnect un-core MC lines through to the SBOX I/O-APIC.
   *   If new events are already pending, then this will
   *   result in a 'rising-edge' trigger to the I/O-APIC.
   */
  if (cpu == mcu_cpu)
    mr_sbox_wl(0, SBOX_MCA_INT_EN, mr_txs() ? 0x0fffff07 : 0xff07);

  /*
   * If this CPU got its NMI from an IPI, then it must
   * send an ACK to its local APIC (I think).
   */
  smp_rmb();
  eoi = cpumask_test_and_clear_cpu(cpu, &mcu_exc_mask);
  smp_wmb();
  if (eoi)
    ack_APIC_irq();

  /*
   * Restore core MCG status and return 1, indicating to the
   * kernel NMI handler that we've handled it.
   *TBD: reduce to one write per core instead of one per thread?
   */
#ifdef CONFIG_MK1OM
  wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi);
#endif
  atomic_dec(&mcu_entry);
  return 1;
}
#endif


#if !MCU_NMI
/*
 * MCA handler if using standard interrupts.
 * It's just a trampoline to convert a regular interrupt
 * into an NMI, which is only needed if the I/O-APIC can't
 * generate an NMI.
 *
 *TBD: remove all this? It is not used on KnC, and the KnFs
 *     I've tested on have all been OK sending NMIs.
 */

static irqreturn_t
sbox_handler(int irq, void * tag)
{
  /*
   * Convert this regular interrupt into an NMI.
   */
  mcu_cpu = smp_processor_id();
  mcu_eoi = 1;
  apic->send_IPI_self(NMI_VECTOR);
  return IRQ_HANDLED;
}
#endif


/*
 * Reset all uncore MCA banks to defaults
 */

void
box_reset(int arm)
{
  int		i, j;
  struct _mcu_rec * mr;

  for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
    mr = mcu_src + i;

#ifdef CONFIG_MK1OM
    if (mr->org == MC_ORG_TBOX && !mr_txs())
      continue;
#endif

    for(j = 0; j < mr->num; j++) {
      uint64_t	status;

      /*
       *TBD: Do we want to pick up existing MCA events or drop
       *     them because we don't know _when_ they occurred?
       *     Reporting them would require an internal buffer because
       *     it's unlikely the SCIF MC session is up at this point.
       *     For now we just enter the events into the system log.
       */
      status = mr->rq(j, mr->ofs + MCU_STAT);
      if (status & MCI_STATUS_VAL) {
	MceInfo mc;

	mcu_read(mr, j, &mc);
	printk("RAS.uncore: discard MC event:\n"
	       "bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n",
	       mr->org, j, mc.ctl, status, mc.addr, mc.misc);
      }

      /*
       * Reset MCA bank registers.
       */
      mcu_reset(mr, j, arm);
    }
  }
}


/*
 * Setup interrupt handlers by hooking into the SBOX's I/O-APIC.
 * For now, we send an NMI to a single CPU and let it process the
 * event. This may need to be expanded into a broadcast NMI, similar
 * to what the generic core MC event handler does, in order to keep
 * containment as high as we possibly can.
 *
 *TBD: code a dual rendez-vous mechanism on all active CPUs.
 */

int __init
mcu_init(void)
{
#if MC_VERBOSE
  int		i, j;
#endif

  if (mce_disabled) {
    printk("RAS.uncore: disabled\n");
  }
  else {
    /*
     * Clear the rendez-vous counters
     */
    atomic_set(&mcu_callin, 0);
    atomic_set(&mcu_leavin, 0);

#if MC_VERBOSE
    /*
     * For debug only:
     * Record all SBOX I/O-APIC registers to the kernel log
     */
    printk("SBOX_APICIDR: %x\n", mr_sbox_rl(0, SBOX_APICIDR));
    printk("SBOX_APICVER: %x\n", mr_sbox_rl(0, SBOX_APICVER));
    printk("SBOX_APICAPR: %x\n", mr_sbox_rl(0, SBOX_APICAPR));
    for(i = 0; i < 26 ; i++)
      printk("APICRT%d: %llx\n", i, mr_sbox_rq(0, SBOX_APICRT0 + (8 * i)));
    for(i = 0; i < 8 ; i++)
      printk("APICICR%d: %llx\n", i, mr_sbox_rq(0, SBOX_APICICR0 + (8 * i)));
    printk("SBOX_MCA_INT_EN: %x\n", mr_sbox_rl(0, SBOX_MCA_INT_EN));
    printk("SBOX_MCA_INT_STAT: %x\n", mr_sbox_rl(0, SBOX_MCA_INT_STAT));
#endif

    /*
     * Disconnect the un-core MC lines from the SBOX I/O-APIC, set up
     * the individual BOXes, and clear any un-core MC pending flags
     * from the SBOX I/O-APIC.
     */
    mr_sbox_wl(0, SBOX_MCA_INT_EN, 0);
    box_reset(1);
    mr_sbox_wl(0, SBOX_MCA_INT_STAT, 0);

    /*
     * Setup the SBOX I/O-APIC.
     * Un-core MC events are routed through a mask in register
     * SBOX_MCA_INT_EN into I/O-APIC redirection table entry #16.
     * Ideally we want all uncore MC events to be handled similarly
     * to core MCAs, which means we'd like an NMI on all CPUs.
     * On KnF the I/O-APIC may not trigger an NMI (PoC security),
     * and on KnC, where NMI delivery is possible, it appears not
     * to be ideal to broadcast it to all CPUs because it could
     * wake up cores put to sleep by power management rules.
     * See the MCA HAS, SBOX HAS Vol 4, and A0 Vol 2 for details.
     *
     * The redirection table entry has the following format:
     *  47:32	Destination ID field
     *  17	Interrupt set (testing: trigger an interrupt)
     *  16	Interrupt mask (0=enable, 1=disable)
     *  15	Trigger mode (0=edge, 1=level)
     *  14	Remote IRR (0=inactive, 1=accepted)
     *  13	Interrupt polarity (0=active_high, 1=active_low)
     *  12	Delivery status (0=idle, 1=send_pending)
     *  11	Destination mode (0=physical, 1=logical)
     *  10:8	Delivery mode (0=fixed, low, SMI, rsvd, NMI, INIT, rsvd, ext)
     *  7:0	Interrupt vector
     *
     * The I/O-APIC input is 'rising edge', so we need to select
     * it to be edge triggered, active high.
     */
#if MCU_NMI
    /*
     * If event delivery by NMI is preferred, we want it delivered on
     * the BP. There is already an NMI handler present, so we have to
     * tap into the existing NMI handler for the event notifications.
     *
     * The bit-fiddling below says:
     *   NMI delivery | Destination CPU APIC ID
     */
    mcu_cpu = 0;
    mcu_redir = PUT_BITS(10, 8, 4) | PUT_BITS(47, 32, (uint64_t) cpu_data(mcu_cpu).apicid);
    mcu_old_redir = mr_sbox_rq(0, SBOX_APICRT16);
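    /*
     * Program the entry with the mask bit (16) set first, then
     * write it again unmasked to arm it.
     */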
    mr_sbox_wq(0, SBOX_APICRT16, mcu_redir | PUT_BITS(16, 16, 1));
    mr_sbox_wq(0, SBOX_APICRT16, mcu_redir);
#else
    /*
     * If event delivery by regular interrupt is preferred, then all
     * I/O-APIC setup will be handled by calling request_irq(16,..).
     * There is no guarantee that the event will be sent to the BP
     * (though it's more than likely), so we'll defer identifying the
     * event handling CPU (mcu_cpu) till we receive the callback from
     * the interrupt handling sub-system.
     * The sbox_handler() function just converts the callback into an
     * NMI, because the only way containment can be achieved is to be
     * able to lock down the system completely, which is not realistic
     * using regular interrupts.
     */
    mcu_eoi = 0;
    (void) request_irq(16, sbox_handler, IRQF_TRIGGER_HIGH, "un-core mce", (void *) 42);
#endif

    /*
     * Finally, place the hook in the NMI handler in case there's
     * an un-core event pending, and connect the un-core MC lines
     * through to the SBOX I/O-APIC. From this point onwards we
     * can get uncore MC events at any time. The wider enable mask
     * presumably covers the TBOX event lines, which only exist on
     * SKUs with a TXS (cf. mr_txs()).
     */
    mca_nmi = mcu_nmi;
    mr_sbox_wl(0, SBOX_MCA_INT_EN, mr_txs() ? 0x0fffff07 : 0xff07);

#if MC_VERBOSE
    /*
     * For debug only:
     * Record the initial uncore MCA banks to the kernel log.
     */
    printk("RAS.uncore: dumping all banks\n");

    /*
     * Dump all MCA registers we set to the kernel log
     */
    for(i = 0; i < ARRAY_SIZE(mcu_src); i++) {
      char * boxname;
      struct _mcu_rec * mr;
      uint64_t ctl, stat, addr, misc;

      mr = mcu_src + i;
#ifdef CONFIG_MK1OM
      if (mr->org == MC_ORG_TBOX && !mr_txs())
	continue;
#endif
      switch(mr->org) {
	case MC_ORG_SBOX: boxname = "SBOX"; break;
	case MC_ORG_DBOX: boxname = "DBOX"; break;
	case MC_ORG_GBOX: boxname = "GBOX"; break;
	case MC_ORG_TBOX: boxname = "TBOX"; break;
	default:          boxname = "??";	/* Damn compiler */
      }

      for(j = 0; j < mr->num; j++) {

	if (mr->qflg & MCU_CTL_64)
	  ctl = mr->rq(j, mr->ofs + MCU_CTRL);
	else
	  ctl = (uint64_t) mr->rl(j, mr->ofs + MCU_CTRL);

	stat = mr->rq(j, mr->ofs + MCU_STAT);

	if (mr->qflg & MCU_NO_ADDR)
	  addr = 0;
	else {
	  if (mr->qflg & MCU_ADDR_32)
	    addr = (uint64_t) mr->rl(j, mr->ofs + MCU_ADDR);
	  else
	    addr = mr->rq(j, mr->ofs + MCU_ADDR);
	}

	if (mr->qflg & MCU_NO_MISC)
	  misc = 0;
	else {
	  if (mr->qflg & MCU_MISC_64)
	    misc = mr->rq(j, mr->ofs + MCU_MISC);
	  else
	    misc = (uint64_t) mr->rl(j, mr->ofs + MCU_MISC);
	}

	printk("RAS.uncore: %s[%d] = { %llx, %llx, %llx, %llx }\n",
	       boxname, j, ctl, stat, addr, misc);
      }
    }
    printk("RAS.uncore: MCA_INT_EN = %x\n", mr_sbox_rl(0, SBOX_MCA_INT_EN));
    printk("RAS.uncore: APICRT16 = %llx\n", mr_sbox_rq(0, SBOX_APICRT16));
#endif

    printk("RAS.uncore: init complete\n");
  }

  return 0;
}


/*
 * Cleanup for module unload.
 * Clear/restore hooks in the SBOX's I/O-APIC.
 */

int __exit
mcu_exit(void)
{
  if (! mce_disabled) {

    /*
     * Disconnect the uncore MC lines from the SBOX I/O-APIC.
     * No new uncore MC interrupts will be made.
     */
    mr_sbox_wl(0, SBOX_MCA_INT_EN, 0);

    /*
     * Disconnect the exception handler.
     */
#if MCU_NMI
    mcu_redir = 0;
    mr_sbox_wq(0, SBOX_APICRT16, mcu_old_redir);
#else
    mcu_eoi = 0;
    free_irq(16, (void *) 42);
#endif

    /*
     * Cut the link from the kernel's NMI handler and
     * wait for everybody in the handler to leave.
     */
    mca_nmi = 0;
    while(atomic_read(&mcu_entry))
      cpu_relax();
    mcu_cpu = -1;

    /*
     * No more events will be received; clear
     * MC reporting in all BOXes (just in case).
     */
    box_reset(0);
  }

  printk("RAS.uncore: exit complete\n");
  return 0;
}
