/*
 * Copyright 2010-2017 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Disclaimer: The codes contained in these modules may be specific to
 * the Intel Software Development Platform codenamed Knights Ferry,
 * and the Intel product codenamed Knights Corner, and are not backward
 * compatible with other Intel products. Additionally, Intel will NOT
 * support the codes or instruction set in future products.
 *
 * Intel offers no warranty of any kind regarding the code. This code is
 * licensed on an "AS IS" basis and Intel is not obligated to provide
 * any support, assistance, installation, training, or other services
 * of any kind. Intel is also not obligated to provide any updates,
 * enhancements or extensions. Intel specifically disclaims any warranty
 * of merchantability, non-infringement, fitness for any particular
 * purpose, and any other warranty.
 *
 * Further, Intel disclaims all liability of any kind, including but
 * not limited to liability for infringement of any proprietary rights,
 * relating to the use of the code, even if Intel is notified of the
 * possibility of such liability. Except as expressly stated in an Intel
 * license agreement provided with this code and agreed upon with Intel,
 * no license, express or implied, by estoppel or otherwise, to any
 * intellectual property rights is granted herein.
 *
 * Contains code to handle interaction with the PM driver.
 * This includes the initial upload of core voltages and
 * frequencies, handling of 'turbo' mode, and accounting
 * for and reporting of card throttles.
 * This really is for KnC only.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/device.h>
#include <linux/workqueue.h>
#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <asm/mic/mic_common.h>
#include <asm/mic/mic_knc/micsboxdefine.h>
#include <asm/mic/micpm_device.h>
/* Number of PM driver calls currently executing in this module;
 * pm_exit() spins on this before tearing down. */
static atomic_t pm_entry; /* Active calls from PM */
/*
 * Local variables to keep track of throttle states:
 *   onoff  Set to 1 if throttling is in effect, otherwise 0
 *   count  Count of complete throttles (not counting current).
 *   time   Time spent in complete throttles
 *   start  Time when current throttle started (or 0)
 *
 * Time is measured in jiffies and converted to mSecs
 * at the end of a throttle period. Jiffies are lower resolution
 * than mSec. If a throttle starts and ends within the same jiffy,
 * a standard penalty of 1/2 jiffy gets added.
 *
 * TBD: perhaps it's better simply to add 1/2 jiffy to every throttle
 *      period to compensate for rounding-down errors. Would be fair
 *      if the average throttle period is more than 1 jiffy long.
 * TBD: Using atomics may be overkill. Calls from the RAS MT thread
 *      will be serialized (guaranteed), i.e. the report routine need
 *      not care about re-entrancy.
 */
/*
 * Throttle accounting, one set per throttle class (field semantics are
 * described in the block comment above): thermal (tmp_*), power (pwr_*)
 * and power alert (alrt_*).
 */
static atomic_t tmp_onoff;        /* 1 while a thermal throttle is in effect */
static atomic_t tmp_count;        /* Completed thermal throttles */
static atomic_long_t tmp_time;    /* msec spent in completed thermal throttles */
static atomic_long_t tmp_start;   /* Jiffy stamp when current thermal throttle began (or 0) */

static atomic_t pwr_onoff;        /* 1 while a power throttle is in effect */
static atomic_t pwr_count;        /* Completed power throttles */
static atomic_long_t pwr_time;    /* msec spent in completed power throttles */
static atomic_long_t pwr_start;   /* Jiffy stamp when current power throttle began (or 0) */

static atomic_t alrt_onoff;       /* 1 while a power alert is in effect */
static atomic_t alrt_count;       /* Completed power alerts */
static atomic_long_t alrt_time;   /* msec spent in completed power alerts */
static atomic_long_t alrt_start;  /* Jiffy stamp when current power alert began (or 0) */
if (atomic_xchg(&pwr_onoff
, 1))
atomic_long_set(&pwr_start
, jiffies
);
if (! atomic_xchg(&pwr_onoff
, 0))
then
= atomic_long_xchg(&pwr_start
, 0);
atomic_long_add(jiffies_to_msecs(1) / 2, &pwr_time
);
atomic_long_add(jiffies_to_msecs(jiffies
- then
), &pwr_time
);
if (atomic_xchg(&tmp_onoff
, 1))
atomic_long_set(&tmp_start
, jiffies
);
if (! atomic_xchg(&tmp_onoff
, 0))
then
= atomic_long_xchg(&tmp_start
, 0);
atomic_long_add(jiffies_to_msecs(1) / 2, &tmp_time
);
atomic_long_add(jiffies_to_msecs(jiffies
- then
), &tmp_time
);
if (atomic_xchg(&alrt_onoff
, 1))
atomic_long_set(&alrt_start
, jiffies
);
if (! atomic_xchg(&alrt_onoff
, 0))
then
= atomic_long_xchg(&alrt_start
, 0);
atomic_long_add(jiffies_to_msecs(1) / 2, &alrt_time
);
atomic_long_add(jiffies_to_msecs(jiffies
- then
), &alrt_time
);
* Report current throttle state(s) to MT.
* Simple copy of local variables, except for the time
* measurement, where current throttle (if any) is included.
* Don't want a lock to gate access to the local variables,
* so the atomics needs to be read in the correct order.
* First throttle state, then adder if throttle is in
* progress, then counters. If PM enters or leave throttle
* while reading stats, the worst is that time for the
* current trottle is not included until next read.
mr_pm_ttl(struct mr_rsp_ttl
* rsp
)
rsp
->power
.active
= (uint8_t) atomic_read(&pwr_onoff
);
then
= atomic_long_read(&pwr_start
);
rsp
->power
.since
= jiffies_to_msecs(jiffies
- then
);
rsp
->power
.count
= atomic_read(&pwr_count
);
rsp
->power
.time
= atomic_long_read(&pwr_time
);
rsp
->thermal
.active
= (uint8_t) atomic_read(&tmp_onoff
);
if (rsp
->thermal
.active
) {
then
= atomic_long_read(&tmp_start
);
rsp
->thermal
.since
= jiffies_to_msecs(jiffies
- then
);
rsp
->thermal
.count
= atomic_read(&tmp_count
);
rsp
->thermal
.time
= atomic_long_read(&tmp_time
);
rsp
->alert
.active
= (uint8_t) atomic_read(&alrt_onoff
);
then
= atomic_long_read(&alrt_start
);
rsp
->alert
.since
= jiffies_to_msecs(jiffies
- then
);
rsp
->alert
.count
= atomic_read(&alrt_count
);
rsp
->alert
.time
= atomic_long_read(&alrt_time
);
/*
 * Throttle signaling function (call from PM)
 *
 * NOTE(review): this and the following regions are mangled - the
 * return type, the declarations of 'tmp' and 'ttl', the enclosing
 * braces and the switch on 'which' were lost; recover from VCS.
 */
mr_throttle(int which, int state)
/* Read current die temperature: bits 19:10 of SBOX thermal status #2 */
tmp = mr_sbox_rl(0, SBOX_THERMAL_STATUS_2);
ttl.die = GET_BITS(19, 10, tmp);
/*
 * PM is weird in the distinction of thermal and power throttle.
 * Power below PLIM should be quiet. Power between PLim1 and PLim0
 * results in TTL_POWER events. Power above PLim0 results in both
 * TTL_POWER and TTL_THERMAL events, _even_ if temperature is well
 * below Tcrit. We handle this by maintaining 3 throttle related
 * event types: thermal throttles, power throttles and power alert.
 * The power alert is flagged on entry as TTL_POWER, no problems.
 * The two throttles both come in as TTL_THERMAL, so we use current
 * die temperature to determine whether it was a thermal threshold
 * or the power limit that was exceeded. Point is: power throttles
 * arriving while temperature is above Tcrit _will_ be counted as
 * thermal throttles, period.
 */
/* TTL_POWER event: power alert - track state and flag the change to PM */
(state == TTL_OFF) ? mr_alrt_leave() : mr_alrt_enter();
ttl.upd |= PM_ALRT_TTL_CHG;
ttl.upd |= atomic_read(&alrt_onoff) ? PM_ALRT_TTL : 0;
/*
 * Careful here: may get throttle ON while die > tcrit
 * and select thermal throttle correctly, and then get
 * the corresponding throttle OFF when die has fallen
 * below tcrit, in which case we must de-assert thermal.
 * As a shortcut, we de-assert both throttles if the
 * GPU_HOT signal gets de-asserted (which is correct).
 */
/*
 * NOTE(review): mangled region - the if/else and switch scaffolding
 * distinguishing throttle-ON from throttle-OFF handling was lost, and
 * the guard 'ttl.die < ttl_tcrit' appears twice verbatim below (one
 * occurrence presumably selected power vs. thermal throttle on entry).
 * Recover the original control flow from version control.
 */
/* Flag whichever throttles are currently asserted as changed */
if (atomic_read(&pwr_onoff))
ttl.upd |= PM_PWR_TTL_CHG;
if (atomic_read(&tmp_onoff))
ttl.upd |= PM_TRM_TTL_CHG;
/* Die below Tcrit: presumably flag throttles not already asserted */
if (ttl_tcrit && ttl.die < ttl_tcrit) {
if (! atomic_read(&pwr_onoff))
ttl.upd |= (PM_PWR_TTL_CHG | PM_PWR_TTL);
if (! atomic_read(&tmp_onoff))
ttl.upd |= (PM_TRM_TTL_CHG | PM_TRM_TTL);
/* Die below Tcrit: treat the TTL_THERMAL event as a power throttle */
if (ttl_tcrit && ttl.die < ttl_tcrit) {
(state == TTL_OFF) ? mr_pwr_leave() : mr_pwr_enter();
ttl.upd |= PM_PWR_TTL_CHG;
ttl.upd |= atomic_read(&pwr_onoff) ? PM_PWR_TTL : 0;
/* Otherwise it is a genuine thermal throttle */
(state == TTL_OFF) ? mr_tmp_leave() : mr_tmp_enter();
ttl.upd |= PM_TRM_TTL_CHG;
ttl.upd |= atomic_read(&tmp_onoff) ? PM_TRM_TTL : 0;
/* Debug trace of the resulting throttle accounting */
printk("ttl - args: which %d, state %d\n", which, state);
printk("ttl - therm: on %d, count %d, time %ld, start %ld\n",
	atomic_read(&tmp_onoff), atomic_read(&tmp_count),
	atomic_long_read(&tmp_time), atomic_long_read(&tmp_start));
printk("ttl - power: on %d, count %d, time %ld, start %ld\n",
	atomic_read(&pwr_onoff), atomic_read(&pwr_count),
	atomic_long_read(&pwr_time), atomic_long_read(&pwr_start));
printk("ttl - alert: on %d, count %d, time %ld, start %ld\n",
	atomic_read(&alrt_onoff), atomic_read(&alrt_count),
	atomic_long_read(&alrt_time), atomic_long_read(&alrt_start));
/*
 * Throttle signaling function (call from notifier chain)
 * TBD: should we test for odd state transitions and recursions?
 *
 * NOTE(review): the return type, the switch on 'event', all case
 * labels except EVENT_PWR_ALERT_OFF, the 'break's and the closing
 * return were lost in this copy; recover from VCS.
 */
mr_pm_throttle_callback(struct notifier_block *nb, unsigned long event, void *msg)
/* presumably: case EVENT_TTL_ON */
mr_throttle(TTL_THERMAL, TTL_ON);
/* presumably: case EVENT_TTL_OFF */
mr_throttle(TTL_THERMAL, TTL_OFF);
/* presumably: case EVENT_PWR_ALERT_ON */
mr_throttle(TTL_POWER, TTL_ON);
case EVENT_PWR_ALERT_OFF:
mr_throttle(TTL_POWER, TTL_OFF);
/* Ignore whatever else is sent this way */
/*
** Power management routines
**
**  one_mmio_rd        Read one MMIO register into memory safe
**  one_mmio_wr        Write one MMIO register from memory safe
**  one_msr_rd         Read one MSR register into memory safe
**  one_msr_wr         Write one MSR register from memory safe
**  mc_suspend         Prepare for suspend, preserve CSRs to safe
**  mc_suspend_cancel  Suspend canceled, restore operating mode
**  mc_resume          Recover from suspend, restore CSRs from safe
**
** For now this stores all registers that are used by this module.
** In reality, only those registers on power planes turned off in
** deep sleep states need to be stored, but at this point it is
** not known which registers are in that group. This is a table
** driven mechanism that _only_ handles RAS related registers.
**
** TBD: Turn off MC handlers while in suspend?
**      Both pro's and con's on this one, such as
**      + Disabling uncore is easy, just clear INT_EN
**      + prevents MC to interfere with PM state transitions
**      - can hide corruption due to UC errors
**      - requires a lot of IPIs to shut down core MC handling
**      + there's nobody to handle MCs when cores are asleep.
**      ? can events hide in *BOX banks during suspend/resume
**        and fire when restoring the INT_EN register?
**      - Disabling core is not that easy (from a module).
**        Enabling core MCEs requires setting flag X86_CR4_MCE
**        in CR4 on every core _and_ writing ~0 to MSR IA32_MCG_CAP
**        on every CPU. Probably better to let per-CPU routines
**        like mce_suspend() and mce_resume() handle it, with
**        some care because we'd want to save all CTLs before
**        mce_suspend() runs and restore them after mce_resume().
**        Problem is how to get at these functions; they are not
**        exported and seem not to be hooked into the kernel's PM
**        call chains. Perhaps sysclass abstraction ties into PM.
**        Even so, who's to invoke it and how?
*/
/* Suspend/resume policy knobs */
#define SAVE_BLOCK_MCA 1 /* Disable MC handling in suspend */
#define RAS_SAVE_MSR 1 /* Include global MSRs in suspend */
#define RAS_SAVE_CPU_MSR 0 /* Include per-CPU MSRs in suspend */

/* Register classes for the save/restore tables below */
#define SBOX 1 /* SBOX register (index 0) */
#define DBOX 2 /* DBOX register (index 0..1) */
#define GBOX 3 /* GBOX register (index 0..7) */
#define TBOX 4 /* TBOX register (index 0..7) */
#define GMSR 5 /* Global MSR (index 0) */
#define LMSR 6 /* Per-CPU MSR (index 0..CONFIG_NR_CPUS-1) */
#define W64 (1 << 6) /* 64 bit MMIO register (32 bit default) */
#define VLD (1 << 7) /* Register value valid, can be restored */

/*
 * One saved-register description.
 * NOTE(review): the enclosing 'typedef struct { ... } RegRec;' wrapper
 * was lost in this copy - RegRec is referenced by the tables below.
 */
uint8_t box; /* Box type + width bit + valid bit */
uint8_t num; /* Box index (or 0) */
uint16_t ofs; /* MMIO byte offset / MSR number */
uint64_t reg; /* Register value */
/*
 * Rumor has it that SBOX CSRs below 0x7000 will survive deep sleep.
 * Think it's safer to save/restore CSRs that RAS writes to anyway.
 * We'll leave out a bunch of RO CSRs, most of which are HW status.
 * SCRATCH<n> CSRs are above 0x7000 and need to be preserved.
 * TBD: Somebody else to preserve scratch CSRs not used by RAS?
 *      For now I'll save and restore all of them.
 */
/*
 * MMIO (BOX) registers preserved across suspend/resume.
 * SBOX_MCA_INT_EN must stay first: mc_suspend() saves and clears it
 * before anything else, and mc_resume() restores it last.
 * NOTE(review): the closing '};' of this initializer was lost in this
 * copy of the file.
 */
static RegRec susp_mmio[] = { /* Used in file */
  { SBOX, 0, SBOX_MCA_INT_EN, 0 },	/* Uncore, must be 1st */
  { SBOX, 0, SBOX_SCRATCH0, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH1, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH2, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH3, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH4, 0 },	/* Common, knc, */
  { SBOX, 0, SBOX_SCRATCH5, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH6, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH7, 0 },	/* Knc, knf */
  { SBOX, 0, SBOX_SCRATCH8, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH9, 0 },	/* Common, knc, knf */
  { SBOX, 0, SBOX_SCRATCH10, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH11, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH12, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH13, 0 },	/* Common */
  { SBOX, 0, SBOX_SCRATCH14, 0 },	/* - */
  { SBOX, 0, SBOX_SCRATCH15, 0 },	/* - */
// { SBOX, 0, SBOX_COMPONENT_ID, 0 }, /* Knc */
// { SBOX, 0, SBOX_SVIDCONTROL, 0 }, /* Knc */
// { SBOX, 0, SBOX_PCIE_PCI_SUBSYSTEM, 0 }, /* Common */
// { SBOX, 0, SBOX_PCIE_VENDOR_ID_DEVICE_ID, 0 }, /* Common */
// { SBOX, 0, SBOX_PCIE_PCI_REVISION_ID_AND_C_0X8, 0 },/* Common */
  /* I2C controller registers used by the event log (Elog) code */
  { SBOX, 0, SBOX_OC_I2C_ICR + ICR_OFFSET, 0 },		/* Elog */
  { SBOX, 0, SBOX_OC_I2C_ICR + ISR_OFFSET, 0 },		/* Elog */
  { SBOX, 0, SBOX_OC_I2C_ICR + ISAR_OFFSET, 0 },	/* Elog */
  { SBOX, 0, SBOX_OC_I2C_ICR + IDBR_OFFSET, 0 },	/* Elog */
// { SBOX, 0, SBOX_OC_I2C_ICR + IBMR_OFFSET, 0 }, /* Elog */
// { SBOX, 0, SBOX_COREVOLT, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_COREFREQ, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_MEMVOLT, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_MEMORYFREQ, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_CURRENTRATIO, 0 }, /* Knc */
// { SBOX, 0, SBOX_BOARD_VOLTAGE_SENSE, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_THERMAL_STATUS, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_BOARD_TEMP1, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_BOARD_TEMP2, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_CURRENT_DIE_TEMP0, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_CURRENT_DIE_TEMP1, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_CURRENT_DIE_TEMP2, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_MAX_DIE_TEMP0, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_MAX_DIE_TEMP1, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_MAX_DIE_TEMP2, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_STATUS_FAN1, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_STATUS_FAN2, 0 }, /* Knc, knf */
// { SBOX, 0, SBOX_SPEED_OVERRIDE_FAN, 0 }, /* Knc, knf */
  { SBOX, 0, SBOX_MCA_INT_STAT, 0 },	/* Uncore */
// { SBOX, 0, SBOX_APICRT16, 0 }, /* Uncore */
  { SBOX, 0, SBOX_MCX_CTL_LO, 0 },	/* Uncore */
  /* DBOX MCA controls, one per DBOX instance */
  { DBOX, 0, DBOX_MC2_CTL, 0 },		/* Uncore */
  { DBOX, 1, DBOX_MC2_CTL, 0 },		/* Uncore */
  /* GBOX (memory controller) MCA controls, 64-bit, one per instance */
  { GBOX | W64, 0, GBOX_FBOX_MCA_CTL_LO, 0 },	/* Uncore */
  { GBOX | W64, 1, GBOX_FBOX_MCA_CTL_LO, 0 },	/* Uncore */
  { GBOX | W64, 2, GBOX_FBOX_MCA_CTL_LO, 0 },	/* Uncore */
  { GBOX | W64, 3, GBOX_FBOX_MCA_CTL_LO, 0 },	/* Uncore */
  { GBOX | W64, 4, GBOX_FBOX_MCA_CTL_LO, 0 },	/* Uncore */
  { GBOX | W64, 5, GBOX_FBOX_MCA_CTL_LO, 0 },	/* Uncore */
  { GBOX | W64, 6, GBOX_FBOX_MCA_CTL_LO, 0 },	/* Uncore */
  { GBOX | W64, 7, GBOX_FBOX_MCA_CTL_LO, 0 },	/* Uncore */
  /* TBOX MCA controls, 64-bit, one per instance */
  { TBOX | W64, 0, TXS_MCX_CONTROL, 0 },	/* Uncore */
  { TBOX | W64, 1, TXS_MCX_CONTROL, 0 },	/* Uncore */
  { TBOX | W64, 2, TXS_MCX_CONTROL, 0 },	/* Uncore */
  { TBOX | W64, 3, TXS_MCX_CONTROL, 0 },	/* Uncore */
  { TBOX | W64, 4, TXS_MCX_CONTROL, 0 },	/* Uncore */
  { TBOX | W64, 5, TXS_MCX_CONTROL, 0 },	/* Uncore */
  { TBOX | W64, 6, TXS_MCX_CONTROL, 0 },	/* Uncore */
  { TBOX | W64, 7, TXS_MCX_CONTROL, 0 },	/* Uncore */
/*
 * Global MSRs preserved across suspend/resume.
 * NOTE(review): the closing '};' of both initializers below was lost
 * in this copy of the file.
 */
static RegRec susp_msr[] = { /* Used in file */
  { GMSR, 0, MSR_IA32_MCG_STATUS, 0 },	/* Uncore, kernel */

/*
 * Per-CPU MSRs preserved across suspend/resume: four template entries
 * for CPU 0; pm_init() replicates them for the remaining CPUs.
 */
static RegRec susp_lcl_msr[4 * CONFIG_NR_CPUS] = { /* Used in file */
  { LMSR, 0, MSR_IA32_MCx_CTL(0), 0 },	/* Core, kernel */
  { LMSR, 0, MSR_IA32_MCx_CTL(1), 0 },	/* Core, kernel */
  { LMSR, 0, MSR_IA32_MCx_CTL(2), 0 },	/* Core, kernel */
  { LMSR, 0, MSR_IA32_MCG_CTL, 0 },	/* kernel */
/* The remaining entries are set up / replicated by pm_init() */
/*
 * Read one MMIO register into its RegRec save slot.
 * NOTE(review): mangled region - the function header (presumably
 * 'static void one_mmio_rd(RegRec * r)') and the switch scaffolding
 * selecting the accessor from 'r->box' (box type in the low bits,
 * W64 presumably selecting the 64-bit 'rq' over the 32-bit 'rl'
 * accessor) were lost in this copy; recover from VCS.
 */
r->reg = mr_sbox_rq(0, r->ofs);
r->reg = (uint64_t) mr_sbox_rl(0, r->ofs);
r->reg = mr_dbox_rq(r->num, r->ofs);
r->reg = (uint64_t) mr_dbox_rl(r->num, r->ofs);
r->reg = mr_gbox_rq(r->num, r->ofs);
r->reg = (uint64_t) mr_gbox_rl(r->num, r->ofs);
r->reg = mr_tbox_rq(r->num, r->ofs);
r->reg = (uint64_t) mr_tbox_rl(r->num, r->ofs);
/* Debug trace: box type is in the low 4 bits of 'box' */
printk("mmio_rd: box %d, idx %3d, ofs %04x -> %llx\n",
	r->box & 0xf, r->num, r->ofs, r->reg);
/*
 * Write one MMIO register back from its RegRec save slot.
 * NOTE(review): mangled region - the function header (presumably
 * 'static void one_mmio_wr(RegRec * r)'), the VLD-flag guard and the
 * switch scaffolding selecting the accessor from 'r->box' were lost
 * in this copy; recover from VCS.
 */
mr_sbox_wq(0, r->ofs, r->reg);
mr_sbox_wl(0, r->ofs, (uint32_t) r->reg);
mr_dbox_wq(r->num, r->ofs, r->reg);
mr_dbox_wl(r->num, r->ofs, (uint32_t) r->reg);
mr_gbox_wq(r->num, r->ofs, r->reg);
mr_gbox_wl(r->num, r->ofs, (uint32_t) r->reg);
mr_tbox_wq(r->num, r->ofs, r->reg);
mr_tbox_wl(r->num, r->ofs, (uint32_t) r->reg);
/* Debug trace: box type is in the low 4 bits of 'box' */
printk("mmio_wr: box %d, idx %3d, ofs %04x <- %llx\n",
	r->box & 0xf, r->num, r->ofs, r->reg);
/*
 * Read one MSR into its RegRec save slot (IPIs the target CPU 'r->num';
 * 'r->ofs' holds the MSR number).
 * NOTE(review): the function header and the declarations of 'lo'/'hi'
 * were lost in this copy; recover from VCS.
 */
rdmsr_on_cpu(r->num, r->ofs, &lo, &hi);
/* Combine the two 32-bit halves into the 64-bit save slot */
r->reg = ((uint64_t) hi) << 32 | (uint64_t) lo;
/* Debug trace */
printk("msr_rd: box %d, idx %3d, ofs %04x -> %llx\n",
	r->box & 0xf, r->num, r->ofs, r->reg);
/*
 * Write one MSR back from its RegRec save slot (IPIs CPU 'r->num').
 * NOTE(review): the function header and local declarations were lost
 * in this copy; additionally the matching 'hi = r->reg >> 32;'
 * assignment is missing, so 'hi' would be used uninitialized as
 * written - recover from VCS.
 */
lo = r->reg & 0xffffffff;
wrmsr_on_cpu(r->num, r->ofs, lo, hi);
/* Debug trace */
printk("msr_wr: box %d, idx %3d, ofs %04x <- %llx\n",
	r->box & 0xf, r->num, r->ofs, r->reg);
/* NOTE(review): matching '#if ... RAS_SAVE_MSR' not visible in this copy */
#endif /* RAS_SAVE_MSR */
/*
 * Preserve all HW registers that will be lost in
 * deep sleep states. This will be SBOX registers
 * above offset 0x7000 and all other BOX registers.
 *
 * NOTE(review): the function header (presumably mc_suspend) and the
 * declaration of loop counter 'i' were lost in this copy; recover
 * from VCS.
 */
/*
 * Save SBOX_MCA_INT_EN first and clear it.
 * No more uncore MCAs will get through.
 */
one_mmio_rd(susp_mmio + 0);
mr_sbox_wl(0, SBOX_MCA_INT_EN, 0);
/* Save remaining BOX MMIOs (entry 0 already done above) */
for(i = 1; i < ARRAY_SIZE(susp_mmio); i++)
	one_mmio_rd(susp_mmio + i);
/*
 * Save global MSRs and set MCIP.
 * No new exceptions will be asserted.
 */
for(i = 0; i < ARRAY_SIZE(susp_msr); i++)
	one_msr_rd(susp_msr + i);
wrmsr(MSR_IA32_MCG_STATUS, MCG_STATUS_MCIP, 0);
/* Save per-CPU MSRs (IPIs every CPU) */
for(i = 0; i < ARRAY_SIZE(susp_lcl_msr); i++)
	one_msr_rd(susp_lcl_msr + i);
/*
 * Undo side effects of a suspend call.
 * Nothing to do unless we turned MC handlers off.
 *
 * NOTE(review): the function header (presumably mc_suspend_cancel)
 * and the declaration of 'i' were lost in this copy; recover from VCS.
 */
/*
 * Restore SBOX_MCA_INT_EN to unblock uncore MCs.
 * Invalidate all other saved MMIO registers.
 */
one_mmio_wr(susp_mmio + 0);
for(i = 1; i < ARRAY_SIZE(susp_mmio); i++)
	susp_mmio[i].box &= ~VLD;
/*
 * Restore IA32_MCG_STATUS to unblock core MCs.
 * Invalidate all other saved MSR registers.
 */
one_msr_wr(susp_msr + 0);
for(i = 1; i < ARRAY_SIZE(susp_msr); i++)
/* NOTE(review): this loop's body is missing in this copy - presumably
 * 'susp_msr[i].box &= ~VLD;'. As written the next for-loop becomes its
 * body; recover from VCS. */
for(i = 0; i < ARRAY_SIZE(susp_lcl_msr); i++)
	susp_lcl_msr[i].box &= ~VLD;
/*
 * Restore all HW registers that we use.
 * Clear uncore MCA banks (just in case).
 *
 * NOTE(review): the function header (presumably mc_resume), the
 * declaration of 'i', and the braces around the body of the
 * 'if (susp_mmio[0].box & VLD)' guard were lost in this copy;
 * the guard presumably skipped the restore when nothing valid was
 * saved - recover from VCS.
 */
if (susp_mmio[0].box & VLD)
/* Restore all BOX MMIOs but SBOX_MCA_INT_EN (entry 0) */
for(i = 1; i < ARRAY_SIZE(susp_mmio); i++)
	one_mmio_wr(susp_mmio + i);
/* Then restore SBOX_MCA_INT_EN to enable uncore MCAs */
one_mmio_wr(susp_mmio + 0);
/* Restore all global MSRs but IA32_MCG_STATUS (entry 0) */
for(i = 1; i < ARRAY_SIZE(susp_msr); i++)
	one_msr_wr(susp_msr + i);
/* Then restore IA32_MCG_STATUS to allow core MCAs */
one_msr_wr(susp_msr + 0);
/* Restore all per-cpu MSRs (IPIs every CPU) */
for(i = 0; i < ARRAY_SIZE(susp_lcl_msr); i++)
	one_msr_wr(susp_lcl_msr + i);
/*
 * Callback from PM notifier chain.
 * TBD: should we test for odd state transitions and recursions?
 *
 * NOTE(review): the body is mostly missing in this copy - only the
 * case labels remain; the switch on 'event', the calls the cases
 * guarded (presumably mc_suspend(), mc_resume(), mc_suspend_cancel()),
 * the 'break's and the return were lost. Recover from VCS.
 */
mr_pm_callback(struct notifier_block *nb, unsigned long event, void *msg)
case MICPM_DEVEVENT_SUSPEND:
case MICPM_DEVEVENT_RESUME:
case MICPM_DEVEVENT_FAIL_SUSPEND:
/* Ignore whatever else is sent this way */
/*
** The PM module loads before RAS, so we must set up
** the API to support power management, i.e. register:
**
** - Notification when MT changes certain variables.
**   Provided by a call-out list that the PM sets.
**   The PM module can use micras_mt_call() for access.
**   Since PM loads first, this function needs to
**   be passed at registration time.
** - List of core voltages (for CVOLT query).
**   We pass a pointer to the voltage list and the
**   voltage list counter to the PM module, which will
**   fill in the actual values (not available until
**   the core-freq driver loads).
** - List of core frequencies (for CFREQ query).
**   Same solution as for CVOLT.
** - Notifications for throttle state changes.
** - Power management notifications for suspend/resume.
**
** Note: can one notifier block be inserted in multiple
**       chains? It's assumed not, which requires two blocks
**       both pointing to the same local function.
*/
/* Voltage/frequency response templates owned elsewhere in the module */
extern struct mr_rsp_freq freq;
extern struct mr_rsp_volt volt;

struct micpm_params pm_reg; /* Our data for PM */
struct micpm_callbacks pm_cb; /* PM data for us */

/* Registration entry points exported by the PM module */
extern void micpm_device_register(struct notifier_block *n);
extern void micpm_device_unregister(struct notifier_block *n);
extern void micpm_atomic_notifier_register(struct notifier_block *n);
extern void micpm_atomic_notifier_unregister(struct notifier_block *n);

/*
 * Notifier blocks: one for device (suspend/resume) events and two for
 * throttle events - one per chain (atomic and blocking), per the note
 * above about a block living in only one chain.
 * NOTE(review): each initializer's closing '};' was lost in this copy.
 */
static struct notifier_block ras_deviceevent = {
	.notifier_call = mr_pm_callback,
static struct notifier_block ras_throttle_event_ns = {
	.notifier_call = mr_pm_throttle_callback,
static struct notifier_block ras_throttle_event = {
	.notifier_call = mr_pm_throttle_callback,
/*
 * Setup PM callbacks and SCIF handler.
 *
 * Relay an MT request from the PM driver to the RAS MT dispatcher.
 * NOTE(review): the return type, body braces, 'err' declaration and
 * the 'return err;' were lost in this copy; recover from VCS.
 */
pm_mt_call(uint16_t cmd, void * buf)
err = micras_mt_call(cmd, buf);

/* SMC register read helper, defined elsewhere in the RAS module */
extern int mr_smc_rd(uint8_t, uint32_t *);
/*
 * Preset MCA bank MSR register descriptions.
 * TBD: We have to use IPIs to read MSRs, which will wake
 *      up cores at sleep when this function is called.
 *      PM module may not like this at all.
 *
 * Replicate the four CPU-0 template entries of susp_lcl_msr for every
 * other CPU and point them at that CPU.
 * NOTE(review): this is (part of) pm_init(); the function header, the
 * declarations of 'i'/'j', the per-iteration 'j = 4 * i' assignment
 * and the loop's closing brace were lost in this copy. Also note that
 * '.num' is assigned for slots j+1..j+3 but not for slot j - the
 * 'susp_lcl_msr[j].num = i;' line was presumably lost as well.
 * Recover from VCS.
 */
for(i = 1; i < nr_cpu_ids; i++) {
susp_lcl_msr[j] = susp_lcl_msr[0];
susp_lcl_msr[j + 1] = susp_lcl_msr[1];
susp_lcl_msr[j + 2] = susp_lcl_msr[2];
susp_lcl_msr[j + 3] = susp_lcl_msr[3];
susp_lcl_msr[j + 1].num = i;
susp_lcl_msr[j + 2].num = i;
susp_lcl_msr[j + 3].num = i;
/* Get temperature where power throttle becomes thermal throttle (SMC register 0x4c) */
mr_smc_rd(0x4c, &ttl_tcrit);
/*
 * Register with the MIC Power Management driver.
 */
pm_reg.volt_lst = volt.supt;	/* Core voltage list, filled in by PM */
pm_reg.volt_len = &volt.slen;
pm_reg.volt_siz = ARRAY_SIZE(volt.supt);
pm_reg.freq_lst = freq.supt;	/* Core frequency list, filled in by PM */
pm_reg.freq_len = &freq.slen;
pm_reg.freq_siz = ARRAY_SIZE(freq.supt);
pm_reg.mt_call = pm_mt_call;	/* MT access hook for the PM module */
pm_reg.mt_ttl = mr_throttle;	/* Throttle signaling hook */
if (micpm_ras_register(&pm_cb, &pm_reg))
/* NOTE(review): the error-path statement guarded by this 'if' was lost
 * in this copy (as written the next call becomes its body); the
 * "init failed" printk below suggests a failure branch existed.
 * Recover from VCS. */
/*
 * Get into the PM notifier lists.
 * MicPm reports events in 2 chains, one atomic and one
 * blocking. Our callback will not block!
 */
micpm_atomic_notifier_register(&ras_throttle_event_ns);
micpm_notifier_register(&ras_throttle_event);
/* Device (suspend/resume) events are only registered on KnC C-step */
if (boot_cpu_data.x86_mask == KNC_C_STEP)
	micpm_device_register(&ras_deviceevent);
printk("RAS.pm: init complete\n");
/* NOTE(review): failure path - the control flow separating it from the
 * success path above was lost in this copy. */
printk("RAS.pm: init failed\n");
/*
 * Cleanup for module unload.
 * Clear/restore hooks in the native MCA handler.
 *
 * NOTE(review): this is (part of) pm_exit(); the function header and
 * the de-registration call mentioned below were lost in this copy;
 * recover from VCS.
 */
/* Get off the PM notifier lists */
micpm_atomic_notifier_unregister(&ras_throttle_event_ns);
micpm_notifier_unregister(&ras_throttle_event);
/* Device events were only registered on KnC C-step (see pm_init) */
if (boot_cpu_data.x86_mask == KNC_C_STEP)
	micpm_device_unregister(&ras_deviceevent);
/*
 * De-register with the PM module.
 * Wait for any calls into this module to finish.
 */
while(atomic_read(&pm_entry))
/* NOTE(review): the wait-loop body (presumably a relax/sleep) was lost
 * in this copy - as written the printk below becomes the loop body.
 * Recover from VCS. */
printk("RAS.pm: exit complete\n");