Commit | Line | Data |
---|---|---|
800f879a AT |
1 | /* |
2 | * Copyright 2010-2017 Intel Corporation. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License, version 2, | |
6 | * as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * Disclaimer: The codes contained in these modules may be specific to | |
14 | * the Intel Software Development Platform codenamed Knights Ferry, | |
15 | * and the Intel product codenamed Knights Corner, and are not backward | |
16 | * compatible with other Intel products. Additionally, Intel will NOT | |
17 | * support the codes or instruction set in future products. | |
18 | * | |
19 | * Intel offers no warranty of any kind regarding the code. This code is | |
20 | * licensed on an "AS IS" basis and Intel is not obligated to provide | |
21 | * any support, assistance, installation, training, or other services | |
22 | * of any kind. Intel is also not obligated to provide any updates, | |
23 | * enhancements or extensions. Intel specifically disclaims any warranty | |
24 | * of merchantability, non-infringement, fitness for any particular | |
25 | * purpose, and any other warranty. | |
26 | * | |
27 | * Further, Intel disclaims all liability of any kind, including but | |
28 | * not limited to liability for infringement of any proprietary rights, | |
29 | * relating to the use of the code, even if Intel is notified of the | |
30 | * possibility of such liability. Except as expressly stated in an Intel | |
31 | * license agreement provided with this code and agreed upon with Intel, | |
32 | * no license, express or implied, by estoppel or otherwise, to any | |
33 | * intellectual property rights is granted herein. | |
34 | */ | |
35 | ||
36 | /* | |
37 | * RAS PM interface | |
38 | * | |
39 | * Contains code to handle interaction with the PM driver. | |
40 | * This includes the initial upload of core voltages and | |
41 | * frequencies, handling of 'turbo' mode, and accounting | |
42 | * for and reporting of card throttles. | |
43 | * This really is for KnC only. | |
44 | */ | |
45 | ||
46 | ||
47 | #include <linux/types.h> | |
48 | #include <linux/errno.h> | |
49 | #include <linux/init.h> | |
50 | #include <linux/kernel.h> | |
51 | #include <linux/module.h> | |
52 | #include <linux/moduleparam.h> | |
53 | #include <linux/device.h> | |
54 | #include <linux/sysfs.h> | |
55 | #include <linux/workqueue.h> | |
56 | #include <linux/sched.h> | |
57 | #include <linux/wait.h> | |
58 | #include <linux/bitmap.h> | |
59 | #include <linux/cpumask.h> | |
60 | #include <linux/io.h> | |
61 | #include <linux/cred.h> | |
62 | #include <asm/msr.h> | |
63 | #include <asm/mce.h> | |
64 | #include <asm/apic.h> | |
65 | #include <asm/mic/mic_common.h> | |
66 | #include <asm/mic/mic_knc/micsboxdefine.h> | |
67 | #include <scif.h> | |
68 | #include "micras.h" | |
69 | #include "monahan.h" | |
70 | #include <asm/mic/micpm_device.h> | |
71 | ||
72 | #if USE_PM | |
73 | ||
74 | static atomic_t pm_entry; /* Active calls from PM */ | |
75 | ||
76 | ||
77 | /* | |
78 | * Local variables to keep track of throttle states | |
79 | * | |
80 | * onoff Set to 1 if throttling is in effect, otherwise 0 | |
81 | * count Count of complete throttles (not counting current). | |
82 | * time Time spent in complete throttles | |
83 | * start Time when current throttle started (or 0) | |
84 | * | |
85 | * Units of time is measured in jiffies and converted to mSecs | |
86 | * at the end of a throttle period. Jiffies are lower resolution | |
87 | * than mSec. If a throttle starts and ends within same jiffy, | |
88 | * a standard penalty of 1/2 jiffy gets added. | |
89 | * | |
90 | *TBD: perhaps it's better simply to add 1/2 jiffy to every throttle | |
91 | * period to compensate for rounding down errors. Would be fair | |
92 | * if average throttle period is more than 1 jiffy long. | |
93 | * | |
94 | *TBD: Using atomics may be overkill. Calls from the RAS MT thread | |
95 | * will be serialized (guaranteed), i.e. the report routine needs | |
96 | * not to care about re-entrancy. | |
97 | */ | |
98 | ||
99 | static atomic_t tmp_onoff; | |
100 | static atomic_t tmp_count; | |
101 | static atomic_long_t tmp_time; | |
102 | static atomic_long_t tmp_start; | |
103 | ||
104 | static atomic_t pwr_onoff; | |
105 | static atomic_t pwr_count; | |
106 | static atomic_long_t pwr_time; | |
107 | static atomic_long_t pwr_start; | |
108 | ||
109 | static atomic_t alrt_onoff; | |
110 | static atomic_t alrt_count; | |
111 | static atomic_long_t alrt_time; | |
112 | static atomic_long_t alrt_start; | |
113 | ||
114 | ||
115 | static void | |
116 | mr_pwr_enter(void) | |
117 | { | |
118 | if (atomic_xchg(&pwr_onoff, 1)) | |
119 | return; | |
120 | ||
121 | atomic_long_set(&pwr_start, jiffies); | |
122 | } | |
123 | ||
124 | static void | |
125 | mr_pwr_leave(void) { | |
126 | unsigned long then; | |
127 | ||
128 | if (! atomic_xchg(&pwr_onoff, 0)) | |
129 | return; | |
130 | ||
131 | then = atomic_long_xchg(&pwr_start, 0); | |
132 | atomic_inc(&pwr_count); | |
133 | ||
134 | if (jiffies == then) | |
135 | atomic_long_add(jiffies_to_msecs(1) / 2, &pwr_time); | |
136 | else | |
137 | atomic_long_add(jiffies_to_msecs(jiffies - then), &pwr_time); | |
138 | } | |
139 | ||
140 | ||
141 | static void | |
142 | mr_tmp_enter(void) | |
143 | { | |
144 | if (atomic_xchg(&tmp_onoff, 1)) | |
145 | return; | |
146 | ||
147 | atomic_long_set(&tmp_start, jiffies); | |
148 | } | |
149 | ||
150 | static void | |
151 | mr_tmp_leave(void) | |
152 | { | |
153 | unsigned long then; | |
154 | ||
155 | if (! atomic_xchg(&tmp_onoff, 0)) | |
156 | return; | |
157 | ||
158 | then = atomic_long_xchg(&tmp_start, 0); | |
159 | atomic_inc(&tmp_count); | |
160 | if (jiffies == then) | |
161 | atomic_long_add(jiffies_to_msecs(1) / 2, &tmp_time); | |
162 | else | |
163 | atomic_long_add(jiffies_to_msecs(jiffies - then), &tmp_time); | |
164 | } | |
165 | ||
166 | ||
167 | static void | |
168 | mr_alrt_enter(void) | |
169 | { | |
170 | if (atomic_xchg(&alrt_onoff, 1)) | |
171 | return; | |
172 | ||
173 | atomic_long_set(&alrt_start, jiffies); | |
174 | } | |
175 | ||
176 | static void | |
177 | mr_alrt_leave(void) | |
178 | { | |
179 | unsigned long then; | |
180 | ||
181 | if (! atomic_xchg(&alrt_onoff, 0)) | |
182 | return; | |
183 | ||
184 | then = atomic_long_xchg(&alrt_start, 0); | |
185 | atomic_inc(&alrt_count); | |
186 | if (jiffies == then) | |
187 | atomic_long_add(jiffies_to_msecs(1) / 2, &alrt_time); | |
188 | else | |
189 | atomic_long_add(jiffies_to_msecs(jiffies - then), &alrt_time); | |
190 | } | |
191 | ||
192 | ||
193 | ||
194 | /* | |
195 | * Report current throttle state(s) to MT. | |
196 | * Simple copy of local variables, except for the time | |
197 | * measurement, where current throttle (if any) is included. | |
198 | * Don't want a lock to gate access to the local variables, | |
199 | * so the atomics needs to be read in the correct order. | |
200 | * First throttle state, then adder if throttle is in | |
201 | * progress, then counters. If PM enters or leave throttle | |
202 | * while reading stats, the worst is that time for the | |
203 | * current trottle is not included until next read. | |
204 | */ | |
205 | ||
206 | int | |
207 | mr_pm_ttl(struct mr_rsp_ttl * rsp) | |
208 | { | |
209 | unsigned long then; | |
210 | ||
211 | rsp->power.since = 0; | |
212 | rsp->power.active = (uint8_t) atomic_read(&pwr_onoff); | |
213 | if (rsp->power.active) { | |
214 | then = atomic_long_read(&pwr_start); | |
215 | if (then) | |
216 | rsp->power.since = jiffies_to_msecs(jiffies - then); | |
217 | } | |
218 | rsp->power.count = atomic_read(&pwr_count); | |
219 | rsp->power.time = atomic_long_read(&pwr_time); | |
220 | ||
221 | rsp->thermal.since = 0; | |
222 | rsp->thermal.active = (uint8_t) atomic_read(&tmp_onoff); | |
223 | if (rsp->thermal.active) { | |
224 | then = atomic_long_read(&tmp_start); | |
225 | if (then) | |
226 | rsp->thermal.since = jiffies_to_msecs(jiffies - then); | |
227 | } | |
228 | rsp->thermal.count = atomic_read(&tmp_count); | |
229 | rsp->thermal.time = atomic_long_read(&tmp_time); | |
230 | ||
231 | rsp->alert.since = 0; | |
232 | rsp->alert.active = (uint8_t) atomic_read(&alrt_onoff); | |
233 | if (rsp->alert.active) { | |
234 | then = atomic_long_read(&alrt_start); | |
235 | if (then) | |
236 | rsp->alert.since = jiffies_to_msecs(jiffies - then); | |
237 | } | |
238 | rsp->alert.count = atomic_read(&alrt_count); | |
239 | rsp->alert.time = atomic_long_read(&alrt_time); | |
240 | ||
241 | return 0; | |
242 | } | |
243 | ||
244 | ||
245 | /* | |
246 | * Throttle signaling function (call from PM) | |
247 | */ | |
248 | ||
/* Die-temp threshold used to classify GPU_HOT events; presumably
 * set during init from the SMC — setter not visible in this chunk. */
static int ttl_tcrit;

/*
 * Throttle signaling entry point (called from PM, directly or via
 * the notifier callback below — pm_entry nests harmlessly).
 * Classifies the event, updates the local throttle accounting and
 * forwards a ttl_info record (die temp + change flags) via
 * micras_ttl_send().
 */
void
mr_throttle(int which, int state)
{
  struct ttl_info ttl;
  uint32_t tmp;

  atomic_inc(&pm_entry);

  /* Current die temperature: bits 19:10 of SBOX_THERMAL_STATUS_2 */
  tmp = mr_sbox_rl(0, SBOX_THERMAL_STATUS_2);
  ttl.die = GET_BITS(19, 10, tmp);

  /*
   * PM is weird in the distinction of thermal and power throttle.
   * Power below PLIM should be quiet. Power between PLim1 and PLim0
   * results in TTL_POWER events. Power above PLim0 results in both
   * TTL_POWER and TTL_THERMAL events, _even_ if temperature is well
   * below Tcrit. We handle this by maintaining 3 throttle related
   * event types: thermal throttles, power throttles and power alert.
   * The power alert is flagged on entry as TTL_POWER, no problems.
   * The two throttles both come in as TTL_THERMAL, so we use current
   * die temperature to determine whether it was a thermal threshold
   * or the power limit that was exceeded. Point is power throttles
   * arriving while temperature is above Tcrit _will_ be counted as
   * thermal throttles, period.
   */
  ttl.upd = 0;
  switch(which) {
    case TTL_POWER:
      /* Power alert: simple on/off tracking */
      (state == TTL_OFF) ? mr_alrt_leave() : mr_alrt_enter();
      ttl.upd |= PM_ALRT_TTL_CHG;
      ttl.upd |= atomic_read(&alrt_onoff) ? PM_ALRT_TTL : 0;
      break;

    case TTL_THERMAL:
#if 1
      /*
       * Careful here: may get throttle ON while die > tcrit
       * and select thermal throttle correctly and then get
       * the corresponding throttle OFF when die has fallen
       * below tcrit in which case we must de-assert thermal
       * throttle.
       * As a shortcut, we deassert both throttles if the
       * GPU_HOT signal gets de-asserted (which is correct).
       */
      if (state == TTL_OFF) {
        /* Flag a change only for throttles actually asserted */
        if (atomic_read(&pwr_onoff))
          ttl.upd |= PM_PWR_TTL_CHG;
        if (atomic_read(&tmp_onoff))
          ttl.upd |= PM_TRM_TTL_CHG;
        mr_pwr_leave();
        mr_tmp_leave();
      }
      else {
        /* Below Tcrit => must be the power limit; else thermal */
        if (ttl_tcrit && ttl.die < ttl_tcrit) {
          if (! atomic_read(&pwr_onoff))
            ttl.upd |= (PM_PWR_TTL_CHG | PM_PWR_TTL);
          mr_pwr_enter();
        }
        else {
          if (! atomic_read(&tmp_onoff))
            ttl.upd |= (PM_TRM_TTL_CHG | PM_TRM_TTL);
          mr_tmp_enter();
        }
      }
#else
      /* Simpler variant (disabled): classify OFF events by die
       * temp too, which mis-handles the cross-Tcrit OFF case. */
      if (ttl_tcrit && ttl.die < ttl_tcrit) {
        (state == TTL_OFF) ? mr_pwr_leave() : mr_pwr_enter();
        ttl.upd |= PM_PWR_TTL_CHG;
        ttl.upd |= atomic_read(&pwr_onoff) ? PM_PWR_TTL : 0;
      }
      else {
        (state == TTL_OFF) ? mr_tmp_leave() : mr_tmp_enter();
        ttl.upd |= PM_TRM_TTL_CHG;
        ttl.upd |= atomic_read(&tmp_onoff) ? PM_TRM_TTL : 0;
      }
#endif
      break;
  }

  micras_ttl_send(&ttl);

#if 0
  /* Debug dump of all three throttle accounting sets */
  printk("ttl - args: which %d, state %d\n", which, state);

  printk("ttl - therm: on %d, count %d, time %ld, start %ld\n",
        atomic_read(&tmp_onoff), atomic_read(&tmp_count),
        atomic_long_read(&tmp_time), atomic_long_read(&tmp_start));

  printk("ttl - power: on %d, count %d, time %ld, start %ld\n",
        atomic_read(&pwr_onoff), atomic_read(&pwr_count),
        atomic_long_read(&pwr_time), atomic_long_read(&pwr_start));

  printk("ttl - alert: on %d, count %d, time %ld, start %ld\n",
        atomic_read(&alrt_onoff), atomic_read(&alrt_count),
        atomic_long_read(&alrt_time), atomic_long_read(&alrt_start));
#endif

  atomic_dec(&pm_entry);
}
350 | ||
351 | ||
352 | /* | |
353 | * Throttle signaling function (call from notifier chain) | |
354 | * | |
355 | * TBD: should we test for odd state transitions and recursions? | |
356 | */ | |
357 | ||
358 | static int | |
359 | mr_pm_throttle_callback(struct notifier_block *nb, unsigned long event, void *msg) | |
360 | { | |
361 | atomic_inc(&pm_entry); | |
362 | ||
363 | switch(event) { | |
364 | ||
365 | case EVENT_PROCHOT_ON: | |
366 | mr_throttle(TTL_THERMAL, TTL_ON); | |
367 | break; | |
368 | ||
369 | case EVENT_PROCHOT_OFF: | |
370 | mr_throttle(TTL_THERMAL, TTL_OFF); | |
371 | break; | |
372 | ||
373 | case EVENT_PWR_ALERT_ON: | |
374 | mr_throttle(TTL_POWER, TTL_ON); | |
375 | break; | |
376 | ||
377 | case EVENT_PWR_ALERT_OFF: | |
378 | mr_throttle(TTL_POWER, TTL_OFF); | |
379 | break; | |
380 | ||
381 | default: | |
382 | /* | |
383 | * Ignore whatever else is sent this way | |
384 | */ | |
385 | break; | |
386 | } | |
387 | ||
388 | atomic_dec(&pm_entry); | |
389 | return 0; | |
390 | } | |
391 | ||
392 | ||
393 | ||
394 | ||
395 | /* | |
396 | ** | |
397 | ** Power management routines | |
398 | ** | |
399 | ** one_mmio_rd Read one MMIO register into memory safe | |
400 | ** one_mmio_wr Write one MMIO register from memory safe | |
401 | ** | |
402 | ** one_msr_rd Read one MSR register into memory safe | |
403 | ** one_msr_wr Write one MSR register from memory safe | |
404 | ** | |
405 | ** mc_suspend Prepare for suspend, preserve CSRs to safe | |
406 | ** mc_suspend_cancel Suspend canceled, restore operating mode | |
407 | ** mc_resume Recover from suspend, restore CSRs from safe | |
408 | ** | |
409 | ** For now this stores all registers that are used by this module. | |
410 | ** In reality, only those registers on power planes turned off in | |
411 | ** deep sleep states needs to be stored, but at this point it is | |
412 | ** not known which registers are in that group. This is a table | |
413 | ** driven mechanism that _only_ handles RAS related registers. | |
414 | ** | |
415 | **TBD: Turn off MC handlers while in suspend? | |
416 | ** Both pro's and con's on this one, such as | |
417 | ** + Disabling uncore is easy, just clear INT_EN | |
418 | ** + prevents MC to interfere with PM state transitions | |
419 | ** - can hide corruption due to UC errors | |
420 | ** - requires a lot of IPIs to shut down core MC handling | |
421 | ** + there's nobody to handle MCs when cores are asleep. | |
422 | ** ? can events hide in *BOX banks during suspend/resume | |
423 | ** and fire when restoring the INT_EN register? | |
424 | ** - Disabling core is not that easy (from a module). | |
425 | ** Enabling core MCEs requires setting flag X86_CR4_MCE | |
426 | ** in CR4 on every core _and_ writing ~0 to MSR IA32_MCG_CAP | |
427 | ** on every CPU. Probably better to let per-CPU routines | |
428 | ** like mce_suspend() and mce_resume() handle it, with | |
429 | ** some care because we'd want to save all CTLs before | |
430 | ** mce_suspend() runs and restore them after mce_resume(). | |
431 | ** Problem is how to get at these functions; they are not | |
432 | ** exported and seems not to be hooked into the kernel's PM | |
433 | ** call chains. Perhaps sysclass abstraction ties into PM. | |
434 | ** Even so, who's to invoke it and how? | |
435 | */ | |
436 | ||
437 | #define SAVE_BLOCK_MCA 1 /* Disable MC handling in suspend */ | |
438 | #define RAS_SAVE_MSR 1 /* Include global MSRs in suspend */ | |
439 | #define RAS_SAVE_CPU_MSR 0 /* Include per-CPU MSRs in suspend */ | |
440 | ||
441 | #define SBOX 1 /* SBOX register (index 0) */ | |
442 | #define DBOX 2 /* DBOX register (index 0..1) */ | |
443 | #define GBOX 3 /* GBOX register (index 0..7) */ | |
444 | #define TBOX 4 /* TBOX register (index 0..7) */ | |
445 | #define GMSR 5 /* Global MSR (index 0) */ | |
446 | #define LMSR 6 /* Per-CPU MSR (index 0..CONFIG_NR_CPUS-1) */ | |
447 | ||
448 | #define W64 (1 << 6) /* 64 bit MMIO register (32 bit default) */ | |
449 | #define VLD (1 << 7) /* Register value valid, can be restored */ | |
450 | ||
/*
 * Descriptor for one register preserved across suspend.
 * The low nibble of 'box' selects the register class (SBOX..LMSR);
 * the W64 and VLD flags live in the high bits.  'reg' only holds
 * a meaningful value while VLD is set.
 */
typedef struct _regrec {
  uint8_t	box;		/* Box type + width bit + valid bit */
  uint8_t	num;		/* Box index (or 0) */
  uint16_t	ofs;		/* MMIO byte offset / MSR number */
  uint64_t	reg;		/* Register value */
} RegRec;
457 | ||
458 | ||
459 | /* | |
460 | * Rumor has it that SBOX CSRs below 0x7000 will survive deep sleep | |
461 | * Think it's safer to save/restore CSRs that RAS writes to anyways. | |
462 | * We'll leave out a bunch of RO CSRs, most of which are HW status. | |
463 | * SCRATCH<n> CSRs are above 0x7000 and needs to be preserved. | |
464 | * | |
465 | *TBD: Somebody else to preserve scratch CSRs not used by RAS? | |
466 | * For now I'll save and restore all of them. | |
467 | */ | |
468 | ||
/*
 * MMIO registers saved by mr_suspend() / restored by mr_resume().
 * SBOX_MCA_INT_EN must stay first: it is cleared first on suspend
 * (blocking uncore MCAs) and restored last on resume.  Commented
 * entries are registers considered but deliberately excluded
 * (mostly RO HW status).
 */
static RegRec susp_mmio[] = {				/* Used in file */
  { SBOX, 0, SBOX_MCA_INT_EN, 0 },			/* Uncore, must be 1st */
  { SBOX, 0, SBOX_SCRATCH0, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH1, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH2, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH3, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH4, 0 },			/* Common, knc, */
  { SBOX, 0, SBOX_SCRATCH5, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH6, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH7, 0 },			/* Knc, knf */
  { SBOX, 0, SBOX_SCRATCH8, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH9, 0 },			/* Common, knc, knf */
  { SBOX, 0, SBOX_SCRATCH10, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH11, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH12, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH13, 0 },			/* Common */
  { SBOX, 0, SBOX_SCRATCH14, 0 },			/* - */
  { SBOX, 0, SBOX_SCRATCH15, 0 },			/* - */
//{ SBOX, 0, SBOX_COMPONENT_ID, 0 },			/* Knc */
//{ SBOX, 0, SBOX_SVIDCONTROL, 0 },			/* Knc */
//{ SBOX, 0, SBOX_PCIE_PCI_SUBSYSTEM, 0 },		/* Common */
//{ SBOX, 0, SBOX_PCIE_VENDOR_ID_DEVICE_ID, 0 },	/* Common */
//{ SBOX, 0, SBOX_PCIE_PCI_REVISION_ID_AND_C_0X8, 0 },	/* Common */
  { SBOX, 0, SBOX_OC_I2C_ICR + ICR_OFFSET, 0 },		/* Elog */
  { SBOX, 0, SBOX_OC_I2C_ICR + ISR_OFFSET, 0 },		/* Elog */
  { SBOX, 0, SBOX_OC_I2C_ICR + ISAR_OFFSET, 0 },	/* Elog */
  { SBOX, 0, SBOX_OC_I2C_ICR + IDBR_OFFSET, 0 },	/* Elog */
//{ SBOX, 0, SBOX_OC_I2C_ICR + IBMR_OFFSET, 0 },	/* Elog */
//{ SBOX, 0, SBOX_COREVOLT, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_COREFREQ, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_MEMVOLT, 0 },				/* Knc, knf */
//{ SBOX, 0, SBOX_MEMORYFREQ, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_CURRENTRATIO, 0 },			/* Knc */
//{ SBOX, 0, SBOX_BOARD_VOLTAGE_SENSE, 0 },		/* Knc, knf */
//{ SBOX, 0, SBOX_THERMAL_STATUS, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_BOARD_TEMP1, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_BOARD_TEMP2, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_CURRENT_DIE_TEMP0, 0 },		/* Knc, knf */
//{ SBOX, 0, SBOX_CURRENT_DIE_TEMP1, 0 },		/* Knc, knf */
//{ SBOX, 0, SBOX_CURRENT_DIE_TEMP2, 0 },		/* Knc, knf */
//{ SBOX, 0, SBOX_MAX_DIE_TEMP0, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_MAX_DIE_TEMP1, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_MAX_DIE_TEMP2, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_STATUS_FAN1, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_STATUS_FAN2, 0 },			/* Knc, knf */
//{ SBOX, 0, SBOX_SPEED_OVERRIDE_FAN, 0 },		/* Knc, knf */
  { SBOX, 0, SBOX_MCA_INT_STAT, 0 },			/* Uncore */
//{ SBOX, 0, SBOX_APICRT16, 0 },			/* Uncore */
  { SBOX, 0, SBOX_MCX_CTL_LO, 0 },			/* Uncore */
  { DBOX, 0, DBOX_MC2_CTL, 0 },				/* Uncore */
#ifdef CONFIG_MK1OM
  { DBOX, 1, DBOX_MC2_CTL, 0 },				/* Uncore */
#endif
  { GBOX | W64, 0, GBOX_FBOX_MCA_CTL_LO, 0 },		/* Uncore */
  { GBOX | W64, 1, GBOX_FBOX_MCA_CTL_LO, 0 },		/* Uncore */
  { GBOX | W64, 2, GBOX_FBOX_MCA_CTL_LO, 0 },		/* Uncore */
  { GBOX | W64, 3, GBOX_FBOX_MCA_CTL_LO, 0 },		/* Uncore */
#ifdef CONFIG_MK1OM
  { GBOX | W64, 4, GBOX_FBOX_MCA_CTL_LO, 0 },		/* Uncore */
  { GBOX | W64, 5, GBOX_FBOX_MCA_CTL_LO, 0 },		/* Uncore */
  { GBOX | W64, 6, GBOX_FBOX_MCA_CTL_LO, 0 },		/* Uncore */
  { GBOX | W64, 7, GBOX_FBOX_MCA_CTL_LO, 0 },		/* Uncore */
#endif
#ifdef CONFIG_MK1OM
  { TBOX | W64, 0, TXS_MCX_CONTROL, 0 },		/* Uncore */
  { TBOX | W64, 1, TXS_MCX_CONTROL, 0 },		/* Uncore */
  { TBOX | W64, 2, TXS_MCX_CONTROL, 0 },		/* Uncore */
  { TBOX | W64, 3, TXS_MCX_CONTROL, 0 },		/* Uncore */
  { TBOX | W64, 4, TXS_MCX_CONTROL, 0 },		/* Uncore */
  { TBOX | W64, 5, TXS_MCX_CONTROL, 0 },		/* Uncore */
  { TBOX | W64, 6, TXS_MCX_CONTROL, 0 },		/* Uncore */
  { TBOX | W64, 7, TXS_MCX_CONTROL, 0 },		/* Uncore */
#endif
};
543 | ||
#if RAS_SAVE_MSR
/* Global MSRs saved across suspend; MCG_STATUS must stay first
 * (restored last by mr_resume to re-enable core MCAs). */
static RegRec susp_msr[] = {				/* Used in file */
  { GMSR, 0, MSR_IA32_MCG_STATUS, 0 },			/* Uncore, kernel */
};

#if RAS_SAVE_CPU_MSR
/* Per-CPU MSRs: 4 registers per CPU.  Only the first CPU's 4
 * entries are initialized here; pm_init() replicates them for
 * the remaining CPUs (see comment below). */
static RegRec susp_lcl_msr[4 * CONFIG_NR_CPUS] = {	/* Used in file */
  { LMSR, 0, MSR_IA32_MCx_CTL(0), 0 },			/* Core, kernel */
  { LMSR, 0, MSR_IA32_MCx_CTL(1), 0 },			/* Core, kernel */
  { LMSR, 0, MSR_IA32_MCx_CTL(2), 0 },			/* Core, kernel */
  { LMSR, 0, MSR_IA32_MCG_CTL, 0 },			/* kernel */
  /*
   * The remaining entries is setup/replicated by pm_init()
   */
};
#endif
#endif
561 | ||
562 | ||
563 | static void | |
564 | one_mmio_rd(RegRec * r) | |
565 | { | |
566 | switch(r->box & 0xf) { | |
567 | case SBOX: | |
568 | if (r->box & W64) | |
569 | r->reg = mr_sbox_rq(0, r->ofs); | |
570 | else | |
571 | r->reg = (uint64_t) mr_sbox_rl(0, r->ofs); | |
572 | break; | |
573 | case DBOX: | |
574 | if (r->box & W64) | |
575 | r->reg = mr_dbox_rq(r->num, r->ofs); | |
576 | else | |
577 | r->reg = (uint64_t) mr_dbox_rl(r->num, r->ofs); | |
578 | break; | |
579 | case GBOX: | |
580 | if (r->box & W64) | |
581 | r->reg = mr_gbox_rq(r->num, r->ofs); | |
582 | else | |
583 | r->reg = (uint64_t) mr_gbox_rl(r->num, r->ofs); | |
584 | break; | |
585 | case TBOX: | |
586 | if (mr_txs()) { | |
587 | if (r->box & W64) | |
588 | r->reg = mr_tbox_rq(r->num, r->ofs); | |
589 | else | |
590 | r->reg = (uint64_t) mr_tbox_rl(r->num, r->ofs); | |
591 | } | |
592 | break; | |
593 | default: | |
594 | r->box &= ~VLD; | |
595 | return; | |
596 | } | |
597 | r->box |= VLD; | |
598 | ||
599 | #if PM_VERBOSE | |
600 | printk("mmio_rd: box %d, idx %3d, ofs %04x -> %llx\n", | |
601 | r->box & 0xf, r->num, r->ofs, r->reg); | |
602 | #endif | |
603 | } | |
604 | ||
605 | static void | |
606 | one_mmio_wr(RegRec * r) | |
607 | { | |
608 | if (! (r->box & VLD)) | |
609 | return; | |
610 | ||
611 | switch(r->box & 0xf) { | |
612 | case SBOX: | |
613 | if (r->box & W64) | |
614 | mr_sbox_wq(0, r->ofs, r->reg); | |
615 | else | |
616 | mr_sbox_wl(0, r->ofs, (uint32_t) r->reg); | |
617 | break; | |
618 | case DBOX: | |
619 | if (r->box & W64) | |
620 | mr_dbox_wq(r->num, r->ofs, r->reg); | |
621 | else | |
622 | mr_dbox_wl(r->num, r->ofs, (uint32_t) r->reg); | |
623 | break; | |
624 | case GBOX: | |
625 | if (r->box & W64) | |
626 | mr_gbox_wq(r->num, r->ofs, r->reg); | |
627 | else | |
628 | mr_gbox_wl(r->num, r->ofs, (uint32_t) r->reg); | |
629 | break; | |
630 | case TBOX: | |
631 | if (mr_txs()) { | |
632 | if (r->box & W64) | |
633 | mr_tbox_wq(r->num, r->ofs, r->reg); | |
634 | else | |
635 | mr_tbox_wl(r->num, r->ofs, (uint32_t) r->reg); | |
636 | } | |
637 | break; | |
638 | } | |
639 | r->box &= ~VLD; | |
640 | ||
641 | #if PM_VERBOSE | |
642 | printk("mmio_wr: box %d, idx %3d, ofs %04x <- %llx\n", | |
643 | r->box & 0xf, r->num, r->ofs, r->reg); | |
644 | #endif | |
645 | } | |
646 | ||
647 | ||
648 | #if RAS_SAVE_MSR | |
/*
 * Read one MSR into its descriptor slot and mark it valid.
 * GMSR entries are read on the current CPU; LMSR entries (when
 * compiled in) on the CPU given by r->num.  Unknown classes are
 * marked invalid and skipped.
 */
static void
one_msr_rd(RegRec * r)
{
  uint32_t hi, lo;

  switch(r->box & 0xf) {
    case GMSR:
      rdmsr(r->ofs, lo, hi);
      break;
#if RAS_SAVE_CPU_MSR
    case LMSR:
      rdmsr_on_cpu(r->num, r->ofs, &lo, &hi);
      break;
#endif
    default:
      r->box &= ~VLD;
      return;
  }
  r->reg = ((uint64_t) hi) << 32 | (uint64_t) lo;
  r->box |= VLD;

#if PM_VERBOSE
  printk("msr_rd: box %d, idx %3d, ofs %04x -> %llx\n",
		r->box & 0xf, r->num, r->ofs, r->reg);
#endif
}
675 | ||
/*
 * Write one MSR back from its descriptor slot.
 * Skips slots without VLD; clears VLD afterwards so a slot is
 * restored at most once per save.
 */
static void
one_msr_wr(RegRec * r)
{
  uint32_t hi, lo;

  if (! (r->box & VLD))
    return;

  /* Split the saved 64-bit value into the lo/hi halves wrmsr wants */
  hi = r->reg >> 32;
  lo = r->reg & 0xffffffff;
  switch(r->box & 0xf) {
    case GMSR:
      wrmsr(r->ofs, lo, hi);
      break;
#if RAS_SAVE_CPU_MSR
    case LMSR:
      wrmsr_on_cpu(r->num, r->ofs, lo, hi);
      break;
#endif
  }
  r->box &= ~VLD;

#if PM_VERBOSE
  printk("msr_wr: box %d, idx %3d, ofs %04x <- %llx\n",
		r->box & 0xf, r->num, r->ofs, r->reg);
#endif
}
703 | #endif /* RAS_SAVE_MSR */ | |
704 | ||
705 | ||
706 | /* | |
707 | * Preserve all HW registers that will be lost in | |
708 | * deep sleep states. This will be SBOX registers | |
709 | * above offset 0x7000 and all other BOX registers. | |
710 | */ | |
711 | ||
/*
 * Suspend entry: preserve all RAS-related HW registers that may be
 * lost in deep sleep (table-driven via susp_mmio / susp_msr).
 * Ordering is the contract: MCA interrupt enable is saved and
 * cleared first so no uncore MCA fires mid-save; MCIP is set after
 * the global MSR save to block new core exceptions.
 */
static void
mr_suspend(void)
{
  int i;

  atomic_inc(&pm_entry);

  /*
   * Save SBOX_MCA_INT_EN first and clear it.
   * No more uncore MCAs will get through.
   */
  one_mmio_rd(susp_mmio + 0);
#if SAVE_BLOCK_MCA
  mr_sbox_wl(0, SBOX_MCA_INT_EN, 0);
#endif

  /*
   * Save remaining BOX MMIOs
   */
  for(i = 1; i < ARRAY_SIZE(susp_mmio); i++)
    one_mmio_rd(susp_mmio + i);

#if RAS_SAVE_MSR
  /*
   * Save global MSRs and set MCIP
   * No new exceptions will be asserted
   */
  for(i = 0; i < ARRAY_SIZE(susp_msr); i++)
    one_msr_rd(susp_msr + i);
#if SAVE_BLOCK_MCA
  wrmsr(MSR_IA32_MCG_STATUS, MCG_STATUS_MCIP, 0);
#endif

#if RAS_SAVE_CPU_MSR
  /*
   * Save per-CPU MSRs
   */
  for(i = 0; i < ARRAY_SIZE(susp_lcl_msr); i++)
    one_msr_rd(susp_lcl_msr + i);
#endif
#endif

  atomic_dec(&pm_entry);
}
756 | ||
757 | ||
758 | /* | |
759 | * Undo side effects of a suspend call. | |
760 | * Nothing to do unless we turned MC handlers off. | |
761 | */ | |
762 | ||
/*
 * Suspend was canceled: undo mr_suspend()'s side effects.
 * Only the MC-blocking registers (SBOX_MCA_INT_EN, IA32_MCG_STATUS)
 * need to be written back; everything else was merely read, so its
 * saved copy is just invalidated.
 */
static void
mr_cancel(void)
{
  int i;

  atomic_inc(&pm_entry);

  /*
   * Restore SBOX_MCA_INT_EN to unblock uncore MCs
   * Invalidate all other saved MMIO registers.
   */
  one_mmio_wr(susp_mmio + 0);
  for(i = 1; i < ARRAY_SIZE(susp_mmio); i++)
    susp_mmio[i].box &= ~VLD;

#if RAS_SAVE_MSR
  /*
   * Restore IA32_MCG_STATUS to unblock core MCs
   * Invalidate all other saved MSR registers.
   */
  one_msr_wr(susp_msr + 0);
  for(i = 1; i < ARRAY_SIZE(susp_msr); i++)
    susp_msr[i].box &= ~VLD;

#if RAS_SAVE_CPU_MSR
  for(i = 0; i < ARRAY_SIZE(susp_lcl_msr); i++)
    susp_lcl_msr[i].box &= ~VLD;
#endif
#endif

  atomic_dec(&pm_entry);
}
795 | ||
796 | ||
797 | /* | |
798 | * Restore all HW registers that we use. | |
799 | */ | |
800 | ||
/*
 * Resume from suspend: restore all saved registers, mirroring
 * mr_suspend() in reverse — MCA banks are reset first (in case
 * events latched during sleep), then everything is restored with
 * the MC enables (SBOX_MCA_INT_EN, IA32_MCG_STATUS) written last
 * so no MCA fires into a half-restored configuration.
 */
static void
mr_resume(void)
{
  int i;

  atomic_inc(&pm_entry);

  /*
   * Clear uncore MCA banks (just in case)
   */
  if (susp_mmio[0].box & VLD)
    box_reset(0);

  /*
   * Restore all BOX MMIOs but SBOX_MCA_INT_EN
   */
  for(i = 1; i < ARRAY_SIZE(susp_mmio); i++)
    one_mmio_wr(susp_mmio + i);

  /*
   * Then restore SBOX_MCA_INT_EN to enable uncore MCAs
   */
  one_mmio_wr(susp_mmio + 0);

#if RAS_SAVE_MSR
  /*
   * Restore all global MSRs but IA32_MCG_STATUS
   */
  for(i = 1; i < ARRAY_SIZE(susp_msr); i++)
    one_msr_wr(susp_msr + i);

  /*
   * Then restore IA32_MCG_STATUS to allow core MCAs
   */
  one_msr_wr(susp_msr + 0);

#if RAS_SAVE_CPU_MSR
  /*
   * Restore all per-cpu MSRs
   */
  for(i = 0; i < ARRAY_SIZE(susp_lcl_msr); i++)
    one_msr_wr(susp_lcl_msr + i);
#endif
#endif

  atomic_dec(&pm_entry);
}
848 | ||
849 | ||
850 | /* | |
851 | * Callback from PM notifier chain. | |
852 | * TBD: should we test for odd state transitions and recursions? | |
853 | */ | |
854 | ||
855 | static int | |
856 | mr_pm_callback(struct notifier_block *nb, unsigned long event, void *msg) | |
857 | { | |
858 | ||
859 | switch(event) { | |
860 | case MICPM_DEVEVENT_SUSPEND: | |
861 | mr_suspend(); | |
862 | break; | |
863 | ||
864 | case MICPM_DEVEVENT_RESUME: | |
865 | mr_resume(); | |
866 | break; | |
867 | ||
868 | case MICPM_DEVEVENT_FAIL_SUSPEND: | |
869 | mr_cancel(); | |
870 | break; | |
871 | ||
872 | default: | |
873 | /* | |
874 | * Ignore whatever else is sent this way | |
875 | */ | |
876 | break; | |
877 | } | |
878 | ||
879 | return 0; | |
880 | } | |
881 | ||
882 | ||
883 | ||
/*
**
** The PM module loads before RAS, so we must set up
** the API to support power management, i.e. register.
** PM needs:
**  - Notification when MT changes certain variables.
**    Provided by a call-out list that the PM sets
**    at registration time.
**  - Access to MT calls.
**    The PM module can use micras_mt_call() for access.
**    Since PM loads first, this function needs to
**    be passed at registration time.
** RAS needs:
**  - List of core voltages (for the CVOLT query).
**    We pass a pointer to the voltage list and the
**    voltage list counter to the PM module, which will
**    fill in the actual values (not available until
**    the core-freq driver loads).
**  - List of core frequencies (for the CFREQ query).
**    Same solution as for CVOLT.
**  - Notifications for throttle state changes.
**  - Power management notifications for suspend/resume.
**
** Note: can one notifier block be inserted in multiple
**       chains? It is assumed not, which requires two blocks
**       both pointing to the same local function.
*/
911 | ||
/*
 * Data exchanged with the MIC PM driver at registration time.
 * freq/volt live elsewhere in this module; the PM driver fills
 * in their supported-value lists via the pointers in pm_reg.
 */
extern struct mr_rsp_freq	freq;
extern struct mr_rsp_volt	volt;

struct micpm_params	pm_reg;		/* Our data for PM */
struct micpm_callbacks	pm_cb;		/* PM data for us */

/* Notifier chain entry points exported by the MIC PM driver */
extern void micpm_device_register(struct notifier_block *n);
extern void micpm_device_unregister(struct notifier_block *n);
extern void micpm_atomic_notifier_register(struct notifier_block *n);
extern void micpm_atomic_notifier_unregister(struct notifier_block *n);

/* Device suspend/resume events -> mr_pm_callback */
static struct notifier_block ras_deviceevent = {
	.notifier_call = mr_pm_callback,
};

/*
 * Throttle events arrive on two chains (one atomic, one blocking);
 * a notifier block can only sit in one chain, hence two blocks
 * pointing at the same handler.
 */
static struct notifier_block ras_throttle_event_ns = {
	.notifier_call = mr_pm_throttle_callback,
};

static struct notifier_block ras_throttle_event = {
	.notifier_call = mr_pm_throttle_callback,
};
934 | ||
935 | ||
936 | /* | |
937 | * Setup PM callbacks and SCIF handler. | |
938 | */ | |
939 | ||
940 | static int | |
941 | pm_mt_call(uint16_t cmd, void * buf) | |
942 | { | |
943 | int err; | |
944 | ||
945 | atomic_inc(&pm_entry); | |
946 | err = micras_mt_call(cmd, buf); | |
947 | atomic_dec(&pm_entry); | |
948 | ||
949 | return err; | |
950 | } | |
951 | ||
952 | ||
/*
 * Module init for the RAS <-> PM interface.
 * Replicates the per-CPU MSR save table, reads the thermal-throttle
 * threshold from the SMC, registers with the MIC PM driver, and
 * joins its notifier chains.
 * Returns 0 on success, 1 if PM registration fails.
 */
int __init
pm_init(void)
{
	extern int mr_smc_rd(uint8_t, uint32_t *);

#if RAS_SAVE_CPU_MSR
	/*
	 * Preset MCA bank MSR register descriptions.
	 * Entries 0..3 describe CPU 0; replicate them in groups of
	 * four for every other CPU, patching only the CPU number.
	 *
	 *TBD: We have to use IPIs to read MSRs, which will wake
	 *     up cores at sleep when this function is called.
	 *     PM module may not like this at all.
	 */
	int i, j;
	for(i = 1; i < nr_cpu_ids; i++) {
		j = 4 * i;
		susp_lcl_msr[j] = susp_lcl_msr[0];
		susp_lcl_msr[j + 1] = susp_lcl_msr[1];
		susp_lcl_msr[j + 2] = susp_lcl_msr[2];
		susp_lcl_msr[j + 3] = susp_lcl_msr[3];
		susp_lcl_msr[j].num = i;
		susp_lcl_msr[j + 1].num = i;
		susp_lcl_msr[j + 2].num = i;
		susp_lcl_msr[j + 3].num = i;
	}
#endif

	/*
	 * Get temperature where power throttle becomes thermal throttle.
	 * NOTE(review): return value of mr_smc_rd() is ignored, so
	 * ttl_tcrit keeps its prior value if the SMC read fails —
	 * confirm that is acceptable. SMC register 0x4c is presumably
	 * the thermal-throttle threshold; verify against the SMC spec.
	 */
	mr_smc_rd(0x4c, &ttl_tcrit);

	/*
	 * Register with the MIC Power Management driver.
	 * PM fills in the voltage/frequency lists through these
	 * pointers once the core-freq driver has loaded.
	 */
	pm_reg.volt_lst = volt.supt;
	pm_reg.volt_len = &volt.slen;
	pm_reg.volt_siz = ARRAY_SIZE(volt.supt);
	pm_reg.freq_lst = freq.supt;
	pm_reg.freq_len = &freq.slen;
	pm_reg.freq_siz = ARRAY_SIZE(freq.supt);
	pm_reg.mt_call = pm_mt_call;
	pm_reg.mt_ttl = mr_throttle;
	if (micpm_ras_register(&pm_cb, &pm_reg))
		goto fail_pm;

	/*
	 * Get into the PM notifier lists.
	 * MicPm reports events in 2 chains, one atomic and one
	 * blocking. Our callback will not block!
	 */
	micpm_atomic_notifier_register(&ras_throttle_event_ns);
	micpm_notifier_register(&ras_throttle_event);

	/* Suspend/resume device events only apply to KnC C-step parts */
	if (boot_cpu_data.x86_mask == KNC_C_STEP)
		micpm_device_register(&ras_deviceevent);

	printk("RAS.pm: init complete\n");
	return 0;

fail_pm:
	printk("RAS.pm: init failed\n");
	return 1;
}
1017 | ||
1018 | ||
/*
 * Cleanup for module unload.
 * Leaves the PM notifier chains, de-registers from the PM module,
 * and drains in-flight PM-originated calls before returning.
 */

void __exit
pm_exit(void)
{
	/*
	 * Get off the PM notifier lists
	 */
	micpm_atomic_notifier_unregister(&ras_throttle_event_ns);
	micpm_notifier_unregister(&ras_throttle_event);

	/* Device events were only registered on KnC C-step (see pm_init) */
	if (boot_cpu_data.x86_mask == KNC_C_STEP)
		micpm_device_unregister(&ras_deviceevent);

	/*
	 * De-register with the PM module.
	 */
	micpm_ras_unregister();

	/*
	 * Wait for any calls into this module to finish.
	 * pm_entry is incremented around every PM-originated call
	 * (pm_mt_call and the suspend/resume paths), so once it hits
	 * zero after de-registration no new calls can arrive.
	 */
	while(atomic_read(&pm_entry))
		cpu_relax();

	printk("RAS.pm: exit complete\n");
}
1049 | ||
1050 | #endif /* USE_PM */ |