/*
 * Copyright 2010-2017 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Disclaimer: The codes contained in these modules may be specific to
 * the Intel Software Development Platform codenamed Knights Ferry,
 * and the Intel product codenamed Knights Corner, and are not backward
 * compatible with other Intel products. Additionally, Intel will NOT
 * support the codes or instruction set in future products.
 *
 * Intel offers no warranty of any kind regarding the code. This code is
 * licensed on an "AS IS" basis and Intel is not obligated to provide
 * any support, assistance, installation, training, or other services
 * of any kind. Intel is also not obligated to provide any updates,
 * enhancements or extensions. Intel specifically disclaims any warranty
 * of merchantability, non-infringement, fitness for any particular
 * purpose, and any other warranty.
 *
 * Further, Intel disclaims all liability of any kind, including but
 * not limited to liability for infringement of any proprietary rights,
 * relating to the use of the code, even if Intel is notified of the
 * possibility of such liability. Except as expressly stated in an Intel
 * license agreement provided with this code and agreed upon with Intel,
 * no license, express or implied, by estoppel or otherwise, to any
 * intellectual property rights is granted herein.
 */

/*
 * RAS module driver
 *
 * Contains code to handle module install/removal
 * and proper registration with SCIF, the sysfs
 * pseudo file system, timer ticks, the I2C driver,
 * and other one-time tasks.
 */

#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/device.h>
#include <linux/sysfs.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/io.h>
#include <linux/cred.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/apic.h>
#include <asm/mic/mic_common.h>
#include <asm/mic/mic_knc/autobaseaddress.h>
#include <asm/mic/mic_knc/micsboxdefine.h>
#include <scif.h>
#include "micras.h"

#if MT_VERBOSE || MC_VERBOSE || PM_VERBOSE
/*
 * For making scif_epd_t non-opaque
 */
#define _MIC_MICBASEDEFINE_REGISTERS_H_ 1
#include <mic/micscif.h>
#endif

/*
** Lookup table to map API opcodes into MT functions.
**
** As we have to deal with both KnF and KnC, functions to
** retrieve information may be generic, in micras_common.c,
** or platform specific, in micras_kn{cf}.c.
** Code location is transparent to this table.
**
** Some MT functions can safely be called without
** serialization, e.g. if they are read-only or use
** atomics to get/set variables. The 'simple' flag tells
** which functions are safe to call without serialization.
** Other functions should be called through micras_mt_call().
**
** See micras_api.h and micpm_api.h for function details.
*/

static struct fnc_tab fnc_map[] = {
  { 0,               0, 0, 0 },
  { MR_REQ_HWINF,    1, 0, mr_get_hwinf },
  { MR_REQ_VERS,     1, 0, mr_get_vers },
  { MR_REQ_CFREQ,    0, 0, mr_get_freq },
  { MR_SET_CFREQ,    0, 1, mr_set_freq },
  { MR_REQ_CVOLT,    0, 0, mr_get_volt },
  { MR_SET_CVOLT,    0, 1, mr_set_volt },
  { MR_REQ_PWR,      0, 0, mr_get_power },
  { MR_REQ_PLIM,     0, 0, mr_get_plim },
  { MR_SET_PLIM,     0, 1, mr_set_plim },
  { MR_REQ_CLST,     0, 0, mr_get_clst },
  { MR_ENB_CORE,     0, 1, 0 },
  { MR_DIS_CORE,     0, 1, 0 },
  { MR_REQ_GDDR,     1, 0, mr_get_gddr },
  { MR_REQ_GFREQ,    1, 0, mr_get_gfreq },
  { MR_SET_GFREQ,    1, 1, 0 },
  { MR_REQ_GVOLT,    1, 0, mr_get_gvolt },
  { MR_SET_GVOLT,    1, 1, 0 },
  { MR_REQ_TEMP,     0, 0, mr_get_temp },
  { MR_REQ_FAN,      0, 0, mr_get_fan },
  { MR_SET_FAN,      0, 1, mr_set_fan },
  { MR_REQ_ECC,      1, 0, mr_get_ecc },
  { MR_SET_ECC,      0, 1, 0 },
  { MR_REQ_TRC,      1, 0, mr_get_trc },
  { MR_SET_TRC,      1, 1, mr_set_trc },
  { MR_REQ_TRBO,     0, 0, mr_get_trbo },
  { MR_SET_TRBO,     0, 1, mr_set_trbo },
  { MR_REQ_OCLK,     0, 0, 0 },
  { MR_SET_OCLK,     0, 1, 0 },
  { MR_REQ_CUTL,     0, 0, mr_get_cutl },
  { MR_REQ_MEM,      0, 0, mr_get_mem },
  { MR_REQ_OS,       0, 0, mr_get_os },
  { MR_REQ_PROC,     0, 0, mr_get_proc },
  { MR_REQ_THRD,     0, 0, 0 },
  { MR_REQ_PVER,     1, 0, mr_get_pver },
  { MR_CMD_PKILL,    0, 1, mr_cmd_pkill },
  { MR_CMD_UKILL,    0, 1, mr_cmd_ukill },
#if defined(CONFIG_MK1OM)
  { MR_GET_SMC,      0, 0, mr_get_smc },
  { MR_SET_SMC,      0, 0, mr_set_smc },
#else
#if defined(CONFIG_ML1OM) && USE_FSC
  { MR_GET_SMC,      0, 0, mr_get_fsc },
  { MR_SET_SMC,      0, 1, mr_set_fsc },
#else
  { 0,               0, 0, 0 },
  { 0,               0, 0, 0 },
#endif
#endif
  { MR_REQ_PMCFG,    0, 0, mr_get_pmcfg },
#if defined(CONFIG_MK1OM)
  { MR_REQ_LED,      0, 0, mr_get_led },
  { MR_SET_LED,      0, 1, mr_set_led },
  { MR_REQ_PROCHOT,  0, 0, mr_get_prochot },
  { MR_SET_PROCHOT,  0, 1, mr_set_prochot },
  { MR_REQ_PWRALT,   0, 0, mr_get_pwralt },
  { MR_SET_PWRALT,   0, 1, mr_set_pwralt },
  { MR_REQ_PERST,    0, 0, mr_get_perst },
  { MR_SET_PERST,    0, 1, mr_set_perst },
  { MR_REQ_TTL,      0, 0, mr_get_ttl },
#else
  { 0,               0, 0, 0 },
  { 0,               0, 0, 0 },
  { 0,               0, 0, 0 },
  { 0,               0, 0, 0 },
  { 0,               0, 0, 0 },
  { 0,               0, 0, 0 },
  { 0,               0, 0, 0 },
  { 0,               0, 0, 0 },
  { 0,               0, 0, 0 },
#endif
#if defined(CONFIG_MK1OM) && USE_PM
  { 0,               0, 0, 0 },
  { PM_REQ_PL0,      1, 0, pm_get_pl0 },
  { PM_SET_PL0,      1, 1, pm_set_pl0 },
  { PM_REQ_PL1,      1, 0, pm_get_pl1 },
  { PM_SET_PL1,      1, 1, pm_set_pl1 },
  { PM_REQ_PAVG,     1, 0, pm_get_pavg },
  { PM_REQ_PTTL,     1, 0, pm_get_pttl },
  { PM_REQ_VOLT,     1, 0, pm_get_volt },
  { PM_REQ_TEMP,     1, 0, pm_get_temp },
  { PM_REQ_TACH,     1, 0, pm_get_tach },
  { PM_REQ_TTTL,     1, 0, pm_get_tttl },
  { PM_REQ_FTTL,     1, 0, pm_get_fttl },
  { PM_SET_FTTL,     1, 1, pm_set_fttl },
#endif
};
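
/*
 * Illustration only (not compiled): how a request opcode is dispatched
 * through fnc_map[]. This mirrors the logic used by micras_mt_call()
 * and the SCIF CP session loop further below; 'buf' is a caller
 * supplied response buffer.
 */
#if 0
static int
fnc_map_dispatch(uint16_t cmd, void * buf)
{
  if (cmd > MR_REQ_MAX || ! fnc_map[cmd].fnc)
    return -MR_ERR_UNSUP;

  if (fnc_map[cmd].simple)
    return fnc_map[cmd].fnc(buf);   /* Safe without serialization */

  return -MR_ERR_PEND;              /* Must go through the MT thread */
}
#endif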



/*
**
** The monitoring thread.
** In fact this is a work queue that receives work items
** from several independent parties, such as SCIF, sysfs,
** out of band telemetry, PM and possibly timers.
**
** These parties pass a structure with information necessary
** for the call-out function called by the MT thread to operate.
** These structures must include the work item structure, such
** that the container_of() mechanism can be used to locate it.
**
** The MT thread does not by itself provide any feedback on
** when a task was executed nor the results from it. Therefore,
** if feedback is required, the callout needs to provide its
** own method, such as the wait queue used by function
** micras_mt_data() below. Experiments have shown that it is not
** safe to place the work item or the wait queue on a stack (no
** idea why, could be a bug).
**
*/
209
210static int micras_stop; /* Module shutdown */
211static struct delayed_work micras_wq_init; /* Setup work item */
212static struct delayed_work micras_wq_tick; /* Timer tick token */
213static struct workqueue_struct * micras_wq; /* Monitor thread */
214 int micras_priv; /* Call-out privileged */
215
216
217typedef struct wq_task {
218 int req; /* Request opcode */
219 int rtn; /* Return value */
220 int priv; /* Privileged */
221 void * ptr; /* Response buffer */
222 int (* fnc)(void *); /* Call out */
223 struct work_struct wrk; /* Work item */
224 wait_queue_head_t wqh; /* Wait queue header */
225} WqTask;
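
/*
 * Illustration only (not compiled): the work queue hands the callout
 * nothing but the embedded work_struct, so the surrounding wq_task is
 * recovered with container_of(), exactly as micras_mt_data() below
 * does it.
 */
#if 0
static void
wq_task_callout_example(struct work_struct * work)
{
  struct wq_task * wq = container_of(work, struct wq_task, wrk);

  wq->rtn = wq->fnc(wq->ptr);   /* Run the callout */
  wake_up_all(&wq->wqh);        /* Tell the submitter we are done */
}
#endif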


#if defined(CONFIG_MK1OM) && WA_4845465
/*
 * SMC die temp update job.
 *
 * As per HSD #4845465 we push the die temperature
 * to the SMC instead of the usual reverse direction.
 * This has to happen at around 50 mSec intervals, which should
 * be possible with a work queue implementation. If that turns out
 * not to be reliable enough we may need a more direct approach.
 * During the experiment, we want to override the pushed temp.
 */

#define DIE_PROC     1    /* Enable die temp override */
#define SMC_PERIOD   50   /* SMC update interval, mSec */
#define JITTER_STATS 1    /* Enable jitter measurements */

static struct delayed_work micras_wq_smc; /* SMC update token */
static int smc_4845465;                   /* SMC push capable */
#if DIE_PROC
static int die_override;                  /* Temperature override */
#endif

static void
micras_mt_smc(struct work_struct *work)
{
  extern int mr_smc_wr(uint8_t, uint32_t *);
  static uint64_t n;
  uint32_t tmp;
  uint32_t ts2, mfs;

  if (! micras_stop) {
    /*
     * Re-arm for a callback in about SMC_PERIOD (50) mSec.
     * There is no guarantee this will be more than approximate.
     */
    queue_delayed_work(micras_wq, &micras_wq_smc, msecs_to_jiffies(SMC_PERIOD));
  }

#if JITTER_STATS
  /*
   * Time the interval in order to get some
   * measurement on what jitter to expect.
   * Leave a log message once every minute.
   */
  {
    static uint64_t d, t1, t2, s, hi, lo = ~0;

    t2 = rdtsc();
    if (n) {
      d = t2 - t1;
      s += d;
      if (d > hi)
        hi = d;
      if (d < lo)
        lo = d;
#if 1
      {
        /*
         * Show jitter in buckets representing 5 mSec.
         * The center (#20) represents +- 2.5 mSec from the reference.
         * The TSC is assumed to run at 1.1 GHz here; if PM kicks in,
         * the measurements may be way off because it manipulates the
         * system clock and indirectly the jiffy counter.
         */
        static uint64_t buckets[41];
        int bkt;
        int64_t err;

        err = ((d * 10) / 11) - (50 * 1000 * 1000);
        if (err < -(25 * 100 * 1000))
          bkt = 19 + (err + (25 * 100 * 1000)) / (5 * 1000 * 1000);
        else
        if (err > (25 * 100 * 1000))
          bkt = 21 + (err - (25 * 100 * 1000)) / (5 * 1000 * 1000);
        else
          bkt = 20;
        if (bkt < 0)
          bkt = 0;
        if (bkt > 40)
          bkt = 40;
        buckets[bkt]++;
        if ((n % ((10 * 1000)/SMC_PERIOD)) == ((10 * 1000)/SMC_PERIOD) - 1) {
          printk("smc_upd: dist");
          for(bkt = 0; bkt < 41; bkt++) {
            if (bkt == 20)
              printk(" | %lld |", buckets[bkt]);
            else
              printk(" %lld", buckets[bkt]);
          }
          printk("\n");
        }
      }
#endif
      if ((n % ((60 * 1000)/SMC_PERIOD)) == ((60 * 1000)/SMC_PERIOD) - 1)
        printk("smc_upd: %lld, min %lld, max %lld, avg %lld\n", n, lo, hi, s / n);
    }
    t1 = t2;
  }
#endif /* JITTER_STATS */

  /*
   * Send update to the SMC, register 0x50.
   * The value pushed to the SMC must have the following content:
   *
   *  Bits 9:0    Device Temperature
   *              -> THERMAL_STATUS_2 bits 19:10
   *  Bit  10     Valid bit
   *              -> THERMAL_STATUS_2 bit 31
   *  Bits 20:11  Thermal Monitor Control value
   *              -> THERMAL_STATUS_2 bits 9:0
   *  Bits 30:21  Fan Thermal Control value
   *              -> MICROCONTROLLER_FAN_STATUS bits 17:8
   */

  n++;
  ts2 = mr_sbox_rl(0, SBOX_THERMAL_STATUS_2);
  mfs = mr_sbox_rl(0, SBOX_MICROCONTROLLER_FAN_STATUS);

#if DIE_PROC
  if (die_override)
    tmp = GET_BITS(9, 0, die_override);
  else
#endif
    tmp = PUT_BITS(9, 0, GET_BITS(19, 10, ts2));
  tmp |= PUT_BIT(10, GET_BIT(31, ts2)) |
         PUT_BITS(20, 11, GET_BITS(9, 0, ts2)) |
         PUT_BITS(30, 21, GET_BITS(17, 8, mfs));

  if (mr_smc_wr(0x50, &tmp))
    printk("smc_upd: %lld, tmp %u, SMC write failed\n", n, tmp);
}
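
/*
 * Illustration only (not compiled): how the register 0x50 payload is
 * packed. GET_BITS/PUT_BITS come from micras.h; the EX_* macros below
 * are an assumed equivalent, shown purely to make the arithmetic
 * checkable. E.g. with THERMAL_STATUS_2 = 0x80014C00 (valid bit 31
 * set, die temp 83 in bits 19:10) the device temperature field of the
 * pushed value becomes 83, and PUT_BIT(10, GET_BIT(31, ts2)) sets the
 * valid bit of the pushed value.
 */
#if 0
#define EX_GET_BITS(h, l, v)  (((v) >> (l)) & ((1ULL << ((h) - (l) + 1)) - 1))
#define EX_PUT_BITS(h, l, v)  (((v) & ((1ULL << ((h) - (l) + 1)) - 1)) << (l))

uint32_t ts2  = 0x80014C00;
uint32_t temp = EX_GET_BITS(19, 10, ts2);   /* 0x53 == 83 C */
uint32_t push = EX_PUT_BITS(9, 0, temp);    /* Bits 9:0 of SMC value */
#endif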


#if DIE_PROC
/*
 * Test proc file to override the die temperature push.
 * A value of 0 means no override; any other value is
 * pushed as if it was a 'device temperature'.
 */

static struct proc_dir_entry * die_pe;

/*
 * On writes: scan input line for a single number.
 */

static ssize_t
die_write(struct file * file, const char __user * buff, size_t len, loff_t * off)
{
  char * buf;
  char * ep, * cp;
  unsigned long long ull;
  int err;

  /*
   * Get input line into kernel space
   */
  if (len > PAGE_SIZE - 1)
    len = PAGE_SIZE - 1;
  buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
  if (! buf)
    return -ENOMEM;
  if (copy_from_user(buf, buff, len)) {
    err = -EFAULT;
    goto wr_out;
  }
  buf[len] = '\0';
  cp = ep = (char *) buf;

  /*
   * Read a number, strtoul style, base 0 (auto-detect).
   */
  while(isspace(*cp))
    cp++;
  ull = simple_strtoull(cp, &ep, 0);
  if (ep == cp || (*ep != '\0' && !isspace(*ep))) {
    printk("Invalid die temp given\n");
    err = -EINVAL;
    goto wr_out;
  }

  die_override = GET_BITS(9, 0, ull);
  printk("Die temp override set to %d C\n", die_override);

  /*
   * Swallow any trailing junk up to next newline
   */
  ep = strchr(buf, '\n');
  if (ep)
    cp = ep + 1;
  err = cp - buf;

wr_out:
  kfree(buf);
  return err;
}


/*
 * On reads: return string of current override temp.
 */

static ssize_t
die_read(struct file * file, char __user * buff, size_t count, loff_t *ppos)
{
  char buf[32];
  size_t len;

  len = snprintf(buf, sizeof(buf), "%d\n", die_override);
  return simple_read_from_buffer(buff, count, ppos, buf, len);
}


static const struct file_operations proc_die_operations = {
  .read   = die_read,
  .write  = die_write,
  .llseek = no_llseek,
};
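
/*
 * Usage sketch (the proc node is created from die_pe elsewhere; the
 * path below is a placeholder, not the actual node name):
 *
 *   echo 85 > /proc/<die-temp-node>   # push 85 C instead of HW reading
 *   echo 0  > /proc/<die-temp-node>   # back to real die temperature
 *   cat /proc/<die-temp-node>         # show current override
 */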
#endif /* DIE_PROC */
#endif /* WA_4845465 */


/*
 * Timer tick job
 *
 * This is for periodic updates from the SMC,
 * which (with a little luck) can be avoided
 * at the cost of I2C communications during
 * actual CP queries.
 */

static void
micras_mt_tick(struct work_struct *work)
{
#if MT_TIMER
  static int n;

  n++;
  if (! micras_stop) {
    /*
     * Re-arm for a callback in about 1 second.
     * There is no guarantee this will be more than approximate.
     */
    queue_delayed_work(micras_wq, &micras_wq_tick, msecs_to_jiffies(MT_PERIOD));
  }

  /*
   * Dump elog prints into the kernel log
   *TBD: debug tool, time-shifts messages, remove eventually.
   */
  {
    int msg_top, msg_id;
    char * buf;

    msg_id  = atomic_read(&ee_seen);
    msg_top = atomic_read(&ee_msg);
    while(++msg_id <= msg_top) {
      buf = ee_buf + (msg_id % EE_BUF_COUNT) * EE_BUF_LINELEN;
      if (! *buf)
        break;
      printk("%s", buf);
      *buf = '\0';
      atomic_inc(&ee_seen);
    }
  }
#endif
}


/*
 * Handle SCIF & sysfs show/store requests
 *
 * By convention we know that the work item is a member of
 * a larger struct, which can readily be found using the
 * container_of mechanism.
 *
 * Otherwise this routine just calls the function stored
 * in the larger struct's mt_data element, and on its
 * return wakes up whoever is waiting for its completion.
 */

static void
micras_mt_data(struct work_struct * work)
{
  struct wq_task * wq;

  wq = container_of(work, struct wq_task, wrk);
  micras_priv = wq->priv;
  wq->rtn = wq->fnc(wq->ptr);
  micras_priv = 0;
  wake_up_all(& wq->wqh);
}


/*
 * Helper to pass jobs (work items) to the monitoring thread.
 *
 * As input it receives details on the function to be called, one
 * argument to pass to that function, the opcode associated
 * with the function and a function return value. The latter
 * will be set to -MR_ERR_PEND, and we'll expect the callout
 * function to change it.
 *
 * The work item is the only piece of information passed to
 * the work queue callout, so we'll wrap it into a larger
 * structure along with the received details such that the
 * work queue can perform a function call on our behalf.
 */

static int
micras_mt_tsk(struct wq_task * wq)
{
  int err;

#if MT_VERBOSE
  uint64_t start, stop;
  start = rdtsc();
#endif

  /*
   * Create a work item for the RAS thread,
   * enqueue and wait for its completion.
   *
   *TBD: Timeout length to be revisited
   */
  wq->rtn = -MR_ERR_PEND;
  INIT_WORK_ONSTACK(&wq->wrk, micras_mt_data);
  init_waitqueue_head(&wq->wqh);
  queue_work(micras_wq, &wq->wrk);
  err = wait_event_interruptible_timeout(wq->wqh,
                wq->rtn != -MR_ERR_PEND, msecs_to_jiffies(1000));

  /*
   * Check for potential errors, which for now can only be
   * "interrupted" or "timeout". In both cases try to cancel the work
   * item from the MT thread. If the cancel succeeds (returns true) then
   * the work item was still "pending" and is now removed from the
   * work queue, i.e. it is safe to continue (with error).
   * Otherwise, the cancel operation will wait for the work item's
   * call-out function to finish, which kind of defies the purpose
   * of "interruptible". However, we cannot leave until it is certain
   * that it will not be accessed by the RAS thread.
   */
  if (err == -ERESTARTSYS || err == 0) {
    printk("MT tsk: interrupted or failure, err %d\n", err);
    printk("MT tsk: FAILED: cmd %d, rtn %d, fnc %p, ptr %p\n",
                wq->req, wq->rtn, wq->fnc, wq->ptr);

    err = cancel_work_sync(&wq->wrk);
    printk("MT tsk: work canceled (%d)\n", err);
  }

  /*
   * Completed, turn interrupts and timeouts into MR errors.
   */
  err = wq->rtn;
  if (err == -MR_ERR_PEND)
    err = -MR_ERR_NOVAL;

#if MT_VERBOSE
  stop = rdtsc();
  printk("MT tsk: cmd %d, err %d, time %llu\n", wq->req, err, stop - start);
#endif
  return err;
}


/*
 * Public interface to the MT functions.
 * Caller is responsible for passing a buffer large enough
 * to hold data for reads or writes (1 page will do,
 * but structs matching the commands are recommended).
 * Returned data are structs defined in micras.h
 */

int
micras_mt_call(uint16_t cmd, void * buf)
{
  struct wq_task * wq;
  int err;

  if (micras_stop)
    return -MR_ERR_UNSUP;

  if (cmd > MR_REQ_MAX)
    return -MR_ERR_INVOP;

  err = -MR_ERR_UNSUP;
  if (fnc_map[cmd].fnc) {
    if (fnc_map[cmd].simple) {
      /*
       * Fast access, just call function
       */
      err = fnc_map[cmd].fnc(buf);
    }
    else {
      /*
       * Slow access, go through serializer.
       * We allocate a work queue task for the MT thread,
       * stuff arguments in it, run the task, and then free
       * the work queue task.
       */
      wq = kmalloc(sizeof(* wq), GFP_KERNEL);
      if (! wq) {
        printk("Scif: CP work task alloc failed\n");
        return -MR_ERR_NOMEM;
      }

      memset(wq, '\0', sizeof(*wq));
      wq->req  = cmd;
      wq->priv = 1;
      wq->fnc  = (int (*)(void *)) fnc_map[cmd].fnc;
      wq->ptr  = buf;
      err = micras_mt_tsk(wq);

      kfree(wq);
    }
  }

  return err;
}
EXPORT_SYMBOL_GPL(micras_mt_call);
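
/*
 * Illustration only (not compiled): calling the exported interface
 * from another kernel module. MR_REQ_CLST and struct mr_rsp_clst come
 * from micras.h (field meanings per that header); a successful call
 * fills the caller's buffer and returns the payload size, negative
 * MR error codes otherwise.
 */
#if 0
static void
micras_mt_call_example(void)
{
  struct mr_rsp_clst clst;
  int err;

  err = micras_mt_call(MR_REQ_CLST, &clst);
  if (err < 0)
    printk("micras: MR_REQ_CLST failed, err %d\n", err);
  else
    printk("micras: clst count %u, thr %u\n", clst.count, clst.thr);
}
#endif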



/*
**
** The sysfs nodes provided by this module are not really associated
** with a 'struct device', since we don't create device entries for
** access through '/dev'. Instead we register a 'struct class'
** with nodes defined with the CLASS_ATTR macro.
** Reasons for this choice are:
**  - we don't want a device node created
**  - we don't need (at least now) to create udev events
**  - we don't act on suspend/resume transitions
**  - we don't want to have our files unnecessarily deep
**    in the sysfs file system.
**
** The sysfs layout is intended to look like:
**
** /sys/class/micras/   Root of this driver
**    /clst             Core information
**    /cutl             Core utilization
**    /ecc              Error correction mode
**    /fan              Fan controller
**    /freq             Core frequency
**    /gddr             GDDR devices
**    /gfreq            GDDR speed
**    /gvolt            GDDR voltage
**    /hwinf            Hardware Info
**    /mem              Memory utilization
**    /os               OS status
**    /plim             Power envelope
**    /power            Card power
**    /temp             Board temperatures
**    /trbo             Turbo mode
**    /trc              Trace level
**    /vers             uOS/Flash version
**    /volt             Core voltage
**
** The following should be removed as there are better tools
** available in /proc/<pid>/{stat|status|smap}, /proc/meminfo,
** /proc/stat, /proc/uptime, /proc/loadavg, and /proc/cpuinfo:
**   clst, cutl, mem, os
**
** Below we hand-craft a 'micras' class to sit under '/sys/class'
** with attribute nodes directly under it. Each attribute may
** have a 'show' and a 'store' handler, both called with a reference
** to its class (ras_class, may hold private data), its class_attribute,
** a buffer reference, and for 'store's a string length. The buffer
** passed to 'show' is one page (PAGE_SIZE, 4096) which sets the
** upper limit on the return string(s). The return value of 'store'
** has to be either an error code (negative) or the count of bytes
** consumed. If it consumes less than what was passed in, the store
** routine will be called again until all input data has been consumed.
**
** Function pointers are hardwired by the macros below since it
** is easy and simpler than using the fnc_map table. This may
** change if the command set expands uncontrolled.
** We have local helper functions to handle array prints.
** Any locking required is handled in called routines, not here.
**
** Note: This is not coded for maximum performance, since the
**       use of the MT thread to serialize access to card data
**       has a cost of two task switches attached, both of which
**       may cause delays due to other system activity.
**
*/


/*
 * Hack alert!
 * Formatting routines for arrays of 16/32/64 bit unsigned ints.
 * This reduces the printf argument list in the _SHOW() macros below
 * considerably, though perhaps at a cost in code efficiency.
 * They need a scratch buffer in order to construct long lines.
 * A quick swag at the largest possible response tells us that we'll
 * never exceed half of the page we are given to scribble into.
 * So, instead of allocating print space, we'll simply use the 2nd
 * half of the page as scratch buffer.
 */

#define BP (buf + (PAGE_SIZE/2))  /* Scratch pad location */
#define BL (PAGE_SIZE/2 - 1)      /* Scratch size */


static char *
arr16(int16_t * arr, int len, char * buf, int siz)
{
  int n, bs;

  bs = 0;
  for(n = 0; n < len && bs < siz; n++)
    bs += scnprintf(buf + bs, siz - bs, "%s%d", n ? " " : "", arr[n]);
  buf[bs] = '\0';

  return buf;
}


static char *
arr32(uint32_t * arr, int len, char * buf, int siz)
{
  int n, bs;

  bs = 0;
  for(n = 0; n < len && bs < siz; n++)
    bs += scnprintf(buf + bs, siz - bs, "%s%u", n ? " " : "", arr[n]);
  buf[bs] = '\0';

  return buf;
}


static char *
arr64(uint64_t * arr, int len, char * buf, int siz)
{
  int n, bs;

  bs = 0;
  for(n = 0; n < len && bs < siz; n++)
    bs += scnprintf(buf + bs, siz - bs, "%s%llu", n ? " " : "", arr[n]);
  buf[bs] = '\0';

  return buf;
}
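
/*
 * Illustration only: the arr*() helpers format into the second half
 * of the sysfs page (BP/BL above) so the result can be fed straight
 * to scnprintf() as a single "%s" argument, e.g. for a 3-element
 * array:
 *
 *   uint32_t supt[3] = { 600, 800, 1000 };
 *   len = scnprintf(buf, PAGE_SIZE, "%s\n", arr32(supt, 3, BP, BL));
 *
 * which yields "600 800 1000\n" in the first half of 'buf'.
 */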


#define _SHOW(op,rec,nam,str...) \
  static ssize_t \
  micras_show_##nam(struct class *class, \
                    struct class_attribute *attr, \
                    char *buf) \
  { \
    struct mr_rsp_##rec * r; \
    struct wq_task * wq; \
    int len; \
    int err; \
\
    wq = kmalloc(sizeof(* wq) + sizeof(* r), GFP_KERNEL); \
    if (! wq) \
      return -ENOMEM; \
\
    memset(wq, '\0', sizeof(* wq)); \
    r = (struct mr_rsp_##rec *)(wq + 1); \
    wq->req = MR_REQ_##op; \
    wq->fnc = (int (*)(void *)) mr_get_##nam; \
    wq->ptr = r; \
    err = micras_mt_tsk(wq); \
\
    if (err < 0) { \
      len = 0; \
      *buf = '\0'; \
    } \
    else { \
      len = scnprintf(buf, PAGE_SIZE, ##str); \
    } \
\
    kfree(wq); \
    return len; \
  }
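
/*
 * Illustration only (not compiled): what _SHOW(CLST, clst, clst, ...)
 * below expands to, with the varargs spliced into scnprintf(). Note
 * the response struct is carved out of the same allocation as the
 * wq_task, right behind it.
 */
#if 0
static ssize_t
micras_show_clst(struct class *class, struct class_attribute *attr, char *buf)
{
  struct mr_rsp_clst * r;
  struct wq_task * wq;
  int len, err;

  wq = kmalloc(sizeof(* wq) + sizeof(* r), GFP_KERNEL);
  if (! wq)
    return -ENOMEM;

  memset(wq, '\0', sizeof(* wq));
  r = (struct mr_rsp_clst *)(wq + 1);   /* Response right after task */
  wq->req = MR_REQ_CLST;
  wq->fnc = (int (*)(void *)) mr_get_clst;
  wq->ptr = r;
  err = micras_mt_tsk(wq);              /* Serialize through MT thread */

  if (err < 0) {
    len = 0;
    *buf = '\0';
  }
  else
    len = scnprintf(buf, PAGE_SIZE, "%u %u\n", r->count, r->thr);

  kfree(wq);
  return len;
}
#endif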

_SHOW(HWINF, hwinf, hwinf, "%u %u %u %u %u %u "
        "%c%c%c%c%c%c%c%c%c%c%c%c "
        "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
        r->rev, r->step, r->substep, r->board, r->fab, r->sku,
        r->serial[0], r->serial[1], r->serial[2],  r->serial[3],
        r->serial[4], r->serial[5], r->serial[6],  r->serial[7],
        r->serial[8], r->serial[9], r->serial[10], r->serial[11],
        r->guid[0],  r->guid[1],  r->guid[2],  r->guid[3],
        r->guid[4],  r->guid[5],  r->guid[6],  r->guid[7],
        r->guid[8],  r->guid[9],  r->guid[10], r->guid[11],
        r->guid[12], r->guid[13], r->guid[14], r->guid[15]);

_SHOW(VERS, vers, vers, "\"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\"\n",
        r->fboot0 +1, r->fboot1 +1, r->flash[0] +1,
        r->flash[1] +1, r->flash[2] +1, r->fsc +1, r->uos +1)

_SHOW(CFREQ, freq, freq, "%u %u %s\n",
        r->cur, r->def, arr32(r->supt, r->slen, BP, BL))

_SHOW(CVOLT, volt, volt, "%u %u %s\n",
        r->cur, r->set, arr32(r->supt, r->slen, BP, BL))

#if defined(CONFIG_MK1OM) || (defined(CONFIG_ML1OM) && USE_FSC)
_SHOW(PWR, power, power, "%d\n%d\n%d\n%d\n%d\n%d\n%d\n%s\n%s\n%s\n",
        r->tot0.prr,
        r->tot1.prr,
        r->inst.prr,
        r->imax.prr,
        r->pcie.prr,
        r->c2x3.prr,
        r->c2x4.prr,
        arr32(&r->vccp.pwr, 3, BP, 32),
        arr32(&r->vddg.pwr, 3, BP + 32, 32),
        arr32(&r->vddq.pwr, 3, BP + 64, 32))

_SHOW(PLIM, plim, plim, "%u %u %u\n",
        r->phys, r->hmrk, r->lmrk)
#endif

_SHOW(CLST, clst, clst, "%u %u\n",
        r->count, r->thr)

_SHOW(GDDR, gddr, gddr, "\"%s\" %u %u %u\n",
        r->dev +1, r->rev, r->size, r->speed)

_SHOW(GFREQ, gfreq, gfreq, "%u %u\n",
        r->cur, r->def)

_SHOW(GVOLT, gvolt, gvolt, "%u %u\n",
        r->cur, r->set)

_SHOW(TEMP, temp, temp, "%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
        arr16(&r->die.cur,  2, BP, 32),
        arr16(&r->brd.cur,  2, BP + 32, 32),
        arr16(&r->fin.cur,  2, BP + 64, 32),
        arr16(&r->fout.cur, 2, BP + 96, 32),
        arr16(&r->gddr.cur, 2, BP + 128, 32),
        arr16(&r->vccp.cur, 2, BP + 160, 32),
        arr16(&r->vddg.cur, 2, BP + 224, 32),
        arr16(&r->vddq.cur, 2, BP + 256, 32))

_SHOW(FAN, fan, fan, "%u %u %u\n",
        r->override, r->pwm, r->rpm)

#ifdef CONFIG_MK1OM
_SHOW(ECC, ecc, ecc, "%d\n",
        r->enable)
#endif

_SHOW(TRC, trc, trc, "%d\n",
        r->lvl)

_SHOW(TRBO, trbo, trbo, "%d %d %d\n",
        r->set, r->state, r->avail)

#ifdef CONFIG_MK1OM
_SHOW(LED, led, led, "%d\n",
        r->led)

_SHOW(PROCHOT, ptrig, prochot, "%d %d\n",
        r->power, r->time);

_SHOW(PWRALT, ptrig, pwralt, "%d %d\n",
        r->power, r->time);

_SHOW(PERST, perst, perst, "%d\n",
        r->perst);

_SHOW(TTL, ttl, ttl, "%u %u %u %u\n%u %u %u %u\n%u %u %u %u\n",
        r->thermal.active, r->thermal.since, r->thermal.count, r->thermal.time,
        r->power.active,   r->power.since,   r->power.count,   r->power.time,
        r->alert.active,   r->alert.since,   r->alert.count,   r->alert.time);
#endif

_SHOW(CUTL, cutl, cutl, "%u %u %u %llu\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n...\n",
        r->tck, r->core, r->thr, r->jif,
        arr64(&r->sum.user,    4, BP, 80),
        arr64(&r->cpu[0].user, 4, BP + 80, 80),
        arr64(&r->cpu[1].user, 4, BP + 160, 80),
        arr64(&r->cpu[2].user, 4, BP + 240, 80),
        arr64(&r->cpu[3].user, 4, BP + 320, 80),
        arr64(&r->cpu[4].user, 4, BP + 400, 80),
        arr64(&r->cpu[5].user, 4, BP + 480, 80),
        arr64(&r->cpu[6].user, 4, BP + 560, 80),
        arr64(&r->cpu[7].user, 4, BP + 640, 80))

_SHOW(MEM, mem, mem, "%u %u %u\n",
        r->total, r->free, r->bufs)

_SHOW(OS, os, os, "%llu %llu %llu %llu %u [%s]\n",
        r->uptime, r->loads[0], r->loads[1], r->loads[2],
        r->alen, arr32(r->apid, r->alen, BP, BL))


/*
 * Ensure the caller's credentials are root on all 'set' files.
 * Even though file creation mode should prevent writes?
 *
 *TBD:
 * - How many of the 'store's are to be permitted?
 */

#define _STORE(op, nam) \
  static ssize_t \
  micras_store_##nam (struct class *class, \
                      struct class_attribute *attr, \
                      const char *buf, \
                      size_t count) \
  { \
    struct wq_task * wq; \
    size_t ocount; \
    uint32_t val; \
    int err; \
    char * ep; \
\
    if (current_euid() != 0) \
      return -EPERM; \
\
    ocount = count; \
    if (count && buf[count - 1] == '\n') \
      ((char *) buf)[--count] = '\0'; \
\
    err = -EINVAL; \
    if (count && *buf) { \
      val = simple_strtoul(buf, &ep, 0); \
      if (ep != buf && !*ep) { \
        wq = kmalloc(sizeof(* wq), GFP_KERNEL); \
        if (! wq) \
          return -ENOMEM; \
\
        wq->req = MR_SET_##op; \
        wq->fnc = (int (*)(void *)) mr_set_##nam; \
        wq->ptr = (void *) &val; \
        if (! micras_mt_tsk(wq)) \
          err = ocount; \
        kfree(wq); \
      } \
    } \
\
    return err; \
  }

_STORE(CFREQ, freq)
_STORE(CVOLT, volt)

#if defined(CONFIG_MK1OM) || (defined(CONFIG_ML1OM) && USE_FSC)
_STORE(PLIM, plim)
#endif

_STORE(FAN, fan)
_STORE(TRC, trc)
_STORE(TRBO, trbo)

#ifdef CONFIG_MK1OM
_STORE(LED, led)
_STORE(PERST, perst)
#endif


/*
 *TBD:
 * - Remove entries clst, cutl, mem, and os.
 *   Only included here for comparison with what cp/micinfo displays.
 *   They really need to go.
 */

static struct class_attribute micras_attr[] = {
  __ATTR(hwinf,   0444, micras_show_hwinf,   0),
  __ATTR(vers,    0444, micras_show_vers,    0),
  __ATTR(freq,    0644, micras_show_freq,    micras_store_freq),
  __ATTR(volt,    0644, micras_show_volt,    micras_store_volt),
#if defined(CONFIG_MK1OM) || (defined(CONFIG_ML1OM) && USE_FSC)
  __ATTR(power,   0444, micras_show_power,   0),
  __ATTR(plim,    0644, micras_show_plim,    micras_store_plim),
#endif
  __ATTR(clst,    0444, micras_show_clst,    0),
  __ATTR(gddr,    0444, micras_show_gddr,    0),
  __ATTR(gfreq,   0444, micras_show_gfreq,   0),
  __ATTR(gvolt,   0444, micras_show_gvolt,   0),
  __ATTR(fan,     0644, micras_show_fan,     micras_store_fan),
  __ATTR(temp,    0444, micras_show_temp,    0),
#ifdef CONFIG_MK1OM
  __ATTR(ecc,     0444, micras_show_ecc,     0),
#endif
  __ATTR(trc,     0644, micras_show_trc,     micras_store_trc),
  __ATTR(trbo,    0644, micras_show_trbo,    micras_store_trbo),
#ifdef CONFIG_MK1OM
  __ATTR(led,     0644, micras_show_led,     micras_store_led),
  __ATTR(prochot, 0444, micras_show_prochot, 0),
  __ATTR(pwralt,  0444, micras_show_pwralt,  0),
  __ATTR(perst,   0644, micras_show_perst,   micras_store_perst),
  __ATTR(ttl,     0444, micras_show_ttl,     0),
#endif
  __ATTR(cutl,    0444, micras_show_cutl,    0),
  __ATTR(mem,     0444, micras_show_mem,     0),
  __ATTR(os,      0444, micras_show_os,      0),
  __ATTR_NULL,
};


static struct class ras_class = {
  .name        = "micras",
  .owner       = THIS_MODULE,
  .class_attrs = micras_attr,
};
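
/*
 * Usage sketch from the card's shell, going by the layout above
 * (values are examples only; units are whatever the mr_get/mr_set
 * handlers in micras_common.c / micras_kn{cf}.c define):
 *
 *   cat /sys/class/micras/temp      # board temperature sets
 *   cat /sys/class/micras/hwinf     # rev/step/board/fab/sku, serial, guid
 *   echo 2 > /sys/class/micras/trc  # set trace level (root only)
 */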



/*
**
** SCIF interface & services are mostly handled here, including
** all aspects of setting up and tearing down SCIF channels.
** We create three listening SCIF sockets and create a workqueue
** with the initial task of waiting for 'accept's to happen.
**
** When TTL or MC accept incoming connections, their workqueue
** task spawns one thread just to detect if/when the peer closes
** the session and will block any further connects until the
** service thread terminates (peer closes session).
** The TTL or MC event handler, executing in interrupt context,
** will check for an open session and if one is present, deliver
** their event record(s) on it by using scif_send().
**
** When CP accepts incoming connections, its workqueue task spawns
** a new thread to run a session with the peer and then proceeds
** to accepting a new connection. Thus, there are no strict
** bounds on the number of incoming connections, but for internal
** house-keeping sessions are limited to MR_SCIF_MAX (32).
** Accepted requests from the peer are fulfilled through the
** MT thread in a similar fashion as the sysfs interface, i.e.
** through function micras_mt_tsk(), which guarantees synchronized
** (serialized) access to MT core data and handles waits as needed.
** Function pointers corresponding to request opcodes are found
** by lookup in the fnc_map table.
**
** Note: This is not coded for maximum performance, since the
**       use of the MT thread to serialize access to card data
**       has a cost of two task switches attached, both of which
**       may cause delays due to other system activity.
*/
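
/*
 * Illustration only (not compiled): a minimal host-side client for
 * the CP service, written against the MPSS userspace SCIF API. The
 * listener port macro and the exact mr_hdr layout come from
 * micras_api.h and are assumptions here (CP_PORT is a placeholder).
 * Note the session code below only honors privileged opcodes when
 * the peer's port is below 1024, which would require scif_bind() to
 * a privileged port.
 */
#if 0
#include <scif.h>

static int
cp_query(uint16_t node, uint16_t cmd, void * rsp, int rsplen)
{
  struct scif_portID dst = { node, CP_PORT /* placeholder */ };
  struct mr_hdr q = { 0 }, a;
  scif_epd_t ep;
  int err = -1;

  ep = scif_open();
  if (ep == SCIF_OPEN_FAILED)
    return -1;
  if (scif_connect(ep, &dst) < 0)
    goto out;

  q.cmd = cmd;              /* Opcode; queries carry no payload */
  q.len = 0;
  if (scif_send(ep, &q, sizeof(q), SCIF_SEND_BLOCK) != sizeof(q))
    goto out;
  if (scif_recv(ep, &a, sizeof(a), SCIF_RECV_BLOCK) != sizeof(a))
    goto out;
  if (a.len > 0 && a.len <= rsplen &&
      scif_recv(ep, rsp, a.len, SCIF_RECV_BLOCK) == a.len)
    err = (a.cmd & MR_ERROR) ? -1 : a.len;

out:
  scif_close(ep);
  return err;
}
#endif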


static scif_epd_t micras_cp_lstn;               /* CP listener handle */
static struct workqueue_struct * micras_cp_wq;  /* CP listener thread */
static atomic_t micras_cp_rst;                  /* CP listener restart */
static struct delayed_work micras_cp_tkn;       /* CP accept token */
static DECLARE_BITMAP(micras_cp_fd, MR_SCIF_MAX);               /* CP free slots */
static volatile struct scif_portID micras_cp_si[MR_SCIF_MAX];   /* CP sessions */
static volatile struct task_struct * micras_cp_kt[MR_SCIF_MAX]; /* CP threads */
static volatile scif_epd_t micras_cp_ep[MR_SCIF_MAX];           /* CP handles */

static scif_epd_t micras_mc_lstn;               /* MC listener handle */
static struct workqueue_struct * micras_mc_wq;  /* MC listener thread */
static struct delayed_work micras_mc_tkn;       /* MC accept token */
static volatile struct task_struct * micras_mc_kt;  /* MC session */
static volatile scif_epd_t micras_mc_ep;        /* MC handle */

static scif_epd_t micras_ttl_lstn;              /* TTL listener handle */
static struct workqueue_struct * micras_ttl_wq; /* TTL listener thread */
static struct delayed_work micras_ttl_tkn;      /* TTL accept token */
static volatile struct task_struct * micras_ttl_kt; /* TTL session */
static volatile scif_epd_t micras_ttl_ep;       /* TTL handle */


/*
 * SCIF CP session thread
 */

static int
micras_cp_sess(void * _slot)
{
  struct wq_task * wq;
  struct mr_hdr q, a;
  scif_epd_t ep;
  uint32_t slot;
  void * buf;
  uint64_t start, stop;
  int blen, len, priv;

  slot = (uint32_t)((uint64_t) _slot);
  priv = (micras_cp_si[slot].port < 1024) ? 1 : 0;
#if MT_VERBOSE
  printk("Scif: CP session %d running%s\n", slot, priv ? " privileged" : "");
#endif

  /*
   * Allocate local buffer from kernel.
   * Since the I/O buffers in SCIF are just one page,
   * we'd never expect to need larger buffers here.
   */
  buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
  if (! buf) {
    printk("Scif: CP scratch pad alloc failed\n");
    return 0;
  }

  /*
   * Allocate a work queue task for the MT thread
   */
  wq = kmalloc(sizeof(* wq), GFP_KERNEL);
  if (! wq) {
    printk("Scif: CP work task alloc failed\n");
    goto cp_sess_end;
  }

  /*
   * Start servicing MT protocol
   */
  ep = micras_cp_ep[slot];
  for( ;; ) {

    /*
     * Get a message header
     */
    len = scif_recv(ep, &q, sizeof(q), SCIF_RECV_BLOCK);
    start = rdtsc();
    if (len < 0) {
      if (len != -ECONNRESET)
        printk("Scif: CP recv error %d\n", len);
      goto cp_sess_end;
    }
    if (len != sizeof(q)) {
      printk("Scif: CP short recv (%d), discarding\n", len);
      continue;
    }

    /*
     * Validate the query:
     *  - known good opcode,
     *  - expected length (zero),
     *  - have callout in jump table,
     *  - check requestor's port ID on privileged opcodes.
     *
     *TBD: opcodes above MR_REQ_MAX are really only meant for
     *     use by the PM module. Should they be host accessible?
     */
    blen = 0;
    if (q.cmd < MR_REQ_HWINF ||
#if defined(CONFIG_MK1OM) && USE_PM
        q.cmd > PM_REQ_MAX
#else
        q.cmd > MR_REQ_MAX
#endif
                ) {
      printk("Scif: CP opcode %d invalid\n", q.cmd);
      blen = -MR_ERR_INVOP;
    }
    else
    if (q.len != 0) {
      printk("Scif: CP command length %d invalid\n", q.len);
      blen = -MR_ERR_INVLEN;
    }
    else
    if (! fnc_map[q.cmd].fnc) {
      printk("Scif: CP opcode %d un-implemented\n", q.cmd);
      blen = -MR_ERR_UNSUP;
    }
    else
    if (fnc_map[q.cmd].privileged && !priv) {
      printk("Scif: CP opcode %d privileged, remote %d:%d\n",
                q.cmd, micras_cp_si[slot].node, micras_cp_si[slot].port);
      blen = -MR_ERR_PERM;
    }

    /*
     *TBD: If there is an error at this point, it might
     *     be a good idea to drain the SCIF channel.
     *     If garbage has entered the channel somehow,
     *     then how else can we get in sync such that
     *     the next recv really is a command header?
     *     A more radical solution is closing this session.
     */

    /*
     * If the header is OK (blen still zero) then pass
     * a work queue item to MT and wait for the response.
     * The result will end up in buf (payload for response)
     * or an error code that can be sent back to the requestor.
     * Since we don't want to care about whether it is a
     * get or set command here, the 'parm' value is copied
     * into buf prior to passing the work item to MT.
     * Thus, functions expecting an 'uint32_t *' to
     * point to a new value will be satisfied.
     */
    if (blen == 0) {
      if (fnc_map[q.cmd].simple) {
        *((uint32_t *) buf) = q.parm;
        blen = fnc_map[q.cmd].fnc(buf);
      }
      else {
        memset(wq, '\0', sizeof(*wq));
        wq->req  = q.cmd;
        wq->priv = priv;
        wq->fnc  = (int (*)(void *)) fnc_map[q.cmd].fnc;
        wq->ptr  = buf;
        *((uint32_t *) buf) = q.parm;
        blen = micras_mt_tsk(wq);
      }
    }
    stop = rdtsc();

    /*
     * Craft response header
     */
    a.cmd = q.cmd | MR_RESP;
    if (blen < 0) {
      /*
       * MT thread reported a failure.
       * Set error bit and make error record in buf
       */
      a.cmd |= MR_ERROR;
      ((struct mr_err *) buf)->err = -blen;
      ((struct mr_err *) buf)->len = 0;
      a.len = sizeof(struct mr_err);
    }
    else {
      /*
       * Payload size is set by call-out
       */
      a.len = blen;
    }
    a.stamp = q.stamp;
    a.spent = stop - start;

    /*
     * Send response header (always)
     */
    len = scif_send(ep, &a, sizeof(a), SCIF_SEND_BLOCK);
    if (len < 0) {
      printk("Scif: CP header send error %d\n", len);
      goto cp_sess_end;
    }
    if (len != sizeof(a)) {
      printk("Scif: CP short header send (%d of %lu)\n", len, sizeof(a));
      goto cp_sess_end;
    }

    /*
     * Send payload (if any, defined by a.len)
     */
    if (a.len > 0) {
      len = scif_send(ep, buf, a.len, SCIF_SEND_BLOCK);
      if (len < 0) {
        printk("Scif: CP payload send error %d\n", len);
        goto cp_sess_end;
      }
      if (len != a.len) {
        printk("Scif: CP short payload send (%d of %d)\n", len, a.len);
        goto cp_sess_end;
      }
    }

  }

cp_sess_end:
  if (wq)
    kfree(wq);
  if (buf)
    kfree(buf);
  ep = (scif_epd_t) atomic64_xchg((atomic64_t *)(micras_cp_ep + slot), 0);
  if (ep)
    scif_close(ep);
  micras_cp_kt[slot] = 0;
  set_bit(slot, micras_cp_fd);
#if MT_VERBOSE
  printk("Scif: CP session %d terminated, sess mask %lx\n", slot, micras_cp_fd[0]);
#endif

  if (atomic_xchg(&micras_cp_rst, 0)) {
    printk("Scif: resume listener\n");
    queue_delayed_work(micras_cp_wq, &micras_cp_tkn, 0);
  }

  return 0;
}


/*
 * SCIF CP session launcher
 */

static void
micras_cp(struct work_struct * work)
{
  struct task_struct * thr;
  scif_epd_t sess_ep;
  struct scif_portID sess_id;
  int slot;
  int err;

  /*
   * Wait for somebody to connect to us.
   * We stop listening on any error whatsoever.
   */
  err = scif_accept(micras_cp_lstn, &sess_id, &sess_ep, SCIF_ACCEPT_SYNC);
  if (err == -EINTR) {
    printk("Scif: CP accept interrupted, error %d\n", err);
    return;
  }
  if (err < 0) {
    printk("Scif: CP accept failed, error %d\n", err);
    return;
  }
#if MT_VERBOSE
  printk("Scif: CP accept: remote %d:%d, local %d:%d\n",
                sess_id.node, sess_id.port,
                micras_cp_lstn->port.node, micras_cp_lstn->port.port);
#endif

  /*
   * Spawn a new thread to run a session with the connecting peer.
   * We support only a limited number of connections, so first
   * get a free "slot" for this session.
   * The use of the non-atomic find_first_bit() below is safe as long
   * as this function is never run by more than one thread at a time
   * and all other manipulations of micras_cp_fd are atomic.
   */
  slot = find_first_bit(micras_cp_fd, MR_SCIF_MAX);
  if (slot < MR_SCIF_MAX) {
    if (micras_cp_kt[slot] || micras_cp_ep[slot]) {
      printk("Scif: CP slot %d busy (bug)\n", slot);
      return;
    }

    clear_bit(slot, micras_cp_fd);
    micras_cp_ep[slot] = sess_ep;
    micras_cp_si[slot] = sess_id;
    thr = kthread_create(micras_cp_sess, (void *)(uint64_t) slot, "RAS CP svc %d", slot);
    if (IS_ERR(thr)) {
      printk("Scif: CP service thread creation failed\n");
      scif_close(sess_ep);
      micras_cp_ep[slot] = 0;
      set_bit(slot, micras_cp_fd);
      return;
    }
    micras_cp_kt[slot] = thr;
#if MT_VERBOSE
    printk("Scif: CP session %d launched, pid %d\n", slot, thr->pid);
#endif
    wake_up_process(thr);
  }
  else {
    printk("Scif: No open session slots, closing session\n");
    scif_close(sess_ep);
  }

  /*
   * Keep listening until the session limit is reached.
   */
  if (bitmap_weight(micras_cp_fd, MR_SCIF_MAX))
    queue_delayed_work(micras_cp_wq, &micras_cp_tkn, 0);
  else {
    printk("Scif: CP connection limit reached\n");
    atomic_xchg(&micras_cp_rst, 1);
  }
}


/*
 * SCIF MC session thread
 */

static int
micras_mc_sess(void * dummy)
{
  scif_epd_t ep;
  char buf[8];
  int len;

#if MC_VERBOSE
  printk("Scif: MC session running\n");
#endif

  /*
   * Start servicing.
   * This is just to get an indication if the peer closes the connection.
   */
  for( ;; ) {
    /*
     * Sync with kernel MC event log.
     */
    mcc_sync();

    /*
     * Try to read 1 byte from the host (turns into a wait-point
     * keeping the connection open till the host closes it)
     */
    len = scif_recv(micras_mc_ep, buf, 1, SCIF_RECV_BLOCK);
    if (len < 0) {
      if (len != -ECONNRESET)
        printk("Scif: MC recv error %d\n", len);
      goto mc_sess_end;
    }

    /*
     * Ignore any received content.
     */
  }

mc_sess_end:
  ep = (scif_epd_t) atomic64_xchg((atomic64_t *) &micras_mc_ep, 0);
  if (ep)
    scif_close(ep);
  micras_mc_kt = 0;
#if MC_VERBOSE
  printk("Scif: MC session terminated\n");
#endif
  return 0;
}


/*
 * SCIF MC session launcher
 */

static void
micras_mc(struct work_struct * work)
{
  struct task_struct * thr;
  scif_epd_t sess_ep;
  struct scif_portID sess_id;
  int err;

  /*
   * Wait for somebody to connect to us.
   * We stop listening on any error whatsoever.
   */
  err = scif_accept(micras_mc_lstn, &sess_id, &sess_ep, SCIF_ACCEPT_SYNC);
  if (err == -EINTR) {
    printk("Scif: MC accept interrupted, error %d\n", err);
    return;
  }
  if (err < 0) {
    printk("Scif: MC accept failed, error %d\n", err);
    return;
  }
#if MC_VERBOSE
  printk("Scif: MC accept: remote %d:%d, local %d:%d\n",
                sess_ep->peer.node, sess_ep->peer.port,
                sess_ep->port.node, sess_ep->port.port);
#endif

  /*
   * Spawn a new thread to run a session with the connecting peer.
   * We support only one connection, so if one already is
   * running this one will be rejected.
   */
  if (! micras_mc_ep) {
    micras_mc_ep = sess_ep;
    thr = kthread_create(micras_mc_sess, 0, "RAS MC svc");
    if (IS_ERR(thr)) {
      printk("Scif: MC service thread creation failed\n");
      scif_close(sess_ep);
      micras_mc_ep = 0;
      return;
    }
    micras_mc_kt = thr;
    wake_up_process(thr);
  }
  else {
    printk("Scif: MC connection limit reached\n");
    scif_close(sess_ep);
  }

  /*
   * Keep listening
   */
  queue_delayed_work(micras_mc_wq, &micras_mc_tkn, 0);
}


/*
 * Ship a pre-packaged machine check event record to the host
 */

#ifndef SCIF_BLAST
#define SCIF_BLAST 2
#endif

int
micras_mc_send(struct mce_info * mce, int exc)
{
  if (micras_mc_ep) {
    int err;

#if ADD_DIE_TEMP
    err = mr_sbox_rl(0, SBOX_THERMAL_STATUS_2);
    mce->flags |= PUT_BITS(15, 8, GET_BITS(19, 10, err));
#endif

    if (exc) {
      /*
       * Exception context SCIF access, can't sleep and can't
       * wait on spinlocks either. May be detrimental to
       * other scif communications, but this _is_ an emergency
       * and we _do_ need to ship this message to the host.
       */
      err = scif_send(micras_mc_ep, mce, sizeof(*mce), SCIF_BLAST);
      if (err != sizeof(*mce))
        ee_printk("micras_mc_send: scif_send failed, err %d\n", err);
    }
    else {
      /*
       * Thread context SCIF access.
       * Just send the message.
       */
      err = scif_send(micras_mc_ep, mce, sizeof(*mce), SCIF_SEND_BLOCK);
      if (err != sizeof(*mce))
        printk("micras_mc_send: scif_send failed, err %d\n", err);
    }
    return err == sizeof(*mce);
  }
  return 0;
}


/*
 * SCIF TTL session thread
 */

static int
micras_ttl_sess(void * dummy)
{
  scif_epd_t ep;
  char buf[8];
  int len;

#if PM_VERBOSE
  printk("Scif: TTL session running\n");
#endif

  /*
   * Start servicing.
   * This is just to get an indication if the peer closes the connection.
   */
  for( ;; ) {
    /*
     * Try to read 1 byte from the host (turns into a wait-point
     * keeping the connection open till the host closes it)
     */
    len = scif_recv(micras_ttl_ep, buf, 1, SCIF_RECV_BLOCK);
    if (len < 0) {
      if (len != -ECONNRESET)
        printk("Scif: TTL recv error %d\n", len);
      goto ttl_sess_end;
    }

    /*
     * Ignore any received content.
     */
  }

ttl_sess_end:
  ep = (scif_epd_t) atomic64_xchg((atomic64_t *) &micras_ttl_ep, 0);
  if (ep)
    scif_close(ep);
  micras_ttl_kt = 0;
#if PM_VERBOSE
  printk("Scif: TTL session terminated\n");
#endif
  return 0;
}


/*
 * SCIF TTL session launcher
 */

static void
micras_ttl(struct work_struct * work)
{
  struct task_struct * thr;
  scif_epd_t sess_ep;
  struct scif_portID sess_id;
  int err;

  /*
   * Wait for somebody to connect to us.
   * We stop listening on any error whatsoever.
   */
  err = scif_accept(micras_ttl_lstn, &sess_id, &sess_ep, SCIF_ACCEPT_SYNC);
  if (err == -EINTR) {
    printk("Scif: TTL accept interrupted, error %d\n", err);
    return;
  }
  if (err < 0) {
    printk("Scif: TTL accept failed, error %d\n", err);
    return;
  }
#if PM_VERBOSE
  printk("Scif: TTL accept: remote %d:%d, local %d:%d\n",
                sess_ep->peer.node, sess_ep->peer.port,
                sess_ep->port.node, sess_ep->port.port);
#endif

  /*
   * Spawn a new thread to run a session with the connecting peer.
   * We support only one connection, so if one already is
   * running this one will be rejected.
   */
  if (! micras_ttl_ep) {
    micras_ttl_ep = sess_ep;
    thr = kthread_create(micras_ttl_sess, 0, "RAS TTL svc");
    if (IS_ERR(thr)) {
      printk("Scif: TTL service thread creation failed\n");
      scif_close(sess_ep);
      micras_ttl_ep = 0;
      return;
    }
    micras_ttl_kt = thr;
    wake_up_process(thr);
  }
  else {
    printk("Scif: TTL connection limit reached\n");
    scif_close(sess_ep);
  }

  /*
   * Keep listening
   */
  queue_delayed_work(micras_ttl_wq, &micras_ttl_tkn, 0);
}


/*
 * Ship a pre-packaged throttle event record to the host
 */

void
micras_ttl_send(struct ttl_info * ttl)
{
  static struct ttl_info split_rec;
  static int split_rem;
  int err;
  char * cp;

  if (micras_ttl_ep) {

    if (split_rem) {
      cp = ((char *) &split_rec) + (sizeof(*ttl) - split_rem);
      err = scif_send(micras_ttl_ep, cp, split_rem, 0);
      if (err == split_rem) {
        /*
         * Tx of pending buffer complete
         */
        split_rem = 0;
      }
      else {
        if (err < 0) {
          /*
           * SCIF failed squarely, just drop the message.
           * TBD: close end point?
           */
        }
        else {
          /*
           * Another partial send
           */
          split_rem -= err;
        }
      }
    }

    if (! split_rem) {
      /*
       * Send message
       */
      err = scif_send(micras_ttl_ep, ttl, sizeof(*ttl), 0);
      if (err != sizeof(*ttl)) {
        /*
         * Did not send all of the message
         */
        if (err < 0) {
          /*
           * SCIF failed squarely, drop the message.
           * TBD: close end point?
           */
        }
        else {
          split_rec = *ttl;
          split_rem = sizeof(*ttl) - err;
        }
      }
    }
  }
}



/*
**
** MMIO regions used by the RAS module.
** Until there is a common strategy for access to BOXes and other
** CSRs, we'll map them ourselves. All MMIO accesses are performed
** through 32 bit unsigned integers, but a 64 bit abstraction
** is provided for convenience (low 32 bits done first).
**
** We need access to the SBOX, all GBOXs, TBOXs and DBOXs.
**
** Note: I2C driver code for exception context in micras_elog.c
**       has its own set of I/O routines in order to allow
**       separate debugging.
**
*/

uint8_t * micras_sbox;            /* SBOX mmio region */
uint8_t * micras_dbox[DBOX_NUM];  /* DBOX mmio regions */
uint8_t * micras_gbox[GBOX_NUM];  /* GBOX mmio regions */
#ifdef CONFIG_MK1OM
uint8_t * micras_tbox[TBOX_NUM];  /* TBOX mmio regions */
#endif

/*
 * Specials: some defines are currently missing
 */

#ifdef CONFIG_MK1OM
#define DBOX1_BASE 0x0800620000ULL

#define GBOX4_BASE 0x08006D0000ULL
#define GBOX5_BASE 0x08006C0000ULL
#define GBOX6_BASE 0x08006B0000ULL
#define GBOX7_BASE 0x08006A0000ULL
#endif


/*
 * MMIO I/O dumpers (for debug)
 * Exception mode code needs to use the ee_print dumpers
 * because printk is not safe to use (works most of the time
 * though, but may hang the system eventually).
 */
#if 0
#if 0
extern atomic_t pxa_block;
#define RL if (! atomic_read(&pxa_block)) ee_print("%s: %4x -> %08x\n", __FUNCTION__, roff, val)
#define RQ if (! atomic_read(&pxa_block)) ee_print("%s: %4x -> %016llx\n", __FUNCTION__, roff, val)
#define WL if (! atomic_read(&pxa_block)) ee_print("%s: %4x <- %08x\n", __FUNCTION__, roff, val)
#define WQ if (! atomic_read(&pxa_block)) ee_print("%s: %4x <- %016llx\n", __FUNCTION__, roff, val)
#else
#define RL printk("%s: %4x -> %08x\n", __FUNCTION__, roff, val)
#define RQ printk("%s: %4x -> %016llx\n", __FUNCTION__, roff, val)
#define WL printk("%s: %4x <- %08x\n", __FUNCTION__, roff, val)
#define WQ printk("%s: %4x <- %016llx\n", __FUNCTION__, roff, val)
#endif
#else
#define RL /* As nothing */
#define RQ /* As nothing */
#define WL /* As nothing */
#define WQ /* As nothing */
#endif


/*
 * SBOX MMIO I/O routines
 *  mr_sbox_base  Return SBOX MMIO region
 *  mr_sbox_rl    Read 32-bit register
 *  mr_sbox_rq    Read 64-bit register (really two 32-bit reads)
 *  mr_sbox_wl    Write 32-bit register
 *  mr_sbox_wq    Write 64-bit register (really two 32-bit writes)
 */

#if NOT_YET
uint8_t *
mr_sbox_base(int dummy)
{
  return micras_sbox;
}
#endif

uint32_t
mr_sbox_rl(int dummy, uint32_t roff)
{
  uint32_t val;

  val = * (volatile uint32_t *)(micras_sbox + roff);
  RL;
  return val;
}

uint64_t
mr_sbox_rq(int dummy, uint32_t roff)
{
  uint32_t hi, lo;
  uint64_t val;

  lo = * (volatile uint32_t *)(micras_sbox + roff);
  hi = * (volatile uint32_t *)(micras_sbox + roff + 4);
  val = ((uint64_t) hi << 32) | (uint64_t) lo;
  RQ;
  return val;
}

void
mr_sbox_wl(int dummy, uint32_t roff, uint32_t val)
{
  WL;
  * (volatile uint32_t *)(micras_sbox + roff) = val;
}

void
mr_sbox_wq(int dummy, uint32_t roff, uint64_t val)
{
  uint32_t hi, lo;

  WQ;
  lo = val;
  hi = val >> 32;

  * (volatile uint32_t *)(micras_sbox + roff) = lo;
  * (volatile uint32_t *)(micras_sbox + roff + 4) = hi;
}
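
/*
 * Illustration only (not compiled): reading the die temperature the
 * same way micras_mt_smc() above does, by pulling THERMAL_STATUS_2
 * through mr_sbox_rl() and extracting bits 19:10 (GET_BITS is from
 * micras.h).
 */
#if 0
static uint32_t
sbox_die_temp_example(void)
{
  uint32_t ts2;

  ts2 = mr_sbox_rl(0, SBOX_THERMAL_STATUS_2);
  return GET_BITS(19, 10, ts2);   /* Device temperature field */
}
#endif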


/*
 * DBOX MMIO I/O routines
 *  mr_dbox_base  Return DBOX MMIO region
 *  mr_dbox_rl    Read 32-bit register
 *  mr_dbox_rq    Read 64-bit register (really two 32-bit reads)
 *  mr_dbox_wl    Write 32-bit register
 *  mr_dbox_wq    Write 64-bit register (really two 32-bit writes)
 */

#if NOT_YET
uint8_t *
mr_dbox_base(int unit)
{
  return micras_dbox[unit];
}
#endif

uint32_t
mr_dbox_rl(int unit, uint32_t roff)
{
  uint32_t val;

  val = * (volatile uint32_t *)(micras_dbox[unit] + roff);
  RL;
  return val;
}

uint64_t
mr_dbox_rq(int unit, uint32_t roff)
{
  uint32_t hi, lo;
  uint64_t val;

  lo = * (volatile uint32_t *)(micras_dbox[unit] + roff);
  hi = * (volatile uint32_t *)(micras_dbox[unit] + roff + 4);
  val = ((uint64_t) hi << 32) | (uint64_t) lo;
  RQ;
  return val;
}

void
mr_dbox_wl(int unit, uint32_t roff, uint32_t val)
{
  WL;
  * (volatile uint32_t *)(micras_dbox[unit] + roff) = val;
}

void
mr_dbox_wq(int unit, uint32_t roff, uint64_t val)
{
  uint32_t hi, lo;

  WQ;
  lo = val;
  hi = val >> 32;

  * (volatile uint32_t *)(micras_dbox[unit] + roff) = lo;
  * (volatile uint32_t *)(micras_dbox[unit] + roff + 4) = hi;
}


/*
 * GBOX MMIO I/O routines
 *  mr_gbox_base  Return GBOX MMIO region
 *  mr_gbox_rl    Read 32-bit register
 *  mr_gbox_rq    Read 64-bit register (really two 32-bit reads)
 *  mr_gbox_wl    Write 32-bit register
 *  mr_gbox_wq    Write 64-bit register (really two 32-bit writes)
 *
 * Due to a Si bug, MMIO writes can be dropped by the GBOXs
 * during heavy DMA activity (HSD #4844222). The risk of it
 * happening is low enough that a 'repeat until it sticks'
 * workaround is sufficient. No 'read' issues so far.
 *
 *TBD: Ramesh asked that GBOX MMIOs check for sleep states.
 *     Not sure how to do that, but here is a good spot to
 *     add such a check, as all GBOX access comes through here.
 */

#if NOT_YET
uint8_t *
mr_gbox_base(int unit)
{
  return micras_gbox[unit];
}
#endif

uint32_t
mr_gbox_rl(int unit, uint32_t roff)
{
  uint32_t val;

  val = * (volatile uint32_t *)(micras_gbox[unit] + roff);
  RL;
  return val;
}

uint64_t
mr_gbox_rq(int unit, uint32_t roff)
{
  uint32_t hi, lo;
  uint64_t val;

  lo = * (volatile uint32_t *)(micras_gbox[unit] + roff);
  if (roff == 0x5c) {
    /*
     * Instead of placing the HI part of MCA_STATUS
     * at 0x60 to form a natural 64-bit register,
     * it is located at 0xac, against all conventions.
     */
    hi = * (volatile uint32_t *)(micras_gbox[unit] + 0xac);
  }
  else
    hi = * (volatile uint32_t *)(micras_gbox[unit] + roff + 4);
  val = ((uint64_t) hi << 32) | (uint64_t) lo;
  RQ;
  return val;
}
1963
1964void
1965mr_gbox_wl(int unit, uint32_t roff, uint32_t val)
1966{
1967#if !GBOX_WORKING
1968 {
1969 int rpt;
1970 uint32_t rb;
1971
1972 /*
1973 * Due to bug HSD 4844222 loop until value sticks
1974 */
1975 for(rpt = 10; rpt-- ; ) {
1976#endif
1977
1978 WL;
1979 * (volatile uint32_t *)(micras_gbox[unit] + roff) = val;
1980
1981#if !GBOX_WORKING
1982 rb = mr_gbox_rl(unit, roff);
1983 if (rb == val)
1984 break;
1985 }
1986 }
1987#endif
1988}
1989
1990void
1991mr_gbox_wq(int unit, uint32_t roff, uint64_t val)
1992{
1993 uint32_t hi, lo;
1994
1995 lo = val;
1996 hi = val >> 32;
1997
1998#if !GBOX_WORKING
1999 {
2000 int rpt;
2001 uint64_t rb;
2002
2003 /*
2004 * Due to bug HSD 4844222, loop until the value sticks.
2005 * Note: this may result in bad things happening when
2006 * writing to an MMIO MCA STATUS register,
2007 * since there is a non-zero chance that the
2008 * NMI handler fires and changes the register
2009 * inside this loop. Require that the caller
2010 * runs on the same CPU as the NMI handler (#0).
2011 */
2012 for(rpt = 10; rpt-- ; ) {
2013#endif
2014
2015 WQ;
2016 * (volatile uint32_t *)(micras_gbox[unit] + roff) = lo;
2017 if (roff == 0x5c) {
2018 /*
2019 * Instead of placing the HI part of MCA_STATUS
2020 * at 0x60 to form a natural 64-bit register,
2021 * it is located at 0xac, against all conventions.
2022 */
2023 * (volatile uint32_t *)(micras_gbox[unit] + 0xac) = hi;
2024 }
2025 else
2026 * (volatile uint32_t *)(micras_gbox[unit] + roff + 4) = hi;
2027
2028#if !GBOX_WORKING
2029 rb = mr_gbox_rq(unit, roff);
2030 if (rb == val)
2031 break;
2032 }
2033 }
2034#endif
2035}
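
#if 0
/*
 * Sketch only, names hypothetical: per the note above, issue
 * status-clearing writes from CPU 0 so the read-back loop
 * serializes with the NMI handler instead of racing it.
 */
static void
_gbox_clr_status(void * info)
{
  mr_gbox_wq((int)(long) info, 0x5c, 0);
}
/* usage: smp_call_function_single(0, _gbox_clr_status, (void *)(long) unit, 1); */
#endif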
2036
2037
2038#ifdef CONFIG_MK1OM
2039/*
2040 * TBOX MMIO I/O routines
2041 * mr_tbox_base Return TBOX MMIO region
2042 * mr_tbox_rl Read 32-bit register
2043 * mr_tbox_rq Read 64-bit register (really two 32-bit reads)
2044 * mr_tbox_wl Write 32-bit register
2045 * mr_tbox_wq Write 64-bit register (really two 32-bit writes)
2046 *
2047 * Some SKUs don't have TBOXs, in which case the
2048 * micras_tbox array will contain null pointers.
2049 * We do not test for this here, but expect that
2050 * the caller either knows what they are doing or
2051 * consults the mr_tbox_base() function first.
2052 */
2053
2054#if NOT_YET
2055uint8_t *
2056mr_tbox_base(int unit)
2057{
2058 return micras_tbox[unit];
2059}
2060#endif
2061
2062uint32_t
2063mr_tbox_rl(int unit, uint32_t roff)
2064{
2065 uint32_t val;
2066
2067 val = * (volatile uint32_t *)(micras_tbox[unit] + roff);
2068 RL;
2069 return val;
2070}
2071
2072uint64_t
2073mr_tbox_rq(int unit, uint32_t roff)
2074{
2075 uint32_t hi, lo;
2076 uint64_t val;
2077
2078 lo = * (volatile uint32_t *)(micras_tbox[unit] + roff);
2079 hi = * (volatile uint32_t *)(micras_tbox[unit] + roff + 4);
2080 val = ((uint64_t) hi << 32) | (uint64_t) lo;
2081 RQ;
2082 return val;
2083}
2084
2085void
2086mr_tbox_wl(int unit, uint32_t roff, uint32_t val)
2087{
2088 WL;
2089 * (volatile uint32_t *)(micras_tbox[unit] + roff) = val;
2090}
2091
2092void
2093mr_tbox_wq(int unit, uint32_t roff, uint64_t val)
2094{
2095 uint32_t hi, lo;
2096
2097 WQ;
2098 lo = val;
2099 hi = val >> 32;
2100
2101 * (volatile uint32_t *)(micras_tbox[unit] + roff) = lo;
2102 * (volatile uint32_t *)(micras_tbox[unit] + roff + 4) = hi;
2103}
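
#if 0
/*
 * Sketch only (hypothetical helper): on SKUs without TBOXs the
 * micras_tbox pointers stay null, so a defensive caller checks
 * before touching the registers.
 */
static uint32_t
tbox_rl_safe(int unit, uint32_t roff, uint32_t dflt)
{
  if (! micras_tbox[unit])
    return dflt;
  return mr_tbox_rl(unit, roff);
}
#endif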
2104#endif
2105
2106
2107
2108/*
2109**
2110** SMP utilities for CP and MC.
2111** The kernel offers routines for MSRs, but as far
2112 ** as I could find there are none for some
2113** CPU registers we need, like CR4.
2114**
2115** rd_cr4_on_cpu Read a CR4 value on CPU
2116** set_in_cr4_on_cpu Set bits in CR4 on a CPU
2117** clear_in_cr4_on_cpu Guess...
2118** rdtsc Read time stamp counter
2119**
2120**TBD: Special case when CPU happens to be current?
2121*/
2122
2123#if NOT_YET
2124static void
2125_rd_cr4_on_cpu(void * p)
2126{
2127 *((uint32_t *) p) = read_cr4();
2128}
2129
2130uint32_t
2131rd_cr4_on_cpu(int cpu)
2132{
2133 uint32_t cr4;
2134
2135 smp_call_function_single(cpu, _rd_cr4_on_cpu, &cr4, 1);
2136 return cr4;
2137}
2138
2139static void
2140_set_in_cr4_on_cpu(void * p)
2141{
2142 uint32_t cr4;
2143
2144 cr4 = read_cr4();
2145 cr4 |= * (uint32_t *) p;
2146 write_cr4(cr4);
2147}
2148
2149void
2150set_in_cr4_on_cpu(int cpu, uint32_t m)
2151{
2152 smp_call_function_single(cpu, _set_in_cr4_on_cpu, &m, 1);
2153}
2154
2155static void
2156_clear_in_cr4_on_cpu(void * p)
2157{
2158 uint32_t cr4;
2159
2160 cr4 = read_cr4();
2161 cr4 &= ~ *(uint32_t *) p;
2162 write_cr4(cr4);
2163}
2164
2165void
2166clear_in_cr4_on_cpu(int cpu, uint32_t m)
2167{
2168 smp_call_function_single(cpu, _clear_in_cr4_on_cpu, &m, 1);
2169}
2170#endif
2171
2172uint64_t
2173rdtsc(void) {
2174 uint32_t lo, hi;
2175 __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
2176 return ((uint64_t) hi) << 32 | lo;
2177}
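
#if 0
/*
 * Sketch only: rdtsc() can bracket a region of interest for an
 * elapsed-cycle estimate (TSC ticks, not wall time; assumes the
 * caller stays on one CPU). Register offset is hypothetical.
 */
static uint64_t
tsc_cost_of_dbox_read(void)
{
  uint64_t t0;

  t0 = rdtsc();
  (void) mr_dbox_rl(0, 0x0);
  return rdtsc() - t0;
}
#endif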
2178
2179
2180
2181/*
2182**
2183** Module load/unload logic
2184**
2185*/
2186
2187
2188/*
2189 * Startup job (run by MT thread)
2190 * Intended to handle tasks that cannot impact
2191 * module load status, such as kicking off service
2192 * work queues, etc.
2193 */
2194
2195static void
2196micras_init2(struct work_struct * work)
2197{
2198 /*
2199 * Perform MT one-time setup and kick
2200 * off the 1 sec timer and SCIF listeners
2201 */
2202 if (! micras_stop) {
2203
2204 INIT_DELAYED_WORK(&micras_wq_tick, micras_mt_tick);
2205 queue_delayed_work(micras_wq, &micras_wq_tick, msecs_to_jiffies(5000));
2206
2207 bitmap_fill(micras_cp_fd, MR_SCIF_MAX);
2208 INIT_DELAYED_WORK(&micras_cp_tkn, micras_cp);
2209 queue_delayed_work(micras_cp_wq, &micras_cp_tkn, 0);
2210
2211 INIT_DELAYED_WORK(&micras_mc_tkn, micras_mc);
2212 queue_delayed_work(micras_mc_wq, &micras_mc_tkn, 0);
2213
2214 INIT_DELAYED_WORK(&micras_ttl_tkn, micras_ttl);
2215 queue_delayed_work(micras_ttl_wq, &micras_ttl_tkn, 0);
2216
2217#if defined(CONFIG_MK1OM) && WA_4845465 && DIE_PROC
2218 if (smc_4845465)
2219 die_pe = proc_create("die", 0644, 0, &proc_die_operations);
2220#endif
2221
2222 printk("RAS.init: module operational\n");
2223 module_put(THIS_MODULE);
2224 }
2225}
2226
2227
2228static int __init
2229micras_init(void)
2230{
2231 int i;
2232 int err;
2233
2234 printk("Loading RAS module ver %s. Build date: %s\n", RAS_VER, __DATE__);
2235
2236 /*
2237 * Create work queue for the monitoring thread
2238 * and pass it some initial work to start with.
2239 */
2240#if defined(CONFIG_MK1OM) && WA_4845465
2241 micras_wq = alloc_workqueue("RAS MT", WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
2242#else
2243 micras_wq = create_singlethread_workqueue("RAS MT");
2244#endif
2245 if (! micras_wq) {
2246 err = -ESRCH;
2247 printk("RAS.init: cannot start work queue, error %d\n", err);
2248 goto fail_wq;
2249 }
2250
2251 /*
2252 * Register top sysfs class (directory) and attach attributes (files)
2253 * beneath it. No 'device' nodes are involved.
2254 */
2255 err = class_register(&ras_class);
2256 if (err) {
2257 printk("RAS.init: cannot register class 'micras', error %d\n", err);
2258 goto fail_class;
2259 }
2260
2261 /*
2262 * Setup CP SCIF port in listening mode
2263 */
2264 micras_cp_lstn = scif_open();
2265 if (! micras_cp_lstn) {
2266 printk("RAS.init: cannot get SCIF CP endpoint\n");
2267 goto fail_cp;
2268 }
2269 err = scif_bind(micras_cp_lstn, MR_MON_PORT);
2270 if (err < 0) {
2271 printk("RAS.init: cannot bind SCIF CP endpoint, error %d\n", err);
2272 goto fail_cp_ep;
2273 }
2274 err = scif_listen(micras_cp_lstn, MR_SCIF_MAX);
2275 if (err < 0) {
2276 printk("RAS.init: cannot make SCIF CP listen, error %d\n", err);
2277 goto fail_cp_ep;
2278 }
2279 micras_cp_wq = create_singlethread_workqueue("RAS CP listen");
2280 if (! micras_cp_wq) {
2281 err = -ESRCH;
2282 printk("RAS.init: cannot start CP listener work queue, error %d\n", err);
2283 goto fail_cp_ep;
2284 }
2285
2286 /*
2287 * Setup MC SCIF port in listening mode
2288 */
2289 micras_mc_lstn = scif_open();
2290 if (! micras_mc_lstn) {
2291 printk("RAS.init: cannot get SCIF MC endpoint\n");
2292 goto fail_mc;
2293 }
2294 err = scif_bind(micras_mc_lstn, MR_MCE_PORT);
2295 if (err < 0) {
2296 printk("RAS.init: cannot bind SCIF MC endpoint, error %d\n", err);
2297 goto fail_mc_ep;
2298 }
2299 err = scif_listen(micras_mc_lstn, MR_SCIF_MAX);
2300 if (err < 0) {
2301 printk("RAS.init: cannot make SCIF MC listen, error %d\n", err);
2302 goto fail_mc_ep;
2303 }
2304 micras_mc_wq = create_singlethread_workqueue("RAS MC listen");
2305 if (! micras_mc_wq) {
2306 err = -ESRCH;
2307 printk("RAS.init: cannot start listener work queue, error %d\n", err);
2308 goto fail_mc_ep;
2309 }
2310
2311 /*
2312 * Setup TTL SCIF port in listening mode
2313 */
2314 micras_ttl_lstn = scif_open();
2315 if (! micras_ttl_lstn) {
2316 printk("RAS.init: cannot get SCIF TTL endpoint\n");
2317 goto fail_ttl;
2318 }
2319 err = scif_bind(micras_ttl_lstn, MR_TTL_PORT);
2320 if (err < 0) {
2321 printk("RAS.init: cannot bind SCIF TTL endpoint, error %d\n", err);
2322 goto fail_ttl_ep;
2323 }
2324 err = scif_listen(micras_ttl_lstn, MR_SCIF_MAX);
2325 if (err < 0) {
2326 printk("RAS.init: cannot make SCIF TTL listen, error %d\n", err);
2327 goto fail_ttl_ep;
2328 }
2329 micras_ttl_wq = create_singlethread_workqueue("RAS TTL listen");
2330 if (! micras_ttl_wq) {
2331 err = -ESRCH;
2332 printk("RAS.init: cannot start listener work queue, error %d\n", err);
2333 goto fail_ttl_ep;
2334 }
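
 /*
 * (Sketch only, name hypothetical: the CP/MC/TTL setups above
 * repeat the same open/bind/listen sequence and could share a
 * helper along these lines,
 *
 *   static scif_epd_t
 *   micras_listen(uint16_t port, int backlog)
 *   {
 *     scif_epd_t ep;
 *
 *     ep = scif_open();
 *     if (! ep)
 *       return 0;
 *     if (scif_bind(ep, port) < 0 || scif_listen(ep, backlog) < 0) {
 *       scif_close(ep);
 *       return 0;
 *     }
 *     return ep;
 *   }
 *
 * keeping the unwind labels below as they are.)
 */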
2335
2336 /*
2337 * Make the MMIO maps we need.
 * Set a fallback error code first; without it the failure
 * gotos below would return 0 (success) from a failed load.
2338 */
 err = -ENOMEM;
2339 micras_sbox = ioremap(SBOX_BASE, COMMON_MMIO_BOX_SIZE);
2340 if (! micras_sbox)
2341 goto fail_iomap;
2342
2343 micras_dbox[0] = ioremap(DBOX0_BASE, COMMON_MMIO_BOX_SIZE);
2344 if (! micras_dbox[0])
2345 goto fail_iomap;
2346
2347#ifdef CONFIG_MK1OM
2348 micras_dbox[1] = ioremap(DBOX1_BASE, COMMON_MMIO_BOX_SIZE);
2349 if (! micras_dbox[1])
2350 goto fail_iomap;
2351#endif
2352
2353 micras_gbox[0] = ioremap(GBOX0_BASE, COMMON_MMIO_BOX_SIZE);
2354 micras_gbox[1] = ioremap(GBOX1_BASE, COMMON_MMIO_BOX_SIZE);
2355 micras_gbox[2] = ioremap(GBOX2_BASE, COMMON_MMIO_BOX_SIZE);
2356 micras_gbox[3] = ioremap(GBOX3_BASE, COMMON_MMIO_BOX_SIZE);
2357 if (!micras_gbox[0] || !micras_gbox[1] ||
2358 !micras_gbox[2] || !micras_gbox[3])
2359 goto fail_iomap;
2360
2361#ifdef CONFIG_MK1OM
2362 micras_gbox[4] = ioremap(GBOX4_BASE, COMMON_MMIO_BOX_SIZE);
2363 micras_gbox[5] = ioremap(GBOX5_BASE, COMMON_MMIO_BOX_SIZE);
2364 micras_gbox[6] = ioremap(GBOX6_BASE, COMMON_MMIO_BOX_SIZE);
2365 micras_gbox[7] = ioremap(GBOX7_BASE, COMMON_MMIO_BOX_SIZE);
2366 if (!micras_gbox[4] || !micras_gbox[5] ||
2367 !micras_gbox[6] || !micras_gbox[7])
2368 goto fail_iomap;
2369#endif
2370
2371#ifdef CONFIG_MK1OM
2372 /*
2373 * Most SKUs don't have TBOXs.
2374 * If they are absent, don't map their MMIO space.
2375 */
2376 if (mr_txs()) {
2377 micras_tbox[0] = ioremap(TXS0_BASE, COMMON_MMIO_BOX_SIZE);
2378 micras_tbox[1] = ioremap(TXS1_BASE, COMMON_MMIO_BOX_SIZE);
2379 micras_tbox[2] = ioremap(TXS2_BASE, COMMON_MMIO_BOX_SIZE);
2380 micras_tbox[3] = ioremap(TXS3_BASE, COMMON_MMIO_BOX_SIZE);
2381 micras_tbox[4] = ioremap(TXS4_BASE, COMMON_MMIO_BOX_SIZE);
2382 micras_tbox[5] = ioremap(TXS5_BASE, COMMON_MMIO_BOX_SIZE);
2383 micras_tbox[6] = ioremap(TXS6_BASE, COMMON_MMIO_BOX_SIZE);
2384 micras_tbox[7] = ioremap(TXS7_BASE, COMMON_MMIO_BOX_SIZE);
2385 if (!micras_tbox[0] || !micras_tbox[1] ||
2386 !micras_tbox[2] || !micras_tbox[3] ||
2387 !micras_tbox[4] || !micras_tbox[5] ||
2388 !micras_tbox[6] || !micras_tbox[7])
2389 goto fail_iomap;
2390 }
2391#endif
2392
2393 /*
2394 * Setup non-volatile MC error logging device.
2395 */
2396 if (ee_init())
2397 goto fail_iomap;
2398
2399 /*
2400 * Setup core MC event handler.
2401 * If this can't fail, move into micras_wq_init instead.
2402 */
2403 if (mcc_init())
2404 goto fail_ee;
2405
2406 /*
2407 * Setup un-core MC event handler.
2408 * If this can't fail, move into micras_wq_init instead.
2409 */
2410 if (mcu_init())
2411 goto fail_core;
2412
2413 /*
2414 * Prepare MT drivers
2415 */
2416 mr_mt_init();
2417
2418#if defined(CONFIG_MK1OM) && USE_PM
2419 /*
2420 * Setup PM interface
2421 */
2422 if (pm_init())
2423 goto fail_uncore;
2424#endif
2425
2426#if defined(CONFIG_MK1OM) && WA_4845465
2427 /*
2428 * Launch SMC temperature push work.
2429 * Supported by SMC firmware 121.11 (build 4511) or later.
2430 */
2431 {
2432 extern int mr_smc_rd(uint8_t, uint32_t *);
2433 int rev, ref;
2434
2435 mr_smc_rd(0x11, &rev);
2436 if (rev) {
2437 ref = PUT_BITS(31, 24, 121) |
2438 PUT_BITS(23, 16, 11) |
2439 PUT_BITS(15, 0, 4511);
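 /*
 * Worked example (assuming PUT_BITS(h, l, v) places v in bits
 * h..l): 121 -> 0x79 in 31:24, 11 -> 0x0b in 23:16, and
 * 4511 -> 0x119f in 15:0, i.e. ref == 0x790b119f.
 */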
2440
2441 if (rev >= ref)
2442 smc_4845465 = rev;
2443 }
2444
2445 if (smc_4845465) {
2446 INIT_DELAYED_WORK(&micras_wq_smc, micras_mt_smc);
2447 queue_delayed_work(micras_wq, &micras_wq_smc, 0);
2448 printk("RAS.init: HSD 4845465 workaround active, fw %x\n", rev);
2449 }
2450 else
2451 printk("RAS.init: SMC too old for HSD 4845465 workaround, fw %x\n", rev);
2452 }
2453#endif
2454
2455 /*
2456 * Launch deferrable setup work; the try_module_get() below is paired with the module_put() in micras_init2().
2457 */
2458 try_module_get(THIS_MODULE);
2459 INIT_DELAYED_WORK(&micras_wq_init, micras_init2);
2460 queue_delayed_work(micras_wq, &micras_wq_init, msecs_to_jiffies(500));
2461 printk("RAS module load completed\n");
2462 return 0;
2463
2464 /*
2465 * Error exits: unwind all setup done so far and return failure
2466 *
2467 *TBD: consider calling exit function. Requires that it can tell
2468 * with certainty what has been set up and what hasn't.
2469 */
2470#if defined(CONFIG_MK1OM) && USE_PM
2471fail_uncore:
2472 mr_mt_exit();
2473 mcu_exit();
2474#endif
2475fail_core:
2476 mcc_exit();
2477fail_ee:
2478#ifdef CONFIG_MK1OM
2479 ee_exit();
2480#endif
2481fail_iomap:
2482 if (micras_sbox)
2483 iounmap(micras_sbox);
2484 for(i = 0; i < ARRAY_SIZE(micras_dbox); i++)
2485 if (micras_dbox[i])
2486 iounmap(micras_dbox[i]);
2487 for(i = 0; i < ARRAY_SIZE(micras_gbox); i++)
2488 if (micras_gbox[i])
2489 iounmap(micras_gbox[i]);
2490#ifdef CONFIG_MK1OM
2491 for(i = 0; i < ARRAY_SIZE(micras_tbox); i++)
2492 if (micras_tbox[i])
2493 iounmap(micras_tbox[i]);
2494#endif
2495
2496 destroy_workqueue(micras_ttl_wq);
2497
2498fail_ttl_ep:
2499 scif_close(micras_ttl_lstn);
2500
2501fail_ttl:
2502 destroy_workqueue(micras_mc_wq);
2503
2504fail_mc_ep:
2505 scif_close(micras_mc_lstn);
2506
2507fail_mc:
2508 destroy_workqueue(micras_cp_wq);
2509
2510fail_cp_ep:
2511 scif_close(micras_cp_lstn);
2512
2513fail_cp:
2514 class_unregister(&ras_class);
2515
2516fail_class:
2517 micras_stop = 1;
2518 flush_workqueue(micras_wq);
2519 destroy_workqueue(micras_wq);
2520
2521fail_wq:
2522 printk("RAS module load failed\n");
2523 return err;
2524}
2525
2526
2527static void __exit
2528micras_exit(void)
2529{
2530 int i;
2531 scif_epd_t ep;
2532
2533 printk("Unloading RAS module\n");
2534 micras_stop = 1;
2535
2536 /*
2537 * Disconnect MC event handlers and
2538 * close the I2C eeprom interfaces.
2539 */
2540 mcu_exit();
2541 mcc_exit();
2542 ee_exit();
2543
2544 /*
2545 * Close SCIF listeners (no more connects).
2546 */
2547 scif_close(micras_cp_lstn);
2548 scif_close(micras_mc_lstn);
2549 scif_close(micras_ttl_lstn);
2550 msleep(10);
2551 destroy_workqueue(micras_cp_wq);
2552 destroy_workqueue(micras_mc_wq);
2553 destroy_workqueue(micras_ttl_wq);
2554
2555 /*
2556 * Terminate active sessions by closing their end points.
2557 * The session threads should then clean up after themselves.
2558 */
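 /*
 * (The atomic64_xchg() below transfers ownership of the endpoint
 * pointer, so exactly one of the unloader and the session thread
 * sees the non-zero value and gets to scif_close() it.)
 */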
2559 for(i = 0; i < MR_SCIF_MAX; i++) {
2560 if (micras_cp_kt[i]) {
2561 printk("RAS.exit: force closing CP session %d\n", i);
2562 ep = (scif_epd_t) atomic64_xchg((atomic64_t *)(micras_cp_ep + i), 0);
2563 if (ep)
2564 scif_close(ep);
2565 }
2566 }
2567 for(i = 0; i < 1000; i++) {
2568 if (bitmap_weight(micras_cp_fd, MR_SCIF_MAX) == MR_SCIF_MAX)
2569 break;
2570 msleep(1);
2571 }
2572 if (micras_mc_kt) {
2573 printk("RAS.exit: force closing MC session\n");
2574 ep = (scif_epd_t) atomic64_xchg((atomic64_t *) &micras_mc_ep, 0);
2575 if (ep)
2576 scif_close(ep);
2577 for(i = 0; (i < 1000) && micras_mc_kt; i++)
2578 msleep(1);
2579 }
2580 if (micras_ttl_kt) {
2581 printk("RAS.exit: force closing TTL session\n");
2582 ep = (scif_epd_t) atomic64_xchg((atomic64_t *) &micras_ttl_ep, 0);
2583 if (ep)
2584 scif_close(ep);
2585 for(i = 0; (i < 1000) && micras_ttl_kt; i++)
2586 msleep(1);
2587 }
2588
2589 /*
2590 * Tear down sysfs class and its nodes
2591 */
2592 class_unregister(&ras_class);
2593
2594#if defined(CONFIG_MK1OM) && USE_PM
2595 /*
2596 * De-register with the PM module.
2597 */
2598 pm_exit();
2599#endif
2600
2601 /*
2602 * Shut down the work queues
2603 */
2604#if defined(CONFIG_MK1OM) && WA_4845465
2605 if (smc_4845465)
2606 cancel_delayed_work(&micras_wq_smc);
2607#endif
2608 cancel_delayed_work(&micras_wq_tick);
2609 cancel_delayed_work(&micras_wq_init);
2610 flush_workqueue(micras_wq);
2611 destroy_workqueue(micras_wq);
2612
2613 /*
2614 * Restore MT state
2615 */
2616 mr_mt_exit();
2617
2618 /*
2619 * Remove MMIO region maps
2620 */
2621 iounmap(micras_sbox);
2622 for(i = 0; i < ARRAY_SIZE(micras_dbox); i++)
2623 if (micras_dbox[i])
2624 iounmap(micras_dbox[i]);
2625 for(i = 0; i < ARRAY_SIZE(micras_gbox); i++)
2626 if (micras_gbox[i])
2627 iounmap(micras_gbox[i]);
2628#ifdef CONFIG_MK1OM
2629 for(i = 0; i < ARRAY_SIZE(micras_tbox); i++)
2630 if (micras_tbox[i])
2631 iounmap(micras_tbox[i]);
2632#endif
2633
2634#if defined(CONFIG_MK1OM) && WA_4845465 && DIE_PROC
2635 if (smc_4845465 && die_pe) {
2636 remove_proc_entry("die", 0);
2637 die_pe = 0;
2638 }
2639#endif
2640
2641 printk("RAS module unload completed\n");
2642}
2643
2644module_init(micras_init);
2645module_exit(micras_exit);
2646
2647MODULE_AUTHOR("Intel Corp. 2013 (" __DATE__ ") ver " RAS_VER);
2648MODULE_DESCRIPTION("RAS and HW monitoring module for MIC");
2649MODULE_LICENSE("GPL");
2650