* Copyright 2010-2017 Intel Corporation.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2,
* as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
* Disclaimer: The codes contained in these modules may be specific to
* the Intel Software Development Platform codenamed Knights Ferry,
* and the Intel product codenamed Knights Corner, and are not backward
* compatible with other Intel products. Additionally, Intel will NOT
* support the codes or instruction set in future products.
* Intel offers no warranty of any kind regarding the code. This code is
* licensed on an "AS IS" basis and Intel is not obligated to provide
* any support, assistance, installation, training, or other services
* of any kind. Intel is also not obligated to provide any updates,
* enhancements or extensions. Intel specifically disclaims any warranty
* of merchantability, non-infringement, fitness for any particular
* purpose, and any other warranty.
* Further, Intel disclaims all liability of any kind, including but
* not limited to liability for infringement of any proprietary rights,
* relating to the use of the code, even if Intel is notified of the
* possibility of such liability. Except as expressly stated in an Intel
* license agreement provided with this code and agreed upon with Intel,
* no license, express or implied, by estoppel or otherwise, to any
* intellectual property rights is granted herein.
#include <linux/reboot.h>
#include <linux/version.h>
#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34))
#include <linux/pm_qos_params.h>
#include <mic/micscif_smpt.h>
#include <mic/micscif_rb.h>
#include <mic/micscif_intr.h>
//#include <micscif_test.h>
#include <mic/micscif_nodeqp.h>
#include <mic/mic_dma_api.h>
#include <mic/micscif_kmem_cache.h>
/* Include this for suspend/resume notifications from pm driver */
#include <mic/micscif_nm.h>
#define MICPM_DEVEVENT_SUSPEND 1
#define MICPM_DEVEVENT_RESUME 2
#define MICPM_DEVEVENT_FAIL_SUSPEND 3
extern void micpm_device_register(struct notifier_block
*n
);
extern void micpm_device_unregister(struct notifier_block
*n
);
module_param(scif_id
, int, 0400);
MODULE_PARM_DESC(scif_id
, "Set scif driver node ID");
module_param(scif_addr
, ulong
, 0400);
MODULE_PARM_DESC(scif_addr
, "Set scif driver host address");
struct kmem_cache
*unaligned_cache
;
struct device
* m_scifdev
;
int micscif_major
= SCIF_MAJOR
;
struct micscif_info ms_info
;
// MAX MIC cards + 1 for the Host
struct micscif_dev scif_dev
[MAX_BOARD_SUPPORTED
+ 1];
extern mic_dma_handle_t mic_dma_handle
;
static int mic_pm_qos_cpu_dma_lat
= -1;
static int mic_host_numa_node
= -1;
static unsigned long mic_p2p_proxy_thresh
= -1;
static int micscif_devevent_handler(struct notifier_block
*nb
,
if (event
== MICPM_DEVEVENT_SUSPEND
)
return micscif_suspend_handler(nb
, event
, msg
);
else if (event
== MICPM_DEVEVENT_RESUME
)
return micscif_resume_handler(nb
, event
, msg
);
else if (event
== MICPM_DEVEVENT_FAIL_SUSPEND
)
return micscif_fail_suspend_handler(nb
, event
, msg
);
static struct notifier_block mic_deviceevent
= {
.notifier_call
= micscif_devevent_handler
,
static int micscif_open(struct inode
*in
, struct file
*f
)
/* base mic device access for testing */
static int micscif_ioctl(struct inode
*in
, struct file
*f
,
unsigned int cmd
, unsigned long arg
)
return scif_process_ioctl(f
, cmd
, arg
);
static long micscif_unlocked_ioctl(struct file
*f
,
unsigned int cmd
, unsigned long arg
)
return (long) micscif_ioctl(f
->f_path
.dentry
->d_inode
, f
, cmd
, arg
);
static int micscif_release(struct inode
*in
, struct file
*f
)
/* base mic device access for testing */
/* TODO: Need to flush the queue, grab some lock, and probably
* notify the remote node we're going down ... right now, we're
* just freeing things, which is probably a bad idea :-)
static int micscif_uninit_qp(struct micscif_dev
*scifdev
)
/* first, iounmap/unmap/free any memory we mapped */
for (i
= 0; i
< scifdev
->n_qpairs
; i
++) {
iounmap(scifdev
->qpairs
[i
].remote_qp
);
iounmap(scifdev
->qpairs
[i
].outbound_q
.rb_base
);
kfree((void *)scifdev
->qpairs
[i
].inbound_q
.rb_base
);
static int micscif_reboot(struct notifier_block
*notifier
, unsigned long unused1
, void *unused2
);
static struct notifier_block micscif_reboot_notifier
= {
.notifier_call
= micscif_reboot
,
extern struct attribute_group scif_attr_group
;
void micscif_destroy_base(void)
#ifdef CONFIG_MMU_NOTIFIER
destroy_workqueue(ms_info
.mi_mmu_notif_wq
);
destroy_workqueue(ms_info
.mi_misc_wq
);
destroy_workqueue(ms_info
.mi_conn_wq
);
sysfs_remove_group(&micinfo
.m_scifdev
->kobj
, &scif_attr_group
);
device_destroy(micinfo
.m_class
, micinfo
.m_dev
+ 1);
device_destroy(micinfo
.m_class
, micinfo
.m_dev
);
class_destroy(micinfo
.m_class
);
cdev_del(&(micinfo
.m_cdev
));
unregister_chrdev_region(micinfo
.m_dev
, 2);
static void _micscif_exit(void)
struct list_head
*pos
, *unused
;
struct scif_callback
*temp
;
pr_debug("Goodbye SCIF!\n");
/* Cleanup P2P Node Qp/ Interrupt Handlers */
for (i
= SCIF_HOST_NODE
+ 1; i
<= MAX_BOARD_SUPPORTED
; i
++) {
if (is_self_scifdev(dev
))
micscif_cleanup_scifdev(dev
, DESTROY_WQ
);
list_for_each_safe(pos
, unused
, &ms_info
.mi_event_cb
) {
temp
= list_entry(pos
, struct scif_callback
, list_member
);
mutex_destroy(&ms_info
.mi_event_cblock
);
micpm_device_unregister(&mic_deviceevent
);
scif_dev
[ms_info
.mi_nodeid
].sd_state
= SCIFDEV_STOPPING
;
scif_dev
[SCIF_HOST_NODE
].sd_state
= SCIFDEV_STOPPING
;
/* The EXIT message is the last message from MIC to the Host */
* Deliberate infinite wait for a host response during driver
* unload since the host must inform other SCIF nodes about
* this node going away and then only send a response back
* to this node to avoid this nodes host shutdown handler racing
* with disconnection from the SCIF network. There is a timeout
* on the host for sending a response back so a response will
* be sent else the host has crashed.
wait_event(ms_info
.mi_exitwq
,
scif_dev
[ms_info
.mi_nodeid
].sd_state
== SCIFDEV_STOPPED
);
micscif_kmem_cache_destroy();
deregister_scif_intr_handler(&scif_dev
[SCIF_HOST_NODE
]);
destroy_workqueue(scif_dev
[SCIF_HOST_NODE
].sd_intr_wq
);
micscif_destroy_loopback_qp(&scif_dev
[ms_info
.mi_nodeid
]);
close_dma_device(0, &mic_dma_handle
);
micscif_uninit_qp(&scif_dev
[SCIF_HOST_NODE
]);
iounmap(scif_dev
[SCIF_HOST_NODE
].mm_sbox
);
#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34))
pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY
, "micscif");
static void micscif_exit(void)
unregister_reboot_notifier(&micscif_reboot_notifier
);
static int micscif_reboot(struct notifier_block
*notifier
, unsigned long unused1
, void *unused2
)
struct file_operations micscif_ops
= {
.unlocked_ioctl
= micscif_unlocked_ioctl
,
.release
= micscif_release
,
static char * scif_devnode(struct device
*dev
, mode_t
*mode
)
return kasprintf(GFP_KERNEL
, "mic/%s", dev_name(dev
));
// Setup the base informaiton for the driver. No interface specific code.
static int micscif_setup_base(void)
micinfo
.m_dev
= MKDEV(micscif_major
, micscif_minor
);
result
= register_chrdev_region(micinfo
.m_dev
, 2, "micscif");
result
= alloc_chrdev_region(&micinfo
.m_dev
, micscif_minor
, 2, "micscif");
micscif_major
= MAJOR(micinfo
.m_dev
);
cdev_init(&(micinfo
.m_cdev
), &micscif_ops
);
micinfo
.m_cdev
.owner
= THIS_MODULE
;
if ((result
= cdev_add(&(micinfo
.m_cdev
), micinfo
.m_dev
, 2)))
micinfo
.m_class
= class_create(THIS_MODULE
, "micscif");
if (IS_ERR(micinfo
.m_class
)) {
result
= PTR_ERR(micinfo
.m_class
);
micinfo
.m_class
->devnode
= scif_devnode
;
if (IS_ERR((int *)(result
=
(long int)device_create(micinfo
.m_class
, NULL
, micinfo
.m_dev
, NULL
, "mic")))) {
result
= PTR_ERR((int *)result
);
if (IS_ERR(micinfo
.m_scifdev
=
device_create(micinfo
.m_class
, NULL
, micinfo
.m_dev
+ 1, NULL
, "scif"))) {
result
= PTR_ERR(micinfo
.m_scifdev
);
if ((result
= sysfs_create_group(&micinfo
.m_scifdev
->kobj
, &scif_attr_group
)))
spin_lock_init(&ms_info
.mi_eplock
);
spin_lock_init(&ms_info
.mi_connlock
);
spin_lock_init(&ms_info
.mi_rmalock
);
mutex_init(&ms_info
.mi_fencelock
);
spin_lock_init(&ms_info
.mi_nb_connect_lock
);
INIT_LIST_HEAD(&ms_info
.mi_uaccept
);
INIT_LIST_HEAD(&ms_info
.mi_listen
);
INIT_LIST_HEAD(&ms_info
.mi_zombie
);
INIT_LIST_HEAD(&ms_info
.mi_connected
);
INIT_LIST_HEAD(&ms_info
.mi_disconnected
);
INIT_LIST_HEAD(&ms_info
.mi_rma
);
INIT_LIST_HEAD(&ms_info
.mi_rma_tc
);
INIT_LIST_HEAD(&ms_info
.mi_nb_connect_list
);
#ifdef CONFIG_MMU_NOTIFIER
INIT_LIST_HEAD(&ms_info
.mi_mmu_notif_cleanup
);
INIT_LIST_HEAD(&ms_info
.mi_fence
);
if (!(ms_info
.mi_misc_wq
= create_singlethread_workqueue("SCIF_MISC"))) {
INIT_WORK(&ms_info
.mi_misc_work
, micscif_misc_handler
);
if (!(ms_info
.mi_conn_wq
= create_singlethread_workqueue("SCIF_NB_CONN"))) {
INIT_WORK(&ms_info
.mi_conn_work
, micscif_conn_handler
);
#ifdef CONFIG_MMU_NOTIFIER
if (!(ms_info
.mi_mmu_notif_wq
= create_singlethread_workqueue("SCIF_MMU"))) {
INIT_WORK(&ms_info
.mi_mmu_notif_work
, micscif_mmu_notif_handler
);
ms_info
.mi_watchdog_to
= DEFAULT_WATCHDOG_TO
;
ms_info
.mi_watchdog_enabled
= 0;
ms_info
.mi_watchdog_enabled
= 1;
ms_info
.mi_rma_tc_limit
= SCIF_RMA_TEMP_CACHE_LIMIT
;
ms_info
.mi_proxy_dma_threshold
= mic_p2p_proxy_thresh
;
#ifdef CONFIG_MMU_NOTIFIER
destroy_workqueue(ms_info
.mi_conn_wq
);
destroy_workqueue(ms_info
.mi_misc_wq
);
sysfs_remove_group(&micinfo
.m_scifdev
->kobj
, &scif_attr_group
);
device_destroy(micinfo
.m_class
, micinfo
.m_dev
+ 1);
device_destroy(micinfo
.m_class
, micinfo
.m_dev
);
class_destroy(micinfo
.m_class
);
cdev_del(&(micinfo
.m_cdev
));
unregister_chrdev_region(micinfo
.m_dev
, 2);
#define SBOX_MMIO_LENGTH 0x10000
static int micscif_init(void)
phys_addr_t host_queue_phys
;
phys_addr_t gtt_phys_base
;
pr_debug("HELLO SCIF!\n");
#if defined(CONFIG_ML1OM)
pr_debug("micscif_init(): Hello KNF!\n");
#elif defined(CONFIG_MK1OM)
pr_debug("micscif_init(): Hello KNC!\n");
if (!scif_id
|| !scif_addr
) {
printk(KERN_ERR
"%s %d scif_id 0x%x scif_addr 0x%lx"
"not provided as module parameter. Fail module load",
__func__
, __LINE__
, scif_id
, scif_addr
);
for (i
= 1; i
<= MAX_BOARD_SUPPORTED
; i
++) {
scif_dev
[i
].sd_state
= SCIFDEV_INIT
;
scif_dev
[i
].sd_numa_node
= -1;
mutex_init (&scif_dev
[i
].sd_lock
);
init_waitqueue_head(&scif_dev
[i
].sd_mmap_wq
);
init_waitqueue_head(&scif_dev
[i
].sd_wq
);
init_waitqueue_head(&scif_dev
[i
].sd_p2p_wq
);
INIT_DELAYED_WORK(&scif_dev
[i
].sd_p2p_dwork
,
scif_dev
[i
].sd_p2p_retry
= 0;
// Setup the host node access information
// Initially only talks to the host => node 0
scif_dev
[SCIF_HOST_NODE
].sd_node
= SCIF_HOST_NODE
;
scif_dev
[SCIF_HOST_NODE
].sd_state
= SCIFDEV_RUNNING
;
if (!(scif_dev
[SCIF_HOST_NODE
].mm_sbox
=
ioremap_nocache(SBOX_BASE
, SBOX_MMIO_LENGTH
))) {
scif_dev
[SCIF_HOST_NODE
].scif_ref_cnt
= (atomic_long_t
) ATOMIC_LONG_INIT(0);
scif_dev
[SCIF_HOST_NODE
].scif_map_ref_cnt
= 0;
init_waitqueue_head(&scif_dev
[SCIF_HOST_NODE
].sd_wq
);
init_waitqueue_head(&scif_dev
[SCIF_HOST_NODE
].sd_mmap_wq
);
mutex_init(&scif_dev
[SCIF_HOST_NODE
].sd_lock
);
gtt_phys_base
= readl(scif_dev
[SCIF_HOST_NODE
].mm_sbox
+ SBOX_GTT_PHY_BASE
);
gtt_phys_base
*= ((4) * 1024);
pr_debug("GTT PHY BASE in GDDR 0x%llx\n", gtt_phys_base
);
pr_debug("micscif_init(): gtt_phy_base x%llx\n", gtt_phys_base
);
/* Get handle to DMA device */
if ((result
= open_dma_device(0, 0, &mic_dma_handle
)))
ms_info
.mi_nodeid
= scif_id
;
ms_info
.mi_maxid
= scif_id
;
ms_info
.mi_total
= 2; // Host plus this card
ms_info
.rma_unaligned_cpu_cnt
= (atomic_long_t
) ATOMIC_LONG_INIT(0);
ms_info
.rma_alloc_cnt
= (atomic_long_t
) ATOMIC_LONG_INIT(0);
ms_info
.rma_pin_cnt
= (atomic_long_t
) ATOMIC_LONG_INIT(0);
#ifdef CONFIG_MMU_NOTIFIER
ms_info
.mmu_notif_cnt
= (atomic_long_t
) ATOMIC_LONG_INIT(0);
pr_debug("micscif_init(): setup_card_qp \n");
host_queue_phys
= scif_addr
;
mutex_init(&ms_info
.mi_event_cblock
);
mutex_init(&ms_info
.mi_conflock
);
INIT_LIST_HEAD(&ms_info
.mi_event_cb
);
pr_debug("micscif_init(): setup_interrupts \n");
* Set up the workqueue thread for interrupt handling
if ((result
= micscif_setup_interrupts(&scif_dev
[SCIF_HOST_NODE
])))
pr_debug("micscif_init(): host_intr_handler \n");
if ((result
= micscif_setup_card_qp(host_queue_phys
, &scif_dev
[SCIF_HOST_NODE
]))) {
/* need to do this last -- as soon as the dev is setup, userspace
* can try to use the device
pr_debug("micscif_init(): setup_base \n");
if ((result
= micscif_setup_base()))
if ((result
= register_scif_intr_handler(&scif_dev
[SCIF_HOST_NODE
])))
// Setup information for self aka loopback.
scif_dev
[ms_info
.mi_nodeid
].sd_node
= ms_info
.mi_nodeid
;
scif_dev
[ms_info
.mi_nodeid
].sd_numa_node
= mic_host_numa_node
;
scif_dev
[ms_info
.mi_nodeid
].mm_sbox
= scif_dev
[SCIF_HOST_NODE
].mm_sbox
;
scif_dev
[ms_info
.mi_nodeid
].scif_ref_cnt
= (atomic_long_t
) ATOMIC_LONG_INIT(0);
scif_dev
[ms_info
.mi_nodeid
].scif_map_ref_cnt
= 0;
init_waitqueue_head(&scif_dev
[ms_info
.mi_nodeid
].sd_wq
);
init_waitqueue_head(&scif_dev
[ms_info
.mi_nodeid
].sd_mmap_wq
);
mutex_init(&scif_dev
[ms_info
.mi_nodeid
].sd_lock
);
if ((result
= micscif_setup_loopback_qp(&scif_dev
[ms_info
.mi_nodeid
])))
scif_dev
[ms_info
.mi_nodeid
].sd_state
= SCIFDEV_RUNNING
;
unaligned_cache
= micscif_kmem_cache_create();
pr_debug("micscif_init(): Setup successful: 0x%llx \n", host_queue_phys
);
micpm_device_register(&mic_deviceevent
);
if ((result
= register_reboot_notifier(&micscif_reboot_notifier
)))
#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34))
result
= pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY
, "micscif", mic_pm_qos_cpu_dma_lat
);
printk("%s %d mic_pm_qos_cpu_dma_lat %d result %d\n",
__func__
, __LINE__
, mic_pm_qos_cpu_dma_lat
, result
);
/* Dont fail driver load due to PM QoS API. Fall through */
micpm_device_unregister(&mic_deviceevent
);
micscif_kmem_cache_destroy();
micscif_destroy_loopback_qp(&scif_dev
[ms_info
.mi_nodeid
]);
deregister_scif_intr_handler(&scif_dev
[SCIF_HOST_NODE
]);
pr_debug("Unable to finish scif setup for some reason: %d\n", result
);
micscif_uninit_qp(&scif_dev
[SCIF_HOST_NODE
]);
micscif_destroy_interrupts(&scif_dev
[SCIF_HOST_NODE
]);
close_dma_device(0, &mic_dma_handle
);
iounmap(scif_dev
[SCIF_HOST_NODE
].mm_sbox
);
module_init(micscif_init
);
module_exit(micscif_exit
);
module_param_named(huge_page
, mic_huge_page_enable
, bool, 0600);
MODULE_PARM_DESC(huge_page
, "SCIF Huge Page Support");
module_param_named(ulimit
, mic_ulimit_check
, bool, 0600);
MODULE_PARM_DESC(ulimit
, "SCIF ulimit check");
module_param_named(reg_cache
, mic_reg_cache_enable
, bool, 0600);
MODULE_PARM_DESC(reg_cache
, "SCIF registration caching");
module_param_named(p2p
, mic_p2p_enable
, bool, 0600);
MODULE_PARM_DESC(p2p
, "SCIF peer-to-peer");
module_param_named(p2p_proxy
, mic_p2p_proxy_enable
, bool, 0600);
MODULE_PARM_DESC(p2p_proxy
, "SCIF peer-to-peer proxy DMA support");
module_param_named(pm_qos_cpu_dma_lat
, mic_pm_qos_cpu_dma_lat
, int, 0600);
MODULE_PARM_DESC(pm_qos_cpu_dma_lat
, "PM QoS CPU DMA latency in usecs.");
module_param_named(numa_node
, mic_host_numa_node
, int, 0600);
MODULE_PARM_DESC(numa_node
, "Host Numa node to which MIC is attached");
module_param_named(p2p_proxy_thresh
, mic_p2p_proxy_thresh
, ulong
, 0600);
MODULE_PARM_DESC(numa_node
, "Transfer size after which Proxy DMA helps DMA perf");
MODULE_INFO(build_number
, BUILD_NUMBER
);
MODULE_INFO(build_bywhom
, BUILD_BYWHOM
);
MODULE_INFO(build_ondate
, BUILD_ONDATE
);
MODULE_INFO(build_scmver
, BUILD_SCMVER
);