/*
* Copyright 2010-2017 Intel Corporation.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2,
* as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
* Disclaimer: The codes contained in these modules may be specific to
* the Intel Software Development Platform codenamed Knights Ferry,
* and the Intel product codenamed Knights Corner, and are not backward
* compatible with other Intel products. Additionally, Intel will NOT
* support the codes or instruction set in future products.
* Intel offers no warranty of any kind regarding the code. This code is
* licensed on an "AS IS" basis and Intel is not obligated to provide
* any support, assistance, installation, training, or other services
* of any kind. Intel is also not obligated to provide any updates,
* enhancements or extensions. Intel specifically disclaims any warranty
* of merchantability, non-infringement, fitness for any particular
* purpose, and any other warranty.
* Further, Intel disclaims all liability of any kind, including but
* not limited to liability for infringement of any proprietary rights,
* relating to the use of the code, even if Intel is notified of the
* possibility of such liability. Except as expressly stated in an Intel
* license agreement provided with this code and agreed upon with Intel,
* no license, express or implied, by estoppel or otherwise, to any
* intellectual property rights is granted herein.
*/
#include "mic/micscif_smpt.h"
#include "mic/mic_dma_api.h"
#include "mic/micscif_kmem_cache.h"
#ifdef CONFIG_MMU_NOTIFIER
#include <linux/mmu_notifier.h>
#include <linux/highmem.h>
#include "mic/micscif_map.h"
* Insert a temp window to the temp registration list sorted by va_for_temp.
void micscif_insert_tcw(struct reg_range_t
*window
,
struct reg_range_t
*curr
= NULL
, *prev
= NULL
;
INIT_LIST_HEAD(&window
->list_member
);
* Hack for worst case performance
* Compare with tail and if the new entry is new tail add it to the end
curr
= list_entry(head
->prev
, struct reg_range_t
, list_member
);
if ((uint64_t) curr
->va_for_temp
< (uint64_t) window
->va_for_temp
) {
list_add_tail(&window
->list_member
, head
);
* We don't need the if(!prev) code but I am gonna leave it as
* is for now. If someone touches the above code it is likely that they
* will miss that they have to add if(!prev) block
list_for_each(item
, head
) {
curr
= list_entry(item
, struct reg_range_t
, list_member
);
if ((uint64_t) curr
->va_for_temp
> (uint64_t) window
->va_for_temp
)
list_add(&window
->list_member
, head
);
list_add(&window
->list_member
, &prev
->list_member
);
* Insert a window to the self registration list sorted by offset.
void micscif_insert_window(struct reg_range_t
*window
, struct list_head
*head
)
struct reg_range_t
*curr
= NULL
, *prev
= NULL
;
INIT_LIST_HEAD(&window
->list_member
);
list_for_each(item
, head
) {
curr
= list_entry(item
, struct reg_range_t
, list_member
);
if (curr
->offset
> window
->offset
)
list_add(&window
->list_member
, head
);
list_add(&window
->list_member
, &prev
->list_member
);
/*
* Query the temp cached registration list of ep and check if a valid contiguous
* range of windows exist.
* If there is a partial overlap, delete the existing window and create a new one
* that encompasses the previous window and a new range
*
 * NOTE(review): this extract appears garbled -- braces, break/return
 * statements and the retval handling seem to have been dropped during
 * extraction. Code tokens below are left byte-identical; compare
 * against the pristine source before building.
*/
int micscif_query_tcw(struct endpt
*ep
, struct micscif_rma_req
*req
)
/* List cursors (safe-iteration pair) and per-iteration window pointer. */
struct list_head
*item
, *temp
;
struct reg_range_t
*window
;
/* Requested VA range [start_va_req, end_va_req) derived from req. */
uint64_t start_va_window
, start_va_req
= (uint64_t) req
->va_for_temp
;
uint64_t end_va_window
, end_va_req
= start_va_req
+ req
->nr_bytes
;
/*
* Hack for the worst case scenario
* Avoid traversing the entire list to find out that there is no
 * overlap (original comment appears truncated in this extract).
*/
if (!list_empty(req
->head
)) {
window
= list_entry(temp
,
struct reg_range_t
, list_member
);
/* End VA of that window = base + (nr_pages in bytes). */
end_va_window
= (uint64_t) window
->va_for_temp
+
(window
->nr_pages
<< PAGE_SHIFT
);
if (start_va_req
> end_va_window
)
/* Safe iteration: windows may be destroyed while walking the list. */
list_for_each_safe(item
, temp
, req
->head
) {
window
= list_entry(item
,
struct reg_range_t
, list_member
);
start_va_window
= (uint64_t) window
->va_for_temp
;
end_va_window
= (uint64_t) window
->va_for_temp
+
(window
->nr_pages
<< PAGE_SHIFT
);
pr_debug("%s %d start_va_window 0x%llx end_va_window 0x%llx"
" start_va_req 0x%llx end_va_req 0x%llx req->nr_bytes 0x%lx\n",
__func__
, __LINE__
, start_va_window
, end_va_window
,
start_va_req
, end_va_req
, req
->nr_bytes
);
/* Request begins below this window's base VA. */
if (start_va_req
< start_va_window
) {
if (end_va_req
< start_va_window
) {
/* Protection mismatch: grow req->nr_bytes up to this window's
 * end (unless the request already ends past it) and destroy
 * the stale cached window. */
if ((window
->prot
& req
->prot
) != req
->prot
) {
req
->nr_bytes
+= ((end_va_req
> end_va_window
) ? 0:(end_va_window
- end_va_req
));
pr_debug("%s %d Extend req->va_for_temp %p req->nr_byte 0x%lx\n",
__func__
, __LINE__
, req
->va_for_temp
, req
->nr_bytes
);
__micscif_rma_destroy_tcw_helper(window
);
/* Request begins past this window's end VA. */
if (start_va_req
> end_va_window
) {
/* Protection mismatch on a non-overlapping window: drop it. */
if ((window
->prot
& req
->prot
) != req
->prot
) {
__micscif_rma_destroy_tcw_helper(window
);
/* Partial overlap: re-anchor the request at this window's base so
 * the new window encompasses both ranges, then destroy the old one. */
if (end_va_req
> end_va_window
) {
req
->va_for_temp
= (void*) start_va_window
;
req
->nr_bytes
= end_va_req
- start_va_window
;
pr_debug("%s %d Extend req->va_for_temp %p req->nr_byte 0x%lx\n",
__func__
, __LINE__
, req
->va_for_temp
, req
->nr_bytes
);
__micscif_rma_destroy_tcw_helper(window
);
/* Request fully contained: hand the matching window back to the caller. */
*(req
->out_window
) = window
;
/* No usable cached window found. */
pr_debug("%s %d ENXIO\n", __func__
, __LINE__
);
/*
* Query the registration list and check if a valid contiguous
* range of windows exist.
*
 * NOTE(review): extract appears garbled -- the declaration of `item`,
 * braces, break/return statements and the error/return flow seem to
 * have been dropped. Code tokens are left byte-identical.
*/
int micscif_query_window(struct micscif_rma_req
*req
)
struct reg_range_t
*window
;
/* Current query offset and remaining byte count, taken from req. */
uint64_t end_offset
, offset
= req
->offset
;
uint64_t tmp_min
, nr_bytes_left
= req
->nr_bytes
;
list_for_each(item
, req
->head
) {
window
= list_entry(item
,
struct reg_range_t
, list_member
);
/* End offset of this window = base offset + (nr_pages in bytes). */
end_offset
= window
->offset
+
(window
->nr_pages
<< PAGE_SHIFT
);
/* Gap before this window: requested range cannot be contiguous. */
if (offset
< window
->offset
)
if (offset
< end_offset
) {
/* Check read/write protections. */
if ((window
->prot
& req
->prot
) != req
->prot
)
if (nr_bytes_left
== req
->nr_bytes
)
/* Store the first window */
*(req
->out_window
) = window
;
/* Consume as much of the request as this window covers. */
tmp_min
= min(end_offset
- offset
, nr_bytes_left
);
nr_bytes_left
-= tmp_min
;
/*
* Range requested encompasses
* multiple windows contiguously.
*/
/* Done for partial window */
if (req
->type
== WINDOW_PARTIAL
||
req
->type
== WINDOW_SINGLE
)
/* Extra logic for full windows */
if (offset
== end_offset
)
/* Spanning multiple whole windows */
/* Not spanning multiple whole windows */
if (req
->type
== WINDOW_SINGLE
)
/* No contiguous range satisfies the request. */
printk(KERN_ERR
"%s %d ENXIO\n", __func__
, __LINE__
);
/*
* Traverse the remote registration list starting from start_window:
* 1) Check read/write protections.
* 2) Create VtoP mappings via remap_pfn_range(..)
* 3) Once step 1) and 2) complete successfully then traverse the range of
* windows again and bump the reference count.
*
 * NOTE(review): extract appears garbled -- declarations of `i`, `j`,
 * `err`, the remap_pfn_range() argument tail, the error/goto path and
 * most braces seem to have been dropped. Code tokens are left
 * byte-identical; consult the pristine source.
*/
int micscif_rma_list_mmap(struct reg_range_t
*start_window
,
uint64_t offset
, int nr_pages
, struct vm_area_struct
*vma
)
struct list_head
*item
, *head
;
uint64_t end_offset
, loop_offset
= offset
;
struct reg_range_t
*window
;
int64_t start_page_nr
, loop_nr_pages
, nr_pages_left
= nr_pages
;
struct endpt
*ep
= (struct endpt
*)start_window
->ep
;
/* Caller must hold the endpoint's RMA lock across both traversals. */
BUG_ON(!mutex_is_locked(&ep
->rma_info
.rma_lock
));
/* Start traversing from the previous link in the list */
head
= ((&start_window
->list_member
))->prev
;
list_for_each(item
, head
) {
window
= list_entry(item
, struct reg_range_t
,
end_offset
= window
->offset
+
(window
->nr_pages
<< PAGE_SHIFT
);
/* First page within this window that the request touches. */
start_page_nr
= (loop_offset
- window
->offset
) >> PAGE_SHIFT
;
/* Pages serviced by this window (capped -- tail of min() dropped). */
loop_nr_pages
= min((int64_t)((end_offset
- loop_offset
) >> PAGE_SHIFT
),
/* Map page-by-page; j indexes the vma, i indexes the window. */
for (i
= (int)start_page_nr
;
i
< ((int)start_page_nr
+ (int)loop_nr_pages
); i
++, j
++) {
#if !defined(_MIC_SCIF_) && defined(CONFIG_ML1OM)
/* Loopback endpoints resolve the DMA address dynamically;
 * otherwise use the window's cached physical address. */
is_self_scifdev(ep
->remote_dev
) ?
micscif_get_dma_addr(window
, loop_offset
,
NULL
, NULL
, NULL
) : window
->phys_addr
[i
];
get_phys_addr(micscif_get_dma_addr(window
, loop_offset
,
NULL
, NULL
, NULL
), ep
->remote_dev
);
/*
* 1) remap_pfn_rnage returns an error if there is an
* attempt to create MAP_PRIVATE COW mappings.
*/
if ((err
= remap_pfn_range(vma
,
((vma
)->vm_start
) + (j
* PAGE_SIZE
),
loop_offset
+= PAGE_SIZE
;
nr_pages_left
-= loop_nr_pages
;
/*
* No more failures expected. Bump up the ref count for all
* the windows. Another traversal from start_window required
* for handling errors encountered across windows during
 * the mapping pass (original comment truncated in this extract).
*/
nr_pages_left
= nr_pages
;
head
= (&(start_window
->list_member
))->prev
;
/* Second pass: take a per-page reference on every mapped window. */
list_for_each(item
, head
) {
window
= list_entry(item
, struct reg_range_t
,
end_offset
= window
->offset
+
(window
->nr_pages
<< PAGE_SHIFT
);
start_page_nr
= (loop_offset
- window
->offset
) >> PAGE_SHIFT
;
loop_nr_pages
= min((int64_t)((end_offset
- loop_offset
) >> PAGE_SHIFT
),
get_window_ref_count(window
, loop_nr_pages
);
nr_pages_left
-= loop_nr_pages
;
loop_offset
+= (loop_nr_pages
<< PAGE_SHIFT
);
/* Error path: report the remap failure. */
printk(KERN_ERR
"%s %d err %d\n", __func__
, __LINE__
, err
);
/*
* micscif_rma_list_munmap:
* Traverse the remote registration list starting from window:
* 1) Decrement ref count.
* 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
*
 * NOTE(review): extract appears garbled -- the `msg` declaration and
 * its uop/payload setup, the min() tail, and most braces seem to have
 * been dropped. Code tokens are left byte-identical.
*/
void micscif_rma_list_munmap(struct reg_range_t
*start_window
,
uint64_t offset
, int nr_pages
)
struct list_head
*item
, *tmp
, *head
;
uint64_t loop_offset
= offset
, end_offset
;
int64_t loop_nr_pages
, nr_pages_left
= nr_pages
;
struct endpt
*ep
= (struct endpt
*)start_window
->ep
;
struct reg_range_t
*window
;
/* Caller must hold the endpoint's RMA lock. */
BUG_ON(!mutex_is_locked(&ep
->rma_info
.rma_lock
));
nr_pages_left
= nr_pages
;
/* Start traversing from the previous link in the list */
head
= (&(start_window
->list_member
))->prev
;
/* Safe iteration: windows may be deleted from the list below. */
list_for_each_safe(item
, tmp
, head
) {
window
= list_entry(item
, struct reg_range_t
,
end_offset
= window
->offset
+
(window
->nr_pages
<< PAGE_SHIFT
);
/* Pages of this window covered by the munmap (min() tail dropped). */
loop_nr_pages
= min((int64_t)((end_offset
- loop_offset
) >> PAGE_SHIFT
),
put_window_ref_count(window
, loop_nr_pages
);
/* Last reference gone: tear the window down on both sides. */
if (!window
->ref_count
) {
/* Quiesce DMA before notifying the peer. */
drain_dma_intr(ep
->rma_info
.dma_chan
);
/* Inform the peer about this munmap */
msg
.payload
[0] = window
->peer_window
;
/* No error handling for Notification messages. */
micscif_nodeqp_send(ep
->remote_dev
, &msg
, ep
);
list_del(&window
->list_member
);
/* Destroy this window from the peer's registered AS */
micscif_destroy_remote_window(ep
, window
);
nr_pages_left
-= loop_nr_pages
;
loop_offset
+= (loop_nr_pages
<< PAGE_SHIFT
);
/*
* micscif_rma_list_unregister:
* Traverse the self registration list starting from window:
* 1) Call micscif_unregister_window(..)
*
 * NOTE(review): extract appears garbled -- declarations of `end_offset`,
 * `loop_nr_pages` and `err`, the min() tail, return statements and most
 * braces seem to have been dropped. Code tokens are left byte-identical.
*/
int micscif_rma_list_unregister(struct reg_range_t
*window
,
uint64_t offset
, int nr_pages
)
struct list_head
*item
, *tmp
, *head
;
struct endpt
*ep
= (struct endpt
*)window
->ep
;
/* Caller must hold the endpoint's RMA lock. */
BUG_ON(!mutex_is_locked(&ep
->rma_info
.rma_lock
));
/* Start traversing from the previous link in the list */
head
= (&window
->list_member
)->prev
;
/* Safe iteration: unregistering may remove entries from the list. */
list_for_each_safe(item
, tmp
, head
) {
window
= list_entry(item
, struct reg_range_t
,
end_offset
= window
->offset
+
(window
->nr_pages
<< PAGE_SHIFT
);
/* Pages of this window covered by the request (min() tail dropped). */
loop_nr_pages
= min((int)((end_offset
- offset
) >> PAGE_SHIFT
),
/* Propagate the first unregister failure (return path dropped). */
if ((err
= micscif_unregister_window(window
)))
nr_pages
-= (int)loop_nr_pages
;
offset
+= (loop_nr_pages
<< PAGE_SHIFT
);
/*
* micscif_unregister_all_window:
* Traverse all the windows in the self registration list and:
* 1) Call micscif_unregister_window(..)
*
 * NOTE(review): extract appears garbled -- the `err` declaration, the
 * restart label for the async-deletion retry, return statements and
 * most braces seem to have been dropped. Code tokens are left
 * byte-identical.
*/
int micscif_unregister_all_windows(scif_epd_t epd
)
struct list_head
*item
, *tmp
;
struct reg_range_t
*window
;
struct endpt
*ep
= (struct endpt
*)epd
;
struct list_head
*head
= &ep
->rma_info
.reg_list
;
/* Kick the misc work queue before tearing down registrations. */
queue_work(ms_info
.mi_misc_wq
, &ms_info
.mi_misc_work
);
mutex_lock(&ep
->rma_info
.rma_lock
);
list_for_each_safe(item
, tmp
, head
) {
window
= list_entry(item
,
struct reg_range_t
, list_member
);
/* Clear the flag; set asynchronously if an entry is deleted under us. */
ep
->rma_info
.async_list_del
= 0;
if ((err
= micscif_unregister_window(window
)))
pr_debug("%s %d err %d\n",
__func__
, __LINE__
, err
);
/*
* Need to restart list traversal if there has been
* an asynchronous list entry deletion.
*/
if (ep
->rma_info
.async_list_del
)
mutex_unlock(&ep
->rma_info
.rma_lock
);
/*
* The following waits cannot be interruptible since they are
* from the driver release() entry point.
*/
err
= wait_event_timeout(ep
->rma_info
.fence_wq
,
!ep
->rma_info
.fence_refcount
, NODE_ALIVE_TIMEOUT
);
/* Timeout firing is unexpected. Is the DMA engine hung? */
printk(KERN_ERR
"%s %d err %d\n", __func__
, __LINE__
, err
);
#ifdef CONFIG_MMU_NOTIFIER
/* Hand any remaining MMU-notifier state to the cleanup worker. */
if (!list_empty(&ep
->rma_info
.mmn_list
)) {
spin_lock(&ms_info
.mi_rmalock
);
list_add_tail(&ep
->mmu_list
, &ms_info
.mi_mmu_notif_cleanup
);
spin_unlock(&ms_info
.mi_rmalock
);
queue_work(ms_info
.mi_mmu_notif_wq
, &ms_info
.mi_mmu_notif_work
);
* micscif_rma_list_get_pages_check:
* Traverse the remote registration list and return 0 if all the
* scif_get_pages()/scif_put_pages() ref_counts are zero else return -1.
int micscif_rma_list_get_pages_check(struct endpt
*ep
)
struct list_head
*item
, *head
= &ep
->rma_info
.remote_reg_list
;
struct reg_range_t
*window
;
mutex_lock(&ep
->rma_info
.rma_lock
);
list_for_each(item
, head
) {
window
= list_entry(item
,
struct reg_range_t
, list_member
);
if (window
->get_put_ref_count
) {
mutex_unlock(&ep
->rma_info
.rma_lock
);
/* Only debug API's below */
void micscif_display_all_windows(struct list_head
*head
)
struct reg_range_t
*window
;
pr_debug("\nWindow List Start\n");
list_for_each(item
, head
) {
window
= list_entry(item
,
struct reg_range_t
, list_member
);
micscif_display_window(window
, __func__
, __LINE__
);
pr_debug("Window List End\n\n");