[xeon-phi-kernel-module] / micscif / micscif_rma_list.c
/*
 * Copyright 2010-2017 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Disclaimer: The codes contained in these modules may be specific to
 * the Intel Software Development Platform codenamed Knights Ferry,
 * and the Intel product codenamed Knights Corner, and are not backward
 * compatible with other Intel products. Additionally, Intel will NOT
 * support the codes or instruction set in future products.
 *
 * Intel offers no warranty of any kind regarding the code. This code is
 * licensed on an "AS IS" basis and Intel is not obligated to provide
 * any support, assistance, installation, training, or other services
 * of any kind. Intel is also not obligated to provide any updates,
 * enhancements or extensions. Intel specifically disclaims any warranty
 * of merchantability, non-infringement, fitness for any particular
 * purpose, and any other warranty.
 *
 * Further, Intel disclaims all liability of any kind, including but
 * not limited to liability for infringement of any proprietary rights,
 * relating to the use of the code, even if Intel is notified of the
 * possibility of such liability. Except as expressly stated in an Intel
 * license agreement provided with this code and agreed upon with Intel,
 * no license, express or implied, by estoppel or otherwise, to any
 * intellectual property rights is granted herein.
 */

#include "mic/micscif.h"
#include "mic/micscif_smpt.h"
#include "mic/mic_dma_api.h"
#include "mic/micscif_kmem_cache.h"
#ifdef CONFIG_MMU_NOTIFIER
#include <linux/mmu_notifier.h>
#include <linux/highmem.h>
#endif
#ifndef _MIC_SCIF_
#include "mic_common.h"
#endif
#include "mic/micscif_map.h"

/*
 * micscif_insert_tcw:
 *
 * Insert a temp window into the temp registration list sorted by va_for_temp.
 * RMA lock must be held.
 */
void micscif_insert_tcw(struct reg_range_t *window,
			struct list_head *head)
{
	struct reg_range_t *curr = NULL, *prev = NULL;
	struct list_head *item;

	BUG_ON(!window);
	INIT_LIST_HEAD(&window->list_member);
	/*
	 * HSD 4845254
	 * Hack for worst case performance:
	 * compare with the tail and, if the new entry is the new tail,
	 * add it to the end without walking the list.
	 */
	if (!list_empty(head)) {
		curr = list_entry(head->prev, struct reg_range_t, list_member);
		if ((uint64_t)curr->va_for_temp < (uint64_t)window->va_for_temp) {
			list_add_tail(&window->list_member, head);
			return;
		}
	}
	/*
	 * The if (!prev) check below is not strictly needed after the
	 * tail shortcut above, but it is kept deliberately: anyone who
	 * changes the shortcut would otherwise have to remember to
	 * reintroduce it.
	 */
	list_for_each(item, head) {
		curr = list_entry(item, struct reg_range_t, list_member);
		if ((uint64_t)curr->va_for_temp > (uint64_t)window->va_for_temp)
			break;
		prev = curr;
	}
	if (!prev)
		list_add(&window->list_member, head);
	else
		list_add(&window->list_member, &prev->list_member);
}
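
/*
 * Usage sketch (illustrative only, not part of the driver): inserting a
 * temp window into an endpoint's cached-window list. The list-head name
 * tcw_reg_list and the alloc_window() helper are assumptions made for
 * the example; the va_for_temp sort key and the locking rule come
 * straight from the function above.
 *
 *	struct reg_range_t *window = alloc_window(nr_pages);
 *
 *	window->va_for_temp = va;	// key the list is sorted by
 *	mutex_lock(&ep->rma_info.rma_lock);
 *	micscif_insert_tcw(window, &ep->rma_info.tcw_reg_list);
 *	mutex_unlock(&ep->rma_info.rma_lock);
 */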

/*
 * micscif_insert_window:
 *
 * Insert a window into the self registration list sorted by offset.
 * RMA lock must be held.
 */
void micscif_insert_window(struct reg_range_t *window, struct list_head *head)
{
	struct reg_range_t *curr = NULL, *prev = NULL;
	struct list_head *item;

	BUG_ON(!window);
	INIT_LIST_HEAD(&window->list_member);
	list_for_each(item, head) {
		curr = list_entry(item, struct reg_range_t, list_member);
		if (curr->offset > window->offset)
			break;
		prev = curr;
	}
	if (!prev)
		list_add(&window->list_member, head);
	else
		list_add(&window->list_member, &prev->list_member);
}
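
/*
 * Usage sketch (illustrative only): inserting a self-registered window,
 * sorted by offset, into the endpoint's reg_list (the same list head
 * that micscif_unregister_all_windows() walks below). The make_window()
 * helper is hypothetical; the RMA mutex requirement is real.
 *
 *	struct reg_range_t *window = make_window(offset, nr_pages, prot);
 *
 *	mutex_lock(&ep->rma_info.rma_lock);
 *	micscif_insert_window(window, &ep->rma_info.reg_list);
 *	mutex_unlock(&ep->rma_info.rma_lock);
 */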

/*
 * micscif_query_tcw:
 *
 * Query the temp cached registration list of ep and check if a valid
 * contiguous range of windows exists.
 * If there is a partial overlap, delete the existing window and create
 * a new one that encompasses the previous window and the new range.
 * RMA lock must be held.
 */
int micscif_query_tcw(struct endpt *ep, struct micscif_rma_req *req)
{
	struct list_head *item, *temp;
	struct reg_range_t *window;
	uint64_t start_va_window, start_va_req = (uint64_t)req->va_for_temp;
	uint64_t end_va_window, end_va_req = start_va_req + req->nr_bytes;

	/*
	 * HSD 4845254
	 * Hack for the worst case scenario:
	 * compare against the tail first to avoid traversing the entire
	 * list just to find out that there is no matching entry.
	 */
	if (!list_empty(req->head)) {
		temp = req->head->prev;
		window = list_entry(temp, struct reg_range_t, list_member);
		end_va_window = (uint64_t)window->va_for_temp +
			(window->nr_pages << PAGE_SHIFT);
		if (start_va_req > end_va_window)
			return -ENXIO;
	}
	list_for_each_safe(item, temp, req->head) {
		window = list_entry(item, struct reg_range_t, list_member);
		start_va_window = (uint64_t)window->va_for_temp;
		end_va_window = (uint64_t)window->va_for_temp +
			(window->nr_pages << PAGE_SHIFT);
		pr_debug("%s %d start_va_window 0x%llx end_va_window 0x%llx"
			" start_va_req 0x%llx end_va_req 0x%llx req->nr_bytes 0x%lx\n",
			__func__, __LINE__, start_va_window, end_va_window,
			start_va_req, end_va_req, req->nr_bytes);
		if (start_va_req < start_va_window) {
			if (end_va_req < start_va_window) {
				/* No overlap */
			} else {
				/*
				 * Partial overlap: grow the request to
				 * cover the existing window, but only if
				 * the window's protections suffice.
				 */
				if ((window->prot & req->prot) == req->prot) {
					req->nr_bytes += (end_va_req > end_va_window) ?
						0 : (end_va_window - end_va_req);
					pr_debug("%s %d Extend req->va_for_temp %p req->nr_bytes 0x%lx\n",
						__func__, __LINE__, req->va_for_temp, req->nr_bytes);
				}
				__micscif_rma_destroy_tcw_helper(window);
			}
			break;
		} else {
			if (start_va_req > end_va_window) {
				/* No overlap */
				continue;
			} else {
				if ((window->prot & req->prot) != req->prot) {
					__micscif_rma_destroy_tcw_helper(window);
					break;
				}
				if (end_va_req > end_va_window) {
					req->va_for_temp = (void *)start_va_window;
					req->nr_bytes = end_va_req - start_va_window;
					pr_debug("%s %d Extend req->va_for_temp %p req->nr_bytes 0x%lx\n",
						__func__, __LINE__, req->va_for_temp, req->nr_bytes);
					__micscif_rma_destroy_tcw_helper(window);
					return -ENXIO;
				} else {
					*(req->out_window) = window;
					return 0;
				}
			}
		}
	}
	pr_debug("%s %d ENXIO\n", __func__, __LINE__);
	return -ENXIO;
}
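
/*
 * Usage sketch (illustrative only): a caller probing the cached list
 * before registering a new temp window. On -ENXIO the request may have
 * been grown to swallow partially overlapping windows, so the caller
 * registers req.va_for_temp/req.nr_bytes rather than the original
 * range. register_temp_window() and the tcw_reg_list list-head name
 * are hypothetical stand-ins here.
 *
 *	struct reg_range_t *window;
 *	struct micscif_rma_req req;
 *
 *	req.va_for_temp = va;
 *	req.nr_bytes = len;
 *	req.prot = prot;
 *	req.head = &ep->rma_info.tcw_reg_list;
 *	req.out_window = &window;
 *	if (micscif_query_tcw(ep, &req))
 *		err = register_temp_window(ep, &req);	// miss: create one
 *	// on success, *req.out_window covers the requested range
 */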

/*
 * micscif_query_window:
 *
 * Query the registration list and check if a valid contiguous
 * range of windows exists.
 * RMA lock must be held.
 */
int micscif_query_window(struct micscif_rma_req *req)
{
	struct list_head *item;
	struct reg_range_t *window;
	uint64_t end_offset, offset = req->offset;
	uint64_t tmp_min, nr_bytes_left = req->nr_bytes;

	list_for_each(item, req->head) {
		window = list_entry(item, struct reg_range_t, list_member);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		if (offset < window->offset)
			/* Offset not found! */
			return -ENXIO;
		if (offset < end_offset) {
			/* Check read/write protections. */
			if ((window->prot & req->prot) != req->prot)
				return -EPERM;
			if (nr_bytes_left == req->nr_bytes)
				/* Store the first window */
				*(req->out_window) = window;
			tmp_min = min(end_offset - offset, nr_bytes_left);
			nr_bytes_left -= tmp_min;
			offset += tmp_min;
			/*
			 * The requested range may span
			 * multiple windows contiguously.
			 */
			if (!nr_bytes_left) {
				/* Done for partial window */
				if (req->type == WINDOW_PARTIAL ||
					req->type == WINDOW_SINGLE)
					return 0;
				/* Extra logic for full windows */
				if (offset == end_offset)
					/* Spanning multiple whole windows */
					return 0;
				/* Not spanning multiple whole windows */
				return -ENXIO;
			}
			if (req->type == WINDOW_SINGLE)
				break;
		}
	}
	printk(KERN_ERR "%s %d ENXIO\n", __func__, __LINE__);
	return -ENXIO;
}
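
/*
 * Usage sketch (illustrative only): validating that a remote offset
 * range is covered by registered windows before starting an RMA
 * operation. WINDOW_PARTIAL, remote_reg_list and the req fields are
 * taken from this file; the surrounding variables are assumed.
 *
 *	struct reg_range_t *window;
 *	struct micscif_rma_req req;
 *
 *	req.offset = roffset;
 *	req.nr_bytes = len;
 *	req.prot = prot;	// same bit encoding as window->prot
 *	req.type = WINDOW_PARTIAL;
 *	req.head = &ep->rma_info.remote_reg_list;
 *	req.out_window = &window;
 *	err = micscif_query_window(&req);	// 0, -ENXIO or -EPERM
 */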

/*
 * micscif_rma_list_mmap:
 *
 * Traverse the remote registration list starting from start_window:
 * 1) Check read/write protections.
 * 2) Create VtoP mappings via remap_pfn_range(..).
 * 3) Once steps 1) and 2) complete successfully, traverse the range of
 *    windows again and bump the reference count.
 * RMA lock must be held.
 */
int micscif_rma_list_mmap(struct reg_range_t *start_window,
	uint64_t offset, int nr_pages, struct vm_area_struct *vma)
{
	struct list_head *item, *head;
	uint64_t end_offset, loop_offset = offset;
	struct reg_range_t *window;
	int64_t start_page_nr, loop_nr_pages, nr_pages_left = nr_pages;
	struct endpt *ep = (struct endpt *)start_window->ep;
	int i, err = 0;
	uint64_t j = 0;
	dma_addr_t phys_addr;

	might_sleep();
	BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock));

	/* Start traversing from the previous link in the list */
	head = (&start_window->list_member)->prev;
	list_for_each(item, head) {
		window = list_entry(item, struct reg_range_t, list_member);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		start_page_nr = (loop_offset - window->offset) >> PAGE_SHIFT;
		loop_nr_pages = min((int64_t)((end_offset - loop_offset) >> PAGE_SHIFT),
			nr_pages_left);
		for (i = (int)start_page_nr;
			i < ((int)start_page_nr + (int)loop_nr_pages); i++, j++) {
			phys_addr =
#if !defined(_MIC_SCIF_) && defined(CONFIG_ML1OM)
				is_self_scifdev(ep->remote_dev) ?
				micscif_get_dma_addr(window, loop_offset,
				NULL, NULL, NULL) : window->phys_addr[i];
#else
				get_phys_addr(micscif_get_dma_addr(window, loop_offset,
				NULL, NULL, NULL), ep->remote_dev);
#endif
			/*
			 * Note:
			 * 1) remap_pfn_range returns an error if there is an
			 *    attempt to create MAP_PRIVATE COW mappings.
			 */
			if ((err = remap_pfn_range(vma,
					vma->vm_start + (j * PAGE_SIZE),
					phys_addr >> PAGE_SHIFT,
					PAGE_SIZE,
					vma->vm_page_prot)))
				goto error;
			loop_offset += PAGE_SIZE;
		}
		nr_pages_left -= loop_nr_pages;
		if (!nr_pages_left)
			break;
	}
	BUG_ON(nr_pages_left);
	/*
	 * No more failures expected. Bump up the ref count for all the
	 * windows. The ref count bump is deferred to this second
	 * traversal from start_window so that errors encountered across
	 * windows during remap_pfn_range(..) can be handled cleanly.
	 */
	loop_offset = offset;
	nr_pages_left = nr_pages;
	head = (&start_window->list_member)->prev;
	list_for_each(item, head) {
		window = list_entry(item, struct reg_range_t, list_member);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		start_page_nr = (loop_offset - window->offset) >> PAGE_SHIFT;
		loop_nr_pages = min((int64_t)((end_offset - loop_offset) >> PAGE_SHIFT),
			nr_pages_left);
		get_window_ref_count(window, loop_nr_pages);
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
	BUG_ON(nr_pages_left);
error:
	if (err)
		printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err);
	return err;
}
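
/*
 * Usage sketch (illustrative only): wiring micscif_rma_list_mmap() into
 * an mmap handler. The first window is looked up beforehand (e.g. via
 * micscif_query_window() above), then the whole VMA is mapped page by
 * page; window ref counts are bumped only after every
 * remap_pfn_range() has succeeded. Variable names are assumed.
 *
 *	int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 *
 *	mutex_lock(&ep->rma_info.rma_lock);
 *	err = micscif_rma_list_mmap(start_window, offset, nr_pages, vma);
 *	mutex_unlock(&ep->rma_info.rma_lock);
 */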

/*
 * micscif_rma_list_munmap:
 *
 * Traverse the remote registration list starting from window:
 * 1) Decrement the ref count.
 * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to the peer.
 * RMA lock must be held.
 */
void micscif_rma_list_munmap(struct reg_range_t *start_window,
	uint64_t offset, int nr_pages)
{
	struct list_head *item, *tmp, *head;
	struct nodemsg msg;
	uint64_t loop_offset = offset, end_offset;
	int64_t loop_nr_pages, nr_pages_left = nr_pages;
	struct endpt *ep = (struct endpt *)start_window->ep;
	struct reg_range_t *window;

	BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock));

	msg.uop = SCIF_MUNMAP;
	msg.src = ep->port;
	/* Start traversing from the previous link in the list */
	head = (&start_window->list_member)->prev;
	list_for_each_safe(item, tmp, head) {
		window = list_entry(item, struct reg_range_t, list_member);
		RMA_MAGIC(window);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min((int64_t)((end_offset - loop_offset) >> PAGE_SHIFT),
			nr_pages_left);
		put_window_ref_count(window, loop_nr_pages);
		if (!window->ref_count) {
			if (scifdev_alive(ep))
				drain_dma_intr(ep->rma_info.dma_chan);
			/* Inform the peer about this munmap */
			msg.payload[0] = window->peer_window;
			/* No error handling for notification messages. */
			micscif_nodeqp_send(ep->remote_dev, &msg, ep);
			list_del(&window->list_member);
			/* Destroy this window from the peer's registered AS */
			micscif_destroy_remote_window(ep, window);
		}
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
	BUG_ON(nr_pages_left);
}
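
/*
 * Usage sketch (illustrative only): the teardown counterpart of the
 * mmap path above, typically driven from a VMA close callback. Each
 * page dropped here balances one reference taken by
 * micscif_rma_list_mmap(); names other than the function itself are
 * assumed.
 *
 *	mutex_lock(&ep->rma_info.rma_lock);
 *	micscif_rma_list_munmap(start_window, offset, nr_pages);
 *	mutex_unlock(&ep->rma_info.rma_lock);
 */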

/*
 * micscif_rma_list_unregister:
 *
 * Traverse the self registration list starting from window:
 * 1) Call micscif_unregister_window(..)
 * RMA lock must be held.
 */
int micscif_rma_list_unregister(struct reg_range_t *window,
	uint64_t offset, int nr_pages)
{
	struct list_head *item, *tmp, *head;
	uint64_t end_offset;
	int err = 0;
	int64_t loop_nr_pages;
	struct endpt *ep = (struct endpt *)window->ep;

	BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock));
	/* Start traversing from the previous link in the list */
	head = (&window->list_member)->prev;
	list_for_each_safe(item, tmp, head) {
		window = list_entry(item, struct reg_range_t, list_member);
		RMA_MAGIC(window);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
			nr_pages);
		if ((err = micscif_unregister_window(window)))
			return err;
		nr_pages -= (int)loop_nr_pages;
		offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages)
			break;
	}
	BUG_ON(nr_pages);
	return 0;
}
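
/*
 * Usage sketch (illustrative only): unregistering a sub-range of the
 * self registration list, e.g. in response to a peer's unregister
 * request. A non-zero return means micscif_unregister_window() failed
 * partway and the remaining windows are left registered.
 *
 *	mutex_lock(&ep->rma_info.rma_lock);
 *	err = micscif_rma_list_unregister(window, offset, nr_pages);
 *	mutex_unlock(&ep->rma_info.rma_lock);
 */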

/*
 * micscif_unregister_all_windows:
 *
 * Traverse all the windows in the self registration list and:
 * 1) Call micscif_unregister_window(..)
 * RMA lock must be held.
 */
int micscif_unregister_all_windows(scif_epd_t epd)
{
	struct list_head *item, *tmp;
	struct reg_range_t *window;
	struct endpt *ep = (struct endpt *)epd;
	struct list_head *head = &ep->rma_info.reg_list;
	int err = 0;

	queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work);
	mutex_lock(&ep->rma_info.rma_lock);
retry:
	item = NULL;
	tmp = NULL;
	list_for_each_safe(item, tmp, head) {
		window = list_entry(item, struct reg_range_t, list_member);
		ep->rma_info.async_list_del = 0;
		if ((err = micscif_unregister_window(window)))
			pr_debug("%s %d err %d\n",
				__func__, __LINE__, err);
		/*
		 * Need to restart the list traversal if there has been
		 * an asynchronous list entry deletion.
		 */
		if (ep->rma_info.async_list_del)
			goto retry;
	}
	mutex_unlock(&ep->rma_info.rma_lock);

	/*
	 * The following waits cannot be interruptible since they are
	 * issued from the driver release() entry point.
	 */
	err = wait_event_timeout(ep->rma_info.fence_wq,
		!ep->rma_info.fence_refcount, NODE_ALIVE_TIMEOUT);
	/* The timeout firing is unexpected. Is the DMA engine hung? */
	if (!err)
		printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err);

#ifdef CONFIG_MMU_NOTIFIER
	if (!list_empty(&ep->rma_info.mmn_list)) {
		spin_lock(&ms_info.mi_rmalock);
		list_add_tail(&ep->mmu_list, &ms_info.mi_mmu_notif_cleanup);
		spin_unlock(&ms_info.mi_rmalock);
		queue_work(ms_info.mi_mmu_notif_wq, &ms_info.mi_mmu_notif_work);
	}
#endif
	return err;
}
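
/*
 * Usage sketch (illustrative only): called on endpoint teardown, e.g.
 * from the driver's release path, after which no self-registered
 * windows remain. Unlike the helpers above, this function takes the
 * RMA mutex itself, so the caller must not hold it. Note the return
 * value is the wait_event_timeout() result (0 on timeout), not a
 * negative errno.
 *
 *	if (!micscif_unregister_all_windows(epd))
 *		; // fence wait timed out; see the comment above
 */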

/*
 * micscif_rma_list_get_pages_check:
 *
 * Traverse the remote registration list and return 0 if all the
 * scif_get_pages()/scif_put_pages() ref counts are zero, else return -1.
 */
int micscif_rma_list_get_pages_check(struct endpt *ep)
{
	struct list_head *item, *head = &ep->rma_info.remote_reg_list;
	struct reg_range_t *window;
	int err = 0;

	mutex_lock(&ep->rma_info.rma_lock);
	list_for_each(item, head) {
		window = list_entry(item, struct reg_range_t, list_member);
		if (window->get_put_ref_count) {
			err = -1;
			break;
		}
	}
	mutex_unlock(&ep->rma_info.rma_lock);
	return err;
}
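
/*
 * Usage sketch (illustrative only): gating endpoint teardown on
 * outstanding scif_get_pages() references. The retry policy shown is
 * hypothetical; the -1 return simply means some window still has a
 * non-zero get_put_ref_count.
 *
 *	while (micscif_rma_list_get_pages_check(ep))
 *		schedule();	// peers still hold pages; try again later
 */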

/* Debug-only APIs below */
void micscif_display_all_windows(struct list_head *head)
{
	struct list_head *item;
	struct reg_range_t *window;

	pr_debug("\nWindow List Start\n");
	list_for_each(item, head) {
		window = list_entry(item, struct reg_range_t, list_member);
		micscif_display_window(window, __func__, __LINE__);
	}
	pr_debug("Window List End\n\n");
}
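
/*
 * Usage sketch (illustrative only): dumping both registration lists of
 * an endpoint while debugging, with the RMA mutex held so the lists
 * cannot change underneath the walk.
 *
 *	mutex_lock(&ep->rma_info.rma_lock);
 *	micscif_display_all_windows(&ep->rma_info.reg_list);
 *	micscif_display_all_windows(&ep->rma_info.remote_reg_list);
 *	mutex_unlock(&ep->rma_info.rma_lock);
 */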