Commit | Line | Data |
---|---|---|
800f879a AT |
1 | /* |
2 | * Copyright 2010-2017 Intel Corporation. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License, version 2, | |
6 | * as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * Disclaimer: The codes contained in these modules may be specific to | |
14 | * the Intel Software Development Platform codenamed Knights Ferry, | |
15 | * and the Intel product codenamed Knights Corner, and are not backward | |
16 | * compatible with other Intel products. Additionally, Intel will NOT | |
17 | * support the codes or instruction set in future products. | |
18 | * | |
19 | * Intel offers no warranty of any kind regarding the code. This code is | |
20 | * licensed on an "AS IS" basis and Intel is not obligated to provide | |
21 | * any support, assistance, installation, training, or other services | |
22 | * of any kind. Intel is also not obligated to provide any updates, | |
23 | * enhancements or extensions. Intel specifically disclaims any warranty | |
24 | * of merchantability, non-infringement, fitness for any particular | |
25 | * purpose, and any other warranty. | |
26 | * | |
27 | * Further, Intel disclaims all liability of any kind, including but | |
28 | * not limited to liability for infringement of any proprietary rights, | |
29 | * relating to the use of the code, even if Intel is notified of the | |
30 | * possibility of such liability. Except as expressly stated in an Intel | |
31 | * license agreement provided with this code and agreed upon with Intel, | |
32 | * no license, express or implied, by estoppel or otherwise, to any | |
33 | * intellectual property rights is granted herein. | |
34 | */ | |
35 | ||
36 | #include "mic/micscif.h" | |
37 | #include "mic/micscif_smpt.h" | |
38 | #include "mic/mic_dma_api.h" | |
39 | #include "mic/micscif_kmem_cache.h" | |
40 | #ifdef CONFIG_MMU_NOTIFIER | |
41 | #include <linux/mmu_notifier.h> | |
42 | #include <linux/highmem.h> | |
43 | #endif | |
44 | #ifndef _MIC_SCIF_ | |
45 | #include "mic_common.h" | |
46 | #endif | |
47 | #include "mic/micscif_map.h" | |
48 | ||
49 | /* | |
50 | * micscif_insert_tcw: | |
51 | * | |
52 | * Insert a temp window to the temp registration list sorted by va_for_temp. | |
53 | * RMA lock must be held. | |
54 | */ | |
55 | void micscif_insert_tcw(struct reg_range_t *window, | |
56 | struct list_head *head) | |
57 | { | |
58 | struct reg_range_t *curr = NULL, *prev = NULL; | |
59 | struct list_head *item; | |
60 | BUG_ON(!window); | |
61 | INIT_LIST_HEAD(&window->list_member); | |
62 | /* | |
63 | * HSD 4845254 | |
64 | * Hack for worst case performance | |
65 | * Compare with tail and if the new entry is new tail add it to the end | |
66 | */ | |
67 | if (!list_empty(head)) { | |
68 | curr = list_entry(head->prev, struct reg_range_t, list_member); | |
69 | if ((uint64_t) curr->va_for_temp < (uint64_t) window->va_for_temp) { | |
70 | list_add_tail(&window->list_member, head); | |
71 | return; | |
72 | } | |
73 | } | |
74 | /* | |
75 | * We don't need the if(!prev) code but I am gonna leave it as | |
76 | * is for now. If someone touches the above code it is likely that they | |
77 | * will miss that they have to add if(!prev) block | |
78 | */ | |
79 | list_for_each(item, head) { | |
80 | curr = list_entry(item, struct reg_range_t, list_member); | |
81 | if ((uint64_t) curr->va_for_temp > (uint64_t) window->va_for_temp) | |
82 | break; | |
83 | prev = curr; | |
84 | } | |
85 | if (!prev) | |
86 | list_add(&window->list_member, head); | |
87 | else | |
88 | list_add(&window->list_member, &prev->list_member); | |
89 | } | |
90 | /* | |
91 | * micscif_insert_window: | |
92 | * | |
93 | * Insert a window to the self registration list sorted by offset. | |
94 | * RMA lock must be held. | |
95 | */ | |
96 | void micscif_insert_window(struct reg_range_t *window, struct list_head *head) | |
97 | { | |
98 | struct reg_range_t *curr = NULL, *prev = NULL; | |
99 | struct list_head *item; | |
100 | BUG_ON(!window); | |
101 | INIT_LIST_HEAD(&window->list_member); | |
102 | list_for_each(item, head) { | |
103 | curr = list_entry(item, struct reg_range_t, list_member); | |
104 | if (curr->offset > window->offset) | |
105 | break; | |
106 | prev = curr; | |
107 | } | |
108 | if (!prev) | |
109 | list_add(&window->list_member, head); | |
110 | else | |
111 | list_add(&window->list_member, &prev->list_member); | |
112 | } | |
113 | ||
/*
 * micscif_query_tcw:
 *
 * Query the temp cached registration list of ep and check if a valid contiguous
 * range of windows exist.
 * If there is a partial overlap, delete the existing window and create a new one
 * that encompasses the previous window and a new range
 * RMA lock must be held.
 *
 * Return: 0 with *req->out_window set when a cached window fully covers the
 * request with sufficient protections; -ENXIO when no such window exists and
 * the caller must register one. On -ENXIO, req->va_for_temp and req->nr_bytes
 * may have been enlarged here so the re-registration also absorbs a partially
 * overlapping window that was destroyed.
 */
int micscif_query_tcw(struct endpt *ep, struct micscif_rma_req *req)
{
	struct list_head *item, *temp;
	struct reg_range_t *window;
	/* Requested VA range is [start_va_req, end_va_req) */
	uint64_t start_va_window, start_va_req = (uint64_t) req->va_for_temp;
	uint64_t end_va_window, end_va_req = start_va_req + req->nr_bytes;

	/*
	 * HSD 4845254
	 * Hack for the worst case scenario
	 * Avoid traversing the entire list to find out that there is no
	 * entry that matches
	 */
	if (!list_empty(req->head)) {
		/* List is sorted by va_for_temp; peek at the last (highest) entry. */
		temp = req->head->prev;
		window = list_entry(temp,
			struct reg_range_t, list_member);
		end_va_window = (uint64_t) window->va_for_temp +
			(window->nr_pages << PAGE_SHIFT);
		/* Request starts beyond the end of the highest window: no match. */
		if (start_va_req > end_va_window)
			return -ENXIO;
	}
	list_for_each_safe(item, temp, req->head) {
		window = list_entry(item,
			struct reg_range_t, list_member);
		start_va_window = (uint64_t) window->va_for_temp;
		end_va_window = (uint64_t) window->va_for_temp +
			(window->nr_pages << PAGE_SHIFT);
		pr_debug("%s %d start_va_window 0x%llx end_va_window 0x%llx"
			" start_va_req 0x%llx end_va_req 0x%llx req->nr_bytes 0x%lx\n",
			__func__, __LINE__, start_va_window, end_va_window,
			start_va_req, end_va_req, req->nr_bytes);
		if (start_va_req < start_va_window) {
			/*
			 * Request begins before this window. The list is
			 * sorted, so no later window can match either; stop
			 * after (possibly) absorbing this one.
			 */
			if (end_va_req < start_va_window) {
				/* No overlap */
			} else {
				if ((window->prot & req->prot) != req->prot) {
					/*
					 * Insufficient protections: just drop
					 * the stale window below; the caller
					 * will register a fresh one.
					 */
				} else {
					/* Grow the request to also cover the tail of this window. */
					req->nr_bytes += ((end_va_req > end_va_window) ? 0:(end_va_window - end_va_req));
					pr_debug("%s %d Extend req->va_for_temp %p req->nr_byte 0x%lx\n",
						__func__, __LINE__, req->va_for_temp, req->nr_bytes);
				}
				__micscif_rma_destroy_tcw_helper(window);
			}
			break;
		} else {
			if (start_va_req > end_va_window) {
				/* No overlap */
				continue;
			} else {
				if ((window->prot & req->prot) != req->prot) {
					/* Overlapping but unusable: destroy and re-register. */
					__micscif_rma_destroy_tcw_helper(window);
					break;
				}
				if (end_va_req > end_va_window) {
					/*
					 * Partial overlap off the end of this
					 * window: grow the request to start at
					 * the window base, destroy the window,
					 * and have the caller re-register the
					 * merged range.
					 */
					req->va_for_temp = (void*) start_va_window;
					req->nr_bytes = end_va_req - start_va_window;
					pr_debug("%s %d Extend req->va_for_temp %p req->nr_byte 0x%lx\n",
						__func__, __LINE__, req->va_for_temp, req->nr_bytes);
					__micscif_rma_destroy_tcw_helper(window);
					return -ENXIO;
				} else {
					/* Request is fully contained: reuse this cached window. */
					*(req->out_window) = window;
					return 0;
				}
			}
		}
	}
	pr_debug("%s %d ENXIO\n", __func__, __LINE__);
	return -ENXIO;
}
195 | ||
/*
 * micscif_query_window:
 *
 * Query the registration list and check if a valid contiguous
 * range of windows exist.
 * RMA lock must be held.
 *
 * Return: 0 when [req->offset, req->offset + req->nr_bytes) is covered by
 * contiguous registered windows with sufficient protections, with
 * *req->out_window set to the first covering window; -EPERM on a protection
 * mismatch; -ENXIO when the range is not contiguously registered or the
 * req->type constraints below are violated.
 */
int micscif_query_window(struct micscif_rma_req *req)
{
	struct list_head *item;
	struct reg_range_t *window;
	uint64_t end_offset, offset = req->offset;
	uint64_t tmp_min, nr_bytes_left = req->nr_bytes;

	/* List is sorted by offset (see micscif_insert_window()). */
	list_for_each(item, req->head) {
		window = list_entry(item,
			struct reg_range_t, list_member);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		/*
		 * Sorted list: if the remaining offset precedes this window,
		 * there is a hole and the range cannot be covered.
		 */
		if (offset < window->offset)
			/* Offset not found! */
			return -ENXIO;
		if (offset < end_offset) {
			/* Check read/write protections. */
			if ((window->prot & req->prot) != req->prot)
				return -EPERM;
			if (nr_bytes_left == req->nr_bytes)
				/* Store the first window */
				*(req->out_window) = window;
			/* Consume as much of the request as this window covers. */
			tmp_min = min(end_offset - offset, nr_bytes_left);
			nr_bytes_left -= tmp_min;
			offset += tmp_min;
			/*
			 * Range requested encompasses
			 * multiple windows contiguously.
			 */
			if (!nr_bytes_left) {
				/* Done for partial window */
				if (req->type == WINDOW_PARTIAL ||
					req->type == WINDOW_SINGLE)
					return 0;
				/*
				 * Extra logic for full windows: the range must
				 * end exactly on a window boundary.
				 */
				if (offset == end_offset)
					/* Spanning multiple whole windows */
					return 0;
				/* Not spanning multiple whole windows */
				return -ENXIO;
			}
			/* A WINDOW_SINGLE request must not span windows. */
			if (req->type == WINDOW_SINGLE)
				break;
		}
	}
	printk(KERN_ERR "%s %d ENXIO\n", __func__, __LINE__);
	return -ENXIO;
}
251 | ||
/*
 * micscif_rma_list_mmap:
 *
 * Traverse the remote registration list starting from start_window:
 * 1) Check read/write protections.
 * 2) Create VtoP mappings via remap_pfn_range(..)
 * 3) Once step 1) and 2) complete successfully then traverse the range of
 * windows again and bump the reference count.
 * RMA lock must be held.
 *
 * Return: 0 on success or the error from remap_pfn_range().
 */
int micscif_rma_list_mmap(struct reg_range_t *start_window,
	uint64_t offset, int nr_pages, struct vm_area_struct *vma)
{
	struct list_head *item, *head;
	uint64_t end_offset, loop_offset = offset;
	struct reg_range_t *window;
	int64_t start_page_nr, loop_nr_pages, nr_pages_left = nr_pages;
	struct endpt *ep = (struct endpt *)start_window->ep;
	int i, err = 0;
	/* j counts pages mapped so far, i.e. the page index into the VMA */
	uint64_t j =0;
	dma_addr_t phys_addr;

	might_sleep();
	BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock));

	/*
	 * Start traversing from the previous link in the list:
	 * list_for_each() begins at head->next, so the first iteration
	 * visits start_window itself.
	 */
	head = ((&start_window->list_member))->prev;
	list_for_each(item, head) {
		window = list_entry(item, struct reg_range_t,
			list_member);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		/* First page of this window that falls inside the request. */
		start_page_nr = (loop_offset - window->offset) >> PAGE_SHIFT;
		/* Number of request pages this window can satisfy. */
		loop_nr_pages = min((int64_t)((end_offset - loop_offset) >> PAGE_SHIFT),
			nr_pages_left);
		for (i = (int)start_page_nr;
			i < ((int)start_page_nr + (int)loop_nr_pages); i++, j++) {

			phys_addr =
#if !defined(_MIC_SCIF_) && defined(CONFIG_ML1OM)
			/*
			 * Host build for KNF: loopback devices translate via
			 * micscif_get_dma_addr(); otherwise use the per-page
			 * physical address cached in the window.
			 */
			is_self_scifdev(ep->remote_dev) ?
				micscif_get_dma_addr(window, loop_offset,
				NULL, NULL, NULL) : window->phys_addr[i];
#else
			get_phys_addr(micscif_get_dma_addr(window, loop_offset,
				NULL, NULL, NULL), ep->remote_dev);
#endif
			/*
			 * Note:
			 * 1) remap_pfn_rnage returns an error if there is an
			 * attempt to create MAP_PRIVATE COW mappings.
			 */
			if ((err = remap_pfn_range(vma,
				((vma)->vm_start) + (j * PAGE_SIZE),
				phys_addr >> PAGE_SHIFT,
				PAGE_SIZE,
				((vma)->vm_page_prot))))
				goto error;
			loop_offset += PAGE_SIZE;
		}
		nr_pages_left -= loop_nr_pages;
		if (!nr_pages_left)
			break;
	}
	BUG_ON(nr_pages_left);
	/*
	 * No more failures expected. Bump up the ref count for all
	 * the windows. Another traversal from start_window required
	 * for handling errors encountered across windows during
	 * remap_pfn_range(..).
	 */
	loop_offset = offset;
	nr_pages_left = nr_pages;
	head = (&(start_window->list_member))->prev;
	list_for_each(item, head) {
		window = list_entry(item, struct reg_range_t,
			list_member);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		start_page_nr = (loop_offset - window->offset) >> PAGE_SHIFT;
		loop_nr_pages = min((int64_t)((end_offset - loop_offset) >> PAGE_SHIFT),
			nr_pages_left);
		get_window_ref_count(window, loop_nr_pages);
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
	BUG_ON(nr_pages_left);
error:
	/*
	 * NOTE(review): on a mid-range remap_pfn_range() failure, PTEs already
	 * created are left in place; presumably VMA teardown cleans them up --
	 * confirm against the mmap caller's error path.
	 */
	if (err)
		printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err);
	return err;
}
346 | ||
/*
 * micscif_rma_list_munmap:
 *
 * Traverse the remote registration list starting from window:
 * 1) Decrement ref count.
 * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
 * RMA lock must be held.
 */
void micscif_rma_list_munmap(struct reg_range_t *start_window,
	uint64_t offset, int nr_pages)
{
	struct list_head *item, *tmp, *head;
	struct nodemsg msg;
	uint64_t loop_offset = offset, end_offset;
	int64_t loop_nr_pages, nr_pages_left = nr_pages;
	struct endpt *ep = (struct endpt *)start_window->ep;
	struct reg_range_t *window;

	BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock));

	msg.uop = SCIF_MUNMAP;
	msg.src = ep->port;
	loop_offset = offset;
	nr_pages_left = nr_pages;
	/*
	 * Start traversing from the previous link in the list:
	 * list_for_each_safe() begins at head->next, so the first iteration
	 * visits start_window itself (safe variant because windows may be
	 * deleted inside the loop).
	 */
	head = (&(start_window->list_member))->prev;
	list_for_each_safe(item, tmp, head) {
		window = list_entry(item, struct reg_range_t,
			list_member);
		RMA_MAGIC(window);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		/* Pages of this window covered by the request range. */
		loop_nr_pages = min((int64_t)((end_offset - loop_offset) >> PAGE_SHIFT),
			nr_pages_left);
		put_window_ref_count(window, loop_nr_pages);
		if (!window->ref_count) {
			/* Quiesce outstanding DMA before tearing the window down. */
			if (scifdev_alive(ep))
				drain_dma_intr(ep->rma_info.dma_chan);
			/* Inform the peer about this munmap */
			msg.payload[0] = window->peer_window;
			/* No error handling for Notification messages. */
			micscif_nodeqp_send(ep->remote_dev, &msg, ep);
			list_del(&window->list_member);
			/* Destroy this window from the peer's registered AS */
			micscif_destroy_remote_window(ep, window);
		}
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
	BUG_ON(nr_pages_left);
}
400 | ||
/*
 * micscif_rma_list_unregister:
 *
 * Traverse the self registration list starting from window:
 * 1) Call micscif_unregister_window(..)
 * RMA lock must be held.
 *
 * Return: 0 on success or the first error from micscif_unregister_window().
 */
int micscif_rma_list_unregister(struct reg_range_t *window,
	uint64_t offset, int nr_pages)
{
	struct list_head *item, *tmp, *head;
	uint64_t end_offset;
	int err = 0;
	int64_t loop_nr_pages;
	struct endpt *ep = (struct endpt *)window->ep;

	BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock));
	/*
	 * Start traversing from the previous link in the list so the first
	 * iteration visits the passed-in window itself. Note that the
	 * 'window' parameter is reused as the loop cursor below.
	 */
	head = (&window->list_member)->prev;
	list_for_each_safe(item, tmp, head) {
		window = list_entry(item, struct reg_range_t,
			list_member);
		RMA_MAGIC(window);
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		/*
		 * Pages of this window inside the request.
		 * NOTE(review): the int cast here differs from the int64_t
		 * math used by the sibling list walkers -- presumably page
		 * counts always fit in an int; confirm for large windows.
		 */
		loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
			nr_pages);
		if ((err = micscif_unregister_window(window)))
			return err;
		nr_pages -= (int)loop_nr_pages;
		offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages)
			break;
	}
	BUG_ON(nr_pages);
	return 0;
}
438 | ||
/*
 * micscif_unregister_all_windows:
 *
 * Traverse all the windows in the self registration list and:
 * 1) Call micscif_unregister_window(..)
 * Note: unlike the other list helpers, this function acquires and releases
 * the RMA lock itself (callers must NOT hold it).
 *
 * Return: the result of the final fence wait -- nonzero remaining time on
 * success, 0 if the wait timed out (timeout is logged as unexpected).
 */
int micscif_unregister_all_windows(scif_epd_t epd)
{
	struct list_head *item, *tmp;
	struct reg_range_t *window;
	struct endpt *ep = (struct endpt *)epd;
	struct list_head *head = &ep->rma_info.reg_list;
	int err = 0;

	/* Kick the misc workqueue so deferred cleanup work makes progress. */
	queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work);
	mutex_lock(&ep->rma_info.rma_lock);
retry:
	item = NULL;
	tmp = NULL;
	list_for_each_safe(item, tmp, head) {
		window = list_entry(item,
			struct reg_range_t, list_member);
		/* Cleared here; set again by any asynchronous list deletion. */
		ep->rma_info.async_list_del = 0;
		/* Errors are logged but do not stop the teardown sweep. */
		if ((err = micscif_unregister_window(window)))
			pr_debug("%s %d err %d\n",
				__func__, __LINE__, err);
		/*
		 * Need to restart list traversal if there has been
		 * an asynchronous list entry deletion.
		 */
		if (ep->rma_info.async_list_del)
			goto retry;
	}
	mutex_unlock(&ep->rma_info.rma_lock);

	/*
	 * The following waits cannot be interruptible since they are
	 * from the driver release() entry point.
	 */
	err = wait_event_timeout(ep->rma_info.fence_wq,
		!ep->rma_info.fence_refcount, NODE_ALIVE_TIMEOUT);
	/* Timeout firing is unexpected. Is the DMA engine hung? */
	if (!err)
		printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err);

#ifdef CONFIG_MMU_NOTIFIER
	/* Defer MMU-notifier teardown to the dedicated cleanup workqueue. */
	if (!list_empty(&ep->rma_info.mmn_list)) {
		spin_lock(&ms_info.mi_rmalock);
		list_add_tail(&ep->mmu_list, &ms_info.mi_mmu_notif_cleanup);
		spin_unlock(&ms_info.mi_rmalock);
		queue_work(ms_info.mi_mmu_notif_wq, &ms_info.mi_mmu_notif_work);
	}
#endif
	return err;
}
495 | ||
496 | /* | |
497 | * micscif_rma_list_get_pages_check: | |
498 | * | |
499 | * Traverse the remote registration list and return 0 if all the | |
500 | * scif_get_pages()/scif_put_pages() ref_counts are zero else return -1. | |
501 | */ | |
502 | int micscif_rma_list_get_pages_check(struct endpt *ep) | |
503 | { | |
504 | struct list_head *item, *head = &ep->rma_info.remote_reg_list; | |
505 | struct reg_range_t *window; | |
506 | int err = 0; | |
507 | ||
508 | mutex_lock(&ep->rma_info.rma_lock); | |
509 | list_for_each(item, head) { | |
510 | window = list_entry(item, | |
511 | struct reg_range_t, list_member); | |
512 | if (window->get_put_ref_count) { | |
513 | err = -1; | |
514 | break; | |
515 | } | |
516 | } | |
517 | mutex_unlock(&ep->rma_info.rma_lock); | |
518 | return err; | |
519 | } | |
520 | ||
521 | /* Only debug API's below */ | |
522 | void micscif_display_all_windows(struct list_head *head) | |
523 | { | |
524 | struct list_head *item; | |
525 | struct reg_range_t *window; | |
526 | pr_debug("\nWindow List Start\n"); | |
527 | list_for_each(item, head) { | |
528 | window = list_entry(item, | |
529 | struct reg_range_t, list_member); | |
530 | micscif_display_window(window, __func__, __LINE__); | |
531 | } | |
532 | pr_debug("Window List End\n\n"); | |
533 | } |