Updated micscif/micscif_api.c to new location for atomic_t element.
[xeon-phi-kernel-module] / micscif / micscif_rma_dma.c
/*
 * Copyright 2010-2017 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Disclaimer: The codes contained in these modules may be specific to
 * the Intel Software Development Platform codenamed Knights Ferry,
 * and the Intel product codenamed Knights Corner, and are not backward
 * compatible with other Intel products. Additionally, Intel will NOT
 * support the codes or instruction set in future products.
 *
 * Intel offers no warranty of any kind regarding the code. This code is
 * licensed on an "AS IS" basis and Intel is not obligated to provide
 * any support, assistance, installation, training, or other services
 * of any kind. Intel is also not obligated to provide any updates,
 * enhancements or extensions. Intel specifically disclaims any warranty
 * of merchantability, non-infringement, fitness for any particular
 * purpose, and any other warranty.
 *
 * Further, Intel disclaims all liability of any kind, including but
 * not limited to liability for infringement of any proprietary rights,
 * relating to the use of the code, even if Intel is notified of the
 * possibility of such liability. Except as expressly stated in an Intel
 * license agreement provided with this code and agreed upon with Intel,
 * no license, express or implied, by estoppel or otherwise, to any
 * intellectual property rights is granted herein.
 */

#include "mic/micscif.h"
#include "mic/micscif_smpt.h"
#include "mic/mic_dma_api.h"
#include "mic/micscif_kmem_cache.h"
#include "mic/micscif_rma.h"
#include "mic/micscif_rma_list.h"
#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT)
#include <linux/sched.h>
#endif
#include <linux/highmem.h>
#ifndef _MIC_SCIF_
#include "mic_common.h"
#endif

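/*
 * get_local_va:
 *
 * Return a kernel virtual address for @off within @window. For a self
 * window the pinned page array is used; for any other window the DMA
 * address from micscif_get_dma_addr() is translated via phys_to_virt().
 */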
static __always_inline
void *get_local_va(off_t off, struct reg_range_t *window, size_t len)
{
	uint64_t page_nr = (off - window->offset) >> PAGE_SHIFT;
	off_t page_off = off & ~PAGE_MASK;
	void *va;

	if (RMA_WINDOW_SELF == window->type) {
		struct page **pages = window->pinned_pages->pages;
		va = (void *)((uint64_t)
			(page_address(pages[page_nr])) | page_off);
	} else {
		dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, NULL, NULL);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == phys)
			return NULL;
#endif
		va = (void *)((uint64_t)(phys_to_virt(phys)));
	}
	return va;
}

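/*
 * ioremap_remote()/ioremap_remote_gtt():
 *
 * Map a remote window offset into kernel virtual address space. On the
 * card (_MIC_SCIF_) non-loopback transfers go through ioremap_nocache();
 * on the host the physical address is reached through the device
 * aperture (aper.va) or, for loopback, through the peer window's pinned
 * pages.
 */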
#ifdef _MIC_SCIF_
static __always_inline
void *ioremap_remote(off_t off, struct reg_range_t *window,
	size_t len, bool loopback, struct micscif_dev *dev, int *index, uint64_t *start_off)
{
	void *ret;
	dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, index, start_off);

#ifdef CONFIG_ML1OM
	if (RMA_ERROR_CODE == phys)
		return NULL;
#endif
	if (!loopback)
		ret = ioremap_nocache(phys, len);
	else
		ret = (void *)((uint64_t)phys_to_virt(phys));
	return ret;
}

static __always_inline
void *ioremap_remote_gtt(off_t off, struct reg_range_t *window,
	size_t len, bool loopback, struct micscif_dev *dev, int ch_num, struct mic_copy_work *work)
{
	return ioremap_remote(off, window, len, loopback, dev, NULL, NULL);
}
#else
static __always_inline
void *ioremap_remote_gtt(off_t off, struct reg_range_t *window,
	size_t len, bool loopback, struct micscif_dev *dev, int ch_num, struct mic_copy_work *work)
{
	void *ret;
	uint64_t page_nr = (off - window->offset) >> PAGE_SHIFT;
	off_t page_off = off & ~PAGE_MASK;
	if (!loopback) {
		dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, NULL, NULL);
		/* Ideally there should be a helper to do the +/-1 */
		ret = get_per_dev_ctx(dev->sd_node - 1)->aper.va + phys;
	} else {
		struct page **pages = ((struct reg_range_t *)
			(window->peer_window))->pinned_pages->pages;
		ret = (void *)((uint64_t)phys_to_virt(page_to_phys(pages[page_nr]))
			| page_off);
	}
	return ret;
}

static __always_inline
void *ioremap_remote(off_t off, struct reg_range_t *window,
	size_t len, bool loopback, struct micscif_dev *dev, int *index, uint64_t *start_off)
{
	void *ret;
	int page_nr = (int)((off - window->offset) >> PAGE_SHIFT);
	off_t page_off = off & ~PAGE_MASK;

	if (!loopback) {
		dma_addr_t phys;
		mic_ctx_t *mic_ctx = get_per_dev_ctx(dev->sd_node - 1);
		phys = micscif_get_dma_addr(window, off, NULL, index, start_off);
		ret = mic_ctx->aper.va + phys;
	} else {
		struct page **pages = ((struct reg_range_t *)
			(window->peer_window))->pinned_pages->pages;
		ret = (void *)((uint64_t)phys_to_virt(page_to_phys(pages[page_nr]))
			| page_off);
	}
	return ret;
}
#endif

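/*
 * iounmap_remote:
 *
 * Undo an ioremap_remote()/ioremap_remote_gtt() mapping. Only the card
 * side creates a real ioremap mapping for non-loopback transfers, so the
 * host build and the loopback path are no-ops.
 */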
static __always_inline void
iounmap_remote(void *virt, size_t size, struct mic_copy_work *work)
{
#ifdef _MIC_SCIF_
	if (!work->loopback)
		iounmap(virt);
#endif
}

/*
 * Takes care of ordering issues caused by:
 * 1. Hardware: only in the case of a CPU copy from host to card, because of WC memory.
 * 2. Software: memcpy() may reorder copy instructions for optimization. This could happen
 *    on both the host and the card.
 */
static inline void ordered_memcpy(volatile char *dst,
		const char *src, size_t count)
{
	if (!count)
		return;

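	/*
	 * Copy everything except the final byte, then publish that byte only
	 * after the write barrier so a peer polling on the last byte sees the
	 * rest of the data first.
	 */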
	memcpy_toio(dst, src, --count);
	wmb();
	*(dst + count) = *(src + count);
}

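/*
 * micscif_unaligned_memcpy:
 *
 * CPU copy used for cache line unaligned head/tail bytes. When @ordered is
 * set the last byte is written after a write barrier (see ordered_memcpy())
 * so that completion is observed only after the rest of the payload.
 */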
static inline void micscif_unaligned_memcpy(volatile char *dst,
		const char *src, size_t count, bool ordered)
{
	if (unlikely(ordered))
		ordered_memcpy(dst, src, count);
	else
		memcpy_toio(dst, src, count);
}

/*
 * Copy between an RMA window and a temporary buffer.
 */
void micscif_rma_local_cpu_copy(uint64_t offset, struct reg_range_t *window, uint8_t *temp, size_t remaining_len, bool to_temp)
{
	void *window_virt;
	size_t loop_len;
	int offset_in_page;
	uint64_t end_offset;
	struct list_head *item;

	BUG_ON(RMA_WINDOW_SELF != window->type);

	offset_in_page = offset & ~PAGE_MASK;
	loop_len = PAGE_SIZE - offset_in_page;

	if (remaining_len < loop_len)
		loop_len = remaining_len;

	if (!(window_virt = get_local_va(offset, window, loop_len)))
		return;
	if (to_temp)
		memcpy(temp, window_virt, loop_len);
	else
		memcpy(window_virt, temp, loop_len);

	offset += loop_len;
	temp += loop_len;
	remaining_len -= loop_len;

	end_offset = window->offset +
		(window->nr_pages << PAGE_SHIFT);
	while (remaining_len) {
		if (offset == end_offset) {
			item = (&window->list_member)->next;
			window = list_entry(item,
				struct reg_range_t,
				list_member);
			end_offset = window->offset +
				(window->nr_pages << PAGE_SHIFT);
		}

		loop_len = min(PAGE_SIZE, remaining_len);

		if (!(window_virt = get_local_va(offset, window, loop_len)))
			return;

		if (to_temp)
			memcpy(temp, window_virt, loop_len);
		else
			memcpy(window_virt, temp, loop_len);

		offset += loop_len;
		temp += loop_len;
		remaining_len -= loop_len;
	}
}

/*
 * micscif_rma_list_dma_copy_unaligned:
 *
 * DMA copy between a list of windows and a cache line aligned temporary
 * buffer, used when source and destination offsets have different cache
 * line alignment. Head and tail bytes that break cache line alignment are
 * copied by the CPU through an ioremap'd mapping; the aligned body is
 * copied by DMA, and a final DO_DMA_INTR descriptor posts the completion
 * callback.
 */
static int micscif_rma_list_dma_copy_unaligned(struct mic_copy_work *work, uint8_t *temp, struct dma_channel *chan, bool src_local)
{
	struct dma_completion_cb *comp_cb = work->comp_cb;
	dma_addr_t window_dma_addr, temp_dma_addr;
#ifndef _MIC_SCIF_
	dma_addr_t temp_phys = comp_cb->temp_phys;
#endif
	size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
	int offset_in_page;
	uint64_t end_offset = 0, offset = 0;
	struct reg_range_t *window = NULL;
	struct list_head *item = NULL;
	int ret = 0;
	void *window_virt_addr = NULL;
	size_t tail_len = 0;

	if (src_local) {
		offset = work->dst_offset;
		window = work->dst_window;
	} else {
		offset = work->src_offset;
		window = work->src_window;
	}

	offset_in_page = offset & (L1_CACHE_BYTES - 1);
	if (offset_in_page) {
		loop_len = L1_CACHE_BYTES - offset_in_page;
		loop_len = min(loop_len, remaining_len);

		if (!(window_virt_addr = ioremap_remote_gtt(offset, window, loop_len,
					work->loopback, work->remote_dev,
					get_chan_num(chan), work)))
			return -ENOMEM;

		if (src_local) {
			micscif_unaligned_memcpy(window_virt_addr, temp, loop_len, work->ordered &&
				!(remaining_len - loop_len));
			serializing_request(window_virt_addr);
		} else {
			memcpy_fromio(temp, window_virt_addr, loop_len);
			serializing_request(temp);
		}
#ifdef RMA_DEBUG
		atomic_long_add_return(loop_len, &ms_info.rma_unaligned_cpu_cnt);
#endif
		smp_mb();
		iounmap_remote(window_virt_addr, loop_len, work);

		offset += loop_len;
		temp += loop_len;
#ifndef _MIC_SCIF_
		temp_phys += loop_len;
#endif
		remaining_len -= loop_len;
	}

	offset_in_page = offset & ~PAGE_MASK;
	end_offset = window->offset +
		(window->nr_pages << PAGE_SHIFT);

	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
	remaining_len -= tail_len;
	while (remaining_len) {
		if (offset == end_offset) {
			item = (&window->list_member)->next;
			window = list_entry(item,
				struct reg_range_t,
				list_member);
			end_offset = window->offset +
				(window->nr_pages << PAGE_SHIFT);
		}
#ifndef _MIC_SCIF_
		temp_dma_addr = temp_phys;
#else
		temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
#endif
		window_dma_addr = micscif_get_dma_addr(window, offset, &nr_contig_bytes, NULL, NULL);

#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == window_dma_addr)
			return -ENXIO;
#endif
		loop_len = min(nr_contig_bytes, remaining_len);

		if (src_local) {
			if (unlikely(work->ordered && !tail_len &&
				!(remaining_len - loop_len) &&
				loop_len != L1_CACHE_BYTES)) {
				/*
				 * Break up the last chunk of the transfer into two steps
				 * if there is no tail to guarantee DMA ordering.
				 * Passing DO_DMA_POLLING inserts a status update descriptor
				 * in step 1 which acts as a double sided synchronization
				 * fence for the DMA engine to ensure that the last cache line
				 * in step 2 is updated last.
				 */
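				/*
				 * Example with hypothetical sizes: for loop_len = 4096 and
				 * L1_CACHE_BYTES = 64, step 1 transfers bytes 0..4031 with
				 * DO_DMA_POLLING and step 2 transfers the final 64-byte cache
				 * line, so a peer polling the last cache line sees it change
				 * only after the body has landed.
				 */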
				/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
				ret = do_dma(chan, DO_DMA_POLLING, temp_dma_addr, window_dma_addr,
					loop_len - L1_CACHE_BYTES, NULL);
				if (ret < 0) {
					printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
						__func__, __LINE__, ret);
					return ret;
				}
				offset += (loop_len - L1_CACHE_BYTES);
				temp_dma_addr += (loop_len - L1_CACHE_BYTES);
				window_dma_addr += (loop_len - L1_CACHE_BYTES);
				remaining_len -= (loop_len - L1_CACHE_BYTES);
				loop_len = remaining_len;

				/* Step 2) DMA: L1_CACHE_BYTES */
				ret = do_dma(chan, 0, temp_dma_addr, window_dma_addr,
					loop_len, NULL);
				if (ret < 0) {
					printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
						__func__, __LINE__, ret);
					return ret;
				}
			} else {
				int flags = 0;
				if (remaining_len == loop_len + L1_CACHE_BYTES)
					flags = DO_DMA_POLLING;
				ret = do_dma(chan, flags, temp_dma_addr, window_dma_addr,
					loop_len, NULL);
			}
		} else {
			ret = do_dma(chan, 0, window_dma_addr, temp_dma_addr,
				loop_len, NULL);
		}
		if (ret < 0) {
			printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
				__func__, __LINE__, ret);
			return ret;
		}
		offset += loop_len;
		temp += loop_len;
#ifndef _MIC_SCIF_
		temp_phys += loop_len;
#endif
		remaining_len -= loop_len;
		offset_in_page = 0;
	}
	if (tail_len) {
		if (offset == end_offset) {
			item = (&window->list_member)->next;
			window = list_entry(item,
				struct reg_range_t,
				list_member);
			end_offset = window->offset +
				(window->nr_pages << PAGE_SHIFT);
		}
		if (!(window_virt_addr = ioremap_remote_gtt(offset, window, tail_len,
					work->loopback, work->remote_dev,
					get_chan_num(chan), work)))
			return -ENOMEM;

		/*
		 * The CPU copy for the tail bytes must be initiated only once previous
		 * DMA transfers for this endpoint have completed to guarantee
		 * ordering.
		 */
		if (unlikely(work->ordered)) {
			free_dma_channel(chan);
			work->dma_chan_released = true;
			if ((ret = drain_dma_intr(chan)))
				return ret;
		}

		if (src_local) {
			micscif_unaligned_memcpy(window_virt_addr, temp, tail_len, work->ordered);
			serializing_request(window_virt_addr);
		} else {
			memcpy_fromio(temp, window_virt_addr, tail_len);
			serializing_request(temp);
		}
#ifdef RMA_DEBUG
		atomic_long_add_return(tail_len, &ms_info.rma_unaligned_cpu_cnt);
#endif
		smp_mb();
		iounmap_remote(window_virt_addr, tail_len, work);
	}
	if (work->dma_chan_released) {
		if ((ret = request_dma_channel(chan)))
			return ret;
		/* Callee frees the DMA channel lock, if it is held */
		work->dma_chan_released = false;
	}
	ret = do_dma(chan, DO_DMA_INTR, 0, 0, 0, comp_cb);
	if (ret < 0) {
		printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
			__func__, __LINE__, ret);
		return ret;
	}
	return 0;
}

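/*
 * is_local_dma_addr:
 *
 * True if @addr refers to local system memory. On the card any address
 * whose page frame number is below num_physpages is local; on the host
 * is_syspa() makes the same distinction.
 */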
static inline bool is_local_dma_addr(uint64_t addr)
{
#ifdef _MIC_SCIF_
	return (addr >> PAGE_SHIFT < num_physpages);
#else
	return is_syspa(addr);
#endif
}

/*
 * micscif_rma_list_dma_copy_aligned:
 *
 * Traverse all the windows and perform DMA copy.
 */
static int micscif_rma_list_dma_copy_aligned(struct mic_copy_work *work, struct dma_channel *chan)
{
	dma_addr_t src_dma_addr, dst_dma_addr;
	size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0, dst_contig_bytes = 0;
	int src_cache_off, dst_cache_off, src_last_index = 0, dst_last_index = 0;
	uint64_t end_src_offset, end_dst_offset;
	void *src_virt, *dst_virt;
	struct reg_range_t *src_window = work->src_window;
	struct reg_range_t *dst_window = work->dst_window;
	uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset;
	uint64_t src_start_offset = src_window->offset, dst_start_offset = dst_window->offset;
	struct list_head *item;
	int ret = 0;

	remaining_len = work->len;

	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
	if (src_cache_off != dst_cache_off) {
		BUG_ON(1);
	} else if (src_cache_off != 0) {
		/* Head */
		loop_len = L1_CACHE_BYTES - src_cache_off;
		loop_len = min(loop_len, remaining_len);
		src_dma_addr = micscif_get_dma_addr(src_window, src_offset, NULL, NULL, NULL);
		dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, NULL, NULL, NULL);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == src_dma_addr)
			return -ENXIO;
		if (RMA_ERROR_CODE == dst_dma_addr)
			return -ENXIO;
		get_window_ref_count(src_window, 1);
		get_window_ref_count(dst_window, 1);
#endif
		if (is_local_dma_addr(src_dma_addr))
			src_virt = get_local_va(src_offset, src_window, loop_len);
		else
			src_virt = ioremap_remote_gtt(src_offset, src_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
		if (!src_virt) {
#ifdef CONFIG_ML1OM
			put_window_ref_count(src_window, 1);
			put_window_ref_count(dst_window, 1);
#endif
			return -ENOMEM;
		}
		if (is_local_dma_addr(dst_dma_addr))
			dst_virt = get_local_va(dst_offset, dst_window, loop_len);
		else
			dst_virt = ioremap_remote_gtt(dst_offset, dst_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
#ifdef CONFIG_ML1OM
		put_window_ref_count(src_window, 1);
		put_window_ref_count(dst_window, 1);
#endif
		if (!dst_virt) {
			if (!is_local_dma_addr(src_dma_addr))
				iounmap_remote(src_virt, loop_len, work);
			return -ENOMEM;
		}
		if (is_local_dma_addr(src_dma_addr)) {
			micscif_unaligned_memcpy(dst_virt, src_virt, loop_len,
				remaining_len == loop_len ? work->ordered : false);
		} else {
			memcpy_fromio(dst_virt, src_virt, loop_len);
		}
		serializing_request(dst_virt);
		smp_mb();
		if (!is_local_dma_addr(src_dma_addr))
			iounmap_remote(src_virt, loop_len, work);
		if (!is_local_dma_addr(dst_dma_addr))
			iounmap_remote(dst_virt, loop_len, work);
		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
	}

	end_src_offset = src_window->offset +
		(src_window->nr_pages << PAGE_SHIFT);
	end_dst_offset = dst_window->offset +
		(dst_window->nr_pages << PAGE_SHIFT);
	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
	remaining_len -= tail_len;
	while (remaining_len) {
		if (src_offset == end_src_offset) {
			item = (&src_window->list_member)->next;
			src_window = list_entry(item,
				struct reg_range_t,
				list_member);
			end_src_offset = src_window->offset +
				(src_window->nr_pages << PAGE_SHIFT);
			src_last_index = 0;
			src_start_offset = src_window->offset;
		}
		if (dst_offset == end_dst_offset) {
			item = (&dst_window->list_member)->next;
			dst_window = list_entry(item, struct reg_range_t, list_member);
			end_dst_offset = dst_window->offset +
				(dst_window->nr_pages << PAGE_SHIFT);
			dst_last_index = 0;
			dst_start_offset = dst_window->offset;
		}

		/* compute dma addresses for transfer */
		src_dma_addr = micscif_get_dma_addr(src_window, src_offset, &src_contig_bytes, &src_last_index, &src_start_offset);
		dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, &dst_contig_bytes, &dst_last_index, &dst_start_offset);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == src_dma_addr)
			return -ENXIO;
		if (RMA_ERROR_CODE == dst_dma_addr)
			return -ENXIO;
#endif
		loop_len = min(src_contig_bytes, dst_contig_bytes);
		loop_len = min(loop_len, remaining_len);
		if (unlikely(work->ordered && !tail_len &&
			!(remaining_len - loop_len) &&
			loop_len != L1_CACHE_BYTES)) {
			/*
			 * Break up the last chunk of the transfer into two steps
			 * if there is no tail to guarantee DMA ordering.
			 * Passing DO_DMA_POLLING inserts a status update descriptor
			 * in step 1 which acts as a double sided synchronization
			 * fence for the DMA engine to ensure that the last cache line
			 * in step 2 is updated last.
			 */
			/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
			ret = do_dma(chan, DO_DMA_POLLING, src_dma_addr, dst_dma_addr,
				loop_len - L1_CACHE_BYTES, NULL);
			if (ret < 0) {
				printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
					__func__, __LINE__, ret);
				return ret;
			}
			src_offset += (loop_len - L1_CACHE_BYTES);
			dst_offset += (loop_len - L1_CACHE_BYTES);
			src_dma_addr += (loop_len - L1_CACHE_BYTES);
			dst_dma_addr += (loop_len - L1_CACHE_BYTES);
			remaining_len -= (loop_len - L1_CACHE_BYTES);
			loop_len = remaining_len;

			/* Step 2) DMA: L1_CACHE_BYTES */
			ret = do_dma(chan, 0, src_dma_addr, dst_dma_addr,
				loop_len, NULL);
			if (ret < 0) {
				printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
					__func__, __LINE__, ret);
				return ret;
			}
		} else {
			int flags = 0;
			if (remaining_len == loop_len + L1_CACHE_BYTES)
				flags = DO_DMA_POLLING;
			ret = do_dma(chan, flags, src_dma_addr, dst_dma_addr,
				loop_len, NULL);
			if (ret < 0) {
				printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
					__func__, __LINE__, ret);
				return ret;
			}
		}
		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
	}
#ifdef CONFIG_MK1OM
	BUG_ON(remaining_len != 0);
#endif
#ifdef CONFIG_ML1OM
	if (remaining_len)
		return -ENXIO;
#endif
	remaining_len = tail_len;
	if (remaining_len) {
		loop_len = remaining_len;
		if (src_offset == end_src_offset) {
			item = (&src_window->list_member)->next;
			src_window = list_entry(item,
				struct reg_range_t,
				list_member);
		}
		if (dst_offset == end_dst_offset) {
			item = (&dst_window->list_member)->next;
			dst_window = list_entry(item, struct reg_range_t, list_member);
		}

		src_dma_addr = micscif_get_dma_addr(src_window, src_offset, NULL, NULL, NULL);
		dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, NULL, NULL, NULL);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == src_dma_addr)
			return -ENXIO;
		if (RMA_ERROR_CODE == dst_dma_addr)
			return -ENXIO;
#endif
		/*
		 * The CPU copy for the tail bytes must be initiated only once previous
		 * DMA transfers for this endpoint have completed to guarantee
		 * ordering.
		 */
		if (unlikely(work->ordered)) {
			free_dma_channel(chan);
			work->dma_chan_released = true;
			if ((ret = drain_dma_poll(chan)))
				return ret;
		}
#ifdef CONFIG_ML1OM
		get_window_ref_count(src_window, 1);
		get_window_ref_count(dst_window, 1);
#endif
		if (is_local_dma_addr(src_dma_addr))
			src_virt = get_local_va(src_offset, src_window, loop_len);
		else
			src_virt = ioremap_remote_gtt(src_offset, src_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
		if (!src_virt) {
#ifdef CONFIG_ML1OM
			put_window_ref_count(src_window, 1);
			put_window_ref_count(dst_window, 1);
#endif
			return -ENOMEM;
		}

		if (is_local_dma_addr(dst_dma_addr))
			dst_virt = get_local_va(dst_offset, dst_window, loop_len);
		else
			dst_virt = ioremap_remote_gtt(dst_offset, dst_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
#ifdef CONFIG_ML1OM
		put_window_ref_count(src_window, 1);
		put_window_ref_count(dst_window, 1);
#endif
		if (!dst_virt) {
			if (!is_local_dma_addr(src_dma_addr))
				iounmap_remote(src_virt, loop_len, work);
			return -ENOMEM;
		}

		if (is_local_dma_addr(src_dma_addr)) {
			micscif_unaligned_memcpy(dst_virt, src_virt, loop_len, work->ordered);
		} else {
			memcpy_fromio(dst_virt, src_virt, loop_len);
		}
		serializing_request(dst_virt);
		smp_mb();
		if (!is_local_dma_addr(src_dma_addr))
			iounmap_remote(src_virt, loop_len, work);

		if (!is_local_dma_addr(dst_dma_addr))
			iounmap_remote(dst_virt, loop_len, work);

		remaining_len -= loop_len;
#ifdef CONFIG_MK1OM
		BUG_ON(remaining_len != 0);
#endif
#ifdef CONFIG_ML1OM
		if (remaining_len)
			return -ENXIO;
#endif
	}

	return ret;
}

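/*
 * micscif_rma_list_dma_copy_wrapper:
 *
 * Entry point for DMA copies. If source and destination share the same
 * cache line alignment the transfer is handed straight to
 * micscif_rma_list_dma_copy_aligned(); otherwise a cache line aligned
 * temporary buffer is set up and the copy goes through
 * micscif_rma_list_dma_copy_unaligned(). Loopback work is redirected to
 * the CPU copy path on the card.
 */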
int micscif_rma_list_dma_copy_wrapper(struct endpt *epd, struct mic_copy_work *work, struct dma_channel *chan, off_t loffset)
{
	int src_cache_off, dst_cache_off;
	uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset;
	uint8_t *temp = NULL;
	bool src_local = true, dst_local = false;
	struct dma_completion_cb *comp_cb;
	dma_addr_t src_dma_addr, dst_dma_addr;
#ifndef _MIC_SCIF_
	struct pci_dev *pdev;
#endif

	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
	if (dst_cache_off == src_cache_off)
		return micscif_rma_list_dma_copy_aligned(work, chan);

	if (work->loopback) {
#ifdef _MIC_SCIF_
		BUG_ON(micscif_rma_list_cpu_copy(work));
		return 0;
#else
		BUG_ON(1);
#endif
	}

	src_dma_addr = micscif_get_dma_addr(work->src_window, src_offset, NULL, NULL, NULL);
	dst_dma_addr = micscif_get_dma_addr(work->dst_window, dst_offset, NULL, NULL, NULL);

	if (is_local_dma_addr(src_dma_addr))
		src_local = true;
	else
		src_local = false;

	if (is_local_dma_addr(dst_dma_addr))
		dst_local = true;
	else
		dst_local = false;

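	/*
	 * dst_local is computed only for symmetry and is not used beyond this
	 * point; the self-assignment below presumably just silences an
	 * unused-but-set-variable warning.
	 */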
	dst_local = dst_local;
	BUG_ON(work->len + (L1_CACHE_BYTES << 1) > KMEM_UNALIGNED_BUF_SIZE);

	/* Allocate dma_completion cb */
	if (!(comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL)))
		goto error;

	work->comp_cb = comp_cb;
	comp_cb->cb_cookie = (uint64_t)comp_cb;
	comp_cb->dma_completion_func = &micscif_rma_completion_cb;

	if (work->len + (L1_CACHE_BYTES << 1) < KMEM_UNALIGNED_BUF_SIZE) {
		comp_cb->is_cache = false;
		if (!(temp = kmalloc(work->len + (L1_CACHE_BYTES << 1), GFP_KERNEL)))
			goto free_comp_cb;
		comp_cb->temp_buf_to_free = temp;
		/* kmalloc(..) does not guarantee cache line alignment */
		if ((uint64_t)temp & (L1_CACHE_BYTES - 1))
			temp = (uint8_t *)ALIGN((uint64_t)temp, L1_CACHE_BYTES);
	} else {
		comp_cb->is_cache = true;
		if (!(temp = micscif_kmem_cache_alloc()))
			goto free_comp_cb;
		comp_cb->temp_buf_to_free = temp;
	}

	if (src_local) {
		temp += dst_cache_off;
		comp_cb->tmp_offset = dst_cache_off;
		micscif_rma_local_cpu_copy(work->src_offset, work->src_window, temp, work->len, true);
	} else {
		comp_cb->dst_window = work->dst_window;
		comp_cb->dst_offset = work->dst_offset;
		work->src_offset = work->src_offset - src_cache_off;
		comp_cb->len = work->len;
		work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
		comp_cb->header_padding = src_cache_off;
	}
	comp_cb->temp_buf = temp;

#ifndef _MIC_SCIF_
	micscif_pci_dev(work->remote_dev->sd_node, &pdev);
	comp_cb->temp_phys = mic_map_single(work->remote_dev->sd_node - 1,
		pdev, temp, KMEM_UNALIGNED_BUF_SIZE);

	if (mic_map_error(comp_cb->temp_phys)) {
		goto free_temp_buf;
	}

	comp_cb->remote_node = work->remote_dev->sd_node;
#endif
	if (0 > micscif_rma_list_dma_copy_unaligned(work, temp, chan, src_local))
		goto free_temp_buf;
	if (!src_local)
		work->fence_type = DO_DMA_INTR;
	return 0;
free_temp_buf:
	if (comp_cb->is_cache)
		micscif_kmem_cache_free(comp_cb->temp_buf_to_free);
	else
		kfree(comp_cb->temp_buf_to_free);
free_comp_cb:
	kfree(comp_cb);
error:
	printk(KERN_ERR "Unable to malloc %s %d\n", __func__, __LINE__);
	return -ENOMEM;
}

#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT)
static int softlockup_threshold = 60;
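/*
 * softlockup_threshold above is in seconds; the helper timer is re-armed at
 * a third of it. avert_softlockup() below simply records that the timer
 * fired so the CPU copy loop knows to touch the softlockup watchdog.
 */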
static void avert_softlockup(unsigned long data)
{
	*(unsigned long *)data = 1;
}

/*
 * Add a timer to handle the case of hogging the cpu for
 * time > softlockup_threshold.
 * Re-add the timer every softlockup_threshold / 3 so that even if
 * there is a huge delay in running our timer, we still don't hit
 * the softlockup case. (softlockup_tick() is run in hardirq context while
 * timers are run in softirq context.)
 */
static inline void add_softlockup_timer(struct timer_list *timer, unsigned long *data)
{
	setup_timer(timer, avert_softlockup, (unsigned long)data);
	timer->expires = jiffies + usecs_to_jiffies(softlockup_threshold * 1000000 / 3);
	add_timer(timer);
}

static inline void del_softlockup_timer(struct timer_list *timer)
{
	/* We need to delete synchronously since the variable being touched by
	 * the timer is on the stack.
	 */
	del_timer_sync(timer);
}
#endif

/*
 * micscif_rma_list_cpu_copy:
 *
 * Traverse all the windows and perform CPU copy.
 */
int micscif_rma_list_cpu_copy(struct mic_copy_work *work)
{
	void *src_virt, *dst_virt;
	size_t loop_len, remaining_len;
	int src_cache_off, dst_cache_off;
	uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset;
	struct reg_range_t *src_window = work->src_window;
	struct reg_range_t *dst_window = work->dst_window;
	uint64_t end_src_offset, end_dst_offset;
	struct list_head *item;
	int srcchunk_ind = 0;
	int dstchunk_ind = 0;
	uint64_t src_start_offset, dst_start_offset;
	int ret = 0;
#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT)
	unsigned long timer_fired = 0;
	struct timer_list timer;
	int cpu = smp_processor_id();
	add_softlockup_timer(&timer, &timer_fired);
#endif

	remaining_len = work->len;
	src_start_offset = src_window->offset;
	dst_start_offset = dst_window->offset;

	while (remaining_len) {
#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT)
		/* Ideally we should call schedule only if we didn't sleep
		 * in between. But there is no way to know that.
		 */
		if (timer_fired) {
			timer_fired = 0;
			if (smp_processor_id() == cpu)
				touch_softlockup_watchdog();
			else
				cpu = smp_processor_id();
			add_softlockup_timer(&timer, &timer_fired);
		}
#endif
		src_cache_off = src_offset & ~PAGE_MASK;
		dst_cache_off = dst_offset & ~PAGE_MASK;
		loop_len = PAGE_SIZE -
			((src_cache_off > dst_cache_off) ?
			src_cache_off : dst_cache_off);
		if (remaining_len < loop_len)
			loop_len = remaining_len;

		if (RMA_WINDOW_SELF == src_window->type)
			src_virt = get_local_va(src_offset, src_window, loop_len);
		else
			src_virt = ioremap_remote(src_offset,
				src_window, loop_len, work->loopback, work->remote_dev, &srcchunk_ind, &src_start_offset);
		if (!src_virt) {
			ret = -ENOMEM;
			goto error;
		}

		if (RMA_WINDOW_SELF == dst_window->type)
			dst_virt = get_local_va(dst_offset, dst_window, loop_len);
		else
			dst_virt = ioremap_remote(dst_offset,
				dst_window, loop_len, work->loopback, work->remote_dev, &dstchunk_ind, &dst_start_offset);
		if (!dst_virt) {
			if (RMA_WINDOW_PEER == src_window->type)
				iounmap_remote(src_virt, loop_len, work);
			ret = -ENOMEM;
			goto error;
		}

		if (work->loopback) {
			memcpy(dst_virt, src_virt, loop_len);
		} else {
			if (RMA_WINDOW_SELF == src_window->type)
				memcpy_toio(dst_virt, src_virt, loop_len);
			else
				memcpy_fromio(dst_virt, src_virt, loop_len);
			serializing_request(dst_virt);
			smp_mb();
		}
		if (RMA_WINDOW_PEER == src_window->type)
			iounmap_remote(src_virt, loop_len, work);

		if (RMA_WINDOW_PEER == dst_window->type)
			iounmap_remote(dst_virt, loop_len, work);

		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
		if (remaining_len) {
			end_src_offset = src_window->offset +
				(src_window->nr_pages << PAGE_SHIFT);
			end_dst_offset = dst_window->offset +
				(dst_window->nr_pages << PAGE_SHIFT);
			if (src_offset == end_src_offset) {
				item = (&src_window->list_member)->next;
				src_window = list_entry(item,
					struct reg_range_t,
					list_member);
				srcchunk_ind = 0;
				src_start_offset = src_window->offset;
			}
			if (dst_offset == end_dst_offset) {
				item = (&dst_window->list_member)->next;
				dst_window = list_entry(item,
					struct reg_range_t,
					list_member);
				dstchunk_ind = 0;
				dst_start_offset = dst_window->offset;
			}
		}
	}
error:
#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT)
	del_softlockup_timer(&timer);
#endif
	return ret;
}