Commit | Line | Data |
---|---|---|
800f879a AT |
1 | /* |
2 | * Copyright 2010-2017 Intel Corporation. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License, version 2, | |
6 | * as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * Disclaimer: The codes contained in these modules may be specific to | |
14 | * the Intel Software Development Platform codenamed Knights Ferry, | |
15 | * and the Intel product codenamed Knights Corner, and are not backward | |
16 | * compatible with other Intel products. Additionally, Intel will NOT | |
17 | * support the codes or instruction set in future products. | |
18 | * | |
19 | * Intel offers no warranty of any kind regarding the code. This code is | |
20 | * licensed on an "AS IS" basis and Intel is not obligated to provide | |
21 | * any support, assistance, installation, training, or other services | |
22 | * of any kind. Intel is also not obligated to provide any updates, | |
23 | * enhancements or extensions. Intel specifically disclaims any warranty | |
24 | * of merchantability, non-infringement, fitness for any particular | |
25 | * purpose, and any other warranty. | |
26 | * | |
27 | * Further, Intel disclaims all liability of any kind, including but | |
28 | * not limited to liability for infringement of any proprietary rights, | |
29 | * relating to the use of the code, even if Intel is notified of the | |
30 | * possibility of such liability. Except as expressly stated in an Intel | |
31 | * license agreement provided with this code and agreed upon with Intel, | |
32 | * no license, express or implied, by estoppel or otherwise, to any | |
33 | * intellectual property rights is granted herein. | |
34 | */ | |
35 | ||
36 | #include "mic/micscif.h" | |
37 | #include "mic/micscif_smpt.h" | |
38 | #include "mic/mic_dma_api.h" | |
39 | #include "mic/micscif_kmem_cache.h" | |
40 | #include "mic/micscif_rma.h" | |
41 | #include "mic/micscif_rma_list.h" | |
42 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
43 | #include <linux/sched.h> | |
44 | #endif | |
45 | #include <linux/highmem.h> | |
46 | #ifndef _MIC_SCIF_ | |
47 | #include "mic_common.h" | |
48 | #endif | |
c8602a9f | 49 | #include <linux/nmi.h> |
800f879a AT |
50 | |
51 | static __always_inline | |
52 | void *get_local_va(off_t off, struct reg_range_t *window, size_t len) | |
53 | { | |
54 | uint64_t page_nr = (off - window->offset) >> PAGE_SHIFT; | |
55 | off_t page_off = off & ~PAGE_MASK; | |
56 | void *va; | |
57 | ||
58 | if (RMA_WINDOW_SELF == window->type) { | |
59 | struct page **pages = window->pinned_pages->pages; | |
60 | va = (void *)((uint64_t) | |
61 | (page_address(pages[page_nr])) | page_off); | |
62 | } else { | |
63 | dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, NULL, NULL); | |
64 | #ifdef CONFIG_ML1OM | |
65 | if (RMA_ERROR_CODE == phys) | |
66 | return NULL; | |
67 | #endif | |
68 | va = (void *)((uint64_t) (phys_to_virt(phys))); | |
69 | } | |
70 | return va; | |
71 | } | |
72 | ||
73 | #ifdef _MIC_SCIF_ | |
74 | static __always_inline | |
75 | void *ioremap_remote(off_t off, struct reg_range_t *window, | |
76 | size_t len, bool loopback, struct micscif_dev *dev, int *index, uint64_t *start_off) | |
77 | { | |
78 | void *ret; | |
79 | dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, index, start_off); | |
80 | ||
81 | #ifdef CONFIG_ML1OM | |
82 | if (RMA_ERROR_CODE == phys) | |
83 | return NULL; | |
84 | #endif | |
85 | if (!loopback) | |
86 | ret = ioremap_nocache(phys, len); | |
87 | else | |
88 | ret = (void *)((uint64_t)phys_to_virt(phys)); | |
89 | return ret; | |
90 | } | |
91 | ||
/*
 * ioremap_remote_gtt (card side build):
 *
 * Same mapping as ioremap_remote() with no index/start-offset hints.
 * @ch_num and @work are accepted for signature parity with the host
 * variant and are not used here.
 */
static __always_inline
void *ioremap_remote_gtt(off_t off, struct reg_range_t *window,
	size_t len, bool loopback, struct micscif_dev *dev, int ch_num, struct mic_copy_work *work)
{
	void *mapped = ioremap_remote(off, window, len, loopback, dev, NULL, NULL);

	return mapped;
}
98 | #else | |
99 | static __always_inline | |
100 | void *ioremap_remote_gtt(off_t off, struct reg_range_t *window, | |
101 | size_t len, bool loopback, struct micscif_dev *dev, int ch_num, struct mic_copy_work *work) | |
102 | { | |
103 | void *ret; | |
104 | uint64_t page_nr = (off - window->offset) >> PAGE_SHIFT; | |
105 | off_t page_off = off & ~PAGE_MASK; | |
106 | if (!loopback) { | |
107 | dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, NULL, NULL); | |
108 | /* Ideally there should be a helper to do the +/-1 */ | |
109 | ret = get_per_dev_ctx(dev->sd_node - 1)->aper.va + phys; | |
110 | } else { | |
111 | struct page **pages = ((struct reg_range_t *) | |
112 | (window->peer_window))->pinned_pages->pages; | |
113 | ret = (void *)((uint64_t)phys_to_virt(page_to_phys(pages[page_nr])) | |
114 | | page_off); | |
115 | } | |
116 | return ret; | |
117 | } | |
118 | ||
119 | static __always_inline | |
120 | void *ioremap_remote(off_t off, struct reg_range_t *window, | |
121 | size_t len, bool loopback, struct micscif_dev *dev, int *index, uint64_t *start_off) | |
122 | { | |
123 | void *ret; | |
124 | int page_nr = (int)((off - window->offset) >> PAGE_SHIFT); | |
125 | off_t page_off = off & ~PAGE_MASK; | |
126 | ||
127 | if (!loopback) { | |
128 | dma_addr_t phys; | |
129 | mic_ctx_t *mic_ctx = get_per_dev_ctx(dev->sd_node - 1); | |
130 | phys = micscif_get_dma_addr(window, off, NULL, index, start_off); | |
131 | ret = mic_ctx->aper.va + phys; | |
132 | } else { | |
133 | struct page **pages = ((struct reg_range_t *) | |
134 | (window->peer_window))->pinned_pages->pages; | |
135 | ret = (void *)((uint64_t)phys_to_virt(page_to_phys(pages[page_nr])) | |
136 | | page_off); | |
137 | } | |
138 | return ret; | |
139 | } | |
140 | #endif | |
141 | ||
142 | static __always_inline void | |
143 | iounmap_remote(void *virt, size_t size, struct mic_copy_work *work) | |
144 | { | |
145 | #ifdef _MIC_SCIF_ | |
146 | if (!work->loopback) | |
147 | iounmap(virt); | |
148 | #endif | |
149 | } | |
150 | ||
/*
 * ordered_memcpy:
 *
 * Copies @count bytes so that the final byte is stored only after all
 * preceding bytes: everything but the last byte goes through
 * memcpy_toio(), a write barrier follows, and the last byte is then
 * written on its own. This takes care of ordering issues caused by
 * 1. Hardware: Only in the case of cpu copy from host to card because
 *    of WC memory.
 * 2. Software: If memcpy reorders copy instructions for optimization.
 *    This could happen at both host and card.
 * A zero @count is a no-op.
 */
static inline void ordered_memcpy(volatile char *dst,
		const char *src, size_t count)
{
	size_t last;

	if (count == 0)
		return;

	last = count - 1;
	memcpy_toio(dst, src, last);
	wmb();
	dst[last] = src[last];
}
167 | ||
/*
 * micscif_unaligned_memcpy:
 *
 * Copies @count bytes from @src to @dst. The common case is a plain
 * memcpy_toio(); when @ordered is set the copy is delegated to
 * ordered_memcpy() so that the last byte lands last.
 */
static inline void micscif_unaligned_memcpy(volatile char *dst,
		const char *src, size_t count, bool ordered)
{
	if (likely(!ordered)) {
		memcpy_toio(dst, src, count);
		return;
	}

	ordered_memcpy(dst, src, count);
}
176 | ||
177 | /* | |
178 | * Copy between rma window and temporary buffer | |
179 | */ | |
180 | void micscif_rma_local_cpu_copy(uint64_t offset, struct reg_range_t *window, uint8_t *temp, size_t remaining_len, bool to_temp) | |
181 | { | |
182 | void *window_virt; | |
183 | size_t loop_len; | |
184 | int offset_in_page; | |
185 | uint64_t end_offset; | |
186 | struct list_head *item; | |
187 | ||
188 | BUG_ON(RMA_WINDOW_SELF != window->type); | |
189 | ||
190 | offset_in_page = offset & ~PAGE_MASK; | |
191 | loop_len = PAGE_SIZE - offset_in_page; | |
192 | ||
193 | if (remaining_len < loop_len) | |
194 | loop_len = remaining_len; | |
195 | ||
196 | if (!(window_virt = get_local_va(offset, window, loop_len))) | |
197 | return; | |
198 | if (to_temp) | |
199 | memcpy(temp, window_virt, loop_len); | |
200 | else | |
201 | memcpy(window_virt, temp, loop_len); | |
202 | ||
203 | offset += loop_len; | |
204 | temp += loop_len; | |
205 | remaining_len -= loop_len; | |
206 | ||
207 | end_offset = window->offset + | |
208 | (window->nr_pages << PAGE_SHIFT); | |
209 | while (remaining_len) { | |
210 | if (offset == end_offset) { | |
211 | item = ( | |
212 | &window->list_member)->next; | |
213 | window = list_entry(item, | |
214 | struct reg_range_t, | |
215 | list_member); | |
216 | end_offset = window->offset + | |
217 | (window->nr_pages << PAGE_SHIFT); | |
218 | } | |
219 | ||
220 | loop_len = min(PAGE_SIZE, remaining_len); | |
221 | ||
222 | if (!(window_virt = get_local_va(offset, window, loop_len))) | |
223 | return; | |
224 | ||
225 | if (to_temp) | |
226 | memcpy(temp, window_virt, loop_len); | |
227 | else | |
228 | memcpy(window_virt, temp, loop_len); | |
229 | ||
230 | offset += loop_len; | |
231 | temp += loop_len; | |
232 | remaining_len -= loop_len; | |
233 | } | |
234 | } | |
235 | ||
236 | /* | |
237 | * Comment this | |
238 | * | |
239 | */ | |
240 | static int micscif_rma_list_dma_copy_unaligned(struct mic_copy_work *work, uint8_t *temp, struct dma_channel *chan, bool src_local) | |
241 | { | |
242 | struct dma_completion_cb *comp_cb = work->comp_cb; | |
243 | dma_addr_t window_dma_addr, temp_dma_addr; | |
244 | #ifndef _MIC_SCIF_ | |
245 | dma_addr_t temp_phys = comp_cb->temp_phys; | |
246 | #endif | |
247 | size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len; | |
248 | int offset_in_page; | |
249 | uint64_t end_offset = 0, offset = 0; | |
250 | struct reg_range_t *window = NULL; | |
251 | struct list_head *item = NULL; | |
252 | int ret = 0; | |
253 | void *window_virt_addr = NULL; | |
254 | size_t tail_len = 0; | |
255 | ||
256 | if (src_local) { | |
257 | offset = work->dst_offset; | |
258 | window = work->dst_window; | |
259 | } else { | |
260 | offset = work->src_offset; | |
261 | window = work->src_window; | |
262 | } | |
263 | ||
264 | offset_in_page = offset & (L1_CACHE_BYTES - 1); | |
265 | if (offset_in_page) { | |
266 | loop_len = L1_CACHE_BYTES - offset_in_page; | |
267 | loop_len = min(loop_len, remaining_len); | |
268 | ||
269 | if (!(window_virt_addr = ioremap_remote_gtt(offset, window, loop_len, | |
270 | work->loopback, work->remote_dev, | |
271 | get_chan_num(chan), work))) | |
272 | return -ENOMEM; | |
273 | ||
274 | if (src_local) { | |
275 | micscif_unaligned_memcpy(window_virt_addr, temp, loop_len, work->ordered && | |
276 | !(remaining_len - loop_len)); | |
277 | serializing_request(window_virt_addr); | |
278 | } else { | |
279 | memcpy_fromio(temp, window_virt_addr, loop_len); | |
280 | serializing_request(temp); | |
281 | } | |
282 | #ifdef RMA_DEBUG | |
283 | atomic_long_add_return(loop_len, &ms_info.rma_unaligned_cpu_cnt); | |
284 | #endif | |
285 | smp_mb(); | |
286 | iounmap_remote(window_virt_addr, loop_len, work); | |
287 | ||
288 | offset += loop_len; | |
289 | temp += loop_len; | |
290 | #ifndef _MIC_SCIF_ | |
291 | temp_phys += loop_len; | |
292 | #endif | |
293 | remaining_len -= loop_len; | |
294 | } | |
295 | ||
296 | offset_in_page = offset & ~PAGE_MASK; | |
297 | end_offset = window->offset + | |
298 | (window->nr_pages << PAGE_SHIFT); | |
299 | ||
300 | tail_len = remaining_len & (L1_CACHE_BYTES - 1); | |
301 | remaining_len -= tail_len; | |
302 | while (remaining_len) { | |
303 | if (offset == end_offset) { | |
304 | item = (&window->list_member)->next; | |
305 | window = list_entry(item, | |
306 | struct reg_range_t, | |
307 | list_member); | |
308 | end_offset = window->offset + | |
309 | (window->nr_pages << PAGE_SHIFT); | |
310 | } | |
311 | #ifndef _MIC_SCIF_ | |
312 | temp_dma_addr = temp_phys; | |
313 | #else | |
314 | temp_dma_addr = (dma_addr_t)virt_to_phys(temp); | |
315 | #endif | |
316 | window_dma_addr = micscif_get_dma_addr(window, offset, &nr_contig_bytes, NULL, NULL); | |
317 | ||
318 | #ifdef CONFIG_ML1OM | |
319 | if (RMA_ERROR_CODE == window_dma_addr) | |
320 | return -ENXIO; | |
321 | #endif | |
322 | loop_len = min(nr_contig_bytes, remaining_len); | |
323 | ||
324 | if (src_local) { | |
325 | if (unlikely(work->ordered && !tail_len && | |
326 | !(remaining_len - loop_len) && | |
327 | loop_len != L1_CACHE_BYTES)) { | |
328 | /* | |
329 | * Break up the last chunk of the transfer into two steps | |
330 | * if there is no tail to gurantee DMA ordering. | |
331 | * Passing DO_DMA_POLLING inserts a status update descriptor | |
332 | * in step 1 which acts as a double sided synchronization | |
333 | * fence for the DMA engine to ensure that the last cache line | |
334 | * in step 2 is updated last. | |
335 | */ | |
336 | /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */ | |
337 | ret = do_dma(chan, DO_DMA_POLLING, temp_dma_addr, window_dma_addr, | |
338 | loop_len - L1_CACHE_BYTES, NULL); | |
339 | if (ret < 0) { | |
340 | printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", | |
341 | __func__, __LINE__, ret); | |
342 | return ret; | |
343 | } | |
344 | offset += (loop_len - L1_CACHE_BYTES); | |
345 | temp_dma_addr += (loop_len - L1_CACHE_BYTES); | |
346 | window_dma_addr += (loop_len - L1_CACHE_BYTES); | |
347 | remaining_len -= (loop_len - L1_CACHE_BYTES); | |
348 | loop_len = remaining_len; | |
349 | ||
350 | /* Step 2) DMA: L1_CACHE_BYTES */ | |
351 | ret = do_dma(chan, 0, temp_dma_addr, window_dma_addr, | |
352 | loop_len, NULL); | |
353 | if (ret < 0) { | |
354 | printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", | |
355 | __func__, __LINE__, ret); | |
356 | return ret; | |
357 | } | |
358 | } else { | |
359 | int flags = 0; | |
360 | if (remaining_len == loop_len + L1_CACHE_BYTES) | |
361 | flags = DO_DMA_POLLING; | |
362 | ret = do_dma(chan, flags, temp_dma_addr, window_dma_addr, | |
363 | loop_len, NULL); | |
364 | } | |
365 | } else { | |
366 | ret = do_dma(chan, 0, window_dma_addr, temp_dma_addr, | |
367 | loop_len, NULL); | |
368 | } | |
369 | if (ret < 0) { | |
370 | printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", | |
371 | __func__, __LINE__, ret); | |
372 | return ret; | |
373 | } | |
374 | offset += loop_len; | |
375 | temp += loop_len; | |
376 | #ifndef _MIC_SCIF_ | |
377 | temp_phys += loop_len; | |
378 | #endif | |
379 | remaining_len -= loop_len; | |
380 | offset_in_page = 0; | |
381 | } | |
382 | if (tail_len) { | |
383 | if (offset == end_offset) { | |
384 | item = (&window->list_member)->next; | |
385 | window = list_entry(item, | |
386 | struct reg_range_t, | |
387 | list_member); | |
388 | end_offset = window->offset + | |
389 | (window->nr_pages << PAGE_SHIFT); | |
390 | } | |
391 | if (!(window_virt_addr = ioremap_remote_gtt(offset, window, tail_len, | |
392 | work->loopback, work->remote_dev, | |
393 | get_chan_num(chan), work))) | |
394 | return -ENOMEM; | |
395 | ||
396 | /* | |
397 | * The CPU copy for the tail bytes must be initiated only once previous | |
398 | * DMA transfers for this endpoint have completed to guarantee | |
399 | * ordering. | |
400 | */ | |
401 | if (unlikely(work->ordered)) { | |
402 | free_dma_channel(chan); | |
403 | work->dma_chan_released = true; | |
404 | if ((ret = drain_dma_intr(chan))) | |
405 | return ret; | |
406 | } | |
407 | ||
408 | if (src_local) { | |
409 | micscif_unaligned_memcpy(window_virt_addr, temp, tail_len, work->ordered); | |
410 | serializing_request(window_virt_addr); | |
411 | } else { | |
412 | memcpy_fromio(temp, window_virt_addr, tail_len); | |
413 | serializing_request(temp); | |
414 | } | |
415 | #ifdef RMA_DEBUG | |
416 | atomic_long_add_return(tail_len, &ms_info.rma_unaligned_cpu_cnt); | |
417 | #endif | |
418 | smp_mb(); | |
419 | iounmap_remote(window_virt_addr, tail_len, work); | |
420 | } | |
421 | if (work->dma_chan_released) { | |
422 | if ((ret = request_dma_channel(chan))) | |
423 | return ret; | |
424 | /* Callee frees the DMA channel lock, if it is held */ | |
425 | work->dma_chan_released = false; | |
426 | } | |
427 | ret = do_dma(chan, DO_DMA_INTR, 0, 0, 0, comp_cb); | |
428 | if (ret < 0) { | |
429 | printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", | |
430 | __func__, __LINE__, ret); | |
431 | return ret; | |
432 | } | |
433 | return 0; | |
434 | } | |
435 | ||
/*
 * is_local_dma_addr:
 *
 * Tells whether @addr refers to local memory. On the card side build
 * the page frame number of @addr is compared against num_physpages;
 * on the host side build the decision is delegated to is_syspa().
 */
static inline bool is_local_dma_addr(uint64_t addr)
{
#ifdef _MIC_SCIF_
	uint64_t pfn = addr >> PAGE_SHIFT;

	return pfn < num_physpages;
#else
	return is_syspa(addr);
#endif
}
444 | ||
/*
 * micscif_rma_list_dma_copy_aligned:
 * @work: copy descriptor (source/destination windows, offsets, length,
 *        ordering and loopback flags, remote device)
 * @chan: DMA channel used to program the descriptors
 *
 * Traverse all the windows and perform DMA copy. The source and
 * destination offsets must share the same offset within a cache line
 * (BUG otherwise). The transfer is done in three phases:
 *   head - bytes up to the next cache line boundary, copied by the CPU;
 *   body - a multiple of L1_CACHE_BYTES, programmed as DMA descriptors
 *          in runs bounded by the contiguous bytes available in both
 *          the source and the destination window;
 *   tail - the remaining (< L1_CACHE_BYTES) bytes, copied by the CPU.
 * For ordered transfers the last cache line must land last: without a
 * tail the final DMA run is split in two with DO_DMA_POLLING on the
 * first part; with a tail the channel is released and drained before
 * the CPU copy. The channel is not re-requested in this function, so
 * work->dma_chan_released stays true on that path.
 *
 * Returns a negative error code on failure. Otherwise returns the last
 * value stored in ret (the result of the last do_dma()/drain call, or 0
 * if none was made).
 */
static int micscif_rma_list_dma_copy_aligned(struct mic_copy_work *work, struct dma_channel *chan)
{
	dma_addr_t src_dma_addr, dst_dma_addr;
	size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0, dst_contig_bytes = 0;
	int src_cache_off, dst_cache_off, src_last_index = 0, dst_last_index = 0;
	uint64_t end_src_offset, end_dst_offset;
	void *src_virt, *dst_virt;
	struct reg_range_t *src_window = work->src_window;
	struct reg_range_t *dst_window = work->dst_window;
	uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset;
	uint64_t src_start_offset = src_window->offset, dst_start_offset = dst_window->offset;
	struct list_head *item;
	int ret = 0;

	remaining_len = work->len;

	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
	if (src_cache_off != dst_cache_off) {
		/* Callers must route mismatched offsets to the unaligned path */
		BUG_ON(1);
	} else if (src_cache_off != 0) {
		/* Head: CPU copy up to the next cache line boundary */
		loop_len = L1_CACHE_BYTES - src_cache_off;
		loop_len = min(loop_len, remaining_len);
		src_dma_addr = micscif_get_dma_addr(src_window, src_offset, NULL, NULL, NULL);
		dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, NULL, NULL, NULL);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == src_dma_addr)
			return -ENXIO;
		if (RMA_ERROR_CODE == dst_dma_addr)
			return -ENXIO;
		/* Hold both windows while the virtual addresses are obtained */
		get_window_ref_count(src_window, 1);
		get_window_ref_count(dst_window, 1);
#endif
		/* Local addresses are used directly; remote ones need a mapping */
		if (is_local_dma_addr(src_dma_addr))
			src_virt = get_local_va(src_offset, src_window, loop_len);
		else
			src_virt = ioremap_remote_gtt(src_offset, src_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
		if (!src_virt) {
#ifdef CONFIG_ML1OM
			put_window_ref_count(src_window, 1);
			put_window_ref_count(dst_window, 1);
#endif
			return -ENOMEM;
		}
		if (is_local_dma_addr(dst_dma_addr))
			dst_virt = get_local_va(dst_offset, dst_window, loop_len);
		else
			dst_virt = ioremap_remote_gtt(dst_offset, dst_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
#ifdef CONFIG_ML1OM
		put_window_ref_count(src_window, 1);
		put_window_ref_count(dst_window, 1);
#endif
		if (!dst_virt) {
			/* Undo the source mapping before bailing out */
			if (!is_local_dma_addr(src_dma_addr))
				iounmap_remote(src_virt, loop_len, work);
			return -ENOMEM;
		}
		if (is_local_dma_addr(src_dma_addr)){
			/* Ordering only matters if the head is the whole transfer */
			micscif_unaligned_memcpy(dst_virt, src_virt, loop_len,
					remaining_len == loop_len ? work->ordered : false);
		}
		else{
			memcpy_fromio(dst_virt, src_virt, loop_len);
		}
		serializing_request(dst_virt);
		smp_mb();
		if (!is_local_dma_addr(src_dma_addr))
			iounmap_remote(src_virt, loop_len, work);
		if (!is_local_dma_addr(dst_dma_addr))
			iounmap_remote(dst_virt, loop_len, work);
		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
	}

	end_src_offset = src_window->offset +
		(src_window->nr_pages << PAGE_SHIFT);
	end_dst_offset = dst_window->offset +
		(dst_window->nr_pages << PAGE_SHIFT);
	/* Body: whole cache lines by DMA; the remainder is the tail */
	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
	remaining_len -= tail_len;
	while (remaining_len) {
		/* Advance to the next window on either side once it is exhausted */
		if (src_offset == end_src_offset) {
			item = (&src_window->list_member)->next;
			src_window = list_entry(item,
					struct reg_range_t,
					list_member);
			end_src_offset = src_window->offset +
				(src_window->nr_pages << PAGE_SHIFT);
			src_last_index = 0;
			src_start_offset = src_window->offset;
		}
		if (dst_offset == end_dst_offset) {
			item = (&dst_window->list_member)->next;
			dst_window = list_entry(item, struct reg_range_t, list_member);
			end_dst_offset = dst_window->offset +
				(dst_window->nr_pages << PAGE_SHIFT);
			dst_last_index = 0;
			dst_start_offset = dst_window->offset;
		}

		/* compute dma addresses for transfer */
		src_dma_addr = micscif_get_dma_addr(src_window, src_offset, &src_contig_bytes, &src_last_index, &src_start_offset);
		dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, &dst_contig_bytes, &dst_last_index, &dst_start_offset);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == src_dma_addr)
			return -ENXIO;
		if (RMA_ERROR_CODE == dst_dma_addr)
			return -ENXIO;
#endif
		/* One descriptor covers what is contiguous on both sides */
		loop_len = min(src_contig_bytes, dst_contig_bytes);
		loop_len = min(loop_len, remaining_len);
		if (unlikely(work->ordered && !tail_len &&
			!(remaining_len - loop_len) &&
			loop_len != L1_CACHE_BYTES)) {
			/*
			 * Break up the last chunk of the transfer into two steps
			 * if there is no tail to guarantee DMA ordering.
			 * Passing DO_DMA_POLLING inserts a status update descriptor
			 * in step 1 which acts as a double sided synchronization
			 * fence for the DMA engine to ensure that the last cache line
			 * in step 2 is updated last.
			 */
			/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
			ret = do_dma(chan, DO_DMA_POLLING, src_dma_addr, dst_dma_addr,
					loop_len - L1_CACHE_BYTES, NULL);
			if (ret < 0) {
				printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
						__func__, __LINE__, ret);
				return ret;
			}
			src_offset += (loop_len - L1_CACHE_BYTES);
			dst_offset += (loop_len - L1_CACHE_BYTES);
			src_dma_addr += (loop_len - L1_CACHE_BYTES);
			dst_dma_addr += (loop_len - L1_CACHE_BYTES);
			remaining_len -= (loop_len - L1_CACHE_BYTES);
			loop_len = remaining_len;

			/* Step 2) DMA: L1_CACHE_BYTES */
			ret = do_dma(chan, 0, src_dma_addr, dst_dma_addr,
					loop_len, NULL);
			if (ret < 0) {
				printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
						__func__, __LINE__, ret);
				return ret;
			}
		} else {
			int flags = 0;
			/* Request polling when exactly one cache line remains after this run */
			if (remaining_len == loop_len + L1_CACHE_BYTES)
				flags = DO_DMA_POLLING;
			ret = do_dma(chan, flags, src_dma_addr, dst_dma_addr,
					loop_len, NULL);
			if (ret < 0) {
				printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
						__func__, __LINE__, ret);
				return ret;
			}
		}
		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
	}
#ifdef CONFIG_MK1OM
	BUG_ON(remaining_len != 0);
#endif
#ifdef CONFIG_ML1OM
	if (remaining_len)
		return - ENXIO;
#endif
	/* Tail: CPU copy of the last partial cache line */
	remaining_len = tail_len;
	if (remaining_len) {
		loop_len = remaining_len;
		if (src_offset == end_src_offset) {
			item = (&src_window->list_member)->next;
			src_window = list_entry(item,
					struct reg_range_t,
					list_member);
		}
		if (dst_offset == end_dst_offset) {
			item = (&dst_window->list_member)->next;
			dst_window = list_entry(item, struct reg_range_t, list_member);
		}

		src_dma_addr = micscif_get_dma_addr(src_window, src_offset, NULL, NULL, NULL);
		dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, NULL, NULL, NULL);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == src_dma_addr)
			return -ENXIO;
		if (RMA_ERROR_CODE == dst_dma_addr)
			return -ENXIO;
#endif
		/*
		 * The CPU copy for the tail bytes must be initiated only once previous
		 * DMA transfers for this endpoint have completed to guarantee
		 * ordering.
		 */
		if (unlikely(work->ordered)) {
			free_dma_channel(chan);
			/* NOTE(review): not re-requested here; presumably the caller checks this flag */
			work->dma_chan_released = true;
			if ((ret = drain_dma_poll(chan)))
				return ret;
		}
#ifdef CONFIG_ML1OM
		get_window_ref_count(src_window, 1);
		get_window_ref_count(dst_window, 1);
#endif
		if (is_local_dma_addr(src_dma_addr))
			src_virt = get_local_va(src_offset, src_window, loop_len);
		else
			src_virt = ioremap_remote_gtt(src_offset, src_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
		if (!src_virt) {
#ifdef CONFIG_ML1OM
			put_window_ref_count(src_window, 1);
			put_window_ref_count(dst_window, 1);
#endif
			return -ENOMEM;
		}

		if (is_local_dma_addr(dst_dma_addr))
			dst_virt = get_local_va(dst_offset, dst_window, loop_len);
		else
			dst_virt = ioremap_remote_gtt(dst_offset, dst_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
#ifdef CONFIG_ML1OM
		put_window_ref_count(src_window, 1);
		put_window_ref_count(dst_window, 1);
#endif
		if (!dst_virt) {
			/* Undo the source mapping before bailing out */
			if (!is_local_dma_addr(src_dma_addr))
				iounmap_remote(src_virt, loop_len, work);
			return -ENOMEM;
		}

		if (is_local_dma_addr(src_dma_addr)){
			micscif_unaligned_memcpy(dst_virt, src_virt, loop_len, work->ordered);
		}
		else{
			memcpy_fromio(dst_virt, src_virt, loop_len);
		}
		serializing_request(dst_virt);
		smp_mb();
		if (!is_local_dma_addr(src_dma_addr))
			iounmap_remote(src_virt, loop_len, work);

		if (!is_local_dma_addr(dst_dma_addr))
			iounmap_remote(dst_virt, loop_len, work);

		remaining_len -= loop_len;
#ifdef CONFIG_MK1OM
		BUG_ON(remaining_len != 0);
#endif
#ifdef CONFIG_ML1OM
		if (remaining_len)
			return - ENXIO;
#endif
	}

	return ret;
}
717 | ||
718 | int micscif_rma_list_dma_copy_wrapper(struct endpt *epd, struct mic_copy_work *work, struct dma_channel *chan, off_t loffset) | |
719 | { | |
720 | int src_cache_off, dst_cache_off; | |
721 | uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset; | |
722 | uint8_t *temp = NULL; | |
723 | bool src_local = true, dst_local = false; | |
724 | struct dma_completion_cb *comp_cb; | |
725 | dma_addr_t src_dma_addr, dst_dma_addr; | |
726 | #ifndef _MIC_SCIF_ | |
727 | struct pci_dev *pdev; | |
728 | #endif | |
729 | ||
730 | src_cache_off = src_offset & (L1_CACHE_BYTES - 1); | |
731 | dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1); | |
732 | if (dst_cache_off == src_cache_off) | |
733 | return micscif_rma_list_dma_copy_aligned(work, chan); | |
734 | ||
735 | if (work->loopback) { | |
736 | #ifdef _MIC_SCIF_ | |
737 | BUG_ON(micscif_rma_list_cpu_copy(work)); | |
738 | return 0; | |
739 | #else | |
740 | BUG_ON(1); | |
741 | #endif | |
742 | } | |
743 | ||
744 | src_dma_addr = micscif_get_dma_addr(work->src_window, src_offset, NULL, NULL, NULL); | |
745 | dst_dma_addr = micscif_get_dma_addr(work->dst_window, dst_offset, NULL, NULL, NULL); | |
746 | ||
747 | if (is_local_dma_addr(src_dma_addr)) | |
748 | src_local = true; | |
749 | else | |
750 | src_local = false; | |
751 | ||
752 | if (is_local_dma_addr(dst_dma_addr)) | |
753 | dst_local = true; | |
754 | else | |
755 | dst_local = false; | |
756 | ||
757 | dst_local = dst_local; | |
758 | BUG_ON(work->len + (L1_CACHE_BYTES << 1) > KMEM_UNALIGNED_BUF_SIZE); | |
759 | ||
760 | /* Allocate dma_completion cb */ | |
761 | if (!(comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL))) | |
762 | goto error; | |
763 | ||
764 | work->comp_cb = comp_cb; | |
765 | comp_cb->cb_cookie = (uint64_t)comp_cb; | |
766 | comp_cb->dma_completion_func = &micscif_rma_completion_cb; | |
767 | ||
768 | if (work->len + (L1_CACHE_BYTES << 1) < KMEM_UNALIGNED_BUF_SIZE) { | |
769 | comp_cb->is_cache = false; | |
770 | if (!(temp = kmalloc(work->len + (L1_CACHE_BYTES << 1), GFP_KERNEL))) | |
771 | goto free_comp_cb; | |
772 | comp_cb->temp_buf_to_free = temp; | |
773 | /* kmalloc(..) does not guarantee cache line alignment */ | |
774 | if ((uint64_t)temp & (L1_CACHE_BYTES - 1)) | |
775 | temp = (uint8_t*)ALIGN((uint64_t)temp, L1_CACHE_BYTES); | |
776 | } else { | |
777 | comp_cb->is_cache = true; | |
778 | if (!(temp = micscif_kmem_cache_alloc())) | |
779 | goto free_comp_cb; | |
780 | comp_cb->temp_buf_to_free = temp; | |
781 | } | |
782 | ||
783 | if (src_local) { | |
784 | temp += dst_cache_off; | |
785 | comp_cb->tmp_offset = dst_cache_off; | |
786 | micscif_rma_local_cpu_copy(work->src_offset, work->src_window, temp, work->len, true); | |
787 | } else { | |
788 | comp_cb->dst_window = work->dst_window; | |
789 | comp_cb->dst_offset = work->dst_offset; | |
790 | work->src_offset = work->src_offset - src_cache_off; | |
791 | comp_cb->len = work->len; | |
792 | work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES); | |
793 | comp_cb->header_padding = src_cache_off; | |
794 | } | |
795 | comp_cb->temp_buf = temp; | |
796 | ||
797 | #ifndef _MIC_SCIF_ | |
798 | micscif_pci_dev(work->remote_dev->sd_node, &pdev); | |
799 | comp_cb->temp_phys = mic_map_single(work->remote_dev->sd_node - 1, | |
800 | pdev, temp, KMEM_UNALIGNED_BUF_SIZE); | |
801 | ||
802 | if (mic_map_error(comp_cb->temp_phys)) { | |
803 | goto free_temp_buf; | |
804 | } | |
805 | ||
806 | comp_cb->remote_node = work->remote_dev->sd_node; | |
807 | #endif | |
808 | if (0 > micscif_rma_list_dma_copy_unaligned(work, temp, chan, src_local)) | |
809 | goto free_temp_buf; | |
810 | if (!src_local) | |
811 | work->fence_type = DO_DMA_INTR; | |
812 | return 0; | |
813 | free_temp_buf: | |
814 | if (comp_cb->is_cache) | |
815 | micscif_kmem_cache_free(comp_cb->temp_buf_to_free); | |
816 | else | |
817 | kfree(comp_cb->temp_buf_to_free); | |
818 | free_comp_cb: | |
819 | kfree(comp_cb); | |
820 | error: | |
821 | printk(KERN_ERR "Unable to malloc %s %d\n", __func__, __LINE__); | |
822 | return -ENOMEM; | |
823 | } | |
824 | ||
825 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
/* Seconds of CPU hogging after which the softlockup detector would fire */
static int softlockup_threshold = 60;

/*
 * Timer callback: @data carries the address of an unsigned long flag,
 * which is set to 1 to tell the copy loop that the timer has fired.
 */
static void avert_softlockup(unsigned long data)
{
	unsigned long *fired = (unsigned long *)data;

	*fired = 1;
}
831 | ||
832 | /* | |
833 | * Add a timer to handle the case of hogging the cpu for | |
834 | * time > softlockup_threshold. | |
835 | * Add the timer every softlockup_threshold / 3 so that even if | |
836 | * there is a huge delay in running our timer, we will still don't hit | |
837 | * the softlockup case.(softlockup_tick() is run in hardirq() context while | |
838 | * timers are run at softirq context) | |
839 | * | |
840 | */ | |
841 | static inline void add_softlockup_timer(struct timer_list *timer, unsigned long *data) | |
842 | { | |
843 | setup_timer(timer, avert_softlockup, (unsigned long) data); | |
844 | timer->expires = jiffies + usecs_to_jiffies(softlockup_threshold * 1000000 / 3); | |
845 | add_timer(timer); | |
846 | } | |
847 | ||
/*
 * Remove a timer queued by add_softlockup_timer().
 *
 * The deletion must be synchronous because the variable touched by the
 * timer callback lives on the caller's stack.
 */
static inline void del_softlockup_timer(struct timer_list *timer)
{
	del_timer_sync(timer);
}
855 | #endif | |
856 | ||
857 | /* | |
858 | * micscif_rma_list_cpu_copy: | |
859 | * | |
860 | * Traverse all the windows and perform CPU copy. | |
861 | */ | |
862 | int micscif_rma_list_cpu_copy(struct mic_copy_work *work) | |
863 | { | |
864 | void *src_virt, *dst_virt; | |
865 | size_t loop_len, remaining_len; | |
866 | int src_cache_off, dst_cache_off; | |
867 | uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset; | |
868 | struct reg_range_t *src_window = work->src_window; | |
869 | struct reg_range_t *dst_window = work->dst_window; | |
870 | uint64_t end_src_offset, end_dst_offset; | |
871 | struct list_head *item; | |
872 | int srcchunk_ind = 0; | |
873 | int dstchunk_ind = 0; | |
874 | uint64_t src_start_offset, dst_start_offset; | |
875 | int ret = 0; | |
876 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
877 | unsigned long timer_fired = 0; | |
878 | struct timer_list timer; | |
879 | int cpu = smp_processor_id(); | |
880 | add_softlockup_timer(&timer, &timer_fired); | |
881 | #endif | |
882 | ||
883 | remaining_len = work->len; | |
884 | src_start_offset = src_window->offset; | |
885 | dst_start_offset = dst_window->offset; | |
886 | ||
887 | while (remaining_len) { | |
888 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
889 | /* Ideally we should call schedule only if we didn't sleep | |
890 | * in between. But there is no way to know that. | |
891 | */ | |
892 | if (timer_fired) { | |
893 | timer_fired = 0; | |
894 | if (smp_processor_id() == cpu) | |
895 | touch_softlockup_watchdog(); | |
896 | else | |
897 | cpu = smp_processor_id(); | |
898 | add_softlockup_timer(&timer, &timer_fired); | |
899 | } | |
900 | #endif | |
901 | src_cache_off = src_offset & ~PAGE_MASK; | |
902 | dst_cache_off = dst_offset & ~PAGE_MASK; | |
903 | loop_len = PAGE_SIZE - | |
904 | ((src_cache_off > dst_cache_off) ? | |
905 | src_cache_off : dst_cache_off); | |
906 | if (remaining_len < loop_len) | |
907 | loop_len = remaining_len; | |
908 | ||
909 | if (RMA_WINDOW_SELF == src_window->type) | |
910 | src_virt = get_local_va(src_offset, src_window, loop_len); | |
911 | else | |
912 | src_virt = ioremap_remote(src_offset, | |
913 | src_window, loop_len, work->loopback, work->remote_dev, &srcchunk_ind, &src_start_offset); | |
914 | if (!src_virt) { | |
915 | ret = -ENOMEM; | |
916 | goto error; | |
917 | } | |
918 | ||
919 | if (RMA_WINDOW_SELF == dst_window->type) | |
920 | dst_virt = get_local_va(dst_offset, dst_window, loop_len); | |
921 | else | |
922 | dst_virt = ioremap_remote(dst_offset, | |
923 | dst_window, loop_len, work->loopback, work->remote_dev, &dstchunk_ind, &dst_start_offset); | |
924 | if (!dst_virt) { | |
925 | if (RMA_WINDOW_PEER == src_window->type) | |
926 | iounmap_remote(src_virt, loop_len, work); | |
927 | ret = -ENOMEM; | |
928 | goto error; | |
929 | } | |
930 | ||
931 | if (work->loopback) | |
932 | memcpy(dst_virt, src_virt, loop_len); | |
933 | else { | |
934 | ||
935 | if (RMA_WINDOW_SELF == src_window->type){ | |
936 | memcpy_toio(dst_virt, src_virt, loop_len); | |
937 | } | |
938 | else{ | |
939 | memcpy_fromio(dst_virt, src_virt, loop_len); | |
940 | } | |
941 | serializing_request(dst_virt); | |
942 | smp_mb(); | |
943 | } | |
944 | if (RMA_WINDOW_PEER == src_window->type) | |
945 | iounmap_remote(src_virt, loop_len, work); | |
946 | ||
947 | if (RMA_WINDOW_PEER == dst_window->type) | |
948 | iounmap_remote(dst_virt, loop_len, work); | |
949 | ||
950 | src_offset += loop_len; | |
951 | dst_offset += loop_len; | |
952 | remaining_len -= loop_len; | |
953 | if (remaining_len) { | |
954 | end_src_offset = src_window->offset + | |
955 | (src_window->nr_pages << PAGE_SHIFT); | |
956 | end_dst_offset = dst_window->offset + | |
957 | (dst_window->nr_pages << PAGE_SHIFT); | |
958 | if (src_offset == end_src_offset) { | |
959 | item = ( | |
960 | &src_window->list_member)->next; | |
961 | src_window = list_entry(item, | |
962 | struct reg_range_t, | |
963 | list_member); | |
964 | srcchunk_ind = 0; | |
965 | src_start_offset = src_window->offset; | |
966 | } | |
967 | if (dst_offset == end_dst_offset) { | |
968 | item = ( | |
969 | &dst_window->list_member)->next; | |
970 | dst_window = list_entry(item, | |
971 | struct reg_range_t, | |
972 | list_member); | |
973 | dstchunk_ind = 0; | |
974 | dst_start_offset = dst_window->offset; | |
975 | } | |
976 | } | |
977 | } | |
978 | error: | |
979 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
980 | del_softlockup_timer(&timer); | |
981 | #endif | |
982 | return ret; | |
983 | } |