Commit | Line | Data |
---|---|---|
800f879a AT |
1 | /* |
2 | * Copyright 2010-2017 Intel Corporation. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License, version 2, | |
6 | * as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * Disclaimer: The codes contained in these modules may be specific to | |
14 | * the Intel Software Development Platform codenamed Knights Ferry, | |
15 | * and the Intel product codenamed Knights Corner, and are not backward | |
16 | * compatible with other Intel products. Additionally, Intel will NOT | |
17 | * support the codes or instruction set in future products. | |
18 | * | |
19 | * Intel offers no warranty of any kind regarding the code. This code is | |
20 | * licensed on an "AS IS" basis and Intel is not obligated to provide | |
21 | * any support, assistance, installation, training, or other services | |
22 | * of any kind. Intel is also not obligated to provide any updates, | |
23 | * enhancements or extensions. Intel specifically disclaims any warranty | |
24 | * of merchantability, non-infringement, fitness for any particular | |
25 | * purpose, and any other warranty. | |
26 | * | |
27 | * Further, Intel disclaims all liability of any kind, including but | |
28 | * not limited to liability for infringement of any proprietary rights, | |
29 | * relating to the use of the code, even if Intel is notified of the | |
30 | * possibility of such liability. Except as expressly stated in an Intel | |
31 | * license agreement provided with this code and agreed upon with Intel, | |
32 | * no license, express or implied, by estoppel or otherwise, to any | |
33 | * intellectual property rights is granted herein. | |
34 | */ | |
35 | ||
36 | #include "mic/micscif.h" | |
37 | #include "mic/micscif_smpt.h" | |
38 | #include "mic/mic_dma_api.h" | |
39 | #include "mic/micscif_kmem_cache.h" | |
40 | #include "mic/micscif_rma.h" | |
41 | #include "mic/micscif_rma_list.h" | |
42 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
43 | #include <linux/sched.h> | |
44 | #endif | |
45 | #include <linux/highmem.h> | |
46 | #ifndef _MIC_SCIF_ | |
47 | #include "mic_common.h" | |
48 | #endif | |
49 | ||
50 | static __always_inline | |
51 | void *get_local_va(off_t off, struct reg_range_t *window, size_t len) | |
52 | { | |
53 | uint64_t page_nr = (off - window->offset) >> PAGE_SHIFT; | |
54 | off_t page_off = off & ~PAGE_MASK; | |
55 | void *va; | |
56 | ||
57 | if (RMA_WINDOW_SELF == window->type) { | |
58 | struct page **pages = window->pinned_pages->pages; | |
59 | va = (void *)((uint64_t) | |
60 | (page_address(pages[page_nr])) | page_off); | |
61 | } else { | |
62 | dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, NULL, NULL); | |
63 | #ifdef CONFIG_ML1OM | |
64 | if (RMA_ERROR_CODE == phys) | |
65 | return NULL; | |
66 | #endif | |
67 | va = (void *)((uint64_t) (phys_to_virt(phys))); | |
68 | } | |
69 | return va; | |
70 | } | |
71 | ||
#ifdef _MIC_SCIF_
/*
 * ioremap_remote - card-side mapping of a remote window offset.
 *
 * Resolves @off within @window to a bus address. For a real remote peer the
 * address is mapped uncached via ioremap_nocache() (pair with
 * iounmap_remote()); for loopback the memory is local, so phys_to_virt()
 * suffices and no unmap is needed. @index/@start_off are passed through to
 * micscif_get_dma_addr() as lookup cursors. Returns NULL on lookup failure
 * (CONFIG_ML1OM only).
 */
static __always_inline
void *ioremap_remote(off_t off, struct reg_range_t *window,
	size_t len, bool loopback, struct micscif_dev *dev, int *index, uint64_t *start_off)
{
	void *ret;
	dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, index, start_off);

#ifdef CONFIG_ML1OM
	if (RMA_ERROR_CODE == phys)
		return NULL;
#endif
	if (!loopback)
		ret = ioremap_nocache(phys, len);
	else
		ret = (void *)((uint64_t)phys_to_virt(phys));
	return ret;
}

/* Card side: no GTT bookkeeping required, simply forward. */
static __always_inline
void *ioremap_remote_gtt(off_t off, struct reg_range_t *window,
	size_t len, bool loopback, struct micscif_dev *dev, int ch_num, struct mic_copy_work *work)
{
	return ioremap_remote(off, window, len, loopback, dev, int *index, uint64_t *start_off is not used here */
	size_t len, bool loopback, struct micscif_dev *dev, int ch_num, struct mic_copy_work *work)
{
	return ioremap_remote(off, window, len, loopback, dev, NULL, NULL);
}
#else
/*
 * ioremap_remote_gtt - host-side equivalent; the card's memory is reached
 * through the permanently mapped aperture, so "mapping" is pointer
 * arithmetic on aper.va rather than an ioremap. For loopback the peer
 * window's pinned pages are used directly.
 */
static __always_inline
void *ioremap_remote_gtt(off_t off, struct reg_range_t *window,
	size_t len, bool loopback, struct micscif_dev *dev, int ch_num, struct mic_copy_work *work)
{
	void *ret;
	uint64_t page_nr = (off - window->offset) >> PAGE_SHIFT;
	off_t page_off = off & ~PAGE_MASK;
	if (!loopback) {
		dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, NULL, NULL);
		/* Ideally there should be a helper to do the +/-1 */
		ret = get_per_dev_ctx(dev->sd_node - 1)->aper.va + phys;
	} else {
		struct page **pages = ((struct reg_range_t *)
			(window->peer_window))->pinned_pages->pages;
		ret = (void *)((uint64_t)phys_to_virt(page_to_phys(pages[page_nr]))
			| page_off);
	}
	return ret;
}

/*
 * ioremap_remote - host-side variant; same aperture arithmetic as above but
 * threads the @index/@start_off lookup cursors through to
 * micscif_get_dma_addr().
 */
static __always_inline
void *ioremap_remote(off_t off, struct reg_range_t *window,
	size_t len, bool loopback, struct micscif_dev *dev, int *index, uint64_t *start_off)
{
	void *ret;
	int page_nr = (int)((off - window->offset) >> PAGE_SHIFT);
	off_t page_off = off & ~PAGE_MASK;

	if (!loopback) {
		dma_addr_t phys;
		mic_ctx_t *mic_ctx = get_per_dev_ctx(dev->sd_node - 1);
		phys = micscif_get_dma_addr(window, off, NULL, index, start_off);
		ret = mic_ctx->aper.va + phys;
	} else {
		struct page **pages = ((struct reg_range_t *)
			(window->peer_window))->pinned_pages->pages;
		ret = (void *)((uint64_t)phys_to_virt(page_to_phys(pages[page_nr]))
			| page_off);
	}
	return ret;
}
#endif
140 | ||
/*
 * iounmap_remote - undo ioremap_remote()/ioremap_remote_gtt().
 *
 * Only the card side (_MIC_SCIF_) ever ioremaps, and only for non-loopback
 * transfers, so this is a no-op on the host and for loopback mappings.
 */
static __always_inline void
iounmap_remote(void *virt, size_t size, struct mic_copy_work *work)
{
#ifdef _MIC_SCIF_
	if (!work->loopback)
		iounmap(virt);
#endif
}
149 | ||
/*
 * Takes care of ordering issue caused by
 * 1. Hardware: Only in the case of cpu copy from host to card because of WC memory.
 * 2. Software: If memcpy reorders copy instructions for optimization. This could happen
 * at both host and card.
 *
 * The copy is split: all but the final byte first, then a write barrier,
 * then the final byte — guaranteeing the last byte lands last, so a reader
 * polling on it observes the rest of the buffer as complete.
 */
static inline void ordered_memcpy(volatile char *dst,
		const char *src, size_t count)
{
	if (!count)
		return;

	/* Copy everything except the last byte, then fence before it. */
	memcpy_toio(dst, src, --count);
	wmb();
	*(dst + count) = *(src + count);
}
166 | ||
167 | static inline void micscif_unaligned_memcpy(volatile char *dst, | |
168 | const char *src, size_t count, bool ordered) | |
169 | { | |
170 | if (unlikely(ordered)) | |
171 | ordered_memcpy(dst, src, count); | |
172 | else | |
173 | memcpy_toio(dst, src, count); | |
174 | } | |
175 | ||
/*
 * micscif_rma_local_cpu_copy - copy between an RMA (self) window and a
 * temporary buffer.
 *
 * @offset:        starting absolute offset within the window list
 * @window:        first self window covering @offset
 * @temp:          temporary linear buffer
 * @remaining_len: number of bytes to copy
 * @to_temp:       true  -> window -> temp
 *                 false -> temp   -> window
 *
 * Walks page by page, advancing to the next window in the registration
 * list when the current one is exhausted. NOTE(review): if get_local_va()
 * fails mid-walk (CONFIG_ML1OM) the function returns silently, leaving a
 * partial copy — callers appear to accept this; confirm before relying on
 * completeness.
 */
void micscif_rma_local_cpu_copy(uint64_t offset, struct reg_range_t *window, uint8_t *temp, size_t remaining_len, bool to_temp)
{
	void *window_virt;
	size_t loop_len;
	int offset_in_page;
	uint64_t end_offset;
	struct list_head *item;

	/* Only self windows can be read/written directly by the CPU here. */
	BUG_ON(RMA_WINDOW_SELF != window->type);

	/* First chunk: up to the end of the page containing @offset. */
	offset_in_page = offset & ~PAGE_MASK;
	loop_len = PAGE_SIZE - offset_in_page;

	if (remaining_len < loop_len)
		loop_len = remaining_len;

	if (!(window_virt = get_local_va(offset, window, loop_len)))
		return;
	if (to_temp)
		memcpy(temp, window_virt, loop_len);
	else
		memcpy(window_virt, temp, loop_len);

	offset += loop_len;
	temp += loop_len;
	remaining_len -= loop_len;

	/* end_offset marks where the current window runs out. */
	end_offset = window->offset +
		(window->nr_pages << PAGE_SHIFT);
	while (remaining_len) {
		if (offset == end_offset) {
			/* Current window exhausted: step to the next one. */
			item = (
				&window->list_member)->next;
			window = list_entry(item,
				struct reg_range_t,
				list_member);
			end_offset = window->offset +
				(window->nr_pages << PAGE_SHIFT);
		}

		/* Subsequent chunks are page aligned: whole pages at a time. */
		loop_len = min(PAGE_SIZE, remaining_len);

		if (!(window_virt = get_local_va(offset, window, loop_len)))
			return;

		if (to_temp)
			memcpy(temp, window_virt, loop_len);
		else
			memcpy(window_virt, temp, loop_len);

		offset += loop_len;
		temp += loop_len;
		remaining_len -= loop_len;
	}
}
234 | ||
/*
 * micscif_rma_list_dma_copy_unaligned - DMA between a cache-aligned
 * temporary buffer and a (possibly unaligned) window list.
 *
 * @work:      copy descriptor (windows, offsets, length, ordering flags)
 * @temp:      cache-line-aligned staging buffer
 * @chan:      DMA channel to program
 * @src_local: true  -> temp is the source, the window is the destination
 *             false -> the window is the source, temp is the destination
 *
 * Layout of the transfer:
 *   1. Head: CPU-copy up to the next L1 cache line boundary.
 *   2. Body: DMA in maximal physically-contiguous chunks, walking the
 *      window list; an ordered transfer's last chunk is split in two with
 *      a DO_DMA_POLLING fence so the final cache line is written last.
 *   3. Tail: CPU-copy the sub-cacheline remainder, after draining the
 *      channel when ordering is requested.
 *   4. Queue a DO_DMA_INTR descriptor so @comp_cb fires on completion.
 *
 * Returns 0 on success or a negative errno.
 */
static int micscif_rma_list_dma_copy_unaligned(struct mic_copy_work *work, uint8_t *temp, struct dma_channel *chan, bool src_local)
{
	struct dma_completion_cb *comp_cb = work->comp_cb;
	dma_addr_t window_dma_addr, temp_dma_addr;
#ifndef _MIC_SCIF_
	/* Host side: temp was mic_map_single()ed; track its bus address. */
	dma_addr_t temp_phys = comp_cb->temp_phys;
#endif
	size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
	int offset_in_page;
	uint64_t end_offset = 0, offset = 0;
	struct reg_range_t *window = NULL;
	struct list_head *item = NULL;
	int ret = 0;
	void *window_virt_addr = NULL;
	size_t tail_len = 0;

	/* The window side of the transfer is whichever end is not temp. */
	if (src_local) {
		offset = work->dst_offset;
		window = work->dst_window;
	} else {
		offset = work->src_offset;
		window = work->src_window;
	}

	/* Step 1: CPU-copy the head up to an L1 cache line boundary. */
	offset_in_page = offset & (L1_CACHE_BYTES - 1);
	if (offset_in_page) {
		loop_len = L1_CACHE_BYTES - offset_in_page;
		loop_len = min(loop_len, remaining_len);

		if (!(window_virt_addr = ioremap_remote_gtt(offset, window, loop_len,
							work->loopback, work->remote_dev,
							get_chan_num(chan), work)))
			return -ENOMEM;

		if (src_local) {
			/* Ordered only if the head is the entire transfer. */
			micscif_unaligned_memcpy(window_virt_addr, temp, loop_len, work->ordered &&
						!(remaining_len - loop_len));
			serializing_request(window_virt_addr);
		} else {
			memcpy_fromio(temp, window_virt_addr, loop_len);
			serializing_request(temp);
		}
#ifdef RMA_DEBUG
		atomic_long_add_return(loop_len, &ms_info.rma_unaligned_cpu_cnt);
#endif
		smp_mb();
		iounmap_remote(window_virt_addr, loop_len, work);

		offset += loop_len;
		temp += loop_len;
#ifndef _MIC_SCIF_
		temp_phys += loop_len;
#endif
		remaining_len -= loop_len;
	}

	offset_in_page = offset & ~PAGE_MASK;
	end_offset = window->offset +
		(window->nr_pages << PAGE_SHIFT);

	/* Step 2: DMA the cache-line-aligned body; tail handled by CPU. */
	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
	remaining_len -= tail_len;
	while (remaining_len) {
		if (offset == end_offset) {
			/* Current window exhausted: advance in the list. */
			item = (&window->list_member)->next;
			window = list_entry(item,
				struct reg_range_t,
				list_member);
			end_offset = window->offset +
				(window->nr_pages << PAGE_SHIFT);
		}
#ifndef _MIC_SCIF_
		temp_dma_addr = temp_phys;
#else
		/* Card side: temp is kernel memory, physically contiguous. */
		temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
#endif
		window_dma_addr = micscif_get_dma_addr(window, offset, &nr_contig_bytes, NULL, NULL);

#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == window_dma_addr)
			return -ENXIO;
#endif
		/* Transfer at most one physically contiguous chunk at a time. */
		loop_len = min(nr_contig_bytes, remaining_len);

		if (src_local) {
			if (unlikely(work->ordered && !tail_len &&
				!(remaining_len - loop_len) &&
				loop_len != L1_CACHE_BYTES)) {
				/*
				 * Break up the last chunk of the transfer into two steps
				 * if there is no tail to gurantee DMA ordering.
				 * Passing DO_DMA_POLLING inserts a status update descriptor
				 * in step 1 which acts as a double sided synchronization
				 * fence for the DMA engine to ensure that the last cache line
				 * in step 2 is updated last.
				 */
				/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
				ret = do_dma(chan, DO_DMA_POLLING, temp_dma_addr, window_dma_addr,
					loop_len - L1_CACHE_BYTES, NULL);
				if (ret < 0) {
					printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
						__func__, __LINE__, ret);
					return ret;
				}
				offset += (loop_len - L1_CACHE_BYTES);
				temp_dma_addr += (loop_len - L1_CACHE_BYTES);
				window_dma_addr += (loop_len - L1_CACHE_BYTES);
				remaining_len -= (loop_len - L1_CACHE_BYTES);
				loop_len = remaining_len;

				/* Step 2) DMA: L1_CACHE_BYTES */
				ret = do_dma(chan, 0, temp_dma_addr, window_dma_addr,
					loop_len, NULL);
				if (ret < 0) {
					printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
						__func__, __LINE__, ret);
					return ret;
				}
			} else {
				int flags = 0;
				/* Fence before the final cache line of the body. */
				if (remaining_len == loop_len + L1_CACHE_BYTES)
					flags = DO_DMA_POLLING;
				ret = do_dma(chan, flags, temp_dma_addr, window_dma_addr,
					loop_len, NULL);
			}
		} else {
			ret = do_dma(chan, 0, window_dma_addr, temp_dma_addr,
				loop_len, NULL);
		}
		if (ret < 0) {
			printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
				__func__, __LINE__, ret);
			return ret;
		}
		offset += loop_len;
		temp += loop_len;
#ifndef _MIC_SCIF_
		temp_phys += loop_len;
#endif
		remaining_len -= loop_len;
		offset_in_page = 0;
	}
	/* Step 3: CPU-copy the sub-cacheline tail. */
	if (tail_len) {
		if (offset == end_offset) {
			item = (&window->list_member)->next;
			window = list_entry(item,
				struct reg_range_t,
				list_member);
			end_offset = window->offset +
				(window->nr_pages << PAGE_SHIFT);
		}
		if (!(window_virt_addr = ioremap_remote_gtt(offset, window, tail_len,
							work->loopback, work->remote_dev,
							get_chan_num(chan), work)))
			return -ENOMEM;

		/*
		 * The CPU copy for the tail bytes must be initiated only once previous
		 * DMA transfers for this endpoint have completed to guarantee
		 * ordering.
		 */
		if (unlikely(work->ordered)) {
			free_dma_channel(chan);
			work->dma_chan_released = true;
			if ((ret = drain_dma_intr(chan)))
				return ret;
		}

		if (src_local) {
			micscif_unaligned_memcpy(window_virt_addr, temp, tail_len, work->ordered);
			serializing_request(window_virt_addr);
		} else {
			memcpy_fromio(temp, window_virt_addr, tail_len);
			serializing_request(temp);
		}
#ifdef RMA_DEBUG
		atomic_long_add_return(tail_len, &ms_info.rma_unaligned_cpu_cnt);
#endif
		smp_mb();
		iounmap_remote(window_virt_addr, tail_len, work);
	}
	/* Re-acquire the channel if the ordered tail path released it. */
	if (work->dma_chan_released) {
		if ((ret = request_dma_channel(chan)))
			return ret;
		/* Callee frees the DMA channel lock, if it is held */
		work->dma_chan_released = false;
	}
	/* Step 4: zero-length interrupt descriptor fires comp_cb on completion. */
	ret = do_dma(chan, DO_DMA_INTR, 0, 0, 0, comp_cb);
	if (ret < 0) {
		printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
			__func__, __LINE__, ret);
		return ret;
	}
	return 0;
}
434 | ||
/*
 * is_local_dma_addr - does @addr refer to memory on this side of the bus?
 *
 * Card side: local addresses fall within the card's physical page count.
 * Host side: delegated to is_syspa() (system physical address check).
 */
static inline bool is_local_dma_addr(uint64_t addr)
{
#ifdef _MIC_SCIF_
	return (addr >> PAGE_SHIFT < num_physpages);
#else
	return is_syspa(addr);
#endif
}
443 | ||
/*
 * micscif_rma_list_dma_copy_aligned:
 *
 * Traverse all the windows and perform DMA copy, for the case where source
 * and destination share the same offset within an L1 cache line.
 *
 * Layout: an unaligned head (CPU copy up to a cache line boundary), a DMA
 * body walked in maximal physically contiguous chunks across both window
 * lists, and a sub-cacheline tail (CPU copy, after draining the channel
 * when ordering is requested). Under CONFIG_ML1OM, window refcounts are
 * held across the CPU-copy mappings.
 *
 * Returns 0 on success or a negative errno.
 */
static int micscif_rma_list_dma_copy_aligned(struct mic_copy_work *work, struct dma_channel *chan)
{
	dma_addr_t src_dma_addr, dst_dma_addr;
	size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0, dst_contig_bytes = 0;
	int src_cache_off, dst_cache_off, src_last_index = 0, dst_last_index = 0;
	uint64_t end_src_offset, end_dst_offset;
	void *src_virt, *dst_virt;
	struct reg_range_t *src_window = work->src_window;
	struct reg_range_t *dst_window = work->dst_window;
	uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset;
	uint64_t src_start_offset = src_window->offset, dst_start_offset = dst_window->offset;
	struct list_head *item;
	int ret = 0;

	remaining_len = work->len;

	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
	if (src_cache_off != dst_cache_off) {
		/* Caller contract: mismatched offsets go through the
		 * unaligned path instead; reaching here is a bug. */
		BUG_ON(1);
	} else if (src_cache_off != 0) {
		/* Head */
		loop_len = L1_CACHE_BYTES - src_cache_off;
		loop_len = min(loop_len, remaining_len);
		src_dma_addr = micscif_get_dma_addr(src_window, src_offset, NULL, NULL, NULL);
		dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, NULL, NULL, NULL);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == src_dma_addr)
			return -ENXIO;
		if (RMA_ERROR_CODE == dst_dma_addr)
			return -ENXIO;
		/* Pin both windows across the CPU copy below. */
		get_window_ref_count(src_window, 1);
		get_window_ref_count(dst_window, 1);
#endif
		if (is_local_dma_addr(src_dma_addr))
			src_virt = get_local_va(src_offset, src_window, loop_len);
		else
			src_virt = ioremap_remote_gtt(src_offset, src_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
		if (!src_virt) {
#ifdef CONFIG_ML1OM
			put_window_ref_count(src_window, 1);
			put_window_ref_count(dst_window, 1);
#endif
			return -ENOMEM;
		}
		if (is_local_dma_addr(dst_dma_addr))
			dst_virt = get_local_va(dst_offset, dst_window, loop_len);
		else
			dst_virt = ioremap_remote_gtt(dst_offset, dst_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
#ifdef CONFIG_ML1OM
		put_window_ref_count(src_window, 1);
		put_window_ref_count(dst_window, 1);
#endif
		if (!dst_virt) {
			if (!is_local_dma_addr(src_dma_addr))
				iounmap_remote(src_virt, loop_len, work);
			return -ENOMEM;
		}
		if (is_local_dma_addr(src_dma_addr)){
			/* Ordered only if the head is the whole transfer. */
			micscif_unaligned_memcpy(dst_virt, src_virt, loop_len,
				remaining_len == loop_len ? work->ordered : false);
		}
		else{
			memcpy_fromio(dst_virt, src_virt, loop_len);
		}
		serializing_request(dst_virt);
		smp_mb();
		if (!is_local_dma_addr(src_dma_addr))
			iounmap_remote(src_virt, loop_len, work);
		if (!is_local_dma_addr(dst_dma_addr))
			iounmap_remote(dst_virt, loop_len, work);
		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
	}

	end_src_offset = src_window->offset +
		(src_window->nr_pages << PAGE_SHIFT);
	end_dst_offset = dst_window->offset +
		(dst_window->nr_pages << PAGE_SHIFT);
	/* Body is cache-line aligned; sub-cacheline remainder is the tail. */
	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
	remaining_len -= tail_len;
	while (remaining_len) {
		if (src_offset == end_src_offset) {
			/* Source window exhausted: advance, reset cursors. */
			item = (&src_window->list_member)->next;
			src_window = list_entry(item,
				struct reg_range_t,
				list_member);
			end_src_offset = src_window->offset +
				(src_window->nr_pages << PAGE_SHIFT);
			src_last_index = 0;
			src_start_offset = src_window->offset;
		}
		if (dst_offset == end_dst_offset) {
			item = (&dst_window->list_member)->next;
			dst_window = list_entry(item, struct reg_range_t, list_member);
			end_dst_offset = dst_window->offset +
				(dst_window->nr_pages << PAGE_SHIFT);
			dst_last_index = 0;
			dst_start_offset = dst_window->offset;
		}

		/* compute dma addresses for transfer */
		src_dma_addr = micscif_get_dma_addr(src_window, src_offset, &src_contig_bytes, &src_last_index, &src_start_offset);
		dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, &dst_contig_bytes, &dst_last_index, &dst_start_offset);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == src_dma_addr)
			return -ENXIO;
		if (RMA_ERROR_CODE == dst_dma_addr)
			return -ENXIO;
#endif
		/* Bounded by contiguity on BOTH sides and remaining length. */
		loop_len = min(src_contig_bytes, dst_contig_bytes);
		loop_len = min(loop_len, remaining_len);
		if (unlikely(work->ordered && !tail_len &&
			!(remaining_len - loop_len) &&
			loop_len != L1_CACHE_BYTES)) {
			/*
			 * Break up the last chunk of the transfer into two steps
			 * if there is no tail to gurantee DMA ordering.
			 * Passing DO_DMA_POLLING inserts a status update descriptor
			 * in step 1 which acts as a double sided synchronization
			 * fence for the DMA engine to ensure that the last cache line
			 * in step 2 is updated last.
			 */
			/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
			ret = do_dma(chan, DO_DMA_POLLING, src_dma_addr, dst_dma_addr,
				loop_len - L1_CACHE_BYTES, NULL);
			if (ret < 0) {
				printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
					__func__, __LINE__, ret);
				return ret;
			}
			src_offset += (loop_len - L1_CACHE_BYTES);
			dst_offset += (loop_len - L1_CACHE_BYTES);
			src_dma_addr += (loop_len - L1_CACHE_BYTES);
			dst_dma_addr += (loop_len - L1_CACHE_BYTES);
			remaining_len -= (loop_len - L1_CACHE_BYTES);
			loop_len = remaining_len;

			/* Step 2) DMA: L1_CACHE_BYTES */
			ret = do_dma(chan, 0, src_dma_addr, dst_dma_addr,
				loop_len, NULL);
			if (ret < 0) {
				printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
					__func__, __LINE__, ret);
				return ret;
			}
		} else {
			int flags = 0;
			/* Fence before the final cache line of the body. */
			if (remaining_len == loop_len + L1_CACHE_BYTES)
				flags = DO_DMA_POLLING;
			ret = do_dma(chan, flags, src_dma_addr, dst_dma_addr,
				loop_len, NULL);
			if (ret < 0) {
				printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n",
					__func__, __LINE__, ret);
				return ret;
			}
		}
		src_offset += loop_len;
		dst_offset += loop_len;
		remaining_len -= loop_len;
	}
#ifdef CONFIG_MK1OM
	BUG_ON(remaining_len != 0);
#endif
#ifdef CONFIG_ML1OM
	if (remaining_len)
		return - ENXIO;
#endif
	/* Tail: CPU copy of the sub-cacheline remainder. */
	remaining_len = tail_len;
	if (remaining_len) {
		loop_len = remaining_len;
		if (src_offset == end_src_offset) {
			item = (&src_window->list_member)->next;
			src_window = list_entry(item,
				struct reg_range_t,
				list_member);
		}
		if (dst_offset == end_dst_offset) {
			item = (&dst_window->list_member)->next;
			dst_window = list_entry(item, struct reg_range_t, list_member);
		}

		src_dma_addr = micscif_get_dma_addr(src_window, src_offset, NULL, NULL, NULL);
		dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, NULL, NULL, NULL);
#ifdef CONFIG_ML1OM
		if (RMA_ERROR_CODE == src_dma_addr)
			return -ENXIO;
		if (RMA_ERROR_CODE == dst_dma_addr)
			return -ENXIO;
#endif
		/*
		 * The CPU copy for the tail bytes must be initiated only once previous
		 * DMA transfers for this endpoint have completed to guarantee
		 * ordering.
		 */
		if (unlikely(work->ordered)) {
			free_dma_channel(chan);
			work->dma_chan_released = true;
			if ((ret = drain_dma_poll(chan)))
				return ret;
		}
#ifdef CONFIG_ML1OM
		get_window_ref_count(src_window, 1);
		get_window_ref_count(dst_window, 1);
#endif
		if (is_local_dma_addr(src_dma_addr))
			src_virt = get_local_va(src_offset, src_window, loop_len);
		else
			src_virt = ioremap_remote_gtt(src_offset, src_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
		if (!src_virt) {
#ifdef CONFIG_ML1OM
			put_window_ref_count(src_window, 1);
			put_window_ref_count(dst_window, 1);
#endif
			return -ENOMEM;
		}

		if (is_local_dma_addr(dst_dma_addr))
			dst_virt = get_local_va(dst_offset, dst_window, loop_len);
		else
			dst_virt = ioremap_remote_gtt(dst_offset, dst_window,
					loop_len, work->loopback,
					work->remote_dev, get_chan_num(chan), work);
#ifdef CONFIG_ML1OM
		put_window_ref_count(src_window, 1);
		put_window_ref_count(dst_window, 1);
#endif
		if (!dst_virt) {
			if (!is_local_dma_addr(src_dma_addr))
				iounmap_remote(src_virt, loop_len, work);
			return -ENOMEM;
		}

		if (is_local_dma_addr(src_dma_addr)){
			micscif_unaligned_memcpy(dst_virt, src_virt, loop_len, work->ordered);
		}
		else{
			memcpy_fromio(dst_virt, src_virt, loop_len);
		}
		serializing_request(dst_virt);
		smp_mb();
		if (!is_local_dma_addr(src_dma_addr))
			iounmap_remote(src_virt, loop_len, work);

		if (!is_local_dma_addr(dst_dma_addr))
			iounmap_remote(dst_virt, loop_len, work);

		remaining_len -= loop_len;
#ifdef CONFIG_MK1OM
		BUG_ON(remaining_len != 0);
#endif
#ifdef CONFIG_ML1OM
		if (remaining_len)
			return - ENXIO;
#endif
	}

	return ret;
}
716 | ||
/*
 * micscif_rma_list_dma_copy_wrapper - entry point that routes a copy to the
 * aligned fast path or sets up the unaligned (staging-buffer) path.
 *
 * If source and destination share the same offset within an L1 cache line,
 * the aligned path DMAs directly. Otherwise a cache-aligned temporary
 * buffer is allocated (slab cache for large transfers), the local side is
 * CPU-copied into/out of it, and micscif_rma_list_dma_copy_unaligned()
 * performs the DMA. Loopback mismatched-offset copies fall back to a pure
 * CPU copy (card side only).
 *
 * Returns 0 on success, -ENOMEM on allocation/mapping failure.
 */
int micscif_rma_list_dma_copy_wrapper(struct endpt *epd, struct mic_copy_work *work, struct dma_channel *chan, off_t loffset)
{
	int src_cache_off, dst_cache_off;
	uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset;
	uint8_t *temp = NULL;
	bool src_local = true, dst_local = false;
	struct dma_completion_cb *comp_cb;
	dma_addr_t src_dma_addr, dst_dma_addr;
#ifndef _MIC_SCIF_
	struct pci_dev *pdev;
#endif

	/* Matching sub-cacheline offsets -> direct DMA, no staging needed. */
	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
	if (dst_cache_off == src_cache_off)
		return micscif_rma_list_dma_copy_aligned(work, chan);

	if (work->loopback) {
#ifdef _MIC_SCIF_
		BUG_ON(micscif_rma_list_cpu_copy(work));
		return 0;
#else
		/* Host loopback is never expected to reach here. */
		BUG_ON(1);
#endif
	}

	src_dma_addr = micscif_get_dma_addr(work->src_window, src_offset, NULL, NULL, NULL);
	dst_dma_addr = micscif_get_dma_addr(work->dst_window, dst_offset, NULL, NULL, NULL);

	if (is_local_dma_addr(src_dma_addr))
		src_local = true;
	else
		src_local = false;

	if (is_local_dma_addr(dst_dma_addr))
		dst_local = true;
	else
		dst_local = false;

	/* Self-assignment: silences "set but not used" for dst_local. */
	dst_local = dst_local;
	/* Transfer plus head/tail alignment slack must fit the slab buffer. */
	BUG_ON(work->len + (L1_CACHE_BYTES << 1) > KMEM_UNALIGNED_BUF_SIZE);

	/* Allocate dma_completion cb */
	if (!(comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL)))
		goto error;

	work->comp_cb = comp_cb;
	comp_cb->cb_cookie = (uint64_t)comp_cb;
	comp_cb->dma_completion_func = &micscif_rma_completion_cb;

	if (work->len + (L1_CACHE_BYTES << 1) < KMEM_UNALIGNED_BUF_SIZE) {
		/* Small transfer: plain kmalloc, aligned up by hand. */
		comp_cb->is_cache = false;
		if (!(temp = kmalloc(work->len + (L1_CACHE_BYTES << 1), GFP_KERNEL)))
			goto free_comp_cb;
		comp_cb->temp_buf_to_free = temp;
		/* kmalloc(..) does not guarantee cache line alignment */
		if ((uint64_t)temp & (L1_CACHE_BYTES - 1))
			temp = (uint8_t*)ALIGN((uint64_t)temp, L1_CACHE_BYTES);
	} else {
		/* Large transfer: dedicated slab cache (already aligned). */
		comp_cb->is_cache = true;
		if (!(temp = micscif_kmem_cache_alloc()))
			goto free_comp_cb;
		comp_cb->temp_buf_to_free = temp;
	}

	if (src_local) {
		/* Stage at dst's sub-cacheline offset so DMA lands aligned. */
		temp += dst_cache_off;
		comp_cb->tmp_offset = dst_cache_off;
		micscif_rma_local_cpu_copy(work->src_offset, work->src_window, temp, work->len, true);
	} else {
		/* Remote source: remember where the completion callback must
		 * copy the staged data, then widen the DMA to aligned bounds. */
		comp_cb->dst_window = work->dst_window;
		comp_cb->dst_offset = work->dst_offset;
		work->src_offset = work->src_offset - src_cache_off;
		comp_cb->len = work->len;
		work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
		comp_cb->header_padding = src_cache_off;
	}
	comp_cb->temp_buf = temp;

#ifndef _MIC_SCIF_
	/* Host side: map the staging buffer for device DMA. */
	micscif_pci_dev(work->remote_dev->sd_node, &pdev);
	comp_cb->temp_phys = mic_map_single(work->remote_dev->sd_node - 1,
		pdev, temp, KMEM_UNALIGNED_BUF_SIZE);

	if (mic_map_error(comp_cb->temp_phys)) {
		goto free_temp_buf;
	}

	comp_cb->remote_node = work->remote_dev->sd_node;
#endif
	/* NOTE(review): on this failure path the host-side mic_map_single()
	 * mapping is not unmapped here — presumably cleaned up elsewhere;
	 * verify against the completion/teardown code. */
	if (0 > micscif_rma_list_dma_copy_unaligned(work, temp, chan, src_local))
		goto free_temp_buf;
	if (!src_local)
		work->fence_type = DO_DMA_INTR;
	return 0;
free_temp_buf:
	if (comp_cb->is_cache)
		micscif_kmem_cache_free(comp_cb->temp_buf_to_free);
	else
		kfree(comp_cb->temp_buf_to_free);
free_comp_cb:
	kfree(comp_cb);
error:
	printk(KERN_ERR "Unable to malloc %s %d\n", __func__, __LINE__);
	return -ENOMEM;
}
823 | ||
824 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
825 | static int softlockup_threshold = 60; | |
826 | static void avert_softlockup(unsigned long data) | |
827 | { | |
828 | *(unsigned long*)data = 1; | |
829 | } | |
830 | ||
831 | /* | |
832 | * Add a timer to handle the case of hogging the cpu for | |
833 | * time > softlockup_threshold. | |
834 | * Add the timer every softlockup_threshold / 3 so that even if | |
835 | * there is a huge delay in running our timer, we will still don't hit | |
836 | * the softlockup case.(softlockup_tick() is run in hardirq() context while | |
837 | * timers are run at softirq context) | |
838 | * | |
839 | */ | |
840 | static inline void add_softlockup_timer(struct timer_list *timer, unsigned long *data) | |
841 | { | |
842 | setup_timer(timer, avert_softlockup, (unsigned long) data); | |
843 | timer->expires = jiffies + usecs_to_jiffies(softlockup_threshold * 1000000 / 3); | |
844 | add_timer(timer); | |
845 | } | |
846 | ||
/*
 * Tear down the anti-softlockup timer armed by add_softlockup_timer().
 */
static inline void del_softlockup_timer(struct timer_list *timer)
{
	/* We need delete synchronously since the variable being touched by
	 * timer interrupt is on the stack
	 */
	del_timer_sync(timer);
}
854 | #endif | |
855 | ||
856 | /* | |
857 | * micscif_rma_list_cpu_copy: | |
858 | * | |
859 | * Traverse all the windows and perform CPU copy. | |
860 | */ | |
861 | int micscif_rma_list_cpu_copy(struct mic_copy_work *work) | |
862 | { | |
863 | void *src_virt, *dst_virt; | |
864 | size_t loop_len, remaining_len; | |
865 | int src_cache_off, dst_cache_off; | |
866 | uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset; | |
867 | struct reg_range_t *src_window = work->src_window; | |
868 | struct reg_range_t *dst_window = work->dst_window; | |
869 | uint64_t end_src_offset, end_dst_offset; | |
870 | struct list_head *item; | |
871 | int srcchunk_ind = 0; | |
872 | int dstchunk_ind = 0; | |
873 | uint64_t src_start_offset, dst_start_offset; | |
874 | int ret = 0; | |
875 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
876 | unsigned long timer_fired = 0; | |
877 | struct timer_list timer; | |
878 | int cpu = smp_processor_id(); | |
879 | add_softlockup_timer(&timer, &timer_fired); | |
880 | #endif | |
881 | ||
882 | remaining_len = work->len; | |
883 | src_start_offset = src_window->offset; | |
884 | dst_start_offset = dst_window->offset; | |
885 | ||
886 | while (remaining_len) { | |
887 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
888 | /* Ideally we should call schedule only if we didn't sleep | |
889 | * in between. But there is no way to know that. | |
890 | */ | |
891 | if (timer_fired) { | |
892 | timer_fired = 0; | |
893 | if (smp_processor_id() == cpu) | |
894 | touch_softlockup_watchdog(); | |
895 | else | |
896 | cpu = smp_processor_id(); | |
897 | add_softlockup_timer(&timer, &timer_fired); | |
898 | } | |
899 | #endif | |
900 | src_cache_off = src_offset & ~PAGE_MASK; | |
901 | dst_cache_off = dst_offset & ~PAGE_MASK; | |
902 | loop_len = PAGE_SIZE - | |
903 | ((src_cache_off > dst_cache_off) ? | |
904 | src_cache_off : dst_cache_off); | |
905 | if (remaining_len < loop_len) | |
906 | loop_len = remaining_len; | |
907 | ||
908 | if (RMA_WINDOW_SELF == src_window->type) | |
909 | src_virt = get_local_va(src_offset, src_window, loop_len); | |
910 | else | |
911 | src_virt = ioremap_remote(src_offset, | |
912 | src_window, loop_len, work->loopback, work->remote_dev, &srcchunk_ind, &src_start_offset); | |
913 | if (!src_virt) { | |
914 | ret = -ENOMEM; | |
915 | goto error; | |
916 | } | |
917 | ||
918 | if (RMA_WINDOW_SELF == dst_window->type) | |
919 | dst_virt = get_local_va(dst_offset, dst_window, loop_len); | |
920 | else | |
921 | dst_virt = ioremap_remote(dst_offset, | |
922 | dst_window, loop_len, work->loopback, work->remote_dev, &dstchunk_ind, &dst_start_offset); | |
923 | if (!dst_virt) { | |
924 | if (RMA_WINDOW_PEER == src_window->type) | |
925 | iounmap_remote(src_virt, loop_len, work); | |
926 | ret = -ENOMEM; | |
927 | goto error; | |
928 | } | |
929 | ||
930 | if (work->loopback) | |
931 | memcpy(dst_virt, src_virt, loop_len); | |
932 | else { | |
933 | ||
934 | if (RMA_WINDOW_SELF == src_window->type){ | |
935 | memcpy_toio(dst_virt, src_virt, loop_len); | |
936 | } | |
937 | else{ | |
938 | memcpy_fromio(dst_virt, src_virt, loop_len); | |
939 | } | |
940 | serializing_request(dst_virt); | |
941 | smp_mb(); | |
942 | } | |
943 | if (RMA_WINDOW_PEER == src_window->type) | |
944 | iounmap_remote(src_virt, loop_len, work); | |
945 | ||
946 | if (RMA_WINDOW_PEER == dst_window->type) | |
947 | iounmap_remote(dst_virt, loop_len, work); | |
948 | ||
949 | src_offset += loop_len; | |
950 | dst_offset += loop_len; | |
951 | remaining_len -= loop_len; | |
952 | if (remaining_len) { | |
953 | end_src_offset = src_window->offset + | |
954 | (src_window->nr_pages << PAGE_SHIFT); | |
955 | end_dst_offset = dst_window->offset + | |
956 | (dst_window->nr_pages << PAGE_SHIFT); | |
957 | if (src_offset == end_src_offset) { | |
958 | item = ( | |
959 | &src_window->list_member)->next; | |
960 | src_window = list_entry(item, | |
961 | struct reg_range_t, | |
962 | list_member); | |
963 | srcchunk_ind = 0; | |
964 | src_start_offset = src_window->offset; | |
965 | } | |
966 | if (dst_offset == end_dst_offset) { | |
967 | item = ( | |
968 | &dst_window->list_member)->next; | |
969 | dst_window = list_entry(item, | |
970 | struct reg_range_t, | |
971 | list_member); | |
972 | dstchunk_ind = 0; | |
973 | dst_start_offset = dst_window->offset; | |
974 | } | |
975 | } | |
976 | } | |
977 | error: | |
978 | #if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) | |
979 | del_softlockup_timer(&timer); | |
980 | #endif | |
981 | return ret; | |
982 | } |