Commit | Line | Data |
---|---|---|
800f879a AT |
1 | /* |
2 | * Copyright 2010-2017 Intel Corporation. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License, version 2, | |
6 | * as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * Disclaimer: The codes contained in these modules may be specific to | |
14 | * the Intel Software Development Platform codenamed Knights Ferry, | |
15 | * and the Intel product codenamed Knights Corner, and are not backward | |
16 | * compatible with other Intel products. Additionally, Intel will NOT | |
17 | * support the codes or instruction set in future products. | |
18 | * | |
19 | * Intel offers no warranty of any kind regarding the code. This code is | |
20 | * licensed on an "AS IS" basis and Intel is not obligated to provide | |
21 | * any support, assistance, installation, training, or other services | |
22 | * of any kind. Intel is also not obligated to provide any updates, | |
23 | * enhancements or extensions. Intel specifically disclaims any warranty | |
24 | * of merchantability, non-infringement, fitness for any particular | |
25 | * purpose, and any other warranty. | |
26 | * | |
27 | * Further, Intel disclaims all liability of any kind, including but | |
28 | * not limited to liability for infringement of any proprietary rights, | |
29 | * relating to the use of the code, even if Intel is notified of the | |
30 | * possibility of such liability. Except as expressly stated in an Intel | |
31 | * license agreement provided with this code and agreed upon with Intel, | |
32 | * no license, express or implied, by estoppel or otherwise, to any | |
33 | * intellectual property rights is granted herein. | |
34 | */ | |
35 | ||
36 | #include <mic/micscif.h> | |
37 | #include <mic/micscif_smpt.h> | |
38 | #if defined(HOST) || defined(WINDOWS) | |
39 | #include "mic_common.h" | |
40 | #endif | |
41 | ||
/* Forward declaration of the per-board context structure tag. */
struct _mic_ctx_t;

/* Index of the SMPT entry that covers host (system) address sysaddr. */
#define SYSTEM_ADDR_TO_SMPT(sysaddr) ((sysaddr) >> (MIC_SYSTEM_PAGE_SHIFT))
/*
 * Index of the SMPT entry behind a MIC-side physical address that lies in
 * the system-memory aperture starting at MIC_SYSTEM_BASE.
 */
#define HOSTMIC_PA_TO_SMPT(hostmic_pa) (((hostmic_pa) - MIC_SYSTEM_BASE)\
					>> MIC_SYSTEM_PAGE_SHIFT)

/* Number of SMPT entries managed by the code in this file. */
#define NUM_SMPT_ENTRIES_IN_USE 32
/* MIC-side physical address at which the window of entry smpt_index starts. */
#define SMPT_TO_MIC_PA(smpt_index) (MIC_SYSTEM_BASE + ((smpt_index) * \
					MIC_SYSTEM_PAGE_SIZE))
/* Total number of bytes reachable through the entries in use. */
#define MAX_HOST_MEMORY ((NUM_SMPT_ENTRIES_IN_USE) * MIC_SYSTEM_PAGE_SIZE)
/* Last (inclusive) MIC-side address of the system-memory aperture. */
#define MAX_SYSTEM_ADDR ((MIC_SYSTEM_BASE) + (MAX_HOST_MEMORY) - (1))
/* True when addr lies in [MIC_SYSTEM_BASE, MAX_SYSTEM_ADDR]; evaluates addr twice. */
#define IS_MIC_SYSTEM_ADDR(addr) (((addr) >= MIC_SYSTEM_BASE) && \
					((addr) <= MAX_SYSTEM_ADDR))

/* Offset of x inside its PAGE_SIZE page. */
#define _PAGE_OFFSET(x) ((x) & ((PAGE_SIZE) - (1ULL)))
/*
 * Offset of x inside its SMPT page.
 * NOTE(review): assumes MIC_SYSTEM_PAGE_MASK == MIC_SYSTEM_PAGE_SIZE - 1;
 * the mask is defined outside this file - confirm.
 */
#define SMPT_OFFSET(x) ((x) & MIC_SYSTEM_PAGE_MASK)
/*
 * Round x down / up to a PAGE_SIZE boundary. The "LOW" form subtracts
 * (PAGE_SIZE - 1) first, so it rounds down provided ALIGN() rounds up to
 * the next multiple (ALIGN is defined elsewhere - presumably the kernel one).
 */
#define PAGE_ALIGN_LOW(x) ALIGN(((x) - ((PAGE_SIZE) - 1ULL)), (PAGE_SIZE))
#define PAGE_ALIGN_HIGH(x) ALIGN((x), (PAGE_SIZE))
/* Same as above, but on MIC_SYSTEM_PAGE_SIZE (SMPT page) boundaries. */
#define SMPT_ALIGN_LOW(x) ALIGN(((x) - (MIC_SYSTEM_PAGE_MASK)), \
				(MIC_SYSTEM_PAGE_SIZE))
#define SMPT_ALIGN_HIGH(x) ALIGN((x), (MIC_SYSTEM_PAGE_SIZE))
63 | ||
#if defined(HOST)
/* Set to 1 to compile in the SMPT bookkeeping counters and debug printk()s. */
#define SMPT_LOGGING 0
#if SMPT_LOGGING
/* Per-board byte balance: mic_map() adds the size, mic_unmap() subtracts it. */
static int64_t smpt_ref_count_g[MAX_BOARD_SUPPORTED];
/* Number of mic_map() calls, summed over all boards. */
static int64_t map_count_g;
/* Number of mic_unmap() calls that reached the table update, all boards. */
static int64_t unmap_count_g;
#endif
#endif
72 | ||
73 | void mic_smpt_set(volatile void *mm_sbox, uint64_t dma_addr, uint64_t index) | |
74 | { | |
75 | uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON, dma_addr >> MIC_SYSTEM_PAGE_SHIFT); | |
76 | writel(smpt_reg_val, (uint8_t*)mm_sbox + SBOX_SMPT00 + (4 * index)); | |
77 | } | |
78 | ||
79 | #if defined(HOST) | |
80 | /* | |
81 | * Called once per board as part of starting a MIC | |
82 | * to restore the SMPT state to the previous values | |
83 | * as stored in SMPT SW data structures. | |
84 | */ | |
85 | void mic_smpt_restore(mic_ctx_t *mic_ctx) | |
86 | { | |
87 | int i; | |
88 | dma_addr_t dma_addr; | |
89 | uint32_t *smpt = (uint32_t*)(mic_ctx->mmio.va + | |
90 | HOST_SBOX_BASE_ADDRESS + SBOX_SMPT00); | |
91 | uint32_t smpt_reg_val; | |
92 | ||
93 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { | |
94 | dma_addr = mic_ctx->mic_smpt[i].dma_addr; | |
95 | if (mic_ctx->bi_family == FAMILY_KNC) { | |
96 | smpt_reg_val = BUILD_SMPT(SNOOP_ON, | |
97 | dma_addr >> MIC_SYSTEM_PAGE_SHIFT); | |
98 | writel(smpt_reg_val, &smpt[i]); | |
99 | } | |
100 | } | |
101 | } | |
102 | ||
103 | /* | |
104 | * Called once per board as part of smpt init | |
105 | * This does a 0-512G smpt mapping, | |
106 | */ | |
107 | void mic_smpt_init(mic_ctx_t *mic_ctx) | |
108 | { | |
109 | int i; | |
110 | dma_addr_t dma_addr; | |
111 | uint32_t *smpt = (uint32_t*)(mic_ctx->mmio.va + | |
112 | HOST_SBOX_BASE_ADDRESS + SBOX_SMPT00); | |
113 | uint32_t smpt_reg_val; | |
114 | #if SMPT_LOGGING | |
115 | smpt_ref_count_g[mic_ctx->bi_id] = 0; | |
116 | #endif | |
117 | ||
118 | spin_lock_init(&mic_ctx->smpt_lock); | |
119 | mic_ctx->mic_smpt = kmalloc(sizeof(mic_smpt_t) | |
120 | * NUM_SMPT_ENTRIES_IN_USE, GFP_KERNEL); | |
121 | ||
122 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { | |
123 | dma_addr = i * MIC_SYSTEM_PAGE_SIZE; | |
124 | mic_ctx->mic_smpt[i].dma_addr = dma_addr; | |
125 | mic_ctx->mic_smpt[i].ref_count = 0; | |
126 | if (mic_ctx->bi_family == FAMILY_KNC) { | |
127 | smpt_reg_val = BUILD_SMPT(SNOOP_ON, | |
128 | dma_addr >> MIC_SYSTEM_PAGE_SHIFT); | |
129 | writel(smpt_reg_val, &smpt[i]); | |
130 | } | |
131 | } | |
132 | } | |
133 | ||
134 | /* | |
135 | * Called during mic exit per ctx (i.e once for every board) | |
136 | * If ref count is non-zero, then it means that some module | |
137 | * did not call mic_unmap_single/mic_ctx_unmap_single correctly. | |
138 | */ | |
139 | void | |
140 | mic_smpt_uninit(mic_ctx_t *mic_ctx) | |
141 | { | |
142 | #if SMPT_LOGGING | |
143 | printk("global ref count for node = %d is %lld\n", | |
144 | mic_ctx->bi_id+1, smpt_ref_count_g[mic_ctx->bi_id]); | |
145 | printk("mic map calls = %lld, mic unmap calls = %lld \n", | |
146 | map_count_g, unmap_count_g); | |
147 | ||
148 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { | |
149 | printk("[smpt_san%d] smpt_entry[%d] dma_addr = 0x%llX" | |
150 | " ref_count = %lld \n", mic_ctx->bi_id+1, i, | |
151 | mic_ctx->mic_smpt[i].dma_addr, | |
152 | mic_ctx->mic_smpt[i].ref_count); | |
153 | } | |
154 | #endif | |
155 | #ifdef DEBUG | |
156 | { | |
157 | int i; | |
158 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) | |
159 | WARN_ON(mic_ctx->mic_smpt[i].ref_count); | |
160 | } | |
161 | #endif | |
162 | ||
163 | kfree(mic_ctx->mic_smpt); | |
164 | mic_ctx->mic_smpt = NULL; | |
165 | ; | |
166 | } | |
167 | ||
168 | dma_addr_t mic_ctx_map_single(mic_ctx_t *mic_ctx, void *p, size_t size) | |
169 | { | |
170 | struct pci_dev *hwdev = mic_ctx->bi_pdev; | |
171 | int bid = mic_ctx->bi_id; | |
172 | ||
173 | return mic_map_single(bid, hwdev, p, size); | |
174 | } | |
175 | ||
176 | void mic_unmap_single(int bid, struct pci_dev *hwdev, dma_addr_t mic_addr, | |
177 | size_t size) | |
178 | { | |
179 | dma_addr_t dma_addr = mic_to_dma_addr(bid, mic_addr); | |
180 | mic_unmap(bid, mic_addr, size); | |
181 | pci_unmap_single(hwdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); | |
182 | } | |
183 | ||
184 | void mic_ctx_unmap_single(mic_ctx_t *mic_ctx, dma_addr_t dma_addr, | |
185 | size_t size) | |
186 | { | |
187 | struct pci_dev *hwdev = mic_ctx->bi_pdev; | |
188 | int bid = mic_ctx->bi_id; | |
189 | mic_unmap_single(bid, hwdev, dma_addr, size); | |
190 | } | |
191 | ||
192 | dma_addr_t mic_map_single(int bid, struct pci_dev *hwdev, void *p, | |
193 | size_t size) | |
194 | { | |
195 | dma_addr_t mic_addr = 0; | |
196 | dma_addr_t dma_addr; | |
197 | ||
198 | dma_addr = pci_map_single(hwdev, p, size, PCI_DMA_BIDIRECTIONAL); | |
199 | ||
200 | if (!pci_dma_mapping_error(hwdev, dma_addr)) | |
201 | if (!(mic_addr = mic_map(bid, dma_addr, size))) { | |
202 | printk(KERN_ERR "mic_map failed board id %d\ | |
203 | addr %#016llx size %#016zx\n", | |
204 | bid, dma_addr, size); | |
205 | pci_unmap_single(hwdev, dma_addr, | |
206 | size, PCI_DMA_BIDIRECTIONAL); | |
207 | } | |
208 | return mic_addr; | |
209 | } | |
210 | ||
211 | void add_smpt_entry(int spt, int64_t *ref, uint64_t dma_addr, int entries, mic_ctx_t *mic_ctx) | |
212 | { | |
213 | ||
214 | struct nodemsg msg; | |
215 | dma_addr_t addr = dma_addr; | |
216 | mic_smpt_t *mic_smpt = mic_ctx->mic_smpt; | |
217 | int dev_id = mic_ctx->bi_id + 1; | |
218 | void *mm_sbox = mic_ctx->mmio.va + HOST_SBOX_BASE_ADDRESS; | |
219 | int i; | |
220 | ||
221 | for (i = spt; i < spt + entries; i++, addr += MIC_SYSTEM_PAGE_SIZE) { | |
222 | #ifdef CONFIG_ML1OM | |
223 | /* | |
224 | * For KNF if the ref count is 0 and the entry number is greater | |
225 | * than 16 then we must resend a SMPT_SET message in case the uOS | |
226 | * was rebooted and lost SMPT register state (example during host | |
227 | * suspend/hibernate. | |
228 | */ | |
229 | if (!mic_smpt[i].ref_count && i >= (NUM_SMPT_ENTRIES_IN_USE >> 1)) { | |
230 | #else | |
231 | if (!mic_smpt[i].ref_count && (mic_smpt[i].dma_addr != addr)) { | |
232 | #endif | |
233 | /* | |
234 | * ref count was zero and dma_addr requested did not | |
235 | * match the dma address in the table. So, this is a | |
236 | * new entry in the table. | |
237 | * KNF: Send a message to the card | |
238 | * to update its smpt table with a new value. | |
239 | * KNC: write to the SMPT registers from host since | |
240 | * they are accessible. | |
241 | */ | |
242 | if (mic_ctx->bi_family == FAMILY_ABR) { | |
243 | msg.uop = SMPT_SET; | |
244 | msg.payload[0] = addr; | |
245 | msg.payload[1] = i; | |
246 | msg.dst.node = scif_dev[dev_id].sd_node; | |
247 | msg.src.node = 0; | |
248 | #if SMPT_LOGGING | |
249 | printk("[smpt_node%d] ==> sending msg to " | |
250 | " node = %d dma_addr = 0x%llX, entry =" | |
251 | "0x%llX\n" , mic_ctx->bi_id + 1, | |
252 | scif_dev[dev_id].sd_node, | |
253 | msg.payload[0], msg.payload[1]); | |
254 | #endif | |
255 | micscif_inc_node_refcnt(&scif_dev[dev_id], 1); | |
256 | micscif_nodeqp_send(&scif_dev[dev_id], &msg, NULL); | |
257 | micscif_dec_node_refcnt(&scif_dev[dev_id], 1); | |
258 | } | |
259 | else | |
260 | mic_smpt_set(mm_sbox, addr, i); | |
261 | mic_smpt[i].dma_addr = addr; | |
262 | } | |
263 | mic_smpt[i].ref_count += ref[i - spt]; | |
264 | } | |
265 | } | |
266 | ||
267 | dma_addr_t smpt_op(int bid, uint64_t dma_addr, | |
268 | int entries, int64_t *ref) | |
269 | { | |
270 | int spt = -1; /* smpt index */ | |
271 | int ee = 0; /* existing entries */ | |
272 | int fe = 0; /* free entries */ | |
273 | int i; | |
274 | unsigned long flags; | |
275 | dma_addr_t mic_addr = 0; | |
276 | dma_addr_t addr = dma_addr; | |
277 | mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); | |
278 | mic_smpt_t *mic_smpt = mic_ctx->mic_smpt; | |
279 | ||
280 | if (micpm_get_reference(mic_ctx, true)) | |
281 | goto exit; | |
282 | spin_lock_irqsave(&mic_ctx->smpt_lock, flags); | |
283 | ||
284 | /* find existing entries */ | |
285 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { | |
286 | if (mic_smpt[i].dma_addr == addr) { | |
287 | ee++; | |
288 | addr += MIC_SYSTEM_PAGE_SIZE; | |
289 | } | |
290 | else if (ee) /* cannot find contiguous entries */ | |
291 | goto not_found; | |
292 | ||
293 | if (ee == entries) | |
294 | goto found; | |
295 | } | |
296 | ||
297 | /* find free entry */ | |
298 | #ifdef CONFIG_ML1OM | |
299 | /* | |
300 | * For KNF the SMPT registers are not host accessible so we maintain a | |
301 | * 1:1 map for SMPT registers from 0-256GB i.e. the first 16 entries and | |
302 | * look for SMPT entries for P2P and IB etc from the 16th entry onwards. | |
303 | * This allows the KNF card to boot on Host systems with < 256GB system | |
304 | * memory and access VNET/SCIF buffers without crashing. P2P and IB SMPT | |
305 | * entries are setup after SCIF driver load/reload via SCIF Node QP | |
306 | * SMPT_SET messages. | |
307 | */ | |
308 | for (i = NUM_SMPT_ENTRIES_IN_USE / 2 ; i < NUM_SMPT_ENTRIES_IN_USE; i++) { | |
309 | #else | |
310 | for (i = 0 ; i < NUM_SMPT_ENTRIES_IN_USE; i++) { | |
311 | #endif | |
312 | fe = (mic_smpt[i].ref_count == 0) ? fe + 1: 0; | |
313 | if (fe == entries) | |
314 | goto found; | |
315 | } | |
316 | ||
317 | not_found: | |
318 | spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); | |
319 | micpm_put_reference(mic_ctx); | |
320 | exit: | |
321 | return mic_addr; | |
322 | found: | |
323 | spt = i - entries + 1; | |
324 | mic_addr = SMPT_TO_MIC_PA(spt); | |
325 | add_smpt_entry(spt, ref, dma_addr, entries, mic_ctx); | |
326 | spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); | |
327 | micpm_put_reference(mic_ctx); | |
328 | return mic_addr; | |
329 | } | |
330 | ||
331 | ||
332 | /* | |
333 | * Returns number of smpt entries needed for dma_addr to dma_addr + size | |
334 | * also returns the reference count array for each of those entries | |
335 | * and the starting smpt address | |
336 | */ | |
337 | int get_smpt_ref_count(int64_t *ref, dma_addr_t dma_addr, size_t size, | |
338 | uint64_t *smpt_start) | |
339 | { | |
340 | uint64_t start = dma_addr; | |
341 | uint64_t end = dma_addr + size; | |
342 | int i = 0; | |
343 | ||
344 | while (start < end) { | |
345 | ref[i++] = min(SMPT_ALIGN_HIGH(start + 1), end) - start; | |
346 | start = SMPT_ALIGN_HIGH(start + 1); | |
347 | } | |
348 | ||
349 | if (smpt_start) | |
350 | *smpt_start = SMPT_ALIGN_LOW(dma_addr); | |
351 | ||
352 | return i; | |
353 | } | |
354 | ||
355 | /* | |
356 | * Maps dma_addr to dma_addr + size memory in the smpt table | |
357 | * of board bid | |
358 | */ | |
359 | dma_addr_t mic_map(int bid, dma_addr_t dma_addr, size_t size) | |
360 | { | |
361 | dma_addr_t mic_addr = 0; | |
362 | int entries; | |
363 | int64_t ref[NUM_SMPT_ENTRIES_IN_USE]; | |
364 | uint64_t smpt_start; | |
365 | #if SMPT_LOGGING | |
366 | unsigned long flags; | |
367 | mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); | |
368 | spin_lock_irqsave(&mic_ctx->smpt_lock, flags); | |
369 | map_count_g++; | |
370 | smpt_ref_count_g[bid] += (int64_t)size; | |
371 | spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); | |
372 | #endif | |
373 | if (!size) | |
374 | return mic_addr; | |
375 | ||
376 | /* | |
377 | * Get number of smpt entries to be mapped, ref count array | |
378 | * and the starting smpt address to start the search for | |
379 | * free or existing smpt entries. | |
380 | */ | |
381 | entries = get_smpt_ref_count(ref, dma_addr, size, &smpt_start); | |
382 | ||
383 | /* Set the smpt table appropriately and get 16G aligned mic address */ | |
384 | mic_addr = smpt_op(bid, smpt_start, entries, ref); | |
385 | ||
386 | /* | |
387 | * If mic_addr is zero then its a error case | |
388 | * since mic_addr can never be zero. | |
389 | * else generate mic_addr by adding the 16G offset in dma_addr | |
390 | */ | |
391 | if (!mic_addr) { | |
392 | WARN_ON(1); | |
393 | return mic_addr; | |
394 | } | |
395 | else | |
396 | return (mic_addr + (dma_addr & MIC_SYSTEM_PAGE_MASK)); | |
397 | } | |
398 | ||
399 | /* | |
400 | * Unmaps mic_addr to mic_addr + size memory in the smpt table | |
401 | * of board bid | |
402 | */ | |
403 | void mic_unmap(int bid, dma_addr_t mic_addr, size_t size) | |
404 | { | |
405 | mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); | |
406 | mic_smpt_t *mic_smpt = mic_ctx->mic_smpt; | |
407 | int64_t ref[NUM_SMPT_ENTRIES_IN_USE]; | |
408 | int num_smpt; | |
409 | int spt = HOSTMIC_PA_TO_SMPT(mic_addr); | |
410 | int i; | |
411 | unsigned long flags; | |
412 | ||
413 | if (!size) | |
414 | return; | |
415 | ||
416 | if (!IS_MIC_SYSTEM_ADDR(mic_addr)) { | |
417 | WARN_ON(1); | |
418 | return; | |
419 | } | |
420 | ||
421 | /* Get number of smpt entries to be mapped, ref count array */ | |
422 | num_smpt = get_smpt_ref_count(ref, mic_addr, size, NULL); | |
423 | ||
424 | spin_lock_irqsave(&mic_ctx->smpt_lock, flags); | |
425 | ||
426 | #if SMPT_LOGGING | |
427 | unmap_count_g++; | |
428 | smpt_ref_count_g[bid] -= (int64_t)size; | |
429 | #endif | |
430 | ||
431 | for (i = spt; i < spt + num_smpt; i++) { | |
432 | mic_smpt[i].ref_count -= ref[i - spt]; | |
433 | WARN_ON(mic_smpt[i].ref_count < 0); | |
434 | } | |
435 | spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); | |
436 | } | |
437 | ||
438 | dma_addr_t mic_to_dma_addr(int bid, dma_addr_t mic_addr) | |
439 | { | |
440 | mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); | |
441 | int spt = HOSTMIC_PA_TO_SMPT(mic_addr); | |
442 | dma_addr_t dma_addr; | |
443 | ||
444 | if (!IS_MIC_SYSTEM_ADDR(mic_addr)) { | |
445 | WARN_ON(1); | |
446 | return 0; | |
447 | } | |
448 | dma_addr = mic_ctx->mic_smpt[spt].dma_addr + SMPT_OFFSET(mic_addr); | |
449 | return dma_addr; | |
450 | } | |
451 | ||
452 | #endif | |
453 | ||
454 | bool is_syspa(dma_addr_t pa) | |
455 | { | |
456 | return IS_MIC_SYSTEM_ADDR(pa); | |
457 | } |