Updated `README.md` with instructions for building/using the kernel module.
[xeon-phi-kernel-module] / micscif / micscif_smpt.c
CommitLineData
800f879a
AT
1/*
2 * Copyright 2010-2017 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * Disclaimer: The codes contained in these modules may be specific to
14 * the Intel Software Development Platform codenamed Knights Ferry,
15 * and the Intel product codenamed Knights Corner, and are not backward
16 * compatible with other Intel products. Additionally, Intel will NOT
17 * support the codes or instruction set in future products.
18 *
19 * Intel offers no warranty of any kind regarding the code. This code is
20 * licensed on an "AS IS" basis and Intel is not obligated to provide
21 * any support, assistance, installation, training, or other services
22 * of any kind. Intel is also not obligated to provide any updates,
23 * enhancements or extensions. Intel specifically disclaims any warranty
24 * of merchantability, non-infringement, fitness for any particular
25 * purpose, and any other warranty.
26 *
27 * Further, Intel disclaims all liability of any kind, including but
28 * not limited to liability for infringement of any proprietary rights,
29 * relating to the use of the code, even if Intel is notified of the
30 * possibility of such liability. Except as expressly stated in an Intel
31 * license agreement provided with this code and agreed upon with Intel,
32 * no license, express or implied, by estoppel or otherwise, to any
33 * intellectual property rights is granted herein.
34 */
35
36#include <mic/micscif.h>
37#include <mic/micscif_smpt.h>
38#if defined(HOST) || defined(WINDOWS)
39#include "mic_common.h"
40#endif
41
struct _mic_ctx_t;

/*
 * Size of the SMPT window managed by this file: the number of SMPT slots
 * in use, the amount of host memory they cover, and the last MIC physical
 * address that falls inside the window.
 */
#define NUM_SMPT_ENTRIES_IN_USE 32
#define MAX_HOST_MEMORY \
	((NUM_SMPT_ENTRIES_IN_USE) * MIC_SYSTEM_PAGE_SIZE)
#define MAX_SYSTEM_ADDR \
	((MIC_SYSTEM_BASE) + (MAX_HOST_MEMORY) - (1))

/* Figure out which SMPT entry based on the host addr */
#define SYSTEM_ADDR_TO_SMPT(sysaddr) \
	((sysaddr) >> (MIC_SYSTEM_PAGE_SHIFT))

/* SMPT slot index <-> MIC physical address inside the system window */
#define HOSTMIC_PA_TO_SMPT(hostmic_pa) \
	(((hostmic_pa) - MIC_SYSTEM_BASE) >> MIC_SYSTEM_PAGE_SHIFT)
#define SMPT_TO_MIC_PA(smpt_index) \
	(MIC_SYSTEM_BASE + ((smpt_index) * MIC_SYSTEM_PAGE_SIZE))

/* True when addr lies in [MIC_SYSTEM_BASE, MAX_SYSTEM_ADDR] */
#define IS_MIC_SYSTEM_ADDR(addr) \
	(((addr) >= MIC_SYSTEM_BASE) && ((addr) <= MAX_SYSTEM_ADDR))

/* Offset of x within its CPU page / within its SMPT page */
#define _PAGE_OFFSET(x) ((x) & ((PAGE_SIZE) - (1ULL)))
#define SMPT_OFFSET(x) ((x) & MIC_SYSTEM_PAGE_MASK)

/*
 * Round x down (LOW) or up (HIGH) to a CPU page or SMPT page boundary.
 * The LOW variants subtract (size - 1) before ALIGN() rounds up, which
 * yields the boundary at or below x.
 */
#define PAGE_ALIGN_LOW(x) \
	ALIGN(((x) - ((PAGE_SIZE) - 1ULL)), (PAGE_SIZE))
#define PAGE_ALIGN_HIGH(x) \
	ALIGN((x), (PAGE_SIZE))
#define SMPT_ALIGN_LOW(x) \
	ALIGN(((x) - (MIC_SYSTEM_PAGE_MASK)), (MIC_SYSTEM_PAGE_SIZE))
#define SMPT_ALIGN_HIGH(x) \
	ALIGN((x), (MIC_SYSTEM_PAGE_SIZE))

#ifdef HOST
/* Set to 1 to compile in the map/unmap bookkeeping and its printk output */
#define SMPT_LOGGING 0
#if SMPT_LOGGING
/* Per-board running total of mapped bytes (map adds, unmap subtracts) */
static int64_t smpt_ref_count_g[MAX_BOARD_SUPPORTED];
/* Number of mic_map() and mic_unmap() calls seen so far */
static int64_t map_count_g;
static int64_t unmap_count_g;
#endif
#endif
72
73void mic_smpt_set(volatile void *mm_sbox, uint64_t dma_addr, uint64_t index)
74{
75 uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON, dma_addr >> MIC_SYSTEM_PAGE_SHIFT);
76 writel(smpt_reg_val, (uint8_t*)mm_sbox + SBOX_SMPT00 + (4 * index));
77}
78
79#if defined(HOST)
80/*
81 * Called once per board as part of starting a MIC
82 * to restore the SMPT state to the previous values
83 * as stored in SMPT SW data structures.
84 */
85void mic_smpt_restore(mic_ctx_t *mic_ctx)
86{
87 int i;
88 dma_addr_t dma_addr;
89 uint32_t *smpt = (uint32_t*)(mic_ctx->mmio.va +
90 HOST_SBOX_BASE_ADDRESS + SBOX_SMPT00);
91 uint32_t smpt_reg_val;
92
93 for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) {
94 dma_addr = mic_ctx->mic_smpt[i].dma_addr;
95 if (mic_ctx->bi_family == FAMILY_KNC) {
96 smpt_reg_val = BUILD_SMPT(SNOOP_ON,
97 dma_addr >> MIC_SYSTEM_PAGE_SHIFT);
98 writel(smpt_reg_val, &smpt[i]);
99 }
100 }
101}
102
103/*
104 * Called once per board as part of smpt init
105 * This does a 0-512G smpt mapping,
106 */
107void mic_smpt_init(mic_ctx_t *mic_ctx)
108{
109 int i;
110 dma_addr_t dma_addr;
111 uint32_t *smpt = (uint32_t*)(mic_ctx->mmio.va +
112 HOST_SBOX_BASE_ADDRESS + SBOX_SMPT00);
113 uint32_t smpt_reg_val;
114#if SMPT_LOGGING
115 smpt_ref_count_g[mic_ctx->bi_id] = 0;
116#endif
117
118 spin_lock_init(&mic_ctx->smpt_lock);
119 mic_ctx->mic_smpt = kmalloc(sizeof(mic_smpt_t)
120 * NUM_SMPT_ENTRIES_IN_USE, GFP_KERNEL);
121
122 for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) {
123 dma_addr = i * MIC_SYSTEM_PAGE_SIZE;
124 mic_ctx->mic_smpt[i].dma_addr = dma_addr;
125 mic_ctx->mic_smpt[i].ref_count = 0;
126 if (mic_ctx->bi_family == FAMILY_KNC) {
127 smpt_reg_val = BUILD_SMPT(SNOOP_ON,
128 dma_addr >> MIC_SYSTEM_PAGE_SHIFT);
129 writel(smpt_reg_val, &smpt[i]);
130 }
131 }
132}
133
134/*
135 * Called during mic exit per ctx (i.e once for every board)
136 * If ref count is non-zero, then it means that some module
137 * did not call mic_unmap_single/mic_ctx_unmap_single correctly.
138 */
139void
140mic_smpt_uninit(mic_ctx_t *mic_ctx)
141{
142#if SMPT_LOGGING
143 printk("global ref count for node = %d is %lld\n",
144 mic_ctx->bi_id+1, smpt_ref_count_g[mic_ctx->bi_id]);
145 printk("mic map calls = %lld, mic unmap calls = %lld \n",
146 map_count_g, unmap_count_g);
147
148 for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) {
149 printk("[smpt_san%d] smpt_entry[%d] dma_addr = 0x%llX"
150 " ref_count = %lld \n", mic_ctx->bi_id+1, i,
151 mic_ctx->mic_smpt[i].dma_addr,
152 mic_ctx->mic_smpt[i].ref_count);
153 }
154#endif
155#ifdef DEBUG
156 {
157 int i;
158 for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++)
159 WARN_ON(mic_ctx->mic_smpt[i].ref_count);
160 }
161#endif
162
163 kfree(mic_ctx->mic_smpt);
164 mic_ctx->mic_smpt = NULL;
165 ;
166}
167
168dma_addr_t mic_ctx_map_single(mic_ctx_t *mic_ctx, void *p, size_t size)
169{
170 struct pci_dev *hwdev = mic_ctx->bi_pdev;
171 int bid = mic_ctx->bi_id;
172
173 return mic_map_single(bid, hwdev, p, size);
174}
175
176void mic_unmap_single(int bid, struct pci_dev *hwdev, dma_addr_t mic_addr,
177 size_t size)
178{
179 dma_addr_t dma_addr = mic_to_dma_addr(bid, mic_addr);
180 mic_unmap(bid, mic_addr, size);
181 pci_unmap_single(hwdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
182}
183
184void mic_ctx_unmap_single(mic_ctx_t *mic_ctx, dma_addr_t dma_addr,
185 size_t size)
186{
187 struct pci_dev *hwdev = mic_ctx->bi_pdev;
188 int bid = mic_ctx->bi_id;
189 mic_unmap_single(bid, hwdev, dma_addr, size);
190}
191
192dma_addr_t mic_map_single(int bid, struct pci_dev *hwdev, void *p,
193 size_t size)
194{
195 dma_addr_t mic_addr = 0;
196 dma_addr_t dma_addr;
197
198 dma_addr = pci_map_single(hwdev, p, size, PCI_DMA_BIDIRECTIONAL);
199
200 if (!pci_dma_mapping_error(hwdev, dma_addr))
201 if (!(mic_addr = mic_map(bid, dma_addr, size))) {
202 printk(KERN_ERR "mic_map failed board id %d\
203 addr %#016llx size %#016zx\n",
204 bid, dma_addr, size);
205 pci_unmap_single(hwdev, dma_addr,
206 size, PCI_DMA_BIDIRECTIONAL);
207 }
208 return mic_addr;
209}
210
211void add_smpt_entry(int spt, int64_t *ref, uint64_t dma_addr, int entries, mic_ctx_t *mic_ctx)
212{
213
214 struct nodemsg msg;
215 dma_addr_t addr = dma_addr;
216 mic_smpt_t *mic_smpt = mic_ctx->mic_smpt;
217 int dev_id = mic_ctx->bi_id + 1;
218 void *mm_sbox = mic_ctx->mmio.va + HOST_SBOX_BASE_ADDRESS;
219 int i;
220
221 for (i = spt; i < spt + entries; i++, addr += MIC_SYSTEM_PAGE_SIZE) {
222#ifdef CONFIG_ML1OM
223 /*
224 * For KNF if the ref count is 0 and the entry number is greater
225 * than 16 then we must resend a SMPT_SET message in case the uOS
226 * was rebooted and lost SMPT register state (example during host
227 * suspend/hibernate.
228 */
229 if (!mic_smpt[i].ref_count && i >= (NUM_SMPT_ENTRIES_IN_USE >> 1)) {
230#else
231 if (!mic_smpt[i].ref_count && (mic_smpt[i].dma_addr != addr)) {
232#endif
233 /*
234 * ref count was zero and dma_addr requested did not
235 * match the dma address in the table. So, this is a
236 * new entry in the table.
237 * KNF: Send a message to the card
238 * to update its smpt table with a new value.
239 * KNC: write to the SMPT registers from host since
240 * they are accessible.
241 */
242 if (mic_ctx->bi_family == FAMILY_ABR) {
243 msg.uop = SMPT_SET;
244 msg.payload[0] = addr;
245 msg.payload[1] = i;
246 msg.dst.node = scif_dev[dev_id].sd_node;
247 msg.src.node = 0;
248#if SMPT_LOGGING
249 printk("[smpt_node%d] ==> sending msg to "
250 " node = %d dma_addr = 0x%llX, entry ="
251 "0x%llX\n" , mic_ctx->bi_id + 1,
252 scif_dev[dev_id].sd_node,
253 msg.payload[0], msg.payload[1]);
254#endif
255 micscif_inc_node_refcnt(&scif_dev[dev_id], 1);
256 micscif_nodeqp_send(&scif_dev[dev_id], &msg, NULL);
257 micscif_dec_node_refcnt(&scif_dev[dev_id], 1);
258 }
259 else
260 mic_smpt_set(mm_sbox, addr, i);
261 mic_smpt[i].dma_addr = addr;
262 }
263 mic_smpt[i].ref_count += ref[i - spt];
264 }
265}
266
267dma_addr_t smpt_op(int bid, uint64_t dma_addr,
268 int entries, int64_t *ref)
269{
270 int spt = -1; /* smpt index */
271 int ee = 0; /* existing entries */
272 int fe = 0; /* free entries */
273 int i;
274 unsigned long flags;
275 dma_addr_t mic_addr = 0;
276 dma_addr_t addr = dma_addr;
277 mic_ctx_t *mic_ctx = get_per_dev_ctx(bid);
278 mic_smpt_t *mic_smpt = mic_ctx->mic_smpt;
279
280 if (micpm_get_reference(mic_ctx, true))
281 goto exit;
282 spin_lock_irqsave(&mic_ctx->smpt_lock, flags);
283
284 /* find existing entries */
285 for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) {
286 if (mic_smpt[i].dma_addr == addr) {
287 ee++;
288 addr += MIC_SYSTEM_PAGE_SIZE;
289 }
290 else if (ee) /* cannot find contiguous entries */
291 goto not_found;
292
293 if (ee == entries)
294 goto found;
295 }
296
297 /* find free entry */
298#ifdef CONFIG_ML1OM
299 /*
300 * For KNF the SMPT registers are not host accessible so we maintain a
301 * 1:1 map for SMPT registers from 0-256GB i.e. the first 16 entries and
302 * look for SMPT entries for P2P and IB etc from the 16th entry onwards.
303 * This allows the KNF card to boot on Host systems with < 256GB system
304 * memory and access VNET/SCIF buffers without crashing. P2P and IB SMPT
305 * entries are setup after SCIF driver load/reload via SCIF Node QP
306 * SMPT_SET messages.
307 */
308 for (i = NUM_SMPT_ENTRIES_IN_USE / 2 ; i < NUM_SMPT_ENTRIES_IN_USE; i++) {
309#else
310 for (i = 0 ; i < NUM_SMPT_ENTRIES_IN_USE; i++) {
311#endif
312 fe = (mic_smpt[i].ref_count == 0) ? fe + 1: 0;
313 if (fe == entries)
314 goto found;
315 }
316
317not_found:
318 spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags);
319 micpm_put_reference(mic_ctx);
320exit:
321 return mic_addr;
322found:
323 spt = i - entries + 1;
324 mic_addr = SMPT_TO_MIC_PA(spt);
325 add_smpt_entry(spt, ref, dma_addr, entries, mic_ctx);
326 spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags);
327 micpm_put_reference(mic_ctx);
328 return mic_addr;
329}
330
331
332/*
333 * Returns number of smpt entries needed for dma_addr to dma_addr + size
334 * also returns the reference count array for each of those entries
335 * and the starting smpt address
336 */
337int get_smpt_ref_count(int64_t *ref, dma_addr_t dma_addr, size_t size,
338 uint64_t *smpt_start)
339{
340 uint64_t start = dma_addr;
341 uint64_t end = dma_addr + size;
342 int i = 0;
343
344 while (start < end) {
345 ref[i++] = min(SMPT_ALIGN_HIGH(start + 1), end) - start;
346 start = SMPT_ALIGN_HIGH(start + 1);
347 }
348
349 if (smpt_start)
350 *smpt_start = SMPT_ALIGN_LOW(dma_addr);
351
352 return i;
353}
354
355/*
356 * Maps dma_addr to dma_addr + size memory in the smpt table
357 * of board bid
358 */
359dma_addr_t mic_map(int bid, dma_addr_t dma_addr, size_t size)
360{
361 dma_addr_t mic_addr = 0;
362 int entries;
363 int64_t ref[NUM_SMPT_ENTRIES_IN_USE];
364 uint64_t smpt_start;
365#if SMPT_LOGGING
366 unsigned long flags;
367 mic_ctx_t *mic_ctx = get_per_dev_ctx(bid);
368 spin_lock_irqsave(&mic_ctx->smpt_lock, flags);
369 map_count_g++;
370 smpt_ref_count_g[bid] += (int64_t)size;
371 spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags);
372#endif
373 if (!size)
374 return mic_addr;
375
376 /*
377 * Get number of smpt entries to be mapped, ref count array
378 * and the starting smpt address to start the search for
379 * free or existing smpt entries.
380 */
381 entries = get_smpt_ref_count(ref, dma_addr, size, &smpt_start);
382
383 /* Set the smpt table appropriately and get 16G aligned mic address */
384 mic_addr = smpt_op(bid, smpt_start, entries, ref);
385
386 /*
387 * If mic_addr is zero then its a error case
388 * since mic_addr can never be zero.
389 * else generate mic_addr by adding the 16G offset in dma_addr
390 */
391 if (!mic_addr) {
392 WARN_ON(1);
393 return mic_addr;
394 }
395 else
396 return (mic_addr + (dma_addr & MIC_SYSTEM_PAGE_MASK));
397}
398
399/*
400 * Unmaps mic_addr to mic_addr + size memory in the smpt table
401 * of board bid
402 */
403void mic_unmap(int bid, dma_addr_t mic_addr, size_t size)
404{
405 mic_ctx_t *mic_ctx = get_per_dev_ctx(bid);
406 mic_smpt_t *mic_smpt = mic_ctx->mic_smpt;
407 int64_t ref[NUM_SMPT_ENTRIES_IN_USE];
408 int num_smpt;
409 int spt = HOSTMIC_PA_TO_SMPT(mic_addr);
410 int i;
411 unsigned long flags;
412
413 if (!size)
414 return;
415
416 if (!IS_MIC_SYSTEM_ADDR(mic_addr)) {
417 WARN_ON(1);
418 return;
419 }
420
421 /* Get number of smpt entries to be mapped, ref count array */
422 num_smpt = get_smpt_ref_count(ref, mic_addr, size, NULL);
423
424 spin_lock_irqsave(&mic_ctx->smpt_lock, flags);
425
426#if SMPT_LOGGING
427 unmap_count_g++;
428 smpt_ref_count_g[bid] -= (int64_t)size;
429#endif
430
431 for (i = spt; i < spt + num_smpt; i++) {
432 mic_smpt[i].ref_count -= ref[i - spt];
433 WARN_ON(mic_smpt[i].ref_count < 0);
434 }
435 spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags);
436}
437
438dma_addr_t mic_to_dma_addr(int bid, dma_addr_t mic_addr)
439{
440 mic_ctx_t *mic_ctx = get_per_dev_ctx(bid);
441 int spt = HOSTMIC_PA_TO_SMPT(mic_addr);
442 dma_addr_t dma_addr;
443
444 if (!IS_MIC_SYSTEM_ADDR(mic_addr)) {
445 WARN_ON(1);
446 return 0;
447 }
448 dma_addr = mic_ctx->mic_smpt[spt].dma_addr + SMPT_OFFSET(mic_addr);
449 return dma_addr;
450}
451
452#endif
453
454bool is_syspa(dma_addr_t pa)
455{
456 return IS_MIC_SYSTEM_ADDR(pa);
457}