| 1 | /* |
| 2 | * Copyright 2010-2017 Intel Corporation. |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or modify |
| 5 | * it under the terms of the GNU General Public License, version 2, |
| 6 | * as published by the Free Software Foundation. |
| 7 | * |
| 8 | * This program is distributed in the hope that it will be useful, |
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 11 | * General Public License for more details. |
| 12 | * |
| 13 | * Disclaimer: The codes contained in these modules may be specific to |
| 14 | * the Intel Software Development Platform codenamed Knights Ferry, |
| 15 | * and the Intel product codenamed Knights Corner, and are not backward |
| 16 | * compatible with other Intel products. Additionally, Intel will NOT |
| 17 | * support the codes or instruction set in future products. |
| 18 | * |
| 19 | * Intel offers no warranty of any kind regarding the code. This code is |
| 20 | * licensed on an "AS IS" basis and Intel is not obligated to provide |
| 21 | * any support, assistance, installation, training, or other services |
| 22 | * of any kind. Intel is also not obligated to provide any updates, |
| 23 | * enhancements or extensions. Intel specifically disclaims any warranty |
| 24 | * of merchantability, non-infringement, fitness for any particular |
| 25 | * purpose, and any other warranty. |
| 26 | * |
| 27 | * Further, Intel disclaims all liability of any kind, including but |
| 28 | * not limited to liability for infringement of any proprietary rights, |
| 29 | * relating to the use of the code, even if Intel is notified of the |
| 30 | * possibility of such liability. Except as expressly stated in an Intel |
| 31 | * license agreement provided with this code and agreed upon with Intel, |
| 32 | * no license, express or implied, by estoppel or otherwise, to any |
| 33 | * intellectual property rights is granted herein. |
| 34 | */ |
| 35 | |
| 36 | #include <mic/micscif.h> |
| 37 | #include <mic/micscif_smpt.h> |
| 38 | #if defined(HOST) || defined(WINDOWS) |
| 39 | #include "mic_common.h" |
| 40 | #endif |
| 41 | |
struct _mic_ctx_t;

/*
 * SMPT slot lookup.
 *   SYSTEM_ADDR_TO_SMPT - slot number derived directly from a host address.
 *   HOSTMIC_PA_TO_SMPT  - slot number derived from an address expressed
 *                         relative to MIC_SYSTEM_BASE.
 */
#define SYSTEM_ADDR_TO_SMPT(sysaddr) \
	((sysaddr) >> (MIC_SYSTEM_PAGE_SHIFT))
#define HOSTMIC_PA_TO_SMPT(hostmic_pa) \
	(((hostmic_pa) - MIC_SYSTEM_BASE) >> MIC_SYSTEM_PAGE_SHIFT)

/* Number of SMPT slots managed by this file. */
#define NUM_SMPT_ENTRIES_IN_USE 32

/* First address, relative to MIC_SYSTEM_BASE, covered by a given slot. */
#define SMPT_TO_MIC_PA(smpt_index) \
	(MIC_SYSTEM_BASE + ((smpt_index) * MIC_SYSTEM_PAGE_SIZE))

/* Total span covered by all slots, and the last address inside that span. */
#define MAX_HOST_MEMORY \
	((NUM_SMPT_ENTRIES_IN_USE) * MIC_SYSTEM_PAGE_SIZE)
#define MAX_SYSTEM_ADDR \
	((MIC_SYSTEM_BASE) + (MAX_HOST_MEMORY) - (1))

/* True when addr lies in [MIC_SYSTEM_BASE, MAX_SYSTEM_ADDR]; evaluates addr twice. */
#define IS_MIC_SYSTEM_ADDR(addr) \
	(((addr) >= MIC_SYSTEM_BASE) && ((addr) <= MAX_SYSTEM_ADDR))

/* Offset of x within a PAGE_SIZE page / within an SMPT-sized page. */
#define _PAGE_OFFSET(x) ((x) & ((PAGE_SIZE) - (1ULL)))
#define SMPT_OFFSET(x) ((x) & MIC_SYSTEM_PAGE_MASK)

/*
 * Rounding helpers built on ALIGN(): the *_LOW forms subtract (size - 1)
 * before aligning, the *_HIGH forms align x directly.
 */
#define PAGE_ALIGN_LOW(x) \
	ALIGN(((x) - ((PAGE_SIZE) - 1ULL)), (PAGE_SIZE))
#define PAGE_ALIGN_HIGH(x) \
	ALIGN((x), (PAGE_SIZE))
#define SMPT_ALIGN_LOW(x) \
	ALIGN(((x) - (MIC_SYSTEM_PAGE_MASK)), (MIC_SYSTEM_PAGE_SIZE))
#define SMPT_ALIGN_HIGH(x) \
	ALIGN((x), (MIC_SYSTEM_PAGE_SIZE))

#if defined(HOST)
/* Set to 1 to compile in the map/unmap counters and printk tracing below. */
#define SMPT_LOGGING 0
#if SMPT_LOGGING
static int64_t smpt_ref_count_g[MAX_BOARD_SUPPORTED];
static int64_t map_count_g;
static int64_t unmap_count_g;
#endif
#endif
| 72 | |
| 73 | void mic_smpt_set(volatile void *mm_sbox, uint64_t dma_addr, uint64_t index) |
| 74 | { |
| 75 | uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON, dma_addr >> MIC_SYSTEM_PAGE_SHIFT); |
| 76 | writel(smpt_reg_val, (uint8_t*)mm_sbox + SBOX_SMPT00 + (4 * index)); |
| 77 | } |
| 78 | |
| 79 | #if defined(HOST) |
| 80 | /* |
| 81 | * Called once per board as part of starting a MIC |
| 82 | * to restore the SMPT state to the previous values |
| 83 | * as stored in SMPT SW data structures. |
| 84 | */ |
| 85 | void mic_smpt_restore(mic_ctx_t *mic_ctx) |
| 86 | { |
| 87 | int i; |
| 88 | dma_addr_t dma_addr; |
| 89 | uint32_t *smpt = (uint32_t*)(mic_ctx->mmio.va + |
| 90 | HOST_SBOX_BASE_ADDRESS + SBOX_SMPT00); |
| 91 | uint32_t smpt_reg_val; |
| 92 | |
| 93 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { |
| 94 | dma_addr = mic_ctx->mic_smpt[i].dma_addr; |
| 95 | if (mic_ctx->bi_family == FAMILY_KNC) { |
| 96 | smpt_reg_val = BUILD_SMPT(SNOOP_ON, |
| 97 | dma_addr >> MIC_SYSTEM_PAGE_SHIFT); |
| 98 | writel(smpt_reg_val, &smpt[i]); |
| 99 | } |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | /* |
| 104 | * Called once per board as part of smpt init |
| 105 | * This does a 0-512G smpt mapping, |
| 106 | */ |
| 107 | void mic_smpt_init(mic_ctx_t *mic_ctx) |
| 108 | { |
| 109 | int i; |
| 110 | dma_addr_t dma_addr; |
| 111 | uint32_t *smpt = (uint32_t*)(mic_ctx->mmio.va + |
| 112 | HOST_SBOX_BASE_ADDRESS + SBOX_SMPT00); |
| 113 | uint32_t smpt_reg_val; |
| 114 | #if SMPT_LOGGING |
| 115 | smpt_ref_count_g[mic_ctx->bi_id] = 0; |
| 116 | #endif |
| 117 | |
| 118 | spin_lock_init(&mic_ctx->smpt_lock); |
| 119 | mic_ctx->mic_smpt = kmalloc(sizeof(mic_smpt_t) |
| 120 | * NUM_SMPT_ENTRIES_IN_USE, GFP_KERNEL); |
| 121 | |
| 122 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { |
| 123 | dma_addr = i * MIC_SYSTEM_PAGE_SIZE; |
| 124 | mic_ctx->mic_smpt[i].dma_addr = dma_addr; |
| 125 | mic_ctx->mic_smpt[i].ref_count = 0; |
| 126 | if (mic_ctx->bi_family == FAMILY_KNC) { |
| 127 | smpt_reg_val = BUILD_SMPT(SNOOP_ON, |
| 128 | dma_addr >> MIC_SYSTEM_PAGE_SHIFT); |
| 129 | writel(smpt_reg_val, &smpt[i]); |
| 130 | } |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | /* |
| 135 | * Called during mic exit per ctx (i.e once for every board) |
| 136 | * If ref count is non-zero, then it means that some module |
| 137 | * did not call mic_unmap_single/mic_ctx_unmap_single correctly. |
| 138 | */ |
| 139 | void |
| 140 | mic_smpt_uninit(mic_ctx_t *mic_ctx) |
| 141 | { |
| 142 | #if SMPT_LOGGING |
| 143 | printk("global ref count for node = %d is %lld\n", |
| 144 | mic_ctx->bi_id+1, smpt_ref_count_g[mic_ctx->bi_id]); |
| 145 | printk("mic map calls = %lld, mic unmap calls = %lld \n", |
| 146 | map_count_g, unmap_count_g); |
| 147 | |
| 148 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { |
| 149 | printk("[smpt_san%d] smpt_entry[%d] dma_addr = 0x%llX" |
| 150 | " ref_count = %lld \n", mic_ctx->bi_id+1, i, |
| 151 | mic_ctx->mic_smpt[i].dma_addr, |
| 152 | mic_ctx->mic_smpt[i].ref_count); |
| 153 | } |
| 154 | #endif |
| 155 | #ifdef DEBUG |
| 156 | { |
| 157 | int i; |
| 158 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) |
| 159 | WARN_ON(mic_ctx->mic_smpt[i].ref_count); |
| 160 | } |
| 161 | #endif |
| 162 | |
| 163 | kfree(mic_ctx->mic_smpt); |
| 164 | mic_ctx->mic_smpt = NULL; |
| 165 | ; |
| 166 | } |
| 167 | |
| 168 | dma_addr_t mic_ctx_map_single(mic_ctx_t *mic_ctx, void *p, size_t size) |
| 169 | { |
| 170 | struct pci_dev *hwdev = mic_ctx->bi_pdev; |
| 171 | int bid = mic_ctx->bi_id; |
| 172 | |
| 173 | return mic_map_single(bid, hwdev, p, size); |
| 174 | } |
| 175 | |
| 176 | void mic_unmap_single(int bid, struct pci_dev *hwdev, dma_addr_t mic_addr, |
| 177 | size_t size) |
| 178 | { |
| 179 | dma_addr_t dma_addr = mic_to_dma_addr(bid, mic_addr); |
| 180 | mic_unmap(bid, mic_addr, size); |
| 181 | pci_unmap_single(hwdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); |
| 182 | } |
| 183 | |
| 184 | void mic_ctx_unmap_single(mic_ctx_t *mic_ctx, dma_addr_t dma_addr, |
| 185 | size_t size) |
| 186 | { |
| 187 | struct pci_dev *hwdev = mic_ctx->bi_pdev; |
| 188 | int bid = mic_ctx->bi_id; |
| 189 | mic_unmap_single(bid, hwdev, dma_addr, size); |
| 190 | } |
| 191 | |
| 192 | dma_addr_t mic_map_single(int bid, struct pci_dev *hwdev, void *p, |
| 193 | size_t size) |
| 194 | { |
| 195 | dma_addr_t mic_addr = 0; |
| 196 | dma_addr_t dma_addr; |
| 197 | |
| 198 | dma_addr = pci_map_single(hwdev, p, size, PCI_DMA_BIDIRECTIONAL); |
| 199 | |
| 200 | if (!pci_dma_mapping_error(hwdev, dma_addr)) |
| 201 | if (!(mic_addr = mic_map(bid, dma_addr, size))) { |
| 202 | printk(KERN_ERR "mic_map failed board id %d\ |
| 203 | addr %#016llx size %#016zx\n", |
| 204 | bid, dma_addr, size); |
| 205 | pci_unmap_single(hwdev, dma_addr, |
| 206 | size, PCI_DMA_BIDIRECTIONAL); |
| 207 | } |
| 208 | return mic_addr; |
| 209 | } |
| 210 | |
| 211 | void add_smpt_entry(int spt, int64_t *ref, uint64_t dma_addr, int entries, mic_ctx_t *mic_ctx) |
| 212 | { |
| 213 | |
| 214 | struct nodemsg msg; |
| 215 | dma_addr_t addr = dma_addr; |
| 216 | mic_smpt_t *mic_smpt = mic_ctx->mic_smpt; |
| 217 | int dev_id = mic_ctx->bi_id + 1; |
| 218 | void *mm_sbox = mic_ctx->mmio.va + HOST_SBOX_BASE_ADDRESS; |
| 219 | int i; |
| 220 | |
| 221 | for (i = spt; i < spt + entries; i++, addr += MIC_SYSTEM_PAGE_SIZE) { |
| 222 | #ifdef CONFIG_ML1OM |
| 223 | /* |
| 224 | * For KNF if the ref count is 0 and the entry number is greater |
| 225 | * than 16 then we must resend a SMPT_SET message in case the uOS |
| 226 | * was rebooted and lost SMPT register state (example during host |
| 227 | * suspend/hibernate. |
| 228 | */ |
| 229 | if (!mic_smpt[i].ref_count && i >= (NUM_SMPT_ENTRIES_IN_USE >> 1)) { |
| 230 | #else |
| 231 | if (!mic_smpt[i].ref_count && (mic_smpt[i].dma_addr != addr)) { |
| 232 | #endif |
| 233 | /* |
| 234 | * ref count was zero and dma_addr requested did not |
| 235 | * match the dma address in the table. So, this is a |
| 236 | * new entry in the table. |
| 237 | * KNF: Send a message to the card |
| 238 | * to update its smpt table with a new value. |
| 239 | * KNC: write to the SMPT registers from host since |
| 240 | * they are accessible. |
| 241 | */ |
| 242 | if (mic_ctx->bi_family == FAMILY_ABR) { |
| 243 | msg.uop = SMPT_SET; |
| 244 | msg.payload[0] = addr; |
| 245 | msg.payload[1] = i; |
| 246 | msg.dst.node = scif_dev[dev_id].sd_node; |
| 247 | msg.src.node = 0; |
| 248 | #if SMPT_LOGGING |
| 249 | printk("[smpt_node%d] ==> sending msg to " |
| 250 | " node = %d dma_addr = 0x%llX, entry =" |
| 251 | "0x%llX\n" , mic_ctx->bi_id + 1, |
| 252 | scif_dev[dev_id].sd_node, |
| 253 | msg.payload[0], msg.payload[1]); |
| 254 | #endif |
| 255 | micscif_inc_node_refcnt(&scif_dev[dev_id], 1); |
| 256 | micscif_nodeqp_send(&scif_dev[dev_id], &msg, NULL); |
| 257 | micscif_dec_node_refcnt(&scif_dev[dev_id], 1); |
| 258 | } |
| 259 | else |
| 260 | mic_smpt_set(mm_sbox, addr, i); |
| 261 | mic_smpt[i].dma_addr = addr; |
| 262 | } |
| 263 | mic_smpt[i].ref_count += ref[i - spt]; |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | dma_addr_t smpt_op(int bid, uint64_t dma_addr, |
| 268 | int entries, int64_t *ref) |
| 269 | { |
| 270 | int spt = -1; /* smpt index */ |
| 271 | int ee = 0; /* existing entries */ |
| 272 | int fe = 0; /* free entries */ |
| 273 | int i; |
| 274 | unsigned long flags; |
| 275 | dma_addr_t mic_addr = 0; |
| 276 | dma_addr_t addr = dma_addr; |
| 277 | mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); |
| 278 | mic_smpt_t *mic_smpt = mic_ctx->mic_smpt; |
| 279 | |
| 280 | if (micpm_get_reference(mic_ctx, true)) |
| 281 | goto exit; |
| 282 | spin_lock_irqsave(&mic_ctx->smpt_lock, flags); |
| 283 | |
| 284 | /* find existing entries */ |
| 285 | for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { |
| 286 | if (mic_smpt[i].dma_addr == addr) { |
| 287 | ee++; |
| 288 | addr += MIC_SYSTEM_PAGE_SIZE; |
| 289 | } |
| 290 | else if (ee) /* cannot find contiguous entries */ |
| 291 | goto not_found; |
| 292 | |
| 293 | if (ee == entries) |
| 294 | goto found; |
| 295 | } |
| 296 | |
| 297 | /* find free entry */ |
| 298 | #ifdef CONFIG_ML1OM |
| 299 | /* |
| 300 | * For KNF the SMPT registers are not host accessible so we maintain a |
| 301 | * 1:1 map for SMPT registers from 0-256GB i.e. the first 16 entries and |
| 302 | * look for SMPT entries for P2P and IB etc from the 16th entry onwards. |
| 303 | * This allows the KNF card to boot on Host systems with < 256GB system |
| 304 | * memory and access VNET/SCIF buffers without crashing. P2P and IB SMPT |
| 305 | * entries are setup after SCIF driver load/reload via SCIF Node QP |
| 306 | * SMPT_SET messages. |
| 307 | */ |
| 308 | for (i = NUM_SMPT_ENTRIES_IN_USE / 2 ; i < NUM_SMPT_ENTRIES_IN_USE; i++) { |
| 309 | #else |
| 310 | for (i = 0 ; i < NUM_SMPT_ENTRIES_IN_USE; i++) { |
| 311 | #endif |
| 312 | fe = (mic_smpt[i].ref_count == 0) ? fe + 1: 0; |
| 313 | if (fe == entries) |
| 314 | goto found; |
| 315 | } |
| 316 | |
| 317 | not_found: |
| 318 | spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); |
| 319 | micpm_put_reference(mic_ctx); |
| 320 | exit: |
| 321 | return mic_addr; |
| 322 | found: |
| 323 | spt = i - entries + 1; |
| 324 | mic_addr = SMPT_TO_MIC_PA(spt); |
| 325 | add_smpt_entry(spt, ref, dma_addr, entries, mic_ctx); |
| 326 | spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); |
| 327 | micpm_put_reference(mic_ctx); |
| 328 | return mic_addr; |
| 329 | } |
| 330 | |
| 331 | |
| 332 | /* |
| 333 | * Returns number of smpt entries needed for dma_addr to dma_addr + size |
| 334 | * also returns the reference count array for each of those entries |
| 335 | * and the starting smpt address |
| 336 | */ |
| 337 | int get_smpt_ref_count(int64_t *ref, dma_addr_t dma_addr, size_t size, |
| 338 | uint64_t *smpt_start) |
| 339 | { |
| 340 | uint64_t start = dma_addr; |
| 341 | uint64_t end = dma_addr + size; |
| 342 | int i = 0; |
| 343 | |
| 344 | while (start < end) { |
| 345 | ref[i++] = min(SMPT_ALIGN_HIGH(start + 1), end) - start; |
| 346 | start = SMPT_ALIGN_HIGH(start + 1); |
| 347 | } |
| 348 | |
| 349 | if (smpt_start) |
| 350 | *smpt_start = SMPT_ALIGN_LOW(dma_addr); |
| 351 | |
| 352 | return i; |
| 353 | } |
| 354 | |
| 355 | /* |
| 356 | * Maps dma_addr to dma_addr + size memory in the smpt table |
| 357 | * of board bid |
| 358 | */ |
| 359 | dma_addr_t mic_map(int bid, dma_addr_t dma_addr, size_t size) |
| 360 | { |
| 361 | dma_addr_t mic_addr = 0; |
| 362 | int entries; |
| 363 | int64_t ref[NUM_SMPT_ENTRIES_IN_USE]; |
| 364 | uint64_t smpt_start; |
| 365 | #if SMPT_LOGGING |
| 366 | unsigned long flags; |
| 367 | mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); |
| 368 | spin_lock_irqsave(&mic_ctx->smpt_lock, flags); |
| 369 | map_count_g++; |
| 370 | smpt_ref_count_g[bid] += (int64_t)size; |
| 371 | spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); |
| 372 | #endif |
| 373 | if (!size) |
| 374 | return mic_addr; |
| 375 | |
| 376 | /* |
| 377 | * Get number of smpt entries to be mapped, ref count array |
| 378 | * and the starting smpt address to start the search for |
| 379 | * free or existing smpt entries. |
| 380 | */ |
| 381 | entries = get_smpt_ref_count(ref, dma_addr, size, &smpt_start); |
| 382 | |
| 383 | /* Set the smpt table appropriately and get 16G aligned mic address */ |
| 384 | mic_addr = smpt_op(bid, smpt_start, entries, ref); |
| 385 | |
| 386 | /* |
| 387 | * If mic_addr is zero then its a error case |
| 388 | * since mic_addr can never be zero. |
| 389 | * else generate mic_addr by adding the 16G offset in dma_addr |
| 390 | */ |
| 391 | if (!mic_addr) { |
| 392 | WARN_ON(1); |
| 393 | return mic_addr; |
| 394 | } |
| 395 | else |
| 396 | return (mic_addr + (dma_addr & MIC_SYSTEM_PAGE_MASK)); |
| 397 | } |
| 398 | |
| 399 | /* |
| 400 | * Unmaps mic_addr to mic_addr + size memory in the smpt table |
| 401 | * of board bid |
| 402 | */ |
| 403 | void mic_unmap(int bid, dma_addr_t mic_addr, size_t size) |
| 404 | { |
| 405 | mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); |
| 406 | mic_smpt_t *mic_smpt = mic_ctx->mic_smpt; |
| 407 | int64_t ref[NUM_SMPT_ENTRIES_IN_USE]; |
| 408 | int num_smpt; |
| 409 | int spt = HOSTMIC_PA_TO_SMPT(mic_addr); |
| 410 | int i; |
| 411 | unsigned long flags; |
| 412 | |
| 413 | if (!size) |
| 414 | return; |
| 415 | |
| 416 | if (!IS_MIC_SYSTEM_ADDR(mic_addr)) { |
| 417 | WARN_ON(1); |
| 418 | return; |
| 419 | } |
| 420 | |
| 421 | /* Get number of smpt entries to be mapped, ref count array */ |
| 422 | num_smpt = get_smpt_ref_count(ref, mic_addr, size, NULL); |
| 423 | |
| 424 | spin_lock_irqsave(&mic_ctx->smpt_lock, flags); |
| 425 | |
| 426 | #if SMPT_LOGGING |
| 427 | unmap_count_g++; |
| 428 | smpt_ref_count_g[bid] -= (int64_t)size; |
| 429 | #endif |
| 430 | |
| 431 | for (i = spt; i < spt + num_smpt; i++) { |
| 432 | mic_smpt[i].ref_count -= ref[i - spt]; |
| 433 | WARN_ON(mic_smpt[i].ref_count < 0); |
| 434 | } |
| 435 | spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); |
| 436 | } |
| 437 | |
| 438 | dma_addr_t mic_to_dma_addr(int bid, dma_addr_t mic_addr) |
| 439 | { |
| 440 | mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); |
| 441 | int spt = HOSTMIC_PA_TO_SMPT(mic_addr); |
| 442 | dma_addr_t dma_addr; |
| 443 | |
| 444 | if (!IS_MIC_SYSTEM_ADDR(mic_addr)) { |
| 445 | WARN_ON(1); |
| 446 | return 0; |
| 447 | } |
| 448 | dma_addr = mic_ctx->mic_smpt[spt].dma_addr + SMPT_OFFSET(mic_addr); |
| 449 | return dma_addr; |
| 450 | } |
| 451 | |
| 452 | #endif |
| 453 | |
| 454 | bool is_syspa(dma_addr_t pa) |
| 455 | { |
| 456 | return IS_MIC_SYSTEM_ADDR(pa); |
| 457 | } |