/*
 * Copyright 2010-2017 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Disclaimer: The codes contained in these modules may be specific to
 * the Intel Software Development Platform codenamed Knights Ferry,
 * and the Intel product codenamed Knights Corner, and are not backward
 * compatible with other Intel products. Additionally, Intel will NOT
 * support the codes or instruction set in future products.
 *
 * Intel offers no warranty of any kind regarding the code. This code is
 * licensed on an "AS IS" basis and Intel is not obligated to provide
 * any support, assistance, installation, training, or other services
 * of any kind. Intel is also not obligated to provide any updates,
 * enhancements or extensions. Intel specifically disclaims any warranty
 * of merchantability, non-infringement, fitness for any particular
 * purpose, and any other warranty.
 *
 * Further, Intel disclaims all liability of any kind, including but
 * not limited to liability for infringement of any proprietary rights,
 * relating to the use of the code, even if Intel is notified of the
 * possibility of such liability. Except as expressly stated in an Intel
 * license agreement provided with this code and agreed upon with Intel,
 * no license, express or implied, by estoppel or otherwise, to any
 * intellectual property rights is granted herein.
 */

#include "mic/micscif.h"
#include "mic/micscif_smpt.h"
#include "mic/micscif_nodeqp.h"
#include "mic/micscif_intr.h"
#include "mic/micscif_nm.h"
#include "mic_common.h"
#include "mic/micscif_map.h"
#define SBOX_MMIO_LENGTH 0x10000
/* FIXME: HW specific, define someplace else */
/* SBOX Offset in MMIO space */
#define SBOX_OFFSET 0x10000

#ifdef ENABLE_TEST
static void micscif_qp_testboth(struct micscif_dev *scifdev);
#endif

bool mic_p2p_enable = 1;
bool mic_p2p_proxy_enable = 1;
void micscif_teardown_ep(void *endpt)
{
	struct endpt *ep = (struct endpt *)endpt;
	struct micscif_qp *qp = ep->qp_info.qp;
	if (qp) {
		if (qp->outbound_q.rb_base)
			scif_iounmap((void *)qp->outbound_q.rb_base,
				qp->outbound_q.size, ep->remote_dev);
		if (qp->remote_qp)
			scif_iounmap((void *)qp->remote_qp,
				sizeof(struct micscif_qp), ep->remote_dev);
		if (qp->local_buf) {
			unmap_from_aperture(
				qp->local_buf,
				ep->remote_dev, ENDPT_QP_SIZE);
		}
		if (qp->local_qp) {
			unmap_from_aperture(qp->local_qp, ep->remote_dev,
				sizeof(struct micscif_qp));
		}
		if (qp->inbound_q.rb_base)
			kfree((void *)qp->inbound_q.rb_base);
		kfree(qp);
#ifdef _MIC_SCIF_
		micscif_teardown_proxy_dma(endpt);
#endif
		WARN_ON(!list_empty(&ep->rma_info.task_list));
	}
}
/*
 * Enqueue the endpoint to the zombie list for cleanup.
 * The endpoint should not be accessed once this API returns.
 */
void micscif_add_epd_to_zombie_list(struct endpt *ep, bool mi_eplock_held)
{
	unsigned long sflags = 0;

	/*
	 * It is an error to call scif_close() on an endpoint for which a
	 * scif_range structure obtained via scif_get_pages() has not yet
	 * been returned via scif_put_pages().
	 */
	if (SCIFEP_CLOSING == ep->state ||
		SCIFEP_CLOSED == ep->state ||
		SCIFEP_DISCONNECTED == ep->state)
		BUG_ON(micscif_rma_list_get_pages_check(ep));

	if (list_empty(&ep->rma_info.task_list) && ep->remote_dev)
		wake_up(&ep->remote_dev->sd_mmap_wq);
	if (!mi_eplock_held)
		spin_lock_irqsave(&ms_info.mi_eplock, sflags);
	spin_lock(&ep->lock);
	ep->state = SCIFEP_ZOMBIE;
	spin_unlock(&ep->lock);
	list_add_tail(&ep->list, &ms_info.mi_zombie);
	ms_info.mi_nr_zombies++;
	if (!mi_eplock_held)
		spin_unlock_irqrestore(&ms_info.mi_eplock, sflags);
	queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work);
}
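
/*
 * Illustrative caller sketch (an assumption for illustration, not a
 * verbatim caller from this file): a close path tears the endpoint down
 * and then parks it on the zombie list; the misc work queue later frees
 * it via micscif_cleanup_zombie_epd().
 *
 *	micscif_teardown_ep(ep);
 *	micscif_add_epd_to_zombie_list(ep, false);	// mi_eplock not held
 */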

/* Initializes "local" data structures for the QP
 *
 * Allocates the QP ring buffer (rb) and initializes the inbound queue.
 * On the host, generates bus addresses for the QP rb & the QP itself;
 * on the card, maps these into the PCI aperture.
 */
int micscif_setup_qp_connect(struct micscif_qp *qp, dma_addr_t *qp_offset,
			int local_size, struct micscif_dev *scifdev)
{
	void *local_q = NULL;
	int err = 0;
	volatile uint32_t tmp_rd;

	spin_lock_init(&qp->qp_send_lock);
	spin_lock_init(&qp->qp_recv_lock);

	if (!qp->inbound_q.rb_base) {
		/* we need to allocate the local buffer for the incoming queue */
		local_q = kzalloc(local_size, GFP_ATOMIC);
		if (!local_q) {
			printk(KERN_ERR "Ring Buffer Allocation Failed\n");
			err = -ENOMEM;
			return err;
		}
		/* to setup the inbound_q, the buffer lives locally (local_q),
		 * the read pointer is remote (in remote_qp's local_read)
		 * the write pointer is local (in local_write)
		 */
		tmp_rd = 0;
		micscif_rb_init(&qp->inbound_q,
				&tmp_rd, /* No read ptr right now ... */
				&(scifdev->qpairs[0].local_write),
				(volatile void *)local_q,
				local_size);
		qp->inbound_q.read_ptr = NULL; /* it is unsafe to use the ring buffer until this changes! */
	}

	if (!qp->local_buf) {
		err = map_virt_into_aperture(&qp->local_buf, local_q, scifdev, local_size);
		if (err) {
			printk(KERN_ERR "%s %d error %d\n",
				__func__, __LINE__, err);
			return err;
		}
	}

	if (!qp->local_qp) {
		err = map_virt_into_aperture(qp_offset, qp, scifdev, sizeof(struct micscif_qp));
		if (err) {
			printk(KERN_ERR "%s %d error %d\n",
				__func__, __LINE__, err);
			return err;
		}
		qp->local_qp = *qp_offset;
	} else {
		*qp_offset = qp->local_qp;
	}
	return err;
}
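
/*
 * Connect/accept handshake at a glance (a sketch assembled from the
 * callers below, not additional API surface): the connecting side
 * publishes the bus address of its QP, and the accepting side maps it
 * and answers with SCIF_INIT.
 *
 *	// connecting side (see micscif_setup_host_qp())
 *	qp->magic = SCIFEP_MAGIC;
 *	err = micscif_setup_qp_connect(qp, &qp_pa, NODE_QP_SIZE, scifdev);
 *	// ... qp_pa is handed to the peer out of band ...
 *
 *	// accepting side (see micscif_setup_card_qp())
 *	err = micscif_setup_qp_accept(qp, &qp_offset, qp_pa,
 *				      NODE_QP_SIZE, scifdev);
 *	// reply with a SCIF_INIT nodemsg carrying qp_offset
 */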

/* When the other side has already done its allocation, this is called */
/* TODO: Replace reads that go across the bus somehow ... */
int micscif_setup_qp_accept(struct micscif_qp *qp, dma_addr_t *qp_offset, dma_addr_t phys, int local_size, struct micscif_dev *scifdev)
{
	void *local_q;
	volatile void *remote_q;
	struct micscif_qp *remote_qp;
	int remote_size;
	int err = 0;

	spin_lock_init(&qp->qp_send_lock);
	spin_lock_init(&qp->qp_recv_lock);
	/* Start by figuring out where we need to point */
	remote_qp = scif_ioremap(phys, sizeof(struct micscif_qp), scifdev);
	qp->remote_qp = remote_qp;
	qp->remote_buf = remote_qp->local_buf;
	/* To setup the outbound_q, the buffer lives in remote memory (at scifdev->bs->buf phys),
	 * the read pointer is local (in local's local_read)
	 * the write pointer is remote (in remote_qp's local_write)
	 */
	remote_size = qp->remote_qp->inbound_q.size; /* TODO: Remove this read for p2p */
	remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev);

	BUG_ON(qp->remote_qp->magic != SCIFEP_MAGIC);

	qp->remote_qp->local_write = 0;
	micscif_rb_init(&(qp->outbound_q),
			&(qp->local_read), /* read ptr */
			&(qp->remote_qp->local_write), /* write ptr */
			remote_q, /* rb_base */
			remote_size);
	/* to setup the inbound_q, the buffer lives locally (local_q),
	 * the read pointer is remote (in remote_qp's local_read)
	 * the write pointer is local (in local_write)
	 */
	local_q = kzalloc(local_size, GFP_KERNEL);
	if (!local_q) {
		printk(KERN_ERR "Ring Buffer Allocation Failed\n");
		err = -ENOMEM;
		return err;
	}

	qp->remote_qp->local_read = 0;
	micscif_rb_init(&(qp->inbound_q),
			&(qp->remote_qp->local_read),
			&(qp->local_write),
			local_q,
			local_size);
	err = map_virt_into_aperture(&qp->local_buf, local_q, scifdev, local_size);
	if (err) {
		printk(KERN_ERR "%s %d error %d\n",
			__func__, __LINE__, err);
		return err;
	}
	err = map_virt_into_aperture(qp_offset, qp, scifdev, sizeof(struct micscif_qp));
	if (err) {
		printk(KERN_ERR "%s %d error %d\n",
			__func__, __LINE__, err);
		return err;
	}
	qp->local_qp = *qp_offset;
	return err;
}

int micscif_setup_qp_connect_response(struct micscif_dev *scifdev, struct micscif_qp *qp, uint64_t payload)
{
	int err = 0;
	void *r_buf;
	int remote_size;
	phys_addr_t tmp_phys;

	qp->remote_qp = scif_ioremap(payload, sizeof(struct micscif_qp), scifdev);

	if (!qp->remote_qp) {
		err = -ENOMEM;
		goto error;
	}

	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
		printk(KERN_ERR "SCIFEP_MAGIC does not match between node %d "
			"(self) and %d (remote)\n", scif_dev[ms_info.mi_nodeid].sd_node,
			scifdev->sd_node);
		WARN_ON(1);
		err = -ENODEV;
		goto error;
	}

	tmp_phys = readq(&(qp->remote_qp->local_buf));
	remote_size = readl(&qp->remote_qp->inbound_q.size);
	r_buf = scif_ioremap(tmp_phys, remote_size, scifdev);

#if 0
	pr_debug("payload = 0x%llx remote_qp = 0x%p tmp_phys=0x%llx \
		remote_size=%d r_buf=%p\n", payload, qp->remote_qp,
		tmp_phys, remote_size, r_buf);
#endif

	micscif_rb_init(&(qp->outbound_q),
			&(qp->local_read),
			&(qp->remote_qp->local_write),
			r_buf,
			remote_size);
	/* re-setup the inbound_q now that we know where the inbound_read really is */
	micscif_rb_init(&(qp->inbound_q),
			&(qp->remote_qp->local_read),
			&(qp->local_write),
			qp->inbound_q.rb_base,
			qp->inbound_q.size);
error:
	return err;
}

#ifdef _MIC_SCIF_
extern int micscif_send_host_intr(struct micscif_dev *, uint32_t);

int micscif_send_host_intr(struct micscif_dev *dev, uint32_t doorbell)
{
	uint32_t db_reg;

	if (doorbell > 3)
		return -EINVAL;

	db_reg = readl(dev->mm_sbox +
			(SBOX_SDBIC0 + (4 * doorbell))) | SBOX_SDBIC0_DBREQ_BIT;
	writel(db_reg, dev->mm_sbox + (SBOX_SDBIC0 + (4 * doorbell)));
	return 0;
}
#endif
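
/*
 * Example (sketch, card side only): ring host doorbell 0 to signal a new
 * node QP message; only doorbells 0-3 exist, anything else is -EINVAL.
 *
 *	micscif_send_host_intr(&scif_dev[0], 0);
 */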

/*
 * Interrupts remote mic
 */
static void
micscif_send_mic_intr(struct micscif_dev *dev)
{
	/* Writes to RDMASR trigger the interrupt */
	writel(0, (uint8_t *)dev->mm_sbox + dev->sd_rdmasr);
}

/* scifdev - remote scif device
 * also needs the local scif device so that we can decide which RDMASR
 * to target on the remote mic
 */
static __always_inline void
scif_send_msg_intr(struct micscif_dev *scifdev)
{
#ifdef _MIC_SCIF_
	if (scifdev == &scif_dev[0])
		micscif_send_host_intr(scifdev, 0);
	else
#endif
		micscif_send_mic_intr(scifdev);
}

#ifdef _MIC_SCIF_
int micscif_setup_card_qp(phys_addr_t host_phys, struct micscif_dev *scifdev)
{
	int local_size;
	dma_addr_t qp_offset;
	int err = 0;
	struct nodemsg tmp_msg;
	uint16_t host_scif_ver;

	pr_debug("Got 0x%llx from the host\n", host_phys);

	local_size = NODE_QP_SIZE;

	/* FIXME: n_qpairs is always 1; OK to get rid of it? */
	scifdev->n_qpairs = 1;
	scifdev->qpairs = kzalloc(sizeof(struct micscif_qp), GFP_KERNEL);
	if (!scifdev->qpairs) {
		printk(KERN_ERR "Node QP Allocation failed\n");
		err = -ENOMEM;
		return err;
	}

	scifdev->qpairs->magic = SCIFEP_MAGIC;
	pr_debug("micscif_card(): called qp_accept\n");
	err = micscif_setup_qp_accept(&scifdev->qpairs[0], &qp_offset, host_phys, local_size, scifdev);

	if (!err) {
		host_scif_ver = readw(&(&scifdev->qpairs[0])->remote_qp->scif_version);
		if (host_scif_ver != SCIF_VERSION) {
			printk(KERN_ERR "Card and host SCIF versions do not match.\n");
			printk(KERN_ERR "Card version: %u, Host version: %u\n",
				SCIF_VERSION, host_scif_ver);
			err = -ENXIO;
			goto error_exit;
		}
		/* now that everything is setup and mapped, we're ready to tell the
		 * host where our queue is located
		 */
		tmp_msg.uop = SCIF_INIT;
		tmp_msg.payload[0] = qp_offset;
		tmp_msg.payload[1] = get_rdmasr_offset(scifdev->sd_intr_handle);
		tmp_msg.dst.node = 0; /* host */

		pr_debug("micscif_setup_card_qp: micscif_setup_qp_accept, INIT message\n");
		err = micscif_nodeqp_send(scifdev, &tmp_msg, NULL);
	}
error_exit:
	if (err)
		printk(KERN_ERR "%s %d error %d\n",
			__func__, __LINE__, err);
	return err;
}


void micscif_send_exit(void)
{
	struct nodemsg msg;
	struct micscif_dev *scifdev = &scif_dev[SCIF_HOST_NODE];

	init_waitqueue_head(&ms_info.mi_exitwq);

	msg.uop = SCIF_EXIT;
	msg.src.node = ms_info.mi_nodeid;
	msg.dst.node = scifdev->sd_node;
	/* No error handling for Host SCIF device */
	micscif_nodeqp_send(scifdev, &msg, NULL);
}

#else /* !_MIC_SCIF_ */
static uint32_t tmp_r_ptr;
int micscif_setup_host_qp(mic_ctx_t *mic_ctx, struct micscif_dev *scifdev)
{
	int err = 0;
	int local_size;

	/* Bail out if the node QP is already setup */
	if (scifdev->qpairs)
		return err;

	local_size = NODE_QP_SIZE;

	/* for now, assume that we only have one queue-pair -- with the host */
	scifdev->n_qpairs = 1;
	scifdev->qpairs = (struct micscif_qp *)kzalloc(sizeof(struct micscif_qp), GFP_ATOMIC);
	if (!scifdev->qpairs) {
		printk(KERN_ERR "Node QP Allocation failed\n");
		err = -ENOMEM;
		return err;
	}

	scifdev->qpairs->magic = SCIFEP_MAGIC;
	scifdev->qpairs->scif_version = SCIF_VERSION;
	err = micscif_setup_qp_connect(&scifdev->qpairs[0], &(mic_ctx->bi_scif.si_pa), local_size, scifdev);
	/* fake the read pointer setup so we can use the inbound q */
	scifdev->qpairs[0].inbound_q.read_ptr = &tmp_r_ptr;

	/* We're as setup as we can be ... the inbound_q is setup, w/o
	 * a usable outbound q. When we get a message, the read_ptr will
	 * be updated, so we know there's something here. When that happens,
	 * we finish the setup (just point the write pointer to the real
	 * write pointer that lives on the card), and pull the message off
	 * the card.
	 * Tell the card where we are.
	 */
	printk("My Phys addrs: 0x%llx and scif_addr 0x%llx\n", scifdev->qpairs[0].local_buf,
		mic_ctx->bi_scif.si_pa);

	if (err)
		printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err);
	return err;
}


/* FIXME: add to header */
struct scatterlist *micscif_p2p_mapsg(void *va, int page_size, int page_cnt);
void micscif_p2p_freesg(struct scatterlist *);
mic_ctx_t *get_per_dev_ctx(uint16_t node);

/* Init p2p mappings required to access peerdev from scifdev */
static struct scif_p2p_info *
init_p2p_info(struct micscif_dev *scifdev, struct micscif_dev *peerdev)
{
	struct _mic_ctx_t *mic_ctx_peer;
	struct _mic_ctx_t *mic_ctx;
	struct scif_p2p_info *p2p;
	int num_mmio_pages;
	int num_aper_pages;

	mic_ctx = get_per_dev_ctx(scifdev->sd_node - 1);
	mic_ctx_peer = get_per_dev_ctx(peerdev->sd_node - 1);

	num_mmio_pages = (int)(mic_ctx_peer->mmio.len >> PAGE_SHIFT);
	num_aper_pages = (int)(mic_ctx_peer->aper.len >> PAGE_SHIFT);

	// First map the peer board addresses into the new board
	p2p = kzalloc(sizeof(struct scif_p2p_info), GFP_KERNEL);

	if (p2p) {
		int sg_page_shift = get_order(min(mic_ctx_peer->aper.len, (uint64_t)(1 << 30)));
		/* FIXME: check return codes below */
		p2p->ppi_sg[PPI_MMIO] = micscif_p2p_mapsg(mic_ctx_peer->mmio.va, PAGE_SIZE,
							num_mmio_pages);
		p2p->sg_nentries[PPI_MMIO] = num_mmio_pages;
		p2p->ppi_sg[PPI_APER] = micscif_p2p_mapsg(mic_ctx_peer->aper.va, 1 << sg_page_shift,
							num_aper_pages >> (sg_page_shift - PAGE_SHIFT));
		p2p->sg_nentries[PPI_APER] = num_aper_pages >> (sg_page_shift - PAGE_SHIFT);

		pci_map_sg(mic_ctx->bi_pdev, p2p->ppi_sg[PPI_MMIO], num_mmio_pages, PCI_DMA_BIDIRECTIONAL);
		pci_map_sg(mic_ctx->bi_pdev, p2p->ppi_sg[PPI_APER],
			num_aper_pages >> (sg_page_shift - PAGE_SHIFT), PCI_DMA_BIDIRECTIONAL);

		p2p->ppi_pa[PPI_MMIO] = sg_dma_address(p2p->ppi_sg[PPI_MMIO]);
		p2p->ppi_pa[PPI_APER] = sg_dma_address(p2p->ppi_sg[PPI_APER]);
		p2p->ppi_len[PPI_MMIO] = num_mmio_pages;
		p2p->ppi_len[PPI_APER] = num_aper_pages;
		p2p->ppi_disc_state = SCIFDEV_RUNNING;
		p2p->ppi_peer_id = peerdev->sd_node;
	}
	return p2p;
}


int micscif_setuphost_response(struct micscif_dev *scifdev, uint64_t payload)
{
	int read_size;
	struct nodemsg msg;
	int err = 0;

	pr_debug("micscif_setuphost_response: scif node %d\n", scifdev->sd_node);
	err = micscif_setup_qp_connect_response(scifdev, &scifdev->qpairs[0], payload);
	if (err) {
		printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err);
		return err;
	}
	/* re-receive the bootstrap message after the re-init call */
	pr_debug("micscif_host(): reading INIT message after re-init call\n");
	read_size = micscif_rb_get_next(&(scifdev->qpairs[0].inbound_q), &msg,
					sizeof(struct nodemsg));
	micscif_rb_update_read_ptr(&(scifdev->qpairs[0].inbound_q));

	scifdev->sd_rdmasr = (uint32_t)msg.payload[1];

	/* for testing, send a message back to the card */
	msg.uop = SCIF_INIT;
	msg.payload[0] = 0xdeadbeef;
	msg.dst.node = scifdev->sd_node; /* card */
	if ((err = micscif_nodeqp_send(scifdev, &msg, NULL))) {
		printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err);
		return err;
	}

#ifdef ENABLE_TEST
	/* Launch the micscif_rb test */
	pr_debug("micscif_host(): starting TEST\n");
	micscif_qp_testboth(scifdev);
#endif

	/*
	 * micscif_nodeqp_intrhandler(..) increments the ref_count before calling
	 * this API, hence clamp the scif_ref_cnt to 1. This is required to
	 * handle the SCIF module load/unload case on MIC. The SCIF_EXIT message
	 * keeps the ref_cnt clamped to SCIF_NODE_IDLE during module unload.
	 * Setting the ref_cnt to 1 during SCIF_INIT ensures that the ref_cnt
	 * returns back to 0 once SCIF module load completes.
	 */
#ifdef SCIF_ENABLE_PM
	scifdev->scif_ref_cnt = (atomic_long_t)ATOMIC_LONG_INIT(1);
#endif
	mutex_lock(&ms_info.mi_conflock);
	ms_info.mi_mask |= 0x1 << scifdev->sd_node;
	ms_info.mi_maxid = SCIF_MAX(scifdev->sd_node, ms_info.mi_maxid);
	ms_info.mi_total++;
	scifdev->sd_state = SCIFDEV_RUNNING;
	mutex_unlock(&ms_info.mi_conflock);

	micscif_node_add_callback(scifdev->sd_node);
	return err;
}

void
micscif_removehost_respose(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	mic_ctx_t *mic_ctx = get_per_dev_ctx(scifdev->sd_node - 1);
	int err;

	if (scifdev->sd_state != SCIFDEV_RUNNING)
		return;

	micscif_stop(mic_ctx);

	if ((err = micscif_nodeqp_send(scifdev, msg, NULL)))
		printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err);

	scifdev->sd_state = SCIFDEV_INIT;
}
#endif

/* TODO: Fix the non-symmetric use of micscif_dev on the host and the card. Right
 * now, the card's data structures are shaping up such that there is a single
 * micscif_dev structure with multiple qp's. The host ends up with multiple
 * micscif_devs (one per card). We should unify the way this will work.
 */
static struct micscif_qp *micscif_nodeqp_find(struct micscif_dev *scifdev, uint8_t node)
{
	struct micscif_qp *qp = NULL;
#ifdef _MIC_SCIF_
	/* This is also a HACK. Even though the code is identical with the host right
	 * now, I broke it into two parts because they will likely not be identical
	 * moving forward
	 */
	qp = scifdev->qpairs;
#else
	/* HORRIBLE HACK! Since we only have one card, and one scifdev, we
	 * can just grab the scifdev->qp to find the qp. We don't actually have to
	 * do any kind of looking for it
	 */
	qp = scifdev->qpairs;
#endif /* !_MIC_SCIF_ */
	return qp;
}

static char *scifdev_state[] = {"SCIFDEV_NOTPRESENT",
				"SCIFDEV_INIT",
				"SCIFDEV_RUNNING",
				"SCIFDEV_SLEEPING",
				"SCIFDEV_STOPPING",
				"SCIFDEV_STOPPED"};

static char *message_types[] = {"BAD",
				"INIT",
				"EXIT",
				"SCIF_NODE_ADD",
				"SCIF_NODE_ADD_ACK",
				"CNCT_REQ",
				"CNCT_GNT",
				"CNCT_GNTACK",
				"CNCT_GNTNACK",
				"CNCT_REJ",
				"CNCT_TERM",
				"TERM_ACK",
				"DISCNCT",
				"DISCNT_ACK",
				"REGISTER",
				"REGISTER_ACK",
				"REGISTER_NACK",
				"UNREGISTER",
				"UNREGISTER_ACK",
				"UNREGISTER_NACK",
				"ALLOC_REQ",
				"ALLOC_GNT",
				"ALLOC_REJ",
				"FREE_PHYS",
				"FREE_VIRT",
				"CLIENT_SENT",
				"CLIENT_RCVD",
				"MUNMAP",
				"MARK",
				"MARK_ACK",
				"MARK_NACK",
				"WAIT",
				"WAIT_ACK",
				"WAIT_NACK",
				"SIGNAL_LOCAL",
				"SIGNAL_REMOTE",
				"SIG_ACK",
				"SIG_NACK",
				"MAP_GTT",
				"MAP_GTT_ACK",
				"MAP_GTT_NACK",
				"UNMAP_GTT",
				"CREATE_NODE_DEP",
				"DESTROY_NODE_DEP",
				"REMOVE_NODE",
				"REMOVE_NODE_ACK",
				"WAKE_UP_NODE",
				"WAKE_UP_NODE_ACK",
				"WAKE_UP_NODE_NACK",
				"SCIF_NODE_ALIVE",
				"SCIF_NODE_ALIVE_ACK",
				"SCIF_SMPT",
				"SCIF_GTT_DMA_MAP",
				"SCIF_GTT_DMA_ACK",
				"SCIF_GTT_DMA_NACK",
				"SCIF_GTT_DMA_UNMAP",
				"SCIF_PROXY_DMA",
				"SCIF_PROXY_ORDERED_DMA",
				"SCIF_NODE_CONNECT",
				"SCIF_NODE_CONNECT_NACK",
				"SCIF_NODE_ADD_NACK",
				"SCIF_GET_NODE_INFO",
				"TEST"};

static void
micscif_display_message(struct micscif_dev *scifdev, struct nodemsg *msg,
			const char *label)
{
	if (!ms_info.en_msg_log)
		return;
	if (msg->uop > SCIF_MAX_MSG) {
		pr_debug("%s: unknown msg type %d\n", label, msg->uop);
		return;
	}
	if (msg->uop == SCIF_TEST)
		return;

	printk("%s: %s msg type %s, src %d:%d, dest %d:%d "
		"payload 0x%llx:0x%llx:0x%llx:0x%llx\n",
		label, scifdev_state[scifdev->sd_state],
		message_types[msg->uop], msg->src.node, msg->src.port,
		msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1],
		msg->payload[2], msg->payload[3]);
}

/**
 * micscif_nodeqp_send - Send a message on the Node Qp.
 * @scifdev: Scif Device.
 * @msg: The message to be sent.
 * @ep: The endpoint the message is associated with, or NULL.
 *
 * This function blocks until the message is sent to the destination
 * scif device, or until the send times out.
 */
int micscif_nodeqp_send(struct micscif_dev *scifdev,
			struct nodemsg *msg, struct endpt *ep)
{
	struct micscif_qp *qp;
	int err = -ENOMEM, loop_cnt = 0;

	if (oops_in_progress ||
		(SCIF_INIT != msg->uop &&
		SCIF_EXIT != msg->uop &&
		SCIFDEV_RUNNING != scifdev->sd_state &&
		SCIFDEV_SLEEPING != scifdev->sd_state) ||
		(ep && SCIFDEV_STOPPED == ep->sd_state)) {
		err = -ENODEV;
		goto error;
	}

	micscif_display_message(scifdev, msg, "Sent");

	qp = micscif_nodeqp_find(scifdev, (uint8_t)msg->dst.node);
	if (!qp) {
		err = -EINVAL;
		goto error;
	}
	spin_lock(&qp->qp_send_lock);

	while ((err = micscif_rb_write(&qp->outbound_q,
				msg, sizeof(struct nodemsg)))) {
		cpu_relax();
		mdelay(1);
		if (loop_cnt++ > (NODEQP_SEND_TO_MSEC)) {
			err = -ENODEV;
			break;
		}
	}
	if (!err)
		micscif_rb_commit(&qp->outbound_q);
	spin_unlock(&qp->qp_send_lock);
	if (!err) {
		if (is_self_scifdev(scifdev))
			/*
			 * For loopback we need to emulate an interrupt by queueing
			 * work for the queue handling real Node Qp interrupts.
			 */
			queue_work(scifdev->sd_intr_wq, &scifdev->sd_intr_bh);
		else
			scif_send_msg_intr(scifdev);
	}
error:
	if (err)
		pr_debug("%s %d error %d uop %d\n",
			__func__, __LINE__, err, msg->uop);
	return err;
}
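
/*
 * Typical caller sketch (mirrors micscif_send_exit() above; not new API):
 *
 *	struct nodemsg msg;
 *
 *	msg.uop = SCIF_EXIT;
 *	msg.src.node = ms_info.mi_nodeid;
 *	msg.dst.node = scifdev->sd_node;
 *	err = micscif_nodeqp_send(scifdev, &msg, NULL);
 */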

/* TODO: Make this actually figure out where the interrupt came from. For host, it can
 * be a little easier (one "vector" per board). For the cards, we'll have to do some
 * scanning, methinks
 */
struct micscif_qp *micscif_nodeqp_nextmsg(struct micscif_dev *scifdev)
{
	return &scifdev->qpairs[0];
}

/*
 * micscif_misc_handler:
 *
 * Work queue handler for servicing miscellaneous SCIF tasks.
 * Examples include:
 * 1) Remote fence requests.
 * 2) Destruction of temporary registered windows
 *    created during scif_vreadfrom()/scif_vwriteto().
 * 3) Cleanup of zombie endpoints.
 */
void micscif_misc_handler(struct work_struct *work)
{
	micscif_rma_handle_remote_fences();
	micscif_rma_destroy_temp_windows();
#ifdef _MIC_SCIF_
	vm_unmap_aliases();
#endif
	micscif_rma_destroy_tcw_invalid(&ms_info.mi_rma_tc);
	micscif_cleanup_zombie_epd();
}

/**
 * scif_init_resp() - Respond to SCIF_INIT interrupt message
 * @scifdev: Other node device to respond to
 * @msg: Interrupt message
 *
 * Loading the driver on the MIC card sends an INIT message to the host
 * with the PCI bus memory information it needs. This function receives
 * that message, finishes its initialization and echoes it back to the card.
 *
 * When the card receives the message this function starts a connection test.
 */
static __always_inline void
scif_init_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
#ifdef _MIC_SCIF_
	if (msg->payload[0] != 0xdeadbeef)
		printk(KERN_ERR "Bad payload 0x%llx\n", msg->payload[0]);
#ifdef ENABLE_TEST
	else
		micscif_qp_testboth(scifdev);
#endif
#else
	pr_debug("scifhost(): sending response to INIT\n");
	micscif_setuphost_response(scifdev, msg->payload[0]);
	atomic_set(&scifdev->sd_node_alive, 0);
	if (scifdev->sd_ln_wq)
		queue_delayed_work(scifdev->sd_ln_wq,
			&scifdev->sd_watchdog_work, NODE_ALIVE_TIMEOUT);
#endif
}

/**
 * scif_exit_resp() - Respond to SCIF_EXIT interrupt message
 * @scifdev: Other node device to respond to
 * @msg: Interrupt message
 *
 * The peer node is exiting. On the card, mark this node as stopped and
 * wake up anybody waiting in micscif_send_exit(). On the host, stop the
 * card and acknowledge the message so the card can finish unloading.
 */
static __always_inline void
scif_exit_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
#ifdef _MIC_SCIF_
	printk("card: scif node %d exiting\n", ms_info.mi_nodeid);
	scif_dev[ms_info.mi_nodeid].sd_state = SCIFDEV_STOPPED;
	wake_up(&ms_info.mi_exitwq);
#else
	printk("host: scif node %d exiting\n", msg->src.node);
	/* The interrupt handler that received the message would have
	 * bumped up the ref_cnt by 1. micscif_removehost_response
	 * calls micscif_cleanup_scifdev which loops forever for the ref_cnt
	 * to drop to 0, thereby leading to a soft lockup. To prevent
	 * that, decrement the ref_cnt here.
	 */
	micscif_dec_node_refcnt(scifdev, 1);
	micscif_removehost_respose(scifdev, msg);
	/* Increment the ref_cnt here. The interrupt handler will now
	 * decrement it, leaving the ref_cnt at 0 if everything
	 * works as expected. Note that it's not absolutely necessary
	 * to do this except to make sure ref_cnt is 0 and to catch
	 * errors that may happen if ref_cnt drops to a negative value.
	 */
	micscif_inc_node_refcnt(scifdev, 1);
#endif
}

/**
 * scif_nodeadd_resp() - Respond to SCIF_NODE_ADD interrupt message
 * @scifdev: Other node device to respond to
 * @msg: Interrupt message
 *
 * When the host driver has finished initializing a MIC node queue pair it
 * marks the board as online. It then looks for all currently online MIC
 * cards and sends a SCIF_NODE_ADD message to identify the ID of the new
 * card for peer-to-peer initialization.
 *
 * The local node allocates its incoming queue and sends its address in the
 * SCIF_NODE_ADD_ACK message back to the host, and the host "reflects" this
 * message to the new node.
 */
static __always_inline void
scif_nodeadd_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
#ifdef _MIC_SCIF_
	struct micscif_dev *newdev;
	dma_addr_t qp_offset;
	int qp_connect;

	pr_debug("Scifdev %d:%d received NODE_ADD msg for node %d\n",
		scifdev->sd_node, msg->dst.node, msg->src.node);
	pr_debug("Remote address for this node's aperture %llx\n",
		msg->payload[0]);
	printk("Remote node's sbox %llx\n", msg->payload[1]);

	newdev = &scif_dev[msg->src.node];
	newdev->sd_node = msg->src.node;

	if (micscif_setup_interrupts(newdev)) {
		printk(KERN_ERR "failed to setup interrupts for %d\n", msg->src.node);
		goto interrupt_setup_error;
	}

	newdev->mm_sbox = ioremap_nocache(msg->payload[1] + SBOX_OFFSET, SBOX_MMIO_LENGTH);

	if (!newdev->mm_sbox) {
		printk(KERN_ERR "failed to map mmio for %d\n", msg->src.node);
		goto mmio_map_error;
	}

	if (!(newdev->qpairs = kzalloc(sizeof(struct micscif_qp), GFP_KERNEL))) {
		printk(KERN_ERR "failed to allocate qpair for %d\n", msg->src.node);
		goto qp_alloc_error;
	}

	/* Set the base address of the remote node's memory since it gets
	 * added to qp_offset
	 */
	newdev->sd_base_addr = msg->payload[0];

	if ((qp_connect = micscif_setup_qp_connect(newdev->qpairs, &qp_offset,
			NODE_QP_SIZE, newdev))) {
		printk(KERN_ERR "failed to setup qp_connect %d\n", qp_connect);
		goto qp_connect_error;
	}

	if (register_scif_intr_handler(newdev))
		goto qp_connect_error;

	newdev->scif_ref_cnt = (atomic_long_t)ATOMIC_LONG_INIT(0);
	micscif_node_add_callback(msg->src.node);
	newdev->qpairs->magic = SCIFEP_MAGIC;
	newdev->qpairs->qp_state = QP_OFFLINE;
	wmb();

	msg->uop = SCIF_NODE_ADD_ACK;
	msg->dst.node = msg->src.node;
	msg->src.node = ms_info.mi_nodeid;
	msg->payload[0] = qp_offset;
	msg->payload[2] = get_rdmasr_offset(newdev->sd_intr_handle);
	msg->payload[3] = scif_dev[ms_info.mi_nodeid].sd_numa_node;
	micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], msg, NULL);
	return;

qp_connect_error:
	kfree(newdev->qpairs);
	newdev->qpairs = NULL;
qp_alloc_error:
	iounmap(newdev->mm_sbox);
	newdev->mm_sbox = NULL;
mmio_map_error:
interrupt_setup_error:
	printk(KERN_ERR "node add failed for node %d\n", msg->src.node);
	/*
	 * Update self with NODE ADD failure and send
	 * nack to update the peer.
	 */
	mutex_lock(&newdev->sd_lock);
	newdev->sd_state = SCIFDEV_NOTPRESENT;
	mutex_unlock(&newdev->sd_lock);
	wake_up_interruptible(&newdev->sd_p2p_wq);
	msg->uop = SCIF_NODE_ADD_NACK;
	msg->dst.node = msg->src.node;
	msg->src.node = ms_info.mi_nodeid;
	micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], msg, NULL);
#endif
}

#ifdef _MIC_SCIF_
static inline void scif_p2pdev_uninit(struct micscif_dev *peerdev)
{
	deregister_scif_intr_handler(peerdev);
	iounmap(peerdev->mm_sbox);
	mutex_lock(&peerdev->sd_lock);
	peerdev->sd_state = SCIFDEV_NOTPRESENT;
	mutex_unlock(&peerdev->sd_lock);
}

void scif_poll_qp_state(struct work_struct *work)
{
#define NODE_QP_RETRY 100
	struct micscif_dev *peerdev = container_of(work, struct micscif_dev,
						sd_p2p_dwork.work);
	struct micscif_qp *qp = &peerdev->qpairs[0];

	if (SCIFDEV_RUNNING != peerdev->sd_state)
		return;
	if (qp->qp_state == QP_OFFLINE) {
		if (peerdev->sd_p2p_retry++ == NODE_QP_RETRY) {
			printk(KERN_ERR "Warning: QP check timeout with "
				"state %d\n", qp->qp_state);
			goto timeout;
		}
		schedule_delayed_work(&peerdev->sd_p2p_dwork,
			msecs_to_jiffies(NODE_QP_TIMEOUT));
		return;
	}
	wake_up(&peerdev->sd_p2p_wq);
	return;
timeout:
	printk(KERN_ERR "%s %d remote node %d offline, state = 0x%x\n",
		__func__, __LINE__, peerdev->sd_node, qp->qp_state);
	micscif_inc_node_refcnt(peerdev, 1);
	qp->remote_qp->qp_state = QP_OFFLINE;
	micscif_dec_node_refcnt(peerdev, 1);
	scif_p2pdev_uninit(peerdev);
	wake_up(&peerdev->sd_p2p_wq);
}
#endif

/**
 * scif_nodeaddack_resp() - Respond to SCIF_NODE_ADD_ACK interrupt message
 * @scifdev: Other node device to respond to
 * @msg: Interrupt message
 *
 * On the host, the SCIF_NODE_ADD_ACK sent by a new node is forwarded to
 * the destination node. On a MIC node, receiving the ACK finishes setting
 * up this end of the peer-to-peer queue pair.
 */
static __always_inline void
scif_nodeaddack_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
#ifdef _MIC_SCIF_
	struct micscif_dev *peerdev;
	struct micscif_qp *qp;
#else
	struct micscif_dev *dst_dev = &scif_dev[msg->dst.node];
#endif
	pr_debug("Scifdev %d received SCIF_NODE_ADD_ACK msg for src %d dst %d\n",
		scifdev->sd_node, msg->src.node, msg->dst.node);
	pr_debug("payload %llx %llx %llx %llx\n", msg->payload[0], msg->payload[1],
		msg->payload[2], msg->payload[3]);
#ifndef _MIC_SCIF_

	/* the lock serializes with micscif_setuphost_response
	 * The host is forwarding the NODE_ADD_ACK message from src to dst
	 * we need to make sure that the dst has already received a NODE_ADD
	 * for src and setup its end of the qp to dst
	 */
	mutex_lock(&ms_info.mi_conflock);
	msg->payload[1] = ms_info.mi_maxid;
	mutex_unlock(&ms_info.mi_conflock);
	micscif_inc_node_refcnt(dst_dev, 1);
	micscif_nodeqp_send(dst_dev, msg, NULL);
	micscif_dec_node_refcnt(dst_dev, 1);
#else
	peerdev = &scif_dev[msg->src.node];
	peerdev->sd_node = msg->src.node;

	if (peerdev->sd_state == SCIFDEV_NOTPRESENT)
		return;

	qp = &peerdev->qpairs[0];

	if ((micscif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0],
			msg->payload[0])))
		goto local_error;

	mutex_lock(&peerdev->sd_lock);
	peerdev->sd_numa_node = msg->payload[3];
	/*
	 * Proxy the DMA only for P2P reads with transfer size
	 * greater than the proxy DMA threshold. Proxying reads to convert
	 * them into writes is only required for host Jaketown platforms
	 * when the two MIC devices are connected to the same
	 * QPI/IOH/numa node. The host will not pass the numa node
	 * information for non Intel Jaketown platforms and it will
	 * be -1 in that case.
	 */
	peerdev->sd_proxy_dma_reads =
		mic_p2p_proxy_enable &&
		scif_dev[ms_info.mi_nodeid].sd_numa_node != -1 &&
		(peerdev->sd_numa_node ==
		 scif_dev[ms_info.mi_nodeid].sd_numa_node);
	peerdev->sd_state = SCIFDEV_RUNNING;
	mutex_unlock(&peerdev->sd_lock);

	mutex_lock(&ms_info.mi_conflock);
	ms_info.mi_maxid = msg->payload[1];
	peerdev->sd_rdmasr = msg->payload[2];
	mutex_unlock(&ms_info.mi_conflock);

	/* accessing the peer qp. Make sure the peer is awake */
	micscif_inc_node_refcnt(peerdev, 1);
	qp->remote_qp->qp_state = QP_ONLINE;
	micscif_dec_node_refcnt(peerdev, 1);
	schedule_delayed_work(&peerdev->sd_p2p_dwork,
		msecs_to_jiffies(NODE_QP_TIMEOUT));
	return;
local_error:
	scif_p2pdev_uninit(peerdev);
	wake_up(&peerdev->sd_p2p_wq);
#endif
}
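
/*
 * P2P bring-up at a glance (a summary of the handlers above, not new
 * protocol): the host sends SCIF_NODE_ADD to each online card; each card
 * sets up its end of the QP and answers SCIF_NODE_ADD_ACK (or
 * SCIF_NODE_ADD_NACK on failure); the host reflects the ACK to the new
 * node, which marks the peer QP online; scif_poll_qp_state() then polls
 * qp_state until it leaves QP_OFFLINE or the retry budget is exhausted.
 */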

/**
 * scif_cnctreq_resp() - Respond to SCIF_CNCT_REQ interrupt message
 * @msg: Interrupt message
 *
 * This message is initiated by the remote node to request a connection
 * to the local node. This function looks for an end point in the
 * listen state on the requested port id.
 *
 * If it finds a listening port it places the connect request on the
 * listening end point's queue and wakes up any pending accept calls.
 *
 * If it does not find a listening end point it sends a connection
 * reject message to the remote node.
 */
static __always_inline void
scif_cnctreq_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = NULL;
	struct conreq *conreq;
	unsigned long sflags;

	if ((conreq = (struct conreq *)kmalloc(sizeof(struct conreq), GFP_KERNEL)) == NULL) {
		// Lack of resources so reject the request.
		goto conreq_sendrej;
	}

	if ((ep = micscif_find_listen_ep(msg->dst.port, &sflags)) == NULL) {
		// Send reject due to no listening ports
		goto conreq_sendrej_free;
	}

	if (ep->backlog <= ep->conreqcnt) {
		// Send reject due to too many pending requests
		spin_unlock_irqrestore(&ep->lock, sflags);
		goto conreq_sendrej_free;
	}

	conreq->msg = *msg;
	list_add_tail(&conreq->list, &ep->conlist);
	ep->conreqcnt++;
	spin_unlock_irqrestore(&ep->lock, sflags);

	wake_up_interruptible(&ep->conwq);
	return;

conreq_sendrej_free:
	kfree(conreq);
conreq_sendrej:
	msg->uop = SCIF_CNCT_REJ;
	micscif_nodeqp_send(&scif_dev[msg->src.node], msg, NULL);
}
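
/*
 * Connection handshake at a glance (summarizing the handlers that
 * follow): connect() sends CNCT_REQ; accept() answers CNCT_GNT and the
 * connector replies CNCT_GNTACK (or CNCT_GNTNACK if mapping failed);
 * CNCT_REJ rejects a request outright, and CNCT_TERM/TERM_ACK tears
 * down a request that waited too long in accept().
 */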

/**
 * scif_cnctgnt_resp() - Respond to SCIF_CNCT_GNT interrupt message
 * @msg: Interrupt message
 *
 * An accept() on the remote node has occurred and sent this message
 * to indicate success. Place the end point in the MAPPING state and
 * save the remote node's memory information. Then wake up the connect
 * request so it can finish.
 */
static __always_inline void
scif_cnctgnt_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	unsigned long sflags;
	struct endpt *ep = (struct endpt *)msg->payload[0];

	spin_lock_irqsave(&ep->lock, sflags);
	if (SCIFEP_CONNECTING == ep->state) {
		ep->peer.node = msg->src.node;
		ep->peer.port = msg->src.port;
		ep->qp_info.cnct_gnt_payload = msg->payload[1];
		ep->remote_ep = msg->payload[2];
		ep->state = SCIFEP_MAPPING;

		wake_up_interruptible(&ep->conwq);
		wake_up(&ep->diswq);
	}
	spin_unlock_irqrestore(&ep->lock, sflags);
}

/**
 * scif_cnctgntack_resp() - Respond to SCIF_CNCT_GNTACK interrupt message
 * @msg: Interrupt message
 *
 * The remote connection request has finished mapping the local memory.
 * Place the connection in the connected state and wake up the pending
 * accept() call.
 */
static __always_inline void
scif_cnctgntack_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	unsigned long sflags;
	struct endpt *ep = (struct endpt *)msg->payload[0];

	spin_lock_irqsave(&ms_info.mi_connlock, sflags);
	spin_lock(&ep->lock);
	// New ep is now connected with all resources set.
	ep->state = SCIFEP_CONNECTED;
	list_add_tail(&ep->list, &ms_info.mi_connected);
	get_conn_count(scifdev);
	wake_up(&ep->conwq);
	spin_unlock(&ep->lock);
	spin_unlock_irqrestore(&ms_info.mi_connlock, sflags);
}

/**
 * scif_cnctgntnack_resp() - Respond to SCIF_CNCT_GNTNACK interrupt message
 * @msg: Interrupt message
 *
 * The remote connection request failed to map the local memory it was sent.
 * Place the end point in the CLOSING state to indicate the failure and wake
 * up the pending accept().
 */
static __always_inline void
scif_cnctgntnack_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	unsigned long sflags;

	spin_lock_irqsave(&ep->lock, sflags);
	ep->state = SCIFEP_CLOSING;
	wake_up(&ep->conwq);
	spin_unlock_irqrestore(&ep->lock, sflags);
}

/**
 * scif_cnctrej_resp() - Respond to SCIF_CNCT_REJ interrupt message
 * @msg: Interrupt message
 *
 * The remote end has rejected the connection request. Set the end
 * point back to the bound state and wake up the pending connect().
 */
static __always_inline void
scif_cnctrej_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	unsigned long sflags;

	spin_lock_irqsave(&ep->lock, sflags);
	if (SCIFEP_CONNECTING == ep->state) {
		ep->state = SCIFEP_BOUND;
		wake_up_interruptible(&ep->conwq);
	}
	spin_unlock_irqrestore(&ep->lock, sflags);
}

/**
 * scif_cnctterm_resp() - Respond to SCIF_CNCT_TERM interrupt message
 * @msg: Interrupt message
 *
 * The remote connect() has waited too long for an accept() to occur and
 * is removing the connection request.
 *
 * If the connection request is not found it is currently being
 * processed and no ACK is returned, so the remote connect() keeps
 * waiting for the connection to complete.
 *
 * Otherwise the request is removed and an ACK is returned to indicate
 * success.
 */
static __always_inline void
scif_cnctterm_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	unsigned long sflags;
	struct endpt *ep = NULL;
	struct conreq *conreq = NULL;

	ep = micscif_find_listen_ep(msg->dst.port, &sflags);

	if (ep != NULL) {
		conreq = miscscif_get_connection_request(ep, msg->payload[0]);
		spin_unlock_irqrestore(&ep->lock, sflags);
	}

	if (conreq != NULL) {
		kfree(conreq);
		msg->uop = SCIF_TERM_ACK;
		micscif_nodeqp_send(&scif_dev[msg->src.node], msg, NULL);
	}
}

/**
 * scif_termack_resp() - Respond to SCIF_TERM_ACK interrupt message
 * @msg: Interrupt message
 *
 * Connection termination has been confirmed so set the end point
 * to bound and allow the connection request to error out.
 */
static __always_inline void
scif_termack_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	unsigned long sflags;

	spin_lock_irqsave(&ep->lock, sflags);
	if (ep->state != SCIFEP_BOUND) {
		ep->state = SCIFEP_BOUND;
		wake_up(&ep->diswq);
	}
	spin_unlock_irqrestore(&ep->lock, sflags);
}

/**
 * scif_discnct_resp() - Respond to SCIF_DISCNCT interrupt message
 * @msg: Interrupt message
 *
 * The remote node has indicated close() has been called on its end
 * point. Remove the local end point from the connected list, set its
 * state to disconnected and ensure accesses to the remote node are
 * shutdown.
 *
 * When all accesses to the remote end have completed then send a
 * DISCNT_ACK to indicate it can remove its resources and complete
 * the close routine.
 */
static __always_inline void
scif_discnct_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	unsigned long sflags;
	struct endpt *ep = NULL;
	struct endpt *tmpep;
	struct list_head *pos, *tmpq;

	spin_lock_irqsave(&ms_info.mi_connlock, sflags);
	list_for_each_safe(pos, tmpq, &ms_info.mi_connected) {
		tmpep = list_entry(pos, struct endpt, list);
		if (((uint64_t)tmpep == msg->payload[1]) && ((uint64_t)tmpep->remote_ep == msg->payload[0])) {
			list_del(pos);
			put_conn_count(scifdev);
			ep = tmpep;
			spin_lock(&ep->lock);
			break;
		}
	}

	// If the terminated end is not found then this side started closing
	// before the other side sent the disconnect. If so the ep will no
	// longer be on the connected list. Regardless, the other side
	// needs to be acked to let it know close is complete.
	if (ep == NULL) {
		// Need to unlock conn lock and restore irq state
		spin_unlock_irqrestore(&ms_info.mi_connlock, sflags);
		goto discnct_resp_ack;
	}

	ep->state = SCIFEP_DISCONNECTED;
	list_add_tail(&ep->list, &ms_info.mi_disconnected);

	// TODO Cause associated resources to be freed.
	// First step: wake up threads blocked in send and recv
	wake_up_interruptible(&ep->sendwq);
	wake_up_interruptible(&ep->recvwq);
	wake_up_interruptible(&ep->conwq);
	spin_unlock(&ep->lock);
	spin_unlock_irqrestore(&ms_info.mi_connlock, sflags);

discnct_resp_ack:
	msg->uop = SCIF_DISCNT_ACK;
	micscif_nodeqp_send(&scif_dev[msg->src.node], msg, NULL);
}

/**
 * scif_discntack_resp() - Respond to SCIF_DISCNT_ACK interrupt message
 * @msg: Interrupt message
 *
 * The remote side has indicated it has no more references to local resources.
 */
static __always_inline void
scif_discntack_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	unsigned long sflags;

	spin_lock_irqsave(&ep->lock, sflags);
	ep->state = SCIFEP_DISCONNECTED;
	wake_up(&ep->disconwq);
	spin_unlock_irqrestore(&ep->lock, sflags);
}

/**
 * scif_clientsend_resp() - Respond to SCIF_CLIENT_SENT interrupt message
 * @msg: Interrupt message
 *
 * Remote side is confirming send or receive interrupt handling is complete.
 */
static __always_inline void
scif_clientsend_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];

	if (SCIFEP_CONNECTED == ep->state) {
		wake_up_interruptible(&ep->recvwq);
	}
}

/**
 * scif_clientrcvd_resp() - Respond to SCIF_CLIENT_RCVD interrupt message
 * @msg: Interrupt message
 *
 * Remote side is confirming send or receive interrupt handling is complete.
 */
static __always_inline void
scif_clientrcvd_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];

	if (SCIFEP_CONNECTED == ep->state) {
		wake_up_interruptible(&ep->sendwq);
	}
}

/**
 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
 * @msg: Interrupt message
 *
 * Remote side is requesting a memory allocation.
 */
static __always_inline void
scif_alloc_req(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	int err, opcode = (int)msg->payload[3];
	struct reg_range_t *window = NULL;
	size_t nr_pages = msg->payload[1];
	struct endpt *ep = (struct endpt *)msg->payload[0];

	might_sleep();

	if (SCIFEP_CONNECTED != ep->state) {
		err = -ENOTCONN;
		goto error;
	}

	switch (opcode) {
	case SCIF_REGISTER:
		if (!(window = micscif_create_remote_window(ep,
				(int)nr_pages))) {
			err = -ENOMEM;
			goto error;
		}
		break;
	default:
		/* Unexpected allocation request */
		printk(KERN_ERR "Unexpected allocation request opcode 0x%x ep = 0x%p "
			"scifdev->sd_state 0x%x scifdev->sd_node 0x%x\n",
			opcode, ep, scifdev->sd_state, scifdev->sd_node);
		err = -EINVAL;
		goto error;
	}

	/* The peer's allocation request is granted */
	msg->uop = SCIF_ALLOC_GNT;
	msg->payload[0] = (uint64_t)window;
	msg->payload[1] = window->mapped_offset;
	if ((err = micscif_nodeqp_send(ep->remote_dev, msg, ep)))
		micscif_destroy_remote_window(ep, window);
	return;
error:
	/* The peer's allocation request is rejected */
	printk(KERN_ERR "%s %d error %d alloc_ptr %p nr_pages 0x%lx\n",
		__func__, __LINE__, err, window, nr_pages);
	msg->uop = SCIF_ALLOC_REJ;
	micscif_nodeqp_send(ep->remote_dev, msg, ep);
}

/**
 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
 * @msg: Interrupt message
 *
 * Remote side responded to a memory allocation.
 */
static __always_inline void
scif_alloc_gnt_rej(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct allocmsg *handle = (struct allocmsg *)msg->payload[2];
	switch (handle->uop) {
	case SCIF_REGISTER:
	{
		handle->vaddr = (void *)msg->payload[0];
		handle->phys_addr = msg->payload[1];
		if (msg->uop == SCIF_ALLOC_GNT)
			handle->state = OP_COMPLETED;
		else
			handle->state = OP_FAILED;
		wake_up(&handle->allocwq);
		break;
	}
	default:
	{
		printk(KERN_ERR "Bug Unknown alloc uop 0x%x\n", handle->uop);
	}
	}
}

/**
 * scif_free_phys: Respond to SCIF_FREE_PHYS interrupt message
 * @msg: Interrupt message
 *
 * Remote side is done accessing earlier memory allocation.
 * Remove GTT/PCI mappings created earlier.
 */
static __always_inline void
scif_free_phys(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	return;
}

/**
 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
 * @msg: Interrupt message
 *
 * Free up memory kmalloc'd earlier.
 */
static __always_inline void
scif_free_virt(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	int opcode = (int)msg->payload[3];
	struct reg_range_t *window =
		(struct reg_range_t *)msg->payload[1];

	switch (opcode) {
	case SCIF_REGISTER:
		micscif_destroy_remote_window(ep, window);
		break;
	default:
		/* Unexpected opcode */
		BUG_ON(opcode != SCIF_REGISTER);
	}
}

/**
 * scif_recv_register: Respond to SCIF_REGISTER interrupt message
 * @msg: Interrupt message
 *
 * Update remote window list with a new registered window.
 */
static __always_inline void
scif_recv_register(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	unsigned long sflags;
	struct endpt *ep = (struct endpt *)msg->payload[0];
	struct reg_range_t *window =
		(struct reg_range_t *)msg->payload[1];

	might_sleep();
	RMA_MAGIC(window);
	mutex_lock(&ep->rma_info.rma_lock);
	/* FIXME:
	 * is the ep lock needed here? rma_lock is already held
	 */
	spin_lock_irqsave(&ep->lock, sflags);
	if (SCIFEP_CONNECTED == ep->state) {
		msg->uop = SCIF_REGISTER_ACK;
		micscif_nodeqp_send(ep->remote_dev, msg, ep);
		micscif_set_nr_pages(ep->remote_dev, window);
		/* No further failures expected. Insert new window */
		micscif_insert_window(window,
			&ep->rma_info.remote_reg_list);
	} else {
		msg->uop = SCIF_REGISTER_NACK;
		micscif_nodeqp_send(ep->remote_dev, msg, ep);
	}
	spin_unlock_irqrestore(&ep->lock, sflags);
	mutex_unlock(&ep->rma_info.rma_lock);
	/*
	 * If we sent a NACK the window was never inserted and
	 * must be destroyed here.
	 */
	if (SCIF_REGISTER_NACK == msg->uop)
		micscif_destroy_remote_window(ep, window);
	else {
#ifdef _MIC_SCIF_
		micscif_destroy_remote_lookup(ep, window);
#endif
	}
}
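
/*
 * Registration handshake at a glance (summarizing the handlers around
 * here): the registering side first obtains a remote window via
 * SCIF_ALLOC_REQ/SCIF_ALLOC_GNT (see scif_alloc_req() above), then sends
 * SCIF_REGISTER; the peer answers SCIF_REGISTER_(N)ACK, which wakes the
 * waiter through window->reg_state and window->regwq (see
 * scif_recv_register_ack() and scif_recv_register_nack() below).
 */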

/**
 * scif_recv_unregister: Respond to SCIF_UNREGISTER interrupt message
 * @msg: Interrupt message
 *
 * Remove window from remote registration list.
 */
static __always_inline void
scif_recv_unregister(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct micscif_rma_req req;
	struct reg_range_t *window = NULL;
	struct reg_range_t *recv_window =
		(struct reg_range_t *)msg->payload[0];
	struct endpt *ep;
	int del_window = 0;

	might_sleep();
	RMA_MAGIC(recv_window);
	ep = (struct endpt *)recv_window->ep;
	req.out_window = &window;
	req.offset = recv_window->offset;
	req.prot = 0;
	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
	req.type = WINDOW_FULL;
	req.head = &ep->rma_info.remote_reg_list;
	msg->payload[0] = ep->remote_ep;

	mutex_lock(&ep->rma_info.rma_lock);
	/*
	 * Does a valid window exist?
	 */
	if (micscif_query_window(&req)) {
		printk(KERN_ERR "%s %d -ENXIO\n", __func__, __LINE__);
		msg->uop = SCIF_UNREGISTER_ACK;
		goto error;
	}
	if (window) {
		RMA_MAGIC(window);
		if (window->ref_count)
			put_window_ref_count(window, window->nr_pages);
		window->unreg_state = OP_COMPLETED;
		if (!window->ref_count) {
			msg->uop = SCIF_UNREGISTER_ACK;
			atomic_inc(&ep->rma_info.tw_refcount);
			atomic_add_return((int32_t)window->nr_pages, &ep->rma_info.tw_total_pages);
			ep->rma_info.async_list_del = 1;
			list_del(&window->list_member);
			window->offset = INVALID_VA_GEN_ADDRESS;
			del_window = 1;
		} else
			/* NACK! There are valid references to this window */
			msg->uop = SCIF_UNREGISTER_NACK;
	} else {
		/* The window did not make its way to the list at all. ACK */
		msg->uop = SCIF_UNREGISTER_ACK;
		micscif_destroy_remote_window(ep, recv_window);
	}
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (del_window)
		drain_dma_intr(ep->rma_info.dma_chan);
	micscif_nodeqp_send(ep->remote_dev, msg, ep);
	if (del_window)
		micscif_queue_for_cleanup(window, &ms_info.mi_rma);
	return;
}

/**
 * scif_recv_register_ack: Respond to SCIF_REGISTER_ACK interrupt message
 * @msg: Interrupt message
 *
 * Wake up the window waiting to complete registration.
 */
static __always_inline void
scif_recv_register_ack(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct reg_range_t *window =
		(struct reg_range_t *)msg->payload[2];
	RMA_MAGIC(window);
	window->reg_state = OP_COMPLETED;
	wake_up(&window->regwq);
}

/**
 * scif_recv_register_nack: Respond to SCIF_REGISTER_NACK interrupt message
 * @msg: Interrupt message
 *
 * Wake up the window waiting to inform it that registration
 * cannot be completed.
 */
static __always_inline void
scif_recv_register_nack(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct reg_range_t *window =
		(struct reg_range_t *)msg->payload[2];
	RMA_MAGIC(window);
	window->reg_state = OP_FAILED;
	wake_up(&window->regwq);
}

/**
 * scif_recv_unregister_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
 * @msg: Interrupt message
 *
 * Wake up the window waiting to complete unregistration.
 */
static __always_inline void
scif_recv_unregister_ack(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct reg_range_t *window =
		(struct reg_range_t *)msg->payload[1];
	RMA_MAGIC(window);
	window->unreg_state = OP_COMPLETED;
	wake_up(&window->unregwq);
}

/**
 * scif_recv_unregister_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
 * @msg: Interrupt message
 *
 * Wake up the window waiting to inform it that unregistration
 * cannot be completed immediately.
 */
static __always_inline void
scif_recv_unregister_nack(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct reg_range_t *window =
		(struct reg_range_t *)msg->payload[1];
	RMA_MAGIC(window);
	window->unreg_state = OP_FAILED;
	wake_up(&window->unregwq);
}

static __always_inline void
scif_recv_munmap(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct micscif_rma_req req;
	struct reg_range_t *window = NULL;
	struct reg_range_t *recv_window =
		(struct reg_range_t *)msg->payload[0];
	struct endpt *ep;
	int del_window = 0;

	might_sleep();
	RMA_MAGIC(recv_window);
	ep = (struct endpt *)recv_window->ep;
	req.out_window = &window;
	req.offset = recv_window->offset;
	req.prot = recv_window->prot;
	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
	req.type = WINDOW_FULL;
	req.head = &ep->rma_info.reg_list;
	msg->payload[0] = ep->remote_ep;

	mutex_lock(&ep->rma_info.rma_lock);
	/*
	 * Does a valid window exist?
	 */
	if (micscif_query_window(&req)) {
		printk(KERN_ERR "%s %d -ENXIO\n", __func__, __LINE__);
		msg->uop = SCIF_UNREGISTER_ACK;
		goto error;
	}

	RMA_MAGIC(window);

	if (window->ref_count)
		put_window_ref_count(window, window->nr_pages);

	if (!window->ref_count) {
		atomic_inc(&ep->rma_info.tw_refcount);
		atomic_add_return((int32_t)window->nr_pages, &ep->rma_info.tw_total_pages);
		ep->rma_info.async_list_del = 1;
		list_del(&window->list_member);
		micscif_free_window_offset(ep, window->offset,
			window->nr_pages << PAGE_SHIFT);
		window->offset_freed = true;
		del_window = 1;
	}
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (del_window)
		micscif_queue_for_cleanup(window, &ms_info.mi_rma);
}

/**
 * scif_recv_mark: Handle SCIF_MARK request
 * @msg: Interrupt message
 *
 * The peer has requested a mark.
 */
static __always_inline void
scif_recv_mark(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	int mark;

	if (SCIFEP_CONNECTED != ep->state) {
		msg->payload[0] = ep->remote_ep;
		msg->uop = SCIF_MARK_NACK;
		micscif_nodeqp_send(ep->remote_dev, msg, ep);
		return;
	}

	if ((mark = micscif_fence_mark(ep)) < 0)
		msg->uop = SCIF_MARK_NACK;
	else
		msg->uop = SCIF_MARK_ACK;
	msg->payload[0] = ep->remote_ep;
	msg->payload[2] = mark;
	micscif_nodeqp_send(ep->remote_dev, msg, ep);
}

/**
 * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
 * @msg: Interrupt message
 *
 * The peer has responded to a SCIF_MARK message.
 */
static __always_inline void
scif_recv_mark_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	struct fence_info *fence_req = (struct fence_info *)msg->payload[1];

	mutex_lock(&ep->rma_info.rma_lock);
	if (SCIF_MARK_ACK == msg->uop) {
		fence_req->state = OP_COMPLETED;
		fence_req->dma_mark = (int)msg->payload[2];
	} else
		fence_req->state = OP_FAILED;
	wake_up(&fence_req->wq);
	mutex_unlock(&ep->rma_info.rma_lock);
}
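
/*
 * Illustrative usage (an assumption): the SCIF_MARK round trip above
 * backs the public scif_fence_mark() API when the mark has to be
 * created on the remote node, e.g.:
 *
 *	int mark;
 *	err = scif_fence_mark(epd, SCIF_FENCE_INIT_PEER, &mark);
 *
 * On SCIF_MARK_ACK the returned mark identifies a point in the peer's
 * DMA stream that can later be waited on.
 */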

/**
 * scif_recv_wait: Handle SCIF_WAIT request
 * @msg: Interrupt message
 *
 * The peer has requested waiting on a fence.
 */
static __always_inline void
scif_recv_wait(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	struct remote_fence_info *fence;

	/*
	 * Allocate a structure for the remote fence information and
	 * send a NACK if the allocation failed. The peer will
	 * return ENOMEM upon receiving a NACK.
	 */
	if (!(fence = (struct remote_fence_info *)kmalloc(
			sizeof(struct remote_fence_info), GFP_KERNEL))) {
		msg->payload[0] = ep->remote_ep;
		msg->uop = SCIF_WAIT_NACK;
		micscif_nodeqp_send(ep->remote_dev, msg, ep);
		return;
	}

	/* Prepare the fence request */
	memcpy(&fence->msg, msg, sizeof(struct nodemsg));
	INIT_LIST_HEAD(&fence->list_member);

	/* Insert into the global remote fence request list */
	mutex_lock(&ms_info.mi_fencelock);
	ep->rma_info.fence_refcount++;
	list_add_tail(&fence->list_member, &ms_info.mi_fence);
	mutex_unlock(&ms_info.mi_fencelock);

	queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work);
}
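
/*
 * Note: the wait itself is performed later by the misc work queue
 * handler; blocking here would stall node QP message processing.
 */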

/**
 * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
 * @msg: Interrupt message
 *
 * The peer has responded to a SCIF_WAIT message.
 */
static __always_inline void
scif_recv_wait_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	struct fence_info *fence_req = (struct fence_info *)msg->payload[1];

	mutex_lock(&ep->rma_info.rma_lock);
	if (SCIF_WAIT_ACK == msg->uop)
		fence_req->state = OP_COMPLETED;
	else
		fence_req->state = OP_FAILED;
	wake_up(&fence_req->wq);
	mutex_unlock(&ep->rma_info.rma_lock);
}
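
/*
 * Illustrative usage (an assumption): SCIF_WAIT backs scif_fence_wait()
 * for marks that live on the remote node:
 *
 *	err = scif_fence_wait(epd, mark);
 *
 * A SCIF_WAIT_NACK (for instance after the allocation failure in
 * scif_recv_wait() above) surfaces to the caller as -ENOMEM.
 */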

/**
 * scif_recv_signal_local: Handle SCIF_SIG_LOCAL request
 * @msg: Interrupt message
 *
 * The peer has requested a signal on a local offset.
 */
static __always_inline void
scif_recv_signal_local(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	int err = 0;
	struct endpt *ep = (struct endpt *)msg->payload[0];

	err = micscif_prog_signal(ep,
			msg->payload[1],
			msg->payload[2],
			RMA_WINDOW_SELF);
	if (err)
		msg->uop = SCIF_SIG_NACK;
	else
		msg->uop = SCIF_SIG_ACK;
	msg->payload[0] = ep->remote_ep;
	if ((err = micscif_nodeqp_send(ep->remote_dev, msg, ep)))
		printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err);
}

/**
 * scif_recv_signal_remote: Handle SCIF_SIG_REMOTE request
 * @msg: Interrupt message
 *
 * The peer has requested a signal on a remote offset.
 */
static __always_inline void
scif_recv_signal_remote(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	int err = 0;
	struct endpt *ep = (struct endpt *)msg->payload[0];

	err = micscif_prog_signal(ep,
			msg->payload[1],
			msg->payload[2],
			RMA_WINDOW_PEER);
	if (err)
		msg->uop = SCIF_SIG_NACK;
	else
		msg->uop = SCIF_SIG_ACK;
	msg->payload[0] = ep->remote_ep;
	if ((err = micscif_nodeqp_send(ep->remote_dev, msg, ep)))
		printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err);
}

/**
 * scif_recv_signal_resp: Handle SCIF_SIG_(N)ACK messages.
 * @msg: Interrupt message
 *
 * The peer has responded to a signal request.
 */
static __always_inline void
scif_recv_signal_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	struct fence_info *fence_req = (struct fence_info *)msg->payload[3];

	mutex_lock(&ep->rma_info.rma_lock);
	if (SCIF_SIG_ACK == msg->uop)
		fence_req->state = OP_COMPLETED;
	else
		fence_req->state = OP_FAILED;
	wake_up(&fence_req->wq);
	mutex_unlock(&ep->rma_info.rma_lock);
}
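
/*
 * Illustrative usage (an assumption): the SIG_LOCAL/SIG_REMOTE handlers
 * above back scif_fence_signal(), which writes a value to a registered
 * offset once all RMAs issued before the fence have completed, e.g.:
 *
 *	err = scif_fence_signal(epd, loff, lval, roff, rval,
 *			SCIF_FENCE_INIT_SELF | SCIF_SIGNAL_REMOTE);
 */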

/*
 * scif_node_wake_up_ack: Handle SCIF_NODE_WAKE_UP_ACK message
 * @msg: Interrupt message
 *
 * Response for a SCIF_NODE_WAKE_UP message.
 */
static __always_inline void
scif_node_wake_up_ack(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	scif_dev[msg->payload[0]].sd_wait_status = OP_COMPLETED;
	wake_up(&scif_dev[msg->payload[0]].sd_wq);
}

/*
 * scif_node_wake_up_nack: Handle SCIF_NODE_WAKE_UP_NACK message
 * @msg: Interrupt message
 *
 * Response for a SCIF_NODE_WAKE_UP message.
 */
static __always_inline void
scif_node_wake_up_nack(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	scif_dev[msg->payload[0]].sd_wait_status = OP_FAILED;
	wake_up(&scif_dev[msg->payload[0]].sd_wq);
}

/*
 * scif_node_remove: Handle SCIF_NODE_REMOVE message
 * @msg: Interrupt message
 *
 * Handle node removal.
 */
static __always_inline void
scif_node_remove(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	msg->payload[0] = micscif_handle_remove_node(msg->payload[0],
			msg->payload[1]);
	msg->uop = SCIF_NODE_REMOVE_ACK;
	msg->src.node = ms_info.mi_nodeid;
	micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], msg, NULL);
}

#ifndef _MIC_SCIF_
/*
 * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message
 * @msg: Interrupt message
 *
 * The peer has acked a SCIF_NODE_REMOVE message.
 */
static __always_inline void
scif_node_remove_ack(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	bool ack_is_current = true;
	int orig_node = (int)msg->payload[3];

	/* The disconnect type lives in the upper 32 bits of payload[1] */
	if ((msg->payload[1] >> 32) == DISCONN_TYPE_POWER_MGMT) {
		if (msg->payload[2] != atomic_long_read(&ms_info.mi_unique_msgid))
			ack_is_current = false;
	}

	if (ack_is_current) {
		mic_ctx_t *mic_ctx = get_per_dev_ctx(orig_node - 1);
		if (!mic_ctx) {
			printk(KERN_ERR "%s %d mic_ctx %p orig_node %d\n",
				__func__, __LINE__, mic_ctx, orig_node);
			return;
		}

		if (msg->payload[0]) {
			pr_debug("%s failed to get remove ack from node id %d\n",
				__func__, msg->src.node);
			ms_info.mi_disconnect_status = OP_FAILED;
		}

		atomic_inc(&mic_ctx->disconn_rescnt);
		wake_up(&ms_info.mi_disconn_wq);
	}
}

/*
 * scif_node_create_dep: Handle SCIF_NODE_CREATE_DEP message
 * @msg: Interrupt message
 *
 * Notification about a new SCIF dependency between two nodes.
 */
static __always_inline void
scif_node_create_dep(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	uint32_t src_node = msg->src.node;
	uint32_t dst_node = (uint32_t)msg->payload[0];
	/*
	 * The host driver updates the dependency graph:
	 * src_node created a dependency on dst_node
	 * src_node -> dst_node
	 */
	micscif_set_nodedep(src_node, dst_node, DEP_STATE_DEPENDENT);
}

/*
 * scif_node_destroy_dep: Handle SCIF_NODE_DESTROY_DEP message
 * @msg: Interrupt message
 *
 * Notification about tearing down an existing SCIF dependency
 * between two nodes.
 */
static __always_inline void
scif_node_destroy_dep(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	uint32_t src_node = msg->src.node;
	uint32_t dst_node = (uint32_t)msg->payload[0];
	/*
	 * The host driver updates the dependency graph:
	 * src_node removed its dependency on dst_node
	 */
	micscif_set_nodedep(src_node, dst_node, DEP_STATE_NOT_DEPENDENT);
}

/*
 * scif_node_wake_up: Handle SCIF_NODE_WAKE_UP message
 * @msg: Interrupt message
 *
 * The host has received a request to wake up a remote node.
 */
static __always_inline void
scif_node_wake_up(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	/*
	 * The host driver now needs to wake up the remote node
	 * identified by msg->payload[0].
	 */
	uint32_t ret = micscif_connect_node((uint32_t)msg->payload[0], false);

	if (!ret) {
		msg->uop = SCIF_NODE_WAKE_UP_ACK;
		micscif_update_p2p_state((uint32_t)msg->payload[0],
				msg->src.node, SCIFDEV_RUNNING);
	} else {
		msg->uop = SCIF_NODE_WAKE_UP_NACK;
	}
	micscif_nodeqp_send(&scif_dev[msg->src.node], msg, NULL);
}
#endif

#ifdef _MIC_SCIF_
static __always_inline void
scif_node_alive_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	msg->uop = SCIF_NODE_ALIVE_ACK;
	msg->src.node = ms_info.mi_nodeid;
	msg->dst.node = SCIF_HOST_NODE;
	micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], msg, NULL);
	pr_debug("node alive ack sent from node %d oops_in_progress %d\n",
		ms_info.mi_nodeid, oops_in_progress);
}
#else
static __always_inline void
scif_node_alive_ack(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	pr_debug("node alive ack received from node %d\n", msg->src.node);
	atomic_set(&scif_dev[msg->src.node].sd_node_alive, 1);
	wake_up(&scifdev->sd_watchdog_wq);
}
#endif

#ifdef _MIC_SCIF_
static __always_inline void
_scif_proxy_dma(struct micscif_dev *scifdev, struct nodemsg *msg, int flags)
{
	struct endpt *ep = (struct endpt *)msg->payload[0];
	off_t loffset = msg->payload[1];
	off_t roffset = msg->payload[2];
	size_t len = msg->payload[3];
	struct dma_channel *chan = ep->rma_info.dma_chan;
	struct endpt_rma_info *rma = &ep->rma_info;
	int err = __scif_writeto(ep, loffset, len, roffset, flags);

	if (!err && rma->proxy_dma_peer_phys &&
			!request_dma_channel(chan)) {
		do_status_update(chan, rma->proxy_dma_peer_phys, OP_COMPLETED);
		free_dma_channel(chan);
	}
	/* The proxy DMA peer physical address must have been set up by now */
	WARN_ON(!rma->proxy_dma_peer_phys);
}

/**
 * scif_proxy_dma: Handle SCIF_PROXY_DMA request.
 * @msg: Interrupt message
 *
 * The peer has requested a Proxy DMA.
 */
static __always_inline void
scif_proxy_dma(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	_scif_proxy_dma(scifdev, msg, 0x0);
}

/**
 * scif_proxy_ordered_dma: Handle SCIF_PROXY_ORDERED_DMA request.
 * @msg: Interrupt message
 *
 * The peer has requested an ordered Proxy DMA.
 */
static __always_inline void
scif_proxy_ordered_dma(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	_scif_proxy_dma(scifdev, msg, SCIF_RMA_ORDERED);
}
#endif
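
/*
 * Illustrative note (an assumption): a proxy DMA request originates in
 * the peer's RMA path, e.g. a scif_writeto() whose transfer is proxied
 * to this card:
 *
 *	err = scif_writeto(epd, loffset, len, roffset, SCIF_RMA_ORDERED);
 *
 * The card then runs __scif_writeto() on the peer's behalf and reports
 * completion through the proxy DMA status update above.
 */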

#ifndef _MIC_SCIF_
/**
 * scif_node_connect_resp: Respond to SCIF_NODE_CONNECT interrupt message
 * @msg: Interrupt message
 *
 * Connect the src and dst nodes by setting up the p2p connection
 * between them. The host acts as a proxy here.
 */
static __always_inline void
scif_node_connect_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct micscif_dev *dev_j = scifdev;
	struct micscif_dev *dev_i = NULL;
	struct scif_p2p_info *p2p_ij = NULL;	/* bus addr for j from i */
	struct scif_p2p_info *p2p_ji = NULL;	/* bus addr for i from j */
	struct scif_p2p_info *p2p;
	struct list_head *pos, *tmp;
	uint32_t bid = (uint32_t)msg->payload[0];
	int err;
	uint64_t tmppayload;

	pr_debug("%s:%d SCIF_NODE_CONNECT from %d connecting to %d\n",
		__func__, __LINE__, scifdev->sd_node, bid);

	mutex_lock(&ms_info.mi_conflock);
	if (bid < 1 || bid > ms_info.mi_maxid) {
		printk(KERN_ERR "%s %d unknown bid %d\n", __func__, __LINE__, bid);
		goto nack;
	}

	dev_i = &scif_dev[bid];
	mutex_unlock(&ms_info.mi_conflock);
	micscif_inc_node_refcnt(dev_i, 1);
	mutex_lock(&ms_info.mi_conflock);

	if (dev_i->sd_state != SCIFDEV_RUNNING)
		goto ref_nack;

	/*
	 * If the p2p connection is already set up, or in the process of
	 * being set up, then just ignore this request. The requesting node
	 * will get informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK.
	 */
	if (!list_empty(&dev_i->sd_p2p)) {
		list_for_each_safe(pos, tmp, &dev_i->sd_p2p) {
			p2p = list_entry(pos, struct scif_p2p_info,
					ppi_list);
			if (p2p->ppi_peer_id == dev_j->sd_node) {
				mutex_unlock(&ms_info.mi_conflock);
				micscif_dec_node_refcnt(dev_i, 1);
				return;
			}
		}
	}

	p2p_ij = init_p2p_info(dev_i, dev_j);
	p2p_ji = init_p2p_info(dev_j, dev_i);

	list_add_tail(&p2p_ij->ppi_list, &dev_i->sd_p2p);
	list_add_tail(&p2p_ji->ppi_list, &dev_j->sd_p2p);

	/*
	 * Send a SCIF_NODE_ADD to dev_i, passing it its bus address
	 * as seen from dev_j.
	 */
	msg->uop = SCIF_NODE_ADD;
	msg->src.node = dev_j->sd_node;
	msg->dst.node = dev_i->sd_node;

	p2p_ji->ppi_mic_addr[PPI_APER] = mic_map(msg->src.node - 1,
			p2p_ji->ppi_pa[PPI_APER],
			p2p_ji->ppi_len[PPI_APER] << PAGE_SHIFT);
	msg->payload[0] = p2p_ji->ppi_mic_addr[PPI_APER];

	/* addresses for node j */
	p2p_ij->ppi_mic_addr[PPI_MMIO] = mic_map(msg->dst.node - 1,
			p2p_ij->ppi_pa[PPI_MMIO],
			p2p_ij->ppi_len[PPI_MMIO] << PAGE_SHIFT);
	msg->payload[1] = p2p_ij->ppi_mic_addr[PPI_MMIO];

	p2p_ij->ppi_mic_addr[PPI_APER] = mic_map(msg->dst.node - 1,
			p2p_ij->ppi_pa[PPI_APER],
			p2p_ij->ppi_len[PPI_APER] << PAGE_SHIFT);
	msg->payload[2] = p2p_ij->ppi_mic_addr[PPI_APER];

	msg->payload[3] = p2p_ij->ppi_len[PPI_APER] << PAGE_SHIFT;

	if ((err = micscif_nodeqp_send(dev_i, msg, NULL))) {
		printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err);
		goto ref_nack;
	}

	/* Same as above but to dev_j */
	msg->uop = SCIF_NODE_ADD;
	msg->src.node = dev_i->sd_node;
	msg->dst.node = dev_j->sd_node;

	tmppayload = msg->payload[0];
	msg->payload[0] = msg->payload[2];
	msg->payload[2] = tmppayload;

	p2p_ji->ppi_mic_addr[PPI_MMIO] = mic_map(msg->dst.node - 1,
			p2p_ji->ppi_pa[PPI_MMIO],
			p2p_ji->ppi_len[PPI_MMIO] << PAGE_SHIFT);
	msg->payload[1] = p2p_ji->ppi_mic_addr[PPI_MMIO];
	msg->payload[3] = p2p_ji->ppi_len[PPI_APER] << PAGE_SHIFT;

	if ((err = micscif_nodeqp_send(dev_j, msg, NULL))) {
		printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err);
		goto ref_nack;
	}

	mutex_unlock(&ms_info.mi_conflock);
	micscif_dec_node_refcnt(dev_i, 1);
	return;
ref_nack:
	micscif_dec_node_refcnt(dev_i, 1);
nack:
	mutex_unlock(&ms_info.mi_conflock);
	msg->uop = SCIF_NODE_CONNECT_NACK;
	msg->dst.node = dev_j->sd_node;
	msg->payload[0] = bid;
	if ((err = micscif_nodeqp_send(dev_j, msg, NULL)))
		printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err);
}
#endif /* !_MIC_SCIF_ */
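
/*
 * Flow summary of the host-proxied P2P setup above (the message names
 * are from this driver; the summary itself is an annotation):
 *
 *	node j --SCIF_NODE_CONNECT--> host
 *	host --SCIF_NODE_ADD--> node i	(j's MMIO/aperture addresses)
 *	host --SCIF_NODE_ADD--> node j	(i's MMIO/aperture addresses)
 *
 * Nodes i and j then exchange SCIF_NODE_ADD_(N)ACK to complete or
 * abort the connection.
 */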

#ifdef _MIC_SCIF_
/**
 * scif_node_connect_nack_resp: Respond to SCIF_NODE_CONNECT_NACK interrupt message
 * @msg: Interrupt message
 *
 * Inform the node that initiated SCIF_NODE_CONNECT earlier that the
 * connection attempt failed.
 */
static __always_inline void
scif_node_connect_nack_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	struct micscif_dev *peerdev;
	unsigned int bid = msg->payload[0];

	if (bid > MAX_BOARD_SUPPORTED) {
		printk(KERN_ERR "received a nack for invalid bid %d\n", bid);
		WARN_ON(1);
		return;
	}

	peerdev = &scif_dev[bid];
	mutex_lock(&peerdev->sd_lock);
	peerdev->sd_state = SCIFDEV_NOTPRESENT;
	mutex_unlock(&peerdev->sd_lock);
	wake_up(&peerdev->sd_p2p_wq);
}
#endif

/**
 * scif_node_add_nack_resp: Respond to SCIF_NODE_ADD_NACK interrupt message
 * @msg: Interrupt message
 *
 * SCIF_NODE_ADD failed, so inform the waiting wq.
 */
static __always_inline void
scif_node_add_nack_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
#ifndef _MIC_SCIF_
	struct micscif_dev *dst_dev = &scif_dev[msg->dst.node];
	pr_debug("SCIF_NODE_ADD_NACK received from %d\n", scifdev->sd_node);
	micscif_inc_node_refcnt(dst_dev, 1);
	micscif_nodeqp_send(dst_dev, msg, NULL);
	micscif_dec_node_refcnt(dst_dev, 1);
#else
	struct micscif_dev *peerdev;

	peerdev = &scif_dev[msg->src.node];

	if (peerdev->sd_state == SCIFDEV_NOTPRESENT)
		return;

	mutex_lock(&peerdev->sd_lock);
	peerdev->sd_state = SCIFDEV_NOTPRESENT;
	mutex_unlock(&peerdev->sd_lock);
	wake_up(&peerdev->sd_p2p_wq);
#endif
}

/**
 * scif_get_node_info_resp: Respond to SCIF_GET_NODE_INFO interrupt message
 * @msg: Interrupt message
 *
 * Retrieve node info, i.e. maxid, total and the node mask, from the host.
 */
static __always_inline void
scif_get_node_info_resp(struct micscif_dev *scifdev, struct nodemsg *msg)
{
#ifdef _MIC_SCIF_
	struct get_node_info *node_info = (struct get_node_info *)msg->payload[3];

	mutex_lock(&ms_info.mi_conflock);
	ms_info.mi_mask = msg->payload[0];
	ms_info.mi_maxid = msg->payload[1];
	ms_info.mi_total = msg->payload[2];

	node_info->state = OP_COMPLETED;
	wake_up(&node_info->wq);
	mutex_unlock(&ms_info.mi_conflock);
#else
	swap(msg->dst.node, msg->src.node);
	mutex_lock(&ms_info.mi_conflock);
	msg->payload[0] = ms_info.mi_mask;
	msg->payload[1] = ms_info.mi_maxid;
	msg->payload[2] = ms_info.mi_total;
	mutex_unlock(&ms_info.mi_conflock);

	if (micscif_nodeqp_send(scifdev, msg, NULL))
		printk(KERN_ERR "%s %d error\n", __func__, __LINE__);
#endif
}
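
/*
 * Illustrative usage (an assumption): on the card this round trip
 * typically backs scif_get_nodeIDs():
 *
 *	uint16_t self, nodes[MAX_BOARD_SUPPORTED];
 *	int cnt = scif_get_nodeIDs(nodes, MAX_BOARD_SUPPORTED, &self);
 */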

#ifdef ENABLE_TEST
static void
scif_test(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	if (msg->payload[0] != scifdev->count) {
		printk(KERN_ERR "Con fail: payload == %llx\n", msg->payload[0]);
		scifdev->test_done = -1;
	} else if (scifdev->count == TEST_LOOP) {
		pr_debug("Test success state %d!\n", scifdev->sd_state);
		scifdev->test_done = 1;
	}

	if (scifdev->test_done != 0) {
		while (scifdev->test_done != 2) {
			cpu_relax();
			schedule();
		}

		destroy_workqueue(scifdev->producer);
		destroy_workqueue(scifdev->consumer);
		pr_debug("Destroyed workqueue state %d!\n", scifdev->sd_state);
	}
	scifdev->count++;
}
#endif /* ENABLE_TEST */

static void
scif_msg_unknown(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	/* Bogus Node Qp Message? */
	printk(KERN_ERR "Unknown message 0x%x scifdev->sd_state 0x%x "
		"scifdev->sd_node 0x%x\n",
		msg->uop, scifdev->sd_state, scifdev->sd_node);
	BUG_ON(1);
}

#ifdef _MIC_SCIF_
static void
smpt_set(struct micscif_dev *scifdev, struct nodemsg *msg)
{
	printk("msg recvd: smpt add\n");
	printk("dma_addr = 0x%llX, entry = 0x%llX\n",
		msg->payload[0], msg->payload[1]);
	mic_smpt_set(scif_dev->mm_sbox, msg->payload[0], msg->payload[1]);
}
#endif

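/*
 * Dispatch table for node QP messages, indexed by msg->uop. The slot
 * order must match the message opcode values (assumed to be defined in
 * mic/micscif_nodeqp.h, which this file includes); the numbers in some
 * comments mark every tenth opcode. Opcodes that are invalid on this
 * side (host vs. card) point at scif_msg_unknown.
 */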
void (*scif_intr_func[SCIF_MAX_MSG + 1])(struct micscif_dev *, struct nodemsg *msg) = {
	scif_msg_unknown,		// Error
	scif_init_resp,			// SCIF_INIT
	scif_exit_resp,			// SCIF_EXIT
	scif_nodeadd_resp,		// SCIF_NODE_ADD
	scif_nodeaddack_resp,		// SCIF_NODE_ADD_ACK
	scif_cnctreq_resp,		// SCIF_CNCT_REQ
	scif_cnctgnt_resp,		// SCIF_CNCT_GNT
	scif_cnctgntack_resp,		// SCIF_CNCT_GNTACK
	scif_cnctgntnack_resp,		// SCIF_CNCT_GNTNACK
	scif_cnctrej_resp,		// SCIF_CNCT_REJ
	scif_cnctterm_resp,		// SCIF_CNCT_TERM 10
	scif_termack_resp,		// SCIF_TERM_ACK
	scif_discnct_resp,		// SCIF_DISCNCT
	scif_discntack_resp,		// SCIF_DISCNT_ACK
	scif_recv_register,		// SCIF_REGISTER
	scif_recv_register_ack,		// SCIF_REGISTER_ACK
	scif_recv_register_nack,	// SCIF_REGISTER_NACK
	scif_recv_unregister,		// SCIF_UNREGISTER
	scif_recv_unregister_ack,	// SCIF_UNREGISTER_ACK
	scif_recv_unregister_nack,	// SCIF_UNREGISTER_NACK
	scif_alloc_req,			// SCIF_ALLOC_REQ 20
	scif_alloc_gnt_rej,		// SCIF_ALLOC_GNT
	scif_alloc_gnt_rej,		// SCIF_ALLOC_REJ
	scif_free_phys,			// SCIF_FREE_PHYS
	scif_free_virt,			// SCIF_FREE_VIRT
	scif_clientsend_resp,		// SCIF_CLIENT_SENT
	scif_clientrcvd_resp,		// SCIF_CLIENT_RCVD
	scif_recv_munmap,		// SCIF_MUNMAP
	scif_recv_mark,			// SCIF_MARK
	scif_recv_mark_resp,		// SCIF_MARK_ACK
	scif_recv_mark_resp,		// SCIF_MARK_NACK 30
	scif_recv_wait,			// SCIF_WAIT
	scif_recv_wait_resp,		// SCIF_WAIT_ACK
	scif_recv_wait_resp,		// SCIF_WAIT_NACK
	scif_recv_signal_local,		// SCIF_SIG_LOCAL
	scif_recv_signal_remote,	// SCIF_SIG_REMOTE
	scif_recv_signal_resp,		// SCIF_SIG_ACK
	scif_recv_signal_resp,		// SCIF_SIG_NACK
#ifdef _MIC_SCIF_
	scif_msg_unknown,
	scif_msg_unknown,
	scif_msg_unknown,
	scif_msg_unknown,
	scif_msg_unknown,		// SCIF_NODE_CREATE_DEP Not on card
	scif_msg_unknown,		// SCIF_NODE_DESTROY_DEP Not on card
#else
	scif_msg_unknown,
	scif_msg_unknown,
	scif_msg_unknown,
	scif_msg_unknown,
	scif_node_create_dep,		// SCIF_NODE_CREATE_DEP
	scif_node_destroy_dep,		// SCIF_NODE_DESTROY_DEP
#endif
	scif_node_remove,		// SCIF_NODE_REMOVE
#ifdef _MIC_SCIF_
	scif_msg_unknown,		// SCIF_NODE_REMOVE_ACK Not on card
	scif_msg_unknown,		// SCIF_NODE_WAKE_UP Not on card
#else
	scif_node_remove_ack,		// SCIF_NODE_REMOVE_ACK
	scif_node_wake_up,		// SCIF_NODE_WAKE_UP
#endif
	scif_node_wake_up_ack,		// SCIF_NODE_WAKE_UP_ACK
	scif_node_wake_up_nack,		// SCIF_NODE_WAKE_UP_NACK
#ifdef _MIC_SCIF_
	scif_node_alive_resp,		// SCIF_NODE_ALIVE
	scif_msg_unknown,		// SCIF_NODE_ALIVE_ACK Not on card
	smpt_set,			// SMPT_SET
#else
	scif_msg_unknown,		// SCIF_NODE_ALIVE Not on host
	scif_node_alive_ack,		// SCIF_NODE_ALIVE_ACK
	scif_msg_unknown,		// SMPT_SET Not on host
#endif
	scif_msg_unknown,
	scif_msg_unknown,
	scif_msg_unknown,
	scif_msg_unknown,
#ifdef _MIC_SCIF_
	scif_proxy_dma,			// SCIF_PROXY_DMA only for MIC
	scif_proxy_ordered_dma,		// SCIF_PROXY_ORDERED_DMA only for MIC
#else
	scif_msg_unknown,
	scif_msg_unknown,
#endif
#ifdef _MIC_SCIF_
	scif_msg_unknown,
	scif_node_connect_nack_resp,	// SCIF_NODE_CONNECT_NACK
#else
	scif_node_connect_resp,		// SCIF_NODE_CONNECT
	scif_msg_unknown,
#endif
	scif_node_add_nack_resp,	// SCIF_NODE_ADD_NACK
	scif_get_node_info_resp,	// SCIF_GET_NODE_INFO
#ifdef ENABLE_TEST
	scif_test			// SCIF_TEST
#else
	scif_msg_unknown
#endif
};

int micscif_max_msg_id = SCIF_MAX_MSG;

/**
 * micscif_nodeqp_msg_handler() - Common handler for node messages
 * @scifdev: Remote device to respond to
 * @qp: Remote memory pointer
 * @msg: The message to be handled.
 *
 * This routine calls the appropriate routine to handle a Node Qp
 * message receipt.
 */
static void
micscif_nodeqp_msg_handler(struct micscif_dev *scifdev, struct micscif_qp *qp, struct nodemsg *msg)
{
	micscif_display_message(scifdev, msg, "Rcvd");

	if (msg->uop > (uint32_t)micscif_max_msg_id) {
		/* Bogus Node Qp Message? */
		printk(KERN_ERR "Unknown message 0x%x scifdev->sd_state 0x%x "
			"scifdev->sd_node 0x%x\n",
			msg->uop, scifdev->sd_state, scifdev->sd_node);
		BUG_ON(1);
	}

	scif_intr_func[msg->uop](scifdev, msg);
}

/**
 * micscif_nodeqp_intrhandler() - Interrupt handler for node messages
 * @scifdev: Remote device to respond to
 * @qp: Remote memory pointer
 *
 * This routine is triggered by the interrupt mechanism. It reads
 * messages from the node queue RB and calls the Node QP Message handling
 * routine.
 */
int
micscif_nodeqp_intrhandler(struct micscif_dev *scifdev, struct micscif_qp *qp)
{
	struct nodemsg msg;
	int read_size;

	do {
#ifndef _MIC_SCIF_
		if (qp->blast) {
			scif_wakeup_ep(SCIF_WAKE_UP_RECV);
			qp->blast = 0;
		}
#endif
		if (SCIFDEV_STOPPED == scifdev->sd_state)
			return 0;
		read_size = micscif_rb_get_next(&qp->inbound_q, &msg,
				sizeof(msg));
		/* Stop handling messages if an oops is in progress */
		if (read_size != sizeof(msg) || oops_in_progress)
			break;
#ifndef _MIC_SCIF_
		atomic_set(&scifdev->sd_node_alive, 1);
#endif

		micscif_inc_node_refcnt(scifdev, 1);
		micscif_nodeqp_msg_handler(scifdev, qp, &msg);
		/*
		 * The reference count is reset to SCIF_NODE_IDLE
		 * during scif device cleanup so decrementing the
		 * reference count further is not required.
		 */
		if (SCIFDEV_INIT == scifdev->sd_state)
			return 0;
		if (SCIFDEV_STOPPED == scifdev->sd_state) {
			micscif_dec_node_refcnt(scifdev, 1);
			return 0;
		}
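		/*
		 * Advance the read pointer only after the message has been
		 * handled so the peer cannot reuse the ring buffer slot
		 * while it is still being processed.
		 */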
		micscif_rb_update_read_ptr(&qp->inbound_q);
		micscif_dec_node_refcnt(scifdev, 1);
	} while (read_size == sizeof(msg));
#ifdef _MIC_SCIF_
	/*
	 * Keep polling the Node QP RB in case there are active SCIF
	 * P2P connections to provide better Node QP responsiveness
	 * in anticipation of P2P Proxy DMA requests for performance.
	 */
	if (scifdev->sd_proxy_dma_reads &&
			scifdev->num_active_conn &&
			SCIFDEV_STOPPED != scifdev->sd_state) {
		queue_work(scifdev->sd_intr_wq, &scifdev->sd_intr_bh);
		schedule();
	}
#endif
	return read_size;
}

/**
 * micscif_loopb_wq_handler - Loopback Workqueue Handler.
 * @work: loop back work
 *
 * This work queue routine is invoked by the loopback work queue handler.
 * It grabs the recv lock, dequeues any available messages from the head
 * of the loopback message list, calls the node QP message handler,
 * waits for it to return, then frees up this message and dequeues more
 * elements of the list if available.
 */
static void micscif_loopb_wq_handler(struct work_struct *work)
{
	struct micscif_dev *scifdev =
		container_of(work, struct micscif_dev, sd_loopb_work);
	struct micscif_qp *qp = micscif_nodeqp_nextmsg(scifdev);
	struct loopb_msg *msg;

	do {
		msg = NULL;
		spin_lock(&qp->qp_recv_lock);
		if (!list_empty(&scifdev->sd_loopb_recv_q)) {
			msg = list_first_entry(&scifdev->sd_loopb_recv_q,
					struct loopb_msg, list_member);
			list_del(&msg->list_member);
		}
		spin_unlock(&qp->qp_recv_lock);

		if (msg) {
			micscif_nodeqp_msg_handler(scifdev, qp, &msg->msg);
			kfree(msg);
		}
	} while (msg);
}

/**
 * micscif_loopb_msg_handler() - Workqueue handler for loopback messages.
 * @scifdev: SCIF device
 * @qp: Queue pair.
 *
 * This work queue routine is triggered when a loopback message is received.
 *
 * We need special handling for receiving Node Qp messages on a loopback SCIF
 * device via two workqueues for receiving messages.
 *
 * The reason we need the extra workqueue, which is not required with *normal*
 * non-loopback SCIF devices, is the potential classic deadlock described below:
 *
 * Thread A tries to send a message on a loopback SCIF device and blocks, since
 * there is no space in the RB, while it holds the qp_send_lock (or another
 * lock, call it lock X).
 *
 * Thread B: the loopback Node QP message receive workqueue receives the message
 * and tries to send a message (e.g. an ACK) to the loopback SCIF device. It
 * tries to grab the send lock again, or lock X, and deadlocks with Thread A.
 * The RB cannot be drained any further due to this classic deadlock.
 *
 * In order to avoid deadlocks as mentioned above we have an extra level of
 * indirection achieved by having two workqueues.
 * 1) The first workqueue, whose handler is micscif_loopb_msg_handler, reads
 * messages from the Node QP RB, adds them to a list and queues work for the
 * second workqueue.
 *
 * 2) The second workqueue, whose handler is micscif_loopb_wq_handler, dequeues
 * messages from the list, handles them, frees up the memory and dequeues
 * more elements from the list if possible.
 */
int
micscif_loopb_msg_handler(struct micscif_dev *scifdev, struct micscif_qp *qp)
{
	int read_size;
	struct loopb_msg *msg;

	do {
		if (!(msg = kmalloc(sizeof(struct loopb_msg), GFP_KERNEL))) {
			printk(KERN_ERR "%s %d ENOMEM\n", __func__, __LINE__);
			return -ENOMEM;
		}

		read_size = micscif_rb_get_next(&qp->inbound_q, &msg->msg,
				sizeof(struct nodemsg));

		if (read_size != sizeof(struct nodemsg)) {
			kfree(msg);
			micscif_rb_update_read_ptr(&qp->inbound_q);
			break;
		}

		spin_lock(&qp->qp_recv_lock);
		list_add_tail(&msg->list_member, &scifdev->sd_loopb_recv_q);
		spin_unlock(&qp->qp_recv_lock);
		queue_work(scifdev->sd_loopb_wq, &scifdev->sd_loopb_work);
		micscif_rb_update_read_ptr(&qp->inbound_q);
	} while (read_size == sizeof(struct nodemsg));
	return read_size;
}

/**
 * micscif_setup_loopback_qp - One time setup work for Loopback Node Qp.
 * @scifdev: SCIF device
 *
 * Sets up the required loopback workqueues, queue pairs and ring buffers,
 * and also tests out the Queue Pairs.
 */
int micscif_setup_loopback_qp(struct micscif_dev *scifdev)
{
	int err = 0;
	void *local_q;
	struct micscif_qp *qp;

	/* Set up the work queues */
	if ((err = micscif_setup_interrupts(scifdev)))
		goto error;

	INIT_LIST_HEAD(&scifdev->sd_loopb_recv_q);
	snprintf(scifdev->sd_loopb_wqname, sizeof(scifdev->sd_loopb_wqname),
		"SCIF LOOPB %d", scifdev->sd_node);
	if (!(scifdev->sd_loopb_wq =
		__mic_create_singlethread_workqueue(scifdev->sd_loopb_wqname))) {
		err = -ENOMEM;
		goto destroy_intr_wq;
	}
	INIT_WORK(&scifdev->sd_loopb_work, micscif_loopb_wq_handler);
	/* Allocate the self Qpair */
	scifdev->n_qpairs = 1;
	scifdev->qpairs = (struct micscif_qp *)kzalloc(sizeof(struct micscif_qp), GFP_KERNEL);
	if (!scifdev->qpairs) {
		printk(KERN_ERR "Node QP Allocation failed\n");
		err = -ENOMEM;
		goto destroy_loopb_wq;
	}

	qp = scifdev->qpairs;
	qp->magic = SCIFEP_MAGIC;
	spin_lock_init(&qp->qp_send_lock);
	spin_lock_init(&qp->qp_recv_lock);
	init_waitqueue_head(&scifdev->sd_mmap_wq);

	local_q = kzalloc(NODE_QP_SIZE, GFP_KERNEL);
	if (!local_q) {
		printk(KERN_ERR "Ring Buffer Allocation Failed\n");
		err = -ENOMEM;
		goto free_qpairs;
	}

	/*
	 * For loopback the inbound_q and outbound_q are essentially the same
	 * since the Node sends a message on the loopback interface to the
	 * outbound_q which is then received on the inbound_q.
	 */
	micscif_rb_init(&qp->outbound_q,
			&scifdev->qpairs[0].local_read,
			&scifdev->qpairs[0].local_write,
			local_q,
			NODE_QP_SIZE);

	micscif_rb_init(&qp->inbound_q,
			&scifdev->qpairs[0].local_read,
			&scifdev->qpairs[0].local_write,
			local_q,
			NODE_QP_SIZE);

	/* Launch the micscif_rb test */
#ifdef ENABLE_TEST
	micscif_qp_testboth(scifdev);
#endif
	return err;
free_qpairs:
	kfree(scifdev->qpairs);
destroy_loopb_wq:
	destroy_workqueue(scifdev->sd_loopb_wq);
destroy_intr_wq:
	destroy_workqueue(scifdev->sd_intr_wq);
error:
	return err;
}

/**
 * micscif_destroy_loopback_qp - One time uninit work for Loopback Node Qp
 * @scifdev: SCIF device
 *
 * Destroys the workqueues and frees up the Ring Buffer and Queue Pair memory.
 */
int micscif_destroy_loopback_qp(struct micscif_dev *scifdev)
{
	micscif_destroy_interrupts(scifdev);
	destroy_workqueue(scifdev->sd_loopb_wq);
	kfree((void *)scifdev->qpairs->outbound_q.rb_base);
	kfree(scifdev->qpairs);
	return 0;
}

#ifndef _MIC_SCIF_
void micscif_destroy_p2p(mic_ctx_t *mic_ctx)
{
	mic_ctx_t *mic_ctx_peer;
	struct micscif_dev *mic_scif_dev;
	struct micscif_dev *peer_dev;
	struct scif_p2p_info *p2p;
	struct list_head *pos, *tmp;
	uint32_t bd;

	if (!mic_p2p_enable)
		return;

	/* FIXME: implement node deletion */
	mic_scif_dev = &scif_dev[mic_get_scifnode_id(mic_ctx)];

	/* Free P2P mappings in the given node for all its peer nodes */
	list_for_each_safe(pos, tmp, &mic_scif_dev->sd_p2p) {
		p2p = list_entry(pos, struct scif_p2p_info,
				ppi_list);

		mic_unmap(mic_ctx->bi_id, p2p->ppi_mic_addr[PPI_MMIO],
			p2p->ppi_len[PPI_MMIO] << PAGE_SHIFT);
		mic_unmap(mic_ctx->bi_id, p2p->ppi_mic_addr[PPI_APER],
			p2p->ppi_len[PPI_APER] << PAGE_SHIFT);
		pci_unmap_sg(mic_ctx->bi_pdev,
			p2p->ppi_sg[PPI_MMIO], p2p->sg_nentries[PPI_MMIO],
			PCI_DMA_BIDIRECTIONAL);
		micscif_p2p_freesg(p2p->ppi_sg[PPI_MMIO]);
		pci_unmap_sg(mic_ctx->bi_pdev,
			p2p->ppi_sg[PPI_APER], p2p->sg_nentries[PPI_APER],
			PCI_DMA_BIDIRECTIONAL);
		micscif_p2p_freesg(p2p->ppi_sg[PPI_APER]);
		list_del(pos);
		kfree(p2p);
	}

	/* Free P2P mappings created in the peer nodes for the given node */
	for (bd = SCIF_HOST_NODE + 1; bd <= ms_info.mi_maxid; bd++) {
		peer_dev = &scif_dev[bd];

		list_for_each_safe(pos, tmp, &peer_dev->sd_p2p) {
			p2p = list_entry(pos, struct scif_p2p_info,
					ppi_list);
			if (p2p->ppi_peer_id == mic_get_scifnode_id(mic_ctx)) {
				mic_ctx_peer = get_per_dev_ctx(peer_dev->sd_node - 1);
				mic_unmap(mic_ctx_peer->bi_id, p2p->ppi_mic_addr[PPI_MMIO],
					p2p->ppi_len[PPI_MMIO] << PAGE_SHIFT);
				mic_unmap(mic_ctx_peer->bi_id, p2p->ppi_mic_addr[PPI_APER],
					p2p->ppi_len[PPI_APER] << PAGE_SHIFT);
				pci_unmap_sg(mic_ctx_peer->bi_pdev,
					p2p->ppi_sg[PPI_MMIO], p2p->sg_nentries[PPI_MMIO],
					PCI_DMA_BIDIRECTIONAL);
				micscif_p2p_freesg(p2p->ppi_sg[PPI_MMIO]);
				pci_unmap_sg(mic_ctx_peer->bi_pdev, p2p->ppi_sg[PPI_APER],
					p2p->sg_nentries[PPI_APER], PCI_DMA_BIDIRECTIONAL);
				micscif_p2p_freesg(p2p->ppi_sg[PPI_APER]);
				list_del(pos);
				kfree(p2p);
			}
		}
	}
}
#endif

/**
 * ONLY TEST CODE BELOW
 */
#ifdef ENABLE_TEST
#include <linux/sched.h>
#include <linux/workqueue.h>
#include "mic/micscif_nodeqp.h"

static void micscif_rb_trigger_consumer(struct work_struct *work)
{
	struct micscif_dev *scifdev =
		container_of(work, struct micscif_dev, consumer_work);

	while (scifdev->test_done == 0) {
		cpu_relax();
		schedule();
	}
	if (scifdev->test_done != 1)
		printk(KERN_ERR "Consumer failed!\n");
	else
		pr_debug("Test finished: Success\n");
	scifdev->test_done = 2;
}

/**
 * micscif_rb_trigger_producer
 * This is the producer thread that creates messages and updates the
 * RB write offset accordingly.
 */
static void micscif_rb_trigger_producer(struct work_struct *work)
{
	struct nodemsg msg;
	int count = 0;
	struct micscif_dev *scifdev =
		container_of(work, struct micscif_dev, producer_work);

	msg.dst.node = scifdev->sd_node;
	msg.uop = SCIF_TEST;

	while (count <= TEST_LOOP) {
		msg.payload[0] = count++;
		micscif_nodeqp_send(scifdev, &msg, NULL);
		/* pr_debug("Prod payload %llu\n", msg.payload[0]); */
	}
}

/*
 * This is called from the host and the card at the same time on a queue pair.
 * Each side sets up a producer and a consumer and spins on the queue pair
 * until done.
 */
static void micscif_qp_testboth(struct micscif_dev *scifdev)
{
	scifdev->count = 0;
	scifdev->test_done = 0;
	snprintf(scifdev->producer_name, sizeof(scifdev->producer_name),
		"PRODUCER %d", scifdev->sd_node);
	snprintf(scifdev->consumer_name, sizeof(scifdev->consumer_name),
		"CONSUMER %d", scifdev->sd_node);
	scifdev->producer =
		__mic_create_singlethread_workqueue(scifdev->producer_name);
	scifdev->consumer =
		__mic_create_singlethread_workqueue(scifdev->consumer_name);

	INIT_WORK(&scifdev->producer_work, micscif_rb_trigger_producer);
	INIT_WORK(&scifdev->consumer_work, micscif_rb_trigger_consumer);

	queue_work(scifdev->producer, &scifdev->producer_work);
	queue_work(scifdev->consumer, &scifdev->consumer_work);
}
#endif