Initial commit of files contained in `mpss-modules-3.8.6.tar.bz2` for Intel Xeon...
[xeon-phi-kernel-module] / include / mic / micscif.h
CommitLineData
800f879a
AT
1/*
2 * Copyright 2010-2017 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * Disclaimer: The codes contained in these modules may be specific to
14 * the Intel Software Development Platform codenamed Knights Ferry,
15 * and the Intel product codenamed Knights Corner, and are not backward
16 * compatible with other Intel products. Additionally, Intel will NOT
17 * support the codes or instruction set in future products.
18 *
19 * Intel offers no warranty of any kind regarding the code. This code is
20 * licensed on an "AS IS" basis and Intel is not obligated to provide
21 * any support, assistance, installation, training, or other services
22 * of any kind. Intel is also not obligated to provide any updates,
23 * enhancements or extensions. Intel specifically disclaims any warranty
24 * of merchantability, non-infringement, fitness for any particular
25 * purpose, and any other warranty.
26 *
27 * Further, Intel disclaims all liability of any kind, including but
28 * not limited to liability for infringement of any proprietary rights,
29 * relating to the use of the code, even if Intel is notified of the
30 * possibility of such liability. Except as expressly stated in an Intel
31 * license agreement provided with this code and agreed upon with Intel,
32 * no license, express or implied, by estoppel or otherwise, to any
33 * intellectual property rights is granted herein.
34 */
35
36#ifndef MICSCIF_H
37#define MICSCIF_H
38
39#include <linux/errno.h>
40#include <linux/hardirq.h>
41#include <linux/types.h>
42#include <linux/capability.h>
43#include <linux/slab.h>
44#include <linux/string.h>
45#include <linux/gfp.h>
46#include <linux/vmalloc.h>
47#include <asm/io.h>
48#include <linux/kernel.h>
49#include <linux/mm_types.h>
50#include <linux/jiffies.h>
51#include <linux/timer.h>
52#include <linux/irqflags.h>
53#include <linux/time.h>
54#include <linux/spinlock.h>
55#include <linux/mutex.h>
56#include <linux/semaphore.h>
57#include <linux/kthread.h>
58#include <linux/sched.h>
59#include <linux/delay.h>
60#include <linux/wait.h>
61#include <asm/bug.h>
62#include <linux/pci.h>
63#include <linux/device.h>
64#include <linux/fs.h>
65#include <linux/list.h>
66#include <linux/workqueue.h>
67#include <linux/interrupt.h>
68#include <asm/atomic.h>
69#include <linux/netdevice.h>
70#include <linux/debugfs.h>
71
72#ifdef _MODULE_SCIF_
73#include <linux/mman.h>
74#include <linux/pagemap.h>
75#include <asm/uaccess.h>
76#include <linux/poll.h>
77#include <linux/mmzone.h>
78#include <linux/version.h>
#endif /* _MODULE_SCIF_ */
80
81#include <linux/notifier.h>
82#include "scif.h"
83#include "mic/micbaseaddressdefine.h"
84#include "mic/micsboxdefine.h"
85
86/* The test runs in a separate thread context from the bottom
87 * half that processes messages from the card and setup p2p
88 * when these run concurrently, p2p messages get lost since they
89 * may be consumed by the test thread
90 */
91//#define ENABLE_TEST // Used to enable testing at board connect
92#ifdef MIC_IS_EMULATION
93#define TEST_LOOP 2
94#else
95#define TEST_LOOP 2000
96#endif
97
98//#define P2P_HACK 0
99#include "scif.h"
100#include "scif_ioctl.h"
101
102#define SCIF_READY_MAGIC_NUM 0x1eedfee0
103
104#ifndef SCIF_MAJOR
105#define SCIF_MAJOR 0 /* Use dynamic major number by default */
106#endif
107
108#define SCIF_HOST_NODE 0 // By default the host is always node zero
109
110#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
111/*
112 * The overhead for proxying a P2P DMA read to convert it to
113 * a DMA write by sending a SCIF Node QP message has been
114 * seen to be higher than programming a P2P DMA Read on self
115 * for transfer sizes less than the PROXY_DMA_THRESHOLD.
116 * The minimum threshold is different for Jaketown versus
117 * Ivytown and tuned for best DMA performance.
118 */
119#define SCIF_PROXY_DMA_THRESHOLD_JKT (32 * 1024ULL)
120#define SCIF_PROXY_DMA_THRESHOLD_IVT (1024 * 1024ULL)
121
122//#define RMA_DEBUG 0
123
124/* Pre-defined L1_CACHE_SHIFT is 6 on RH and 7 on Suse */
125#undef L1_CACHE_SHIFT
126#define L1_CACHE_SHIFT 6
127#undef L1_CACHE_BYTES
128#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
129
130#define MI_EPLOCK_HELD (true)
131#define MAX_RDMASR 8
132
// Device wide SCIF information (one instance per node, see "ms_info" below).
struct micscif_info {
	uint32_t mi_nodeid; // Node ID this node is to others.

	struct mutex mi_conflock; // Configuration lock (used in p2p setup)
	uint32_t mi_maxid; // Max known board ID
	uint32_t mi_total; // Total number of running interfaces
	uint32_t mi_nr_zombies; // Keep track of the number of zombie EP.
	unsigned long mi_mask; // bit mask of online scif interfaces
	uint64_t mi_nr_ioremap; // Keep track of number of ioremap() calls on the host
				// to decide when to purge aliases for performance.
	spinlock_t mi_eplock; // Protects the endpoint state lists (mi_listen/mi_zombie)
	spinlock_t mi_connlock; // Protects mi_connected/mi_disconnected lists
	spinlock_t mi_rmalock; // Synchronize access to list of temporary registered
			       // windows to be destroyed.
	struct mutex mi_fencelock; // Synchronize access to list of remote fences requested.
	struct mutex mi_event_cblock; // Protects mi_event_cb list
	spinlock_t mi_nb_connect_lock; // Protects mi_nb_connect_list

	struct list_head mi_uaccept; // List of user acceptreq waiting for acceptreg
	struct list_head mi_listen; // List of listening end points
	struct list_head mi_zombie; // List of zombie end points with pending RMA's.
	struct list_head mi_connected; // List of end points in connected state
	struct list_head mi_disconnected; // List of end points in disconnected state
	struct list_head mi_rma; // List of temporary registered windows to be destroyed.
	struct list_head mi_rma_tc; // List of temporary registered & cached windows
				    // to be destroyed.
	struct list_head mi_fence; // List of remote fence requests.
	struct list_head mi_event_cb; /* List of event handlers registered */
	struct list_head mi_nb_connect_list; // Non-blocking connect requests
#ifdef CONFIG_MMU_NOTIFIER
	struct list_head mi_mmu_notif_cleanup; // MMU-notifier cleanup work items
#endif
	struct notifier_block mi_panic_nb; // Panic notifier registration
#ifndef _MIC_SCIF_
	/* The host needs to keep track of node dependencies in form of graph.
	 * This will need to be dynamically grown to support hotplug.
	 */
	uint32_t **mi_depmtrx;
	/*
	 * Wait queue used for blocking while waiting for nodes
	 * to respond for disconnect message sent from host.
	 */
	wait_queue_head_t mi_disconn_wq;
	/* Status of node remove operation */
	uint64_t mi_disconnect_status;
	atomic_long_t mi_unique_msgid;
#endif
	/*
	 * Watchdog timeout on the host. Timer expiry will result in the host
	 * treating the remote node as a lost node. Default value is
	 * DEFAULT_WATCHDOG_TO and can be modified to a value greater than 1
	 * second via SCIF sysfs watchdog_to entry.
	 */
	int mi_watchdog_to; // Watchdog timeout
	int mi_watchdog_enabled; // Watchdog timeout enabled
	int mi_watchdog_auto_reboot; // Watchdog auto reboot enabled
	struct workqueue_struct *mi_misc_wq; // Workqueue for miscellaneous SCIF tasks.
	struct work_struct mi_misc_work; // Work item run on mi_misc_wq (micscif_misc_handler)
#ifdef CONFIG_MMU_NOTIFIER
	struct workqueue_struct *mi_mmu_notif_wq; // Workqueue for MMU notifier cleanup tasks.
	struct work_struct mi_mmu_notif_work; // Work item run on mi_mmu_notif_wq
#endif
	int nr_gtt_entries; // GTT Debug Counter to detect leaks
	uint64_t nr_2mb_pages; // Debug Counter for number of 2mb pages.
	uint64_t nr_4k_pages; // Debug Counter for number of 4K pages
	uint8_t en_msg_log; // Enable message logging flag
	wait_queue_head_t mi_exitwq;
	unsigned long mi_rma_tc_limit; // Cap for temp cached windows (SCIF_RMA_TEMP_CACHE_LIMIT)
	uint64_t mi_proxy_dma_threshold; // P2P proxy DMA cutoff (JKT/IVT value, see above)
#ifdef RMA_DEBUG
	atomic_long_t rma_mm_cnt;
	atomic_long_t rma_unaligned_cpu_cnt;
	atomic_long_t rma_alloc_cnt;
	atomic_long_t rma_pin_cnt;
#ifdef CONFIG_MMU_NOTIFIER
	atomic_long_t mmu_notif_cnt;
#endif
#endif
#ifdef _MIC_SCIF_
	int mi_intr_rcnt[MAX_RDMASR]; // Ref count to track SCIF Interrupt Handlers
#endif
	struct workqueue_struct *mi_conn_wq; // Workqueue for connection handling
	struct work_struct mi_conn_work; // Work item run on mi_conn_wq (micscif_conn_handler)
};
219
220extern struct micscif_info ms_info;
221
222#define SCIF_NODE_MAGIC_BIT 63
223/* Magic value used to indicate a remote idle node without grabbing any locks */
224#define SCIF_NODE_IDLE (1ULL << SCIF_NODE_MAGIC_BIT)
225
/*
 * Lifecycle states of a SCIF device, tracked per remote node in
 * micscif_dev.sd_state and cached per endpoint in endpt.sd_state.
 * scifdev_alive() treats RUNNING and SLEEPING as "usable"; see
 * scif_invalidate_ep() for the transition to STOPPED.
 */
enum scif_state {
	SCIFDEV_NOTPRESENT,
	SCIFDEV_INIT,
	SCIFDEV_RUNNING,
	SCIFDEV_SLEEPING,
	SCIFDEV_STOPPING,
	SCIFDEV_STOPPED
};
234
235extern bool mic_p2p_enable;
236extern bool mic_p2p_proxy_enable;
237extern bool mic_reg_cache_enable;
238extern bool mic_ulimit_check;
/* p2p mapping from node id to peer id */
struct scif_p2p_info {
	int ppi_peer_id;		// Node ID of the peer this mapping refers to
	struct scatterlist *ppi_sg[2];	// scatterlists, indexed by PPI_MMIO/PPI_APER
	uint64_t sg_nentries[2];	// no of entries in scatterlists
	dma_addr_t ppi_pa[2];		// one for mmio; one for aper
	dma_addr_t ppi_mic_addr[2];	// one for mmio; one for aper
	uint64_t ppi_len[2];		// lengths of the two regions
#define PPI_MMIO	0		// index of the MMIO entry in the [2] arrays
#define PPI_APER	1		// index of the aperture entry in the [2] arrays
	enum scif_state ppi_disc_state;	// Disconnection state of this peer node.
	struct list_head ppi_list;	// list linkage (presumably micscif_dev.sd_p2p — confirm)
};
252
/* one per remote node */
struct micscif_dev {
	uint16_t sd_node;		// Node ID of this remote node
	enum scif_state sd_state;	// Lifecycle state (see enum scif_state)
	volatile void *mm_sbox;		// Remote SBOX MMIO mapping (inferred from name — confirm)
	uint64_t sd_base_addr;		/* Remote node's base bus addr
					 * for the local node's aperture
					 */
#ifndef _MIC_SCIF_
	struct list_head sd_p2p;	/* List of bus addresses for
					 * other nodes, these are allocated
					 * by the host driver and are
					 * valid only on the host node
					 */
	struct delayed_work sd_watchdog_work;
	wait_queue_head_t sd_watchdog_wq;
	struct workqueue_struct *sd_ln_wq;
	char sd_ln_wqname[16];
#endif

	int n_qpairs;			/* FIXME:
					 * This is always set to 1,
					 */

	struct micscif_qp *qpairs;	/* Same FIXME as above
					 * There is single qp established
					 * with this remote node
					 */

	struct workqueue_struct *sd_intr_wq;	/* sd_intr_wq & sd_intr_bh
						 * together constitute the workqueue
						 * infrastructure needed to
						 * run the bottom half handler
						 * for messages received from
						 * this node
						 */
	char sd_intr_wqname[16];
	struct work_struct sd_intr_bh;
	unsigned int sd_intr_handle;
	uint32_t sd_rdmasr;
	struct workqueue_struct *sd_loopb_wq;	// Loopback message workqueue
	char sd_loopb_wqname[16];
	struct work_struct sd_loopb_work;
	struct list_head sd_loopb_recv_q;	// Queue of received loopback messages
	/* Lock to synchronize remote node state transitions */
	struct mutex sd_lock;
	/*
	 * Global Ref count per SCIF device tracking all SCIF API's which
	 * might communicate across PCIe.
	 */
	atomic_long_t scif_ref_cnt;
	/*
	 * Global Ref count per SCIF device tracking scif_mmap()/
	 * scif_get_pages(). sd_lock protects scif_map_ref_cnt
	 * hence it does not need to be an atomic operation. Note that
	 * scif_mmap()/scif_get_pages() is not in the critical
	 * perf path.
	 */
	int scif_map_ref_cnt;
	/*
	 * Wait queue used for blocking while waiting for nodes
	 * to wake up or to be removed.
	 */
	wait_queue_head_t sd_wq;
	uint64_t sd_wait_status;
#ifdef _MIC_SCIF_
	wait_queue_head_t sd_p2p_wq;
	bool sd_proxy_dma_reads;
	struct delayed_work sd_p2p_dwork;
	int sd_p2p_retry;
#endif
	/*
	 * The NUMA node the peer is attached to on the host.
	 */
	int sd_numa_node;
	/*
	 * Waitqueue for blocking while waiting for remote memory
	 * mappings to drop to zero.
	 */
	wait_queue_head_t sd_mmap_wq;

	/* When a nodeqp message is received, this is set.
	 * And it is reset by the watchdog timer. */
	atomic_t sd_node_alive;
	int num_active_conn;		// Active connections (see get_conn_count/put_conn_count)
#ifdef ENABLE_TEST
	struct workqueue_struct *producer;
	struct workqueue_struct *consumer;
	char producer_name[16];
	char consumer_name[16];
	struct work_struct producer_work;
	struct work_struct consumer_work;
	int count;
	int test_done;
#endif // ENABLE_TEST
};
349
350extern struct micscif_dev scif_dev[];
351
352#include "mic/micscif_nodeqp.h"
353#include "mic/micscif_nm.h"
354#include "mic/micscif_smpt.h"
355#include "mic/micscif_va_gen.h"
356#include "mic/mic_dma_api.h"
357#include "mic/mic_dma_lib.h"
358#include "mic/micscif_rma.h"
359#include "mic/micscif_rma_list.h"
360
/*
 * data structure used to sync SCIF_GET_NODE_INFO messaging
 */
struct get_node_info {
	enum micscif_msg_state state;	// completion state of the outstanding request
	wait_queue_head_t wq;		// wait queue the requester blocks on
};
368
/*
 * align_low() - round @data down to the nearest multiple of @granularity.
 *
 * @data:        value to align
 * @granularity: alignment; must be a power of two (the same requirement
 *               the original kernel ALIGN()-based implementation had)
 *
 * Returns @data with the low log2(granularity) bits cleared.
 *
 * The previous implementation expressed round-down as
 * ALIGN(data - (granularity - 1), granularity), i.e. round-up applied
 * after a subtraction that could wrap for data < granularity - 1 and only
 * produced the intended result because uint64_t arithmetic wraps back
 * inside ALIGN(). Masking directly states the intent, is algebraically
 * identical for all inputs, and removes the dependency on ALIGN().
 */
static inline uint64_t align_low(uint64_t data, uint32_t granularity)
{
	return data & ~((uint64_t)granularity - 1);
}
373
374#define SCIF_MIN(a, b) (((a) < (b)) ? (a) : (b))
375#define SCIF_MAX(a, b) (((a) > (b)) ? (a) : (b))
376
/*
 * State machine for a SCIF endpoint (endpt.state). The original author's
 * External/Internal markers are kept: "External" states appear to be the
 * ones reportable through the SCIF API, "Internal" ones transient driver
 * states — confirm against the state handling in the .c files.
 */
enum endptstate {
	SCIFEP_CLOSED,		// Internal state
	SCIFEP_UNBOUND,		// External state
	SCIFEP_BOUND,		// External state
	SCIFEP_LISTENING,	// External state
	SCIFEP_CONNECTED,	// External state
	SCIFEP_CONNECTING,	// Internal state
	SCIFEP_MAPPING,		// Internal state
	SCIFEP_CLOSING,		// Internal state
	SCIFEP_CLLISTEN,	// Internal state
	SCIFEP_DISCONNECTED,	// Internal state
	SCIFEP_ZOMBIE		// Internal state
};
390
391extern char *scif_ep_states[];
392
// Used for coordinating connection accept sequence. This is the data structure
// for the conlist in the endpoint.
struct conreq {
	struct nodemsg msg;	// node QP message; msg.payload[0] is the key
				// matched by miscscif_get_connection_request()
	struct list_head list;	// link in endpt.conlist
};
399
400/* Size of the RB for the Node QP */
401#define NODE_QP_SIZE 0x10000
402/* Size of the RB for the Endpoint QP */
403#define ENDPT_QP_SIZE 0x1000
404
// Queue-pair bookkeeping for a single endpoint.
struct endpt_qp_info {
	/* Qpair for this endpoint */
	struct micscif_qp *qp;
	/*
	 * Physical addr of the QP for Host or
	 * GTT offset of the QP for MIC.
	 * Required for unmapping the QP during close.
	 */
	dma_addr_t qp_offset;
	/*
	 * Payload in a SCIF_CNCT_GNT message containing the
	 * physical address of the remote_qp.
	 */
	dma_addr_t cnct_gnt_payload;
};
420
421#define SCIFEP_MAGIC 0x5c1f000000005c1f
422
// A SCIF endpoint. scif_epd_t handles used throughout the API resolve to
// pointers to this structure (see the casts in dump_ep()/__scif_pollfd()).
struct endpt {
	volatile enum endptstate state;	// Current state (see enum endptstate)
	spinlock_t lock;		// Protects state/list membership (see scif_invalidate_ep)

	struct scif_portID port;	// Local node/port identity
	struct scif_portID peer;	// Peer node/port identity

	int backlog;			// Listen backlog

	struct endpt_qp_info qp_info;	// Queue-pair bookkeeping
	struct endpt_rma_info rma_info;	// RMA registration windows / VA generator
	/*
	 * scifdev used by this endpt to communicate with remote node.
	 */
	struct micscif_dev *remote_dev;
	uint64_t remote_ep;		// Handle identifying the peer endpoint
	/*
	 * Keep track of number of connection requests.
	 */
	int conreqcnt;
	/*
	 * Cache remote SCIF device state.
	 */
	enum scif_state sd_state;
	/*
	 * True if the endpoint was created
	 * via scif_accept(..).
	 */
	bool accepted_ep;
	/*
	 * Open file information used to match the id passed
	 * in with the flush routine.
	 */
	struct files_struct *files;
	/*
	 * Reference count for functions using this endpoint.
	 */
	struct kref ref_count;
	struct list_head conlist;	// Pending connection requests (struct conreq)
	wait_queue_head_t conwq;	// Connect/accept waiters (woken on disconnect)
	wait_queue_head_t disconwq;
	wait_queue_head_t diswq;
	wait_queue_head_t sendwq;	// Blocked senders (see scif_wakeup_ep)
	wait_queue_head_t recvwq;	// Blocked receivers (see scif_wakeup_ep)
	struct mutex sendlock;		// Serializes the send path
	struct mutex recvlock;		// Serializes the receive path
	struct list_head list;		// Link in the ms_info state lists
					// (mi_listen/mi_connected/mi_disconnected/mi_zombie)

#ifdef CONFIG_MMU_NOTIFIER
	struct list_head mmu_list;
#endif

	struct list_head li_accept;	/* pending ACCEPTREG */
	int acceptcnt;			/* pending ACCEPTREG cnt */
	struct list_head liacceptlist;	/* link to listen accept */
	struct list_head miacceptlist;	/* link to mi_uaccept */
	struct endpt *listenep;		/* associated listen ep */

	/* Non-blocking connect */
	struct work_struct conn_work;	// Work item run on ms_info.mi_conn_wq
	struct scif_portID conn_port;	// Destination of the async connect
	int conn_err;			// Result of the async connect
	int conn_async_state;
	wait_queue_head_t conn_pend_wq;
	struct list_head conn_list;	// Link in ms_info.mi_nb_connect_list
};
489
/*
 * micscif_queue_for_cleanup() - queue an RMA window for asynchronous
 * destruction and kick the misc workqueue that performs it.
 *
 * @window: registration window to destroy
 * @list:   destination cleanup list (e.g. ms_info.mi_rma or mi_rma_tc)
 *
 * The window is stamped with the current DMA mark of the owning endpoint's
 * DMA channel — NOTE(review): presumably so cleanup can wait for in-flight
 * DMA against the window; confirm in the misc handler. The cleanup lists
 * are protected by ms_info.mi_rmalock.
 */
static __always_inline void
micscif_queue_for_cleanup(struct reg_range_t *window, struct list_head *list)
{
	struct endpt *ep = (struct endpt *)window->ep;
	INIT_LIST_HEAD(&window->list_member);
	window->dma_mark = get_dma_mark(ep->rma_info.dma_chan);
	spin_lock(&ms_info.mi_rmalock);
	list_add_tail(&window->list_member, list);
	spin_unlock(&ms_info.mi_rmalock);
	queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work);
}
501
/*
 * __micscif_rma_destroy_tcw_helper() - unlink a temporary cached window
 * from whatever list it currently sits on and queue it on the
 * ms_info.mi_rma_tc cleanup list.
 *
 * Caller must hold the lock protecting the window's current list; the
 * list_member field is re-initialized inside micscif_queue_for_cleanup().
 */
static __always_inline void
__micscif_rma_destroy_tcw_helper(struct reg_range_t *window)
{
	list_del(&window->list_member);
	micscif_queue_for_cleanup(window, &ms_info.mi_rma_tc);
}
508
509void print_ep_state(struct endpt *ep, char *label);
510
511// Function prototypes needed by Unix/Linux drivers linking to scif
512int scif_fdopen(struct file *f);
513int scif_fdclose(struct file *f);
514int scif_process_ioctl(struct file *f, unsigned int cmd, uint64_t arg);
515int micscif_mmap(struct file *file, struct vm_area_struct *vma);
516int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
517void scif_munmap(struct vm_area_struct *vma);
518void scif_proc_init(void);
519void scif_proc_cleanup(void);
520int scif_user_send(scif_epd_t epd, void *msg, int len, int flags);
521int scif_user_recv(scif_epd_t epd, void *msg, int len, int flags);
522int __scif_pin_pages(void *addr, size_t len, int *out_prot,
523 int map_flags, scif_pinned_pages_t *pages);
524scif_epd_t __scif_open(void);
525int __scif_bind(scif_epd_t epd, uint16_t pn);
526int __scif_listen(scif_epd_t epd, int backlog);
527int __scif_connect(scif_epd_t epd, struct scif_portID *dst, bool non_block);
528int __scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t
529*newepd, int flags);
530int __scif_close(scif_epd_t epd);
531int __scif_send(scif_epd_t epd, void *msg, int len, int flags);
532int __scif_recv(scif_epd_t epd, void *msg, int len, int flags);
533off_t __scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
534int prot_flags, int map_flags);
535int __scif_unregister(scif_epd_t epd, off_t offset, size_t len);
536int __scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
537roffset, int rma_flags);
538int __scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
539roffset, int rma_flags);
540int __scif_fence_mark(scif_epd_t epd, int flags, int *mark);
541int __scif_fence_wait(scif_epd_t epd, int mark);
542int __scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, off_t roff,
543uint64_t rval, int flags);
544off_t __scif_register_pinned_pages(scif_epd_t epd,
545scif_pinned_pages_t pinned_pages, off_t offset, int map_flags);
546int __scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
547struct scif_range **pages);
548int __scif_put_pages(struct scif_range *pages);
549int __scif_flush(scif_epd_t epd);
550
551void micscif_misc_handler(struct work_struct *work);
552void micscif_conn_handler(struct work_struct *work);
553
554uint16_t rsrv_scif_port(uint16_t port);
555uint16_t get_scif_port(void);
556void put_scif_port(uint16_t port);
557
558void micscif_send_exit(void);
559
560void scif_ref_rel(struct kref *kref_count);
561
562#ifdef _MODULE_SCIF_
563unsigned int micscif_poll(struct file *f, poll_table *wait);
564unsigned int scif_pollfd(struct file *f, poll_table *wait, scif_epd_t epd);
565unsigned int __scif_pollfd(struct file *f, poll_table *wait, struct endpt *ep);
566int micscif_flush(struct file *f, fl_owner_t id);
567#endif
568
569#ifdef _MIC_SCIF_
570void mic_debug_init(void);
571void micscif_get_node_info(void);
572void scif_poll_qp_state(struct work_struct *work);
573#endif
574void mic_debug_uninit(void);
575
576#define serializing_request(x) ((void)*(volatile uint8_t*)(x))
577
578// State list helper functions.
579// Each of these functions must be called with the end point lock unlocked. If
580// the end point is found on the list the end point returned will have its lock
581// set and sflags will return the value to be used to do an unlock_irqrestore
582// at the end of the calling function.
// Look up the listening endpoint bound to "port" on ms_info.mi_listen.
// On success the endpoint is returned with ep->lock held and *sflags set
// to the saved irq flags; the caller must eventually
// spin_unlock_irqrestore(&ep->lock, *sflags). Returns NULL (all locks
// released, irqs restored) when no listener matches.
static inline struct endpt *
micscif_find_listen_ep(uint16_t port, unsigned long *sflags)
{
	struct endpt *ep = NULL;
	struct list_head *pos, *tmpq;
	unsigned long flags;

	spin_lock_irqsave(&ms_info.mi_eplock, flags);
	list_for_each_safe(pos, tmpq, &ms_info.mi_listen) {
		ep = list_entry(pos, struct endpt, list);
		if (ep->port.port == port) {
			*sflags = flags;		// caller restores irqs with these flags
			spin_lock(&ep->lock);		// hand-over-hand: take ep->lock first,
			spin_unlock(&ms_info.mi_eplock); // then drop the list lock, irqs stay off
			return ep;
		}
	}
	spin_unlock_irqrestore(&ms_info.mi_eplock, flags);
	return (struct endpt *)NULL;
}
603
604// Must be called with end point locked
605static inline struct conreq *
606miscscif_get_connection_request(struct endpt *ep, uint64_t payload)
607{
608 struct conreq *conreq;
609 struct list_head *pos, *tmpq;
610
611 list_for_each_safe(pos, tmpq, &ep->conlist) {
612 conreq = list_entry(pos, struct conreq, list);
613 if (conreq->msg.payload[0] == payload) {
614 list_del(pos);
615 ep->conreqcnt--;
616 return conreq;
617 }
618 }
619 return (struct conreq *)NULL;
620}
621
// There is no requirement for the callee to have the end point
// locked like other API's above.
//
// Walk ms_info.mi_zombie under mi_eplock and unlink "ep" if present,
// decrementing the zombie count. A no-op when the endpoint is not on
// the zombie list.
static inline void
micscif_remove_zombie_ep(struct endpt *ep)
{
	struct list_head *pos, *tmpq;
	unsigned long sflags;
	struct endpt *tmpep;

	spin_lock_irqsave(&ms_info.mi_eplock, sflags);
	list_for_each_safe(pos, tmpq, &ms_info.mi_zombie) {
		tmpep = list_entry(pos, struct endpt, list);
		if (tmpep == ep) {
			list_del(pos);
			ms_info.mi_nr_zombies--;
		}
	}
	spin_unlock_irqrestore(&ms_info.mi_eplock, sflags);
}
641
// Reap zombie endpoints whose RMA state permits teardown
// (micscif_rma_ep_can_uninit): unlink from ms_info.mi_zombie, drop the
// zombie count, destroy the endpoint's VA generator and free it.
// Runs entirely under mi_eplock.
static inline void
micscif_cleanup_zombie_epd(void)
{
	struct list_head *pos, *tmpq;
	unsigned long sflags;
	struct endpt *ep;

	spin_lock_irqsave(&ms_info.mi_eplock, sflags);
	list_for_each_safe(pos, tmpq, &ms_info.mi_zombie) {
		ep = list_entry(pos, struct endpt, list);
		if (micscif_rma_ep_can_uninit(ep)) {
			list_del(pos);
			ms_info.mi_nr_zombies--;
			va_gen_destroy(&ep->rma_info.va_gen);
			kfree(ep);
		}
	}
	spin_unlock_irqrestore(&ms_info.mi_eplock, sflags);
}
661
662#define SCIF_WAKE_UP_SEND (1 << 1)
663#define SCIF_WAKE_UP_RECV (1 << 2)
664
665/**
666 * scif_wakeup_ep() - Wake up all clients based on the type
667 * requested i.e. threads blocked in scif_send(..) and/or scif_recv(..).
668 */
669static inline void
670scif_wakeup_ep(int type)
671{
672 struct endpt *ep;
673 unsigned long sflags;
674 struct list_head *pos, *tmpq;
675
676 spin_lock_irqsave(&ms_info.mi_connlock, sflags);
677 list_for_each_safe(pos, tmpq, &ms_info.mi_connected) {
678 ep = list_entry(pos, struct endpt, list);
679 if (type & SCIF_WAKE_UP_SEND)
680 wake_up_interruptible(&ep->sendwq);
681 if (type & SCIF_WAKE_UP_RECV)
682 wake_up_interruptible(&ep->recvwq);
683 }
684 spin_unlock_irqrestore(&ms_info.mi_connlock, sflags);
685}
686
687/*
688 * is_self_scifdev:
689 * @dev: The remote SCIF Device
690 *
691 * Returns true if the SCIF Device passed is the self aka Loopback SCIF device.
692 */
693static inline int is_self_scifdev(struct micscif_dev *dev)
694{
695 return dev->sd_node == ms_info.mi_nodeid;
696}
697
698/*
699 * is_p2p_scifdev:
700 * @dev: The remote SCIF Device
701 *
702 * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device.
703 */
704static inline bool is_p2p_scifdev(struct micscif_dev *dev)
705{
706#ifdef _MIC_SCIF_
707 return dev != &scif_dev[SCIF_HOST_NODE] && !is_self_scifdev(dev);
708#else
709 return false;
710#endif
711}
712
713/*
714 * get_conn_count:
715 * @dev: The remote SCIF Device
716 *
717 * Increments the number of active SCIF connections. Callee is expected
718 * to synchronize calling this API with put_conn_count.
719 */
720static __always_inline void
721get_conn_count(struct micscif_dev *dev)
722{
723 dev->num_active_conn++;
724}
725
726/*
727 * put_conn_count:
728 * @dev: The remote SCIF Device
729 *
730 * Decrements the number of active connections. Callee is expected
731 * to synchronize calling this API with get_conn_count.
732 */
733static __always_inline void
734put_conn_count(struct micscif_dev *dev)
735{
736 dev->num_active_conn--;
737 BUG_ON(dev->num_active_conn < 0);
738}
739
740/*
741 * get_kref_count:
742 * epd: SCIF endpoint
743 *
744 * Increments kmod endpoint reference count. Callee is expected
745 * to synchronize calling this API with put_kref_count.
746 */
747static __always_inline void
748get_kref_count(scif_epd_t epd)
749{
750 kref_get(&(epd->ref_count));
751}
752
753/*
754 * put_kref_count:
755 * epd: SCIF endpoint
756 *
757 * Decrements kmod endpoint reference count. Callee is expected
758 * to synchronize calling this API with get_kref_count.
759 */
760static __always_inline void
761put_kref_count(scif_epd_t epd)
762{
763 kref_put(&(epd->ref_count), scif_ref_rel);
764}
765
766/*
767 * is_scifdev_alive:
768 * @dev: The remote SCIF Device
769 *
770 * Returns true if the remote SCIF Device is running or sleeping for
771 * this endpoint.
772 */
773static inline int scifdev_alive(struct endpt *ep)
774{
775 return (((SCIFDEV_RUNNING == ep->remote_dev->sd_state) ||
776 (SCIFDEV_SLEEPING == ep->remote_dev->sd_state)) &&
777 SCIFDEV_RUNNING == ep->sd_state);
778}
779
780/*
781 * verify_epd:
782 * ep: SCIF endpoint
783 *
784 * Checks several generic error conditions and returns the
785 * appropiate error.
786 */
787static inline int verify_epd(struct endpt *ep)
788{
789 if (ep->state == SCIFEP_DISCONNECTED)
790 return -ECONNRESET;
791
792 if (ep->state != SCIFEP_CONNECTED)
793 return -ENOTCONN;
794
795 if (!scifdev_alive(ep))
796 return -ENODEV;
797
798 return 0;
799}
800
/**
 * scif_invalidate_ep() - Set remote SCIF device state for all connected
 * and disconnected endpoints for a particular node to SCIFDEV_STOPPED,
 * change endpoint state to disconnected and wake up all send/recv/con
 * waitqueues.
 */
static inline void
scif_invalidate_ep(int node)
{
	struct endpt *ep;
	unsigned long sflags;
	struct list_head *pos, *tmpq;

	spin_lock_irqsave(&ms_info.mi_connlock, sflags);
	/* Already-disconnected endpoints: just stamp the cached device
	 * state as stopped. */
	list_for_each_safe(pos, tmpq, &ms_info.mi_disconnected) {
		ep = list_entry(pos, struct endpt, list);
		if (ep->remote_dev->sd_node == node) {
			spin_lock(&ep->lock);
			ep->sd_state = SCIFDEV_STOPPED;
			spin_unlock(&ep->lock);
		}
	}
	/* Connected endpoints: move each onto the disconnected list, drop
	 * its device connection count, mark it disconnected/stopped and
	 * wake every sleeper so blocked send/recv/connect calls can see
	 * the change. ep->lock nests inside mi_connlock here. */
	list_for_each_safe(pos, tmpq, &ms_info.mi_connected) {
		ep = list_entry(pos, struct endpt, list);
		if (ep->remote_dev->sd_node == node) {
			list_del(pos);
			put_conn_count(ep->remote_dev);
			spin_lock(&ep->lock);
			ep->state = SCIFEP_DISCONNECTED;
			list_add_tail(&ep->list, &ms_info.mi_disconnected);
			ep->sd_state = SCIFDEV_STOPPED;
			wake_up_interruptible(&ep->sendwq);
			wake_up_interruptible(&ep->recvwq);
			wake_up_interruptible(&ep->conwq);
			spin_unlock(&ep->lock);
		}
	}
	spin_unlock_irqrestore(&ms_info.mi_connlock, sflags);
	/* Drain the connection workqueue — presumably so queued
	 * non-blocking connect work observes the disconnect before this
	 * returns; confirm against micscif_conn_handler(). */
	flush_workqueue(ms_info.mi_conn_wq);
}
841
842/*
843 * Only Debug Functions Below
844 */
845#define SCIF_CRUMB pr_debug("%s %d\n", __func__, __LINE__)
846
847static inline void
848micscif_display_all_zombie_ep(void)
849{
850 struct list_head *pos, *tmpq;
851 unsigned long sflags;
852 struct endpt *ep;
853
854 pr_debug("Zombie Info Start\n");
855 spin_lock_irqsave(&ms_info.mi_eplock, sflags);
856 list_for_each_safe(pos, tmpq, &ms_info.mi_zombie) {
857 ep = list_entry(pos, struct endpt, list);
858 if (!list_empty(&ep->rma_info.reg_list))
859 micscif_display_all_windows(&ep->rma_info.reg_list);
860 if (!list_empty(&ep->rma_info.remote_reg_list))
861 micscif_display_all_windows(
862 &ep->rma_info.remote_reg_list);
863 }
864 spin_unlock_irqrestore(&ms_info.mi_eplock, sflags);
865 pr_debug("Zombie Info End\n");
866}
867
868static inline void dump_ep(scif_epd_t epd, const char *func, int line)
869{
870 struct endpt *ep = (struct endpt *)epd;
871 pr_debug("%s %d state %d lock %p port.node 0x%x"
872 "port.port 0x%x peer.node 0x%x peer.port 0x%x backlog %d qp %p"
873 "qp_offset 0x%llx cnct_gnt_payload 0x%llx remote_dev %p\n",
874 func, line, ep->state, &ep->lock, ep->port.node,
875 ep->port.port, ep->peer.node, ep->peer.port, ep->backlog,
876 ep->qp_info.qp, ep->qp_info.qp_offset,
877 ep->qp_info.cnct_gnt_payload, ep->remote_dev);
878}
879
/*
 * dump_qp() - pr_debug() a one-line snapshot of a queue pair's buffer
 * addresses and owning endpoint; @func/@line identify the call site.
 */
static inline void dump_qp(volatile struct micscif_qp *qp, const char *func, int line)
{
	pr_debug("%s %d qp %p local_buf 0x%llx"
		" local_qp 0x%llx remote_buf 0x%llx remote_qp %p ep 0x%llx\n",
		func, line, qp, qp->local_buf,
		qp->local_qp, qp->remote_buf, qp->remote_qp, qp->ep);
}
887
/*
 * dump_rb() - pr_debug() a one-line snapshot of a ring buffer: base,
 * shared read/write pointers, size, and the current/old cached offsets;
 * @func/@line identify the call site.
 */
static inline void dump_rb(struct micscif_rb *rb, const char *func, int line)
{
	pr_debug("%s %d rb %p rb_base %p *read_ptr 0x%x"
		" *write_ptr 0x%x size 0x%x"
		" cro 0x%x cwo 0x%x ocro 0x%x ocwo 0x%x\n",
		func, line, rb, rb->rb_base, *rb->read_ptr,
		*rb->write_ptr, rb->size, rb->current_read_offset,
		rb->current_write_offset,
		rb->old_current_read_offset,
		rb->old_current_write_offset);
}
899
900#endif /* MICSCIF_H */