Commit | Line | Data |
---|---|---|
800f879a AT |
1 | /* |
2 | * Copyright 2010-2017 Intel Corporation. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License, version 2, | |
6 | * as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * Disclaimer: The codes contained in these modules may be specific to | |
14 | * the Intel Software Development Platform codenamed Knights Ferry, | |
15 | * and the Intel product codenamed Knights Corner, and are not backward | |
16 | * compatible with other Intel products. Additionally, Intel will NOT | |
17 | * support the codes or instruction set in future products. | |
18 | * | |
19 | * Intel offers no warranty of any kind regarding the code. This code is | |
20 | * licensed on an "AS IS" basis and Intel is not obligated to provide | |
21 | * any support, assistance, installation, training, or other services | |
22 | * of any kind. Intel is also not obligated to provide any updates, | |
23 | * enhancements or extensions. Intel specifically disclaims any warranty | |
24 | * of merchantability, non-infringement, fitness for any particular | |
25 | * purpose, and any other warranty. | |
26 | * | |
27 | * Further, Intel disclaims all liability of any kind, including but | |
28 | * not limited to liability for infringement of any proprietary rights, | |
29 | * relating to the use of the code, even if Intel is notified of the | |
30 | * possibility of such liability. Except as expressly stated in an Intel | |
31 | * license agreement provided with this code and agreed upon with Intel, | |
32 | * no license, express or implied, by estoppel or otherwise, to any | |
33 | * intellectual property rights is granted herein. | |
34 | */ | |
35 | ||
36 | #ifndef MICSCIF_H | |
37 | #define MICSCIF_H | |
38 | ||
39 | #include <linux/errno.h> | |
40 | #include <linux/hardirq.h> | |
41 | #include <linux/types.h> | |
42 | #include <linux/capability.h> | |
43 | #include <linux/slab.h> | |
44 | #include <linux/string.h> | |
45 | #include <linux/gfp.h> | |
46 | #include <linux/vmalloc.h> | |
47 | #include <asm/io.h> | |
48 | #include <linux/kernel.h> | |
49 | #include <linux/mm_types.h> | |
50 | #include <linux/jiffies.h> | |
51 | #include <linux/timer.h> | |
52 | #include <linux/irqflags.h> | |
53 | #include <linux/time.h> | |
54 | #include <linux/spinlock.h> | |
55 | #include <linux/mutex.h> | |
56 | #include <linux/semaphore.h> | |
57 | #include <linux/kthread.h> | |
58 | #include <linux/sched.h> | |
59 | #include <linux/delay.h> | |
60 | #include <linux/wait.h> | |
61 | #include <asm/bug.h> | |
62 | #include <linux/pci.h> | |
63 | #include <linux/device.h> | |
64 | #include <linux/fs.h> | |
65 | #include <linux/list.h> | |
66 | #include <linux/workqueue.h> | |
67 | #include <linux/interrupt.h> | |
68 | #include <asm/atomic.h> | |
69 | #include <linux/netdevice.h> | |
70 | #include <linux/debugfs.h> | |
71 | ||
72 | #ifdef _MODULE_SCIF_ | |
73 | #include <linux/mman.h> | |
74 | #include <linux/pagemap.h> | |
75 | #include <asm/uaccess.h> | |
76 | #include <linux/poll.h> | |
77 | #include <linux/mmzone.h> | |
78 | #include <linux/version.h> | |
#endif /* _MODULE_SCIF_ */
80 | ||
81 | #include <linux/notifier.h> | |
82 | #include "scif.h" | |
83 | #include "mic/micbaseaddressdefine.h" | |
84 | #include "mic/micsboxdefine.h" | |
85 | ||
86 | /* The test runs in a separate thread context from the bottom | |
87 | * half that processes messages from the card and setup p2p | |
88 | * when these run concurrently, p2p messages get lost since they | |
89 | * may be consumed by the test thread | |
90 | */ | |
91 | //#define ENABLE_TEST // Used to enable testing at board connect | |
92 | #ifdef MIC_IS_EMULATION | |
93 | #define TEST_LOOP 2 | |
94 | #else | |
95 | #define TEST_LOOP 2000 | |
96 | #endif | |
97 | ||
98 | //#define P2P_HACK 0 | |
99 | #include "scif.h" | |
100 | #include "scif_ioctl.h" | |
101 | ||
102 | #define SCIF_READY_MAGIC_NUM 0x1eedfee0 | |
103 | ||
104 | #ifndef SCIF_MAJOR | |
105 | #define SCIF_MAJOR 0 /* Use dynamic major number by default */ | |
106 | #endif | |
107 | ||
108 | #define SCIF_HOST_NODE 0 // By default the host is always node zero | |
109 | ||
110 | #define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000 | |
111 | /* | |
112 | * The overhead for proxying a P2P DMA read to convert it to | |
113 | * a DMA write by sending a SCIF Node QP message has been | |
114 | * seen to be higher than programming a P2P DMA Read on self | |
115 | * for transfer sizes less than the PROXY_DMA_THRESHOLD. | |
116 | * The minimum threshold is different for Jaketown versus | |
117 | * Ivytown and tuned for best DMA performance. | |
118 | */ | |
119 | #define SCIF_PROXY_DMA_THRESHOLD_JKT (32 * 1024ULL) | |
120 | #define SCIF_PROXY_DMA_THRESHOLD_IVT (1024 * 1024ULL) | |
121 | ||
122 | //#define RMA_DEBUG 0 | |
123 | ||
124 | /* Pre-defined L1_CACHE_SHIFT is 6 on RH and 7 on Suse */ | |
125 | #undef L1_CACHE_SHIFT | |
126 | #define L1_CACHE_SHIFT 6 | |
127 | #undef L1_CACHE_BYTES | |
128 | #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) | |
129 | ||
130 | #define MI_EPLOCK_HELD (true) | |
131 | #define MAX_RDMASR 8 | |
132 | ||
// Device wide SCIF information: a single global instance (ms_info, declared
// below) holds node identity, the endpoint state lists, the locks guarding
// them, and the workqueues that service deferred SCIF work.
struct micscif_info {
	uint32_t mi_nodeid;		// Node ID this node is to others.

	struct mutex mi_conflock;	// Configuration lock (used in p2p setup)
	uint32_t mi_maxid;		// Max known board ID
	uint32_t mi_total;		// Total number of running interfaces
	uint32_t mi_nr_zombies;		// Keep track of the number of zombie EP.
	unsigned long mi_mask;		// bit mask of online scif interfaces
	uint64_t mi_nr_ioremap;		// Keep track of number of ioremap() calls on the host
					// to decide when to purge aliases for performance.
	spinlock_t mi_eplock;		// Guards mi_listen/mi_zombie (see helpers below).
	spinlock_t mi_connlock;		// Guards mi_connected/mi_disconnected.
	spinlock_t mi_rmalock;		// Synchronize access to list of temporary registered
					// windows to be destroyed.
	struct mutex mi_fencelock;	// Synchronize access to list of remote fences requested.
	struct mutex mi_event_cblock;	// Guards mi_event_cb handler list.
	spinlock_t mi_nb_connect_lock;	// Guards mi_nb_connect_list.

	struct list_head mi_uaccept;	// List of user acceptreq waiting for acceptreg
	struct list_head mi_listen;	// List of listening end points
	struct list_head mi_zombie;	// List of zombie end points with pending RMA's.
	struct list_head mi_connected;	// List of end points in connected state
	struct list_head mi_disconnected;	// List of end points in disconnected state
	struct list_head mi_rma;	// List of temporary registered windows to be destroyed.
	struct list_head mi_rma_tc;	// List of temporary registered & cached windows
					// to be destroyed.
	struct list_head mi_fence;	// List of remote fence requests.
	struct list_head mi_event_cb;	/* List of event handlers registered */
	struct list_head mi_nb_connect_list;	// Pending non-blocking connect requests.
#ifdef CONFIG_MMU_NOTIFIER
	struct list_head mi_mmu_notif_cleanup;	// Deferred MMU-notifier cleanup items.
#endif
	struct notifier_block mi_panic_nb;	// Hook into the kernel panic notifier chain.
#ifndef _MIC_SCIF_
	/* The host needs to keep track of node dependencies in form of graph.
	 * This will need to be dynamically grown to support hotplug.
	 */
	uint32_t **mi_depmtrx;
	/*
	 * Wait queue used for blocking while waiting for nodes
	 * to respond for disconnect message sent from host.
	 */
	wait_queue_head_t mi_disconn_wq;
	/* Status of node remove operation */
	uint64_t mi_disconnect_status;
	atomic_long_t mi_unique_msgid;	// Source of unique message IDs on the host.
#endif
	/*
	 * Watchdog timeout on the host. Timer expiry will result in the host
	 * treating the remote node as a lost node. Default value is
	 * DEFAULT_WATCHDOG_TO and can be modified to a value greater than 1
	 * second via SCIF sysfs watchdog_to entry.
	 */
	int mi_watchdog_to;		// Watchdog timeout
	int mi_watchdog_enabled;	// Watchdog timeout enabled
	int mi_watchdog_auto_reboot;	// Watchdog auto reboot enabled
	struct workqueue_struct *mi_misc_wq;	// Workqueue for miscellaneous SCIF tasks.
	struct work_struct mi_misc_work;	// Work item queued by micscif_queue_for_cleanup().
#ifdef CONFIG_MMU_NOTIFIER
	struct workqueue_struct *mi_mmu_notif_wq;	// Workqueue for MMU notifier cleanup tasks.
	struct work_struct mi_mmu_notif_work;
#endif
	int nr_gtt_entries;		// GTT Debug Counter to detect leaks
	uint64_t nr_2mb_pages;		// Debug Counter for number of 2mb pages.
	uint64_t nr_4k_pages;		// Debug Counter for number of 4K pages
	uint8_t en_msg_log;		// Non-zero enables message logging (see qp code) — TODO confirm scope.
	wait_queue_head_t mi_exitwq;
	unsigned long mi_rma_tc_limit;	// Limit for the temp cached window list (SCIF_RMA_TEMP_CACHE_LIMIT).
	uint64_t mi_proxy_dma_threshold;	// Min size to proxy a P2P DMA read (JKT/IVT constants above).
#ifdef RMA_DEBUG
	atomic_long_t rma_mm_cnt;
	atomic_long_t rma_unaligned_cpu_cnt;
	atomic_long_t rma_alloc_cnt;
	atomic_long_t rma_pin_cnt;
#ifdef CONFIG_MMU_NOTIFIER
	atomic_long_t mmu_notif_cnt;
#endif
#endif
#ifdef _MIC_SCIF_
	int mi_intr_rcnt[MAX_RDMASR];	// Ref count to track SCIF Interrupt Handlers
#endif
	struct workqueue_struct *mi_conn_wq;	// Connection handling workqueue (flushed in scif_invalidate_ep()).
	struct work_struct mi_conn_work;
};
219 | ||
220 | extern struct micscif_info ms_info; | |
221 | ||
222 | #define SCIF_NODE_MAGIC_BIT 63 | |
223 | /* Magic value used to indicate a remote idle node without grabbing any locks */ | |
224 | #define SCIF_NODE_IDLE (1ULL << SCIF_NODE_MAGIC_BIT) | |
225 | ||
/* Life-cycle states of a SCIF device (remote node) as tracked by this node.
 * scifdev_alive() treats RUNNING and SLEEPING peers as reachable. */
enum scif_state {
	SCIFDEV_NOTPRESENT,
	SCIFDEV_INIT,
	SCIFDEV_RUNNING,
	SCIFDEV_SLEEPING,
	SCIFDEV_STOPPING,
	SCIFDEV_STOPPED
};
234 | ||
235 | extern bool mic_p2p_enable; | |
236 | extern bool mic_p2p_proxy_enable; | |
237 | extern bool mic_reg_cache_enable; | |
238 | extern bool mic_ulimit_check; | |
/* p2p mapping from node id to peer id.
 * Each entry describes one peer's MMIO and aperture mappings; the two-element
 * arrays are indexed by PPI_MMIO / PPI_APER defined inside the struct. */
struct scif_p2p_info {
	int ppi_peer_id;		// Node ID of the peer this mapping refers to.
	struct scatterlist *ppi_sg[2];	// Scatterlists, one per region (MMIO/aperture).
	uint64_t sg_nentries[2];	// no of entries in scatterlists
	dma_addr_t ppi_pa[2];		// one for mmio; one for aper
	dma_addr_t ppi_mic_addr[2];	// one for mmio; one for aper
	uint64_t ppi_len[2];		// Length of each mapped region.
#define PPI_MMIO 0
#define PPI_APER 1
	enum scif_state ppi_disc_state;	// Disconnection state of this peer node.
	struct list_head ppi_list;	// Link on the owning micscif_dev's sd_p2p list.
};
252 | ||
/* one per remote node: all per-peer communication state (queue pairs,
 * workqueues, reference counts and wait queues). */
struct micscif_dev {
	uint16_t sd_node;		// Node ID of this remote node.
	enum scif_state sd_state;	// Current life-cycle state (enum scif_state).
	volatile void *mm_sbox;		// Mapped sbox region — assumed MMIO per micsboxdefine.h; TODO confirm.
	uint64_t sd_base_addr;		/* Remote node's base bus addr
					 * for the local node's aperture
					 */
#ifndef _MIC_SCIF_
	struct list_head sd_p2p;	/* List of bus addresses for
					 * other nodes, these are allocated
					 * by the host driver and are
					 * valid only on the host node
					 */
	struct delayed_work sd_watchdog_work;
	wait_queue_head_t sd_watchdog_wq;
	struct workqueue_struct *sd_ln_wq;
	char sd_ln_wqname[16];
#endif

	int n_qpairs;			/* FIXME:
					 * This is always set to 1,
					 */

	struct micscif_qp *qpairs;	/* Same FIXME as above
					 * There is single qp established
					 * with this remote node
					 */

	struct workqueue_struct *sd_intr_wq;	/* sd_intr_wq & sd_intr_bh
						 * together constitute the workqueue
						 * infrastructure needed to
						 * run the bottom half handler
						 * for messages received from
						 * this node
						 */
	char sd_intr_wqname[16];
	struct work_struct sd_intr_bh;
	unsigned int sd_intr_handle;
	uint32_t sd_rdmasr;
	struct workqueue_struct *sd_loopb_wq;	// Workqueue for loopback message delivery.
	char sd_loopb_wqname[16];
	struct work_struct sd_loopb_work;
	struct list_head sd_loopb_recv_q;	// Queue of received loopback messages.
	/* Lock to synchronize remote node state transitions */
	struct mutex sd_lock;
	/*
	 * Global Ref count per SCIF device tracking all SCIF API's which
	 * might communicate across PCIe.
	 */
	atomic_long_t scif_ref_cnt;
	/*
	 * Global Ref count per SCIF device tracking scif_mmap()/
	 * scif_get_pages(). sd_lock protects scif_map_ref_cnt
	 * hence it does not need to be an atomic operation. Note that
	 * scif_mmap()/scif_get_pages() is not in the critical
	 * perf path.
	 */
	int scif_map_ref_cnt;
	/*
	 * Wait queue used for blocking while waiting for nodes
	 * to wake up or to be removed.
	 */
	wait_queue_head_t sd_wq;
	uint64_t sd_wait_status;
#ifdef _MIC_SCIF_
	wait_queue_head_t sd_p2p_wq;
	bool sd_proxy_dma_reads;	// True when P2P DMA reads are proxied (see threshold above).
	struct delayed_work sd_p2p_dwork;
	int sd_p2p_retry;
#endif
	/*
	 * The NUMA node the peer is attached to on the host.
	 */
	int sd_numa_node;
	/*
	 * Waitqueue for blocking while waiting for remote memory
	 * mappings to drop to zero.
	 */
	wait_queue_head_t sd_mmap_wq;

	/* When a nodeqp message is received, this is set.
	 * And it is reset by the watchdog timer. */
	atomic_t sd_node_alive;
	int num_active_conn;		// Active connection count — see get/put_conn_count().
#ifdef ENABLE_TEST
	struct workqueue_struct *producer;
	struct workqueue_struct *consumer;
	char producer_name[16];
	char consumer_name[16];
	struct work_struct producer_work;
	struct work_struct consumer_work;
	int count;
	int test_done;
#endif // ENABLE_TEST
};
349 | ||
350 | extern struct micscif_dev scif_dev[]; | |
351 | ||
352 | #include "mic/micscif_nodeqp.h" | |
353 | #include "mic/micscif_nm.h" | |
354 | #include "mic/micscif_smpt.h" | |
355 | #include "mic/micscif_va_gen.h" | |
356 | #include "mic/mic_dma_api.h" | |
357 | #include "mic/mic_dma_lib.h" | |
358 | #include "mic/micscif_rma.h" | |
359 | #include "mic/micscif_rma_list.h" | |
360 | ||
/*
 * data structure used to sync SCIF_GET_NODE_INFO messaging:
 * the requester blocks on 'wq' until 'state' indicates the reply arrived.
 */
struct get_node_info {
	enum micscif_msg_state state;	// Progress of the request/reply exchange.
	wait_queue_head_t wq;		// Requester sleeps here awaiting the reply.
};
368 | ||
/*
 * align_low() - Round 'data' down to the nearest multiple of 'granularity'.
 * @granularity must be a power of two (same precondition as the kernel's
 * ALIGN() macro, which the previous formulation relied on).
 */
static inline uint64_t align_low(uint64_t data, uint32_t granularity)
{
	uint64_t mask = (uint64_t)granularity - 1;

	return data & ~mask;
}
373 | ||
374 | #define SCIF_MIN(a, b) (((a) < (b)) ? (a) : (b)) | |
375 | #define SCIF_MAX(a, b) (((a) > (b)) ? (a) : (b)) | |
376 | ||
/* Endpoint connection state machine. "External" states are the ones the SCIF
 * API exposes to callers; "Internal" states are transitional or private to
 * the driver. scif_ep_states[] (below) provides printable names. */
enum endptstate {
	SCIFEP_CLOSED,		// Internal state
	SCIFEP_UNBOUND,		// External state
	SCIFEP_BOUND,		// External state
	SCIFEP_LISTENING,	// External state
	SCIFEP_CONNECTED,	// External state
	SCIFEP_CONNECTING,	// Internal state
	SCIFEP_MAPPING,		// Internal state
	SCIFEP_CLOSING,		// Internal state
	SCIFEP_CLLISTEN,	// Internal state
	SCIFEP_DISCONNECTED,	// Internal state
	SCIFEP_ZOMBIE		// Internal state
};
390 | ||
391 | extern char *scif_ep_states[]; | |
392 | ||
// Used for coordinating connection accept sequence. This is the data structure
// for the conlist in the endpoint: one entry per pending connection request.
struct conreq {
	struct nodemsg msg;	// The node QP message that initiated the request.
	struct list_head list;	// Link on the listening endpoint's conlist.
};
399 | ||
400 | /* Size of the RB for the Node QP */ | |
401 | #define NODE_QP_SIZE 0x10000 | |
402 | /* Size of the RB for the Endpoint QP */ | |
403 | #define ENDPT_QP_SIZE 0x1000 | |
404 | ||
/* Per-endpoint queue pair bookkeeping: the QP itself plus the addresses
 * exchanged during connection setup. */
struct endpt_qp_info {
	/* Qpair for this endpoint */
	struct micscif_qp *qp;
	/*
	 * Physical addr of the QP for Host or
	 * GTT offset of the QP for MIC.
	 * Required for unmapping the QP during close.
	 */
	dma_addr_t qp_offset;
	/*
	 * Payload in a SCIF_CNCT_GNT message containing the
	 * physical address of the remote_qp.
	 */
	dma_addr_t cnct_gnt_payload;
};
420 | ||
421 | #define SCIFEP_MAGIC 0x5c1f000000005c1f | |
422 | ||
/* A SCIF endpoint: one side of a (potential) connection. Lives on exactly
 * one of the ms_info state lists via 'list'; 'state' tracks its place in
 * the endptstate machine and 'lock' guards state changes. */
struct endpt {
	volatile enum endptstate state;	// Current state; volatile — read without the lock in places.
	spinlock_t lock;		// Guards state and per-endpoint queues.

	struct scif_portID port;	// Local (node, port) identity.
	struct scif_portID peer;	// Remote (node, port) once connected.

	int backlog;			// Listen backlog (max queued conreqs).

	struct endpt_qp_info qp_info;	// Queue pair used for endpoint messaging.
	struct endpt_rma_info rma_info;	// RMA window/registration state.
	/*
	 * scifdev used by this endpt to communicate with remote node.
	 */
	struct micscif_dev *remote_dev;
	uint64_t remote_ep;		// Opaque handle identifying the peer endpoint.
	/*
	 * Keep track of number of connection requests.
	 */
	int conreqcnt;
	/*
	 * Cache remote SCIF device state.
	 */
	enum scif_state sd_state;
	/*
	 * True if the endpoint was created
	 * via scif_accept(..).
	 */
	bool accepted_ep;
	/*
	 * Open file information used to match the id passed
	 * in with the flush routine.
	 */
	struct files_struct *files;
	/*
	 * Reference count for functions using this endpoint.
	 */
	struct kref ref_count;
	struct list_head conlist;	// Pending struct conreq entries (see miscscif_get_connection_request()).
	wait_queue_head_t conwq;	// Waiters for connection establishment.
	wait_queue_head_t disconwq;	// Waiters for disconnect completion.
	wait_queue_head_t diswq;	// Additional disconnect-path waitqueue — TODO: clarify vs disconwq.
	wait_queue_head_t sendwq;	// Blocked senders; woken by scif_wakeup_ep()/scif_invalidate_ep().
	wait_queue_head_t recvwq;	// Blocked receivers; woken likewise.
	struct mutex sendlock;		// Serializes send-side operations.
	struct mutex recvlock;		// Serializes receive-side operations.
	struct list_head list;		// Link on one ms_info state list (listen/connected/...).

#ifdef CONFIG_MMU_NOTIFIER
	struct list_head mmu_list;
#endif

	struct list_head li_accept;	/* pending ACCEPTREG */
	int acceptcnt;			/* pending ACCEPTREG cnt */
	struct list_head liacceptlist;	/* link to listen accept */
	struct list_head miacceptlist;	/* link to mi_uaccept */
	struct endpt *listenep;		/* associated listen ep */

	/* Non-blocking connect */
	struct work_struct conn_work;	// Deferred connect work (micscif_conn_handler()).
	struct scif_portID conn_port;	// Destination of the pending connect.
	int conn_err;			// Result of the async connect attempt.
	int conn_async_state;		// Progress of the async connect.
	wait_queue_head_t conn_pend_wq;	// Waiters for async connect completion.
	struct list_head conn_list;	// Link on ms_info.mi_nb_connect_list.
};
489 | ||
/*
 * micscif_queue_for_cleanup() - Queue an RMA window for deferred destruction.
 * @window: the registered window to retire
 * @list:   destination cleanup list (one of the ms_info.mi_rma* lists)
 *
 * Stamps the window with the current DMA mark of its endpoint's channel so
 * the cleanup path can wait for in-flight DMAs, links it onto @list under
 * mi_rmalock, and kicks the misc workqueue to perform the actual teardown.
 */
static __always_inline void
micscif_queue_for_cleanup(struct reg_range_t *window, struct list_head *list)
{
	struct endpt *ep = (struct endpt *)window->ep;
	INIT_LIST_HEAD(&window->list_member);
	window->dma_mark = get_dma_mark(ep->rma_info.dma_chan);
	spin_lock(&ms_info.mi_rmalock);
	list_add_tail(&window->list_member, list);
	spin_unlock(&ms_info.mi_rmalock);
	queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work);
}
501 | ||
/*
 * __micscif_rma_destroy_tcw_helper() - Retire a temporary cached window.
 *
 * Unlinks @window from its current list and hands it to the deferred
 * cleanup machinery on the mi_rma_tc list. Caller is responsible for any
 * locking on the window's current list (double-underscore convention).
 */
static __always_inline void
__micscif_rma_destroy_tcw_helper(struct reg_range_t *window)
{
	list_del(&window->list_member);
	micscif_queue_for_cleanup(window, &ms_info.mi_rma_tc);
}
508 | ||
509 | void print_ep_state(struct endpt *ep, char *label); | |
510 | ||
511 | // Function prototypes needed by Unix/Linux drivers linking to scif | |
512 | int scif_fdopen(struct file *f); | |
513 | int scif_fdclose(struct file *f); | |
514 | int scif_process_ioctl(struct file *f, unsigned int cmd, uint64_t arg); | |
515 | int micscif_mmap(struct file *file, struct vm_area_struct *vma); | |
516 | int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd); | |
517 | void scif_munmap(struct vm_area_struct *vma); | |
518 | void scif_proc_init(void); | |
519 | void scif_proc_cleanup(void); | |
520 | int scif_user_send(scif_epd_t epd, void *msg, int len, int flags); | |
521 | int scif_user_recv(scif_epd_t epd, void *msg, int len, int flags); | |
522 | int __scif_pin_pages(void *addr, size_t len, int *out_prot, | |
523 | int map_flags, scif_pinned_pages_t *pages); | |
524 | scif_epd_t __scif_open(void); | |
525 | int __scif_bind(scif_epd_t epd, uint16_t pn); | |
526 | int __scif_listen(scif_epd_t epd, int backlog); | |
527 | int __scif_connect(scif_epd_t epd, struct scif_portID *dst, bool non_block); | |
528 | int __scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t | |
529 | *newepd, int flags); | |
530 | int __scif_close(scif_epd_t epd); | |
531 | int __scif_send(scif_epd_t epd, void *msg, int len, int flags); | |
532 | int __scif_recv(scif_epd_t epd, void *msg, int len, int flags); | |
533 | off_t __scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, | |
534 | int prot_flags, int map_flags); | |
535 | int __scif_unregister(scif_epd_t epd, off_t offset, size_t len); | |
536 | int __scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t | |
537 | roffset, int rma_flags); | |
538 | int __scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t | |
539 | roffset, int rma_flags); | |
540 | int __scif_fence_mark(scif_epd_t epd, int flags, int *mark); | |
541 | int __scif_fence_wait(scif_epd_t epd, int mark); | |
542 | int __scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, off_t roff, | |
543 | uint64_t rval, int flags); | |
544 | off_t __scif_register_pinned_pages(scif_epd_t epd, | |
545 | scif_pinned_pages_t pinned_pages, off_t offset, int map_flags); | |
546 | int __scif_get_pages(scif_epd_t epd, off_t offset, size_t len, | |
547 | struct scif_range **pages); | |
548 | int __scif_put_pages(struct scif_range *pages); | |
549 | int __scif_flush(scif_epd_t epd); | |
550 | ||
551 | void micscif_misc_handler(struct work_struct *work); | |
552 | void micscif_conn_handler(struct work_struct *work); | |
553 | ||
554 | uint16_t rsrv_scif_port(uint16_t port); | |
555 | uint16_t get_scif_port(void); | |
556 | void put_scif_port(uint16_t port); | |
557 | ||
558 | void micscif_send_exit(void); | |
559 | ||
560 | void scif_ref_rel(struct kref *kref_count); | |
561 | ||
562 | #ifdef _MODULE_SCIF_ | |
563 | unsigned int micscif_poll(struct file *f, poll_table *wait); | |
564 | unsigned int scif_pollfd(struct file *f, poll_table *wait, scif_epd_t epd); | |
565 | unsigned int __scif_pollfd(struct file *f, poll_table *wait, struct endpt *ep); | |
566 | int micscif_flush(struct file *f, fl_owner_t id); | |
567 | #endif | |
568 | ||
569 | #ifdef _MIC_SCIF_ | |
570 | void mic_debug_init(void); | |
571 | void micscif_get_node_info(void); | |
572 | void scif_poll_qp_state(struct work_struct *work); | |
573 | #endif | |
574 | void mic_debug_uninit(void); | |
575 | ||
576 | #define serializing_request(x) ((void)*(volatile uint8_t*)(x)) | |
577 | ||
578 | // State list helper functions. | |
579 | // Each of these functions must be called with the end point lock unlocked. If | |
580 | // the end point is found on the list the end point returned will have its lock | |
581 | // set and sflags will return the value to be used to do an unlock_irqrestore | |
582 | // at the end of the calling function. | |
/*
 * micscif_find_listen_ep() - Find the listening endpoint bound to @port.
 * @port:   local port number to search for
 * @sflags: out-param receiving the saved irq flags when an endpoint is found
 *
 * On success returns the endpoint with ep->lock HELD and irqs still
 * disabled; the caller must finish with spin_unlock_irqrestore(&ep->lock,
 * *sflags). Returns NULL (with all locks released) when no listener matches.
 */
static inline struct endpt *
micscif_find_listen_ep(uint16_t port, unsigned long *sflags)
{
	struct endpt *ep = NULL;
	struct list_head *pos, *tmpq;
	unsigned long flags;

	spin_lock_irqsave(&ms_info.mi_eplock, flags);
	list_for_each_safe(pos, tmpq, &ms_info.mi_listen) {
		ep = list_entry(pos, struct endpt, list);
		if (ep->port.port == port) {
			// Lock hand-off: take the endpoint lock before
			// dropping the list lock so the endpoint cannot be
			// removed in between; the saved flags travel to the
			// caller via *sflags.
			*sflags = flags;
			spin_lock(&ep->lock);
			spin_unlock(&ms_info.mi_eplock);
			return ep;
		}
	}
	spin_unlock_irqrestore(&ms_info.mi_eplock, flags);
	return (struct endpt *)NULL;
}
603 | ||
604 | // Must be called with end point locked | |
605 | static inline struct conreq * | |
606 | miscscif_get_connection_request(struct endpt *ep, uint64_t payload) | |
607 | { | |
608 | struct conreq *conreq; | |
609 | struct list_head *pos, *tmpq; | |
610 | ||
611 | list_for_each_safe(pos, tmpq, &ep->conlist) { | |
612 | conreq = list_entry(pos, struct conreq, list); | |
613 | if (conreq->msg.payload[0] == payload) { | |
614 | list_del(pos); | |
615 | ep->conreqcnt--; | |
616 | return conreq; | |
617 | } | |
618 | } | |
619 | return (struct conreq *)NULL; | |
620 | } | |
621 | ||
622 | // There is no requirement for the callee to have the end point | |
623 | // locked like other API's above. | |
624 | static inline void | |
625 | micscif_remove_zombie_ep(struct endpt *ep) | |
626 | { | |
627 | struct list_head *pos, *tmpq; | |
628 | unsigned long sflags; | |
629 | struct endpt *tmpep; | |
630 | ||
631 | spin_lock_irqsave(&ms_info.mi_eplock, sflags); | |
632 | list_for_each_safe(pos, tmpq, &ms_info.mi_zombie) { | |
633 | tmpep = list_entry(pos, struct endpt, list); | |
634 | if (tmpep == ep) { | |
635 | list_del(pos); | |
636 | ms_info.mi_nr_zombies--; | |
637 | } | |
638 | } | |
639 | spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); | |
640 | } | |
641 | ||
642 | static inline void | |
643 | micscif_cleanup_zombie_epd(void) | |
644 | { | |
645 | struct list_head *pos, *tmpq; | |
646 | unsigned long sflags; | |
647 | struct endpt *ep; | |
648 | ||
649 | spin_lock_irqsave(&ms_info.mi_eplock, sflags); | |
650 | list_for_each_safe(pos, tmpq, &ms_info.mi_zombie) { | |
651 | ep = list_entry(pos, struct endpt, list); | |
652 | if (micscif_rma_ep_can_uninit(ep)) { | |
653 | list_del(pos); | |
654 | ms_info.mi_nr_zombies--; | |
655 | va_gen_destroy(&ep->rma_info.va_gen); | |
656 | kfree(ep); | |
657 | } | |
658 | } | |
659 | spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); | |
660 | } | |
661 | ||
662 | #define SCIF_WAKE_UP_SEND (1 << 1) | |
663 | #define SCIF_WAKE_UP_RECV (1 << 2) | |
664 | ||
665 | /** | |
666 | * scif_wakeup_ep() - Wake up all clients based on the type | |
667 | * requested i.e. threads blocked in scif_send(..) and/or scif_recv(..). | |
668 | */ | |
669 | static inline void | |
670 | scif_wakeup_ep(int type) | |
671 | { | |
672 | struct endpt *ep; | |
673 | unsigned long sflags; | |
674 | struct list_head *pos, *tmpq; | |
675 | ||
676 | spin_lock_irqsave(&ms_info.mi_connlock, sflags); | |
677 | list_for_each_safe(pos, tmpq, &ms_info.mi_connected) { | |
678 | ep = list_entry(pos, struct endpt, list); | |
679 | if (type & SCIF_WAKE_UP_SEND) | |
680 | wake_up_interruptible(&ep->sendwq); | |
681 | if (type & SCIF_WAKE_UP_RECV) | |
682 | wake_up_interruptible(&ep->recvwq); | |
683 | } | |
684 | spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); | |
685 | } | |
686 | ||
687 | /* | |
688 | * is_self_scifdev: | |
689 | * @dev: The remote SCIF Device | |
690 | * | |
691 | * Returns true if the SCIF Device passed is the self aka Loopback SCIF device. | |
692 | */ | |
693 | static inline int is_self_scifdev(struct micscif_dev *dev) | |
694 | { | |
695 | return dev->sd_node == ms_info.mi_nodeid; | |
696 | } | |
697 | ||
698 | /* | |
699 | * is_p2p_scifdev: | |
700 | * @dev: The remote SCIF Device | |
701 | * | |
702 | * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device. | |
703 | */ | |
704 | static inline bool is_p2p_scifdev(struct micscif_dev *dev) | |
705 | { | |
706 | #ifdef _MIC_SCIF_ | |
707 | return dev != &scif_dev[SCIF_HOST_NODE] && !is_self_scifdev(dev); | |
708 | #else | |
709 | return false; | |
710 | #endif | |
711 | } | |
712 | ||
713 | /* | |
714 | * get_conn_count: | |
715 | * @dev: The remote SCIF Device | |
716 | * | |
717 | * Increments the number of active SCIF connections. Callee is expected | |
718 | * to synchronize calling this API with put_conn_count. | |
719 | */ | |
720 | static __always_inline void | |
721 | get_conn_count(struct micscif_dev *dev) | |
722 | { | |
723 | dev->num_active_conn++; | |
724 | } | |
725 | ||
726 | /* | |
727 | * put_conn_count: | |
728 | * @dev: The remote SCIF Device | |
729 | * | |
730 | * Decrements the number of active connections. Callee is expected | |
731 | * to synchronize calling this API with get_conn_count. | |
732 | */ | |
733 | static __always_inline void | |
734 | put_conn_count(struct micscif_dev *dev) | |
735 | { | |
736 | dev->num_active_conn--; | |
737 | BUG_ON(dev->num_active_conn < 0); | |
738 | } | |
739 | ||
740 | /* | |
741 | * get_kref_count: | |
742 | * epd: SCIF endpoint | |
743 | * | |
744 | * Increments kmod endpoint reference count. Callee is expected | |
745 | * to synchronize calling this API with put_kref_count. | |
746 | */ | |
747 | static __always_inline void | |
748 | get_kref_count(scif_epd_t epd) | |
749 | { | |
750 | kref_get(&(epd->ref_count)); | |
751 | } | |
752 | ||
753 | /* | |
754 | * put_kref_count: | |
755 | * epd: SCIF endpoint | |
756 | * | |
757 | * Decrements kmod endpoint reference count. Callee is expected | |
758 | * to synchronize calling this API with get_kref_count. | |
759 | */ | |
760 | static __always_inline void | |
761 | put_kref_count(scif_epd_t epd) | |
762 | { | |
763 | kref_put(&(epd->ref_count), scif_ref_rel); | |
764 | } | |
765 | ||
766 | /* | |
767 | * is_scifdev_alive: | |
768 | * @dev: The remote SCIF Device | |
769 | * | |
770 | * Returns true if the remote SCIF Device is running or sleeping for | |
771 | * this endpoint. | |
772 | */ | |
773 | static inline int scifdev_alive(struct endpt *ep) | |
774 | { | |
775 | return (((SCIFDEV_RUNNING == ep->remote_dev->sd_state) || | |
776 | (SCIFDEV_SLEEPING == ep->remote_dev->sd_state)) && | |
777 | SCIFDEV_RUNNING == ep->sd_state); | |
778 | } | |
779 | ||
780 | /* | |
781 | * verify_epd: | |
782 | * ep: SCIF endpoint | |
783 | * | |
784 | * Checks several generic error conditions and returns the | |
785 | * appropiate error. | |
786 | */ | |
787 | static inline int verify_epd(struct endpt *ep) | |
788 | { | |
789 | if (ep->state == SCIFEP_DISCONNECTED) | |
790 | return -ECONNRESET; | |
791 | ||
792 | if (ep->state != SCIFEP_CONNECTED) | |
793 | return -ENOTCONN; | |
794 | ||
795 | if (!scifdev_alive(ep)) | |
796 | return -ENODEV; | |
797 | ||
798 | return 0; | |
799 | } | |
800 | ||
/**
 * scif_invalidate_ep() - Set remote SCIF device state for all connected
 * and disconnected endpoints for a particular node to SCIFDEV_STOPPED,
 * change endpoint state to disconnected and wake up all send/recv/con
 * waitqueues.
 * @node: node ID whose endpoints are to be invalidated
 *
 * Called when a remote node is lost or stopped. Connected endpoints are
 * migrated onto the disconnected list so blocked callers fail out with
 * a disconnect error.
 */
static inline void
scif_invalidate_ep(int node)
{
	struct endpt *ep;
	unsigned long sflags;
	struct list_head *pos, *tmpq;

	spin_lock_irqsave(&ms_info.mi_connlock, sflags);
	// Pass 1: endpoints already disconnected just get their cached
	// device state stamped STOPPED.
	list_for_each_safe(pos, tmpq, &ms_info.mi_disconnected) {
		ep = list_entry(pos, struct endpt, list);
		if (ep->remote_dev->sd_node == node) {
			spin_lock(&ep->lock);
			ep->sd_state = SCIFDEV_STOPPED;
			spin_unlock(&ep->lock);
		}
	}
	// Pass 2: connected endpoints are unlinked, their connection count
	// released, moved to the disconnected list, and all waiters woken so
	// they observe the disconnect.
	list_for_each_safe(pos, tmpq, &ms_info.mi_connected) {
		ep = list_entry(pos, struct endpt, list);
		if (ep->remote_dev->sd_node == node) {
			list_del(pos);
			put_conn_count(ep->remote_dev);
			spin_lock(&ep->lock);
			ep->state = SCIFEP_DISCONNECTED;
			list_add_tail(&ep->list, &ms_info.mi_disconnected);
			ep->sd_state = SCIFDEV_STOPPED;
			wake_up_interruptible(&ep->sendwq);
			wake_up_interruptible(&ep->recvwq);
			wake_up_interruptible(&ep->conwq);
			spin_unlock(&ep->lock);
		}
	}
	spin_unlock_irqrestoreFlags:;
	spin_unlock_irqrestore(&ms_info.mi_connlock, sflags);
	// Drain in-flight connection work so nothing races the new state.
	flush_workqueue(ms_info.mi_conn_wq);
}
841 | ||
842 | /* | |
843 | * Only Debug Functions Below | |
844 | */ | |
845 | #define SCIF_CRUMB pr_debug("%s %d\n", __func__, __LINE__) | |
846 | ||
847 | static inline void | |
848 | micscif_display_all_zombie_ep(void) | |
849 | { | |
850 | struct list_head *pos, *tmpq; | |
851 | unsigned long sflags; | |
852 | struct endpt *ep; | |
853 | ||
854 | pr_debug("Zombie Info Start\n"); | |
855 | spin_lock_irqsave(&ms_info.mi_eplock, sflags); | |
856 | list_for_each_safe(pos, tmpq, &ms_info.mi_zombie) { | |
857 | ep = list_entry(pos, struct endpt, list); | |
858 | if (!list_empty(&ep->rma_info.reg_list)) | |
859 | micscif_display_all_windows(&ep->rma_info.reg_list); | |
860 | if (!list_empty(&ep->rma_info.remote_reg_list)) | |
861 | micscif_display_all_windows( | |
862 | &ep->rma_info.remote_reg_list); | |
863 | } | |
864 | spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); | |
865 | pr_debug("Zombie Info End\n"); | |
866 | } | |
867 | ||
868 | static inline void dump_ep(scif_epd_t epd, const char *func, int line) | |
869 | { | |
870 | struct endpt *ep = (struct endpt *)epd; | |
871 | pr_debug("%s %d state %d lock %p port.node 0x%x" | |
872 | "port.port 0x%x peer.node 0x%x peer.port 0x%x backlog %d qp %p" | |
873 | "qp_offset 0x%llx cnct_gnt_payload 0x%llx remote_dev %p\n", | |
874 | func, line, ep->state, &ep->lock, ep->port.node, | |
875 | ep->port.port, ep->peer.node, ep->peer.port, ep->backlog, | |
876 | ep->qp_info.qp, ep->qp_info.qp_offset, | |
877 | ep->qp_info.cnct_gnt_payload, ep->remote_dev); | |
878 | } | |
879 | ||
/*
 * dump_qp() - Debug dump of a queue pair's local/remote buffer addresses,
 * QP addresses and owning endpoint handle, tagged with the caller's
 * function name and line number.
 */
static inline void dump_qp(volatile struct micscif_qp *qp, const char *func, int line)
{
	pr_debug("%s %d qp %p local_buf 0x%llx"
		" local_qp 0x%llx remote_buf 0x%llx remote_qp %p ep 0x%llx\n",
		func, line, qp, qp->local_buf,
		qp->local_qp, qp->remote_buf, qp->remote_qp, qp->ep);
}
887 | ||
/*
 * dump_rb() - Debug dump of a SCIF ring buffer: base address, the shared
 * read/write pointers, size, and the current/old read/write offsets
 * (cro/cwo/ocro/ocwo), tagged with the caller's function name and line.
 */
static inline void dump_rb(struct micscif_rb *rb, const char *func, int line)
{
	pr_debug("%s %d rb %p rb_base %p *read_ptr 0x%x"
		" *write_ptr 0x%x size 0x%x"
		" cro 0x%x cwo 0x%x ocro 0x%x ocwo 0x%x\n",
		func, line, rb, rb->rb_base, *rb->read_ptr,
		*rb->write_ptr, rb->size, rb->current_read_offset,
		rb->current_write_offset,
		rb->old_current_read_offset,
		rb->old_current_write_offset);
}
899 | ||
900 | #endif /* MICSCIF_H */ |