Updated `README.md` with instructions for building/using the kernel module.
[xeon-phi-kernel-module] / include / scif.h
CommitLineData
800f879a
AT
1/*
2 * Copyright 2010-2017 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * Disclaimer: The codes contained in these modules may be specific to
14 * the Intel Software Development Platform codenamed Knights Ferry,
15 * and the Intel product codenamed Knights Corner, and are not backward
16 * compatible with other Intel products. Additionally, Intel will NOT
17 * support the codes or instruction set in future products.
18 *
19 * Intel offers no warranty of any kind regarding the code. This code is
20 * licensed on an "AS IS" basis and Intel is not obligated to provide
21 * any support, assistance, installation, training, or other services
22 * of any kind. Intel is also not obligated to provide any updates,
23 * enhancements or extensions. Intel specifically disclaims any warranty
24 * of merchantability, non-infringement, fitness for any particular
25 * purpose, and any other warranty.
26 *
27 * Further, Intel disclaims all liability of any kind, including but
28 * not limited to liability for infringement of any proprietary rights,
29 * relating to the use of the code, even if Intel is notified of the
30 * possibility of such liability. Except as expressly stated in an Intel
31 * license agreement provided with this code and agreed upon with Intel,
32 * no license, express or implied, by estoppel or otherwise, to any
33 * intellectual property rights is granted herein.
34 */
35
36/*
37 * Revised 15:05 11/24/2010
38 * Derived from SCIF SAS v0.41 with additional corrections
39 */
40
41#ifndef __SCIF_H__
42#define __SCIF_H__
43
44#include <linux/types.h>
45#include <linux/errno.h>
46#include <linux/poll.h>
47#include <linux/pci.h>
48
49#ifdef __cplusplus
50extern "C" {
51#endif
52
/*
 * Blocking-mode flag values for the flags argument of scif_accept(),
 * scif_send() and scif_recv():
 *  - SCIF_ACCEPT_SYNC: block until a connection request is presented
 *  - SCIF_SEND_BLOCK:  block until the entire message is sent
 *  - SCIF_RECV_BLOCK:  block until the entire message is received
 * Each flag belongs to a different call, which is why all three share the
 * value 1.
 */
53#define SCIF_ACCEPT_SYNC 1
54#define SCIF_SEND_BLOCK 1
55#define SCIF_RECV_BLOCK 1
56
57/* Start: Deprecated temporary definitions kept for compatibility */
58#define ACCEPT_SYNC SCIF_ACCEPT_SYNC
59#define SEND_BLOCK SCIF_SEND_BLOCK
60#define RECV_BLOCK SCIF_RECV_BLOCK
61/* End: Deprecated temporary definitions kept for compatibility */
62
/*
 * Window protection bits; OR one or more of them together to form the
 * prot_flags argument of scif_register().
 */
63enum {
64 SCIF_PROT_READ = (1<<0), /* allow read operations from the window */
65 SCIF_PROT_WRITE = (1<<1) /* allow write operations to the window */
66};
67
/*
 * Mapping flag bits; OR zero or more of them together to form the map_flags
 * argument of scif_register().
 * NOTE(review): SCIF_MAP_KERNEL is not described in the visible part of this
 * header; presumably it marks addr as a kernel-space address - confirm.
 */
68/* 0x40 is used internally by scif */
69enum {
70 SCIF_MAP_FIXED = 0x10, /* interpret offset exactly */
71 SCIF_MAP_KERNEL = 0x20,
72};
73
/*
 * Fence and signal flag bits. The three enums below occupy disjoint bits
 * (0 through 5), so no two values collide.
 * NOTE(review): the calls that consume these flags (presumably
 * scif_fence_mark() and scif_fence_signal()) are documented outside the
 * visible part of this header - confirm the grouping there.
 */
74enum {
75 SCIF_FENCE_INIT_SELF = (1<<0),
76 SCIF_FENCE_INIT_PEER = (1<<1)
77};
78
79enum {
80 SCIF_FENCE_RAS_SELF = (1<<2),
81 SCIF_FENCE_RAS_PEER = (1<<3)
82};
83
84enum {
85 SCIF_SIGNAL_LOCAL = (1<<4),
86 SCIF_SIGNAL_REMOTE = (1<<5)
87};
88
/*
 * Transfer mode bits; OR zero or more of them together to form the rma_flags
 * argument of the RMA calls such as scif_readfrom():
 *  - SCIF_RMA_USECPU:   copy with the CPU (programmed reads/writes) instead
 *                       of the DMA engine
 *  - SCIF_RMA_USECACHE: NOTE(review): not described in the visible part of
 *                       this header - confirm its meaning before use
 *  - SCIF_RMA_SYNC:     return only after the transfer has completed
 *  - SCIF_RMA_ORDERED:  the last cacheline (or partial cacheline) of the
 *                       source range becomes visible on the destination node
 *                       after all other transferred data
 */
89#define SCIF_RMA_USECPU 1
90#define SCIF_RMA_USECACHE (1<<1)
91#define SCIF_RMA_SYNC (1<<2)
92#define SCIF_RMA_ORDERED (1<<3)
93//! @cond (Prevent doxygen from including these)
/*
 * SCIF-prefixed aliases for the POLL* event bits.
 * NOTE(review): presumably the values stored in the events/revents members
 * of struct scif_pollepd - confirm against the scif_poll() documentation.
 */
94#define SCIF_POLLIN POLLIN
95#define SCIF_POLLOUT POLLOUT
96#define SCIF_POLLERR POLLERR
97#define SCIF_POLLHUP POLLHUP
98#define SCIF_POLLNVAL POLLNVAL
99
100/* SCIF Reserved Ports */
101/* COI */
102#define SCIF_COI_PORT_0 40
103#define SCIF_COI_PORT_1 41
104#define SCIF_COI_PORT_2 42
105#define SCIF_COI_PORT_3 43
106#define SCIF_COI_PORT_4 44
107#define SCIF_COI_PORT_5 45
108#define SCIF_COI_PORT_6 46
109#define SCIF_COI_PORT_7 47
110#define SCIF_COI_PORT_8 48
111#define SCIF_COI_PORT_9 49
112
113/* OFED */
114#define SCIF_OFED_PORT_0 60
115#define SCIF_OFED_PORT_1 61
116#define SCIF_OFED_PORT_2 62
117#define SCIF_OFED_PORT_3 63
118#define SCIF_OFED_PORT_4 64
119#define SCIF_OFED_PORT_5 65
120#define SCIF_OFED_PORT_6 66
121#define SCIF_OFED_PORT_7 67
122#define SCIF_OFED_PORT_8 68
123#define SCIF_OFED_PORT_9 69
124
125/* NETDEV */
126#define SCIF_NETDEV_PORT_0 80
127#define SCIF_NETDEV_PORT_1 81
128#define SCIF_NETDEV_PORT_2 82
129#define SCIF_NETDEV_PORT_3 83
130#define SCIF_NETDEV_PORT_4 84
131#define SCIF_NETDEV_PORT_5 85
132#define SCIF_NETDEV_PORT_6 86
133#define SCIF_NETDEV_PORT_7 87
134#define SCIF_NETDEV_PORT_8 88
135#define SCIF_NETDEV_PORT_9 89
136
137/* RAS */
138#define SCIF_RAS_PORT_0 100
139#define SCIF_RAS_PORT_1 101
140#define SCIF_RAS_PORT_2 102
141#define SCIF_RAS_PORT_3 103
142#define SCIF_RAS_PORT_4 104
143#define SCIF_RAS_PORT_5 105
144#define SCIF_RAS_PORT_6 106
145#define SCIF_RAS_PORT_7 107
146#define SCIF_RAS_PORT_8 108
147#define SCIF_RAS_PORT_9 109
148
149/* Power Management */
150#define SCIF_PM_PORT_0 120
151#define SCIF_PM_PORT_1 121
152#define SCIF_PM_PORT_2 122
153#define SCIF_PM_PORT_3 123
154#define SCIF_PM_PORT_4 124
155#define SCIF_PM_PORT_5 125
156#define SCIF_PM_PORT_6 126
157#define SCIF_PM_PORT_7 127
158#define SCIF_PM_PORT_8 128
159#define SCIF_PM_PORT_9 129
160
161/* Board Tools */
162#define SCIF_BT_PORT_0 130
163#define SCIF_BT_PORT_1 131
164#define SCIF_BT_PORT_2 132
165#define SCIF_BT_PORT_3 133
166#define SCIF_BT_PORT_4 134
167#define SCIF_BT_PORT_5 135
168#define SCIF_BT_PORT_6 136
169#define SCIF_BT_PORT_7 137
170#define SCIF_BT_PORT_8 138
171#define SCIF_BT_PORT_9 139
172
173/* MIC Boot/Configuration support */
174#define MPSSD_MONRECV 160
175#define MIC_NOTIFY 161
176#define MPSSD_CRED 162
177#define MPSSD_MONSEND 163
178#define MPSSD_MICCTRL 164
179#define MPSSD_RESV5 165
180#define MPSSD_RESV6 166
181#define MPSSD_RESV7 167
182#define MPSSD_RESV8 168
183#define MPSSD_RESV9 169
184
/*
 * End of the administrative port range.
 * NOTE(review): presumably the limit behind the scif_bind() rule that ports
 * less than 1024 can only be bound by privileged processes - confirm.
 */
185#define SCIF_ADMIN_PORT_END 1024
186
187/* MYO */
188#define SCIF_MYO_PORT_0 1025
189#define SCIF_MYO_PORT_1 1026
190#define SCIF_MYO_PORT_2 1027
191#define SCIF_MYO_PORT_3 1028
192#define SCIF_MYO_PORT_4 1029
193#define SCIF_MYO_PORT_5 1030
194#define SCIF_MYO_PORT_6 1031
195#define SCIF_MYO_PORT_7 1032
196#define SCIF_MYO_PORT_8 1033
197#define SCIF_MYO_PORT_9 1034
198
199/* SSG Tools */
200#define SCIF_ST_PORT_0 1044
201#define SCIF_ST_PORT_1 1045
202#define SCIF_ST_PORT_2 1046
203#define SCIF_ST_PORT_3 1047
204#define SCIF_ST_PORT_4 1048
205#define SCIF_ST_PORT_5 1049
206#define SCIF_ST_PORT_6 1050
207#define SCIF_ST_PORT_7 1051
208#define SCIF_ST_PORT_8 1052
209#define SCIF_ST_PORT_9 1053
210
/*
 * When scif_bind() is called with pn == 0, the port number it assigns is
 * greater than or equal to SCIF_PORT_RSVD.
 */
211/* End of SCIF Reserved Ports */
212#define SCIF_PORT_RSVD 1088
213//! @endcond
214
/*
 * Endpoint descriptor: the handle returned by scif_open() and passed as epd
 * to the other SCIF calls. struct endpt is not defined in this header, so the
 * handle is opaque to users of the API.
 */
215typedef struct endpt *scif_epd_t;
216
/*
 * Opaque handle to a set of pinned pages.
 * NOTE(review): the calls that produce and consume it are outside the visible
 * part of this header - confirm there.
 */
217typedef struct scif_pinned_pages *scif_pinned_pages_t;
218
/*
 * Describes a range of pages by their physical (and optionally virtual)
 * addresses.
 * NOTE(review): presumably the structure handed back by scif_get_pages() for
 * a registered window - confirm against that function's documentation.
 */
219struct scif_range {
220 void *cookie; /* opaque cookie; NOTE(review): owner not visible here */
221 int nr_pages; /* Number of pages in the range */
222 int prot_flags; /* R/W protection; presumably SCIF_PROT_* bits - confirm */
223 /* Arrays phys_addr/va below are virtually contiguous */
224 dma_addr_t *phys_addr; /* Array of physical addresses */
225 void **va; /* Array of virtual addresses;
226 * populated only when called
227 * on the host for a remote SCIF
228 * connection on MIC.
229 */
230};
231
/*
 * One poll request entry: an endpoint together with the events asked for and
 * the events reported back.
 * NOTE(review): presumably consumed by scif_poll() with SCIF_POLL* bits in
 * events/revents - confirm against the scif_poll() documentation.
 */
232struct scif_pollepd {
233 scif_epd_t epd; /* endpoint descriptor */
234 short events; /* requested events */
235 short revents; /* returned events */
236};
/*
 * Event kinds passed as the first argument of a scif_callback_t handler.
 * Each kind is a distinct bit.
 */
237enum scif_event_type {
238 SCIF_NODE_ADDED = 1<<0,
239 SCIF_NODE_REMOVED = 1<<1
240};
241
/*
 * Event payload passed by value as the second argument of a scif_callback_t
 * handler; it has one member per scif_event_type value.
 * NOTE(review): the members presumably carry the id of the node that was
 * added or removed (node ids are uint16_t in struct scif_portID) - confirm.
 */
242union eventd {
243 uint16_t scif_node_added;
244 uint16_t scif_node_removed;
245};
246
/*
 * Signature of an event handler: it receives the event kind and the matching
 * payload, and returns nothing.
 */
247typedef void (*scif_callback_t)(enum scif_event_type event, union eventd
248data);
249
/*
 * An event handler paired with a list_head link, so that instances can be
 * chained on a kernel linked list.
 * NOTE(review): the registration API that owns that list is outside the
 * visible part of this header - confirm there.
 */
250struct scif_callback {
251 struct list_head list_member; /* link in the list of callbacks */
252 scif_callback_t callback_handler; /* function invoked for an event */
253};
254
/*
 * Failure sentinels:
 *  - SCIF_OPEN_FAILED:     returned by scif_open() in user mode (in kernel
 *                          mode a NULL scif_epd_t is returned instead)
 *  - SCIF_REGISTER_FAILED: returned by scif_register() in user mode
 *  - SCIF_MMAP_FAILED:     NOTE(review): presumably returned by scif_mmap(),
 *                          which is documented outside this view - confirm
 */
255#define SCIF_OPEN_FAILED ((scif_epd_t)-1)
256#define SCIF_REGISTER_FAILED ((off_t)-1)
257#define SCIF_MMAP_FAILED ((void *)-1)
258
/*
 * Global port identifier: a node id plus a port number local to that node.
 * Used as the dst argument of scif_connect() and the peer argument of
 * scif_accept().
 */
259struct scif_portID {
260 uint16_t node; /* node on which port resides */
261 uint16_t port; /* Local port number */
262};
263
264/* Start: Deprecated temporary definition for compatibility */
/*
 * The macro rewrites every later use of the identifier portID, so the
 * "struct portID" in the typedef below expands to "struct scif_portID" and
 * portID_t names the same type as struct scif_portID.
 */
265#define portID scif_portID
266typedef struct portID portID_t;
267/* End: Deprecated temporary definition for compatibility */
268
269/**
270 * scif_open - Create an endpoint
271 *
272 *\return
273 * The scif_open() function creates a new endpoint.
274 *
275 * Upon successful completion, scif_open() returns an endpoint descriptor to
276 * be used in subsequent SCIF functions calls to refer to that endpoint;
277 * otherwise: in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is
278 * returned and errno is set to indicate the error; in kernel mode a NULL
279 * scif_epd_t is returned.
280 *
281 *\par Errors:
282 *- ENOMEM
283 * - Insufficient kernel memory was available.
284 *- ENXIO
285 * - Version mismatch between micscif driver and libscif.
286 */
287scif_epd_t scif_open(void);
288
289/**
290 * scif_bind - Bind an endpoint to a port
291 * \param epd endpoint descriptor
292 * \param pn port number
293 *
294 * scif_bind() binds endpoint epd to port pn, where pn is a port number on the
295 * local node. If pn is zero, a port number greater than or equal to
296 * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to
297 * exactly one local port. Ports less than 1024 when requested can only be bound
298 * by system (or root) processes or by processes executed by privileged users.
299 *
300 *\return
301 * Upon successful completion, scif_bind() returns the port number to which epd
302 * is bound; otherwise: in user mode -1 is returned and errno is set to
303 * indicate the error; in kernel mode the negative of one of the following
304 * errors is returned.
305 *
306 *\par Errors:
307 *- EBADF
308 * - epd is not a valid endpoint descriptor
309 *- EINVAL
310 * - epd is not a valid endpoint descriptor, or
311 * - The endpoint or the port are already bound.
312 *- EISCONN
313 * - The endpoint is already connected.
314 *- ENOSPC
315 * - No port number available for assignment (when pn==0).
316 *- ENOTTY
317 * - epd is not a valid endpoint descriptor
318 *- EACCES
319 * - The port requested is protected and the user is not the superuser.
320*/
321int scif_bind(scif_epd_t epd, uint16_t pn);
322
323/**
324 * scif_listen - Listen for connections on an endpoint
325 *
326 * \param epd endpoint descriptor
327 * \param backlog maximum pending connection requests
328 *
329 * scif_listen() marks the endpoint epd as a listening endpoint - that is, as
330 * an endpoint that will be used to accept incoming connection requests. Once
331 * so marked, the endpoint is said to be in the listening state and may not be
332 * used as the endpoint of a connection.
333 *
334 * The endpoint, epd, must have been bound to a port.
335 *
336 * The backlog argument defines the maximum length to which the queue of
337 * pending connections for epd may grow. If a connection request arrives when
338 * the queue is full, the client may receive an error with an indication that
339 * the connection was refused.
340 *
341 *\return
342 * Upon successful completion, scif_listen() returns 0; otherwise: in user mode
343 * -1 is returned and errno is set to indicate the error; in kernel mode the
344 * negative of one of the following errors is returned.
345 *
346 *\par Errors:
347 *- EBADF
348 * - epd is not a valid endpoint descriptor
349 *- EINVAL
350 * - epd is not a valid endpoint descriptor, or
351 * - The endpoint is not bound to a port
352 *- EISCONN
353 * - The endpoint is already connected or listening
354 *- ENOTTY
355 * - epd is not a valid endpoint descriptor
356*/
357int scif_listen(scif_epd_t epd, int backlog);
358
359/**
360 * scif_connect - Initiate a connection on a port
361 * \param epd endpoint descriptor
362 * \param dst global id of port to which to connect
363 *
364 * The scif_connect() function requests the connection of endpoint epd to remote
365 * port dst. If the connection is successful, a peer endpoint, bound to dst, is
366 * created on node dst.node. On successful return, the connection is complete.
367 *
368 * If the endpoint epd has not already been bound to a port, scif_connect()
369 * will bind it to an unused local port.
370 *
371 * A connection is terminated when an endpoint of the connection is closed,
372 * either explicitly by scif_close(), or when a process that owns one of the
373 * endpoints of a connection is terminated.
374 *
375 *\return
376 * Upon successful completion, scif_connect() returns the port ID to which the
377 * endpoint, epd, is bound; otherwise: in user mode -1 is returned and errno is
378 * set to indicate the error; in kernel mode the negative of one of the
379 * following errors is returned.
380 *
381 *\par Errors:
382 *- EBADF
383 * - epd is not a valid endpoint descriptor
384 *- ECONNREFUSED
385 * - The destination was not listening for connections or refused the
386 * connection request.
387 *- EINTR
388 * - Interrupted function
389 *- EINVAL
390 * - epd is not a valid endpoint descriptor, or
391 * - dst.port is not a valid port ID
392 *- EISCONN
393 * - The endpoint is already connected
394 *- ENOBUFS
395 * - No buffer space is available
396 *- ENODEV
397 * - The destination node does not exist, or
398 * - The node is lost.
399 *- ENOSPC
400 * - No port number available for assignment (when epd was not already bound).
401 *- ENOTTY
402 * - epd is not a valid endpoint descriptor
403 *- EOPNOTSUPP
404 * - The endpoint is listening and cannot be connected
405*/
406int scif_connect(scif_epd_t epd, struct scif_portID *dst);
407
408/**
409 * scif_accept - Accept a connection on an endpoint
410 * \param epd endpoint descriptor
411 * \param peer global id of port to which connected
412 * \param newepd new connected endpoint descriptor
413 * \param flags flags
414 *
415 * The scif_accept() call extracts the first connection request on the queue of
416 * pending connections for the port on which epd is listening. scif_accept()
417 * creates a new endpoint, bound to the same port as epd, and allocates a new
418 * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new
419 * endpoint is connected to the endpoint through which the connection was
420 * requested. epd is unaffected by this call, and remains in the listening
421 * state.
422 *
423 * On successful return, peer holds the global port identifier (node id and
424 * local port number) of the port which requested the connection.
425 *
426 * If the peer endpoint which requested the connection is closed, the endpoint
427 * returned by scif_accept() is closed.
428 *
429 * The number of connections that can (subsequently) be accepted on epd is only
430 * limited by system resources (memory).
431 *
432 * The flags argument is formed by OR'ing together zero or more of the
433 * following values:
434 *- SCIF_ACCEPT_SYNC: block until a connection request is presented. If
435 * SCIF_ACCEPT_SYNC is not in flags, and no pending
436 * connections are present on the queue, scif_accept() fails
437 * with an EAGAIN error
438 *
439 * On Linux in user mode, the select() and poll() functions can be used to
440 * determine when there is a connection request. On Microsoft Windows* and on
441 * Linux in kernel mode, the scif_poll() function may be used for this purpose.
442 * A readable event will be delivered when a connection is requested.
443 *
444 *\return
445 * Upon successful completion, scif_accept() returns 0; otherwise: in user mode
446 * -1 is returned and errno is set to indicate the error; in kernel mode the
447 * negative of one of the following errors is returned.
448 *
449 *\par Errors:
450 *- EAGAIN
451 * - SCIF_ACCEPT_SYNC is not set and no connections are present to be accepted, or
452 * - SCIF_ACCEPT_SYNC is not set and remote node failed to complete its
453 * connection request
454 *- EBADF
455 * - epd is not a valid endpoint descriptor
456 *- EINTR
457 * - Interrupted function
458 *- EINVAL
459 * - epd is not a valid endpoint descriptor, or
460 * - epd is not a listening endpoint
461 * - flags is invalid
462 * - peer is NULL
463 * - newepd is NULL
464 *- ENOBUFS
465 * - No buffer space is available
466 *- ENODEV
467 * - The requesting node is lost.
468 *- ENOMEM
469 * - Not enough space
470 *- ENOTTY
471 * - epd is not a valid endpoint descriptor
472 *- ENOENT
473 * - Secondary part of epd registration failed.
474*/
475int scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t
476*newepd, int flags);
477
478/**
479 * scif_close - Close an endpoint
480 * \param epd endpoint descriptor
481 *
482 * scif_close() closes an endpoint and performs necessary teardown of
483 * facilities associated with that endpoint.
484 *
485 * If epd is a listening endpoint then it will no longer accept connection
486 * requests on the port to which it is bound. Any pending connection requests
487 * are rejected.
488 *
489 * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs
490 * which are in-process through epd or its peer endpoint will complete before
491 * scif_close() returns. Registered windows of the local and peer endpoints are
492 * released as if scif_unregister() was called against each window.
493 *
494 * Closing an endpoint does not affect mappings to remote memory. These remain
495 * until explicitly removed by calling scif_munmap().
496 *
497 * If the peer endpoint's receive queue is not empty at the time that epd is
498 * closed, then the peer endpoint can be passed as the endpoint parameter to
499 * scif_recv() until the receive queue is empty.
500 *
501 * If epd is bound to a port, then the port is returned to the pool of
502 * available ports.
503 *
504 * epd is freed and may no longer be accessed.
505 *
506 *\return
507 * Upon successful completion, scif_close() returns 0; otherwise: in user mode
508 * -1 is returned and errno is set to indicate the error; in kernel mode the
509 * negative of one of the following errors is returned.
510 *
511 *\par Errors:
512 *- EBADF
513 * - epd is not a valid endpoint descriptor
514 *- EINVAL
515 * - epd is not a valid endpoint descriptor
516 */
517int scif_close(scif_epd_t epd);
518
519/**
520 * scif_send - Send a message
521 * \param epd endpoint descriptor
522 * \param msg message buffer address
523 * \param len message length
524 * \param flags blocking mode flags
525 *
526 * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data
527 * are copied from memory starting at address msg. On successful execution the
528 * return value of scif_send() is the number of bytes that were sent, and is
529 * zero if no bytes were sent because len was zero. scif_send() may be called
530 * only when the endpoint is in a connected state.
531 *
532 * If a scif_send() call is non-blocking, then it sends only those bytes which
533 * can be sent without waiting, up to a maximum of len bytes.
534 *
535 * If a scif_send() call is blocking, then it normally returns after sending
536 * all len bytes. If a blocking call is interrupted or the connection is
537 * forcibly closed, the call is considered successful if some bytes were sent
538 * or len is zero, otherwise the call is considered unsuccessful.
539 *
540 * On Linux in user mode, the select() and poll() functions can be used to
541 * determine when the send queue is not full. On Microsoft Windows* and on
542 * Linux in kernel mode, the scif_poll() function may be used for this purpose.
543 *
544 * It is recommended that scif_send()/scif_recv() only be used for short
545 * control-type message communication between SCIF endpoints. The SCIF RMA
546 * APIs are expected to provide better performance for transfer sizes of
547 * 1024 bytes or longer.
548 *
549 * The flags argument is formed by ORing together zero or more of the following
550 * values:
551 *- SCIF_SEND_BLOCK: block until the entire message is sent.
552 *
553 *\return
554 * Upon successful completion, scif_send() returns the number of bytes sent;
555 * otherwise: in user mode -1 is returned and errno is set to indicate the
556 * error; in kernel mode the negative of one of the following errors is
557 * returned.
558 *
559 *\par Errors:
560 *- EBADF
561 * - epd is not a valid endpoint descriptor
562 *- ECONNRESET
563 * - A connection was forcibly closed by a peer.
564 *- EFAULT
565 * - An invalid address was specified for a parameter.
566 *- EINTR
567 * - epd was closed by scif_close()
568 *- EINVAL
569 * - epd is not a valid endpoint descriptor, or
570 * - flags is invalid
571 * - len is negative
572 *- ENODEV
573 * - The remote node is lost.
574 *- ENOMEM
575 * - Not enough space
576 *- ENOTCONN
577 * - The endpoint is not connected
578 *- ENOTTY
579 * - epd is not a valid endpoint descriptor
580 */
581int scif_send(scif_epd_t epd, void *msg, int len, int flags);
582
583/**
584 * scif_recv - Receive a message
585 * \param epd endpoint descriptor
586 * \param msg message buffer address
587 * \param len message buffer length
588 * \param flags blocking mode flags
589 *
590 * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of
591 * data are copied to memory starting at address msg. On successful execution
592 * the return value of scif_recv() is the number of bytes that were received,
593 * and is zero if no bytes were received because len was zero. scif_recv() may
594 * be called only when the endpoint is in a connected state.
595 *
596 * If a scif_recv() call is non-blocking, then it receives only those bytes
597 * which can be received without waiting, up to a maximum of len bytes.
598 *
599 * If a scif_recv() call is blocking, then it normally returns after receiving
600 * all len bytes. If a blocking call is interrupted or the connection is
601 * forcibly closed, the call is considered successful if some bytes were
602 * received or len is zero, otherwise the call is considered unsuccessful;
603 * subsequent calls to scif_recv() will successfully receive all data sent
604 * through peer endpoint interruption or the connection was forcibly closed.
605 *
606 * On Linux in user mode, the select() and poll() functions can be used to
607 * determine when data is available to be received. On Microsoft Windows* and
608 * on Linux in kernel mode, the scif_poll() function may be used for this
609 * purpose.
610 *
611 * It is recommended that scif_send()/scif_recv() only be used for short
612 * control-type message communication between SCIF endpoints. The SCIF RMA
613 * APIs are expected to provide better performance for transfer sizes of
614 * 1024 bytes or longer.
615 *
616 * The flags argument is formed by ORing together zero or more of the following
617 * values:
618 *- SCIF_RECV_BLOCK: block until the entire message is received.
619 *
620 *\return
621 * Upon successful completion, scif_recv() returns the number of bytes
622 * received; otherwise: in user mode -1 is returned and errno is set to
623 * indicate the error; in kernel mode the negative of one of the following
624 * errors is returned.
625 *
626 *\par Errors:
627 *- EAGAIN
628 * - The destination node is returning from a low power state.
629 *- EBADF
630 * - epd is not a valid endpoint descriptor.
631 *- ECONNRESET
632 * - A connection was forcibly closed by a peer.
633 *- EFAULT
634 * - An invalid address was specified for a parameter.
635 *- EINVAL
636 * - epd is not a valid endpoint descriptor, or
637 * - flags is invalid, or
638 * - len is negative.
639 *- ENODEV
640 * - The remote node is lost.
641 *- ENOMEM
642 * - Not enough space.
643 *- ENOTCONN
644 * - The endpoint is not connected.
645 *- ENOTTY
646 * - epd is not a valid endpoint descriptor
647 */
648int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
649
650/**
651 * scif_register - Mark a memory region for remote access.
652 * \param epd endpoint descriptor
653 * \param addr starting virtual address
654 * \param len length of range
655 * \param offset offset of window
656 * \param prot_flags read/write protection flags
657 * \param map_flags mapping flags
658 *
659 * The scif_register() function opens a window, a range of whole pages of the
660 * registered address space of the endpoint epd, starting at offset po and
661 * continuing for len bytes. The value of po, further described below, is a
662 * function of the parameters offset and len, and the value of map_flags. Each
663 * page of the window represents the physical memory page which backs the
664 * corresponding page of the range of virtual address pages starting at addr
665 * and continuing for len bytes. addr and len are constrained to be multiples
666 * of the page size. addr is interpreted as a user space address. A successful
667 * scif_register() call returns po as the return value.
668 *
669 * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
670 * exactly, and offset is constrained to be a multiple of the page size. The
671 * mapping established by scif_register() will not replace any existing
672 * registration; an error is returned if any page within the range [offset,
673 * offset+len-1] intersects an existing window.
674 * Note: When SCIF_MAP_FIXED is set the current implementation limits
675 * offset to the range [0..2^62-1] and returns EADDRINUSE if the offset
676 * requested with SCIF_MAP_FIXED is in the range [2^62..2^63-1].
677 *
678 * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
679 * implementation-defined manner to arrive at po. The po value so chosen will
680 * be an area of the registered address space that the implementation deems
681 * suitable for a mapping of len bytes. An offset value of 0 is interpreted as
682 * granting the implementation complete freedom in selecting po, subject to
683 * constraints described below. A non-zero value of offset is taken to be a
684 * suggestion of an offset near which the mapping should be placed. When the
685 * implementation selects a value for po, it does not replace any extant
686 * window. In all cases, po will be a multiple of the page size.
687 *
688 * The physical pages which are so represented by a window are available for
689 * access in calls to scif_mmap(), scif_readfrom(), scif_writeto(),
690 * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
691 * physical pages represented by the window will not be reused by the memory
692 * subsystem for any other purpose. Note that the same physical page may be
693 * represented by multiple windows.
694 *
695 * Subsequent operations which change the memory pages to which virtual
696 * addresses are mapped (such as mmap(), munmap(), scif_mmap() and
697 * scif_munmap()) have no effect on existing windows.
698 *
699 * On Linux, if the process will fork(), it is recommended that the registered
700 * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
701 * problems due to copy-on-write semantics.
702 *
703 * The prot_flags argument is formed by OR'ing together one or more of the
704 * following values:
705 *- SCIF_PROT_READ: allow read operations from the window
706 *- SCIF_PROT_WRITE: allow write operations to the window
707 *
708 * The map_flags argument is formed by OR'ing together zero or more of
709 * the following values:
710 *- SCIF_MAP_FIXED: interpret offset exactly
711 *
712 *\return
713 * Upon successful completion, scif_register() returns the offset at which the
714 * mapping was placed (po); otherwise: in user mode SCIF_REGISTER_FAILED (that
715 * is (off_t)-1) is returned and errno is set to indicate the error; in
716 * kernel mode the negative of one of the following errors is returned.
717 *
718 *\par Errors:
719 *- EADDRINUSE
720 * - SCIF_MAP_FIXED is set in map_flags, and pages in the range [offset,
721 * offset+len-1] are already registered
722 *- EAGAIN
723 * - The mapping could not be performed due to lack of resources
724 *- EBADF
725 * - epd is not a valid endpoint descriptor
726 *- ECONNRESET
727 * - A connection was forcibly closed by a peer.
728 *- EFAULT
729 * - Addresses in the range [addr , addr + len - 1] are invalid
730 *- EINVAL
731 * - epd is not a valid endpoint descriptor, or
732 * - map_flags is invalid, or
733 * - prot_flags is invalid, or
734 * - SCIF_MAP_FIXED is set in map_flags, and offset is not a multiple of
735 * the page size, or
736 * - addr is not a multiple of the page size, or
737 * - len is not a multiple of the page size, or is 0, or
738 * - offset is negative
739 *- ENODEV
740 * - The remote node is lost.
741 *- ENOMEM
742 * - Not enough space
743 *- ENOTCONN
744 * - The endpoint is not connected
745 *- ENOTTY
746 * - epd is not a valid endpoint descriptor
747 */
748off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
749int prot_flags, int map_flags);
750
751/**
752 * scif_unregister - Release a memory region previously marked for remote access.
753 * \param epd endpoint descriptor
754 * \param offset start of range to unregister
755 * \param len length of range to unregister
756 *
757 * The scif_unregister() function closes those previously registered windows
758 * which are entirely within the range [offset,offset+len-1]. It is an error to
759 * specify a range which intersects only a subrange of a window.
760 *
761 * On a successful return, pages within the window may no longer be specified
762 * in calls to scif_mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(),
763 * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window, however,
764 * continues to exist until all previous references against it are removed. A
765 * window is referenced if there is a mapping to it created by scif_mmap(), or if
766 * scif_get_pages() was called against the window (and the pages have not been
767 * returned via scif_put_pages()). A window is also referenced while an RMA, in
768 * which some range of the window is a source or destination, is in progress.
769 * Finally a window is referenced while some offset in that window was specified
770 * to scif_fence_signal(), and the RMAs marked by that call to
771 * scif_fence_signal() have not completed. While a window is in this state, its
772 * registered address space pages are not available for use in a new registered
773 * window.
774 *
775 * When all such references to the window have been removed, its references to
776 * all the physical pages which it represents are removed. Similarly, the
777 * registered address space pages of the window become available for
778 * registration in a new window.
779 *
780 *\return
781 * Upon successful completion, scif_unregister() returns 0; otherwise: in user
782 * mode -1 is returned and errno is set to indicate the error; in kernel mode
783 * the negative of one of the following errors is returned. In the event of an
784 * error, no windows are unregistered.
785 *
786 *\par Errors:
787 *- EBADF
788 * - epd is not a valid endpoint descriptor
789 *- ECONNRESET
790 * - A connection was forcibly closed by a peer.
791 *- EINVAL
792 * - epd is not a valid endpoint descriptor, or
793 * - The range [offset,offset+len-1] intersects a subrange of a window, or
794 * - offset is negative
795 *- ENODEV
796 * - The remote node is lost.
797 *- ENOTCONN
798 * - The endpoint is not connected
799 *- ENOTTY
800 * - epd is not a valid endpoint descriptor
801 *- ENXIO
802 * - Addresses in the range [offset,offset+len-1] are invalid for the
803 * registered address space of epd.
804 */
805int scif_unregister(scif_epd_t epd, off_t offset, size_t len);
806
807
808/**
809 * scif_readfrom - Copy from a remote address space
810 * \param epd endpoint descriptor
811 * \param loffset offset in local registered address space to
812 * which to copy
813 * \param len length of range to copy
814 * \param roffset offset in remote registered address space
815 * from which to copy
816 * \param rma_flags transfer mode flags
817 *
818 * scif_readfrom() copies len bytes from the remote registered address space of
819 * the peer of endpoint epd, starting at the offset roffset to the local
820 * registered address space of epd, starting at the offset loffset.
821 *
822 * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+
823 * len-1] must be within some registered window or windows of the local and
824 * remote nodes respectively. A range may intersect multiple registered
825 * windows, but only if those windows are contiguous in the registered address
826 * space.
827 *
828 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
829 * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
830 * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the
831 * transfer is complete. Otherwise, the transfer may be performed asynchron-
832 * ously. The order in which any two asynchronous RMA operations complete
833 * is non-deterministic. The synchronization functions, scif_fence_mark()/
834 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
835 * the completion of asynchronous RMA operations.
836 *
837 * The DMA transfer of individual bytes is not guaranteed to complete in
838 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
839 * cacheline or partial cacheline of the source range will become visible on
840 * the destination node after all other transferred data in the source
841 * range has become visible on the destination node.
842 *
843 * The optimal DMA performance will likely be realized if both
844 * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
845 * performance will likely be realized if loffset and roffset are not
846 * cacheline aligned but are separated by some multiple of 64. The lowest level
847 * of performance is likely if loffset and roffset are not separated by a
848 * multiple of 64.
849 *
850 * The rma_flags argument is formed by ORing together zero or more of the
851 * following values:
852 *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA
853 * engine.
854 *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the
855 * transfer has completed. Passing this flag might result in
856 * the API busy waiting and consuming CPU cycles while the DMA
857 * transfer is in progress.
858 *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of
859 * the source range becomes visible on the destination node
860 * after all other transferred data in the source range has
861 * become visible on the destination
862 *
863 *\return
864 * Upon successful completion, scif_readfrom() returns 0; otherwise: in user
865 * mode -1 is returned and errno is set to indicate the error; in kernel mode
866 * the negative of one of the following errors is returned.
867 *
868 *\par Errors
869 *- EACCES
870 * - Attempt to write to a read-only range or read from a write-only range
871 *- EBADF
872 * - epd is not a valid endpoint descriptor
873 *- ECONNRESET
874 * - A connection was forcibly closed by a peer.
875 *- EINVAL
876 * - epd is not a valid endpoint descriptor, or
877 * - rma_flags is invalid
878 *- ENODEV
879 * - The remote node is lost.
880 *- ENOTCONN
881 * - The endpoint is not connected
882 *- ENOTTY
883 * - epd is not a valid endpoint descriptor
884 *- ENXIO
885 * - The range [loffset,loffset+len-1] is invalid for the registered address
886 * space of epd, or,
887 * - The range [roffset,roffset+len-1] is invalid for the registered address
888 * space of the peer of epd, or
889 * - loffset or roffset is negative
890*/
891int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
892roffset, int rma_flags);
893
894/**
895 * scif_writeto - Copy to a remote address space
896 * \param epd endpoint descriptor
897 * \param loffset offset in local registered address space
898 * from which to copy
899 * \param len length of range to copy
900 * \param roffset offset in remote registered address space to
901 * which to copy
902 * \param rma_flags transfer mode flags
903 *
904 * scif_writeto() copies len bytes from the local registered address space of
905 * epd, starting at the offset loffset to the remote registered address space
906 * of the peer of endpoint epd, starting at the offset roffset.
907 *
908 * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+
909 * len-1] must be within some registered window or windows of the local and
910 * remote nodes respectively. A range may intersect multiple registered
911 * windows, but only if those windows are contiguous in the registered address
912 * space.
913 *
914 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
915 * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
916 * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the
917 * transfer is complete. Otherwise, the transfer may be performed asynchron-
918 * ously. The order in which any two asynchronous RMA operations complete
919 * is non-deterministic. The synchronization functions, scif_fence_mark()/
920 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
921 * the completion of asynchronous RMA operations.
922 *
923 * The DMA transfer of individual bytes is not guaranteed to complete in
924 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
925 * cacheline or partial cacheline of the source range will become visible on
926 * the destination node after all other transferred data in the source
927 * range has become visible on the destination node.
928 *
929 * The optimal DMA performance will likely be realized if both
930 * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
931 * performance will likely be realized if loffset and roffset are not cacheline
932 * aligned but are separated by some multiple of 64. The lowest level of
933 * performance is likely if loffset and roffset are not separated by a multiple
934 * of 64.
935 *
936 * The rma_flags argument is formed by ORing together zero or more of the
937 * following values:
938 *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA
939 * engine.
940 *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the
941 * transfer has completed. Passing this flag might result in
942 * the API busy waiting and consuming CPU cycles while the DMA
943 * transfer is in progress.
944 *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of
945 * the source range becomes visible on the destination node
946 * after all other transferred data in the source range has
947 * become visible on the destination
948 *
949 *\return
950 * Upon successful completion, scif_writeto() returns 0; otherwise: in user
951 * mode -1 is returned and errno is set to indicate the error; in kernel mode
952 * the negative of one of the following errors is returned.
953 *
954 *\par Errors:
955 *- EACCES
956 * - Attempt to write to a read-only range or read from a write-only range
957 *- EBADF
958 * - epd is not a valid endpoint descriptor
959 *- ECONNRESET
960 * - A connection was forcibly closed by a peer.
961 *- EINVAL
962 * - epd is not a valid endpoint descriptor, or
963 * - rma_flags is invalid
964 *- ENODEV
965 * - The remote node is lost.
966 *- ENOTCONN
967 * - The endpoint is not connected
968 *- ENOTTY
969 * - epd is not a valid endpoint descriptor
970 *- ENXIO
971 * - The range [loffset,loffset+len-1] is invalid for the registered address
972 * space of epd, or,
973 * - The range [roffset , roffset + len -1] is invalid for the registered
974 * address space of the peer of epd, or
975 * - loffset or roffset is negative
976 */
977int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
978roffset, int rma_flags);
979
980/**
981 * scif_vreadfrom - Copy from a remote address space
982 * \param epd endpoint descriptor
983 * \param addr address to which to copy
984 * \param len length of range to copy
985 * \param roffset offset in remote registered address space
986 * from which to copy
987 * \param rma_flags transfer mode flags
988 *
989 * scif_vreadfrom() copies len bytes from the remote registered address
990 * space of the peer of endpoint epd, starting at the offset roffset, to local
991 * memory, starting at addr. addr is interpreted as a user space address.
992 *
993 * The specified range [roffset,roffset+len-1] must be within some registered
994 * window or windows of the remote node. The range may intersect
995 * multiple registered windows, but only if those windows are contiguous in the
996 * registered address space.
997 *
998 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
999 * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
1000 * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the
1001 * transfer is complete. Otherwise, the transfer may be performed asynchron-
1002 * ously. The order in which any two asynchronous RMA operations complete
1003 * is non-deterministic. The synchronization functions, scif_fence_mark()/
1004 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
1005 * the completion of asynchronous RMA operations.
1006 *
1007 * The DMA transfer of individual bytes is not guaranteed to complete in
1008 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
1009 * cacheline or partial cacheline of the source range will become visible on
1010 * the destination node after all other transferred data in the source
1011 * range has become visible on the destination node.
1012 *
1013 * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
1014 * the specified local memory range may remain in a pinned state even after
1015 * the specified transfer completes. This may reduce overhead if some or all of
1016 * the same virtual address range is referenced in a subsequent call of
1017 * scif_vreadfrom() or scif_vwriteto().
1018 *
1019 * The optimal DMA performance will likely be realized if both
1020 * addr and roffset are cacheline aligned (are a multiple of 64). Lower
1021 * performance will likely be realized if addr and roffset are not
1022 * cacheline aligned but are separated by some multiple of 64. The lowest level
1023 * of performance is likely if addr and roffset are not separated by a
1024 * multiple of 64.
1025 *
1026 * The rma_flags argument is formed by ORing together zero or more of the
1027 * following values:
1028 *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA
1029 * engine.
1030 *- SCIF_RMA_USECACHE: enable registration caching
1031 *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the
1032 * transfer has completed. Passing this flag might result in
1033 * the API busy waiting and consuming CPU cycles while the DMA
1034 * transfer is in progress.
1035 *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of
1036 * the source range becomes visible on the destination node
1037 * after all other transferred data in the source range has
1038 * become visible on the destination
1039 *
1040 *\return
1041 * Upon successful completion, scif_vreadfrom() returns 0; otherwise: in user
1042 * mode -1 is returned and errno is set to indicate the error; in kernel mode
1043 * the negative of one of the following errors is returned.
1044 *
1045 *\par Errors:
1046 *- EACCES
1047 * - Attempt to write to a read-only range or read from a write-only range
1048 *- EBADF
1049 * - epd is not a valid endpoint descriptor
1050 *- ECONNRESET
1051 * - A connection was forcibly closed by a peer.
1052 *- EFAULT
1053 * - Addresses in the range [addr,addr+len-1] are invalid
1054 *- EINVAL
1055 * - epd is not a valid endpoint descriptor, or
1056 * - rma_flags is invalid
1057 *- ENODEV
1058 * - The remote node is lost.
1059 *- ENOTCONN
1060 * - The endpoint is not connected
1061 *- ENOTTY
1062 * - epd is not a valid endpoint descriptor
1063 *- ENXIO
1064 * - Addresses in the range [roffset,roffset+len-1] are invalid for the
1065 * registered address space of epd.
1066 */
1067int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t offset,
1068int rma_flags);
1069
1070/**
1071 * scif_vwriteto - Copy to a remote address space
1072 * \param epd endpoint descriptor
1073 * \param addr address from which to copy
1074 * \param len length of range to copy
1075 * \param roffset offset in remote registered address space to
1076 * which to copy
1077 * \param rma_flags transfer mode flags
1078 *
1079 * scif_vwriteto() copies len bytes from the local memory, starting at addr, to
1080 * the remote registered address space of the peer of endpoint epd, starting at
1081 * the offset roffset. addr is interpreted as a user space address.
1082 *
1083 * The specified range [roffset,roffset+len-1] must be within some registered
1084 * window or windows of the remote node. The range may intersect
1085 * multiple registered windows, but only if those windows are contiguous in the
1086 * registered address space.
1087 *
1088 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
1089 * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
1090 * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the
1091 * transfer is complete. Otherwise, the transfer may be performed asynchron-
1092 * ously. The order in which any two asynchronous RMA operations complete
1093 * is non-deterministic. The synchronization functions, scif_fence_mark()/
1094 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
1095 * the completion of asynchronous RMA operations.
1096 *
1097 * The DMA transfer of individual bytes is not guaranteed to complete in
1098 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
1099 * cacheline or partial cacheline of the source range will become visible on
1100 * the destination node after all other transferred data in the source
1101 * range has become visible on the destination node.
1102 *
1103 * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
1104 * the specified local memory range may remain in a pinned state even after
1105 * the specified transfer completes. This may reduce overhead if some or all of
1106 * the same virtual address range is referenced in a subsequent call of
1107 * scif_vreadfrom() or scif_vwriteto().
1108 *
1109 * The optimal DMA performance will likely be realized if both
1110 * addr and offset are cacheline aligned (are a multiple of 64). Lower
1111 * performance will likely be realized if addr and offset are not cacheline
1112 * aligned but are separated by some multiple of 64. The lowest level of
1113 * performance is likely if addr and offset are not separated by a multiple of
1114 * 64.
1115 *
1116 * The rma_flags argument is formed by ORing together zero or more of the
1117 * following values:
1118 *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA
1119 * engine.
1120 *- SCIF_RMA_USECACHE: allow registration caching
1121 *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the
1122 * transfer has completed. Passing this flag might result in
1123 * the API busy waiting and consuming CPU cycles while the DMA
1124 * transfer is in progress.
1125 *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of
1126 * the source range becomes visible on the destination node
1127 * after all other transferred data in the source range has
1128 * become visible on the destination
1129 *
1130 *\return
1131 * Upon successful completion, scif_vwriteto() returns 0; otherwise: in user
1132 * mode -1 is returned and errno is set to indicate the error; in kernel mode
1133 * the negative of one of the following errors is returned.
1134 *
1135 *\par Errors:
1136 *- EACCES
1137 * - Attempt to write to a read-only range or read from a write-only range
1138 *- EBADF
1139 * - epd is not a valid endpoint descriptor
1140 *- ECONNRESET
1141 * - A connection was forcibly closed by a peer.
1142 *- EFAULT
1143 * - Addresses in the range [addr,addr+len-1] are invalid
1144 *- EINVAL
1145 * - epd is not a valid endpoint descriptor, or
1146 * - rma_flags is invalid
1147 *- ENODEV
1148 * - The remote node is lost.
1149 *- ENOTCONN
1150 * - The endpoint is not connected
1151 *- ENOTTY
1152 * - epd is not a valid endpoint descriptor
1153 *- ENXIO
1154 * - Addresses in the range [roffset,roffset+len-1] are invalid for the
1155 * registered address space of epd.
1156 */
1157int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t offset,
1158int rma_flags);
1159
1160/**
1161 * scif_fence_mark - Mark previously issued RMAs
1162 * \param epd endpoint descriptor
1163 * \param flags control flags
1164 * \param mark marked handle returned as output.
1165 *
1166 * scif_fence_mark() returns after marking the current set of all uncompleted
1167 * RMAs initiated through the endpoint epd or the current set of all
1168 * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are
1169 * marked with a value returned at mark. The application may subsequently call
1170 * scif_fence_wait(), passing the value returned at mark, to await completion
1171 * of all RMAs so marked.
1172 *
1173 * The flags argument has exactly one of the following values:
1174 *- SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint
1175 * epd are marked
1176 *- SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer
1177 * of endpoint epd are marked
1178 *
1179 * \return
1180 * Upon successful completion, scif_fence_mark() returns 0; otherwise: in user
1181 * mode -1 is returned and errno is set to indicate the error; in kernel mode
1182 * the negative of one of the following errors is returned.
1183 *
1184 *\par Errors:
1185 *- EBADF
1186 * - epd is not a valid endpoint descriptor
1187 *- ECONNRESET
1188 * - A connection was forcibly closed by a peer.
1189 *- EINVAL
1190 * - flags is invalid, or
1191 * - epd is not a valid endpoint descriptor
1192 *- ENODEV
1193 * - The remote node is lost.
1194 *- ENOTCONN
1195 * - The endpoint is not connected
1196 *- ENOMEM
1197 * - Insufficient kernel memory was available.
1198 *- ENOTTY
1199 * - epd is not a valid endpoint descriptor
1200 */
1201int scif_fence_mark(scif_epd_t epd, int flags, int *mark);
1202
1203/**
1204 * scif_fence_wait - Wait for completion of marked RMAs
1205 *
1206 * \param epd endpoint descriptor
1207 * \param mark mark request
1208 *
1209 * scif_fence_wait() returns after all RMAs marked with mark have completed.
1210 * The value passed in mark must have been obtained in a previous call to
1211 * scif_fence_mark().
1212 *
1213 *\return
1214 * Upon successful completion, scif_fence_wait() returns 0; otherwise: in user
1215 * mode -1 is returned and errno is set to indicate the error; in kernel mode
1216 * the negative of one of the following errors is returned.
1217 *
1218 *\par Errors:
1219 *- EBADF
1220 * - epd is not a valid endpoint descriptor
1221 *- ECONNRESET
1222 * - A connection was forcibly closed by a peer.
1223 *- EINVAL
1224 * - epd is not a valid endpoint descriptor
1225 *- ENODEV
1226 * - The remote node is lost.
1227 *- ENOTCONN
1228 * - The endpoint is not connected
1229 *- ENOMEM
1230 * - Insufficient kernel memory was available.
1231 *- ENOTTY
1232 * - epd is not a valid endpoint descriptor
1233 */
1234int scif_fence_wait(scif_epd_t epd, int mark);
1235
1236/**
1237 * scif_fence_signal - Request a signal on completion of RMAs
1238 * \param loff local offset
1239 * \param lval local value to write to loff
1240 * \param roff remote offset
1241 * \param rval remote value to write to roff
1242 * \param flags flags
1243 *
1244 * scif_fence_signal() returns after marking the current set of all uncompleted
1245 * RMAs initiated through the endpoint epd or marking the current set of all
1246 * uncompleted RMAs initiated through the peer of endpoint epd.
1247 *
1248 * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the
1249 * marked set, lval is written to memory at the address corresponding to offset
1250 * loff in the local registered address space of epd. loff must be within a
1251 * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion
1252 * of the RMAs in the marked set, rval is written to memory at the address
1253 * corresponding to offset roff in the remote registered address space of epd.
1254 * roff must be within a remote registered window of the peer of epd. Note
1255 * that any specified offset must be DWORD (4 byte / 32 bit) aligned.
1256 *
1257 * The flags argument is formed by OR'ing together the following:
1258 *- Exactly one of the following values:
1259 * - SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint
1260 * epd are marked
1261 * - SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer
1262 * of endpoint epd are marked
1263 *- One or more of the following values:
1264 * - SCIF_SIGNAL_LOCAL: On completion of the marked set of RMAs, write lval to
1265 * memory at the address corresponding to offset loff in the local registered
1266 * address space of epd.
1267 * - SCIF_SIGNAL_REMOTE: On completion of the marked set of RMAs, write rval to
1268 * memory at the address corresponding to offset roff in the remote registered
1269 * address space of epd.
1270 *
1271 *\return
1272 * Upon successful completion, scif_fence_signal() returns 0; otherwise: in
1273 * user mode -1 is returned and errno is set to indicate the error; in kernel
1274 * mode the negative of one of the following errors is returned.
1275 *\par Errors:
1276 *- EBADF
1277 * - epd is not a valid endpoint descriptor
1278 *- ECONNRESET
1279 * - A connection was forcibly closed by a peer.
1280 *- EINVAL
1281 * - epd is not a valid endpoint descriptor, or
1282 * - flags is invalid, or
1283 * - loff or roff are not DWORD aligned
1284 *- ENODEV
1285 * - The remote node is lost.
1286 *- ENOTCONN
1287 * - The endpoint is not connected
1288 *- ENOTTY
1289 * - epd is not a valid endpoint descriptor
1290 *- ENXIO
1291 * - loff is invalid for the registered address space of epd, or
1292 * - roff is invalid for the registered address space of the peer of epd
1293 */
1294int scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, off_t roff,
1295uint64_t rval, int flags);
1296
1297/**
1298 * scif_get_nodeIDs - Return information about online nodes
1299 * \param nodes array in which to return online node IDs
1300 * \param len number of entries in the nodes array
1301 * \param self address to place the node ID of the local node
1302 *
1303 * scif_get_nodeIDs() fills in the nodes array with up to len node IDs of the
1304 * nodes in the SCIF network. If there is not enough space in nodes, as
1305 * indicated by the len parameter, only len node IDs are returned in nodes. The
1306 * return value of scif_get_nodeIDs() is the total number of nodes currently in
1307 * the SCIF network. By checking the return value against the len parameter, the user may
1308 * determine if enough space for nodes was allocated.
1309 *
1310 * The node ID of the local node is returned at self.
1311 *
1312 *\return
1313 * Upon successful completion, scif_get_nodeIDs() returns the actual number of
1314 * online nodes in the SCIF network including 'self'; otherwise: in user mode
1315 * -1 is returned and errno is set to indicate the error; in kernel mode no
1316 * errors are returned.
1317 *
1318 *\par Errors:
1319 *- EFAULT
1320 * - Bad address
1321 */
1322int scif_get_nodeIDs(uint16_t *nodes, int len, uint16_t *self);
1323
1324
1325/**
1326 * scif_pin_pages - Pin a set of pages
1327 * \param addr Virtual address of range to pin
1328 * \param len Length of range to pin
1329 * \param prot_flags Page protection flags
1330 * \param map_flags Page classification flags
1331 * \param pinned_pages Opaque handle of pinned pages
1332 *
1333 * scif_pin_pages() pins (locks in physical memory) the physical pages which
1334 * back the range of virtual address pages starting at addr and continuing for
1335 * len bytes. addr and len are constrained to be multiples of the page size. A
1336 * successful scif_pin_pages() call returns an opaque pointer value at
1337 * pinned_pages which may be used in subsequent calls to
1338 * scif_register_pinned_pages().
1339 *
1340 * The pages will remain pinned as long as there is a reference against the
1341 * scif_pinned_pages_t value returned by scif_pin_pages() and until
1342 * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A
1343 * reference is added to a scif_pinned_pages_t value each time a window is
1344 * created by calling scif_register_pinned_pages() and passing the
1345 * scif_pinned_pages_t value. A reference is removed from a scif_pinned_pages_t value
1346 * each time such a window is deleted.
1347 *
1348 * Subsequent operations which change the memory pages to which virtual
1349 * addresses are mapped (such as mmap(), munmap(), scif_mmap() and
1350 * scif_munmap()) have no effect on the scif_pinned_pages_t value or windows
1351 * created against it.
1352 *
1353 * On Linux, if the process will fork(), it is recommended that the registered
1354 * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
1355 * problems due to copy-on-write semantics.
1356 *
1357 * The prot_flags argument is formed by OR'ing together one or more of the
1358 * following values:
1359 *- SCIF_PROT_READ: allow read operations against the pages
1360 *- SCIF_PROT_WRITE: allow write operations against the pages
1361 * The map_flags argument is formed by OR'ing together zero or more of the
1362 * following values:
1363 *- SCIF_MAP_KERNEL: interpret addr as a kernel space address. By default, addr
1364 * is interpreted as a user space address.
1365 *
1366 *\return
1367 * Upon successful completion, scif_pin_pages() returns 0; otherwise the
1368 * negative of one of the following errors is returned.
1369 *\par Errors:
1370 *- EFAULT
1371 * - Addresses in the range [addr,addr+len-1] are invalid
1372 *- EINVAL
1373 * - prot_flags is invalid,
1374 * - map_flags is invalid, or
1375 * - offset is negative
1376 *- ENOMEM
1377 * - Not enough space
1378 */
1379int
1380scif_pin_pages(
1381 void *addr,
1382 size_t len,
1383 int prot_flags,
1384 int map_flags,
1385 scif_pinned_pages_t *pinned_pages);
1386
1387/**
1388 * scif_unpin_pages - Unpin a set of pages
1389 * \param pinned_pages Opaque handle of pages to be unpinned
1390 *
1391 * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new
1392 * windows against pinned_pages. The physical pages represented by pinned_pages
1393 * will remain pinned until all windows previously registered against
1394 * pinned_pages are deleted (the window is scif_unregister()'d and all
1395 * references to the window are removed; see scif_unregister()).
1396 *
1397 * pinned_pages must have been obtained from a previous call to scif_pin_pages().
1398 * After calling scif_unpin_pages(), it is an error to pass pinned_pages to
1399 * scif_register_pinned_pages().
1400 *
1401 *\return
1402 * Upon successful completion, scif_unpin_pages() returns 0; otherwise the
1403 * negative of one of the following errors is returned.
1404 *
1405 *\par Errors:
1406 *- EINVAL
1407 * - pinned_pages is not valid
1408 */
1409int
1410scif_unpin_pages(
1411 scif_pinned_pages_t pinned_pages);
1412
1413/**
1414 * scif_register_pinned_pages - Mark a memory region for remote access.
1415 * \param epd Endpoint descriptor
1416 * \param pinned_pages Opaque handle of pinned pages
1417 * \param offset Registered address space offset
1418 * \param map_flags Flags which control where pages are mapped
1419 *
1420 * The scif_register_pinned_pages() function opens a window, a range of whole
1421 * pages of the registered address space of the endpoint epd, starting at
1422 * offset po. The value of po, further described below, is a function of the
1423 * parameters offset and pinned_pages, and the value of map_flags. Each page of
1424 * the window represents a corresponding physical memory page of the range
1425 * represented by pinned_pages; the length of the window is the same as the
1426 * length of the range represented by pinned_pages. A successful
1427 * scif_register_pinned_pages() call returns po as the return value.
1428 *
1429 * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
1430 * exactly, and offset is constrained to be a multiple of the page size. The
1431 * mapping established by scif_register_pinned_pages() will not replace any
1432 * existing registration; an error is returned if any page of the new window
1433 * would intersect an existing window.
1434 *
1435 * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
1436 * implementation-defined manner to arrive at po. The po so chosen will be an
1437 * area of the registered address space that the implementation deems suitable
1438 * for a mapping of the required size. An offset value of 0 is interpreted as
1439 * granting the implementation complete freedom in selecting po, subject to
1440 * constraints described below. A non-zero value of offset is taken to be a
1441 * suggestion of an offset near which the mapping should be placed. When the
1442 * implementation selects a value for po, it does not replace any extant
1443 * window. In all cases, po will be a multiple of the page size.
1444 *
1445 * The physical pages which are so represented by a window are available for
1446 * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(),
1447 * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
1448 * physical pages represented by the window will not be reused by the memory
1449 * subsystem for any other purpose. Note that the same physical page may be
1450 * represented by multiple windows.
1451 *
1452 * Windows created by scif_register_pinned_pages() are unregistered by
1453 * scif_unregister().
1454 *
1455 * The map_flags argument is formed by OR'ing together zero or more of the
1456 * following values:
1457 *- SCIF_MAP_FIXED: interpret offset exactly
1458 *
1459 *\return
1460 * Upon successful completion, scif_register_pinned_pages() returns the offset
1461 * at which the mapping was placed (po); otherwise the negative of one of the
1462 * following errors is returned.
1463 *\par Errors:
1464 *- EADDRINUSE
1465 * - SCIF_MAP_FIXED is set in map_flags and pages in the new
1466 * window would intersect an existing window
1467 *- EAGAIN
1468 * - The mapping could not be performed due to lack of resources
1469 *- ECONNRESET
1470 * - A connection was forcibly closed by a peer.
1471 *- EINVAL
1472 * - epd is not a valid endpoint descriptor, or
1473 * - map_flags is invalid, or
1474 * - SCIF_MAP_FIXED is set in map_flags, and offset is not a
1475 * multiple of the page size, or
1476 * - offset is negative
1477 *- ENODEV
1478 * - The remote node is lost.
1479 *- ENOMEM
1480 * - Not enough space
1481 *- ENOTCONN
1482 * - The endpoint is not connected
1483 */
1484off_t
1485scif_register_pinned_pages(
1486 scif_epd_t epd,
1487 scif_pinned_pages_t pinned_pages,
1488 off_t offset,
1489 int map_flags);
1490
1491/**
1492 * scif_get_pages - Add references to remote registered pages
1493 * \param epd endpoint descriptor
1494 * \param offset registered address space offset
1495 * \param len length, in bytes, of the range of pages
1496 * \param pages returned scif_range structure
1497 *
1498 * scif_get_pages() returns the addresses of the physical pages represented by
1499 * those pages of the registered address space of the peer of epd, starting at
1500 * offset and continuing for len bytes. offset and len are constrained to be
1501 * multiples of the page size.
1502 *
1503 * All of the pages in the specified range [offset,offset+len-1] must be within
1504 * a single window of the registered address space of the peer of epd.
1505 *
1506 * The addresses are returned as a virtually contiguous array pointed to by the
1507 * phys_addr component of the scif_range structure whose address is returned in
1508 * pages. The nr_pages component of scif_range is the length of the array. The
1509 * prot_flags component of scif_range holds the protection flag value passed
1510 * when the pages were registered.
1511 *
1512 * Each physical page whose address is returned by scif_get_pages() remains
1513 * available and will not be released for reuse until the scif_range structure
1514 * is returned in a call to scif_put_pages(). The scif_range structure returned
1515 * by scif_get_pages() must not be modified.
1516 *
1517 * It is an error to call scif_close() on an endpoint while a scif_range
1518 * structure obtained on that endpoint has not been returned to scif_put_pages().
1519 *
1520 *\return
1521 * Upon successful completion, scif_get_pages() returns 0; otherwise the
1522 * negative of one of the following errors is returned.
1523 *\par Errors:
1524 *- ECONNRESET
1525 * - A connection was forcibly closed by a peer.
1526 *- EINVAL
1527 * - epd is not a valid endpoint descriptor, or
1528 * - offset is not a multiple of the page size, or
1529 * - offset is negative, or
1530 * - len is not a multiple of the page size
1531 *- ENODEV
1532 * - The remote node is lost.
1533 *- ENOTCONN
1534 * - The endpoint is not connected
1535 *- ENXIO
1536 * - Addresses in the range [offset,offset+len-1] are invalid
1537 * for the registered address space of the peer of epd.
1538 */
1539int scif_get_pages(
1540 scif_epd_t epd,
1541 off_t offset,
1542 size_t len,
1543 struct scif_range **pages);
1544
1545/**
1546 * scif_put_pages - Remove references from remote registered pages
1547 * \param pages scif_range structure to be returned
1548 *
1549 * scif_put_pages() releases a scif_range structure previously obtained by
1550 * calling scif_get_pages(). The physical pages described by that structure may
1551 * be reused when the window which represented those pages is unregistered.
1552 * Therefore, those pages must not be accessed after calling scif_put_pages().
1553 *
1554 *\return
1555 * Upon successful completion, scif_put_pages() returns 0; otherwise the
1556 * negative of one of the following errors is returned.
1557 *\par Errors:
1558 *- EINVAL
1559 * - pages does not point to a valid scif_range structure, or
1560 * - the scif_range structure pointed to by pages was already returned.
1561 *- ENODEV
1562 * - The remote node is lost.
1563 *- ENOTCONN
1564 * - The endpoint is not connected.
1565 */
1566int scif_put_pages(
1567 struct scif_range *pages);
1568
1569/**
1570 * scif_poll - Wait for some event on an endpoint
1571 * \param epds Array of scif_pollepd entries, one per endpoint of interest
1572 * \param nepds Number of entries in epds
1573 * \param timeout Upper limit, in milliseconds, on the time for which
1574 * scif_poll() will block
1575 *
1576 * scif_poll() waits for one of a set of endpoints to become ready to perform
1577 * an I/O operation. scif_poll() exposes a subset of the functionality of the
1578 * POSIX standard poll() function.
1579 *
1580 * The epds argument specifies the endpoint descriptors to be examined and the
1581 * events of interest for each endpoint descriptor. epds is a pointer to an
1582 * array with one member for each open endpoint descriptor of interest.
1583 *
1584 * The number of items in the epds array is specified in nepds. The epd field
1585 * of scif_pollepd is an endpoint descriptor of an open endpoint. The field
1586 * events is a bitmask specifying the events which the application is
1587 * interested in. The field revents is an output parameter, filled by the
1588 * kernel with the events that actually occurred. The bits returned in revents
1589 * can include any of those specified in events, or one of the values
1590 * SCIF_POLLERR, SCIF_POLLHUP, or SCIF_POLLNVAL. (These three bits are
1591 * meaningless in the events field, and will be set in the revents field
1592 * whenever the corresponding condition is true.)
1593 *
1594 * If none of the events requested (and no error) has occurred for any of the
1595 * endpoint descriptors, then scif_poll() blocks until one of the events occurs.
1596 *
1597 * The timeout argument specifies an upper limit on the time for which
1598 * scif_poll() will block, in milliseconds. Specifying a negative value in
1599 * timeout means an infinite timeout.
1600 *
1601 * The following bits may be set in events and returned in revents:
1602 *- SCIF_POLLIN: Data may be received without blocking. For a connected
1603 * endpoint, this means that scif_recv() may be called without blocking. For a
1604 * listening endpoint, this means that scif_accept() may be called without
1605 * blocking.
1606 *- SCIF_POLLOUT: Data may be sent without blocking. For a connected endpoint,
1607 * this means that scif_send() may be called without blocking. This bit value
1608 * has no meaning for a listening endpoint and is ignored if specified.
1609 *
1610 * The following bits are only returned in revents, and are ignored if set in
1611 * events:
1612 *- SCIF_POLLERR: An error occurred on the endpoint
1613 *- SCIF_POLLHUP: The connection to the peer endpoint was disconnected
1614 *- SCIF_POLLNVAL: The specified endpoint descriptor is invalid.
1615 *
1616 *\return
1617 * Upon successful completion, scif_poll() returns a non-negative value. A
1618 * positive value indicates the total number of endpoint descriptors that have
1619 * been selected (that is, endpoint descriptors for which the revents member is
1620 * non-zero). A value of 0 indicates that the call timed out and no endpoint
1621 * descriptors have been selected. Otherwise: in user mode -1 is returned and
1622 * errno is set to indicate the error; in kernel mode the negative of one of
1623 * the following errors is returned.
1624 *
1625 *\par Errors:
1626 *- EFAULT
1627 * - The array given as argument was not contained in the calling program's
1628 * address space.
1629 *- EINTR
1630 * - A signal occurred before any requested event.
1631 *- EINVAL
1632 * - The nepds argument is greater than {OPEN_MAX}
1633 *- ENOMEM
1634 * - There was no space to allocate file descriptor tables.
1635*/
1636int
1637scif_poll(
1638 struct scif_pollepd *epds,
1639 unsigned int nepds,
1640 long timeout);
1641
1642/**
1643 * scif_event_register - Register an event handler
1644 * \param handler Event handler to be registered
1645 *
1646 * scif_event_register() registers a routine, handler, to be called when some
1647 * event occurs. The event parameter to handler indicates the type of event
1648 * which has occurred, and the corresponding component of the data parameter to
1649 * handler provides additional data about the event.
1650 *
1651 * The following events are defined:
1652 *- SCIF_NODE_ADDED: A node has been added to the SCIF network. The
1653 * scif_node_added component of the data parameter to handler identifies the
1654 * node. This event is informational. There are no requirements on the event
1655 * handler.
1656 *- SCIF_NODE_REMOVED: A node is being removed from the SCIF network. The
1657 * scif_node_removed component of the data parameter to handler identifies the
1658 * node. Before it returns, the event handler must hand back, using
1659 * scif_put_pages(), all scif_range structures obtained using
1660 * scif_get_pages() against an endpoint connected to the lost node. It is
1661 * recommended and expected that the handler will also scif_close() all
1662 * endpoints connected to the lost node.
1663 *
1664 *\return
1665 * Upon successful completion, scif_event_register() returns 0.
1666 *
1667 *\par Errors:
1668 *- ENOMEM
1669 * - There was no space to allocate file descriptor tables.
1670*/
1671
1672int
1673scif_event_register(
1674 scif_callback_t handler);
1675
1676/**
1677 * scif_event_unregister - Unregister event handler
1678 * \param handler Event handler to be unregistered
1679 *
1680 * scif_event_unregister() unregisters the handler which was registered
1681 * previously by using scif_event_register().
1682 *
1683 * WARNING: scif_event_unregister() must be called, for every handler that is
1684 * registered, before the module that registered the handlers exits.
1685 * Failure to do so will result in a crash of the scif module.
1686 *
1687 *\return
1688 * Upon successful completion, scif_event_unregister() returns 0.
1689 *\par Errors:
1690 *- EINVAL
1691 * - If the event handler was not found/registered.
1692*/
1693int
1694scif_event_unregister(
1695 scif_callback_t handler);
1696
1697/*
1698 * Note: The callee can use pci_resource_start(dev, index) and
1699 * pci_resource_len(dev, index) to obtain the PCI resource starting
1700 * physical address and length for each index whose entry in the va
1701 * array is non-NULL. MMIO BARs will not have IORESOURCE_PREFETCH set in
1702 * the flags obtained from pci_resource_flags(dev, index). va[index]
1703 * will be set to NULL for invalid resources.
1704 */
1705struct scif_pci_info {
1706 /* pci_dev pointer associated with a node */
1707 struct pci_dev *pdev;
1708 /* Ioremapped virtual address base per PCIe resource; NULL if the resource is invalid */
1709 void __iomem *va[PCI_NUM_RESOURCES];
1710};
1711
1712/**
1713 * scif_pci_info - Populate the scif_pci_info structure for a node.
1714 * \param node The node to query
1715 * \param dev The scif_pci_info structure to populate.
1716 *
1717 * scif_pci_info() populates the provided scif_pci_info structure for the
1718 * requested node. The requested node ID cannot be the same as the
1719 * current node. This routine will only return success when called from
1720 * the host.
1721 *
1722 *\return
1723 * Upon successful completion, scif_pci_info() returns 0; otherwise the
1724 * negative of one of the following errors is returned.
1725 *
1726 *\par Errors:
1727 *- EINVAL
1728 * - The requested node is not valid, or
1729 * - Called on MIC instead of the host.
1730 *- ENODEV
1731 * - No pci_dev association exists for the node.
1732 */
1733int
1734scif_pci_info(
1735 uint16_t node,
1736 struct scif_pci_info *dev);
1737
1738
1739#ifdef __cplusplus
1740} /* extern "C" */
1741#endif
1742
1743#endif /* __SCIF_H__ */