Commit | Line | Data |
---|---|---|
800f879a AT |
1 | /* |
2 | * Copyright 2010-2017 Intel Corporation. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License, version 2, | |
6 | * as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * Disclaimer: The codes contained in these modules may be specific to | |
14 | * the Intel Software Development Platform codenamed Knights Ferry, | |
15 | * and the Intel product codenamed Knights Corner, and are not backward | |
16 | * compatible with other Intel products. Additionally, Intel will NOT | |
17 | * support the codes or instruction set in future products. | |
18 | * | |
19 | * Intel offers no warranty of any kind regarding the code. This code is | |
20 | * licensed on an "AS IS" basis and Intel is not obligated to provide | |
21 | * any support, assistance, installation, training, or other services | |
22 | * of any kind. Intel is also not obligated to provide any updates, | |
23 | * enhancements or extensions. Intel specifically disclaims any warranty | |
24 | * of merchantability, non-infringement, fitness for any particular | |
25 | * purpose, and any other warranty. | |
26 | * | |
27 | * Further, Intel disclaims all liability of any kind, including but | |
28 | * not limited to liability for infringement of any proprietary rights, | |
29 | * relating to the use of the code, even if Intel is notified of the | |
30 | * possibility of such liability. Except as expressly stated in an Intel | |
31 | * license agreement provided with this code and agreed upon with Intel, | |
32 | * no license, express or implied, by estoppel or otherwise, to any | |
33 | * intellectual property rights is granted herein. | |
34 | */ | |
35 | ||
36 | /* | |
37 | * Revised 15:05 11/24/2010 | |
38 | * Derived from SCIF SAS v0.41 with additional corrections | |
39 | */ | |
40 | ||
41 | #ifndef __SCIF_H__ | |
42 | #define __SCIF_H__ | |
43 | ||
44 | #include <linux/types.h> | |
45 | #include <linux/errno.h> | |
46 | #include <linux/poll.h> | |
47 | #include <linux/pci.h> | |
48 | ||
49 | #ifdef __cplusplus | |
50 | extern "C" { | |
51 | #endif | |
52 | ||
53 | #define SCIF_ACCEPT_SYNC 1 /* scif_accept() flag: block until a connection request is presented */ | |
54 | #define SCIF_SEND_BLOCK 1 /* scif_send() flag: block until the entire message is sent */ | |
55 | #define SCIF_RECV_BLOCK 1 /* scif_recv() flag: block until the entire message is received */ | |
56 | ||
57 | /* Start: Deprecated temporary definitions kept for compatibility; they alias the SCIF_-prefixed names */ | |
58 | #define ACCEPT_SYNC SCIF_ACCEPT_SYNC | |
59 | #define SEND_BLOCK SCIF_SEND_BLOCK | |
60 | #define RECV_BLOCK SCIF_RECV_BLOCK | |
61 | /* End: Deprecated temporary definitions kept for compatibility */ | |
62 | ||
63 | enum { /* Protection flags OR'ed together to form the prot_flags argument of scif_register() */ | |
64 | SCIF_PROT_READ = (1<<0), /* allow read operations from the window */ | |
65 | SCIF_PROT_WRITE = (1<<1) /* allow write operations to the window */ | |
66 | }; | |
67 | ||
68 | /* Mapping flags for the map_flags argument of scif_register(); 0x40 is used internally by scif, so do not reuse it here */ | |
69 | enum { | |
70 | SCIF_MAP_FIXED = 0x10, /* interpret the offset argument exactly */ | |
71 | SCIF_MAP_KERNEL = 0x20, /* NOTE(review): meaning is not documented in this part of the header -- confirm */ | |
72 | }; | |
73 | ||
74 | enum { /* Flag bits 0-1. NOTE(review): presumably consumed by the fence APIs, which are not visible in this part of the header -- confirm */ | |
75 | SCIF_FENCE_INIT_SELF = (1<<0), | |
76 | SCIF_FENCE_INIT_PEER = (1<<1) | |
77 | }; | |
78 | ||
79 | enum { /* Flag bits 2-3, disjoint from SCIF_FENCE_INIT_* (bits 0-1). NOTE(review): consumer is not visible in this part of the header -- confirm */ | |
80 | SCIF_FENCE_RAS_SELF = (1<<2), | |
81 | SCIF_FENCE_RAS_PEER = (1<<3) | |
82 | }; | |
83 | ||
84 | enum { /* Flag bits 4-5, disjoint from the SCIF_FENCE_* bits (0-3). NOTE(review): presumably flags for scif_fence_signal() -- confirm */ | |
85 | SCIF_SIGNAL_LOCAL = (1<<4), | |
86 | SCIF_SIGNAL_REMOTE = (1<<5) | |
87 | }; | |
88 | ||
89 | #define SCIF_RMA_USECPU 1 /* RMA flag bit 0. NOTE(review): the RMA calls taking these flags are not visible in this part of the header -- confirm */ | |
90 | #define SCIF_RMA_USECACHE (1<<1) /* RMA flag bit 1 */ | |
91 | #define SCIF_RMA_SYNC (1<<2) /* RMA flag bit 2 */ | |
92 | #define SCIF_RMA_ORDERED (1<<3) /* RMA flag bit 3 */ | |
93 | //! @cond (Prevent doxygen from including these) | |
94 | #define SCIF_POLLIN POLLIN | |
95 | #define SCIF_POLLOUT POLLOUT | |
96 | #define SCIF_POLLERR POLLERR | |
97 | #define SCIF_POLLHUP POLLHUP | |
98 | #define SCIF_POLLNVAL POLLNVAL | |
99 | ||
100 | /* SCIF Reserved Ports */ | |
101 | /* COI */ | |
102 | #define SCIF_COI_PORT_0 40 | |
103 | #define SCIF_COI_PORT_1 41 | |
104 | #define SCIF_COI_PORT_2 42 | |
105 | #define SCIF_COI_PORT_3 43 | |
106 | #define SCIF_COI_PORT_4 44 | |
107 | #define SCIF_COI_PORT_5 45 | |
108 | #define SCIF_COI_PORT_6 46 | |
109 | #define SCIF_COI_PORT_7 47 | |
110 | #define SCIF_COI_PORT_8 48 | |
111 | #define SCIF_COI_PORT_9 49 | |
112 | ||
113 | /* OFED */ | |
114 | #define SCIF_OFED_PORT_0 60 | |
115 | #define SCIF_OFED_PORT_1 61 | |
116 | #define SCIF_OFED_PORT_2 62 | |
117 | #define SCIF_OFED_PORT_3 63 | |
118 | #define SCIF_OFED_PORT_4 64 | |
119 | #define SCIF_OFED_PORT_5 65 | |
120 | #define SCIF_OFED_PORT_6 66 | |
121 | #define SCIF_OFED_PORT_7 67 | |
122 | #define SCIF_OFED_PORT_8 68 | |
123 | #define SCIF_OFED_PORT_9 69 | |
124 | ||
125 | /* NETDEV */ | |
126 | #define SCIF_NETDEV_PORT_0 80 | |
127 | #define SCIF_NETDEV_PORT_1 81 | |
128 | #define SCIF_NETDEV_PORT_2 82 | |
129 | #define SCIF_NETDEV_PORT_3 83 | |
130 | #define SCIF_NETDEV_PORT_4 84 | |
131 | #define SCIF_NETDEV_PORT_5 85 | |
132 | #define SCIF_NETDEV_PORT_6 86 | |
133 | #define SCIF_NETDEV_PORT_7 87 | |
134 | #define SCIF_NETDEV_PORT_8 88 | |
135 | #define SCIF_NETDEV_PORT_9 89 | |
136 | ||
137 | /* RAS */ | |
138 | #define SCIF_RAS_PORT_0 100 | |
139 | #define SCIF_RAS_PORT_1 101 | |
140 | #define SCIF_RAS_PORT_2 102 | |
141 | #define SCIF_RAS_PORT_3 103 | |
142 | #define SCIF_RAS_PORT_4 104 | |
143 | #define SCIF_RAS_PORT_5 105 | |
144 | #define SCIF_RAS_PORT_6 106 | |
145 | #define SCIF_RAS_PORT_7 107 | |
146 | #define SCIF_RAS_PORT_8 108 | |
147 | #define SCIF_RAS_PORT_9 109 | |
148 | ||
149 | /* Power Management */ | |
150 | #define SCIF_PM_PORT_0 120 | |
151 | #define SCIF_PM_PORT_1 121 | |
152 | #define SCIF_PM_PORT_2 122 | |
153 | #define SCIF_PM_PORT_3 123 | |
154 | #define SCIF_PM_PORT_4 124 | |
155 | #define SCIF_PM_PORT_5 125 | |
156 | #define SCIF_PM_PORT_6 126 | |
157 | #define SCIF_PM_PORT_7 127 | |
158 | #define SCIF_PM_PORT_8 128 | |
159 | #define SCIF_PM_PORT_9 129 | |
160 | ||
161 | /* Board Tools */ | |
162 | #define SCIF_BT_PORT_0 130 | |
163 | #define SCIF_BT_PORT_1 131 | |
164 | #define SCIF_BT_PORT_2 132 | |
165 | #define SCIF_BT_PORT_3 133 | |
166 | #define SCIF_BT_PORT_4 134 | |
167 | #define SCIF_BT_PORT_5 135 | |
168 | #define SCIF_BT_PORT_6 136 | |
169 | #define SCIF_BT_PORT_7 137 | |
170 | #define SCIF_BT_PORT_8 138 | |
171 | #define SCIF_BT_PORT_9 139 | |
172 | ||
173 | /* MIC Boot/Configuration support */ | |
174 | #define MPSSD_MONRECV 160 | |
175 | #define MIC_NOTIFY 161 | |
176 | #define MPSSD_CRED 162 | |
177 | #define MPSSD_MONSEND 163 | |
178 | #define MPSSD_MICCTRL 164 | |
179 | #define MPSSD_RESV5 165 | |
180 | #define MPSSD_RESV6 166 | |
181 | #define MPSSD_RESV7 167 | |
182 | #define MPSSD_RESV8 168 | |
183 | #define MPSSD_RESV9 169 | |
184 | ||
185 | #define SCIF_ADMIN_PORT_END 1024 | |
186 | ||
187 | /* MYO */ | |
188 | #define SCIF_MYO_PORT_0 1025 | |
189 | #define SCIF_MYO_PORT_1 1026 | |
190 | #define SCIF_MYO_PORT_2 1027 | |
191 | #define SCIF_MYO_PORT_3 1028 | |
192 | #define SCIF_MYO_PORT_4 1029 | |
193 | #define SCIF_MYO_PORT_5 1030 | |
194 | #define SCIF_MYO_PORT_6 1031 | |
195 | #define SCIF_MYO_PORT_7 1032 | |
196 | #define SCIF_MYO_PORT_8 1033 | |
197 | #define SCIF_MYO_PORT_9 1034 | |
198 | ||
199 | /* SSG Tools */ | |
200 | #define SCIF_ST_PORT_0 1044 | |
201 | #define SCIF_ST_PORT_1 1045 | |
202 | #define SCIF_ST_PORT_2 1046 | |
203 | #define SCIF_ST_PORT_3 1047 | |
204 | #define SCIF_ST_PORT_4 1048 | |
205 | #define SCIF_ST_PORT_5 1049 | |
206 | #define SCIF_ST_PORT_6 1050 | |
207 | #define SCIF_ST_PORT_7 1051 | |
208 | #define SCIF_ST_PORT_8 1052 | |
209 | #define SCIF_ST_PORT_9 1053 | |
210 | ||
211 | /* End of SCIF Reserved Ports */ | |
212 | #define SCIF_PORT_RSVD 1088 | |
213 | //! @endcond | |
214 | ||
215 | typedef struct endpt *scif_epd_t; /* endpoint descriptor, as returned by scif_open(); struct endpt is not defined in this part of the header */ | |
216 | ||
217 | typedef struct scif_pinned_pages *scif_pinned_pages_t; /* handle to a struct scif_pinned_pages, which is not defined in this part of the header. NOTE(review): producer API not visible here -- confirm */ | |
218 | ||
219 | struct scif_range { /* Describes a set of pages: count, protection and per-page address arrays. NOTE(review): presumably filled in by scif_get_pages() -- confirm */ | |
220 | void *cookie; /* cookie -- NOTE(review): purpose not shown in this part of the header */ | |
221 | int nr_pages; /* Number of Pages */ | |
222 | int prot_flags; /* R/W protection -- presumably SCIF_PROT_READ / SCIF_PROT_WRITE bits; confirm */ | |
223 | /* Arrays phys_addr/va below are virtually contiguous */ | |
224 | dma_addr_t *phys_addr; /* Array of physical addresses */ | |
225 | void **va; /* Array of virtual addresses | |
226 | * and populated only when called | |
227 | * on the host for a remote SCIF | |
228 | * connection on MIC. | |
229 | */ | |
230 | }; | |
231 | ||
232 | struct scif_pollepd { /* Pairs an endpoint with requested/returned poll events. NOTE(review): presumably the element type passed to scif_poll() -- confirm */ | |
233 | scif_epd_t epd; /* endpoint descriptor */ | |
234 | short events; /* requested events -- presumably SCIF_POLL* bits; confirm */ | |
235 | short revents; /* returned events -- presumably SCIF_POLL* bits; confirm */ | |
236 | }; | |
237 | enum scif_event_type { /* Event kind passed as the first argument of a scif_callback_t handler */ | |
238 | SCIF_NODE_ADDED = 1<<0, | |
239 | SCIF_NODE_REMOVED = 1<<1 | |
240 | }; | |
241 | ||
242 | union eventd { /* Event payload passed as the second argument of a scif_callback_t handler; one member per event type */ | |
243 | uint16_t scif_node_added; /* NOTE(review): presumably the id of the added node -- confirm */ | |
244 | uint16_t scif_node_removed; /* NOTE(review): presumably the id of the removed node -- confirm */ | |
245 | }; | |
246 | ||
247 | typedef void (*scif_callback_t)(enum scif_event_type event, union eventd | |
248 | data); /* Handler signature: receives the event kind and its payload; returns nothing */ | |
249 | ||
250 | struct scif_callback { /* A handler plus a list link. NOTE(review): presumably one entry in a list of registered event callbacks -- confirm */ | |
251 | struct list_head list_member; /* list linkage */ | |
252 | scif_callback_t callback_handler; /* function invoked with the event and its payload */ | |
253 | }; | |
254 | ||
255 | #define SCIF_OPEN_FAILED ((scif_epd_t)-1) /* user-mode failure return of scif_open() */ | |
256 | #define SCIF_REGISTER_FAILED ((off_t)-1) /* user-mode failure return of scif_register() */ | |
257 | #define SCIF_MMAP_FAILED ((void *)-1) /* NOTE(review): presumably the failure return of scif_mmap() -- confirm */ | |
258 | ||
259 | struct scif_portID { /* Global port identifier: node id plus local port number (see scif_connect() and scif_accept()) */ | |
260 | uint16_t node; /* node on which port resides */ | |
261 | uint16_t port; /* Local port number */ | |
262 | }; | |
263 | ||
264 | /* Start: Deprecated temporary definitions kept for compatibility */ | |
265 | #define portID scif_portID | |
266 | typedef struct portID portID_t; /* the macro above expands this to struct scif_portID */ | |
267 | /* End: Deprecated temporary definitions kept for compatibility */ | |
268 | ||
269 | /** | |
270 | * scif_open - Create an endpoint | |
271 | * | |
272 | * The scif_open() function creates a new endpoint. | |
273 | * | |
274 | *\return | |
275 | * Upon successful completion, scif_open() returns an endpoint descriptor to | |
276 | * be used in subsequent SCIF function calls to refer to that endpoint; | |
277 | * otherwise: in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is | |
278 | * returned and errno is set to indicate the error; in kernel mode a NULL | |
279 | * scif_epd_t is returned. | |
280 | * | |
281 | *\par Errors: | |
282 | *- ENOMEM | |
283 | * - Insufficient kernel memory was available. | |
284 | *- ENXIO | |
285 | * - Version mismatch between micscif driver and libscif. | |
286 | */ | |
287 | scif_epd_t scif_open(void); | |
288 | ||
289 | /** | |
290 | * scif_bind - Bind an endpoint to a port | |
291 | * \param epd endpoint descriptor | |
292 | * \param pn port number | |
293 | * | |
294 | * scif_bind() binds endpoint epd to port pn, where pn is a port number on the | |
295 | * local node. If pn is zero, a port number greater than or equal to | |
296 | * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to | |
297 | * exactly one local port. Ports less than 1024 when requested can only be bound | |
298 | * by system (or root) processes or by processes executed by privileged users. | |
299 | * | |
300 | *\return | |
301 | * Upon successful completion, scif_bind() returns the port number to which epd | |
302 | * is bound; otherwise: in user mode -1 is returned and errno is set to | |
303 | * indicate the error; in kernel mode the negative of one of the following | |
304 | * errors is returned. | |
305 | * | |
306 | *\par Errors: | |
307 | *- EBADF | |
308 | * - epd is not a valid endpoint descriptor | |
309 | *- EINVAL | |
310 | * - epd is not a valid endpoint descriptor, or | |
311 | * - The endpoint or the port are already bound. | |
312 | *- EISCONN | |
313 | * - The endpoint is already connected. | |
314 | *- ENOSPC | |
315 | * - No port number available for assignment (when pn==0). | |
316 | *- ENOTTY | |
317 | * - epd is not a valid endpoint descriptor | |
318 | *- EACCES | |
319 | * - The port requested is protected and the user is not the superuser. | |
320 | */ | |
321 | int scif_bind(scif_epd_t epd, uint16_t pn); | |
322 | ||
323 | /** | |
324 | * scif_listen - Listen for connections on an endpoint | |
325 | * | |
326 | * \param epd endpoint descriptor | |
327 | * \param backlog maximum pending connection requests | |
328 | * | |
329 | * scif_listen() marks the endpoint epd as a listening endpoint - that is, as | |
330 | * an endpoint that will be used to accept incoming connection requests. Once | |
331 | * so marked, the endpoint is said to be in the listening state and may not be | |
332 | * used as the endpoint of a connection. | |
333 | * | |
334 | * The endpoint, epd, must have been bound to a port. | |
335 | * | |
336 | * The backlog argument defines the maximum length to which the queue of | |
337 | * pending connections for epd may grow. If a connection request arrives when | |
338 | * the queue is full, the client may receive an error with an indication that | |
339 | * the connection was refused. | |
340 | * | |
341 | *\return | |
342 | * Upon successful completion, scif_listen() returns 0; otherwise: in user mode | |
343 | * -1 is returned and errno is set to indicate the error; in kernel mode the | |
344 | * negative of one of the following errors is returned. | |
345 | * | |
346 | *\par Errors: | |
347 | *- EBADF | |
348 | * - epd is not a valid endpoint descriptor | |
349 | *- EINVAL | |
350 | * - epd is not a valid endpoint descriptor, or | |
351 | * - The endpoint is not bound to a port | |
352 | *- EISCONN | |
353 | * - The endpoint is already connected or listening | |
354 | *- ENOTTY | |
355 | * - epd is not a valid endpoint descriptor | |
356 | */ | |
357 | int scif_listen(scif_epd_t epd, int backlog); | |
358 | ||
359 | /** | |
360 | * scif_connect - Initiate a connection on a port | |
361 | * \param epd endpoint descriptor | |
362 | * \param dst global id of port to which to connect | |
363 | * | |
364 | * The scif_connect() function requests the connection of endpoint epd to remote | |
365 | * port dst. If the connection is successful, a peer endpoint, bound to dst, is | |
366 | * created on node dst.node. On successful return, the connection is complete. | |
367 | * | |
368 | * If the endpoint epd has not already been bound to a port, scif_connect() | |
369 | * will bind it to an unused local port. | |
370 | * | |
371 | * A connection is terminated when an endpoint of the connection is closed, | |
372 | * either explicitly by scif_close(), or when a process that owns one of the | |
373 | * endpoints of a connection is terminated. | |
374 | * | |
375 | *\return | |
376 | * Upon successful completion, scif_connect() returns the port ID to which the | |
377 | * endpoint, epd, is bound; otherwise: in user mode -1 is returned and errno is | |
378 | * set to indicate the error; in kernel mode the negative of one of the | |
379 | * following errors is returned. | |
380 | * | |
381 | *\par Errors: | |
382 | *- EBADF | |
383 | * - epd is not a valid endpoint descriptor | |
384 | *- ECONNREFUSED | |
385 | * - The destination was not listening for connections or refused the | |
386 | * connection request. | |
387 | *- EINTR | |
388 | * - Interrupted function | |
389 | *- EINVAL | |
390 | * - epd is not a valid endpoint descriptor, or | |
391 | * - dst.port is not a valid port ID | |
392 | *- EISCONN | |
393 | * - The endpoint is already connected | |
394 | *- ENOBUFS | |
395 | * - No buffer space is available | |
396 | *- ENODEV | |
397 | * - The destination node does not exist, or | |
398 | * - The node is lost. | |
399 | *- ENOSPC | |
400 | * - No port number available for assignment (when epd is not already bound). | |
401 | *- ENOTTY | |
402 | * - epd is not a valid endpoint descriptor | |
403 | *- EOPNOTSUPP | |
404 | * - The endpoint is listening and cannot be connected | |
405 | */ | |
406 | int scif_connect(scif_epd_t epd, struct scif_portID *dst); | |
407 | ||
408 | /** | |
409 | * scif_accept - Accept a connection on an endpoint | |
410 | * \param epd endpoint descriptor | |
411 | * \param peer global id of port to which connected | |
412 | * \param newepd new connected endpoint descriptor | |
413 | * \param flags flags | |
414 | * | |
415 | * The scif_accept() call extracts the first connection request on the queue of | |
416 | * pending connections for the port on which epd is listening. scif_accept() | |
417 | * creates a new endpoint, bound to the same port as epd, and allocates a new | |
418 | * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new | |
419 | * endpoint is connected to the endpoint through which the connection was | |
420 | * requested. epd is unaffected by this call, and remains in the listening | |
421 | * state. | |
422 | * | |
423 | * On successful return, peer holds the global port identifier (node id and | |
424 | * local port number) of the port which requested the connection. | |
425 | * | |
426 | * If the peer endpoint which requested the connection is closed, the endpoint | |
427 | * returned by scif_accept() is closed. | |
428 | * | |
429 | * The number of connections that can (subsequently) be accepted on epd is only | |
430 | * limited by system resources (memory). | |
431 | * | |
432 | * The flags argument is formed by OR'ing together zero or more of the | |
433 | * following values: | |
434 | *- SCIF_ACCEPT_SYNC: block until a connection request is presented. If | |
435 | * SCIF_ACCEPT_SYNC is not in flags, and no pending | |
436 | * connections are present on the queue, scif_accept() fails | |
437 | * with an EAGAIN error | |
438 | * | |
439 | * On Linux in user mode, the select() and poll() functions can be used to | |
440 | * determine when there is a connection request. On Microsoft Windows* and on | |
441 | * Linux in kernel mode, the scif_poll() function may be used for this purpose. | |
442 | * A readable event will be delivered when a connection is requested. | |
443 | * | |
444 | *\return | |
445 | * Upon successful completion, scif_accept() returns 0; otherwise: in user mode | |
446 | * -1 is returned and errno is set to indicate the error; in kernel mode the | |
447 | * negative of one of the following errors is returned. | |
448 | * | |
449 | *\par Errors: | |
450 | *- EAGAIN | |
451 | * - SCIF_ACCEPT_SYNC is not set and no connections are present to be accepted, or | |
452 | * - SCIF_ACCEPT_SYNC is not set and remote node failed to complete its | |
453 | * connection request | |
454 | *- EBADF | |
455 | * - epd is not a valid endpoint descriptor | |
456 | *- EINTR | |
457 | * - Interrupted function | |
458 | *- EINVAL | |
459 | * - epd is not a valid endpoint descriptor, or | |
460 | * - epd is not a listening endpoint | |
461 | * - flags is invalid | |
462 | * - peer is NULL | |
463 | * - newepd is NULL | |
464 | *- ENOBUFS | |
465 | * - No buffer space is available | |
466 | *- ENODEV | |
467 | * - The requesting node is lost. | |
468 | *- ENOMEM | |
469 | * - Not enough space | |
470 | *- ENOTTY | |
471 | * - epd is not a valid endpoint descriptor | |
472 | *- ENOENT | |
473 | * - Secondary part of epd registration failed. | |
474 | */ | |
475 | int scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t | |
476 | *newepd, int flags); | |
477 | ||
478 | /** | |
479 | * scif_close - Close an endpoint | |
480 | * \param epd endpoint descriptor | |
481 | * | |
482 | * scif_close() closes an endpoint and performs necessary teardown of | |
483 | * facilities associated with that endpoint. | |
484 | * | |
485 | * If epd is a listening endpoint then it will no longer accept connection | |
486 | * requests on the port to which it is bound. Any pending connection requests | |
487 | * are rejected. | |
488 | * | |
489 | * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs | |
490 | * which are in-process through epd or its peer endpoint will complete before | |
491 | * scif_close() returns. Registered windows of the local and peer endpoints are | |
492 | * released as if scif_unregister() was called against each window. | |
493 | * | |
494 | * Closing an endpoint does not affect mappings to remote memory. These remain | |
495 | * until explicitly removed by calling scif_munmap(). | |
496 | * | |
497 | * If the peer endpoint's receive queue is not empty at the time that epd is | |
498 | * closed, then the peer endpoint can be passed as the endpoint parameter to | |
499 | * scif_recv() until the receive queue is empty. | |
500 | * | |
501 | * If epd is bound to a port, then the port is returned to the pool of | |
502 | * available ports. | |
503 | * | |
504 | * epd is freed and may no longer be accessed. | |
505 | * | |
506 | *\return | |
507 | * Upon successful completion, scif_close() returns 0; otherwise: in user mode | |
508 | * -1 is returned and errno is set to indicate the error; in kernel mode the | |
509 | * negative of one of the following errors is returned. | |
510 | * | |
511 | *\par Errors: | |
512 | *- EBADF | |
513 | * - epd is not a valid endpoint descriptor | |
514 | *- EINVAL | |
515 | * - epd is not a valid endpoint descriptor | |
516 | */ | |
517 | int scif_close(scif_epd_t epd); | |
518 | ||
519 | /** | |
520 | * scif_send - Send a message | |
521 | * \param epd endpoint descriptor | |
522 | * \param msg message buffer address | |
523 | * \param len message length | |
524 | * \param flags blocking mode flags | |
525 | * | |
526 | * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data | |
527 | * are copied from memory starting at address msg. On successful execution the | |
528 | * return value of scif_send() is the number of bytes that were sent, and is | |
529 | * zero if no bytes were sent because len was zero. scif_send() may be called | |
530 | * only when the endpoint is in a connected state. | |
531 | * | |
532 | * If a scif_send() call is non-blocking, then it sends only those bytes which | |
533 | * can be sent without waiting, up to a maximum of len bytes. | |
534 | * | |
535 | * If a scif_send() call is blocking, then it normally returns after sending | |
536 | * all len bytes. If a blocking call is interrupted or the connection is | |
537 | * forcibly closed, the call is considered successful if some bytes were sent | |
538 | * or len is zero, otherwise the call is considered unsuccessful. | |
539 | * | |
540 | * On Linux in user mode, the select() and poll() functions can be used to | |
541 | * determine when the send queue is not full. On Microsoft Windows* and on | |
542 | * Linux in kernel mode, the scif_poll() function may be used for this purpose. | |
543 | * | |
544 | * It is recommended that scif_send()/scif_recv() only be used for short | |
545 | * control-type message communication between SCIF endpoints. The SCIF RMA | |
546 | * APIs are expected to provide better performance for transfer sizes of | |
547 | * 1024 bytes or longer. | |
548 | * | |
549 | * The flags argument is formed by ORing together zero or more of the following | |
550 | * values: | |
551 | *- SCIF_SEND_BLOCK: block until the entire message is sent. | |
552 | * | |
553 | *\return | |
554 | * Upon successful completion, scif_send() returns the number of bytes sent; | |
555 | * otherwise: in user mode -1 is returned and errno is set to indicate the | |
556 | * error; in kernel mode the negative of one of the following errors is | |
557 | * returned. | |
558 | * | |
559 | *\par Errors: | |
560 | *- EBADF | |
561 | * - epd is not a valid endpoint descriptor | |
562 | *- ECONNRESET | |
563 | * - A connection was forcibly closed by a peer. | |
564 | *- EFAULT | |
565 | * - An invalid address was specified for a parameter. | |
566 | *- EINTR | |
567 | * - epd was closed by scif_close() | |
568 | *- EINVAL | |
569 | * - epd is not a valid endpoint descriptor, or | |
570 | * - flags is invalid | |
571 | * - len is negative | |
572 | *- ENODEV | |
573 | * - The remote node is lost. | |
574 | *- ENOMEM | |
575 | * - Not enough space | |
576 | *- ENOTCONN | |
577 | * - The endpoint is not connected | |
578 | *- ENOTTY | |
579 | * - epd is not a valid endpoint descriptor | |
580 | */ | |
581 | int scif_send(scif_epd_t epd, void *msg, int len, int flags); | |
582 | ||
583 | /** | |
584 | * scif_recv - Receive a message | |
585 | * \param epd endpoint descriptor | |
586 | * \param msg message buffer address | |
587 | * \param len message buffer length | |
588 | * \param flags blocking mode flags | |
589 | * | |
590 | * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of | |
591 | * data are copied to memory starting at address msg. On successful execution | |
592 | * the return value of scif_recv() is the number of bytes that were received, | |
593 | * and is zero if no bytes were received because len was zero. scif_recv() may | |
594 | * be called only when the endpoint is in a connected state. | |
595 | * | |
596 | * If a scif_recv() call is non-blocking, then it receives only those bytes | |
597 | * which can be received without waiting, up to a maximum of len bytes. | |
598 | * | |
599 | * If a scif_recv() call is blocking, then it normally returns after receiving | |
600 | * all len bytes. If a blocking call is interrupted or the connection is | |
601 | * forcibly closed, the call is considered successful if some bytes were | |
602 | * received or len is zero, otherwise the call is considered unsuccessful; | |
603 | * subsequent calls to scif_recv() will successfully receive all data sent | |
604 | * through the peer endpoint before the interruption or the forcible close. | |
605 | * | |
606 | * On Linux in user mode, the select() and poll() functions can be used to | |
607 | * determine when data is available to be received. On Microsoft Windows* and | |
608 | * on Linux in kernel mode, the scif_poll() function may be used for this | |
609 | * purpose. | |
610 | * | |
611 | * It is recommended that scif_send()/scif_recv() only be used for short | |
612 | * control-type message communication between SCIF endpoints. The SCIF RMA | |
613 | * APIs are expected to provide better performance for transfer sizes of | |
614 | * 1024 bytes or longer. | |
615 | * | |
616 | * The flags argument is formed by ORing together zero or more of the following | |
617 | * values: | |
618 | *- SCIF_RECV_BLOCK: block until the entire message is received. | |
619 | * | |
620 | *\return | |
621 | * Upon successful completion, scif_recv() returns the number of bytes | |
622 | * received; otherwise: in user mode -1 is returned and errno is set to | |
623 | * indicate the error; in kernel mode the negative of one of the following | |
624 | * errors is returned. | |
625 | * | |
626 | *\par Errors: | |
627 | *- EAGAIN | |
628 | * - The destination node is returning from a low power state. | |
629 | *- EBADF | |
630 | * - epd is not a valid endpoint descriptor. | |
631 | *- ECONNRESET | |
632 | * - A connection was forcibly closed by a peer. | |
633 | *- EFAULT | |
634 | * - An invalid address was specified for a parameter. | |
635 | *- EINVAL | |
636 | * - epd is not a valid endpoint descriptor, or | |
637 | * - flags is invalid, or | |
638 | * - len is negative. | |
639 | *- ENODEV | |
640 | * - The remote node is lost. | |
641 | *- ENOMEM | |
642 | * - Not enough space. | |
643 | *- ENOTCONN | |
644 | * - The endpoint is not connected. | |
645 | *- ENOTTY | |
646 | * - epd is not a valid endpoint descriptor | |
647 | */ | |
648 | int scif_recv(scif_epd_t epd, void *msg, int len, int flags); | |
649 | ||
650 | /** | |
651 | * scif_register - Mark a memory region for remote access. | |
652 | * \param epd endpoint descriptor | |
653 | * \param addr starting virtual address | |
654 | * \param len length of range | |
655 | * \param offset offset of window | |
656 | * \param prot_flags read/write protection flags | |
657 | * \param map_flags mapping flags | |
658 | * | |
659 | * The scif_register() function opens a window, a range of whole pages of the | |
660 | * registered address space of the endpoint epd, starting at offset po and | |
661 | * continuing for len bytes. The value of po, further described below, is a | |
662 | * function of the parameters offset and len, and the value of map_flags. Each | |
663 | * page of the window represents the physical memory page which backs the | |
664 | * corresponding page of the range of virtual address pages starting at addr | |
665 | * and continuing for len bytes. addr and len are constrained to be multiples | |
666 | * of the page size. addr is interpreted as a user space address. A successful | |
667 | * scif_register() call returns po as the return value. | |
668 | * | |
669 | * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset | |
670 | * exactly, and offset is constrained to be a multiple of the page size. The | |
671 | * mapping established by scif_register() will not replace any existing | |
672 | * registration; an error is returned if any page within the range [offset, | |
673 | * offset+len-1] intersects an existing window. | |
674 | * Note: When SCIF_MAP_FIXED is set the current implementation limits | |
675 | * offset to the range [0..2^62-1] and returns EADDRINUSE if the offset | |
676 | * requested with SCIF_MAP_FIXED is in the range [2^62..2^63-1]. | |
677 | * | |
678 | * When SCIF_MAP_FIXED is not set, the implementation uses offset in an | |
679 | * implementation-defined manner to arrive at po. The po value so chosen will | |
680 | * be an area of the registered address space that the implementation deems | |
681 | * suitable for a mapping of len bytes. An offset value of 0 is interpreted as | |
682 | * granting the implementation complete freedom in selecting po, subject to | |
683 | * constraints described below. A non-zero value of offset is taken to be a | |
684 | * suggestion of an offset near which the mapping should be placed. When the | |
685 | * implementation selects a value for po, it does not replace any extant | |
686 | * window. In all cases, po will be a multiple of the page size. | |
687 | * | |
688 | * The physical pages which are so represented by a window are available for | |
689 | * access in calls to scif_mmap(), scif_readfrom(), scif_writeto(), | |
690 | * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the | |
691 | * physical pages represented by the window will not be reused by the memory | |
692 | * subsystem for any other purpose. Note that the same physical page may be | |
693 | * represented by multiple windows. | |
694 | * | |
695 | * Subsequent operations which change the memory pages to which virtual | |
696 | * addresses are mapped (such as mmap(), munmap(), scif_mmap() and | |
697 | * scif_munmap()) have no effect on existing windows. | |
698 | * | |
699 | * On Linux, if the process will fork(), it is recommended that the registered | |
700 | * virtual address range be marked with MADV_DONTFORK. Doing so will prevent | |
701 | * problems due to copy-on-write semantics. | |
702 | * | |
703 | * The prot_flags argument is formed by OR'ing together one or more of the | |
704 | * following values: | |
705 | *- SCIF_PROT_READ: allow read operations from the window | |
706 | *- SCIF_PROT_WRITE: allow write operations to the window | |
707 | * | |
708 | * The map_flags argument is formed by OR'ing together zero or more of | |
709 | * the following values: | |
710 | *- SCIF_MAP_FIXED: interpret offset exactly | |
711 | * | |
712 | *\return | |
713 | * Upon successful completion, scif_register() returns the offset at which the | |
714 | * mapping was placed (po); otherwise: in user mode SCIF_REGISTER_FAILED (that | |
715 | * is (off_t)-1) is returned and errno is set to indicate the error; in | |
716 | * kernel mode the negative of one of the following errors is returned. | |
717 | * | |
718 | *\par Errors: | |
719 | *- EADDRINUSE | |
720 | * - SCIF_MAP_FIXED is set in map_flags, and pages in the range [offset, | |
721 | * offset+len-1] are already registered | |
722 | *- EAGAIN | |
723 | * - The mapping could not be performed due to lack of resources | |
724 | *- EBADF | |
725 | * - epd is not a valid endpoint descriptor | |
726 | *- ECONNRESET | |
727 | * - A connection was forcibly closed by a peer. | |
728 | *- EFAULT | |
729 | * - Addresses in the range [addr , addr + len - 1] are invalid | |
730 | *- EINVAL | |
731 | * - epd is not a valid endpoint descriptor, or | |
732 | * - map_flags is invalid, or | |
733 | * - prot_flags is invalid, or | |
734 | * - SCIF_MAP_FIXED is set in map_flags, and offset is not a multiple of | |
735 | * the page size, or | |
736 | * - addr is not a multiple of the page size, or | |
737 | * - len is not a multiple of the page size, or is 0, or | |
738 | * - offset is negative | |
739 | *- ENODEV | |
740 | * - The remote node is lost. | |
741 | *- ENOMEM | |
742 | * - Not enough space | |
743 | *- ENOTCONN | |
744 | * - The endpoint is not connected | |
745 | *- ENOTTY | |
746 | * - epd is not a valid endpoint descriptor | |
747 | */ | |
748 | off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, | |
749 | int prot_flags, int map_flags); | |
750 | ||
751 | /** | |
752 | * scif_unregister - Unregister previously registered windows. | |
753 | * \param epd endpoint descriptor | |
754 | * \param offset start of range to unregister | |
755 | * \param len length of range to unregister | |
756 | * | |
757 | * The scif_unregister() function closes those previously registered windows | |
758 | * which are entirely within the range [offset,offset+len-1]. It is an error to | |
759 | * specify a range which intersects only a subrange of a window. | |
760 | * | |
761 | * On a successful return, pages within the window may no longer be specified | |
762 | * in calls to scif_mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), | |
763 | * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window, however, | |
764 | * continues to exist until all previous references against it are removed. A | |
765 | * window is referenced if there is a mapping to it created by scif_mmap(), or if | |
766 | * scif_get_pages() was called against the window (and the pages have not been | |
767 | * returned via scif_put_pages()). A window is also referenced while an RMA, in | |
768 | * which some range of the window is a source or destination, is in progress. | |
769 | * Finally a window is referenced while some offset in that window was specified | |
770 | * to scif_fence_signal(), and the RMAs marked by that call to | |
771 | * scif_fence_signal() have not completed. While a window is in this state, its | |
772 | * registered address space pages are not available for use in a new registered | |
773 | * window. | |
774 | * | |
775 | * When all such references to the window have been removed, its references to | |
776 | * all the physical pages which it represents are removed. Similarly, the | |
777 | * registered address space pages of the window become available for | |
778 | * registration in a new window. | |
779 | * | |
780 | *\return | |
781 | * Upon successful completion, scif_unregister() returns 0; otherwise: in user | |
782 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
783 | * the negative of one of the following errors is returned. In the event of an | |
784 | * error, no windows are unregistered. | |
785 | * | |
786 | *\par Errors: | |
787 | *- EBADF | |
788 | * - epd is not a valid endpoint descriptor | |
789 | *- ECONNRESET | |
790 | * - A connection was forcibly closed by a peer. | |
791 | *- EINVAL | |
792 | * - epd is not a valid endpoint descriptor, or | |
793 | * - The range [offset,offset+len-1] intersects a subrange of a window, or | |
794 | * - offset is negative | |
795 | *- ENODEV | |
796 | * - The remote node is lost. | |
797 | *- ENOTCONN | |
798 | * - The endpoint is not connected | |
799 | *- ENOTTY | |
800 | * - epd is not a valid endpoint descriptor | |
801 | *- ENXIO | |
802 | * - Addresses in the range [offset,offset+len-1] are invalid for the | |
803 | * registered address space of epd. | |
804 | */ | |
805 | int scif_unregister(scif_epd_t epd, off_t offset, size_t len); | |
806 | ||
807 | ||
808 | /** | |
809 | * scif_readfrom - Copy from a remote address space | |
810 | * \param epd endpoint descriptor | |
811 | * \param loffset offset in local registered address space to | |
812 | * which to copy | |
813 | * \param len length of range to copy | |
814 | * \param roffset offset in remote registered address space | |
815 | * from which to copy | |
816 | * \param rma_flags transfer mode flags | |
817 | * | |
818 | * scif_readfrom() copies len bytes from the remote registered address space of | |
819 | * the peer of endpoint epd, starting at the offset roffset to the local | |
820 | * registered address space of epd, starting at the offset loffset. | |
821 | * | |
822 | * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ | |
823 | * len-1] must be within some registered window or windows of the local and | |
824 | * remote nodes respectively. A range may intersect multiple registered | |
825 | * windows, but only if those windows are contiguous in the registered address | |
826 | * space. | |
827 | * | |
828 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
829 | * programmed read/writes. Otherwise the data is copied using DMA. If | |
830 | * rma_flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after | |
831 | * the transfer is complete. Otherwise, the transfer may be performed | |
832 | * asynchronously. The order in which any two asynchronous RMA operations | |
833 | * complete is non-deterministic. The synchronization functions, | |
834 | * scif_fence_mark()/scif_fence_wait() and scif_fence_signal(), can be used to | |
835 | * synchronize to the completion of asynchronous RMA operations. | |
836 | * | |
837 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
838 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
839 | * cacheline or partial cacheline of the source range will become visible on | |
840 | * the destination node after all other transferred data in the source | |
841 | * range has become visible on the destination node. | |
842 | * | |
843 | * The optimal DMA performance will likely be realized if both | |
844 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower | |
845 | * performance will likely be realized if loffset and roffset are not | |
846 | * cacheline aligned but are separated by some multiple of 64. The lowest level | |
847 | * of performance is likely if loffset and roffset are not separated by a | |
848 | * multiple of 64. | |
849 | * | |
850 | * The rma_flags argument is formed by ORing together zero or more of the | |
851 | * following values: | |
852 | *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA | |
853 | * engine. | |
854 | *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the | |
855 | * transfer has completed. Passing this flag might result in | |
856 | * the API busy waiting and consuming CPU cycles while the DMA | |
857 | * transfer is in progress. | |
858 | *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of | |
859 | * the source range becomes visible on the destination node | |
860 | * after all other transferred data in the source range has | |
861 | * become visible on the destination | |
862 | * | |
863 | *\return | |
864 | * Upon successful completion, scif_readfrom() returns 0; otherwise: in user | |
865 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
866 | * the negative of one of the following errors is returned. | |
867 | * | |
868 | *\par Errors: | |
869 | *- EACCES | |
870 | * - Attempt to write to a read-only range or read from a write-only range | |
871 | *- EBADF | |
872 | * - epd is not a valid endpoint descriptor | |
873 | *- ECONNRESET | |
874 | * - A connection was forcibly closed by a peer. | |
875 | *- EINVAL | |
876 | * - epd is not a valid endpoint descriptor, or | |
877 | * - rma_flags is invalid | |
878 | *- ENODEV | |
879 | * - The remote node is lost. | |
880 | *- ENOTCONN | |
881 | * - The endpoint is not connected | |
882 | *- ENOTTY | |
883 | * - epd is not a valid endpoint descriptor | |
884 | *- ENXIO | |
885 | * - The range [loffset,loffset+len-1] is invalid for the registered address | |
886 | * space of epd, or, | |
887 | * - The range [roffset,roffset+len-1] is invalid for the registered address | |
888 | * space of the peer of epd, or | |
889 | * - loffset or roffset is negative | |
890 | */ | |
891 | int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t | |
892 | roffset, int rma_flags); | |
893 | ||
894 | /** | |
895 | * scif_writeto - Copy to a remote address space | |
896 | * \param epd endpoint descriptor | |
897 | * \param loffset offset in local registered address space | |
898 | * from which to copy | |
899 | * \param len length of range to copy | |
900 | * \param roffset offset in remote registered address space to | |
901 | * which to copy | |
902 | * \param rma_flags transfer mode flags | |
903 | * | |
904 | * scif_writeto() copies len bytes from the local registered address space of | |
905 | * epd, starting at the offset loffset to the remote registered address space | |
906 | * of the peer of endpoint epd, starting at the offset roffset. | |
907 | * | |
908 | * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ | |
909 | * len-1] must be within some registered window or windows of the local and | |
910 | * remote nodes respectively. A range may intersect multiple registered | |
911 | * windows, but only if those windows are contiguous in the registered address | |
912 | * space. | |
913 | * | |
914 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
915 | * programmed read/writes. Otherwise the data is copied using DMA. If | |
916 | * rma_flags includes SCIF_RMA_SYNC, then scif_writeto() will return after | |
917 | * the transfer is complete. Otherwise, the transfer may be performed | |
918 | * asynchronously. The order in which any two asynchronous RMA operations | |
919 | * complete is non-deterministic. The synchronization functions, | |
920 | * scif_fence_mark()/scif_fence_wait() and scif_fence_signal(), can be used to | |
921 | * synchronize to the completion of asynchronous RMA operations. | |
922 | * | |
923 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
924 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
925 | * cacheline or partial cacheline of the source range will become visible on | |
926 | * the destination node after all other transferred data in the source | |
927 | * range has become visible on the destination node. | |
928 | * | |
929 | * The optimal DMA performance will likely be realized if both | |
930 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower | |
931 | * performance will likely be realized if loffset and roffset are not cacheline | |
932 | * aligned but are separated by some multiple of 64. The lowest level of | |
933 | * performance is likely if loffset and roffset are not separated by a multiple | |
934 | * of 64. | |
935 | * | |
936 | * The rma_flags argument is formed by ORing together zero or more of the | |
937 | * following values: | |
938 | *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA | |
939 | * engine. | |
940 | *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the | |
941 | * transfer has completed. Passing this flag might result in | |
942 | * the API busy waiting and consuming CPU cycles while the DMA | |
943 | * transfer is in progress. | |
944 | *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of | |
945 | * the source range becomes visible on the destination node | |
946 | * after all other transferred data in the source range has | |
947 | * become visible on the destination | |
948 | * | |
949 | *\return | |
950 | * Upon successful completion, scif_writeto() returns 0; otherwise: in user | |
951 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
952 | * the negative of one of the following errors is returned. | |
953 | * | |
954 | *\par Errors: | |
955 | *- EACCES | |
956 | * - Attempt to write to a read-only range or read from a write-only range | |
957 | *- EBADF | |
958 | * - epd is not a valid endpoint descriptor | |
959 | *- ECONNRESET | |
960 | * - A connection was forcibly closed by a peer. | |
961 | *- EINVAL | |
962 | * - epd is not a valid endpoint descriptor, or | |
963 | * - rma_flags is invalid | |
964 | *- ENODEV | |
965 | * - The remote node is lost. | |
966 | *- ENOTCONN | |
967 | * - The endpoint is not connected | |
968 | *- ENOTTY | |
969 | * - epd is not a valid endpoint descriptor | |
970 | *- ENXIO | |
971 | * - The range [loffset,loffset+len-1] is invalid for the registered address | |
972 | * space of epd, or, | |
973 | * - The range [roffset,roffset+len-1] is invalid for the registered | |
974 | * address space of the peer of epd, or | |
975 | * - loffset or roffset is negative | |
976 | */ | |
977 | int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t | |
978 | roffset, int rma_flags); | |
979 | ||
980 | /** | |
981 | * scif_vreadfrom - Copy from a remote address space | |
982 | * \param epd endpoint descriptor | |
983 | * \param addr address to which to copy | |
984 | * \param len length of range to copy | |
985 | * \param roffset offset in remote registered address space | |
986 | * from which to copy | |
987 | * \param rma_flags transfer mode flags | |
988 | * | |
989 | * scif_vreadfrom() copies len bytes from the remote registered address | |
990 | * space of the peer of endpoint epd, starting at the offset roffset, to local | |
991 | * memory, starting at addr. addr is interpreted as a user space address. | |
992 | * | |
993 | * The specified range [roffset,roffset+len-1] must be within some registered | |
994 | * window or windows of the remote node. The range may intersect | |
995 | * multiple registered windows, but only if those windows are contiguous in the | |
996 | * registered address space. | |
997 | * | |
998 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
999 | * programmed read/writes. Otherwise the data is copied using DMA. If | |
1000 | * rma_flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after | |
1001 | * the transfer is complete. Otherwise, the transfer may be performed | |
1002 | * asynchronously. The order in which any two asynchronous RMA operations | |
1003 | * complete is non-deterministic. The synchronization functions, | |
1004 | * scif_fence_mark()/scif_fence_wait() and scif_fence_signal(), can be used to | |
1005 | * synchronize to the completion of asynchronous RMA operations. | |
1006 | * | |
1007 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
1008 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
1009 | * cacheline or partial cacheline of the source range will become visible on | |
1010 | * the destination node after all other transferred data in the source | |
1011 | * range has become visible on the destination node. | |
1012 | * | |
1013 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back | |
1014 | * the specified local memory range may remain in a pinned state even after | |
1015 | * the specified transfer completes. This may reduce overhead if some or all of | |
1016 | * the same virtual address range is referenced in a subsequent call of | |
1017 | * scif_vreadfrom() or scif_vwriteto(). | |
1018 | * | |
1019 | * The optimal DMA performance will likely be realized if both | |
1020 | * addr and roffset are cacheline aligned (are a multiple of 64). Lower | |
1021 | * performance will likely be realized if addr and roffset are not | |
1022 | * cacheline aligned but are separated by some multiple of 64. The lowest level | |
1023 | * of performance is likely if addr and roffset are not separated by a | |
1024 | * multiple of 64. | |
1025 | * | |
1026 | * The rma_flags argument is formed by ORing together zero or more of the | |
1027 | * following values: | |
1028 | *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA | |
1029 | * engine. | |
1030 | *- SCIF_RMA_USECACHE: enable registration caching | |
1031 | *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the | |
1032 | * transfer has completed. Passing this flag might result in | |
1033 | * the API busy waiting and consuming CPU cycles while the DMA | |
1034 | * transfer is in progress. | |
1035 | *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of | |
1036 | * the source range becomes visible on the destination node | |
1037 | * after all other transferred data in the source range has | |
1038 | * become visible on the destination | |
1039 | * | |
1040 | *\return | |
1041 | * Upon successful completion, scif_vreadfrom() returns 0; otherwise: in user | |
1042 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
1043 | * the negative of one of the following errors is returned. | |
1044 | * | |
1045 | *\par Errors: | |
1046 | *- EACCES | |
1047 | * - Attempt to write to a read-only range or read from a write-only range | |
1048 | *- EBADF | |
1049 | * - epd is not a valid endpoint descriptor | |
1050 | *- ECONNRESET | |
1051 | * - A connection was forcibly closed by a peer. | |
1052 | *- EFAULT | |
1053 | * - Addresses in the range [addr,addr+len-1] are invalid | |
1054 | *- EINVAL | |
1055 | * - epd is not a valid endpoint descriptor, or | |
1056 | * - rma_flags is invalid | |
1057 | *- ENODEV | |
1058 | * - The remote node is lost. | |
1059 | *- ENOTCONN | |
1060 | * - The endpoint is not connected | |
1061 | *- ENOTTY | |
1062 | * - epd is not a valid endpoint descriptor | |
1063 | *- ENXIO | |
1064 | * - Addresses in the range [roffset,roffset+len-1] are invalid for the | |
1065 | * registered address space of the peer of epd. | |
1066 | */ | |
1067 | int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t offset, | |
1068 | int rma_flags); | |
1069 | ||
1070 | /** | |
1071 | * scif_vwriteto - Copy to a remote address space | |
1072 | * \param epd endpoint descriptor | |
1073 | * \param addr address from which to copy | |
1074 | * \param len length of range to copy | |
1075 | * \param roffset offset in remote registered address space to | |
1076 | * which to copy | |
1077 | * \param rma_flags transfer mode flags | |
1078 | * | |
1079 | * scif_vwriteto() copies len bytes from the local memory, starting at addr, to | |
1080 | * the remote registered address space of the peer of endpoint epd, starting at | |
1081 | * the offset roffset. addr is interpreted as a user space address. | |
1082 | * | |
1083 | * The specified range [roffset,roffset+len-1] must be within some registered | |
1084 | * window or windows of the remote node. The range may intersect | |
1085 | * multiple registered windows, but only if those windows are contiguous in the | |
1086 | * registered address space. | |
1087 | * | |
1088 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
1089 | * programmed read/writes. Otherwise the data is copied using DMA. If | |
1090 | * rma_flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after | |
1091 | * the transfer is complete. Otherwise, the transfer may be performed | |
1092 | * asynchronously. The order in which any two asynchronous RMA operations | |
1093 | * complete is non-deterministic. The synchronization functions, | |
1094 | * scif_fence_mark()/scif_fence_wait() and scif_fence_signal(), can be used to | |
1095 | * synchronize to the completion of asynchronous RMA operations. | |
1096 | * | |
1097 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
1098 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
1099 | * cacheline or partial cacheline of the source range will become visible on | |
1100 | * the destination node after all other transferred data in the source | |
1101 | * range has become visible on the destination node. | |
1102 | * | |
1103 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back | |
1104 | * the specified local memory range may remain in a pinned state even after | |
1105 | * the specified transfer completes. This may reduce overhead if some or all of | |
1106 | * the same virtual address range is referenced in a subsequent call of | |
1107 | * scif_vreadfrom() or scif_vwriteto(). | |
1108 | * | |
1109 | * The optimal DMA performance will likely be realized if both | |
1110 | * addr and offset are cacheline aligned (are a multiple of 64). Lower | |
1111 | * performance will likely be realized if addr and offset are not cacheline | |
1112 | * aligned but are separated by some multiple of 64. The lowest level of | |
1113 | * performance is likely if addr and offset are not separated by a multiple of | |
1114 | * 64. | |
1115 | * | |
1116 | * The rma_flags argument is formed by ORing together zero or more of the | |
1117 | * following values: | |
1118 | *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA | |
1119 | * engine. | |
1120 | *- SCIF_RMA_USECACHE: allow registration caching | |
1121 | *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the | |
1122 | * transfer has completed. Passing this flag might result in | |
1123 | * the API busy waiting and consuming CPU cycles while the DMA | |
1124 | * transfer is in progress. | |
1125 | *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of | |
1126 | * the source range becomes visible on the destination node | |
1127 | * after all other transferred data in the source range has | |
1128 | * become visible on the destination | |
1129 | * | |
1130 | *\return | |
1131 | * Upon successful completion, scif_vwriteto() returns 0; otherwise: in user | |
1132 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
1133 | * the negative of one of the following errors is returned. | |
1134 | * | |
1135 | *\par Errors: | |
1136 | *- EACCES | |
1137 | * - Attempt to write to a read-only range or read from a write-only range | |
1138 | *- EBADF | |
1139 | * - epd is not a valid endpoint descriptor | |
1140 | *- ECONNRESET | |
1141 | * - A connection was forcibly closed by a peer. | |
1142 | *- EFAULT | |
1143 | * - Addresses in the range [addr,addr+len-1] are invalid | |
1144 | *- EINVAL | |
1145 | * - epd is not a valid endpoint descriptor, or | |
1146 | * - rma_flags is invalid | |
1147 | *- ENODEV | |
1148 | * - The remote node is lost. | |
1149 | *- ENOTCONN | |
1150 | * - The endpoint is not connected | |
1151 | *- ENOTTY | |
1152 | * - epd is not a valid endpoint descriptor | |
1153 | *- ENXIO | |
1154 | * - Addresses in the range [roffset,roffset+len-1] are invalid for the | |
1155 | * registered address space of the peer of epd. | |
1156 | */ | |
1157 | int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t offset, | |
1158 | int rma_flags); | |
1159 | ||
1160 | /** | |
1161 | * scif_fence_mark - Mark previously issued RMAs | |
1162 | * \param epd endpoint descriptor | |
1163 | * \param flags control flags | |
1164 | * \param mark marked handle returned as output. | |
1165 | * | |
1166 | * scif_fence_mark() returns after marking the current set of all uncompleted | |
1167 | * RMAs initiated through the endpoint epd or the current set of all | |
1168 | * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are | |
1169 | * marked with a value returned at mark. The application may subsequently call | |
1170 | * scif_fence_wait(), passing the value returned at mark, to await completion | |
1171 | * of all RMAs so marked. | |
1172 | * | |
1173 | * The flags argument has exactly one of the following values: | |
1174 | *- SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint | |
1175 | * epd are marked | |
1176 | *- SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer | |
1177 | * of endpoint epd are marked | |
1178 | * | |
1179 | * \return | |
1180 | * Upon successful completion, scif_fence_mark() returns 0; otherwise: in user | |
1181 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
1182 | * the negative of one of the following errors is returned. | |
1183 | * | |
1184 | *\par Errors: | |
1185 | *- EBADF | |
1186 | * - epd is not a valid endpoint descriptor | |
1187 | *- ECONNRESET | |
1188 | * - A connection was forcibly closed by a peer. | |
1189 | *- EINVAL | |
1190 | * - flags is invalid, or | |
1191 | * - epd is not a valid endpoint descriptor | |
1192 | *- ENODEV | |
1193 | * - The remote node is lost. | |
1194 | *- ENOTCONN | |
1195 | * - The endpoint is not connected | |
1196 | *- ENOMEM | |
1197 | * - Insufficient kernel memory was available. | |
1198 | *- ENOTTY | |
1199 | * - epd is not a valid endpoint descriptor | |
1200 | */ | |
1201 | int scif_fence_mark(scif_epd_t epd, int flags, int *mark); | |
1202 | ||
1203 | /** | |
1204 | * scif_fence_wait - Wait for completion of marked RMAs | |
1205 | * | |
1206 | * \param epd endpoint descriptor | |
1207 | * \param mark mark request | |
1208 | * | |
1209 | * scif_fence_wait() returns after all RMAs marked with mark have completed. | |
1210 | * The value passed in mark must have been obtained in a previous call to | |
1211 | * scif_fence_mark(). | |
1212 | * | |
1213 | *\return | |
1214 | * Upon successful completion, scif_fence_wait() returns 0; otherwise: in user | |
1215 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
1216 | * the negative of one of the following errors is returned. | |
1217 | * | |
1218 | *\par Errors: | |
1219 | *- EBADF | |
1220 | * - epd is not a valid endpoint descriptor | |
1221 | *- ECONNRESET | |
1222 | * - A connection was forcibly closed by a peer. | |
1223 | *- EINVAL | |
1224 | * - epd is not a valid endpoint descriptor | |
1225 | *- ENODEV | |
1226 | * - The remote node is lost. | |
1227 | *- ENOTCONN | |
1228 | * - The endpoint is not connected | |
1229 | *- ENOMEM | |
1230 | * - Insufficient kernel memory was available. | |
1231 | *- ENOTTY | |
1232 | * - epd is not a valid endpoint descriptor | |
1233 | */ | |
1234 | int scif_fence_wait(scif_epd_t epd, int mark); | |
1235 | ||
1236 | /** | |
1237 | * scif_fence_signal - Request a signal on completion of RMAs | |
1238 | * \param loff local offset | |
1239 | * \param lval local value to write to loff | |
1240 | * \param roff remote offset | |
1241 | * \param rval remote value to write to roff | |
1242 | * \param flags flags | |
1243 | * | |
1244 | * scif_fence_signal() returns after marking the current set of all uncompleted | |
1245 | * RMAs initiated through the endpoint epd or marking the current set of all | |
1246 | * uncompleted RMAs initiated through the peer of endpoint epd. | |
1247 | * | |
1248 | * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the | |
1249 | * marked set, lval is written to memory at the address corresponding to offset | |
1250 | * loff in the local registered address space of epd. loff must be within a | |
1251 | * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion | |
1252 | * of the RMAs in the marked set, rval is written to memory at the address | |
1253 | * corresponding to offset roff in the remote registered address space of epd. | |
1254 | * roff must be within a remote registered window of the peer of epd. Note | |
1255 | * that any specified offset must be DWORD (4 byte / 32 bit) aligned. | |
1256 | * | |
1257 | * The flags argument is formed by OR'ing together the following: | |
1258 | *- Exactly one of the following values: | |
1259 | * - SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint | |
1260 | * epd are marked | |
1261 | * - SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer | |
1262 | * of endpoint epd are marked | |
1263 | *- One or more of the following values: | |
1264 | * - SCIF_SIGNAL_LOCAL: On completion of the marked set of RMAs, write lval to | |
1265 | * memory at the address corresponding to offset loff in the local registered | |
1266 | * address space of epd. | |
1267 | * - SCIF_SIGNAL_REMOTE: On completion of the marked set of RMAs, write rval to | |
1268 | * memory at the address corresponding to offset roff in the remote registered | |
1269 | * address space of epd. | |
1270 | * | |
1271 | *\return | |
1272 | * Upon successful completion, scif_fence_signal() returns 0; otherwise: in | |
1273 | * user mode -1 is returned and errno is set to indicate the error; in kernel | |
1274 | * mode the negative of one of the following errors is returned. | |
1275 | *\par Errors: | |
1276 | *- EBADF | |
1277 | * - epd is not a valid endpoint descriptor | |
1278 | *- ECONNRESET | |
1279 | * - A connection was forcibly closed by a peer. | |
1280 | *- EINVAL | |
1281 | * - epd is not a valid endpoint descriptor, or | |
1282 | * - flags is invalid, or | |
1283 | * - loff or roff are not DWORD aligned | |
1284 | *- ENODEV | |
1285 | * - The remote node is lost. | |
1286 | *- ENOTCONN | |
1287 | * - The endpoint is not connected | |
1288 | *- ENOTTY | |
1289 | * - epd is not a valid endpoint descriptor | |
1290 | *- ENXIO | |
1291 | * - loff is invalid for the registered address space of epd, or | |
1292 | * - roff is invalid for the registered address space of the peer of epd | |
1293 | */ | |
1294 | int scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, off_t roff, | |
1295 | uint64_t rval, int flags); | |
1296 | ||
1297 | /** | |
1298 | * scif_get_nodeIDs - Return information about online nodes | |
1299 | * \param nodes array in which to return online node IDs | |
1300 | * \param len number of entries in the nodes array | |
1301 | * \param self address to place the node ID of the local node | |
1302 | * | |
1303 | * scif_get_nodeIDs() fills in the nodes array with up to len node IDs of the | |
1304 | * nodes in the SCIF network. If there is not enough space in nodes, as | |
1305 | * indicated by the len parameter, only len node IDs are returned in nodes. The | |
1306 | * return value of scif_get_nodeIDs() is the total number of nodes currently in | |
1307 | * the SCIF network. By checking the return value against the len parameter, the user may | |
1308 | * determine if enough space for nodes was allocated. | |
1309 | * | |
1310 | * The node ID of the local node is returned at self. | |
1311 | * | |
1312 | *\return | |
1313 | * Upon successful completion, scif_get_nodeIDs() returns the actual number of | |
1314 | * online nodes in the SCIF network including 'self'; otherwise: in user mode | |
1315 | * -1 is returned and errno is set to indicate the error; in kernel mode no | |
1316 | * errors are returned. | |
1317 | * | |
1318 | *\par Errors: | |
1319 | *- EFAULT | |
1320 | * - Bad address | |
1321 | */ | |
1322 | int scif_get_nodeIDs(uint16_t *nodes, int len, uint16_t *self); | |
1323 | ||
1324 | ||
1325 | /** | |
1326 | * scif_pin_pages - Pin a set of pages | |
1327 | * \param addr Virtual address of range to pin | |
1328 | * \param len Length of range to pin | |
1329 | * \param prot_flags Page protection flags | |
1330 | * \param map_flags Page classification flags | |
1331 | * \param pinned_pages Opaque handle of pinned pages | |
1332 | * | |
1333 | * scif_pin_pages() pins (locks in physical memory) the physical pages which | |
1334 | * back the range of virtual address pages starting at addr and continuing for | |
1335 | * len bytes. addr and len are constrained to be multiples of the page size. A | |
1336 | * successful scif_pin_pages() call returns an opaque pointer value at | |
1337 | * pinned_pages which may be used in subsequent calls to | |
1338 | * scif_register_pinned_pages(). | |
1339 | * | |
1340 | * The pages will remain pinned as long as there is a reference against the | |
1341 | * scif_pinned_pages_t value returned by scif_pin_pages() and until | |
1342 | * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A | |
1343 | * reference is added to a scif_pinned_pages_t value each time a window is | |
1344 | * created by calling scif_register_pinned_pages() and passing the | |
1345 | * scif_pinned_pages_t value. A reference is removed from a scif_pinned_pages_t value | |
1346 | * each time such a window is deleted. | |
1347 | * | |
1348 | * Subsequent operations which change the memory pages to which virtual | |
1349 | * addresses are mapped (such as mmap(), munmap(), scif_mmap() and | |
1350 | * scif_munmap()) have no effect on the scif_pinned_pages_t value or windows | |
1351 | * created against it. | |
1352 | * | |
1353 | * On Linux, if the process will fork(), it is recommended that the registered | |
1354 | * virtual address range be marked with MADV_DONTFORK. Doing so will prevent | |
1355 | * problems due to copy-on-write semantics. | |
1356 | * | |
1357 | * The prot_flags argument is formed by OR'ing together one or more of the | |
1358 | * following values: | |
1359 | *- SCIF_PROT_READ: allow read operations against the pages | |
1360 | *- SCIF_PROT_WRITE: allow write operations against the pages | |
1361 | * The map_flags argument is formed by OR'ing together zero or more of the | |
1362 | * following values: | |
1363 | *- SCIF_MAP_KERNEL: interpret addr as a kernel space address. By default, addr | |
1364 | * is interpreted as a user space address. | |
1365 | * | |
1366 | *\return | |
1367 | * Upon successful completion, scif_register() returns 0; otherwise the | |
1368 | * negative of one of the following errors is returned. | |
1369 | *\par Errors: | |
1370 | *- EFAULT | |
1371 | * - Addresses in the range [addr,addr+len-1] are invalid | |
1372 | *- EINVAL | |
1373 | * - prot_flags is invalid, | |
1374 | * - map_flags is invalid, or | |
1375 | * - offset is negative | |
1376 | *- ENOMEM | |
1377 | * - Not enough space | |
1378 | */ | |
1379 | int | |
1380 | scif_pin_pages( | |
1381 | void *addr, | |
1382 | size_t len, | |
1383 | int prot_flags, | |
1384 | int map_flags, | |
1385 | scif_pinned_pages_t *pinned_pages); | |
1386 | ||
1387 | /** | |
1388 | * scif_unpin_pages - Unpin a set of pages | |
1389 | * \param pinned_pages Opaque handle of pages to be unpinned | |
1390 | * | |
1391 | * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new | |
1392 | * windows against pinned_pages. The physical pages represented by pinned_pages | |
1393 | * will remain pinned until all windows previously registered against | |
1394 | * pinned_pages are deleted (the window is scif_unregister()'d and all | |
1395 | * references to the window are removed (see scif_unregister())). | |
1396 | * | |
1397 | * pinned_pages must have been obtained from a previous call to scif_pin_pages(). | |
1398 | * After calling scif_unpin_pages(), it is an error to pass pinned_pages to | |
1399 | * scif_register_pinned_pages(). | |
1400 | * | |
1401 | *\return | |
1402 | * Upon successful completion, scif_unpin_pages() returns 0; otherwise the | |
1403 | * negative of one of the following errors is returned. | |
1404 | * | |
1405 | *\par Errors: | |
1406 | *- EINVAL | |
1407 | * - pinned_pages is not valid | |
1408 | */ | |
1409 | int | |
1410 | scif_unpin_pages( | |
1411 | scif_pinned_pages_t pinned_pages); | |
1412 | ||
1413 | /** | |
1414 | * scif_register_pinned_pages - Mark a memory region for remote access. | |
1415 | * \param epd Endpoint descriptor | |
1416 | * \param pinned_pages Opaque handle of pinned pages | |
1417 | * \param offset Registered address space offset | |
1418 | * \param map_flags Flags which control where pages are mapped | |
1419 | * | |
1420 | * The scif_register_pinned_pages() function opens a window, a range of whole | |
1421 | * pages of the registered address space of the endpoint epd, starting at | |
1422 | * offset po. The value of po, further described below, is a function of the | |
1423 | * parameters offset and pinned_pages, and the value of map_flags. Each page of | |
1424 | * the window represents a corresponding physical memory page of the range | |
1425 | * represented by pinned_pages; the length of the window is the same as the | |
1426 | * length of the range represented by pinned_pages. A successful | |
1427 | * scif_register_pinned_pages() call returns po as the return value. | |
1428 | * | |
1429 | * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset | |
1430 | * exactly, and offset is constrained to be a multiple of the page size. The | |
1431 | * mapping established by scif_register_pinned_pages() will not replace any existing | |
1432 | * registration; an error is returned if any page of the new window would | |
1433 | * intersect an existing window. | |
1434 | * | |
1435 | * When SCIF_MAP_FIXED is not set, the implementation uses offset in an | |
1436 | * implementation-defined manner to arrive at po. The po so chosen will be an | |
1437 | * area of the registered address space that the implementation deems suitable | |
1438 | * for a mapping of the required size. An offset value of 0 is interpreted as | |
1439 | * granting the implementation complete freedom in selecting po, subject to | |
1440 | * constraints described below. A non-zero value of offset is taken to be a | |
1441 | * suggestion of an offset near which the mapping should be placed. When the | |
1442 | * implementation selects a value for po, it does not replace any extant | |
1443 | * window. In all cases, po will be a multiple of the page size. | |
1444 | * | |
1445 | * The physical pages which are so represented by a window are available for | |
1446 | * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), | |
1447 | * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the | |
1448 | * physical pages represented by the window will not be reused by the memory | |
1449 | * subsystem for any other purpose. Note that the same physical page may be | |
1450 | * represented by multiple windows. | |
1451 | * | |
1452 | * Windows created by scif_register_pinned_pages() are unregistered by | |
1453 | * scif_unregister(). | |
1454 | * | |
1455 | * The map_flags argument is formed by OR'ing together zero or more of the | |
1456 | * following values: | |
1457 | *- SCIF_MAP_FIXED: interpret offset exactly | |
1458 | * | |
1459 | *\return | |
1460 | * Upon successful completion, scif_register_pinned_pages() returns the offset | |
1461 | * at which the mapping was placed (po); otherwise the negative of one of the | |
1462 | * following errors is returned. | |
1463 | *\par Errors: | |
1464 | *- EADDRINUSE | |
1465 | * - SCIF_MAP_FIXED is set in map_flags and pages in the new | |
1466 | * window would intersect an existing window | |
1467 | *- EAGAIN | |
1468 | * - The mapping could not be performed due to lack of resources | |
1469 | *- ECONNRESET | |
1470 | * - A connection was forcibly closed by a peer. | |
1471 | *- EINVAL | |
1472 | * - epd is not a valid endpoint descriptor, or | |
1473 | * - map_flags is invalid, or | |
1474 | * - SCIF_MAP_FIXED is set in map_flags, and offset is not a | |
1475 | * multiple of the page size, or | |
1476 | * - offset is negative | |
1477 | *- ENODEV | |
1478 | * - The remote node is lost. | |
1479 | *- ENOMEM | |
1480 | * - Not enough space | |
1481 | *- ENOTCONN | |
1482 | * - The endpoint is not connected | |
1483 | */ | |
1484 | off_t | |
1485 | scif_register_pinned_pages( | |
1486 | scif_epd_t epd, | |
1487 | scif_pinned_pages_t pinned_pages, | |
1488 | off_t offset, | |
1489 | int map_flags); | |
1490 | ||
1491 | /** | |
1492 | * scif_get_pages - Add references to remote registered pages | |
1493 | * \param epd endpoint descriptor | |
1494 | * \param offset registered address space offset | |
1495 | * \param len length of range of pages | |
1496 | * \param pages address at which the scif_range structure pointer is returned | |
1497 | * | |
1498 | * scif_get_pages() returns the addresses of the physical pages represented by | |
1499 | * those pages of the registered address space of the peer of epd, starting at | |
1500 | * offset and continuing for len bytes. offset and len are constrained to be | |
1501 | * multiples of the page size. | |
1502 | * | |
1503 | * All of the pages in the specified range [offset,offset+len-1] must be within | |
1504 | * a single window of the registered address space of the peer of epd. | |
1505 | * | |
1506 | * The addresses are returned as a virtually contiguous array pointed to by the | |
1507 | * phys_addr component of the scif_range structure whose address is returned in | |
1508 | * pages. The nr_pages component of scif_range is the length of the array. The | |
1509 | * prot_flags component of scif_range holds the protection flag value passed | |
1510 | * when the pages were registered. | |
1511 | * | |
1512 | * Each physical page whose address is returned by scif_get_pages() remains | |
1513 | * available and will not be released for reuse until the scif_range structure | |
1514 | * is returned in a call to scif_put_pages(). The scif_range structure returned | |
1515 | * by scif_get_pages() must be unmodified. | |
1516 | * | |
1517 | * It is an error to call scif_close() on an endpoint on which a scif_range | |
1518 | * structure of that endpoint has not been returned to scif_put_pages(). | |
1519 | * | |
1520 | *\return | |
1521 | * Upon successful completion, scif_get_pages() returns 0; otherwise the | |
1522 | * negative of one of the following errors is returned. | |
1523 | *\par Errors: | |
1524 | *- ECONNRESET | |
1525 | * - A connection was forcibly closed by a peer. | |
1526 | *- EINVAL | |
1527 | * - epd is not a valid endpoint descriptor, or | |
1528 | * - offset is not a multiple of the page size, or | |
1529 | * - offset is negative, or | |
1530 | * - len is not a multiple of the page size | |
1531 | *- ENODEV | |
1532 | * - The remote node is lost. | |
1533 | *- ENOTCONN | |
1534 | * - The endpoint is not connected | |
1535 | *- ENXIO | |
1536 | * - Addresses in the range [offset,offset+len-1] are invalid | |
1537 | * for the registered address space of the peer of epd. | |
1538 | */ | |
1539 | int scif_get_pages( | |
1540 | scif_epd_t epd, | |
1541 | off_t offset, | |
1542 | size_t len, | |
1543 | struct scif_range **pages); | |
1544 | ||
1545 | /** | |
1546 | * scif_put_pages - Remove references from remote registered pages | |
1547 | * \param pages scif_range structure to be returned | |
1548 | * | |
1549 | * scif_put_pages() releases a scif_range structure previously obtained by | |
1550 | * calling scif_get_pages(). The physical pages represented by pages may | |
1551 | * be reused when the window which represented those pages is unregistered. | |
1552 | * Therefore, those pages must not be accessed after calling scif_put_pages(). | |
1553 | * | |
1554 | *\return | |
1555 | * Upon successful completion, scif_put_pages() returns 0; otherwise the | |
1556 | * negative of one of the following errors is returned. | |
1557 | *\par Errors: | |
1558 | *- EINVAL | |
1559 | * - pages does not point to a valid scif_range structure, or | |
1560 | * - the scif_range structure pointed to by pages was already returned. | |
1561 | *- ENODEV | |
1562 | * - The remote node is lost. | |
1563 | *- ENOTCONN | |
1564 | * - The endpoint is not connected. | |
1565 | */ | |
1566 | int scif_put_pages( | |
1567 | struct scif_range *pages); | |
1568 | ||
1569 | /** | |
1570 | * scif_poll - Wait for some event on an endpoint | |
1571 | * \param epds Array of endpoint descriptors | |
1572 | * \param nepds Length of epds | |
1573 | * \param timeout Upper limit on time for which scif_poll() will | |
1574 | * block | |
1575 | * | |
1576 | * scif_poll() waits for one of a set of endpoints to become ready to perform | |
1577 | * an I/O operation. scif_poll() exposes a subset of the functionality of the | |
1578 | * POSIX standard poll() function. | |
1579 | * | |
1580 | * The epds argument specifies the endpoint descriptors to be examined and the | |
1581 | * events of interest for each endpoint descriptor. epds is a pointer to an | |
1582 | * array with one member for each open endpoint descriptor of interest. | |
1583 | * | |
1584 | * The number of items in the epds array is specified in nepds. The epd field | |
1585 | * of scif_pollepd is an endpoint descriptor of an open endpoint. The field | |
1586 | * events is a bitmask specifying the events which the application is | |
1587 | * interested in. The field revents is an output parameter, filled by the | |
1588 | * kernel with the events that actually occurred. The bits returned in revents | |
1589 | * can include any of those specified in events, or one of the values | |
1590 | * SCIF_POLLERR, SCIF_POLLHUP, or SCIF_POLLNVAL. (These three bits are | |
1591 | * meaningless in the events field, and will be set in the revents field | |
1592 | * whenever the corresponding condition is true.) | |
1593 | * | |
1594 | * If none of the events requested (and no error) has occurred for any of the | |
1595 | * endpoint descriptors, then scif_poll() blocks until one of the events occurs. | |
1596 | * | |
1597 | * The timeout argument specifies an upper limit on the time for which | |
1598 | * scif_poll() will block, in milliseconds. Specifying a negative value in | |
1599 | * timeout means an infinite timeout. | |
1600 | * | |
1601 | * The following bits may be set in events and returned in revents: | |
1602 | *- SCIF_POLLIN: Data may be received without blocking. For a connected | |
1603 | * endpoint, this means that scif_recv() may be called without blocking. For a | |
1604 | * listening endpoint, this means that scif_accept() may be called without | |
1605 | * blocking. | |
1606 | *- SCIF_POLLOUT: Data may be sent without blocking. For a connected endpoint, | |
1607 | * this means that scif_send() may be called without blocking. This bit value | |
1608 | * has no meaning for a listening endpoint and is ignored if specified. | |
1609 | * | |
1610 | * The following bits are only returned in revents, and are ignored if set in | |
1611 | * events: | |
1612 | *- SCIF_POLLERR: An error occurred on the endpoint | |
1613 | *- SCIF_POLLHUP: The connection to the peer endpoint was disconnected | |
1614 | *- SCIF_POLLNVAL: The specified endpoint descriptor is invalid. | |
1615 | * | |
1616 | *\return | |
1617 | * Upon successful completion, scif_poll() returns a non-negative value. A | |
1618 | * positive value indicates the total number of endpoint descriptors that have | |
1619 | * been selected (that is, endpoint descriptors for which the revents member is | |
1620 | * non-zero). A value of 0 indicates that the call timed out and no endpoint | |
1621 | * descriptors have been selected. Otherwise: in user mode -1 is returned and | |
1622 | * errno is set to indicate the error; in kernel mode the negative of one of | |
1623 | * the following errors is returned. | |
1624 | * | |
1625 | *\par Errors: | |
1626 | *- EFAULT | |
1627 | * - The array given as argument was not contained in the calling program's | |
1628 | * address space. | |
1629 | *- EINTR | |
1630 | * - A signal occurred before any requested event. | |
1631 | *- EINVAL | |
1632 | * - The nepds argument is greater than {OPEN_MAX} | |
1633 | *- ENOMEM | |
1634 | * - There was no space to allocate file descriptor tables. | |
1635 | */ | |
1636 | int | |
1637 | scif_poll( | |
1638 | struct scif_pollepd *epds, | |
1639 | unsigned int nepds, | |
1640 | long timeout); | |
1641 | ||
1642 | /** | |
1643 | * scif_event_register - Register an event handler | |
1644 | * \param handler Event handler to be registered | |
1645 | * | |
1646 | * scif_event_register() registers a routine, handler, to be called when some | |
1647 | * event occurs. The event parameter to handler indicates the type of event | |
1648 | * which has occurred, and the corresponding component of the data parameter to | |
1649 | * handler provides additional data about the event. | |
1650 | * | |
1651 | * The following events are defined: | |
1652 | *- SCIF_NODE_ADDED: A node has been added to the SCIF network. The | |
1653 | * scif_node_added component of the data parameter to handler identifies the | |
1654 | * node. This event is informational. There are no requirements on the event | |
1655 | * handler. | |
1656 | *- SCIF_NODE_REMOVED: A node is being removed from the SCIF network. The | |
1657 | * scif_node_removed component of the data parameter to handler identifies the | |
1658 | * node. Upon being called, and before returning, the event handler must | |
1659 | * return, using scif_put_pages(), all structures obtained using | |
1660 | * scif_get_pages() against an endpoint connected to the lost node. It is | |
1661 | * recommended and expected that the handler will also scif_close() all | |
1662 | * endpoints connected to the lost node. | |
1663 | * | |
1664 | *\return | |
1665 | * Upon successful completion, scif_event_register() returns 0. | |
1666 | * | |
1667 | *\par Errors: | |
1668 | *- ENOMEM | |
1669 | * - There was no space to allocate file descriptor tables. | |
1670 | */ | |
1671 | ||
1672 | int | |
1673 | scif_event_register( | |
1674 | scif_callback_t handler); | |
1675 | ||
1676 | /** | |
1677 | * scif_event_unregister - Unregister event handler | |
1678 | * \param handler Event handler to be unregistered | |
1679 | * | |
1680 | * scif_event_unregister() unregisters the handler which was registered | |
1681 | * previously by using scif_event_register(). | |
1682 | * | |
1683 | * WARNING: scif_event_unregister() must be called before the module | |
1684 | * (that registered handlers) exits for every handler that is registered. | |
1685 | * Failure to do so will result in a crash of the scif module. | |
1686 | * | |
1687 | *\return | |
1688 | * Upon successful completion, scif_event_unregister() returns 0. | |
1689 | *\par Errors: | |
1690 | *- EINVAL | |
1691 | * - The event handler was not found/registered. | |
1692 | */ | |
1693 | int | |
1694 | scif_event_unregister( | |
1695 | scif_callback_t handler); | |
1696 | ||
1697 | /* | |
1698 | * PCI information for a node, as populated by scif_pci_info(). | |
1699 | * Note: pci_resource_start(pdev, index) and pci_resource_len(pdev, index) | |
1700 | * give the starting physical address and length of a PCI resource for each | |
1701 | * index whose va[] entry is non-NULL. MMIO BARs will not have | |
1702 | * IORESOURCE_PREFETCH set in the flags from pci_resource_flags(pdev, index). | |
1703 | * va[index] will be set to NULL for invalid resources. | |
1704 | */ | |
1705 | struct scif_pci_info { | |
1706 | /* pci_dev pointer associated with a node */ | |
1707 | struct pci_dev *pdev; | |
1708 | /* Ioremapped virtual address base for every valid PCIe resource; NULL if invalid */ | |
1709 | void __iomem *va[PCI_NUM_RESOURCES]; | |
1710 | }; | |
1711 | ||
1712 | /** | |
1713 | * scif_pci_info - Populate the scif_pci_info structure for a node. | |
1714 | * \param node The node to query | |
1715 | * \param dev The scif_pci_info structure to populate. | |
1716 | * | |
1717 | * scif_pci_info() populates the provided scif_pci_info structure, dev, with | |
1718 | * the information associated with node. The requested node ID cannot be the | |
1719 | * same as the current node. This routine will only return success when called | |
1720 | * from the host. | |
1721 | * | |
1722 | *\return | |
1723 | * Upon successful completion, scif_pci_info() returns 0; otherwise the | |
1724 | * negative of one of the following errors is returned. | |
1725 | * | |
1726 | *\par Errors: | |
1727 | *- EINVAL | |
1728 | * - The requested node is not valid, or | |
1729 | * - Called on MIC instead of the host. | |
1730 | *- ENODEV | |
1731 | * - No pci_dev association exists for the node. | |
1732 | */ | |
1733 | int | |
1734 | scif_pci_info( | |
1735 | uint16_t node, | |
1736 | struct scif_pci_info *dev); | |
1737 | ||
1738 | ||
1739 | #ifdef __cplusplus | |
1740 | } /* extern "C" */ | |
1741 | #endif | |
1742 | ||
1743 | #endif /* __SCIF_H__ */ |