| 1 | /* |
| 2 | * Copyright 2010-2017 Intel Corporation. |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or modify |
| 5 | * it under the terms of the GNU General Public License, version 2, |
| 6 | * as published by the Free Software Foundation. |
| 7 | * |
| 8 | * This program is distributed in the hope that it will be useful, |
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 11 | * General Public License for more details. |
| 12 | * |
| 13 | * Disclaimer: The codes contained in these modules may be specific to |
| 14 | * the Intel Software Development Platform codenamed Knights Ferry, |
| 15 | * and the Intel product codenamed Knights Corner, and are not backward |
| 16 | * compatible with other Intel products. Additionally, Intel will NOT |
| 17 | * support the codes or instruction set in future products. |
| 18 | * |
| 19 | * Intel offers no warranty of any kind regarding the code. This code is |
| 20 | * licensed on an "AS IS" basis and Intel is not obligated to provide |
| 21 | * any support, assistance, installation, training, or other services |
| 22 | * of any kind. Intel is also not obligated to provide any updates, |
| 23 | * enhancements or extensions. Intel specifically disclaims any warranty |
| 24 | * of merchantability, non-infringement, fitness for any particular |
| 25 | * purpose, and any other warranty. |
| 26 | * |
| 27 | * Further, Intel disclaims all liability of any kind, including but |
| 28 | * not limited to liability for infringement of any proprietary rights, |
| 29 | * relating to the use of the code, even if Intel is notified of the |
| 30 | * possibility of such liability. Except as expressly stated in an Intel |
| 31 | * license agreement provided with this code and agreed upon with Intel, |
| 32 | * no license, express or implied, by estoppel or otherwise, to any |
| 33 | * intellectual property rights is granted herein. |
| 34 | */ |
| 35 | |
| 36 | /* |
| 37 | * Revised 15:05 11/24/2010 |
| 38 | * Derived from SCIF SAS v0.41 with additional corrections |
| 39 | */ |
| 40 | |
| 41 | #ifndef __SCIF_H__ |
| 42 | #define __SCIF_H__ |
| 43 | |
| 44 | #include <linux/types.h> |
| 45 | #include <linux/errno.h> |
| 46 | #include <linux/poll.h> |
| 47 | #include <linux/pci.h> |
| 48 | |
| 49 | #ifdef __cplusplus |
| 50 | extern "C" { |
| 51 | #endif |
| 52 | |
/*
 * Blocking-mode flags.
 *
 * SCIF_ACCEPT_SYNC - scif_accept(): block until a connection request is
 *                    presented.
 * SCIF_SEND_BLOCK  - scif_send(): block until the entire message is sent.
 * SCIF_RECV_BLOCK  - scif_recv(): block until the entire message is received.
 *
 * Each flag belongs to the flags argument of a different call, so the shared
 * value 1 is not ambiguous.
 */
#define SCIF_ACCEPT_SYNC	1
#define SCIF_SEND_BLOCK		1
#define SCIF_RECV_BLOCK		1

/* Start: Deprecated temporary definitions for compatibility */
#define ACCEPT_SYNC	SCIF_ACCEPT_SYNC
#define SEND_BLOCK	SCIF_SEND_BLOCK
#define RECV_BLOCK	SCIF_RECV_BLOCK
/* End: Deprecated temporary definitions for compatibility */
| 62 | |
/*
 * Window protection flags; OR'ed together to form the prot_flags argument
 * of scif_register().
 */
enum {
	SCIF_PROT_READ	= (1<<0),	/* allow read operations from the window */
	SCIF_PROT_WRITE	= (1<<1)	/* allow write operations to the window */
};
| 67 | |
/*
 * Mapping flags; OR'ed together to form the map_flags argument of
 * scif_register().
 *
 * 0x40 is used internally by scif, so it must not be assigned here.
 */
enum {
	SCIF_MAP_FIXED	= 0x10,	/* interpret offset exactly */
	/* NOTE(review): semantics not documented in this part of the file */
	SCIF_MAP_KERNEL	= 0x20,
};
| 73 | |
/*
 * Fence-initiation flags (bits 0 and 1).
 *
 * NOTE(review): the call that consumes these is not visible in this part of
 * the file; presumably SELF/PEER selects which side's RMAs the fence covers -
 * confirm against the fence API documentation.
 */
enum {
	SCIF_FENCE_INIT_SELF	= (1<<0),
	SCIF_FENCE_INIT_PEER	= (1<<1)
};
| 78 | |
/*
 * Fence flags occupying bits 2 and 3, i.e. distinct from the
 * SCIF_FENCE_INIT_* bits.
 *
 * NOTE(review): the call that consumes these is not visible in this part of
 * the file - confirm the exact semantics there.
 */
enum {
	SCIF_FENCE_RAS_SELF	= (1<<2),
	SCIF_FENCE_RAS_PEER	= (1<<3)
};
| 83 | |
/*
 * Signal flags occupying bits 4 and 5, i.e. distinct from the SCIF_FENCE_*
 * bits.
 *
 * NOTE(review): presumably consumed by scif_fence_signal(), whose
 * declaration is outside this part of the file - confirm.
 */
enum {
	SCIF_SIGNAL_LOCAL	= (1<<4),
	SCIF_SIGNAL_REMOTE	= (1<<5)
};
| 88 | |
/*
 * RMA flags; one bit each so they can be OR'ed together.
 *
 * NOTE(review): the RMA calls that accept these are declared outside this
 * part of the file - confirm the per-flag semantics there.
 */
#define SCIF_RMA_USECPU		(1<<0)
#define SCIF_RMA_USECACHE	(1<<1)
#define SCIF_RMA_SYNC		(1<<2)
#define SCIF_RMA_ORDERED	(1<<3)
| 93 | //! @cond (Prevent doxygen from including these) |
/* SCIF names for the poll event bits; each one aliases the POLL* constant. */
#define SCIF_POLLIN	POLLIN
#define SCIF_POLLOUT	POLLOUT
#define SCIF_POLLERR	POLLERR
#define SCIF_POLLHUP	POLLHUP
#define SCIF_POLLNVAL	POLLNVAL
| 99 | |
/*
 * SCIF Reserved Ports
 *
 * Each client below owns a block of ten consecutive port numbers.
 */

/* COI: ports 40-49 */
#define SCIF_COI_PORT_0	40
#define SCIF_COI_PORT_1	41
#define SCIF_COI_PORT_2	42
#define SCIF_COI_PORT_3	43
#define SCIF_COI_PORT_4	44
#define SCIF_COI_PORT_5	45
#define SCIF_COI_PORT_6	46
#define SCIF_COI_PORT_7	47
#define SCIF_COI_PORT_8	48
#define SCIF_COI_PORT_9	49

/* OFED: ports 60-69 */
#define SCIF_OFED_PORT_0	60
#define SCIF_OFED_PORT_1	61
#define SCIF_OFED_PORT_2	62
#define SCIF_OFED_PORT_3	63
#define SCIF_OFED_PORT_4	64
#define SCIF_OFED_PORT_5	65
#define SCIF_OFED_PORT_6	66
#define SCIF_OFED_PORT_7	67
#define SCIF_OFED_PORT_8	68
#define SCIF_OFED_PORT_9	69

/* NETDEV: ports 80-89 */
#define SCIF_NETDEV_PORT_0	80
#define SCIF_NETDEV_PORT_1	81
#define SCIF_NETDEV_PORT_2	82
#define SCIF_NETDEV_PORT_3	83
#define SCIF_NETDEV_PORT_4	84
#define SCIF_NETDEV_PORT_5	85
#define SCIF_NETDEV_PORT_6	86
#define SCIF_NETDEV_PORT_7	87
#define SCIF_NETDEV_PORT_8	88
#define SCIF_NETDEV_PORT_9	89

/* RAS: ports 100-109 */
#define SCIF_RAS_PORT_0	100
#define SCIF_RAS_PORT_1	101
#define SCIF_RAS_PORT_2	102
#define SCIF_RAS_PORT_3	103
#define SCIF_RAS_PORT_4	104
#define SCIF_RAS_PORT_5	105
#define SCIF_RAS_PORT_6	106
#define SCIF_RAS_PORT_7	107
#define SCIF_RAS_PORT_8	108
#define SCIF_RAS_PORT_9	109

/* Power Management: ports 120-129 */
#define SCIF_PM_PORT_0	120
#define SCIF_PM_PORT_1	121
#define SCIF_PM_PORT_2	122
#define SCIF_PM_PORT_3	123
#define SCIF_PM_PORT_4	124
#define SCIF_PM_PORT_5	125
#define SCIF_PM_PORT_6	126
#define SCIF_PM_PORT_7	127
#define SCIF_PM_PORT_8	128
#define SCIF_PM_PORT_9	129

/* Board Tools: ports 130-139 */
#define SCIF_BT_PORT_0	130
#define SCIF_BT_PORT_1	131
#define SCIF_BT_PORT_2	132
#define SCIF_BT_PORT_3	133
#define SCIF_BT_PORT_4	134
#define SCIF_BT_PORT_5	135
#define SCIF_BT_PORT_6	136
#define SCIF_BT_PORT_7	137
#define SCIF_BT_PORT_8	138
#define SCIF_BT_PORT_9	139

/* MIC Boot/Configuration support: ports 160-169 (165-169 held in reserve) */
#define MPSSD_MONRECV	160
#define MIC_NOTIFY	161
#define MPSSD_CRED	162
#define MPSSD_MONSEND	163
#define MPSSD_MICCTRL	164
#define MPSSD_RESV5	165
#define MPSSD_RESV6	166
#define MPSSD_RESV7	167
#define MPSSD_RESV8	168
#define MPSSD_RESV9	169

/*
 * NOTE(review): presumably the exclusive upper bound of the privileged port
 * range - the scif_bind() documentation says ports less than 1024 can only
 * be bound by system (or root) processes. Confirm against the implementation.
 */
#define SCIF_ADMIN_PORT_END	1024

/* MYO: ports 1025-1034 */
#define SCIF_MYO_PORT_0	1025
#define SCIF_MYO_PORT_1	1026
#define SCIF_MYO_PORT_2	1027
#define SCIF_MYO_PORT_3	1028
#define SCIF_MYO_PORT_4	1029
#define SCIF_MYO_PORT_5	1030
#define SCIF_MYO_PORT_6	1031
#define SCIF_MYO_PORT_7	1032
#define SCIF_MYO_PORT_8	1033
#define SCIF_MYO_PORT_9	1034

/* SSG Tools: ports 1044-1053 */
#define SCIF_ST_PORT_0	1044
#define SCIF_ST_PORT_1	1045
#define SCIF_ST_PORT_2	1046
#define SCIF_ST_PORT_3	1047
#define SCIF_ST_PORT_4	1048
#define SCIF_ST_PORT_5	1049
#define SCIF_ST_PORT_6	1050
#define SCIF_ST_PORT_7	1051
#define SCIF_ST_PORT_8	1052
#define SCIF_ST_PORT_9	1053

/*
 * End of SCIF Reserved Ports.
 * scif_bind() with pn == 0 assigns a port number greater than or equal to
 * SCIF_PORT_RSVD.
 */
#define SCIF_PORT_RSVD	1088
| 213 | //! @endcond |
| 214 | |
/*
 * Endpoint descriptor: the handle returned by scif_open() and passed to the
 * other SCIF calls. struct endpt is not defined in this part of the file.
 */
typedef struct endpt *scif_epd_t;

/*
 * Handle to a set of pinned pages.
 * NOTE(review): the calls that produce and consume this handle are declared
 * outside this part of the file.
 */
typedef struct scif_pinned_pages *scif_pinned_pages_t;
| 218 | |
| 219 | struct scif_range { |
| 220 | void *cookie; /* cookie */ |
| 221 | int nr_pages; /* Number of Pages */ |
| 222 | int prot_flags; /* R/W protection */ |
| 223 | /* Arrays phys_addr/va below are virtually contiguous */ |
| 224 | dma_addr_t *phys_addr; /* Array of physical addresses */ |
| 225 | void **va; /* Array of virtual addresses |
| 226 | * and populated only when called |
| 227 | * on the host for a remote SCIF |
| 228 | * connection on MIC. |
| 229 | */ |
| 230 | }; |
| 231 | |
| 232 | struct scif_pollepd { |
| 233 | scif_epd_t epd; /* endpoint descriptor */ |
| 234 | short events; /* requested events */ |
| 235 | short revents; /* returned events */ |
| 236 | }; |
/* Event types passed as the first argument of a scif_callback_t handler. */
enum scif_event_type {
	SCIF_NODE_ADDED		= (1<<0),
	SCIF_NODE_REMOVED	= (1<<1)
};
| 241 | |
/*
 * Event payload passed as the second argument of a scif_callback_t handler.
 * Both members overlay the same 16-bit storage.
 * NOTE(review): presumably the member matching the event type carries the
 * id of the affected node - confirm against the code that raises events.
 */
union eventd {
	uint16_t scif_node_added;
	uint16_t scif_node_removed;
};
| 246 | |
/*
 * Signature of an event handler: receives the event type and the payload
 * that goes with it.
 */
typedef void (*scif_callback_t)(enum scif_event_type event,
				union eventd data);
| 249 | |
| 250 | struct scif_callback { |
| 251 | struct list_head list_member; |
| 252 | scif_callback_t callback_handler; |
| 253 | }; |
| 254 | |
/*
 * Failure sentinels.
 *
 * SCIF_OPEN_FAILED     - returned by scif_open() in user mode.
 * SCIF_REGISTER_FAILED - returned by scif_register() in user mode.
 * SCIF_MMAP_FAILED     - NOTE(review): presumably returned by scif_mmap(),
 *                        which is declared outside this part of the file.
 */
#define SCIF_OPEN_FAILED	((scif_epd_t)-1)
#define SCIF_REGISTER_FAILED	((off_t)-1)
#define SCIF_MMAP_FAILED	((void *)-1)
| 258 | |
/* Global port identifier: a node id plus a port number local to that node. */
struct scif_portID {
	uint16_t node;	/* node on which port resides */
	uint16_t port;	/* local port number */
};
| 263 | |
/* Start: Deprecated temporary definitions for compatibility */
/* The macro makes "struct portID" expand to "struct scif_portID". */
#define portID scif_portID
typedef struct portID portID_t;
/* End: Deprecated temporary definitions for compatibility */
| 268 | |
| 269 | /** |
| 270 | * scif_open - Create an endpoint |
| 271 | * |
| 272 | * The scif_open() function creates a new endpoint. |
| 273 | * |
| 274 | *\return |
| 275 | * Upon successful completion, scif_open() returns an endpoint descriptor to |
| 276 | * be used in subsequent SCIF functions calls to refer to that endpoint; |
| 277 | * otherwise: in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is |
| 278 | * returned and errno is set to indicate the error; in kernel mode a NULL |
| 279 | * scif_epd_t is returned. |
| 280 | * |
| 281 | *\par Errors: |
| 282 | *- ENOMEM |
| 283 | * - Insufficient kernel memory was available. |
| 284 | *- ENXIO |
| 285 | * - Version mismatch between micscif driver and libscif. |
| 286 | */ |
| 287 | scif_epd_t scif_open(void); |
| 288 | |
| 289 | /** |
| 290 | * scif_bind - Bind an endpoint to a port |
| 291 | * \param epd endpoint descriptor |
| 292 | * \param pn port number |
| 293 | * |
| 294 | * scif_bind() binds endpoint epd to port pn, where pn is a port number on the |
| 295 | * local node. If pn is zero, a port number greater than or equal to |
| 296 | * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to |
| 297 | * exactly one local port. Ports less than 1024 when requested can only be bound |
| 298 | * by system (or root) processes or by processes executed by privileged users. |
| 299 | * |
| 300 | *\return |
| 301 | * Upon successful completion, scif_bind() returns the port number to which epd |
| 302 | * is bound; otherwise: in user mode -1 is returned and errno is set to |
| 303 | * indicate the error; in kernel mode the negative of one of the following |
| 304 | * errors is returned. |
| 305 | * |
| 306 | *\par Errors: |
| 307 | *- EBADF |
| 308 | * - epd is not a valid endpoint descriptor |
| 309 | *- EINVAL |
| 310 | * - epd is not a valid endpoint descriptor, or |
| 311 | * - The endpoint or the port are already bound. |
| 312 | *- EISCONN |
| 313 | * - The endpoint is already connected. |
| 314 | *- ENOSPC |
| 315 | * - No port number available for assignment (when pn==0). |
| 316 | *- ENOTTY |
| 317 | * - epd is not a valid endpoint descriptor |
| 318 | *- EACCES |
| 319 | * - The port requested is protected and the user is not the superuser. |
| 320 | */ |
| 321 | int scif_bind(scif_epd_t epd, uint16_t pn); |
| 322 | |
| 323 | /** |
| 324 | * scif_listen - Listen for connections on an endpoint |
| 325 | * |
| 326 | * \param epd endpoint descriptor |
| 327 | * \param backlog maximum pending connection requests |
| 328 | * |
| 329 | * scif_listen() marks the endpoint epd as a listening endpoint - that is, as |
| 330 | * an endpoint that will be used to accept incoming connection requests. Once |
| 331 | * so marked, the endpoint is said to be in the listening state and may not be |
| 332 | * used as the endpoint of a connection. |
| 333 | * |
| 334 | * The endpoint, epd, must have been bound to a port. |
| 335 | * |
| 336 | * The backlog argument defines the maximum length to which the queue of |
| 337 | * pending connections for epd may grow. If a connection request arrives when |
| 338 | * the queue is full, the client may receive an error with an indication that |
| 339 | * the connection was refused. |
| 340 | * |
| 341 | *\return |
| 342 | * Upon successful completion, scif_listen() returns 0; otherwise: in user mode |
| 343 | * -1 is returned and errno is set to indicate the error; in kernel mode the |
| 344 | * negative of one of the following errors is returned. |
| 345 | * |
| 346 | *\par Errors: |
| 347 | *- EBADF |
| 348 | * - epd is not a valid endpoint descriptor |
| 349 | *- EINVAL |
| 350 | * - epd is not a valid endpoint descriptor, or |
| 351 | * - The endpoint is not bound to a port |
| 352 | *- EISCONN |
| 353 | * - The endpoint is already connected or listening |
| 354 | *- ENOTTY |
| 355 | * - epd is not a valid endpoint descriptor |
| 356 | */ |
| 357 | int scif_listen(scif_epd_t epd, int backlog); |
| 358 | |
| 359 | /** |
| 360 | * scif_connect - Initiate a connection on a port |
| 361 | * \param epd endpoint descriptor |
| 362 | * \param dst global id of port to which to connect |
| 363 | * |
| 364 | * The scif_connect() function requests the connection of endpoint epd to remote |
| 365 | * port dst. If the connection is successful, a peer endpoint, bound to dst, is |
| 366 | * created on node dst.node. On successful return, the connection is complete. |
| 367 | * |
| 368 | * If the endpoint epd has not already been bound to a port, scif_connect() |
| 369 | * will bind it to an unused local port. |
| 370 | * |
| 371 | * A connection is terminated when an endpoint of the connection is closed, |
| 372 | * either explicitly by scif_close(), or when a process that owns one of the |
| 373 | * endpoints of a connection is terminated. |
| 374 | * |
| 375 | *\return |
| 376 | * Upon successful completion, scif_connect() returns the port ID to which the |
| 377 | * endpoint, epd, is bound; otherwise: in user mode -1 is returned and errno is |
| 378 | * set to indicate the error; in kernel mode the negative of one of the |
| 379 | * following errors is returned. |
| 380 | * |
| 381 | *\par Errors: |
| 382 | *- EBADF |
| 383 | * - epd is not a valid endpoint descriptor |
| 384 | *- ECONNREFUSED |
| 385 | * - The destination was not listening for connections or refused the |
| 386 | * connection request. |
| 387 | *- EINTR |
| 388 | * - Interrupted function |
| 389 | *- EINVAL |
| 390 | * - epd is not a valid endpoint descriptor, or |
| 391 | * - dst.port is not a valid port ID |
| 392 | *- EISCONN |
| 393 | * - The endpoint is already connected |
| 394 | *- ENOBUFS |
| 395 | * - No buffer space is available |
| 396 | *- ENODEV |
| 397 | * - The destination node does not exist, or |
| 398 | * - The node is lost. |
| 399 | *- ENOSPC |
| 400 | * - No port number available for assignment. |
| 401 | *- ENOTTY |
| 402 | * - epd is not a valid endpoint descriptor |
| 403 | *- EOPNOTSUPP |
| 404 | * - The endpoint is listening and cannot be connected |
| 405 | */ |
| 406 | int scif_connect(scif_epd_t epd, struct scif_portID *dst); |
| 407 | |
| 408 | /** |
| 409 | * scif_accept - Accept a connection on an endpoint |
| 410 | * \param epd endpoint descriptor |
| 411 | * \param peer global id of port to which connected |
| 412 | * \param newepd new connected endpoint descriptor |
| 413 | * \param flags flags |
| 414 | * |
| 415 | * The scif_accept() call extracts the first connection request on the queue of |
| 416 | * pending connections for the port on which epd is listening. scif_accept() |
| 417 | * creates a new endpoint, bound to the same port as epd, and allocates a new |
| 418 | * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new |
| 419 | * endpoint is connected to the endpoint through which the connection was |
| 420 | * requested. epd is unaffected by this call, and remains in the listening |
| 421 | * state. |
| 422 | * |
| 423 | * On successful return, peer holds the global port identifier (node id and |
| 424 | * local port number) of the port which requested the connection. |
| 425 | * |
| 426 | * If the peer endpoint which requested the connection is closed, the endpoint |
| 427 | * returned by scif_accept() is closed. |
| 428 | * |
| 429 | * The number of connections that can (subsequently) be accepted on epd is only |
| 430 | * limited by system resources (memory). |
| 431 | * |
| 432 | * The flags argument is formed by OR'ing together zero or more of the |
| 433 | * following values: |
| 434 | *- SCIF_ACCEPT_SYNC: block until a connection request is presented. If |
| 435 | * SCIF_ACCEPT_SYNC is not in flags, and no pending |
| 436 | * connections are present on the queue, scif_accept() fails |
| 437 | * with an EAGAIN error |
| 438 | * |
| 439 | * On Linux in user mode, the select() and poll() functions can be used to |
| 440 | * determine when there is a connection request. On Microsoft Windows* and on |
| 441 | * Linux in kernel mode, the scif_poll() function may be used for this purpose. |
| 442 | * A readable event will be delivered when a connection is requested. |
| 443 | * |
| 444 | *\return |
| 445 | * Upon successful completion, scif_accept() returns 0; otherwise: in user mode |
| 446 | * -1 is returned and errno is set to indicate the error; in kernel mode the |
| 447 | * negative of one of the following errors is returned. |
| 448 | * |
| 449 | *\par Errors: |
| 450 | *- EAGAIN |
| 451 | * - SCIF_ACCEPT_SYNC is not set and no connections are present to be accepted, or |
| 452 | * - SCIF_ACCEPT_SYNC is not set and remote node failed to complete its |
| 453 | * connection request |
| 454 | *- EBADF |
| 455 | * - epd is not a valid endpoint descriptor |
| 456 | *- EINTR |
| 457 | * - Interrupted function |
| 458 | *- EINVAL |
| 459 | * - epd is not a valid endpoint descriptor, or |
| 460 | * - epd is not a listening endpoint |
| 461 | * - flags is invalid |
| 462 | * - peer is NULL |
| 463 | * - newepd is NULL |
| 464 | *- ENOBUFS |
| 465 | * - No buffer space is available |
| 466 | *- ENODEV |
| 467 | * - The requesting node is lost. |
| 468 | *- ENOMEM |
| 469 | * - Not enough space |
| 470 | *- ENOTTY |
| 471 | * - epd is not a valid endpoint descriptor |
| 472 | *- ENOENT |
| 473 | * - Secondary part of epd registration failed. |
| 474 | */ |
| 475 | int scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t |
| 476 | *newepd, int flags); |
| 477 | |
| 478 | /** |
| 479 | * scif_close - Close an endpoint |
| 480 | * \param epd endpoint descriptor |
| 481 | * |
| 482 | * scif_close() closes an endpoint and performs necessary teardown of |
| 483 | * facilities associated with that endpoint. |
| 484 | * |
| 485 | * If epd is a listening endpoint then it will no longer accept connection |
| 486 | * requests on the port to which it is bound. Any pending connection requests |
| 487 | * are rejected. |
| 488 | * |
| 489 | * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs |
| 490 | * which are in-process through epd or its peer endpoint will complete before |
| 491 | * scif_close() returns. Registered windows of the local and peer endpoints are |
| 492 | * released as if scif_unregister() was called against each window. |
| 493 | * |
| 494 | * Closing an endpoint does not affect mappings to remote memory. These remain |
| 495 | * until explicitly removed by calling scif_munmap(). |
| 496 | * |
| 497 | * If the peer endpoint's receive queue is not empty at the time that epd is |
| 498 | * closed, then the peer endpoint can be passed as the endpoint parameter to |
| 499 | * scif_recv() until the receive queue is empty. |
| 500 | * |
| 501 | * If epd is bound to a port, then the port is returned to the pool of |
| 502 | * available ports. |
| 503 | * |
| 504 | * epd is freed and may no longer be accessed. |
| 505 | * |
| 506 | *\return |
| 507 | * Upon successful completion, scif_close() returns 0; otherwise: in user mode |
| 508 | * -1 is returned and errno is set to indicate the error; in kernel mode the |
| 509 | * negative of one of the following errors is returned. |
| 510 | * |
| 511 | *\par Errors: |
| 512 | *- EBADF |
| 513 | * - epd is not a valid endpoint descriptor |
| 514 | *- EINVAL |
| 515 | * - epd is not a valid endpoint descriptor |
| 516 | */ |
| 517 | int scif_close(scif_epd_t epd); |
| 518 | |
| 519 | /** |
| 520 | * scif_send - Send a message |
| 521 | * \param epd endpoint descriptor |
| 522 | * \param msg message buffer address |
| 523 | * \param len message length |
| 524 | * \param flags blocking mode flags |
| 525 | * |
| 526 | * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data |
| 527 | * are copied from memory starting at address msg. On successful execution the |
| 528 | * return value of scif_send() is the number of bytes that were sent, and is |
| 529 | * zero if no bytes were sent because len was zero. scif_send() may be called |
| 530 | * only when the endpoint is in a connected state. |
| 531 | * |
| 532 | * If a scif_send() call is non-blocking, then it sends only those bytes which |
| 533 | * can be sent without waiting, up to a maximum of len bytes. |
| 534 | * |
| 535 | * If a scif_send() call is blocking, then it normally returns after sending |
| 536 | * all len bytes. If a blocking call is interrupted or the connection is |
| 537 | * forcibly closed, the call is considered successful if some bytes were sent |
| 538 | * or len is zero, otherwise the call is considered unsuccessful. |
| 539 | * |
| 540 | * On Linux in user mode, the select() and poll() functions can be used to |
| 541 | * determine when the send queue is not full. On Microsoft Windows* and on |
| 542 | * Linux in kernel mode, the scif_poll() function may be used for this purpose. |
| 543 | * |
| 544 | * It is recommended that scif_send()/scif_recv() only be used for short |
| 545 | * control-type message communication between SCIF endpoints. The SCIF RMA |
| 546 | * APIs are expected to provide better performance for transfer sizes of |
| 547 | * 1024 bytes or longer. |
| 548 | * |
| 549 | * The flags argument is formed by ORing together zero or more of the following |
| 550 | * values: |
| 551 | *- SCIF_SEND_BLOCK: block until the entire message is sent. |
| 552 | * |
| 553 | *\return |
| 554 | * Upon successful completion, scif_send() returns the number of bytes sent; |
| 555 | * otherwise: in user mode -1 is returned and errno is set to indicate the |
| 556 | * error; in kernel mode the negative of one of the following errors is |
| 557 | * returned. |
| 558 | * |
| 559 | *\par Errors: |
| 560 | *- EBADF |
| 561 | * - epd is not a valid endpoint descriptor |
| 562 | *- ECONNRESET |
| 563 | * - A connection was forcibly closed by a peer. |
| 564 | *- EFAULT |
| 565 | * - An invalid address was specified for a parameter. |
| 566 | *- EINTR |
| 567 | * - epd was closed by scif_close() |
| 568 | *- EINVAL |
| 569 | * - epd is not a valid endpoint descriptor, or |
| 570 | * - flags is invalid |
| 571 | * - len is negative |
| 572 | *- ENODEV |
| 573 | * - The remote node is lost. |
| 574 | *- ENOMEM |
| 575 | * - Not enough space |
| 576 | *- ENOTCONN |
| 577 | * - The endpoint is not connected |
| 578 | *- ENOTTY |
| 579 | * - epd is not a valid endpoint descriptor |
| 580 | */ |
| 581 | int scif_send(scif_epd_t epd, void *msg, int len, int flags); |
| 582 | |
| 583 | /** |
| 584 | * scif_recv - Receive a message |
| 585 | * \param epd endpoint descriptor |
| 586 | * \param msg message buffer address |
| 587 | * \param len message buffer length |
| 588 | * \param flags blocking mode flags |
| 589 | * |
| 590 | * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of |
| 591 | * data are copied to memory starting at address msg. On successful execution |
| 592 | * the return value of scif_recv() is the number of bytes that were received, |
| 593 | * and is zero if no bytes were received because len was zero. scif_recv() may |
| 594 | * be called only when the endpoint is in a connected state. |
| 595 | * |
| 596 | * If a scif_recv() call is non-blocking, then it receives only those bytes |
| 597 | * which can be received without waiting, up to a maximum of len bytes. |
| 598 | * |
| 599 | * If a scif_recv() call is blocking, then it normally returns after receiving |
| 600 | * all len bytes. If a blocking call is interrupted or the connection is |
| 601 | * forcibly closed, the call is considered successful if some bytes were |
| 602 | * received or len is zero, otherwise the call is considered unsuccessful; |
| 603 | * subsequent calls to scif_recv() will successfully receive all data sent by |
| 604 | * the peer endpoint before the interruption or before the connection was forcibly closed. |
| 605 | * |
| 606 | * On Linux in user mode, the select() and poll() functions can be used to |
| 607 | * determine when data is available to be received. On Microsoft Windows* and |
| 608 | * on Linux in kernel mode, the scif_poll() function may be used for this |
| 609 | * purpose. |
| 610 | * |
| 611 | * It is recommended that scif_send()/scif_recv() only be used for short |
| 612 | * control-type message communication between SCIF endpoints. The SCIF RMA |
| 613 | * APIs are expected to provide better performance for transfer sizes of |
| 614 | * 1024 bytes or longer. |
| 615 | * |
| 616 | * The flags argument is formed by ORing together zero or more of the following |
| 617 | * values: |
| 618 | *- SCIF_RECV_BLOCK: block until the entire message is received. |
| 619 | * |
| 620 | *\return |
| 621 | * Upon successful completion, scif_recv() returns the number of bytes |
| 622 | * received; otherwise: in user mode -1 is returned and errno is set to |
| 623 | * indicate the error; in kernel mode the negative of one of the following |
| 624 | * errors is returned. |
| 625 | * |
| 626 | *\par Errors: |
| 627 | *- EAGAIN |
| 628 | * - The destination node is returning from a low power state. |
| 629 | *- EBADF |
| 630 | * - epd is not a valid endpoint descriptor. |
| 631 | *- ECONNRESET |
| 632 | * - A connection was forcibly closed by a peer. |
| 633 | *- EFAULT |
| 634 | * - An invalid address was specified for a parameter. |
| 635 | *- EINVAL |
| 636 | * - epd is not a valid endpoint descriptor, or |
| 637 | * - flags is invalid, or |
| 638 | * - len is negative. |
| 639 | *- ENODEV |
| 640 | * - The remote node is lost. |
| 641 | *- ENOMEM |
| 642 | * - Not enough space. |
| 643 | *- ENOTCONN |
| 644 | * - The endpoint is not connected. |
| 645 | *- ENOTTY |
| 646 | * - epd is not a valid endpoint descriptor |
| 647 | */ |
| 648 | int scif_recv(scif_epd_t epd, void *msg, int len, int flags); |
| 649 | |
| 650 | /** |
| 651 | * scif_register - Mark a memory region for remote access. |
| 652 | * \param epd endpoint descriptor |
| 653 | * \param addr starting virtual address |
| 654 | * \param len length of range |
| 655 | * \param offset offset of window |
| 656 | * \param prot_flags read/write protection flags |
| 657 | * \param map_flags mapping flags |
| 658 | * |
| 659 | * The scif_register() function opens a window, a range of whole pages of the |
| 660 | * registered address space of the endpoint epd, starting at offset po and |
| 661 | * continuing for len bytes. The value of po, further described below, is a |
| 662 | * function of the parameters offset and len, and the value of map_flags. Each |
| 663 | * page of the window represents the physical memory page which backs the |
| 664 | * corresponding page of the range of virtual address pages starting at addr |
| 665 | * and continuing for len bytes. addr and len are constrained to be multiples |
| 666 | * of the page size. addr is interpreted as a user space address. A successful |
| 667 | * scif_register() call returns po as the return value. |
| 668 | * |
| 669 | * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset |
| 670 | * exactly, and offset is constrained to be a multiple of the page size. The |
| 671 | * mapping established by scif_register() will not replace any existing |
| 672 | * registration; an error is returned if any page within the range [offset, |
| 673 | * offset+len-1] intersects an existing window. |
| 674 | * Note: When SCIF_MAP_FIXED is set the current implementation limits |
| 675 | * offset to the range [0..2^62-1] and returns EADDRINUSE if the offset |
| 676 | * requested with SCIF_MAP_FIXED is in the range [2^62..2^63-1]. |
| 677 | * |
| 678 | * When SCIF_MAP_FIXED is not set, the implementation uses offset in an |
| 679 | * implementation-defined manner to arrive at po. The po value so chosen will |
| 680 | * be an area of the registered address space that the implementation deems |
| 681 | * suitable for a mapping of len bytes. An offset value of 0 is interpreted as |
| 682 | * granting the implementation complete freedom in selecting po, subject to |
| 683 | * constraints described below. A non-zero value of offset is taken to be a |
| 684 | * suggestion of an offset near which the mapping should be placed. When the |
| 685 | * implementation selects a value for po, it does not replace any extant |
| 686 | * window. In all cases, po will be a multiple of the page size. |
| 687 | * |
| 688 | * The physical pages which are so represented by a window are available for |
| 689 | * access in calls to scif_mmap(), scif_readfrom(), scif_writeto(), |
| 690 | * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the |
| 691 | * physical pages represented by the window will not be reused by the memory |
| 692 | * subsystem for any other purpose. Note that the same physical page may be |
| 693 | * represented by multiple windows. |
| 694 | * |
| 695 | * Subsequent operations which change the memory pages to which virtual |
| 696 | * addresses are mapped (such as mmap(), munmap(), scif_mmap() and |
| 697 | * scif_munmap()) have no effect on existing windows. |
| 698 | * |
| 699 | * On Linux, if the process will fork(), it is recommended that the registered |
| 700 | * virtual address range be marked with MADV_DONTFORK. Doing so will prevent |
| 701 | * problems due to copy-on-write semantics. |
| 702 | * |
| 703 | * The prot_flags argument is formed by OR'ing together one or more of the |
| 704 | * following values: |
| 705 | *- SCIF_PROT_READ: allow read operations from the window |
| 706 | *- SCIF_PROT_WRITE: allow write operations to the window |
| 707 | * |
| 708 | * The map_flags argument is formed by OR'ing together zero or more of |
| 709 | * the following values: |
| 710 | *- SCIF_MAP_FIXED: interpret offset exactly |
| 711 | * |
| 712 | *\return |
| 713 | * Upon successful completion, scif_register() returns the offset at which the |
| 714 | * mapping was placed (po); otherwise: in user mode SCIF_REGISTER_FAILED (that |
| 715 | * is (off_t)-1) is returned and errno is set to indicate the error; in |
| 716 | * kernel mode the negative of one of the following errors is returned. |
| 717 | * |
| 718 | *\par Errors: |
| 719 | *- EADDRINUSE |
| 720 | * - SCIF_MAP_FIXED is set in map_flags, and pages in the range [offset, |
| 721 | * offset+len-1] are already registered |
| 722 | *- EAGAIN |
| 723 | * - The mapping could not be performed due to lack of resources |
| 724 | *- EBADF |
| 725 | * - epd is not a valid endpoint descriptor |
| 726 | *- ECONNRESET |
| 727 | * - A connection was forcibly closed by a peer. |
| 728 | *- EFAULT |
| 729 | * - Addresses in the range [addr , addr + len - 1] are invalid |
| 730 | *- EINVAL |
| 731 | * - epd is not a valid endpoint descriptor, or |
| 732 | * - map_flags is invalid, or |
| 733 | * - prot_flags is invalid, or |
| 734 | * - SCIF_MAP_FIXED is set in map_flags, and offset is not a multiple of |
| 735 | * the page size, or |
| 736 | * - addr is not a multiple of the page size, or |
| 737 | * - len is not a multiple of the page size, or is 0, or |
| 738 | * - offset is negative |
| 739 | *- ENODEV |
| 740 | * - The remote node is lost. |
| 741 | *- ENOMEM |
| 742 | * - Not enough space |
| 743 | *- ENOTCONN |
| 744 | * - The endpoint is not connected |
| 745 | *- ENOTTY |
| 746 | * - epd is not a valid endpoint descriptor |
| 747 | */ |
| 748 | off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, |
| 749 | int prot_flags, int map_flags); |
| 750 | |
| 751 | /** |
| 752 | * scif_unregister - Close previously registered windows |
| 753 | * \param epd endpoint descriptor |
| 754 | * \param offset start of range to unregister |
| 755 | * \param len length of range to unregister |
| 756 | * |
| 757 | * The scif_unregister() function closes those previously registered windows |
| 758 | * which are entirely within the range [offset,offset+len-1]. It is an error to |
| 759 | * specify a range which intersects only a subrange of a window. |
| 760 | * |
| 761 | * On a successful return, pages within the window may no longer be specified |
| 762 | * in calls to scif_mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), |
| 763 | * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window, however, |
| 764 | * continues to exist until all previous references against it are removed. A |
| 765 | * window is referenced if there is a mapping to it created by scif_mmap(), or if |
| 766 | * scif_get_pages() was called against the window (and the pages have not been |
| 767 | * returned via scif_put_pages()). A window is also referenced while an RMA, in |
| 768 | * which some range of the window is a source or destination, is in progress. |
| 769 | * Finally a window is referenced while some offset in that window was specified |
| 770 | * to scif_fence_signal(), and the RMAs marked by that call to |
| 771 | * scif_fence_signal() have not completed. While a window is in this state, its |
| 772 | * registered address space pages are not available for use in a new registered |
| 773 | * window. |
| 774 | * |
| 775 | * When all such references to the window have been removed, its references to |
| 776 | * all the physical pages which it represents are removed. Similarly, the |
| 777 | * registered address space pages of the window become available for |
| 778 | * registration in a new window. |
| 779 | * |
| 780 | *\return |
| 781 | * Upon successful completion, scif_unregister() returns 0; otherwise: in user |
| 782 | * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| 783 | * the negative of one of the following errors is returned. In the event of an |
| 784 | * error, no windows are unregistered. |
| 785 | * |
| 786 | *\par Errors: |
| 787 | *- EBADF |
| 788 | * - epd is not a valid endpoint descriptor |
| 789 | *- ECONNRESET |
| 790 | * - A connection was forcibly closed by a peer. |
| 791 | *- EINVAL |
| 792 | * - epd is not a valid endpoint descriptor, or |
| 793 | * - The range [offset,offset+len-1] intersects a subrange of a window, or |
| 794 | * - offset is negative |
| 795 | *- ENODEV |
| 796 | * - The remote node is lost. |
| 797 | *- ENOTCONN |
| 798 | * - The endpoint is not connected |
| 799 | *- ENOTTY |
| 800 | * - epd is not a valid endpoint descriptor |
| 801 | *- ENXIO |
| 802 | * - Addresses in the range [offset,offset+len-1] are invalid for the |
| 803 | * registered address space of epd. |
| 804 | */ |
| 805 | int scif_unregister(scif_epd_t epd, off_t offset, size_t len); |
| 806 | |
| 807 | |
| 808 | /** |
| 809 | * scif_readfrom - Copy from a remote address space |
| 810 | * \param epd endpoint descriptor |
| 811 | * \param loffset offset in local registered address space to |
| 812 | * which to copy |
| 813 | * \param len length of range to copy |
| 814 | * \param roffset offset in remote registered address space |
| 815 | * from which to copy |
| 816 | * \param rma_flags transfer mode flags |
| 817 | * |
| 818 | * scif_readfrom() copies len bytes from the remote registered address space of |
| 819 | * the peer of endpoint epd, starting at the offset roffset to the local |
| 820 | * registered address space of epd, starting at the offset loffset. |
| 821 | * |
| 822 | * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ |
| 823 | * len-1] must be within some registered window or windows of the local and |
| 824 | * remote nodes respectively. A range may intersect multiple registered |
| 825 | * windows, but only if those windows are contiguous in the registered address |
| 826 | * space. |
| 827 | * |
| 828 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using |
| 829 | * programmed read/writes. Otherwise the data is copied using DMA. If |
| 830 | * rma_flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after |
| 831 | * the transfer is complete. Otherwise, the transfer may be performed |
| 832 | * asynchronously. The order in which any two asynchronous RMA operations |
| 833 | * complete is non-deterministic. The synchronization functions, |
| 834 | * scif_fence_mark()/scif_fence_wait() and scif_fence_signal(), can be used to |
| 835 | * synchronize to the completion of asynchronous RMA operations. |
| 836 | * |
| 837 | * The DMA transfer of individual bytes is not guaranteed to complete in |
| 838 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last |
| 839 | * cacheline or partial cacheline of the source range will become visible on |
| 840 | * the destination node after all other transferred data in the source |
| 841 | * range has become visible on the destination node. |
| 842 | * |
| 843 | * The optimal DMA performance will likely be realized if both |
| 844 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower |
| 845 | * performance will likely be realized if loffset and roffset are not |
| 846 | * cacheline aligned but are separated by some multiple of 64. The lowest level |
| 847 | * of performance is likely if loffset and roffset are not separated by a |
| 848 | * multiple of 64. |
| 849 | * |
| 850 | * The rma_flags argument is formed by ORing together zero or more of the |
| 851 | * following values: |
| 852 | *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA |
| 853 | * engine. |
| 854 | *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the |
| 855 | * transfer has completed. Passing this flag might result in |
| 856 | * the API busy waiting and consuming CPU cycles while the DMA |
| 857 | * transfer is in progress. |
| 858 | *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of |
| 859 | * the source range becomes visible on the destination node |
| 860 | * after all other transferred data in the source range has |
| 861 | * become visible on the destination |
| 862 | * |
| 863 | *\return |
| 864 | * Upon successful completion, scif_readfrom() returns 0; otherwise: in user |
| 865 | * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| 866 | * the negative of one of the following errors is returned. |
| 867 | * |
| 868 | *\par Errors: |
| 869 | *- EACCES |
| 870 | * - Attempt to write to a read-only range or read from a write-only range |
| 871 | *- EBADF |
| 872 | * - epd is not a valid endpoint descriptor |
| 873 | *- ECONNRESET |
| 874 | * - A connection was forcibly closed by a peer. |
| 875 | *- EINVAL |
| 876 | * - epd is not a valid endpoint descriptor, or |
| 877 | * - rma_flags is invalid |
| 878 | *- ENODEV |
| 879 | * - The remote node is lost. |
| 880 | *- ENOTCONN |
| 881 | * - The endpoint is not connected |
| 882 | *- ENOTTY |
| 883 | * - epd is not a valid endpoint descriptor |
| 884 | *- ENXIO |
| 885 | * - The range [loffset,loffset+len-1] is invalid for the registered address |
| 886 | * space of epd, or |
| 887 | * - The range [roffset,roffset+len-1] is invalid for the registered address |
| 888 | * space of the peer of epd, or |
| 889 | * - loffset or roffset is negative |
| 890 | */ |
| 891 | int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t |
| 892 | roffset, int rma_flags); |
| 893 | |
| 894 | /** |
| 895 | * scif_writeto - Copy to a remote address space |
| 896 | * \param epd endpoint descriptor |
| 897 | * \param loffset offset in local registered address space |
| 898 | * from which to copy |
| 899 | * \param len length of range to copy |
| 900 | * \param roffset offset in remote registered address space to |
| 901 | * which to copy |
| 902 | * \param rma_flags transfer mode flags |
| 903 | * |
| 904 | * scif_writeto() copies len bytes from the local registered address space of |
| 905 | * epd, starting at the offset loffset to the remote registered address space |
| 906 | * of the peer of endpoint epd, starting at the offset roffset. |
| 907 | * |
| 908 | * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ |
| 909 | * len-1] must be within some registered window or windows of the local and |
| 910 | * remote nodes respectively. A range may intersect multiple registered |
| 911 | * windows, but only if those windows are contiguous in the registered address |
| 912 | * space. |
| 913 | * |
| 914 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using |
| 915 | * programmed read/writes. Otherwise the data is copied using DMA. If |
| 916 | * rma_flags includes SCIF_RMA_SYNC, then scif_writeto() will return after |
| 917 | * the transfer is complete. Otherwise, the transfer may be performed |
| 918 | * asynchronously. The order in which any two asynchronous RMA operations |
| 919 | * complete is non-deterministic. The synchronization functions, |
| 920 | * scif_fence_mark()/scif_fence_wait() and scif_fence_signal(), can be used to |
| 921 | * synchronize to the completion of asynchronous RMA operations. |
| 922 | * |
| 923 | * The DMA transfer of individual bytes is not guaranteed to complete in |
| 924 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last |
| 925 | * cacheline or partial cacheline of the source range will become visible on |
| 926 | * the destination node after all other transferred data in the source |
| 927 | * range has become visible on the destination node. |
| 928 | * |
| 929 | * The optimal DMA performance will likely be realized if both |
| 930 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower |
| 931 | * performance will likely be realized if loffset and roffset are not cacheline |
| 932 | * aligned but are separated by some multiple of 64. The lowest level of |
| 933 | * performance is likely if loffset and roffset are not separated by a multiple |
| 934 | * of 64. |
| 935 | * |
| 936 | * The rma_flags argument is formed by ORing together zero or more of the |
| 937 | * following values: |
| 938 | *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA |
| 939 | * engine. |
| 940 | *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the |
| 941 | * transfer has completed. Passing this flag might result in |
| 942 | * the API busy waiting and consuming CPU cycles while the DMA |
| 943 | * transfer is in progress. |
| 944 | *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of |
| 945 | * the source range becomes visible on the destination node |
| 946 | * after all other transferred data in the source range has |
| 947 | * become visible on the destination |
| 948 | * |
| 949 | *\return |
| 950 | * Upon successful completion, scif_writeto() returns 0; otherwise: in user |
| 951 | * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| 952 | * the negative of one of the following errors is returned. |
| 953 | * |
| 954 | *\par Errors: |
| 955 | *- EACCES |
| 956 | * - Attempt to write to a read-only range or read from a write-only range |
| 957 | *- EBADF |
| 958 | * - epd is not a valid endpoint descriptor |
| 959 | *- ECONNRESET |
| 960 | * - A connection was forcibly closed by a peer. |
| 961 | *- EINVAL |
| 962 | * - epd is not a valid endpoint descriptor, or |
| 963 | * - rma_flags is invalid |
| 964 | *- ENODEV |
| 965 | * - The remote node is lost. |
| 966 | *- ENOTCONN |
| 967 | * - The endpoint is not connected |
| 968 | *- ENOTTY |
| 969 | * - epd is not a valid endpoint descriptor |
| 970 | *- ENXIO |
| 971 | * - The range [loffset,loffset+len-1] is invalid for the registered address |
| 972 | * space of epd, or |
| 973 | * - The range [roffset,roffset+len-1] is invalid for the registered address |
| 974 | * space of the peer of epd, or |
| 975 | * - loffset or roffset is negative |
| 976 | */ |
| 977 | int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t |
| 978 | roffset, int rma_flags); |
| 979 | |
| 980 | /** |
| 981 | * scif_vreadfrom - Copy from a remote address space |
| 982 | * \param epd endpoint descriptor |
| 983 | * \param addr address to which to copy |
| 984 | * \param len length of range to copy |
| 985 | * \param offset offset in remote registered address space |
| 986 | * from which to copy |
| 987 | * \param rma_flags transfer mode flags |
| 988 | * |
| 989 | * scif_vreadfrom() copies len bytes from the remote registered address |
| 990 | * space of the peer of endpoint epd, starting at the offset offset, to local |
| 991 | * memory, starting at addr. addr is interpreted as a user space address. |
| 992 | * |
| 993 | * The specified range [offset,offset+len-1] must be within some registered |
| 994 | * window or windows of the remote node. The range may intersect |
| 995 | * multiple registered windows, but only if those windows are contiguous in the |
| 996 | * registered address space. |
| 997 | * |
| 998 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using |
| 999 | * programmed read/writes. Otherwise the data is copied using DMA. If |
| 1000 | * rma_flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after |
| 1001 | * the transfer is complete. Otherwise, the transfer may be performed |
| 1002 | * asynchronously. The order in which any two asynchronous RMA operations |
| 1003 | * complete is non-deterministic. The synchronization functions, |
| 1004 | * scif_fence_mark()/scif_fence_wait() and scif_fence_signal(), can be used to |
| 1005 | * synchronize to the completion of asynchronous RMA operations. |
| 1006 | * |
| 1007 | * The DMA transfer of individual bytes is not guaranteed to complete in |
| 1008 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last |
| 1009 | * cacheline or partial cacheline of the source range will become visible on |
| 1010 | * the destination node after all other transferred data in the source |
| 1011 | * range has become visible on the destination node. |
| 1012 | * |
| 1013 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back |
| 1014 | * the specified local memory range may remain in a pinned state even after |
| 1015 | * the specified transfer completes. This may reduce overhead if some or all of |
| 1016 | * the same virtual address range is referenced in a subsequent call of |
| 1017 | * scif_vreadfrom() or scif_vwriteto(). |
| 1018 | * |
| 1019 | * The optimal DMA performance will likely be realized if both |
| 1020 | * addr and offset are cacheline aligned (are a multiple of 64). Lower |
| 1021 | * performance will likely be realized if addr and offset are not cacheline |
| 1022 | * aligned but are separated by some multiple of 64. The lowest level of |
| 1023 | * performance is likely if addr and offset are not separated by a multiple of |
| 1024 | * 64. |
| 1025 | * |
| 1026 | * The rma_flags argument is formed by ORing together zero or more of the |
| 1027 | * following values: |
| 1028 | *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA |
| 1029 | * engine. |
| 1030 | *- SCIF_RMA_USECACHE: enable registration caching |
| 1031 | *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the |
| 1032 | * transfer has completed. Passing this flag might result in |
| 1033 | * the API busy waiting and consuming CPU cycles while the DMA |
| 1034 | * transfer is in progress. |
| 1035 | *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of |
| 1036 | * the source range becomes visible on the destination node |
| 1037 | * after all other transferred data in the source range has |
| 1038 | * become visible on the destination |
| 1039 | * |
| 1040 | *\return |
| 1041 | * Upon successful completion, scif_vreadfrom() returns 0; otherwise: in user |
| 1042 | * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| 1043 | * the negative of one of the following errors is returned. |
| 1044 | * |
| 1045 | *\par Errors: |
| 1046 | *- EACCES |
| 1047 | * - Attempt to write to a read-only range or read from a write-only range |
| 1048 | *- EBADF |
| 1049 | * - epd is not a valid endpoint descriptor |
| 1050 | *- ECONNRESET |
| 1051 | * - A connection was forcibly closed by a peer. |
| 1052 | *- EFAULT |
| 1053 | * - Addresses in the range [addr,addr+len-1] are invalid |
| 1054 | *- EINVAL |
| 1055 | * - epd is not a valid endpoint descriptor, or |
| 1056 | * - rma_flags is invalid |
| 1057 | *- ENODEV |
| 1058 | * - The remote node is lost. |
| 1059 | *- ENOTCONN |
| 1060 | * - The endpoint is not connected |
| 1061 | *- ENOTTY |
| 1062 | * - epd is not a valid endpoint descriptor |
| 1063 | *- ENXIO |
| 1064 | * - Addresses in the range [offset,offset+len-1] are invalid for the |
| 1065 | * registered address space of the peer of epd. |
| 1066 | */ |
| 1067 | int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t offset, |
| 1068 | int rma_flags); |
| 1069 | |
| 1070 | /** |
| 1071 | * scif_vwriteto - Copy to a remote address space |
| 1072 | * \param epd endpoint descriptor |
| 1073 | * \param addr address from which to copy |
| 1074 | * \param len length of range to copy |
| 1075 | * \param offset offset in remote registered address space to |
| 1076 | * which to copy |
| 1077 | * \param rma_flags transfer mode flags |
| 1078 | * |
| 1079 | * scif_vwriteto() copies len bytes from the local memory, starting at addr, to |
| 1080 | * the remote registered address space of the peer of endpoint epd, starting at |
| 1081 | * the offset offset. addr is interpreted as a user space address. |
| 1082 | * |
| 1083 | * The specified range [offset,offset+len-1] must be within some registered |
| 1084 | * window or windows of the remote node. The range may intersect |
| 1085 | * multiple registered windows, but only if those windows are contiguous in the |
| 1086 | * registered address space. |
| 1087 | * |
| 1088 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using |
| 1089 | * programmed read/writes. Otherwise the data is copied using DMA. If |
| 1090 | * rma_flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after |
| 1091 | * the transfer is complete. Otherwise, the transfer may be performed |
| 1092 | * asynchronously. The order in which any two asynchronous RMA operations |
| 1093 | * complete is non-deterministic. The synchronization functions, |
| 1094 | * scif_fence_mark()/scif_fence_wait() and scif_fence_signal(), can be used to |
| 1095 | * synchronize to the completion of asynchronous RMA operations. |
| 1096 | * |
| 1097 | * The DMA transfer of individual bytes is not guaranteed to complete in |
| 1098 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last |
| 1099 | * cacheline or partial cacheline of the source range will become visible on |
| 1100 | * the destination node after all other transferred data in the source |
| 1101 | * range has become visible on the destination node. |
| 1102 | * |
| 1103 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back |
| 1104 | * the specified local memory range may remain in a pinned state even after |
| 1105 | * the specified transfer completes. This may reduce overhead if some or all of |
| 1106 | * the same virtual address range is referenced in a subsequent call of |
| 1107 | * scif_vreadfrom() or scif_vwriteto(). |
| 1108 | * |
| 1109 | * The optimal DMA performance will likely be realized if both |
| 1110 | * addr and offset are cacheline aligned (are a multiple of 64). Lower |
| 1111 | * performance will likely be realized if addr and offset are not cacheline |
| 1112 | * aligned but are separated by some multiple of 64. The lowest level of |
| 1113 | * performance is likely if addr and offset are not separated by a multiple of |
| 1114 | * 64. |
| 1115 | * |
| 1116 | * The rma_flags argument is formed by ORing together zero or more of the |
| 1117 | * following values: |
| 1118 | *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA |
| 1119 | * engine. |
| 1120 | *- SCIF_RMA_USECACHE: allow registration caching |
| 1121 | *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the |
| 1122 | * transfer has completed. Passing this flag might result in |
| 1123 | * the API busy waiting and consuming CPU cycles while the DMA |
| 1124 | * transfer is in progress. |
| 1125 | *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of |
| 1126 | * the source range becomes visible on the destination node |
| 1127 | * after all other transferred data in the source range has |
| 1128 | * become visible on the destination |
| 1129 | * |
| 1130 | *\return |
| 1131 | * Upon successful completion, scif_vwriteto() returns 0; otherwise: in user |
| 1132 | * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| 1133 | * the negative of one of the following errors is returned. |
| 1134 | * |
| 1135 | *\par Errors: |
| 1136 | *- EACCES |
| 1137 | * - Attempt to write to a read-only range or read from a write-only range |
| 1138 | *- EBADF |
| 1139 | * - epd is not a valid endpoint descriptor |
| 1140 | *- ECONNRESET |
| 1141 | * - A connection was forcibly closed by a peer. |
| 1142 | *- EFAULT |
| 1143 | * - Addresses in the range [addr,addr+len-1] are invalid |
| 1144 | *- EINVAL |
| 1145 | * - epd is not a valid endpoint descriptor, or |
| 1146 | * - rma_flags is invalid |
| 1147 | *- ENODEV |
| 1148 | * - The remote node is lost. |
| 1149 | *- ENOTCONN |
| 1150 | * - The endpoint is not connected |
| 1151 | *- ENOTTY |
| 1152 | * - epd is not a valid endpoint descriptor |
| 1153 | *- ENXIO |
| 1154 | * - Addresses in the range [offset,offset+len-1] are invalid for the |
| 1155 | * registered address space of the peer of epd. |
| 1156 | */ |
| 1157 | int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t offset, |
| 1158 | int rma_flags); |
| 1159 | |
| 1160 | /** |
| 1161 | * scif_fence_mark - Mark previously issued RMAs |
| 1162 | * \param epd endpoint descriptor |
| 1163 | * \param flags control flags |
| 1164 | * \param mark marked handle returned as output. |
| 1165 | * |
| 1166 | * scif_fence_mark() returns after marking the current set of all uncompleted |
| 1167 | * RMAs initiated through the endpoint epd or the current set of all |
| 1168 | * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are |
| 1169 | * marked with a value returned at mark. The application may subsequently call |
| 1170 | * scif_fence_wait(), passing the value returned at mark, to await completion |
| 1171 | * of all RMAs so marked. |
| 1172 | * |
| 1173 | * The flags argument has exactly one of the following values: |
| 1174 | *- SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint |
| 1175 | * epd are marked |
| 1176 | *- SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer |
| 1177 | * of endpoint epd are marked |
| 1178 | * |
| 1179 | * \return |
| 1180 | * Upon successful completion, scif_fence_mark() returns 0; otherwise: in user |
| 1181 | * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| 1182 | * the negative of one of the following errors is returned. |
| 1183 | * |
| 1184 | *\par Errors: |
| 1185 | *- EBADF |
| 1186 | * - epd is not a valid endpoint descriptor |
| 1187 | *- ECONNRESET |
| 1188 | * - A connection was forcibly closed by a peer. |
| 1189 | *- EINVAL |
| 1190 | * - flags is invalid, or |
| 1191 | * - epd is not a valid endpoint descriptor |
| 1192 | *- ENODEV |
| 1193 | * - The remote node is lost. |
| 1194 | *- ENOTCONN |
| 1195 | * - The endpoint is not connected |
| 1196 | *- ENOMEM |
| 1197 | * - Insufficient kernel memory was available. |
| 1198 | *- ENOTTY |
| 1199 | * - epd is not a valid endpoint descriptor |
| 1200 | */ |
| 1201 | int scif_fence_mark(scif_epd_t epd, int flags, int *mark); |
| 1202 | |
| 1203 | /** |
| 1204 | * scif_fence_wait - Wait for completion of marked RMAs |
| 1205 | * |
| 1206 | * \param epd endpoint descriptor |
| 1207 | * \param mark mark request |
| 1208 | * |
| 1209 | * scif_fence_wait() returns after all RMAs marked with mark have completed. |
| 1210 | * The value passed in mark must have been obtained in a previous call to |
| 1211 | * scif_fence_mark(). |
| 1212 | * |
| 1213 | *\return |
| 1214 | * Upon successful completion, scif_fence_wait() returns 0; otherwise: in user |
| 1215 | * mode -1 is returned and errno is set to indicate the error; in kernel mode |
| 1216 | * the negative of one of the following errors is returned. |
| 1217 | * |
| 1218 | *\par Errors: |
| 1219 | *- EBADF |
| 1220 | * - epd is not a valid endpoint descriptor |
| 1221 | *- ECONNRESET |
| 1222 | * - A connection was forcibly closed by a peer. |
| 1223 | *- EINVAL |
| 1224 | * - epd is not a valid endpoint descriptor |
| 1225 | *- ENODEV |
| 1226 | * - The remote node is lost. |
| 1227 | *- ENOTCONN |
| 1228 | * - The endpoint is not connected |
| 1229 | *- ENOMEM |
| 1230 | * - Insufficient kernel memory was available. |
| 1231 | *- ENOTTY |
| 1232 | * - epd is not a valid endpoint descriptor |
| 1233 | */ |
| 1234 | int scif_fence_wait(scif_epd_t epd, int mark); |
| 1235 | |
| 1236 | /** |
| 1237 | * scif_fence_signal - Request a signal on completion of RMAs |
| 1238 | * \param epd endpoint descriptor |
| 1239 | * \param loff,roff local and remote offsets, respectively |
| 1240 | * \param lval local value to write to loff |
| 1241 | * \param rval remote value to write to roff |
| 1242 | * \param flags fence and signal flags, as described below |
| 1243 | * |
| 1244 | * scif_fence_signal() returns after marking the current set of all uncompleted |
| 1245 | * RMAs initiated through the endpoint epd or marking the current set of all |
| 1246 | * uncompleted RMAs initiated through the peer of endpoint epd. |
| 1247 | * |
| 1248 | * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the |
| 1249 | * marked set, lval is written to memory at the address corresponding to offset |
| 1250 | * loff in the local registered address space of epd. loff must be within a |
| 1251 | * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion |
| 1252 | * of the RMAs in the marked set, rval is written to memory at the address |
| 1253 | * corresponding to offset roff in the remote registered address space of epd. |
| 1254 | * roff must be within a remote registered window of the peer of epd. Note |
| 1255 | * that any specified offset must be DWORD (4 byte / 32 bit) aligned. |
| 1256 | * |
| 1257 | * The flags argument is formed by OR'ing together the following: |
| 1258 | *- Exactly one of the following values: |
| 1259 | * - SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint |
| 1260 | * epd are marked |
| 1261 | * - SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer |
| 1262 | * of endpoint epd are marked |
| 1263 | *- One or more of the following values: |
| 1264 | * - SCIF_SIGNAL_LOCAL: On completion of the marked set of RMAs, write lval to |
| 1265 | * memory at the address corresponding to offset loff in the local registered |
| 1266 | * address space of epd. |
| 1267 | * - SCIF_SIGNAL_REMOTE: On completion of the marked set of RMAs, write rval to |
| 1268 | * memory at the address corresponding to offset roff in the remote registered |
| 1269 | * address space of epd. |
| 1270 | * |
| 1271 | *\return |
| 1272 | * Upon successful completion, scif_fence_signal() returns 0; otherwise: in |
| 1273 | * user mode -1 is returned and errno is set to indicate the error; in kernel |
| 1274 | * mode the negative of one of the following errors is returned. |
| 1275 | *\par Errors: |
| 1276 | *- EBADF |
| 1277 | * - epd is not a valid endpoint descriptor |
| 1278 | *- ECONNRESET |
| 1279 | * - A connection was forcibly closed by a peer. |
| 1280 | *- EINVAL |
| 1281 | * - epd is not a valid endpoint descriptor, or |
| 1282 | * - flags is invalid, or |
| 1283 | * - loff or roff are not DWORD aligned |
| 1284 | *- ENODEV |
| 1285 | * - The remote node is lost. |
| 1286 | *- ENOTCONN |
| 1287 | * - The endpoint is not connected |
| 1288 | *- ENOTTY |
| 1289 | * - epd is not a valid endpoint descriptor |
| 1290 | *- ENXIO |
| 1291 | * - loff is invalid for the registered address space of epd, or |
| 1292 | * - roff is invalid for the registered address space of the peer of epd |
| 1293 | */ |
| 1294 | int scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, off_t roff, |
| 1295 | uint64_t rval, int flags); |
| 1296 | |
| 1297 | /** |
| 1298 | * scif_get_nodeIDs - Return information about online nodes |
| 1299 | * \param nodes array in which to return online node IDs |
| 1300 | * \param len number of entries in the nodes array |
| 1301 | * \param self address to place the node ID of the local node |
| 1302 | * |
| 1303 | * scif_get_nodeIDs() fills in the nodes array with up to len node IDs of the |
| 1304 | * nodes in the SCIF network. If there is not enough space in nodes, as |
| 1305 | * indicated by the len parameter, only len node IDs are returned in nodes. The |
| 1306 | * return value of scif_get_nodeIDs() is the total number of nodes currently in |
| 1307 | * the SCIF network. By checking the return value against the len parameter, |
| 1308 | * the user may determine if enough space for nodes was allocated. |
| 1309 | * |
| 1310 | * The node ID of the local node is returned at self. |
| 1311 | * |
| 1312 | *\return |
| 1313 | * Upon successful completion, scif_get_nodeIDs() returns the actual number of |
| 1314 | * online nodes in the SCIF network including 'self'; otherwise: in user mode |
| 1315 | * -1 is returned and errno is set to indicate the error; in kernel mode no |
| 1316 | * errors are returned. |
| 1317 | * |
| 1318 | *\par Errors: |
| 1319 | *- EFAULT |
| 1320 | * - Bad address |
| 1321 | */ |
| 1322 | int scif_get_nodeIDs(uint16_t *nodes, int len, uint16_t *self); |
| 1323 | |
| 1324 | |
| 1325 | /** |
| 1326 | * scif_pin_pages - Pin a set of pages |
| 1327 | * \param addr Virtual address of range to pin |
| 1328 | * \param len Length of range to pin |
| 1329 | * \param prot_flags Page protection flags |
| 1330 | * \param map_flags Page classification flags |
| 1331 | * \param pinned_pages Opaque handle of pinned pages |
| 1332 | * |
| 1333 | * scif_pin_pages() pins (locks in physical memory) the physical pages which |
| 1334 | * back the range of virtual address pages starting at addr and continuing for |
| 1335 | * len bytes. addr and len are constrained to be multiples of the page size. A |
| 1336 | * successful scif_pin_pages() call returns an opaque pointer value at |
| 1337 | * pinned_pages which may be used in subsequent calls to |
| 1338 | * scif_register_pinned_pages(). |
| 1339 | * |
| 1340 | * The pages will remain pinned as long as there is a reference against the |
| 1341 | * scif_pinned_pages_t value returned by scif_pin_pages() and until |
| 1342 | * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A |
| 1343 | * reference is added to a scif_pinned_pages_t value each time a window is |
| 1344 | * created by calling scif_register_pinned_pages() and passing the |
| 1345 | * scif_pinned_pages_t value. A reference is removed from a scif_pinned_pages_t value |
| 1346 | * each time such a window is deleted. |
| 1347 | * |
| 1348 | * Subsequent operations which change the memory pages to which virtual |
| 1349 | * addresses are mapped (such as mmap(), munmap(), scif_mmap() and |
| 1350 | * scif_munmap()) have no effect on the scif_pinned_pages_t value or windows |
| 1351 | * created against it. |
| 1352 | * |
| 1353 | * On Linux, if the process will fork(), it is recommended that the registered |
| 1354 | * virtual address range be marked with MADV_DONTFORK. Doing so will prevent |
| 1355 | * problems due to copy-on-write semantics. |
| 1356 | * |
| 1357 | * The prot_flags argument is formed by OR'ing together one or more of the |
| 1358 | * following values: |
| 1359 | *- SCIF_PROT_READ: allow read operations against the pages |
| 1360 | *- SCIF_PROT_WRITE: allow write operations against the pages |
| 1361 | * The map_flags argument is formed by OR'ing together zero or more of the |
| 1362 | * following values: |
| 1363 | *- SCIF_MAP_KERNEL: interpret addr as a kernel space address. By default, addr |
| 1364 | * is interpreted as a user space address. |
| 1365 | * |
| 1366 | *\return |
| 1367 | * Upon successful completion, scif_register() returns 0; otherwise the |
| 1368 | * negative of one of the following errors is returned. |
| 1369 | *\par Errors: |
| 1370 | *- EFAULT |
| 1371 | * - Addresses in the range [addr,addr+len-1] are invalid |
| 1372 | *- EINVAL |
| 1373 | * - prot_flags is invalid, |
| 1374 | * - map_flags is invalid, or |
| 1375 | * - offset is negative |
| 1376 | *- ENOMEM |
| 1377 | * - Not enough space |
| 1378 | */ |
| 1379 | int |
| 1380 | scif_pin_pages( |
| 1381 | void *addr, |
| 1382 | size_t len, |
| 1383 | int prot_flags, |
| 1384 | int map_flags, |
| 1385 | scif_pinned_pages_t *pinned_pages); |
| 1386 | |
| 1387 | /** |
| 1388 | * scif_unpin_pages - Unpin a set of pages |
| 1389 | * \param pinned_pages Opaque handle of pages to be unpinned |
| 1390 | * |
| 1391 | * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new |
| 1392 | * windows against pinned_pages. The physical pages represented by pinned_pages |
| 1393 | * will remain pinned until all windows previously registered against |
| 1394 | * pinned_pages are deleted (the window is scif_unregister()'d and all |
| 1395 | * references to the window are removed; see scif_unregister()). |
| 1396 | * |
| 1397 | * pinned_pages must have been obtained from a previous call to scif_pin_pages(). |
| 1398 | * After calling scif_unpin_pages(), it is an error to pass pinned_pages to |
| 1399 | * scif_register_pinned_pages(). |
| 1400 | * |
| 1401 | *\return |
| 1402 | * Upon successful completion, scif_unpin_pages() returns 0; otherwise the |
| 1403 | * negative of one of the following errors is returned. |
| 1404 | * |
| 1405 | *\par Errors: |
| 1406 | *- EINVAL |
| 1407 | * - pinned_pages is not valid |
| 1408 | */ |
| 1409 | int |
| 1410 | scif_unpin_pages( |
| 1411 | scif_pinned_pages_t pinned_pages); |
| 1412 | |
| 1413 | /** |
| 1414 | * scif_register_pinned_pages - Mark a memory region for remote access. |
| 1415 | * \param epd Endpoint descriptor |
| 1416 | * \param pinned_pages Opaque handle of pinned pages |
| 1417 | * \param offset Registered address space offset |
| 1418 | * \param map_flags Flags which control where pages are mapped |
| 1419 | * |
| 1420 | * The scif_register_pinned_pages() function opens a window, a range of whole |
| 1421 | * pages of the registered address space of the endpoint epd, starting at |
| 1422 | * offset po. The value of po, further described below, is a function of the |
| 1423 | * parameters offset and pinned_pages, and the value of map_flags. Each page of |
| 1424 | * the window represents a corresponding physical memory page of the range |
| 1425 | * represented by pinned_pages; the length of the window is the same as the |
| 1426 | * length of the range represented by pinned_pages. A successful |
| 1427 | * scif_register_pinned_pages() call returns po as the return value. |
| 1428 | * |
| 1429 | * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset |
| 1430 | * exactly, and offset is constrained to be a multiple of the page size. The |
| 1431 | * mapping established by scif_register_pinned_pages() will not replace any |
| 1432 | * existing registration; an error is returned if any page of the new window |
| 1433 | * would intersect an existing window. |
| 1434 | * |
| 1435 | * When SCIF_MAP_FIXED is not set, the implementation uses offset in an |
| 1436 | * implementation-defined manner to arrive at po. The po so chosen will be an |
| 1437 | * area of the registered address space that the implementation deems suitable |
| 1438 | * for a mapping of the required size. An offset value of 0 is interpreted as |
| 1439 | * granting the implementation complete freedom in selecting po, subject to |
| 1440 | * constraints described below. A non-zero value of offset is taken to be a |
| 1441 | * suggestion of an offset near which the mapping should be placed. When the |
| 1442 | * implementation selects a value for po, it does not replace any extant |
| 1443 | * window. In all cases, po will be a multiple of the page size. |
| 1444 | * |
| 1445 | * The physical pages which are so represented by a window are available for |
| 1446 | * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), |
| 1447 | * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the |
| 1448 | * physical pages represented by the window will not be reused by the memory |
| 1449 | * subsystem for any other purpose. Note that the same physical page may be |
| 1450 | * represented by multiple windows. |
| 1451 | * |
| 1452 | * Windows created by scif_register_pinned_pages() are unregistered by |
| 1453 | * scif_unregister(). |
| 1454 | * |
| 1455 | * The map_flags argument is formed by OR'ing together zero or more of the |
| 1456 | * following values: |
| 1457 | *- SCIF_MAP_FIXED: interpret offset exactly |
| 1458 | * |
| 1459 | *\return |
| 1460 | * Upon successful completion, scif_register_pinned_pages() returns the offset |
| 1461 | * at which the mapping was placed (po); otherwise the negative of one of the |
| 1462 | * following errors is returned. |
| 1463 | *\par Errors: |
| 1464 | *- EADDRINUSE |
| 1465 | * - SCIF_MAP_FIXED is set in map_flags and pages in the new |
| 1466 | * window would intersect an existing window |
| 1467 | *- EAGAIN |
| 1468 | * - The mapping could not be performed due to lack of resources |
| 1469 | *- ECONNRESET |
| 1470 | * - A connection was forcibly closed by a peer. |
| 1471 | *- EINVAL |
| 1472 | * - epd is not a valid endpoint descriptor, or |
| 1473 | * - map_flags is invalid, or |
| 1474 | * - SCIF_MAP_FIXED is set in map_flags, and offset is not a |
| 1475 | * multiple of the page size, or |
| 1476 | * - offset is negative |
| 1477 | *- ENODEV |
| 1478 | * - The remote node is lost. |
| 1479 | *- ENOMEM |
| 1480 | * - Not enough space |
| 1481 | *- ENOTCONN |
| 1482 | * - The endpoint is not connected |
| 1483 | */ |
| 1484 | off_t |
| 1485 | scif_register_pinned_pages( |
| 1486 | scif_epd_t epd, |
| 1487 | scif_pinned_pages_t pinned_pages, |
| 1488 | off_t offset, |
| 1489 | int map_flags); |
| 1490 | |
| 1491 | /** |
| 1492 | * scif_get_pages - Add references to remote registered pages |
| 1493 | * \param epd endpoint descriptor |
| 1494 | * \param offset registered address space offset |
| 1495 | * \param len length of range of pages |
| 1496 | * \param pages address at which the pointer to the scif_range structure is returned |
| 1497 | * |
| 1498 | * scif_get_pages() returns the addresses of the physical pages represented by |
| 1499 | * those pages of the registered address space of the peer of epd, starting at |
| 1500 | * offset and continuing for len bytes. offset and len are constrained to be |
| 1501 | * multiples of the page size. |
| 1502 | * |
| 1503 | * All of the pages in the specified range [offset,offset+len-1] must be within |
| 1504 | * a single window of the registered address space of the peer of epd. |
| 1505 | * |
| 1506 | * The addresses are returned as a virtually contiguous array pointed to by the |
| 1507 | * phys_addr component of the scif_range structure whose address is returned in |
| 1508 | * pages. The nr_pages component of scif_range is the length of the array. The |
| 1509 | * prot_flags component of scif_range holds the protection flag value passed |
| 1510 | * when the pages were registered. |
| 1511 | * |
| 1512 | * Each physical page whose address is returned by scif_get_pages() remains |
| 1513 | * available and will not be released for reuse until the scif_range structure |
| 1514 | * is returned in a call to scif_put_pages(). The scif_range structure returned |
| 1515 | * by scif_get_pages() must not be modified. |
| 1516 | * |
| 1517 | * It is an error to call scif_close() on an endpoint on which a scif_range |
| 1518 | * structure of that endpoint has not been returned to scif_put_pages(). |
| 1519 | * |
| 1520 | *\return |
| 1521 | * Upon successful completion, scif_get_pages() returns 0; otherwise the |
| 1522 | * negative of one of the following errors is returned. |
| 1523 | *\par Errors: |
| 1524 | *- ECONNRESET |
| 1525 | * - A connection was forcibly closed by a peer. |
| 1526 | *- EINVAL |
| 1527 | * - epd is not a valid endpoint descriptor, or |
| 1528 | * - offset is not a multiple of the page size, or |
| 1529 | * - offset is negative, or |
| 1530 | * - len is not a multiple of the page size |
| 1531 | *- ENODEV |
| 1532 | * - The remote node is lost. |
| 1533 | *- ENOTCONN |
| 1534 | * - The endpoint is not connected |
| 1535 | *- ENXIO |
| 1536 | * - Addresses in the range [offset,offset+len-1] are invalid |
| 1537 | * for the registered address space of the peer of epd. |
| 1538 | */ |
| 1539 | int scif_get_pages( |
| 1540 | scif_epd_t epd, |
| 1541 | off_t offset, |
| 1542 | size_t len, |
| 1543 | struct scif_range **pages); |
| 1544 | |
| 1545 | /** |
| 1546 | * scif_put_pages - Remove references from remote registered pages |
| 1547 | * \param pages scif_range structure, obtained from scif_get_pages(), to be returned |
| 1548 | * |
| 1549 | * scif_put_pages() releases a scif_range structure previously obtained by |
| 1550 | * calling scif_get_pages(). The physical pages represented by pages may |
| 1551 | * be reused when the window which represented those pages is unregistered. |
| 1552 | * Therefore, those pages must not be accessed after calling scif_put_pages(). |
| 1553 | * |
| 1554 | *\return |
| 1555 | * Upon successful completion, scif_put_pages() returns 0; otherwise the |
| 1556 | * negative of one of the following errors is returned. |
| 1557 | *\par Errors: |
| 1558 | *- EINVAL |
| 1559 | * - pages does not point to a valid scif_range structure, or |
| 1560 | * - the scif_range structure pointed to by pages was already returned. |
| 1561 | *- ENODEV |
| 1562 | * - The remote node is lost. |
| 1563 | *- ENOTCONN |
| 1564 | * - The endpoint is not connected. |
| 1565 | */ |
| 1566 | int scif_put_pages( |
| 1567 | struct scif_range *pages); |
| 1568 | |
| 1569 | /** |
| 1570 | * scif_poll - Wait for some event on an endpoint |
| 1571 | * \param epds Array of scif_pollepd structures, one per endpoint of interest |
| 1572 | * \param nepds Length of epds |
| 1573 | * \param timeout Upper limit, in milliseconds, on time for which scif_poll() will |
| 1574 | * block; a negative value means an infinite timeout |
| 1575 | * |
| 1576 | * scif_poll() waits for one of a set of endpoints to become ready to perform |
| 1577 | * an I/O operation. scif_poll() exposes a subset of the functionality of the |
| 1578 | * POSIX standard poll() function. |
| 1579 | * |
| 1580 | * The epds argument specifies the endpoint descriptors to be examined and the |
| 1581 | * events of interest for each endpoint descriptor. epds is a pointer to an |
| 1582 | * array with one member for each open endpoint descriptor of interest. |
| 1583 | * |
| 1584 | * The number of items in the epds array is specified in nepds. The epd field |
| 1585 | * of scif_pollepd is an endpoint descriptor of an open endpoint. The field |
| 1586 | * events is a bitmask specifying the events which the application is |
| 1587 | * interested in. The field revents is an output parameter, filled by the |
| 1588 | * kernel with the events that actually occurred. The bits returned in revents |
| 1589 | * can include any of those specified in events, or one of the values |
| 1590 | * SCIF_POLLERR, SCIF_POLLHUP, or SCIF_POLLNVAL. (These three bits are |
| 1591 | * meaningless in the events field, and will be set in the revents field |
| 1592 | * whenever the corresponding condition is true.) |
| 1593 | * |
| 1594 | * If none of the events requested (and no error) has occurred for any of the |
| 1595 | * endpoint descriptors, then scif_poll() blocks until one of the events occurs. |
| 1596 | * |
| 1597 | * The timeout argument specifies an upper limit on the time for which |
| 1598 | * scif_poll() will block, in milliseconds. Specifying a negative value in |
| 1599 | * timeout means an infinite timeout. |
| 1600 | * |
| 1601 | * The following bits may be set in events and returned in revents: |
| 1602 | *- SCIF_POLLIN: Data may be received without blocking. For a connected |
| 1603 | * endpoint, this means that scif_recv() may be called without blocking. For a |
| 1604 | * listening endpoint, this means that scif_accept() may be called without |
| 1605 | * blocking. |
| 1606 | *- SCIF_POLLOUT: Data may be sent without blocking. For a connected endpoint, |
| 1607 | * this means that scif_send() may be called without blocking. This bit value |
| 1608 | * has no meaning for a listening endpoint and is ignored if specified. |
| 1609 | * |
| 1610 | * The following bits are only returned in revents, and are ignored if set in |
| 1611 | * events: |
| 1612 | *- SCIF_POLLERR: An error occurred on the endpoint |
| 1613 | *- SCIF_POLLHUP: The connection to the peer endpoint was disconnected |
| 1614 | *- SCIF_POLLNVAL: The specified endpoint descriptor is invalid. |
| 1615 | * |
| 1616 | *\return |
| 1617 | * Upon successful completion, scif_poll() returns a non-negative value. A |
| 1618 | * positive value indicates the total number of endpoint descriptors that have |
| 1619 | * been selected (that is, endpoint descriptors for which the revents member is |
| 1620 | * non-zero). A value of 0 indicates that the call timed out and no endpoint |
| 1621 | * descriptors have been selected. Otherwise: in user mode -1 is returned and |
| 1622 | * errno is set to indicate the error; in kernel mode the negative of one of |
| 1623 | * the following errors is returned. |
| 1624 | * |
| 1625 | *\par Errors: |
| 1626 | *- EFAULT |
| 1627 | * - The array given as argument was not contained in the calling program's |
| 1628 | * address space. |
| 1629 | *- EINTR |
| 1630 | * - A signal occurred before any requested event. |
| 1631 | *- EINVAL |
| 1632 | * - The nepds argument is greater than {OPEN_MAX} |
| 1633 | *- ENOMEM |
| 1634 | * - There was no space to allocate file descriptor tables. |
| 1635 | */ |
| 1636 | int |
| 1637 | scif_poll( |
| 1638 | struct scif_pollepd *epds, |
| 1639 | unsigned int nepds, |
| 1640 | long timeout); |
| 1641 | |
| 1642 | /** |
| 1643 | * scif_event_register - Register an event handler |
| 1644 | * \param handler Event handler to be registered |
| 1645 | * |
| 1646 | * scif_event_register() registers a routine, handler, to be called when some |
| 1647 | * event occurs. The event parameter to handler indicates the type of event |
| 1648 | * which has occurred, and the corresponding component of the data parameter to |
| 1649 | * handler provides additional data about the event. |
| 1650 | * |
| 1651 | * The following events are defined: |
| 1652 | *- SCIF_NODE_ADDED: A node has been added to the SCIF network. The |
| 1653 | * scif_node_added component of the data parameter to handler identifies the |
| 1654 | * node. This event is informational. There are no requirements on the event |
| 1655 | * handler. |
| 1656 | *- SCIF_NODE_REMOVED: A node is being removed from the SCIF network. The |
| 1657 | * scif_node_removed component of the data parameter to handler identifies the |
| 1658 | * node. Upon being called, and before returning, the event handler must |
| 1659 | * return, using scif_put_pages(), all structures obtained using |
| 1660 | * scif_get_pages() against an endpoint connected to the lost node. It is |
| 1661 | * recommended and expected that the handler will also scif_close() all |
| 1662 | * endpoints connected to the lost node. |
| 1663 | * |
| 1664 | *\return |
| 1665 | * Upon successful completion, scif_event_register() returns 0. |
| 1666 | * |
| 1667 | *\par Errors: |
| 1668 | *- ENOMEM |
| 1669 | * - There was no space to allocate file descriptor tables. |
| 1670 | */ |
| 1671 | |
| 1672 | int |
| 1673 | scif_event_register( |
| 1674 | scif_callback_t handler); |
| 1675 | |
| 1676 | /** |
| 1677 | * scif_event_unregister - Unregister event handler |
| 1678 | * \param handler Event handler to be unregistered |
| 1679 | * |
| 1680 | * scif_event_unregister() unregisters the handler which was registered |
| 1681 | * previously by using scif_event_register(). |
| 1682 | * |
| 1683 | * WARNING: scif_event_unregister() must be called before the module |
| 1684 | * (that registered handlers) exits for every handler that is registered. |
| 1685 | * Failure to do so will result in a crash of the scif module. |
| 1686 | * |
| 1687 | *\return |
| 1688 | * Upon successful completion, scif_event_unregister() returns 0. |
| 1689 | *\par Errors: |
| 1690 | *- EINVAL |
| 1691 | * - The event handler was not found/registered. |
| 1692 | */ |
| 1693 | int |
| 1694 | scif_event_unregister( |
| 1695 | scif_callback_t handler); |
| 1696 | |
| 1697 | /* |
| 1698 | * Note: The callee can use pci_resource_start(dev, index) and |
| 1699 | * pci_resource_len(dev, index) to obtain the PCI resource starting |
| 1700 | * physical address and length for valid non-NULL indexes of the va |
| 1701 | * array. MMIO BARs will not have IORESOURCE_PREFETCH set in the |
| 1702 | * flags obtained from pci_resource_flags(dev, index). va[index] |
| 1703 | * will be set to NULL for invalid resources. |
| 1704 | */ |
| 1705 | struct scif_pci_info { |
| 1706 | /* pci_dev pointer associated with a node */ |
| 1707 | struct pci_dev *pdev; |
| 1708 | /* Ioremapped virtual address base for every valid PCIe resource; NULL if invalid */ |
| 1709 | void __iomem *va[PCI_NUM_RESOURCES]; |
| 1710 | }; |
| 1711 | |
| 1712 | /** |
| 1713 | * scif_pci_info - Populate the scif_pci_info structure for a node. |
| 1714 | * \param node The node to query |
| 1715 | * \param dev The scif_pci_info structure to populate. |
| 1716 | * |
| 1717 | * scif_pci_info() populates the provided scif_pci_info structure |
| 1718 | * associated with a node. The requested node ID cannot be the same as |
| 1719 | * the current node. This routine will only return success when called from |
| 1720 | * the host. |
| 1721 | * |
| 1722 | *\return |
| 1723 | * Upon successful completion, scif_pci_info() returns 0; otherwise the |
| 1724 | * negative of one of the following errors is returned. |
| 1725 | * |
| 1726 | *\par Errors: |
| 1727 | *- EINVAL |
| 1728 | * - The requested node is not valid, or |
| 1729 | * - Called on MIC instead of the host. |
| 1730 | *- ENODEV |
| 1731 | * - No pci_dev association exists for the node. |
| 1732 | */ |
| 1733 | int |
| 1734 | scif_pci_info( |
| 1735 | uint16_t node, |
| 1736 | struct scif_pci_info *dev); |
| 1737 | |
| 1738 | |
| 1739 | #ifdef __cplusplus |
| 1740 | } /* extern "C" */ |
| 1741 | #endif |
| 1742 | |
| 1743 | #endif /* __SCIF_H__ */ |