40 #include <sys/types.h>
42 #include <sys/socket.h>
45 #include <sys/ioctl.h>
46 #include <sys/param.h>
47 #include <netinet/in.h>
48 #include <arpa/inet.h>
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <rdma/rdma_cma.h>
74 #include <qb/qbdefs.h>
75 #include <qb/qbloop.h>
76 #define LOGSYS_UTILS_ONLY 1
80 #define COMPLETION_QUEUE_ENTRIES 100
82 #define TOTAL_READ_POSTS 100
84 #define MAX_MTU_SIZE 4096
110 unsigned int msg_len);
194 const char *
function,
/*
 * Logging macro: forwards to the callback stored on the instance
 * (totemiba_log_printf) together with the subsystem id and call-site
 * info (__FUNCTION__/__FILE__/__LINE__).
 * NOTE(review): sampled listing — the macro's interior lines (e.g. where
 * `level` is passed) are missing here; do not edit without the full file.
 */
218 #define log_printf(level, format, args...) \
220 instance->totemiba_log_printf ( \
222 instance->totemiba_subsys_id, \
223 __FUNCTION__, __FILE__, __LINE__, \
224 (const char *)format, ##args); \
/*
 * Convert a void pointer into a 64-bit ibv work-request id by
 * type-punning through `union u` (counterpart of wrid2void, used when
 * posting/reaping WRs so completions can recover the buffer pointer).
 * NOTE(review): return-type line precedes this excerpt; presumably
 * uint64_t — confirm against the full file.
 */
243 void2wrid (
void *v) {
union u u; u.
v =
v;
return u.
wr_id; }
/*
 * Obtain a send buffer for multicast transmission: allocate one and
 * register it (2048 bytes, IBV_ACCESS_LOCAL_WRITE) against the
 * multicast protection domain.
 * NOTE(review): sampled listing — a free-list fast path and the error
 * returns between the visible lines are not shown.
 */
258 static inline struct send_buf *mcast_send_buf_get (
269 send_buf = malloc (
sizeof (
struct send_buf));
270 if (send_buf == NULL) {
/* register the freshly allocated buffer with the mcast PD */
273 send_buf->
mr = ibv_reg_mr (instance->
mcast_pd,
275 2048, IBV_ACCESS_LOCAL_WRITE);
276 if (send_buf->
mr == NULL) {
/* Return a multicast send buffer after its completion was reaped
 * (called from mcast_cq_send_event_fn).
 * NOTE(review): body lines are missing from this sampled listing. */
287 static inline void mcast_send_buf_put (
289 struct send_buf *send_buf)
/*
 * Obtain a send buffer for token transmission: allocate and register a
 * 2048-byte memory region (IBV_ACCESS_LOCAL_WRITE).
 * NOTE(review): the ibv_reg_mr call line and which PD it registers
 * against are on missing lines — presumably the send-token PD; confirm.
 */
295 static inline struct send_buf *token_send_buf_get (
298 struct send_buf *send_buf;
306 send_buf = malloc (
sizeof (
struct send_buf));
307 if (send_buf == NULL) {
312 2048, IBV_ACCESS_LOCAL_WRITE);
313 if (send_buf->
mr == NULL) {
/* NOTE(review): fragment of a send-buf teardown routine (header line is
 * missing from this excerpt) — deregisters each buffer's memory region. */
327 struct send_buf *send_buf;
332 ibv_dereg_mr (send_buf->
mr);
/* Return a token send buffer after its completion was reaped
 * (called from send_token_cq_send_event_fn).
 * NOTE(review): body lines are missing from this sampled listing. */
340 static inline void token_send_buf_put (
342 struct send_buf *send_buf)
/*
 * Allocate and prepare one receive buffer for the token QP: register its
 * memory (IBV_ACCESS_LOCAL_WRITE), make the recv WR's wr_id point back
 * at the recv_buf itself, and describe the 2048-byte buffer in the sge.
 */
348 static inline struct recv_buf *recv_token_recv_buf_create (
353 recv_buf = malloc (
sizeof (
struct recv_buf));
354 if (recv_buf == NULL) {
360 IBV_ACCESS_LOCAL_WRITE);
/* wr_id round-trips the buffer pointer through the completion queue */
365 recv_buf->
recv_wr.wr_id = (uintptr_t)recv_buf;
367 recv_buf->
sge.length = 2048;
368 recv_buf->
sge.lkey = recv_buf->
mr->lkey;
369 recv_buf->
sge.addr = (uintptr_t)recv_buf->
buffer;
/* Post one receive buffer onto the token QP.
 * NOTE(review): the ibv_post_recv call is on a missing line; `fail_recv`
 * is its failed-WR out parameter — confirm against the full file. */
376 static inline int recv_token_recv_buf_post (
struct totemiba_instance *instance,
struct recv_buf *recv_buf)
378 struct ibv_recv_wr *fail_recv;
/* Pre-post the initial set of token receive buffers (create + post).
 * NOTE(review): the enclosing loop header is missing — presumably
 * repeats TOTAL_READ_POSTS times; confirm. */
386 static inline void recv_token_recv_buf_post_initial (
struct totemiba_instance *instance)
388 struct recv_buf *recv_buf;
392 recv_buf = recv_token_recv_buf_create (instance);
394 recv_token_recv_buf_post (instance, recv_buf);
/*
 * Tear down the posted token receive buffers: walk the list_all list and
 * deregister each buffer's memory region.
 * NOTE(review): the loop header and the free() of each recv_buf are on
 * missing lines — verify the buffers are actually freed in the full file.
 */
398 static inline void recv_token_recv_buf_post_destroy (
401 struct recv_buf *recv_buf;
407 recv_buf =
list_entry (list,
struct recv_buf, list_all);
409 ibv_dereg_mr (recv_buf->
mr);
/*
 * Allocate and prepare one receive buffer for the multicast QP, mirroring
 * recv_token_recv_buf_create: register memory, self-referencing wr_id,
 * 2048-byte sge.
 * NOTE(review): the sge.lkey line uses a bare `mr` here, whereas the token
 * variant uses `recv_buf->mr` — likely a local alias assigned on a missing
 * line; verify against the full file.
 */
415 static inline struct recv_buf *mcast_recv_buf_create (
struct totemiba_instance *instance)
417 struct recv_buf *recv_buf;
420 recv_buf = malloc (
sizeof (
struct recv_buf));
421 if (recv_buf == NULL) {
427 IBV_ACCESS_LOCAL_WRITE);
432 recv_buf->
recv_wr.wr_id = (uintptr_t)recv_buf;
434 recv_buf->
sge.length = 2048;
435 recv_buf->
sge.lkey = mr->lkey;
436 recv_buf->
sge.addr = (uintptr_t)recv_buf->
buffer;
/* Post one receive buffer onto the multicast QP.
 * NOTE(review): the ibv_post_recv call is on a missing line; `fail_recv`
 * is its failed-WR out parameter — confirm against the full file. */
441 static inline int mcast_recv_buf_post (
struct totemiba_instance *instance,
struct recv_buf *recv_buf)
443 struct ibv_recv_wr *fail_recv;
/* Pre-post the initial set of multicast receive buffers (create + post).
 * NOTE(review): the enclosing loop header is missing — presumably
 * repeats TOTAL_READ_POSTS times; confirm. */
451 static inline void mcast_recv_buf_post_initial (
struct totemiba_instance *instance)
453 struct recv_buf *recv_buf;
457 recv_buf = mcast_recv_buf_create (instance);
459 mcast_recv_buf_post (instance, recv_buf);
/*
 * Deliver a received datagram upward: recover the recv_buf from the
 * completion's wr_id, then address the payload past the leading
 * `struct ibv_grh` (UD receives carry the global routing header at the
 * front of the buffer).
 * NOTE(review): the actual deliver callback invocation is on missing lines.
 */
463 static inline void iba_deliver_fn (
struct totemiba_instance *instance, uint64_t wr_id, uint32_t bytes)
466 const struct recv_buf *recv_buf;
468 recv_buf = wrid2void(wr_id);
469 addr = &recv_buf->
buffer[
sizeof (
struct ibv_grh)];
/*
 * qb_loop dispatch handler for multicast send-CQ events: ack the CQ
 * event, re-arm notifications, then recycle each completed send buffer.
 * NOTE(review): `res` in the loop bound must come from an ibv_poll_cq
 * call on a missing line (the visible ibv_req_notify_cq return would be
 * 0 on success) — confirm against the full file.
 */
474 static int mcast_cq_send_event_fn (
int events,
int suck,
void *context)
477 struct ibv_wc wc[32];
478 struct ibv_cq *ev_cq;
484 ibv_ack_cq_events (ev_cq, 1);
485 res = ibv_req_notify_cq (ev_cq, 0);
489 for (i = 0; i < res; i++) {
490 mcast_send_buf_put (instance, wrid2void(wc[i].wr_id));
/*
 * qb_loop dispatch handler for multicast recv-CQ events: ack + re-arm,
 * then for each completion deliver the payload and re-post the same
 * receive buffer.
 * NOTE(review): as with the send handler, the ibv_poll_cq that sets
 * `res` is on a missing line — confirm.
 */
497 static int mcast_cq_recv_event_fn (
int events,
int suck,
void *context)
500 struct ibv_wc wc[64];
501 struct ibv_cq *ev_cq;
507 ibv_ack_cq_events (ev_cq, 1);
508 res = ibv_req_notify_cq (ev_cq, 0);
512 for (i = 0; i < res; i++) {
513 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
514 mcast_recv_buf_post (instance, wrid2void(wc[i].wr_id));
/*
 * RDMA CM event handler for the multicast channel. On MULTICAST_JOIN it
 * records the multicast QP number and builds an address handle from the
 * join's UD parameters; error events fall through to (missing) logging.
 * Every path acks the CM event.
 */
521 static int mcast_rdma_event_fn (
int events,
int suck,
void *context)
524 struct rdma_cm_event *event;
533 switch (event->event) {
537 case RDMA_CM_EVENT_ADDR_RESOLVED:
544 case RDMA_CM_EVENT_MULTICAST_JOIN:
545 instance->
mcast_qpn =
event->param.ud.qp_num;
547 instance->
mcast_ah = ibv_create_ah (instance->
mcast_pd, &event->param.ud.ah_attr);
551 case RDMA_CM_EVENT_ADDR_ERROR:
552 case RDMA_CM_EVENT_ROUTE_ERROR:
553 case RDMA_CM_EVENT_MULTICAST_ERROR:
556 case RDMA_CM_EVENT_DEVICE_REMOVAL:
563 rdma_ack_cm_event (event);
/*
 * Handler for send-CQ events on the token-receive side: ack + re-arm,
 * deliver each completion and deregister the completed WR's memory region.
 * NOTE(review): `res` loop bound comes from an ibv_poll_cq on a missing
 * line; the deliver-on-send-completion pairing here differs from the
 * other send handlers — verify against the full file.
 */
567 static int recv_token_cq_send_event_fn (
573 struct ibv_wc wc[32];
574 struct ibv_cq *ev_cq;
580 ibv_ack_cq_events (ev_cq, 1);
581 res = ibv_req_notify_cq (ev_cq, 0);
585 for (i = 0; i < res; i++) {
586 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
587 ibv_dereg_mr (wrid2void(wc[i].wr_id));
/*
 * Handler for recv-CQ events on the token-receive side: ack + re-arm,
 * deliver each received token and re-post the receive buffer.
 * NOTE(review): `res` loop bound comes from an ibv_poll_cq on a missing
 * line — confirm.
 */
594 static int recv_token_cq_recv_event_fn (
int events,
int suck,
void *context)
597 struct ibv_wc wc[32];
598 struct ibv_cq *ev_cq;
604 ibv_ack_cq_events (ev_cq, 1);
605 res = ibv_req_notify_cq (ev_cq, 0);
609 for (i = 0; i < res; i++) {
610 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
611 recv_token_recv_buf_post (instance, wrid2void(wc[i].wr_id));
/* NOTE(review): the following lines span fragments of two routines — the
 * tail of a token-accept destroy path and a setup routine that builds a
 * UD QP (50 send WRs, 1 sge per WR, unsignaled sends), pre-posts receive
 * buffers and registers the recv/send CQ fds with the qb loop. */
626 recv_token_recv_buf_post_destroy (instance);
653 struct ibv_qp_init_attr init_qp_attr;
710 memset (&init_qp_attr, 0,
sizeof (
struct ibv_qp_init_attr));
711 init_qp_attr.cap.max_send_wr = 50;
713 init_qp_attr.cap.max_send_sge = 1;
714 init_qp_attr.cap.max_recv_sge = 1;
715 init_qp_attr.qp_context = instance;
716 init_qp_attr.sq_sig_all = 0;
717 init_qp_attr.qp_type = IBV_QPT_UD;
727 recv_token_recv_buf_post_initial (instance);
733 POLLIN, instance, recv_token_cq_recv_event_fn);
739 POLLIN, instance, recv_token_cq_send_event_fn);
/*
 * RDMA CM event handler for the token-receive listener. On
 * CONNECT_REQUEST it tears down any previous accept state, sets up a new
 * accept, and zeroes conn_param (presumably for an rdma_accept on a
 * missing line — confirm). The event is acked on exit.
 */
746 static int recv_token_rdma_event_fn (
int events,
int suck,
void *context)
749 struct rdma_cm_event *event;
750 struct rdma_conn_param conn_param;
759 switch (event->event) {
760 case RDMA_CM_EVENT_CONNECT_REQUEST:
761 recv_token_accept_destroy (instance);
764 recv_token_accept_setup (instance);
765 memset (&conn_param, 0,
sizeof (
struct rdma_conn_param));
774 res = rdma_ack_cm_event (event);
/*
 * Handler for send-CQ events on the token-send side: ack + re-arm, then
 * return each completed token send buffer to the free pool.
 * NOTE(review): `res` loop bound comes from an ibv_poll_cq on a missing
 * line — confirm.
 */
778 static int send_token_cq_send_event_fn (
int events,
int suck,
void *context)
781 struct ibv_wc wc[32];
782 struct ibv_cq *ev_cq;
788 ibv_ack_cq_events (ev_cq, 1);
789 res = ibv_req_notify_cq (ev_cq, 0);
793 for (i = 0; i < res; i++) {
794 token_send_buf_put (instance, wrid2void(wc[i].wr_id));
/*
 * Handler for recv-CQ events on the token-send side: ack + re-arm and
 * deliver each completion. Unlike the other recv handlers, no visible
 * re-post of the buffer here.
 * NOTE(review): `res` loop bound comes from an ibv_poll_cq on a missing
 * line — confirm.
 */
801 static int send_token_cq_recv_event_fn (
int events,
int suck,
void *context)
804 struct ibv_wc wc[32];
805 struct ibv_cq *ev_cq;
811 ibv_ack_cq_events (ev_cq, 1);
812 res = ibv_req_notify_cq (ev_cq, 0);
816 for (i = 0; i < res; i++) {
817 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
/*
 * RDMA CM event handler for the token-send connection. ADDR_RESOLVED /
 * ROUTE_RESOLVED drive connection setup (conn_param is zeroed, with no
 * private data — the rdma_connect/rdma_resolve_route calls are on
 * missing lines); error and unreachable events are logged; every path
 * acks the CM event.
 */
824 static int send_token_rdma_event_fn (
int events,
int suck,
void *context)
827 struct rdma_cm_event *event;
828 struct rdma_conn_param conn_param;
837 switch (event->event) {
841 case RDMA_CM_EVENT_ADDR_RESOLVED:
847 case RDMA_CM_EVENT_ROUTE_RESOLVED:
848 memset (&conn_param, 0,
sizeof (
struct rdma_conn_param));
849 conn_param.private_data = NULL;
850 conn_param.private_data_len = 0;
853 case RDMA_CM_EVENT_ESTABLISHED:
860 case RDMA_CM_EVENT_ADDR_ERROR:
861 case RDMA_CM_EVENT_ROUTE_ERROR:
862 case RDMA_CM_EVENT_MULTICAST_ERROR:
864 "send_token_rdma_event_fn multicast error");
866 case RDMA_CM_EVENT_DEVICE_REMOVAL:
868 case RDMA_CM_EVENT_UNREACHABLE:
870 "send_token_rdma_event_fn unreachable");
874 "send_token_rdma_event_fn unknown event %d",
879 rdma_ack_cm_event (event);
/* NOTE(review): fragments of the token-send bind path (and, at the end,
 * an unbind path calling token_send_buf_destroy). Builds a UD QP with
 * the same shape as the other QPs (50 send WRs, 1 sge, unsignaled) and
 * registers the CQ and CM-channel fds with the qb loop. Interior lines
 * (CQ creation, rdma calls) are missing from this excerpt. */
886 struct ibv_qp_init_attr init_qp_attr;
945 "couldn't request notifications of the completion queue");
975 "couldn't request notifications of the completion queue");
978 memset (&init_qp_attr, 0,
sizeof (
struct ibv_qp_init_attr));
979 init_qp_attr.cap.max_send_wr = 50;
981 init_qp_attr.cap.max_send_sge = 1;
982 init_qp_attr.cap.max_recv_sge = 1;
983 init_qp_attr.qp_context = instance;
984 init_qp_attr.sq_sig_all = 0;
985 init_qp_attr.qp_type = IBV_QPT_UD;
999 POLLIN, instance, send_token_cq_recv_event_fn;
1005 POLLIN, instance, send_token_cq_send_event_fn);
1011 POLLIN, instance, send_token_rdma_event_fn);
1044 token_send_buf_destroy (instance);
/* NOTE(review): fragments of the token-receive bind (registering
 * recv_token_rdma_event_fn) and the multicast bind: a third UD QP with
 * the same attributes, initial recv-buffer posting, and qb-loop
 * registration of the mcast CQ/CM handlers. Interior lines are missing
 * from this excerpt. */
1088 POLLIN, instance, recv_token_rdma_event_fn);
1096 struct ibv_qp_init_attr init_qp_attr;
1180 memset (&init_qp_attr, 0,
sizeof (
struct ibv_qp_init_attr));
1181 init_qp_attr.cap.max_send_wr = 50;
1183 init_qp_attr.cap.max_send_sge = 1;
1184 init_qp_attr.cap.max_recv_sge = 1;
1185 init_qp_attr.qp_context = instance;
1186 init_qp_attr.sq_sig_all = 0;
1187 init_qp_attr.qp_type = IBV_QPT_UD;
1197 mcast_recv_buf_post_initial (instance);
1203 POLLIN, instance, mcast_cq_recv_event_fn);
1209 POLLIN, instance, mcast_cq_send_event_fn);
1215 POLLIN, instance, mcast_rdma_event_fn);
/*
 * Periodic timer (armed in totemiba_initialize): checks the network
 * interface by consulting the instance's bind/mcast addresses, then
 * (re)binds the token-receive and multicast channels.
 * NOTE(review): the address-resolution calls surrounding the visible
 * casts are on missing lines.
 */
1219 static void timer_function_netif_check_timeout (
1248 (
const struct sockaddr_storage *)&instance->
bind_addr,
1256 (
struct sockaddr_storage *)&instance->
mcast_addr, &addr_len);
1258 res = recv_token_bind (instance);
1260 res = mcast_bind (instance);
/* NOTE(review): fragments of the public initializer: takes crypto
 * parameters, the qb loop handle and the deliver/iface-change/
 * target-set-completed callbacks; allocates and initializes the
 * instance, arms the 100 ms netif-check timer, and returns the instance
 * through *iba_context. Most parameter and body lines are missing. */
1265 const char *cipher_type,
1266 const char *hash_type)
1291 qb_loop_t *qb_poll_handle,
1298 void (*deliver_fn) (
1301 unsigned int msg_len),
1303 void (*iface_change_fn) (
1307 void (*target_set_completed) (
1314 if (instance == NULL) {
1318 totemiba_instance_initialize (instance);
1339 100*QB_TIME_NS_IN_MSEC,
1341 timer_function_netif_check_timeout,
1347 *iba_context = instance;
1363 int processor_count)
/*
 * Send a token: copy the caller's message into a registered send buffer,
 * build a signaled IBV_WR_SEND work request whose wr_id encodes the
 * buffer pointer, and post it on the token-send QP.
 * NOTE(review): function header and the msg/sge declarations are on
 * missing lines in this excerpt.
 */
1396 unsigned int msg_len)
1400 struct ibv_send_wr send_wr, *failed_send_wr;
1403 struct send_buf *send_buf;
1405 send_buf = token_send_buf_get (instance);
1406 if (send_buf == NULL) {
1410 memcpy (msg, ms, msg_len);
1412 send_wr.next = NULL;
1413 send_wr.sg_list = &sge;
1414 send_wr.num_sge = 1;
1415 send_wr.opcode = IBV_WR_SEND;
1416 send_wr.send_flags = IBV_SEND_SIGNALED;
1417 send_wr.wr_id = void2wrid(send_buf);
1418 send_wr.imm_data = 0;
1423 sge.length = msg_len;
1424 sge.lkey = send_buf->
mr->lkey;
1425 sge.addr = (uintptr_t)msg;
1428 res = ibv_post_send (instance->
send_token_cma_id->qp, &send_wr, &failed_send_wr);
/*
 * Multicast send (flush variant): copy the message into a registered
 * send buffer and post a signaled IBV_WR_SEND on the multicast QP,
 * addressing it with the stored mcast address handle, remote QPN and
 * QKey learned at RDMA_CM_EVENT_MULTICAST_JOIN.
 * NOTE(review): function header and msg/sge declarations are on missing
 * lines in this excerpt.
 */
1436 unsigned int msg_len)
1440 struct ibv_send_wr send_wr, *failed_send_wr;
1443 struct send_buf *send_buf;
1445 send_buf = mcast_send_buf_get (instance);
1446 if (send_buf == NULL) {
1451 memcpy (msg, ms, msg_len);
1452 send_wr.next = NULL;
1453 send_wr.sg_list = &sge;
1454 send_wr.num_sge = 1;
1455 send_wr.opcode = IBV_WR_SEND;
1456 send_wr.send_flags = IBV_SEND_SIGNALED;
1457 send_wr.wr_id = void2wrid(send_buf);
1458 send_wr.imm_data = 0;
1459 send_wr.wr.ud.ah = instance->
mcast_ah;
1460 send_wr.wr.ud.remote_qpn = instance->
mcast_qpn;
1461 send_wr.wr.ud.remote_qkey = instance->
mcast_qkey;
1463 sge.length = msg_len;
1464 sge.lkey = send_buf->
mr->lkey;
1465 sge.addr = (uintptr_t)msg;
1467 res = ibv_post_send (instance->
mcast_cma_id->qp, &send_wr, &failed_send_wr);
/*
 * Multicast send (noflush variant): identical visible sequence to the
 * flush variant — buffer get, copy, signaled UD send on the mcast QP.
 * NOTE(review): function header and msg/sge declarations are on missing
 * lines; any flush/noflush behavioral difference is not visible here.
 */
1474 unsigned int msg_len)
1478 struct ibv_send_wr send_wr, *failed_send_wr;
1481 struct send_buf *send_buf;
1483 send_buf = mcast_send_buf_get (instance);
1484 if (send_buf == NULL) {
1489 memcpy (msg, ms, msg_len);
1490 send_wr.next = NULL;
1491 send_wr.sg_list = &sge;
1492 send_wr.num_sge = 1;
1493 send_wr.opcode = IBV_WR_SEND;
1494 send_wr.send_flags = IBV_SEND_SIGNALED;
1495 send_wr.wr_id = void2wrid(send_buf);
1496 send_wr.imm_data = 0;
1497 send_wr.wr.ud.ah = instance->
mcast_ah;
1498 send_wr.wr.ud.remote_qpn = instance->
mcast_qpn;
1499 send_wr.wr.ud.remote_qkey = instance->
mcast_qkey;
1501 sge.length = msg_len;
1502 sge.lkey = send_buf->
mr->lkey;
1503 sge.addr = (uintptr_t)msg;
1505 res = ibv_post_send (instance->
mcast_cma_id->qp, &send_wr, &failed_send_wr);
/* NOTE(review): trailing fragments — a string-returning helper's local,
 * and a token-target change path that unbinds then rebinds the
 * token-send channel. Enclosing function headers are missing. */
1528 const char *ret_char;
1559 res = send_token_unbind (instance);
1561 res = send_token_bind (instance);