diff --git a/fairmq/ofi/Context.cxx b/fairmq/ofi/Context.cxx index f7d6b705..c894a7d3 100644 --- a/fairmq/ofi/Context.cxx +++ b/fairmq/ofi/Context.cxx @@ -134,16 +134,16 @@ auto Context::InitOfi(ConnectionType type, Address addr) -> void // Prepare fi_getinfo query unique_ptr ofi_hints(fi_allocinfo(), fi_freeinfo); - ofi_hints->caps = FI_MSG | FI_RMA; - ofi_hints->mode = FI_CONTEXT; + ofi_hints->caps = FI_MSG; + //ofi_hints->mode = FI_CONTEXT; ofi_hints->addr_format = FI_SOCKADDR_IN; if (addr.Protocol == "tcp") { ofi_hints->fabric_attr->prov_name = strdup("sockets"); } else if (addr.Protocol == "verbs") { - ofi_hints->fabric_attr->prov_name = strdup("verbs"); + ofi_hints->fabric_attr->prov_name = strdup("verbs;ofi_rxm"); } ofi_hints->ep_attr->type = FI_EP_RDM; - ofi_hints->domain_attr->mr_mode = ~0; + //ofi_hints->domain_attr->mr_mode = FI_MR_BASIC | FI_MR_SCALABLE; ofi_hints->domain_attr->threading = FI_THREAD_SAFE; ofi_hints->domain_attr->control_progress = FI_PROGRESS_AUTO; ofi_hints->domain_attr->data_progress = FI_PROGRESS_AUTO; diff --git a/fairmq/ofi/Socket.cxx b/fairmq/ofi/Socket.cxx index 086f24b6..da029e20 100644 --- a/fairmq/ofi/Socket.cxx +++ b/fairmq/ofi/Socket.cxx @@ -325,11 +325,13 @@ try { // Send data fi_context ctx; auto ret = fi_send(fDataEndpoint, msg->GetData(), size, nullptr, fRemoteDataAddr, &ctx); - if (ret != FI_SUCCESS) + if (ret < 0) throw SocketError(tools::ToString("Failed posting ofi send buffer, reason: ", fi_strerror(ret))); + } + if (size) { fi_cq_err_entry cqEntry; - ret = fi_cq_sread(fDataCompletionQueueTx, &cqEntry, 1, nullptr, -1); + auto ret = fi_cq_sread(fDataCompletionQueueTx, &cqEntry, 1, nullptr, -1); if (ret != 1) throw SocketError(tools::ToString("Failed reading ofi tx completion queue event, reason: ", fi_strerror(ret))); } @@ -371,7 +373,7 @@ try { auto buf = msg->GetData(); auto size2 = msg->GetSize(); auto ret = fi_recv(fDataEndpoint, buf, size2, nullptr, fRemoteDataAddr, &ctx); - if (ret != FI_SUCCESS) + if (ret < 0) throw SocketError(tools::ToString("Failed posting ofi receive buffer, reason: ", fi_strerror(ret))); // Create and send control message