mirror of
https://github.com/FairRootGroup/FairMQ.git
synced 2025-10-13 08:41:16 +00:00
Implement connection mgmt
This commit is contained in:
parent
1c5d7ca46a
commit
ba4e6f72c9
|
@ -71,7 +71,7 @@ if(BUILD_OFI_TRANSPORT)
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(BUILD_NANOMSG_TRANSPORT OR BUILD_OFI_TRANSPORT)
|
if(BUILD_NANOMSG_TRANSPORT)
|
||||||
find_package2(PRIVATE msgpack REQUIRED
|
find_package2(PRIVATE msgpack REQUIRED
|
||||||
VERSION 3.1.0
|
VERSION 3.1.0
|
||||||
)
|
)
|
||||||
|
|
|
@ -219,7 +219,10 @@ if(BUILD_NANOMSG_TRANSPORT)
|
||||||
set(NANOMSG_DEPS nanomsg msgpackc-cxx)
|
set(NANOMSG_DEPS nanomsg msgpackc-cxx)
|
||||||
endif()
|
endif()
|
||||||
if(BUILD_OFI_TRANSPORT)
|
if(BUILD_OFI_TRANSPORT)
|
||||||
set(OFI_DEPS asiofi::asiofi msgpackc-cxx)
|
set(OFI_DEPS
|
||||||
|
asiofi::asiofi
|
||||||
|
Boost::container
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
set(optional_deps ${NANOMSG_DEPS} ${OFI_DEPS})
|
set(optional_deps ${NANOMSG_DEPS} ${OFI_DEPS})
|
||||||
if(optional_deps)
|
if(optional_deps)
|
||||||
|
|
|
@ -51,9 +51,9 @@ auto Context::InitThreadPool(int numberIoThreads) -> void
|
||||||
|
|
||||||
for (int i = 1; i <= numberIoThreads; ++i) {
|
for (int i = 1; i <= numberIoThreads; ++i) {
|
||||||
fThreadPool.emplace_back([&, i, numberIoThreads]{
|
fThreadPool.emplace_back([&, i, numberIoThreads]{
|
||||||
LOG(debug) << "OFI transport: I/O thread #" << i << "/" << numberIoThreads << " started";
|
LOG(debug) << "OFI transport: I/O thread #" << i << " of " << numberIoThreads << " started";
|
||||||
fIoContext.run();
|
fIoContext.run();
|
||||||
LOG(debug) << "OFI transport: I/O thread #" << i << "/" << numberIoThreads << " stopped";
|
LOG(debug) << "OFI transport: I/O thread #" << i << " of " << numberIoThreads << " stopped";
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -97,12 +97,31 @@ auto Context::InitOfi(ConnectionType type, Address addr) -> void
|
||||||
} else {
|
} else {
|
||||||
fOfiInfo = tools::make_unique<asiofi::info>(addr.Ip.c_str(), std::to_string(addr.Port).c_str(), 0, hints);
|
fOfiInfo = tools::make_unique<asiofi::info>(addr.Ip.c_str(), std::to_string(addr.Port).c_str(), 0, hints);
|
||||||
}
|
}
|
||||||
|
LOG(debug) << "OFI transport: " << *fOfiInfo;
|
||||||
|
|
||||||
fOfiFabric = tools::make_unique<asiofi::fabric>(*fOfiInfo);
|
fOfiFabric = tools::make_unique<asiofi::fabric>(*fOfiInfo);
|
||||||
|
|
||||||
fOfiDomain = tools::make_unique<asiofi::domain>(*fOfiFabric);
|
fOfiDomain = tools::make_unique<asiofi::domain>(*fOfiFabric);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a passive (listening) OFI endpoint for the given local address.
///
/// Calls InitOfi() for a bind-type connection, which assigns the context's
/// info/fabric/domain members, and then constructs the endpoint on top of the
/// context's I/O context and the resulting fabric.
///
/// @param addr local address to bind to
/// @return owning pointer to the newly created passive endpoint
auto Context::MakeOfiPassiveEndpoint(Address addr) -> std::unique_ptr<asiofi::passive_endpoint>
{
    InitOfi(ConnectionType::Bind, addr);

    return tools::make_unique<asiofi::passive_endpoint>(fIoContext, *fOfiFabric);
}
|
||||||
|
|
||||||
|
/// Creates a connected OFI endpoint from an already resolved asiofi::info
/// (e.g. the info object delivered with an incoming connection request).
///
/// Unlike the Address overload this one does not call InitOfi(); it relies on
/// the OFI domain having been created by an earlier call.
///
/// @param info fabric info describing the connection
/// @return owning pointer to the newly created connected endpoint
/// @throws ContextError if the OFI domain has not been initialized yet
auto Context::MakeOfiConnectedEndpoint(const asiofi::info& info) -> std::unique_ptr<asiofi::connected_endpoint>
{
    // Dereferencing a null domain would be undefined behaviour; fail loudly instead.
    if (!fOfiDomain) {
        throw ContextError("Cannot create OFI connected endpoint: OFI domain is not initialized.");
    }

    return tools::make_unique<asiofi::connected_endpoint>(fIoContext, *fOfiDomain, info);
}
|
||||||
|
|
||||||
|
// Creates a connected OFI endpoint for an outgoing connection to `addr`.
//
// Calls InitOfi(ConnectionType::Connect, addr) and then constructs the
// endpoint on top of the context's I/O context and OFI domain.
//
// NOTE(review): InitOfi() assigns fOfiInfo/fOfiFabric/fOfiDomain anew on every
// call, so endpoints created by earlier calls presumably keep referring to the
// replaced objects - confirm against asiofi's ownership semantics.
auto Context::MakeOfiConnectedEndpoint(Address addr) -> std::unique_ptr<asiofi::connected_endpoint>
|
||||||
|
{
|
||||||
|
InitOfi(ConnectionType::Connect, addr);
|
||||||
|
|
||||||
|
return tools::make_unique<asiofi::connected_endpoint>(fIoContext, *fOfiDomain);
|
||||||
|
}
|
||||||
// auto Context::CreateOfiEndpoint() -> fid_ep*
|
// auto Context::CreateOfiEndpoint() -> fid_ep*
|
||||||
// {
|
// {
|
||||||
// assert(fOfiDomain);
|
// assert(fOfiDomain);
|
||||||
|
|
|
@ -9,10 +9,12 @@
|
||||||
#ifndef FAIR_MQ_OFI_CONTEXT_H
|
#ifndef FAIR_MQ_OFI_CONTEXT_H
|
||||||
#define FAIR_MQ_OFI_CONTEXT_H
|
#define FAIR_MQ_OFI_CONTEXT_H
|
||||||
|
|
||||||
|
#include <asiofi/connected_endpoint.hpp>
|
||||||
#include <asiofi/domain.hpp>
|
#include <asiofi/domain.hpp>
|
||||||
#include <asiofi/fabric.hpp>
|
#include <asiofi/fabric.hpp>
|
||||||
#include <asiofi/info.hpp>
|
#include <asiofi/info.hpp>
|
||||||
#include <boost/asio.hpp>
|
#include <asiofi/passive_endpoint.hpp>
|
||||||
|
#include <boost/asio/io_context.hpp>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
@ -47,14 +49,16 @@ class Context
|
||||||
auto GetZmqVersion() const -> std::string;
|
auto GetZmqVersion() const -> std::string;
|
||||||
auto GetAsiofiVersion() const -> std::string;
|
auto GetAsiofiVersion() const -> std::string;
|
||||||
auto GetZmqContext() const -> void* { return fZmqContext; }
|
auto GetZmqContext() const -> void* { return fZmqContext; }
|
||||||
auto GetIoContext() -> boost::asio::io_service& { return fIoContext; }
|
auto GetIoContext() -> boost::asio::io_context& { return fIoContext; }
|
||||||
struct Address {
|
struct Address {
|
||||||
std::string Protocol;
|
std::string Protocol;
|
||||||
std::string Ip;
|
std::string Ip;
|
||||||
unsigned int Port;
|
unsigned int Port;
|
||||||
friend auto operator<<(std::ostream& os, const Address& a) -> std::ostream& { return os << a.Protocol << "://" << a.Ip << ":" << a.Port; }
|
friend auto operator<<(std::ostream& os, const Address& a) -> std::ostream& { return os << a.Protocol << "://" << a.Ip << ":" << a.Port; }
|
||||||
};
|
};
|
||||||
auto InitOfi(ConnectionType type, Address address) -> void;
|
auto MakeOfiPassiveEndpoint(Address addr) -> std::unique_ptr<asiofi::passive_endpoint>;
|
||||||
|
auto MakeOfiConnectedEndpoint(Address addr) -> std::unique_ptr<asiofi::connected_endpoint>;
|
||||||
|
auto MakeOfiConnectedEndpoint(const asiofi::info& info) -> std::unique_ptr<asiofi::connected_endpoint>;
|
||||||
static auto ConvertAddress(std::string address) -> Address;
|
static auto ConvertAddress(std::string address) -> Address;
|
||||||
static auto ConvertAddress(Address address) -> sockaddr_in;
|
static auto ConvertAddress(Address address) -> sockaddr_in;
|
||||||
static auto ConvertAddress(sockaddr_in address) -> Address;
|
static auto ConvertAddress(sockaddr_in address) -> Address;
|
||||||
|
@ -65,15 +69,24 @@ class Context
|
||||||
std::unique_ptr<asiofi::info> fOfiInfo;
|
std::unique_ptr<asiofi::info> fOfiInfo;
|
||||||
std::unique_ptr<asiofi::fabric> fOfiFabric;
|
std::unique_ptr<asiofi::fabric> fOfiFabric;
|
||||||
std::unique_ptr<asiofi::domain> fOfiDomain;
|
std::unique_ptr<asiofi::domain> fOfiDomain;
|
||||||
boost::asio::io_service fIoContext;
|
boost::asio::io_context fIoContext;
|
||||||
boost::asio::io_service::work fIoWork;
|
boost::asio::io_context::work fIoWork;
|
||||||
std::vector<std::thread> fThreadPool;
|
std::vector<std::thread> fThreadPool;
|
||||||
|
|
||||||
auto InitThreadPool(int numberIoThreads) -> void;
|
auto InitThreadPool(int numberIoThreads) -> void;
|
||||||
|
auto InitOfi(ConnectionType type, Address address) -> void;
|
||||||
}; /* class Context */
|
}; /* class Context */
|
||||||
|
|
||||||
struct ContextError : std::runtime_error { using std::runtime_error::runtime_error; };
|
struct ContextError : std::runtime_error { using std::runtime_error::runtime_error; };
|
||||||
|
|
||||||
|
// Transfers ownership from a unique_ptr<Base, Del> to a unique_ptr<Derived, Del>
// using an unchecked static_cast on the released pointer.
//
// The deleter is moved over unchanged, so it keeps its original type Del and
// will later be invoked with a Derived* argument.
//
// The caller must guarantee that the pointee really is a Derived; no runtime
// check is performed.
template<typename Derived, typename Base, typename Del>
|
||||||
|
std::unique_ptr<Derived, Del>
|
||||||
|
static_unique_ptr_downcast( std::unique_ptr<Base, Del>&& p )
|
||||||
|
{
|
||||||
|
auto d = static_cast<Derived *>(p.release());
|
||||||
|
return std::unique_ptr<Derived, Del>(d, std::move(p.get_deleter()));
|
||||||
|
}
|
||||||
|
|
||||||
} /* namespace ofi */
|
} /* namespace ofi */
|
||||||
} /* namespace mq */
|
} /* namespace mq */
|
||||||
} /* namespace fair */
|
} /* namespace fair */
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
syntax = "proto3";
|
|
||||||
option optimize_for = SPEED;
|
|
||||||
|
|
||||||
package fair.mq.ofi;
|
|
||||||
|
|
||||||
message DataAddressAnnouncement {
|
|
||||||
uint32 ipv4 = 1; // in_addr_t from <netinet/in.h>
|
|
||||||
uint32 port = 2; // in_port_t from <netinet/in.h>
|
|
||||||
}
|
|
||||||
|
|
||||||
message PostBuffer {
|
|
||||||
uint64 size = 1; // buffer size (size_t)
|
|
||||||
}
|
|
||||||
|
|
||||||
message PostBufferAcknowledgement {
|
|
||||||
uint64 size = 1; // size_t
|
|
||||||
}
|
|
||||||
|
|
||||||
message ControlMessage {
|
|
||||||
oneof type {
|
|
||||||
DataAddressAnnouncement data_address_announcement = 1;
|
|
||||||
PostBuffer post_buffer = 2;
|
|
||||||
PostBufferAcknowledgement post_buffer_acknowledgement = 3;
|
|
||||||
}
|
|
||||||
}
|
|
84
fairmq/ofi/ControlMessages.h
Normal file
84
fairmq/ofi/ControlMessages.h
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
/********************************************************************************
|
||||||
|
* Copyright (C) 2018 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
|
||||||
|
* *
|
||||||
|
* This software is distributed under the terms of the *
|
||||||
|
* GNU Lesser General Public Licence (LGPL) version 3, *
|
||||||
|
* copied verbatim in the file "LICENSE" *
|
||||||
|
********************************************************************************/
|
||||||
|
|
||||||
|
#ifndef FAIR_MQ_OFI_CONTROLMESSAGES_H
|
||||||
|
#define FAIR_MQ_OFI_CONTROLMESSAGES_H
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <functional>
|
||||||
|
#include <memory>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
namespace fair
|
||||||
|
{
|
||||||
|
namespace mq
|
||||||
|
{
|
||||||
|
namespace ofi
|
||||||
|
{
|
||||||
|
|
||||||
|
/// Discriminator stored in every control message so that a receiver can tell
/// which concrete message type it is looking at.
enum class ControlMessageType
{
    DataAddressAnnouncement = 1,
    PostBuffer,
    PostBufferAcknowledgement
};

/// Common header shared by all control messages.
struct ControlMessage {
    ControlMessageType type;
};

/// Announces the address of the sender's data endpoint.
struct DataAddressAnnouncement : ControlMessage {
    uint32_t ipv4; // in_addr_t from <netinet/in.h>
    uint32_t port; // in_port_t from <netinet/in.h>
};

/// Announces a buffer of the given size.
struct PostBuffer : ControlMessage {
    uint64_t size; // buffer size (size_t)
};

/// Acknowledges a previously posted buffer.
/// Derives from ControlMessage like its siblings so it carries the type tag
/// that ControlMessageType::PostBufferAcknowledgement refers to.
struct PostBufferAcknowledgement : ControlMessage {
    uint64_t size; // size_t
};

/// Owning pointer to a control message; the type-erased deleter returns the
/// storage to the memory resource the message was allocated from.
template<typename T>
using CtrlMsgPtr = std::unique_ptr<T, std::function<void(T*)>>;

namespace detail
{

// Stamp the matching ControlMessageType into a freshly constructed message.
// One overload per concrete message type; everything else is left untouched.
inline auto TagControlMessage(DataAddressAnnouncement& msg) noexcept -> void
{
    msg.type = ControlMessageType::DataAddressAnnouncement;
}

inline auto TagControlMessage(PostBuffer& msg) noexcept -> void
{
    msg.type = ControlMessageType::PostBuffer;
}

inline auto TagControlMessage(PostBufferAcknowledgement& msg) noexcept -> void
{
    msg.type = ControlMessageType::PostBufferAcknowledgement;
}

// Fallback for the plain ControlMessage header and for unknown types: no tag to set.
template<typename T>
auto TagControlMessage(T& /*msg*/) noexcept -> void
{}

} /* namespace detail */

/// Allocates and constructs a control message of type T from the given memory resource.
///
/// @tparam T    message type to create
/// @tparam A    memory resource type providing allocate(bytes) and deallocate(ptr, bytes)
/// @param pmr   memory resource to allocate from; it is captured by the deleter
///              and therefore must outlive the returned message
/// @param args  arguments forwarded to T's constructor
/// @return owning pointer whose deleter destroys the message and returns its
///         storage to `pmr`
///
/// For the known message types the `type` tag is set after construction, so a
/// message built here is always correctly tagged.
template<typename T, typename A, typename ... Args>
auto MakeControlMessage(A* pmr, Args&& ... args) -> CtrlMsgPtr<T>
{
    void* raw_mem = pmr->allocate(sizeof(T));

    T* raw_ptr = nullptr;
    try {
        raw_ptr = new (raw_mem) T(std::forward<Args>(args)...);
    } catch (...) {
        // Do not leak the storage if T's constructor throws.
        pmr->deallocate(raw_mem, sizeof(T));
        throw;
    }

    detail::TagControlMessage(*raw_ptr);

    return CtrlMsgPtr<T>(raw_ptr, [pmr](T* p) {
        p->~T(); // placement-new'ed object: run the destructor explicitly
        pmr->deallocate(p, sizeof(T));
    });
}
|
||||||
|
|
||||||
|
/// Transfers ownership from a unique_ptr<Base, Del> to a unique_ptr<Derived, Del>
/// using an unchecked static_cast.
///
/// @tparam Derived  target pointee type; must be specified explicitly
/// @param p         source pointer, left empty afterwards
/// @return pointer owning the same object, typed as Derived
///
/// The deleter is moved over unchanged and will be invoked with a Derived*,
/// i.e. it must accept a pointer that converts to Base*. The caller guarantees
/// that the pointee really is a Derived; nothing is verified at runtime.
template<typename Derived, typename Base, typename Del>
auto StaticUniquePtrDowncast(std::unique_ptr<Base, Del>&& p) -> std::unique_ptr<Derived, Del>
{
    static_assert(std::is_convertible<Derived*, Base*>::value,
                  "StaticUniquePtrDowncast: Derived* must be convertible to Base*");

    auto down = static_cast<Derived*>(p.release());
    return std::unique_ptr<Derived, Del>(down, std::move(p.get_deleter()));
}
|
||||||
|
|
||||||
|
/// Transfers ownership from a unique_ptr<Derived, Del> to a unique_ptr to Base
/// with a type-erased deleter.
///
/// @tparam Base  target pointee type; must be specified explicitly
/// @param p      source pointer, left empty afterwards
/// @return pointer owning the same object, typed as Base
///
/// The original deleter is wrapped so that it is still invoked with a Derived*,
/// which keeps size- or type-dependent deleters correct after the upcast.
template<typename Base, typename Derived, typename Del>
auto StaticUniquePtrUpcast(std::unique_ptr<Derived, Del>&& p) -> std::unique_ptr<Base, std::function<void(Base*)>>
{
    static_assert(std::is_convertible<Derived*, Base*>::value,
                  "StaticUniquePtrUpcast: Derived* must be convertible to Base*");

    auto up = static_cast<Base*>(p.release());
    // `mutable`: the captured deleter may only offer a non-const call operator.
    return std::unique_ptr<Base, std::function<void(Base*)>>(
        up, [deleter = std::move(p.get_deleter())](Base* ptr) mutable {
            deleter(static_cast<Derived*>(ptr));
        });
}
|
||||||
|
|
||||||
|
} /* namespace ofi */
|
||||||
|
} /* namespace mq */
|
||||||
|
} /* namespace fair */
|
||||||
|
|
||||||
|
#endif /* FAIR_MQ_OFI_CONTROLMESSAGES_H */
|
|
@ -6,6 +6,7 @@
|
||||||
* copied verbatim in the file "LICENSE" *
|
* copied verbatim in the file "LICENSE" *
|
||||||
********************************************************************************/
|
********************************************************************************/
|
||||||
|
|
||||||
|
#include <fairmq/ofi/ControlMessages.h>
|
||||||
#include <fairmq/ofi/Socket.h>
|
#include <fairmq/ofi/Socket.h>
|
||||||
#include <fairmq/ofi/TransportFactory.h>
|
#include <fairmq/ofi/TransportFactory.h>
|
||||||
#include <fairmq/Tools.h>
|
#include <fairmq/Tools.h>
|
||||||
|
@ -39,6 +40,7 @@ Socket::Socket(Context& context, const string& type, const string& name, const s
|
||||||
, fId(id + "." + name + "." + type)
|
, fId(id + "." + name + "." + type)
|
||||||
, fControlSocket(nullptr)
|
, fControlSocket(nullptr)
|
||||||
, fMonitorSocket(nullptr)
|
, fMonitorSocket(nullptr)
|
||||||
|
, fPassiveDataEndpoint(nullptr)
|
||||||
, fDataEndpoint(nullptr)
|
, fDataEndpoint(nullptr)
|
||||||
, fId(id + "." + name + "." + type)
|
, fId(id + "." + name + "." + type)
|
||||||
, fBytesTx(0)
|
, fBytesTx(0)
|
||||||
|
@ -92,10 +94,16 @@ Socket::Socket(Context& context, const string& type, const string& name, const s
|
||||||
auto Socket::Bind(const string& address) -> bool
|
auto Socket::Bind(const string& address) -> bool
|
||||||
try {
|
try {
|
||||||
auto addr = Context::VerifyAddress(address);
|
auto addr = Context::VerifyAddress(address);
|
||||||
|
|
||||||
BindControlSocket(addr);
|
BindControlSocket(addr);
|
||||||
fContext.InitOfi(ConnectionType::Bind, addr);
|
|
||||||
InitDataEndpoint();
|
// TODO make data port choice more robust
|
||||||
fWaitingForControlPeer = true;
|
addr.Port += 500;
|
||||||
|
fLocalDataAddr = addr;
|
||||||
|
BindDataEndpoint();
|
||||||
|
|
||||||
|
AnnounceDataAddress();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
catch (const SilentSocketError& e)
|
catch (const SilentSocketError& e)
|
||||||
|
@ -106,18 +114,20 @@ catch (const SilentSocketError& e)
|
||||||
}
|
}
|
||||||
catch (const SocketError& e)
|
catch (const SocketError& e)
|
||||||
{
|
{
|
||||||
LOG(error) << e.what();
|
LOG(error) << "OFI transport: " << e.what();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto Socket::Connect(const string& address) -> bool
|
auto Socket::Connect(const string& address) -> bool
|
||||||
{
|
{
|
||||||
auto addr = Context::VerifyAddress(address);
|
auto addr = Context::VerifyAddress(address);
|
||||||
|
|
||||||
ConnectControlSocket(addr);
|
ConnectControlSocket(addr);
|
||||||
fContext.InitOfi(ConnectionType::Connect, addr);
|
|
||||||
InitDataEndpoint();
|
ProcessControlMessage(
|
||||||
fWaitingForControlPeer = true;
|
StaticUniquePtrDowncast<DataAddressAnnouncement>(ReceiveControlMessage()));
|
||||||
return true;
|
|
||||||
|
ConnectDataEndpoint();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto Socket::BindControlSocket(Context::Address address) -> void
|
auto Socket::BindControlSocket(Context::Address address) -> void
|
||||||
|
@ -128,6 +138,26 @@ auto Socket::BindControlSocket(Context::Address address) -> void
|
||||||
if (errno == EADDRINUSE) throw SilentSocketError("EADDRINUSE");
|
if (errno == EADDRINUSE) throw SilentSocketError("EADDRINUSE");
|
||||||
throw SocketError(tools::ToString("Failed binding control socket ", fId, ", reason: ", zmq_strerror(errno)));
|
throw SocketError(tools::ToString("Failed binding control socket ", fId, ", reason: ", zmq_strerror(errno)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOG(debug) << "OFI transport (" << fId << "): control band bound to " << address;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto Socket::BindDataEndpoint() -> void
|
||||||
|
{
|
||||||
|
assert(!fPassiveDataEndpoint);
|
||||||
|
assert(!fDataEndpoint);
|
||||||
|
|
||||||
|
fPassiveDataEndpoint = fContext.MakeOfiPassiveEndpoint(fLocalDataAddr);
|
||||||
|
fPassiveDataEndpoint->listen([&](fid_t /*handle*/, asiofi::info&& info) {
|
||||||
|
LOG(debug) << "OFI transport (" << fId << "): data band connection request received. Accepting ...";
|
||||||
|
fDataEndpoint = fContext.MakeOfiConnectedEndpoint(info);
|
||||||
|
fDataEndpoint->enable();
|
||||||
|
fDataEndpoint->accept([&]() {
|
||||||
|
LOG(debug) << "OFI transport (" << fId << "): data band connection accepted.";
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
LOG(debug) << "OFI transport (" << fId << "): data band bound to " << fLocalDataAddr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto Socket::ConnectControlSocket(Context::Address address) -> void
|
auto Socket::ConnectControlSocket(Context::Address address) -> void
|
||||||
|
@ -138,119 +168,120 @@ auto Socket::ConnectControlSocket(Context::Address address) -> void
|
||||||
throw SocketError(tools::ToString("Failed connecting control socket ", fId, ", reason: ", zmq_strerror(errno)));
|
throw SocketError(tools::ToString("Failed connecting control socket ", fId, ", reason: ", zmq_strerror(errno)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// auto Socket::ProcessDataAddressAnnouncement(std::unique_ptr<ControlMessage> ctrl) -> void
|
auto Socket::ConnectDataEndpoint() -> void
|
||||||
// {
|
|
||||||
// assert(ctrl->has_data_address_announcement());
|
|
||||||
// auto daa = ctrl->data_address_announcement();
|
|
||||||
//
|
|
||||||
// sockaddr_in remoteAddr;
|
|
||||||
// remoteAddr.sin_family = AF_INET;
|
|
||||||
// remoteAddr.sin_port = daa.port();
|
|
||||||
// remoteAddr.sin_addr.s_addr = daa.ipv4();
|
|
||||||
//
|
|
||||||
// LOG(debug) << "Data address announcement of remote ofi endpoint received: " << Context::ConvertAddress(remoteAddr);
|
|
||||||
// fRemoteDataAddr = fContext.InsertAddressVector(remoteAddr);
|
|
||||||
// }
|
|
||||||
|
|
||||||
auto Socket::InitDataEndpoint() -> void
|
|
||||||
{
|
{
|
||||||
assert(!fDataEndpoint);
|
assert(!fDataEndpoint);
|
||||||
|
|
||||||
// try {
|
fDataEndpoint = fContext.MakeOfiConnectedEndpoint(fRemoteDataAddr);
|
||||||
// fDataEndpoint = fContext.CreateOfiEndpoint();
|
fDataEndpoint->enable();
|
||||||
// } catch (ContextError& e) {
|
LOG(debug) << "OFI transport (" << fId << "): local data band address: " << Context::ConvertAddress(fDataEndpoint->get_local_address());
|
||||||
// throw SocketError(tools::ToString("Failed creating ofi endpoint, reason: ", e.what()));
|
fDataEndpoint->connect([&]() {
|
||||||
// }
|
LOG(debug) << "OFI transport (" << fId << "): data band connected.";
|
||||||
//
|
});
|
||||||
// if (!fDataCompletionQueueTx)
|
|
||||||
// fDataCompletionQueueTx = fContext.CreateOfiCompletionQueue(Direction::Transmit);
|
|
||||||
// auto ret = fi_ep_bind(fDataEndpoint, &fDataCompletionQueueTx->fid, FI_TRANSMIT);
|
|
||||||
// if (ret != FI_SUCCESS)
|
|
||||||
// throw SocketError(tools::ToString("Failed binding ofi transmit completion queue to endpoint, reason: ", fi_strerror(ret)));
|
|
||||||
//
|
|
||||||
// if (!fDataCompletionQueueRx)
|
|
||||||
// fDataCompletionQueueRx = fContext.CreateOfiCompletionQueue(Direction::Receive);
|
|
||||||
// ret = fi_ep_bind(fDataEndpoint, &fDataCompletionQueueRx->fid, FI_RECV);
|
|
||||||
// if (ret != FI_SUCCESS)
|
|
||||||
// throw SocketError(tools::ToString("Failed binding ofi receive completion queue to endpoint, reason: ", fi_strerror(ret)));
|
|
||||||
//
|
|
||||||
// ret = fi_enable(fDataEndpoint);
|
|
||||||
// if (ret != FI_SUCCESS)
|
|
||||||
// throw SocketError(tools::ToString("Failed enabling ofi endpoint, reason: ", fi_strerror(ret)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void free_string(void* /*data*/, void* hint)
|
auto Socket::ProcessControlMessage(CtrlMsgPtr<DataAddressAnnouncement> daa) -> void
|
||||||
{
|
{
|
||||||
delete static_cast<string*>(hint);
|
assert(daa->type == ControlMessageType::DataAddressAnnouncement);
|
||||||
|
|
||||||
|
sockaddr_in remoteAddr;
|
||||||
|
remoteAddr.sin_family = AF_INET;
|
||||||
|
remoteAddr.sin_port = daa->port;
|
||||||
|
remoteAddr.sin_addr.s_addr = daa->ipv4;
|
||||||
|
|
||||||
|
auto addr = Context::ConvertAddress(remoteAddr);
|
||||||
|
LOG(debug) << "OFI transport (" << fId << "): Data address announcement of remote endpoint received: " << addr;
|
||||||
|
fRemoteDataAddr = addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sends the local data address (fLocalDataAddr) to the peer over the control
// socket as a DataAddressAnnouncement control message.
//
// A SocketError thrown while sending is caught and rethrown as a new
// SocketError whose text is prefixed with "Failed to announce data address".
auto Socket::AnnounceDataAddress() -> void
|
auto Socket::AnnounceDataAddress() -> void
|
||||||
try {
|
try {
|
||||||
// size_t addrlen = sizeof(sockaddr_in);
|
// fLocalDataAddr = fDataEndpoint->get_local_address();
|
||||||
// auto ret = fi_getname(&fDataEndpoint->fid, &fLocalDataAddr, &addrlen);
|
|
||||||
// if (ret != FI_SUCCESS)
|
|
||||||
// throw SocketError(tools::ToString("Failed retrieving native address from ofi endpoint, reason: ", fi_strerror(ret)));
|
|
||||||
// assert(addrlen == sizeof(sockaddr_in));
|
|
||||||
//
|
|
||||||
// LOG(debug) << "Address of local ofi endpoint in socket " << fId << ": " << Context::ConvertAddress(fLocalDataAddr);
|
// LOG(debug) << "Address of local ofi endpoint in socket " << fId << ": " << Context::ConvertAddress(fLocalDataAddr);
|
||||||
|
|
||||||
// Create new control message
|
// Create new data address announcement message
|
||||||
// auto ctrl = tools::make_unique<ControlMessage>();
|
// The message storage comes from the socket's control message memory pool.
auto daa = MakeControlMessage<DataAddressAnnouncement>(&fCtrlMemPool);
|
||||||
// auto daa = tools::make_unique<DataAddressAnnouncement>();
|
// Convert the stored Context::Address into a sockaddr_in to obtain the raw address/port fields.
auto addr = Context::ConvertAddress(fLocalDataAddr);
|
||||||
|
daa->ipv4 = addr.sin_addr.s_addr;
|
||||||
|
daa->port = addr.sin_port;
|
||||||
|
|
||||||
// Fill data address announcement
|
// Ownership of the message is handed to SendControlMessage().
SendControlMessage(StaticUniquePtrUpcast<ControlMessage>(std::move(daa)));
|
||||||
// daa->set_ipv4(fLocalDataAddr.sin_addr.s_addr);
|
|
||||||
// daa->set_port(fLocalDataAddr.sin_port);
|
|
||||||
|
|
||||||
// Fill control message
|
LOG(debug) << "OFI transport (" << fId << "): data address announced.";
|
||||||
// ctrl->set_allocated_data_address_announcement(daa.release());
|
|
||||||
// assert(ctrl->IsInitialized());
|
|
||||||
|
|
||||||
// SendControlMessage(move(ctrl));
|
|
||||||
} catch (const SocketError& e) {
|
} catch (const SocketError& e) {
|
||||||
throw SocketError(tools::ToString("Failed to announce data address, reason: ", e.what()));
|
throw SocketError(tools::ToString("Failed to announce data address, reason: ", e.what()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// auto Socket::SendControlMessage(unique_ptr<ControlMessage> ctrl) -> void
|
auto Socket::SendControlMessage(CtrlMsgPtr<ControlMessage> ctrl) -> void
|
||||||
// {
|
{
|
||||||
// assert(fControlSocket);
|
assert(fControlSocket);
|
||||||
// LOG(debug) << "About to send control message: " << ctrl->DebugString();
|
// LOG(debug) << "About to send control message: " << ctrl->DebugString();
|
||||||
//
|
|
||||||
// Serialize
|
// Serialize
|
||||||
// string* str = new string();
|
struct ZmqMsg
|
||||||
// ctrl->SerializeToString(str);
|
{
|
||||||
// zmq_msg_t msg;
|
zmq_msg_t msg;
|
||||||
// auto ret = zmq_msg_init_data(&msg, const_cast<char*>(str->c_str()), str->length(), free_string, str);
|
~ZmqMsg() { zmq_msg_close(&msg); }
|
||||||
// assert(ret == 0);
|
operator zmq_msg_t*() { return &msg; }
|
||||||
//
|
} msg;
|
||||||
|
|
||||||
|
switch (ctrl->type) {
|
||||||
|
case ControlMessageType::DataAddressAnnouncement:
|
||||||
|
{
|
||||||
|
auto ret = zmq_msg_init_size(msg, sizeof(DataAddressAnnouncement));
|
||||||
|
(void)ret;
|
||||||
|
assert(ret == 0);
|
||||||
|
std::memcpy(zmq_msg_data(msg), ctrl.get(), sizeof(DataAddressAnnouncement));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw SocketError(tools::ToString("Cannot send control message of unknown type."));
|
||||||
|
}
|
||||||
|
|
||||||
// Send
|
// Send
|
||||||
// if (zmq_msg_send(&msg, fControlSocket, 0) == -1) {
|
if (zmq_msg_send(msg, fControlSocket, 0) == -1) {
|
||||||
// zmq_msg_close(&msg);
|
throw SocketError(
|
||||||
// throw SocketError(tools::ToString("Failed to send control message, reason: ", zmq_strerror(errno)));
|
tools::ToString("Failed to send control message, reason: ", zmq_strerror(errno)));
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
//
|
|
||||||
// auto Socket::ReceiveControlMessage() -> unique_ptr<ControlMessage>
|
auto Socket::ReceiveControlMessage() -> CtrlMsgPtr<ControlMessage>
|
||||||
// {
|
{
|
||||||
// assert(fControlSocket);
|
assert(fControlSocket);
|
||||||
//
|
|
||||||
// Receive
|
// Receive
|
||||||
// zmq_msg_t msg;
|
struct ZmqMsg
|
||||||
// auto ret = zmq_msg_init(&msg);
|
{
|
||||||
// assert(ret == 0);
|
zmq_msg_t msg;
|
||||||
// if (zmq_msg_recv(&msg, fControlSocket, 0) == -1) {
|
~ZmqMsg() { zmq_msg_close(&msg); }
|
||||||
// zmq_msg_close(&msg);
|
operator zmq_msg_t*() { return &msg; }
|
||||||
// throw SocketError(tools::ToString("Failed to receive control message, reason: ", zmq_strerror(errno)));
|
} msg;
|
||||||
// }
|
auto ret = zmq_msg_init(msg);
|
||||||
//
|
(void)ret;
|
||||||
// Deserialize
|
assert(ret == 0);
|
||||||
// auto ctrl = tools::make_unique<ControlMessage>();
|
if (zmq_msg_recv(msg, fControlSocket, 0) == -1) {
|
||||||
// ctrl->ParseFromArray(zmq_msg_data(&msg), zmq_msg_size(&msg));
|
throw SocketError(
|
||||||
//
|
tools::ToString("Failed to receive control message, reason: ", zmq_strerror(errno)));
|
||||||
// zmq_msg_close(&msg);
|
}
|
||||||
// LOG(debug) << "Received control message: " << ctrl->DebugString();
|
|
||||||
// return ctrl;
|
// Deserialize and sanity check
|
||||||
// }
|
const void* msg_data = zmq_msg_data(msg);
|
||||||
|
const size_t msg_size = zmq_msg_size(msg);
|
||||||
|
(void)msg_size;
|
||||||
|
assert(msg_size >= sizeof(ControlMessage));
|
||||||
|
|
||||||
|
switch (static_cast<const ControlMessage*>(msg_data)->type) {
|
||||||
|
case ControlMessageType::DataAddressAnnouncement: {
|
||||||
|
assert(msg_size == sizeof(DataAddressAnnouncement));
|
||||||
|
auto daa = MakeControlMessage<DataAddressAnnouncement>(&fCtrlMemPool);
|
||||||
|
std::memcpy(daa.get(), msg_data, sizeof(DataAddressAnnouncement));
|
||||||
|
// LOG(debug) << "Received control message: " << ctrl->DebugString();
|
||||||
|
return StaticUniquePtrUpcast<ControlMessage>(std::move(daa));
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
throw SocketError(tools::ToString("Received control message of unknown type."));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto Socket::WaitForControlPeer() -> void
|
auto Socket::WaitForControlPeer() -> void
|
||||||
{
|
{
|
||||||
|
@ -302,12 +333,6 @@ auto Socket::TryReceive(std::vector<MessagePtr>& msgVec) -> int64_t { return Rec
|
||||||
|
|
||||||
auto Socket::SendImpl(FairMQMessagePtr& msg, const int /*flags*/, const int /*timeout*/) -> int
|
auto Socket::SendImpl(FairMQMessagePtr& msg, const int /*flags*/, const int /*timeout*/) -> int
|
||||||
try {
|
try {
|
||||||
if (fWaitingForControlPeer) {
|
|
||||||
WaitForControlPeer();
|
|
||||||
AnnounceDataAddress();
|
|
||||||
// ProcessDataAddressAnnouncement(ReceiveControlMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
auto size = msg->GetSize();
|
auto size = msg->GetSize();
|
||||||
|
|
||||||
// Create and send control message
|
// Create and send control message
|
||||||
|
@ -358,7 +383,7 @@ auto Socket::ReceiveImpl(FairMQMessagePtr& /*msg*/, const int /*flags*/, const i
|
||||||
try {
|
try {
|
||||||
if (fWaitingForControlPeer) {
|
if (fWaitingForControlPeer) {
|
||||||
WaitForControlPeer();
|
WaitForControlPeer();
|
||||||
AnnounceDataAddress();
|
// AnnounceDataAddress();
|
||||||
// ProcessDataAddressAnnouncement(ReceiveControlMessage());
|
// ProcessDataAddressAnnouncement(ReceiveControlMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,9 +12,11 @@
|
||||||
#include <FairMQSocket.h>
|
#include <FairMQSocket.h>
|
||||||
#include <FairMQMessage.h>
|
#include <FairMQMessage.h>
|
||||||
#include <fairmq/ofi/Context.h>
|
#include <fairmq/ofi/Context.h>
|
||||||
|
#include <fairmq/ofi/ControlMessages.h>
|
||||||
|
|
||||||
#include <asiofi/connected_endpoint.hpp>
|
#include <asiofi/connected_endpoint.hpp>
|
||||||
#include <boost/asio.hpp>
|
#include <boost/asio.hpp>
|
||||||
|
#include <boost/container/pmr/unsynchronized_pool_resource.hpp>
|
||||||
#include <memory> // unique_ptr
|
#include <memory> // unique_ptr
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
#include <rdma/fabric.h>
|
#include <rdma/fabric.h>
|
||||||
|
@ -85,6 +87,7 @@ class Socket final : public fair::mq::Socket
|
||||||
private:
|
private:
|
||||||
void* fControlSocket;
|
void* fControlSocket;
|
||||||
void* fMonitorSocket;
|
void* fMonitorSocket;
|
||||||
|
std::unique_ptr<asiofi::passive_endpoint> fPassiveDataEndpoint;
|
||||||
std::unique_ptr<asiofi::connected_endpoint> fDataEndpoint;
|
std::unique_ptr<asiofi::connected_endpoint> fDataEndpoint;
|
||||||
std::string fId;
|
std::string fId;
|
||||||
std::atomic<unsigned long> fBytesTx;
|
std::atomic<unsigned long> fBytesTx;
|
||||||
|
@ -92,10 +95,11 @@ class Socket final : public fair::mq::Socket
|
||||||
std::atomic<unsigned long> fMessagesTx;
|
std::atomic<unsigned long> fMessagesTx;
|
||||||
std::atomic<unsigned long> fMessagesRx;
|
std::atomic<unsigned long> fMessagesRx;
|
||||||
Context& fContext;
|
Context& fContext;
|
||||||
fi_addr_t fRemoteDataAddr;
|
Context::Address fRemoteDataAddr;
|
||||||
sockaddr_in fLocalDataAddr;
|
Context::Address fLocalDataAddr;
|
||||||
bool fWaitingForControlPeer;
|
bool fWaitingForControlPeer;
|
||||||
boost::asio::io_service::strand fIoStrand;
|
boost::asio::io_service::strand fIoStrand;
|
||||||
|
boost::container::pmr::unsynchronized_pool_resource fCtrlMemPool;
|
||||||
|
|
||||||
int fSndTimeout;
|
int fSndTimeout;
|
||||||
int fRcvTimeout;
|
int fRcvTimeout;
|
||||||
|
@ -105,19 +109,17 @@ class Socket final : public fair::mq::Socket
|
||||||
auto SendImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t;
|
auto SendImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t;
|
||||||
auto ReceiveImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t;
|
auto ReceiveImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t;
|
||||||
|
|
||||||
auto InitDataEndpoint() -> void;
|
|
||||||
auto WaitForControlPeer() -> void;
|
auto WaitForControlPeer() -> void;
|
||||||
auto AnnounceDataAddress() -> void;
|
auto AnnounceDataAddress() -> void;
|
||||||
// auto SendControlMessage(std::unique_ptr<ControlMessage> ctrl) -> void;
|
auto SendControlMessage(CtrlMsgPtr<ControlMessage> ctrl) -> void;
|
||||||
// auto ReceiveControlMessage() -> std::unique_ptr<ControlMessage>;
|
auto ReceiveControlMessage() -> CtrlMsgPtr<ControlMessage>;
|
||||||
// auto ProcessDataAddressAnnouncement(std::unique_ptr<ControlMessage> ctrl) -> void;
|
auto ProcessControlMessage(CtrlMsgPtr<DataAddressAnnouncement> ctrl) -> void;
|
||||||
auto ConnectControlSocket(Context::Address address) -> void;
|
auto ConnectControlSocket(Context::Address address) -> void;
|
||||||
auto BindControlSocket(Context::Address address) -> void;
|
auto BindControlSocket(Context::Address address) -> void;
|
||||||
|
auto BindDataEndpoint() -> void;
|
||||||
|
auto ConnectDataEndpoint() -> void;
|
||||||
}; /* class Socket */
|
}; /* class Socket */
|
||||||
|
|
||||||
// helper function to clean up the object holding the data after it is transported.
|
|
||||||
void free_string(void* /*data*/, void* hint);
|
|
||||||
|
|
||||||
struct SilentSocketError : SocketError { using SocketError::SocketError; };
|
struct SilentSocketError : SocketError { using SocketError::SocketError; };
|
||||||
|
|
||||||
} /* namespace ofi */
|
} /* namespace ofi */
|
||||||
|
|
|
@ -33,7 +33,15 @@ add_testhelper(runTestDevice
|
||||||
)
|
)
|
||||||
|
|
||||||
if(BUILD_NANOMSG_TRANSPORT)
|
if(BUILD_NANOMSG_TRANSPORT)
|
||||||
set(definitions DEFINITIONS BUILD_NANOMSG_TRANSPORT)
|
list(APPEND definitions BUILD_NANOMSG_TRANSPORT)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(BUILD_OFI_TRANSPORT)
|
||||||
|
LIST(APPEND definitions BUILD_OFI_TRANSPORT)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(definitions)
|
||||||
|
set(definitions DEFINITIONS ${definitions})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(MQ_CONFIG "${CMAKE_BINARY_DIR}/test/testsuite_FairMQ.IOPatterns_config.json")
|
set(MQ_CONFIG "${CMAKE_BINARY_DIR}/test/testsuite_FairMQ.IOPatterns_config.json")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user