Deactivate control band monitor socket

This commit is contained in:
Dennis Klein 2018-10-30 10:33:46 +01:00 committed by Dennis Klein
parent ba4e6f72c9
commit 91025cbc88
4 changed files with 60 additions and 69 deletions

View File

@ -97,7 +97,7 @@ auto Context::InitOfi(ConnectionType type, Address addr) -> void
} else { } else {
fOfiInfo = tools::make_unique<asiofi::info>(addr.Ip.c_str(), std::to_string(addr.Port).c_str(), 0, hints); fOfiInfo = tools::make_unique<asiofi::info>(addr.Ip.c_str(), std::to_string(addr.Port).c_str(), 0, hints);
} }
LOG(debug) << "OFI transport: " << *fOfiInfo; // LOG(debug) << "OFI transport: " << *fOfiInfo;
fOfiFabric = tools::make_unique<asiofi::fabric>(*fOfiInfo); fOfiFabric = tools::make_unique<asiofi::fabric>(*fOfiInfo);
@ -113,6 +113,8 @@ auto Context::MakeOfiPassiveEndpoint(Address addr) -> unique_ptr<asiofi::passive
auto Context::MakeOfiConnectedEndpoint(const asiofi::info& info) -> std::unique_ptr<asiofi::connected_endpoint> auto Context::MakeOfiConnectedEndpoint(const asiofi::info& info) -> std::unique_ptr<asiofi::connected_endpoint>
{ {
assert(fOfiDomain);
return tools::make_unique<asiofi::connected_endpoint>(fIoContext, *fOfiDomain, info); return tools::make_unique<asiofi::connected_endpoint>(fIoContext, *fOfiDomain, info);
} }

View File

@ -79,14 +79,6 @@ class Context
struct ContextError : std::runtime_error { using std::runtime_error::runtime_error; }; struct ContextError : std::runtime_error { using std::runtime_error::runtime_error; };
template<typename Derived, typename Base, typename Del>
std::unique_ptr<Derived, Del>
static_unique_ptr_downcast( std::unique_ptr<Base, Del>&& p )
{
auto d = static_cast<Derived *>(p.release());
return std::unique_ptr<Derived, Del>(d, std::move(p.get_deleter()));
}
} /* namespace ofi */ } /* namespace ofi */
} /* namespace mq */ } /* namespace mq */
} /* namespace fair */ } /* namespace fair */

View File

@ -32,14 +32,9 @@ namespace ofi
using namespace std; using namespace std;
Socket::Socket(Context& context, const string& type, const string& name, const string& id /*= ""*/, FairMQTransportFactory* fac) Socket::Socket(Context& context, const string& type, const string& name, const string& id /*= ""*/)
: FairMQSocket{fac} : fControlSocket(nullptr)
, fDataEndpoint(nullptr) // , fMonitorSocket(nullptr)
, fDataCompletionQueueTx(nullptr)
, fDataCompletionQueueRx(nullptr)
, fId(id + "." + name + "." + type)
, fControlSocket(nullptr)
, fMonitorSocket(nullptr)
, fPassiveDataEndpoint(nullptr) , fPassiveDataEndpoint(nullptr)
, fDataEndpoint(nullptr) , fDataEndpoint(nullptr)
, fId(id + "." + name + "." + type) , fId(id + "." + name + "." + type)
@ -77,17 +72,17 @@ Socket::Socket(Context& context, const string& type, const string& name, const s
// if (zmq_setsockopt(fControlSocket, ZMQ_RCVTIMEO, &fRcvTimeout, sizeof(fRcvTimeout)) != 0) // if (zmq_setsockopt(fControlSocket, ZMQ_RCVTIMEO, &fRcvTimeout, sizeof(fRcvTimeout)) != 0)
// throw SocketError{tools::ToString("Failed setting ZMQ_RCVTIMEO socket option, reason: ", zmq_strerror(errno))}; // throw SocketError{tools::ToString("Failed setting ZMQ_RCVTIMEO socket option, reason: ", zmq_strerror(errno))};
fMonitorSocket = zmq_socket(fContext.GetZmqContext(), ZMQ_PAIR); // fMonitorSocket = zmq_socket(fContext.GetZmqContext(), ZMQ_PAIR);
//
if (fMonitorSocket == nullptr) // if (fMonitorSocket == nullptr)
throw SocketError{tools::ToString("Failed creating zmq monitor socket ", fId, ", reason: ", zmq_strerror(errno))}; // throw SocketError{tools::ToString("Failed creating zmq monitor socket ", fId, ", reason: ", zmq_strerror(errno))};
//
auto mon_addr = tools::ToString("inproc://", fId); // auto mon_addr = tools::ToString("inproc://", fId);
if (zmq_socket_monitor(fControlSocket, mon_addr.c_str(), ZMQ_EVENT_ACCEPTED | ZMQ_EVENT_CONNECTED) < 0) // if (zmq_socket_monitor(fControlSocket, mon_addr.c_str(), ZMQ_EVENT_ACCEPTED | ZMQ_EVENT_CONNECTED) < 0)
throw SocketError{tools::ToString("Failed setting up monitor on meta socket, reason: ", zmq_strerror(errno))}; // throw SocketError{tools::ToString("Failed setting up monitor on meta socket, reason: ", zmq_strerror(errno))};
//
if (zmq_connect(fMonitorSocket, mon_addr.c_str()) != 0) // if (zmq_connect(fMonitorSocket, mon_addr.c_str()) != 0)
throw SocketError{tools::ToString("Failed connecting monitor socket to meta socket, reason: ", zmq_strerror(errno))}; // throw SocketError{tools::ToString("Failed connecting monitor socket to meta socket, reason: ", zmq_strerror(errno))};
} }
} }
@ -98,7 +93,7 @@ try {
BindControlSocket(addr); BindControlSocket(addr);
// TODO make data port choice more robust // TODO make data port choice more robust
addr.Port += 500; addr.Port += 555;
fLocalDataAddr = addr; fLocalDataAddr = addr;
BindDataEndpoint(); BindDataEndpoint();
@ -174,6 +169,7 @@ auto Socket::ConnectDataEndpoint() -> void
fDataEndpoint = fContext.MakeOfiConnectedEndpoint(fRemoteDataAddr); fDataEndpoint = fContext.MakeOfiConnectedEndpoint(fRemoteDataAddr);
fDataEndpoint->enable(); fDataEndpoint->enable();
LOG(debug) << "OFI transport (" << fId << "): local data band address: " << Context::ConvertAddress(fDataEndpoint->get_local_address()); LOG(debug) << "OFI transport (" << fId << "): local data band address: " << Context::ConvertAddress(fDataEndpoint->get_local_address());
fDataEndpoint->connect([&]() { fDataEndpoint->connect([&]() {
LOG(debug) << "OFI transport (" << fId << "): data band connected."; LOG(debug) << "OFI transport (" << fId << "): data band connected.";
@ -283,43 +279,43 @@ auto Socket::ReceiveControlMessage() -> CtrlMsgPtr<ControlMessage>
} }
} }
auto Socket::WaitForControlPeer() -> void // auto Socket::WaitForControlPeer() -> void
{ // {
assert(fWaitingForControlPeer); // assert(fWaitingForControlPeer);
//
// First frame in message contains event number and value // First frame in message contains event number and value
zmq_msg_t msg; // zmq_msg_t msg;
zmq_msg_init(&msg); // zmq_msg_init(&msg);
if (zmq_msg_recv(&msg, fMonitorSocket, 0) == -1) // if (zmq_msg_recv(&msg, fMonitorSocket, 0) == -1)
throw SocketError(tools::ToString("Failed to get monitor event, reason: ", zmq_strerror(errno))); // throw SocketError(tools::ToString("Failed to get monitor event, reason: ", zmq_strerror(errno)));
//
uint8_t* data = (uint8_t*) zmq_msg_data(&msg); // uint8_t* data = (uint8_t*) zmq_msg_data(&msg);
uint16_t event = *(uint16_t*)(data); // uint16_t event = *(uint16_t*)(data);
int value = *(uint32_t *)(data + 2); // int value = *(uint32_t *)(data + 2);
//
// Second frame in message contains event address // Second frame in message contains event address
zmq_msg_init(&msg); // zmq_msg_init(&msg);
if (zmq_msg_recv(&msg, fMonitorSocket, 0) == -1) // if (zmq_msg_recv(&msg, fMonitorSocket, 0) == -1)
throw SocketError(tools::ToString("Failed to get monitor event, reason: ", zmq_strerror(errno))); // throw SocketError(tools::ToString("Failed to get monitor event, reason: ", zmq_strerror(errno)));
//
if (event == ZMQ_EVENT_ACCEPTED) { // if (event == ZMQ_EVENT_ACCEPTED) {
// string localAddress = string(static_cast<char*>(zmq_msg_data(&msg)), zmq_msg_size(&msg)); // string localAddress = string(static_cast<char*>(zmq_msg_data(&msg)), zmq_msg_size(&msg));
sockaddr_in remoteAddr; // sockaddr_in remoteAddr;
socklen_t addrSize = sizeof(sockaddr_in); // socklen_t addrSize = sizeof(sockaddr_in);
int ret = getpeername(value, (sockaddr*)&remoteAddr, &addrSize); // int ret = getpeername(value, (sockaddr*)&remoteAddr, &addrSize);
if (ret != 0) // if (ret != 0)
throw SocketError(tools::ToString("Failed retrieving remote address, reason: ", strerror(errno))); // throw SocketError(tools::ToString("Failed retrieving remote address, reason: ", strerror(errno)));
string remoteIp(inet_ntoa(remoteAddr.sin_addr)); // string remoteIp(inet_ntoa(remoteAddr.sin_addr));
int remotePort = ntohs(remoteAddr.sin_port); // int remotePort = ntohs(remoteAddr.sin_port);
LOG(debug) << "Accepted control peer connection from " << remoteIp << ":" << remotePort; // LOG(debug) << "Accepted control peer connection from " << remoteIp << ":" << remotePort;
} else if (event == ZMQ_EVENT_CONNECTED) { // } else if (event == ZMQ_EVENT_CONNECTED) {
LOG(debug) << "Connected successfully to control peer"; // LOG(debug) << "Connected successfully to control peer";
} else { // } else {
LOG(debug) << "Unknown monitor event received: " << event << ". Ignoring."; // LOG(debug) << "Unknown monitor event received: " << event << ". Ignoring.";
} // }
//
fWaitingForControlPeer = false; // fWaitingForControlPeer = false;
} // }
auto Socket::Send(MessagePtr& msg, const int timeout) -> int { return SendImpl(msg, 0, timeout); } auto Socket::Send(MessagePtr& msg, const int timeout) -> int { return SendImpl(msg, 0, timeout); }
auto Socket::Receive(MessagePtr& msg, const int timeout) -> int { return ReceiveImpl(msg, 0, timeout); } auto Socket::Receive(MessagePtr& msg, const int timeout) -> int { return ReceiveImpl(msg, 0, timeout); }
@ -335,6 +331,7 @@ auto Socket::SendImpl(FairMQMessagePtr& msg, const int /*flags*/, const int /*ti
try { try {
auto size = msg->GetSize(); auto size = msg->GetSize();
this_thread::sleep_for(std::chrono::seconds(10));
// Create and send control message // Create and send control message
// auto ctrl = tools::make_unique<ControlMessage>(); // auto ctrl = tools::make_unique<ControlMessage>();
// auto buf = tools::make_unique<PostBuffer>(); // auto buf = tools::make_unique<PostBuffer>();
@ -381,6 +378,7 @@ catch (const std::exception& e)
auto Socket::ReceiveImpl(FairMQMessagePtr& /*msg*/, const int /*flags*/, const int /*timeout*/) -> int auto Socket::ReceiveImpl(FairMQMessagePtr& /*msg*/, const int /*flags*/, const int /*timeout*/) -> int
try { try {
this_thread::sleep_for(std::chrono::seconds(10));
if (fWaitingForControlPeer) { if (fWaitingForControlPeer) {
WaitForControlPeer(); WaitForControlPeer();
// AnnounceDataAddress(); // AnnounceDataAddress();
@ -611,8 +609,8 @@ auto Socket::Close() -> void
if (zmq_close(fControlSocket) != 0) if (zmq_close(fControlSocket) != 0)
throw SocketError(tools::ToString("Failed closing zmq meta socket, reason: ", zmq_strerror(errno))); throw SocketError(tools::ToString("Failed closing zmq meta socket, reason: ", zmq_strerror(errno)));
if (zmq_close(fMonitorSocket) != 0) // if (zmq_close(fMonitorSocket) != 0)
throw SocketError(tools::ToString("Failed closing zmq monitor socket, reason: ", zmq_strerror(errno))); // throw SocketError(tools::ToString("Failed closing zmq monitor socket, reason: ", zmq_strerror(errno)));
} }
auto Socket::SetOption(const string& option, const void* value, size_t valueSize) -> void auto Socket::SetOption(const string& option, const void* value, size_t valueSize) -> void

View File

@ -19,7 +19,6 @@
#include <boost/container/pmr/unsynchronized_pool_resource.hpp> #include <boost/container/pmr/unsynchronized_pool_resource.hpp>
#include <memory> // unique_ptr #include <memory> // unique_ptr
#include <netinet/in.h> #include <netinet/in.h>
#include <rdma/fabric.h>
class FairMQTransportFactory; class FairMQTransportFactory;
namespace fair namespace fair
@ -86,7 +85,7 @@ class Socket final : public fair::mq::Socket
private: private:
void* fControlSocket; void* fControlSocket;
void* fMonitorSocket; // void* fMonitorSocket;
std::unique_ptr<asiofi::passive_endpoint> fPassiveDataEndpoint; std::unique_ptr<asiofi::passive_endpoint> fPassiveDataEndpoint;
std::unique_ptr<asiofi::connected_endpoint> fDataEndpoint; std::unique_ptr<asiofi::connected_endpoint> fDataEndpoint;
std::string fId; std::string fId;
@ -97,7 +96,7 @@ class Socket final : public fair::mq::Socket
Context& fContext; Context& fContext;
Context::Address fRemoteDataAddr; Context::Address fRemoteDataAddr;
Context::Address fLocalDataAddr; Context::Address fLocalDataAddr;
bool fWaitingForControlPeer; // bool fWaitingForControlPeer;
boost::asio::io_service::strand fIoStrand; boost::asio::io_service::strand fIoStrand;
boost::container::pmr::unsynchronized_pool_resource fCtrlMemPool; boost::container::pmr::unsynchronized_pool_resource fCtrlMemPool;
@ -109,7 +108,7 @@ class Socket final : public fair::mq::Socket
auto SendImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t; auto SendImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t;
auto ReceiveImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t; auto ReceiveImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t;
auto WaitForControlPeer() -> void; // auto WaitForControlPeer() -> void;
auto AnnounceDataAddress() -> void; auto AnnounceDataAddress() -> void;
auto SendControlMessage(CtrlMsgPtr<ControlMessage> ctrl) -> void; auto SendControlMessage(CtrlMsgPtr<ControlMessage> ctrl) -> void;
auto ReceiveControlMessage() -> CtrlMsgPtr<ControlMessage>; auto ReceiveControlMessage() -> CtrlMsgPtr<ControlMessage>;