/********************************************************************************
 * Copyright (C) 2016-2017 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH  *
 *                                                                              *
 *              This software is distributed under the terms of the             *
 *              GNU Lesser General Public Licence (LGPL) version 3,             *
 *                    copied verbatim in the file "LICENSE"                     *
 ********************************************************************************/

// NOTE(review): the reviewed copy of this file had everything between angle
// brackets stripped (system header names and template arguments). They were
// restored from how each name is used below. Confirm the #include list, the
// DeviceCounter type name and the CreatePoller parameter types against the
// corresponding headers before merging.

#include "FairMQLogger.h"
#include "FairMQShmManager.h"
#include "FairMQTransportFactorySHM.h"

#include <zmq.h>

#include <boost/version.hpp>
#include <boost/filesystem.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/sync/named_mutex.hpp>
#include <boost/interprocess/sync/scoped_lock.hpp>
#include <boost/interprocess/ipc/message_queue.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>

#include <cassert>
#include <cerrno>
#include <chrono>
#include <thread>
#include <cstdlib> // std::system

using namespace std;
using namespace fair::mq::shmem;

namespace bipc = boost::interprocess;
namespace bfs = boost::filesystem;
namespace bpt = boost::posix_time;

FairMQ::Transport FairMQTransportFactorySHM::fTransportType = FairMQ::Transport::SHM;

/// Sets up the shared-memory transport for one device.
///
/// Creates and configures the ZeroMQ context, opens (or creates) the shared
/// memory segment through the Manager singleton, registers this factory in the
/// device counter stored inside that segment and starts the heartbeat thread.
///
/// @param id     identifier forwarded to the FairMQTransportFactory base
/// @param config program options; when null, built-in defaults are used for
///               "io-threads", "shm-segment-size" and "shm-segment-name"
FairMQTransportFactorySHM::FairMQTransportFactorySHM(const string& id, const FairMQProgOptions* config)
    : FairMQTransportFactory(id)
    , fContext(nullptr)
    , fHeartbeatSocket(nullptr)
    , fHeartbeatThread()
    , fSendHeartbeats(true)
    , fShMutex(bipc::open_or_create, "fairmq_shmem_mutex")
    , fDeviceCounter(nullptr)
{
    int major, minor, patch;
    zmq_version(&major, &minor, &patch);
    LOG(DEBUG) << "Transport: Using ZeroMQ (" << major << "." << minor << "." << patch << ") & "
               << "boost::interprocess (" << (BOOST_VERSION / 100000) << "." << (BOOST_VERSION / 100 % 1000) << "." << (BOOST_VERSION % 100) << ")";

    fContext = zmq_ctx_new();
    if (!fContext)
    {
        LOG(ERROR) << "failed creating context, reason: " << zmq_strerror(errno);
        exit(EXIT_FAILURE);
    }

    // Defaults, used when no program options are supplied.
    int numIoThreads = 1;
    size_t segmentSize = 2000000000;
    string segmentName = "fairmq_shmem_main";

    if (config)
    {
        numIoThreads = config->GetValue<int>("io-threads");
        segmentSize = config->GetValue<size_t>("shm-segment-size");
        segmentName = config->GetValue<string>("shm-segment-name");
    }
    else
    {
        LOG(WARN) << "shmem: FairMQProgOptions not available! Using defaults.";
    }

    if (zmq_ctx_set(fContext, ZMQ_IO_THREADS, numIoThreads) != 0)
    {
        LOG(ERROR) << "shmem: failed configuring context, reason: " << zmq_strerror(errno);
    }

    // Set the maximum number of allowed sockets on the context.
    if (zmq_ctx_set(fContext, ZMQ_MAX_SOCKETS, 10000) != 0)
    {
        LOG(ERROR) << "shmem: failed configuring context, reason: " << zmq_strerror(errno);
    }

    Manager::Instance().InitializeSegment("open_or_create", segmentName, segmentSize);
    LOG(DEBUG) << "shmem: created/opened shared memory segment of " << segmentSize << " bytes. Available are "
               << Manager::Instance().Segment()->get_free_memory() << " bytes.";

    { // mutex scope: the device counter is updated under the named mutex
        bipc::scoped_lock<decltype(fShMutex)> lock(fShMutex);

        fDeviceCounter = Manager::Instance().Segment()->find<DeviceCounter>(bipc::unique_instance).first;
        if (fDeviceCounter)
        {
            LOG(DEBUG) << "shmem: device counter found, with value of " << fDeviceCounter->fCount << ". incrementing.";
            (fDeviceCounter->fCount)++;
            LOG(DEBUG) << "shmem: incremented device counter, now: " << fDeviceCounter->fCount;
        }
        else
        {
            LOG(DEBUG) << "shmem: no device counter found, creating one and initializing with 1";
            fDeviceCounter = Manager::Instance().Segment()->construct<DeviceCounter>(bipc::unique_instance)(1);
            LOG(DEBUG) << "shmem: initialized device counter with: " << fDeviceCounter->fCount;
        }

        // start shm monitor
        // try
        // {
        //     MonitorStatus* monitorStatus = fManagementSegment.find<MonitorStatus>(bipc::unique_instance).first;
        //     if (monitorStatus == nullptr)
        //     {
        //         LOG(DEBUG) << "shmem: no shmmonitor found, starting...";
        //         StartMonitor();
        //     }
        //     else
        //     {
        //         LOG(DEBUG) << "shmem: found shmmonitor in fairmq_shmem_management.";
        //     }
        // }
        // catch (std::exception& e)
        // {
        //     LOG(ERROR) << "shmem: Exception during shmmonitor initialization: " << e.what() << ", application will now exit";
        //     exit(EXIT_FAILURE);
        // }
    }

    fSendHeartbeats = true;
    fHeartbeatThread = thread(&FairMQTransportFactorySHM::SendHeartbeats, this);
}

/// Launches the external "shmmonitor" process in the background and polls the
/// management segment until a MonitorStatus object shows up. Exits the
/// application if the monitor does not report in time.
void FairMQTransportFactorySHM::StartMonitor()
{
    const int pollIntervalMs = 10;
    const int maxTries = 100;
    int numTries = 0;

    // NOTE(review): this looks the name up relative to the current working
    // directory, while the message below talks about PATH - confirm intent.
    if (!bfs::exists(bfs::path("shmmonitor")))
    {
        LOG(ERROR) << "Could not find shmmonitor. Is it in the PATH? Monitor not started";
        return;
    }

    // TODO: replace with Boost.Process once boost 1.64 is available
    int r = system("shmmonitor --self-destruct &");
    LOG(DEBUG) << r;

    do
    {
        MonitorStatus* monitorStatus = Manager::Instance().ManagementSegment().find<MonitorStatus>(bipc::unique_instance).first;
        if (monitorStatus)
        {
            LOG(DEBUG) << "shmem: shmmonitor started";
            break;
        }
        else
        {
            this_thread::sleep_for(std::chrono::milliseconds(pollIntervalMs));
            if (++numTries > maxTries)
            {
                LOG(ERROR) << "Did not get response from shmmonitor after " << pollIntervalMs * maxTries << " milliseconds. Exiting.";
                exit(EXIT_FAILURE);
            }
        }
    }
    while (true);
}

/// Body of the heartbeat thread: while fSendHeartbeats is set, opens the
/// "fairmq_shmem_control_queue" message queue and posts a heartbeat flag to it.
/// When the queue cannot be opened the thread backs off for 500 ms and retries.
///
/// NOTE(review): fSendHeartbeats is written by the destructor while this thread
/// reads it - presumably declared std::atomic<bool> in the header; confirm.
void FairMQTransportFactorySHM::SendHeartbeats()
{
    while (fSendHeartbeats)
    {
        try
        {
            bipc::message_queue mq(bipc::open_only, "fairmq_shmem_control_queue");
            bool heartbeat = true;
            bpt::ptime sndTill = bpt::microsec_clock::universal_time() + bpt::milliseconds(100);
            if (mq.timed_send(&heartbeat, sizeof(heartbeat), 0, sndTill))
            {
                this_thread::sleep_for(chrono::milliseconds(100));
            }
            else
            {
                LOG(DEBUG) << "control queue timeout";
            }
        }
        catch (const bipc::interprocess_exception&)
        {
            // The control queue does not exist (yet); wait before trying again.
            this_thread::sleep_for(chrono::milliseconds(500));
            // LOG(WARN) << "no fairmq_shmem_control_queue found";
        }
    }
}

/// Creates a default-constructed FairMQMessageSHM.
FairMQMessagePtr FairMQTransportFactorySHM::CreateMessage() const
{
    return FairMQMessagePtr(new FairMQMessageSHM());
}

/// Creates a FairMQMessageSHM constructed with the given size.
FairMQMessagePtr FairMQTransportFactorySHM::CreateMessage(const size_t size) const
{
    return FairMQMessagePtr(new FairMQMessageSHM(size));
}

/// Creates a FairMQMessageSHM from a caller-provided buffer; data, size, ffn
/// and hint are forwarded unchanged to the message constructor.
FairMQMessagePtr FairMQTransportFactorySHM::CreateMessage(void* data, const size_t size, fairmq_free_fn* ffn, void* hint) const
{
    return FairMQMessagePtr(new FairMQMessageSHM(data, size, ffn, hint));
}

/// Creates a FairMQMessageSHM from a region, a data pointer and a size, all
/// forwarded unchanged to the message constructor.
FairMQMessagePtr FairMQTransportFactorySHM::CreateMessage(FairMQRegionPtr& region, void* data, const size_t size) const
{
    return FairMQMessagePtr(new FairMQMessageSHM(region, data, size));
}

/// Creates a FairMQSocketSHM of the given type and name, bound to this
/// factory's id and ZeroMQ context.
FairMQSocketPtr FairMQTransportFactorySHM::CreateSocket(const string& type, const string& name) const
{
    assert(fContext);
    return FairMQSocketPtr(new FairMQSocketSHM(type, name, GetId(), fContext));
}

/// Creates a FairMQPollerSHM over a vector of channels.
FairMQPollerPtr FairMQTransportFactorySHM::CreatePoller(const vector<FairMQChannel>& channels) const
{
    return FairMQPollerPtr(new FairMQPollerSHM(channels));
}

/// Creates a FairMQPollerSHM over a vector of channel pointers.
FairMQPollerPtr FairMQTransportFactorySHM::CreatePoller(const std::vector<const FairMQChannel*>& channels) const
{
    return FairMQPollerPtr(new FairMQPollerSHM(channels));
}

/// Creates a FairMQPollerSHM from a channel map and a list of channel names.
FairMQPollerPtr FairMQTransportFactorySHM::CreatePoller(const unordered_map<string, vector<FairMQChannel>>& channelsMap, const vector<string>& channelList) const
{
    return FairMQPollerPtr(new FairMQPollerSHM(channelsMap, channelList));
}

/// Creates a FairMQPollerSHM over a command socket and a data socket.
FairMQPollerPtr FairMQTransportFactorySHM::CreatePoller(const FairMQSocket& cmdSocket, const FairMQSocket& dataSocket) const
{
    return FairMQPollerPtr(new FairMQPollerSHM(cmdSocket, dataSocket));
}

/// Creates a FairMQRegionSHM constructed with the given size.
FairMQRegionPtr FairMQTransportFactorySHM::CreateRegion(const size_t size) const
{
    return FairMQRegionPtr(new FairMQRegionSHM(size));
}

/// Stops the heartbeat thread, terminates the ZeroMQ context and unregisters
/// this factory from the shared device counter, removing the segment when the
/// counter reaches zero.
FairMQTransportFactorySHM::~FairMQTransportFactorySHM()
{
    fSendHeartbeats = false;
    fHeartbeatThread.join();

    if (fContext)
    {
        // zmq_ctx_term may be interrupted by a signal, in which case it can be
        // restarted. Any other failure is logged, but must not prevent the
        // device counter below from being decremented - otherwise the shared
        // segment would never be removed.
        while (zmq_ctx_term(fContext) != 0)
        {
            const int err = errno; // capture before logging can overwrite it
            if (err == EINTR)
            {
                LOG(DEBUG) << "shmem: closing context interrupted by a signal, retrying";
                continue;
            }
            LOG(ERROR) << "shmem: failed closing context, reason: " << zmq_strerror(err);
            break;
        }
        fContext = nullptr;
    }
    else
    {
        LOG(ERROR) << "shmem: Terminate(): context not available for shutdown";
    }

    { // mutex scope
        bipc::scoped_lock<decltype(fShMutex)> lock(fShMutex);

        (fDeviceCounter->fCount)--;

        if (fDeviceCounter->fCount == 0)
        {
            LOG(DEBUG) << "shmem: last 'fairmq_shmem_main' user, removing segment.";
            Manager::Instance().Remove();
        }
        else
        {
            LOG(DEBUG) << "shmem: other 'fairmq_shmem_main' users present (" << fDeviceCounter->fCount << "), not removing it.";
        }
    }
}

/// Returns the transport type of this factory (the static fTransportType).
FairMQ::Transport FairMQTransportFactorySHM::GetType() const
{
    return fTransportType;
}