shm: allow monitor::ResetContent to cleanup after a crash

This commit is contained in:
Alexey Rybalchenko 2022-02-01 18:55:09 +01:00
parent ddc7c834db
commit b264727179
7 changed files with 108 additions and 37 deletions

View File

@ -133,6 +133,7 @@ struct RegionConfig
bool removeOnDestruction = true; /// remove the region on object destruction
int creationFlags = 0; /// flags passed to the underlying transport on region creation
int64_t userFlags = 0; /// custom flags that have no effect on the transport, but can be retrieved from the region by the user
uint64_t size = 0; /// region size
std::string path = ""; /// file path, if the region is backed by a file
std::optional<uint16_t> id = std::nullopt; /// region id
uint32_t linger = 100; /// delay in ms before region destruction to collect outstanding events

View File

@ -28,6 +28,8 @@
namespace fair::mq::shmem
{
static constexpr uint64_t kManagementSegmentSize = 6553600;
struct SharedMemoryError : std::runtime_error { using std::runtime_error::runtime_error; };
using SimpleSeqFitSegment = boost::interprocess::basic_managed_shared_memory<char,

View File

@ -132,7 +132,7 @@ class Manager
: fShmId64(config ? config->GetProperty<uint64_t>("shmid", makeShmIdUint64(sessionName)) : makeShmIdUint64(sessionName))
, fShmId(makeShmIdStr(fShmId64))
, fSegmentId(config ? config->GetProperty<uint16_t>("shm-segment-id", 0) : 0)
, fManagementSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mng").c_str(), 6553600)
, fManagementSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mng").c_str(), kManagementSegmentSize)
, fShmVoidAlloc(fManagementSegment.get_segment_manager())
, fShmMtx(fManagementSegment.find_or_construct<boost::interprocess::interprocess_mutex>(boost::interprocess::unique_instance)())
, fNumObservedEvents(0)

View File

@ -6,9 +6,10 @@
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#include "Monitor.h"
#include "Common.h"
#include "UnmanagedRegion.h"
#include "Monitor.h"
#include "Segment.h"
#include <fairmq/shmem/UnmanagedRegion.h>
#include <fairmq/tools/IO.h>
#include <fairmq/tools/Strings.h>
@ -415,6 +416,7 @@ void Monitor::PrintDebugInfo(const ShmId& shmId __attribute__((unused)))
size_t numMessages = 0;
if (debug) {
for (const auto& e : *debug) {
numMessages += e.second.size();
}
@ -434,6 +436,9 @@ void Monitor::PrintDebugInfo(const ShmId& shmId __attribute__((unused)))
<< ", at: " << setw(2) << tm->tm_hour << ":" << setw(2) << tm->tm_min << ":" << setw(2) << tm->tm_sec << "." << setw(6) << ms;
}
}
} else {
LOG(info) << "no debug data found";
}
} catch (bie&) {
LOG(info) << "no segments found";
}
@ -463,12 +468,17 @@ unordered_map<uint16_t, std::vector<BufferDebugInfo>> Monitor::GetDebugInfo(cons
result.reserve(debug->size());
if (debug) {
for (const auto& s : *debug) {
result[s.first].reserve(s.second.size());
for (const auto& e : s.second) {
result[s.first][e.first] = BufferDebugInfo(e.first, e.second.fPid, e.second.fSize, e.second.fCreationTime);
}
}
} else {
LOG(info) << "no debug data found";
}
} catch (bie&) {
LOG(info) << "no segments found";
}
@ -701,6 +711,43 @@ void Monitor::ResetContent(const SessionId& sessionId, bool verbose /* = true */
ResetContent(shmId, verbose);
}
void Monitor::ResetContent(const ShmId& shmIdT, const std::vector<SegmentConfig>& segmentCfgs, const std::vector<RegionConfig>& regionCfgs, bool verbose /* = true */)
{
using namespace boost::interprocess;
std::string shmId = shmIdT.shmId;
std::string managementSegmentName("fmq_" + shmId + "_mng");
// reset managed segments
ResetContent(shmIdT, verbose);
// delete management segment
Remove<bipc::shared_memory_object>(managementSegmentName, verbose);
// recreate management segment
managed_shared_memory mngSegment(create_only, managementSegmentName.c_str(), kManagementSegmentSize);
// fill management segment with segment & region infos
for (const auto& s : segmentCfgs) {
if (s.allocationAlgorithm == "rbtree_best_fit") {
Segment::Register(shmId, s.id, AllocationAlgorithm::rbtree_best_fit);
} else if (s.allocationAlgorithm == "simple_seq_fit") {
Segment::Register(shmId, s.id, AllocationAlgorithm::simple_seq_fit);
} else {
LOG(error) << "Unknown allocation algorithm provided: " << s.allocationAlgorithm;
throw MonitorError("Unknown allocation algorithm provided: " + s.allocationAlgorithm);
}
}
for (const auto& r : regionCfgs) {
fair::mq::shmem::UnmanagedRegion::Register(shmId, r);
}
}
void Monitor::ResetContent(const SessionId& sessionId, const std::vector<SegmentConfig>& segmentCfgs, const std::vector<RegionConfig>& regionCfgs, bool verbose /* = true */)
{
ShmId shmId{makeShmIdStr(sessionId.sessionId)};
if (verbose) {
cout << "ResetContent called with session id '" << sessionId.sessionId << "', translating to shared memory id '" << shmId.shmId << "'" << endl;
}
ResetContent(shmId, segmentCfgs, regionCfgs, verbose);
}
Monitor::~Monitor()
{
if (fSignalThread.joinable()) {

View File

@ -8,6 +8,8 @@
#ifndef FAIR_MQ_SHMEM_MONITOR_H_
#define FAIR_MQ_SHMEM_MONITOR_H_
#include <fairmq/UnmanagedRegion.h>
#include <fairlogger/Logger.h>
#include <thread>
@ -49,6 +51,13 @@ struct BufferDebugInfo
uint64_t fCreationTime;
};
struct SegmentConfig
{
uint16_t id;
uint64_t size;
std::string allocationAlgorithm;
};
class Monitor
{
public:
@ -88,6 +97,14 @@ class Monitor
/// @param sessionId session id
/// Only call this when segment is not in use
static void ResetContent(const SessionId& sessionId, bool verbose = true);
/// @brief [EXPERIMENTAL] cleanup the content of the shem segment, without recreating it
/// @param shmId shared memory id
/// Only call this when segment is not in use
static void ResetContent(const ShmId& shmId, const std::vector<SegmentConfig>& segmentCfgs, const std::vector<RegionConfig>& regionCfgs, bool verbose = true);
/// @brief [EXPERIMENTAL] cleanup the content of the shem segment, without recreating it
/// @param sessionId session id
/// Only call this when segment is not in use
static void ResetContent(const SessionId& sessionId, const std::vector<SegmentConfig>& segmentCfgs, const std::vector<RegionConfig>& regionCfgs, bool verbose = true);
/// @brief Outputs list of messages in shmem (if compiled with FAIRMQ_DEBUG_MODE=ON)
/// @param shmId shmem id

View File

@ -26,6 +26,8 @@ static const RBTreeBestFit rbTreeBestFit = RBTreeBestFit();
struct Segment
{
friend class Monitor;
Segment(const std::string& shmId, uint16_t id, size_t size, SimpleSeqFit)
: fSegment(SimpleSeqFitSegment(boost::interprocess::open_or_create,
std::string("fmq_" + shmId + "_m_" + std::to_string(id)).c_str(),
@ -66,15 +68,12 @@ struct Segment
static void Register(const std::string& shmId, uint16_t id, AllocationAlgorithm allocAlgo)
{
using namespace boost::interprocess;
managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), 6553600);
managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), kManagementSegmentSize);
VoidAlloc alloc(mngSegment.get_segment_manager());
Uint16SegmentInfoHashMap* shmSegments = mngSegment.find_or_construct<Uint16SegmentInfoHashMap>(unique_instance)(alloc);
EventCounter* eventCounter = mngSegment.find<EventCounter>(unique_instance).first;
if (!eventCounter) {
eventCounter = mngSegment.construct<EventCounter>(unique_instance)(0);
}
EventCounter* eventCounter = mngSegment.find_or_construct<EventCounter>(unique_instance)(0);
bool newSegmentRegistered = shmSegments->emplace(id, allocAlgo).second;
if (newSegmentRegistered) {

View File

@ -41,6 +41,7 @@ struct UnmanagedRegion
{
friend class Message;
friend class Manager;
friend class Monitor;
UnmanagedRegion(const std::string& shmId, uint16_t id, uint64_t size)
: UnmanagedRegion(shmId, size, false, makeRegionConfig(id))
@ -50,6 +51,10 @@ struct UnmanagedRegion
: UnmanagedRegion(shmId, size, false, std::move(cfg))
{}
UnmanagedRegion(const std::string& shmId, RegionConfig cfg)
: UnmanagedRegion(shmId, cfg.size, false, std::move(cfg))
{}
UnmanagedRegion(const std::string& shmId, uint64_t size, bool remote, RegionConfig cfg)
: fRemote(remote)
, fRemoveOnDestruction(cfg.removeOnDestruction)
@ -66,6 +71,9 @@ struct UnmanagedRegion
{
using namespace boost::interprocess;
// TODO: refactor this
cfg.size = size;
if (!cfg.path.empty()) {
fName = std::string(cfg.path + fName);
@ -119,7 +127,7 @@ struct UnmanagedRegion
}
if (!remote) {
Register(shmId, cfg, size);
Register(shmId, cfg);
}
LOG(trace) << "shmem: initialized region: " << fName << " (" << (remote ? "remote" : "local") << ")";
@ -223,20 +231,17 @@ struct UnmanagedRegion
return regionCfg;
}
static void Register(const std::string& shmId, RegionConfig& cfg, uint64_t size)
static void Register(const std::string& shmId, const RegionConfig& cfg)
{
using namespace boost::interprocess;
managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), 6553600);
managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), kManagementSegmentSize);
VoidAlloc alloc(mngSegment.get_segment_manager());
Uint16RegionInfoHashMap* shmRegions = mngSegment.find_or_construct<Uint16RegionInfoHashMap>(unique_instance)(alloc);
EventCounter* eventCounter = mngSegment.find<EventCounter>(unique_instance).first;
if (!eventCounter) {
eventCounter = mngSegment.construct<EventCounter>(unique_instance)(0);
}
EventCounter* eventCounter = mngSegment.find_or_construct<EventCounter>(unique_instance)(0);
bool newShmRegionCreated = shmRegions->emplace(cfg.id.value(), RegionInfo(cfg.path.c_str(), cfg.creationFlags, cfg.userFlags, size, alloc)).second;
bool newShmRegionCreated = shmRegions->emplace(cfg.id.value(), RegionInfo(cfg.path.c_str(), cfg.creationFlags, cfg.userFlags, cfg.size, alloc)).second;
if (newShmRegionCreated) {
(eventCounter->fCount)++;
}