mirror of
https://github.com/FairRootGroup/FairMQ.git
synced 2025-10-13 16:46:47 +00:00
Shm: Provide segment/msg debug infos
This commit is contained in:
parent
fe9b87e4e2
commit
70a583d08d
|
@ -12,13 +12,15 @@
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <functional> // std::equal_to
|
||||||
|
|
||||||
#include <boost/interprocess/managed_shared_memory.hpp>
|
#include <boost/functional/hash.hpp>
|
||||||
#include <boost/interprocess/allocators/allocator.hpp>
|
#include <boost/interprocess/allocators/allocator.hpp>
|
||||||
#include <boost/interprocess/containers/map.hpp>
|
#include <boost/interprocess/containers/map.hpp>
|
||||||
#include <boost/interprocess/containers/string.hpp>
|
#include <boost/interprocess/containers/string.hpp>
|
||||||
#include <boost/interprocess/containers/vector.hpp>
|
#include <boost/interprocess/containers/vector.hpp>
|
||||||
#include <boost/functional/hash.hpp>
|
#include <boost/interprocess/managed_shared_memory.hpp>
|
||||||
|
#include <boost/unordered_map.hpp>
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
@ -71,6 +73,15 @@ struct DeviceCounter
|
||||||
std::atomic<unsigned int> fCount;
|
std::atomic<unsigned int> fCount;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Counter for the number of messages currently allocated in the shared memory segment.
///
/// A single instance lives in the management segment (Manager looks it up as a
/// unique instance and constructs it with 0 if it is missing). The count is
/// atomic; the visible callers in Message additionally hold the Manager's named
/// mutex while changing it.
struct MsgCounter
{
    /// @param c initial value of the counter
    /// explicit: a bare integer must not silently convert into a counter object
    explicit MsgCounter(unsigned int c)
        : fCount(c)
    {}

    std::atomic<unsigned int> fCount; ///< number of live messages
};
|
||||||
|
|
||||||
struct RegionCounter
|
struct RegionCounter
|
||||||
{
|
{
|
||||||
RegionCounter(uint64_t c)
|
RegionCounter(uint64_t c)
|
||||||
|
@ -88,6 +99,23 @@ struct MetaHeader
|
||||||
boost::interprocess::managed_shared_memory::handle_t fHandle;
|
boost::interprocess::managed_shared_memory::handle_t fHandle;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Debug record describing one message allocated in the shared memory segment.
///
/// Records are stored in a map inside the management segment, keyed by the
/// message handle, and are listed by Monitor::PrintDebug.
struct MsgDebug
{
    /// @param pid          id of the process that created the message
    /// @param size         size of the message buffer
    /// @param creationTime creation timestamp as a raw tick count
    MsgDebug(pid_t pid, size_t size, const uint64_t creationTime)
        : fPid{pid}
        , fSize{size}
        , fCreationTime{creationTime}
    {}

    pid_t fPid;             ///< creator process id
    size_t fSize;           ///< message size
    uint64_t fCreationTime; ///< system_clock ticks since epoch; the monitor reads it as nanoseconds
};
|
||||||
|
|
||||||
|
// Allocator for the (handle, MsgDebug) pairs, placing map nodes inside a managed shared memory segment.
// NOTE(review): the "Uint64" prefix follows the neighbouring aliases, but the key type here is size_t.
using Uint64MsgDebugPairAlloc = boost::interprocess::allocator<std::pair<const size_t, MsgDebug>, SegmentManager>;
|
||||||
|
// Hash-map flavour of the debug container, keyed by the message handle.
// NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed.
using Uint64MsgDebugHashMap = boost::unordered_map<size_t, MsgDebug, boost::hash<size_t>, std::equal_to<size_t>, Uint64MsgDebugPairAlloc>;
|
||||||
|
// Ordered debug container keyed by the message handle; this is the type Manager creates in the
// management segment and Monitor::PrintDebug looks up.
using Uint64MsgDebugMap = boost::interprocess::map<size_t, MsgDebug, std::less<size_t>, Uint64MsgDebugPairAlloc>;
|
||||||
|
|
||||||
struct RegionBlock
|
struct RegionBlock
|
||||||
{
|
{
|
||||||
RegionBlock()
|
RegionBlock()
|
||||||
|
|
|
@ -71,7 +71,7 @@ class Manager
|
||||||
: fShmId(std::move(shmId))
|
: fShmId(std::move(shmId))
|
||||||
, fDeviceId(std::move(deviceId))
|
, fDeviceId(std::move(deviceId))
|
||||||
, fSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_main").c_str(), size)
|
, fSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_main").c_str(), size)
|
||||||
, fManagementSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mng").c_str(), 655360)
|
, fManagementSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mng").c_str(), 6553600)
|
||||||
, fShmVoidAlloc(fManagementSegment.get_segment_manager())
|
, fShmVoidAlloc(fManagementSegment.get_segment_manager())
|
||||||
, fShmMtx(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mtx").c_str())
|
, fShmMtx(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mtx").c_str())
|
||||||
, fRegionEventsCV(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_cv").c_str())
|
, fRegionEventsCV(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_cv").c_str())
|
||||||
|
@ -80,6 +80,8 @@ class Manager
|
||||||
, fRegionInfos(nullptr)
|
, fRegionInfos(nullptr)
|
||||||
, fInterrupted(false)
|
, fInterrupted(false)
|
||||||
, fMsgCounter(0)
|
, fMsgCounter(0)
|
||||||
|
, fMsgDebug(nullptr)
|
||||||
|
, fShmMsgCounter(nullptr)
|
||||||
, fHeartbeatThread()
|
, fHeartbeatThread()
|
||||||
, fSendHeartbeats(true)
|
, fSendHeartbeats(true)
|
||||||
, fThrowOnBadAlloc(true)
|
, fThrowOnBadAlloc(true)
|
||||||
|
@ -117,6 +119,7 @@ class Manager
|
||||||
}
|
}
|
||||||
|
|
||||||
fRegionInfos = fManagementSegment.find_or_construct<Uint64RegionInfoMap>(unique_instance)(fShmVoidAlloc);
|
fRegionInfos = fManagementSegment.find_or_construct<Uint64RegionInfoMap>(unique_instance)(fShmVoidAlloc);
|
||||||
|
fMsgDebug = fManagementSegment.find_or_construct<Uint64MsgDebugMap>(unique_instance)(fShmVoidAlloc);
|
||||||
// store info about the managed segment as region with id 0
|
// store info about the managed segment as region with id 0
|
||||||
fRegionInfos->emplace(0, RegionInfo("", 0, 0, fShmVoidAlloc));
|
fRegionInfos->emplace(0, RegionInfo("", 0, 0, fShmVoidAlloc));
|
||||||
|
|
||||||
|
@ -134,6 +137,16 @@ class Manager
|
||||||
LOG(debug) << "initialized device counter with: " << fDeviceCounter->fCount;
|
LOG(debug) << "initialized device counter with: " << fDeviceCounter->fCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fShmMsgCounter = fManagementSegment.find<MsgCounter>(unique_instance).first;
|
||||||
|
|
||||||
|
if (fShmMsgCounter) {
|
||||||
|
LOG(debug) << "message counter found, with value of " << fShmMsgCounter->fCount << ".";
|
||||||
|
} else {
|
||||||
|
LOG(debug) << "no message counter found, creating one and initializing with 0";
|
||||||
|
fShmMsgCounter = fManagementSegment.construct<MsgCounter>(unique_instance)(0);
|
||||||
|
LOG(debug) << "initialized message counter with: " << fShmMsgCounter->fCount;
|
||||||
|
}
|
||||||
|
|
||||||
fHeartbeatThread = std::thread(&Manager::SendHeartbeats, this);
|
fHeartbeatThread = std::thread(&Manager::SendHeartbeats, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -394,6 +407,21 @@ class Manager
|
||||||
void IncrementMsgCounter() { fMsgCounter.fetch_add(1, std::memory_order_relaxed); }
|
void IncrementMsgCounter() { fMsgCounter.fetch_add(1, std::memory_order_relaxed); }
|
||||||
void DecrementMsgCounter() { fMsgCounter.fetch_sub(1, std::memory_order_relaxed); }
|
void DecrementMsgCounter() { fMsgCounter.fetch_sub(1, std::memory_order_relaxed); }
|
||||||
|
|
||||||
|
/// Atomically adds one to the message counter stored in the management segment.
void IncrementShmMsgCounter()
{
    fShmMsgCounter->fCount.fetch_add(1);
}
|
||||||
|
/// Atomically subtracts one from the message counter stored in the management segment.
void DecrementShmMsgCounter()
{
    fShmMsgCounter->fCount.fetch_sub(1);
}
|
||||||
|
|
||||||
|
void AddMsgDebug(pid_t pid, size_t size, size_t handle, uint64_t time)
|
||||||
|
{
|
||||||
|
fMsgDebug->emplace(handle, MsgDebug(pid, size, time));
|
||||||
|
}
|
||||||
|
|
||||||
|
void RemoveMsgDebug(size_t handle)
|
||||||
|
{
|
||||||
|
fMsgDebug->erase(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gives access to the named mutex of this shm session ("fmq_<shmId>_mtx").
/// Message locks it around updates of the shared message counter and debug map.
///
/// @return reference to the mutex owned by this Manager
boost::interprocess::named_mutex& GetMtx()
{
    return fShmMtx;
}
|
||||||
|
|
||||||
void SendHeartbeats()
|
void SendHeartbeats()
|
||||||
{
|
{
|
||||||
std::string controlQueueName("fmq_" + fShmId + "_cq");
|
std::string controlQueueName("fmq_" + fShmId + "_cq");
|
||||||
|
@ -473,6 +501,8 @@ class Manager
|
||||||
|
|
||||||
std::atomic<bool> fInterrupted;
|
std::atomic<bool> fInterrupted;
|
||||||
std::atomic<int32_t> fMsgCounter; // TODO: find a better lifetime solution instead of the counter
|
std::atomic<int32_t> fMsgCounter; // TODO: find a better lifetime solution instead of the counter
|
||||||
|
Uint64MsgDebugMap* fMsgDebug;
|
||||||
|
MsgCounter* fShmMsgCounter;
|
||||||
|
|
||||||
std::thread fHeartbeatThread;
|
std::thread fHeartbeatThread;
|
||||||
bool fSendHeartbeats;
|
bool fSendHeartbeats;
|
||||||
|
|
|
@ -22,6 +22,9 @@
|
||||||
#include <cstddef> // size_t
|
#include <cstddef> // size_t
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
|
||||||
|
#include <sys/types.h> // pid_t
|
||||||
|
#include <unistd.h> // getpid
|
||||||
|
|
||||||
namespace fair
|
namespace fair
|
||||||
{
|
{
|
||||||
namespace mq
|
namespace mq
|
||||||
|
@ -274,6 +277,9 @@ class Message final : public fair::mq::Message
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fMeta.fHandle = fManager.Segment().get_handle_from_address(fLocalPtr);
|
fMeta.fHandle = fManager.Segment().get_handle_from_address(fLocalPtr);
|
||||||
|
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fManager.GetMtx());
|
||||||
|
fManager.IncrementShmMsgCounter();
|
||||||
|
fManager.AddMsgDebug(getpid(), size, static_cast<size_t>(fMeta.fHandle), std::chrono::system_clock::now().time_since_epoch().count());
|
||||||
}
|
}
|
||||||
|
|
||||||
fMeta.fSize = size;
|
fMeta.fSize = size;
|
||||||
|
@ -285,6 +291,9 @@ class Message final : public fair::mq::Message
|
||||||
if (fMeta.fHandle >= 0 && !fQueued) {
|
if (fMeta.fHandle >= 0 && !fQueued) {
|
||||||
if (fMeta.fRegionId == 0) {
|
if (fMeta.fRegionId == 0) {
|
||||||
fManager.Segment().deallocate(fManager.Segment().get_address_from_handle(fMeta.fHandle));
|
fManager.Segment().deallocate(fManager.Segment().get_address_from_handle(fMeta.fHandle));
|
||||||
|
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fManager.GetMtx());
|
||||||
|
fManager.DecrementShmMsgCounter();
|
||||||
|
fManager.RemoveMsgDebug(fMeta.fHandle);
|
||||||
fMeta.fHandle = -1;
|
fMeta.fHandle = -1;
|
||||||
} else {
|
} else {
|
||||||
if (!fRegionPtr) {
|
if (!fRegionPtr) {
|
||||||
|
|
|
@ -225,6 +225,9 @@ void Monitor::Interactive()
|
||||||
case '\n':
|
case '\n':
|
||||||
cout << "\n[\\n] --> invalid input." << endl;
|
cout << "\n[\\n] --> invalid input." << endl;
|
||||||
break;
|
break;
|
||||||
|
case 'b':
|
||||||
|
PrintDebug(ShmId{fShmId});
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
cout << "\n[" << c << "] --> invalid input." << endl;
|
cout << "\n[" << c << "] --> invalid input." << endl;
|
||||||
break;
|
break;
|
||||||
|
@ -284,12 +287,17 @@ void Monitor::CheckSegment()
|
||||||
fSeenOnce = true;
|
fSeenOnce = true;
|
||||||
|
|
||||||
unsigned int numDevices = 0;
|
unsigned int numDevices = 0;
|
||||||
|
unsigned int numMessages = 0;
|
||||||
|
|
||||||
if (fInteractive || fViewOnly) {
|
if (fInteractive || fViewOnly) {
|
||||||
DeviceCounter* dc = managementSegment.find<DeviceCounter>(bipc::unique_instance).first;
|
DeviceCounter* dc = managementSegment.find<DeviceCounter>(bipc::unique_instance).first;
|
||||||
if (dc) {
|
if (dc) {
|
||||||
numDevices = dc->fCount;
|
numDevices = dc->fCount;
|
||||||
}
|
}
|
||||||
|
MsgCounter* mc = managementSegment.find<MsgCounter>(bipc::unique_instance).first;
|
||||||
|
if (mc) {
|
||||||
|
numMessages = mc->fCount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto now = chrono::high_resolution_clock::now();
|
auto now = chrono::high_resolution_clock::now();
|
||||||
|
@ -311,17 +319,27 @@ void Monitor::CheckSegment()
|
||||||
<< setw(10) << segment.get_size() << " | "
|
<< setw(10) << segment.get_size() << " | "
|
||||||
<< setw(10) << segment.get_free_memory() << " | "
|
<< setw(10) << segment.get_free_memory() << " | "
|
||||||
<< setw(8) << numDevices << " | "
|
<< setw(8) << numDevices << " | "
|
||||||
|
<< setw(8) << numMessages << " | "
|
||||||
<< setw(10) << (fViewOnly ? "view only" : to_string(duration)) << " |"
|
<< setw(10) << (fViewOnly ? "view only" : to_string(duration)) << " |"
|
||||||
<< c << flush;
|
<< c << flush;
|
||||||
} else if (fViewOnly) {
|
} else if (fViewOnly) {
|
||||||
size_t free = segment.get_free_memory();
|
size_t free = segment.get_free_memory();
|
||||||
size_t total = segment.get_size();
|
size_t total = segment.get_size();
|
||||||
size_t used = total - free;
|
size_t used = total - free;
|
||||||
|
// size_t mfree = managementSegment.get_free_memory();
|
||||||
|
// size_t mtotal = managementSegment.get_size();
|
||||||
|
// size_t mused = mtotal - mfree;
|
||||||
LOGV(info, user1) << "[" << fSegmentName
|
LOGV(info, user1) << "[" << fSegmentName
|
||||||
<< "] devices: " << numDevices
|
<< "] devices: " << numDevices
|
||||||
<< ", total: " << total
|
<< ", total: " << total
|
||||||
|
<< ", msgs: " << numMessages
|
||||||
<< ", free: " << free
|
<< ", free: " << free
|
||||||
<< ", used: " << used;
|
<< ", used: " << used;
|
||||||
|
// << "\n "
|
||||||
|
// << "[" << fManagementSegmentName
|
||||||
|
// << "] total: " << mtotal
|
||||||
|
// << ", free: " << mfree
|
||||||
|
// << ", used: " << mused;
|
||||||
}
|
}
|
||||||
} catch (bie&) {
|
} catch (bie&) {
|
||||||
fHeartbeatTriggered = false;
|
fHeartbeatTriggered = false;
|
||||||
|
@ -331,6 +349,7 @@ void Monitor::CheckSegment()
|
||||||
<< setw(10) << "-" << " | "
|
<< setw(10) << "-" << " | "
|
||||||
<< setw(10) << "-" << " | "
|
<< setw(10) << "-" << " | "
|
||||||
<< setw(8) << "-" << " | "
|
<< setw(8) << "-" << " | "
|
||||||
|
<< setw(8) << "-" << " | "
|
||||||
<< setw(10) << "-" << " |"
|
<< setw(10) << "-" << " |"
|
||||||
<< c << flush;
|
<< c << flush;
|
||||||
}
|
}
|
||||||
|
@ -356,6 +375,35 @@ void Monitor::CheckSegment()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Monitor::PrintDebug(const ShmId& shmId)
|
||||||
|
{
|
||||||
|
string managementSegmentName("fmq_" + shmId.shmId + "_mng");
|
||||||
|
try {
|
||||||
|
bipc::managed_shared_memory managementSegment(bipc::open_only, managementSegmentName.c_str());
|
||||||
|
boost::interprocess::named_mutex mtx(boost::interprocess::open_only, std::string("fmq_" + shmId.shmId + "_mtx").c_str());
|
||||||
|
boost::interprocess::scoped_lock<bipc::named_mutex> lock(mtx);
|
||||||
|
|
||||||
|
Uint64MsgDebugMap* debug = managementSegment.find<Uint64MsgDebugMap>(bipc::unique_instance).first;
|
||||||
|
|
||||||
|
cout << endl << "found " << debug->size() << " message(s):" << endl;
|
||||||
|
|
||||||
|
for (const auto& e : *debug) {
|
||||||
|
using time_point = std::chrono::system_clock::time_point;
|
||||||
|
time_point tmpt{std::chrono::duration_cast<time_point::duration>(std::chrono::nanoseconds(e.second.fCreationTime))};
|
||||||
|
std::time_t t = std::chrono::system_clock::to_time_t(tmpt);
|
||||||
|
uint64_t ms = e.second.fCreationTime % 1000000;
|
||||||
|
auto tm = localtime(&t);
|
||||||
|
cout << "offset: " << setw(12) << setfill(' ') << e.first
|
||||||
|
<< ", size: " << setw(10) << setfill(' ') << e.second.fSize
|
||||||
|
<< ", creator PID: " << e.second.fPid << setfill('0')
|
||||||
|
<< ", at: " << setw(2) << tm->tm_hour << ":" << setw(2) << tm->tm_min << ":" << setw(2) << tm->tm_sec << "." << setw(6) << ms << endl;
|
||||||
|
}
|
||||||
|
cout << setfill(' ');
|
||||||
|
} catch (bie&) {
|
||||||
|
cout << "no segment found" << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Monitor::PrintQueues()
|
void Monitor::PrintQueues()
|
||||||
{
|
{
|
||||||
cout << '\n';
|
cout << '\n';
|
||||||
|
@ -401,13 +449,14 @@ void Monitor::PrintHeader()
|
||||||
<< setw(10) << "size" << " | "
|
<< setw(10) << "size" << " | "
|
||||||
<< setw(10) << "free" << " | "
|
<< setw(10) << "free" << " | "
|
||||||
<< setw(8) << "devices" << " | "
|
<< setw(8) << "devices" << " | "
|
||||||
|
<< setw(8) << "msgs" << " | "
|
||||||
<< setw(10) << "last hb" << " |"
|
<< setw(10) << "last hb" << " |"
|
||||||
<< endl;
|
<< endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Monitor::PrintHelp()
|
void Monitor::PrintHelp()
|
||||||
{
|
{
|
||||||
cout << "controls: [x] close memory, [p] print queues, [h] help, [q] quit." << endl;
|
cout << "controls: [x] close memory, [p] print queues, [] print a list of allocated messages, [h] help, [q] quit." << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Monitor::RemoveObject(const string& name)
|
void Monitor::RemoveObject(const string& name)
|
||||||
|
|
|
@ -60,6 +60,8 @@ class Monitor
|
||||||
/// @param sessionId session id
|
/// @param sessionId session id
|
||||||
static void CleanupFull(const SessionId& sessionId);
|
static void CleanupFull(const SessionId& sessionId);
|
||||||
|
|
||||||
|
/// @brief Prints the list of messages recorded in the debug map of a shm session
/// @param shmId shared memory id
static void PrintDebug(const ShmId& shmId);
|
||||||
|
|
||||||
static void RemoveObject(const std::string&);
|
static void RemoveObject(const std::string&);
|
||||||
static void RemoveFileMapping(const std::string&);
|
static void RemoveFileMapping(const std::string&);
|
||||||
static void RemoveQueue(const std::string&);
|
static void RemoveQueue(const std::string&);
|
||||||
|
|
|
@ -78,6 +78,7 @@ int main(int argc, char** argv)
|
||||||
unsigned int timeoutInMS = 5000;
|
unsigned int timeoutInMS = 5000;
|
||||||
unsigned int intervalInMS = 100;
|
unsigned int intervalInMS = 100;
|
||||||
bool runAsDaemon = false;
|
bool runAsDaemon = false;
|
||||||
|
bool debug = false;
|
||||||
bool cleanOnExit = false;
|
bool cleanOnExit = false;
|
||||||
|
|
||||||
options_description desc("Options");
|
options_description desc("Options");
|
||||||
|
@ -90,6 +91,7 @@ int main(int argc, char** argv)
|
||||||
("view,v" , value<bool>(&viewOnly)->implicit_value(true), "Run in view only mode")
|
("view,v" , value<bool>(&viewOnly)->implicit_value(true), "Run in view only mode")
|
||||||
("timeout,t" , value<unsigned int>(&timeoutInMS)->default_value(5000), "Heartbeat timeout in milliseconds")
|
("timeout,t" , value<unsigned int>(&timeoutInMS)->default_value(5000), "Heartbeat timeout in milliseconds")
|
||||||
("daemonize,d" , value<bool>(&runAsDaemon)->implicit_value(true), "Daemonize the monitor")
|
("daemonize,d" , value<bool>(&runAsDaemon)->implicit_value(true), "Daemonize the monitor")
|
||||||
|
("debug,b" , value<bool>(&debug)->implicit_value(true), "Debug - Print a list of messages)")
|
||||||
("clean-on-exit,e", value<bool>(&cleanOnExit)->implicit_value(true), "Perform cleanup on exit")
|
("clean-on-exit,e", value<bool>(&cleanOnExit)->implicit_value(true), "Perform cleanup on exit")
|
||||||
("interval" , value<unsigned int>(&intervalInMS)->default_value(100), "Output interval for interactive/view-only mode")
|
("interval" , value<unsigned int>(&intervalInMS)->default_value(100), "Output interval for interactive/view-only mode")
|
||||||
("help,h", "Print help");
|
("help,h", "Print help");
|
||||||
|
@ -117,6 +119,11 @@ int main(int argc, char** argv)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (debug) {
|
||||||
|
Monitor::PrintDebug(ShmId{shmId});
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
cout << "Starting shared memory monitor for session: \"" << sessionName << "\" (shmId: " << shmId << ")..." << endl;
|
cout << "Starting shared memory monitor for session: \"" << sessionName << "\" (shmId: " << shmId << ")..." << endl;
|
||||||
if (viewOnly && !interactive) {
|
if (viewOnly && !interactive) {
|
||||||
cout << "running in non-interactive view-only mode, outputting with interval of " << intervalInMS << "ms. (change with --interval), press ctrl+C to exit." << endl;
|
cout << "running in non-interactive view-only mode, outputting with interval of " << intervalInMS << "ms. (change with --interval), press ctrl+C to exit." << endl;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user