mirror of
https://github.com/FairRootGroup/FairMQ.git
synced 2025-10-13 08:41:16 +00:00
shmmonitor: distinguish daemon from monitor mode (orthogonal)
This commit is contained in:
parent
30e81d58f8
commit
5228407932
|
@ -215,7 +215,7 @@ class Manager
|
|||
boost::filesystem::path p = boost::process::search_path("fairmq-shmmonitor", ownPath);
|
||||
|
||||
if (!p.empty()) {
|
||||
boost::process::spawn(p, "-x", "--shmid", id, "-d", "-t", "2000", env);
|
||||
boost::process::spawn(p, "-x", "-m", "--shmid", id, "-d", "-t", "2000", env);
|
||||
int numTries = 0;
|
||||
do {
|
||||
try {
|
||||
|
|
|
@ -71,11 +71,11 @@ void signalHandler(int signal)
|
|||
gSignalStatus = signal;
|
||||
}
|
||||
|
||||
Monitor::Monitor(const string& shmId, bool selfDestruct, bool interactive, bool viewOnly, unsigned int timeoutInMS, unsigned int intervalInMS, bool runAsDaemon, bool cleanOnExit)
|
||||
Monitor::Monitor(const string& shmId, bool selfDestruct, bool interactive, bool viewOnly, unsigned int timeoutInMS, unsigned int intervalInMS, bool monitor, bool cleanOnExit)
|
||||
: fSelfDestruct(selfDestruct)
|
||||
, fInteractive(interactive)
|
||||
, fViewOnly(viewOnly)
|
||||
, fIsDaemon(runAsDaemon)
|
||||
, fMonitor(monitor)
|
||||
, fSeenOnce(false)
|
||||
, fCleanOnExit(cleanOnExit)
|
||||
, fTimeoutInMS(timeoutInMS)
|
||||
|
@ -90,12 +90,12 @@ Monitor::Monitor(const string& shmId, bool selfDestruct, bool interactive, bool
|
|||
, fSignalThread()
|
||||
, fDeviceHeartbeats()
|
||||
{
|
||||
if (!fViewOnly) {
|
||||
if (fMonitor) {
|
||||
try {
|
||||
bipc::named_mutex monitorStatus(bipc::create_only, string("fmq_" + fShmId + "_ms").c_str());
|
||||
} catch (bie&) {
|
||||
cout << "fairmq-shmmonitor for shared memory id " << fShmId << " already started or not properly exited. Try `fairmq-shmmonitor --cleanup --shmid " << fShmId << "`" << endl;
|
||||
throw DaemonPresent(tools::ToString("fairmq-shmmonitor for shared memory id ", fShmId, " already started or not properly exited."));
|
||||
cout << "fairmq-shmmonitor (in monitoring mode) for shared memory id " << fShmId << " already started or did not not properly exited. Try `fairmq-shmmonitor --cleanup --shmid " << fShmId << "`" << endl;
|
||||
throw DaemonPresent(tools::ToString("fairmq-shmmonitor (in monitoring mode) for shared memory id ", fShmId, " already started or did not not properly exited."));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -140,7 +140,7 @@ void Monitor::Run()
|
|||
CheckSegment();
|
||||
} else {
|
||||
while (!fTerminating) {
|
||||
this_thread::sleep_for(chrono::milliseconds(fIntervalInMS));
|
||||
this_thread::sleep_for(chrono::milliseconds(500));
|
||||
CheckSegment();
|
||||
}
|
||||
}
|
||||
|
@ -288,11 +288,12 @@ void Monitor::CheckSegment()
|
|||
unsigned int duration = chrono::duration_cast<chrono::milliseconds>(now - fLastHeartbeat).count();
|
||||
|
||||
if (fHeartbeatTriggered && duration > fTimeoutInMS) {
|
||||
// memory is present, but no heartbeats since timeout duration
|
||||
cout << "no heartbeats since over " << fTimeoutInMS << " milliseconds, cleaning..." << endl;
|
||||
Cleanup(ShmId{fShmId});
|
||||
fHeartbeatTriggered = false;
|
||||
if (fSelfDestruct) {
|
||||
cout << "\nself destructing" << endl;
|
||||
cout << "self destructing (segment has been observed and cleaned up by the monitor)" << endl;
|
||||
fTerminating = true;
|
||||
}
|
||||
}
|
||||
|
@ -329,23 +330,23 @@ void Monitor::CheckSegment()
|
|||
} catch (bie&) {
|
||||
fHeartbeatTriggered = false;
|
||||
|
||||
if (fSelfDestruct) {
|
||||
if (fSeenOnce) {
|
||||
// segment has been observed at least once, can self-destruct
|
||||
cout << "self destructing (segment has been observed and cleaned up orderly)" << endl;
|
||||
fTerminating = true;
|
||||
} else {
|
||||
// if self-destruct is requested, and no segment has ever been observed, quit after double timeout duration
|
||||
auto now = chrono::high_resolution_clock::now();
|
||||
unsigned int duration = chrono::duration_cast<chrono::milliseconds>(now - fLastHeartbeat).count();
|
||||
|
||||
if (fIsDaemon && duration > fTimeoutInMS * 2) {
|
||||
if (fMonitor && duration > fTimeoutInMS * 2) {
|
||||
// clean just in case any other artifacts are left.
|
||||
Cleanup(ShmId{fShmId});
|
||||
fHeartbeatTriggered = false;
|
||||
if (fSelfDestruct) {
|
||||
cout << "\nself destructing" << endl;
|
||||
cout << "self destructing (no segments observed within (timeout * 2) since start)" << endl;
|
||||
fTerminating = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (fSelfDestruct) {
|
||||
if (fSeenOnce) {
|
||||
cout << "self destructing" << endl;
|
||||
fTerminating = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -100,7 +100,7 @@ class Monitor
|
|||
bool fSelfDestruct; // will self-destruct after the memory has been closed
|
||||
bool fInteractive; // running in interactive mode
|
||||
bool fViewOnly; // view only mode
|
||||
bool fIsDaemon;
|
||||
bool fMonitor;
|
||||
bool fSeenOnce; // true if segment has been opened successfully at least once
|
||||
bool fCleanOnExit;
|
||||
unsigned int fTimeoutInMS;
|
||||
|
|
|
@ -78,6 +78,7 @@ int main(int argc, char** argv)
|
|||
unsigned int timeoutInMS = 5000;
|
||||
unsigned int intervalInMS = 100;
|
||||
bool runAsDaemon = false;
|
||||
bool monitor = false;
|
||||
bool debug = false;
|
||||
bool cleanOnExit = false;
|
||||
bool getShmId = false;
|
||||
|
@ -93,11 +94,12 @@ int main(int argc, char** argv)
|
|||
("view,v" , value<bool>(&viewOnly)->implicit_value(true), "Run in view only mode")
|
||||
("timeout,t" , value<unsigned int>(&timeoutInMS)->default_value(5000), "Heartbeat timeout in milliseconds")
|
||||
("daemonize,d" , value<bool>(&runAsDaemon)->implicit_value(true), "Daemonize the monitor")
|
||||
("monitor,m" , value<bool>(&monitor)->implicit_value(true), "Run in monitoring mode")
|
||||
("debug,b" , value<bool>(&debug)->implicit_value(true), "Debug - Print a list of messages)")
|
||||
("clean-on-exit,e", value<bool>(&cleanOnExit)->implicit_value(true), "Perform cleanup on exit")
|
||||
("interval" , value<unsigned int>(&intervalInMS)->default_value(100), "Output interval for interactive/view-only mode")
|
||||
("get-shmid" , value<bool>(&getShmId)->implicit_value(true), "Translate given session id and user id to a shmem id (uses current user id if none provided)")
|
||||
("user-id" , value<int>(&userId)->default_value(-1), "User id")
|
||||
("user-id" , value<int>(&userId)->default_value(-1), "User id (used with --get-shmid)")
|
||||
("help,h", "Print help");
|
||||
|
||||
variables_map vm;
|
||||
|
@ -141,12 +143,17 @@ int main(int argc, char** argv)
|
|||
|
||||
cout << "Starting shared memory monitor for session: \"" << sessionName << "\" (shmId: " << shmId << ")..." << endl;
|
||||
|
||||
Monitor monitor(shmId, selfDestruct, interactive, viewOnly, timeoutInMS, intervalInMS, runAsDaemon, cleanOnExit);
|
||||
if (!viewOnly && !interactive && !monitor) {
|
||||
// if none of the run modes is selected, use view only mode.
|
||||
viewOnly = true;
|
||||
}
|
||||
|
||||
Monitor shmmonitor(shmId, selfDestruct, interactive, viewOnly, timeoutInMS, intervalInMS, monitor, cleanOnExit);
|
||||
|
||||
if (interactive || !viewOnly) {
|
||||
monitor.CatchSignals();
|
||||
shmmonitor.CatchSignals();
|
||||
}
|
||||
monitor.Run();
|
||||
shmmonitor.Run();
|
||||
} catch (Monitor::DaemonPresent& dp) {
|
||||
return 0;
|
||||
} catch (exception& e) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user