mirror of
https://github.com/FairRootGroup/FairMQ.git
synced 2025-10-15 09:31:45 +00:00
Resolve hanging process in case of uncaught exception
This commit is contained in:
committed by
Dennis Klein
parent
e1f555bc05
commit
1aab354a5d
@@ -1,5 +1,5 @@
|
||||
/********************************************************************************
|
||||
* Copyright (C) 2017 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
|
||||
* Copyright (C) 2017-2018 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
|
||||
* *
|
||||
* This software is distributed under the terms of the *
|
||||
* GNU Lesser General Public Licence (LGPL) version 3, *
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <termios.h> // for the interactive mode
|
||||
#include <poll.h> // for the interactive mode
|
||||
#include <csignal> // catching system signals
|
||||
#include <cstdlib>
|
||||
#include <functional>
|
||||
#include <atomic>
|
||||
|
||||
@@ -18,11 +19,18 @@ using namespace std;
|
||||
|
||||
namespace
|
||||
{
|
||||
std::atomic<sig_atomic_t> gSignalStatus(0);
|
||||
std::atomic<sig_atomic_t> gLastSignal(0);
|
||||
std::atomic<int> gSignalCount(0);
|
||||
|
||||
extern "C" auto signal_handler(int signal) -> void
|
||||
{
|
||||
gSignalStatus = signal;
|
||||
++gSignalCount;
|
||||
gLastSignal = signal;
|
||||
|
||||
if (gSignalCount > 1)
|
||||
{
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,14 +45,23 @@ Control::Control(const string& name, const Plugin::Version version, const string
|
||||
: Plugin(name, version, maintainer, homepage, pluginServices)
|
||||
, fControllerThread()
|
||||
, fSignalHandlerThread()
|
||||
, fShutdownThread()
|
||||
, fEvents()
|
||||
, fEventsMutex()
|
||||
, fShutdownMutex()
|
||||
, fControllerMutex()
|
||||
, fNewEvent()
|
||||
, fDeviceTerminationRequested(false)
|
||||
, fHasShutdown(false)
|
||||
, fDeviceShutdownRequested(false)
|
||||
, fDeviceHasShutdown(false)
|
||||
, fPluginShutdownRequested(false)
|
||||
{
|
||||
SubscribeToDeviceStateChange([&](DeviceState newState)
|
||||
{
|
||||
{
|
||||
lock_guard<mutex> lock{fEventsMutex};
|
||||
fEvents.push(newState);
|
||||
}
|
||||
fNewEvent.notify_one();
|
||||
});
|
||||
|
||||
try
|
||||
{
|
||||
TakeDeviceControl();
|
||||
@@ -97,123 +114,121 @@ auto ControlPluginProgramOptions() -> Plugin::ProgOptions
|
||||
}
|
||||
|
||||
auto Control::InteractiveMode() -> void
|
||||
try
|
||||
{
|
||||
try
|
||||
RunStartupSequence();
|
||||
|
||||
char input; // hold the user console input
|
||||
pollfd cinfd[1];
|
||||
cinfd[0].fd = fileno(stdin);
|
||||
cinfd[0].events = POLLIN;
|
||||
|
||||
struct termios t;
|
||||
tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure
|
||||
t.c_lflag &= ~ICANON; // disable canonical input
|
||||
t.c_lflag &= ~ECHO; // do not echo input chars
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings
|
||||
|
||||
PrintInteractiveHelp();
|
||||
|
||||
bool keepRunning = true;
|
||||
|
||||
while (keepRunning)
|
||||
{
|
||||
RunStartupSequence();
|
||||
|
||||
char input; // hold the user console input
|
||||
pollfd cinfd[1];
|
||||
cinfd[0].fd = fileno(stdin);
|
||||
cinfd[0].events = POLLIN;
|
||||
|
||||
struct termios t;
|
||||
tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure
|
||||
t.c_lflag &= ~ICANON; // disable canonical input
|
||||
t.c_lflag &= ~ECHO; // do not echo input chars
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings
|
||||
|
||||
PrintInteractiveHelp();
|
||||
|
||||
bool keepRunning = true;
|
||||
|
||||
while (keepRunning)
|
||||
if (poll(cinfd, 1, 500))
|
||||
{
|
||||
if (poll(cinfd, 1, 500))
|
||||
if (fDeviceShutdownRequested)
|
||||
{
|
||||
if (fDeviceTerminationRequested)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
cin >> input;
|
||||
|
||||
switch (input)
|
||||
{
|
||||
case 'i':
|
||||
LOG(info) << "\n\n --> [i] init device\n";
|
||||
ChangeDeviceState(DeviceStateTransition::InitDevice);
|
||||
break;
|
||||
case 'j':
|
||||
LOG(info) << "\n\n --> [j] init task\n";
|
||||
ChangeDeviceState(DeviceStateTransition::InitTask);
|
||||
break;
|
||||
case 'p':
|
||||
LOG(info) << "\n\n --> [p] pause\n";
|
||||
ChangeDeviceState(DeviceStateTransition::Pause);
|
||||
break;
|
||||
case 'r':
|
||||
LOG(info) << "\n\n --> [r] run\n";
|
||||
ChangeDeviceState(DeviceStateTransition::Run);
|
||||
break;
|
||||
case 's':
|
||||
LOG(info) << "\n\n --> [s] stop\n";
|
||||
ChangeDeviceState(DeviceStateTransition::Stop);
|
||||
break;
|
||||
case 't':
|
||||
LOG(info) << "\n\n --> [t] reset task\n";
|
||||
ChangeDeviceState(DeviceStateTransition::ResetTask);
|
||||
break;
|
||||
case 'd':
|
||||
LOG(info) << "\n\n --> [d] reset device\n";
|
||||
ChangeDeviceState(DeviceStateTransition::ResetDevice);
|
||||
break;
|
||||
case 'k':
|
||||
LOG(info) << "\n\n --> [k] increase log severity\n";
|
||||
CycleLogConsoleSeverityUp();
|
||||
break;
|
||||
case 'l':
|
||||
LOG(info) << "\n\n --> [l] decrease log severity\n";
|
||||
CycleLogConsoleSeverityDown();
|
||||
break;
|
||||
case 'n':
|
||||
LOG(info) << "\n\n --> [n] increase log verbosity\n";
|
||||
CycleLogVerbosityUp();
|
||||
break;
|
||||
case 'm':
|
||||
LOG(info) << "\n\n --> [m] decrease log verbosity\n";
|
||||
CycleLogVerbosityDown();
|
||||
break;
|
||||
case 'h':
|
||||
LOG(info) << "\n\n --> [h] help\n";
|
||||
PrintInteractiveHelp();
|
||||
break;
|
||||
// case 'x':
|
||||
// LOG(info) << "\n\n --> [x] ERROR\n";
|
||||
// ChangeDeviceState(DeviceStateTransition::ERROR_FOUND);
|
||||
// break;
|
||||
case 'q':
|
||||
LOG(info) << "\n\n --> [q] end\n";
|
||||
keepRunning = false;
|
||||
break;
|
||||
default:
|
||||
LOG(info) << "Invalid input: [" << input << "]";
|
||||
PrintInteractiveHelp();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (fDeviceTerminationRequested)
|
||||
cin >> input;
|
||||
|
||||
switch (input)
|
||||
{
|
||||
keepRunning = false;
|
||||
case 'i':
|
||||
LOG(info) << "\n\n --> [i] init device\n";
|
||||
ChangeDeviceState(DeviceStateTransition::InitDevice);
|
||||
break;
|
||||
case 'j':
|
||||
LOG(info) << "\n\n --> [j] init task\n";
|
||||
ChangeDeviceState(DeviceStateTransition::InitTask);
|
||||
break;
|
||||
case 'p':
|
||||
LOG(info) << "\n\n --> [p] pause\n";
|
||||
ChangeDeviceState(DeviceStateTransition::Pause);
|
||||
break;
|
||||
case 'r':
|
||||
LOG(info) << "\n\n --> [r] run\n";
|
||||
ChangeDeviceState(DeviceStateTransition::Run);
|
||||
break;
|
||||
case 's':
|
||||
LOG(info) << "\n\n --> [s] stop\n";
|
||||
ChangeDeviceState(DeviceStateTransition::Stop);
|
||||
break;
|
||||
case 't':
|
||||
LOG(info) << "\n\n --> [t] reset task\n";
|
||||
ChangeDeviceState(DeviceStateTransition::ResetTask);
|
||||
break;
|
||||
case 'd':
|
||||
LOG(info) << "\n\n --> [d] reset device\n";
|
||||
ChangeDeviceState(DeviceStateTransition::ResetDevice);
|
||||
break;
|
||||
case 'k':
|
||||
LOG(info) << "\n\n --> [k] increase log severity\n";
|
||||
CycleLogConsoleSeverityUp();
|
||||
break;
|
||||
case 'l':
|
||||
LOG(info) << "\n\n --> [l] decrease log severity\n";
|
||||
CycleLogConsoleSeverityDown();
|
||||
break;
|
||||
case 'n':
|
||||
LOG(info) << "\n\n --> [n] increase log verbosity\n";
|
||||
CycleLogVerbosityUp();
|
||||
break;
|
||||
case 'm':
|
||||
LOG(info) << "\n\n --> [m] decrease log verbosity\n";
|
||||
CycleLogVerbosityDown();
|
||||
break;
|
||||
case 'h':
|
||||
LOG(info) << "\n\n --> [h] help\n";
|
||||
PrintInteractiveHelp();
|
||||
break;
|
||||
// case 'x':
|
||||
// LOG(info) << "\n\n --> [x] ERROR\n";
|
||||
// ChangeDeviceState(DeviceStateTransition::ERROR_FOUND);
|
||||
// break;
|
||||
case 'q':
|
||||
LOG(info) << "\n\n --> [q] end\n";
|
||||
keepRunning = false;
|
||||
break;
|
||||
default:
|
||||
LOG(info) << "Invalid input: [" << input << "]";
|
||||
PrintInteractiveHelp();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure
|
||||
t.c_lflag |= ICANON; // re-enable canonical input
|
||||
t.c_lflag |= ECHO; // echo input chars
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings
|
||||
|
||||
if (!fDeviceTerminationRequested)
|
||||
if (fDeviceShutdownRequested)
|
||||
{
|
||||
RunShutdownSequence();
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch (PluginServices::DeviceControlError& e)
|
||||
{
|
||||
// If we are here, it means another plugin has taken control. That's fine, just print the exception message and do nothing else.
|
||||
LOG(debug) << e.what();
|
||||
}
|
||||
|
||||
tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure
|
||||
t.c_lflag |= ICANON; // re-enable canonical input
|
||||
t.c_lflag |= ECHO; // echo input chars
|
||||
tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings
|
||||
|
||||
RunShutdownSequence();
|
||||
}
|
||||
catch (PluginServices::DeviceControlError& e)
|
||||
{
|
||||
// If we are here, it means another plugin has taken control. That's fine, just print the exception message and do nothing else.
|
||||
LOG(debug) << e.what();
|
||||
}
|
||||
catch (DeviceErrorState&)
|
||||
{
|
||||
}
|
||||
|
||||
auto Control::PrintInteractiveHelp() -> void
|
||||
@@ -234,137 +249,119 @@ auto Control::WaitForNextState() -> DeviceState
|
||||
}
|
||||
|
||||
auto result = fEvents.front();
|
||||
|
||||
if (result == DeviceState::Error)
|
||||
{
|
||||
throw DeviceErrorState("Controlled device transitioned to error state.");
|
||||
}
|
||||
|
||||
fEvents.pop();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
auto Control::StaticMode() -> void
|
||||
try
|
||||
{
|
||||
try
|
||||
RunStartupSequence();
|
||||
|
||||
{
|
||||
RunStartupSequence();
|
||||
|
||||
// Wait for next state, which is DeviceState::Ready,
|
||||
// or for device shutdown request (Ctrl-C)
|
||||
unique_lock<mutex> lock{fEventsMutex};
|
||||
while (fEvents.empty() && !fDeviceShutdownRequested)
|
||||
{
|
||||
// Wait for next state, which is DeviceState::Ready,
|
||||
// or for device termination request
|
||||
unique_lock<mutex> lock{fEventsMutex};
|
||||
while (fEvents.empty() && !fDeviceTerminationRequested)
|
||||
{
|
||||
fNewEvent.wait(lock);
|
||||
}
|
||||
}
|
||||
|
||||
if (!fDeviceTerminationRequested)
|
||||
{
|
||||
RunShutdownSequence();
|
||||
fNewEvent.wait_for(lock, chrono::milliseconds(50));
|
||||
}
|
||||
}
|
||||
catch (PluginServices::DeviceControlError& e)
|
||||
{
|
||||
// If we are here, it means another plugin has taken control. That's fine, just print the exception message and do nothing else.
|
||||
LOG(debug) << e.what();
|
||||
}
|
||||
|
||||
RunShutdownSequence();
|
||||
}
|
||||
catch (PluginServices::DeviceControlError& e)
|
||||
{
|
||||
// If we are here, it means another plugin has taken control. That's fine, just print the exception message and do nothing else.
|
||||
LOG(debug) << e.what();
|
||||
}
|
||||
catch (DeviceErrorState&)
|
||||
{
|
||||
}
|
||||
|
||||
auto Control::SignalHandler() -> void
|
||||
{
|
||||
while (true)
|
||||
while (gSignalCount == 0 && !fPluginShutdownRequested)
|
||||
{
|
||||
if (gSignalStatus != 0 && !fHasShutdown)
|
||||
{
|
||||
LOG(info) << "Received device shutdown request (signal " << gSignalStatus << ").";
|
||||
LOG(info) << "Waiting for graceful device shutdown. Hit Ctrl-C again to abort immediately.";
|
||||
|
||||
if (!fDeviceTerminationRequested)
|
||||
{
|
||||
fDeviceTerminationRequested = true;
|
||||
gSignalStatus = 0;
|
||||
fShutdownThread = thread(&Control::HandleShutdownSignal, this);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(warn) << "Received 2nd device shutdown request (signal " << gSignalStatus << ").";
|
||||
LOG(warn) << "Aborting immediately!";
|
||||
abort();
|
||||
}
|
||||
}
|
||||
else if (fHasShutdown)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
this_thread::sleep_for(chrono::milliseconds(100));
|
||||
}
|
||||
}
|
||||
|
||||
auto Control::HandleShutdownSignal() -> void
|
||||
{
|
||||
StealDeviceControl();
|
||||
|
||||
UnsubscribeFromDeviceStateChange(); // In case, static or interactive mode have subscribed already
|
||||
SubscribeToDeviceStateChange([&](DeviceState newState)
|
||||
if (!fPluginShutdownRequested)
|
||||
{
|
||||
{
|
||||
lock_guard<mutex> lock{fEventsMutex};
|
||||
fEvents.push(newState);
|
||||
}
|
||||
fNewEvent.notify_one();
|
||||
});
|
||||
LOG(info) << "Received device shutdown request (signal " << gLastSignal << ").";
|
||||
LOG(info) << "Waiting for graceful device shutdown. Hit Ctrl-C again to abort immediately.";
|
||||
|
||||
RunShutdownSequence();
|
||||
// Signal and wait for controller thread, if we are controller
|
||||
fDeviceShutdownRequested = true;
|
||||
{
|
||||
unique_lock<mutex> lock(fControllerMutex);
|
||||
if (fControllerThread.joinable()) fControllerThread.join();
|
||||
}
|
||||
|
||||
if (!fDeviceHasShutdown)
|
||||
{
|
||||
// Take over control and attempt graceful shutdown
|
||||
StealDeviceControl();
|
||||
try
|
||||
{
|
||||
RunShutdownSequence();
|
||||
}
|
||||
catch (PluginServices::DeviceControlError& e)
|
||||
{
|
||||
LOG(info) << "Graceful device shutdown failed: " << e.what() << " If hanging, hit Ctrl-C again to abort immediately.";
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
LOG(info) << "Graceful device shutdown failed. If hanging, hit Ctrl-C again to abort immediately.";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto Control::RunShutdownSequence() -> void
|
||||
{
|
||||
lock_guard<mutex> lock(fShutdownMutex);
|
||||
if (!fHasShutdown)
|
||||
auto nextState = GetCurrentDeviceState();
|
||||
EmptyEventQueue();
|
||||
while (nextState != DeviceState::Exiting)
|
||||
{
|
||||
auto nextState = GetCurrentDeviceState();
|
||||
EmptyEventQueue();
|
||||
while (nextState != DeviceState::Exiting)
|
||||
switch (nextState)
|
||||
{
|
||||
switch (nextState)
|
||||
{
|
||||
case DeviceState::Idle:
|
||||
ChangeDeviceState(DeviceStateTransition::End);
|
||||
break;
|
||||
case DeviceState::DeviceReady:
|
||||
ChangeDeviceState(DeviceStateTransition::ResetDevice);
|
||||
break;
|
||||
case DeviceState::Ready:
|
||||
ChangeDeviceState(DeviceStateTransition::ResetTask);
|
||||
break;
|
||||
case DeviceState::Running:
|
||||
ChangeDeviceState(DeviceStateTransition::Stop);
|
||||
break;
|
||||
case DeviceState::Paused:
|
||||
ChangeDeviceState(DeviceStateTransition::Resume);
|
||||
break;
|
||||
default:
|
||||
// ignore other states
|
||||
break;
|
||||
}
|
||||
|
||||
nextState = WaitForNextState();
|
||||
case DeviceState::Idle:
|
||||
ChangeDeviceState(DeviceStateTransition::End);
|
||||
break;
|
||||
case DeviceState::DeviceReady:
|
||||
ChangeDeviceState(DeviceStateTransition::ResetDevice);
|
||||
break;
|
||||
case DeviceState::Ready:
|
||||
ChangeDeviceState(DeviceStateTransition::ResetTask);
|
||||
break;
|
||||
case DeviceState::Running:
|
||||
ChangeDeviceState(DeviceStateTransition::Stop);
|
||||
break;
|
||||
case DeviceState::Paused:
|
||||
ChangeDeviceState(DeviceStateTransition::Resume);
|
||||
break;
|
||||
default:
|
||||
// ignore other states
|
||||
break;
|
||||
}
|
||||
|
||||
fHasShutdown = true;
|
||||
UnsubscribeFromDeviceStateChange();
|
||||
ReleaseDeviceControl();
|
||||
nextState = WaitForNextState();
|
||||
}
|
||||
|
||||
fDeviceHasShutdown = true;
|
||||
ReleaseDeviceControl();
|
||||
}
|
||||
|
||||
auto Control::RunStartupSequence() -> void
|
||||
{
|
||||
SubscribeToDeviceStateChange([&](DeviceState newState)
|
||||
{
|
||||
{
|
||||
lock_guard<mutex> lock{fEventsMutex};
|
||||
fEvents.push(newState);
|
||||
}
|
||||
fNewEvent.notify_one();
|
||||
});
|
||||
|
||||
ChangeDeviceState(DeviceStateTransition::InitDevice);
|
||||
while (WaitForNextState() != DeviceState::DeviceReady) {}
|
||||
ChangeDeviceState(DeviceStateTransition::InitTask);
|
||||
@@ -381,9 +378,16 @@ auto Control::EmptyEventQueue() -> void
|
||||
|
||||
Control::~Control()
|
||||
{
|
||||
if (fControllerThread.joinable()) fControllerThread.join();
|
||||
// Notify threads to exit
|
||||
fPluginShutdownRequested = true;
|
||||
|
||||
{
|
||||
unique_lock<mutex> lock(fControllerMutex);
|
||||
if (fControllerThread.joinable()) fControllerThread.join();
|
||||
}
|
||||
if (fSignalHandlerThread.joinable()) fSignalHandlerThread.join();
|
||||
if (fShutdownThread.joinable()) fShutdownThread.join();
|
||||
|
||||
UnsubscribeFromDeviceStateChange();
|
||||
}
|
||||
|
||||
} /* namespace plugins */
|
||||
|
Reference in New Issue
Block a user