Resolve hanging process in case of uncaught exception

This commit is contained in:
Dennis Klein 2018-10-08 21:15:56 +02:00 committed by Dennis Klein
parent e1f555bc05
commit 1aab354a5d
5 changed files with 275 additions and 260 deletions

View File

@ -222,7 +222,7 @@ void FairMQDevice::InitWrapper()
AttachChannels(uninitializedConnectingChannels);
}
Init();
CallAndHandleError(std::bind(&FairMQDevice::Init, this));
ChangeState(internal_DEVICE_READY);
}
@ -428,7 +428,7 @@ void FairMQDevice::InitTaskWrapper()
{
CallStateChangeCallbacks(INITIALIZING_TASK);
InitTask();
CallAndHandleError(std::bind(&FairMQDevice::InitTask, this));
ChangeState(internal_READY);
}
@ -503,6 +503,8 @@ void FairMQDevice::RunWrapper()
t.second->Resume();
}
CallAndHandleError([this]
{
try
{
PreRun();
@ -540,6 +542,7 @@ void FairMQDevice::RunWrapper()
LOG(error) << "out of range: " << oor.what();
LOG(error) << "incorrect/incomplete channel configuration?";
}
});
// if Run() exited and the state is still RUNNING, transition to READY.
if (CheckCurrentState(RUNNING))
@ -547,7 +550,7 @@ void FairMQDevice::RunWrapper()
ChangeState(internal_READY);
}
PostRun();
CallAndHandleError(std::bind(&FairMQDevice::PostRun, this));
rateLogger.join();
}
@ -770,7 +773,7 @@ void FairMQDevice::PauseWrapper()
{
CallStateChangeCallbacks(PAUSED);
Pause();
CallAndHandleError(std::bind(&FairMQDevice::Pause, this));
}
void FairMQDevice::Pause()
@ -936,7 +939,7 @@ void FairMQDevice::ResetTaskWrapper()
{
CallStateChangeCallbacks(RESETTING_TASK);
ResetTask();
CallAndHandleError(std::bind(&FairMQDevice::ResetTask, this));
ChangeState(internal_DEVICE_READY);
}
@ -949,7 +952,7 @@ void FairMQDevice::ResetWrapper()
{
CallStateChangeCallbacks(RESETTING_DEVICE);
Reset();
CallAndHandleError(std::bind(&FairMQDevice::Reset, this));
ChangeState(internal_IDLE);
}
@ -973,6 +976,17 @@ const FairMQChannel& FairMQDevice::GetChannel(const string& channelName, const i
return fChannels.at(channelName).at(index);
}
// Runs the given callable once and reacts to any exception that escapes it.
//
// callable: the code to invoke; it is called with no arguments.
//
// If callable() throws anything, ChangeState(ERROR_FOUND) is called and the
// very same exception is then rethrown to the caller of this function.
// If callable() returns normally, nothing else happens.
void FairMQDevice::CallAndHandleError(std::function<void()> callable)
// Function-try-block: the catch handler below covers the entire function body.
try
{
callable();
}
catch(...)
{
// Report the failure to the state machine first, then let the exception keep propagating.
ChangeState(ERROR_FOUND);
throw;
}
// Empty body: this implementation of Exit() performs no work.
void FairMQDevice::Exit()
{
}

View File

@ -462,39 +462,30 @@ class FairMQDevice : public FairMQStateMachine
std::string fId; ///< Device ID
/// Additional user initialization (can be overloaded in child classes). Prefer to use InitTask().
/// Executed in a worker thread
virtual void Init();
/// Task initialization (can be overloaded in child classes)
/// Executed in a worker thread
virtual void InitTask();
/// Runs the device (to be overloaded in child classes)
/// Executed in a worker thread
virtual void Run();
/// Called in the RUNNING state once before executing the Run()/ConditionalRun() method
/// Executed in a worker thread
virtual void PreRun();
/// Called during RUNNING state repeatedly until it returns false or device state changes
/// Executed in a worker thread
virtual bool ConditionalRun();
/// Called in the RUNNING state once after executing the Run()/ConditionalRun() method
/// Executed in a worker thread
virtual void PostRun();
/// Handles the PAUSE state
/// Executed in a worker thread
virtual void Pause();
/// Resets the user task (to be overloaded in child classes)
/// Executed in a worker thread
virtual void ResetTask();
/// Resets the device (can be overloaded in child classes)
/// Executed in a worker thread
virtual void Reset();
private:
@ -521,6 +512,9 @@ class FairMQDevice : public FairMQStateMachine
/// Handles the Reset() method
void ResetWrapper();
/// Used to call user code and handle uncaught exceptions
void CallAndHandleError(std::function<void()> callable);
/// Unblocks blocking channel send/receive calls
void Unblock();

View File

@ -21,6 +21,7 @@
#include <unordered_map>
#include <mutex>
#include <condition_variable>
#include <stdexcept>
namespace fair
{

View File

@ -1,5 +1,5 @@
/********************************************************************************
* Copyright (C) 2017 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* Copyright (C) 2017-2018 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence (LGPL) version 3, *
@ -11,6 +11,7 @@
#include <termios.h> // for the interactive mode
#include <poll.h> // for the interactive mode
#include <csignal> // catching system signals
#include <cstdlib>
#include <functional>
#include <atomic>
@ -18,11 +19,18 @@ using namespace std;
// File-local state shared between the C signal handler and the plugin code.
// NOTE(review): this listing is a diff rendered without +/- markers, so lines
// removed by the commit appear next to the lines that replace them. The
// gSignalStatus declaration and assignment look like the pre-change lines
// (the post-change code reads gLastSignal/gSignalCount) — confirm against the real file.
namespace
{
std::atomic<sig_atomic_t> gSignalStatus(0);
// Number of the most recently delivered signal (0 until a signal arrives).
std::atomic<sig_atomic_t> gLastSignal(0);
// How many times signal_handler has been entered.
std::atomic<int> gSignalCount(0);
// Signal handler with C linkage.
// signal: the signal number passed in by the runtime.
// Counts the delivery, remembers the signal number, and calls std::abort()
// as soon as the count exceeds one (i.e. on the second delivery).
extern "C" auto signal_handler(int signal) -> void
{
gSignalStatus = signal;
++gSignalCount;
gLastSignal = signal;
if (gSignalCount > 1)
{
// A repeated signal is treated as a request to stop immediately.
std::abort();
}
}
}
@ -37,14 +45,23 @@ Control::Control(const string& name, const Plugin::Version version, const string
: Plugin(name, version, maintainer, homepage, pluginServices)
, fControllerThread()
, fSignalHandlerThread()
, fShutdownThread()
, fEvents()
, fEventsMutex()
, fShutdownMutex()
, fControllerMutex()
, fNewEvent()
, fDeviceTerminationRequested(false)
, fHasShutdown(false)
, fDeviceShutdownRequested(false)
, fDeviceHasShutdown(false)
, fPluginShutdownRequested(false)
{
SubscribeToDeviceStateChange([&](DeviceState newState)
{
{
lock_guard<mutex> lock{fEventsMutex};
fEvents.push(newState);
}
fNewEvent.notify_one();
});
try
{
TakeDeviceControl();
@ -97,9 +114,8 @@ auto ControlPluginProgramOptions() -> Plugin::ProgOptions
}
auto Control::InteractiveMode() -> void
try
{
try
{
RunStartupSequence();
char input; // hold the user console input
@ -121,7 +137,7 @@ auto Control::InteractiveMode() -> void
{
if (poll(cinfd, 1, 500))
{
if (fDeviceTerminationRequested)
if (fDeviceShutdownRequested)
{
break;
}
@ -193,9 +209,9 @@ auto Control::InteractiveMode() -> void
}
}
if (fDeviceTerminationRequested)
if (fDeviceShutdownRequested)
{
keepRunning = false;
break;
}
}
@ -204,16 +220,15 @@ auto Control::InteractiveMode() -> void
t.c_lflag |= ECHO; // echo input chars
tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings
if (!fDeviceTerminationRequested)
{
RunShutdownSequence();
}
}
catch (PluginServices::DeviceControlError& e)
{
}
catch (PluginServices::DeviceControlError& e)
{
// If we are here, it means another plugin has taken control. That's fine, just print the exception message and do nothing else.
LOG(debug) << e.what();
}
}
catch (DeviceErrorState&)
{
}
auto Control::PrintInteractiveHelp() -> void
@ -234,91 +249,84 @@ auto Control::WaitForNextState() -> DeviceState
}
auto result = fEvents.front();
if (result == DeviceState::Error)
{
throw DeviceErrorState("Controlled device transitioned to error state.");
}
fEvents.pop();
return result;
}
// Runs the startup sequence and then blocks until either a device state event
// has been queued in fEvents or a shutdown has been requested.
// PluginServices::DeviceControlError is caught and only logged at debug level;
// DeviceErrorState is caught and ignored.
// NOTE(review): this listing is a diff rendered without +/- markers, so replaced
// lines appear directly before their replacements (the two `try` openers, the two
// `while` conditions, the two wait calls, the duplicated catch clauses). The block
// is not compilable as shown — confirm details against the real file.
auto Control::StaticMode() -> void
try
{
try
{
RunStartupSequence();
{
// Wait for next state, which is DeviceState::Ready,
// or for device termination request
// or for device shutdown request (Ctrl-C)
unique_lock<mutex> lock{fEventsMutex};
while (fEvents.empty() && !fDeviceTerminationRequested)
while (fEvents.empty() && !fDeviceShutdownRequested)
{
fNewEvent.wait(lock);
// Timed wait: the loop condition is re-evaluated at least every 50 ms.
fNewEvent.wait_for(lock, chrono::milliseconds(50));
}
}
if (!fDeviceTerminationRequested)
{
RunShutdownSequence();
}
}
catch (PluginServices::DeviceControlError& e)
{
}
catch (PluginServices::DeviceControlError& e)
{
// If we are here, it means another plugin has taken control. That's fine, just print the exception message and do nothing else.
LOG(debug) << e.what();
}
}
catch (DeviceErrorState&)
{
// Deliberately empty: the exception is absorbed here and the function returns.
}
// Signal-handling thread body.
// NOTE(review): this listing is a diff rendered without +/- markers. It interleaves
// the pre-change SignalHandler()/HandleShutdownSignal() pair with the post-change
// SignalHandler(), so braces do not balance and the text is not compilable as shown.
// The comments below describe individual statements only — confirm the overall
// control flow against the real file.
auto Control::SignalHandler() -> void
{
while (true)
// Poll until a signal has been counted or the plugin itself requests shutdown.
while (gSignalCount == 0 && !fPluginShutdownRequested)
{
if (gSignalStatus != 0 && !fHasShutdown)
{
LOG(info) << "Received device shutdown request (signal " << gSignalStatus << ").";
LOG(info) << "Waiting for graceful device shutdown. Hit Ctrl-C again to abort immediately.";
if (!fDeviceTerminationRequested)
{
fDeviceTerminationRequested = true;
gSignalStatus = 0;
fShutdownThread = thread(&Control::HandleShutdownSignal, this);
}
else
{
LOG(warn) << "Received 2nd device shutdown request (signal " << gSignalStatus << ").";
LOG(warn) << "Aborting immediately!";
abort();
}
}
else if (fHasShutdown)
{
break;
}
// Polling interval of the loop.
this_thread::sleep_for(chrono::milliseconds(100));
}
}
auto Control::HandleShutdownSignal() -> void
{
StealDeviceControl();
// Only act on a signal; skip everything when the loop ended because the plugin is shutting down.
if (!fPluginShutdownRequested)
{
LOG(info) << "Received device shutdown request (signal " << gLastSignal << ").";
LOG(info) << "Waiting for graceful device shutdown. Hit Ctrl-C again to abort immediately.";
UnsubscribeFromDeviceStateChange(); // In case static or interactive mode has already subscribed
SubscribeToDeviceStateChange([&](DeviceState newState)
// Signal and wait for controller thread, if we are controller
fDeviceShutdownRequested = true;
{
{
lock_guard<mutex> lock{fEventsMutex};
fEvents.push(newState);
// The controller thread is joined while holding fControllerMutex.
unique_lock<mutex> lock(fControllerMutex);
if (fControllerThread.joinable()) fControllerThread.join();
}
fNewEvent.notify_one();
});
if (!fDeviceHasShutdown)
{
// Take over control and attempt graceful shutdown
StealDeviceControl();
try
{
RunShutdownSequence();
}
catch (PluginServices::DeviceControlError& e)
{
LOG(info) << "Graceful device shutdown failed: " << e.what() << " If hanging, hit Ctrl-C again to abort immediately.";
}
catch (...)
{
// Any other failure is logged as well and not propagated out of this thread function.
LOG(info) << "Graceful device shutdown failed. If hanging, hit Ctrl-C again to abort immediately.";
}
}
}
}
auto Control::RunShutdownSequence() -> void
{
lock_guard<mutex> lock(fShutdownMutex);
if (!fHasShutdown)
{
auto nextState = GetCurrentDeviceState();
EmptyEventQueue();
while (nextState != DeviceState::Exiting)
@ -348,23 +356,12 @@ auto Control::RunShutdownSequence() -> void
nextState = WaitForNextState();
}
fHasShutdown = true;
UnsubscribeFromDeviceStateChange();
fDeviceHasShutdown = true;
ReleaseDeviceControl();
}
}
auto Control::RunStartupSequence() -> void
{
SubscribeToDeviceStateChange([&](DeviceState newState)
{
{
lock_guard<mutex> lock{fEventsMutex};
fEvents.push(newState);
}
fNewEvent.notify_one();
});
ChangeDeviceState(DeviceStateTransition::InitDevice);
while (WaitForNextState() != DeviceState::DeviceReady) {}
ChangeDeviceState(DeviceStateTransition::InitTask);
@ -381,9 +378,16 @@ auto Control::EmptyEventQueue() -> void
// Destructor: raises the plugin-shutdown flag, joins every worker thread that is
// still joinable, and finally unsubscribes from device state changes.
// NOTE(review): this listing is a diff rendered without +/- markers; the
// fShutdownThread join looks like a pre-change line — confirm against the real file.
Control::~Control()
{
// Notify threads to exit
fPluginShutdownRequested = true;
{
// The signal-handling code joins the controller thread under this same mutex,
// so the two join attempts cannot run concurrently.
unique_lock<mutex> lock(fControllerMutex);
if (fControllerThread.joinable()) fControllerThread.join();
}
if (fSignalHandlerThread.joinable()) fSignalHandlerThread.join();
if (fShutdownThread.joinable()) fShutdownThread.join();
UnsubscribeFromDeviceStateChange();
}
} /* namespace plugins */

View File

@ -18,6 +18,7 @@
#include <queue>
#include <thread>
#include <atomic>
#include <stdexcept>
namespace fair
{
@ -35,24 +36,25 @@ class Control : public Plugin
private:
auto InteractiveMode() -> void;
auto PrintInteractiveHelp() -> void;
static auto PrintInteractiveHelp() -> void;
auto StaticMode() -> void;
auto WaitForNextState() -> DeviceState;
auto SignalHandler() -> void;
auto HandleShutdownSignal() -> void;
auto RunShutdownSequence() -> void;
auto RunStartupSequence() -> void;
auto EmptyEventQueue() -> void;
std::thread fControllerThread;
std::thread fSignalHandlerThread;
std::thread fShutdownThread;
std::queue<DeviceState> fEvents;
std::mutex fEventsMutex;
std::mutex fShutdownMutex;
std::mutex fControllerMutex;
std::condition_variable fNewEvent;
std::atomic<bool> fDeviceTerminationRequested;
std::atomic<bool> fHasShutdown;
std::atomic<bool> fDeviceShutdownRequested;
std::atomic<bool> fDeviceHasShutdown;
std::atomic<bool> fPluginShutdownRequested;
/// Exception thrown by WaitForNextState() when the next queued device state is DeviceState::Error.
/// Derives from std::runtime_error and inherits its constructors.
struct DeviceErrorState : std::runtime_error { using std::runtime_error::runtime_error; };
}; /* class Control */
auto ControlPluginProgramOptions() -> Plugin::ProgOptions;