mirror of
https://github.com/FairRootGroup/FairMQ.git
synced 2025-10-13 16:46:47 +00:00
SDK: send heartbeats when subscribed to state changes
This commit is contained in:
parent
330687772f
commit
5721ea9510
|
@ -109,16 +109,27 @@ DDS::DDS(const string& name,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
lock_guard<mutex> lock{fStateChangeSubscriberMutex};
|
using namespace sdk::cmd;
|
||||||
|
auto now = chrono::steady_clock::now();
|
||||||
string id = GetProperty<string>("id");
|
string id = GetProperty<string>("id");
|
||||||
fLastState = fCurrentState;
|
fLastState = fCurrentState;
|
||||||
fCurrentState = newState;
|
fCurrentState = newState;
|
||||||
using namespace sdk::cmd;
|
|
||||||
for (auto subscriberId : fStateChangeSubscribers) {
|
|
||||||
LOG(debug) << "Publishing state-change: " << fLastState << "->" << newState << " to " << subscriberId;
|
|
||||||
|
|
||||||
Cmds cmds(make<StateChange>(id, fDDSTaskId, fLastState, fCurrentState));
|
lock_guard<mutex> lock{fStateChangeSubscriberMutex};
|
||||||
fDDS.Send(cmds.Serialize(), to_string(subscriberId));
|
for (auto it = fStateChangeSubscribers.cbegin(); it != fStateChangeSubscribers.end();) {
|
||||||
|
// if a subscriber did not send a heartbeat in more than 3 times the promised interval,
|
||||||
|
// remove it from the subscriber list
|
||||||
|
if (chrono::duration<double>(now - it->second.first).count() > 3 * it->second.second) {
|
||||||
|
LOG(warn) << "Controller '" << it->first
|
||||||
|
<< "' did not send heartbeats since over 3 intervals ("
|
||||||
|
<< 3 * it->second.second << " ms), removing it.";
|
||||||
|
fStateChangeSubscribers.erase(it++);
|
||||||
|
} else {
|
||||||
|
LOG(debug) << "Publishing state-change: " << fLastState << "->" << fCurrentState << " to " << it->first;
|
||||||
|
Cmds cmds(make<StateChange>(id, fDDSTaskId, fLastState, fCurrentState));
|
||||||
|
fDDS.Send(cmds.Serialize(), to_string(it->first));
|
||||||
|
++it;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -150,7 +161,7 @@ auto DDS::WaitForExitingAck() -> void
|
||||||
unique_lock<mutex> lock(fStateChangeSubscriberMutex);
|
unique_lock<mutex> lock(fStateChangeSubscriberMutex);
|
||||||
auto timeout = GetProperty<unsigned int>("wait-for-exiting-ack-timeout");
|
auto timeout = GetProperty<unsigned int>("wait-for-exiting-ack-timeout");
|
||||||
fExitingAcked.wait_for(lock, chrono::milliseconds(timeout), [this]() {
|
fExitingAcked.wait_for(lock, chrono::milliseconds(timeout), [this]() {
|
||||||
return fExitingAckedByLastExternalController;
|
return fExitingAckedByLastExternalController || fStateChangeSubscribers.empty();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -362,8 +373,9 @@ auto DDS::HandleCmd(const string& id, sdk::cmd::Cmd& cmd, const string& cond, ui
|
||||||
fExitingAcked.notify_one();
|
fExitingAcked.notify_one();
|
||||||
} break;
|
} break;
|
||||||
case Type::subscribe_to_state_change: {
|
case Type::subscribe_to_state_change: {
|
||||||
|
auto _cmd = static_cast<cmd::SubscribeToStateChange&>(cmd);
|
||||||
lock_guard<mutex> lock{fStateChangeSubscriberMutex};
|
lock_guard<mutex> lock{fStateChangeSubscriberMutex};
|
||||||
fStateChangeSubscribers.insert(senderId);
|
fStateChangeSubscribers.emplace(senderId, make_pair(chrono::steady_clock::now(), _cmd.GetInterval()));
|
||||||
|
|
||||||
LOG(debug) << "Publishing state-change: " << fLastState << "->" << fCurrentState << " to " << senderId;
|
LOG(debug) << "Publishing state-change: " << fLastState << "->" << fCurrentState << " to " << senderId;
|
||||||
|
|
||||||
|
@ -372,6 +384,15 @@ auto DDS::HandleCmd(const string& id, sdk::cmd::Cmd& cmd, const string& cond, ui
|
||||||
|
|
||||||
fDDS.Send(outCmds.Serialize(), to_string(senderId));
|
fDDS.Send(outCmds.Serialize(), to_string(senderId));
|
||||||
} break;
|
} break;
|
||||||
|
case Type::subscription_heartbeat: {
|
||||||
|
try {
|
||||||
|
auto _cmd = static_cast<cmd::SubscriptionHeartbeat&>(cmd);
|
||||||
|
lock_guard<mutex> lock{fStateChangeSubscriberMutex};
|
||||||
|
fStateChangeSubscribers.at(senderId) = make_pair(chrono::steady_clock::now(), _cmd.GetInterval());
|
||||||
|
} catch(out_of_range& oor) {
|
||||||
|
LOG(warn) << "Received subscription heartbeat from an unknown controller with id '" << senderId << "'";
|
||||||
|
}
|
||||||
|
} break;
|
||||||
case Type::unsubscribe_from_state_change: {
|
case Type::unsubscribe_from_state_change: {
|
||||||
{
|
{
|
||||||
lock_guard<mutex> lock{fStateChangeSubscriberMutex};
|
lock_guard<mutex> lock{fStateChangeSubscriberMutex};
|
||||||
|
@ -384,12 +405,12 @@ auto DDS::HandleCmd(const string& id, sdk::cmd::Cmd& cmd, const string& cond, ui
|
||||||
auto _cmd = static_cast<cmd::GetProperties&>(cmd);
|
auto _cmd = static_cast<cmd::GetProperties&>(cmd);
|
||||||
auto const request_id(_cmd.GetRequestId());
|
auto const request_id(_cmd.GetRequestId());
|
||||||
auto result(Result::Ok);
|
auto result(Result::Ok);
|
||||||
std::vector<std::pair<std::string, std::string>> props;
|
vector<pair<string, string>> props;
|
||||||
try {
|
try {
|
||||||
for (auto const& prop : GetPropertiesAsString(_cmd.GetQuery())) {
|
for (auto const& prop : GetPropertiesAsString(_cmd.GetQuery())) {
|
||||||
props.push_back({prop.first, prop.second});
|
props.push_back({prop.first, prop.second});
|
||||||
}
|
}
|
||||||
} catch (std::exception const& e) {
|
} catch (exception const& e) {
|
||||||
LOG(warn) << "Getting properties (request id: " << request_id << ") failed: " << e.what();
|
LOG(warn) << "Getting properties (request id: " << request_id << ") failed: " << e.what();
|
||||||
result = Result::Failure;
|
result = Result::Failure;
|
||||||
}
|
}
|
||||||
|
@ -407,7 +428,7 @@ auto DDS::HandleCmd(const string& id, sdk::cmd::Cmd& cmd, const string& cond, ui
|
||||||
}
|
}
|
||||||
// TODO Handle builtin keys with different value type than string
|
// TODO Handle builtin keys with different value type than string
|
||||||
SetProperties(props);
|
SetProperties(props);
|
||||||
} catch (std::exception const& e) {
|
} catch (exception const& e) {
|
||||||
LOG(warn) << "Setting properties (request id: " << request_id << ") failed: " << e.what();
|
LOG(warn) << "Setting properties (request id: " << request_id << ") failed: " << e.what();
|
||||||
result = Result::Failure;
|
result = Result::Failure;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,12 +24,12 @@
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <set>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <utility> // pair
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace fair
|
namespace fair
|
||||||
|
@ -159,7 +159,7 @@ class DDS : public Plugin
|
||||||
|
|
||||||
std::atomic<bool> fDeviceTerminationRequested;
|
std::atomic<bool> fDeviceTerminationRequested;
|
||||||
|
|
||||||
std::set<uint64_t> fStateChangeSubscribers;
|
std::unordered_map<uint64_t, std::pair<std::chrono::steady_clock::time_point, int64_t>> fStateChangeSubscribers;
|
||||||
uint64_t fLastExternalController;
|
uint64_t fLastExternalController;
|
||||||
bool fExitingAckedByLastExternalController;
|
bool fExitingAckedByLastExternalController;
|
||||||
std::condition_variable fExitingAcked;
|
std::condition_variable fExitingAcked;
|
||||||
|
|
|
@ -53,7 +53,7 @@ struct StateSubscription
|
||||||
explicit StateSubscription(pmix::Commands& commands)
|
explicit StateSubscription(pmix::Commands& commands)
|
||||||
: fCommands(commands)
|
: fCommands(commands)
|
||||||
{
|
{
|
||||||
fCommands.Send(Cmds(make<SubscribeToStateChange>()).Serialize(Format::JSON));
|
fCommands.Send(Cmds(make<SubscribeToStateChange>(600000)).Serialize(Format::JSON));
|
||||||
}
|
}
|
||||||
|
|
||||||
~StateSubscription()
|
~StateSubscription()
|
||||||
|
|
|
@ -175,6 +175,8 @@ class BasicTopology : public AsioBase<Executor, Allocator>
|
||||||
, fDDSTopo(std::move(topo))
|
, fDDSTopo(std::move(topo))
|
||||||
, fStateData()
|
, fStateData()
|
||||||
, fStateIndex()
|
, fStateIndex()
|
||||||
|
, fHeartbeatsTimer(asio::system_executor())
|
||||||
|
, fHeartbeatInterval(600000)
|
||||||
{
|
{
|
||||||
makeTopologyState();
|
makeTopologyState();
|
||||||
|
|
||||||
|
@ -213,16 +215,36 @@ class BasicTopology : public AsioBase<Executor, Allocator>
|
||||||
|
|
||||||
void SubscribeToStateChanges()
|
void SubscribeToStateChanges()
|
||||||
{
|
{
|
||||||
using namespace fair::mq::sdk::cmd;
|
|
||||||
// FAIR_LOG(debug) << "Subscribing to state change";
|
// FAIR_LOG(debug) << "Subscribing to state change";
|
||||||
Cmds cmds(make<SubscribeToStateChange>());
|
cmd::Cmds cmds(cmd::make<cmd::SubscribeToStateChange>(fHeartbeatInterval.count()));
|
||||||
fDDSSession.SendCommand(cmds.Serialize());
|
fDDSSession.SendCommand(cmds.Serialize());
|
||||||
|
|
||||||
|
fHeartbeatsTimer.expires_after(fHeartbeatInterval);
|
||||||
|
fHeartbeatsTimer.async_wait(std::bind(&BasicTopology::SendSubscriptionHeartbeats, this, std::placeholders::_1));
|
||||||
|
}
|
||||||
|
|
||||||
|
void SendSubscriptionHeartbeats(const std::error_code& ec)
|
||||||
|
{
|
||||||
|
if (!ec) {
|
||||||
|
// Timer expired.
|
||||||
|
fDDSSession.SendCommand(cmd::Cmds(cmd::make<cmd::SubscriptionHeartbeat>(fHeartbeatInterval.count())).Serialize());
|
||||||
|
// schedule again
|
||||||
|
fHeartbeatsTimer.expires_after(fHeartbeatInterval);
|
||||||
|
fHeartbeatsTimer.async_wait(std::bind(&BasicTopology::SendSubscriptionHeartbeats, this, std::placeholders::_1));
|
||||||
|
} else if (ec == asio::error::operation_aborted) {
|
||||||
|
// FAIR_LOG(debug) << "Heartbeats timer canceled";
|
||||||
|
} else {
|
||||||
|
FAIR_LOG(error) << "Timer error: " << ec;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnsubscribeFromStateChanges()
|
void UnsubscribeFromStateChanges()
|
||||||
{
|
{
|
||||||
using namespace fair::mq::sdk::cmd;
|
// stop sending heartbeats
|
||||||
fDDSSession.SendCommand(Cmds(make<UnsubscribeFromStateChange>()).Serialize());
|
fHeartbeatsTimer.cancel();
|
||||||
|
|
||||||
|
// unsubscribe from state changes
|
||||||
|
fDDSSession.SendCommand(cmd::Cmds(cmd::make<cmd::UnsubscribeFromStateChange>()).Serialize());
|
||||||
|
|
||||||
// wait for all tasks to confirm unsubscription
|
// wait for all tasks to confirm unsubscription
|
||||||
std::unique_lock<std::mutex> lk(fMtx);
|
std::unique_lock<std::mutex> lk(fMtx);
|
||||||
|
@ -309,10 +331,8 @@ class BasicTopology : public AsioBase<Executor, Allocator>
|
||||||
|
|
||||||
auto HandleCmd(cmd::StateChange const& cmd, DDSChannel::Id const& senderId) -> void
|
auto HandleCmd(cmd::StateChange const& cmd, DDSChannel::Id const& senderId) -> void
|
||||||
{
|
{
|
||||||
using namespace fair::mq::sdk::cmd;
|
|
||||||
|
|
||||||
if (cmd.GetCurrentState() == DeviceState::Exiting) {
|
if (cmd.GetCurrentState() == DeviceState::Exiting) {
|
||||||
fDDSSession.SendCommand(Cmds(make<StateChangeExitingReceived>()).Serialize(), senderId);
|
fDDSSession.SendCommand(cmd::Cmds(cmd::make<cmd::StateChangeExitingReceived>()).Serialize(), senderId);
|
||||||
}
|
}
|
||||||
|
|
||||||
DDSTask::Id taskId(cmd.GetTaskId());
|
DDSTask::Id taskId(cmd.GetTaskId());
|
||||||
|
@ -1193,6 +1213,9 @@ class BasicTopology : public AsioBase<Executor, Allocator>
|
||||||
return {ec, failed};
|
return {ec, failed};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Duration GetHeartbeatInterval() const { return fHeartbeatInterval; }
|
||||||
|
void SetHeartbeatInterval(Duration duration) { fHeartbeatInterval = duration; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
using TransitionedCount = unsigned int;
|
using TransitionedCount = unsigned int;
|
||||||
|
|
||||||
|
@ -1200,8 +1223,12 @@ class BasicTopology : public AsioBase<Executor, Allocator>
|
||||||
DDSTopology fDDSTopo;
|
DDSTopology fDDSTopo;
|
||||||
TopologyState fStateData;
|
TopologyState fStateData;
|
||||||
TopologyStateIndex fStateIndex;
|
TopologyStateIndex fStateIndex;
|
||||||
|
|
||||||
mutable std::mutex fMtx;
|
mutable std::mutex fMtx;
|
||||||
|
|
||||||
std::condition_variable fStateChangeUnsubscriptionCV;
|
std::condition_variable fStateChangeUnsubscriptionCV;
|
||||||
|
asio::steady_timer fHeartbeatsTimer;
|
||||||
|
Duration fHeartbeatInterval;
|
||||||
|
|
||||||
std::unordered_map<typename ChangeStateOp::Id, ChangeStateOp> fChangeStateOps;
|
std::unordered_map<typename ChangeStateOp::Id, ChangeStateOp> fChangeStateOps;
|
||||||
std::unordered_map<typename WaitForStateOp::Id, WaitForStateOp> fWaitForStateOps;
|
std::unordered_map<typename WaitForStateOp::Id, WaitForStateOp> fWaitForStateOps;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user