mirror of
https://github.com/FairRootGroup/FairMQ.git
synced 2025-10-13 16:46:47 +00:00
shm: eliminate race/deadlock in region subscriptions
This commit is contained in:
parent
2db2516141
commit
10fca7d456
|
@ -384,10 +384,10 @@ class Manager
|
|||
|
||||
void RemoveRegion(const uint16_t id)
|
||||
{
|
||||
fRegions.erase(id);
|
||||
{
|
||||
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
|
||||
fShmRegions->at(id).fDestroyed = true;
|
||||
fRegions.erase(id);
|
||||
(fEventCounter->fCount)++;
|
||||
}
|
||||
fRegionsGen += 1; // signal TL cache invalidation
|
||||
|
@ -483,19 +483,26 @@ class Manager
|
|||
auto infos = GetRegionInfoUnsafe();
|
||||
for (const auto& i : infos) {
|
||||
auto el = fObservedRegionEvents.find({i.id, i.managed});
|
||||
if (el == fObservedRegionEvents.end()) {
|
||||
fRegionEventCallback(i);
|
||||
if (el == fObservedRegionEvents.end()) { // if event id has not been observed
|
||||
fObservedRegionEvents.emplace(std::make_pair(i.id, i.managed), i.event);
|
||||
++fNumObservedEvents;
|
||||
} else {
|
||||
// if a region has been created and destroyed rapidly, we could see 'destroyed' without ever seeing 'created'
|
||||
// TODO: do we care to show 'created' events if we know region is already destroyed?
|
||||
if (i.event == RegionEvent::created) {
|
||||
fRegionEventCallback(i);
|
||||
++fNumObservedEvents;
|
||||
} else {
|
||||
fNumObservedEvents += 2;
|
||||
}
|
||||
} else { // if event id has been observed (expected - there are two events per id - created & destroyed)
|
||||
// fire a callback if we have observed 'created' event and incoming is 'destroyed'
|
||||
if (el->second == RegionEvent::created && i.event == RegionEvent::destroyed) {
|
||||
fRegionEventCallback(i);
|
||||
el->second = i.event;
|
||||
++fNumObservedEvents;
|
||||
} else {
|
||||
// LOG(debug) << "ignoring event for id" << i.id << ":";
|
||||
// LOG(debug) << "incoming event: " << i.event;
|
||||
// LOG(debug) << "stored event: " << el->second;
|
||||
// LOG(debug) << "ignoring event for id " << i.id << ":"
|
||||
// << " incoming: " << i.event << ","
|
||||
// << " stored: " << el->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -688,7 +695,7 @@ class Manager
|
|||
std::thread fRegionEventThread;
|
||||
bool fRegionEventsSubscriptionActive;
|
||||
std::function<void(fair::mq::RegionInfo)> fRegionEventCallback;
|
||||
std::map<std::pair<uint16_t, bool>, RegionEvent> fObservedRegionEvents;
|
||||
std::map<std::pair<uint16_t, bool>, RegionEvent> fObservedRegionEvents; // pair: <region id, managed>
|
||||
uint64_t fNumObservedEvents;
|
||||
|
||||
DeviceCounter* fDeviceCounter;
|
||||
|
|
|
@ -106,7 +106,7 @@ struct Region
|
|||
|
||||
InitializeQueues();
|
||||
StartSendingAcks();
|
||||
LOG(debug) << "shmem: initialized region: " << fName;
|
||||
LOG(trace) << "shmem: initialized region: " << fName << " (" << (fRemote ? "remote" : "local") << ")";
|
||||
}
|
||||
|
||||
Region() = delete;
|
||||
|
@ -123,7 +123,7 @@ struct Region
|
|||
} else {
|
||||
fQueue = std::make_unique<message_queue>(create_only, fQueueName.c_str(), 1024, fAckBunchSize * sizeof(RegionBlock));
|
||||
}
|
||||
LOG(debug) << "shmem: initialized region queue: " << fQueueName;
|
||||
LOG(trace) << "shmem: initialized region queue: " << fQueueName << " (" << (fRemote ? "remote" : "local") << ")";
|
||||
}
|
||||
|
||||
void StartSendingAcks() { fAcksSender = std::thread(&Region::SendAcks, this); }
|
||||
|
@ -238,11 +238,11 @@ struct Region
|
|||
}
|
||||
|
||||
if (boost::interprocess::shared_memory_object::remove(fName.c_str())) {
|
||||
LOG(debug) << "Region '" << fName << "' destroyed.";
|
||||
LOG(trace) << "Region '" << fName << "' destroyed.";
|
||||
}
|
||||
|
||||
if (boost::interprocess::file_mapping::remove(fName.c_str())) {
|
||||
LOG(debug) << "File mapping '" << fName << "' destroyed.";
|
||||
LOG(trace) << "File mapping '" << fName << "' destroyed.";
|
||||
}
|
||||
|
||||
if (fFile) {
|
||||
|
@ -250,14 +250,13 @@ struct Region
|
|||
}
|
||||
|
||||
if (boost::interprocess::message_queue::remove(fQueueName.c_str())) {
|
||||
LOG(debug) << "Region queue '" << fQueueName << "' destroyed.";
|
||||
LOG(trace) << "Region queue '" << fQueueName << "' destroyed.";
|
||||
}
|
||||
} else {
|
||||
// LOG(debug) << "shmem: region '" << fName << "' is remote, no cleanup necessary.";
|
||||
LOG(debug) << "Region queue '" << fQueueName << "' is remote, no cleanup necessary";
|
||||
// LOG(debug) << "Region queue '" << fQueueName << "' is remote, no cleanup necessary";
|
||||
}
|
||||
|
||||
LOG(debug) << "Region '" << fName << "' (" << (fRemote ? "remote" : "local") << ") destructed.";
|
||||
// LOG(debug) << "Region '" << fName << "' (" << (fRemote ? "remote" : "local") << ") destructed.";
|
||||
}
|
||||
|
||||
bool fRemote;
|
||||
|
|
Loading…
Reference in New Issue
Block a user