Compare commits

..

19 Commits

Author SHA1 Message Date
Alexey Rybalchenko
f6bade32bb modify keep-alive example executable a bit, make it configurable 2022-01-12 19:54:49 +01:00
Alexey Rybalchenko
ddf9bc7272 shm: keep mng segment around when skipping cleanup 2022-01-12 19:54:49 +01:00
Alexey Rybalchenko
f79a0714b4 shm: fix double unlock() 2022-01-12 19:54:49 +01:00
Alexey Rybalchenko
c04958e2a4 shm: reduce contention on region events 2022-01-10 19:42:08 +01:00
Alexey Rybalchenko
692576a5b1 shm: add APIs for implementing keep-alive process 2021-12-16 16:27:07 +01:00
Alexey Rybalchenko
eb4620b1ec shm: always open_or_create segment 2021-12-16 16:27:07 +01:00
Alexey Rybalchenko
9f9583eb55 shm: hide picosha2 from header 2021-12-16 16:27:07 +01:00
Alexey Rybalchenko
08ba068791 shm: remove unused member 2021-12-16 16:27:07 +01:00
Alexey Rybalchenko
1839f7e8c0 shm: integrate mtx and cv into management segment 2021-12-16 16:27:07 +01:00
Alexey Rybalchenko
80ed45df63 extend region config 2021-12-16 16:27:07 +01:00
Alexey Rybalchenko
eef42d2dea simplify region cleanup 2021-12-16 16:27:07 +01:00
Alexey Rybalchenko
d630fbb1e4 consolidate UnmanagedRegion options 2021-12-16 16:27:07 +01:00
Giulio Eulisse
acfb495411 Do not print logo, if not requested 2021-12-14 11:26:10 +01:00
Alexey Rybalchenko
953c4a75c8 refactor: deduplicate more zmq/shmem code 2021-12-06 09:45:39 +01:00
Alexey Rybalchenko
f24dee33c2 Add configurable default snd/rcv timeout 2021-12-06 09:45:39 +01:00
Alexey Rybalchenko
856780f88a fix: install tools/Exceptions.h 2021-11-12 13:20:48 +01:00
Alexey Rybalchenko
dbdf17c661 Avoid accessing Device.fChannels directly, use getters 2021-11-03 20:23:40 +01:00
Alexey Rybalchenko
a3bb5fb4b0 feat: Add Device::GetNumSubChannels(channel) 2021-11-03 20:23:40 +01:00
Alexey Rybalchenko
0eaea3c66f Do not catch and rethrow exception from state handlers 2021-11-03 20:23:40 +01:00
48 changed files with 1597 additions and 944 deletions

View File

@@ -19,7 +19,7 @@ struct Sampler : fair::mq::Device
{
void InitTask() override
{
fNumDataChannels = fChannels.at("data").size();
fNumDataChannels = GetNumSubChannels("data");
fMaxIterations = fConfig->GetProperty<uint64_t>("max-iterations");
}

View File

@@ -28,7 +28,7 @@ struct Sender : fair::mq::Device
void Run() override
{
FairMQChannel& dataInChannel = fChannels.at("sync").at(0);
FairMQChannel& dataInChannel = GetChannel("sync", 0);
while (!NewStatePending()) {
Header h;

View File

@@ -26,7 +26,7 @@ struct Receiver : Device
void Run() override
{
Channel& dataInChannel = fChannels.at("sr").at(0);
Channel& dataInChannel = GetChannel("sr", 0);
while (!NewStatePending()) {
auto msg(dataInChannel.NewMessage());

View File

@@ -12,7 +12,10 @@ target_link_libraries(fairmq-ex-region-sampler PRIVATE FairMQ)
add_executable(fairmq-ex-region-sink sink.cxx)
target_link_libraries(fairmq-ex-region-sink PRIVATE FairMQ)
add_custom_target(ExampleRegion DEPENDS fairmq-ex-region-sampler fairmq-ex-region-sink)
add_executable(fairmq-ex-region-keep-alive keep-alive.cxx)
target_link_libraries(fairmq-ex-region-keep-alive PRIVATE FairMQ)
add_custom_target(ExampleRegion DEPENDS fairmq-ex-region-sampler fairmq-ex-region-sink fairmq-ex-region-keep-alive)
set(EX_BIN_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(FAIRMQ_BIN_DIR ${CMAKE_BINARY_DIR}/fairmq)

View File

@@ -0,0 +1,144 @@
/********************************************************************************
* Copyright (C) 2021 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#include <fairmq/shmem/Common.h>
#include <fairmq/shmem/UnmanagedRegion.h>
#include <fairmq/shmem/Segment.h>
#include <fairmq/shmem/Monitor.h>
#include <fairmq/tools/Unique.h>
#include <fairlogger/Logger.h>
#include <boost/algorithm/string.hpp>
#include <boost/program_options.hpp>
#include <chrono>
#include <csignal>
#include <cstdint>
#include <exception>
#include <limits>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <thread>
#include <utility>
#include <vector>
using namespace std;
using namespace boost::program_options;
namespace
{

/// Shutdown request flag: 0 while the process should keep running, 1 once a
/// termination signal has been delivered. Written by signalHandler() and
/// polled by the wait loop in main().
volatile std::sig_atomic_t gStopping = 0;

} // namespace

/// Signal handler registered in main() for SIGINT and SIGTERM.
/// It only raises the shutdown flag; the signal number itself is not inspected.
void signalHandler(int)
{
    gStopping = 1;
}
struct ShmManager
{
ShmManager(uint64_t _shmId, const vector<string>& _segments, const vector<string>& _regions)
: shmId(fair::mq::shmem::makeShmIdStr(_shmId))
{
for (const auto& s : _segments) {
vector<string> segmentConf;
boost::algorithm::split(segmentConf, s, boost::algorithm::is_any_of(","));
if (segmentConf.size() != 2) {
LOG(error) << "incorrect format for --segments. Expecting pairs of <id>,<size>.";
fair::mq::shmem::Monitor::Cleanup(fair::mq::shmem::ShmId{shmId});
throw runtime_error("incorrect format for --segments. Expecting pairs of <id>,<size>.");
}
uint16_t id = stoi(segmentConf.at(0));
uint64_t size = stoull(segmentConf.at(1));
auto ret = segments.emplace(id, fair::mq::shmem::Segment(shmId, id, size, fair::mq::shmem::rbTreeBestFit));
fair::mq::shmem::Segment& segment = ret.first->second;
LOG(info) << "Created segment " << id << " of size " << segment.GetSize() << ", starting at " << segment.GetData() << ". Locking...";
segment.Lock();
LOG(info) << "Done.";
LOG(info) << "Zeroing...";
segment.Zero();
LOG(info) << "Done.";
}
for (const auto& r : _regions) {
vector<string> regionConf;
boost::algorithm::split(regionConf, r, boost::algorithm::is_any_of(","));
if (regionConf.size() != 2) {
LOG(error) << "incorrect format for --regions. Expecting pairs of <id>,<size>.";
fair::mq::shmem::Monitor::Cleanup(fair::mq::shmem::ShmId{shmId});
throw runtime_error("incorrect format for --regions. Expecting pairs of <id>,<size>.");
}
uint16_t id = stoi(regionConf.at(0));
uint64_t size = stoull(regionConf.at(1));
auto ret = regions.emplace(id, make_unique<fair::mq::shmem::UnmanagedRegion>(shmId, id, size));
fair::mq::shmem::UnmanagedRegion& region = *(ret.first->second);
LOG(info) << "Created unamanged region " << id << " of size " << region.GetSize() << ", starting at " << region.GetData() << ". Locking...";
region.Lock();
LOG(info) << "Done.";
LOG(info) << "Zeroing...";
region.Zero();
LOG(info) << "Done.";
}
}
void ResetContent()
{
fair::mq::shmem::Monitor::ResetContent(fair::mq::shmem::ShmId{shmId});
}
~ShmManager()
{
// clean all segments, regions and any other shmem objects belonging to this shmId
fair::mq::shmem::Monitor::Cleanup(fair::mq::shmem::ShmId{shmId});
}
std::string shmId;
map<uint16_t, fair::mq::shmem::Segment> segments;
map<uint16_t, unique_ptr<fair::mq::shmem::UnmanagedRegion>> regions;
};
int main(int argc, char** argv)
{
fair::Logger::SetConsoleColor(true);
signal(SIGINT, signalHandler);
signal(SIGTERM, signalHandler);
try {
uint64_t shmId = 0;
vector<string> segments;
vector<string> regions;
options_description desc("Options");
desc.add_options()
("shmid", value<uint64_t>(&shmId)->required(), "Shm id")
("segments", value<vector<string>>(&segments)->multitoken()->composing(), "Segments, as <id>,<size> <id>,<size> <id>,<size> ...")
("regions", value<vector<string>>(&regions)->multitoken()->composing(), "Regions, as <id>,<size> <id>,<size> <id>,<size> ...")
("help,h", "Print help");
variables_map vm;
store(parse_command_line(argc, argv, desc), vm);
if (vm.count("help")) {
LOG(info) << "ShmManager" << "\n" << desc;
return 0;
}
notify(vm);
ShmManager shmManager(shmId, segments, regions);
while (!gStopping) {
std::this_thread::sleep_for(std::chrono::milliseconds(50));
}
LOG(info) << "stopping.";
} catch (exception& e) {
LOG(error) << "Unhandled Exception reached the top of main: " << e.what() << ", application will now exit";
return 2;
}
return 0;
}

View File

@@ -23,7 +23,7 @@ struct Sampler : fair::mq::Device
fLinger = fConfig->GetProperty<uint32_t>("region-linger");
fMaxIterations = fConfig->GetProperty<uint64_t>("max-iterations");
fChannels.at("data").at(0).Transport()->SubscribeToRegionEvents([](FairMQRegionInfo info) {
GetChannel("data", 0).Transport()->SubscribeToRegionEvents([](FairMQRegionInfo info) {
LOG(info) << "Region event: " << info.event << ": "
<< (info.managed ? "managed" : "unmanaged")
<< ", id: " << info.id
@@ -32,22 +32,20 @@ struct Sampler : fair::mq::Device
<< ", flags: " << info.flags;
});
fair::mq::RegionConfig regionCfg;
regionCfg.linger = fLinger; // delay in ms before region destruction to collect outstanding events
regionCfg.lock = true; // mlock region after creation
regionCfg.zero = true; // zero region content after creation
fRegion = FairMQUnmanagedRegionPtr(NewUnmanagedRegionFor("data", // region is created using the transport of this channel...
0, // ... and this sub-channel
10000000, // region size
[this](const std::vector<fair::mq::RegionBlock>& blocks) { // callback to be called when message buffers no longer needed by transport
std::lock_guard<std::mutex> lock(fMtx);
fNumUnackedMsgs -= blocks.size();
if (fMaxIterations > 0) {
LOG(info) << "Received " << blocks.size() << " acks";
}
},
"", // path, if a region is backed by a file
0, // flags that are passed for region creation
fair::mq::RegionConfig{true, true} // additional config: { call mlock on the region, zero the region memory }
));
fRegion->SetLinger(fLinger);
}, regionCfg));
}
bool ConditionalRun() override
@@ -87,7 +85,7 @@ struct Sampler : fair::mq::Device
LOG(info) << "All acknowledgements received.";
}
}
fChannels.at("data").at(0).Transport()->UnsubscribeFromRegionEvents();
GetChannel("data", 0).Transport()->UnsubscribeFromRegionEvents();
}
private:

View File

@@ -22,7 +22,7 @@ struct Sink : Device
{
// Get the fMaxIterations value from the command line options (via fConfig)
fMaxIterations = fConfig->GetProperty<uint64_t>("max-iterations");
fChannels.at("data").at(0).Transport()->SubscribeToRegionEvents([](RegionInfo info) {
GetChannel("data", 0).Transport()->SubscribeToRegionEvents([](RegionInfo info) {
LOG(info) << "Region event: " << info.event << ": "
<< (info.managed ? "managed" : "unmanaged") << ", id: " << info.id
<< ", ptr: " << info.ptr << ", size: " << info.size
@@ -32,7 +32,7 @@ struct Sink : Device
void Run() override
{
Channel& dataInChannel = fChannels.at("data").at(0);
Channel& dataInChannel = GetChannel("data", 0);
while (!NewStatePending()) {
auto msg(dataInChannel.Transport()->CreateMessage());
@@ -51,7 +51,7 @@ struct Sink : Device
void ResetTask() override
{
fChannels.at("data").at(0).Transport()->UnsubscribeFromRegionEvents();
GetChannel("data", 0).Transport()->UnsubscribeFromRegionEvents();
}
private:

View File

@@ -30,6 +30,7 @@ if(BUILD_FAIRMQ OR BUILD_SDK)
set(TOOLS_PUBLIC_HEADER_FILES
tools/CppSTL.h
tools/Exceptions.h
tools/InstanceLimit.h
tools/IO.h
tools/Network.h
@@ -191,6 +192,9 @@ if(BUILD_FAIRMQ)
runDevice.h
runFairMQDevice.h
shmem/Monitor.h
shmem/Common.h
shmem/UnmanagedRegion.h
shmem/Segment.h
)
set(FAIRMQ_PRIVATE_HEADER_FILES
@@ -205,12 +209,10 @@ if(BUILD_FAIRMQ)
plugins/control/Control.h
shmem/Message.h
shmem/Poller.h
shmem/UnmanagedRegion.h
shmem/UnmanagedRegionImpl.h
shmem/Socket.h
shmem/TransportFactory.h
shmem/Common.h
shmem/Manager.h
shmem/Region.h
zeromq/Common.h
zeromq/Context.h
zeromq/Message.h
@@ -248,6 +250,7 @@ if(BUILD_FAIRMQ)
TransportFactory.cxx
plugins/config/Config.cxx
plugins/control/Control.cxx
shmem/Common.cxx
shmem/Manager.cxx
shmem/Monitor.cxx
)
@@ -384,7 +387,7 @@ if(BUILD_FAIRMQ)
fairmq_target_tidy(TARGET fairmq-splitter)
endif()
add_executable(fairmq-shmmonitor shmem/Monitor.cxx shmem/Monitor.h shmem/runMonitor.cxx)
add_executable(fairmq-shmmonitor shmem/Common.cxx shmem/Monitor.cxx shmem/Monitor.h shmem/runMonitor.cxx)
target_compile_features(fairmq-shmmonitor PUBLIC cxx_std_17)
target_compile_definitions(fairmq-shmmonitor PUBLIC BOOST_ERROR_CODE_HEADER_ONLY)
if(FAIRMQ_DEBUG_MODE)

View File

@@ -39,6 +39,8 @@ constexpr int Channel::DefaultSndBufSize;
constexpr int Channel::DefaultRcvBufSize;
constexpr int Channel::DefaultSndKernelSize;
constexpr int Channel::DefaultRcvKernelSize;
constexpr int Channel::DefaultSndTimeoutMs;
constexpr int Channel::DefaultRcvTimeoutMs;
constexpr int Channel::DefaultLinger;
constexpr int Channel::DefaultRateLogging;
constexpr int Channel::DefaultPortRangeMin;
@@ -73,6 +75,8 @@ Channel::Channel(string name, string type, string method, string address, shared
, fRcvBufSize(DefaultRcvBufSize)
, fSndKernelSize(DefaultSndKernelSize)
, fRcvKernelSize(DefaultRcvKernelSize)
, fSndTimeoutMs(DefaultSndTimeoutMs)
, fRcvTimeoutMs(DefaultRcvTimeoutMs)
, fLinger(DefaultLinger)
, fRateLogging(DefaultRateLogging)
, fPortRangeMin(DefaultPortRangeMin)
@@ -97,6 +101,8 @@ Channel::Channel(const string& name, int index, const Properties& properties)
fRcvBufSize = GetPropertyOrDefault(properties, string(prefix + "rcvBufSize"), DefaultRcvBufSize);
fSndKernelSize = GetPropertyOrDefault(properties, string(prefix + "sndKernelSize"), DefaultSndKernelSize);
fRcvKernelSize = GetPropertyOrDefault(properties, string(prefix + "rcvKernelSize"), DefaultRcvKernelSize);
fSndTimeoutMs = GetPropertyOrDefault(properties, string(prefix + "sndTimeoutMs"), DefaultSndTimeoutMs);
fRcvTimeoutMs = GetPropertyOrDefault(properties, string(prefix + "rcvTimeoutMs"), DefaultRcvTimeoutMs);
fLinger = GetPropertyOrDefault(properties, string(prefix + "linger"), DefaultLinger);
fRateLogging = GetPropertyOrDefault(properties, string(prefix + "rateLogging"), DefaultRateLogging);
fPortRangeMin = GetPropertyOrDefault(properties, string(prefix + "portRangeMin"), DefaultPortRangeMin);
@@ -120,6 +126,8 @@ Channel::Channel(const Channel& chan, string newName)
, fRcvBufSize(chan.fRcvBufSize)
, fSndKernelSize(chan.fSndKernelSize)
, fRcvKernelSize(chan.fRcvKernelSize)
, fSndTimeoutMs(chan.fSndTimeoutMs)
, fRcvTimeoutMs(chan.fRcvTimeoutMs)
, fLinger(chan.fLinger)
, fRateLogging(chan.fRateLogging)
, fPortRangeMin(chan.fPortRangeMin)
@@ -146,6 +154,8 @@ Channel& Channel::operator=(const Channel& chan)
fRcvBufSize = chan.fRcvBufSize;
fSndKernelSize = chan.fSndKernelSize;
fRcvKernelSize = chan.fRcvKernelSize;
fSndTimeoutMs = chan.fSndTimeoutMs;
fRcvTimeoutMs = chan.fRcvTimeoutMs;
fLinger = chan.fLinger;
fRateLogging = chan.fRateLogging;
fPortRangeMin = chan.fPortRangeMin;

View File

@@ -166,6 +166,14 @@ class Channel
/// @return Returns socket kernel receive buffer size (in bytes)
int GetRcvKernelSize() const { return fRcvKernelSize; }
/// Get socket default send timeout (in ms)
/// @return Returns socket default send timeout (in ms)
int GetSndTimeout() const { return fSndTimeoutMs; }
/// Get socket default receive timeout (in ms)
/// @return Returns socket default receive timeout (in ms)
int GetRcvTimeout() const { return fRcvTimeoutMs; }
/// Get linger duration (in milliseconds)
/// @return Returns linger duration (in milliseconds)
int GetLinger() const { return fLinger; }
@@ -230,6 +238,14 @@ class Channel
/// @param rcvKernelSize Socket receive buffer size (in bytes)
void UpdateRcvKernelSize(int rcvKernelSize) { fRcvKernelSize = rcvKernelSize; Invalidate(); }
/// Set socket default send timeout (in ms)
/// @param sndTimeoutMs Socket default send timeout (in ms)
void UpdateSndTimeout(int sndTimeoutMs) { fSndTimeoutMs = sndTimeoutMs; Invalidate(); }
/// Set socket default receive timeout (in ms)
/// @param rcvTimeoutMs Socket default receive timeout (in ms)
void UpdateRcvTimeout(int rcvTimeoutMs) { fRcvTimeoutMs = rcvTimeoutMs; Invalidate(); }
/// Set linger duration (in milliseconds)
/// @param duration linger duration (in milliseconds)
void UpdateLinger(int duration) { fLinger = duration; Invalidate(); }
@@ -267,62 +283,52 @@ class Channel
/// invalidates the channel (requires validation to be used again).
void Invalidate() { fValid = false; }
/// Sends a message to the socket queue.
/// @param msg Constant reference of unique_ptr to a Message
/// @param sndTimeoutInMs send timeout in ms. -1 will wait forever (or until interrupt (e.g. via state change)), 0 will not wait (return immediately if cannot send)
/// @return Number of bytes that have been queued, TransferCode::timeout if timed out, TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by requested state change)
int64_t Send(MessagePtr& msg, int sndTimeoutInMs = -1)
/// Send message(s) to the socket queue.
/// @param m reference to MessagePtr/Parts/vector<MessagePtr>
/// @param sndTimeoutMs send timeout in ms.
/// -1 will wait forever (or until interrupt (e.g. via state change)),
/// 0 will not wait (return immediately if cannot send).
/// If not provided, default timeout will be taken.
/// @return Number of bytes that have been queued,
/// TransferCode::timeout if timed out,
/// TransferCode::error if there was an error,
/// TransferCode::interrupted if interrupted (e.g. by requested state change)
template<typename M, typename... Timeout>
std::enable_if_t<is_transferrable<M>::value, int64_t>
Send(M& m, Timeout&&... sndTimeoutMs)
{
CheckSendCompatibility(msg);
return fSocket->Send(msg, sndTimeoutInMs);
static_assert(sizeof...(sndTimeoutMs) <= 1, "Send called with too many arguments");
CheckSendCompatibility(m);
int t = fSndTimeoutMs;
if constexpr (sizeof...(sndTimeoutMs) == 1) {
t = {sndTimeoutMs...};
}
return fSocket->Send(m, t);
}
/// Receives a message from the socket queue.
/// @param msg Constant reference of unique_ptr to a Message
/// @param rcvTimeoutInMs receive timeout in ms. -1 will wait forever (or until interrupt (e.g. via state change)), 0 will not wait (return immediately if cannot receive)
/// @return Number of bytes that have been received, TransferCode::timeout if timed out, TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by requested state change)
int64_t Receive(MessagePtr& msg, int rcvTimeoutInMs = -1)
/// Receive message(s) from the socket queue.
/// @param m reference to MessagePtr/Parts/vector<MessagePtr>
/// @param rcvTimeoutMs receive timeout in ms.
/// -1 will wait forever (or until interrupt (e.g. via state change)),
/// 0 will not wait (return immediately if cannot receive).
/// If not provided, default timeout will be taken.
/// @return Number of bytes that have been received,
/// TransferCode::timeout if timed out,
/// TransferCode::error if there was an error,
/// TransferCode::interrupted if interrupted (e.g. by requested state change)
template<typename M, typename... Timeout>
std::enable_if_t<is_transferrable<M>::value, int64_t>
Receive(M& m, Timeout&&... rcvTimeoutMs)
{
CheckReceiveCompatibility(msg);
return fSocket->Receive(msg, rcvTimeoutInMs);
}
static_assert(sizeof...(rcvTimeoutMs) <= 1, "Receive called with too many arguments");
/// Send a vector of messages
/// @param msgVec message vector reference
/// @param sndTimeoutInMs send timeout in ms. -1 will wait forever (or until interrupt (e.g. via state change)), 0 will not wait (return immediately if cannot send)
/// @return Number of bytes that have been queued, TransferCode::timeout if timed out, TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by requested state change)
int64_t Send(std::vector<MessagePtr>& msgVec, int sndTimeoutInMs = -1)
{
CheckSendCompatibility(msgVec);
return fSocket->Send(msgVec, sndTimeoutInMs);
}
/// Receive a vector of messages
/// @param msgVec message vector reference
/// @param rcvTimeoutInMs receive timeout in ms. -1 will wait forever (or until interrupt (e.g. via state change)), 0 will not wait (return immediately if cannot receive)
/// @return Number of bytes that have been received, TransferCode::timeout if timed out, TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by requested state change)
int64_t Receive(std::vector<MessagePtr>& msgVec, int rcvTimeoutInMs = -1)
{
CheckReceiveCompatibility(msgVec);
return fSocket->Receive(msgVec, rcvTimeoutInMs);
}
/// Send Parts
/// @param parts Parts reference
/// @param sndTimeoutInMs send timeout in ms. -1 will wait forever (or until interrupt (e.g. via state change)), 0 will not wait (return immediately if cannot send)
/// @return Number of bytes that have been queued, TransferCode::timeout if timed out, TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by requested state change)
int64_t Send(Parts& parts, int sndTimeoutInMs = -1)
{
return Send(parts.fParts, sndTimeoutInMs);
}
/// Receive Parts
/// @param parts Parts reference
/// @param rcvTimeoutInMs receive timeout in ms. -1 will wait forever (or until interrupt (e.g. via state change)), 0 will not wait (return immediately if cannot receive)
/// @return Number of bytes that have been received, TransferCode::timeout if timed out, TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by requested state change)
int64_t Receive(Parts& parts, int rcvTimeoutInMs = -1)
{
return Receive(parts.fParts, rcvTimeoutInMs);
CheckReceiveCompatibility(m);
int t = fRcvTimeoutMs;
if constexpr (sizeof...(rcvTimeoutMs) == 1) {
t = {rcvTimeoutMs...};
}
return fSocket->Receive(m, t);
}
unsigned long GetBytesTx() const { return fSocket->GetBytesTx(); }
@@ -366,6 +372,8 @@ class Channel
static constexpr int DefaultRcvBufSize = 1000;
static constexpr int DefaultSndKernelSize = 0;
static constexpr int DefaultRcvKernelSize = 0;
static constexpr int DefaultSndTimeoutMs = -1;
static constexpr int DefaultRcvTimeoutMs = -1;
static constexpr int DefaultLinger = 500;
static constexpr int DefaultRateLogging = 1;
static constexpr int DefaultPortRangeMin = 22000;
@@ -385,6 +393,8 @@ class Channel
int fRcvBufSize;
int fSndKernelSize;
int fRcvKernelSize;
int fSndTimeoutMs;
int fRcvTimeoutMs;
int fLinger;
int fRateLogging;
int fPortRangeMin;
@@ -414,6 +424,7 @@ class Channel
}
}
void CheckSendCompatibility(Parts& parts) { CheckSendCompatibility(parts.fParts); }
void CheckSendCompatibility(std::vector<MessagePtr>& msgVec)
{
for (auto& msg : msgVec) {
@@ -443,6 +454,7 @@ class Channel
}
}
void CheckReceiveCompatibility(Parts& parts) { CheckReceiveCompatibility(parts.fParts); }
void CheckReceiveCompatibility(std::vector<MessagePtr>& msgVec)
{
for (auto& msg : msgVec) {

View File

@@ -435,7 +435,7 @@ void Device::InitTaskWrapper()
void Device::RunWrapper()
{
LOG(info) << "DEVICE: Running...";
LOG(info) << "fair::mq::Device running...";
// start the rate logger thread
future<void> rateLogger = async(launch::async, &Device::LogSocketRates, this);
@@ -445,46 +445,43 @@ void Device::RunWrapper()
t.second->Resume();
}
try {
PreRun();
// change to Error state in case of an exception, to release LogSocketRates
tools::CallOnDestruction cod([&](){
ChangeState(Transition::ErrorFound);
});
// process either data callbacks or ConditionalRun/Run
if (fDataCallbacks) {
// if only one input channel, do lightweight handling without additional polling.
if (fInputChannelKeys.size() == 1 && fChannels.at(fInputChannelKeys.at(0)).size() == 1) {
HandleSingleChannelInput();
} else {// otherwise do full handling with polling
HandleMultipleChannelInput();
PreRun();
// process either data callbacks or ConditionalRun/Run
if (fDataCallbacks) {
// if only one input channel, do lightweight handling without additional polling.
if (fInputChannelKeys.size() == 1 && fChannels.at(fInputChannelKeys.at(0)).size() == 1) {
HandleSingleChannelInput();
} else {// otherwise do full handling with polling
HandleMultipleChannelInput();
}
} else {
tools::RateLimiter rateLimiter(fRate);
while (!NewStatePending() && ConditionalRun()) {
if (fRate > 0.001) {
rateLimiter.maybe_sleep();
}
} else {
tools::RateLimiter rateLimiter(fRate);
while (!NewStatePending() && ConditionalRun()) {
if (fRate > 0.001) {
rateLimiter.maybe_sleep();
}
}
Run();
}
// if Run() exited and the state is still RUNNING, transition to READY.
if (!NewStatePending()) {
UnblockTransports();
ChangeState(Transition::Stop);
}
PostRun();
} catch (const out_of_range& oor) {
LOG(error) << "out of range: " << oor.what();
LOG(error) << "incorrect/incomplete channel configuration?";
ChangeState(Transition::ErrorFound);
throw;
} catch (...) {
ChangeState(Transition::ErrorFound);
throw;
Run();
}
// if Run() exited and the state is still RUNNING, transition to READY.
if (!NewStatePending()) {
UnblockTransports();
ChangeState(Transition::Stop);
}
PostRun();
cod.disable();
rateLogger.get();
}

View File

@@ -81,72 +81,70 @@ class Device
Deserializer().Deserialize(msg, std::forward<DataType>(data), std::forward<Args>(args)...);
}
/// Shorthand method to send `msg` on `chan` at index `i`
/// @param msg message reference
/// Send `m` on `chan` at index `i`
/// @param m reference to MessagePtr/Parts/vector<MessagePtr>
/// @param channel channel name
/// @param index channel index
/// @param sndTimeoutInMs send timeout in ms, -1 will wait forever (or until interrupt (e.g. via
/// state change)), 0 will not wait (return immediately if cannot send)
/// @return Number of bytes that have been queued, TransferCode::timeout if timed out,
/// TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by
/// requested state change)
int64_t Send(MessagePtr& msg,
const std::string& channel,
const int index = 0,
int sndTimeoutInMs = -1)
/// @return Number of queued bytes,
/// TransferCode::timeout if timed out,
/// TransferCode::error if there was an error,
/// TransferCode::interrupted if interrupted (e.g. by requested state change)
template<typename M>
std::enable_if_t<is_transferrable<M>::value, int64_t>
Send(M& m, const std::string& channel, const int index = 0)
{
return GetChannel(channel, index).Send(msg, sndTimeoutInMs);
return GetChannel(channel, index).Send(m);
}
/// Shorthand method to receive `msg` on `chan` at index `i`
/// @param msg message reference
/// Receive `m` on `chan` at index `i`
/// @param m reference to MessagePtr/Parts/vector<MessagePtr>
/// @param channel channel name
/// @param index channel index
/// @param rcvTimeoutInMs receive timeout in ms, -1 will wait forever (or until interrupt (e.g.
/// via state change)), 0 will not wait (return immediately if cannot receive)
/// @return Number of bytes that have been received, TransferCode::timeout if timed out,
/// TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by
/// requested state change)
int64_t Receive(MessagePtr& msg,
const std::string& channel,
const int index = 0,
int rcvTimeoutInMs = -1)
/// @return Number of received bytes,
/// TransferCode::timeout if timed out,
/// TransferCode::error if there was an error,
/// TransferCode::interrupted if interrupted (e.g. by requested state change)
template<typename M>
std::enable_if_t<is_transferrable<M>::value, int64_t>
Receive(M& m, const std::string& channel, const int index = 0)
{
return GetChannel(channel, index).Receive(msg, rcvTimeoutInMs);
return GetChannel(channel, index).Receive(m);
}
/// Shorthand method to send Parts on `chan` at index `i`
/// @param parts parts reference
/// Send `m` on `chan` at index `i`
/// @param m reference to MessagePtr/Parts/vector<MessagePtr>
/// @param channel channel name
/// @param index channel index
/// @param sndTimeoutInMs send timeout in ms, -1 will wait forever (or until interrupt (e.g. via
/// state change)), 0 will not wait (return immediately if cannot send)
/// @return Number of bytes that have been queued, TransferCode::timeout if timed out,
/// TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by
/// requested state change)
int64_t Send(Parts& parts,
const std::string& channel,
const int index = 0,
int sndTimeoutInMs = -1)
/// @param sndTimeoutMs send timeout in ms,
/// -1 will wait forever (or until interrupt (e.g. via state change)),
/// 0 will not wait (return immediately if cannot send)
/// @return Number of queued bytes,
/// TransferCode::timeout if timed out,
/// TransferCode::error if there was an error,
/// TransferCode::interrupted if interrupted (e.g. by requested state change)
template<typename M>
std::enable_if_t<is_transferrable<M>::value, int64_t>
Send(M& m, const std::string& channel, const int index, int sndTimeoutMs)
{
return GetChannel(channel, index).Send(parts.fParts, sndTimeoutInMs);
return GetChannel(channel, index).Send(m, sndTimeoutMs);
}
/// Shorthand method to receive Parts on `chan` at index `i`
/// @param parts parts reference
/// Receive `m` on `chan` at index `i`
/// @param m reference to MessagePtr/Parts/vector<MessagePtr>
/// @param channel channel name
/// @param index channel index
/// @param rcvTimeoutInMs receive timeout in ms, -1 will wait forever (or until interrupt (e.g.
/// via state change)), 0 will not wait (return immediately if cannot receive)
/// @return Number of bytes that have been received, TransferCode::timeout if timed out,
/// TransferCode::error if there was an error, TransferCode::interrupted if interrupted (e.g. by
/// requested state change)
int64_t Receive(Parts& parts,
const std::string& channel,
const int index = 0,
int rcvTimeoutInMs = -1)
/// @param rcvTimeoutMs receive timeout in ms,
/// -1 will wait forever (or until interrupt (e.g. via state change)),
/// 0 will not wait (return immediately if cannot receive)
/// @return Number of received bytes,
/// TransferCode::timeout if timed out,
/// TransferCode::error if there was an error,
/// TransferCode::interrupted if interrupted (e.g. by requested state change)
template<typename M>
std::enable_if_t<is_transferrable<M>::value, int64_t>
Receive(M& m, const std::string& channel, const int index, int rcvTimeoutMs)
{
return GetChannel(channel, index).Receive(parts.fParts, rcvTimeoutInMs);
return GetChannel(channel, index).Receive(m, rcvTimeoutMs);
}
/// @brief Getter for default transport factory
@@ -320,10 +318,15 @@ class Device
try {
return fChannels.at(channelName).at(index);
} catch (const std::out_of_range& oor) {
LOG(error)
<< "requested channel has not been configured? check channel names/configuration.";
LOG(error) << "channel: " << channelName << ", index: " << index;
LOG(error) << "out of range: " << oor.what();
LOG(error) << "GetChannel(): '" << channelName << "[" << index << "]' does not exist.";
throw;
}
size_t GetNumSubChannels(const std::string& channelName)
try {
return fChannels.at(channelName).size();
} catch (const std::out_of_range& oor) {
LOG(error) << "GetNumSubChannels(): '" << channelName << "' does not exist.";
throw;
}

View File

@@ -128,7 +128,7 @@ auto DeviceRunner::Run() -> int
fConfig.ParseAll(fRawCmdLineArgs, true);
if (!HandleGeneralOptions(fConfig)) {
if (!HandleGeneralOptions(fConfig, fPrintLogo)) {
return 0;
}

View File

@@ -96,6 +96,8 @@ void ChannelParser(const ptree& tree, fair::mq::Properties& properties)
commonProperties.emplace("rcvBufSize", cn.second.get<int>("rcvBufSize", FairMQChannel::DefaultRcvBufSize));
commonProperties.emplace("sndKernelSize", cn.second.get<int>("sndKernelSize", FairMQChannel::DefaultSndKernelSize));
commonProperties.emplace("rcvKernelSize", cn.second.get<int>("rcvKernelSize", FairMQChannel::DefaultRcvKernelSize));
commonProperties.emplace("sndTimeoutMs", cn.second.get<int>("sndTimeoutMs", FairMQChannel::DefaultSndTimeoutMs));
commonProperties.emplace("rcvTimeoutMs", cn.second.get<int>("rcvTimeoutMs", FairMQChannel::DefaultRcvTimeoutMs));
commonProperties.emplace("linger", cn.second.get<int>("linger", FairMQChannel::DefaultLinger));
commonProperties.emplace("rateLogging", cn.second.get<int>("rateLogging", FairMQChannel::DefaultRateLogging));
commonProperties.emplace("portRangeMin", cn.second.get<int>("portRangeMin", FairMQChannel::DefaultPortRangeMin));
@@ -146,6 +148,8 @@ void SubChannelParser(const ptree& channelTree, fair::mq::Properties& properties
newProperties["rcvBufSize"] = sn.second.get<int>("rcvBufSize", boost::any_cast<int>(commonProperties.at("rcvBufSize")));
newProperties["sndKernelSize"] = sn.second.get<int>("sndKernelSize", boost::any_cast<int>(commonProperties.at("sndKernelSize")));
newProperties["rcvKernelSize"] = sn.second.get<int>("rcvKernelSize", boost::any_cast<int>(commonProperties.at("rcvKernelSize")));
newProperties["sndTimeoutMs"] = sn.second.get<int>("sndTimeoutMs", boost::any_cast<int>(commonProperties.at("sndTimeoutMs")));
newProperties["rcvTimeoutMs"] = sn.second.get<int>("rcvTimeoutMs", boost::any_cast<int>(commonProperties.at("rcvTimeoutMs")));
newProperties["linger"] = sn.second.get<int>("linger", boost::any_cast<int>(commonProperties.at("linger")));
newProperties["rateLogging"] = sn.second.get<int>("rateLogging", boost::any_cast<int>(commonProperties.at("rateLogging")));
newProperties["portRangeMin"] = sn.second.get<int>("portRangeMin", boost::any_cast<int>(commonProperties.at("portRangeMin")));

View File

@@ -10,9 +10,12 @@
#define FAIR_MQ_SOCKET_H
#include <fairmq/Message.h>
#include <fairmq/Parts.h>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>
namespace fair::mq {
@@ -27,6 +30,12 @@ enum class TransferCode : int
interrupted = -3
};
/// Type trait built from std::disjunction over three std::is_same checks:
/// is_transferrable<T>::value is true when T is exactly MessagePtr,
/// std::vector<MessagePtr> or fair::mq::Parts, and false for any other type
/// (cv-qualified or reference variants of these types do not match std::is_same).
/// NOTE(review): presumably meant to constrain generic Send/Receive helpers - usage is not visible here, confirm.
template <typename T>
struct is_transferrable : std::disjunction<std::is_same<T, MessagePtr>,
std::is_same<T, std::vector<MessagePtr>>,
std::is_same<T, fair::mq::Parts>>
{};
struct Socket
{
Socket() = default;
@@ -45,6 +54,8 @@ struct Socket
virtual int64_t Receive(MessagePtr& msg, int timeout = -1) = 0;
virtual int64_t Send(std::vector<std::unique_ptr<Message>>& msgVec, int timeout = -1) = 0;
virtual int64_t Receive(std::vector<std::unique_ptr<Message>>& msgVec, int timeout = -1) = 0;
/// Send the content of a Parts container.
/// The default implementation forwards parts.fParts and the timeout unchanged
/// to the vector overload of Send() and returns its result.
virtual int64_t Send(Parts& parts, int timeout = -1)
{
    return Send(parts.fParts, timeout);
}
/// Receive into a Parts container.
/// The default implementation forwards parts.fParts and the timeout unchanged
/// to the vector overload of Receive() and returns its result.
virtual int64_t Receive(Parts& parts, int timeout = -1)
{
    return Receive(parts.fParts, timeout);
}
[[deprecated("Use Socket::~Socket() instead.")]]
virtual void Close() = 0;

View File

@@ -7,6 +7,7 @@
********************************************************************************/
#include <fairmq/StateMachine.h>
#include <fairmq/tools/Exceptions.h>
#include <fairlogger/Logger.h>
@@ -204,6 +205,7 @@ struct Machine_ : public state_machine_def<Machine_>
}
if (fState == State::Error) {
LOG(trace) << "Device transitioned to error state";
throw StateMachine::ErrorStateException("Device transitioned to error state");
}
}
@@ -366,20 +368,18 @@ void StateMachine::ProcessWork()
{
auto fsm = static_pointer_cast<FairMQFSM>(fFsm);
try {
fsm->CallStateChangeCallbacks(State::Idle);
fsm->ProcessWork();
} catch(ErrorStateException& ese) {
LOG(trace) << "ErrorStateException caught in ProcessWork(), rethrowing";
throw;
} catch(...) {
LOG(debug) << "Exception caught in ProcessWork(), going to Error state and rethrowing";
fair::mq::tools::CallOnDestruction cod([&](){
LOG(debug) << "Exception caught in ProcessWork(), going to Error state";
{
lock_guard<mutex> lock(fsm->fStateMtx);
fsm->fState = State::Error;
fsm->CallStateChangeCallbacks(State::Error);
}
ChangeState(Transition::ErrorFound);
throw;
}
});
fsm->CallStateChangeCallbacks(State::Idle);
fsm->ProcessWork();
cod.disable();
}

View File

@@ -38,6 +38,8 @@ enum channelOptionKeyIds
RCVBUFSIZE, // size of the receive queue
SNDKERNELSIZE,
RCVKERNELSIZE,
SNDTIMEOUTMS,
RCVTIMEOUTMS,
LINGER,
RATELOGGING, // logging rate
PORTRANGEMIN,
@@ -57,6 +59,8 @@ constexpr static const char* channelOptionKeys[] = {
/*[RCVBUFSIZE] = */ "rcvBufSize",
/*[SNDKERNELSIZE] = */ "sndKernelSize",
/*[RCVKERNELSIZE] = */ "rcvKernelSize",
/*[SNDTIMEOUTMS] = */ "sndTimeoutMs",
/*[RCVTIMEOUTMS] = */ "rcvTimeoutMs",
/*[LINGER] = */ "linger",
/*[RATELOGGING] = */ "rateLogging",
/*[PORTRANGEMIN] = */ "portRangeMin",

View File

@@ -11,6 +11,7 @@
// IWYU pragma: begin_exports
#include <fairmq/tools/CppSTL.h>
#include <fairmq/tools/Exceptions.h>
#include <fairmq/tools/InstanceLimit.h>
#include <fairmq/tools/Network.h>
#include <fairmq/tools/Process.h>

View File

@@ -109,13 +109,15 @@ class TransportFactory
/// @param path optional parameter to pass to the underlying transport
/// @param flags optional parameter to pass to the underlying transport
/// @return pointer to UnmanagedRegion
// [[deprecated("Use CreateUnmanagedRegion(size_t size, RegionCallback callback, RegionConfig cfg)")]]
virtual UnmanagedRegionPtr CreateUnmanagedRegion(size_t size,
RegionCallback callback = nullptr,
const std::string& path = "",
int flags = 0,
RegionConfig cfg = RegionConfig()) = 0;
// [[deprecated("Use CreateUnmanagedRegion(size_t size, RegionCallback callback, RegionConfig cfg)")]]
virtual UnmanagedRegionPtr CreateUnmanagedRegion(size_t size,
RegionBulkCallback callback = nullptr,
RegionBulkCallback bulkCallback = nullptr,
const std::string& path = "",
int flags = 0,
RegionConfig cfg = RegionConfig()) = 0;
@@ -128,19 +130,35 @@ class TransportFactory
/// @param path optional parameter to pass to the underlying transport
/// @param flags optional parameter to pass to the underlying transport
/// @return pointer to UnmanagedRegion
// [[deprecated("Use CreateUnmanagedRegion(size_t size, RegionCallback callback, RegionConfig cfg)")]]
virtual UnmanagedRegionPtr CreateUnmanagedRegion(size_t size,
int64_t userFlags,
RegionCallback callback = nullptr,
const std::string& path = "",
int flags = 0,
RegionConfig cfg = RegionConfig()) = 0;
// [[deprecated("Use CreateUnmanagedRegion(size_t size, RegionCallback callback, RegionConfig cfg)")]]
virtual UnmanagedRegionPtr CreateUnmanagedRegion(size_t size,
int64_t userFlags,
RegionBulkCallback callback = nullptr,
RegionBulkCallback bulkCallback = nullptr,
const std::string& path = "",
int flags = 0,
RegionConfig cfg = RegionConfig()) = 0;
/// @brief Create new UnmanagedRegion
/// @param size size of the region
/// @param callback callback to be called when a message belonging to this region is no longer needed by the transport
/// @param cfg region configuration
/// @return pointer to UnmanagedRegion
virtual UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, RegionCallback callback, RegionConfig cfg) = 0;
/// @brief Create new UnmanagedRegion
/// @param size size of the region
/// @param bulkCallback callback to be called when message(s) belonging to this region are no longer needed by the transport
/// @param cfg region configuration
/// @return pointer to UnmanagedRegion
virtual UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, RegionBulkCallback bulkCallback, RegionConfig cfg) = 0;
/// @brief Subscribe to region events (creation, destruction, ...)
/// @param callback the callback that is called when a region event occurs
virtual void SubscribeToRegionEvents(RegionEventCallback callback) = 0;

View File

@@ -9,12 +9,16 @@
#ifndef FAIR_MQ_UNMANAGEDREGION_H
#define FAIR_MQ_UNMANAGEDREGION_H
#include <fairmq/Transports.h>
#include <cstddef> // size_t
#include <cstdint> // uint32_t
#include <fairmq/Transports.h>
#include <functional> // std::function
#include <memory> // std::unique_ptr
#include <ostream> // std::ostream
#include <optional> // std::optional
#include <ostream>
#include <string>
#include <vector>
namespace fair::mq {
@@ -119,13 +123,19 @@ struct RegionConfig
{
RegionConfig() = default;
RegionConfig(bool l, bool z)
: lock(l)
, zero(z)
RegionConfig(bool _lock, bool _zero)
: lock(_lock)
, zero(_zero)
{}
bool lock = false;
bool zero = false;
bool lock = false; /// mlock region after creation
bool zero = false; /// zero region content after creation
bool removeOnDestruction = true; /// remove the region on object destruction
int creationFlags = 0; /// flags passed to the underlying transport on region creation
int64_t userFlags = 0; /// custom flags that have no effect on the transport, but can be retrieved from the region by the user
std::string path = ""; /// file path, if the region is backed by a file
std::optional<uint16_t> id = std::nullopt; /// region id
uint32_t linger = 100; /// delay in ms before region destruction to collect outstanding events
};
} // namespace fair::mq

View File

@@ -44,7 +44,7 @@ class BenchmarkSampler : public Device
void Run() override
{
// store the channel reference to avoid traversing the map on every loop iteration
FairMQChannel& dataOutChannel = fChannels.at(fOutChannelName).at(0);
FairMQChannel& dataOutChannel = GetChannel(fOutChannelName, 0);
LOG(info) << "Starting the benchmark with message size of " << fMsgSize << " and " << fMaxIterations << " iterations.";
auto tStart = std::chrono::high_resolution_clock::now();

View File

@@ -43,7 +43,7 @@ class Merger : public Device
void Run() override
{
int numInputs = fChannels.at(fInChannelName).size();
int numInputs = GetNumSubChannels(fInChannelName);
std::vector<FairMQChannel*> chans;

View File

@@ -30,7 +30,7 @@ class Multiplier : public Device
fMultipart = fConfig->GetProperty<bool>("multipart");
fInChannelName = fConfig->GetProperty<std::string>("in-channel");
fOutChannelNames = fConfig->GetProperty<std::vector<std::string>>("out-channel");
fNumOutputs = fChannels.at(fOutChannelNames.at(0)).size();
fNumOutputs = GetNumSubChannels(fOutChannelNames.at(0));
if (fMultipart) {
OnData(fInChannelName, &Multiplier::HandleMultipartData);
@@ -43,7 +43,7 @@ class Multiplier : public Device
bool HandleSingleData(std::unique_ptr<FairMQMessage>& payload, int)
{
for (unsigned int i = 0; i < fOutChannelNames.size() - 1; ++i) { // all except last channel
for (unsigned int j = 0; j < fChannels.at(fOutChannelNames.at(i)).size(); ++j) { // all subChannels in a channel
for (unsigned int j = 0; j < GetNumSubChannels(fOutChannelNames.at(i)); ++j) { // all subChannels in a channel
FairMQMessagePtr msgCopy(fTransportFactory->CreateMessage());
msgCopy->Copy(*payload);
@@ -51,7 +51,7 @@ class Multiplier : public Device
}
}
unsigned int lastChannelSize = fChannels.at(fOutChannelNames.back()).size();
unsigned int lastChannelSize = GetNumSubChannels(fOutChannelNames.back());
for (unsigned int i = 0; i < lastChannelSize - 1; ++i) { // iterate over all except last subChannels of the last channel
FairMQMessagePtr msgCopy(fTransportFactory->CreateMessage());
@@ -68,7 +68,7 @@ class Multiplier : public Device
bool HandleMultipartData(FairMQParts& payload, int)
{
for (unsigned int i = 0; i < fOutChannelNames.size() - 1; ++i) { // all except last channel
for (unsigned int j = 0; j < fChannels.at(fOutChannelNames.at(i)).size(); ++j) { // all subChannels in a channel
for (unsigned int j = 0; j < GetNumSubChannels(fOutChannelNames.at(i)); ++j) { // all subChannels in a channel
FairMQParts parts;
for (int k = 0; k < payload.Size(); ++k) {
@@ -81,7 +81,7 @@ class Multiplier : public Device
}
}
unsigned int lastChannelSize = fChannels.at(fOutChannelNames.back()).size();
unsigned int lastChannelSize = GetNumSubChannels(fOutChannelNames.back());
for (unsigned int i = 0; i < lastChannelSize - 1; ++i) { // iterate over all except last subChannels of the last channel
FairMQParts parts;

View File

@@ -48,7 +48,7 @@ class Sink : public Device
void Run() override
{
// store the channel reference to avoid traversing the map on every loop iteration
FairMQChannel& dataInChannel = fChannels.at(fInChannelName).at(0);
FairMQChannel& dataInChannel = GetChannel(fInChannelName, 0);
LOG(info) << "Starting sink and expecting to receive " << fMaxIterations << " messages.";
auto tStart = std::chrono::high_resolution_clock::now();

View File

@@ -30,7 +30,7 @@ class Splitter : public Device
fMultipart = fConfig->GetProperty<bool>("multipart");
fInChannelName = fConfig->GetProperty<std::string>("in-channel");
fOutChannelName = fConfig->GetProperty<std::string>("out-channel");
fNumOutputs = fChannels.at(fOutChannelName).size();
fNumOutputs = GetNumSubChannels(fOutChannelName);
fDirection = 0;
if (fMultipart) {

View File

@@ -166,6 +166,22 @@ struct TransportFactory final : mq::TransportFactory
throw std::runtime_error("Not yet implemented UMR.");
}
/// CreateUnmanagedRegion overload taking a RegionCallback and a RegionConfig.
/// Not implemented here: all arguments are ignored and std::runtime_error is thrown on every call.
auto CreateUnmanagedRegion(std::size_t /*size*/, RegionCallback /*callback*/, RegionConfig /*cfg*/)
    -> std::unique_ptr<mq::UnmanagedRegion> override
{
    throw std::runtime_error("Not yet implemented UMR.");
}
/// CreateUnmanagedRegion overload taking a RegionBulkCallback and a RegionConfig.
/// Not implemented here: all arguments are ignored and std::runtime_error is thrown on every call.
auto CreateUnmanagedRegion(std::size_t /*size*/, RegionBulkCallback /*bulkCallback*/, RegionConfig /*cfg*/)
    -> std::unique_ptr<mq::UnmanagedRegion> override
{
    throw std::runtime_error("Not yet implemented UMR.");
}
auto SubscribeToRegionEvents(RegionEventCallback /*callback*/) -> void override
{
throw std::runtime_error("Not yet implemented.");

View File

@@ -67,6 +67,7 @@ Plugin::ProgOptions ConfigPluginProgramOptions()
("shm-segment-size", po::value<size_t >()->default_value(2ULL << 30), "Shared memory: size of the shared memory segment (in bytes).")
("shm-allocation", po::value<string >()->default_value("rbtree_best_fit"), "Shared memory allocation algorithm: rbtree_best_fit/simple_seq_fit.")
("shm-segment-id", po::value<uint16_t >()->default_value(0), "EXPERIMENTAL: Shared memory segment id for message creation.")
("shmid", po::value<uint64_t >(), "EXPERIMENTAL: Fixed shmid to use instead of deriving it from the session name.")
("shm-mlock-segment", po::value<bool >()->default_value(false), "Shared memory: mlock the shared memory segment after initialization (opened or created).")
("shm-mlock-segment-on-creation", po::value<bool >()->default_value(false), "Shared memory: mlock the shared memory segment only once when created.")
("shm-zero-segment", po::value<bool >()->default_value(false), "Shared memory: zero the shared memory segment memory after initialization (opened or created).")

56
fairmq/shmem/Common.cxx Normal file
View File

@@ -0,0 +1,56 @@
/********************************************************************************
* Copyright (C) 2021 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#include "Common.h"
#include <picosha2.h>
#include <unistd.h>
#include <iomanip>
#include <sstream>
#include <string>
namespace fair::mq::shmem
{
// Build the shm id string from a session id and a user id.
// The hash input is userId followed by sessionId; the sha256 output is written
// into a 4-byte buffer, which is returned as a hex string (8 hex digits).
std::string makeShmIdStr(const std::string& sessionId, const std::string& userId)
{
    std::string hashInput(userId + sessionId);

    // 4 bytes of hash output -> 8 hex digits
    std::vector<unsigned char> digest(4);
    picosha2::hash256(hashInput.begin(), hashInput.end(), digest.begin(), digest.end());

    return picosha2::bytes_to_hex_string(digest.begin(), digest.end());
}
// Build the shm id string for a session, using the effective user id of the
// calling process (geteuid()) as the user id.
std::string makeShmIdStr(const std::string& sessionId)
{
    const std::string effectiveUserId = std::to_string(geteuid());
    return makeShmIdStr(sessionId, effectiveUserId);
}
// Numeric form of the shm id: the hex string produced by makeShmIdStr(sessionId)
// parsed as a base-16 number. Stays 0 if nothing could be parsed.
uint64_t makeShmIdUint64(const std::string& sessionId)
{
    std::istringstream hexInput(makeShmIdStr(sessionId));

    uint64_t parsed = 0;
    hexInput >> std::hex >> parsed;

    return parsed;
}
// Format a numeric shm id as lowercase hex, left-padded with '0' to at least
// 8 characters. Values that need more than 8 hex digits are not truncated.
std::string makeShmIdStr(uint64_t val)
{
    std::ostringstream formatted;
    formatted << std::hex << std::setw(8) << std::setfill('0') << val;
    return formatted.str();
}
} // namespace fair::mq::shmem

View File

@@ -8,10 +8,7 @@
#ifndef FAIR_MQ_SHMEM_COMMON_H_
#define FAIR_MQ_SHMEM_COMMON_H_
#include <picosha2.h>
#include <atomic>
#include <sstream>
#include <string>
#include <functional> // std::equal_to
@@ -26,7 +23,6 @@
#include <boost/unordered_map.hpp>
#include <boost/variant.hpp>
#include <unistd.h>
#include <sys/types.h>
namespace fair::mq::shmem
@@ -35,11 +31,11 @@ namespace fair::mq::shmem
struct SharedMemoryError : std::runtime_error { using std::runtime_error::runtime_error; };
using SimpleSeqFitSegment = boost::interprocess::basic_managed_shared_memory<char,
boost::interprocess::simple_seq_fit<boost::interprocess::mutex_family>,
boost::interprocess::simple_seq_fit<boost::interprocess::mutex_family, boost::interprocess::offset_ptr<void>>,
boost::interprocess::null_index>;
// boost::interprocess::iset_index>;
using RBTreeBestFitSegment = boost::interprocess::basic_managed_shared_memory<char,
boost::interprocess::rbtree_best_fit<boost::interprocess::mutex_family>,
boost::interprocess::rbtree_best_fit<boost::interprocess::mutex_family, boost::interprocess::offset_ptr<void>>,
boost::interprocess::null_index>;
// boost::interprocess::iset_index>;
@@ -60,20 +56,20 @@ struct RegionInfo
{
RegionInfo(const VoidAlloc& alloc)
: fPath("", alloc)
, fFlags(0)
, fCreationFlags(0)
, fUserFlags(0)
, fDestroyed(false)
{}
RegionInfo(const char* path, const int flags, const uint64_t userFlags, const VoidAlloc& alloc)
: fPath(path, alloc)
, fFlags(flags)
, fCreationFlags(flags)
, fUserFlags(userFlags)
, fDestroyed(false)
{}
Str fPath;
int fFlags;
int fCreationFlags;
uint64_t fUserFlags;
bool fDestroyed;
};
@@ -150,6 +146,7 @@ struct MetaHeader
mutable boost::interprocess::managed_shared_memory::handle_t fShared;
uint16_t fRegionId;
mutable uint16_t fSegmentId;
bool fManaged;
};
#ifdef FAIRMQ_DEBUG_MODE
@@ -212,31 +209,11 @@ struct RegionBlock
// find id for unique shmem name:
// a hash of user id + session id, truncated to 8 characters (to accommodate the name size limit on some systems (macOS)).
inline std::string makeShmIdStr(const std::string& sessionId, const std::string& userId)
{
std::string seed(userId + sessionId);
// generate a 8-digit hex value out of sha256 hash
std::vector<unsigned char> hash(4);
picosha2::hash256(seed.begin(), seed.end(), hash.begin(), hash.end());
std::string makeShmIdStr(const std::string& sessionId, const std::string& userId);
std::string makeShmIdStr(const std::string& sessionId);
std::string makeShmIdStr(uint64_t val);
uint64_t makeShmIdUint64(const std::string& sessionId);
return picosha2::bytes_to_hex_string(hash.begin(), hash.end());
}
inline std::string makeShmIdStr(const std::string& sessionId)
{
return makeShmIdStr(sessionId, std::to_string(geteuid()));
}
inline uint64_t makeShmIdUint64(const std::string& sessionId)
{
std::string shmId = makeShmIdStr(sessionId);
uint64_t id = 0;
std::stringstream ss;
ss << std::hex << shmId;
ss >> id;
return id;
}
struct SegmentSize : public boost::static_visitor<size_t>
{

View File

@@ -11,19 +11,20 @@
#include "Common.h"
#include "Monitor.h"
#include "Region.h"
#include "UnmanagedRegion.h"
#include <fairmq/Message.h>
#include <fairmq/ProgOptions.h>
#include <fairmq/tools/Strings.h>
#include <fairmq/Transports.h>
#include <FairMQLogger.h>
#include <FairMQMessage.h>
#include <fairlogger/Logger.h>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem.hpp>
#include <boost/interprocess/ipc/message_queue.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/sync/named_condition.hpp>
#include <boost/interprocess/sync/interprocess_condition.hpp>
#include <boost/interprocess/sync/interprocess_mutex.hpp>
#include <boost/interprocess/sync/named_mutex.hpp>
#include <boost/variant.hpp>
@@ -127,15 +128,13 @@ struct ShmHeader
class Manager
{
public:
Manager(const std::string& sessionName, std::string deviceId, size_t size, const ProgOptions* config)
: fShmId64(makeShmIdUint64(sessionName))
, fShmId(makeShmIdStr(sessionName))
Manager(const std::string& sessionName, size_t size, const ProgOptions* config)
: fShmId64(config ? config->GetProperty<uint64_t>("shmid", makeShmIdUint64(sessionName)) : makeShmIdUint64(sessionName))
, fShmId(makeShmIdStr(fShmId64))
, fSegmentId(config ? config->GetProperty<uint16_t>("shm-segment-id", 0) : 0)
, fDeviceId(std::move(deviceId))
, fManagementSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mng").c_str(), 6553600)
, fShmVoidAlloc(fManagementSegment.get_segment_manager())
, fShmMtx(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mtx").c_str())
, fRegionEventsShmCV(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_cv").c_str())
, fShmMtx(fManagementSegment.find_or_construct<boost::interprocess::interprocess_mutex>(boost::interprocess::unique_instance)())
, fNumObservedEvents(0)
, fDeviceCounter(nullptr)
, fEventCounter(nullptr)
@@ -188,17 +187,14 @@ class Manager
}
if (autolaunchMonitor) {
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*fShmMtx);
StartMonitor(fShmId);
}
fHeartbeatThread = std::thread(&Manager::Heartbeats, this);
try {
std::stringstream ss;
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
fShmSegments = fManagementSegment.find_or_construct<Uint16SegmentInfoHashMap>(unique_instance)(fShmVoidAlloc);
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*fShmMtx);
SessionInfo* sessionInfo = fManagementSegment.find<SessionInfo>(unique_instance).first;
if (sessionInfo) {
@@ -218,8 +214,6 @@ class Manager
LOG(debug) << "initialized event counter with: " << fEventCounter->fCount;
}
fShmRegions = fManagementSegment.find_or_construct<Uint16RegionInfoHashMap>(unique_instance)(fShmVoidAlloc);
fDeviceCounter = fManagementSegment.find<DeviceCounter>(unique_instance).first;
if (fDeviceCounter) {
LOG(debug) << "device counter found, with value of " << fDeviceCounter->fCount << ". incrementing.";
@@ -231,22 +225,21 @@ class Manager
LOG(debug) << "initialized device counter with: " << fDeviceCounter->fCount;
}
std::string op("create/open");
fShmSegments = fManagementSegment.find_or_construct<Uint16SegmentInfoHashMap>(unique_instance)(fShmVoidAlloc);
fShmRegions = fManagementSegment.find_or_construct<Uint16RegionInfoHashMap>(unique_instance)(fShmVoidAlloc);
try {
std::string segmentName("fmq_" + fShmId + "_m_" + std::to_string(fSegmentId));
auto it = fShmSegments->find(fSegmentId);
if (it == fShmSegments->end()) {
op = "create";
// no segment with given id exists, creating
if (allocationAlgorithm == "rbtree_best_fit") {
fSegments.emplace(fSegmentId, RBTreeBestFitSegment(create_only, std::string("fmq_" + fShmId + "_m_" + std::to_string(fSegmentId)).c_str(), size));
fSegments.emplace(fSegmentId, RBTreeBestFitSegment(open_or_create, segmentName.c_str(), size));
fShmSegments->emplace(fSegmentId, AllocationAlgorithm::rbtree_best_fit);
} else if (allocationAlgorithm == "simple_seq_fit") {
fSegments.emplace(fSegmentId, SimpleSeqFitSegment(create_only, std::string("fmq_" + fShmId + "_m_" + std::to_string(fSegmentId)).c_str(), size));
fSegments.emplace(fSegmentId, SimpleSeqFitSegment(open_or_create, segmentName.c_str(), size));
fShmSegments->emplace(fSegmentId, AllocationAlgorithm::simple_seq_fit);
}
ss << "Created ";
(fEventCounter->fCount)++;
if (mlockSegmentOnCreation) {
MlockSegment(fSegmentId);
}
@@ -254,31 +247,28 @@ class Manager
ZeroSegment(fSegmentId);
}
} else {
op = "open";
// found segment with the given id, opening
if (it->second.fAllocationAlgorithm == AllocationAlgorithm::rbtree_best_fit) {
fSegments.emplace(fSegmentId, RBTreeBestFitSegment(open_only, std::string("fmq_" + fShmId + "_m_" + std::to_string(fSegmentId)).c_str()));
fSegments.emplace(fSegmentId, RBTreeBestFitSegment(open_or_create, segmentName.c_str(), size));
if (allocationAlgorithm != "rbtree_best_fit") {
LOG(warn) << "Allocation algorithm of the opened segment is rbtree_best_fit, but requested is " << allocationAlgorithm << ". Ignoring requested setting.";
allocationAlgorithm = "rbtree_best_fit";
}
} else {
fSegments.emplace(fSegmentId, SimpleSeqFitSegment(open_only, std::string("fmq_" + fShmId + "_m_" + std::to_string(fSegmentId)).c_str()));
fSegments.emplace(fSegmentId, SimpleSeqFitSegment(open_or_create, segmentName.c_str(), size));
if (allocationAlgorithm != "simple_seq_fit") {
LOG(warn) << "Allocation algorithm of the opened segment is simple_seq_fit, but requested is " << allocationAlgorithm << ". Ignoring requested setting.";
allocationAlgorithm = "simple_seq_fit";
}
}
ss << "Opened ";
}
ss << "shared memory segment '" << "fmq_" << fShmId << "_m_" << fSegmentId << "'."
LOG(debug) << "Created/opened shared memory segment '" << "fmq_" << fShmId << "_m_" << fSegmentId << "'."
<< " Size: " << boost::apply_visitor(SegmentSize(), fSegments.at(fSegmentId)) << " bytes."
<< " Available: " << boost::apply_visitor(SegmentFreeMemory(), fSegments.at(fSegmentId)) << " bytes."
<< " Allocation algorithm: " << allocationAlgorithm;
LOG(debug) << ss.str();
} catch(interprocess_exception& bie) {
LOG(error) << "Failed to " << op << " shared memory segment (" << "fmq_" << fShmId << "_m_" << fSegmentId << "): " << bie.what();
throw TransportError(tools::ToString("Failed to ", op, " shared memory segment (", "fmq_", fShmId, "_m_", fSegmentId, "): ", bie.what()));
} catch (interprocess_exception& bie) {
LOG(error) << "Failed to create/open shared memory segment '" << "fmq_" << fShmId << "_m_" << fSegmentId << "': " << bie.what();
throw TransportError(tools::ToString("Failed to create/open shared memory segment '", "fmq_", fShmId, "_m_", fSegmentId, "': ", bie.what()));
}
if (mlockSegment) {
@@ -288,6 +278,8 @@ class Manager
ZeroSegment(fSegmentId);
}
(fEventCounter->fCount)++;
#ifdef FAIRMQ_DEBUG_MODE
fMsgDebug = fManagementSegment.find_or_construct<Uint16MsgDebugMapHashMap>(unique_instance)(fShmVoidAlloc);
fShmMsgCounters = fManagementSegment.find_or_construct<Uint16MsgCounterHashMap>(unique_instance)(fShmVoidAlloc);
@@ -368,69 +360,56 @@ class Manager
}
bool Interrupted() { return fInterrupted.load(); }
std::pair<boost::interprocess::mapped_region*, uint16_t> CreateRegion(const size_t size,
const int64_t userFlags,
RegionCallback callback,
RegionBulkCallback bulkCallback,
const std::string& path,
int flags,
fair::mq::RegionConfig cfg)
std::pair<UnmanagedRegion*, uint16_t> CreateRegion(const size_t size,
RegionCallback callback,
RegionBulkCallback bulkCallback,
RegionConfig cfg)
{
using namespace boost::interprocess;
try {
std::pair<mapped_region*, uint16_t> result;
std::pair<UnmanagedRegion*, uint16_t> result;
{
uint16_t id = 0;
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*fShmMtx);
RegionCounter* rc = fManagementSegment.find<RegionCounter>(unique_instance).first;
if (!cfg.id.has_value()) {
RegionCounter* rc = fManagementSegment.find<RegionCounter>(unique_instance).first;
if (rc) {
LOG(debug) << "region counter found, with value of " << rc->fCount << ". incrementing.";
(rc->fCount)++;
LOG(debug) << "incremented region counter, now: " << rc->fCount;
} else {
LOG(debug) << "no region counter found, creating one and initializing with 1";
rc = fManagementSegment.construct<RegionCounter>(unique_instance)(1);
LOG(debug) << "initialized region counter with: " << rc->fCount;
}
id = rc->fCount;
auto it = fRegions.find(id);
if (it != fRegions.end()) {
LOG(error) << "Trying to create a region that already exists";
return {nullptr, id};
}
auto r = fRegions.emplace(id, std::make_unique<Region>(fShmId, id, size, false, callback, bulkCallback, path, flags));
// LOG(debug) << "Created region with id '" << id << "', path: '" << path << "', flags: '" << flags << "'";
if (cfg.lock) {
LOG(debug) << "Locking region " << id << "...";
if (mlock(r.first->second->fRegion.get_address(), r.first->second->fRegion.get_size()) == -1) {
LOG(error) << "Could not lock region " << id << ". Code: " << errno << ", reason: " << strerror(errno);
throw TransportError(tools::ToString("Could not lock region ", id, ": ", strerror(errno)));
if (rc) {
LOG(trace) << "region counter found, with value of " << rc->fCount << ". incrementing.";
(rc->fCount)++;
LOG(trace) << "incremented region counter, now: " << rc->fCount;
} else {
LOG(trace) << "no region counter found, creating one and initializing with 1024";
rc = fManagementSegment.construct<RegionCounter>(unique_instance)(1024);
LOG(trace) << "initialized region counter with: " << rc->fCount;
}
LOG(debug) << "Successfully locked region " << id << ".";
}
if (cfg.zero) {
LOG(debug) << "Zeroing free memory of region " << id << "...";
memset(r.first->second->fRegion.get_address(), 0x00, r.first->second->fRegion.get_size());
LOG(debug) << "Successfully zeroed free memory of region " << id << ".";
cfg.id = rc->fCount;
}
fShmRegions->emplace(id, RegionInfo(path.c_str(), flags, userFlags, fShmVoidAlloc));
const uint16_t id = cfg.id.value();
r.first->second->StartReceivingAcks();
result.first = &(r.first->second->fRegion);
auto res = fRegions.emplace(id, std::make_unique<UnmanagedRegion>(fShmId, size, false, cfg));
bool newRegionCreated = res.second;
UnmanagedRegion& region = *(res.first->second);
// LOG(debug) << "Created region with id '" << id << "', path: '" << cfg.path << "', flags: '" << cfg.creationFlags << "'";
if (!newRegionCreated) {
region.fRemote = false; // TODO: this should be more clear, refactor it.
}
// start ack receiver only if a callback has been provided.
if (callback || bulkCallback) {
region.SetCallbacks(callback, bulkCallback);
region.InitializeQueues();
region.StartAckSender();
region.StartAckReceiver();
}
result.first = &(region);
result.second = id;
(fEventCounter->fCount)++;
}
fRegionsGen += 1; // signal TL cache invalidation
fRegionEventsShmCV.notify_all();
return result;
} catch (interprocess_exception& e) {
@@ -440,7 +419,7 @@ class Manager
}
}
Region* GetRegion(const uint16_t id)
UnmanagedRegion* GetRegion(const uint16_t id)
{
// NOTE: gcc optimizations. Prevent loading tls addresses many times in the fast path
const auto &lTlCache = fTlRegionCache;
@@ -454,19 +433,19 @@ class Manager
}
}
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> shmLock(*fShmMtx);
// slow path: check invalidation
if (lTlCacheGen != fRegionsGen) {
fTlRegionCache.fRegionsTLCache.clear();
}
auto *lRegion = GetRegionUnsafe(id);
auto* lRegion = GetRegionUnsafe(id, shmLock);
fTlRegionCache.fRegionsTLCache.emplace_back(std::make_tuple(lRegion, id, fShmId64));
fTlRegionCache.fRegionsTLCacheGen = fRegionsGen;
return lRegion;
}
Region* GetRegionUnsafe(const uint16_t id)
UnmanagedRegion* GetRegionUnsafe(const uint16_t id, boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex>& lockedShmLock)
{
// remote region could actually be a local one if a message originates from this device (has been sent out and returned)
auto it = fRegions.find(id);
@@ -476,11 +455,18 @@ class Manager
try {
// get region info
RegionInfo regionInfo = fShmRegions->at(id);
std::string path = regionInfo.fPath.c_str();
int flags = regionInfo.fFlags;
// LOG(debug) << "Located remote region with id '" << id << "', path: '" << path << "', flags: '" << flags << "'";
// safe to unlock now - no shm container accessed after this
lockedShmLock.unlock();
RegionConfig cfg;
cfg.id = id;
cfg.creationFlags = regionInfo.fCreationFlags;
cfg.path = regionInfo.fPath.c_str();
// LOG(debug) << "Located remote region with id '" << id << "', path: '" << cfg.path << "', flags: '" << cfg.creationFlags << "'";
auto r = fRegions.emplace(id, std::make_unique<Region>(fShmId, id, 0, true, nullptr, nullptr, path, flags));
auto r = fRegions.emplace(id, std::make_unique<UnmanagedRegion>(fShmId, 0, true, std::move(cfg)));
r.first->second->InitializeQueues();
r.first->second->StartAckSender();
lockedShmLock.lock();
return r.first->second.get();
} catch (std::out_of_range& oor) {
LOG(error) << "Could not get remote region with id '" << id << "'. Does the region creator run with the same session id?";
@@ -498,48 +484,23 @@ class Manager
try {
fRegions.at(id)->StopAcks();
{
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
fShmRegions->at(id).fDestroyed = true;
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*fShmMtx);
if (fRegions.at(id)->RemoveOnDestruction()) {
fShmRegions->at(id).fDestroyed = true;
(fEventCounter->fCount)++;
}
fRegions.erase(id);
(fEventCounter->fCount)++;
}
fRegionEventsShmCV.notify_all();
} catch(std::out_of_range& oor) {
} catch (std::out_of_range& oor) {
LOG(debug) << "RemoveRegion() could not locate region with id '" << id << "'";
}
fRegionsGen += 1; // signal TL cache invalidation
}
std::vector<fair::mq::RegionInfo> GetRegionInfo()
{
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
return GetRegionInfoUnsafe();
}
std::vector<fair::mq::RegionInfo> GetRegionInfoUnsafe()
{
std::vector<fair::mq::RegionInfo> result;
for (const auto& e : *fShmRegions) {
fair::mq::RegionInfo info;
info.managed = false;
info.id = e.first;
info.flags = e.second.fUserFlags;
info.event = e.second.fDestroyed ? RegionEvent::destroyed : RegionEvent::created;
if (!e.second.fDestroyed) {
auto region = GetRegionUnsafe(info.id);
if (region) {
info.ptr = region->fRegion.get_address();
info.size = region->fRegion.get_size();
} else {
throw std::runtime_error(tools::ToString("GetRegionInfoUnsafe() could not get region with id '", info.id, "'"));
}
} else {
info.ptr = nullptr;
info.size = 0;
}
result.push_back(info);
}
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> shmLock(*fShmMtx);
for (const auto& e : *fShmSegments) {
// make sure any segments in the session are found
@@ -558,6 +519,27 @@ class Manager
}
}
for (const auto& e : *fShmRegions) {
fair::mq::RegionInfo info;
info.managed = false;
info.id = e.first;
info.flags = e.second.fUserFlags;
info.event = e.second.fDestroyed ? RegionEvent::destroyed : RegionEvent::created;
if (info.event == RegionEvent::created) {
auto region = GetRegionUnsafe(info.id, shmLock);
if (region) {
info.ptr = region->GetData();
info.size = region->GetSize();
} else {
throw std::runtime_error(tools::ToString("GetRegionInfo() could not get region with id '", info.id, "'"));
}
} else {
info.ptr = nullptr;
info.size = 0;
}
result.push_back(info);
}
return result;
}
@@ -565,13 +547,13 @@ class Manager
{
if (fRegionEventThread.joinable()) {
LOG(debug) << "Already subscribed. Overwriting previous subscription.";
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
std::unique_lock<std::mutex> lock(fRegionEventsMtx);
fRegionEventsSubscriptionActive = false;
lock.unlock();
fRegionEventsShmCV.notify_all();
fRegionEventsCV.notify_one();
fRegionEventThread.join();
}
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
std::lock_guard<std::mutex> lock(fRegionEventsMtx);
fRegionEventCallback = callback;
fRegionEventsSubscriptionActive = true;
fRegionEventThread = std::thread(&Manager::RegionEventsSubscription, this);
@@ -582,10 +564,10 @@ class Manager
void UnsubscribeFromRegionEvents()
{
if (fRegionEventThread.joinable()) {
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
std::unique_lock<std::mutex> lock(fRegionEventsMtx);
fRegionEventsSubscriptionActive = false;
lock.unlock();
fRegionEventsShmCV.notify_all();
fRegionEventsCV.notify_one();
fRegionEventThread.join();
lock.lock();
fRegionEventCallback = nullptr;
@@ -594,33 +576,38 @@ class Manager
void RegionEventsSubscription()
{
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
std::unique_lock<std::mutex> lock(fRegionEventsMtx);
while (fRegionEventsSubscriptionActive) {
auto infos = GetRegionInfoUnsafe();
for (const auto& i : infos) {
auto el = fObservedRegionEvents.find({i.id, i.managed});
if (el == fObservedRegionEvents.end()) { // if event id has not been observed
fObservedRegionEvents.emplace(std::make_pair(i.id, i.managed), i.event);
// if a region has been created and destroyed rapidly, we could see 'destroyed' without ever seeing 'created'
// TODO: do we care to show 'created' events if we know region is already destroyed?
if (i.event == RegionEvent::created) {
fRegionEventCallback(i);
++fNumObservedEvents;
} else {
fNumObservedEvents += 2;
}
} else { // if event id has been observed (expected - there are two events per id - created & destroyed)
// fire a callback if we have observed 'created' event and incoming is 'destroyed'
if (el->second == RegionEvent::created && i.event == RegionEvent::destroyed) {
fRegionEventCallback(i);
el->second = i.event;
++fNumObservedEvents;
} else {
// LOG(debug) << "ignoring event " << i.id << ": incoming: " << i.event << ", stored: " << el->second;
if (fNumObservedEvents != fEventCounter->fCount) {
auto infos = GetRegionInfo();
for (const auto& i : infos) {
auto el = fObservedRegionEvents.find({i.id, i.managed});
if (el == fObservedRegionEvents.end()) { // if event id has not been observed
fObservedRegionEvents.emplace(std::make_pair(i.id, i.managed), i.event);
// if a region has been created and destroyed rapidly, we could see 'destroyed' without ever seeing 'created'
// TODO: do we care to show 'created' events if we know region is already destroyed?
if (i.event == RegionEvent::created) {
fRegionEventCallback(i);
++fNumObservedEvents;
} else {
fNumObservedEvents += 2;
}
} else { // if event id has been observed (expected - there are two events per id - created & destroyed)
// fire a callback if we have observed 'created' event and incoming is 'destroyed'
if (el->second == RegionEvent::created && i.event == RegionEvent::destroyed) {
fRegionEventCallback(i);
el->second = i.event;
++fNumObservedEvents;
} else {
// LOG(debug) << "ignoring event " << i.id << ": incoming: " << i.event << ", stored: " << el->second;
}
}
}
}
fRegionEventsShmCV.wait(lock, [&] { return !fRegionEventsSubscriptionActive || fNumObservedEvents != fEventCounter->fCount; });
// TODO: do better than polling here, without adding too much shmem contention
fRegionEventsCV.wait_for(lock, std::chrono::milliseconds(50), [&] { return !fRegionEventsSubscriptionActive; });
}
}
@@ -729,7 +716,7 @@ class Manager
}
}
#ifdef FAIRMQ_DEBUG_MODE
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*fShmMtx);
IncrementShmMsgCounter(fSegmentId);
if (fMsgDebug->count(fSegmentId) == 0) {
fMsgDebug->emplace(fSegmentId, fShmVoidAlloc);
@@ -748,11 +735,11 @@ class Manager
{
char* ptr = GetAddressFromHandle(handle, segmentId);
#ifdef FAIRMQ_DEBUG_MODE
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fShmMtx);
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*fShmMtx);
DecrementShmMsgCounter(segmentId);
try {
fMsgDebug->at(segmentId).erase(GetHandleFromAddress(ShmHeader::UserPtr(ptr), fSegmentId));
} catch(const std::out_of_range& oor) {
} catch (const std::out_of_range& oor) {
LOG(debug) << "could not locate debug container for " << segmentId << ": " << oor.what();
}
#endif
@@ -773,7 +760,7 @@ class Manager
bool lastRemoved = false;
try {
boost::interprocess::scoped_lock<named_mutex> lock(fShmMtx);
boost::interprocess::scoped_lock<interprocess_mutex> lock(*fShmMtx);
(fDeviceCounter->fCount)--;
@@ -787,8 +774,10 @@ class Manager
LOG(error) << "Manager could not acquire lock: " << e.what();
}
if (lastRemoved && !fNoCleanup) {
Monitor::Cleanup(ShmId{fShmId});
if (lastRemoved) {
if (!fNoCleanup) {
Monitor::Cleanup(ShmId{fShmId});
}
}
}
@@ -806,13 +795,13 @@ class Manager
uint64_t fShmId64;
std::string fShmId;
uint16_t fSegmentId;
std::string fDeviceId;
std::unordered_map<uint16_t, boost::variant<RBTreeBestFitSegment, SimpleSeqFitSegment>> fSegments;
boost::interprocess::managed_shared_memory fManagementSegment;
std::unordered_map<uint16_t, boost::variant<RBTreeBestFitSegment, SimpleSeqFitSegment>> fSegments; // TODO: refactor to use Segment class
boost::interprocess::managed_shared_memory fManagementSegment; // TODO: refactor to use ManagementSegment class
VoidAlloc fShmVoidAlloc;
boost::interprocess::named_mutex fShmMtx;
boost::interprocess::interprocess_mutex* fShmMtx;
boost::interprocess::named_condition fRegionEventsShmCV;
std::mutex fRegionEventsMtx;
std::condition_variable fRegionEventsCV;
std::thread fRegionEventThread;
std::function<void(fair::mq::RegionInfo)> fRegionEventCallback;
std::map<std::pair<uint16_t, bool>, RegionEvent> fObservedRegionEvents; // pair: <region id, managed>
@@ -822,11 +811,11 @@ class Manager
EventCounter* fEventCounter;
Uint16SegmentInfoHashMap* fShmSegments;
Uint16RegionInfoHashMap* fShmRegions;
std::unordered_map<uint16_t, std::unique_ptr<Region>> fRegions;
std::unordered_map<uint16_t, std::unique_ptr<UnmanagedRegion>> fRegions;
inline static std::atomic<unsigned long> fRegionsGen = 0ul;
inline static thread_local struct ManagerTLCache {
unsigned long fRegionsTLCacheGen;
std::vector<std::tuple<Region*, uint16_t, uint64_t>> fRegionsTLCache;
std::vector<std::tuple<UnmanagedRegion*, uint16_t, uint64_t>> fRegionsTLCache;
} fTlRegionCache;
#ifdef FAIRMQ_DEBUG_MODE

View File

@@ -10,11 +10,12 @@
#include "Common.h"
#include "Manager.h"
#include "Region.h"
#include "UnmanagedRegion.h"
#include <FairMQLogger.h>
#include <FairMQMessage.h>
#include <FairMQUnmanagedRegion.h>
#include "UnmanagedRegionImpl.h"
#include <fairmq/Message.h>
#include <fairmq/UnmanagedRegion.h>
#include <fairlogger/Logger.h>
#include <boost/interprocess/mapped_region.hpp>
@@ -38,7 +39,7 @@ class Message final : public fair::mq::Message
: fair::mq::Message(factory)
, fManager(manager)
, fQueued(false)
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId()}
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId(), true}
, fRegionPtr(nullptr)
, fLocalPtr(nullptr)
{
@@ -49,7 +50,7 @@ class Message final : public fair::mq::Message
: fair::mq::Message(factory)
, fManager(manager)
, fQueued(false)
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId()}
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId(), true}
, fAlignment(alignment.alignment)
, fRegionPtr(nullptr)
, fLocalPtr(nullptr)
@@ -61,7 +62,7 @@ class Message final : public fair::mq::Message
: fair::mq::Message(factory)
, fManager(manager)
, fQueued(false)
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId()}
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId(), true}
, fRegionPtr(nullptr)
, fLocalPtr(nullptr)
{
@@ -73,7 +74,7 @@ class Message final : public fair::mq::Message
: fair::mq::Message(factory)
, fManager(manager)
, fQueued(false)
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId()}
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId(), true}
, fAlignment(alignment.alignment)
, fRegionPtr(nullptr)
, fLocalPtr(nullptr)
@@ -86,7 +87,7 @@ class Message final : public fair::mq::Message
: fair::mq::Message(factory)
, fManager(manager)
, fQueued(false)
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId()}
, fMeta{0, 0, -1, -1, 0, fManager.GetSegmentId(), true}
, fRegionPtr(nullptr)
, fLocalPtr(nullptr)
{
@@ -105,7 +106,7 @@ class Message final : public fair::mq::Message
: fair::mq::Message(factory)
, fManager(manager)
, fQueued(false)
, fMeta{size, reinterpret_cast<size_t>(hint), -1, -1, static_cast<UnmanagedRegion*>(region.get())->fRegionId, fManager.GetSegmentId()}
, fMeta{size, reinterpret_cast<size_t>(hint), -1, -1, static_cast<UnmanagedRegionImpl*>(region.get())->fRegionId, fManager.GetSegmentId(), false}
, fRegionPtr(nullptr)
, fLocalPtr(static_cast<char*>(data))
{
@@ -186,7 +187,7 @@ class Message final : public fair::mq::Message
void* GetData() const override
{
if (!fLocalPtr) {
if (fMeta.fRegionId == 0) {
if (fMeta.fManaged) {
if (fMeta.fSize > 0) {
fManager.GetSegment(fMeta.fSegmentId);
fLocalPtr = ShmHeader::UserPtr(fManager.GetAddressFromHandle(fMeta.fHandle, fMeta.fSegmentId));
@@ -196,7 +197,7 @@ class Message final : public fair::mq::Message
} else {
fRegionPtr = fManager.GetRegion(fMeta.fRegionId);
if (fRegionPtr) {
fLocalPtr = reinterpret_cast<char*>(fRegionPtr->fRegion.get_address()) + fMeta.fHandle;
fLocalPtr = reinterpret_cast<char*>(fRegionPtr->GetData()) + fMeta.fHandle;
} else {
// LOG(warn) << "could not get pointer from a region message";
fLocalPtr = nullptr;
@@ -258,7 +259,7 @@ class Message final : public fair::mq::Message
return 1;
}
if (fMeta.fRegionId == 0) { // managed segment
if (fMeta.fManaged) { // managed segment
fManager.GetSegment(fMeta.fSegmentId);
return ShmHeader::RefCount(fManager.GetAddressFromHandle(fMeta.fHandle, fMeta.fSegmentId));
} else { // unmanaged region
@@ -285,7 +286,7 @@ class Message final : public fair::mq::Message
CloseMessage();
}
if (otherMsg.fMeta.fRegionId == 0) { // managed segment
if (otherMsg.fMeta.fManaged) { // managed segment
fMeta = otherMsg.fMeta;
fManager.GetSegment(fMeta.fSegmentId);
ShmHeader::IncrementRefCount(fManager.GetAddressFromHandle(fMeta.fHandle, fMeta.fSegmentId));
@@ -316,7 +317,7 @@ class Message final : public fair::mq::Message
bool fQueued;
MetaHeader fMeta;
size_t fAlignment;
mutable Region* fRegionPtr;
mutable UnmanagedRegion* fRegionPtr;
mutable char* fLocalPtr;
char* InitializeChunk(const size_t size, size_t alignment = 0)
@@ -335,7 +336,7 @@ class Message final : public fair::mq::Message
void Deallocate()
{
if (fMeta.fHandle >= 0 && !fQueued) {
if (fMeta.fRegionId == 0) { // managed segment
if (fMeta.fManaged) { // managed segment
fManager.GetSegment(fMeta.fSegmentId);
uint16_t refCount = ShmHeader::DecrementRefCount(fManager.GetAddressFromHandle(fMeta.fHandle, fMeta.fSegmentId));
if (refCount == 1) {
@@ -380,9 +381,9 @@ class Message final : public fair::mq::Message
Deallocate();
fAlignment = 0;
fManager.DecrementMsgCounter();
} catch(SharedMemoryError& sme) {
} catch (SharedMemoryError& sme) {
LOG(error) << "error closing message: " << sme.what();
} catch(boost::interprocess::lock_exception& le) {
} catch (boost::interprocess::lock_exception& le) {
LOG(error) << "error closing message: " << le.what();
}
}

View File

@@ -8,6 +8,7 @@
#include "Monitor.h"
#include "Common.h"
#include "UnmanagedRegion.h"
#include <fairmq/tools/IO.h>
#include <fairmq/tools/Strings.h>
@@ -16,6 +17,7 @@
#include <boost/interprocess/file_mapping.hpp>
#include <boost/interprocess/sync/named_mutex.hpp>
#include <boost/interprocess/sync/interprocess_mutex.hpp>
#include <boost/interprocess/sync/named_condition.hpp>
#include <boost/interprocess/ipc/message_queue.hpp>
@@ -179,15 +181,21 @@ bool Monitor::PrintShm(const ShmId& shmId)
managed_shared_memory managementSegment(open_read_only, std::string("fmq_" + shmId.shmId + "_mng").c_str());
VoidAlloc allocInstance(managementSegment.get_segment_manager());
Uint16SegmentInfoHashMap* segmentInfos = managementSegment.find<Uint16SegmentInfoHashMap>(unique_instance).first;
Uint16SegmentInfoHashMap* shmSegments = managementSegment.find<Uint16SegmentInfoHashMap>(unique_instance).first;
std::unordered_map<uint16_t, boost::variant<RBTreeBestFitSegment, SimpleSeqFitSegment>> segments;
if (!segmentInfos) {
Uint16RegionInfoHashMap* shmRegions = managementSegment.find<Uint16RegionInfoHashMap>(unique_instance).first;
if (!shmSegments) {
LOG(error) << "Found management segment, but cannot locate segment info, something went wrong...";
return false;
}
for (const auto& s : *segmentInfos) {
if (!shmRegions) {
LOG(error) << "Found management segment, but cannot locate region info...";
}
for (const auto& s : *shmSegments) {
if (s.second.fAllocationAlgorithm == AllocationAlgorithm::rbtree_best_fit) {
segments.emplace(s.first, RBTreeBestFitSegment(open_read_only, std::string("fmq_" + shmId.shmId + "_m_" + to_string(s.first)).c_str()));
} else {
@@ -221,19 +229,30 @@ bool Monitor::PrintShm(const ShmId& shmId)
<< ", session: " << sessionName
<< ", creator id: " << creatorId
<< ", devices: " << numDevices
<< ", segments:\n";
<< ", managed segments:\n";
for (const auto& s : segments) {
size_t free = boost::apply_visitor(SegmentFreeMemory(), s.second);
size_t total = boost::apply_visitor(SegmentSize(), s.second);
size_t used = total - free;
ss << " [" << s.first
<< "]: total: " << total
std::string msgCount;
#ifdef FAIRMQ_DEBUG_MODE
<< ", msgs: " << ( (msgCounters != nullptr) ? to_string((*msgCounters)[s.first].fCount) : "unknown")
if (msgCounters) {
auto it = msgCounters->find(s.first);
if (it != msgCounters->end()) {
msgCount = to_string(it->second.fCount.load());
} else {
msgCount = "0";
}
}
#else
<< ", msgs: NODEBUG"
msgCount = "NODEBUG";
#endif
ss << " [" << s.first << "]"
<< ": total: " << total
<< ", msgs: " << msgCount
<< ", free: " << free
<< ", used: " << used
<< "\n";
@@ -243,6 +262,20 @@ bool Monitor::PrintShm(const ShmId& shmId)
<< "total: " << mtotal
<< ", free: " << mfree
<< ", used: " << mused;
if (shmRegions && !shmRegions->empty()) {
ss << "\n unmanaged regions:";
for (const auto& r : *shmRegions) {
ss << "\n [" << r.first << "]: " << (r.second.fDestroyed ? "destroyed" : "alive");
try {
boost::interprocess::message_queue q(open_only, std::string("fmq_" + std::string(shmId) + "_rgq_" + to_string(r.first)).c_str());
ss << ", ack queue: " << q.get_num_msg() << " messages";
} catch (bie&) {
ss << ", ack queue: not found";
}
}
}
LOGV(info, user1) << ss.str();
} catch (bie&) {
return false;
@@ -374,8 +407,8 @@ void Monitor::PrintDebugInfo(const ShmId& shmId __attribute__((unused)))
string managementSegmentName("fmq_" + shmId.shmId + "_mng");
try {
bipc::managed_shared_memory managementSegment(bipc::open_only, managementSegmentName.c_str());
boost::interprocess::named_mutex mtx(boost::interprocess::open_only, string("fmq_" + shmId.shmId + "_mtx").c_str());
boost::interprocess::scoped_lock<bipc::named_mutex> lock(mtx);
bipc::interprocess_mutex* mtx(managementSegment.find_or_construct<bipc::interprocess_mutex>(bipc::unique_instance)());
bipc::scoped_lock<bipc::interprocess_mutex> lock(*mtx);
Uint16MsgDebugMapHashMap* debug = managementSegment.find<Uint16MsgDebugMapHashMap>(bipc::unique_instance).first;
@@ -422,8 +455,8 @@ unordered_map<uint16_t, std::vector<BufferDebugInfo>> Monitor::GetDebugInfo(cons
string managementSegmentName("fmq_" + shmId.shmId + "_mng");
try {
bipc::managed_shared_memory managementSegment(bipc::open_only, managementSegmentName.c_str());
boost::interprocess::named_mutex mtx(boost::interprocess::open_only, string("fmq_" + shmId.shmId + "_mtx").c_str());
boost::interprocess::scoped_lock<bipc::named_mutex> lock(mtx);
bipc::interprocess_mutex* mtx(managementSegment.find_or_construct<bipc::interprocess_mutex>(bipc::unique_instance)());
bipc::scoped_lock<bipc::interprocess_mutex> lock(*mtx);
Uint16MsgDebugMapHashMap* debug = managementSegment.find<Uint16MsgDebugMapHashMap>(bipc::unique_instance).first;
@@ -455,18 +488,18 @@ unsigned long Monitor::GetFreeMemory(const ShmId& shmId, uint16_t segmentId)
using namespace boost::interprocess;
try {
bipc::managed_shared_memory managementSegment(bipc::open_only, std::string("fmq_" + shmId.shmId + "_mng").c_str());
boost::interprocess::named_mutex mtx(boost::interprocess::open_only, std::string("fmq_" + shmId.shmId + "_mtx").c_str());
boost::interprocess::scoped_lock<bipc::named_mutex> lock(mtx);
boost::interprocess::interprocess_mutex* mtx(managementSegment.find_or_construct<bipc::interprocess_mutex>(bipc::unique_instance)());
boost::interprocess::scoped_lock<bipc::interprocess_mutex> lock(*mtx);
Uint16SegmentInfoHashMap* segmentInfos = managementSegment.find<Uint16SegmentInfoHashMap>(unique_instance).first;
Uint16SegmentInfoHashMap* shmSegments = managementSegment.find<Uint16SegmentInfoHashMap>(unique_instance).first;
if (!segmentInfos) {
if (!shmSegments) {
LOG(error) << "Found management segment, but could not locate segment info";
throw MonitorError("Found management segment, but could not locate segment info");
}
auto it = segmentInfos->find(segmentId);
if (it != segmentInfos->end()) {
auto it = shmSegments->find(segmentId);
if (it != shmSegments->end()) {
if (it->second.fAllocationAlgorithm == AllocationAlgorithm::rbtree_best_fit) {
RBTreeBestFitSegment segment(open_read_only, std::string("fmq_" + shmId.shmId + "_m_" + std::to_string(segmentId)).c_str());
return segment.get_free_memory();
@@ -497,10 +530,16 @@ void Monitor::PrintHelp()
<< "[q] quit.";
}
/// Removes the named shared memory object; returns the result of the underlying remove call.
bool Monitor::RemoveObject(const string& name)
{
    return bipc::shared_memory_object::remove(name.c_str());
}
/// Removes the named file mapping; returns the result of the underlying remove call.
bool Monitor::RemoveFileMapping(const string& name)
{
    return bipc::file_mapping::remove(name.c_str());
}
/// Removes the named message queue; returns the result of the underlying remove call.
bool Monitor::RemoveQueue(const string& name)
{
    return bipc::message_queue::remove(name.c_str());
}
/// Removes the named mutex; returns the result of the underlying remove call.
bool Monitor::RemoveMutex(const string& name)
{
    return bipc::named_mutex::remove(name.c_str());
}
/// Removes the named condition variable; returns the result of the underlying remove call.
bool Monitor::RemoveCondition(const string& name)
{
    return bipc::named_condition::remove(name.c_str());
}
std::pair<std::string, bool> RunRemoval(std::function<bool(const std::string&)> f, std::string name, bool verbose)
template<typename T>
std::pair<std::string, bool> Remove(const std::string& name, bool verbose)
{
if (f(name)) {
if (T::remove(name.c_str())) {
if (verbose) {
LOG(info) << "Successfully removed '" << name << "'.";
}
@@ -513,12 +552,6 @@ std::pair<std::string, bool> RunRemoval(std::function<bool(const std::string&)>
}
}
bool Monitor::RemoveObject(const string& name) { return bipc::shared_memory_object::remove(name.c_str()); }
bool Monitor::RemoveFileMapping(const string& name) { return bipc::file_mapping::remove(name.c_str()); }
bool Monitor::RemoveQueue(const string& name) { return bipc::message_queue::remove(name.c_str()); }
bool Monitor::RemoveMutex(const string& name) { return bipc::named_mutex::remove(name.c_str()); }
bool Monitor::RemoveCondition(const string& name) { return bipc::named_condition::remove(name.c_str()); }
std::vector<std::pair<std::string, bool>> Monitor::Cleanup(const ShmId& shmId, bool verbose /* = true */)
{
std::vector<std::pair<std::string, bool>> result;
@@ -531,51 +564,36 @@ std::vector<std::pair<std::string, bool>> Monitor::Cleanup(const ShmId& shmId, b
try {
bipc::managed_shared_memory managementSegment(bipc::open_only, managementSegmentName.c_str());
try {
RegionCounter* rc = managementSegment.find<RegionCounter>(bipc::unique_instance).first;
if (rc) {
if (verbose) {
LOG(debug) << "Region counter found: " << rc->fCount;
}
uint16_t regionCount = rc->fCount;
Uint16RegionInfoMap* m = managementSegment.find<Uint16RegionInfoMap>(bipc::unique_instance).first;
for (uint16_t i = 1; i <= regionCount; ++i) {
if (m != nullptr) {
RegionInfo ri = m->at(i);
string path = ri.fPath.c_str();
int flags = ri.fFlags;
if (verbose) {
LOG(info) << "Found RegionInfo with path: '" << path << "', flags: " << flags << ", fDestroyed: " << ri.fDestroyed << ".";
}
if (!path.empty()) {
result.emplace_back(RunRemoval(Monitor::RemoveFileMapping, path + "fmq_" + shmId.shmId + "_rg_" + to_string(i), verbose));
} else {
result.emplace_back(RunRemoval(Monitor::RemoveObject, "fmq_" + shmId.shmId + "_rg_" + to_string(i), verbose));
}
} else {
result.emplace_back(RunRemoval(Monitor::RemoveObject, "fmq_" + shmId.shmId + "_rg_" + to_string(i), verbose));
}
result.emplace_back(RunRemoval(Monitor::RemoveQueue, string("fmq_" + shmId.shmId + "_rgq_" + to_string(i)), verbose));
}
} else {
if (verbose) {
LOG(info) << "No region counter found. No regions to cleanup.";
}
}
} catch(out_of_range& oor) {
Uint16RegionInfoHashMap* shmRegions = managementSegment.find<Uint16RegionInfoHashMap>(bipc::unique_instance).first;
if (shmRegions) {
if (verbose) {
LOG(info) << "Could not locate element in the region map, out of range: " << oor.what();
LOG(info) << "Found " << shmRegions->size() << " unmanaged regions...";
}
for (const auto& region : *shmRegions) {
uint16_t id = region.first;
RegionInfo info = region.second;
string path = info.fPath.c_str();
int flags = info.fCreationFlags;
if (verbose) {
LOG(info) << "Found RegionInfo with path: '" << path << "', flags: " << flags << ", fDestroyed: " << info.fDestroyed << ".";
}
if (!path.empty()) {
result.emplace_back(Remove<bipc::file_mapping>(path + "fmq_" + shmId.shmId + "_rg_" + to_string(id), verbose));
} else {
result.emplace_back(Remove<bipc::shared_memory_object>("fmq_" + shmId.shmId + "_rg_" + to_string(id), verbose));
}
result.emplace_back(Remove<bipc::message_queue>("fmq_" + shmId.shmId + "_rgq_" + to_string(id), verbose));
}
}
Uint16SegmentInfoHashMap* segmentInfos = managementSegment.find<Uint16SegmentInfoHashMap>(bipc::unique_instance).first;
Uint16SegmentInfoHashMap* shmSegments = managementSegment.find<Uint16SegmentInfoHashMap>(bipc::unique_instance).first;
if (segmentInfos) {
for (const auto& s : *segmentInfos) {
result.emplace_back(RunRemoval(Monitor::RemoveObject, "fmq_" + shmId.shmId + "_m_" + to_string(s.first), verbose));
if (shmSegments) {
if (verbose) {
LOG(info) << "Found " << shmSegments->size() << " managed segments...";
}
for (const auto& segment : *shmSegments) {
result.emplace_back(Remove<bipc::shared_memory_object>("fmq_" + shmId.shmId + "_m_" + to_string(segment.first), verbose));
}
} else {
if (verbose) {
@@ -583,16 +601,13 @@ std::vector<std::pair<std::string, bool>> Monitor::Cleanup(const ShmId& shmId, b
}
}
result.emplace_back(RunRemoval(Monitor::RemoveObject, managementSegmentName.c_str(), verbose));
result.emplace_back(Remove<bipc::shared_memory_object>(managementSegmentName, verbose));
} catch (bie&) {
if (verbose) {
LOG(info) << "Did not find '" << managementSegmentName << "' shared memory segment. No regions to cleanup.";
LOG(info) << "Did not find '" << managementSegmentName << "' management segment. No regions to cleanup.";
}
}
result.emplace_back(RunRemoval(Monitor::RemoveMutex, "fmq_" + shmId.shmId + "_mtx", verbose));
result.emplace_back(RunRemoval(Monitor::RemoveCondition, "fmq_" + shmId.shmId + "_cv", verbose));
return result;
}
@@ -608,7 +623,7 @@ std::vector<std::pair<std::string, bool>> Monitor::Cleanup(const SessionId& sess
std::vector<std::pair<std::string, bool>> Monitor::CleanupFull(const ShmId& shmId, bool verbose /* = true */)
{
auto result = Cleanup(shmId, verbose);
result.emplace_back(RunRemoval(Monitor::RemoveMutex, "fmq_" + shmId.shmId + "_ms", verbose));
result.emplace_back(Remove<bipc::named_mutex>("fmq_" + shmId.shmId + "_ms", verbose));
return result;
}
@@ -621,6 +636,62 @@ std::vector<std::pair<std::string, bool>> Monitor::CleanupFull(const SessionId&
return CleanupFull(shmId, verbose);
}
void Monitor::ResetContent(const ShmId& shmId, bool verbose /* = true */)
{
if (verbose) {
cout << "Resetting segments content for shared memory id '" << shmId.shmId << "'..." << endl;
}
string managementSegmentName("fmq_" + shmId.shmId + "_mng");
try {
using namespace boost::interprocess;
managed_shared_memory managementSegment(open_only, managementSegmentName.c_str());
Uint16SegmentInfoHashMap* segmentInfos = managementSegment.find<Uint16SegmentInfoHashMap>(unique_instance).first;
for (const auto& s : *segmentInfos) {
if (verbose) {
cout << "Resetting content of segment '" << "fmq_" << shmId.shmId << "_m_" << s.first << "'..." << endl;
}
try {
if (s.second.fAllocationAlgorithm == AllocationAlgorithm::rbtree_best_fit) {
RBTreeBestFitSegment segment(open_only, std::string("fmq_" + shmId.shmId + "_m_" + to_string(s.first)).c_str());
void* ptr = segment.get_segment_manager();
size_t size = segment.get_segment_manager()->get_size();
new(ptr) segment_manager<char, rbtree_best_fit<mutex_family, offset_ptr<void>>, null_index>(size);
} else {
SimpleSeqFitSegment segment(open_only, std::string("fmq_" + shmId.shmId + "_m_" + to_string(s.first)).c_str());
void* ptr = segment.get_segment_manager();
size_t size = segment.get_segment_manager()->get_size();
new(ptr) segment_manager<char, simple_seq_fit<mutex_family, offset_ptr<void>>, null_index>(size);
}
} catch (bie& e) {
if (verbose) {
cout << "Error resetting content of segment '" << std::string("fmq_" + shmId.shmId + "_m_" + to_string(s.first)) << "': " << e.what() << endl;
}
}
}
} catch (bie& e) {
if (verbose) {
cout << "Could not find '" << managementSegmentName << "' segment. Nothing to cleanup." << endl;
cout << e.what() << endl;
}
}
if (verbose) {
cout << "Done resetting segment content for shared memory id '" << shmId.shmId << "'." << endl;
}
}
void Monitor::ResetContent(const SessionId& sessionId, bool verbose /* = true */)
{
ShmId shmId{makeShmIdStr(sessionId.sessionId)};
if (verbose) {
cout << "ResetContent called with session id '" << sessionId.sessionId << "', translating to shared memory id '" << shmId.shmId << "'" << endl;
}
ResetContent(shmId, verbose);
}
Monitor::~Monitor()
{
if (fSignalThread.joinable()) {

View File

@@ -80,6 +80,15 @@ class Monitor
/// @param verbose output cleanup results to stdout
static std::vector<std::pair<std::string, bool>> CleanupFull(const SessionId& sessionId, bool verbose = true);
/// @brief [EXPERIMENTAL] clean up the content of the shmem segment, without recreating it
/// @param shmId shared memory id
/// Only call this when segment is not in use
static void ResetContent(const ShmId& shmId, bool verbose = true);
/// @brief [EXPERIMENTAL] clean up the content of the shmem segment, without recreating it
/// @param sessionId session id
/// Only call this when segment is not in use
static void ResetContent(const SessionId& sessionId, bool verbose = true);
/// @brief Outputs list of messages in shmem (if compiled with FAIRMQ_DEBUG_MODE=ON)
/// @param shmId shmem id
static void PrintDebugInfo(const ShmId& shmId);

View File

@@ -14,8 +14,6 @@ FairMQ Shared Memory currently uses the following names to register shared memor
| --------------------------- | ---------------------------------------------- | ------------------ | ------------------------------ |
| `fmq_<shmId>_m_<segmentId>` | managed segment(s) (user data) | one of the devices | devices |
| `fmq_<shmId>_mng` | management segment (management data) | one of the devices | devices |
| `fmq_<shmId>_mtx` | mutex | one of the devices | devices |
| `fmq_<shmId>_cv` | condition variable | one of the devices | devices |
| `fmq_<shmId>_rg_<index>` | unmanaged region(s) | one of the devices | devices with unmanaged regions |
| `fmq_<shmId>_rgq_<index>` | unmanaged region queue(s) | one of the devices | devices with unmanaged regions |
| `fmq_<shmId>_ms` | shmmonitor status | shmmonitor | devices, shmmonitor |

View File

@@ -1,305 +0,0 @@
/********************************************************************************
* Copyright (C) 2014-2021 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#ifndef FAIR_MQ_SHMEM_REGION_H_
#define FAIR_MQ_SHMEM_REGION_H_
#include "Common.h"
#include <FairMQLogger.h>
#include <FairMQUnmanagedRegion.h>
#include <fairmq/tools/Strings.h>
#include <boost/filesystem.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/file_mapping.hpp>
#include <boost/interprocess/ipc/message_queue.hpp>
#include <algorithm> // min
#include <atomic>
#include <thread>
#include <memory> // make_unique
#include <mutex>
#include <condition_variable>
#include <unordered_map>
#include <cerrno>
#include <chrono>
#include <ios>
#include <utility> // move
namespace fair::mq::shmem
{
struct Region
{
// Creates (remote == false) or opens (remote == true) an unmanaged region and maps it into this process.
// The region name is "fmq_<shmId>_rg_<id>" and its queue name is "fmq_<shmId>_rgq_<id>".
// If `path` is non-empty the region is backed by a file at path + name, otherwise by a shared_memory_object.
// `size` is used only when creating (file size / truncate) and as the mapping length in the file-backed case.
// `flags` is forwarded as the last argument of mapped_region.
// `callback` / `bulkCallback` are stored in fCallback / fBulkCallback.
// Throws std::runtime_error if the backing file cannot be opened; rethrows interprocess_exception
// from opening/creating or mapping the shared memory object (after logging it).
Region(const std::string& shmId, uint16_t id, uint64_t size, bool remote, RegionCallback callback, RegionBulkCallback bulkCallback, const std::string& path, int flags)
: fRemote(remote)
, fLinger(100)
, fStopAcks(false)
, fName("fmq_" + shmId + "_rg_" + std::to_string(id))
, fQueueName("fmq_" + shmId + "_rgq_" + std::to_string(id))
, fShmemObject()
, fFile(nullptr)
, fFileMapping()
, fQueue(nullptr)
, fCallback(std::move(callback))
, fBulkCallback(std::move(bulkCallback))
{
using namespace boost::interprocess;
// file-backed region: the file lives at <path><name>
if (!path.empty()) {
fName = std::string(path + fName);
if (!fRemote) {
// create a file
std::filebuf fbuf;
// a failed open is not reported here; the fopen() check below catches a missing/unopenable file
if (fbuf.open(fName, std::ios_base::in | std::ios_base::out | std::ios_base::trunc | std::ios_base::binary)) {
// set the size
// (seek to the last byte and write a single zero there)
// NOTE(review): size - 1 wraps around for size == 0 — confirm callers never pass 0 for a local file-backed region
fbuf.pubseekoff(size - 1, std::ios_base::beg);
fbuf.sputc(0);
}
}
// open the (now existing) file for reading and writing; fails if it does not exist
fFile = fopen(fName.c_str(), "r+");
if (!fFile) {
LOG(error) << "Failed to initialize file: " << fName;
LOG(error) << "errno: " << errno << ": " << strerror(errno);
throw std::runtime_error(tools::ToString("Failed to initialize file for shared memory region: ", strerror(errno)));
}
fFileMapping = file_mapping(fName.c_str(), read_write);
LOG(debug) << "shmem: initialized file: " << fName;
// map `size` bytes of the file starting at offset 0
fRegion = mapped_region(fFileMapping, read_write, 0, size, 0, flags);
} else {
// shared-memory-backed region
try {
if (fRemote) {
// remote side: the object must already exist
fShmemObject = shared_memory_object(open_only, fName.c_str(), read_write);
} else {
// local side: create the object (create_only) and give it its size
fShmemObject = shared_memory_object(create_only, fName.c_str(), read_write);
fShmemObject.truncate(size);
}
} catch(interprocess_exception& e) {
LOG(error) << "Failed " << (fRemote ? "opening" : "creating") << " shared_memory_object for region id '" << id << "': " << e.what();
throw;
}
try {
// offset 0, size 0 — per Boost.Interprocess docs a size of 0 maps up to the end of the object
fRegion = mapped_region(fShmemObject, read_write, 0, 0, 0, flags);
} catch(interprocess_exception& e) {
LOG(error) << "Failed mapping shared_memory_object for region id '" << id << "': " << e.what();
throw;
}
}
// with the memory mapped, set up the region queue and start sending acks
InitializeQueues();
StartSendingAcks();
LOG(trace) << "shmem: initialized region: " << fName << " (" << (fRemote ? "remote" : "local") << ")";
}
Region() = delete;
Region(const Region&) = delete;
Region(Region&&) = delete;
Region& operator=(const Region&) = delete;
Region& operator=(Region&&) = delete;
void InitializeQueues()
{
using namespace boost::interprocess;
if (fRemote) {
fQueue = std::make_unique<message_queue>(open_only, fQueueName.c_str());
} else {
fQueue = std::make_unique<message_queue>(create_only, fQueueName.c_str(), 1024, fAckBunchSize * sizeof(RegionBlock));
}
LOG(trace) << "shmem: initialized region queue: " << fQueueName << " (" << (fRemote ? "remote" : "local") << ")";
}
void StartSendingAcks()
{
fAcksSender = std::thread(&Region::SendAcks, this);
}
void SendAcks()
{
std::unique_ptr<RegionBlock[]> blocks = std::make_unique<RegionBlock[]>(fAckBunchSize);
size_t blocksToSend = 0;
while (true) {
{
std::unique_lock<std::mutex> lock(fBlockMtx);
// try to get <fAckBunchSize> blocks
if (fBlocksToFree.size() < fAckBunchSize) {
fBlockSendCV.wait_for(lock, std::chrono::milliseconds(500));
}
// send whatever blocks we have
blocksToSend = std::min(fBlocksToFree.size(), fAckBunchSize);
copy_n(fBlocksToFree.end() - blocksToSend, blocksToSend, blocks.get());
fBlocksToFree.resize(fBlocksToFree.size() - blocksToSend);
}
if (blocksToSend > 0) {
while (!fQueue->try_send(blocks.get(), blocksToSend * sizeof(RegionBlock), 0) && !fStopAcks) {
// receiver slow? yield and try again...
std::this_thread::yield();
}
// LOG(debug) << "Sent " << blocksToSend << " blocks.";
} else { // blocksToSend == 0
if (fStopAcks) {
break;
}
}
}
LOG(trace) << "AcksSender for " << fName << " leaving " << "(blocks left to free: " << fBlocksToFree.size() << ", "
<< " blocks left to send: " << blocksToSend << ").";
}
void StartReceivingAcks()
{
if (!fAcksReceiver.joinable()) {
fAcksReceiver = std::thread(&Region::ReceiveAcks, this);
}
}
void ReceiveAcks()
{
unsigned int priority = 0;
boost::interprocess::message_queue::size_type recvdSize = 0;
std::unique_ptr<RegionBlock[]> blocks = std::make_unique<RegionBlock[]>(fAckBunchSize);
std::vector<fair::mq::RegionBlock> result;
result.reserve(fAckBunchSize);
while (true) {
uint32_t timeout = 100;
bool leave = false;
if (fStopAcks) {
timeout = fLinger;
leave = true;
}
auto rcvTill = boost::posix_time::microsec_clock::universal_time() + boost::posix_time::milliseconds(timeout);
while (fQueue->timed_receive(blocks.get(), fAckBunchSize * sizeof(RegionBlock), recvdSize, priority, rcvTill)) {
const auto numBlocks = recvdSize / sizeof(RegionBlock);
// LOG(debug) << "Received " << numBlocks << " blocks (recvdSize: " << recvdSize << "). (remaining queue size: " << fQueue->get_num_msg() << ").";
if (fBulkCallback) {
result.clear();
for (size_t i = 0; i < numBlocks; i++) {
result.emplace_back(reinterpret_cast<char*>(fRegion.get_address()) + blocks[i].fHandle, blocks[i].fSize, reinterpret_cast<void*>(blocks[i].fHint));
}
fBulkCallback(result);
} else if (fCallback) {
for (size_t i = 0; i < numBlocks; i++) {
fCallback(reinterpret_cast<char*>(fRegion.get_address()) + blocks[i].fHandle, blocks[i].fSize, reinterpret_cast<void*>(blocks[i].fHint));
}
}
}
if (leave) {
break;
}
}
LOG(trace) << "AcksReceiver for " << fName << " leaving (remaining queue size: " << fQueue->get_num_msg() << ").";
}
void ReleaseBlock(const RegionBlock& block)
{
std::unique_lock<std::mutex> lock(fBlockMtx);
fBlocksToFree.emplace_back(block);
if (fBlocksToFree.size() >= fAckBunchSize) {
lock.unlock();
fBlockSendCV.notify_one();
}
}
void SetLinger(uint32_t linger) { fLinger = linger; }
uint32_t GetLinger() const { return fLinger; }
void StopAcks()
{
fStopAcks = true;
if (fAcksSender.joinable()) {
fBlockSendCV.notify_one();
fAcksSender.join();
}
if (!fRemote) {
if (fAcksReceiver.joinable()) {
fAcksReceiver.join();
}
}
}
~Region()
{
fStopAcks = true;
if (fAcksSender.joinable()) {
fBlockSendCV.notify_one();
fAcksSender.join();
}
if (!fRemote) {
if (fAcksReceiver.joinable()) {
fAcksReceiver.join();
}
if (boost::interprocess::shared_memory_object::remove(fName.c_str())) {
LOG(trace) << "Region '" << fName << "' destroyed.";
}
if (boost::interprocess::file_mapping::remove(fName.c_str())) {
LOG(trace) << "File mapping '" << fName << "' destroyed.";
}
if (fFile) {
fclose(fFile);
}
if (boost::interprocess::message_queue::remove(fQueueName.c_str())) {
LOG(trace) << "Region queue '" << fQueueName << "' destroyed.";
}
} else {
// LOG(debug) << "Region queue '" << fQueueName << "' is remote, no cleanup necessary";
}
// LOG(debug) << "Region '" << fName << "' (" << (fRemote ? "remote" : "local") << ") destructed.";
}
bool fRemote;
uint32_t fLinger;
std::atomic<bool> fStopAcks;
std::string fName;
std::string fQueueName;
boost::interprocess::shared_memory_object fShmemObject;
FILE* fFile;
boost::interprocess::file_mapping fFileMapping;
boost::interprocess::mapped_region fRegion;
std::mutex fBlockMtx;
std::condition_variable fBlockSendCV;
std::vector<RegionBlock> fBlocksToFree;
const std::size_t fAckBunchSize = 256;
std::unique_ptr<boost::interprocess::message_queue> fQueue;
std::thread fAcksReceiver;
std::thread fAcksSender;
RegionCallback fCallback;
RegionBulkCallback fBulkCallback;
};
} // namespace fair::mq::shmem
#endif /* FAIR_MQ_SHMEM_REGION_H_ */

88
fairmq/shmem/Segment.h Normal file
View File

@@ -0,0 +1,88 @@
/********************************************************************************
* Copyright (C) 2021 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#ifndef FAIR_MQ_SHMEM_SEGMENT_H_
#define FAIR_MQ_SHMEM_SEGMENT_H_
#include <fairmq/shmem/Common.h>
#include <fairmq/shmem/Monitor.h>
#include <boost/variant.hpp>
#include <cstdint>
#include <string>
namespace fair::mq::shmem
{
// Tag types (and ready-made tag objects) that select the allocation algorithm
// of a Segment at construction time.
struct SimpleSeqFit {};
struct RBTreeBestFit {};
// inline constexpr: a single entity shared by all translation units that
// include this header, instead of one internal-linkage copy per unit.
inline constexpr SimpleSeqFit simpleSeqFit{};
inline constexpr RBTreeBestFit rbTreeBestFit{};
/**
 * Handle to one managed shared memory segment named "fmq_<shmId>_m_<id>".
 *
 * The segment is opened or created with the allocation algorithm selected by
 * the tag argument and is recorded in the session's management segment
 * ("fmq_<shmId>_mng").
 */
struct Segment
{
    /// Opens or creates the segment with the simple_seq_fit algorithm and registers it.
    Segment(const std::string& shmId, uint16_t id, size_t size, SimpleSeqFit)
        : fSegment(SimpleSeqFitSegment(boost::interprocess::open_or_create,
                                       std::string("fmq_" + shmId + "_m_" + std::to_string(id)).c_str(),
                                       size))
    {
        Register(shmId, id, AllocationAlgorithm::simple_seq_fit);
    }

    /// Opens or creates the segment with the rbtree_best_fit algorithm and registers it.
    Segment(const std::string& shmId, uint16_t id, size_t size, RBTreeBestFit)
        : fSegment(RBTreeBestFitSegment(boost::interprocess::open_or_create,
                                        std::string("fmq_" + shmId + "_m_" + std::to_string(id)).c_str(),
                                        size))
    {
        Register(shmId, id, AllocationAlgorithm::rbtree_best_fit);
    }

    // The accessors below dispatch on the stored segment type through visitors
    // that are declared outside this header.
    size_t GetSize() const { return boost::apply_visitor(SegmentSize(), fSegment); }
    void* GetData() { return boost::apply_visitor(SegmentAddress(), fSegment); }

    size_t GetFreeMemory() const { return boost::apply_visitor(SegmentFreeMemory(), fSegment); }

    void Zero() { boost::apply_visitor(SegmentMemoryZeroer(), fSegment); }

    /// Pins the segment memory with mlock().
    /// @throws TransportError if mlock() fails
    void Lock()
    {
        if (mlock(GetData(), GetSize()) == -1) {
            throw TransportError(tools::ToString("Could not lock the managed segment memory: ", strerror(errno)));
        }
    }

    /// Removes the shared memory object of the given segment via Monitor::RemoveObject.
    static void Remove(const std::string& shmId, uint16_t id)
    {
        Monitor::RemoveObject("fmq_" + shmId + "_m_" + std::to_string(id));
    }

  private:
    boost::variant<RBTreeBestFitSegment, SimpleSeqFitSegment> fSegment;

    /// Records the segment id and its allocation algorithm in the management
    /// segment and bumps the event counter if the id was not known before.
    static void Register(const std::string& shmId, uint16_t id, AllocationAlgorithm allocAlgo)
    {
        using namespace boost::interprocess;
        managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), 6553600);
        VoidAlloc alloc(mngSegment.get_segment_manager());

        Uint16SegmentInfoHashMap* shmSegments = mngSegment.find_or_construct<Uint16SegmentInfoHashMap>(unique_instance)(alloc);

        // find_or_construct looks up and creates in one step; a separate
        // find() + construct() lets a second process create the counter in between.
        EventCounter* eventCounter = mngSegment.find_or_construct<EventCounter>(unique_instance)(0);

        bool newSegmentRegistered = shmSegments->emplace(id, allocAlgo).second;
        if (newSegmentRegistered) {
            (eventCounter->fCount)++;
        }
    }
};
} // namespace fair::mq::shmem
#endif /* FAIR_MQ_SHMEM_SEGMENT_H_ */

View File

@@ -11,13 +11,13 @@
#include "Common.h"
#include "Manager.h"
#include "Message.h"
#include <FairMQSocket.h>
#include <FairMQMessage.h>
#include <FairMQLogger.h>
#include <fairmq/Socket.h>
#include <fairmq/Message.h>
#include <fairmq/tools/Strings.h>
#include <fairmq/zeromq/Common.h>
#include <fairlogger/Logger.h>
#include <zmq.h>
#include <atomic>
@@ -117,52 +117,12 @@ class Socket final : public fair::mq::Socket
bool Bind(const std::string& address) override
{
// LOG(info) << "binding socket " << fId << " on " << address;
if (zmq_bind(fSocket, address.c_str()) != 0) {
if (errno == EADDRINUSE) {
// do not print error in this case, this is handled by FairMQDevice in case no connection could be established after trying a number of random ports from a range.
return false;
}
LOG(error) << "Failed binding socket " << fId << ", reason: " << zmq_strerror(errno);
return false;
}
return true;
return zmq::Bind(fSocket, address, fId);
}
bool Connect(const std::string& address) override
{
// LOG(info) << "connecting socket " << fId << " on " << address;
if (zmq_connect(fSocket, address.c_str()) != 0) {
LOG(error) << "Failed connecting socket " << fId << ", reason: " << zmq_strerror(errno);
return false;
}
return true;
}
/// Decides whether a transfer that would have blocked should be attempted again.
/// @param flags   zmq flags of the transfer; with ZMQ_DONTWAIT set there is never a retry
/// @param timeout overall timeout; values <= 0 mean "retry indefinitely"
/// @param elapsed running total, advanced by fTimeout on every call with a positive timeout
/// @return true if the caller should retry, false if it should give up
bool ShouldRetry(int flags, int timeout, int& elapsed) const
{
    const bool nonBlocking = (flags & ZMQ_DONTWAIT) != 0;
    if (nonBlocking) {
        return false;
    }

    if (timeout <= 0) {
        return true;
    }

    elapsed += fTimeout;
    return elapsed < timeout;
}
int HandleErrors() const
{
if (zmq_errno() == ETERM) {
LOG(debug) << "Terminating socket " << fId;
return static_cast<int>(TransferCode::error);
} else {
LOG(error) << "Failed transfer on socket " << fId << ", reason: " << zmq_strerror(errno);
return static_cast<int>(TransferCode::error);
}
return zmq::Connect(fSocket, address, fId);
}
int64_t Send(MessagePtr& msg, int timeout = -1) override
@@ -186,13 +146,13 @@ class Socket final : public fair::mq::Socket
} else if (zmq_errno() == EAGAIN || zmq_errno() == EINTR) {
if (fManager.Interrupted()) {
return static_cast<int>(TransferCode::interrupted);
} else if (ShouldRetry(flags, timeout, elapsed)) {
} else if (zmq::ShouldRetry(flags, fTimeout, timeout, elapsed)) {
continue;
} else {
return static_cast<int>(TransferCode::timeout);
}
} else {
return HandleErrors();
return zmq::HandleErrors(fId);
}
}
@@ -226,13 +186,13 @@ class Socket final : public fair::mq::Socket
} else if (zmq_errno() == EAGAIN || zmq_errno() == EINTR) {
if (fManager.Interrupted()) {
return static_cast<int>(TransferCode::interrupted);
} else if (ShouldRetry(flags, timeout, elapsed)) {
} else if (zmq::ShouldRetry(flags, fTimeout, timeout, elapsed)) {
continue;
} else {
return static_cast<int>(TransferCode::timeout);
}
} else {
return HandleErrors();
return zmq::HandleErrors(fId);
}
}
}
@@ -277,13 +237,13 @@ class Socket final : public fair::mq::Socket
} else if (zmq_errno() == EAGAIN || zmq_errno() == EINTR) {
if (fManager.Interrupted()) {
return static_cast<int>(TransferCode::interrupted);
} else if (ShouldRetry(flags, timeout, elapsed)) {
} else if (zmq::ShouldRetry(flags, fTimeout, timeout, elapsed)) {
continue;
} else {
return static_cast<int>(TransferCode::timeout);
}
} else {
return HandleErrors();
return zmq::HandleErrors(fId);
}
}
@@ -333,13 +293,13 @@ class Socket final : public fair::mq::Socket
} else if (zmq_errno() == EAGAIN || zmq_errno() == EINTR) {
if (fManager.Interrupted()) {
return static_cast<int>(TransferCode::interrupted);
} else if (ShouldRetry(flags, timeout, elapsed)) {
} else if (zmq::ShouldRetry(flags, fTimeout, timeout, elapsed)) {
continue;
} else {
return static_cast<int>(TransferCode::timeout);
}
} else {
return HandleErrors();
return zmq::HandleErrors(fId);
}
}

View File

@@ -9,17 +9,17 @@
#ifndef FAIR_MQ_SHMEM_TRANSPORTFACTORY_H_
#define FAIR_MQ_SHMEM_TRANSPORTFACTORY_H_
#include "Manager.h"
#include "Common.h"
#include "Manager.h"
#include "Message.h"
#include "Socket.h"
#include "Poller.h"
#include "UnmanagedRegion.h"
#include <FairMQTransportFactory.h>
#include "Socket.h"
#include "UnmanagedRegionImpl.h"
#include <fairmq/ProgOptions.h>
#include <FairMQLogger.h>
#include <fairmq/tools/Strings.h>
#include <fairmq/TransportFactory.h>
#include <fairlogger/Logger.h>
#include <boost/version.hpp>
@@ -78,7 +78,7 @@ class TransportFactory final : public fair::mq::TransportFactory
LOG(error) << "failed configuring context, reason: " << zmq_strerror(errno);
}
fManager = std::make_unique<Manager>(sessionName, deviceId, segmentSize, config);
fManager = std::make_unique<Manager>(sessionName, segmentSize, config);
} catch (boost::interprocess::interprocess_exception& e) {
LOG(error) << "Could not initialize shared memory transport: " << e.what();
throw std::runtime_error(tools::ToString("Could not initialize shared memory transport: ", e.what()));
@@ -145,27 +145,46 @@ class TransportFactory final : public fair::mq::TransportFactory
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, RegionCallback callback = nullptr, const std::string& path = "", int flags = 0, fair::mq::RegionConfig cfg = fair::mq::RegionConfig()) override
{
return CreateUnmanagedRegion(size, 0, callback, nullptr, path, flags, cfg);
cfg.path = path;
cfg.creationFlags = flags;
return CreateUnmanagedRegion(size, callback, nullptr, std::move(cfg));
}
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, RegionBulkCallback bulkCallback = nullptr, const std::string& path = "", int flags = 0, fair::mq::RegionConfig cfg = fair::mq::RegionConfig()) override
{
return CreateUnmanagedRegion(size, 0, nullptr, bulkCallback, path, flags, cfg);
cfg.path = path;
cfg.creationFlags = flags;
return CreateUnmanagedRegion(size, nullptr, bulkCallback, std::move(cfg));
}
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, int64_t userFlags, RegionCallback callback = nullptr, const std::string& path = "", int flags = 0, fair::mq::RegionConfig cfg = fair::mq::RegionConfig()) override
{
return CreateUnmanagedRegion(size, userFlags, callback, nullptr, path, flags, cfg);
cfg.path = path;
cfg.userFlags = userFlags;
cfg.creationFlags = flags;
return CreateUnmanagedRegion(size, callback, nullptr, std::move(cfg));
}
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, int64_t userFlags, RegionBulkCallback bulkCallback = nullptr, const std::string& path = "", int flags = 0, fair::mq::RegionConfig cfg = fair::mq::RegionConfig()) override
{
return CreateUnmanagedRegion(size, userFlags, nullptr, bulkCallback, path, flags, cfg);
cfg.path = path;
cfg.userFlags = userFlags;
cfg.creationFlags = flags;
return CreateUnmanagedRegion(size, nullptr, bulkCallback, std::move(cfg));
}
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, int64_t userFlags, RegionCallback callback, RegionBulkCallback bulkCallback, const std::string& path, int flags, fair::mq::RegionConfig cfg)
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, RegionCallback callback, RegionConfig cfg) override
{
return std::make_unique<UnmanagedRegion>(*fManager, size, userFlags, callback, bulkCallback, path, flags, this, cfg);
return CreateUnmanagedRegion(size, callback, nullptr, std::move(cfg));
}
/// Creates an unmanaged region that reports released blocks through a bulk callback.
/// Forwards to the (size, callback, bulkCallback, cfg) overload with an empty per-block callback.
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, RegionBulkCallback bulkCallback, RegionConfig cfg) override
{
    // bulkCallback is a by-value sink parameter: hand it on instead of copying it.
    return CreateUnmanagedRegion(size, nullptr, std::move(bulkCallback), std::move(cfg));
}
/// Common implementation behind all CreateUnmanagedRegion overloads:
/// constructs an UnmanagedRegionImpl bound to this factory's manager.
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, RegionCallback callback, RegionBulkCallback bulkCallback, fair::mq::RegionConfig cfg)
{
    // Both callbacks are by-value sink parameters: hand them on instead of copying them.
    return std::make_unique<UnmanagedRegionImpl>(*fManager, size, std::move(callback), std::move(bulkCallback), std::move(cfg), this);
}
/// Passes the region event callback on to the manager.
void SubscribeToRegionEvents(RegionEventCallback callback) override
{
    fManager->SubscribeToRegionEvents(callback);
}

View File

@@ -1,5 +1,5 @@
/********************************************************************************
* Copyright (C) 2014 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* Copyright (C) 2014-2021 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence (LGPL) version 3, *
@@ -9,67 +9,370 @@
#ifndef FAIR_MQ_SHMEM_UNMANAGEDREGION_H_
#define FAIR_MQ_SHMEM_UNMANAGEDREGION_H_
#include "Manager.h"
#include <fairmq/shmem/Common.h>
#include <fairmq/shmem/Monitor.h>
#include <fairmq/tools/Strings.h>
#include <fairmq/UnmanagedRegion.h>
#include <FairMQUnmanagedRegion.h>
#include <FairMQLogger.h>
#include <fairlogger/Logger.h>
#include <boost/interprocess/shared_memory_object.hpp>
#include <boost/interprocess/mapped_region.hpp>
#include <boost/filesystem.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/file_mapping.hpp>
#include <boost/interprocess/ipc/message_queue.hpp>
#include <cstddef> // size_t
#include <string>
#include <algorithm> // min
#include <atomic>
#include <thread>
#include <memory> // make_unique
#include <mutex>
#include <condition_variable>
#include <unordered_map>
#include <cerrno>
#include <chrono>
#include <ios>
#include <utility> // move
namespace fair::mq::shmem
{
class Message;
class Socket;
class UnmanagedRegion final : public fair::mq::UnmanagedRegion
struct UnmanagedRegion
{
friend class Message;
friend class Socket;
friend class Manager;
public:
UnmanagedRegion(Manager& manager,
const size_t size,
const int64_t userFlags,
RegionCallback callback,
RegionBulkCallback bulkCallback,
const std::string& path,
int flags,
FairMQTransportFactory* factory,
fair::mq::RegionConfig cfg)
: FairMQUnmanagedRegion(factory)
, fManager(manager)
, fRegion(nullptr)
, fRegionId(0)
// Convenience constructor: builds a RegionConfig that carries only the given
// id (via makeRegionConfig) and delegates to the
// (shmId, size, remote, cfg) constructor with remote = false.
UnmanagedRegion(const std::string& shmId, uint16_t id, uint64_t size)
: UnmanagedRegion(shmId, size, false, makeRegionConfig(id))
{}
// Delegates to the (shmId, size, remote, cfg) constructor with remote = false,
// handing the caller-supplied configuration on by move.
UnmanagedRegion(const std::string& shmId, uint64_t size, RegionConfig cfg)
: UnmanagedRegion(shmId, size, false, std::move(cfg))
{}
UnmanagedRegion(const std::string& shmId, uint64_t size, bool remote, RegionConfig cfg)
: fRemote(remote)
, fRemoveOnDestruction(cfg.removeOnDestruction)
, fLinger(cfg.linger)
, fStopAcks(false)
, fName("fmq_" + shmId + "_rg_" + std::to_string(cfg.id.value()))
, fQueueName("fmq_" + shmId + "_rgq_" + std::to_string(cfg.id.value()))
, fShmemObject()
, fFile(nullptr)
, fFileMapping()
, fQueue(nullptr)
, fCallback(nullptr)
, fBulkCallback(nullptr)
{
auto result = fManager.CreateRegion(size, userFlags, callback, bulkCallback, path, flags, cfg);
fRegion = result.first;
fRegionId = result.second;
using namespace boost::interprocess;
if (!cfg.path.empty()) {
fName = std::string(cfg.path + fName);
if (!fRemote) {
// create a file
std::filebuf fbuf;
if (fbuf.open(fName, std::ios_base::in | std::ios_base::out | std::ios_base::trunc | std::ios_base::binary)) {
// set the size
fbuf.pubseekoff(size - 1, std::ios_base::beg);
fbuf.sputc(0);
}
}
fFile = fopen(fName.c_str(), "r+");
if (!fFile) {
LOG(error) << "Failed to initialize file: " << fName;
LOG(error) << "errno: " << errno << ": " << strerror(errno);
throw std::runtime_error(tools::ToString("Failed to initialize file for shared memory region: ", strerror(errno)));
}
fFileMapping = file_mapping(fName.c_str(), read_write);
LOG(debug) << "shmem: initialized file: " << fName;
fRegion = mapped_region(fFileMapping, read_write, 0, size, 0, cfg.creationFlags);
} else {
try {
fShmemObject = shared_memory_object(open_or_create, fName.c_str(), read_write);
if (size != 0) {
fShmemObject.truncate(size);
}
} catch (interprocess_exception& e) {
LOG(error) << "Failed " << (remote ? "opening" : "creating") << " shared_memory_object for region id '" << cfg.id.value() << "': " << e.what();
throw;
}
try {
fRegion = mapped_region(fShmemObject, read_write, 0, 0, 0, cfg.creationFlags);
} catch (interprocess_exception& e) {
LOG(error) << "Failed mapping shared_memory_object for region id '" << cfg.id.value() << "': " << e.what();
throw;
}
}
if (cfg.lock) {
LOG(debug) << "Locking region " << cfg.id.value() << "...";
Lock();
LOG(debug) << "Successfully locked region " << cfg.id.value() << ".";
}
if (cfg.zero) {
LOG(debug) << "Zeroing free memory of region " << cfg.id.value() << "...";
Zero();
LOG(debug) << "Successfully zeroed free memory of region " << cfg.id.value() << ".";
}
if (!remote) {
Register(shmId, cfg);
}
LOG(trace) << "shmem: initialized region: " << fName << " (" << (remote ? "remote" : "local") << ")";
}
UnmanagedRegion() = delete;
UnmanagedRegion(const UnmanagedRegion&) = delete;
UnmanagedRegion(UnmanagedRegion&&) = delete;
UnmanagedRegion& operator=(const UnmanagedRegion&) = delete;
UnmanagedRegion& operator=(UnmanagedRegion&&) = delete;
void* GetData() const override { return fRegion->get_address(); }
size_t GetSize() const override { return fRegion->get_size(); }
uint16_t GetId() const override { return fRegionId; }
void SetLinger(uint32_t linger) override { fManager.GetRegion(fRegionId)->SetLinger(linger); }
uint32_t GetLinger() const override { return fManager.GetRegion(fRegionId)->GetLinger(); }
void Zero()
{
memset(fRegion.get_address(), 0x00, fRegion.get_size());
}
void Lock()
{
if (mlock(fRegion.get_address(), fRegion.get_size()) == -1) {
LOG(error) << "Could not lock region " << fName << ". Code: " << errno << ", reason: " << strerror(errno);
throw TransportError(tools::ToString("Could not lock region ", fName, ": ", strerror(errno)));
}
}
Transport GetType() const override { return fair::mq::Transport::SHM; }
/// Start address of the mapping in this process.
void* GetData() const
{
    return fRegion.get_address();
}
/// Size of the mapping in bytes.
size_t GetSize() const
{
    return fRegion.get_size();
}
~UnmanagedRegion() override { fManager.RemoveRegion(fRegionId); }
void SetLinger(uint32_t linger) { fLinger = linger; }
uint32_t GetLinger() const { return fLinger; }
bool RemoveOnDestruction() { return fRemoveOnDestruction; }
/// Stops the ack threads. A non-remote instance then removes the region
/// (only if fRemoveOnDestruction is set) and the region queue. The backing
/// file handle, if any, is closed in every case.
~UnmanagedRegion()
{
    // Sets fStopAcks and joins sender and receiver whenever they were started.
    // The receiver must be joined for remote instances as well: destroying a
    // joinable std::thread calls std::terminate.
    StopAcks();

    if (!fRemote) {
        if (fRemoveOnDestruction) {
            if (Monitor::RemoveObject(fName.c_str())) {
                LOG(trace) << "Region '" << fName << "' destroyed.";
            }
            if (Monitor::RemoveFileMapping(fName.c_str())) {
                LOG(trace) << "File mapping '" << fName << "' destroyed.";
            }
        } else {
            LOG(debug) << "Skipping removal of " << fName << " unmanaged region, because RegionConfig::removeOnDestruction is false";
        }

        if (boost::interprocess::message_queue::remove(fQueueName.c_str())) {
            LOG(trace) << "Region queue '" << fQueueName << "' destroyed.";
        } else {
            LOG(debug) << "Region queue '" << fQueueName << "' not destroyed.";
        }
    } else {
        // LOG(debug) << "Region queue '" << fQueueName << "' is remote, no cleanup necessary";
    }

    // The constructor opens fFile for remote instances too, so close it
    // independently of fRemote to avoid leaking the handle.
    if (fFile) {
        fclose(fFile);
    }

    // LOG(debug) << "Region '" << fName << "' (" << (fRemote ? "remote" : "local") << ") destructed.";
}
private:
Manager& fManager;
boost::interprocess::mapped_region* fRegion;
uint16_t fRegionId;
bool fRemote;
bool fRemoveOnDestruction;
uint32_t fLinger;
std::atomic<bool> fStopAcks;
std::string fName;
std::string fQueueName;
boost::interprocess::shared_memory_object fShmemObject;
FILE* fFile;
boost::interprocess::file_mapping fFileMapping;
boost::interprocess::mapped_region fRegion;
std::mutex fBlockMtx;
std::condition_variable fBlockSendCV;
std::vector<RegionBlock> fBlocksToFree;
const std::size_t fAckBunchSize = 256;
std::unique_ptr<boost::interprocess::message_queue> fQueue;
std::thread fAcksReceiver;
std::thread fAcksSender;
RegionCallback fCallback;
RegionBulkCallback fBulkCallback;
/// Returns a default-constructed RegionConfig whose only explicitly set field is the id.
static RegionConfig makeRegionConfig(uint16_t id)
{
    RegionConfig result;
    result.id = id;
    return result;
}
/// Records the region (path, creation flags, user flags) under its id in the
/// session's management segment ("fmq_<shmId>_mng") and bumps the event
/// counter if the id was not known before.
static void Register(const std::string& shmId, RegionConfig& cfg)
{
    using namespace boost::interprocess;
    managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), 6553600);
    VoidAlloc alloc(mngSegment.get_segment_manager());

    Uint16RegionInfoHashMap* shmRegions = mngSegment.find_or_construct<Uint16RegionInfoHashMap>(unique_instance)(alloc);

    // find_or_construct looks up and creates in one step; a separate
    // find() + construct() lets a second process create the counter in between.
    EventCounter* eventCounter = mngSegment.find_or_construct<EventCounter>(unique_instance)(0);

    bool newShmRegionCreated = shmRegions->emplace(cfg.id.value(), RegionInfo(cfg.path.c_str(), cfg.creationFlags, cfg.userFlags, alloc)).second;
    if (newShmRegionCreated) {
        (eventCounter->fCount)++;
    }
}
/// Installs the callbacks that ReceiveAcks() invokes for received blocks.
/// Both arguments are taken by value and moved into the members.
void SetCallbacks(RegionCallback callback, RegionBulkCallback bulkCallback)
{
    fBulkCallback = std::move(bulkCallback);
    fCallback = std::move(callback);
}
void InitializeQueues()
{
using namespace boost::interprocess;
if (!fQueue) {
fQueue = std::make_unique<message_queue>(open_or_create, fQueueName.c_str(), 1024, fAckBunchSize * sizeof(RegionBlock));
LOG(trace) << "shmem: initialized region queue: " << fQueueName;
}
}
/// Launches the thread running SendAcks() unless one is already attached.
void StartAckSender()
{
    if (fAcksSender.joinable()) {
        return;
    }
    fAcksSender = std::thread(&UnmanagedRegion::SendAcks, this);
}
void SendAcks()
{
std::unique_ptr<RegionBlock[]> blocks = std::make_unique<RegionBlock[]>(fAckBunchSize);
size_t blocksToSend = 0;
while (true) {
{
std::unique_lock<std::mutex> lock(fBlockMtx);
// try to get <fAckBunchSize> blocks
if (fBlocksToFree.size() < fAckBunchSize) {
fBlockSendCV.wait_for(lock, std::chrono::milliseconds(500));
}
// send whatever blocks we have
blocksToSend = std::min(fBlocksToFree.size(), fAckBunchSize);
copy_n(fBlocksToFree.end() - blocksToSend, blocksToSend, blocks.get());
fBlocksToFree.resize(fBlocksToFree.size() - blocksToSend);
}
if (blocksToSend > 0) {
while (!fQueue->try_send(blocks.get(), blocksToSend * sizeof(RegionBlock), 0) && !fStopAcks) {
// receiver slow? yield and try again...
std::this_thread::yield();
}
// LOG(debug) << "Sent " << blocksToSend << " blocks.";
} else { // blocksToSend == 0
if (fStopAcks) {
break;
}
}
}
LOG(trace) << "AcksSender for " << fName << " leaving " << "(blocks left to free: " << fBlocksToFree.size() << ", "
<< " blocks left to send: " << blocksToSend << ").";
}
/// Launches the thread running ReceiveAcks() unless one is already attached.
void StartAckReceiver()
{
    if (fAcksReceiver.joinable()) {
        return;
    }
    fAcksReceiver = std::thread(&UnmanagedRegion::ReceiveAcks, this);
}
void ReceiveAcks()
{
unsigned int priority = 0;
boost::interprocess::message_queue::size_type recvdSize = 0;
std::unique_ptr<RegionBlock[]> blocks = std::make_unique<RegionBlock[]>(fAckBunchSize);
std::vector<fair::mq::RegionBlock> result;
result.reserve(fAckBunchSize);
while (true) {
uint32_t timeout = 100;
bool leave = false;
if (fStopAcks) {
timeout = fLinger;
leave = true;
}
auto rcvTill = boost::posix_time::microsec_clock::universal_time() + boost::posix_time::milliseconds(timeout);
while (fQueue->timed_receive(blocks.get(), fAckBunchSize * sizeof(RegionBlock), recvdSize, priority, rcvTill)) {
const auto numBlocks = recvdSize / sizeof(RegionBlock);
// LOG(debug) << "Received " << numBlocks << " blocks (recvdSize: " << recvdSize << "). (remaining queue size: " << fQueue->get_num_msg() << ").";
if (fBulkCallback) {
result.clear();
for (size_t i = 0; i < numBlocks; i++) {
result.emplace_back(reinterpret_cast<char*>(fRegion.get_address()) + blocks[i].fHandle, blocks[i].fSize, reinterpret_cast<void*>(blocks[i].fHint));
}
fBulkCallback(result);
} else if (fCallback) {
for (size_t i = 0; i < numBlocks; i++) {
fCallback(reinterpret_cast<char*>(fRegion.get_address()) + blocks[i].fHandle, blocks[i].fSize, reinterpret_cast<void*>(blocks[i].fHint));
}
}
}
if (leave) {
break;
}
}
LOG(trace) << "AcksReceiver for " << fName << " leaving (remaining queue size: " << fQueue->get_num_msg() << ").";
}
/// Queues a block for acknowledgement and wakes the sender thread as soon as
/// at least fAckBunchSize blocks are pending.
void ReleaseBlock(const RegionBlock& block)
{
    bool bunchComplete = false;
    {
        std::lock_guard<std::mutex> guard(fBlockMtx);
        fBlocksToFree.emplace_back(block);
        bunchComplete = fBlocksToFree.size() >= fAckBunchSize;
    }

    // notify after the mutex has been released
    if (bunchComplete) {
        fBlockSendCV.notify_one();
    }
}
/// Raises the stop flag and waits for the sender and receiver threads, if they were started.
void StopAcks()
{
    fStopAcks.store(true);

    if (fAcksSender.joinable()) {
        // wake the sender in case it is waiting for more blocks
        fBlockSendCV.notify_one();
        fAcksSender.join();
    }

    if (fAcksReceiver.joinable()) {
        fAcksReceiver.join();
    }
}
};
} // namespace fair::mq::shmem

View File

@@ -0,0 +1,71 @@
/********************************************************************************
* Copyright (C) 2014 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#ifndef FAIR_MQ_SHMEM_UNMANAGEDREGIONIMPL_H_
#define FAIR_MQ_SHMEM_UNMANAGEDREGIONIMPL_H_
#include "Manager.h"
#include "UnmanagedRegion.h"
#include <fairmq/UnmanagedRegion.h>
#include <fairlogger/Logger.h>
#include <cstddef> // size_t
namespace fair::mq::shmem
{
class Message;
class Socket;
/**
 * fair::mq::UnmanagedRegion implementation for the shared memory transport.
 *
 * Thin facade: the region itself is created by and obtained from the Manager;
 * this object keeps a non-owning pointer to it plus its id and asks the
 * Manager to remove the region on destruction.
 */
class UnmanagedRegionImpl final : public fair::mq::UnmanagedRegion
{
    friend class Message;
    friend class Socket;

  public:
    /// Creates the region through manager.CreateRegion() and stores the returned pointer and id.
    UnmanagedRegionImpl(Manager& manager,
                        const size_t size,
                        RegionCallback callback,
                        RegionBulkCallback bulkCallback,
                        fair::mq::RegionConfig cfg,
                        FairMQTransportFactory* factory)
        : fair::mq::UnmanagedRegion(factory)
        , fManager(manager)
        , fRegion(nullptr)
        , fRegionId(0)
    {
        // The callbacks are by-value sink parameters: hand them on instead of copying them.
        auto result = fManager.CreateRegion(size, std::move(callback), std::move(bulkCallback), std::move(cfg));
        fRegion = result.first;
        fRegionId = result.second;
    }

    UnmanagedRegionImpl(const UnmanagedRegionImpl&) = delete;
    UnmanagedRegionImpl(UnmanagedRegionImpl&&) = delete;
    UnmanagedRegionImpl& operator=(const UnmanagedRegionImpl&) = delete;
    UnmanagedRegionImpl& operator=(UnmanagedRegionImpl&&) = delete;

    void* GetData() const override { return fRegion->GetData(); }
    size_t GetSize() const override { return fRegion->GetSize(); }
    uint16_t GetId() const override { return fRegionId; }
    void SetLinger(uint32_t linger) override { fRegion->SetLinger(linger); }
    uint32_t GetLinger() const override { return fRegion->GetLinger(); }

    Transport GetType() const override { return fair::mq::Transport::SHM; }

    /// Asks the manager to remove the region with this object's id.
    ~UnmanagedRegionImpl() override { fManager.RemoveRegion(fRegionId); }

  private:
    Manager& fManager;
    shmem::UnmanagedRegion* fRegion; // non-owning; obtained from fManager.CreateRegion()
    uint16_t fRegionId;
};
} // namespace fair::mq::shmem
#endif /* FAIR_MQ_SHMEM_UNMANAGEDREGIONIMPL_H_ */

View File

@@ -76,6 +76,7 @@ int main(int argc, char** argv)
string sessionName;
string shmId;
bool cleanup = false;
bool resetContent = false;
bool selfDestruct = false;
bool interactive = false;
bool viewOnly = false;
@@ -97,6 +98,7 @@ int main(int argc, char** argv)
("session,s" , value<string>(&sessionName)->default_value("default"), "Session id")
("shmid" , value<string>(&shmId)->default_value(""), "Shmem id (if not provided, it is generated out of session id and user id)")
("cleanup,c" , value<bool>(&cleanup)->implicit_value(true), "Perform cleanup and quit")
("reset-content,r", value<bool>(&resetContent)->implicit_value(true), "[EXPERIMENTAL] Reset content of the segments (only call this when segment is not in use)")
("self-destruct,x", value<bool>(&selfDestruct)->implicit_value(true), "Quit after first closing of the memory")
("interactive,i" , value<bool>(&interactive)->implicit_value(true), "Interactive run")
("view,v" , value<bool>(&viewOnly)->implicit_value(true), "Run in view only mode")
@@ -146,6 +148,11 @@ int main(int argc, char** argv)
return 0;
}
if (resetContent) {
Monitor::ResetContent(ShmId{shmId});
return 0;
}
if (debug) {
Monitor::PrintDebugInfo(ShmId{shmId});
return 0;

54
fairmq/tools/Exceptions.h Normal file
View File

@@ -0,0 +1,54 @@
/********************************************************************************
* Copyright (C) 2021 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#ifndef FAIR_MQ_TOOLS_EXCEPTIONS_H
#define FAIR_MQ_TOOLS_EXCEPTIONS_H
#include <functional>
#include <utility>
namespace fair::mq::tools
{
/**
 * Executes the given callback in the destructor, as long as the object is enabled.
 * Can be used to execute something in case of an exception when catch is undesirable, e.g.:
 *
 * {
 *     // callback will be executed only if f throws an exception
 *     CallOnDestruction cod([](){ cout << "exception was thrown"; }, true);
 *     f();
 *     cod.disable();
 * }
 *
 * An empty callback (default-constructed std::function) is accepted and simply ignored.
 */
class CallOnDestruction
{
  public:
    /// @param c      callable to run from the destructor
    /// @param enable initial state; can be changed later with enable()/disable()
    CallOnDestruction(std::function<void()> c, bool enable = true)
        : callback(std::move(c))   // sink parameter: move instead of copying the callable
        , enabled(enable)
    {}

    ~CallOnDestruction()
    {
        // Calling an empty std::function throws std::bad_function_call; an exception escaping
        // a destructor ends in std::terminate, so skip the call when there is no target.
        if (enabled && callback) {
            callback();
        }
    }

    /// Arm the guard: the callback will run on destruction.
    void enable() { enabled = true; }
    /// Disarm the guard: the callback will not run on destruction.
    void disable() { enabled = false; }

  private:
    std::function<void()> callback;
    bool enabled;
};
} // namespace fair::mq::tools
#endif /* FAIR_MQ_TOOLS_EXCEPTIONS_H */

View File

@@ -20,6 +20,69 @@ namespace fair::mq::zmq
/// Exception type of the fair::mq::zmq namespace; inherits every std::runtime_error constructor unchanged.
struct Error : std::runtime_error { using std::runtime_error::runtime_error; };
inline bool Bind(void* socket, const std::string& address, const std::string& id)
{
// LOG(debug) << "Binding socket " << id << " on " << address;
if (zmq_bind(socket, address.c_str()) != 0) {
if (errno == EADDRINUSE) {
// do not print error in this case, this is handled upstream in case no
// connection could be established after trying a number of random ports from a range.
return false;
} else if (errno == EACCES) {
// check if TCP port 1 was given, if yes then it will be handeled upstream, print debug only
size_t protocolPos = address.find(':');
std::string protocol = address.substr(0, protocolPos);
if (protocol == "tcp") {
size_t portPos = address.rfind(':');
std::string port = address.substr(portPos + 1);
if (port == "1") {
LOG(debug) << "Failed binding socket " << id << ", address: " << address << ", reason: " << zmq_strerror(errno);
return false;
}
}
}
LOG(error) << "Failed binding socket " << id << ", address: " << address << ", reason: " << zmq_strerror(errno);
return false;
}
return true;
}
inline bool Connect(void* socket, const std::string& address, const std::string& id)
{
// LOG(debug) << "Connecting socket " << id << " on " << address;
if (zmq_connect(socket, address.c_str()) != 0) {
LOG(error) << "Failed connecting socket " << id << ", address: " << address << ", reason: " << zmq_strerror(errno);
return false;
}
return true;
}
/// Decides whether a transfer that returned EAGAIN/EINTR should be attempted again.
///
/// @param flags         zmq send/recv flags; with ZMQ_DONTWAIT set no retry is made
/// @param socketTimeout amount added to `elapsed` for each retry decision
/// @param userTimeout   overall budget; values <= 0 mean "retry without a deadline"
/// @param elapsed       running total, updated in place when a budget is in effect
/// @return true if the caller should retry, false otherwise
inline bool ShouldRetry(int flags, int socketTimeout, int userTimeout, int& elapsed)
{
    const bool nonBlocking = (flags & ZMQ_DONTWAIT) != 0;
    if (nonBlocking) {
        return false;
    }

    if (userTimeout <= 0) {
        // no budget requested: keep retrying, leave `elapsed` untouched
        return true;
    }

    elapsed += socketTimeout;
    return elapsed < userTimeout;
}
inline int HandleErrors(const std::string& id)
{
if (zmq_errno() == ETERM) {
LOG(debug) << "Terminating socket " << id;
return static_cast<int>(TransferCode::error);
} else {
LOG(error) << "Failed transfer on socket " << id << ", errno: " << errno << ", reason: " << zmq_strerror(errno);
return static_cast<int>(TransferCode::error);
}
}
/// Lookup table for various zmq constants
inline auto getConstant(std::string_view constant) -> int
{

View File

@@ -85,57 +85,12 @@ class Socket final : public fair::mq::Socket
bool Bind(const std::string& address) override
{
// LOG(debug) << "Binding socket " << fId << " on " << address;
if (zmq_bind(fSocket, address.c_str()) != 0) {
if (errno == EADDRINUSE) {
// do not print error in this case, this is handled by FairMQDevice in case no
// connection could be established after trying a number of random ports from a range.
return false;
}
LOG(error) << "Failed binding socket " << fId << ", address: " << address << ", reason: " << zmq_strerror(errno);
return false;
}
return true;
return zmq::Bind(fSocket, address, fId);
}
bool Connect(const std::string& address) override
{
// LOG(debug) << "Connecting socket " << fId << " on " << address;
if (zmq_connect(fSocket, address.c_str()) != 0) {
LOG(error) << "Failed connecting socket " << fId << ", address: " << address << ", reason: " << zmq_strerror(errno);
return false;
}
return true;
}
bool ShouldRetry(int flags, int timeout, int& elapsed) const
{
if ((flags & ZMQ_DONTWAIT) == 0) {
if (timeout > 0) {
elapsed += fTimeout;
if (elapsed >= timeout) {
return false;
}
}
return true;
} else {
return false;
}
}
int HandleErrors() const
{
if (zmq_errno() == ETERM) {
LOG(debug) << "Terminating socket " << fId;
return static_cast<int>(TransferCode::error);
} else {
LOG(error) << "Failed transfer on socket " << fId << ", errno: " << errno << ", reason: " << zmq_strerror(errno);
return static_cast<int>(TransferCode::error);
}
return zmq::Connect(fSocket, address, fId);
}
int64_t Send(MessagePtr& msg, int timeout = -1) override
@@ -157,13 +112,13 @@ class Socket final : public fair::mq::Socket
} else if (zmq_errno() == EAGAIN || zmq_errno() == EINTR) {
if (fCtx.Interrupted()) {
return static_cast<int>(TransferCode::interrupted);
} else if (ShouldRetry(flags, timeout, elapsed)) {
} else if (zmq::ShouldRetry(flags, fTimeout, timeout, elapsed)) {
continue;
} else {
return static_cast<int>(TransferCode::timeout);
}
} else {
return HandleErrors();
return zmq::HandleErrors(fId);
}
}
}
@@ -187,13 +142,13 @@ class Socket final : public fair::mq::Socket
} else if (zmq_errno() == EAGAIN || zmq_errno() == EINTR) {
if (fCtx.Interrupted()) {
return static_cast<int>(TransferCode::interrupted);
} else if (ShouldRetry(flags, timeout, elapsed)) {
} else if (zmq::ShouldRetry(flags, fTimeout, timeout, elapsed)) {
continue;
} else {
return static_cast<int>(TransferCode::timeout);
}
} else {
return HandleErrors();
return zmq::HandleErrors(fId);
}
}
}
@@ -222,14 +177,14 @@ class Socket final : public fair::mq::Socket
} else if (zmq_errno() == EAGAIN || zmq_errno() == EINTR) {
if (fCtx.Interrupted()) {
return static_cast<int>(TransferCode::interrupted);
} else if (ShouldRetry(flags, timeout, elapsed)) {
} else if (zmq::ShouldRetry(flags, fTimeout, timeout, elapsed)) {
repeat = true;
break;
} else {
return static_cast<int>(TransferCode::timeout);
}
} else {
return HandleErrors();
return zmq::HandleErrors(fId);
}
}
@@ -274,14 +229,14 @@ class Socket final : public fair::mq::Socket
} else if (zmq_errno() == EAGAIN || zmq_errno() == EINTR) {
if (fCtx.Interrupted()) {
return static_cast<int>(TransferCode::interrupted);
} else if (ShouldRetry(flags, timeout, elapsed)) {
} else if (zmq::ShouldRetry(flags, fTimeout, timeout, elapsed)) {
repeat = true;
break;
} else {
return static_cast<int>(TransferCode::timeout);
}
} else {
return HandleErrors();
return zmq::HandleErrors(fId);
}
size_t moreSize = sizeof(more);

View File

@@ -118,6 +118,15 @@ class TransportFactory final : public FairMQTransportFactory
return CreateUnmanagedRegion(size, userFlags, nullptr, bulkCallback, path, flags, cfg);
}
// Creates an unmanaged region configured through `cfg`, with a per-buffer callback.
// Forwards to the seven-argument overload, taking the user flags, path and creation
// flags from `cfg` and passing nullptr in the bulk-callback position.
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, RegionCallback callback, RegionConfig cfg) override
{
return CreateUnmanagedRegion(size, cfg.userFlags, callback, nullptr, cfg.path, cfg.creationFlags, cfg);
}
// Creates an unmanaged region configured through `cfg`, with a bulk callback.
// Forwards to the seven-argument overload, taking the user flags, path and creation
// flags from `cfg` and passing nullptr in the single-callback position.
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, RegionBulkCallback bulkCallback, RegionConfig cfg) override
{
return CreateUnmanagedRegion(size, cfg.userFlags, nullptr, bulkCallback, cfg.path, cfg.creationFlags, cfg);
}
UnmanagedRegionPtr CreateUnmanagedRegion(size_t size, int64_t userFlags, RegionCallback callback, RegionBulkCallback bulkCallback, const std::string&, int /* flags */, fair::mq::RegionConfig cfg)
{
UnmanagedRegionPtr ptr = std::make_unique<UnmanagedRegion>(*fCtx, size, userFlags, callback, bulkCallback, this, cfg);

View File

@@ -100,6 +100,7 @@ add_testsuite(Message
add_testsuite(Region
SOURCES
${CMAKE_CURRENT_BINARY_DIR}/runner.cxx
region/_creation.cxx
region/_region.cxx
LINKS FairMQ

View File

@@ -37,8 +37,8 @@ class PollIn : public FairMQDevice
{
vector<FairMQChannel*> chans;
chans.push_back(&fChannels.at("data1").at(0));
chans.push_back(&fChannels.at("data2").at(0));
chans.push_back(&GetChannel("data1", 0));
chans.push_back(&GetChannel("data2", 0));
FairMQPollerPtr poller = nullptr;

92
test/region/_creation.cxx Normal file
View File

@@ -0,0 +1,92 @@
/********************************************************************************
* Copyright (C) 2014 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#include <fairmq/shmem/Common.h>
#include <fairmq/shmem/UnmanagedRegion.h>
#include <fairmq/shmem/Segment.h>
#include <fairmq/ProgOptions.h>
#include <fairmq/tools/Unique.h>
#include <fairmq/TransportFactory.h>
#include <fairlogger/Logger.h>
#include <gtest/gtest.h>
#include <unistd.h>
#include <string>
namespace
{
using namespace std;
/// Cleanup guard for a test: on destruction calls Monitor::Cleanup() for the stored shm id.
/// Non-copyable and non-movable, so the cleanup runs exactly once per guard.
struct ShmRemover
{
    /// explicit: a plain string must not silently turn into a guard that wipes shared memory
    explicit ShmRemover(std::string _shmId) : shmId(std::move(_shmId)) {}
    ~ShmRemover() { fair::mq::shmem::Monitor::Cleanup(fair::mq::shmem::ShmId{shmId}); }

    ShmRemover(const ShmRemover&) = delete;
    ShmRemover(ShmRemover&&) = delete;
    ShmRemover& operator=(const ShmRemover&) = delete;
    ShmRemover& operator=(ShmRemover&&) = delete;

    std::string shmId;
};
void Preallocate()
{
const string session = to_string(fair::mq::tools::UuidHash());
// generate shmId out of session id + user id (geteuid).
const string shmId = fair::mq::shmem::makeShmIdStr(session);
// const uint64_t shmId64 = 0;
// const string shmId = fair::mq::shmem::makeShmIdStr(shmId64);
// LOG(info) << shmId;
const uint16_t s1id = 0;
const uint64_t s1size = 10000000;
const uint16_t s2id = 1;
const uint64_t s2size = 20000000;
const uint16_t r1id = 0;
const uint64_t r1size = 10000000;
const uint16_t r2id = 1;
const uint64_t r2size = 20000000;
// cleanup when done
ShmRemover shmRemover(shmId);
// managed segments
fair::mq::shmem::Segment segment1(shmId, s1id, s1size, fair::mq::shmem::rbTreeBestFit);
segment1.Lock();
segment1.Zero();
LOG(info) << "Created segment " << s1id << " of size " << segment1.GetSize() << " starting at " << segment1.GetData();
fair::mq::shmem::Segment segment2(shmId, s2id, s2size, fair::mq::shmem::rbTreeBestFit);
segment2.Lock();
segment2.Zero();
LOG(info) << "Created segment " << s2id << " of size " << segment2.GetSize() << " starting at " << segment2.GetData();
// unmanaged regions
fair::mq::shmem::UnmanagedRegion region1(shmId, r1id, r1size);
region1.Lock();
region1.Zero();
LOG(info) << "Created region " << r1id << " of size " << region1.GetSize() << " starting at " << region1.GetData();
fair::mq::shmem::UnmanagedRegion region2(shmId, r2id, r2size);
region2.Lock();
region2.Zero();
LOG(info) << "Created region " << r2id << " of size " << region2.GetSize() << " starting at " << region2.GetData();
// for a "soft reset" call (shmem should not be in active use by (no messages in flight) devices during this call):
// fair::mq::shmem::Monitor::ResetContent(fair::mq::shmem::ShmId{shmId});
}
// Runs Preallocate() once; the test body contains no assertions of its own.
TEST(PreallocateInsideSession, shmem)
{
Preallocate();
}
}