1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME "TransferWrapper"
4 #include "artdaq-core/Data/Fragment.hh"
5 #include "artdaq-core/Utilities/ExceptionHandler.hh"
6 #include "artdaq-core/Utilities/TimeUtils.hh"
7 #include "artdaq/ArtModules/detail/TransferWrapper.hh"
8 #include "artdaq/DAQdata/NetMonHeader.hh"
9 #include "artdaq/ExternalComms/MakeCommanderPlugin.hh"
10 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
12 #include "cetlib/BasicPluginFactory.h"
13 #include "cetlib_except/exception.h"
14 #include "fhiclcpp/ParameterSet.h"
/// @brief Signal handler that records the delivered signal number.
/// @param signal Signal number passed in by the runtime.
/// The value is stored in the global gSignalStatus, which other code in this
/// file compares against 0 to decide whether to keep looping.
31 void signal_handler(
int signal)
// Only a plain assignment to the global flag is performed here.
33 gSignalStatus = signal;
// TransferWrapper constructor (the signature line is not part of this listing).
// The member initializer list reads configuration from the fhicl::ParameterSet
// named pset; each get<T>(key, default) falls back to the default shown when
// the key is absent.
// Timeout handed to transfer_->receiveFragment; defaults to 100000.
37 : timeoutInUsecs_(pset.get<std::size_t>(
"timeoutInUsecs", 100000))
// Default-initialized time point; refreshed whenever a fragment is received.
38 , last_received_data_()
// Time of the last timeout report; starts at construction time.
39 , last_report_(std::chrono::steady_clock::now())
43 , dispatcherHost_(pset.get<std::string>(
"dispatcherHost",
"localhost"))
// The port is read as a string because it is only concatenated into the URL below.
44 , dispatcherPort_(pset.get<std::string>(
"dispatcherPort",
"5266"))
// An explicit server_url wins; otherwise the URL is http://<host>:<port>/RPC2.
// NOTE(review): this initializer reads dispatcherHost_ and dispatcherPort_, so it
// relies on those members being declared before serverUrl_ in the class
// (members are initialized in declaration order) - confirm in the header.
45 , serverUrl_(pset.get<std::string>(
"server_url",
"http://" + dispatcherHost_ +
":" + dispatcherPort_ +
"/RPC2"))
// Number of received fragments tolerated before an Init fragment must have arrived.
46 , maxEventsBeforeInit_(pset.get<std::size_t>(
"maxEventsBeforeInit", 5))
// Fragment type codes accepted by checkIntegrity.
47 , allowedFragmentTypes_(pset.get<std::vector<int>>(
"allowedFragmentTypes", {226, 227, 229}))
// Compared against TimeUtils::GetElapsedTime in registerMonitor; a value of 0
// disables that timeout check there.
48 , runningStateTimeout_(pset.get<
double>(
"dispatcherConnectTimeout", 0))
// Delay passed to usleep between Dispatcher status polls; clamped further below.
49 , runningStateInterval_us_(pset.get<
size_t>(
"dispatcherConnectRetryInterval_us", 1000000))
// When true, checkIntegrity throws on a failed check instead of only logging.
50 , quitOnFragmentIntegrityProblem_(pset.get<
bool>(
"quitOnFragmentIntegrityProblem",
true))
51 , multi_run_mode_(pset.get<
bool>(
"allowMultipleRuns",
false))
// Registration with the Dispatcher has not happened yet.
52 , monitorRegistered_(
false)
// Route SIGINT to signal_handler so that loops polling gSignalStatus can stop.
54 std::signal(SIGINT, signal_handler);
// Configure metrics from the optional metrics table (empty ParameterSet if absent),
// then start them.
60 metricMan->initialize(pset.get<fhicl::ParameterSet>(
"metrics", fhicl::ParameterSet()),
"Online Monitor");
61 metricMan->do_start();
// Presumably inside a catch block that is not shown in this listing; the
// ExceptionHandlerRethrow::no argument suggests the failure is reported
// without being rethrown - TODO confirm.
66 ExceptionHandler(ExceptionHandlerRethrow::no,
"TransferWrapper: could not configure metrics");
// Clamp the poll interval to the range [1000, 30000000] us, warning when adjusted.
70 if (runningStateInterval_us_ < 1000)
72 TLOG(TLVL_WARNING) <<
"Invalid value " << runningStateInterval_us_ <<
" us detected for dispatcherConnectRetryInterval_us. Setting to 1000 us";
73 runningStateInterval_us_ = 1000;
75 if (runningStateInterval_us_ > 30000000)
77 TLOG(TLVL_WARNING) <<
"Invalid value " << runningStateInterval_us_ <<
" us detected for dispatcherConnectRetryInterval_us. Setting to 30,000,000 us";
78 runningStateInterval_us_ = 30000000;
// Work on a copy of the configuration and make sure it carries a server_url
// entry, using the computed serverUrl_ when the caller did not supply one.
// How new_pset is consumed afterwards is not visible in this listing.
81 fhicl::ParameterSet new_pset(pset);
82 if (!new_pset.has_key(
"server_url"))
84 new_pset.put<std::string>(
"server_url", serverUrl_);
// Body of TransferWrapper::receiveMessage (the signature line is not part of
// this listing). Polls the transfer plugin for fragments until one can be
// queued in fragmentPtrs or a signal has been recorded in gSignalStatus.
// Several control-flow lines (braces, returns, conditions) are missing from
// this listing, so the notes below describe only the statements that are visible.
93 artdaq::FragmentPtrs fragmentPtrs;
94 bool receivedFragment =
false;
// Function-local statics: both values persist across calls to this function.
95 static bool initialized =
false;
96 static size_t fragments_received = 0;
// Keep going for as long as no signal has been recorded by signal_handler.
98 while (gSignalStatus == 0)
100 receivedFragment =
false;
// Fresh, empty Fragment for the transfer plugin to fill.
101 auto fragmentPtr = std::make_unique<artdaq::Fragment>();
// Retry until a fragment actually arrives.
103 while (!receivedFragment)
// A signal may arrive while waiting, so it is re-checked inside the inner loop.
105 if (gSignalStatus != 0)
107 TLOG(TLVL_INFO) <<
"Ctrl-C appears to have been hit";
// Two checks of monitorRegistered_ in a row; the statements between them are
// not shown here (presumably a registration attempt followed by a re-check) -
// TODO confirm.
111 if (!monitorRegistered_)
114 if (!monitorRegistered_)
// Ask the transfer plugin for one fragment, passing timeoutInUsecs_ as the timeout.
122 auto result = transfer_->receiveFragment(*fragmentPtr, timeoutInUsecs_);
// Presumably the success branch on result (the condition is not shown).
126 receivedFragment =
true;
127 fragments_received++;
// Lifetime count of received fragments, used in the log line below. mod is the
// last decimal digit of the count; the suffix variable used in the log is
// declared on lines missing from this listing (presumably derived from mod).
129 static size_t cntr = 0;
130 auto mod = ++cntr % 10;
144 TLOG(TLVL_INFO) <<
"Received " << cntr << suffix <<
" event, "
145 <<
"seqID == " << fragmentPtr->sequenceID()
146 <<
", type == " << fragmentPtr->typeString();
// Remember when data last arrived; used by the timeout report below.
147 last_received_data_ = std::chrono::steady_clock::now();
// Presumably the branch for an unrecoverable result code (condition not shown).
152 TLOG(TLVL_ERROR) <<
"Transfer Plugin disconnected or other unrecoverable error. Shutting down.";
// Timeout reporting: the log level starts at TLVL_TRACE. When both the last
// report and the last received data are more than 1.0 old (GetElapsedTime
// units, presumably seconds), last_report_ is refreshed; the lines between
// are not shown (presumably they raise tlvl) - TODO confirm.
163 auto tlvl = TLVL_TRACE;
164 if (artdaq::TimeUtils::GetElapsedTime(last_report_) > 1.0 && artdaq::TimeUtils::GetElapsedTime(last_received_data_) > 1.0)
167 last_report_ = std::chrono::steady_clock::now();
170 auto last_received_milliseconds = artdaq::TimeUtils::GetElapsedTimeMilliseconds(last_received_data_);
// NOTE(review): the message names receiveFragmentFrom, while the call made
// above is transfer_->receiveFragment.
174 TLOG(tlvl) <<
"Timeout occurred in call to transfer_->receiveFragmentFrom; will try again"
175 <<
", status = " << result <<
", last received data " << last_received_milliseconds <<
" ms ago.";
// Presumably inside a catch block that is not shown; ExceptionHandlerRethrow::yes
// is passed here, unlike the metrics setup in the constructor.
180 ExceptionHandler(ExceptionHandlerRethrow::yes,
181 "Problem receiving data in TransferWrapper::receiveMessage");
// End-of-data fragments get special handling; that handling is not shown here.
185 if (fragmentPtr->type() == artdaq::Fragment::EndOfDataFragmentType)
// Validate the size bookkeeping and type of the fragment before using it.
201 checkIntegrity(*fragmentPtr);
// Fragments are queued only once initialized is set or when the fragment is
// itself an Init fragment. Where initialized is assigned is not shown here.
203 if (initialized || fragmentPtr->type() == artdaq::Fragment::InitFragmentType)
206 fragmentPtrs.push_back(std::move(fragmentPtr));
// Give up when more than maxEventsBeforeInit_ fragments have been received;
// the enclosing condition for reaching this check is not shown.
210 if (fragments_received > maxEventsBeforeInit_)
212 throw cet::exception(
"TransferWrapper") <<
"First " << maxEventsBeforeInit_ <<
" events received did not include the \"Init\" event containing necessary info for art; exiting...";
// Body of TransferWrapper::receiveMessages (the signature line is not part of
// this listing). Groups the fragments obtained from receiveMessage() into one
// artdaq::Fragments container per fragment type.
221 std::unordered_map<artdaq::Fragment::type_t, std::unique_ptr<artdaq::Fragments>> output;
223 auto ptrs = receiveMessage();
224 for (
auto& ptr : ptrs)
// Read the type before giving up ownership of the fragment.
226 auto fragType = ptr->type();
// release() hands back a raw pointer; from here on the unique_ptr no longer owns it.
// NOTE(review): no delete of fragPtr is visible in this listing, and moving
// from *fragPtr below does not free the allocation itself - confirm the
// pointer is freed on a line not shown, otherwise this leaks.
227 auto fragPtr = ptr.release();
// Create the per-type container the first time a type is seen.
230 if (output.count(fragType) == 0u)
232 output[fragType] = std::make_unique<artdaq::Fragments>();
// Move-construct the fragment into the container for its type.
235 output[fragType]->emplace_back(std::move(*fragPtr));
/// @brief Check a fragment for consistent size bookkeeping and an allowed type.
/// @param fragment Fragment to inspect; it is not modified.
/// @throws cet::exception when a check fails and quitOnFragmentIntegrityProblem_
///         is true. Otherwise the failure is only logged at TLVL_ERROR.
241 void artdaq::TransferWrapper::checkIntegrity(
const artdaq::Fragment& fragment)
const
// Header size in bytes: number of header words times the size of one word.
243 const size_t artdaqheader = artdaq::detail::RawFragmentHeader::num_words() *
244 sizeof(artdaq::detail::RawFragmentHeader::RawDataType);
// Payload size in bytes, taken as the distance between the data begin and end pointers.
245 const auto payload =
static_cast<size_t>(fragment.dataEndBytes() - fragment.dataBeginBytes());
// The metadata value used below is declared on a line missing from this listing.
247 const size_t totalsize = fragment.sizeBytes();
249 const auto type =
static_cast<size_t>(fragment.type());
// Check 1: the reported total must equal header + metadata + payload.
251 if (totalsize != artdaqheader + metadata + payload)
253 std::stringstream errmsg;
254 errmsg <<
"Error: artdaq fragment of type " << fragment.typeString() <<
", sequence ID " << fragment.sequenceID() <<
" has internally inconsistent measures of its size, signalling data corruption: in bytes,"
255 <<
" total size = " << totalsize <<
", artdaq fragment header = " << artdaqheader <<
", metadata = " << metadata <<
", payload = " << payload;
257 TLOG(TLVL_ERROR) << errmsg.str();
259 if (quitOnFragmentIntegrityProblem_)
261 throw cet::exception(
"TransferWrapper") << errmsg.str();
// Check 2: the type must appear in allowedFragmentTypes_. The list holds ints,
// hence the cast back from size_t for the comparison.
267 auto findloc = std::find(allowedFragmentTypes_.begin(), allowedFragmentTypes_.end(),
static_cast<int>(type));
269 if (findloc == allowedFragmentTypes_.end())
271 std::stringstream errmsg;
272 errmsg <<
"Error: artdaq fragment appears to have type "
273 << type <<
", not found in the allowed fragment types list";
275 TLOG(TLVL_ERROR) << errmsg.str();
276 if (quitOnFragmentIntegrityProblem_)
278 throw cet::exception(
"TransferWrapper") << errmsg.str();
/// @brief Wait for the Dispatcher to report Running, then register this monitor with it.
/// Sets monitorRegistered_ to true only when the Dispatcher answers Success.
/// Several lines (try/catch, braces, returns) are missing from this listing.
285 void artdaq::TransferWrapper::registerMonitor()
// Drop any existing transfer plugin instance first.
289 transfer_.reset(
nullptr);
// Presumably the catch handler around the MakeTransferPlugin call, which is
// not shown; ExceptionHandlerRethrow::yes is passed.
294 ExceptionHandler(ExceptionHandlerRethrow::yes,
295 "TransferWrapper: failure in call to MakeTransferPlugin");
// Poll the Dispatcher status until it is Running. A runningStateTimeout_ of 0
// disables the elapsed-time limit, so the loop then ends only on Running or
// through the signal check below.
298 auto start = std::chrono::steady_clock::now();
299 auto sts = getDispatcherStatus();
300 while (sts !=
"Running" && (runningStateTimeout_ == 0 || TimeUtils::GetElapsedTime(start) < runningStateTimeout_))
302 TLOG(TLVL_DEBUG) <<
"Dispatcher state: " << sts;
// What happens after this log line (presumably leaving the function) is not shown.
303 if (gSignalStatus != 0)
305 TLOG(TLVL_INFO) <<
"Ctrl-C appears to have been hit";
308 TLOG(TLVL_INFO) <<
"Waited " << std::fixed << std::setprecision(2) << TimeUtils::GetElapsedTime(start) <<
" s / " << runningStateTimeout_ <<
" s for Dispatcher to enter the Running state (state=" << sts <<
")";
// Sleep for the configured retry interval (microseconds) before polling again.
309 usleep(runningStateInterval_us_);
310 sts = getDispatcherStatus();
// The loop can also end on timeout, so the state is checked again; the body
// of this branch is not shown.
312 if (sts !=
"Running")
// The dispatcher_config table is required: this get has no default value.
317 auto dispatcherConfig = pset_.get<fhicl::ParameterSet>(
"dispatcher_config");
323 TLOG(TLVL_INFO) <<
"Attempting to register this monitor (\"" << transfer_->uniqueLabel()
324 <<
"\") with the dispatcher aggregator";
// Send the configuration, serialized to a string, through the commander plugin.
326 auto status = commander_->send_register_monitor(dispatcherConfig.to_string());
328 TLOG(TLVL_INFO) <<
"Response from dispatcher is \"" << status <<
"\"";
330 if (status ==
"Success")
332 monitorRegistered_ =
true;
// Any other response is only reported as a warning here.
336 TLOG(TLVL_WARNING) <<
"Error in TransferWrapper: attempt to register with dispatcher did not result in the \"Success\" response";
/// @brief Ask the Dispatcher to unregister this monitor, retrying while it reports busy.
/// Clears monitorRegistered_ once the sequence completes (see the final lines).
/// Several lines (braces, sleeps, returns, the declaration of waiting) are
/// missing from this listing.
343 void artdaq::TransferWrapper::unregisterMonitor()
// Nothing to unregister; what follows this warning (presumably a return) is not shown.
345 if (!monitorRegistered_)
347 TLOG(TLVL_WARNING) <<
"The function to unregister the monitor was called, but the monitor doesn't appear to be registered";
// Retry for as long as the elapsed time is below 5.0 (GetElapsedTime units,
// presumably seconds) and the waiting flag is still set. The flag is declared
// and updated on lines not shown here.
351 auto start_time = std::chrono::steady_clock::now();
353 while (artdaq::TimeUtils::GetElapsedTime(start_time) < 5.0 && waiting)
355 std::string sts = getDispatcherStatus();
// Presumably the branch taken when sts is busy; the condition and the wait
// itself are not shown.
362 TLOG(TLVL_INFO) <<
"The Dispatcher returned \"busy\", will wait 0.5s and retry";
// Unregistering is only attempted while the Dispatcher is Running or Ready.
367 if (sts !=
"Running" && sts !=
"Ready")
369 TLOG(TLVL_WARNING) <<
"The Dispatcher is not in the Running or Ready state, will not attempt to unregister (state: " << sts <<
")";
// Presumably reached when the retry window expired (the condition is not shown).
376 TLOG(TLVL_WARNING) <<
"A timeout occurred waiting for the Dispatcher to leave the \"busy\" state, will not attempt to unregister";
383 TLOG(TLVL_INFO) <<
"Requesting that this monitor (" << transfer_->uniqueLabel()
384 <<
") be unregistered from the dispatcher aggregator";
// The monitor is identified to the Dispatcher by the unique label of the transfer plugin.
386 auto status = commander_->send_unregister_monitor(transfer_->uniqueLabel());
388 TLOG(TLVL_INFO) <<
"Response from dispatcher is \"" << status <<
"\"";
// Three outcomes: Success (body not shown), busy (retry), anything else (warn).
390 if (status ==
"Success")
394 else if (status ==
"busy")
396 TLOG(TLVL_DEBUG) <<
"The Dispatcher returned \"busy\", will retry in 0.5s";
400 TLOG(TLVL_WARNING) <<
"The Dispatcher returned status " << status <<
" when attempting to unregister this monitor!";
407 TLOG(TLVL_INFO) <<
"Successfully unregistered the monitor from the Dispatcher";
408 monitorRegistered_ =
false;
/// @brief Query the Dispatcher for its current state through the commander plugin.
/// @return The string returned by commander_->send_status(). The value returned
///         after an exception is on a line not shown in this listing.
411 std::string artdaq::TransferWrapper::getDispatcherStatus()
415 return commander_->send_status();
// Exceptions derived from std::exception are caught and reported as a warning
// rather than propagated from this handler as far as the visible lines show.
417 catch (std::exception
const& ex)
419 TLOG(TLVL_WARNING) <<
"An exception was thrown trying to collect the Dispatcher's status. Most likely cause is the application is no longer running.";
// Fragment of the TransferWrapper destructor (the signature line is not part
// of this listing). When the monitor is still registered, cleanup is attempted;
// the statements inside this branch are not shown (presumably a call to
// unregisterMonitor, judging by the message below) - TODO confirm.
426 if (monitorRegistered_)
// ExceptionHandlerRethrow::no is passed here, which suggests failures during
// destruction are reported without being rethrown - TODO confirm.
434 ExceptionHandler(ExceptionHandlerRethrow::no,
435 "An exception occurred when trying to unregister monitor during TransferWrapper's destruction");
Commandable is the base class for all artdaq components which implement the artdaq state machine...
TransferWrapper(const fhicl::ParameterSet &pset)
TransferWrapper Constructor.
static void CleanUpGlobals()
Clean up statically-allocated Manager class instances.
virtual ~TransferWrapper()
TransferWrapper Destructor.
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, const std::string &plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
artdaq::FragmentPtrs receiveMessage()
Receive a Fragment from the TransferInterface, and send it to art.
This TransferInterface is a Receiver.
volatile std::sig_atomic_t gSignalStatus
Stores signal from signal handler.
std::unique_ptr< artdaq::CommanderInterface > MakeCommanderPlugin(const fhicl::ParameterSet &commander_pset, artdaq::Commandable &commandable)
Load a CommanderInterface plugin.
Value that is to be returned when a Transfer plugin determines that no more data will be arriving...
For code clarity, things checking for successful receive should check retval >= NO_RANK_INFO.
std::unordered_map< artdaq::Fragment::type_t, std::unique_ptr< artdaq::Fragments > > receiveMessages()
Receive all messages for an event from ArtdaqSharedMemoryService.