artdaq  v3_08_00
TransferWrapper.cc
1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME "TransferWrapper"
3 
4 #include "artdaq-core/Data/Fragment.hh"
5 #include "artdaq-core/Utilities/ExceptionHandler.hh"
6 #include "artdaq/ArtModules/detail/TransferWrapper.hh"
7 #include "artdaq/DAQdata/NetMonHeader.hh"
8 #include "artdaq/ExternalComms/MakeCommanderPlugin.hh"
9 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
10 
11 #include "cetlib/BasicPluginFactory.h"
12 #include "cetlib_except/exception.h"
13 #include "fhiclcpp/ParameterSet.h"
14 
#include <csignal>
#include <iostream>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
20 
namespace {
/// Number of the most recently handled signal; stays 0 until signal_handler() runs.
/// The receive and registration loops in this file poll it to stop early.
volatile std::sig_atomic_t gSignalStatus = 0;
}  // namespace

/**
 * \brief Signal handler: remember which signal was delivered
 * \param signal Number of the signal being handled
 *
 * Only stores the value in gSignalStatus; all reaction to the signal happens
 * in the code that polls that flag.
 */
void signal_handler(int signal)
{
    gSignalStatus = static_cast<std::sig_atomic_t>(signal);
}
33 
34 artdaq::TransferWrapper::TransferWrapper(const fhicl::ParameterSet& pset)
35  : timeoutInUsecs_(pset.get<std::size_t>("timeoutInUsecs", 100000))
36  , transfer_(nullptr)
37  , commander_(nullptr)
38  , pset_(pset)
39  , dispatcherHost_(pset.get<std::string>("dispatcherHost", "localhost"))
40  , dispatcherPort_(pset.get<std::string>("dispatcherPort", "5266"))
41  , serverUrl_(pset.get<std::string>("server_url", "http://" + dispatcherHost_ + ":" + dispatcherPort_ + "/RPC2"))
42  , maxEventsBeforeInit_(pset.get<std::size_t>("maxEventsBeforeInit", 5))
43  , allowedFragmentTypes_(pset.get<std::vector<int>>("allowedFragmentTypes", {226, 227, 229}))
44  , runningStateTimeout_(pset.get<double>("dispatcherConnectTimeout", 0))
45  , runningStateInterval_us_(pset.get<size_t>("dispatcherConnectRetryInterval_us", 1000000))
46  , quitOnFragmentIntegrityProblem_(pset.get<bool>("quitOnFragmentIntegrityProblem", true))
47  , multi_run_mode_(pset.get<bool>("allowMultipleRuns", false))
48  , monitorRegistered_(false)
49 {
50  std::signal(SIGINT, signal_handler);
51 
52  try
53  {
54  if (metricMan)
55  {
56  metricMan->initialize(pset.get<fhicl::ParameterSet>("metrics", fhicl::ParameterSet()), "Online Monitor");
57  metricMan->do_start();
58  }
59  }
60  catch (...)
61  {
62  ExceptionHandler(ExceptionHandlerRethrow::no, "TransferWrapper: could not configure metrics");
63  }
64 
65  // Clamp possible values
66  if (runningStateInterval_us_ < 1000)
67  {
68  TLOG(TLVL_WARNING) << "Invalid value " << runningStateInterval_us_ << " us detected for dispatcherConnectRetryInterval_us. Setting to 1000 us";
69  runningStateInterval_us_ = 1000;
70  }
71  if (runningStateInterval_us_ > 30000000)
72  {
73  TLOG(TLVL_WARNING) << "Invalid value " << runningStateInterval_us_ << " us detected for dispatcherConnectRetryInterval_us. Setting to 30,000,000 us";
74  runningStateInterval_us_ = 30000000;
75  }
76 
77  fhicl::ParameterSet new_pset(pset);
78  if (!new_pset.has_key("server_url"))
79  {
80  new_pset.put<std::string>("server_url", serverUrl_);
81  }
82 
84  commander_ = MakeCommanderPlugin(new_pset, c);
85 }
86 
88 {
89  artdaq::FragmentPtrs fragmentPtrs;
90  bool receivedFragment = false;
91  static bool initialized = false;
92  static size_t fragments_received = 0;
93 
94  while (true && !gSignalStatus)
95  {
96  receivedFragment = false;
97  auto fragmentPtr = std::make_unique<artdaq::Fragment>();
98 
99  while (!receivedFragment)
100  {
101  if (gSignalStatus)
102  {
103  TLOG(TLVL_INFO) << "Ctrl-C appears to have been hit";
104  unregisterMonitor();
105  return fragmentPtrs;
106  }
107  if (!monitorRegistered_)
108  {
109  registerMonitor();
110  if (!monitorRegistered_) return fragmentPtrs;
111  }
112 
113  try
114  {
115  auto result = transfer_->receiveFragment(*fragmentPtr, timeoutInUsecs_);
116 
118  {
119  receivedFragment = true;
120  fragments_received++;
121 
122  static size_t cntr = 0;
123  auto mod = ++cntr % 10;
124  auto suffix = "-th";
125  if (mod == 1) suffix = "-st";
126  if (mod == 2) suffix = "-nd";
127  if (mod == 3) suffix = "-rd";
128  TLOG(TLVL_INFO) << "Received " << cntr << suffix << " event, "
129  << "seqID == " << fragmentPtr->sequenceID()
130  << ", type == " << fragmentPtr->typeString();
131  continue;
132  }
133  else if (result == artdaq::TransferInterface::DATA_END)
134  {
135  TLOG(TLVL_ERROR) << "Transfer Plugin disconnected or other unrecoverable error. Shutting down.";
136  unregisterMonitor();
137  initialized = false;
138  continue;
139  }
140  else
141  {
142  // 02-Jun-2018, KAB: added status/result printout
143  // to-do: add another else clause that explicitly checks for RECV_TIMEOUT
144  TLOG(TLVL_WARNING) << "Timeout occurred in call to transfer_->receiveFragmentFrom; will try again"
145  << ", status = " << result;
146  }
147  }
148  catch (...)
149  {
150  ExceptionHandler(ExceptionHandlerRethrow::yes,
151  "Problem receiving data in TransferWrapper::receiveMessage");
152  }
153  }
154 
155  if (fragmentPtr->type() == artdaq::Fragment::EndOfDataFragmentType)
156  {
157  //if (monitorRegistered_)
158  //{
159  // unregisterMonitor();
160  //}
161  if (multi_run_mode_)
162  {
163  unregisterMonitor();
164  initialized = false;
165  continue;
166  }
167  else
168  {
169  return fragmentPtrs;
170  }
171  }
172 
173  checkIntegrity(*fragmentPtr);
174 
175  if (initialized || fragmentPtr->type() == artdaq::Fragment::InitFragmentType)
176  {
177  initialized = true;
178  fragmentPtrs.push_back(std::move(fragmentPtr));
179  break;
180  }
181  else
182  {
183  receivedFragment = false;
184 
185  if (fragments_received > maxEventsBeforeInit_)
186  {
187  throw cet::exception("TransferWrapper") << "First " << maxEventsBeforeInit_ << " events received did not include the \"Init\" event containing necessary info for art; exiting...";
188  }
189  }
190  }
191 
192  return fragmentPtrs;
193 }
194 
195 std::unordered_map<artdaq::Fragment::type_t, std::unique_ptr<artdaq::Fragments>> artdaq::TransferWrapper::receiveMessages()
196 {
197  std::unordered_map<artdaq::Fragment::type_t, std::unique_ptr<artdaq::Fragments>> output;
198 
199  auto ptrs = receiveMessage();
200  for (auto& ptr : ptrs)
201  {
202  auto fragType = ptr->type();
203  auto fragPtr = ptr.release();
204  ptr.reset(nullptr);
205 
206  if (!output.count(fragType))
207  {
208  output[fragType].reset(new artdaq::Fragments());
209  }
210 
211  output[fragType]->emplace_back(std::move(*fragPtr));
212  }
213 
214  return output;
215 }
216 
217 void artdaq::TransferWrapper::checkIntegrity(const artdaq::Fragment& fragment) const
218 {
219  const size_t artdaqheader = artdaq::detail::RawFragmentHeader::num_words() *
220  sizeof(artdaq::detail::RawFragmentHeader::RawDataType);
221  const size_t payload = static_cast<size_t>(fragment.dataEndBytes() - fragment.dataBeginBytes());
222  const size_t metadata = sizeof(artdaq::NetMonHeader);
223  const size_t totalsize = fragment.sizeBytes();
224 
225  const size_t type = static_cast<size_t>(fragment.type());
226 
227  if (totalsize != artdaqheader + metadata + payload)
228  {
229  std::stringstream errmsg;
230  errmsg << "Error: artdaq fragment of type " << fragment.typeString() << ", sequence ID " << fragment.sequenceID() << " has internally inconsistent measures of its size, signalling data corruption: in bytes,"
231  << " total size = " << totalsize << ", artdaq fragment header = " << artdaqheader << ", metadata = " << metadata << ", payload = " << payload;
232 
233  TLOG(TLVL_ERROR) << errmsg.str();
234 
235  if (quitOnFragmentIntegrityProblem_)
236  {
237  throw cet::exception("TransferWrapper") << errmsg.str();
238  }
239  else
240  {
241  return;
242  }
243  }
244 
245  auto findloc = std::find(allowedFragmentTypes_.begin(), allowedFragmentTypes_.end(), static_cast<int>(type));
246 
247  if (findloc == allowedFragmentTypes_.end())
248  {
249  std::stringstream errmsg;
250  errmsg << "Error: artdaq fragment appears to have type "
251  << type << ", not found in the allowed fragment types list";
252 
253  TLOG(TLVL_ERROR) << errmsg.str();
254  if (quitOnFragmentIntegrityProblem_)
255  {
256  throw cet::exception("TransferWrapper") << errmsg.str();
257  }
258  else
259  {
260  return;
261  }
262  }
263 }
264 
265 void artdaq::TransferWrapper::registerMonitor()
266 {
267  try
268  {
269  transfer_.reset(nullptr);
270  transfer_ = MakeTransferPlugin(pset_, "transfer_plugin", TransferInterface::Role::kReceive);
271  }
272  catch (...)
273  {
274  ExceptionHandler(ExceptionHandlerRethrow::yes,
275  "TransferWrapper: failure in call to MakeTransferPlugin");
276  }
277 
278  auto start = std::chrono::steady_clock::now();
279  auto sts = getDispatcherStatus();
280  while (sts != "Running" && (runningStateTimeout_ == 0 || TimeUtils::GetElapsedTime(start) < runningStateTimeout_))
281  {
282  TLOG(TLVL_DEBUG) << "Dispatcher state: " << sts;
283  if (gSignalStatus)
284  {
285  TLOG(TLVL_INFO) << "Ctrl-C appears to have been hit";
286  return;
287  }
288  TLOG(TLVL_INFO) << "Waited " << std::fixed << std::setprecision(2) << TimeUtils::GetElapsedTime(start) << " s / " << runningStateTimeout_ << " s for Dispatcher to enter the Running state";
289  usleep(runningStateInterval_us_);
290  sts = getDispatcherStatus();
291  }
292  if (sts != "Running") return;
293 
294  auto dispatcherConfig = pset_.get<fhicl::ParameterSet>("dispatcher_config");
295 
296  int retry = 3;
297 
298  while (retry > 0)
299  {
300  TLOG(TLVL_INFO) << "Attempting to register this monitor (\"" << transfer_->uniqueLabel()
301  << "\") with the dispatcher aggregator";
302 
303  auto status = commander_->send_register_monitor(dispatcherConfig.to_string());
304 
305  TLOG(TLVL_INFO) << "Response from dispatcher is \"" << status << "\"";
306 
307  if (status == "Success")
308  {
309  monitorRegistered_ = true;
310  break;
311  }
312  else
313  {
314  TLOG(TLVL_WARNING) << "Error in TransferWrapper: attempt to register with dispatcher did not result in the \"Success\" response";
315  usleep(100000);
316  }
317  retry--;
318  }
319 }
320 
321 void artdaq::TransferWrapper::unregisterMonitor()
322 {
323  if (!monitorRegistered_)
324  {
325  TLOG(TLVL_WARNING) << "The function to unregister the monitor was called, but the monitor doesn't appear to be registered";
326  return;
327  }
328 
329  std::string sts = getDispatcherStatus();
330 
331  if (sts == "") return;
332 
333  if (sts != "Running" && sts != "Ready")
334  {
335  TLOG(TLVL_WARNING) << "The Dispatcher is not in the Running or Ready state, will not attempt to unregister";
336  return;
337  }
338 
339  int retry = 3;
340  while (retry > 0)
341  {
342  TLOG(TLVL_INFO) << "Requesting that this monitor (" << transfer_->uniqueLabel()
343  << ") be unregistered from the dispatcher aggregator";
344 
345  auto status = commander_->send_unregister_monitor(transfer_->uniqueLabel());
346 
347  TLOG(TLVL_INFO) << "Response from dispatcher is \"" << status << "\"";
348 
349  if (status == "Success")
350  {
351  break;
352  }
353  else if (status == "busy")
354  {}
355  else
356  {
357  TLOG(TLVL_WARNING) << "The Dispatcher returned status " << status << " when attempting to unregister this monitor!";
358  //throw cet::exception("TransferWrapper") << "Error in TransferWrapper: attempt to unregister with dispatcher did not result in the \"Success\" response";
359  }
360  retry--;
361  usleep(500000);
362  }
363  monitorRegistered_ = false;
364 }
365 
366 std::string artdaq::TransferWrapper::getDispatcherStatus()
367 {
368  try
369  {
370  return commander_->send_status();
371  }
372  catch (std::exception const& ex)
373  {
374  TLOG(TLVL_WARNING) << "An exception was thrown trying to collect the Dispatcher's status. Most likely cause is the application is no longer running.";
375  return "";
376  }
377 }
378 
380 {
381  if (monitorRegistered_)
382  {
383  try
384  {
385  unregisterMonitor();
386  }
387  catch (...)
388  {
389  ExceptionHandler(ExceptionHandlerRethrow::no,
390  "An exception occurred when trying to unregister monitor during TransferWrapper's destruction");
391  }
392  }
394 }
Commandable is the base class for all artdaq components which implement the artdaq state machine...
Definition: Commandable.hh:20
TransferWrapper(const fhicl::ParameterSet &pset)
TransferWrapper Constructor.
static void CleanUpGlobals()
Clean up statically-allocated Manager class instances.
Definition: Globals.hh:150
virtual ~TransferWrapper()
TransferWrapper Destructor.
artdaq::FragmentPtrs receiveMessage()
Receive a Fragment from the TransferInterface, and send it to art.
This TransferInterface is a Receiver.
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, std::string plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
Header with length information for NetMonTransport messages.
Definition: NetMonHeader.hh:13
volatile std::sig_atomic_t gSignalStatus
Stores signal from signal handler.
std::unique_ptr< artdaq::CommanderInterface > MakeCommanderPlugin(const fhicl::ParameterSet &commander_pset, artdaq::Commandable &commandable)
Load a CommanderInterface plugin.
Value that is to be returned when a Transfer plugin determines that no more data will be arriving...
For code clarity, things checking for successful receive should check retval >= NO_RANK_INFO.
std::unordered_map< artdaq::Fragment::type_t, std::unique_ptr< artdaq::Fragments > > receiveMessages()
Receive all messages for an event from ArtdaqSharedMemoryService.