artdaq  v3_09_06a
CommandableFragmentGenerator.cc
1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
3 
4 #include "artdaq/Generators/CommandableFragmentGenerator.hh"
5 
6 #include <boost/exception/all.hpp>
7 #include <boost/throw_exception.hpp>
8 
9 #include <iterator>
10 #include <limits>
11 #include <thread>
12 
13 #include "canvas/Utilities/Exception.h"
14 #include "cetlib_except/exception.h"
15 #include "fhiclcpp/ParameterSet.h"
16 
17 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
18 #include "artdaq-core/Data/Fragment.hh"
19 #include "artdaq-core/Utilities/ExceptionHandler.hh"
20 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
21 #include "artdaq-core/Utilities/TimeUtils.hh"
22 
23 #include <sys/poll.h>
24 #include <algorithm>
25 #include <fstream>
26 #include <iomanip>
27 #include <iostream>
28 #include <iterator>
29 #include <memory>
31 
32 #define TLVL_GETNEXT 10
33 #define TLVL_GETNEXT_VERBOSE 20
34 #define TLVL_CHECKSTOP 11
35 #define TLVL_EVCOUNTERINC 12
36 #define TLVL_GETDATALOOP 13
37 #define TLVL_GETDATALOOP_DATABUFFWAIT 21
38 #define TLVL_GETDATALOOP_VERBOSE 20
39 #define TLVL_WAITFORBUFFERREADY 15
40 #define TLVL_GETBUFFERSTATS 16
41 #define TLVL_CHECKDATABUFFER 17
42 #define TLVL_GETMONITORINGDATA 18
43 #define TLVL_APPLYREQUESTS 9
44 #define TLVL_SENDEMPTYFRAGMENTS 19
45 #define TLVL_CHECKWINDOWS 14
46 #define TLVL_EMPTYFRAGMENT 22
47 
49  : mutex_()
50  , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
51  , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
52  , isHardwareOK_(true)
53  , run_number_(-1)
54  , subrun_number_(-1)
55  , timeout_(std::numeric_limits<uint64_t>::max())
56  , timestamp_(std::numeric_limits<uint64_t>::max())
57  , should_stop_(true)
58  , exception_(false)
59  , latest_exception_report_("none")
60  , ev_counter_(1)
61  , board_id_(-1)
62  , sleep_on_stop_us_(0)
63 {
64  board_id_ = ps.get<int>("board_id");
65  instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
66 
67  auto fragment_ids = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
68 
69  TLOG(TLVL_TRACE) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)";
70  int fragment_id = ps.get<int>("fragment_id", -99);
71 
72  if (fragment_id != -99)
73  {
74  if (!fragment_ids.empty())
75  {
76  latest_exception_report_ = R"(Error in CommandableFragmentGenerator: can't both define "fragment_id" and "fragment_ids" in FHiCL document)";
77  TLOG(TLVL_ERROR) << latest_exception_report_;
78  throw cet::exception(latest_exception_report_);
79  }
80 
81  fragment_ids.emplace_back(fragment_id);
82  }
83 
84  for (auto& id : fragment_ids)
85  {
86  expectedTypes_[id] = artdaq::Fragment::EmptyFragmentType;
87  }
88 
89  sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
90 }
91 
93 {
94  joinThreads();
95 }
96 
98 {
99  should_stop_ = true;
100  TLOG(TLVL_DEBUG) << "Joining monitoringThread";
101  try
102  {
103  if (monitoringThread_.joinable())
104  {
105  monitoringThread_.join();
106  }
107  }
108  catch (...)
109  {
110  // IGNORED
111  }
112  TLOG(TLVL_DEBUG) << "joinThreads complete";
113 }
114 
116 {
117  bool result = true;
118 
119  if (check_stop()) usleep(sleep_on_stop_us_);
120  if (exception() || should_stop_) return false;
121 
122  if (!useMonitoringThread_ && monitoringInterval_ > 0)
123  {
124  TLOG(TLVL_GETNEXT) << "getNext: Checking whether to collect Monitoring Data";
125  auto now = std::chrono::steady_clock::now();
126 
127  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
128  {
129  TLOG(TLVL_GETNEXT) << "getNext: Collecting Monitoring Data";
130  isHardwareOK_ = checkHWStatus_();
131  TLOG(TLVL_GETNEXT) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
132  lastMonitoringCall_ = now;
133  }
134  }
135 
136  try
137  {
138  std::lock_guard<std::mutex> lk(mutex_);
139  if (!isHardwareOK_)
140  {
141  TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!";
142  return false;
143  }
144  TLOG(TLVL_TRACE) << "getNext: Calling getNext_ w/ ev_counter()=" << ev_counter();
145  try
146  {
147  result = getNext_(output);
148  }
149  catch (...)
150  {
151  throw;
152  }
153  TLOG(TLVL_TRACE) << "getNext: Done with getNext_ - ev_counter() now " << ev_counter();
154  for (auto& dataIter : output)
155  {
156  TLOG(TLVL_GETNEXT_VERBOSE) << "getNext: getNext_() returned fragment with sequenceID = " << dataIter->sequenceID()
157  << ", type = " << dataIter->typeString() << ", id = " << std::to_string(dataIter->fragmentID())
158  << ", timestamp = " << dataIter->timestamp() << ", and sizeBytes = " << dataIter->sizeBytes();
159 
160  auto fragId = dataIter->fragmentID();
161  auto type = dataIter->type();
162 
163  // ELF, 2020 July 16: System Fragments are excluded from these checks
164  if (Fragment::isSystemFragmentType(type))
165  {
166  continue;
167  }
168 
169  if (!expectedTypes_.count(fragId))
170  {
171  TLOG(TLVL_ERROR) << "Received Fragment with Fragment ID " << fragId << ", which is not in the declared list of Fragment IDs! Aborting!";
172  return false;
173  }
174  if (expectedTypes_[fragId] == Fragment::EmptyFragmentType)
175  expectedTypes_[fragId] = type;
176  else if (expectedTypes_[fragId] != type)
177  {
178  TLOG(TLVL_WARNING) << "Received Fragment with Fragment ID " << fragId << " and type " << dataIter->typeString() << "(" << type << "), which does not match expected type for this ID (" << expectedTypes_[fragId] << ")";
179  }
180  }
181  }
182  catch (const cet::exception& e)
183  {
184  latest_exception_report_ = "cet::exception caught in getNext(): ";
185  latest_exception_report_.append(e.what());
186  TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e;
187  set_exception(true);
188  return false;
189  }
190  catch (const boost::exception& e)
191  {
192  latest_exception_report_ = "boost::exception caught in getNext(): ";
193  latest_exception_report_.append(boost::diagnostic_information(e));
194  TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e);
195  set_exception(true);
196  return false;
197  }
198  catch (const std::exception& e)
199  {
200  latest_exception_report_ = "std::exception caught in getNext(): ";
201  latest_exception_report_.append(e.what());
202  TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what();
203  set_exception(true);
204  return false;
205  }
206  catch (...)
207  {
208  latest_exception_report_ = "Unknown exception caught in getNext().";
209  TLOG(TLVL_ERROR) << "getNext: unknown exception caught";
210  set_exception(true);
211  return false;
212  }
213 
214  if (!result)
215  {
216  TLOG(TLVL_DEBUG) << "getNext: Either getNext_ or applyRequests returned false, stopping";
217  }
218 
219  if (metricMan && !output.empty())
220  {
221  auto timestamp = output.front()->timestamp();
222 
223  if (output.size() > 1)
224  { // Only bother sorting if >1 entry
225  for (auto& outputfrag : output)
226  {
227  if (outputfrag->timestamp() > timestamp)
228  {
229  timestamp = outputfrag->timestamp();
230  }
231  }
232  }
233 
234  metricMan->sendMetric("Last Timestamp", timestamp, "Ticks", 1, MetricMode::LastPoint);
235  }
236 
237  return result;
238 }
239 
241 {
242  TLOG(TLVL_CHECKSTOP) << "CFG::check_stop: should_stop=" << should_stop() << ", exception status =" << int(exception());
243 
244  if (!should_stop()) return false;
245 
246  return true;
247 }
248 
250 {
251  TLOG(TLVL_EVCOUNTERINC) << "ev_counter_inc: Incrementing ev_counter from " << ev_counter() << " by " << step;
252  return ev_counter_.fetch_add(step);
253 } // returns the prev value
254 
255 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
256 {
257  TLOG(TLVL_TRACE) << "Start Command received.";
258  if (run < 0)
259  {
260  TLOG(TLVL_ERROR) << "negative run number";
261  throw cet::exception("CommandableFragmentGenerator") << "negative run number"; // NOLINT(cert-err60-cpp)
262  }
263 
264  timeout_ = timeout;
265  timestamp_ = timestamp;
266  ev_counter_.store(1);
267 
268  should_stop_.store(false);
269  exception_.store(false);
270  run_number_ = run;
271  subrun_number_ = 1;
272  latest_exception_report_ = "none";
273 
274  start();
275 
276  std::unique_lock<std::mutex> lk(mutex_);
277  if (useMonitoringThread_) startMonitoringThread();
278  TLOG(TLVL_TRACE) << "Start Command complete.";
279 }
280 
281 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
282 {
283  TLOG(TLVL_TRACE) << "Stop Command received.";
284 
285  timeout_ = timeout;
286  timestamp_ = timestamp;
287 
288  stopNoMutex();
289  should_stop_.store(true);
290  std::unique_lock<std::mutex> lk(mutex_);
291  stop();
292 
293  joinThreads();
294  TLOG(TLVL_TRACE) << "Stop Command complete.";
295 }
296 
297 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
298 {
299  TLOG(TLVL_TRACE) << "Pause Command received.";
300  timeout_ = timeout;
301  timestamp_ = timestamp;
302 
303  pauseNoMutex();
304  should_stop_.store(true);
305  std::unique_lock<std::mutex> lk(mutex_);
306 
307  pause();
308 }
309 
310 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
311 {
312  TLOG(TLVL_TRACE) << "Resume Command received.";
313  timeout_ = timeout;
314  timestamp_ = timestamp;
315 
316  subrun_number_ += 1;
317  should_stop_ = false;
318 
319  // no lock required: thread not started yet
320  resume();
321 
322  std::unique_lock<std::mutex> lk(mutex_);
323  //if (useDataThread_) startDataThread();
324  //if (useMonitoringThread_) startMonitoringThread();
325  TLOG(TLVL_TRACE) << "Resume Command complete.";
326 }
327 
328 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
329 {
330  TLOG(TLVL_TRACE) << "Report Command received.";
331  std::lock_guard<std::mutex> lk(mutex_);
332 
333  // 14-May-2015, KAB: please see the comments associated with the report()
334  // methods in the CommandableFragmentGenerator.hh file for more information
335  // on the use of those methods in this method.
336 
337  // check if the child class has something meaningful for this request
338  std::string childReport = reportSpecific(which);
339  if (childReport.length() > 0) { return childReport; }
340 
341  // handle the requests that we can take care of at this level
342  if (which == "latest_exception")
343  {
344  return latest_exception_report_;
345  }
346 
347  // check if the child class has provided a catch-all report function
348  childReport = report();
349  if (childReport.length() > 0) { return childReport; }
350 
351  // ELF: 5/31/2019: Let BoardReaderCore's report handle this...
352  /*
353  // if we haven't been able to come up with any report so far, say so
354  std::string tmpString = "The \"" + which + "\" command is not ";
355  tmpString.append("currently supported by the ");
356  tmpString.append(metricsReportingInstanceName());
357  tmpString.append(" fragment generator.");
358  */
359  TLOG(TLVL_TRACE) << "Report Command complete.";
360  return ""; //tmpString;
361 }
362 
363 // Default implementations of state functions
365 {
366 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
367 }
368 
370 {
371 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
372 }
373 
375 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
376 }
377 
379 {
380 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
381  return "";
382 }
383 
384 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const& /*unused*/)
385 {
386 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
387  return "";
388 }
389 
391 {
392 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
393  return true;
394 }
395 
396 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const& /*unused*/, std::string const& /*unused*/)
397 {
398 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
399  return true;
400 }
401 
403 {
404  if (monitoringThread_.joinable())
405  {
406  monitoringThread_.join();
407  }
408  TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread";
409  try
410  {
411  monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
412  char tname[16]; // Size 16 - see man page pthread_setname_np(3) and/or prctl(2)
413  snprintf(tname, sizeof(tname) - 1, "%d-CFGMon", my_rank); // NOLINT
414  tname[sizeof(tname) - 1] = '\0'; // assure term. snprintf is not too evil :)
415  auto handle = monitoringThread_.native_handle();
416  pthread_setname_np(handle, tname);
417  }
418  catch (const boost::exception& e)
419  {
420  TLOG(TLVL_ERROR) << "Caught boost::exception starting Hardware Monitoring thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
421  throw cet::exception("ThreadError") << "Caught boost::exception starting Hardware Monitoring thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
422  }
423 }
424 
426 {
427  while (!should_stop())
428  {
429  if (should_stop() || monitoringInterval_ <= 0)
430  {
431  TLOG(TLVL_DEBUG) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
432  << " and monitoringInterval is " << monitoringInterval_ << ", returning";
433  return;
434  }
435  TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_";
436 
437  auto now = std::chrono::steady_clock::now();
438  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
439  {
440  isHardwareOK_ = checkHWStatus_();
441  TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
442  lastMonitoringCall_ = now;
443  }
444  usleep(monitoringInterval_ / 10);
445  }
446 }
CommandableFragmentGenerator(const fhicl::ParameterSet &ps)
CommandableFragmentGenerator Constructor.
virtual bool checkHWStatus_()
Check any relevant hardware status registers. Return false if an error condition exists that should h...
virtual ~CommandableFragmentGenerator()
CommandableFragmentGenerator Destructor.
void getMonitoringDataLoop()
This function regularly calls checkHWStatus_(), and sets the isHardwareOK flag accordingly.
artdaq::Fragment::fragment_id_t fragment_id() const
Get the Fragment ID of this Fragment generator.
std::string ReportCmd(std::string const &which="")
Get a report about a user-specified run-time quantity.
virtual bool metaCommand(std::string const &command, std::string const &arg)
The meta-command is used for implementing user-specific commands in a CommandableFragmentGenerator.
void StopCmd(uint64_t timeout, uint64_t timestamp)
Stop the CommandableFragmentGenerator.
void StartCmd(int run, uint64_t timeout, uint64_t timestamp)
Start the CommandableFragmentGenerator.
virtual void pauseNoMutex()
On call to PauseCmd, pauseNoMutex() is called prior to PauseCmd acquiring the mutex ...
bool check_stop()
Routine used by applyRequests to make sure that all outstanding requests have been fulfilled before r...
void ResumeCmd(uint64_t timeout, uint64_t timestamp)
Resume the CommandableFragmentGenerator.
bool getNext(FragmentPtrs &output) override final
getNext calls either applyRequests or getNext_ to get any data that is ready to be sent to the EventB...
void PauseCmd(uint64_t timeout, uint64_t timestamp)
Pause the CommandableFragmentGenerator.
void startMonitoringThread()
Function that launches the monitoring thread (getMonitoringDataLoop())
virtual void pause()
If a CommandableFragmentGenerator subclass is reading from hardware, the implementation of pause() sh...
virtual void resume()
The subrun number will be incremented before a call to resume.
virtual std::string report()
Let's say that the contract with the report() functions is that they return a non-empty string if the...
size_t ev_counter_inc(size_t step=1)
Increment the event counter.
virtual std::string reportSpecific(std::string const &what)
Report the status of a specific quantity
void joinThreads()
Join any data-taking threads. Should be called when destructing CommandableFragmentGenerator.