artdaq  v3_12_02
CommandableFragmentGenerator.cc
1 #include "TRACE/tracemf.h" // Pre-empt TRACE/trace.h from Fragment.hh.
2 #include "artdaq/DAQdata/Globals.hh"
3 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
4 
5 #include "artdaq/Generators/CommandableFragmentGenerator.hh"
6 
7 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
8 #include "artdaq-core/Data/Fragment.hh"
9 #include "artdaq-core/Utilities/TimeUtils.hh"
11 
12 #include "cetlib_except/exception.h"
13 #include "fhiclcpp/ParameterSet.h"
14 
15 #include <boost/exception/all.hpp>
16 #include <boost/lexical_cast.hpp>
17 #include <boost/thread.hpp>
18 
19 #include <sys/poll.h>
20 #include <algorithm>
21 #include <chrono>
22 #include <exception>
23 #include <fstream>
24 #include <iomanip>
25 #include <iostream>
26 #include <iterator>
27 #include <limits>
28 #include <memory>
29 #include <mutex>
30 #include <thread>
31 
32 #define TLVL_GETNEXT 35
33 #define TLVL_GETNEXT_VERBOSE 36
34 #define TLVL_CHECKSTOP 37
35 #define TLVL_EVCOUNTERINC 38
36 #define TLVL_GETDATALOOP 39
37 #define TLVL_GETDATALOOP_DATABUFFWAIT 40
38 #define TLVL_GETDATALOOP_VERBOSE 41
39 #define TLVL_WAITFORBUFFERREADY 42
40 #define TLVL_GETBUFFERSTATS 43
41 #define TLVL_CHECKDATABUFFER 44
42 #define TLVL_GETMONITORINGDATA 45
43 #define TLVL_APPLYREQUESTS 46
44 #define TLVL_SENDEMPTYFRAGMENTS 47
45 #define TLVL_CHECKWINDOWS 48
46 #define TLVL_EMPTYFRAGMENT 49
47 
49  : mutex_()
50  , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
51  , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
52  , isHardwareOK_(true)
53  , run_number_(-1)
54  , subrun_number_(-1)
55  , timeout_(std::numeric_limits<uint64_t>::max())
56  , timestamp_(std::numeric_limits<uint64_t>::max())
57  , should_stop_(true)
58  , exception_(false)
59  , latest_exception_report_("none")
60  , ev_counter_(1)
61  , board_id_(-1)
62  , sleep_on_stop_us_(0)
63 {
64  board_id_ = ps.get<int>("board_id");
65  instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
66 
67  auto fragment_ids = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
68 
69  TLOG(TLVL_DEBUG + 33) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)";
70  int fragment_id = ps.get<int>("fragment_id", -99);
71 
72  if (fragment_id != -99)
73  {
74  if (!fragment_ids.empty())
75  {
76  latest_exception_report_ = R"(Error in CommandableFragmentGenerator: can't both define "fragment_id" and "fragment_ids" in FHiCL document)";
77  TLOG(TLVL_ERROR) << latest_exception_report_;
78  throw cet::exception(latest_exception_report_);
79  }
80 
81  fragment_ids.emplace_back(fragment_id);
82  }
83 
84  auto generated_fragments_per_event = ps.get<size_t>("generated_fragments_per_event", 1);
85  if (generated_fragments_per_event != fragment_ids.size())
86  {
87  latest_exception_report_ = R"(Error in CommandableFragmentGenerator: "generated_fragments_per_event" disagrees with size of "fragment_ids" list!)";
88  TLOG(TLVL_ERROR) << latest_exception_report_;
89  throw cet::exception(latest_exception_report_);
90  }
91 
92  for (auto& id : fragment_ids)
93  {
94  expectedTypes_[id] = artdaq::Fragment::EmptyFragmentType;
95  }
96 
97  sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
98 }
99 
// NOTE(review): the signature line for this body is not visible in this listing;
// from its position it appears to be the destructor -- confirm against the header.
// The body only calls joinThreads(), which requests a stop and joins the
// monitoring thread if it is joinable.
101 {
102  joinThreads();
103 }
104 
106 {
107  should_stop_ = true;
108  TLOG(TLVL_DEBUG + 32) << "Joining monitoringThread";
109  try
110  {
111  if (monitoringThread_.joinable())
112  {
113  monitoringThread_.join();
114  }
115  }
116  catch (...)
117  {
118  // IGNORED
119  }
120  TLOG(TLVL_DEBUG + 32) << "joinThreads complete";
121 }
122 
124 {
125  bool result = true;
126 
127  if (check_stop()) usleep(sleep_on_stop_us_);
128  if (exception() || should_stop_) return false;
129 
130  if (!useMonitoringThread_ && monitoringInterval_ > 0)
131  {
132  TLOG(TLVL_GETNEXT) << "getNext: Checking whether to collect Monitoring Data";
133  auto now = std::chrono::steady_clock::now();
134 
135  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
136  {
137  TLOG(TLVL_GETNEXT) << "getNext: Collecting Monitoring Data";
138  isHardwareOK_ = checkHWStatus_();
139  TLOG(TLVL_GETNEXT) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
140  lastMonitoringCall_ = now;
141  }
142  }
143 
144  try
145  {
146  std::lock_guard<std::mutex> lk(mutex_);
147  if (!isHardwareOK_)
148  {
149  TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!";
150  return false;
151  }
152  TLOG(TLVL_DEBUG + 33) << "getNext: Calling getNext_ w/ ev_counter()=" << ev_counter();
153  try
154  {
155  result = getNext_(output);
156  }
157  catch (...)
158  {
159  throw;
160  }
161  TLOG(TLVL_DEBUG + 33) << "getNext: Done with getNext_ - ev_counter() now " << ev_counter();
162  for (auto& dataIter : output)
163  {
164  TLOG(TLVL_GETNEXT_VERBOSE) << "getNext: getNext_() returned fragment with sequenceID = " << dataIter->sequenceID()
165  << ", type = " << dataIter->typeString() << ", id = " << std::to_string(dataIter->fragmentID())
166  << ", timestamp = " << dataIter->timestamp() << ", and sizeBytes = " << dataIter->sizeBytes();
167 
168  auto fragId = dataIter->fragmentID();
169  auto type = dataIter->type();
170 
171  // ELF, 2020 July 16: System Fragments are excluded from these checks
172  if (Fragment::isSystemFragmentType(type))
173  {
174  continue;
175  }
176 
177  if (!expectedTypes_.count(fragId))
178  {
179  TLOG(TLVL_ERROR) << "Received Fragment with Fragment ID " << fragId << ", which is not in the declared list of Fragment IDs! Aborting!";
180  return false;
181  }
182  if (expectedTypes_[fragId] == Fragment::EmptyFragmentType)
183  expectedTypes_[fragId] = type;
184  else if (expectedTypes_[fragId] != type)
185  {
186  TLOG(TLVL_WARNING) << "Received Fragment with Fragment ID " << fragId << " and type " << dataIter->typeString() << "(" << type << "), which does not match expected type for this ID (" << expectedTypes_[fragId] << ")";
187  }
188  }
189  }
190  catch (const cet::exception& e)
191  {
192  latest_exception_report_ = "cet::exception caught in getNext(): ";
193  latest_exception_report_.append(e.what());
194  TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e;
195  set_exception(true);
196  return false;
197  }
198  catch (const boost::exception& e)
199  {
200  latest_exception_report_ = "boost::exception caught in getNext(): ";
201  latest_exception_report_.append(boost::diagnostic_information(e));
202  TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e);
203  set_exception(true);
204  return false;
205  }
206  catch (const std::exception& e)
207  {
208  latest_exception_report_ = "std::exception caught in getNext(): ";
209  latest_exception_report_.append(e.what());
210  TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what();
211  set_exception(true);
212  return false;
213  }
214  catch (...)
215  {
216  latest_exception_report_ = "Unknown exception caught in getNext().";
217  TLOG(TLVL_ERROR) << "getNext: unknown exception caught";
218  set_exception(true);
219  return false;
220  }
221 
222  if (!result)
223  {
224  TLOG(TLVL_DEBUG + 32) << "getNext: Either getNext_ or applyRequests returned false, stopping";
225  }
226 
227  if (metricMan && !output.empty())
228  {
229  auto timestamp = output.front()->timestamp();
230 
231  if (output.size() > 1)
232  { // Only bother sorting if >1 entry
233  for (auto& outputfrag : output)
234  {
235  if (outputfrag->timestamp() > timestamp)
236  {
237  timestamp = outputfrag->timestamp();
238  }
239  }
240  }
241 
242  metricMan->sendMetric("Last Timestamp", timestamp, "Ticks", 1, MetricMode::LastPoint);
243  }
244 
245  return result;
246 }
247 
249 {
250  TLOG(TLVL_CHECKSTOP) << "CFG::check_stop: should_stop=" << should_stop() << ", exception status =" << int(exception());
251 
252  if (!should_stop()) return false;
253 
254  return true;
255 }
256 
258 {
259  TLOG(TLVL_EVCOUNTERINC) << "ev_counter_inc: Incrementing ev_counter from " << ev_counter() << " by " << step;
260  return ev_counter_.fetch_add(step);
261 } // returns the prev value
262 
263 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
264 {
265  TLOG(TLVL_DEBUG + 33) << "Start Command received.";
266  if (run < 0)
267  {
268  TLOG(TLVL_ERROR) << "negative run number";
269  throw cet::exception("CommandableFragmentGenerator") << "negative run number"; // NOLINT(cert-err60-cpp)
270  }
271 
272  timeout_ = timeout;
273  timestamp_ = timestamp;
274  ev_counter_.store(1);
275 
276  should_stop_.store(false);
277  exception_.store(false);
278  run_number_ = run;
279  subrun_number_ = 1;
280  latest_exception_report_ = "none";
281 
282  start();
283 
284  std::unique_lock<std::mutex> lk(mutex_);
285  if (useMonitoringThread_) startMonitoringThread();
286  TLOG(TLVL_DEBUG + 33) << "Start Command complete.";
287 }
288 
289 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
290 {
291  TLOG(TLVL_DEBUG + 33) << "Stop Command received.";
292 
293  timeout_ = timeout;
294  timestamp_ = timestamp;
295 
296  stopNoMutex();
297  should_stop_.store(true);
298  std::unique_lock<std::mutex> lk(mutex_);
299  stop();
300 
301  joinThreads();
302  TLOG(TLVL_DEBUG + 33) << "Stop Command complete.";
303 }
304 
305 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
306 {
307  TLOG(TLVL_DEBUG + 33) << "Pause Command received.";
308  timeout_ = timeout;
309  timestamp_ = timestamp;
310 
311  pauseNoMutex();
312  should_stop_.store(true);
313  std::unique_lock<std::mutex> lk(mutex_);
314 
315  pause();
316 }
317 
318 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
319 {
320  TLOG(TLVL_DEBUG + 33) << "Resume Command received.";
321  timeout_ = timeout;
322  timestamp_ = timestamp;
323 
324  subrun_number_ += 1;
325  should_stop_ = false;
326 
327  // no lock required: thread not started yet
328  resume();
329 
330  std::unique_lock<std::mutex> lk(mutex_);
331  // if (useDataThread_) startDataThread();
332  // if (useMonitoringThread_) startMonitoringThread();
333  TLOG(TLVL_DEBUG + 33) << "Resume Command complete.";
334 }
335 
336 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
337 {
338  TLOG(TLVL_DEBUG + 33) << "Report Command received.";
339  std::lock_guard<std::mutex> lk(mutex_);
340 
341  // 14-May-2015, KAB: please see the comments associated with the report()
342  // methods in the CommandableFragmentGenerator.hh file for more information
343  // on the use of those methods in this method.
344 
345  // check if the child class has something meaningful for this request
346  std::string childReport = reportSpecific(which);
347  if (childReport.length() > 0) { return childReport; }
348 
349  // handle the requests that we can take care of at this level
350  if (which == "latest_exception")
351  {
352  return latest_exception_report_;
353  }
354 
355  // check if the child class has provided a catch-all report function
356  childReport = report();
357  if (childReport.length() > 0) { return childReport; }
358 
359  // ELF: 5/31/2019: Let BoardReaderCore's report handle this...
360  /*
361  // if we haven't been able to come up with any report so far, say so
362  std::string tmpString = "The \"" + which + "\" command is not ";
363  tmpString.append("currently supported by the ");
364  tmpString.append(metricsReportingInstanceName());
365  tmpString.append(" fragment generator.");
366  */
367  TLOG(TLVL_DEBUG + 33) << "Report Command complete.";
368  return ""; // tmpString;
369 }
370 
371 // Default implementations of state functions
// Default pauseNoMutex() body (per the pragma text below): intentionally empty.
// PauseCmd calls pauseNoMutex() before it takes mutex_.
// The #pragma message makes the compiler print a note when this file is built.
373 {
374 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
375 }
376 
// Default pause() body (per the pragma text below): intentionally empty.
// PauseCmd calls pause() while holding mutex_.
// The #pragma message makes the compiler print a note when this file is built.
378 {
379 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
380 }
381 
383 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
384 }
385 
// Default report() body (per the pragma text below): returns an empty string,
// which ReportCmd treats as "no report available" and falls through.
// The #pragma message makes the compiler print a note when this file is built.
387 {
388 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
389  return "";
390 }
391 
392 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const& /*unused*/)
393 {
394 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
395  return "";
396 }
397 
// Default checkHWStatus_() body (per the pragma text below): always reports the
// hardware as healthy. The result is stored in isHardwareOK_ by getNext() and by
// getMonitoringDataLoop(); getNext() stops when that flag is false.
// The #pragma message makes the compiler print a note when this file is built.
399 {
400 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
401  return true;
402 }
403 
// Default metaCommand(): ignores both string arguments and always returns true.
// The #pragma message makes the compiler print a note when this file is built.
404 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const& /*unused*/, std::string const& /*unused*/)
405 {
406 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
407  return true;
408 }
409 
411 {
412  if (monitoringThread_.joinable())
413  {
414  monitoringThread_.join();
415  }
416  TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread";
417  try
418  {
419  monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
420  char tname[16]; // Size 16 - see man page pthread_setname_np(3) and/or prctl(2)
421  snprintf(tname, sizeof(tname) - 1, "%d-CFGMon", my_rank); // NOLINT
422  tname[sizeof(tname) - 1] = '\0'; // assure term. snprintf is not too evil :)
423  auto handle = monitoringThread_.native_handle();
424  pthread_setname_np(handle, tname);
425  }
426  catch (const boost::exception& e)
427  {
428  TLOG(TLVL_ERROR) << "Caught boost::exception starting Hardware Monitoring thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
429  throw cet::exception("ThreadError") << "Caught boost::exception starting Hardware Monitoring thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
430  }
431 }
432 
434 {
435  while (!should_stop())
436  {
437  if (should_stop() || monitoringInterval_ <= 0)
438  {
439  TLOG(TLVL_DEBUG + 32) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
440  << " and monitoringInterval is " << monitoringInterval_ << ", returning";
441  return;
442  }
443  TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_";
444 
445  auto now = std::chrono::steady_clock::now();
446  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
447  {
448  isHardwareOK_ = checkHWStatus_();
449  TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
450  lastMonitoringCall_ = now;
451  }
452  usleep(monitoringInterval_ / 10);
453  }
454 }
CommandableFragmentGenerator(const fhicl::ParameterSet &ps)
CommandableFragmentGenerator Constructor.
virtual bool checkHWStatus_()
Check any relevant hardware status registers. Return false if an error condition exists that should h...
virtual ~CommandableFragmentGenerator()
CommandableFragmentGenerator Destructor.
void getMonitoringDataLoop()
This function regularly calls checkHWStatus_(), and sets the isHardwareOK flag accordingly.
artdaq::Fragment::fragment_id_t fragment_id() const
Get the Fragment ID of this Fragment generator.
std::string ReportCmd(std::string const &which="")
Get a report about a user-specified run-time quantity.
virtual bool metaCommand(std::string const &command, std::string const &arg)
The meta-command is used for implementing user-specific commands in a CommandableFragmentGenerator.
void StopCmd(uint64_t timeout, uint64_t timestamp)
Stop the CommandableFragmentGenerator.
void StartCmd(int run, uint64_t timeout, uint64_t timestamp)
Start the CommandableFragmentGenerator.
virtual void pauseNoMutex()
On call to PauseCmd, pauseNoMutex() is called prior to PauseCmd acquiring the mutex ...
bool check_stop()
Routine used by applyRequests to make sure that all outstanding requests have been fulfilled before r...
void ResumeCmd(uint64_t timeout, uint64_t timestamp)
Resume the CommandableFragmentGenerator.
bool getNext(FragmentPtrs &output) override final
getNext calls either applyRequests or getNext_ to get any data that is ready to be sent to the EventB...
void PauseCmd(uint64_t timeout, uint64_t timestamp)
Pause the CommandableFragmentGenerator.
void startMonitoringThread()
Function that launches the monitoring thread (getMonitoringDataLoop())
virtual void pause()
If a CommandableFragmentGenerator subclass is reading from hardware, the implementation of pause() sh...
virtual void resume()
The subrun number will be incremented before a call to resume.
virtual std::string report()
Let's say that the contract with the report() functions is that they return a non-empty string if the...
size_t ev_counter_inc(size_t step=1)
Increment the event counter.
virtual std::string reportSpecific(std::string const &what)
Report the status of a specific quantity
void joinThreads()
Join any data-taking threads. Should be called when destructing CommandableFragmentGenerator.