artdaq  3.12.05
CommandableFragmentGenerator.cc
1 #include "TRACE/tracemf.h" // Pre-empt TRACE/trace.h from Fragment.hh.
2 #include "artdaq/DAQdata/Globals.hh"
3 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
4 
5 #include "artdaq/Generators/CommandableFragmentGenerator.hh"
6 
7 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
8 #include "artdaq-core/Data/Fragment.hh"
9 #include "artdaq-core/Utilities/TimeUtils.hh"
11 
12 #include "cetlib_except/exception.h"
13 #include "fhiclcpp/ParameterSet.h"
14 
15 #include <boost/exception/all.hpp>
16 #include <boost/lexical_cast.hpp>
17 #include <boost/thread.hpp>
18 
19 #include <sys/poll.h>
20 #include <algorithm>
21 #include <chrono>
22 #include <exception>
23 #include <fstream>
24 #include <iomanip>
25 #include <iostream>
26 #include <iterator>
27 #include <limits>
28 #include <memory>
29 #include <mutex>
30 #include <thread>
31 
32 #define TLVL_GETNEXT 35
33 #define TLVL_GETNEXT_VERBOSE 36
34 #define TLVL_CHECKSTOP 37
35 #define TLVL_EVCOUNTERINC 38
36 #define TLVL_GETDATALOOP 39
37 #define TLVL_GETDATALOOP_DATABUFFWAIT 40
38 #define TLVL_GETDATALOOP_VERBOSE 41
39 #define TLVL_WAITFORBUFFERREADY 42
40 #define TLVL_GETBUFFERSTATS 43
41 #define TLVL_CHECKDATABUFFER 44
42 #define TLVL_GETMONITORINGDATA 45
43 #define TLVL_APPLYREQUESTS 46
44 #define TLVL_SENDEMPTYFRAGMENTS 47
45 #define TLVL_CHECKWINDOWS 48
46 #define TLVL_EMPTYFRAGMENT 49
47 
49  : mutex_()
50  , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
51  , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
52  , isHardwareOK_(true)
53  , run_number_(-1)
54  , subrun_number_(-1)
55  , timeout_(std::numeric_limits<uint64_t>::max())
56  , timestamp_(std::numeric_limits<uint64_t>::max())
57  , should_stop_(true)
58  , exception_(false)
59  , latest_exception_report_("none")
60  , ev_counter_(1)
61  , sleep_on_stop_us_(0)
62 {
63  auto fragment_ids = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
64 
65  TLOG(TLVL_DEBUG + 33) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)";
66  int fragment_id = ps.get<int>("fragment_id", -99);
67 
68  if (fragment_id != -99)
69  {
70  if (!fragment_ids.empty())
71  {
72  latest_exception_report_ = R"(Error in CommandableFragmentGenerator: can't both define "fragment_id" and "fragment_ids" in FHiCL document)";
73  TLOG(TLVL_ERROR) << latest_exception_report_;
74  throw cet::exception(latest_exception_report_);
75  }
76 
77  fragment_ids.emplace_back(fragment_id);
78  }
79 
80  auto generated_fragments_per_event = ps.get<size_t>("generated_fragments_per_event", 1);
81  if (generated_fragments_per_event != fragment_ids.size())
82  {
83  latest_exception_report_ = R"(Error in CommandableFragmentGenerator: "generated_fragments_per_event" disagrees with size of "fragment_ids" list!)";
84  TLOG(TLVL_ERROR) << latest_exception_report_;
85  throw cet::exception(latest_exception_report_);
86  }
87 
88  int first_fragment_id = std::numeric_limits<int>::max();
89  for (auto& id : fragment_ids)
90  {
91  if (id < first_fragment_id) first_fragment_id = id;
92  expectedTypes_[id] = artdaq::Fragment::EmptyFragmentType;
93  }
94  instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(first_fragment_id);
95 
96  sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
97 }
98 
100 {
101  joinThreads();
102 }
103 
105 {
106  should_stop_ = true;
107  TLOG(TLVL_DEBUG + 32) << "Joining monitoringThread";
108  try
109  {
110  if (monitoringThread_.joinable())
111  {
112  monitoringThread_.join();
113  }
114  }
115  catch (...)
116  {
117  // IGNORED
118  }
119  TLOG(TLVL_DEBUG + 32) << "joinThreads complete";
120 }
121 
123 {
124  bool result = true;
125 
126  if (check_stop()) usleep(sleep_on_stop_us_);
127  if (exception() || should_stop_) return false;
128 
129  if (!useMonitoringThread_ && monitoringInterval_ > 0)
130  {
131  TLOG(TLVL_GETNEXT) << "getNext: Checking whether to collect Monitoring Data";
132  auto now = std::chrono::steady_clock::now();
133 
134  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
135  {
136  TLOG(TLVL_GETNEXT) << "getNext: Collecting Monitoring Data";
137  isHardwareOK_ = checkHWStatus_();
138  TLOG(TLVL_GETNEXT) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
139  lastMonitoringCall_ = now;
140  }
141  }
142 
143  try
144  {
145  std::lock_guard<std::mutex> lk(mutex_);
146  if (!isHardwareOK_)
147  {
148  TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!";
149  return false;
150  }
151  TLOG(TLVL_DEBUG + 33) << "getNext: Calling getNext_ w/ ev_counter()=" << ev_counter();
152  try
153  {
154  result = getNext_(output);
155  }
156  catch (...)
157  {
158  throw;
159  }
160  TLOG(TLVL_DEBUG + 33) << "getNext: Done with getNext_ - ev_counter() now " << ev_counter();
161  for (auto& dataIter : output)
162  {
163  TLOG(TLVL_GETNEXT_VERBOSE) << "getNext: getNext_() returned fragment with sequenceID = " << dataIter->sequenceID()
164  << ", type = " << dataIter->typeString() << ", id = " << std::to_string(dataIter->fragmentID())
165  << ", timestamp = " << dataIter->timestamp() << ", and sizeBytes = " << dataIter->sizeBytes();
166 
167  auto fragId = dataIter->fragmentID();
168  auto type = dataIter->type();
169 
170  // ELF, 2020 July 16: System Fragments are excluded from these checks
171  if (Fragment::isSystemFragmentType(type))
172  {
173  continue;
174  }
175 
176  if (!expectedTypes_.count(fragId))
177  {
178  TLOG(TLVL_ERROR) << "Received Fragment with Fragment ID " << fragId << ", which is not in the declared list of Fragment IDs! Aborting!";
179  return false;
180  }
181  if (expectedTypes_[fragId] == Fragment::EmptyFragmentType)
182  expectedTypes_[fragId] = type;
183  else if (expectedTypes_[fragId] != type)
184  {
185  TLOG(TLVL_WARNING) << "Received Fragment with Fragment ID " << fragId << " and type " << dataIter->typeString() << "(" << type << "), which does not match expected type for this ID (" << expectedTypes_[fragId] << ")";
186  }
187  }
188  }
189  catch (const cet::exception& e)
190  {
191  latest_exception_report_ = "cet::exception caught in getNext(): ";
192  latest_exception_report_.append(e.what());
193  TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e;
194  set_exception(true);
195  return false;
196  }
197  catch (const boost::exception& e)
198  {
199  latest_exception_report_ = "boost::exception caught in getNext(): ";
200  latest_exception_report_.append(boost::diagnostic_information(e));
201  TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e);
202  set_exception(true);
203  return false;
204  }
205  catch (const std::exception& e)
206  {
207  latest_exception_report_ = "std::exception caught in getNext(): ";
208  latest_exception_report_.append(e.what());
209  TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what();
210  set_exception(true);
211  return false;
212  }
213  catch (...)
214  {
215  latest_exception_report_ = "Unknown exception caught in getNext().";
216  TLOG(TLVL_ERROR) << "getNext: unknown exception caught";
217  set_exception(true);
218  return false;
219  }
220 
221  if (!result)
222  {
223  TLOG(TLVL_DEBUG + 32) << "getNext: Either getNext_ or applyRequests returned false, stopping";
224  }
225 
226  if (metricMan && !output.empty())
227  {
228  auto timestamp = output.front()->timestamp();
229 
230  if (output.size() > 1)
231  { // Only bother sorting if >1 entry
232  for (auto& outputfrag : output)
233  {
234  if (outputfrag->timestamp() > timestamp)
235  {
236  timestamp = outputfrag->timestamp();
237  }
238  }
239  }
240 
241  metricMan->sendMetric("Last Timestamp", timestamp, "Ticks", 1, MetricMode::LastPoint);
242  }
243 
244  return result;
245 }
246 
248 {
249  TLOG(TLVL_CHECKSTOP) << "CFG::check_stop: should_stop=" << should_stop() << ", exception status =" << int(exception());
250 
251  if (!should_stop()) return false;
252 
253  return true;
254 }
255 
257 {
258  TLOG(TLVL_EVCOUNTERINC) << "ev_counter_inc: Incrementing ev_counter from " << ev_counter() << " by " << step;
259  return ev_counter_.fetch_add(step);
260 } // returns the prev value
261 
262 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
263 {
264  TLOG(TLVL_DEBUG + 33) << "Start Command received.";
265  if (run < 0)
266  {
267  TLOG(TLVL_ERROR) << "negative run number";
268  throw cet::exception("CommandableFragmentGenerator") << "negative run number"; // NOLINT(cert-err60-cpp)
269  }
270 
271  timeout_ = timeout;
272  timestamp_ = timestamp;
273  ev_counter_.store(1);
274 
275  should_stop_.store(false);
276  exception_.store(false);
277  run_number_ = run;
278  subrun_number_ = 1;
279  latest_exception_report_ = "none";
280 
281  start();
282 
283  std::unique_lock<std::mutex> lk(mutex_);
284  if (useMonitoringThread_) startMonitoringThread();
285  TLOG(TLVL_DEBUG + 33) << "Start Command complete.";
286 }
287 
288 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
289 {
290  TLOG(TLVL_DEBUG + 33) << "Stop Command received.";
291 
292  timeout_ = timeout;
293  timestamp_ = timestamp;
294 
295  stopNoMutex();
296  should_stop_.store(true);
297  std::unique_lock<std::mutex> lk(mutex_);
298  stop();
299 
300  joinThreads();
301  TLOG(TLVL_DEBUG + 33) << "Stop Command complete.";
302 }
303 
304 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
305 {
306  TLOG(TLVL_DEBUG + 33) << "Pause Command received.";
307  timeout_ = timeout;
308  timestamp_ = timestamp;
309 
310  pauseNoMutex();
311  should_stop_.store(true);
312  std::unique_lock<std::mutex> lk(mutex_);
313 
314  pause();
315 }
316 
317 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
318 {
319  TLOG(TLVL_DEBUG + 33) << "Resume Command received.";
320  timeout_ = timeout;
321  timestamp_ = timestamp;
322 
323  subrun_number_ += 1;
324  should_stop_ = false;
325 
326  // no lock required: thread not started yet
327  resume();
328 
329  std::unique_lock<std::mutex> lk(mutex_);
330  // if (useDataThread_) startDataThread();
331  // if (useMonitoringThread_) startMonitoringThread();
332  TLOG(TLVL_DEBUG + 33) << "Resume Command complete.";
333 }
334 
335 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
336 {
337  TLOG(TLVL_DEBUG + 33) << "Report Command received.";
338  std::lock_guard<std::mutex> lk(mutex_);
339 
340  // 14-May-2015, KAB: please see the comments associated with the report()
341  // methods in the CommandableFragmentGenerator.hh file for more information
342  // on the use of those methods in this method.
343 
344  // check if the child class has something meaningful for this request
345  std::string childReport = reportSpecific(which);
346  if (childReport.length() > 0) { return childReport; }
347 
348  // handle the requests that we can take care of at this level
349  if (which == "latest_exception")
350  {
351  return latest_exception_report_;
352  }
353 
354  // check if the child class has provided a catch-all report function
355  childReport = report();
356  if (childReport.length() > 0) { return childReport; }
357 
358  // ELF: 5/31/2019: Let BoardReaderCore's report handle this...
359  /*
360  // if we haven't been able to come up with any report so far, say so
361  std::string tmpString = "The \"" + which + "\" command is not ";
362  tmpString.append("currently supported by the ");
363  tmpString.append(metricsReportingInstanceName());
364  tmpString.append(" fragment generator.");
365  */
366  TLOG(TLVL_DEBUG + 33) << "Report Command complete.";
367  return ""; // tmpString;
368 }
369 
370 // Default implementations of state functions
372 {
373 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
374 }
375 
377 {
378 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
379 }
380 
382 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
383 }
384 
386 {
387 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
388  return "";
389 }
390 
391 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const& /*unused*/)
392 {
393 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
394  return "";
395 }
396 
398 {
399 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
400  return true;
401 }
402 
403 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const& /*unused*/, std::string const& /*unused*/)
404 {
405 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
406  return true;
407 }
408 
410 {
411  if (monitoringThread_.joinable())
412  {
413  monitoringThread_.join();
414  }
415  TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread";
416  try
417  {
418  monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
419  char tname[16]; // Size 16 - see man page pthread_setname_np(3) and/or prctl(2)
420  snprintf(tname, sizeof(tname) - 1, "%d-CFGMon", my_rank); // NOLINT
421  tname[sizeof(tname) - 1] = '\0'; // assure term. snprintf is not too evil :)
422  auto handle = monitoringThread_.native_handle();
423  pthread_setname_np(handle, tname);
424  }
425  catch (const boost::exception& e)
426  {
427  TLOG(TLVL_ERROR) << "Caught boost::exception starting Hardware Monitoring thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
428  throw cet::exception("ThreadError") << "Caught boost::exception starting Hardware Monitoring thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
429  }
430 }
431 
433 {
434  while (!should_stop())
435  {
436  if (should_stop() || monitoringInterval_ <= 0)
437  {
438  TLOG(TLVL_DEBUG + 32) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
439  << " and monitoringInterval is " << monitoringInterval_ << ", returning";
440  return;
441  }
442  TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_";
443 
444  auto now = std::chrono::steady_clock::now();
445  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
446  {
447  isHardwareOK_ = checkHWStatus_();
448  TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
449  lastMonitoringCall_ = now;
450  }
451  usleep(monitoringInterval_ / 10);
452  }
453 }
CommandableFragmentGenerator(const fhicl::ParameterSet &ps)
CommandableFragmentGenerator Constructor.
virtual bool checkHWStatus_()
Check any relevant hardware status registers. Return false if an error condition exists that should h...
virtual ~CommandableFragmentGenerator()
CommandableFragmentGenerator Destructor.
void getMonitoringDataLoop()
This function regularly calls checkHWStatus_(), and sets the isHardwareOK flag accordingly.
artdaq::Fragment::fragment_id_t fragment_id() const
Get the Fragment ID of this Fragment generator.
std::string ReportCmd(std::string const &which="")
Get a report about a user-specified run-time quantity.
virtual bool metaCommand(std::string const &command, std::string const &arg)
The meta-command is used for implementing user-specific commands in a CommandableFragmentGenerator.
void StopCmd(uint64_t timeout, uint64_t timestamp)
Stop the CommandableFragmentGenerator.
void StartCmd(int run, uint64_t timeout, uint64_t timestamp)
Start the CommandableFragmentGenerator.
virtual void pauseNoMutex()
On call to PauseCmd, pauseNoMutex() is called prior to PauseCmd acquiring the mutex ...
bool check_stop()
Routine used by applyRequests to make sure that all outstanding requests have been fulfilled before r...
void ResumeCmd(uint64_t timeout, uint64_t timestamp)
Resume the CommandableFragmentGenerator.
bool getNext(FragmentPtrs &output) override final
getNext calls either applyRequests or getNext_ to get any data that is ready to be sent to the EventB...
void PauseCmd(uint64_t timeout, uint64_t timestamp)
Pause the CommandableFragmentGenerator.
void startMonitoringThread()
Function that launches the monitoring thread (getMonitoringDataLoop())
virtual void pause()
If a CommandableFragmentGenerator subclass is reading from hardware, the implementation of pause() sh...
virtual void resume()
The subrun number will be incremented before a call to resume.
virtual std::string report()
Let's say that the contract with the report() functions is that they return a non-empty string if the...
size_t ev_counter_inc(size_t step=1)
Increment the event counter.
virtual std::string reportSpecific(std::string const &what)
Report the status of a specific quantity
void joinThreads()
Join any data-taking threads. Should be called when destructing CommandableFragmentGenerator.