artdaq  v3_02_01
CommandableFragmentGenerator.cc
1 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
2 #include "artdaq/DAQdata/Globals.hh"
3 
4 #include "artdaq/Application/CommandableFragmentGenerator.hh"
5 
6 #include <boost/exception/all.hpp>
7 #include <boost/throw_exception.hpp>
8 
9 #include <limits>
10 #include <iterator>
11 
12 #include "canvas/Utilities/Exception.h"
13 #include "cetlib_except/exception.h"
14 #include "fhiclcpp/ParameterSet.h"
15 
16 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
17 #include "artdaq-core/Data/Fragment.hh"
18 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
19 #include "artdaq-core/Utilities/ExceptionHandler.hh"
20 #include "artdaq-core/Utilities/TimeUtils.hh"
21 
22 #include <fstream>
23 #include <iomanip>
24 #include <iterator>
25 #include <iostream>
26 #include <iomanip>
27 #include <algorithm>
28 #include <sys/poll.h>
30 
31 #define TLVL_GETNEXT 10
32 #define TLVL_GETNEXT_VERBOSE 20
33 #define TLVL_CHECKSTOP 11
34 #define TLVL_EVCOUNTERINC 12
35 #define TLVL_GETDATALOOP 13
36 #define TLVL_GETDATALOOP_DATABUFFWAIT 21
37 #define TLVL_GETDATALOOP_VERBOSE 20
38 #define TLVL_WAITFORBUFFERREADY 15
39 #define TLVL_GETBUFFERSTATS 16
40 #define TLVL_CHECKDATABUFFER 17
41 #define TLVL_GETMONITORINGDATA 18
42 #define TLVL_APPLYREQUESTS 9
43 #define TLVL_SENDEMPTYFRAGMENTS 19
44 #define TLVL_CHECKWINDOWS 14
45 
47  : mutex_()
48  , requestReceiver_(nullptr)
49  , windowOffset_(0)
50  , windowWidth_(0)
51  , staleTimeout_(Fragment::InvalidTimestamp)
52  , expectedType_(Fragment::EmptyFragmentType)
53  , maxFragmentCount_(std::numeric_limits<size_t>::max())
54  , uniqueWindows_(true)
55  , windows_sent_ooo_()
56  , missing_request_window_timeout_us_(1000000)
57  , window_close_timeout_us_(2000000)
58  , useDataThread_(false)
59  , sleep_on_no_data_us_(0)
60  , data_thread_running_(false)
61  , dataBufferDepthFragments_(0)
62  , dataBufferDepthBytes_(0)
63  , maxDataBufferDepthFragments_(1000)
64  , maxDataBufferDepthBytes_(1000)
65  , useMonitoringThread_(false)
66  , monitoringInterval_(0)
67  , lastMonitoringCall_()
68  , isHardwareOK_(true)
69  , dataBuffer_()
70  , newDataBuffer_()
71  , run_number_(-1)
72  , subrun_number_(-1)
73  , timeout_(std::numeric_limits<uint64_t>::max())
74  , timestamp_(std::numeric_limits<uint64_t>::max())
75  , should_stop_(false)
76  , exception_(false)
77  , force_stop_(false)
78  , latest_exception_report_("none")
79  , ev_counter_(1)
80  , board_id_(-1)
81  , instance_name_for_metrics_("FragmentGenerator")
82  , sleep_on_stop_us_(0)
83 {}
84 
86  : mutex_()
87  , requestReceiver_(nullptr)
88  , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
89  , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
90  , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
91  , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
92  , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
93  , windows_sent_ooo_()
94  , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 5000000))
95  , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
96  , useDataThread_(ps.get<bool>("separate_data_thread", false))
97  , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
98  , data_thread_running_(false)
99  , dataBufferDepthFragments_(0)
100  , dataBufferDepthBytes_(0)
101  , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
102  , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
103  , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
104  , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
105  , lastMonitoringCall_()
106  , isHardwareOK_(true)
107  , dataBuffer_()
108  , newDataBuffer_()
109  , run_number_(-1)
110  , subrun_number_(-1)
111  , timeout_(std::numeric_limits<uint64_t>::max())
112  , timestamp_(std::numeric_limits<uint64_t>::max())
113  , should_stop_(false)
114  , exception_(false)
115  , force_stop_(false)
116  , latest_exception_report_("none")
117  , ev_counter_(1)
118  , board_id_(-1)
119  , sleep_on_stop_us_(0)
120 {
121  board_id_ = ps.get<int>("board_id");
122  instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
123 
124  fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
125 
126  TLOG(TLVL_TRACE) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)";
127  int fragment_id = ps.get<int>("fragment_id", -99);
128 
129  if (fragment_id != -99)
130  {
131  if (fragment_ids_.size() != 0)
132  {
133  latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
134  throw cet::exception(latest_exception_report_);
135  }
136  else
137  {
138  fragment_ids_.emplace_back(fragment_id);
139  }
140  }
141 
142  sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
143 
144  dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
145  (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
146 
147  std::string modeString = ps.get<std::string>("request_mode", "ignored");
148  if (modeString == "single" || modeString == "Single")
149  {
150  mode_ = RequestMode::Single;
151  }
152  else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
153  {
154  mode_ = RequestMode::Buffer;
155  }
156  else if (modeString == "window" || modeString == "Window")
157  {
158  mode_ = RequestMode::Window;
159  }
160  else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
161  {
162  mode_ = RequestMode::Ignored;
163  }
164  TLOG(TLVL_DEBUG) << "Request mode is " << printMode_();
165 
166  if (mode_ != RequestMode::Ignored)
167  {
168  if (!useDataThread_)
169  {
170  latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
171  throw cet::exception(latest_exception_report_);
172  }
173  requestReceiver_.reset(new RequestReceiver(ps));
174  }
175 }
176 
178 {
179  joinThreads();
180 }
181 
183 {
184  should_stop_ = true;
185  force_stop_ = true;
186  TLOG(TLVL_DEBUG) << "Joining dataThread";
187  if (dataThread_.joinable()) dataThread_.join();
188  TLOG(TLVL_DEBUG) << "Joining monitoringThread";
189  if (monitoringThread_.joinable()) monitoringThread_.join();
190  requestReceiver_.reset(nullptr);
191 }
192 
194 {
195  bool result = true;
196 
197  if (check_stop()) usleep(sleep_on_stop_us_);
198  if (exception() || force_stop_) return false;
199 
200  if (!useMonitoringThread_ && monitoringInterval_ > 0)
201  {
202  TLOG(TLVL_GETNEXT) << "getNext: Checking whether to collect Monitoring Data";
203  auto now = std::chrono::steady_clock::now();
204 
205  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
206  {
207  TLOG(TLVL_GETNEXT) << "getNext: Collecting Monitoring Data";
208  isHardwareOK_ = checkHWStatus_();
209  TLOG(TLVL_GETNEXT) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
210  lastMonitoringCall_ = now;
211  }
212  }
213 
214  try
215  {
216  std::lock_guard<std::mutex> lk(mutex_);
217  if (useDataThread_)
218  {
219  TLOG(TLVL_TRACE) << "getNext: Calling applyRequests";
220  result = applyRequests(output);
221  TLOG(TLVL_TRACE) << "getNext: Done with applyRequests result=" << std::boolalpha << result;
222 
223  if (exception())
224  {
225  TLOG(TLVL_ERROR) << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
226  throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
227  }
228  }
229  else
230  {
231  if (!isHardwareOK_)
232  {
233  TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!";
234  return false;
235  }
236  TLOG(TLVL_TRACE) << "getNext: Calling getNext_ " << ev_counter();
237  try
238  {
239  result = getNext_(output);
240  }
241  catch (...)
242  {
243  throw;
244  }
245  TLOG(TLVL_TRACE) << "getNext: Done with getNext_ " << ev_counter();
246  for (auto dataIter = output.begin(); dataIter != output.end(); ++dataIter)
247  {
248  TLOG(TLVL_GETNEXT_VERBOSE) << "getNext: getNext_() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
249  << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
250  }
251  }
252  }
253  catch (const cet::exception& e)
254  {
255  latest_exception_report_ = "cet::exception caught in getNext(): ";
256  latest_exception_report_.append(e.what());
257  TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e;
258  set_exception(true);
259  return false;
260  }
261  catch (const boost::exception& e)
262  {
263  latest_exception_report_ = "boost::exception caught in getNext(): ";
264  latest_exception_report_.append(boost::diagnostic_information(e));
265  TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e);
266  set_exception(true);
267  return false;
268  }
269  catch (const std::exception& e)
270  {
271  latest_exception_report_ = "std::exception caught in getNext(): ";
272  latest_exception_report_.append(e.what());
273  TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what();
274  set_exception(true);
275  return false;
276  }
277  catch (...)
278  {
279  latest_exception_report_ = "Unknown exception caught in getNext().";
280  TLOG(TLVL_ERROR) << "getNext: unknown exception caught";
281  set_exception(true);
282  return false;
283  }
284 
285  if (!result)
286  {
287  TLOG(TLVL_DEBUG) << "stopped ";
288  }
289 
290  if (metricMan && !output.empty())
291  {
292  auto timestamp = output.front()->timestamp();
293 
294  if (output.size() > 1)
295  { // Only bother sorting if >1 entry
296  for (auto& outputfrag : output)
297  {
298  if (outputfrag->timestamp() > timestamp)
299  {
300  timestamp = outputfrag->timestamp();
301  }
302  }
303  }
304 
305  metricMan->sendMetric("Last Timestamp", timestamp, "Ticks", 1,
306  MetricMode::LastPoint, app_name);
307  }
308 
309  return result;
310 }
311 
313 {
314  TLOG(TLVL_CHECKSTOP) << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception());
315 
316  if (!should_stop()) return false;
317  if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
318  if (force_stop_) return true;
319 
320  // check_stop returns true if the CFG should stop. We should wait for the RequestReceiver to stop before stopping.
321  return !requestReceiver_->isRunning();
322 }
323 
325 {
326  if (fragment_ids_.size() != 1)
327  {
328  throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
329  }
330  else
331  {
332  return fragment_ids_[0];
333  }
334 }
335 
337 {
338  if (force || mode_ == RequestMode::Ignored)
339  {
340  TLOG(TLVL_EVCOUNTERINC) << "ev_counter_inc: Incrementing ev_counter from " << ev_counter() << " by " << step;
341  return ev_counter_.fetch_add(step);
342  }
343  return ev_counter_.load();
344 } // returns the prev value
345 
346 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
347 {
348  TLOG(TLVL_TRACE) << "Start Command received.";
349  if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
350 
351  timeout_ = timeout;
352  timestamp_ = timestamp;
353  ev_counter_.store(1);
354  should_stop_.store(false);
355  exception_.store(false);
356  run_number_ = run;
357  subrun_number_ = 1;
358  latest_exception_report_ = "none";
359  dataBuffer_.clear();
360  windows_sent_ooo_.clear();
361 
362  start();
363 
364  std::unique_lock<std::mutex> lk(mutex_);
365  if (useDataThread_) startDataThread();
366  if (useMonitoringThread_) startMonitoringThread();
367  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
368  TLOG(TLVL_TRACE) << "Start Command complete.";
369 }
370 
371 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
372 {
373  TLOG(TLVL_TRACE) << "Stop Command received.";
374 
375  timeout_ = timeout;
376  timestamp_ = timestamp;
377  if (requestReceiver_ && requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
378 
379  stopNoMutex();
380  should_stop_.store(true);
381  std::unique_lock<std::mutex> lk(mutex_);
382  stop();
383  TLOG(TLVL_TRACE) << "Stop command complete.";
384 }
385 
386 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
387 {
388  TLOG(TLVL_TRACE) << "Pause Command received.";
389  timeout_ = timeout;
390  timestamp_ = timestamp;
391  if (requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
392 
393  pauseNoMutex();
394  should_stop_.store(true);
395  std::unique_lock<std::mutex> lk(mutex_);
396 
397  pause();
398  TLOG(TLVL_TRACE) << "Pause Command complete.";
399 }
400 
401 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
402 {
403  TLOG(TLVL_TRACE) << "Resume Command received.";
404  timeout_ = timeout;
405  timestamp_ = timestamp;
406 
407  subrun_number_ += 1;
408  should_stop_ = false;
409 
410  dataBuffer_.clear();
411 
412  // no lock required: thread not started yet
413  resume();
414 
415  std::unique_lock<std::mutex> lk(mutex_);
416  if (useDataThread_) startDataThread();
417  if (useMonitoringThread_) startMonitoringThread();
418  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
419  TLOG(TLVL_TRACE) << "Resume Command complete.";
420 }
421 
422 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
423 {
424  TLOG(TLVL_TRACE) << "Report Command received.";
425  std::lock_guard<std::mutex> lk(mutex_);
426 
427  // 14-May-2015, KAB: please see the comments associated with the report()
428  // methods in the CommandableFragmentGenerator.hh file for more information
429  // on the use of those methods in this method.
430 
431  // check if the child class has something meaningful for this request
432  std::string childReport = reportSpecific(which);
433  if (childReport.length() > 0) { return childReport; }
434 
435  // handle the requests that we can take care of at this level
436  if (which == "latest_exception")
437  {
438  return latest_exception_report_;
439  }
440 
441  // check if the child class has provided a catch-all report function
442  childReport = report();
443  if (childReport.length() > 0) { return childReport; }
444 
445  // if we haven't been able to come up with any report so far, say so
446  std::string tmpString = "The \"" + which + "\" command is not ";
447  tmpString.append("currently supported by the ");
448  tmpString.append(metricsReportingInstanceName());
449  tmpString.append(" fragment generator.");
450  TLOG(TLVL_TRACE) << "Report Command complete.";
451  return tmpString;
452 }
453 
454 // Default implementations of state functions
456 {
457 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
458 }
459 
461 {
462 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
463 }
464 
466 {
467 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
468 }
469 
471 {
472 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
473  return "";
474 }
475 
477 {
478 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
479  return "";
480 }
481 
483 {
484 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
485  return true;
486 }
487 
// Default implementation of metaCommand: both string arguments are unnamed and therefore ignored,
// and the function always returns true.
488 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const&, std::string const&)
489 {
// The pragma makes the compiler print this note while this translation unit is being built.
490 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
491  return true;
492 }
493 
495 {
496  if (dataThread_.joinable()) dataThread_.join();
497  TLOG(TLVL_INFO) << "Starting Data Receiver Thread";
498  dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
499 }
500 
502 {
503  if (monitoringThread_.joinable()) monitoringThread_.join();
504  TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread";
505  monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
506 }
507 
509 {
510  switch (mode_)
511  {
512  case RequestMode::Single:
513  return "Single";
514  case RequestMode::Buffer:
515  return "Buffer";
516  case RequestMode::Window:
517  return "Window";
518  case RequestMode::Ignored:
519  return "Ignored";
520  }
521 
522  return "ERROR";
523 }
524 
526 {
527  data_thread_running_ = true;
528  while (!force_stop_)
529  {
530  if (!isHardwareOK_)
531  {
532  TLOG(TLVL_DEBUG) << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread";
533  data_thread_running_ = false;
534  return;
535  }
536 
537  TLOG(TLVL_GETDATALOOP) << "getDataLoop: calling getNext_";
538 
539  bool data = false;
540  auto startdata = std::chrono::steady_clock::now();
541 
542  try
543  {
544  data = getNext_(newDataBuffer_);
545  }
546  catch (...)
547  {
548  ExceptionHandler(ExceptionHandlerRethrow::no,
549  "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
550  set_exception(true);
551 
552  data_thread_running_ = false;
553  return;
554  }
555  for (auto dataIter = newDataBuffer_.begin(); dataIter != newDataBuffer_.end(); ++dataIter)
556  {
557  TLOG(TLVL_GETDATALOOP_VERBOSE) << "getDataLoop: getNext_() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
558  << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
559  }
560 
561  if (metricMan)
562  {
563  metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
564  }
565 
566  if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
567  {
568  usleep(sleep_on_no_data_us_);
569  }
570 
571  TLOG(TLVL_GETDATALOOP_DATABUFFWAIT) << "Waiting for data buffer ready";
572  if (!waitForDataBufferReady()) return;
573  TLOG(TLVL_GETDATALOOP_DATABUFFWAIT) << "Done waiting for data buffer ready";
574 
575  TLOG(TLVL_GETDATALOOP) << "getDataLoop: processing data";
576  if (data && !force_stop_)
577  {
578  std::unique_lock<std::mutex> lock(dataBufferMutex_);
579  switch (mode_)
580  {
581  case RequestMode::Single:
582  // While here, if for some strange reason more than one event's worth of data is returned from getNext_...
583  while (newDataBuffer_.size() >= fragment_ids_.size())
584  {
585  dataBuffer_.clear();
586  auto it = newDataBuffer_.begin();
587  std::advance(it, fragment_ids_.size());
588  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
589  }
590  break;
591  case RequestMode::Buffer:
592  case RequestMode::Ignored:
593  case RequestMode::Window:
594  default:
595  //dataBuffer_.reserve(dataBuffer_.size() + newDataBuffer_.size());
596  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
597  break;
598  }
599  getDataBufferStats();
600  }
601 
602  {
603  std::unique_lock<std::mutex> lock(dataBufferMutex_);
604  if (dataBuffer_.size() > 0)
605  {
606  dataCondition_.notify_all();
607  }
608  }
609  if (!data || force_stop_)
610  {
611  TLOG(TLVL_INFO) << "Data flow has stopped. Ending data collection thread";
612  data_thread_running_ = false;
613  if (requestReceiver_) requestReceiver_->ClearRequests();
614  dataBuffer_.clear();
615  newDataBuffer_.clear();
616  return;
617  }
618  }
619 }
620 
622 {
623  auto startwait = std::chrono::steady_clock::now();
624  auto first = true;
625  auto lastwaittime = 0ULL;
626  while (dataBufferIsTooLarge())
627  {
628  if (should_stop())
629  {
630  TLOG(TLVL_DEBUG) << "Run ended while waiting for buffer to shrink!";
631  std::unique_lock<std::mutex> lock(dataBufferMutex_);
632  getDataBufferStats();
633  dataCondition_.notify_all();
634  data_thread_running_ = false;
635  return false;
636  }
637  auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
638 
639  if (first || (waittime != lastwaittime && waittime % 1000 == 0))
640  {
641  TLOG(TLVL_WARNING) << "Bad Omen: Data Buffer has exceeded its size limits. "
642  << "(seq_id=" << ev_counter()
643  << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
644  << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")";
645  TLOG(TLVL_TRACE) << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues";
646  first = false;
647  }
648  if (waittime % 5 && waittime != lastwaittime)
649  {
650  TLOG(TLVL_WAITFORBUFFERREADY) << "getDataLoop: Data Retreival paused for " << waittime << " ms waiting for data buffer to drain";
651  }
652  lastwaittime = waittime;
653  usleep(1000);
654  }
655  return true;
656 }
657 
659 {
660  return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
661 }
662 
664 {
666  dataBufferDepthFragments_ = dataBuffer_.size();
667  size_t acc = 0;
668  TLOG(TLVL_GETBUFFERSTATS) << "getDataBufferStats: Calculating buffer size";
669  for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
670  {
671  if (i->get() != nullptr)
672  {
673  acc += (*i)->sizeBytes();
674  }
675  }
676  dataBufferDepthBytes_ = acc;
677 
678  if (metricMan)
679  {
680  TLOG(TLVL_GETBUFFERSTATS) << "getDataBufferStats: Sending Metrics";
681  metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
682  metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
683  }
684  TLOG(TLVL_GETBUFFERSTATS) << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
685  << ", sz=" << dataBufferDepthBytes_.load() << "/" << maxDataBufferDepthBytes_;
686 }
687 
689 {
690  std::unique_lock<std::mutex> lock(dataBufferMutex_);
691  dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
692  if (dataBufferDepthFragments_ > 0)
693  {
694  if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
695  {
696  // Eliminate extra fragments
697  while (dataBufferIsTooLarge())
698  {
699  dataBuffer_.erase(dataBuffer_.begin());
700  getDataBufferStats();
701  }
702  if (dataBuffer_.size() > 0)
703  {
704  TLOG(TLVL_CHECKDATABUFFER) << "Determining if Fragments can be dropped from data buffer";
705  Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
706  Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
707  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
708  {
709  if ((*it)->timestamp() < min)
710  {
711  it = dataBuffer_.erase(it);
712  }
713  else
714  {
715  ++it;
716  }
717  }
718  getDataBufferStats();
719  }
720  }
721  else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
722  {
723  // Eliminate extra fragments
724  while (dataBuffer_.size() > fragment_ids_.size())
725  {
726  dataBuffer_.erase(dataBuffer_.begin());
727  }
728  }
729  }
730 }
731 
733 {
734  while (!force_stop_)
735  {
736  if (should_stop() || monitoringInterval_ <= 0)
737  {
738  TLOG(TLVL_DEBUG) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
739  << " and monitoringInterval is " << monitoringInterval_ << ", returning";
740  return;
741  }
742  TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_";
743 
744  auto now = std::chrono::steady_clock::now();
745  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
746  {
747  isHardwareOK_ = checkHWStatus_();
748  TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
749  lastMonitoringCall_ = now;
750  }
751  usleep(monitoringInterval_ / 10);
752  }
753 }
754 
756 {
757  // We just copy everything that's here into the output.
758  TLOG(TLVL_APPLYREQUESTS) << "Mode is Ignored; Copying data to output";
759  std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
760  dataBuffer_.clear();
761 }
762 
764 {
765  // We only care about the latest request received. Send empties for all others.
766  auto requests = requestReceiver_->GetRequests();
767  while (requests.size() > 1)
768  {
769  // std::map is ordered by key => Last sequence ID in the map is the one we care about
770  requestReceiver_->RemoveRequest(requests.begin()->first);
771  requests.erase(requests.begin());
772  }
773  sendEmptyFragments(frags, requests);
774 
775  // If no requests remain after sendEmptyFragments, return
776  if (requests.size() == 0 || !requests.count(ev_counter())) return;
777 
778  if (dataBuffer_.size() > 0)
779  {
780  TLOG(TLVL_APPLYREQUESTS) << "Mode is Single; Sending copy of last event";
781  for (auto& fragptr : dataBuffer_)
782  {
783  // Return the latest data point
784  auto frag = fragptr.get();
785  auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
786  newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
787  memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
788  newfrag->setTimestamp(requests[ev_counter()]);
789  newfrag->setSequenceID(ev_counter());
790  frags.push_back(std::move(newfrag));
791  }
792  }
793  else
794  {
795  sendEmptyFragment(frags, ev_counter(), "No data for");
796  }
797  requestReceiver_->RemoveRequest(ev_counter());
798  ev_counter_inc(1, true);
799 }
800 
802 {
803  // We only care about the latest request received. Send empties for all others.
804  auto requests = requestReceiver_->GetRequests();
805  while (requests.size() > 1)
806  {
807  // std::map is ordered by key => Last sequence ID in the map is the one we care about
808  requestReceiver_->RemoveRequest(requests.begin()->first);
809  requests.erase(requests.begin());
810  }
811  sendEmptyFragments(frags, requests);
812 
813  // If no requests remain after sendEmptyFragments, return
814  if (requests.size() == 0 || !requests.count(ev_counter())) return;
815 
816  TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered Fragments";
817  frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
818  frags.back()->setTimestamp(requests[ev_counter()]);
819  ContainerFragmentLoader cfl(*frags.back());
820  cfl.set_missing_data(false); // Buffer mode is never missing data, even if there IS no data.
821 
822  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
823  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
824  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
825  {
826  TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: Adding Fragment with timestamp " << (*it)->timestamp() << " to Container";
827  cfl.addFragment(*it);
828  it = dataBuffer_.erase(it);
829  }
830  requestReceiver_->RemoveRequest(ev_counter());
831  ev_counter_inc(1, true);
832 }
833 
835 {
836  TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode BEGIN";
837 
838  auto requests = requestReceiver_->GetRequests();
839 
840  TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode: Starting request processing";
841  for (auto req = requests.begin(); req != requests.end();)
842  {
843  TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode: processing request with sequence ID " << req->first << ", timestamp " << req->second;
844 
845 
846  while (req->first < ev_counter() && requests.size() > 0)
847  {
848  TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
849  requestReceiver_->RemoveRequest(req->first);
850  req = requests.erase(req);
851  }
852  if (requests.size() == 0) break;
853 
854  auto ts = req->second;
855  TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: Checking that data exists for request window " << req->first;
856  Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
857  Fragment::timestamp_t max = min + windowWidth_;
858  TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: min is " << min << ", max is " << max
859  << " and last point in buffer is " << (dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0) << " (sz=" << dataBuffer_.size() << ")";
860  bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
861  bool windowTimeout = !windowClosed && TimeUtils::GetElapsedTimeMicroseconds(requestReceiver_->GetRequestTime(req->first)) > window_close_timeout_us_;
862  if (windowTimeout)
863  {
864  TLOG(TLVL_WARNING) << "A timeout occurred waiting for data to close the request window ({" << min << "-" << max
865  << "}, buffer={" << (dataBuffer_.size() > 0 ? dataBuffer_.front()->timestamp() : 0) << "-"
866  << (dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)
867  << "} ). Time waiting: "
868  << TimeUtils::GetElapsedTimeMicroseconds(requestReceiver_->GetRequestTime(req->first)) << " us "
869  << "(> " << window_close_timeout_us_ << " us).";
870  }
871  if (windowClosed || !data_thread_running_ || windowTimeout)
872  {
873  TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered or Window-requested Fragments";
874  frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
875  frags.back()->setTimestamp(ts);
876  ContainerFragmentLoader cfl(*frags.back());
877 
878  if (!windowClosed) cfl.set_missing_data(true);
879  if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
880  {
881  TLOG(TLVL_DEBUG) << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer";
882  cfl.set_missing_data(true);
883  }
884 
885  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
886  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
887  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
888  {
889  Fragment::timestamp_t fragT = (*it)->timestamp();
890  if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
891  {
892  ++it;
893  continue;
894  }
895 
896  TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: Adding Fragment with timestamp " << (*it)->timestamp() << " to Container";
897  cfl.addFragment(*it);
898 
899  if (uniqueWindows_)
900  {
901  it = dataBuffer_.erase(it);
902  }
903  else
904  {
905  ++it;
906  }
907  }
908  requestReceiver_->RemoveRequest(req->first);
909  checkOutOfOrderWindows(req->first);
910  requestReceiver_->RemoveRequest(req->first);
911  req = requests.erase(req);
912  }
913  else
914  {
915  ++req;
916  }
917  }
918 }
919 
921 {
922  if (check_stop() || exception())
923  {
924  return false;
925  }
926 
927  // Wait for data, if in ignored mode, or a request otherwise
928  if (mode_ == RequestMode::Ignored)
929  {
930  while (dataBufferDepthFragments_ <= 0)
931  {
932  if (check_stop() || exception() || !isHardwareOK_) return false;
933  std::unique_lock<std::mutex> lock(dataBufferMutex_);
934  dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
935  }
936  }
937  else
938  {
939  if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
940  checkDataBuffer();
941 
942  // Wait up to 1000 ms for a request...
943  auto counter = 0;
944 
945  while (requestReceiver_->size() == 0 && counter < 100)
946  {
947  if (check_stop() || exception()) return false;
948 
949  checkDataBuffer();
950 
951  requestReceiver_->WaitForRequests(10); // milliseconds
952  counter++;
953  }
954  }
955 
956  {
957  std::unique_lock<std::mutex> dlk(dataBufferMutex_);
958 
959  switch (mode_)
960  {
961  case RequestMode::Single:
962  applyRequestsSingleMode(frags);
963  break;
964  case RequestMode::Window:
965  applyRequestsWindowMode(frags);
966  break;
967  case RequestMode::Buffer:
968  applyRequestsBufferMode(frags);
969  break;
970  case RequestMode::Ignored:
971  default:
972  applyRequestsIgnoredMode(frags);
973  break;
974  }
975 
976  getDataBufferStats();
977  }
978 
979  if (frags.size() > 0)
980  TLOG(TLVL_APPLYREQUESTS) << "Finished Processing Event " << (*frags.begin())->sequenceID() << " for fragment_id " << fragment_id() << ".";
981  return true;
982 }
983 
984 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
985 {
986  TLOG(TLVL_WARNING) << desc << " sequence ID " << seqId << ", sending empty fragment";
987  for (auto fid : fragment_ids_)
988  {
989  auto frag = new Fragment();
990  frag->setSequenceID(seqId);
991  frag->setFragmentID(fid);
992  frag->setSystemType(Fragment::EmptyFragmentType);
993  frags.emplace_back(FragmentPtr(frag));
994  }
995  return true;
996 }
997 
998 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
999 {
1000  if (requests.size() > 0)
1001  {
1002  TLOG(TLVL_SENDEMPTYFRAGMENTS) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first;
1003  while (requests.begin()->first > ev_counter())
1004  {
1005  sendEmptyFragment(frags, ev_counter(), "Missed request for");
1006  ev_counter_inc(1, true);
1007  }
1008  }
1009 }
1010 
1011 void artdaq::CommandableFragmentGenerator::checkOutOfOrderWindows(artdaq::Fragment::sequence_id_t seq)
1012 {
1013  windows_sent_ooo_[seq] = std::chrono::steady_clock::now();
1014 
1015  auto it = windows_sent_ooo_.begin();
1016  while (it != windows_sent_ooo_.end())
1017  {
1018  if (seq == it->first && it->first == ev_counter())
1019  {
1020  TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Sequence ID matches ev_counter, incrementing ev_counter (" << ev_counter() << ")";
1021  ev_counter_inc(1, true);
1022  it = windows_sent_ooo_.erase(it);
1023  }
1024  else if (it->first <= ev_counter())
1025  {
1026  TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Data-taking has caught up to out-of-order window request " << it->first << ", removing from list. ev_counter=" << ev_counter();
1027  requestReceiver_->RemoveRequest(ev_counter());
1028  if (it->first == ev_counter()) ev_counter_inc(1, true);
1029  it = windows_sent_ooo_.erase(it);
1030  }
1031  else if (TimeUtils::GetElapsedTimeMicroseconds(it->second) > missing_request_window_timeout_us_)
1032  {
1033  TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Out-of-order window " << it->first << " has timed out, setting current sequence ID and removing from list";
1034  while (ev_counter() <= it->first)
1035  {
1036  if (ev_counter() < it->first) TLOG(TLVL_WARNING) << "Missed request for sequence ID " << ev_counter() << "! Will not send any data for this sequence ID!";
1037  requestReceiver_->RemoveRequest(ev_counter());
1038  ev_counter_inc(1, true);
1039  }
1040  windows_sent_ooo_.erase(windows_sent_ooo_.begin(), it);
1041  it = windows_sent_ooo_.erase(it);
1042  }
1043  else
1044  {
1045  ++it;
1046  }
1047  }
1048 }
int fragment_id() const
Get the current Fragment ID, if there is only one.
void applyRequestsSingleMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Single. Precondition: dataBufferMutex_ and reques...
virtual bool checkHWStatus_()
Check any relevant hardware status registers. Return false if an error condition exists that should h...
virtual ~CommandableFragmentGenerator()
CommandableFragmentGenerator Destructor.
void applyRequestsBufferMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Buffer. Precondition: dataBufferMutex_ and reques...
bool sendEmptyFragment(FragmentPtrs &frags, size_t sequenceId, std::string desc)
Send an EmptyFragmentType Fragment.
void getMonitoringDataLoop()
This function regularly calls checkHWStatus_(), and sets the isHardwareOK flag accordingly.
void startDataThread()
Function that launches the data thread (getDataLoop())
std::string ReportCmd(std::string const &which="")
Get a report about a user-specified run-time quantity.
virtual bool metaCommand(std::string const &command, std::string const &arg)
The meta-command is used for implementing user-specific commands in a CommandableFragmentGenerator.
bool dataBufferIsTooLarge()
Test the configured constraints on the data buffer.
void StopCmd(uint64_t timeout, uint64_t timestamp)
Stop the CommandableFragmentGenerator.
void applyRequestsWindowMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Window. Precondition: dataBufferMutex_ and reques...
void StartCmd(int run, uint64_t timeout, uint64_t timestamp)
Start the CommandableFragmentGenerator.
virtual void pauseNoMutex()
On call to PauseCmd, pauseNoMutex() is called prior to PauseCmd acquiring the mutex ...
bool check_stop()
Routine used by applyRequests to make sure that all outstanding requests have been fulfilled before r...
void ResumeCmd(uint64_t timeout, uint64_t timestamp)
Resume the CommandableFragmentGenerator.
CommandableFragmentGenerator()
CommandableFragmentGenerator default constructor.
bool getNext(FragmentPtrs &output) override final
getNext calls either applyRequests or getNext_ to get any data that is ready to be sent to the EventB...
bool waitForDataBufferReady()
Wait for the data buffer to drain (dataBufferIsTooLarge returns false), periodically reporting status...
size_t ev_counter_inc(size_t step=1, bool force=false)
Increment the event counter, if the current RequestMode allows it.
void applyRequestsIgnoredMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Ignored. Precondition: dataBufferMutex_ and reque...
void PauseCmd(uint64_t timeout, uint64_t timestamp)
Pause the CommandableFragmentGenerator.
void getDataLoop()
When separate_data_thread is set to true, this loop repeatedly calls getNext_ and adds returned Fragm...
void sendEmptyFragments(FragmentPtrs &frags, std::map< Fragment::sequence_id_t, Fragment::timestamp_t > &requests)
This function is for Buffered and Single request modes, as they can only respond to one data request ...
Receive data requests and make them available to CommandableFragmentGenerator or other interested par...
void startMonitoringThread()
Function that launches the monitoring thread (getMonitoringDataLoop())
virtual void pause()
If a CommandableFragmentGenerator subclass is reading from hardware, the implementation of pause() sh...
virtual void resume()
The subrun number will be incremented before a call to resume.
void checkDataBuffer()
Perform data buffer pruning operations. If the RequestMode is Single, removes all but the latest Frag...
virtual std::string report()
Let's say that the contract with the report() functions is that they return a non-empty string if the...
std::string printMode_()
Return the string representation of the current RequestMode.
void getDataBufferStats()
Calculate the size of the dataBuffer and report appropriate metrics.
void checkOutOfOrderWindows(Fragment::sequence_id_t seq)
Check the windows_sent_ooo_ map for sequence IDs that may be removed.
virtual std::string reportSpecific(std::string const &what)
Report the status of a specific quantity
bool applyRequests(FragmentPtrs &output)
See if any requests have been received, and add the corresponding data Fragment objects to the output...
void joinThreads()
Join any data-taking threads. Should be called when destructing CommandableFragmentGenerator.