artdaq  v3_02_00
CommandableFragmentGenerator.cc
1 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
2 #include "artdaq/DAQdata/Globals.hh"
3 
4 #include "artdaq/Application/CommandableFragmentGenerator.hh"
5 
6 #include <boost/exception/all.hpp>
7 #include <boost/throw_exception.hpp>
8 
9 #include <limits>
10 #include <iterator>
11 
12 #include "canvas/Utilities/Exception.h"
13 #include "cetlib_except/exception.h"
14 #include "fhiclcpp/ParameterSet.h"
15 
16 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
17 #include "artdaq-core/Data/Fragment.hh"
18 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
19 #include "artdaq-core/Utilities/ExceptionHandler.hh"
20 #include "artdaq-core/Utilities/TimeUtils.hh"
21 
22 #include <fstream>
23 #include <iomanip>
24 #include <iterator>
25 #include <iostream>
26 #include <iomanip>
27 #include <algorithm>
28 #include <sys/poll.h>
30 
32  : mutex_()
33  , requestReceiver_(new RequestReceiver())
34  , windowOffset_(0)
35  , windowWidth_(0)
36  , staleTimeout_(Fragment::InvalidTimestamp)
37  , expectedType_(Fragment::EmptyFragmentType)
38  , maxFragmentCount_(std::numeric_limits<size_t>::max())
39  , uniqueWindows_(true)
40  , missing_request_(true)
41  , missing_request_time_()
42  , last_window_send_time_()
43  , last_window_send_time_set_(false)
44  , windows_sent_ooo_()
45  , missing_request_window_timeout_us_(1000000)
46  , window_close_timeout_us_(2000000)
47  , useDataThread_(false)
48  , sleep_on_no_data_us_(0)
49  , data_thread_running_(false)
50  , dataBufferDepthFragments_(0)
51  , dataBufferDepthBytes_(0)
52  , maxDataBufferDepthFragments_(1000)
53  , maxDataBufferDepthBytes_(1000)
54  , useMonitoringThread_(false)
55  , monitoringInterval_(0)
56  , lastMonitoringCall_()
57  , isHardwareOK_(true)
58  , dataBuffer_()
59  , newDataBuffer_()
60  , run_number_(-1)
61  , subrun_number_(-1)
62  , timeout_(std::numeric_limits<uint64_t>::max())
63  , timestamp_(std::numeric_limits<uint64_t>::max())
64  , should_stop_(false)
65  , exception_(false)
66  , force_stop_(false)
67  , latest_exception_report_("none")
68  , ev_counter_(1)
69  , board_id_(-1)
70  , instance_name_for_metrics_("FragmentGenerator")
71  , sleep_on_stop_us_(0)
72 {}
73 
75  : mutex_()
76  , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
77  , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
78  , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
79  , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
80  , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
81  , missing_request_(false)
82  , missing_request_time_(decltype(missing_request_time_)::max())
83  , last_window_send_time_(decltype(last_window_send_time_)::max())
84  , last_window_send_time_set_(false)
85  , windows_sent_ooo_()
86  , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
87  , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
88  , useDataThread_(ps.get<bool>("separate_data_thread", false))
89  , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
90  , data_thread_running_(false)
91  , dataBufferDepthFragments_(0)
92  , dataBufferDepthBytes_(0)
93  , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
94  , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
95  , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
96  , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
97  , lastMonitoringCall_()
98  , isHardwareOK_(true)
99  , dataBuffer_()
100  , newDataBuffer_()
101  , run_number_(-1)
102  , subrun_number_(-1)
103  , timeout_(std::numeric_limits<uint64_t>::max())
104  , timestamp_(std::numeric_limits<uint64_t>::max())
105  , should_stop_(false)
106  , exception_(false)
107  , force_stop_(false)
108  , latest_exception_report_("none")
109  , ev_counter_(1)
110  , board_id_(-1)
111  , sleep_on_stop_us_(0)
112 {
113  board_id_ = ps.get<int>("board_id");
114  instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
115 
116  fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
117 
118  TLOG(TLVL_TRACE) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" ;
119  int fragment_id = ps.get<int>("fragment_id", -99);
120 
121  if (fragment_id != -99)
122  {
123  if (fragment_ids_.size() != 0)
124  {
125  latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
126  throw cet::exception(latest_exception_report_);
127  }
128  else
129  {
130  fragment_ids_.emplace_back(fragment_id);
131  }
132  }
133 
134  sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
135 
136  dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
137  (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
138 
139  std::string modeString = ps.get<std::string>("request_mode", "ignored");
140  if (modeString == "single" || modeString == "Single")
141  {
142  mode_ = RequestMode::Single;
143  }
144  else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
145  {
146  mode_ = RequestMode::Buffer;
147  }
148  else if (modeString == "window" || modeString == "Window")
149  {
150  mode_ = RequestMode::Window;
151  }
152  else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
153  {
154  mode_ = RequestMode::Ignored;
155  }
156  TLOG(TLVL_DEBUG) << "Request mode is " << printMode_() ;
157 
158  if (mode_ != RequestMode::Ignored)
159  {
160  if (!useDataThread_)
161  {
162  latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
163  throw cet::exception(latest_exception_report_);
164  }
165  requestReceiver_.reset(new RequestReceiver(ps));
166  }
167 }
168 
170 {
171  joinThreads();
172 }
173 
175 {
176  should_stop_ = true;
177  force_stop_ = true;
178  TLOG(TLVL_DEBUG) << "Joining dataThread" ;
179  if (dataThread_.joinable()) dataThread_.join();
180  TLOG(TLVL_DEBUG) << "Joining monitoringThread" ;
181  if (monitoringThread_.joinable()) monitoringThread_.join();
182  requestReceiver_.reset(nullptr);
183 }
184 
186 {
187  bool result = true;
188 
189  if (check_stop()) usleep(sleep_on_stop_us_);
190  if (exception() || force_stop_) return false;
191 
192  if (!useMonitoringThread_ && monitoringInterval_ > 0)
193  {
194  TLOG(10) << "getNext: Checking whether to collect Monitoring Data" ;
195  auto now = std::chrono::steady_clock::now();
196 
197  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
198  {
199  TLOG(10) << "getNext: Collecting Monitoring Data" ;
200  isHardwareOK_ = checkHWStatus_();
201  TLOG(10) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ ;
202  lastMonitoringCall_ = now;
203  }
204  }
205 
206  try
207  {
208  std::lock_guard<std::mutex> lk(mutex_);
209  if (useDataThread_)
210  {
211  TLOG(TLVL_TRACE) << "getNext: Calling applyRequests" ;
212  result = applyRequests(output);
213  TLOG(TLVL_TRACE) << "getNext: Done with applyRequests result=" << std::boolalpha << result;
214 
215  if (exception())
216  {
217  TLOG(TLVL_ERROR) << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
218  throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
219  }
220  }
221  else
222  {
223  if (!isHardwareOK_)
224  {
225  TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!" ;
226  return false;
227  }
228  TLOG(TLVL_TRACE) << "getNext: Calling getNext_ " << std::to_string(ev_counter()) ;
229  try
230  {
231  result = getNext_(output);
232  }
233  catch (...)
234  {
235  throw;
236  }
237  TLOG(TLVL_TRACE) << "getNext: Done with getNext_ " << std::to_string(ev_counter()) ;
238  for (auto dataIter = output.begin(); dataIter != output.end(); ++dataIter)
239  {
240  TLOG(20) << "getNext: getNext_() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
241  << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
242  }
243  }
244  }
245  catch (const cet::exception& e)
246  {
247  latest_exception_report_ = "cet::exception caught in getNext(): ";
248  latest_exception_report_.append(e.what());
249  TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e ;
250  set_exception(true);
251  return false;
252  }
253  catch (const boost::exception& e)
254  {
255  latest_exception_report_ = "boost::exception caught in getNext(): ";
256  latest_exception_report_.append(boost::diagnostic_information(e));
257  TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e) ;
258  set_exception(true);
259  return false;
260  }
261  catch (const std::exception& e)
262  {
263  latest_exception_report_ = "std::exception caught in getNext(): ";
264  latest_exception_report_.append(e.what());
265  TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what() ;
266  set_exception(true);
267  return false;
268  }
269  catch (...)
270  {
271  latest_exception_report_ = "Unknown exception caught in getNext().";
272  TLOG(TLVL_ERROR) << "getNext: unknown exception caught" ;
273  set_exception(true);
274  return false;
275  }
276 
277  if (!result)
278  {
279  TLOG(TLVL_DEBUG) << "stopped " ;
280  }
281 
282  if (metricMan && !output.empty()) {
283 
284  auto timestamp = output.front()->timestamp();
285 
286  if (output.size() > 1) { // Only bother sorting if >1 entry
287  for (auto& outputfrag : output ) {
288  if (outputfrag->timestamp() > timestamp) {
289  timestamp = outputfrag->timestamp();
290  }
291  }
292  }
293 
294  metricMan->sendMetric("Last Timestamp", timestamp, "Ticks", 1,
295  MetricMode::LastPoint, app_name);
296  }
297 
298  return result;
299 }
300 
302 {
303  TLOG(14) << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception()) ;
304 
305  if (!should_stop()) return false;
306  if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
307  if (force_stop_) return true;
308 
309  // check_stop returns true if the CFG should stop. We should wait for the RequestReceiver to stop before stopping.
310  return !requestReceiver_->isRunning();
311 }
312 
314 {
315  if (fragment_ids_.size() != 1)
316  {
317  throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
318  }
319  else
320  {
321  return fragment_ids_[0];
322  }
323 }
324 
326 {
327  if (force || mode_ == RequestMode::Ignored)
328  {
329  return ev_counter_.fetch_add(step);
330  }
331  return ev_counter_.load();
332 } // returns the prev value
333 
334 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
335 {
336  if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
337 
338  timeout_ = timeout;
339  timestamp_ = timestamp;
340  ev_counter_.store(1);
341  missing_request_ = false;
342  should_stop_.store(false);
343  exception_.store(false);
344  run_number_ = run;
345  subrun_number_ = 1;
346  latest_exception_report_ = "none";
347  dataBuffer_.clear();
348  last_window_send_time_set_ = false;
349  windows_sent_ooo_.clear();
350 
351  start();
352 
353  std::unique_lock<std::mutex> lk(mutex_);
354  if (useDataThread_) startDataThread();
355  if (useMonitoringThread_) startMonitoringThread();
356  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
357 }
358 
359 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
360 {
361  TLOG(TLVL_DEBUG) << "Stop Command received." ;
362 
363  timeout_ = timeout;
364  timestamp_ = timestamp;
365  if (requestReceiver_ && requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
366 
367  stopNoMutex();
368  should_stop_.store(true);
369  std::unique_lock<std::mutex> lk(mutex_);
370  stop();
371  TLOG(TLVL_DEBUG) << "Stop command complete.";
372 }
373 
374 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
375 {
376  timeout_ = timeout;
377  timestamp_ = timestamp;
378  if (requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
379 
380  pauseNoMutex();
381  should_stop_.store(true);
382  std::unique_lock<std::mutex> lk(mutex_);
383 
384  pause();
385 }
386 
387 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
388 {
389  timeout_ = timeout;
390  timestamp_ = timestamp;
391 
392  subrun_number_ += 1;
393  should_stop_ = false;
394 
395  dataBuffer_.clear();
396 
397  // no lock required: thread not started yet
398  resume();
399 
400  std::unique_lock<std::mutex> lk(mutex_);
401  if (useDataThread_) startDataThread();
402  if (useMonitoringThread_) startMonitoringThread();
403  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
404 }
405 
406 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
407 {
408  std::lock_guard<std::mutex> lk(mutex_);
409 
410  // 14-May-2015, KAB: please see the comments associated with the report()
411  // methods in the CommandableFragmentGenerator.hh file for more information
412  // on the use of those methods in this method.
413 
414  // check if the child class has something meaningful for this request
415  std::string childReport = reportSpecific(which);
416  if (childReport.length() > 0) { return childReport; }
417 
418  // handle the requests that we can take care of at this level
419  if (which == "latest_exception")
420  {
421  return latest_exception_report_;
422  }
423 
424  // check if the child class has provided a catch-all report function
425  childReport = report();
426  if (childReport.length() > 0) { return childReport; }
427 
428  // if we haven't been able to come up with any report so far, say so
429  std::string tmpString = "The \"" + which + "\" command is not ";
430  tmpString.append("currently supported by the ");
431  tmpString.append(metricsReportingInstanceName());
432  tmpString.append(" fragment generator.");
433  return tmpString;
434 }
435 
// Default implementations of state functions
437 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
438 {
439 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
440 }
441 
442 void artdaq::CommandableFragmentGenerator::pause()
443 {
444 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
445 }
446 
447 void artdaq::CommandableFragmentGenerator::resume()
448 {
449 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
450 }
451 
452 std::string artdaq::CommandableFragmentGenerator::report()
453 {
454 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
455  return "";
456 }
457 
458 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
459 {
460 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
461  return "";
462 }
463 
464 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
465 {
466 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
467  return true;
468 }
469 
470 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const&, std::string const&)
471 {
472 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
473  return true;
474 }
475 
477 {
478  if (dataThread_.joinable()) dataThread_.join();
479  TLOG(TLVL_INFO) << "Starting Data Receiver Thread" ;
480  dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
481 }
482 
484 {
485  if (monitoringThread_.joinable()) monitoringThread_.join();
486  TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread" ;
487  monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
488 }
489 
491 {
492  switch (mode_)
493  {
494  case RequestMode::Single:
495  return "Single";
496  case RequestMode::Buffer:
497  return "Buffer";
498  case RequestMode::Window:
499  return "Window";
500  case RequestMode::Ignored:
501  return "Ignored";
502  }
503 
504  return "ERROR";
505 }
506 
508 {
509  data_thread_running_ = true;
510  while (!force_stop_)
511  {
512  if (!isHardwareOK_)
513  {
514  TLOG(TLVL_DEBUG) << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" ;
515  data_thread_running_ = false;
516  return;
517  }
518 
519  TLOG(13) << "getDataLoop: calling getNext_" ;
520 
521  bool data = false;
522  auto startdata = std::chrono::steady_clock::now();
523 
524  try
525  {
526  data = getNext_(newDataBuffer_);
527  }
528  catch (...)
529  {
530  ExceptionHandler(ExceptionHandlerRethrow::no,
531  "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
532  set_exception(true);
533 
534  data_thread_running_ = false;
535  return;
536  }
537  for (auto dataIter = newDataBuffer_.begin(); dataIter != newDataBuffer_.end(); ++dataIter)
538  {
539  TLOG(20) << "getDataLoop: getNext_() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
540  << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
541  }
542 
543  if (metricMan)
544  {
545  metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
546  }
547 
548  if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
549  {
550  usleep(sleep_on_no_data_us_);
551  }
552 
553  TLOG(15) << "Waiting for data buffer ready" ;
554  if (!waitForDataBufferReady()) return;
555  TLOG(15) << "Done waiting for data buffer ready" ;
556 
557  TLOG(13) << "getDataLoop: processing data" ;
558  if (data && !force_stop_)
559  {
560  std::unique_lock<std::mutex> lock(dataBufferMutex_);
561  switch (mode_)
562  {
563  case RequestMode::Single:
564  // While here, if for some strange reason more than one event's worth of data is returned from getNext_...
565  while (newDataBuffer_.size() >= fragment_ids_.size())
566  {
567  dataBuffer_.clear();
568  auto it = newDataBuffer_.begin();
569  std::advance(it, fragment_ids_.size());
570  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
571  }
572  break;
573  case RequestMode::Buffer:
574  case RequestMode::Ignored:
575  case RequestMode::Window:
576  default:
577  //dataBuffer_.reserve(dataBuffer_.size() + newDataBuffer_.size());
578  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
579  break;
580  }
581  getDataBufferStats();
582  }
583 
584  {
585  std::unique_lock<std::mutex> lock(dataBufferMutex_);
586  if (dataBuffer_.size() > 0)
587  {
588  dataCondition_.notify_all();
589  }
590  }
591  if (!data || force_stop_)
592  {
593  TLOG(TLVL_INFO) << "Data flow has stopped. Ending data collection thread" ;
594  data_thread_running_ = false;
595  if (requestReceiver_) requestReceiver_->ClearRequests();
596  dataBuffer_.clear();
597  newDataBuffer_.clear();
598  return;
599  }
600  }
601 }
602 
604 {
605  auto startwait = std::chrono::steady_clock::now();
606  auto first = true;
607  auto lastwaittime = 0ULL;
608  while (dataBufferIsTooLarge())
609  {
610  if (should_stop())
611  {
612  TLOG(TLVL_DEBUG) << "Run ended while waiting for buffer to shrink!" ;
613  std::unique_lock<std::mutex> lock(dataBufferMutex_);
614  getDataBufferStats();
615  dataCondition_.notify_all();
616  data_thread_running_ = false;
617  return false;
618  }
619  auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
620 
621  if (first || (waittime != lastwaittime && waittime % 1000 == 0))
622  {
623  TLOG(TLVL_WARNING) << "Bad Omen: Data Buffer has exceeded its size limits. "
624  << "(seq_id=" << ev_counter()
625  << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
626  << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")" ;
627  TLOG(TLVL_TRACE) << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues" ;
628  first = false;
629  }
630  if (waittime % 5 && waittime != lastwaittime)
631  {
632  TLOG(13) << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" ;
633  }
634  lastwaittime = waittime;
635  usleep(1000);
636  }
637  return true;
638 }
639 
641 {
642  return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
643 }
644 
646 {
648  dataBufferDepthFragments_ = dataBuffer_.size();
649  size_t acc = 0;
650  TLOG(15) << "getDataBufferStats: Calculating buffer size" ;
651  for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
652  {
653  if (i->get() != nullptr)
654  {
655  acc += (*i)->sizeBytes();
656  }
657  }
658  dataBufferDepthBytes_ = acc;
659 
660  if (metricMan)
661  {
662  TLOG(15) << "getDataBufferStats: Sending Metrics" ;
663  metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
664  metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
665  }
666  TLOG(15) << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
667  << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) ;
668 }
669 
671 {
672  std::unique_lock<std::mutex> lock(dataBufferMutex_);
673  dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
674  if (dataBufferDepthFragments_ > 0)
675  {
676  if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
677  {
678  // Eliminate extra fragments
679  while (dataBufferIsTooLarge())
680  {
681  dataBuffer_.erase(dataBuffer_.begin());
682  getDataBufferStats();
683  }
684  if (dataBuffer_.size() > 0)
685  {
686  TLOG(17) << "Determining if Fragments can be dropped from data buffer" ;
687  Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
688  Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
689  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
690  {
691  if ((*it)->timestamp() < min)
692  {
693  it = dataBuffer_.erase(it);
694  }
695  else
696  {
697  ++it;
698  }
699  }
700  getDataBufferStats();
701  }
702  }
703  else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
704  {
705  // Eliminate extra fragments
706  while (dataBuffer_.size() > fragment_ids_.size())
707  {
708  dataBuffer_.erase(dataBuffer_.begin());
709  }
710  }
711  }
712 }
713 
715 {
716  while (!force_stop_)
717  {
718  if (should_stop() || monitoringInterval_ <= 0)
719  {
720  TLOG(TLVL_DEBUG) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
721  << " and monitoringInterval is " << monitoringInterval_ << ", returning" ;
722  return;
723  }
724  TLOG(12) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" ;
725 
726  auto now = std::chrono::steady_clock::now();
727  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
728  {
729  isHardwareOK_ = checkHWStatus_();
730  TLOG(12) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ ;
731  lastMonitoringCall_ = now;
732  }
733  usleep(monitoringInterval_ / 10);
734  }
735 }
736 
738 {
739  // We just copy everything that's here into the output.
740  TLOG(9) << "Mode is Ignored; Copying data to output" ;
741  std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
742  dataBuffer_.clear();
743 }
744 
746 {
747  // We only care about the latest request received. Send empties for all others.
748  auto requests = requestReceiver_->GetRequests();
749  while (requests.size() > 1) {
750  // std::map is ordered by key => Last sequence ID in the map is the one we care about
751  requestReceiver_->RemoveRequest(requests.begin()->first);
752  requests.erase(requests.begin());
753  }
754  sendEmptyFragments(frags, requests);
755 
756  // If no requests remain after sendEmptyFragments, return
757  if (requests.size() == 0 || !requests.count(ev_counter())) return;
758 
759  if (dataBuffer_.size() > 0)
760  {
761  TLOG(9) << "Mode is Single; Sending copy of last event" ;
762  for (auto& fragptr : dataBuffer_)
763  {
764  // Return the latest data point
765  auto frag = fragptr.get();
766  auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
767  newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
768  memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
769  newfrag->setTimestamp(requests[ev_counter()]);
770  newfrag->setSequenceID(ev_counter());
771  frags.push_back(std::move(newfrag));
772  }
773  }
774  else
775  {
776  sendEmptyFragment(frags, ev_counter(), "No data for");
777  }
778  requestReceiver_->RemoveRequest(ev_counter());
779  ev_counter_inc(1, true);
780 }
781 
783 {
784  // We only care about the latest request received. Send empties for all others.
785  auto requests = requestReceiver_->GetRequests();
786  while (requests.size() > 1) {
787  // std::map is ordered by key => Last sequence ID in the map is the one we care about
788  requestReceiver_->RemoveRequest(requests.begin()->first);
789  requests.erase(requests.begin());
790  }
791  sendEmptyFragments(frags, requests);
792 
793  // If no requests remain after sendEmptyFragments, return
794  if (requests.size() == 0 || !requests.count(ev_counter())) return;
795 
796  TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered Fragments" ;
797  frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
798  frags.back()->setTimestamp(requests[ev_counter()]);
799  ContainerFragmentLoader cfl(*frags.back());
800  cfl.set_missing_data(false); // Buffer mode is never missing data, even if there IS no data.
801 
802  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
803  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
804  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
805  {
806  TLOG(9) << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" ;
807  cfl.addFragment(*it);
808  it = dataBuffer_.erase(it);
809  }
810  requestReceiver_->RemoveRequest(ev_counter());
811  ev_counter_inc(1, true);
812 }
813 
815 {
816  TLOG(10) << "applyRequestsWindowMode BEGIN";
817  if (!last_window_send_time_set_)
818  {
819  last_window_send_time_ = std::chrono::steady_clock::now();
820  last_window_send_time_set_ = true;
821  }
822 
823  auto requests = requestReceiver_->GetRequests();
824  bool now_have_desired_request = std::any_of(requests.begin(), requests.end(),
825  [this](decltype(requests)::value_type& request) {
826  return request.first == ev_counter(); });
827 
828  if (missing_request_)
829  {
830  if (!now_have_desired_request && TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) > missing_request_window_timeout_us_)
831  {
832  TLOG(TLVL_ERROR) << "Data-taking has paused for " << TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) << " us "
833  << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages."
834  << " Sending Empty Fragments for missing requests!" ;
835  sendEmptyFragments(frags, requests);
836 
837  missing_request_ = false;
838  missing_request_time_ = decltype(missing_request_time_)::max();
839  }
840  else if (now_have_desired_request) {
841  missing_request_ = false;
842  missing_request_time_ = decltype(missing_request_time_)::max();
843  }
844  }
845 
846  TLOG(10) << "applyRequestsWindowMode: Starting request processing";
847  for (auto req = requests.begin(); req != requests.end();)
848  {
849  TLOG(10, "CommandableFragmentGenerator") << "applyRequestsWindowMode: processing request with sequence ID " << \
850  req->first << ", timestamp " << req->second;
851 
852 
853  while (req->first < ev_counter() && requests.size() > 0)
854  {
855  TLOG(10) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
856  requestReceiver_->RemoveRequest(req->first);
857  req = requests.erase(req);
858  }
859  if (requests.size() == 0) break;
860  if (req->first > ev_counter())
861  {
862  if (!missing_request_)
863  {
864  missing_request_ = true;
865  missing_request_time_ = std::chrono::steady_clock::now();
866  }
867  }
868  auto ts = req->second;
869  TLOG(9) << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) ;
870  Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
871  Fragment::timestamp_t max = min + windowWidth_;
872  TLOG(9) << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
873  << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" ;
874  bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
875  bool windowTimeout = TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) > window_close_timeout_us_;
876  if (windowTimeout)
877  {
878  TLOG(TLVL_WARNING) << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max)
879  << ", buffer=" << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0))
880  << " (if no buffer in memory, this is shown as a 0)). Time waiting: "
881  << TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) << " us "
882  << "(> " << std::to_string(window_close_timeout_us_) << " us)." ;
883 
884  if (missing_request_) {
885  TLOG(TLVL_ERROR) << "A Window timeout has occurred while there are pending requests. Sending empties." ;
886  sendEmptyFragments(frags, requests);
887  }
888  }
889  if (windowClosed || !data_thread_running_ || windowTimeout)
890  {
891  TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered or Window-requested Fragments" ;
892  frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
893  frags.back()->setTimestamp(ts);
894  ContainerFragmentLoader cfl(*frags.back());
895 
896  if (!windowClosed) cfl.set_missing_data(true);
897  if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
898  {
899  TLOG(TLVL_DEBUG) << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" ;
900  cfl.set_missing_data(true);
901  }
902 
903  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
904  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
905  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
906  {
907  Fragment::timestamp_t fragT = (*it)->timestamp();
908  if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
909  {
910  ++it;
911  continue;
912  }
913 
914  TLOG(9) << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" ;
915  cfl.addFragment(*it);
916 
917  if (uniqueWindows_)
918  {
919  it = dataBuffer_.erase(it);
920  }
921  else
922  {
923  ++it;
924  }
925  }
926  if (req->first == ev_counter())
927  {
928  ev_counter_inc(1, true);
929  while (windows_sent_ooo_.count(ev_counter()))
930  {
931  TLOG(9) << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" ;
932  windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
933  ev_counter_inc(1, true);
934  }
935  }
936  else
937  {
938  windows_sent_ooo_.insert(req->first);
939  }
940  requestReceiver_->RemoveRequest(req->first);
941  req = requests.erase(req);
942  last_window_send_time_ = std::chrono::steady_clock::now();
943  }
944  else
945  {
946  ++req;
947  }
948  }
949 }
950 
952 {
953  if (check_stop() || exception())
954  {
955  return false;
956  }
957 
958  // Wait for data, if in ignored mode, or a request otherwise
959  if (mode_ == RequestMode::Ignored)
960  {
961  while (dataBufferDepthFragments_ <= 0)
962  {
963  if (check_stop() || exception() || !isHardwareOK_) return false;
964  std::unique_lock<std::mutex> lock(dataBufferMutex_);
965  dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
966  }
967  }
968  else
969  {
970  if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
971  checkDataBuffer();
972 
973  // Wait up to 1000 ms for a request...
974  auto counter = 0;
975 
976  while (requestReceiver_->size() == 0 && counter < 100)
977  {
978  if (check_stop() || exception()) return false;
979 
980  checkDataBuffer();
981 
982  requestReceiver_->WaitForRequests(10); // milliseconds
983  counter++;
984  }
985  }
986 
987  {
988  std::unique_lock<std::mutex> dlk(dataBufferMutex_);
989 
990  switch (mode_)
991  {
992  case RequestMode::Single:
993  applyRequestsSingleMode(frags);
994  break;
995  case RequestMode::Window:
996  applyRequestsWindowMode(frags);
997  break;
998  case RequestMode::Buffer:
999  applyRequestsBufferMode(frags);
1000  break;
1001  case RequestMode::Ignored:
1002  default:
1003  applyRequestsIgnoredMode(frags);
1004  break;
1005  }
1006 
1007  getDataBufferStats();
1008  }
1009 
1010  if (frags.size() > 0)
1011  TLOG(9) << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." ;
1012  return true;
1013 }
1014 
1015 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
1016 {
1017  TLOG(TLVL_WARNING) << desc << " sequence ID " << seqId << ", sending empty fragment" ;
1018  for (auto fid : fragment_ids_)
1019  {
1020  auto frag = new Fragment();
1021  frag->setSequenceID(seqId);
1022  frag->setFragmentID(fid);
1023  frag->setSystemType(Fragment::EmptyFragmentType);
1024  frags.emplace_back(FragmentPtr(frag));
1025  }
1026  return true;
1027 }
1028 
1029 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
1030 {
1031  if (requests.size() == 0 && windows_sent_ooo_.size() == 0) return;
1032 
1033  if (requests.size() > 0) {
1034  TLOG(19) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first ;
1035  while (requests.begin()->first > ev_counter())
1036  {
1037  sendEmptyFragment(frags, ev_counter(), "Missed request for");
1038  ev_counter_inc(1, true);
1039  }
1040  }
1041  else if (windows_sent_ooo_.size() > 0)
1042  {
1043  TLOG(19) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << *windows_sent_ooo_.begin() ;
1044  while (*windows_sent_ooo_.begin() > ev_counter())
1045  {
1046  sendEmptyFragment(frags, ev_counter(), "Missed request for");
1047  ev_counter_inc(1, true);
1048  }
1049  }
1050  while (windows_sent_ooo_.count(ev_counter()))
1051  {
1052  TLOG(19) << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" ;
1053  windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
1054  ev_counter_inc(1, true);
1055  }
1056 }
int fragment_id() const
Get the current Fragment ID, if there is only one.
void applyRequestsSingleMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Single. Precondition: dataBufferMutex_ and reques...
virtual ~CommandableFragmentGenerator()
CommandableFragmentGenerator Destructor.
void applyRequestsBufferMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Buffer. Precondition: dataBufferMutex_ and reques...
bool sendEmptyFragment(FragmentPtrs &frags, size_t sequenceId, std::string desc)
Send an EmptyFragmentType Fragment.
void getMonitoringDataLoop()
This function regularly calls checkHWStatus_(), and sets the isHardwareOK flag accordingly.
void startDataThread()
Function that launches the data thread (getDataLoop())
std::string ReportCmd(std::string const &which="")
Get a report about a user-specified run-time quantity.
virtual bool metaCommand(std::string const &command, std::string const &arg)
The meta-command is used for implementing user-specific commands in a CommandableFragmentGenerator.
bool dataBufferIsTooLarge()
Test the configured constraints on the data buffer.
void StopCmd(uint64_t timeout, uint64_t timestamp)
Stop the CommandableFragmentGenerator.
void applyRequestsWindowMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Window. Precondition: dataBufferMutex_ and reques...
void StartCmd(int run, uint64_t timeout, uint64_t timestamp)
Start the CommandableFragmentGenerator.
bool check_stop()
Routine used by applyRequests to make sure that all outstanding requests have been fulfilled before r...
void ResumeCmd(uint64_t timeout, uint64_t timestamp)
Resume the CommandableFragmentGenerator.
CommandableFragmentGenerator()
CommandableFragmentGenerator default constructor.
bool getNext(FragmentPtrs &output) override final
getNext calls either applyRequests or getNext_ to get any data that is ready to be sent to the EventB...
bool waitForDataBufferReady()
Wait for the data buffer to drain (dataBufferIsTooLarge returns false), periodically reporting status...
size_t ev_counter_inc(size_t step=1, bool force=false)
Increment the event counter, if the current RequestMode allows it.
void applyRequestsIgnoredMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Ignored. Precondition: dataBufferMutex_ and reque...
void PauseCmd(uint64_t timeout, uint64_t timestamp)
Pause the CommandableFragmentGenerator.
void getDataLoop()
When separate_data_thread is set to true, this loop repeatedly calls getNext_ and adds returned Fragm...
void sendEmptyFragments(FragmentPtrs &frags, std::map< Fragment::sequence_id_t, Fragment::timestamp_t > &requests)
This function is for Buffered and Single request modes, as they can only respond to one data request ...
void startMonitoringThread()
Function that launches the monitoring thread (getMonitoringDataLoop())
void checkDataBuffer()
Perform data buffer pruning operations. If the RequestMode is Single, removes all but the latest Frag...
std::string printMode_()
Return the string representation of the current RequestMode.
void getDataBufferStats()
Calculate the size of the dataBuffer and report appropriate metrics.
bool applyRequests(FragmentPtrs &output)
See if any requests have been received, and add the corresponding data Fragment objects to the output...
void joinThreads()
Join any data-taking threads. Should be called when destructing CommandableFragmentGenerator.