artdaq  v3_00_02
CommandableFragmentGenerator.cc
1 #define TRACE_NAME "CommandableFragmentGenerator"
2 #include "tracemf.h"
3 
4 #include "artdaq/Application/CommandableFragmentGenerator.hh"
5 
6 #include <boost/exception/all.hpp>
7 #include <boost/throw_exception.hpp>
8 
9 #include <limits>
10 #include <iterator>
11 
12 #include "canvas/Utilities/Exception.h"
13 #include "cetlib_except/exception.h"
14 #include "fhiclcpp/ParameterSet.h"
15 
16 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
17 #include "artdaq-core/Data/Fragment.hh"
18 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
19 #include "artdaq-core/Utilities/ExceptionHandler.hh"
20 #include "artdaq-core/Utilities/TimeUtils.hh"
21 
22 #include <fstream>
23 #include <iomanip>
24 #include <iterator>
25 #include <iostream>
26 #include <iomanip>
27 #include <algorithm>
28 #include <sys/poll.h>
30 
32  : mutex_()
33  , requestReceiver_(new RequestReceiver())
34  , windowOffset_(0)
35  , windowWidth_(0)
36  , staleTimeout_(Fragment::InvalidTimestamp)
37  , expectedType_(Fragment::EmptyFragmentType)
38  , maxFragmentCount_(std::numeric_limits<size_t>::max())
39  , uniqueWindows_(true)
40  , missing_request_(true)
41  , missing_request_time_()
42  , last_window_send_time_()
43  , last_window_send_time_set_(false)
44  , windows_sent_ooo_()
45  , missing_request_window_timeout_us_(1000000)
46  , window_close_timeout_us_(2000000)
47  , useDataThread_(false)
48  , sleep_on_no_data_us_(0)
49  , data_thread_running_(false)
50  , dataBufferDepthFragments_(0)
51  , dataBufferDepthBytes_(0)
52  , maxDataBufferDepthFragments_(1000)
53  , maxDataBufferDepthBytes_(1000)
54  , useMonitoringThread_(false)
55  , monitoringInterval_(0)
56  , lastMonitoringCall_()
57  , isHardwareOK_(true)
58  , dataBuffer_()
59  , newDataBuffer_()
60  , run_number_(-1)
61  , subrun_number_(-1)
62  , timeout_(std::numeric_limits<uint64_t>::max())
63  , timestamp_(std::numeric_limits<uint64_t>::max())
64  , should_stop_(false)
65  , exception_(false)
66  , force_stop_(false)
67  , latest_exception_report_("none")
68  , ev_counter_(1)
69  , board_id_(-1)
70  , instance_name_for_metrics_("FragmentGenerator")
71  , sleep_on_stop_us_(0)
72 {}
73 
75  : mutex_()
76  , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
77  , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
78  , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
79  , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
80  , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
81  , missing_request_(false)
82  , missing_request_time_(decltype(missing_request_time_)::max())
83  , last_window_send_time_(decltype(last_window_send_time_)::max())
84  , last_window_send_time_set_(false)
85  , windows_sent_ooo_()
86  , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
87  , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
88  , useDataThread_(ps.get<bool>("separate_data_thread", false))
89  , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
90  , data_thread_running_(false)
91  , dataBufferDepthFragments_(0)
92  , dataBufferDepthBytes_(0)
93  , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
94  , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
95  , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
96  , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
97  , lastMonitoringCall_()
98  , isHardwareOK_(true)
99  , dataBuffer_()
100  , newDataBuffer_()
101  , run_number_(-1)
102  , subrun_number_(-1)
103  , timeout_(std::numeric_limits<uint64_t>::max())
104  , timestamp_(std::numeric_limits<uint64_t>::max())
105  , should_stop_(false)
106  , exception_(false)
107  , force_stop_(false)
108  , latest_exception_report_("none")
109  , ev_counter_(1)
110  , board_id_(-1)
111  , sleep_on_stop_us_(0)
112 {
113  board_id_ = ps.get<int>("board_id");
114  instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
115 
116  fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
117 
118  TLOG_TRACE("CommandableFragmentGenerator") << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" << TLOG_ENDL;
119  int fragment_id = ps.get<int>("fragment_id", -99);
120 
121  if (fragment_id != -99)
122  {
123  if (fragment_ids_.size() != 0)
124  {
125  latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
126  throw cet::exception(latest_exception_report_);
127  }
128  else
129  {
130  fragment_ids_.emplace_back(fragment_id);
131  }
132  }
133 
134  sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
135 
136  dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
137  (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
138 
139  std::string modeString = ps.get<std::string>("request_mode", "ignored");
140  if (modeString == "single" || modeString == "Single")
141  {
142  mode_ = RequestMode::Single;
143  }
144  else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
145  {
146  mode_ = RequestMode::Buffer;
147  }
148  else if (modeString == "window" || modeString == "Window")
149  {
150  mode_ = RequestMode::Window;
151  }
152  else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
153  {
154  mode_ = RequestMode::Ignored;
155  }
156  TLOG_DEBUG("CommandableFragmentGenerator") << "Request mode is " << printMode_() << TLOG_ENDL;
157 
158  if (mode_ != RequestMode::Ignored)
159  {
160  if (!useDataThread_)
161  {
162  latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
163  throw cet::exception(latest_exception_report_);
164  }
165  requestReceiver_.reset(new RequestReceiver(ps));
166  }
167 }
168 
170 {
171  joinThreads();
172 }
173 
175 {
176  should_stop_ = true;
177  force_stop_ = true;
178  TLOG_DEBUG("CommandableFragmentGenerator") << "Joining dataThread" << TLOG_ENDL;
179  if (dataThread_.joinable()) dataThread_.join();
180  TLOG_DEBUG("CommandableFragmentGenerator") << "Joining monitoringThread" << TLOG_ENDL;
181  if (monitoringThread_.joinable()) monitoringThread_.join();
182  requestReceiver_.reset(nullptr);
183 }
184 
186 {
187  bool result = true;
188 
189  if (check_stop()) usleep(sleep_on_stop_us_);
190  if (exception() || force_stop_) return false;
191 
192  if (!useMonitoringThread_ && monitoringInterval_ > 0)
193  {
194  TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Checking whether to collect Monitoring Data" << TLOG_ENDL;
195  auto now = std::chrono::steady_clock::now();
196 
197  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
198  {
199  TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Collecting Monitoring Data" << TLOG_ENDL;
200  isHardwareOK_ = checkHWStatus_();
201  TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
202  lastMonitoringCall_ = now;
203  }
204  }
205 
206  try
207  {
208  std::lock_guard<std::mutex> lk(mutex_);
209  if (useDataThread_)
210  {
211  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling applyRequests" << TLOG_ENDL;
212  result = applyRequests(output);
213  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with applyRequests" << TLOG_ENDL;
214 
215  if (exception())
216  {
217  throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
218  }
219  }
220  else
221  {
222  if (!isHardwareOK_)
223  {
224  TLOG_ERROR("CommandableFragmentGenerator") << "Stopping CFG because the hardware reports bad status!" << TLOG_ENDL;
225  return false;
226  }
227  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
228  try
229  {
230  result = getNext_(output);
231  }
232  catch (...)
233  {
234  throw;
235  }
236  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
237  }
238  }
239  catch (const cet::exception& e)
240  {
241  latest_exception_report_ = "cet::exception caught in getNext(): ";
242  latest_exception_report_.append(e.what());
243  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: cet::exception caught: " << e << TLOG_ENDL;
244  set_exception(true);
245  return false;
246  }
247  catch (const boost::exception& e)
248  {
249  latest_exception_report_ = "boost::exception caught in getNext(): ";
250  latest_exception_report_.append(boost::diagnostic_information(e));
251  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: boost::exception caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
252  set_exception(true);
253  return false;
254  }
255  catch (const std::exception& e)
256  {
257  latest_exception_report_ = "std::exception caught in getNext(): ";
258  latest_exception_report_.append(e.what());
259  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: std::exception caught: " << e.what() << TLOG_ENDL;
260  set_exception(true);
261  return false;
262  }
263  catch (...)
264  {
265  latest_exception_report_ = "Unknown exception caught in getNext().";
266  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: unknown exception caught" << TLOG_ENDL;
267  set_exception(true);
268  return false;
269  }
270 
271  if (!result)
272  {
273  TLOG_DEBUG("getNext") << "stopped " << TLOG_ENDL;
274  }
275 
276  return result;
277 }
278 
280 {
281  TLOG_ARB(14, "CommandableFragmentGeneraotr") << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception()) << TLOG_ENDL;
282 
283  if (!should_stop()) return false;
284  if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
285  if (force_stop_) return true;
286 
287  // check_stop returns true if the CFG should stop. We should wait for the RequestReceiver to stop before stopping.
288  return !requestReceiver_->isRunning();
289 }
290 
292 {
293  if (fragment_ids_.size() != 1)
294  {
295  throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
296  }
297  else
298  {
299  return fragment_ids_[0];
300  }
301 }
302 
304 {
305  if (force || mode_ == RequestMode::Ignored)
306  {
307  return ev_counter_.fetch_add(step);
308  }
309  return ev_counter_.load();
310 } // returns the prev value
311 
312 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
313 {
314  if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
315 
316  timeout_ = timeout;
317  timestamp_ = timestamp;
318  ev_counter_.store(1);
319  should_stop_.store(false);
320  exception_.store(false);
321  run_number_ = run;
322  subrun_number_ = 1;
323  latest_exception_report_ = "none";
324  dataBuffer_.clear();
325  last_window_send_time_set_ = false;
326 
327  start();
328 
329  std::unique_lock<std::mutex> lk(mutex_);
330  if (useDataThread_) startDataThread();
331  if (useMonitoringThread_) startMonitoringThread();
332  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
333 }
334 
335 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
336 {
337  TLOG_DEBUG("CommandableFragmentGenerator") << "Stop Command received." << TLOG_ENDL;
338 
339  timeout_ = timeout;
340  timestamp_ = timestamp;
341 
342  stopNoMutex();
343  should_stop_.store(true);
344  std::unique_lock<std::mutex> lk(mutex_);
345  stop();
346 }
347 
348 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
349 {
350  timeout_ = timeout;
351  timestamp_ = timestamp;
352 
353  pauseNoMutex();
354  should_stop_.store(true);
355  std::unique_lock<std::mutex> lk(mutex_);
356 
357  pause();
358 }
359 
360 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
361 {
362  timeout_ = timeout;
363  timestamp_ = timestamp;
364 
365  subrun_number_ += 1;
366  should_stop_ = false;
367 
368  dataBuffer_.clear();
369 
370  // no lock required: thread not started yet
371  resume();
372 
373  std::unique_lock<std::mutex> lk(mutex_);
374  if (useDataThread_) startDataThread();
375  if (useMonitoringThread_) startMonitoringThread();
376  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
377 }
378 
379 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
380 {
381  std::lock_guard<std::mutex> lk(mutex_);
382 
383  // 14-May-2015, KAB: please see the comments associated with the report()
384  // methods in the CommandableFragmentGenerator.hh file for more information
385  // on the use of those methods in this method.
386 
387  // check if the child class has something meaningful for this request
388  std::string childReport = reportSpecific(which);
389  if (childReport.length() > 0) { return childReport; }
390 
391  // handle the requests that we can take care of at this level
392  if (which == "latest_exception")
393  {
394  return latest_exception_report_;
395  }
396 
397  // check if the child class has provided a catch-all report function
398  childReport = report();
399  if (childReport.length() > 0) { return childReport; }
400 
401  // if we haven't been able to come up with any report so far, say so
402  std::string tmpString = "The \"" + which + "\" command is not ";
403  tmpString.append("currently supported by the ");
404  tmpString.append(metricsReportingInstanceName());
405  tmpString.append(" fragment generator.");
406  return tmpString;
407 }
408 
409 // Default implementations of state functions
410 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
411 {
412 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
413 }
414 
415 void artdaq::CommandableFragmentGenerator::pause()
416 {
417 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
418 }
419 
420 void artdaq::CommandableFragmentGenerator::resume()
421 {
422 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
423 }
424 
425 std::string artdaq::CommandableFragmentGenerator::report()
426 {
427 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
428  return "";
429 }
430 
431 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
432 {
433 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
434  return "";
435 }
436 
437 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
438 {
439 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
440  return true;
441 }
442 
444 {
445  if (dataThread_.joinable()) dataThread_.join();
446  TLOG_INFO("CommandableFragmentGenerator") << "Starting Data Receiver Thread" << TLOG_ENDL;
447  dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
448 }
449 
451 {
452  if (monitoringThread_.joinable()) monitoringThread_.join();
453  TLOG_INFO("CommandableFragmentGenerator") << "Starting Hardware Monitoring Thread" << TLOG_ENDL;
454  monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
455 }
456 
458 {
459  switch (mode_)
460  {
461  case RequestMode::Single:
462  return "Single";
463  case RequestMode::Buffer:
464  return "Buffer";
465  case RequestMode::Window:
466  return "Window";
467  case RequestMode::Ignored:
468  return "Ignored";
469  }
470 
471  return "ERROR";
472 }
473 
475 {
476  data_thread_running_ = true;
477  while (!force_stop_)
478  {
479  if (!isHardwareOK_)
480  {
481  TLOG_DEBUG("CommandableFragmentGenerator") << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" << TLOG_ENDL;
482  data_thread_running_ = false;
483  return;
484  }
485 
486  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: calling getNext_" << TLOG_ENDL;
487 
488  bool data = false;
489  auto startdata = std::chrono::steady_clock::now();
490 
491  try
492  {
493  data = getNext_(newDataBuffer_);
494  }
495  catch (...)
496  {
497  ExceptionHandler(ExceptionHandlerRethrow::no,
498  "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
499  set_exception(true);
500 
501  data_thread_running_ = false;
502  return;
503  }
504 
505  if (metricMan)
506  {
507  metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
508  }
509 
510  if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
511  {
512  usleep(sleep_on_no_data_us_);
513  }
514 
515  TLOG_ARB(15, "CommandableFragmentGenerator") << "Waiting for data buffer ready" << TLOG_ENDL;
516  if (!waitForDataBufferReady()) return;
517  TLOG_ARB(15, "CommandableFragmentGenerator") << "Done waiting for data buffer ready" << TLOG_ENDL;
518 
519  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: processing data" << TLOG_ENDL;
520  if (data && !force_stop_)
521  {
522  std::unique_lock<std::mutex> lock(dataBufferMutex_);
523  switch (mode_)
524  {
525  case RequestMode::Single:
526  // While here, if for some strange reason more than one event's worth of data is returned from getNext_...
527  while (newDataBuffer_.size() >= fragment_ids_.size())
528  {
529  dataBuffer_.clear();
530  auto it = newDataBuffer_.begin();
531  std::advance(it, fragment_ids_.size());
532  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
533  }
534  break;
535  case RequestMode::Buffer:
536  case RequestMode::Ignored:
537  case RequestMode::Window:
538  default:
539  //dataBuffer_.reserve(dataBuffer_.size() + newDataBuffer_.size());
540  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
541  break;
542  }
543  getDataBufferStats();
544  }
545 
546  {
547  std::unique_lock<std::mutex> lock(dataBufferMutex_);
548  if (dataBuffer_.size() > 0)
549  {
550  dataCondition_.notify_all();
551  }
552  }
553  if (!data || force_stop_)
554  {
555  TLOG_INFO("CommandableFragmentGenerator") << "Data flow has stopped. Ending data collection thread" << TLOG_ENDL;
556  data_thread_running_ = false;
557  return;
558  }
559  }
560 }
561 
563 {
564  auto startwait = std::chrono::steady_clock::now();
565  auto first = true;
566  auto lastwaittime = 0ULL;
567  while (dataBufferIsTooLarge())
568  {
569  if (should_stop())
570  {
571  TLOG_DEBUG("CommandableFragmentGenerator") << "Run ended while waiting for buffer to shrink!" << TLOG_ENDL;
572  std::unique_lock<std::mutex> lock(dataBufferMutex_);
573  getDataBufferStats();
574  dataCondition_.notify_all();
575  data_thread_running_ = false;
576  return false;
577  }
578  auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
579 
580  if (first || (waittime != lastwaittime && waittime % 1000 == 0))
581  {
582  TLOG_WARNING("CommandableFragmentGenerator") << "Bad Omen: Data Buffer has exceeded its size limits. "
583  << "(seq_id=" << ev_counter()
584  << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
585  << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")" << TLOG_ENDL;
586  TLOG_TRACE("CommandableFragmentGenerator") << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues" << TLOG_ENDL;
587  first = false;
588  }
589  if (waittime % 5 && waittime != lastwaittime)
590  {
591  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" << TLOG_ENDL;
592  }
593  lastwaittime = waittime;
594  usleep(1000);
595  }
596  return true;
597 }
598 
600 {
601  return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
602 }
603 
605 {
607  dataBufferDepthFragments_ = dataBuffer_.size();
608  size_t acc = 0;
609  TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Calculating buffer size" << TLOG_ENDL;
610  for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
611  {
612  if (i->get() != nullptr)
613  {
614  acc += (*i)->sizeBytes();
615  }
616  }
617  dataBufferDepthBytes_ = acc;
618 
619  if (metricMan)
620  {
621  TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Sending Metrics" << TLOG_ENDL;
622  metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
623  metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
624  }
625  TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
626  << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) << TLOG_ENDL;
627 }
628 
630 {
631  std::unique_lock<std::mutex> lock(dataBufferMutex_);
632  dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
633  if (dataBufferDepthFragments_ > 0)
634  {
635  if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
636  {
637  // Eliminate extra fragments
638  while (dataBufferIsTooLarge())
639  {
640  dataBuffer_.erase(dataBuffer_.begin());
641  getDataBufferStats();
642  }
643  if (dataBuffer_.size() > 0)
644  {
645  TLOG_ARB(17, "CommandableFragmentGenerator") << "Determining if Fragments can be dropped from data buffer" << TLOG_ENDL;
646  Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
647  Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
648  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
649  {
650  if ((*it)->timestamp() < min)
651  {
652  it = dataBuffer_.erase(it);
653  }
654  else
655  {
656  ++it;
657  }
658  }
659  getDataBufferStats();
660  }
661  }
662  else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
663  {
664  // Eliminate extra fragments
665  while (dataBuffer_.size() > fragment_ids_.size())
666  {
667  dataBuffer_.erase(dataBuffer_.begin());
668  }
669  }
670  }
671 }
672 
674 {
675  while (!force_stop_)
676  {
677  if (should_stop() || monitoringInterval_ <= 0)
678  {
679  TLOG_DEBUG("CommandableFragmentGenerator") << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
680  << " and monitoringInterval is " << monitoringInterval_ << ", returning" << TLOG_ENDL;
681  return;
682  }
683  TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" << TLOG_ENDL;
684 
685  auto now = std::chrono::steady_clock::now();
686  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
687  {
688  isHardwareOK_ = checkHWStatus_();
689  TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
690  lastMonitoringCall_ = now;
691  }
692  usleep(monitoringInterval_ / 10);
693  }
694 }
695 
697 {
698  // We just copy everything that's here into the output.
699  TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Ignored; Copying data to output" << TLOG_ENDL;
700  std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
701  dataBuffer_.clear();
702 }
703 
705 {
706  // We only care about the latest request received. Send empties for all others.
707  auto requests = requestReceiver_->GetRequests();
708  while (requests.size() > 1) {
709  // std::map is ordered by key => Last sequence ID in the map is the one we care about
710  requestReceiver_->RemoveRequest(requests.begin()->first);
711  requests.erase(requests.begin());
712  }
713  sendEmptyFragments(frags, requests);
714 
715  // If no requests remain after sendEmptyFragments, return
716  if (requests.size() == 0 || !requests.count(ev_counter())) return;
717 
718  if (dataBuffer_.size() > 0)
719  {
720  TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Single; Sending copy of last event" << TLOG_ENDL;
721  for (auto& fragptr : dataBuffer_)
722  {
723  // Return the latest data point
724  auto frag = fragptr.get();
725  auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
726  newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
727  memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
728  newfrag->setTimestamp(requests[ev_counter()]);
729  newfrag->setSequenceID(ev_counter());
730  frags.push_back(std::move(newfrag));
731  }
732  }
733  else
734  {
735  sendEmptyFragment(frags, ev_counter(), "No data for");
736  }
737  requestReceiver_->RemoveRequest(ev_counter());
738  ev_counter_inc(1, true);
739 }
740 
742 {
743  // We only care about the latest request received. Send empties for all others.
744  auto requests = requestReceiver_->GetRequests();
745  while (requests.size() > 1) {
746  // std::map is ordered by key => Last sequence ID in the map is the one we care about
747  requestReceiver_->RemoveRequest(requests.begin()->first);
748  requests.erase(requests.begin());
749  }
750  sendEmptyFragments(frags, requests);
751 
752  // If no requests remain after sendEmptyFragments, return
753  if (requests.size() == 0 || !requests.count(ev_counter())) return;
754 
755  TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered Fragments" << TLOG_ENDL;
756  frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
757  frags.back()->setTimestamp(requests[ev_counter()]);
758  ContainerFragmentLoader cfl(*frags.back());
759  cfl.set_missing_data(false); // Buffer mode is never missing data, even if there IS no data.
760 
761  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
762  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
763  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
764  {
765  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
766  cfl.addFragment(*it);
767  it = dataBuffer_.erase(it);
768  }
769  requestReceiver_->RemoveRequest(ev_counter());
770  ev_counter_inc(1, true);
771 }
772 
774 {
775  TLOG(10) << "applyRequestsWindowMode BEGIN";
776  if (!last_window_send_time_set_)
777  {
778  last_window_send_time_ = std::chrono::steady_clock::now();
779  last_window_send_time_set_ = true;
780  }
781 
782  auto requests = requestReceiver_->GetRequests();
783  bool now_have_desired_request = std::any_of(requests.begin(), requests.end(),
784  [this](decltype(requests)::value_type& request) {
785  return request.first == ev_counter(); });
786 
787  if (missing_request_)
788  {
789  if (!now_have_desired_request && TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) > missing_request_window_timeout_us_)
790  {
791  TLOG_ERROR("CommandableFragmentGenerator") << "Data-taking has paused for " << TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) << " us "
792  << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages."
793  << " Sending Empty Fragments for missing requests!" << TLOG_ENDL;
794  sendEmptyFragments(frags, requests);
795 
796  missing_request_ = false;
797  missing_request_time_ = decltype(missing_request_time_)::max();
798  }
799  else if (now_have_desired_request) {
800  missing_request_ = false;
801  missing_request_time_ = decltype(missing_request_time_)::max();
802  }
803  }
804 
805  TLOG(10) << "applyRequestsWindowMode: Starting request processing";
806  for (auto req = requests.begin(); req != requests.end();)
807  {
808  while (req->first < ev_counter() && requests.size() > 0)
809  {
810  TLOG(10) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
811  requestReceiver_->RemoveRequest(req->first);
812  req = requests.erase(req);
813  }
814  if (requests.size() == 0) break;
815  if (req->first > ev_counter())
816  {
817  if (!missing_request_)
818  {
819  missing_request_ = true;
820  missing_request_time_ = std::chrono::steady_clock::now();
821  }
822  }
823  auto ts = req->second;
824  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) << TLOG_ENDL;
825  Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
826  Fragment::timestamp_t max = min + windowWidth_;
827  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
828  << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" << TLOG_ENDL;
829  bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
830  bool windowTimeout = TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) > window_close_timeout_us_;
831  if (windowTimeout)
832  {
833  TLOG_WARNING("CommandableFragmentGenerator") << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max)
834  << ", buffer=" << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0))
835  << " (if no buffer in memory, this is shown as a 0)). Time waiting: "
836  << TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) << " us "
837  << "(> " << std::to_string(window_close_timeout_us_) << " us)." << TLOG_ENDL;
838 
839  if (missing_request_) {
840  TLOG_ERROR("CommandableFragmentGenerator") << "A Window timeout has occurred while there are pending requests. Sending empties." << TLOG_ENDL;
841  sendEmptyFragments(frags, requests);
842  }
843  }
844  if (windowClosed || !data_thread_running_ || windowTimeout)
845  {
846  TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered or Window-requested Fragments" << TLOG_ENDL;
847  frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
848  frags.back()->setTimestamp(ts);
849  ContainerFragmentLoader cfl(*frags.back());
850 
851  if (!windowClosed) cfl.set_missing_data(true);
852  if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
853  {
854  TLOG_DEBUG("CommandableFragmentGenerator") << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" << TLOG_ENDL;
855  cfl.set_missing_data(true);
856  }
857 
858  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
859  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
860  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
861  {
862  Fragment::timestamp_t fragT = (*it)->timestamp();
863  if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
864  {
865  ++it;
866  continue;
867  }
868 
869  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
870  cfl.addFragment(*it);
871 
872  if (uniqueWindows_)
873  {
874  it = dataBuffer_.erase(it);
875  }
876  else
877  {
878  ++it;
879  }
880  }
881  if (req->first == ev_counter())
882  {
883  ev_counter_inc(1, true);
884  while (windows_sent_ooo_.count(ev_counter()))
885  {
886  TLOG_ARB(9, "CommandableFragmentGenerator") << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" << TLOG_ENDL;
887  windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
888  ev_counter_inc(1, true);
889  }
890  }
891  else
892  {
893  windows_sent_ooo_.insert(req->first);
894  }
895  requestReceiver_->RemoveRequest(req->first);
896  req = requests.erase(req);
897  last_window_send_time_ = std::chrono::steady_clock::now();
898  }
899  else
900  {
901  ++req;
902  }
903  }
904 }
905 
907 {
908  if (check_stop() || exception())
909  {
910  return false;
911  }
912 
913  // Wait for data, if in ignored mode, or a request otherwise
914  if (mode_ == RequestMode::Ignored)
915  {
916  while (dataBufferDepthFragments_ <= 0)
917  {
918  if (check_stop() || exception() || !isHardwareOK_) return false;
919  std::unique_lock<std::mutex> lock(dataBufferMutex_);
920  dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
921  }
922  }
923  else
924  {
925  if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
926  checkDataBuffer();
927 
928  // Wait up to 1000 ms for a request...
929  auto counter = 0;
930 
931  while (requestReceiver_->size() == 0 && counter < 100)
932  {
933  if (check_stop() || exception()) return false;
934 
935  checkDataBuffer();
936 
937  requestReceiver_->WaitForRequests(10); // milliseconds
938  counter++;
939  }
940  }
941 
942  {
943  std::unique_lock<std::mutex> dlk(dataBufferMutex_);
944 
945  switch (mode_)
946  {
947  case RequestMode::Single:
948  applyRequestsSingleMode(frags);
949  break;
950  case RequestMode::Window:
951  applyRequestsWindowMode(frags);
952  break;
953  case RequestMode::Buffer:
954  applyRequestsBufferMode(frags);
955  break;
956  case RequestMode::Ignored:
957  default:
958  applyRequestsIgnoredMode(frags);
959  break;
960  }
961 
962  getDataBufferStats();
963  }
964 
965  if (frags.size() > 0)
966  TLOG_ARB(9, "CommandableFragmentGenerator") << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." << TLOG_ENDL;
967  return true;
968 }
969 
970 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
971 {
972  TLOG_WARNING("CommandableFragmentGenerator") << desc << " sequence ID " << seqId << ", sending empty fragment" << TLOG_ENDL;
973  for (auto fid : fragment_ids_)
974  {
975  auto frag = new Fragment();
976  frag->setSequenceID(seqId);
977  frag->setFragmentID(fid);
978  frag->setSystemType(Fragment::EmptyFragmentType);
979  frags.emplace_back(FragmentPtr(frag));
980  }
981  return true;
982 }
983 
984 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
985 {
986  if (requests.size() == 0 && windows_sent_ooo_.size() == 0) return;
987 
988  if (requests.size() > 0) {
989  TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first << TLOG_ENDL;
990  while (requests.begin()->first > ev_counter())
991  {
992  sendEmptyFragment(frags, ev_counter(), "Missed request for");
993  ev_counter_inc(1, true);
994  }
995  }
996  else if (windows_sent_ooo_.size() > 0)
997  {
998  TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << *windows_sent_ooo_.begin() << TLOG_ENDL;
999  while (*windows_sent_ooo_.begin() > ev_counter())
1000  {
1001  sendEmptyFragment(frags, ev_counter(), "Missed request for");
1002  ev_counter_inc(1, true);
1003  }
1004  }
1005  while (windows_sent_ooo_.count(ev_counter()))
1006  {
1007  TLOG_ARB(19, "CommandableFragmentGenerator") << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" << TLOG_ENDL;
1008  windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
1009  ev_counter_inc(1, true);
1010  }
1011 }
int fragment_id() const
Get the current Fragment ID, if there is only one.
void applyRequestsSingleMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Single. Precondition: dataBufferMutex_ and reques...
virtual ~CommandableFragmentGenerator()
CommandableFragmentGenerator Destructor.
void applyRequestsBufferMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Buffer. Precondition: dataBufferMutex_ and reques...
bool sendEmptyFragment(FragmentPtrs &frags, size_t sequenceId, std::string desc)
Send an EmptyFragmentType Fragment.
void getMonitoringDataLoop()
This function regularly calls checkHWStatus_(), and sets the isHardwareOK flag accordingly.
void startDataThread()
Function that launches the data thread (getDataLoop())
std::string ReportCmd(std::string const &which="")
Get a report about a user-specified run-time quantity.
bool dataBufferIsTooLarge()
Test the configured constraints on the data buffer.
void StopCmd(uint64_t timeout, uint64_t timestamp)
Stop the CommandableFragmentGenerator.
void applyRequestsWindowMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Window. Precondition: dataBufferMutex_ and reques...
void StartCmd(int run, uint64_t timeout, uint64_t timestamp)
Start the CommandableFragmentGenerator.
bool check_stop()
Routine used by applyRequests to make sure that all outstanding requests have been fulfilled before r...
void ResumeCmd(uint64_t timeout, uint64_t timestamp)
Resume the CommandableFragmentGenerator.
CommandableFragmentGenerator()
CommandableFragmentGenerator default constructor.
bool getNext(FragmentPtrs &output) override final
getNext calls either applyRequests or getNext_ to get any data that is ready to be sent to the EventB...
bool waitForDataBufferReady()
Wait for the data buffer to drain (dataBufferIsTooLarge returns false), periodically reporting status...
size_t ev_counter_inc(size_t step=1, bool force=false)
Increment the event counter, if the current RequestMode allows it.
void applyRequestsIgnoredMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Ignored. Precondition: dataBufferMutex_ and reque...
void PauseCmd(uint64_t timeout, uint64_t timestamp)
Pause the CommandableFragmentGenerator.
void getDataLoop()
When separate_data_thread is set to true, this loop repeatedly calls getNext_ and adds returned Fragm...
void sendEmptyFragments(FragmentPtrs &frags, std::map< Fragment::sequence_id_t, Fragment::timestamp_t > &requests)
This function is for Buffered and Single request modes, as they can only respond to one data request ...
void startMonitoringThread()
Function that launches the monitoring thread (getMonitoringDataLoop())
void checkDataBuffer()
Perform data buffer pruning operations. If the RequestMode is Single, removes all but the latest Frag...
std::string printMode_()
Return the string representation of the current RequestMode.
void getDataBufferStats()
Calculate the size of the dataBuffer and report appropriate metrics.
bool applyRequests(FragmentPtrs &output)
See if any requests have been received, and add the corresponding data Fragment objects to the output...
void joinThreads()
Join any data-taking threads. Should be called when destructing CommandableFragmentGenerator.