artdaq  v3_00_03
CommandableFragmentGenerator.cc
1 #define TRACE_NAME "CommandableFragmentGenerator"
2 #include "tracemf.h"
3 
4 #include "artdaq/Application/CommandableFragmentGenerator.hh"
5 
6 #include <boost/exception/all.hpp>
7 #include <boost/throw_exception.hpp>
8 
9 #include <limits>
10 #include <iterator>
11 
12 #include "canvas/Utilities/Exception.h"
13 #include "cetlib_except/exception.h"
14 #include "fhiclcpp/ParameterSet.h"
15 
16 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
17 #include "artdaq-core/Data/Fragment.hh"
18 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
19 #include "artdaq-core/Utilities/ExceptionHandler.hh"
20 #include "artdaq-core/Utilities/TimeUtils.hh"
21 
22 #include <fstream>
23 #include <iomanip>
24 #include <iterator>
25 #include <iostream>
26 #include <iomanip>
27 #include <algorithm>
28 #include <sys/poll.h>
30 
32  : mutex_()
33  , requestReceiver_(new RequestReceiver())
34  , windowOffset_(0)
35  , windowWidth_(0)
36  , staleTimeout_(Fragment::InvalidTimestamp)
37  , expectedType_(Fragment::EmptyFragmentType)
38  , maxFragmentCount_(std::numeric_limits<size_t>::max())
39  , uniqueWindows_(true)
40  , missing_request_(true)
41  , missing_request_time_()
42  , last_window_send_time_()
43  , last_window_send_time_set_(false)
44  , windows_sent_ooo_()
45  , missing_request_window_timeout_us_(1000000)
46  , window_close_timeout_us_(2000000)
47  , useDataThread_(false)
48  , sleep_on_no_data_us_(0)
49  , data_thread_running_(false)
50  , dataBufferDepthFragments_(0)
51  , dataBufferDepthBytes_(0)
52  , maxDataBufferDepthFragments_(1000)
53  , maxDataBufferDepthBytes_(1000)
54  , useMonitoringThread_(false)
55  , monitoringInterval_(0)
56  , lastMonitoringCall_()
57  , isHardwareOK_(true)
58  , dataBuffer_()
59  , newDataBuffer_()
60  , run_number_(-1)
61  , subrun_number_(-1)
62  , timeout_(std::numeric_limits<uint64_t>::max())
63  , timestamp_(std::numeric_limits<uint64_t>::max())
64  , should_stop_(false)
65  , exception_(false)
66  , force_stop_(false)
67  , latest_exception_report_("none")
68  , ev_counter_(1)
69  , board_id_(-1)
70  , instance_name_for_metrics_("FragmentGenerator")
71  , sleep_on_stop_us_(0)
72 {}
73 
75  : mutex_()
76  , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
77  , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
78  , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
79  , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
80  , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
81  , missing_request_(false)
82  , missing_request_time_(decltype(missing_request_time_)::max())
83  , last_window_send_time_(decltype(last_window_send_time_)::max())
84  , last_window_send_time_set_(false)
85  , windows_sent_ooo_()
86  , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
87  , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
88  , useDataThread_(ps.get<bool>("separate_data_thread", false))
89  , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
90  , data_thread_running_(false)
91  , dataBufferDepthFragments_(0)
92  , dataBufferDepthBytes_(0)
93  , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
94  , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
95  , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
96  , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
97  , lastMonitoringCall_()
98  , isHardwareOK_(true)
99  , dataBuffer_()
100  , newDataBuffer_()
101  , run_number_(-1)
102  , subrun_number_(-1)
103  , timeout_(std::numeric_limits<uint64_t>::max())
104  , timestamp_(std::numeric_limits<uint64_t>::max())
105  , should_stop_(false)
106  , exception_(false)
107  , force_stop_(false)
108  , latest_exception_report_("none")
109  , ev_counter_(1)
110  , board_id_(-1)
111  , sleep_on_stop_us_(0)
112 {
113  board_id_ = ps.get<int>("board_id");
114  instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
115 
116  fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
117 
118  TLOG_TRACE("CommandableFragmentGenerator") << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" << TLOG_ENDL;
119  int fragment_id = ps.get<int>("fragment_id", -99);
120 
121  if (fragment_id != -99)
122  {
123  if (fragment_ids_.size() != 0)
124  {
125  latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
126  throw cet::exception(latest_exception_report_);
127  }
128  else
129  {
130  fragment_ids_.emplace_back(fragment_id);
131  }
132  }
133 
134  sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
135 
136  dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
137  (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
138 
139  std::string modeString = ps.get<std::string>("request_mode", "ignored");
140  if (modeString == "single" || modeString == "Single")
141  {
142  mode_ = RequestMode::Single;
143  }
144  else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
145  {
146  mode_ = RequestMode::Buffer;
147  }
148  else if (modeString == "window" || modeString == "Window")
149  {
150  mode_ = RequestMode::Window;
151  }
152  else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
153  {
154  mode_ = RequestMode::Ignored;
155  }
156  TLOG_DEBUG("CommandableFragmentGenerator") << "Request mode is " << printMode_() << TLOG_ENDL;
157 
158  if (mode_ != RequestMode::Ignored)
159  {
160  if (!useDataThread_)
161  {
162  latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
163  throw cet::exception(latest_exception_report_);
164  }
165  requestReceiver_.reset(new RequestReceiver(ps));
166  }
167 }
168 
170 {
171  joinThreads();
172 }
173 
175 {
176  should_stop_ = true;
177  force_stop_ = true;
178  TLOG_DEBUG("CommandableFragmentGenerator") << "Joining dataThread" << TLOG_ENDL;
179  if (dataThread_.joinable()) dataThread_.join();
180  TLOG_DEBUG("CommandableFragmentGenerator") << "Joining monitoringThread" << TLOG_ENDL;
181  if (monitoringThread_.joinable()) monitoringThread_.join();
182  requestReceiver_.reset(nullptr);
183 }
184 
186 {
187  bool result = true;
188 
189  if (check_stop()) usleep(sleep_on_stop_us_);
190  if (exception() || force_stop_) return false;
191 
192  if (!useMonitoringThread_ && monitoringInterval_ > 0)
193  {
194  TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Checking whether to collect Monitoring Data" << TLOG_ENDL;
195  auto now = std::chrono::steady_clock::now();
196 
197  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
198  {
199  TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Collecting Monitoring Data" << TLOG_ENDL;
200  isHardwareOK_ = checkHWStatus_();
201  TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
202  lastMonitoringCall_ = now;
203  }
204  }
205 
206  try
207  {
208  std::lock_guard<std::mutex> lk(mutex_);
209  if (useDataThread_)
210  {
211  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling applyRequests" << TLOG_ENDL;
212  result = applyRequests(output);
213  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with applyRequests" << TLOG_ENDL;
214 
215  if (exception())
216  {
217  throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
218  }
219  }
220  else
221  {
222  if (!isHardwareOK_)
223  {
224  TLOG_ERROR("CommandableFragmentGenerator") << "Stopping CFG because the hardware reports bad status!" << TLOG_ENDL;
225  return false;
226  }
227  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
228  try
229  {
230  result = getNext_(output);
231  }
232  catch (...)
233  {
234  throw;
235  }
236  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
237  }
238  }
239  catch (const cet::exception& e)
240  {
241  latest_exception_report_ = "cet::exception caught in getNext(): ";
242  latest_exception_report_.append(e.what());
243  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: cet::exception caught: " << e << TLOG_ENDL;
244  set_exception(true);
245  return false;
246  }
247  catch (const boost::exception& e)
248  {
249  latest_exception_report_ = "boost::exception caught in getNext(): ";
250  latest_exception_report_.append(boost::diagnostic_information(e));
251  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: boost::exception caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
252  set_exception(true);
253  return false;
254  }
255  catch (const std::exception& e)
256  {
257  latest_exception_report_ = "std::exception caught in getNext(): ";
258  latest_exception_report_.append(e.what());
259  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: std::exception caught: " << e.what() << TLOG_ENDL;
260  set_exception(true);
261  return false;
262  }
263  catch (...)
264  {
265  latest_exception_report_ = "Unknown exception caught in getNext().";
266  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: unknown exception caught" << TLOG_ENDL;
267  set_exception(true);
268  return false;
269  }
270 
271  if (!result)
272  {
273  TLOG_DEBUG("getNext") << "stopped " << TLOG_ENDL;
274  }
275 
276  return result;
277 }
278 
280 {
281  TLOG_ARB(14, "CommandableFragmentGeneraotr") << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception()) << TLOG_ENDL;
282 
283  if (!should_stop()) return false;
284  if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
285  if (force_stop_) return true;
286 
287  // check_stop returns true if the CFG should stop. We should wait for the RequestReceiver to stop before stopping.
288  return !requestReceiver_->isRunning();
289 }
290 
292 {
293  if (fragment_ids_.size() != 1)
294  {
295  throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
296  }
297  else
298  {
299  return fragment_ids_[0];
300  }
301 }
302 
304 {
305  if (force || mode_ == RequestMode::Ignored)
306  {
307  return ev_counter_.fetch_add(step);
308  }
309  return ev_counter_.load();
310 } // returns the prev value
311 
312 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
313 {
314  if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
315 
316  timeout_ = timeout;
317  timestamp_ = timestamp;
318  ev_counter_.store(1);
319  should_stop_.store(false);
320  exception_.store(false);
321  run_number_ = run;
322  subrun_number_ = 1;
323  latest_exception_report_ = "none";
324  dataBuffer_.clear();
325  last_window_send_time_set_ = false;
326 
327  start();
328 
329  std::unique_lock<std::mutex> lk(mutex_);
330  if (useDataThread_) startDataThread();
331  if (useMonitoringThread_) startMonitoringThread();
332  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
333 }
334 
335 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
336 {
337  TLOG_DEBUG("CommandableFragmentGenerator") << "Stop Command received." << TLOG_ENDL;
338 
339  timeout_ = timeout;
340  timestamp_ = timestamp;
341 
342  stopNoMutex();
343  should_stop_.store(true);
344  std::unique_lock<std::mutex> lk(mutex_);
345  stop();
346 }
347 
348 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
349 {
350  timeout_ = timeout;
351  timestamp_ = timestamp;
352 
353  pauseNoMutex();
354  should_stop_.store(true);
355  std::unique_lock<std::mutex> lk(mutex_);
356 
357  pause();
358 }
359 
360 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
361 {
362  timeout_ = timeout;
363  timestamp_ = timestamp;
364 
365  subrun_number_ += 1;
366  should_stop_ = false;
367 
368  dataBuffer_.clear();
369 
370  // no lock required: thread not started yet
371  resume();
372 
373  std::unique_lock<std::mutex> lk(mutex_);
374  if (useDataThread_) startDataThread();
375  if (useMonitoringThread_) startMonitoringThread();
376  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
377 }
378 
379 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
380 {
381  std::lock_guard<std::mutex> lk(mutex_);
382 
383  // 14-May-2015, KAB: please see the comments associated with the report()
384  // methods in the CommandableFragmentGenerator.hh file for more information
385  // on the use of those methods in this method.
386 
387  // check if the child class has something meaningful for this request
388  std::string childReport = reportSpecific(which);
389  if (childReport.length() > 0) { return childReport; }
390 
391  // handle the requests that we can take care of at this level
392  if (which == "latest_exception")
393  {
394  return latest_exception_report_;
395  }
396 
397  // check if the child class has provided a catch-all report function
398  childReport = report();
399  if (childReport.length() > 0) { return childReport; }
400 
401  // if we haven't been able to come up with any report so far, say so
402  std::string tmpString = "The \"" + which + "\" command is not ";
403  tmpString.append("currently supported by the ");
404  tmpString.append(metricsReportingInstanceName());
405  tmpString.append(" fragment generator.");
406  return tmpString;
407 }
408 
409 // Default implementations of state functions
410 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
411 {
412 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
413 }
414 
415 void artdaq::CommandableFragmentGenerator::pause()
416 {
417 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
418 }
419 
420 void artdaq::CommandableFragmentGenerator::resume()
421 {
422 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
423 }
424 
425 std::string artdaq::CommandableFragmentGenerator::report()
426 {
427 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
428  return "";
429 }
430 
431 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
432 {
433 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
434  return "";
435 }
436 
437 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
438 {
439 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
440  return true;
441 }
442 
443 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const&, std::string const&)
444 {
445 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
446  return true;
447 }
448 
450 {
451  if (dataThread_.joinable()) dataThread_.join();
452  TLOG_INFO("CommandableFragmentGenerator") << "Starting Data Receiver Thread" << TLOG_ENDL;
453  dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
454 }
455 
457 {
458  if (monitoringThread_.joinable()) monitoringThread_.join();
459  TLOG_INFO("CommandableFragmentGenerator") << "Starting Hardware Monitoring Thread" << TLOG_ENDL;
460  monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
461 }
462 
464 {
465  switch (mode_)
466  {
467  case RequestMode::Single:
468  return "Single";
469  case RequestMode::Buffer:
470  return "Buffer";
471  case RequestMode::Window:
472  return "Window";
473  case RequestMode::Ignored:
474  return "Ignored";
475  }
476 
477  return "ERROR";
478 }
479 
481 {
482  data_thread_running_ = true;
483  while (!force_stop_)
484  {
485  if (!isHardwareOK_)
486  {
487  TLOG_DEBUG("CommandableFragmentGenerator") << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" << TLOG_ENDL;
488  data_thread_running_ = false;
489  return;
490  }
491 
492  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: calling getNext_" << TLOG_ENDL;
493 
494  bool data = false;
495  auto startdata = std::chrono::steady_clock::now();
496 
497  try
498  {
499  data = getNext_(newDataBuffer_);
500  }
501  catch (...)
502  {
503  ExceptionHandler(ExceptionHandlerRethrow::no,
504  "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
505  set_exception(true);
506 
507  data_thread_running_ = false;
508  return;
509  }
510 
511  if (metricMan)
512  {
513  metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
514  }
515 
516  if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
517  {
518  usleep(sleep_on_no_data_us_);
519  }
520 
521  TLOG_ARB(15, "CommandableFragmentGenerator") << "Waiting for data buffer ready" << TLOG_ENDL;
522  if (!waitForDataBufferReady()) return;
523  TLOG_ARB(15, "CommandableFragmentGenerator") << "Done waiting for data buffer ready" << TLOG_ENDL;
524 
525  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: processing data" << TLOG_ENDL;
526  if (data && !force_stop_)
527  {
528  std::unique_lock<std::mutex> lock(dataBufferMutex_);
529  switch (mode_)
530  {
531  case RequestMode::Single:
532  // While here, if for some strange reason more than one event's worth of data is returned from getNext_...
533  while (newDataBuffer_.size() >= fragment_ids_.size())
534  {
535  dataBuffer_.clear();
536  auto it = newDataBuffer_.begin();
537  std::advance(it, fragment_ids_.size());
538  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
539  }
540  break;
541  case RequestMode::Buffer:
542  case RequestMode::Ignored:
543  case RequestMode::Window:
544  default:
545  //dataBuffer_.reserve(dataBuffer_.size() + newDataBuffer_.size());
546  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
547  break;
548  }
549  getDataBufferStats();
550  }
551 
552  {
553  std::unique_lock<std::mutex> lock(dataBufferMutex_);
554  if (dataBuffer_.size() > 0)
555  {
556  dataCondition_.notify_all();
557  }
558  }
559  if (!data || force_stop_)
560  {
561  TLOG_INFO("CommandableFragmentGenerator") << "Data flow has stopped. Ending data collection thread" << TLOG_ENDL;
562  data_thread_running_ = false;
563  return;
564  }
565  }
566 }
567 
569 {
570  auto startwait = std::chrono::steady_clock::now();
571  auto first = true;
572  auto lastwaittime = 0ULL;
573  while (dataBufferIsTooLarge())
574  {
575  if (should_stop())
576  {
577  TLOG_DEBUG("CommandableFragmentGenerator") << "Run ended while waiting for buffer to shrink!" << TLOG_ENDL;
578  std::unique_lock<std::mutex> lock(dataBufferMutex_);
579  getDataBufferStats();
580  dataCondition_.notify_all();
581  data_thread_running_ = false;
582  return false;
583  }
584  auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
585 
586  if (first || (waittime != lastwaittime && waittime % 1000 == 0))
587  {
588  TLOG_WARNING("CommandableFragmentGenerator") << "Bad Omen: Data Buffer has exceeded its size limits. "
589  << "(seq_id=" << ev_counter()
590  << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
591  << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")" << TLOG_ENDL;
592  TLOG_TRACE("CommandableFragmentGenerator") << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues" << TLOG_ENDL;
593  first = false;
594  }
595  if (waittime % 5 && waittime != lastwaittime)
596  {
597  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" << TLOG_ENDL;
598  }
599  lastwaittime = waittime;
600  usleep(1000);
601  }
602  return true;
603 }
604 
606 {
607  return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
608 }
609 
611 {
613  dataBufferDepthFragments_ = dataBuffer_.size();
614  size_t acc = 0;
615  TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Calculating buffer size" << TLOG_ENDL;
616  for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
617  {
618  if (i->get() != nullptr)
619  {
620  acc += (*i)->sizeBytes();
621  }
622  }
623  dataBufferDepthBytes_ = acc;
624 
625  if (metricMan)
626  {
627  TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Sending Metrics" << TLOG_ENDL;
628  metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
629  metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
630  }
631  TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
632  << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) << TLOG_ENDL;
633 }
634 
636 {
637  std::unique_lock<std::mutex> lock(dataBufferMutex_);
638  dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
639  if (dataBufferDepthFragments_ > 0)
640  {
641  if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
642  {
643  // Eliminate extra fragments
644  while (dataBufferIsTooLarge())
645  {
646  dataBuffer_.erase(dataBuffer_.begin());
647  getDataBufferStats();
648  }
649  if (dataBuffer_.size() > 0)
650  {
651  TLOG_ARB(17, "CommandableFragmentGenerator") << "Determining if Fragments can be dropped from data buffer" << TLOG_ENDL;
652  Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
653  Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
654  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
655  {
656  if ((*it)->timestamp() < min)
657  {
658  it = dataBuffer_.erase(it);
659  }
660  else
661  {
662  ++it;
663  }
664  }
665  getDataBufferStats();
666  }
667  }
668  else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
669  {
670  // Eliminate extra fragments
671  while (dataBuffer_.size() > fragment_ids_.size())
672  {
673  dataBuffer_.erase(dataBuffer_.begin());
674  }
675  }
676  }
677 }
678 
680 {
681  while (!force_stop_)
682  {
683  if (should_stop() || monitoringInterval_ <= 0)
684  {
685  TLOG_DEBUG("CommandableFragmentGenerator") << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
686  << " and monitoringInterval is " << monitoringInterval_ << ", returning" << TLOG_ENDL;
687  return;
688  }
689  TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" << TLOG_ENDL;
690 
691  auto now = std::chrono::steady_clock::now();
692  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
693  {
694  isHardwareOK_ = checkHWStatus_();
695  TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
696  lastMonitoringCall_ = now;
697  }
698  usleep(monitoringInterval_ / 10);
699  }
700 }
701 
703 {
704  // We just copy everything that's here into the output.
705  TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Ignored; Copying data to output" << TLOG_ENDL;
706  std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
707  dataBuffer_.clear();
708 }
709 
711 {
712  // We only care about the latest request received. Send empties for all others.
713  auto requests = requestReceiver_->GetRequests();
714  while (requests.size() > 1) {
715  // std::map is ordered by key => Last sequence ID in the map is the one we care about
716  requestReceiver_->RemoveRequest(requests.begin()->first);
717  requests.erase(requests.begin());
718  }
719  sendEmptyFragments(frags, requests);
720 
721  // If no requests remain after sendEmptyFragments, return
722  if (requests.size() == 0 || !requests.count(ev_counter())) return;
723 
724  if (dataBuffer_.size() > 0)
725  {
726  TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Single; Sending copy of last event" << TLOG_ENDL;
727  for (auto& fragptr : dataBuffer_)
728  {
729  // Return the latest data point
730  auto frag = fragptr.get();
731  auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
732  newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
733  memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
734  newfrag->setTimestamp(requests[ev_counter()]);
735  newfrag->setSequenceID(ev_counter());
736  frags.push_back(std::move(newfrag));
737  }
738  }
739  else
740  {
741  sendEmptyFragment(frags, ev_counter(), "No data for");
742  }
743  requestReceiver_->RemoveRequest(ev_counter());
744  ev_counter_inc(1, true);
745 }
746 
748 {
749  // We only care about the latest request received. Send empties for all others.
750  auto requests = requestReceiver_->GetRequests();
751  while (requests.size() > 1) {
752  // std::map is ordered by key => Last sequence ID in the map is the one we care about
753  requestReceiver_->RemoveRequest(requests.begin()->first);
754  requests.erase(requests.begin());
755  }
756  sendEmptyFragments(frags, requests);
757 
758  // If no requests remain after sendEmptyFragments, return
759  if (requests.size() == 0 || !requests.count(ev_counter())) return;
760 
761  TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered Fragments" << TLOG_ENDL;
762  frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
763  frags.back()->setTimestamp(requests[ev_counter()]);
764  ContainerFragmentLoader cfl(*frags.back());
765  cfl.set_missing_data(false); // Buffer mode is never missing data, even if there IS no data.
766 
767  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
768  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
769  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
770  {
771  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
772  cfl.addFragment(*it);
773  it = dataBuffer_.erase(it);
774  }
775  requestReceiver_->RemoveRequest(ev_counter());
776  ev_counter_inc(1, true);
777 }
778 
780 {
781  TLOG(10) << "applyRequestsWindowMode BEGIN";
782  if (!last_window_send_time_set_)
783  {
784  last_window_send_time_ = std::chrono::steady_clock::now();
785  last_window_send_time_set_ = true;
786  }
787 
788  auto requests = requestReceiver_->GetRequests();
789  bool now_have_desired_request = std::any_of(requests.begin(), requests.end(),
790  [this](decltype(requests)::value_type& request) {
791  return request.first == ev_counter(); });
792 
793  if (missing_request_)
794  {
795  if (!now_have_desired_request && TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) > missing_request_window_timeout_us_)
796  {
797  TLOG_ERROR("CommandableFragmentGenerator") << "Data-taking has paused for " << TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) << " us "
798  << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages."
799  << " Sending Empty Fragments for missing requests!" << TLOG_ENDL;
800  sendEmptyFragments(frags, requests);
801 
802  missing_request_ = false;
803  missing_request_time_ = decltype(missing_request_time_)::max();
804  }
805  else if (now_have_desired_request) {
806  missing_request_ = false;
807  missing_request_time_ = decltype(missing_request_time_)::max();
808  }
809  }
810 
811  TLOG(10) << "applyRequestsWindowMode: Starting request processing";
812  for (auto req = requests.begin(); req != requests.end();)
813  {
814  while (req->first < ev_counter() && requests.size() > 0)
815  {
816  TLOG(10) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
817  requestReceiver_->RemoveRequest(req->first);
818  req = requests.erase(req);
819  }
820  if (requests.size() == 0) break;
821  if (req->first > ev_counter())
822  {
823  if (!missing_request_)
824  {
825  missing_request_ = true;
826  missing_request_time_ = std::chrono::steady_clock::now();
827  }
828  }
829  auto ts = req->second;
830  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) << TLOG_ENDL;
831  Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
832  Fragment::timestamp_t max = min + windowWidth_;
833  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
834  << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" << TLOG_ENDL;
835  bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
836  bool windowTimeout = TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) > window_close_timeout_us_;
837  if (windowTimeout)
838  {
839  TLOG_WARNING("CommandableFragmentGenerator") << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max)
840  << ", buffer=" << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0))
841  << " (if no buffer in memory, this is shown as a 0)). Time waiting: "
842  << TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) << " us "
843  << "(> " << std::to_string(window_close_timeout_us_) << " us)." << TLOG_ENDL;
844 
845  if (missing_request_) {
846  TLOG_ERROR("CommandableFragmentGenerator") << "A Window timeout has occurred while there are pending requests. Sending empties." << TLOG_ENDL;
847  sendEmptyFragments(frags, requests);
848  }
849  }
850  if (windowClosed || !data_thread_running_ || windowTimeout)
851  {
852  TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered or Window-requested Fragments" << TLOG_ENDL;
853  frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
854  frags.back()->setTimestamp(ts);
855  ContainerFragmentLoader cfl(*frags.back());
856 
857  if (!windowClosed) cfl.set_missing_data(true);
858  if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
859  {
860  TLOG_DEBUG("CommandableFragmentGenerator") << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" << TLOG_ENDL;
861  cfl.set_missing_data(true);
862  }
863 
864  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
865  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
866  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
867  {
868  Fragment::timestamp_t fragT = (*it)->timestamp();
869  if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
870  {
871  ++it;
872  continue;
873  }
874 
875  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
876  cfl.addFragment(*it);
877 
878  if (uniqueWindows_)
879  {
880  it = dataBuffer_.erase(it);
881  }
882  else
883  {
884  ++it;
885  }
886  }
887  if (req->first == ev_counter())
888  {
889  ev_counter_inc(1, true);
890  while (windows_sent_ooo_.count(ev_counter()))
891  {
892  TLOG_ARB(9, "CommandableFragmentGenerator") << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" << TLOG_ENDL;
893  windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
894  ev_counter_inc(1, true);
895  }
896  }
897  else
898  {
899  windows_sent_ooo_.insert(req->first);
900  }
901  requestReceiver_->RemoveRequest(req->first);
902  req = requests.erase(req);
903  last_window_send_time_ = std::chrono::steady_clock::now();
904  }
905  else
906  {
907  ++req;
908  }
909  }
910 }
911 
913 {
914  if (check_stop() || exception())
915  {
916  return false;
917  }
918 
919  // Wait for data, if in ignored mode, or a request otherwise
920  if (mode_ == RequestMode::Ignored)
921  {
922  while (dataBufferDepthFragments_ <= 0)
923  {
924  if (check_stop() || exception() || !isHardwareOK_) return false;
925  std::unique_lock<std::mutex> lock(dataBufferMutex_);
926  dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
927  }
928  }
929  else
930  {
931  if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
932  checkDataBuffer();
933 
934  // Wait up to 1000 ms for a request...
935  auto counter = 0;
936 
937  while (requestReceiver_->size() == 0 && counter < 100)
938  {
939  if (check_stop() || exception()) return false;
940 
941  checkDataBuffer();
942 
943  requestReceiver_->WaitForRequests(10); // milliseconds
944  counter++;
945  }
946  }
947 
948  {
949  std::unique_lock<std::mutex> dlk(dataBufferMutex_);
950 
951  switch (mode_)
952  {
953  case RequestMode::Single:
954  applyRequestsSingleMode(frags);
955  break;
956  case RequestMode::Window:
957  applyRequestsWindowMode(frags);
958  break;
959  case RequestMode::Buffer:
960  applyRequestsBufferMode(frags);
961  break;
962  case RequestMode::Ignored:
963  default:
964  applyRequestsIgnoredMode(frags);
965  break;
966  }
967 
968  getDataBufferStats();
969  }
970 
971  if (frags.size() > 0)
972  TLOG_ARB(9, "CommandableFragmentGenerator") << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." << TLOG_ENDL;
973  return true;
974 }
975 
976 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
977 {
978  TLOG_WARNING("CommandableFragmentGenerator") << desc << " sequence ID " << seqId << ", sending empty fragment" << TLOG_ENDL;
979  for (auto fid : fragment_ids_)
980  {
981  auto frag = new Fragment();
982  frag->setSequenceID(seqId);
983  frag->setFragmentID(fid);
984  frag->setSystemType(Fragment::EmptyFragmentType);
985  frags.emplace_back(FragmentPtr(frag));
986  }
987  return true;
988 }
989 
990 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
991 {
992  if (requests.size() == 0 && windows_sent_ooo_.size() == 0) return;
993 
994  if (requests.size() > 0) {
995  TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first << TLOG_ENDL;
996  while (requests.begin()->first > ev_counter())
997  {
998  sendEmptyFragment(frags, ev_counter(), "Missed request for");
999  ev_counter_inc(1, true);
1000  }
1001  }
1002  else if (windows_sent_ooo_.size() > 0)
1003  {
1004  TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << *windows_sent_ooo_.begin() << TLOG_ENDL;
1005  while (*windows_sent_ooo_.begin() > ev_counter())
1006  {
1007  sendEmptyFragment(frags, ev_counter(), "Missed request for");
1008  ev_counter_inc(1, true);
1009  }
1010  }
1011  while (windows_sent_ooo_.count(ev_counter()))
1012  {
1013  TLOG_ARB(19, "CommandableFragmentGenerator") << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" << TLOG_ENDL;
1014  windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
1015  ev_counter_inc(1, true);
1016  }
1017 }
int fragment_id() const
Get the current Fragment ID, if there is only one.
void applyRequestsSingleMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Single. Precondition: dataBufferMutex_ and reques...
virtual ~CommandableFragmentGenerator()
CommandableFragmentGenerator Destructor.
void applyRequestsBufferMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Buffer. Precondition: dataBufferMutex_ and reques...
bool sendEmptyFragment(FragmentPtrs &frags, size_t sequenceId, std::string desc)
Send an EmptyFragmentType Fragment.
void getMonitoringDataLoop()
This function regularly calls checkHWStatus_(), and sets the isHardwareOK flag accordingly.
void startDataThread()
Function that launches the data thread (getDataLoop())
std::string ReportCmd(std::string const &which="")
Get a report about a user-specified run-time quantity.
virtual bool metaCommand(std::string const &command, std::string const &arg)
The meta-command is used for implementing user-specific commands in a CommandableFragmentGenerator.
bool dataBufferIsTooLarge()
Test the configured constraints on the data buffer.
void StopCmd(uint64_t timeout, uint64_t timestamp)
Stop the CommandableFragmentGenerator.
void applyRequestsWindowMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Window. Precondition: dataBufferMutex_ and reques...
void StartCmd(int run, uint64_t timeout, uint64_t timestamp)
Start the CommandableFragmentGenerator.
bool check_stop()
Routine used by applyRequests to make sure that all outstanding requests have been fulfilled before r...
void ResumeCmd(uint64_t timeout, uint64_t timestamp)
Resume the CommandableFragmentGenerator.
CommandableFragmentGenerator()
CommandableFragmentGenerator default constructor.
bool getNext(FragmentPtrs &output) override final
getNext calls either applyRequests or getNext_ to get any data that is ready to be sent to the EventB...
bool waitForDataBufferReady()
Wait for the data buffer to drain (dataBufferIsTooLarge returns false), periodically reporting status...
size_t ev_counter_inc(size_t step=1, bool force=false)
Increment the event counter, if the current RequestMode allows it.
void applyRequestsIgnoredMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Ignored. Precondition: dataBufferMutex_ and reque...
void PauseCmd(uint64_t timeout, uint64_t timestamp)
Pause the CommandableFragmentGenerator.
void getDataLoop()
When separate_data_thread is set to true, this loop repeatedly calls getNext_ and adds returned Fragm...
void sendEmptyFragments(FragmentPtrs &frags, std::map< Fragment::sequence_id_t, Fragment::timestamp_t > &requests)
This function is for Buffered and Single request modes, as they can only respond to one data request ...
void startMonitoringThread()
Function that launches the monitoring thread (getMonitoringDataLoop())
void checkDataBuffer()
Perform data buffer pruning operations. If the RequestMode is Single, removes all but the latest Frag...
std::string printMode_()
Return the string representation of the current RequestMode.
void getDataBufferStats()
Calculate the size of the dataBuffer and report appropriate metrics.
bool applyRequests(FragmentPtrs &output)
See if any requests have been received, and add the corresponding data Fragment objects to the output...
void joinThreads()
Join any data-taking threads. Should be called when destructing CommandableFragmentGenerator.