artdaq  v3_01_00
CommandableFragmentGenerator.cc
1 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
2 #include "artdaq/DAQdata/Globals.hh"
3 
4 #include "artdaq/Application/CommandableFragmentGenerator.hh"
5 
6 #include <boost/exception/all.hpp>
7 #include <boost/throw_exception.hpp>
8 
9 #include <limits>
10 #include <iterator>
11 
12 #include "canvas/Utilities/Exception.h"
13 #include "cetlib_except/exception.h"
14 #include "fhiclcpp/ParameterSet.h"
15 
16 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
17 #include "artdaq-core/Data/Fragment.hh"
18 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
19 #include "artdaq-core/Utilities/ExceptionHandler.hh"
20 #include "artdaq-core/Utilities/TimeUtils.hh"
21 
22 #include <fstream>
23 #include <iomanip>
24 #include <iterator>
25 #include <iostream>
26 #include <iomanip>
27 #include <algorithm>
28 #include <sys/poll.h>
30 
32  : mutex_()
33  , requestReceiver_(new RequestReceiver())
34  , windowOffset_(0)
35  , windowWidth_(0)
36  , staleTimeout_(Fragment::InvalidTimestamp)
37  , expectedType_(Fragment::EmptyFragmentType)
38  , maxFragmentCount_(std::numeric_limits<size_t>::max())
39  , uniqueWindows_(true)
40  , missing_request_(true)
41  , missing_request_time_()
42  , last_window_send_time_()
43  , last_window_send_time_set_(false)
44  , windows_sent_ooo_()
45  , missing_request_window_timeout_us_(1000000)
46  , window_close_timeout_us_(2000000)
47  , useDataThread_(false)
48  , sleep_on_no_data_us_(0)
49  , data_thread_running_(false)
50  , dataBufferDepthFragments_(0)
51  , dataBufferDepthBytes_(0)
52  , maxDataBufferDepthFragments_(1000)
53  , maxDataBufferDepthBytes_(1000)
54  , useMonitoringThread_(false)
55  , monitoringInterval_(0)
56  , lastMonitoringCall_()
57  , isHardwareOK_(true)
58  , dataBuffer_()
59  , newDataBuffer_()
60  , run_number_(-1)
61  , subrun_number_(-1)
62  , timeout_(std::numeric_limits<uint64_t>::max())
63  , timestamp_(std::numeric_limits<uint64_t>::max())
64  , should_stop_(false)
65  , exception_(false)
66  , force_stop_(false)
67  , latest_exception_report_("none")
68  , ev_counter_(1)
69  , board_id_(-1)
70  , instance_name_for_metrics_("FragmentGenerator")
71  , sleep_on_stop_us_(0)
72 {}
73 
75  : mutex_()
76  , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
77  , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
78  , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
79  , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
80  , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
81  , missing_request_(false)
82  , missing_request_time_(decltype(missing_request_time_)::max())
83  , last_window_send_time_(decltype(last_window_send_time_)::max())
84  , last_window_send_time_set_(false)
85  , windows_sent_ooo_()
86  , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
87  , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
88  , useDataThread_(ps.get<bool>("separate_data_thread", false))
89  , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
90  , data_thread_running_(false)
91  , dataBufferDepthFragments_(0)
92  , dataBufferDepthBytes_(0)
93  , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
94  , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
95  , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
96  , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
97  , lastMonitoringCall_()
98  , isHardwareOK_(true)
99  , dataBuffer_()
100  , newDataBuffer_()
101  , run_number_(-1)
102  , subrun_number_(-1)
103  , timeout_(std::numeric_limits<uint64_t>::max())
104  , timestamp_(std::numeric_limits<uint64_t>::max())
105  , should_stop_(false)
106  , exception_(false)
107  , force_stop_(false)
108  , latest_exception_report_("none")
109  , ev_counter_(1)
110  , board_id_(-1)
111  , sleep_on_stop_us_(0)
112 {
113  board_id_ = ps.get<int>("board_id");
114  instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
115 
116  fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
117 
118  TLOG(TLVL_TRACE) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" ;
119  int fragment_id = ps.get<int>("fragment_id", -99);
120 
121  if (fragment_id != -99)
122  {
123  if (fragment_ids_.size() != 0)
124  {
125  latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
126  throw cet::exception(latest_exception_report_);
127  }
128  else
129  {
130  fragment_ids_.emplace_back(fragment_id);
131  }
132  }
133 
134  sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
135 
136  dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
137  (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
138 
139  std::string modeString = ps.get<std::string>("request_mode", "ignored");
140  if (modeString == "single" || modeString == "Single")
141  {
142  mode_ = RequestMode::Single;
143  }
144  else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
145  {
146  mode_ = RequestMode::Buffer;
147  }
148  else if (modeString == "window" || modeString == "Window")
149  {
150  mode_ = RequestMode::Window;
151  }
152  else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
153  {
154  mode_ = RequestMode::Ignored;
155  }
156  TLOG(TLVL_DEBUG) << "Request mode is " << printMode_() ;
157 
158  if (mode_ != RequestMode::Ignored)
159  {
160  if (!useDataThread_)
161  {
162  latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
163  throw cet::exception(latest_exception_report_);
164  }
165  requestReceiver_.reset(new RequestReceiver(ps));
166  }
167 }
168 
170 {
171  joinThreads();
172 }
173 
175 {
176  should_stop_ = true;
177  force_stop_ = true;
178  TLOG(TLVL_DEBUG) << "Joining dataThread" ;
179  if (dataThread_.joinable()) dataThread_.join();
180  TLOG(TLVL_DEBUG) << "Joining monitoringThread" ;
181  if (monitoringThread_.joinable()) monitoringThread_.join();
182  requestReceiver_.reset(nullptr);
183 }
184 
186 {
187  bool result = true;
188 
189  if (check_stop()) usleep(sleep_on_stop_us_);
190  if (exception() || force_stop_) return false;
191 
192  if (!useMonitoringThread_ && monitoringInterval_ > 0)
193  {
194  TLOG(10) << "getNext: Checking whether to collect Monitoring Data" ;
195  auto now = std::chrono::steady_clock::now();
196 
197  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
198  {
199  TLOG(10) << "getNext: Collecting Monitoring Data" ;
200  isHardwareOK_ = checkHWStatus_();
201  TLOG(10) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ ;
202  lastMonitoringCall_ = now;
203  }
204  }
205 
206  try
207  {
208  std::lock_guard<std::mutex> lk(mutex_);
209  if (useDataThread_)
210  {
211  TLOG(TLVL_TRACE) << "getNext: Calling applyRequests" ;
212  result = applyRequests(output);
213  TLOG(TLVL_TRACE) << "getNext: Done with applyRequests result=" << std::boolalpha << result;
214 
215  if (exception())
216  {
217  TLOG(TLVL_ERROR) << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
218  throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
219  }
220  }
221  else
222  {
223  if (!isHardwareOK_)
224  {
225  TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!" ;
226  return false;
227  }
228  TLOG(TLVL_TRACE) << "getNext: Calling getNext_ " << std::to_string(ev_counter()) ;
229  try
230  {
231  result = getNext_(output);
232  }
233  catch (...)
234  {
235  throw;
236  }
237  TLOG(TLVL_TRACE) << "getNext: Done with getNext_ " << std::to_string(ev_counter()) ;
238  }
239  }
240  catch (const cet::exception& e)
241  {
242  latest_exception_report_ = "cet::exception caught in getNext(): ";
243  latest_exception_report_.append(e.what());
244  TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e ;
245  set_exception(true);
246  return false;
247  }
248  catch (const boost::exception& e)
249  {
250  latest_exception_report_ = "boost::exception caught in getNext(): ";
251  latest_exception_report_.append(boost::diagnostic_information(e));
252  TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e) ;
253  set_exception(true);
254  return false;
255  }
256  catch (const std::exception& e)
257  {
258  latest_exception_report_ = "std::exception caught in getNext(): ";
259  latest_exception_report_.append(e.what());
260  TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what() ;
261  set_exception(true);
262  return false;
263  }
264  catch (...)
265  {
266  latest_exception_report_ = "Unknown exception caught in getNext().";
267  TLOG(TLVL_ERROR) << "getNext: unknown exception caught" ;
268  set_exception(true);
269  return false;
270  }
271 
272  if (!result)
273  {
274  TLOG(TLVL_DEBUG) << "stopped " ;
275  }
276 
277  return result;
278 }
279 
281 {
282  TLOG(14) << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception()) ;
283 
284  if (!should_stop()) return false;
285  if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
286  if (force_stop_) return true;
287 
288  // check_stop returns true if the CFG should stop. We should wait for the RequestReceiver to stop before stopping.
289  return !requestReceiver_->isRunning();
290 }
291 
293 {
294  if (fragment_ids_.size() != 1)
295  {
296  throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
297  }
298  else
299  {
300  return fragment_ids_[0];
301  }
302 }
303 
305 {
306  if (force || mode_ == RequestMode::Ignored)
307  {
308  return ev_counter_.fetch_add(step);
309  }
310  return ev_counter_.load();
311 } // returns the prev value
312 
313 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
314 {
315  if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
316 
317  timeout_ = timeout;
318  timestamp_ = timestamp;
319  ev_counter_.store(1);
320  missing_request_ = false;
321  should_stop_.store(false);
322  exception_.store(false);
323  run_number_ = run;
324  subrun_number_ = 1;
325  latest_exception_report_ = "none";
326  dataBuffer_.clear();
327  last_window_send_time_set_ = false;
328  windows_sent_ooo_.clear();
329 
330  start();
331 
332  std::unique_lock<std::mutex> lk(mutex_);
333  if (useDataThread_) startDataThread();
334  if (useMonitoringThread_) startMonitoringThread();
335  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
336 }
337 
338 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
339 {
340  TLOG(TLVL_DEBUG) << "Stop Command received." ;
341 
342  timeout_ = timeout;
343  timestamp_ = timestamp;
344  if (requestReceiver_ && requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
345 
346  stopNoMutex();
347  should_stop_.store(true);
348  std::unique_lock<std::mutex> lk(mutex_);
349  stop();
350  TLOG(TLVL_DEBUG) << "Stop command complete.";
351 }
352 
353 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
354 {
355  timeout_ = timeout;
356  timestamp_ = timestamp;
357  if (requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
358 
359  pauseNoMutex();
360  should_stop_.store(true);
361  std::unique_lock<std::mutex> lk(mutex_);
362 
363  pause();
364 }
365 
366 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
367 {
368  timeout_ = timeout;
369  timestamp_ = timestamp;
370 
371  subrun_number_ += 1;
372  should_stop_ = false;
373 
374  dataBuffer_.clear();
375 
376  // no lock required: thread not started yet
377  resume();
378 
379  std::unique_lock<std::mutex> lk(mutex_);
380  if (useDataThread_) startDataThread();
381  if (useMonitoringThread_) startMonitoringThread();
382  if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
383 }
384 
385 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
386 {
387  std::lock_guard<std::mutex> lk(mutex_);
388 
389  // 14-May-2015, KAB: please see the comments associated with the report()
390  // methods in the CommandableFragmentGenerator.hh file for more information
391  // on the use of those methods in this method.
392 
393  // check if the child class has something meaningful for this request
394  std::string childReport = reportSpecific(which);
395  if (childReport.length() > 0) { return childReport; }
396 
397  // handle the requests that we can take care of at this level
398  if (which == "latest_exception")
399  {
400  return latest_exception_report_;
401  }
402 
403  // check if the child class has provided a catch-all report function
404  childReport = report();
405  if (childReport.length() > 0) { return childReport; }
406 
407  // if we haven't been able to come up with any report so far, say so
408  std::string tmpString = "The \"" + which + "\" command is not ";
409  tmpString.append("currently supported by the ");
410  tmpString.append(metricsReportingInstanceName());
411  tmpString.append(" fragment generator.");
412  return tmpString;
413 }
414 
415 // Default implementations of state functions
416 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
417 {
418 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
419 }
420 
421 void artdaq::CommandableFragmentGenerator::pause()
422 {
423 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
424 }
425 
426 void artdaq::CommandableFragmentGenerator::resume()
427 {
428 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
429 }
430 
431 std::string artdaq::CommandableFragmentGenerator::report()
432 {
433 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
434  return "";
435 }
436 
437 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
438 {
439 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
440  return "";
441 }
442 
443 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
444 {
445 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
446  return true;
447 }
448 
449 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const&, std::string const&)
450 {
451 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
452  return true;
453 }
454 
456 {
457  if (dataThread_.joinable()) dataThread_.join();
458  TLOG(TLVL_INFO) << "Starting Data Receiver Thread" ;
459  dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
460 }
461 
463 {
464  if (monitoringThread_.joinable()) monitoringThread_.join();
465  TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread" ;
466  monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
467 }
468 
470 {
471  switch (mode_)
472  {
473  case RequestMode::Single:
474  return "Single";
475  case RequestMode::Buffer:
476  return "Buffer";
477  case RequestMode::Window:
478  return "Window";
479  case RequestMode::Ignored:
480  return "Ignored";
481  }
482 
483  return "ERROR";
484 }
485 
487 {
488  data_thread_running_ = true;
489  while (!force_stop_)
490  {
491  if (!isHardwareOK_)
492  {
493  TLOG(TLVL_DEBUG) << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" ;
494  data_thread_running_ = false;
495  return;
496  }
497 
498  TLOG(13) << "getDataLoop: calling getNext_" ;
499 
500  bool data = false;
501  auto startdata = std::chrono::steady_clock::now();
502 
503  try
504  {
505  data = getNext_(newDataBuffer_);
506  }
507  catch (...)
508  {
509  ExceptionHandler(ExceptionHandlerRethrow::no,
510  "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
511  set_exception(true);
512 
513  data_thread_running_ = false;
514  return;
515  }
516 
517  if (metricMan)
518  {
519  metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
520  }
521 
522  if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
523  {
524  usleep(sleep_on_no_data_us_);
525  }
526 
527  TLOG(15) << "Waiting for data buffer ready" ;
528  if (!waitForDataBufferReady()) return;
529  TLOG(15) << "Done waiting for data buffer ready" ;
530 
531  TLOG(13) << "getDataLoop: processing data" ;
532  if (data && !force_stop_)
533  {
534  std::unique_lock<std::mutex> lock(dataBufferMutex_);
535  switch (mode_)
536  {
537  case RequestMode::Single:
538  // While here, if for some strange reason more than one event's worth of data is returned from getNext_...
539  while (newDataBuffer_.size() >= fragment_ids_.size())
540  {
541  dataBuffer_.clear();
542  auto it = newDataBuffer_.begin();
543  std::advance(it, fragment_ids_.size());
544  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
545  }
546  break;
547  case RequestMode::Buffer:
548  case RequestMode::Ignored:
549  case RequestMode::Window:
550  default:
551  //dataBuffer_.reserve(dataBuffer_.size() + newDataBuffer_.size());
552  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
553  break;
554  }
555  getDataBufferStats();
556  }
557 
558  {
559  std::unique_lock<std::mutex> lock(dataBufferMutex_);
560  if (dataBuffer_.size() > 0)
561  {
562  dataCondition_.notify_all();
563  }
564  }
565  if (!data || force_stop_)
566  {
567  TLOG(TLVL_INFO) << "Data flow has stopped. Ending data collection thread" ;
568  data_thread_running_ = false;
569  if (requestReceiver_) requestReceiver_->ClearRequests();
570  dataBuffer_.clear();
571  newDataBuffer_.clear();
572  return;
573  }
574  }
575 }
576 
578 {
579  auto startwait = std::chrono::steady_clock::now();
580  auto first = true;
581  auto lastwaittime = 0ULL;
582  while (dataBufferIsTooLarge())
583  {
584  if (should_stop())
585  {
586  TLOG(TLVL_DEBUG) << "Run ended while waiting for buffer to shrink!" ;
587  std::unique_lock<std::mutex> lock(dataBufferMutex_);
588  getDataBufferStats();
589  dataCondition_.notify_all();
590  data_thread_running_ = false;
591  return false;
592  }
593  auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
594 
595  if (first || (waittime != lastwaittime && waittime % 1000 == 0))
596  {
597  TLOG(TLVL_WARNING) << "Bad Omen: Data Buffer has exceeded its size limits. "
598  << "(seq_id=" << ev_counter()
599  << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
600  << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")" ;
601  TLOG(TLVL_TRACE) << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues" ;
602  first = false;
603  }
604  if (waittime % 5 && waittime != lastwaittime)
605  {
606  TLOG(13) << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" ;
607  }
608  lastwaittime = waittime;
609  usleep(1000);
610  }
611  return true;
612 }
613 
615 {
616  return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
617 }
618 
620 {
622  dataBufferDepthFragments_ = dataBuffer_.size();
623  size_t acc = 0;
624  TLOG(15) << "getDataBufferStats: Calculating buffer size" ;
625  for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
626  {
627  if (i->get() != nullptr)
628  {
629  acc += (*i)->sizeBytes();
630  }
631  }
632  dataBufferDepthBytes_ = acc;
633 
634  if (metricMan)
635  {
636  TLOG(15) << "getDataBufferStats: Sending Metrics" ;
637  metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
638  metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
639  }
640  TLOG(15) << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
641  << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) ;
642 }
643 
645 {
646  std::unique_lock<std::mutex> lock(dataBufferMutex_);
647  dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
648  if (dataBufferDepthFragments_ > 0)
649  {
650  if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
651  {
652  // Eliminate extra fragments
653  while (dataBufferIsTooLarge())
654  {
655  dataBuffer_.erase(dataBuffer_.begin());
656  getDataBufferStats();
657  }
658  if (dataBuffer_.size() > 0)
659  {
660  TLOG(17) << "Determining if Fragments can be dropped from data buffer" ;
661  Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
662  Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
663  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
664  {
665  if ((*it)->timestamp() < min)
666  {
667  it = dataBuffer_.erase(it);
668  }
669  else
670  {
671  ++it;
672  }
673  }
674  getDataBufferStats();
675  }
676  }
677  else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
678  {
679  // Eliminate extra fragments
680  while (dataBuffer_.size() > fragment_ids_.size())
681  {
682  dataBuffer_.erase(dataBuffer_.begin());
683  }
684  }
685  }
686 }
687 
689 {
690  while (!force_stop_)
691  {
692  if (should_stop() || monitoringInterval_ <= 0)
693  {
694  TLOG(TLVL_DEBUG) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
695  << " and monitoringInterval is " << monitoringInterval_ << ", returning" ;
696  return;
697  }
698  TLOG(12) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" ;
699 
700  auto now = std::chrono::steady_clock::now();
701  if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
702  {
703  isHardwareOK_ = checkHWStatus_();
704  TLOG(12) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ ;
705  lastMonitoringCall_ = now;
706  }
707  usleep(monitoringInterval_ / 10);
708  }
709 }
710 
712 {
713  // We just copy everything that's here into the output.
714  TLOG(9) << "Mode is Ignored; Copying data to output" ;
715  std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
716  dataBuffer_.clear();
717 }
718 
720 {
721  // We only care about the latest request received. Send empties for all others.
722  auto requests = requestReceiver_->GetRequests();
723  while (requests.size() > 1) {
724  // std::map is ordered by key => Last sequence ID in the map is the one we care about
725  requestReceiver_->RemoveRequest(requests.begin()->first);
726  requests.erase(requests.begin());
727  }
728  sendEmptyFragments(frags, requests);
729 
730  // If no requests remain after sendEmptyFragments, return
731  if (requests.size() == 0 || !requests.count(ev_counter())) return;
732 
733  if (dataBuffer_.size() > 0)
734  {
735  TLOG(9) << "Mode is Single; Sending copy of last event" ;
736  for (auto& fragptr : dataBuffer_)
737  {
738  // Return the latest data point
739  auto frag = fragptr.get();
740  auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
741  newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
742  memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
743  newfrag->setTimestamp(requests[ev_counter()]);
744  newfrag->setSequenceID(ev_counter());
745  frags.push_back(std::move(newfrag));
746  }
747  }
748  else
749  {
750  sendEmptyFragment(frags, ev_counter(), "No data for");
751  }
752  requestReceiver_->RemoveRequest(ev_counter());
753  ev_counter_inc(1, true);
754 }
755 
757 {
758  // We only care about the latest request received. Send empties for all others.
759  auto requests = requestReceiver_->GetRequests();
760  while (requests.size() > 1) {
761  // std::map is ordered by key => Last sequence ID in the map is the one we care about
762  requestReceiver_->RemoveRequest(requests.begin()->first);
763  requests.erase(requests.begin());
764  }
765  sendEmptyFragments(frags, requests);
766 
767  // If no requests remain after sendEmptyFragments, return
768  if (requests.size() == 0 || !requests.count(ev_counter())) return;
769 
770  TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered Fragments" ;
771  frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
772  frags.back()->setTimestamp(requests[ev_counter()]);
773  ContainerFragmentLoader cfl(*frags.back());
774  cfl.set_missing_data(false); // Buffer mode is never missing data, even if there IS no data.
775 
776  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
777  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
778  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
779  {
780  TLOG(9) << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" ;
781  cfl.addFragment(*it);
782  it = dataBuffer_.erase(it);
783  }
784  requestReceiver_->RemoveRequest(ev_counter());
785  ev_counter_inc(1, true);
786 }
787 
789 {
790  TLOG(10) << "applyRequestsWindowMode BEGIN";
791  if (!last_window_send_time_set_)
792  {
793  last_window_send_time_ = std::chrono::steady_clock::now();
794  last_window_send_time_set_ = true;
795  }
796 
797  auto requests = requestReceiver_->GetRequests();
798  bool now_have_desired_request = std::any_of(requests.begin(), requests.end(),
799  [this](decltype(requests)::value_type& request) {
800  return request.first == ev_counter(); });
801 
802  if (missing_request_)
803  {
804  if (!now_have_desired_request && TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) > missing_request_window_timeout_us_)
805  {
806  TLOG(TLVL_ERROR) << "Data-taking has paused for " << TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) << " us "
807  << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages."
808  << " Sending Empty Fragments for missing requests!" ;
809  sendEmptyFragments(frags, requests);
810 
811  missing_request_ = false;
812  missing_request_time_ = decltype(missing_request_time_)::max();
813  }
814  else if (now_have_desired_request) {
815  missing_request_ = false;
816  missing_request_time_ = decltype(missing_request_time_)::max();
817  }
818  }
819 
820  TLOG(10) << "applyRequestsWindowMode: Starting request processing";
821  for (auto req = requests.begin(); req != requests.end();)
822  {
823  TLOG(10, "CommandableFragmentGenerator") << "applyRequestsWindowMode: processing request with sequence ID " << \
824  req->first << ", timestamp " << req->second;
825 
826 
827  while (req->first < ev_counter() && requests.size() > 0)
828  {
829  TLOG(10) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
830  requestReceiver_->RemoveRequest(req->first);
831  req = requests.erase(req);
832  }
833  if (requests.size() == 0) break;
834  if (req->first > ev_counter())
835  {
836  if (!missing_request_)
837  {
838  missing_request_ = true;
839  missing_request_time_ = std::chrono::steady_clock::now();
840  }
841  }
842  auto ts = req->second;
843  TLOG(9) << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) ;
844  Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
845  Fragment::timestamp_t max = min + windowWidth_;
846  TLOG(9) << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
847  << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" ;
848  bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
849  bool windowTimeout = TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) > window_close_timeout_us_;
850  if (windowTimeout)
851  {
852  TLOG(TLVL_WARNING) << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max)
853  << ", buffer=" << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0))
854  << " (if no buffer in memory, this is shown as a 0)). Time waiting: "
855  << TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) << " us "
856  << "(> " << std::to_string(window_close_timeout_us_) << " us)." ;
857 
858  if (missing_request_) {
859  TLOG(TLVL_ERROR) << "A Window timeout has occurred while there are pending requests. Sending empties." ;
860  sendEmptyFragments(frags, requests);
861  }
862  }
863  if (windowClosed || !data_thread_running_ || windowTimeout)
864  {
865  TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered or Window-requested Fragments" ;
866  frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
867  frags.back()->setTimestamp(ts);
868  ContainerFragmentLoader cfl(*frags.back());
869 
870  if (!windowClosed) cfl.set_missing_data(true);
871  if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
872  {
873  TLOG(TLVL_DEBUG) << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" ;
874  cfl.set_missing_data(true);
875  }
876 
877  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
878  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
879  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
880  {
881  Fragment::timestamp_t fragT = (*it)->timestamp();
882  if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
883  {
884  ++it;
885  continue;
886  }
887 
888  TLOG(9) << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" ;
889  cfl.addFragment(*it);
890 
891  if (uniqueWindows_)
892  {
893  it = dataBuffer_.erase(it);
894  }
895  else
896  {
897  ++it;
898  }
899  }
900  if (req->first == ev_counter())
901  {
902  ev_counter_inc(1, true);
903  while (windows_sent_ooo_.count(ev_counter()))
904  {
905  TLOG(9) << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" ;
906  windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
907  ev_counter_inc(1, true);
908  }
909  }
910  else
911  {
912  windows_sent_ooo_.insert(req->first);
913  }
914  requestReceiver_->RemoveRequest(req->first);
915  req = requests.erase(req);
916  last_window_send_time_ = std::chrono::steady_clock::now();
917  }
918  else
919  {
920  ++req;
921  }
922  }
923 }
924 
926 {
927  if (check_stop() || exception())
928  {
929  return false;
930  }
931 
932  // Wait for data, if in ignored mode, or a request otherwise
933  if (mode_ == RequestMode::Ignored)
934  {
935  while (dataBufferDepthFragments_ <= 0)
936  {
937  if (check_stop() || exception() || !isHardwareOK_) return false;
938  std::unique_lock<std::mutex> lock(dataBufferMutex_);
939  dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
940  }
941  }
942  else
943  {
944  if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
945  checkDataBuffer();
946 
947  // Wait up to 1000 ms for a request...
948  auto counter = 0;
949 
950  while (requestReceiver_->size() == 0 && counter < 100)
951  {
952  if (check_stop() || exception()) return false;
953 
954  checkDataBuffer();
955 
956  requestReceiver_->WaitForRequests(10); // milliseconds
957  counter++;
958  }
959  }
960 
961  {
962  std::unique_lock<std::mutex> dlk(dataBufferMutex_);
963 
964  switch (mode_)
965  {
966  case RequestMode::Single:
967  applyRequestsSingleMode(frags);
968  break;
969  case RequestMode::Window:
970  applyRequestsWindowMode(frags);
971  break;
972  case RequestMode::Buffer:
973  applyRequestsBufferMode(frags);
974  break;
975  case RequestMode::Ignored:
976  default:
977  applyRequestsIgnoredMode(frags);
978  break;
979  }
980 
981  getDataBufferStats();
982  }
983 
984  if (frags.size() > 0)
985  TLOG(9) << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." ;
986  return true;
987 }
988 
989 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
990 {
991  TLOG(TLVL_WARNING) << desc << " sequence ID " << seqId << ", sending empty fragment" ;
992  for (auto fid : fragment_ids_)
993  {
994  auto frag = new Fragment();
995  frag->setSequenceID(seqId);
996  frag->setFragmentID(fid);
997  frag->setSystemType(Fragment::EmptyFragmentType);
998  frags.emplace_back(FragmentPtr(frag));
999  }
1000  return true;
1001 }
1002 
1003 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
1004 {
1005  if (requests.size() == 0 && windows_sent_ooo_.size() == 0) return;
1006 
1007  if (requests.size() > 0) {
1008  TLOG(19) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first ;
1009  while (requests.begin()->first > ev_counter())
1010  {
1011  sendEmptyFragment(frags, ev_counter(), "Missed request for");
1012  ev_counter_inc(1, true);
1013  }
1014  }
1015  else if (windows_sent_ooo_.size() > 0)
1016  {
1017  TLOG(19) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << *windows_sent_ooo_.begin() ;
1018  while (*windows_sent_ooo_.begin() > ev_counter())
1019  {
1020  sendEmptyFragment(frags, ev_counter(), "Missed request for");
1021  ev_counter_inc(1, true);
1022  }
1023  }
1024  while (windows_sent_ooo_.count(ev_counter()))
1025  {
1026  TLOG(19) << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" ;
1027  windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
1028  ev_counter_inc(1, true);
1029  }
1030 }
int fragment_id() const
Get the current Fragment ID, if there is only one.
void applyRequestsSingleMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Single. Precondition: dataBufferMutex_ and reques...
virtual ~CommandableFragmentGenerator()
CommandableFragmentGenerator Destructor.
void applyRequestsBufferMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Buffer. Precondition: dataBufferMutex_ and reques...
bool sendEmptyFragment(FragmentPtrs &frags, size_t sequenceId, std::string desc)
Send an EmptyFragmentType Fragment.
void getMonitoringDataLoop()
This function regularly calls checkHWStatus_(), and sets the isHardwareOK flag accordingly.
void startDataThread()
Function that launches the data thread (getDataLoop())
std::string ReportCmd(std::string const &which="")
Get a report about a user-specified run-time quantity.
virtual bool metaCommand(std::string const &command, std::string const &arg)
The meta-command is used for implementing user-specific commands in a CommandableFragmentGenerator.
bool dataBufferIsTooLarge()
Test the configured constraints on the data buffer.
void StopCmd(uint64_t timeout, uint64_t timestamp)
Stop the CommandableFragmentGenerator.
void applyRequestsWindowMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Window. Precondition: dataBufferMutex_ and reques...
void StartCmd(int run, uint64_t timeout, uint64_t timestamp)
Start the CommandableFragmentGenerator.
bool check_stop()
Routine used by applyRequests to make sure that all outstanding requests have been fulfilled before r...
void ResumeCmd(uint64_t timeout, uint64_t timestamp)
Resume the CommandableFragmentGenerator.
CommandableFragmentGenerator()
CommandableFragmentGenerator default constructor.
bool getNext(FragmentPtrs &output) override final
getNext calls either applyRequests or getNext_ to get any data that is ready to be sent to the EventB...
bool waitForDataBufferReady()
Wait for the data buffer to drain (dataBufferIsTooLarge returns false), periodically reporting status...
size_t ev_counter_inc(size_t step=1, bool force=false)
Increment the event counter, if the current RequestMode allows it.
void applyRequestsIgnoredMode(artdaq::FragmentPtrs &frags)
Create fragments using data buffer for request mode Ignored. Precondition: dataBufferMutex_ and reque...
void PauseCmd(uint64_t timeout, uint64_t timestamp)
Pause the CommandableFragmentGenerator.
void getDataLoop()
When separate_data_thread is set to true, this loop repeatedly calls getNext_ and adds returned Fragm...
void sendEmptyFragments(FragmentPtrs &frags, std::map< Fragment::sequence_id_t, Fragment::timestamp_t > &requests)
This function is for Buffered and Single request modes, as they can only respond to one data request ...
void startMonitoringThread()
Function that launches the monitoring thread (getMonitoringDataLoop())
void checkDataBuffer()
Perform data buffer pruning operations. If the RequestMode is Single, removes all but the latest Frag...
std::string printMode_()
Return the string representation of the current RequestMode.
void getDataBufferStats()
Calculate the size of the dataBuffer and report appropriate metrics.
bool applyRequests(FragmentPtrs &output)
See if any requests have been received, and add the corresponding data Fragment objects to the output...
void joinThreads()
Join any data-taking threads. Should be called when destructing CommandableFragmentGenerator.