artdaq  v3_00_02
BoardReaderCore.cc
1 #define TRACE_NAME "BoardReaderCore"
2 #include "tracemf.h"
3 #include "artdaq/Application/TaskType.hh"
4 #include "artdaq/Application/BoardReaderCore.hh"
5 #include "artdaq-core/Data/Fragment.hh"
6 #include "artdaq-core/Utilities/ExceptionHandler.hh"
7 #include "artdaq/Application/makeCommandableFragmentGenerator.hh"
8 #include "canvas/Utilities/Exception.h"
9 #include "cetlib/exception.h"
10 #include <pthread.h>
11 #include <sched.h>
12 #include <algorithm>
13 
14 const std::string artdaq::BoardReaderCore::
15 FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
16 const std::string artdaq::BoardReaderCore::
17 INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
18 const std::string artdaq::BoardReaderCore::
19 BRSYNC_WAIT_STAT_KEY("BoardReaderCoreBRSyncWaitTime");
20 const std::string artdaq::BoardReaderCore::
21 OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
22 const std::string artdaq::BoardReaderCore::
23 FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
24 
25 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
26 
28  parent_application_(parent_application)
29  /*, local_group_comm_(local_group_comm)*/
30  , generator_ptr_(nullptr)
31  , stop_requested_(false)
32  , pause_requested_(false)
33 {
34  TLOG_DEBUG(app_name) << "Constructor" << TLOG_ENDL;
40  metricMan = &metricMan_;
41 }
42 
44 {
45  TLOG_DEBUG(app_name) << "Destructor" << TLOG_ENDL;
46 }
47 
48 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
49 {
50  TLOG_DEBUG(app_name) << "initialize method called with " << "ParameterSet = \"" << pset.to_string() << "\"." << TLOG_ENDL;
51 
52  // pull out the relevant parts of the ParameterSet
53  fhicl::ParameterSet daq_pset;
54  try
55  {
56  daq_pset = pset.get<fhicl::ParameterSet>("daq");
57  }
58  catch (...)
59  {
60  TLOG_ERROR(app_name)
61  << "Unable to find the DAQ parameters in the initialization "
62  << "ParameterSet: \"" + pset.to_string() + "\"." << TLOG_ENDL;
63  return false;
64  }
65  fhicl::ParameterSet fr_pset;
66  try
67  {
68  fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
69  data_pset_ = fr_pset;
70  }
71  catch (...)
72  {
73  TLOG_ERROR(app_name)
74  << "Unable to find the fragment_receiver parameters in the DAQ "
75  << "initialization ParameterSet: \"" + daq_pset.to_string() + "\"." << TLOG_ENDL;
76  return false;
77  }
78 
79  // pull out the Metric part of the ParameterSet
80  fhicl::ParameterSet metric_pset;
81  try
82  {
83  metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
84  }
85  catch (...) {} // OK if there's no metrics table defined in the FHiCL
86 
87  if (metric_pset.is_empty())
88  {
89  TLOG_INFO(app_name) << "No metric plugins appear to be defined" << TLOG_ENDL;
90  }
91  try
92  {
93  metricMan_.initialize(metric_pset, app_name);
94  }
95  catch (...)
96  {
97  ExceptionHandler(ExceptionHandlerRethrow::no,
98  "Error loading metrics in BoardReaderCore::initialize()");
99  }
100 
101  // create the requested CommandableFragmentGenerator
102  std::string frag_gen_name = fr_pset.get<std::string>("generator", "");
103  if (frag_gen_name.length() == 0)
104  {
105  TLOG_ERROR(app_name)
106  << "No fragment generator (parameter name = \"generator\") was "
107  << "specified in the fragment_receiver ParameterSet. The "
108  << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\"." << TLOG_ENDL;
109  return false;
110  }
111 
112  try
113  {
114  generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
115  }
116  catch (...)
117  {
118  std::stringstream exception_string;
119  exception_string << "Exception thrown during initialization of fragment generator of type \""
120  << frag_gen_name << "\"";
121 
122  ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
123 
124  TLOG_DEBUG(app_name) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string() << TLOG_ENDL;
125 
126  return false;
127  }
128  metricMan_.setPrefix(generator_ptr_->metricsReportingInstanceName());
129 
130  rt_priority_ = fr_pset.get<int>("rt_priority", 0);
131  /* ELF 5/10/2017 Removing in favor of DataReceiverManager source suppression logic
132  mpi_sync_fragment_interval_ = fr_pset.get<int>("mpi_sync_interval", 0);
133  if (mpi_sync_fragment_interval_ > 0)
134  {
135  mpi_sync_wait_threshold_fraction_ = fr_pset.get<double>("mpi_sync_wait_threshold", 0.5);
136  mpi_sync_wait_threshold_count_ = mpi_sync_fragment_interval_ * mpi_sync_wait_threshold_fraction_;
137  if (mpi_sync_wait_threshold_count_ >= mpi_sync_fragment_interval_)
138  {
139  TLOG_WARNING(app_name) << "The calculated mpi_sync wait threshold "
140  << "(" << mpi_sync_wait_threshold_count_ << " fragments) "
141  << "is too large, setting it to "
142  << (mpi_sync_fragment_interval_ - 1) << "." << TLOG_ENDL;
143  mpi_sync_wait_threshold_count_ = mpi_sync_fragment_interval_ - 1;
144  }
145  if (mpi_sync_wait_threshold_count_ < 0)
146  {
147  TLOG_WARNING(app_name) << "The calculated mpi_sync wait threshold "
148  << "(" << mpi_sync_wait_threshold_count_ << " fragments) "
149  << "is too small, setting it to zero." << TLOG_ENDL;
150  mpi_sync_wait_threshold_count_ = 0;
151  }
152  mpi_sync_wait_interval_usec_ = fr_pset.get<size_t>("mpi_sync_wait_interval_usec", 100);
153  mpi_sync_wait_log_level_ = fr_pset.get<int>("mpi_sync_wait_log_level", 2);
154  mpi_sync_wait_log_interval_sec_ = fr_pset.get<int>("mpi_sync_wait_log_interval_sec", 10);
155  }
156  else
157  {
158  mpi_sync_wait_threshold_fraction_ = 0.0;
159  mpi_sync_wait_threshold_count_ = 0;
160  mpi_sync_wait_interval_usec_ = 1000000;
161  mpi_sync_wait_log_level_ = 0;
162  mpi_sync_wait_log_interval_sec_ = 10;
163  }
164  TLOG_DEBUG(app_name)
165  << "mpi_sync_fragment_interval is " << mpi_sync_fragment_interval_
166  << ", mpi_sync_wait_threshold_fraction is " << mpi_sync_wait_threshold_fraction_
167  << ", mpi_sync_wait_threshold_count is " << mpi_sync_wait_threshold_count_
168  << ", mpi_sync_wait_interval_usec is " << mpi_sync_wait_interval_usec_
169  << ", mpi_sync_wait_log_level is " << mpi_sync_wait_log_level_
170  << ", mpi_sync_wait_log_interval_sec is " << mpi_sync_wait_log_interval_sec_ << TLOG_ENDL;
171  */
172  // fetch the monitoring parameters and create the MonitoredQuantity instances
173  statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
174 
175  // check if we should skip the sequence ID test...
176  skip_seqId_test_ = (generator_ptr_->fragmentIDs().size() > 1);
177 
178  return true;
179 }
180 
181 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
182 {
183  stop_requested_.store(false);
184  pause_requested_.store(false);
185 
186  fragment_count_ = 0;
187  prev_seq_id_ = 0;
188  statsHelper_.resetStatistics();
189 
190  metricMan_.do_start();
191  generator_ptr_->StartCmd(id.run(), timeout, timestamp);
192  run_id_ = id;
193 
194  TLOG_DEBUG(app_name) << "Started run " << run_id_.run() <<
195  ", timeout = " << timeout << ", timestamp = " << timestamp << TLOG_ENDL;
196  return true;
197 }
198 
199 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
200 {
201  TLOG_DEBUG(app_name) << "Stopping run " << run_id_.run()
202  << " after " << fragment_count_
203  << " fragments." << TLOG_ENDL;
204  stop_requested_.store(true);
205  generator_ptr_->StopCmd(timeout, timestamp);
206  return true;
207 }
208 
209 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
210 {
211  TLOG_DEBUG(app_name) << "Pausing run " << run_id_.run()
212  << " after " << fragment_count_
213  << " fragments." << TLOG_ENDL;
214  pause_requested_.store(true);
215  generator_ptr_->PauseCmd(timeout, timestamp);
216  return true;
217 }
218 
219 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
220 {
221  TLOG_DEBUG(app_name) << "Resuming run " << run_id_.run() << TLOG_ENDL;
222  pause_requested_.store(false);
223  metricMan_.do_start();
224  generator_ptr_->ResumeCmd(timeout, timestamp);
225  return true;
226 }
227 
229 {
230  generator_ptr_->joinThreads(); // Cleanly shut down the CommandableFragmentGenerator
231  generator_ptr_.reset(nullptr);
232  metricMan_.shutdown();
233  return true;
234 }
235 
236 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
237 {
238  TLOG_DEBUG(app_name) << "soft_initialize method called with "
239  << "ParameterSet = \"" << pset.to_string()
240  << "\"." << TLOG_ENDL;
241  return true;
242 }
243 
244 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
245 {
246  TLOG_DEBUG(app_name) << "reinitialize method called with "
247  << "ParameterSet = \"" << pset.to_string()
248  << "\"." << TLOG_ENDL;
249  return true;
250 }
251 
253 {
254  if (rt_priority_ > 0)
255  {
256 #pragma GCC diagnostic push
257 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
258  sched_param s_param = {};
259  s_param.sched_priority = rt_priority_;
260  if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
261  TLOG_WARNING(app_name) << "setting realtime priority failed" << TLOG_ENDL;
262 #pragma GCC diagnostic pop
263  }
264 
265  // try-catch block here?
266 
267  // how to turn RT PRI off?
268  if (rt_priority_ > 0)
269  {
270 #pragma GCC diagnostic push
271 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
272  sched_param s_param = {};
273  s_param.sched_priority = rt_priority_;
274  int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
275  if (status != 0)
276  {
277  TLOG_ERROR(app_name)
278  << "Failed to set realtime priority to " << rt_priority_
279  << ", return code = " << status << TLOG_ENDL;
280  }
281 #pragma GCC diagnostic pop
282  }
283 
284  TLOG_DEBUG(app_name) << "Initializing DataSenderManager. my_rank=" << my_rank << TLOG_ENDL;
285  sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
286 
287  //MPI_Barrier(local_group_comm_);
288 
289  TLOG_DEBUG(app_name) << "Waiting for first fragment." << TLOG_ENDL;
290  artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
291  double delta_time;
292  artdaq::FragmentPtrs frags;
293  bool active = true;
294  //MPI_Request mpi_request;
295  //bool barrier_is_pending = false;
296  while (active)
297  {
298  startTime = artdaq::MonitoredQuantity::getCurrentTime();
299 
300  TRACE(18, app_name + "::process_fragments getNext start");
301  active = generator_ptr_->getNext(frags);
302  TRACE(18, app_name + "::process_fragments getNext done (active=%i)", active);
303  // 08-May-2015, KAB & JCF: if the generator getNext() method returns false
304  // (which indicates that the data flow has stopped) *and* the reason that
305  // it has stopped is because there was an exception that wasn't handled by
306  // the experiment-specific FragmentGenerator class, we move to the
307  // InRunError state so that external observers (e.g. RunControl or
308  // DAQInterface) can see that there was a problem.
309  if (!active && generator_ptr_->exception())
310  {
311  parent_application_.in_run_failure();
312  }
313 
314  delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
315  statsHelper_.addSample(INPUT_WAIT_STAT_KEY, delta_time);
316 
317  TLOG_ARB(16,app_name) << "process_fragments INPUT_WAIT="<<std::to_string( delta_time) << TLOG_ENDL;
318 
319  if (!active) { break; }
320  statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
321 
322  for (auto& fragPtr : frags)
323  {
324  if (!fragPtr.get())
325  {
326  TLOG_WARNING(app_name) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
327  << "This is most likely caused by a problem with the Fragment Generator!" << TLOG_ENDL;
328  continue;
329  }
330  artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
331  statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->size());
332 
333  if ((fragment_count_ % 250) == 0)
334  {
335  TLOG_DEBUG(app_name)
336  << "Sending fragment " << fragment_count_
337  << " (%250) with sequence id " << sequence_id << "." << TLOG_ENDL;
338  }
339 
340  /* ELF 5/10/2017 Removing in favor of DataReceiverManager source suppression logic
341  startTime = artdaq::MonitoredQuantity::getCurrentTime();
342  // 10-Sep-2015, KAB - added non-blocking synchronization between
343  // BoardReader processes. Ibarrier is called every N fragments
344  // by each BoardReader, but each BR is allowed to continue processing
345  // fragments until a specified threshold of additional fragments is
346  // reached. Once that threshold is reached, and one or more of the
347  // other BoardReaders haven't called Ibarrier, we wait.
348  if (mpi_sync_fragment_interval_ > 0 && fragment_count_ > 0 &&
349  (fragment_count_ % mpi_sync_fragment_interval_) == 0)
350  {
351  TLOG_ARB(4, "BoardReaderCore: Entering MPI Barrier");
352  MPI_Ibarrier(local_group_comm_, &mpi_request);
353  barrier_is_pending = true;
354  }
355  if (barrier_is_pending)
356  {
357  MPI_Status mpi_status;
358  int test_flag;
359  int retcode = MPI_Test(&mpi_request, &test_flag, &mpi_status);
360  if (retcode != MPI_SUCCESS)
361  {
362  TLOG_ERROR(app_name)
363  << "MPI_Test for Ibarrier completion failed with return code "
364  << retcode << TLOG_ENDL;
365  }
366 
367  if (test_flag != 0)
368  {
369  barrier_is_pending = false;
370  }
371  else
372  {
373  int tmpVal = (fragment_count_ % mpi_sync_fragment_interval_);
374  if (tmpVal >= mpi_sync_wait_threshold_count_)
375  {
376  int report_interval = mpi_sync_wait_log_interval_sec_;
377  time_t last_report_time = time(0);
378  while (test_flag == 0 && !stop_requested_.load())
379  {
380  usleep(mpi_sync_wait_interval_usec_);
381  retcode = MPI_Test(&mpi_request, &test_flag, &mpi_status);
382  if (retcode != MPI_SUCCESS || test_flag == 0)
383  {
384  time_t now = time(0);
385  if ((now - last_report_time) >= report_interval)
386  {
387  if (retcode != MPI_SUCCESS)
388  {
389  TLOG_ERROR(app_name)
390  << "MPI_Test for Ibarrier completion failed with return code "
391  << retcode << TLOG_ENDL;
392  }
393  else
394  {
395  if (mpi_sync_wait_log_level_ == 2)
396  {
397  TLOG_WARNING(app_name)
398  << "Waiting for one or more BoardReaders to catch up "
399  << "so that the sending of data fragments is reasonably "
400  << "well synchronized (fragment count is currently "
401  << fragment_count_
402  << "). If this situation persists, it may indicate that "
403  << "the data flow from one or more BoardReaders has "
404  << "stopped, possibly because of a problem reading out "
405  << "the associated hardware component(s)." << TLOG_ENDL;
406  }
407  else if (mpi_sync_wait_log_level_ == 3)
408  {
409  TLOG_ERROR(app_name)
410  << "Waiting for one or more BoardReaders to catch up "
411  << "so that the sending of data fragments is reasonably "
412  << "well synchronized (fragment count is currently "
413  << fragment_count_
414  << "). If this situation persists, it may indicate that "
415  << "the data flow from one or more BoardReaders has "
416  << "stopped, possibly because of a problem reading out "
417  << "the associated hardware component(s)." << TLOG_ENDL;
418  }
419  }
420  last_report_time = now;
421  report_interval += mpi_sync_wait_log_interval_sec_;
422  }
423  }
424  }
425  if (test_flag != 0)
426  {
427  barrier_is_pending = false;
428  }
429  }
430  }
431  }
432  statsHelper_.addSample(BRSYNC_WAIT_STAT_KEY,
433  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
434  */
435 
436  // check for continous sequence IDs
437  if (!skip_seqId_test_ && abs(sequence_id - prev_seq_id_) > 1)
438  {
439  TLOG_WARNING(app_name)
440  << "Missing sequence IDs: current sequence ID = "
441  << sequence_id << ", previous sequence ID = "
442  << prev_seq_id_ << "." << TLOG_ENDL;
443  }
444  prev_seq_id_ = sequence_id;
445 
446  startTime = artdaq::MonitoredQuantity::getCurrentTime();
447  TLOG_ARB(17,app_name) << "process_fragments seq="<< std::to_string(sequence_id) << " sendFragment start" << TLOG_ENDL;
448  auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
449  TLOG_ARB(17, app_name) << "process_fragments seq=" << std::to_string(sequence_id) << " sendFragment done (res="<< res<<")"<<TLOG_ENDL;
450  ++fragment_count_;
451  statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
452  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
453 
454  bool readyToReport = statsHelper_.readyToReport(fragment_count_);
455  if (readyToReport)
456  {
457  std::string statString = buildStatisticsString_();
458  TLOG_DEBUG(app_name) << statString << TLOG_ENDL;
459  }
460  if (fragment_count_ == 1 || readyToReport)
461  {
462  TLOG_DEBUG(app_name)
463  << "Sending fragment " << fragment_count_
464  << " with sequence id " << sequence_id << "." << TLOG_ENDL;
465  }
466  }
467  if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
468  frags.clear();
469  }
470 
471  // 07-Feb-2013, KAB
472  // removing this barrier so that we can stop the trigger (V1495)
473  // generation and readout before stopping the readout of the other cards
474  //MPI_Barrier(local_group_comm_);
475 
476  // 11-May-2015, KAB: call MetricManager::do_stop whenever we exit the
477  // processing fragments loop so that metrics correctly go to zero when
478  // there is no data flowing
479  metricMan_.do_stop();
480 
481  sender_ptr_.reset(nullptr);
482 }
483 
484 std::string artdaq::BoardReaderCore::report(std::string const& which) const
485 {
486  std::string resultString;
487 
488  // pass the request to the FragmentGenerator instance, if it's available
489  if (generator_ptr_.get() != 0)
490  {
491  resultString = generator_ptr_->ReportCmd(which);
492  if (resultString.length() > 0) { return resultString; }
493  }
494 
495  // handle the request at this level, if we can
496  // --> nothing here yet
497 
498  // if we haven't been able to come up with any report so far, say so
499  std::string tmpString = app_name + " run number = ";
500  tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
501  tmpString.append(". Command=\"" + which + "\" is not currently supported.");
502  return tmpString;
503 }
504 
505 std::string artdaq::BoardReaderCore::buildStatisticsString_()
506 {
507  std::ostringstream oss;
508  oss << app_name << " statistics:" << std::endl;
509 
510  double fragmentCount = 1.0;
511  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
512  getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
513  if (mqPtr.get() != 0)
514  {
515  artdaq::MonitoredQuantityStats stats;
516  mqPtr->getStats(stats);
517  oss << " Fragment statistics: "
518  << stats.recentSampleCount << " fragments received at "
519  << stats.recentSampleRate << " fragments/sec, effective data rate = "
520  << (stats.recentValueRate * sizeof(artdaq::RawDataType)
521  / 1024.0 / 1024.0) << " MB/sec, monitor window = "
522  << stats.recentDuration << " sec, min::max event size = "
523  << (stats.recentValueMin * sizeof(artdaq::RawDataType)
524  / 1024.0 / 1024.0)
525  << "::"
526  << (stats.recentValueMax * sizeof(artdaq::RawDataType)
527  / 1024.0 / 1024.0)
528  << " MB" << std::endl;
529  fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
530  oss << " Average times per fragment: ";
531  if (stats.recentSampleRate > 0.0)
532  {
533  oss << " elapsed time = "
534  << (1.0 / stats.recentSampleRate) << " sec";
535  }
536  }
537 
538  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
539  // denominator of the calculations below is important because the way that
540  // the accumulation of these statistics is done is not fragment-by-fragment
541  // but read-by-read (where each read can contain multiple fragments).
542  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
543  // fragment, but we'll leave the calculation the same. (The alternative
544  // would be to use recentValueAverage().)
545 
546  mqPtr = artdaq::StatisticsCollection::getInstance().
547  getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
548  if (mqPtr.get() != 0)
549  {
550  oss << ", input wait time = "
551  << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
552  }
553 
554  mqPtr = artdaq::StatisticsCollection::getInstance().
555  getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
556  if (mqPtr.get() != 0)
557  {
558  oss << ", BRsync wait time = "
559  << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
560  }
561 
562  mqPtr = artdaq::StatisticsCollection::getInstance().
563  getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
564  if (mqPtr.get() != 0)
565  {
566  oss << ", output wait time = "
567  << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
568  }
569 
570  oss << std::endl << " Fragments per read: ";
571  mqPtr = artdaq::StatisticsCollection::getInstance().
572  getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
573  if (mqPtr.get() != 0)
574  {
575  artdaq::MonitoredQuantityStats stats;
576  mqPtr->getStats(stats);
577  oss << "average = "
578  << stats.recentValueAverage
579  << ", min::max = "
580  << stats.recentValueMin
581  << "::"
582  << stats.recentValueMax;
583  }
584 
585  return oss.str();
586 }
587 
588 void artdaq::BoardReaderCore::sendMetrics_()
589 {
590  //TLOG_DEBUG("BoardReaderCore") << "Sending metrics " << __LINE__ << TLOG_ENDL;
591  double fragmentCount = 1.0;
592  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
593  getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
594  if (mqPtr.get() != 0)
595  {
596  artdaq::MonitoredQuantityStats stats;
597  mqPtr->getStats(stats);
598  fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
599  metricMan_.sendMetric("Fragment Count", static_cast<unsigned long>(stats.fullSampleCount), "fragments", 1, MetricMode::Accumulate);
600  metricMan_.sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
601  metricMan_.sendMetric("Average Fragment Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/fragment", 2, MetricMode::Average);
602  metricMan_.sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
603  }
604 
605  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
606  // denominator of the calculations below is important because the way that
607  // the accumulation of these statistics is done is not fragment-by-fragment
608  // but read-by-read (where each read can contain multiple fragments).
609  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
610  // fragment, but we'll leave the calculation the same. (The alternative
611  // would be to use recentValueAverage().)
612 
613  mqPtr = artdaq::StatisticsCollection::getInstance().
614  getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
615  if (mqPtr.get() != 0)
616  {
617  metricMan_.sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
618  }
619 
620  mqPtr = artdaq::StatisticsCollection::getInstance().
621  getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
622  if (mqPtr.get() != 0)
623  {
624  metricMan_.sendMetric("Avg BoardReader Sync Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
625  }
626 
627  mqPtr = artdaq::StatisticsCollection::getInstance().
628  getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
629  if (mqPtr.get() != 0)
630  {
631  metricMan_.sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
632  }
633 
634  mqPtr = artdaq::StatisticsCollection::getInstance().
635  getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
636  if (mqPtr.get() != 0)
637  {
638  metricMan_.sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
639  }
640 }
void addMonitoredQuantityName(std::string const &statKey)
Add a MonitoredQuantity name to the list.
Commandable is the base class for all artdaq components which implement the artdaq state machine...
Definition: Commandable.hh:20
Sends Fragment objects using TransferInterface plugins. Uses Routing Tables if configured, otherwise will Round-Robin Fragments to the destinations.
static const std::string FRAGMENTS_PROCESSED_STAT_KEY
Key for the Fragments Processed MonitoredQuantity.
static const std::string INPUT_WAIT_STAT_KEY
Key for the Input Wait MonitoredQuantity.
bool stop(uint64_t timeout, uint64_t timestamp)
Stop the BoardReader, and the CommandableFragmentGenerator.
virtual ~BoardReaderCore()
BoardReaderCore Destructor.
BoardReaderCore(Commandable &parent_application)
BoardReaderCore Constructor.
std::unique_ptr< CommandableFragmentGenerator > makeCommandableFragmentGenerator(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps)
Load a CommandableFragmentGenerator plugin.
bool reinitialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Reinitialize the BoardReader. No-Op.
bool soft_initialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Soft-Initialize the BoardReader. No-Op.
static const std::string BRSYNC_WAIT_STAT_KEY
Key for the Sync Wait MonitoredQuantity.
static const std::string FRAGMENTS_PER_READ_STAT_KEY
Key for the Fragments Per Read MonitoredQuantity.
static const std::string OUTPUT_WAIT_STAT_KEY
Key for the Output Wait MonitoredQuantity.
bool initialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Initialize the BoardReaderCore.
std::string report(std::string const &which) const
Send a report on a given run-time quantity.
void process_fragments()
Main working loop of the BoardReaderCore.
bool start(art::RunID id, uint64_t timeout, uint64_t timestamp)
Start the BoardReader, and the CommandableFragmentGenerator.
bool resume(uint64_t timeout, uint64_t timestamp)
Resume the BoardReader, and the CommandableFragmentGenerator.
bool pause(uint64_t timeout, uint64_t timestamp)
Pause the BoardReader, and the CommandableFragmentGenerator.
bool shutdown(uint64_t)
Shutdown the BoardReader, and the CommandableFragmentGenerator.