artdaq  v3_00_01
BoardReaderCore.cc
1 #define TRACE_NAME "BoardReaderCore"
2 #include "tracemf.h"
3 #include "artdaq/Application/TaskType.hh"
4 #include "artdaq/Application/BoardReaderCore.hh"
5 #include "artdaq-core/Data/Fragment.hh"
6 #include "artdaq-core/Utilities/ExceptionHandler.hh"
7 #include "artdaq/Application/makeCommandableFragmentGenerator.hh"
8 #include "canvas/Utilities/Exception.h"
9 #include "cetlib/exception.h"
10 #include <pthread.h>
11 #include <sched.h>
12 #include <algorithm>
13 
14 const std::string artdaq::BoardReaderCore::
15 FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
16 const std::string artdaq::BoardReaderCore::
17 INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
18 const std::string artdaq::BoardReaderCore::
19 BRSYNC_WAIT_STAT_KEY("BoardReaderCoreBRSyncWaitTime");
20 const std::string artdaq::BoardReaderCore::
21 OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
22 const std::string artdaq::BoardReaderCore::
23 FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
24 
25 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
26 
// BoardReaderCore constructor (tail of the definition).
// NOTE(review): the line that opens this definition (listing line 27) and
// listing lines 37-41 inside the body are absent from this extract; the
// embedded listing numbers jump 26 -> 28 and 36 -> 42. Restore them from the
// upstream file before building.
//
// What the visible part does: binds the parent application reference, leaves
// the generator pointer empty, copies the name, clears the stop and pause
// request flags, logs "Constructor", then points `metricMan` at this object's
// metricMan_ and copies `rank` into `my_rank`.
// NOTE(review): `metricMan` and `my_rank` are not declared in this file;
// presumably process-wide globals -- confirm.
28  int rank, std::string name) :
29  parent_application_(parent_application)
30  /*, local_group_comm_(local_group_comm)*/
31  , generator_ptr_(nullptr)
32  , name_(name)
33  , stop_requested_(false)
34  , pause_requested_(false)
35 {
36  TLOG_DEBUG(name_) << "Constructor" << TLOG_ENDL;
42  metricMan = &metricMan_;
43  my_rank = rank;
44 }
45 
47 {
48  TLOG_DEBUG(name_) << "Destructor" << TLOG_ENDL;
49 }
50 
51 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
52 {
53  TLOG_DEBUG(name_) << "initialize method called with " << "ParameterSet = \"" << pset.to_string() << "\"." << TLOG_ENDL;
54 
55  // pull out the relevant parts of the ParameterSet
56  fhicl::ParameterSet daq_pset;
57  try
58  {
59  daq_pset = pset.get<fhicl::ParameterSet>("daq");
60  }
61  catch (...)
62  {
63  TLOG_ERROR(name_)
64  << "Unable to find the DAQ parameters in the initialization "
65  << "ParameterSet: \"" + pset.to_string() + "\"." << TLOG_ENDL;
66  return false;
67  }
68  fhicl::ParameterSet fr_pset;
69  try
70  {
71  fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
72  data_pset_ = fr_pset;
73  }
74  catch (...)
75  {
76  TLOG_ERROR(name_)
77  << "Unable to find the fragment_receiver parameters in the DAQ "
78  << "initialization ParameterSet: \"" + daq_pset.to_string() + "\"." << TLOG_ENDL;
79  return false;
80  }
81 
82  // pull out the Metric part of the ParameterSet
83  fhicl::ParameterSet metric_pset;
84  try
85  {
86  metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
87  }
88  catch (...) {} // OK if there's no metrics table defined in the FHiCL
89 
90  if (metric_pset.is_empty())
91  {
92  TLOG_INFO(name_) << "No metric plugins appear to be defined" << TLOG_ENDL;
93  }
94  try
95  {
96  metricMan_.initialize(metric_pset, name_);
97  }
98  catch (...)
99  {
100  ExceptionHandler(ExceptionHandlerRethrow::no,
101  "Error loading metrics in BoardReaderCore::initialize()");
102  }
103 
104  // create the requested CommandableFragmentGenerator
105  std::string frag_gen_name = fr_pset.get<std::string>("generator", "");
106  if (frag_gen_name.length() == 0)
107  {
108  TLOG_ERROR(name_)
109  << "No fragment generator (parameter name = \"generator\") was "
110  << "specified in the fragment_receiver ParameterSet. The "
111  << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\"." << TLOG_ENDL;
112  return false;
113  }
114 
115  try
116  {
117  generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
118  }
119  catch (...)
120  {
121  std::stringstream exception_string;
122  exception_string << "Exception thrown during initialization of fragment generator of type \""
123  << frag_gen_name << "\"";
124 
125  ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
126 
127  TLOG_DEBUG(name_) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string() << TLOG_ENDL;
128 
129  return false;
130  }
131  metricMan_.setPrefix(generator_ptr_->metricsReportingInstanceName());
132 
133  rt_priority_ = fr_pset.get<int>("rt_priority", 0);
134  /* ELF 5/10/2017 Removing in favor of DataReceiverManager source suppression logic
135  mpi_sync_fragment_interval_ = fr_pset.get<int>("mpi_sync_interval", 0);
136  if (mpi_sync_fragment_interval_ > 0)
137  {
138  mpi_sync_wait_threshold_fraction_ = fr_pset.get<double>("mpi_sync_wait_threshold", 0.5);
139  mpi_sync_wait_threshold_count_ = mpi_sync_fragment_interval_ * mpi_sync_wait_threshold_fraction_;
140  if (mpi_sync_wait_threshold_count_ >= mpi_sync_fragment_interval_)
141  {
142  TLOG_WARNING(name_) << "The calculated mpi_sync wait threshold "
143  << "(" << mpi_sync_wait_threshold_count_ << " fragments) "
144  << "is too large, setting it to "
145  << (mpi_sync_fragment_interval_ - 1) << "." << TLOG_ENDL;
146  mpi_sync_wait_threshold_count_ = mpi_sync_fragment_interval_ - 1;
147  }
148  if (mpi_sync_wait_threshold_count_ < 0)
149  {
150  TLOG_WARNING(name_) << "The calculated mpi_sync wait threshold "
151  << "(" << mpi_sync_wait_threshold_count_ << " fragments) "
152  << "is too small, setting it to zero." << TLOG_ENDL;
153  mpi_sync_wait_threshold_count_ = 0;
154  }
155  mpi_sync_wait_interval_usec_ = fr_pset.get<size_t>("mpi_sync_wait_interval_usec", 100);
156  mpi_sync_wait_log_level_ = fr_pset.get<int>("mpi_sync_wait_log_level", 2);
157  mpi_sync_wait_log_interval_sec_ = fr_pset.get<int>("mpi_sync_wait_log_interval_sec", 10);
158  }
159  else
160  {
161  mpi_sync_wait_threshold_fraction_ = 0.0;
162  mpi_sync_wait_threshold_count_ = 0;
163  mpi_sync_wait_interval_usec_ = 1000000;
164  mpi_sync_wait_log_level_ = 0;
165  mpi_sync_wait_log_interval_sec_ = 10;
166  }
167  TLOG_DEBUG(name_)
168  << "mpi_sync_fragment_interval is " << mpi_sync_fragment_interval_
169  << ", mpi_sync_wait_threshold_fraction is " << mpi_sync_wait_threshold_fraction_
170  << ", mpi_sync_wait_threshold_count is " << mpi_sync_wait_threshold_count_
171  << ", mpi_sync_wait_interval_usec is " << mpi_sync_wait_interval_usec_
172  << ", mpi_sync_wait_log_level is " << mpi_sync_wait_log_level_
173  << ", mpi_sync_wait_log_interval_sec is " << mpi_sync_wait_log_interval_sec_ << TLOG_ENDL;
174  */
175  // fetch the monitoring parameters and create the MonitoredQuantity instances
176  statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
177 
178  // check if we should skip the sequence ID test...
179  skip_seqId_test_ = (generator_ptr_->fragmentIDs().size() > 1);
180 
181  return true;
182 }
183 
184 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
185 {
186  stop_requested_.store(false);
187  pause_requested_.store(false);
188 
189  fragment_count_ = 0;
190  prev_seq_id_ = 0;
191  statsHelper_.resetStatistics();
192 
193  metricMan_.do_start();
194  generator_ptr_->StartCmd(id.run(), timeout, timestamp);
195  run_id_ = id;
196 
197  TLOG_DEBUG(name_) << "Started run " << run_id_.run() <<
198  ", timeout = " << timeout << ", timestamp = " << timestamp << TLOG_ENDL;
199  return true;
200 }
201 
202 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
203 {
204  TLOG_DEBUG(name_) << "Stopping run " << run_id_.run()
205  << " after " << fragment_count_
206  << " fragments." << TLOG_ENDL;
207  stop_requested_.store(true);
208  generator_ptr_->StopCmd(timeout, timestamp);
209  return true;
210 }
211 
212 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
213 {
214  TLOG_DEBUG(name_) << "Pausing run " << run_id_.run()
215  << " after " << fragment_count_
216  << " fragments." << TLOG_ENDL;
217  pause_requested_.store(true);
218  generator_ptr_->PauseCmd(timeout, timestamp);
219  return true;
220 }
221 
222 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
223 {
224  TLOG_DEBUG(name_) << "Resuming run " << run_id_.run() << TLOG_ENDL;
225  pause_requested_.store(false);
226  metricMan_.do_start();
227  generator_ptr_->ResumeCmd(timeout, timestamp);
228  return true;
229 }
230 
232 {
233  generator_ptr_->joinThreads(); // Cleanly shut down the CommandableFragmentGenerator
234  generator_ptr_.reset(nullptr);
235  metricMan_.shutdown();
236  return true;
237 }
238 
239 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
240 {
241  TLOG_DEBUG(name_) << "soft_initialize method called with "
242  << "ParameterSet = \"" << pset.to_string()
243  << "\"." << TLOG_ENDL;
244  return true;
245 }
246 
247 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
248 {
249  TLOG_DEBUG(name_) << "reinitialize method called with "
250  << "ParameterSet = \"" << pset.to_string()
251  << "\"." << TLOG_ENDL;
252  return true;
253 }
254 
256 {
257  if (rt_priority_ > 0)
258  {
259 #pragma GCC diagnostic push
260 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
261  sched_param s_param = {};
262  s_param.sched_priority = rt_priority_;
263  if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
264  TLOG_WARNING(name_) << "setting realtime priority failed" << TLOG_ENDL;
265 #pragma GCC diagnostic pop
266  }
267 
268  // try-catch block here?
269 
270  // how to turn RT PRI off?
271  if (rt_priority_ > 0)
272  {
273 #pragma GCC diagnostic push
274 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
275  sched_param s_param = {};
276  s_param.sched_priority = rt_priority_;
277  int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
278  if (status != 0)
279  {
280  TLOG_ERROR(name_)
281  << "Failed to set realtime priority to " << rt_priority_
282  << ", return code = " << status << TLOG_ENDL;
283  }
284 #pragma GCC diagnostic pop
285  }
286 
287  TLOG_DEBUG(name_) << "Initializing DataSenderManager. my_rank=" << my_rank << TLOG_ENDL;
288  sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
289 
290  //MPI_Barrier(local_group_comm_);
291 
292  TLOG_DEBUG(name_) << "Waiting for first fragment." << TLOG_ENDL;
293  artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
294  double delta_time;
295  artdaq::FragmentPtrs frags;
296  bool active = true;
297  //MPI_Request mpi_request;
298  //bool barrier_is_pending = false;
299  while (active)
300  {
301  startTime = artdaq::MonitoredQuantity::getCurrentTime();
302 
303  TRACE(18, name_ + "::process_fragments getNext start");
304  active = generator_ptr_->getNext(frags);
305  TRACE(18, name_ + "::process_fragments getNext done (active=%i)", active);
306  // 08-May-2015, KAB & JCF: if the generator getNext() method returns false
307  // (which indicates that the data flow has stopped) *and* the reason that
308  // it has stopped is because there was an exception that wasn't handled by
309  // the experiment-specific FragmentGenerator class, we move to the
310  // InRunError state so that external observers (e.g. RunControl or
311  // DAQInterface) can see that there was a problem.
312  if (!active && generator_ptr_->exception())
313  {
314  parent_application_.in_run_failure();
315  }
316 
317  delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
318  statsHelper_.addSample(INPUT_WAIT_STAT_KEY, delta_time);
319 
320  TLOG_ARB(16,name_) << "process_fragments INPUT_WAIT="<<std::to_string( delta_time) << TLOG_ENDL;
321 
322  if (!active) { break; }
323  statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
324 
325  for (auto& fragPtr : frags)
326  {
327  if (!fragPtr.get())
328  {
329  TLOG_WARNING(name_) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
330  << "This is most likely caused by a problem with the Fragment Generator!" << TLOG_ENDL;
331  continue;
332  }
333  artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
334  statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->size());
335 
336  if ((fragment_count_ % 250) == 0)
337  {
338  TLOG_DEBUG(name_)
339  << "Sending fragment " << fragment_count_
340  << " (%250) with sequence id " << sequence_id << "." << TLOG_ENDL;
341  }
342 
343  /* ELF 5/10/2017 Removing in favor of DataReceiverManager source suppression logic
344  startTime = artdaq::MonitoredQuantity::getCurrentTime();
345  // 10-Sep-2015, KAB - added non-blocking synchronization between
346  // BoardReader processes. Ibarrier is called every N fragments
347  // by each BoardReader, but each BR is allowed to continue processing
348  // fragments until a specified threshold of additional fragments is
349  // reached. Once that threshold is reached, and one or more of the
350  // other BoardReaders haven't called Ibarrier, we wait.
351  if (mpi_sync_fragment_interval_ > 0 && fragment_count_ > 0 &&
352  (fragment_count_ % mpi_sync_fragment_interval_) == 0)
353  {
354  TLOG_ARB(4, "BoardReaderCore: Entering MPI Barrier");
355  MPI_Ibarrier(local_group_comm_, &mpi_request);
356  barrier_is_pending = true;
357  }
358  if (barrier_is_pending)
359  {
360  MPI_Status mpi_status;
361  int test_flag;
362  int retcode = MPI_Test(&mpi_request, &test_flag, &mpi_status);
363  if (retcode != MPI_SUCCESS)
364  {
365  TLOG_ERROR(name_)
366  << "MPI_Test for Ibarrier completion failed with return code "
367  << retcode << TLOG_ENDL;
368  }
369 
370  if (test_flag != 0)
371  {
372  barrier_is_pending = false;
373  }
374  else
375  {
376  int tmpVal = (fragment_count_ % mpi_sync_fragment_interval_);
377  if (tmpVal >= mpi_sync_wait_threshold_count_)
378  {
379  int report_interval = mpi_sync_wait_log_interval_sec_;
380  time_t last_report_time = time(0);
381  while (test_flag == 0 && !stop_requested_.load())
382  {
383  usleep(mpi_sync_wait_interval_usec_);
384  retcode = MPI_Test(&mpi_request, &test_flag, &mpi_status);
385  if (retcode != MPI_SUCCESS || test_flag == 0)
386  {
387  time_t now = time(0);
388  if ((now - last_report_time) >= report_interval)
389  {
390  if (retcode != MPI_SUCCESS)
391  {
392  TLOG_ERROR(name_)
393  << "MPI_Test for Ibarrier completion failed with return code "
394  << retcode << TLOG_ENDL;
395  }
396  else
397  {
398  if (mpi_sync_wait_log_level_ == 2)
399  {
400  TLOG_WARNING(name_)
401  << "Waiting for one or more BoardReaders to catch up "
402  << "so that the sending of data fragments is reasonably "
403  << "well synchronized (fragment count is currently "
404  << fragment_count_
405  << "). If this situation persists, it may indicate that "
406  << "the data flow from one or more BoardReaders has "
407  << "stopped, possibly because of a problem reading out "
408  << "the associated hardware component(s)." << TLOG_ENDL;
409  }
410  else if (mpi_sync_wait_log_level_ == 3)
411  {
412  TLOG_ERROR(name_)
413  << "Waiting for one or more BoardReaders to catch up "
414  << "so that the sending of data fragments is reasonably "
415  << "well synchronized (fragment count is currently "
416  << fragment_count_
417  << "). If this situation persists, it may indicate that "
418  << "the data flow from one or more BoardReaders has "
419  << "stopped, possibly because of a problem reading out "
420  << "the associated hardware component(s)." << TLOG_ENDL;
421  }
422  }
423  last_report_time = now;
424  report_interval += mpi_sync_wait_log_interval_sec_;
425  }
426  }
427  }
428  if (test_flag != 0)
429  {
430  barrier_is_pending = false;
431  }
432  }
433  }
434  }
435  statsHelper_.addSample(BRSYNC_WAIT_STAT_KEY,
436  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
437  */
438 
439  // check for continous sequence IDs
440  if (!skip_seqId_test_ && abs(sequence_id - prev_seq_id_) > 1)
441  {
442  TLOG_WARNING(name_)
443  << "Missing sequence IDs: current sequence ID = "
444  << sequence_id << ", previous sequence ID = "
445  << prev_seq_id_ << "." << TLOG_ENDL;
446  }
447  prev_seq_id_ = sequence_id;
448 
449  startTime = artdaq::MonitoredQuantity::getCurrentTime();
450  TLOG_ARB(17,name_) << "process_fragments seq="<< std::to_string(sequence_id) << " sendFragment start" << TLOG_ENDL;
451  auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
452  TLOG_ARB(17, name_) << "process_fragments seq=" << std::to_string(sequence_id) << " sendFragment done (res="<< res<<")"<<TLOG_ENDL;
453  ++fragment_count_;
454  statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
455  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
456 
457  bool readyToReport = statsHelper_.readyToReport(fragment_count_);
458  if (readyToReport)
459  {
460  std::string statString = buildStatisticsString_();
461  TLOG_DEBUG(name_) << statString << TLOG_ENDL;
462  }
463  if (fragment_count_ == 1 || readyToReport)
464  {
465  TLOG_DEBUG(name_)
466  << "Sending fragment " << fragment_count_
467  << " with sequence id " << sequence_id << "." << TLOG_ENDL;
468  }
469  }
470  if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
471  frags.clear();
472  }
473 
474  // 07-Feb-2013, KAB
475  // removing this barrier so that we can stop the trigger (V1495)
476  // generation and readout before stopping the readout of the other cards
477  //MPI_Barrier(local_group_comm_);
478 
479  // 11-May-2015, KAB: call MetricManager::do_stop whenever we exit the
480  // processing fragments loop so that metrics correctly go to zero when
481  // there is no data flowing
482  metricMan_.do_stop();
483 
484  sender_ptr_.reset(nullptr);
485 }
486 
487 std::string artdaq::BoardReaderCore::report(std::string const& which) const
488 {
489  std::string resultString;
490 
491  // pass the request to the FragmentGenerator instance, if it's available
492  if (generator_ptr_.get() != 0)
493  {
494  resultString = generator_ptr_->ReportCmd(which);
495  if (resultString.length() > 0) { return resultString; }
496  }
497 
498  // handle the request at this level, if we can
499  // --> nothing here yet
500 
501  // if we haven't been able to come up with any report so far, say so
502  std::string tmpString = name_ + " run number = ";
503  tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
504  tmpString.append(". Command=\"" + which + "\" is not currently supported.");
505  return tmpString;
506 }
507 
508 std::string artdaq::BoardReaderCore::buildStatisticsString_()
509 {
510  std::ostringstream oss;
511  oss << name_ << " statistics:" << std::endl;
512 
513  double fragmentCount = 1.0;
514  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
515  getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
516  if (mqPtr.get() != 0)
517  {
518  artdaq::MonitoredQuantityStats stats;
519  mqPtr->getStats(stats);
520  oss << " Fragment statistics: "
521  << stats.recentSampleCount << " fragments received at "
522  << stats.recentSampleRate << " fragments/sec, effective data rate = "
523  << (stats.recentValueRate * sizeof(artdaq::RawDataType)
524  / 1024.0 / 1024.0) << " MB/sec, monitor window = "
525  << stats.recentDuration << " sec, min::max event size = "
526  << (stats.recentValueMin * sizeof(artdaq::RawDataType)
527  / 1024.0 / 1024.0)
528  << "::"
529  << (stats.recentValueMax * sizeof(artdaq::RawDataType)
530  / 1024.0 / 1024.0)
531  << " MB" << std::endl;
532  fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
533  oss << " Average times per fragment: ";
534  if (stats.recentSampleRate > 0.0)
535  {
536  oss << " elapsed time = "
537  << (1.0 / stats.recentSampleRate) << " sec";
538  }
539  }
540 
541  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
542  // denominator of the calculations below is important because the way that
543  // the accumulation of these statistics is done is not fragment-by-fragment
544  // but read-by-read (where each read can contain multiple fragments).
545  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
546  // fragment, but we'll leave the calculation the same. (The alternative
547  // would be to use recentValueAverage().)
548 
549  mqPtr = artdaq::StatisticsCollection::getInstance().
550  getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
551  if (mqPtr.get() != 0)
552  {
553  oss << ", input wait time = "
554  << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
555  }
556 
557  mqPtr = artdaq::StatisticsCollection::getInstance().
558  getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
559  if (mqPtr.get() != 0)
560  {
561  oss << ", BRsync wait time = "
562  << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
563  }
564 
565  mqPtr = artdaq::StatisticsCollection::getInstance().
566  getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
567  if (mqPtr.get() != 0)
568  {
569  oss << ", output wait time = "
570  << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
571  }
572 
573  oss << std::endl << " Fragments per read: ";
574  mqPtr = artdaq::StatisticsCollection::getInstance().
575  getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
576  if (mqPtr.get() != 0)
577  {
578  artdaq::MonitoredQuantityStats stats;
579  mqPtr->getStats(stats);
580  oss << "average = "
581  << stats.recentValueAverage
582  << ", min::max = "
583  << stats.recentValueMin
584  << "::"
585  << stats.recentValueMax;
586  }
587 
588  return oss.str();
589 }
590 
591 void artdaq::BoardReaderCore::sendMetrics_()
592 {
593  //TLOG_DEBUG("BoardReaderCore") << "Sending metrics " << __LINE__ << TLOG_ENDL;
594  double fragmentCount = 1.0;
595  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
596  getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
597  if (mqPtr.get() != 0)
598  {
599  artdaq::MonitoredQuantityStats stats;
600  mqPtr->getStats(stats);
601  fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
602  metricMan_.sendMetric("Fragment Count", static_cast<unsigned long>(stats.fullSampleCount), "fragments", 1, MetricMode::Accumulate);
603  metricMan_.sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
604  metricMan_.sendMetric("Average Fragment Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/fragment", 2, MetricMode::Average);
605  metricMan_.sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
606  }
607 
608  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
609  // denominator of the calculations below is important because the way that
610  // the accumulation of these statistics is done is not fragment-by-fragment
611  // but read-by-read (where each read can contain multiple fragments).
612  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
613  // fragment, but we'll leave the calculation the same. (The alternative
614  // would be to use recentValueAverage().)
615 
616  mqPtr = artdaq::StatisticsCollection::getInstance().
617  getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
618  if (mqPtr.get() != 0)
619  {
620  metricMan_.sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
621  }
622 
623  mqPtr = artdaq::StatisticsCollection::getInstance().
624  getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
625  if (mqPtr.get() != 0)
626  {
627  metricMan_.sendMetric("Avg BoardReader Sync Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
628  }
629 
630  mqPtr = artdaq::StatisticsCollection::getInstance().
631  getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
632  if (mqPtr.get() != 0)
633  {
634  metricMan_.sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
635  }
636 
637  mqPtr = artdaq::StatisticsCollection::getInstance().
638  getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
639  if (mqPtr.get() != 0)
640  {
641  metricMan_.sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
642  }
643 }
BoardReaderCore(Commandable &parent_application, int rank, std::string name)
BoardReaderCore Constructor.
void addMonitoredQuantityName(std::string const &statKey)
Add a MonitoredQuantity name to the list.
Commandable is the base class for all artdaq components which implement the artdaq state machine...
Definition: Commandable.hh:20
Sends Fragment objects using TransferInterface plugins. Uses Routing Tables if configured; otherwise, Fragments are sent Round-Robin to the destinations.
static const std::string FRAGMENTS_PROCESSED_STAT_KEY
Key for the Fragments Processed MonitoredQuantity.
static const std::string INPUT_WAIT_STAT_KEY
Key for the Input Wait MonitoredQuantity.
bool stop(uint64_t timeout, uint64_t timestamp)
Stop the BoardReader, and the CommandableFragmentGenerator.
virtual ~BoardReaderCore()
BoardReaderCore Destructor.
std::unique_ptr< CommandableFragmentGenerator > makeCommandableFragmentGenerator(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps)
Load a CommandableFragmentGenerator plugin.
bool reinitialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Reinitialize the BoardReader. No-Op.
bool soft_initialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Soft-Initialize the BoardReader. No-Op.
static const std::string BRSYNC_WAIT_STAT_KEY
Key for the Sync Wait MonitoredQuantity.
static const std::string FRAGMENTS_PER_READ_STAT_KEY
Key for the Fragments Per Read MonitoredQuantity.
static const std::string OUTPUT_WAIT_STAT_KEY
Key for the Output Wait MonitoredQuantity.
bool initialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Initialize the BoardReaderCore.
std::string report(std::string const &which) const
Send a report on a given run-time quantity.
void process_fragments()
Main working loop of the BoardReaderCore.
bool start(art::RunID id, uint64_t timeout, uint64_t timestamp)
Start the BoardReader, and the CommandableFragmentGenerator.
bool resume(uint64_t timeout, uint64_t timestamp)
Resume the BoardReader, and the CommandableFragmentGenerator.
bool pause(uint64_t timeout, uint64_t timestamp)
Pause the BoardReader, and the CommandableFragmentGenerator.
bool shutdown(uint64_t)
Shutdown the BoardReader, and the CommandableFragmentGenerator.