artdaq  v2_02_03
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Pages
BoardReaderCore.cc
1 #include "artdaq/Application/TaskType.hh"
2 #include "artdaq/Application/BoardReaderCore.hh"
3 #include "artdaq-core/Data/Fragment.hh"
4 #include "artdaq-core/Utilities/ExceptionHandler.hh"
5 #include "artdaq/Application/makeCommandableFragmentGenerator.hh"
6 #include "canvas/Utilities/Exception.h"
7 #include "cetlib/exception.h"
8 #include <pthread.h>
9 #include <sched.h>
10 #include <algorithm>
11 
12 #define TRACE_NAME "BoardReaderCore"
13 
14 const std::string artdaq::BoardReaderCore::
15 FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
16 const std::string artdaq::BoardReaderCore::
17 INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
18 const std::string artdaq::BoardReaderCore::
19 BRSYNC_WAIT_STAT_KEY("BoardReaderCoreBRSyncWaitTime");
20 const std::string artdaq::BoardReaderCore::
21 OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
22 const std::string artdaq::BoardReaderCore::
23 FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
24 
25 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
26 
28  int rank, std::string name) :
29  parent_application_(parent_application)
30  /*, local_group_comm_(local_group_comm)*/
31  , generator_ptr_(nullptr)
32  , name_(name)
33  , stop_requested_(false)
34  , pause_requested_(false)
35 {
36  TLOG_DEBUG(name_) << "Constructor" << TLOG_ENDL;
42  metricMan = &metricMan_;
43  my_rank = rank;
44 }
45 
47 {
48  TLOG_DEBUG(name_) << "Destructor" << TLOG_ENDL;
49 }
50 
51 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
52 {
53  TLOG_DEBUG(name_) << "initialize method called with "
54  << "ParameterSet = \"" << pset.to_string()
55  << "\"." << TLOG_ENDL;
56 
57  // pull out the relevant parts of the ParameterSet
58  fhicl::ParameterSet daq_pset;
59  try
60  {
61  daq_pset = pset.get<fhicl::ParameterSet>("daq");
62  }
63  catch (...)
64  {
65  TLOG_ERROR(name_)
66  << "Unable to find the DAQ parameters in the initialization "
67  << "ParameterSet: \"" + pset.to_string() + "\"." << TLOG_ENDL;
68  return false;
69  }
70  fhicl::ParameterSet fr_pset;
71  try
72  {
73  fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
74  data_pset_ = fr_pset;
75  }
76  catch (...)
77  {
78  TLOG_ERROR(name_)
79  << "Unable to find the fragment_receiver parameters in the DAQ "
80  << "initialization ParameterSet: \"" + daq_pset.to_string() + "\"." << TLOG_ENDL;
81  return false;
82  }
83 
84  // pull out the Metric part of the ParameterSet
85  fhicl::ParameterSet metric_pset;
86  try
87  {
88  metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
89  }
90  catch (...) {} // OK if there's no metrics table defined in the FHiCL
91 
92  if (metric_pset.is_empty())
93  {
94  TLOG_INFO(name_) << "No metric plugins appear to be defined" << TLOG_ENDL;
95  }
96  try
97  {
98  metricMan_.initialize(metric_pset, name_);
99  }
100  catch (...)
101  {
102  ExceptionHandler(ExceptionHandlerRethrow::no,
103  "Error loading metrics in BoardReaderCore::initialize()");
104  }
105 
106  // create the requested CommandableFragmentGenerator
107  std::string frag_gen_name = fr_pset.get<std::string>("generator", "");
108  if (frag_gen_name.length() == 0)
109  {
110  TLOG_ERROR(name_)
111  << "No fragment generator (parameter name = \"generator\") was "
112  << "specified in the fragment_receiver ParameterSet. The "
113  << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\"." << TLOG_ENDL;
114  return false;
115  }
116 
117  try
118  {
119  generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
120  }
121  catch (...)
122  {
123  std::stringstream exception_string;
124  exception_string << "Exception thrown during initialization of fragment generator of type \""
125  << frag_gen_name << "\"";
126 
127  ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
128 
129  TLOG_DEBUG(name_) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string() << TLOG_ENDL;
130 
131  return false;
132  }
133  metricMan_.setPrefix(generator_ptr_->metricsReportingInstanceName());
134 
135  rt_priority_ = fr_pset.get<int>("rt_priority", 0);
136  /* ELF 5/10/2017 Removing in favor of DataReceiverManager source suppression logic
137  mpi_sync_fragment_interval_ = fr_pset.get<int>("mpi_sync_interval", 0);
138  if (mpi_sync_fragment_interval_ > 0)
139  {
140  mpi_sync_wait_threshold_fraction_ = fr_pset.get<double>("mpi_sync_wait_threshold", 0.5);
141  mpi_sync_wait_threshold_count_ = mpi_sync_fragment_interval_ * mpi_sync_wait_threshold_fraction_;
142  if (mpi_sync_wait_threshold_count_ >= mpi_sync_fragment_interval_)
143  {
144  TLOG_WARNING(name_) << "The calculated mpi_sync wait threshold "
145  << "(" << mpi_sync_wait_threshold_count_ << " fragments) "
146  << "is too large, setting it to "
147  << (mpi_sync_fragment_interval_ - 1) << "." << TLOG_ENDL;
148  mpi_sync_wait_threshold_count_ = mpi_sync_fragment_interval_ - 1;
149  }
150  if (mpi_sync_wait_threshold_count_ < 0)
151  {
152  TLOG_WARNING(name_) << "The calculated mpi_sync wait threshold "
153  << "(" << mpi_sync_wait_threshold_count_ << " fragments) "
154  << "is too small, setting it to zero." << TLOG_ENDL;
155  mpi_sync_wait_threshold_count_ = 0;
156  }
157  mpi_sync_wait_interval_usec_ = fr_pset.get<size_t>("mpi_sync_wait_interval_usec", 100);
158  mpi_sync_wait_log_level_ = fr_pset.get<int>("mpi_sync_wait_log_level", 2);
159  mpi_sync_wait_log_interval_sec_ = fr_pset.get<int>("mpi_sync_wait_log_interval_sec", 10);
160  }
161  else
162  {
163  mpi_sync_wait_threshold_fraction_ = 0.0;
164  mpi_sync_wait_threshold_count_ = 0;
165  mpi_sync_wait_interval_usec_ = 1000000;
166  mpi_sync_wait_log_level_ = 0;
167  mpi_sync_wait_log_interval_sec_ = 10;
168  }
169  TLOG_DEBUG(name_)
170  << "mpi_sync_fragment_interval is " << mpi_sync_fragment_interval_
171  << ", mpi_sync_wait_threshold_fraction is " << mpi_sync_wait_threshold_fraction_
172  << ", mpi_sync_wait_threshold_count is " << mpi_sync_wait_threshold_count_
173  << ", mpi_sync_wait_interval_usec is " << mpi_sync_wait_interval_usec_
174  << ", mpi_sync_wait_log_level is " << mpi_sync_wait_log_level_
175  << ", mpi_sync_wait_log_interval_sec is " << mpi_sync_wait_log_interval_sec_ << TLOG_ENDL;
176  */
177  // fetch the monitoring parameters and create the MonitoredQuantity instances
178  statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
179 
180  // check if we should skip the sequence ID test...
181  skip_seqId_test_ = (generator_ptr_->fragmentIDs().size() > 1);
182 
183  return true;
184 }
185 
186 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
187 {
188  stop_requested_.store(false);
189  pause_requested_.store(false);
190 
191  fragment_count_ = 0;
192  prev_seq_id_ = 0;
193  statsHelper_.resetStatistics();
194 
195  metricMan_.do_start();
196  generator_ptr_->StartCmd(id.run(), timeout, timestamp);
197  run_id_ = id;
198 
199  TLOG_DEBUG(name_) << "Started run " << run_id_.run() <<
200  ", timeout = " << timeout << ", timestamp = " << timestamp << TLOG_ENDL;
201  return true;
202 }
203 
204 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
205 {
206  TLOG_DEBUG(name_) << "Stopping run " << run_id_.run()
207  << " after " << fragment_count_
208  << " fragments." << TLOG_ENDL;
209  stop_requested_.store(true);
210  generator_ptr_->StopCmd(timeout, timestamp);
211  return true;
212 }
213 
214 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
215 {
216  TLOG_DEBUG(name_) << "Pausing run " << run_id_.run()
217  << " after " << fragment_count_
218  << " fragments." << TLOG_ENDL;
219  pause_requested_.store(true);
220  generator_ptr_->PauseCmd(timeout, timestamp);
221  return true;
222 }
223 
224 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
225 {
226  TLOG_DEBUG(name_) << "Resuming run " << run_id_.run() << TLOG_ENDL;
227  pause_requested_.store(false);
228  metricMan_.do_start();
229  generator_ptr_->ResumeCmd(timeout, timestamp);
230  return true;
231 }
232 
234 {
235  generator_ptr_.reset(nullptr);
236  metricMan_.shutdown();
237  return true;
238 }
239 
240 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
241 {
242  TLOG_DEBUG(name_) << "soft_initialize method called with "
243  << "ParameterSet = \"" << pset.to_string()
244  << "\"." << TLOG_ENDL;
245  return true;
246 }
247 
248 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
249 {
250  TLOG_DEBUG(name_) << "reinitialize method called with "
251  << "ParameterSet = \"" << pset.to_string()
252  << "\"." << TLOG_ENDL;
253  return true;
254 }
255 
257 {
258  if (rt_priority_ > 0)
259  {
260 #pragma GCC diagnostic push
261 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
262  sched_param s_param = {};
263  s_param.sched_priority = rt_priority_;
264  if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
265  TLOG_WARNING(name_) << "setting realtime priority failed" << TLOG_ENDL;
266 #pragma GCC diagnostic pop
267  }
268 
269  // try-catch block here?
270 
271  // how to turn RT PRI off?
272  if (rt_priority_ > 0)
273  {
274 #pragma GCC diagnostic push
275 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
276  sched_param s_param = {};
277  s_param.sched_priority = rt_priority_;
278  int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
279  if (status != 0)
280  {
281  TLOG_ERROR(name_)
282  << "Failed to set realtime priority to " << rt_priority_
283  << ", return code = " << status << TLOG_ENDL;
284  }
285 #pragma GCC diagnostic pop
286  }
287 
288  TLOG_DEBUG(name_) << "Initializing DataSenderManager. my_rank=" << my_rank << TLOG_ENDL;
289  sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
290 
291  //MPI_Barrier(local_group_comm_);
292 
293  TLOG_DEBUG(name_) << "Waiting for first fragment." << TLOG_ENDL;
294  artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
295  double delta_time;
296  artdaq::FragmentPtrs frags;
297  bool active = true;
298  //MPI_Request mpi_request;
299  //bool barrier_is_pending = false;
300  while (active)
301  {
302  startTime = artdaq::MonitoredQuantity::getCurrentTime();
303 
304  active = generator_ptr_->getNext(frags);
305  // 08-May-2015, KAB & JCF: if the generator getNext() method returns false
306  // (which indicates that the data flow has stopped) *and* the reason that
307  // it has stopped is because there was an exception that wasn't handled by
308  // the experiment-specific FragmentGenerator class, we move to the
309  // InRunError state so that external observers (e.g. RunControl or
310  // DAQInterface) can see that there was a problem.
311  if (!active && generator_ptr_->exception())
312  {
313  parent_application_.in_run_failure();
314  }
315 
316  delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
317  statsHelper_.addSample(INPUT_WAIT_STAT_KEY, delta_time);
318 
319  TRACE(16, "%s::process_fragments INPUT_WAIT=%f", name_.c_str(), delta_time);
320 
321  if (!active) { break; }
322  statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
323 
324  for (auto& fragPtr : frags)
325  {
326  if (!fragPtr.get())
327  {
328  TLOG_WARNING(name_) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
329  << "This is most likely caused by a problem with the Fragment Generator!" << TLOG_ENDL;
330  continue;
331  }
332  artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
333  statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->size());
334 
335  if ((fragment_count_ % 250) == 0)
336  {
337  TLOG_DEBUG(name_)
338  << "Sending fragment " << fragment_count_
339  << " with sequence id " << sequence_id << "." << TLOG_ENDL;
340  }
341 
342  /* ELF 5/10/2017 Removing in favor of DataReceiverManager source suppression logic
343  startTime = artdaq::MonitoredQuantity::getCurrentTime();
344  // 10-Sep-2015, KAB - added non-blocking synchronization between
345  // BoardReader processes. Ibarrier is called every N fragments
346  // by each BoardReader, but each BR is allowed to continue processing
347  // fragments until a specified threshold of additional fragments is
348  // reached. Once that threshold is reached, and one or more of the
349  // other BoardReaders haven't called Ibarrier, we wait.
350  if (mpi_sync_fragment_interval_ > 0 && fragment_count_ > 0 &&
351  (fragment_count_ % mpi_sync_fragment_interval_) == 0)
352  {
353  TRACE(4, "BoardReaderCore: Entering MPI Barrier");
354  MPI_Ibarrier(local_group_comm_, &mpi_request);
355  barrier_is_pending = true;
356  }
357  if (barrier_is_pending)
358  {
359  MPI_Status mpi_status;
360  int test_flag;
361  int retcode = MPI_Test(&mpi_request, &test_flag, &mpi_status);
362  if (retcode != MPI_SUCCESS)
363  {
364  TLOG_ERROR(name_)
365  << "MPI_Test for Ibarrier completion failed with return code "
366  << retcode << TLOG_ENDL;
367  }
368 
369  if (test_flag != 0)
370  {
371  barrier_is_pending = false;
372  }
373  else
374  {
375  int tmpVal = (fragment_count_ % mpi_sync_fragment_interval_);
376  if (tmpVal >= mpi_sync_wait_threshold_count_)
377  {
378  int report_interval = mpi_sync_wait_log_interval_sec_;
379  time_t last_report_time = time(0);
380  while (test_flag == 0 && !stop_requested_.load())
381  {
382  usleep(mpi_sync_wait_interval_usec_);
383  retcode = MPI_Test(&mpi_request, &test_flag, &mpi_status);
384  if (retcode != MPI_SUCCESS || test_flag == 0)
385  {
386  time_t now = time(0);
387  if ((now - last_report_time) >= report_interval)
388  {
389  if (retcode != MPI_SUCCESS)
390  {
391  TLOG_ERROR(name_)
392  << "MPI_Test for Ibarrier completion failed with return code "
393  << retcode << TLOG_ENDL;
394  }
395  else
396  {
397  if (mpi_sync_wait_log_level_ == 2)
398  {
399  TLOG_WARNING(name_)
400  << "Waiting for one or more BoardReaders to catch up "
401  << "so that the sending of data fragments is reasonably "
402  << "well synchronized (fragment count is currently "
403  << fragment_count_
404  << "). If this situation persists, it may indicate that "
405  << "the data flow from one or more BoardReaders has "
406  << "stopped, possibly because of a problem reading out "
407  << "the associated hardware component(s)." << TLOG_ENDL;
408  }
409  else if (mpi_sync_wait_log_level_ == 3)
410  {
411  TLOG_ERROR(name_)
412  << "Waiting for one or more BoardReaders to catch up "
413  << "so that the sending of data fragments is reasonably "
414  << "well synchronized (fragment count is currently "
415  << fragment_count_
416  << "). If this situation persists, it may indicate that "
417  << "the data flow from one or more BoardReaders has "
418  << "stopped, possibly because of a problem reading out "
419  << "the associated hardware component(s)." << TLOG_ENDL;
420  }
421  }
422  last_report_time = now;
423  report_interval += mpi_sync_wait_log_interval_sec_;
424  }
425  }
426  }
427  if (test_flag != 0)
428  {
429  barrier_is_pending = false;
430  }
431  }
432  }
433  }
434  statsHelper_.addSample(BRSYNC_WAIT_STAT_KEY,
435  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
436  */
437 
438  // check for continous sequence IDs
439  if (!skip_seqId_test_ && abs(sequence_id - prev_seq_id_) > 1)
440  {
441  TLOG_WARNING(name_)
442  << "Missing sequence IDs: current sequence ID = "
443  << sequence_id << ", previous sequence ID = "
444  << prev_seq_id_ << "." << TLOG_ENDL;
445  }
446  prev_seq_id_ = sequence_id;
447 
448  startTime = artdaq::MonitoredQuantity::getCurrentTime();
449  TRACE(17, "%s::process_fragments seq=%lu sendFragment start", name_.c_str(), sequence_id);
450  auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
451  TRACE(17, "%s::process_fragments seq=%lu sendFragment done (res=%i)", name_.c_str(), sequence_id,res);
452  ++fragment_count_;
453  statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
454  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
455 
456  bool readyToReport = statsHelper_.readyToReport(fragment_count_);
457  if (readyToReport)
458  {
459  std::string statString = buildStatisticsString_();
460  TLOG_DEBUG(name_) << statString << TLOG_ENDL;
461  }
462  if (fragment_count_ == 1 || readyToReport)
463  {
464  TLOG_DEBUG(name_)
465  << "Sending fragment " << fragment_count_
466  << " with sequence id " << sequence_id << "." << TLOG_ENDL;
467  }
468  }
469  if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
470  frags.clear();
471  }
472 
473  // 07-Feb-2013, KAB
474  // removing this barrier so that we can stop the trigger (V1495)
475  // generation and readout before stopping the readout of the other cards
476  //MPI_Barrier(local_group_comm_);
477 
478  // 11-May-2015, KAB: call MetricManager::do_stop whenever we exit the
479  // processing fragments loop so that metrics correctly go to zero when
480  // there is no data flowing
481  metricMan_.do_stop();
482 
483  sender_ptr_.reset(nullptr);
484  return fragment_count_;
485 }
486 
487 std::string artdaq::BoardReaderCore::report(std::string const& which) const
488 {
489  std::string resultString;
490 
491  // pass the request to the FragmentGenerator instance, if it's available
492  if (generator_ptr_.get() != 0)
493  {
494  resultString = generator_ptr_->ReportCmd(which);
495  if (resultString.length() > 0) { return resultString; }
496  }
497 
498  // handle the request at this level, if we can
499  // --> nothing here yet
500 
501  // if we haven't been able to come up with any report so far, say so
502  std::string tmpString = name_ + " run number = ";
503  tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
504  tmpString.append(". Command=\"" + which + "\" is not currently supported.");
505  return tmpString;
506 }
507 
508 std::string artdaq::BoardReaderCore::buildStatisticsString_()
509 {
510  std::ostringstream oss;
511  oss << name_ << " statistics:" << std::endl;
512 
513  double fragmentCount = 1.0;
514  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
515  getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
516  if (mqPtr.get() != 0)
517  {
518  artdaq::MonitoredQuantityStats stats;
519  mqPtr->getStats(stats);
520  oss << " Fragment statistics: "
521  << stats.recentSampleCount << " fragments received at "
522  << stats.recentSampleRate << " fragments/sec, effective data rate = "
523  << (stats.recentValueRate * sizeof(artdaq::RawDataType)
524  / 1024.0 / 1024.0) << " MB/sec, monitor window = "
525  << stats.recentDuration << " sec, min::max event size = "
526  << (stats.recentValueMin * sizeof(artdaq::RawDataType)
527  / 1024.0 / 1024.0)
528  << "::"
529  << (stats.recentValueMax * sizeof(artdaq::RawDataType)
530  / 1024.0 / 1024.0)
531  << " MB" << std::endl;
532  fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
533  oss << " Average times per fragment: ";
534  if (stats.recentSampleRate > 0.0)
535  {
536  oss << " elapsed time = "
537  << (1.0 / stats.recentSampleRate) << " sec";
538  }
539  }
540 
541  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
542  // denominator of the calculations below is important because the way that
543  // the accumulation of these statistics is done is not fragment-by-fragment
544  // but read-by-read (where each read can contain multiple fragments).
545  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
546  // fragment, but we'll leave the calculation the same. (The alternative
547  // would be to use recentValueAverage().)
548 
549  mqPtr = artdaq::StatisticsCollection::getInstance().
550  getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
551  if (mqPtr.get() != 0)
552  {
553  oss << ", input wait time = "
554  << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
555  }
556 
557  mqPtr = artdaq::StatisticsCollection::getInstance().
558  getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
559  if (mqPtr.get() != 0)
560  {
561  oss << ", BRsync wait time = "
562  << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
563  }
564 
565  mqPtr = artdaq::StatisticsCollection::getInstance().
566  getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
567  if (mqPtr.get() != 0)
568  {
569  oss << ", output wait time = "
570  << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
571  }
572 
573  oss << std::endl << " Fragments per read: ";
574  mqPtr = artdaq::StatisticsCollection::getInstance().
575  getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
576  if (mqPtr.get() != 0)
577  {
578  artdaq::MonitoredQuantityStats stats;
579  mqPtr->getStats(stats);
580  oss << "average = "
581  << stats.recentValueAverage
582  << ", min::max = "
583  << stats.recentValueMin
584  << "::"
585  << stats.recentValueMax;
586  }
587 
588  return oss.str();
589 }
590 
591 void artdaq::BoardReaderCore::sendMetrics_()
592 {
593  //TLOG_DEBUG("BoardReaderCore") << "Sending metrics " << __LINE__ << TLOG_ENDL;
594  double fragmentCount = 1.0;
595  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
596  getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
597  if (mqPtr.get() != 0)
598  {
599  artdaq::MonitoredQuantityStats stats;
600  mqPtr->getStats(stats);
601  fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
602  metricMan_.sendMetric("Fragment Count",
603  static_cast<unsigned long>(stats.fullSampleCount),
604  "fragments", 1);
605  metricMan_.sendMetric("Fragment Rate",
606  stats.recentSampleRate, "fragments/sec", 1);
607  metricMan_.sendMetric("Average Fragment Size",
608  (stats.recentValueAverage * sizeof(artdaq::RawDataType)
609  ), "bytes/fragment", 2);
610  metricMan_.sendMetric("Data Rate",
611  (stats.recentValueRate * sizeof(artdaq::RawDataType)
612  ), "bytes/sec", 2);
613  }
614 
615  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
616  // denominator of the calculations below is important because the way that
617  // the accumulation of these statistics is done is not fragment-by-fragment
618  // but read-by-read (where each read can contain multiple fragments).
619  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
620  // fragment, but we'll leave the calculation the same. (The alternative
621  // would be to use recentValueAverage().)
622 
623  mqPtr = artdaq::StatisticsCollection::getInstance().
624  getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
625  if (mqPtr.get() != 0)
626  {
627  metricMan_.sendMetric("Avg Input Wait Time",
628  (mqPtr->getRecentValueSum() / fragmentCount),
629  "seconds/fragment", 3, false);
630  }
631 
632  mqPtr = artdaq::StatisticsCollection::getInstance().
633  getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
634  if (mqPtr.get() != 0)
635  {
636  metricMan_.sendMetric("Avg BoardReader Sync Wait Time",
637  (mqPtr->getRecentValueSum() / fragmentCount),
638  "seconds/fragment", 3, false);
639  }
640 
641  mqPtr = artdaq::StatisticsCollection::getInstance().
642  getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
643  if (mqPtr.get() != 0)
644  {
645  metricMan_.sendMetric("Avg Output Wait Time",
646  (mqPtr->getRecentValueSum() / fragmentCount),
647  "seconds/fragment", 3, false);
648  }
649 
650  mqPtr = artdaq::StatisticsCollection::getInstance().
651  getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
652  if (mqPtr.get() != 0)
653  {
654  metricMan_.sendMetric("Avg Frags Per Read",
655  mqPtr->getRecentValueAverage(), "fragments/read", 4, false);
656  }
657 }
BoardReaderCore(Commandable &parent_application, int rank, std::string name)
BoardReaderCore Constructor.
void addMonitoredQuantityName(std::string const &statKey)
Add a MonitoredQuantity name to the list.
Commandable is the base class for all artdaq components which implement the artdaq state machine...
Definition: Commandable.hh:20
Sends Fragment objects using TransferInterface plugins. Uses Routing Tables if configured, otherwise will Round-Robin Fragments to the destinations.
static const std::string FRAGMENTS_PROCESSED_STAT_KEY
Key for the Fragments Processed MonitoredQuantity.
static const std::string INPUT_WAIT_STAT_KEY
Key for the Input Wait MonitoredQuantity.
bool stop(uint64_t timeout, uint64_t timestamp)
Stop the BoardReader, and the CommandableFragmentGenerator.
virtual ~BoardReaderCore()
BoardReaderCore Destructor.
std::unique_ptr< CommandableFragmentGenerator > makeCommandableFragmentGenerator(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps)
Load a CommandableFragmentGenerator plugin.
bool reinitialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Reinitialize the BoardReader. No-Op.
bool soft_initialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Soft-Initialize the BoardReader. No-Op.
static const std::string BRSYNC_WAIT_STAT_KEY
Key for the Sync Wait MonitoredQuantity.
static const std::string FRAGMENTS_PER_READ_STAT_KEY
Key for the Fragments Per Read MonitoredQuantity.
static const std::string OUTPUT_WAIT_STAT_KEY
Key for the Output Wait MonitoredQuantity.
bool initialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Initialize the BoardReaderCore.
std::string report(std::string const &which) const
Send a report on a given run-time quantity.
bool start(art::RunID id, uint64_t timeout, uint64_t timestamp)
Start the BoardReader, and the CommandableFragmentGenerator.
bool resume(uint64_t timeout, uint64_t timestamp)
Resume the BoardReader, and the CommandableFragmentGenerator.
bool pause(uint64_t timeout, uint64_t timestamp)
Pause the BoardReader, and the CommandableFragmentGenerator.
bool shutdown(uint64_t)
Shutdown the BoardReader, and the CommandableFragmentGenerator.
size_t process_fragments()
Main working loop of the BoardReaderCore.