artdaq  v3_11_00
BoardReaderCore.cc
1 
2 #include "artdaq/DAQdata/Globals.hh" // include these 2 first -
3 #define TRACE_NAME (app_name + "_BoardReaderCore").c_str()
4 
5 #include "artdaq-core/Data/Fragment.hh"
6 #include "artdaq-core/Utilities/ExceptionHandler.hh"
7 #include "artdaq/Application/BoardReaderCore.hh"
8 #include "artdaq/Application/TaskType.hh"
9 #include "artdaq/Generators/makeCommandableFragmentGenerator.hh"
10 
11 #include <pthread.h>
12 #include <sched.h>
13 #include <algorithm>
14 #include <memory>
15 #include <thread>
16 #include "canvas/Utilities/Exception.h"
17 #include "cetlib_except/exception.h"
18 
19 const std::string artdaq::BoardReaderCore::
20  FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
21 const std::string artdaq::BoardReaderCore::
22  INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
23 const std::string artdaq::BoardReaderCore::BUFFER_WAIT_STAT_KEY("BoardReaderCoreBufferWaitTime");
24 const std::string artdaq::BoardReaderCore::REQUEST_WAIT_STAT_KEY("BoardReaderCoreRequestWaitTime");
25 const std::string artdaq::BoardReaderCore::
26  OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
27 const std::string artdaq::BoardReaderCore::
28  FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
29 
30 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
31 
33  : parent_application_(parent_application)
34  /*, local_group_comm_(local_group_comm)*/
35  , generator_ptr_(nullptr)
36  , run_id_(art::RunID::flushRun())
37  , fragment_count_(0)
38  , stop_requested_(false)
39  , pause_requested_(false)
40 {
41  TLOG(TLVL_DEBUG) << "Constructor";
48 }
49 
51 {
52  TLOG(TLVL_DEBUG) << "Destructor";
53  TLOG(TLVL_DEBUG) << "Stopping Request Receiver BEGIN";
54  request_receiver_ptr_.reset(nullptr);
55  TLOG(TLVL_DEBUG) << "Stopping Request Receiver END";
56 }
57 
58 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t /*unused*/, uint64_t /*unused*/)
59 {
60  TLOG(TLVL_DEBUG) << "initialize method called with "
61  << "ParameterSet = \"" << pset.to_string() << "\".";
62 
63  // pull out the relevant parts of the ParameterSet
64  fhicl::ParameterSet daq_pset;
65  try
66  {
67  daq_pset = pset.get<fhicl::ParameterSet>("daq");
68  }
69  catch (...)
70  {
71  TLOG(TLVL_ERROR)
72  << "Unable to find the DAQ parameters in the initialization "
73  << "ParameterSet: \"" + pset.to_string() + "\".";
74  return false;
75  }
76  fhicl::ParameterSet fr_pset;
77  try
78  {
79  fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
80  data_pset_ = fr_pset;
81  }
82  catch (...)
83  {
84  TLOG(TLVL_ERROR)
85  << "Unable to find the fragment_receiver parameters in the DAQ "
86  << "initialization ParameterSet: \"" + daq_pset.to_string() + "\".";
87  return false;
88  }
89 
90  // pull out the Metric part of the ParameterSet
91  fhicl::ParameterSet metric_pset;
92  try
93  {
94  metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
95  }
96  catch (...)
97  {} // OK if there's no metrics table defined in the FHiCL
98 
99  if (metric_pset.is_empty())
100  {
101  TLOG(TLVL_INFO) << "No metric plugins appear to be defined";
102  }
103  try
104  {
105  metricMan->initialize(metric_pset, app_name);
106  }
107  catch (...)
108  {
109  ExceptionHandler(ExceptionHandlerRethrow::no,
110  "Error loading metrics in BoardReaderCore::initialize()");
111  }
112 
113  if (daq_pset.has_key("rank"))
114  {
115  if (my_rank >= 0 && daq_pset.get<int>("rank") != my_rank)
116  {
117  TLOG(TLVL_WARNING) << "BoardReader rank specified at startup is different than rank specified at configure! Using rank received at configure!";
118  }
119  my_rank = daq_pset.get<int>("rank");
120  }
121  if (my_rank == -1)
122  {
123  TLOG(TLVL_ERROR) << "BoardReader rank not specified at startup or in configuration! Aborting";
124  throw cet::exception("RankNotSpecifiedError") << "BoardReader rank not specified at startup or in configuration! Aborting";
125  }
126 
127  // create the requested CommandableFragmentGenerator
128  auto frag_gen_name = fr_pset.get<std::string>("generator", "");
129  if (frag_gen_name.length() == 0)
130  {
131  TLOG(TLVL_ERROR)
132  << "No fragment generator (parameter name = \"generator\") was "
133  << "specified in the fragment_receiver ParameterSet. The "
134  << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\".";
135  return false;
136  }
137 
138  try
139  {
140  generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
141  }
142  catch (...)
143  {
144  std::stringstream exception_string;
145  exception_string << "Exception thrown during initialization of fragment generator of type \""
146  << frag_gen_name << "\"";
147 
148  ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
149 
150  TLOG(TLVL_DEBUG) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string();
151 
152  return false;
153  }
154 
155  try
156  {
157  fragment_buffer_ptr_.reset(new FragmentBuffer(fr_pset));
158  }
159  catch (...)
160  {
161  std::stringstream exception_string;
162  exception_string << "Exception thrown during initialization of Fragment Buffer";
163 
164  ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
165 
166  TLOG(TLVL_DEBUG) << "FHiCL parameter set used to initialize the fragment buffer which threw an exception: " << fr_pset.to_string();
167 
168  return false;
169  }
170 
171  std::shared_ptr<RequestBuffer> request_buffer = std::make_shared<RequestBuffer>(fr_pset.get<artdaq::Fragment::sequence_id_t>("request_increment", 1));
172 
173  try
174  {
175  request_receiver_ptr_.reset(new RequestReceiver(fr_pset, request_buffer));
176  generator_ptr_->SetRequestBuffer(request_buffer);
177  fragment_buffer_ptr_->SetRequestBuffer(request_buffer);
178  }
179  catch (...)
180  {
181  ExceptionHandler(ExceptionHandlerRethrow::no, "Exception thrown during initialization of request receiver");
182 
183  TLOG(TLVL_DEBUG) << "FHiCL parameter set used to initialize the request receiver which threw an exception: " << fr_pset.to_string();
184 
185  return false;
186  }
187  metricMan->setPrefix(generator_ptr_->metricsReportingInstanceName());
188 
189  rt_priority_ = fr_pset.get<int>("rt_priority", 0);
190 
191  // fetch the monitoring parameters and create the MonitoredQuantity instances
192  statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
193 
194  // check if we should skip the sequence ID test...
195  skip_seqId_test_ = (fr_pset.get<bool>("skip_seqID_test", false) || generator_ptr_->fragmentIDs().size() > 1 || fragment_buffer_ptr_->request_mode() != RequestMode::Ignored);
196 
197  verbose_ = fr_pset.get<bool>("verbose", true);
198 
199  return true;
200 }
201 
202 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
203 {
204  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Starting run " << id.run();
205  stop_requested_.store(false);
206  pause_requested_.store(false);
207 
208  fragment_count_ = 0;
209  prev_seq_id_ = 0;
210  statsHelper_.resetStatistics();
211 
212  fragment_buffer_ptr_->Reset(false);
213 
214  metricMan->do_start();
215  generator_ptr_->StartCmd(id.run(), timeout, timestamp);
216  run_id_ = id;
217 
218  request_receiver_ptr_->SetRunNumber(static_cast<uint32_t>(id.run()));
219  request_receiver_ptr_->startRequestReception();
220 
221  running_ = true;
222  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Completed the Start transition (Started run) for run " << run_id_.run()
223  << ", timeout = " << timeout << ", timestamp = " << timestamp;
224  return true;
225 }
226 
227 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
228 {
229  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Stopping run " << run_id_.run() << " after " << fragment_count_ << " fragments.";
230  stop_requested_.store(true);
231 
232  TLOG(TLVL_DEBUG) << "Stopping Request reception BEGIN";
233  request_receiver_ptr_->stopRequestReception();
234  TLOG(TLVL_DEBUG) << "Stopping Request reception END";
235 
236  TLOG(TLVL_DEBUG) << "Stopping CommandableFragmentGenerator BEGIN";
237  generator_ptr_->StopCmd(timeout, timestamp);
238  TLOG(TLVL_DEBUG) << "Stopping CommandableFragmentGenerator END";
239 
240  TLOG(TLVL_DEBUG) << "Stopping FragmentBuffer";
241  fragment_buffer_ptr_->Stop();
242 
243  TLOG(TLVL_DEBUG) << "Stopping DataSenderManager";
244  if (sender_ptr_)
245  {
246  sender_ptr_->StopSender();
247  }
248 
249  running_ = false;
250  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Completed the Stop transition for run " << run_id_.run();
251  return true;
252 }
253 
254 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
255 {
256  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Pausing run " << run_id_.run() << " after " << fragment_count_ << " fragments.";
257  pause_requested_.store(true);
258  generator_ptr_->PauseCmd(timeout, timestamp);
259  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Completed the Pause transition for run " << run_id_.run();
260  return true;
261 }
262 
263 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
264 {
265  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Resuming run " << run_id_.run();
266  pause_requested_.store(false);
267  metricMan->do_start();
268  generator_ptr_->ResumeCmd(timeout, timestamp);
269  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Completed the Resume transition for run " << run_id_.run();
270  return true;
271 }
272 
273 bool artdaq::BoardReaderCore::shutdown(uint64_t /*unused*/)
274 {
275  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Starting Shutdown transition";
276  generator_ptr_->joinThreads(); // Cleanly shut down the CommandableFragmentGenerator
277  generator_ptr_.reset(nullptr);
278  metricMan->shutdown();
279  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG)) << "Completed Shutdown transition";
280  return true;
281 }
282 
283 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t timeout, uint64_t timestamp)
284 {
285  TLOG(TLVL_DEBUG) << "soft_initialize method called with "
286  << "ParameterSet = \"" << pset.to_string()
287  << "\". Forwarding to initialize.";
288  return initialize(pset, timeout, timestamp);
289 }
290 
291 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t timeout, uint64_t timestamp)
292 {
293  TLOG(TLVL_DEBUG) << "reinitialize method called with "
294  << "ParameterSet = \"" << pset.to_string()
295  << "\". Forwarding to initalize.";
296  return initialize(pset, timeout, timestamp);
297 }
298 
300 {
301  if (rt_priority_ > 0)
302  {
303 #pragma GCC diagnostic push
304 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
305  sched_param s_param = {};
306  s_param.sched_priority = rt_priority_;
307  if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
308  TLOG(TLVL_WARNING) << "setting realtime priority failed";
309 #pragma GCC diagnostic pop
310  }
311 
312  // try-catch block here?
313 
314  // how to turn RT PRI off?
315  if (rt_priority_ > 0)
316  {
317 #pragma GCC diagnostic push
318 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
319  sched_param s_param = {};
320  s_param.sched_priority = rt_priority_;
321  int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
322  if (status != 0)
323  {
324  TLOG(TLVL_ERROR)
325  << "Failed to set realtime priority to " << rt_priority_
326  << ", return code = " << status;
327  }
328 #pragma GCC diagnostic pop
329  }
330 
331  TLOG(TLVL_DEBUG) << "Waiting for first fragment.";
332  artdaq::MonitoredQuantityStats::TIME_POINT_T startTime, after_input, after_buffer;
333  artdaq::FragmentPtrs frags;
334 
335  receiver_thread_active_ = true;
336 
337  auto wait_start = std::chrono::steady_clock::now();
338  while (!running_ && TimeUtils::GetElapsedTime(wait_start) < start_transition_timeout_)
339  {
340  usleep(10000);
341  }
342  if (!running_)
343  {
344  TLOG(TLVL_ERROR) << "Timeout (" << start_transition_timeout_ << " s) while waiting for Start after receive_fragments thread started!";
345  receiver_thread_active_ = false;
346  }
347 
348  while (receiver_thread_active_)
349  {
350  startTime = artdaq::MonitoredQuantity::getCurrentTime();
351 
352  TLOG(18) << "receive_fragments getNext start";
353  receiver_thread_active_ = generator_ptr_->getNext(frags);
354  TLOG(18) << "receive_fragments getNext done (receiver_thread_active_=" << receiver_thread_active_ << ")";
355 
356  // 08-May-2015, KAB & JCF: if the generator getNext() method returns false
357  // (which indicates that the data flow has stopped) *and* the reason that
358  // it has stopped is because there was an exception that wasn't handled by
359  // the experiment-specific FragmentGenerator class, we move to the
360  // InRunError state so that external observers (e.g. RunControl or
361  // DAQInterface) can see that there was a problem.
362  if (!receiver_thread_active_ && generator_ptr_ && generator_ptr_->exception())
363  {
364  parent_application_.in_run_failure();
365  }
366 
367  after_input = artdaq::MonitoredQuantity::getCurrentTime();
368 
369  if (!receiver_thread_active_) { break; }
370  statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
371 
372  if (frags.size() > 0)
373  {
374  TLOG(18) << "receive_fragments AddFragmentsToBuffer start";
375  fragment_buffer_ptr_->AddFragmentsToBuffer(std::move(frags));
376  TLOG(18) << "receive_fragments AddFragmentsToBuffer done";
377  }
378 
379  after_buffer = artdaq::MonitoredQuantity::getCurrentTime();
380  TLOG(16) << "receive_fragments INPUT_WAIT=" << (after_input - startTime) << ", BUFFER_WAIT=" << (after_buffer - after_input);
381  statsHelper_.addSample(INPUT_WAIT_STAT_KEY, after_input - startTime);
382  statsHelper_.addSample(BUFFER_WAIT_STAT_KEY, after_buffer - after_input);
383  if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
384  frags.clear();
385  }
386 
387  // 11-May-2015, KAB: call MetricManager::do_stop whenever we exit the
388  // processing fragments loop so that metrics correctly go to zero when
389  // there is no data flowing
390  metricMan->do_stop();
391 
392  TLOG(TLVL_DEBUG) << "receive_fragments loop end";
393 }
395 {
396  if (rt_priority_ > 0)
397  {
398 #pragma GCC diagnostic push
399 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
400  sched_param s_param = {};
401  s_param.sched_priority = rt_priority_;
402  if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param) != 0)
403  {
404  TLOG(TLVL_WARNING) << "setting realtime priority failed";
405  }
406 #pragma GCC diagnostic pop
407  }
408 
409  // try-catch block here?
410 
411  // how to turn RT PRI off?
412  if (rt_priority_ > 0)
413  {
414 #pragma GCC diagnostic push
415 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
416  sched_param s_param = {};
417  s_param.sched_priority = rt_priority_;
418  int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
419  if (status != 0)
420  {
421  TLOG(TLVL_ERROR)
422  << "Failed to set realtime priority to " << rt_priority_
423  << ", return code = " << status;
424  }
425 #pragma GCC diagnostic pop
426  }
427 
428  TLOG(TLVL_DEBUG) << "Initializing DataSenderManager. my_rank=" << my_rank;
429  sender_ptr_ = std::make_unique<artdaq::DataSenderManager>(data_pset_);
430 
431  TLOG(TLVL_DEBUG) << "Waiting for first fragment.";
432  artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
433  double delta_time;
434  artdaq::FragmentPtrs frags;
435  auto targetFragCount = generator_ptr_->fragmentIDs().size();
436 
437  sender_thread_active_ = true;
438 
439  auto wait_start = std::chrono::steady_clock::now();
440  while (!running_ && TimeUtils::GetElapsedTime(wait_start) < start_transition_timeout_)
441  {
442  usleep(10000);
443  }
444  if (!running_)
445  {
446  TLOG(TLVL_ERROR) << "Timeout (" << start_transition_timeout_ << " s) while waiting for Start after send_fragments thread started!";
447  sender_thread_active_ = false;
448  }
449 
450  while (sender_thread_active_)
451  {
452  startTime = artdaq::MonitoredQuantity::getCurrentTime();
453 
454  TLOG(18) << "send_fragments applyRequests start";
455  sender_thread_active_ = fragment_buffer_ptr_->applyRequests(frags);
456  TLOG(18) << "send_fragments applyRequests done (sender_thread_active_=" << sender_thread_active_ << ")";
457  // 08-May-2015, KAB & JCF: if the generator getNext() method returns false
458  // (which indicates that the data flow has stopped) *and* the reason that
459  // it has stopped is because there was an exception that wasn't handled by
460  // the experiment-specific FragmentGenerator class, we move to the
461  // InRunError state so that external observers (e.g. RunControl or
462  // DAQInterface) can see that there was a problem.
463  if (!sender_thread_active_ && generator_ptr_ && generator_ptr_->exception())
464  {
465  parent_application_.in_run_failure();
466  }
467 
468  delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
469 
470  TLOG(16) << "send_fragments REQUEST_WAIT=" << delta_time;
471  statsHelper_.addSample(REQUEST_WAIT_STAT_KEY, delta_time);
472 
473  if (!sender_thread_active_) { break; }
474 
475  for (auto& fragPtr : frags)
476  {
477  if (fragPtr == nullptr)
478  {
479  TLOG(TLVL_WARNING) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
480  << "This is most likely caused by a problem with the Fragment Generator!";
481  continue;
482  }
483  if (fragment_count_ == 0)
484  {
485  TLOG(TLVL_DEBUG) << "Received first Fragment from Fragment Generator, sequence ID " << fragPtr->sequenceID() << ", size = " << fragPtr->sizeBytes() << " bytes.";
486  }
487 
488  if (fragPtr->type() == Fragment::EndOfRunFragmentType || fragPtr->type() == Fragment::EndOfSubrunFragmentType || fragPtr->type() == Fragment::InitFragmentType)
489  {
490  // Just broadcast any system Fragments in the output
491  artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
492  statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->sizeBytes());
493 
494  startTime = artdaq::MonitoredQuantity::getCurrentTime();
495  TLOG(17) << "send_fragments seq=" << sequence_id << " sendFragment start";
496  auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
497  TLOG(17) << "send_fragments seq=" << sequence_id << " sendFragment done (dest=" << res.first << ", sts=" << TransferInterface::CopyStatusToString(res.second) << ")";
498  ++fragment_count_;
499  statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
500  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
501  continue;
502  }
503 
504  artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
505  SetMFIteration("Sequence ID " + std::to_string(sequence_id));
506  statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->sizeBytes());
507 
508  /*if ((fragment_count_ % 250) == 0)
509  {
510  TLOG(TLVL_DEBUG)
511  << "Sending fragment " << fragment_count_
512  << " with sequence id " << sequence_id << ".";
513  }*/
514 
515  // check for continous sequence IDs
516  if (!skip_seqId_test_ && abs(static_cast<int64_t>(sequence_id) - static_cast<int64_t>(prev_seq_id_)) > 1)
517  {
518  TLOG(TLVL_WARNING)
519  << "Missing sequence IDs: current sequence ID = "
520  << sequence_id << ", previous sequence ID = "
521  << prev_seq_id_ << ".";
522  }
523  prev_seq_id_ = sequence_id;
524 
525  startTime = artdaq::MonitoredQuantity::getCurrentTime();
526  TLOG(17) << "send_fragments seq=" << sequence_id << " sendFragment start";
527  auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
528  if (sender_ptr_->GetSentSequenceIDCount(sequence_id) == targetFragCount)
529  {
530  sender_ptr_->RemoveRoutingTableEntry(sequence_id);
531  }
532  TLOG(17) << "send_fragments seq=" << sequence_id << " sendFragment done (dest=" << res.first << ", sts=" << TransferInterface::CopyStatusToString(res.second) << ")";
533  ++fragment_count_;
534  statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
535  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
536 
537  bool readyToReport = statsHelper_.readyToReport();
538  if (readyToReport)
539  {
540  TLOG(TLVL_INFO) << buildStatisticsString_();
541  }
542 
543  // Turn on lvls (mem and/or slow) 3,13,14 to log every send.
544  TLOG(((fragment_count_ == 1) ? TLVL_DEBUG
545  : (((fragment_count_ % 250) == 0 || readyToReport) ? 13 : 14)))
546  << ((fragment_count_ == 1)
547  ? "Sent first Fragment"
548  : "Sending fragment " + std::to_string(fragment_count_))
549  << " with SeqID " << sequence_id << ".";
550  }
551  if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
552  frags.clear();
553  std::this_thread::yield();
554  }
555 
556  sender_ptr_.reset(nullptr);
557 
558  // 11-May-2015, KAB: call MetricManager::do_stop whenever we exit the
559  // processing fragments loop so that metrics correctly go to zero when
560  // there is no data flowing
561  metricMan->do_stop();
562 
563  TLOG(TLVL_DEBUG) << "send_fragments loop end";
564 }
565 
566 std::string artdaq::BoardReaderCore::report(std::string const& which) const
567 {
568  std::string resultString;
569 
570  // pass the request to the FragmentGenerator instance, if it's available
571  if (generator_ptr_ != nullptr && which != "core")
572  {
573  resultString = generator_ptr_->ReportCmd(which);
574  if (resultString.length() > 0) { return resultString; }
575  }
576 
577  // handle the request at this level, if we can
578  // --> nothing here yet
579 
580  // if we haven't been able to come up with any report so far, say so
581  std::string tmpString = app_name + " run number = ";
582  tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
583 
584  tmpString.append(", Sent Fragment count = ");
585  tmpString.append(boost::lexical_cast<std::string>(fragment_count_));
586 
587  if (!which.empty() && which != "core")
588  {
589  tmpString.append(". Command=\"" + which + "\" is not currently supported.");
590  }
591  return tmpString;
592 }
593 
594 bool artdaq::BoardReaderCore::metaCommand(std::string const& command, std::string const& arg)
595 {
596  TLOG(TLVL_DEBUG) << "metaCommand method called with "
597  << "command = \"" << command << "\""
598  << ", arg = \"" << arg << "\""
599  << ".";
600 
601  if (generator_ptr_)
602  {
603  return generator_ptr_->metaCommand(command, arg);
604  }
605 
606  return true;
607 }
608 
609 std::string artdaq::BoardReaderCore::buildStatisticsString_()
610 {
611  std::ostringstream oss;
612  double fragmentsGeneratedCount = 1.0;
613  double fragmentsOutputCount = 1.0;
614  oss << app_name << " statistics:" << std::endl;
615 
616  oss << " Fragments read: ";
617  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
618  if (mqPtr.get() != nullptr)
619  {
620  artdaq::MonitoredQuantityStats stats;
621  mqPtr->getStats(stats);
622  oss << stats.recentSampleCount << " fragments generated at "
623  << stats.recentSampleRate << " reads/sec, fragment rate = "
624  << stats.recentValueRate << " fragments/sec, monitor window = "
625  << stats.recentDuration << " sec, min::max read size = "
626  << stats.recentValueMin
627  << "::"
628  << stats.recentValueMax
629  << " fragments";
630  fragmentsGeneratedCount = std::max(double(stats.recentSampleCount), 1.0);
631  oss << " Average times per fragment: ";
632  if (stats.recentSampleRate > 0.0)
633  {
634  oss << " elapsed time = "
635  << (1.0 / stats.recentSampleRate) << " sec";
636  }
637  }
638 
639  oss << std::endl;
640  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
641  if (mqPtr.get() != nullptr)
642  {
643  artdaq::MonitoredQuantityStats stats;
644  mqPtr->getStats(stats);
645  oss << " Fragment output statistics: "
646  << stats.recentSampleCount << " fragments sent at "
647  << stats.recentSampleRate << " fragments/sec, effective data rate = "
648  << (stats.recentValueRate * sizeof(artdaq::RawDataType) / 1024.0 / 1024.0) << " MB/sec, monitor window = "
649  << stats.recentDuration << " sec, min::max event size = "
650  << (stats.recentValueMin * sizeof(artdaq::RawDataType) / 1024.0 / 1024.0)
651  << "::"
652  << (stats.recentValueMax * sizeof(artdaq::RawDataType) / 1024.0 / 1024.0)
653  << " MB" << std::endl;
654  fragmentsOutputCount = std::max(double(stats.recentSampleCount), 1.0);
655  }
656 
657  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
658  // denominator of the calculations below is important because the way that
659  // the accumulation of these statistics is done is not fragment-by-fragment
660  // but read-by-read (where each read can contain multiple fragments).
661  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
662  // fragment, but we'll leave the calculation the same. (The alternative
663  // would be to use recentValueAverage().)
664 
665  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
666  if (mqPtr.get() != nullptr)
667  {
668  oss << " Input wait time = "
669  << (mqPtr->getRecentValueSum() / fragmentsGeneratedCount) << " s/fragment";
670  }
671  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(BUFFER_WAIT_STAT_KEY);
672  if (mqPtr.get() != 0)
673  {
674  oss << ", buffer wait time = "
675  << (mqPtr->getRecentValueSum() / fragmentsGeneratedCount) << " s/fragment";
676  }
677  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(REQUEST_WAIT_STAT_KEY);
678  if (mqPtr.get() != 0)
679  {
680  oss << ", request wait time = "
681  << (mqPtr->getRecentValueSum() / fragmentsOutputCount) << " s/fragment";
682  }
683 
684  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
685  if (mqPtr.get() != nullptr)
686  {
687  oss << ", output wait time = "
688  << (mqPtr->getRecentValueSum() / fragmentsOutputCount) << " s/fragment";
689  }
690 
691  return oss.str();
692 }
693 
694 void artdaq::BoardReaderCore::sendMetrics_()
695 {
696  //TLOG(TLVL_DEBUG) << "Sending metrics " << __LINE__ ;
697  double fragmentCount = 1.0;
698  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
699  if (mqPtr.get() != nullptr)
700  {
701  artdaq::MonitoredQuantityStats stats;
702  mqPtr->getStats(stats);
703  fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
704  metricMan->sendMetric("Fragment Count", stats.fullSampleCount, "fragments", 1, MetricMode::LastPoint);
705  metricMan->sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
706  metricMan->sendMetric("Average Fragment Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/fragment", 2, MetricMode::Average);
707  metricMan->sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
708  }
709 
710  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
711  // denominator of the calculations below is important because the way that
712  // the accumulation of these statistics is done is not fragment-by-fragment
713  // but read-by-read (where each read can contain multiple fragments).
714  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
715  // fragment, but we'll leave the calculation the same. (The alternative
716  // would be to use recentValueAverage().)
717 
718  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
719  if (mqPtr.get() != nullptr)
720  {
721  metricMan->sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
722  }
723 
724  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(BUFFER_WAIT_STAT_KEY);
725  if (mqPtr.get() != 0)
726  {
727  metricMan->sendMetric("Avg Buffer Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
728  }
729  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(REQUEST_WAIT_STAT_KEY);
730  if (mqPtr.get() != 0)
731  {
732  metricMan->sendMetric("Avg Request Response Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
733  }
734  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
735  if (mqPtr.get() != nullptr)
736  {
737  metricMan->sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
738  }
739 
740  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
741  if (mqPtr.get() != nullptr)
742  {
743  metricMan->sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
744  }
745 }
static const std::string BUFFER_WAIT_STAT_KEY
Key for the Fragment Buffer Wait MonitoredQuantity.
void addMonitoredQuantityName(std::string const &statKey)
Add a MonitoredQuantity name to the list.
Commandable is the base class for all artdaq components which implement the artdaq state machine...
Definition: Commandable.hh:20
bool initialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Initialize the BoardReaderCore.
static const std::string FRAGMENTS_PROCESSED_STAT_KEY
Key for the Fragments Processed MonitoredQuantity.
bool reinitialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Reinitialize the BoardReader. Forwards to initialize().
static const std::string INPUT_WAIT_STAT_KEY
Key for the Input Wait MonitoredQuantity.
bool stop(uint64_t timeout, uint64_t timestamp)
Stop the BoardReader, and the CommandableFragmentGenerator.
virtual ~BoardReaderCore()
BoardReaderCore Destructor.
static std::string CopyStatusToString(CopyStatus in)
Convert a CopyStatus variable to its string representation
BoardReaderCore(Commandable &parent_application)
BoardReaderCore Constructor.
std::unique_ptr< CommandableFragmentGenerator > makeCommandableFragmentGenerator(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps)
Load a CommandableFragmentGenerator plugin.
FragmentBuffer is a FragmentGenerator-derived abstract class that defines the interface for a Fragmen...
Receive data requests and make them available to CommandableFragmentGenerator or other interested par...
static const std::string FRAGMENTS_PER_READ_STAT_KEY
Key for the Fragments Per Read MonitoredQuantity.
static const std::string REQUEST_WAIT_STAT_KEY
Key for the Request Buffer Wait MonitoredQuantity.
static const std::string OUTPUT_WAIT_STAT_KEY
Key for the Output Wait MonitoredQuantity.
void send_fragments()
Main working loop of the BoardReaderCore, pt. 2.
bool soft_initialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Soft-Initialize the BoardReader. Forwards to initialize().
std::string report(std::string const &which) const
Send a report on a given run-time quantity.
bool shutdown(uint64_t timeout)
Shutdown the BoardReader, and the CommandableFragmentGenerator.
bool start(art::RunID id, uint64_t timeout, uint64_t timestamp)
Start the BoardReader, and the CommandableFragmentGenerator.
bool resume(uint64_t timeout, uint64_t timestamp)
Resume the BoardReader, and the CommandableFragmentGenerator.
void receive_fragments()
Main working loop of the BoardReaderCore.
bool pause(uint64_t timeout, uint64_t timestamp)
Pause the BoardReader, and the CommandableFragmentGenerator.
bool metaCommand(std::string const &command, std::string const &arg)
Run a user-defined command on the CommandableFragmentGenerator.