artdaq  3.13.00
BoardReaderCore.cc
1 #include "artdaq/DAQdata/Globals.hh" // include these 2 first -
2 #define TRACE_NAME (app_name + "_BoardReaderCore").c_str()
3 
4 #include "artdaq-core/Core/MonitoredQuantity.hh"
5 #include "artdaq-core/Data/Fragment.hh"
6 #include "artdaq-core/Utilities/ExceptionHandler.hh"
7 #include "artdaq/Application/BoardReaderCore.hh"
8 #include "artdaq/Application/TaskType.hh"
9 #include "artdaq/Generators/makeCommandableFragmentGenerator.hh"
10 
11 #include "cetlib_except/exception.h"
12 #include "fhiclcpp/ParameterSet.h"
13 
14 #include <boost/lexical_cast.hpp>
15 
16 #include <pthread.h>
17 #include <sched.h>
18 #include <algorithm>
19 #include <memory>
20 #include <thread>
21 
22 const std::string artdaq::BoardReaderCore::
23  FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
24 const std::string artdaq::BoardReaderCore::
25  INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
26 const std::string artdaq::BoardReaderCore::BUFFER_WAIT_STAT_KEY("BoardReaderCoreBufferWaitTime");
27 const std::string artdaq::BoardReaderCore::REQUEST_WAIT_STAT_KEY("BoardReaderCoreRequestWaitTime");
28 const std::string artdaq::BoardReaderCore::
29  OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
30 const std::string artdaq::BoardReaderCore::
31  FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
32 
33 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
34 
36  : parent_application_(parent_application)
37  /*, local_group_comm_(local_group_comm)*/
38  , generator_ptr_(nullptr)
39  , run_id_(art::RunID::flushRun())
40  , fragment_count_(0)
41  , stop_requested_(false)
42  , pause_requested_(false)
43 {
44  TLOG(TLVL_DEBUG + 32) << "Constructor";
51 }
52 
54 {
55  TLOG(TLVL_DEBUG + 32) << "Destructor";
56  TLOG(TLVL_DEBUG + 32) << "Stopping Request Receiver BEGIN";
57  request_receiver_ptr_.reset(nullptr);
58  TLOG(TLVL_DEBUG + 32) << "Stopping Request Receiver END";
59 }
60 
61 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t /*unused*/, uint64_t /*unused*/)
62 {
63  TLOG(TLVL_DEBUG + 32) << "initialize method called with "
64  << "ParameterSet = \"" << pset.to_string() << "\".";
65 
66  // pull out the relevant parts of the ParameterSet
67  fhicl::ParameterSet daq_pset;
68  try
69  {
70  daq_pset = pset.get<fhicl::ParameterSet>("daq");
71  }
72  catch (...)
73  {
74  TLOG(TLVL_ERROR)
75  << "Unable to find the DAQ parameters in the initialization "
76  << "ParameterSet: \"" + pset.to_string() + "\".";
77  return false;
78  }
79  fhicl::ParameterSet fr_pset;
80  try
81  {
82  fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
83  data_pset_ = fr_pset;
84  }
85  catch (...)
86  {
87  TLOG(TLVL_ERROR)
88  << "Unable to find the fragment_receiver parameters in the DAQ "
89  << "initialization ParameterSet: \"" + daq_pset.to_string() + "\".";
90  return false;
91  }
92 
93  // pull out the Metric part of the ParameterSet
94  fhicl::ParameterSet metric_pset;
95  try
96  {
97  metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
98  }
99  catch (...)
100  {} // OK if there's no metrics table defined in the FHiCL
101 
102  if (metric_pset.is_empty())
103  {
104  TLOG(TLVL_INFO) << "No metric plugins appear to be defined";
105  }
106  try
107  {
108  metricMan->initialize(metric_pset, app_name);
109  }
110  catch (...)
111  {
112  ExceptionHandler(ExceptionHandlerRethrow::no,
113  "Error loading metrics in BoardReaderCore::initialize()");
114  }
115 
116  if (daq_pset.has_key("rank"))
117  {
118  if (my_rank >= 0 && daq_pset.get<int>("rank") != my_rank)
119  {
120  TLOG(TLVL_WARNING) << "BoardReader rank specified at startup is different than rank specified at configure! Using rank received at configure!";
121  }
122  my_rank = daq_pset.get<int>("rank");
123  }
124  if (my_rank == -1)
125  {
126  TLOG(TLVL_ERROR) << "BoardReader rank not specified at startup or in configuration! Aborting";
127  throw cet::exception("RankNotSpecifiedError") << "BoardReader rank not specified at startup or in configuration! Aborting";
128  }
129 
130  // create the requested CommandableFragmentGenerator
131  auto frag_gen_name = fr_pset.get<std::string>("generator", "");
132  if (frag_gen_name.length() == 0)
133  {
134  TLOG(TLVL_ERROR)
135  << "No fragment generator (parameter name = \"generator\") was "
136  << "specified in the fragment_receiver ParameterSet. The "
137  << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\".";
138  return false;
139  }
140 
141  try
142  {
143  generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
144  }
145  catch (...)
146  {
147  std::stringstream exception_string;
148  exception_string << "Exception thrown during initialization of fragment generator of type \""
149  << frag_gen_name << "\"";
150 
151  ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
152 
153  TLOG(TLVL_DEBUG + 32) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string();
154 
155  return false;
156  }
157 
158  try
159  {
160  fragment_buffer_ptr_ = std::make_shared<FragmentBuffer>(fr_pset);
161  }
162  catch (...)
163  {
164  std::stringstream exception_string;
165  exception_string << "Exception thrown during initialization of Fragment Buffer";
166 
167  ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
168 
169  TLOG(TLVL_DEBUG + 32) << "FHiCL parameter set used to initialize the fragment buffer which threw an exception: " << fr_pset.to_string();
170 
171  return false;
172  }
173 
174  std::shared_ptr<RequestBuffer> request_buffer = std::make_shared<RequestBuffer>(fr_pset.get<artdaq::Fragment::sequence_id_t>("request_increment", 1));
175 
176  try
177  {
178  request_receiver_ptr_.reset(new RequestReceiver(fr_pset, request_buffer));
179  generator_ptr_->SetRequestBuffer(request_buffer);
180  generator_ptr_->SetFragmentBuffer(fragment_buffer_ptr_);
181  fragment_buffer_ptr_->SetRequestBuffer(request_buffer);
182  }
183  catch (...)
184  {
185  ExceptionHandler(ExceptionHandlerRethrow::no, "Exception thrown during initialization of request receiver");
186 
187  TLOG(TLVL_DEBUG + 32) << "FHiCL parameter set used to initialize the request receiver which threw an exception: " << fr_pset.to_string();
188 
189  return false;
190  }
191  metricMan->setPrefix(generator_ptr_->metricsReportingInstanceName());
192 
193  rt_priority_ = fr_pset.get<int>("rt_priority", 0);
194 
195  // fetch the monitoring parameters and create the MonitoredQuantity instances
196  statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
197 
198  // check if we should skip the sequence ID test...
199  skip_seqId_test_ = (fr_pset.get<bool>("skip_seqID_test", false) || generator_ptr_->fragmentIDs().size() > 1 || fragment_buffer_ptr_->request_mode() != RequestMode::Ignored);
200 
201  verbose_ = fr_pset.get<bool>("verbose", true);
202 
203  return true;
204 }
205 
206 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
207 {
208  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Starting run " << id.run();
209  stop_requested_.store(false);
210  pause_requested_.store(false);
211 
212  fragment_count_ = 0;
213  prev_seq_id_ = 0;
214  statsHelper_.resetStatistics();
215 
216  fragment_buffer_ptr_->Reset(false);
217 
218  metricMan->do_start();
219  generator_ptr_->StartCmd(id.run(), timeout, timestamp);
220  run_id_ = id;
221 
222  request_receiver_ptr_->SetRunNumber(static_cast<uint32_t>(id.run()));
223  request_receiver_ptr_->startRequestReception();
224 
225  running_ = true;
226  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Completed the Start transition (Started run) for run " << run_id_.run()
227  << ", timeout = " << timeout << ", timestamp = " << timestamp;
228  return true;
229 }
230 
231 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
232 {
233  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Stopping run " << run_id_.run() << " after " << fragment_count_ << " fragments.";
234  stop_requested_.store(true);
235 
236  TLOG(TLVL_DEBUG + 32) << "Stopping Request reception BEGIN";
237  request_receiver_ptr_->stopRequestReception();
238  TLOG(TLVL_DEBUG + 32) << "Stopping Request reception END";
239 
240  TLOG(TLVL_DEBUG + 32) << "Stopping CommandableFragmentGenerator BEGIN";
241  generator_ptr_->StopCmd(timeout, timestamp);
242  TLOG(TLVL_DEBUG + 32) << "Stopping CommandableFragmentGenerator END";
243 
244  TLOG(TLVL_DEBUG + 32) << "Stopping FragmentBuffer";
245  fragment_buffer_ptr_->Stop();
246 
247  TLOG(TLVL_DEBUG + 32) << "Stopping DataSenderManager";
248  if (sender_ptr_)
249  {
250  sender_ptr_->StopSender();
251  }
252 
253  running_ = false;
254  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Completed the Stop transition for run " << run_id_.run();
255  return true;
256 }
257 
258 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
259 {
260  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Pausing run " << run_id_.run() << " after " << fragment_count_ << " fragments.";
261  pause_requested_.store(true);
262  generator_ptr_->PauseCmd(timeout, timestamp);
263  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Completed the Pause transition for run " << run_id_.run();
264  return true;
265 }
266 
267 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
268 {
269  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Resuming run " << run_id_.run();
270  pause_requested_.store(false);
271  metricMan->do_start();
272  generator_ptr_->ResumeCmd(timeout, timestamp);
273  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Completed the Resume transition for run " << run_id_.run();
274  return true;
275 }
276 
277 bool artdaq::BoardReaderCore::shutdown(uint64_t /*unused*/)
278 {
279  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Starting Shutdown transition";
280  generator_ptr_->joinThreads(); // Cleanly shut down the CommandableFragmentGenerator
281  generator_ptr_.reset(nullptr);
282  metricMan->shutdown();
283  TLOG((verbose_ ? TLVL_INFO : TLVL_DEBUG + 32)) << "Completed Shutdown transition";
284  return true;
285 }
286 
287 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t timeout, uint64_t timestamp)
288 {
289  TLOG(TLVL_DEBUG + 32) << "soft_initialize method called with "
290  << "ParameterSet = \"" << pset.to_string()
291  << "\". Forwarding to initialize.";
292  return initialize(pset, timeout, timestamp);
293 }
294 
295 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t timeout, uint64_t timestamp)
296 {
297  TLOG(TLVL_DEBUG + 32) << "reinitialize method called with "
298  << "ParameterSet = \"" << pset.to_string()
299  << "\". Forwarding to initalize.";
300  return initialize(pset, timeout, timestamp);
301 }
302 
304 {
305  if (rt_priority_ > 0)
306  {
307 #pragma GCC diagnostic push
308 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
309  sched_param s_param = {};
310  s_param.sched_priority = rt_priority_;
311  if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
312  TLOG(TLVL_WARNING) << "setting realtime priority failed";
313 #pragma GCC diagnostic pop
314  }
315 
316  // try-catch block here?
317 
318  // how to turn RT PRI off?
319  if (rt_priority_ > 0)
320  {
321 #pragma GCC diagnostic push
322 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
323  sched_param s_param = {};
324  s_param.sched_priority = rt_priority_;
325  int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
326  if (status != 0)
327  {
328  TLOG(TLVL_ERROR)
329  << "Failed to set realtime priority to " << rt_priority_
330  << ", return code = " << status;
331  }
332 #pragma GCC diagnostic pop
333  }
334 
335  TLOG(TLVL_DEBUG + 32) << "Waiting for first fragment.";
336  artdaq::MonitoredQuantityStats::TIME_POINT_T startTime, after_input, after_buffer;
337  artdaq::FragmentPtrs frags;
338 
339  receiver_thread_active_ = true;
340 
341  auto wait_start = std::chrono::steady_clock::now();
342  while (!running_ && TimeUtils::GetElapsedTime(wait_start) < start_transition_timeout_)
343  {
344  usleep(10000);
345  }
346  if (!running_)
347  {
348  TLOG(TLVL_ERROR) << "Timeout (" << start_transition_timeout_ << " s) while waiting for Start after receive_fragments thread started!";
349  receiver_thread_active_ = false;
350  }
351 
352  while (receiver_thread_active_)
353  {
354  startTime = artdaq::MonitoredQuantity::getCurrentTime();
355 
356  TLOG(TLVL_DEBUG + 35) << "receive_fragments getNext start";
357  receiver_thread_active_ = generator_ptr_->getNext(frags);
358  TLOG(TLVL_DEBUG + 35) << "receive_fragments getNext done (receiver_thread_active_=" << receiver_thread_active_ << ")";
359 
360  // 08-May-2015, KAB & JCF: if the generator getNext() method returns false
361  // (which indicates that the data flow has stopped) *and* the reason that
362  // it has stopped is because there was an exception that wasn't handled by
363  // the experiment-specific FragmentGenerator class, we move to the
364  // InRunError state so that external observers (e.g. RunControl or
365  // DAQInterface) can see that there was a problem.
366  if (!receiver_thread_active_ && generator_ptr_ && generator_ptr_->exception())
367  {
368  parent_application_.in_run_failure();
369  }
370 
371  after_input = artdaq::MonitoredQuantity::getCurrentTime();
372 
373  if (!receiver_thread_active_) { break; }
374  statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
375 
376  if (frags.size() > 0)
377  {
378  TLOG(TLVL_DEBUG + 35) << "receive_fragments AddFragmentsToBuffer start";
379  fragment_buffer_ptr_->AddFragmentsToBuffer(std::move(frags));
380  TLOG(TLVL_DEBUG + 35) << "receive_fragments AddFragmentsToBuffer done";
381  }
382 
383  after_buffer = artdaq::MonitoredQuantity::getCurrentTime();
384  TLOG(TLVL_DEBUG + 34) << "receive_fragments INPUT_WAIT=" << (after_input - startTime) << ", BUFFER_WAIT=" << (after_buffer - after_input);
385  statsHelper_.addSample(INPUT_WAIT_STAT_KEY, after_input - startTime);
386  statsHelper_.addSample(BUFFER_WAIT_STAT_KEY, after_buffer - after_input);
387  if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
388  frags.clear();
389  }
390 
391  // 11-May-2015, KAB: call MetricManager::do_stop whenever we exit the
392  // processing fragments loop so that metrics correctly go to zero when
393  // there is no data flowing
394  metricMan->do_stop();
395 
396  TLOG(TLVL_DEBUG + 32) << "receive_fragments loop end";
397 }
399 {
400  if (rt_priority_ > 0)
401  {
402 #pragma GCC diagnostic push
403 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
404  sched_param s_param = {};
405  s_param.sched_priority = rt_priority_;
406  if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param) != 0)
407  {
408  TLOG(TLVL_WARNING) << "setting realtime priority failed";
409  }
410 #pragma GCC diagnostic pop
411  }
412 
413  // try-catch block here?
414 
415  // how to turn RT PRI off?
416  if (rt_priority_ > 0)
417  {
418 #pragma GCC diagnostic push
419 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
420  sched_param s_param = {};
421  s_param.sched_priority = rt_priority_;
422  int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
423  if (status != 0)
424  {
425  TLOG(TLVL_ERROR)
426  << "Failed to set realtime priority to " << rt_priority_
427  << ", return code = " << status;
428  }
429 #pragma GCC diagnostic pop
430  }
431 
432  TLOG(TLVL_DEBUG + 32) << "Initializing DataSenderManager. my_rank=" << my_rank;
433  sender_ptr_ = std::make_unique<artdaq::DataSenderManager>(data_pset_);
434 
435  TLOG(TLVL_DEBUG + 32) << "Waiting for first fragment.";
436  artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
437  double delta_time;
438  artdaq::FragmentPtrs frags;
439  auto targetFragCount = generator_ptr_->fragmentIDs().size();
440 
441  sender_thread_active_ = true;
442 
443  auto wait_start = std::chrono::steady_clock::now();
444  while (!running_ && TimeUtils::GetElapsedTime(wait_start) < start_transition_timeout_)
445  {
446  usleep(10000);
447  }
448  if (!running_)
449  {
450  TLOG(TLVL_ERROR) << "Timeout (" << start_transition_timeout_ << " s) while waiting for Start after send_fragments thread started!";
451  sender_thread_active_ = false;
452  }
453 
454  while (sender_thread_active_)
455  {
456  startTime = artdaq::MonitoredQuantity::getCurrentTime();
457 
458  TLOG(TLVL_DEBUG + 35) << "send_fragments applyRequests start";
459  sender_thread_active_ = fragment_buffer_ptr_->applyRequests(frags);
460  TLOG(TLVL_DEBUG + 35) << "send_fragments applyRequests done (sender_thread_active_=" << sender_thread_active_ << ")";
461  // 08-May-2015, KAB & JCF: if the generator getNext() method returns false
462  // (which indicates that the data flow has stopped) *and* the reason that
463  // it has stopped is because there was an exception that wasn't handled by
464  // the experiment-specific FragmentGenerator class, we move to the
465  // InRunError state so that external observers (e.g. RunControl or
466  // DAQInterface) can see that there was a problem.
467  if (!sender_thread_active_ && generator_ptr_ && generator_ptr_->exception())
468  {
469  parent_application_.in_run_failure();
470  }
471 
472  delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
473 
474  TLOG(TLVL_DEBUG + 34) << "send_fragments REQUEST_WAIT=" << delta_time;
475  statsHelper_.addSample(REQUEST_WAIT_STAT_KEY, delta_time);
476 
477  if (!sender_thread_active_) { break; }
478 
479  for (auto& fragPtr : frags)
480  {
481  if (fragPtr == nullptr)
482  {
483  TLOG(TLVL_WARNING) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
484  << "This is most likely caused by a problem with the Fragment Generator!";
485  continue;
486  }
487  if (fragment_count_ == 0)
488  {
489  TLOG(TLVL_DEBUG + 32) << "Received first Fragment from Fragment Generator, sequence ID " << fragPtr->sequenceID() << ", size = " << fragPtr->sizeBytes() << " bytes.";
490  }
491 
492  if (fragPtr->type() == Fragment::EndOfRunFragmentType || fragPtr->type() == Fragment::EndOfSubrunFragmentType || fragPtr->type() == Fragment::InitFragmentType)
493  {
494  // Just broadcast any system Fragments in the output
495  artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
496  statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->sizeBytes());
497 
498  startTime = artdaq::MonitoredQuantity::getCurrentTime();
499  TLOG(TLVL_DEBUG + 36) << "send_fragments seq=" << sequence_id << " sendFragment start";
500  auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
501  TLOG(TLVL_DEBUG + 36) << "send_fragments seq=" << sequence_id << " sendFragment done (dest=" << res.first << ", sts=" << TransferInterface::CopyStatusToString(res.second) << ")";
502  ++fragment_count_;
503  statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
504  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
505  continue;
506  }
507 
508  artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
509  SetMFIteration("Sequence ID " + std::to_string(sequence_id));
510  statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->sizeBytes());
511 
512  /*if ((fragment_count_ % 250) == 0)
513  {
514  TLOG(TLVL_DEBUG + 32)
515  << "Sending fragment " << fragment_count_
516  << " with sequence id " << sequence_id << ".";
517  }*/
518 
519  // check for continous sequence IDs
520  if (!skip_seqId_test_ && abs(static_cast<int64_t>(sequence_id) - static_cast<int64_t>(prev_seq_id_)) > 1)
521  {
522  TLOG(TLVL_WARNING)
523  << "Missing sequence IDs: current sequence ID = "
524  << sequence_id << ", previous sequence ID = "
525  << prev_seq_id_ << ".";
526  }
527  prev_seq_id_ = sequence_id;
528 
529  startTime = artdaq::MonitoredQuantity::getCurrentTime();
530  TLOG(TLVL_DEBUG + 36) << "send_fragments seq=" << sequence_id << " sendFragment start";
531  auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
532  if (sender_ptr_->GetSentSequenceIDCount(sequence_id) == targetFragCount)
533  {
534  sender_ptr_->RemoveRoutingTableEntry(sequence_id);
535  }
536  TLOG(TLVL_DEBUG + 36) << "send_fragments seq=" << sequence_id << " sendFragment done (dest=" << res.first << ", sts=" << TransferInterface::CopyStatusToString(res.second) << ")";
537  ++fragment_count_;
538  statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
539  artdaq::MonitoredQuantity::getCurrentTime() - startTime);
540 
541  bool readyToReport = statsHelper_.readyToReport();
542  if (readyToReport)
543  {
544  TLOG(TLVL_INFO) << buildStatisticsString_();
545  }
546 
547  // Turn on lvls (mem and/or slow) 3,13,14 to log every send.
548  TLOG(((fragment_count_ == 1) ? TLVL_DEBUG + 32
549  : (((fragment_count_ % 250) == 0 || readyToReport) ? TLVL_DEBUG + 36 : TLVL_DEBUG + 37)))
550  << ((fragment_count_ == 1)
551  ? "Sent first Fragment"
552  : "Sending fragment " + std::to_string(fragment_count_))
553  << " with SeqID " << sequence_id << ".";
554  }
555  if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
556  frags.clear();
557  std::this_thread::yield();
558  }
559 
560  sender_ptr_.reset(nullptr);
561 
562  // 11-May-2015, KAB: call MetricManager::do_stop whenever we exit the
563  // processing fragments loop so that metrics correctly go to zero when
564  // there is no data flowing
565  metricMan->do_stop();
566 
567  TLOG(TLVL_DEBUG + 32) << "send_fragments loop end";
568 }
569 
570 std::string artdaq::BoardReaderCore::report(std::string const& which) const
571 {
572  std::string resultString;
573 
574  // pass the request to the FragmentGenerator instance, if it's available
575  if (generator_ptr_ != nullptr && which != "core")
576  {
577  resultString = generator_ptr_->ReportCmd(which);
578  if (resultString.length() > 0) { return resultString; }
579  }
580 
581  // handle the request at this level, if we can
582  // --> nothing here yet
583 
584  // if we haven't been able to come up with any report so far, say so
585  std::string tmpString = app_name + " run number = ";
586  tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
587 
588  tmpString.append(", Sent Fragment count = ");
589  tmpString.append(boost::lexical_cast<std::string>(fragment_count_));
590  if (which == "core") {
591  // do nothing
592  }
593 //-----------------------------------------------------------------------------
594 // P.Murat: add statistics report, the const/non const confusion to be cleaned up
595 // by the maintainers
596 //-----------------------------------------------------------------------------
597  else if (which == "stats") {
598  auto non_const_this = (artdaq::BoardReaderCore*) this;
599  tmpString += ", "+non_const_this->buildStatisticsString_();
600  }
601  else {
602  tmpString.append(". Command=\"" + which + "\" is not currently supported.");
603  }
604  return tmpString;
605 }
606 
607 bool artdaq::BoardReaderCore::metaCommand(std::string const& command, std::string const& arg)
608 {
609  TLOG(TLVL_DEBUG + 32) << "metaCommand method called with "
610  << "command = \"" << command << "\""
611  << ", arg = \"" << arg << "\""
612  << ".";
613 
614  if (generator_ptr_)
615  {
616  return generator_ptr_->metaCommand(command, arg);
617  }
618 
619  return true;
620 }
621 
622 std::string artdaq::BoardReaderCore::buildStatisticsString_()
623 {
624  std::ostringstream oss;
625  double fragmentsGeneratedCount = 1.0;
626  double fragmentsOutputCount = 1.0;
627  oss << app_name << " statistics:" << std::endl;
628 
629  oss << " Fragments read: ";
630  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
631  if (mqPtr.get() != nullptr)
632  {
633  artdaq::MonitoredQuantityStats stats;
634  mqPtr->getStats(stats);
635  oss << stats.recentValueSum << " fragments generated at "
636  << stats.recentSampleRate << " getNext calls/sec, fragment rate = "
637  << stats.recentValueRate << " fragments/sec, monitor window = "
638  << stats.recentDuration << " sec, min::max read size = "
639  << stats.recentValueMin
640  << "::"
641  << stats.recentValueMax
642  << " fragments";
643  fragmentsGeneratedCount = std::max(double(stats.recentSampleCount), 1.0);
644  oss << " Average times per fragment: ";
645  if (stats.recentSampleRate > 0.0)
646  {
647  oss << " elapsed time = "
648  << (1.0 / stats.recentSampleRate) << " sec";
649  }
650  }
651 
652  oss << std::endl;
653  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
654  if (mqPtr.get() != nullptr)
655  {
656  artdaq::MonitoredQuantityStats stats;
657  mqPtr->getStats(stats);
658  oss << " Fragment output statistics: "
659  << stats.recentSampleCount << " fragments sent at "
660  << stats.recentSampleRate << " fragments/sec, effective data rate = "
661  << (stats.recentValueRate / 1024.0 / 1024.0) << " MB/sec, monitor window = "
662  << stats.recentDuration << " sec, min::max event size = "
663  << (stats.recentValueMin / 1024.0 / 1024.0)
664  << "::"
665  << (stats.recentValueMax / 1024.0 / 1024.0)
666  << " MB" << std::endl;
667  fragmentsOutputCount = std::max(double(stats.recentSampleCount), 1.0);
668  }
669 
670  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
671  // denominator of the calculations below is important because the way that
672  // the accumulation of these statistics is done is not fragment-by-fragment
673  // but read-by-read (where each read can contain multiple fragments).
674  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
675  // fragment, but we'll leave the calculation the same. (The alternative
676  // would be to use recentValueAverage().)
677 
678  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
679  if (mqPtr.get() != nullptr)
680  {
681  oss << " Input wait time = "
682  << (mqPtr->getRecentValueSum() / fragmentsGeneratedCount) << " s/fragment";
683  }
684  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(BUFFER_WAIT_STAT_KEY);
685  if (mqPtr.get() != 0)
686  {
687  oss << ", buffer wait time = "
688  << (mqPtr->getRecentValueSum() / fragmentsGeneratedCount) << " s/fragment";
689  }
690  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(REQUEST_WAIT_STAT_KEY);
691  if (mqPtr.get() != 0)
692  {
693  oss << ", request wait time = "
694  << (mqPtr->getRecentValueSum() / fragmentsOutputCount) << " s/fragment";
695  }
696 
697  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
698  if (mqPtr.get() != nullptr)
699  {
700  oss << ", output wait time = "
701  << (mqPtr->getRecentValueSum() / fragmentsOutputCount) << " s/fragment";
702  }
703 //-----------------------------------------------------------------------------
704 // 2024-01-13 P.Murat: add SHM data
705 //-----------------------------------------------------------------------------
706  oss << fragment_buffer_ptr_->getStatReport();
707 
708  return oss.str();
709 }
710 
711 void artdaq::BoardReaderCore::sendMetrics_()
712 {
713  // TLOG(TLVL_DEBUG + 32) << "Sending metrics " << __LINE__ ;
714  double fragmentCount = 1.0;
715  artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
716  if (mqPtr.get() != nullptr)
717  {
718  artdaq::MonitoredQuantityStats stats;
719  mqPtr->getStats(stats);
720  fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
721  metricMan->sendMetric("Fragment Count", stats.fullSampleCount, "fragments", 1, MetricMode::LastPoint);
722  metricMan->sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
723  metricMan->sendMetric("Average Fragment Size", stats.recentValueAverage, "bytes/fragment", 2, MetricMode::Average);
724  metricMan->sendMetric("Data Rate", stats.recentValueRate, "bytes/sec", 2, MetricMode::Average);
725  }
726 
727  // 31-Dec-2014, KAB - Just a reminder that using "fragmentCount" in the
728  // denominator of the calculations below is important because the way that
729  // the accumulation of these statistics is done is not fragment-by-fragment
730  // but read-by-read (where each read can contain multiple fragments).
731  // 29-Aug-2016, KAB - BRSYNC_WAIT and OUTPUT_WAIT are now done fragment-by-
732  // fragment, but we'll leave the calculation the same. (The alternative
733  // would be to use recentValueAverage().)
734 
735  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
736  if (mqPtr.get() != nullptr)
737  {
738  metricMan->sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
739  }
740 
741  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(BUFFER_WAIT_STAT_KEY);
742  if (mqPtr.get() != 0)
743  {
744  metricMan->sendMetric("Avg Buffer Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
745  }
746  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(REQUEST_WAIT_STAT_KEY);
747  if (mqPtr.get() != 0)
748  {
749  metricMan->sendMetric("Avg Request Response Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
750  }
751  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
752  if (mqPtr.get() != nullptr)
753  {
754  metricMan->sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
755  }
756 
757  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
758  if (mqPtr.get() != nullptr)
759  {
760  metricMan->sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
761  }
762 }
static const std::string BUFFER_WAIT_STAT_KEY
Key for the Fragment Buffer Wait MonitoredQuantity.
void addMonitoredQuantityName(std::string const &statKey)
Add a MonitoredQuantity name to the list.
Commandable is the base class for all artdaq components which implement the artdaq state machine...
Definition: Commandable.hh:22
bool initialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Initialize the BoardReaderCore.
static const std::string FRAGMENTS_PROCESSED_STAT_KEY
Key for the Fragments Processed MonitoredQuantity.
bool reinitialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Reinitialize the BoardReader. Forwards to initialize().
static const std::string INPUT_WAIT_STAT_KEY
Key for the Input Wait MonitoredQuantity.
bool stop(uint64_t timeout, uint64_t timestamp)
Stop the BoardReader, and the CommandableFragmentGenerator.
BoardReaderCore implements the state machine for the BoardReader artdaq application. It contains a CommandableFragmentGenerator, which generates Fragments which are then sent to a DataSenderManager by BoardReaderCore.
virtual ~BoardReaderCore()
BoardReaderCore Destructor.
static std::string CopyStatusToString(CopyStatus in)
Convert a CopyStatus variable to its string representation.
BoardReaderCore(Commandable &parent_application)
BoardReaderCore Constructor.
std::unique_ptr< CommandableFragmentGenerator > makeCommandableFragmentGenerator(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps)
Load a CommandableFragmentGenerator plugin.
Receive data requests and make them available to CommandableFragmentGenerator or other interested par...
static const std::string FRAGMENTS_PER_READ_STAT_KEY
Key for the Fragments Per Read MonitoredQuantity.
static const std::string REQUEST_WAIT_STAT_KEY
Key for the Request Buffer Wait MonitoredQuantity.
static const std::string OUTPUT_WAIT_STAT_KEY
Key for the Output Wait MonitoredQuantity.
void send_fragments()
Main working loop of the BoardReaderCore, pt. 2.
bool soft_initialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Soft-Initialize the BoardReader. Forwards to initialize().
std::string report(std::string const &which) const
Send a report on a given run-time quantity.
bool shutdown(uint64_t timeout)
Shutdown the BoardReader, and the CommandableFragmentGenerator.
bool start(art::RunID id, uint64_t timeout, uint64_t timestamp)
Start the BoardReader, and the CommandableFragmentGenerator.
bool resume(uint64_t timeout, uint64_t timestamp)
Resume the BoardReader, and the CommandableFragmentGenerator.
void receive_fragments()
Main working loop of the BoardReaderCore.
bool pause(uint64_t timeout, uint64_t timestamp)
Pause the BoardReader, and the CommandableFragmentGenerator.
bool metaCommand(std::string const &command, std::string const &arg)
Run a user-defined command on the CommandableFragmentGenerator.