00001 #define TRACE_NAME "BoardReaderCore"
00002 #include "tracemf.h"
00003 #include "artdaq/Application/TaskType.hh"
00004 #include "artdaq/Application/BoardReaderCore.hh"
00005 #include "artdaq-core/Data/Fragment.hh"
00006 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00007 #include "artdaq/Application/makeCommandableFragmentGenerator.hh"
00008 #include "canvas/Utilities/Exception.h"
00009 #include "cetlib/exception.h"
00010 #include <pthread.h>
00011 #include <sched.h>
00012 #include <algorithm>
00013
00014 const std::string artdaq::BoardReaderCore::
00015 FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
00016 const std::string artdaq::BoardReaderCore::
00017 INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
00018 const std::string artdaq::BoardReaderCore::
00019 BRSYNC_WAIT_STAT_KEY("BoardReaderCoreBRSyncWaitTime");
00020 const std::string artdaq::BoardReaderCore::
00021 OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
00022 const std::string artdaq::BoardReaderCore::
00023 FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
00024
00025 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
00026
00027 artdaq::BoardReaderCore::BoardReaderCore(Commandable& parent_application,
00028 int rank, std::string name) :
00029 parent_application_(parent_application)
00030
00031 , generator_ptr_(nullptr)
00032 , name_(name)
00033 , stop_requested_(false)
00034 , pause_requested_(false)
00035 {
00036 TLOG_DEBUG(name_) << "Constructor" << TLOG_ENDL;
00037 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PROCESSED_STAT_KEY);
00038 statsHelper_.addMonitoredQuantityName(INPUT_WAIT_STAT_KEY);
00039 statsHelper_.addMonitoredQuantityName(BRSYNC_WAIT_STAT_KEY);
00040 statsHelper_.addMonitoredQuantityName(OUTPUT_WAIT_STAT_KEY);
00041 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PER_READ_STAT_KEY);
00042 metricMan = &metricMan_;
00043 my_rank = rank;
00044 }
00045
00046 artdaq::BoardReaderCore::~BoardReaderCore()
00047 {
00048 TLOG_DEBUG(name_) << "Destructor" << TLOG_ENDL;
00049 }
00050
00051 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00052 {
00053 TLOG_DEBUG(name_) << "initialize method called with " << "ParameterSet = \"" << pset.to_string() << "\"." << TLOG_ENDL;
00054
00055
00056 fhicl::ParameterSet daq_pset;
00057 try
00058 {
00059 daq_pset = pset.get<fhicl::ParameterSet>("daq");
00060 }
00061 catch (...)
00062 {
00063 TLOG_ERROR(name_)
00064 << "Unable to find the DAQ parameters in the initialization "
00065 << "ParameterSet: \"" + pset.to_string() + "\"." << TLOG_ENDL;
00066 return false;
00067 }
00068 fhicl::ParameterSet fr_pset;
00069 try
00070 {
00071 fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
00072 data_pset_ = fr_pset;
00073 }
00074 catch (...)
00075 {
00076 TLOG_ERROR(name_)
00077 << "Unable to find the fragment_receiver parameters in the DAQ "
00078 << "initialization ParameterSet: \"" + daq_pset.to_string() + "\"." << TLOG_ENDL;
00079 return false;
00080 }
00081
00082
00083 fhicl::ParameterSet metric_pset;
00084 try
00085 {
00086 metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
00087 }
00088 catch (...) {}
00089
00090 if (metric_pset.is_empty())
00091 {
00092 TLOG_INFO(name_) << "No metric plugins appear to be defined" << TLOG_ENDL;
00093 }
00094 try
00095 {
00096 metricMan_.initialize(metric_pset, name_);
00097 }
00098 catch (...)
00099 {
00100 ExceptionHandler(ExceptionHandlerRethrow::no,
00101 "Error loading metrics in BoardReaderCore::initialize()");
00102 }
00103
00104
00105 std::string frag_gen_name = fr_pset.get<std::string>("generator", "");
00106 if (frag_gen_name.length() == 0)
00107 {
00108 TLOG_ERROR(name_)
00109 << "No fragment generator (parameter name = \"generator\") was "
00110 << "specified in the fragment_receiver ParameterSet. The "
00111 << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\"." << TLOG_ENDL;
00112 return false;
00113 }
00114
00115 try
00116 {
00117 generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
00118 }
00119 catch (...)
00120 {
00121 std::stringstream exception_string;
00122 exception_string << "Exception thrown during initialization of fragment generator of type \""
00123 << frag_gen_name << "\"";
00124
00125 ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
00126
00127 TLOG_DEBUG(name_) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string() << TLOG_ENDL;
00128
00129 return false;
00130 }
00131 metricMan_.setPrefix(generator_ptr_->metricsReportingInstanceName());
00132
00133 rt_priority_ = fr_pset.get<int>("rt_priority", 0);
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176 statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
00177
00178
00179 skip_seqId_test_ = (generator_ptr_->fragmentIDs().size() > 1);
00180
00181 return true;
00182 }
00183
00184 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
00185 {
00186 stop_requested_.store(false);
00187 pause_requested_.store(false);
00188
00189 fragment_count_ = 0;
00190 prev_seq_id_ = 0;
00191 statsHelper_.resetStatistics();
00192
00193 metricMan_.do_start();
00194 generator_ptr_->StartCmd(id.run(), timeout, timestamp);
00195 run_id_ = id;
00196
00197 TLOG_DEBUG(name_) << "Started run " << run_id_.run() <<
00198 ", timeout = " << timeout << ", timestamp = " << timestamp << TLOG_ENDL;
00199 return true;
00200 }
00201
00202 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
00203 {
00204 TLOG_DEBUG(name_) << "Stopping run " << run_id_.run()
00205 << " after " << fragment_count_
00206 << " fragments." << TLOG_ENDL;
00207 stop_requested_.store(true);
00208 generator_ptr_->StopCmd(timeout, timestamp);
00209 return true;
00210 }
00211
00212 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
00213 {
00214 TLOG_DEBUG(name_) << "Pausing run " << run_id_.run()
00215 << " after " << fragment_count_
00216 << " fragments." << TLOG_ENDL;
00217 pause_requested_.store(true);
00218 generator_ptr_->PauseCmd(timeout, timestamp);
00219 return true;
00220 }
00221
00222 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
00223 {
00224 TLOG_DEBUG(name_) << "Resuming run " << run_id_.run() << TLOG_ENDL;
00225 pause_requested_.store(false);
00226 metricMan_.do_start();
00227 generator_ptr_->ResumeCmd(timeout, timestamp);
00228 return true;
00229 }
00230
00231 bool artdaq::BoardReaderCore::shutdown(uint64_t)
00232 {
00233 generator_ptr_->joinThreads();
00234 generator_ptr_.reset(nullptr);
00235 metricMan_.shutdown();
00236 return true;
00237 }
00238
00239 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00240 {
00241 TLOG_DEBUG(name_) << "soft_initialize method called with "
00242 << "ParameterSet = \"" << pset.to_string()
00243 << "\"." << TLOG_ENDL;
00244 return true;
00245 }
00246
00247 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00248 {
00249 TLOG_DEBUG(name_) << "reinitialize method called with "
00250 << "ParameterSet = \"" << pset.to_string()
00251 << "\"." << TLOG_ENDL;
00252 return true;
00253 }
00254
00255 void artdaq::BoardReaderCore::process_fragments()
00256 {
00257 if (rt_priority_ > 0)
00258 {
00259 #pragma GCC diagnostic push
00260 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00261 sched_param s_param = {};
00262 s_param.sched_priority = rt_priority_;
00263 if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
00264 TLOG_WARNING(name_) << "setting realtime priority failed" << TLOG_ENDL;
00265 #pragma GCC diagnostic pop
00266 }
00267
00268
00269
00270
00271 if (rt_priority_ > 0)
00272 {
00273 #pragma GCC diagnostic push
00274 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00275 sched_param s_param = {};
00276 s_param.sched_priority = rt_priority_;
00277 int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
00278 if (status != 0)
00279 {
00280 TLOG_ERROR(name_)
00281 << "Failed to set realtime priority to " << rt_priority_
00282 << ", return code = " << status << TLOG_ENDL;
00283 }
00284 #pragma GCC diagnostic pop
00285 }
00286
00287 TLOG_DEBUG(name_) << "Initializing DataSenderManager. my_rank=" << my_rank << TLOG_ENDL;
00288 sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
00289
00290
00291
00292 TLOG_DEBUG(name_) << "Waiting for first fragment." << TLOG_ENDL;
00293 artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
00294 double delta_time;
00295 artdaq::FragmentPtrs frags;
00296 bool active = true;
00297
00298
00299 while (active)
00300 {
00301 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00302
00303 TRACE(18, name_ + "::process_fragments getNext start");
00304 active = generator_ptr_->getNext(frags);
00305 TRACE(18, name_ + "::process_fragments getNext done (active=%i)", active);
00306
00307
00308
00309
00310
00311
00312 if (!active && generator_ptr_->exception())
00313 {
00314 parent_application_.in_run_failure();
00315 }
00316
00317 delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
00318 statsHelper_.addSample(INPUT_WAIT_STAT_KEY, delta_time);
00319
00320 TLOG_ARB(16,name_) << "process_fragments INPUT_WAIT="<<std::to_string( delta_time) << TLOG_ENDL;
00321
00322 if (!active) { break; }
00323 statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
00324
00325 for (auto& fragPtr : frags)
00326 {
00327 if (!fragPtr.get())
00328 {
00329 TLOG_WARNING(name_) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
00330 << "This is most likely caused by a problem with the Fragment Generator!" << TLOG_ENDL;
00331 continue;
00332 }
00333 artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
00334 statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->size());
00335
00336 if ((fragment_count_ % 250) == 0)
00337 {
00338 TLOG_DEBUG(name_)
00339 << "Sending fragment " << fragment_count_
00340 << " (%250) with sequence id " << sequence_id << "." << TLOG_ENDL;
00341 }
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440 if (!skip_seqId_test_ && abs(sequence_id - prev_seq_id_) > 1)
00441 {
00442 TLOG_WARNING(name_)
00443 << "Missing sequence IDs: current sequence ID = "
00444 << sequence_id << ", previous sequence ID = "
00445 << prev_seq_id_ << "." << TLOG_ENDL;
00446 }
00447 prev_seq_id_ = sequence_id;
00448
00449 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00450 TLOG_ARB(17,name_) << "process_fragments seq="<< std::to_string(sequence_id) << " sendFragment start" << TLOG_ENDL;
00451 auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
00452 TLOG_ARB(17, name_) << "process_fragments seq=" << std::to_string(sequence_id) << " sendFragment done (res="<< res<<")"<<TLOG_ENDL;
00453 ++fragment_count_;
00454 statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
00455 artdaq::MonitoredQuantity::getCurrentTime() - startTime);
00456
00457 bool readyToReport = statsHelper_.readyToReport(fragment_count_);
00458 if (readyToReport)
00459 {
00460 std::string statString = buildStatisticsString_();
00461 TLOG_DEBUG(name_) << statString << TLOG_ENDL;
00462 }
00463 if (fragment_count_ == 1 || readyToReport)
00464 {
00465 TLOG_DEBUG(name_)
00466 << "Sending fragment " << fragment_count_
00467 << " with sequence id " << sequence_id << "." << TLOG_ENDL;
00468 }
00469 }
00470 if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
00471 frags.clear();
00472 }
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482 metricMan_.do_stop();
00483
00484 sender_ptr_.reset(nullptr);
00485 }
00486
00487 std::string artdaq::BoardReaderCore::report(std::string const& which) const
00488 {
00489 std::string resultString;
00490
00491
00492 if (generator_ptr_.get() != 0)
00493 {
00494 resultString = generator_ptr_->ReportCmd(which);
00495 if (resultString.length() > 0) { return resultString; }
00496 }
00497
00498
00499
00500
00501
00502 std::string tmpString = name_ + " run number = ";
00503 tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
00504 tmpString.append(". Command=\"" + which + "\" is not currently supported.");
00505 return tmpString;
00506 }
00507
00508 std::string artdaq::BoardReaderCore::buildStatisticsString_()
00509 {
00510 std::ostringstream oss;
00511 oss << name_ << " statistics:" << std::endl;
00512
00513 double fragmentCount = 1.0;
00514 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00515 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00516 if (mqPtr.get() != 0)
00517 {
00518 artdaq::MonitoredQuantityStats stats;
00519 mqPtr->getStats(stats);
00520 oss << " Fragment statistics: "
00521 << stats.recentSampleCount << " fragments received at "
00522 << stats.recentSampleRate << " fragments/sec, effective data rate = "
00523 << (stats.recentValueRate * sizeof(artdaq::RawDataType)
00524 / 1024.0 / 1024.0) << " MB/sec, monitor window = "
00525 << stats.recentDuration << " sec, min::max event size = "
00526 << (stats.recentValueMin * sizeof(artdaq::RawDataType)
00527 / 1024.0 / 1024.0)
00528 << "::"
00529 << (stats.recentValueMax * sizeof(artdaq::RawDataType)
00530 / 1024.0 / 1024.0)
00531 << " MB" << std::endl;
00532 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00533 oss << " Average times per fragment: ";
00534 if (stats.recentSampleRate > 0.0)
00535 {
00536 oss << " elapsed time = "
00537 << (1.0 / stats.recentSampleRate) << " sec";
00538 }
00539 }
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549 mqPtr = artdaq::StatisticsCollection::getInstance().
00550 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00551 if (mqPtr.get() != 0)
00552 {
00553 oss << ", input wait time = "
00554 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00555 }
00556
00557 mqPtr = artdaq::StatisticsCollection::getInstance().
00558 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00559 if (mqPtr.get() != 0)
00560 {
00561 oss << ", BRsync wait time = "
00562 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00563 }
00564
00565 mqPtr = artdaq::StatisticsCollection::getInstance().
00566 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00567 if (mqPtr.get() != 0)
00568 {
00569 oss << ", output wait time = "
00570 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00571 }
00572
00573 oss << std::endl << " Fragments per read: ";
00574 mqPtr = artdaq::StatisticsCollection::getInstance().
00575 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00576 if (mqPtr.get() != 0)
00577 {
00578 artdaq::MonitoredQuantityStats stats;
00579 mqPtr->getStats(stats);
00580 oss << "average = "
00581 << stats.recentValueAverage
00582 << ", min::max = "
00583 << stats.recentValueMin
00584 << "::"
00585 << stats.recentValueMax;
00586 }
00587
00588 return oss.str();
00589 }
00590
00591 void artdaq::BoardReaderCore::sendMetrics_()
00592 {
00593
00594 double fragmentCount = 1.0;
00595 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00596 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00597 if (mqPtr.get() != 0)
00598 {
00599 artdaq::MonitoredQuantityStats stats;
00600 mqPtr->getStats(stats);
00601 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00602 metricMan_.sendMetric("Fragment Count", static_cast<unsigned long>(stats.fullSampleCount), "fragments", 1, MetricMode::Accumulate);
00603 metricMan_.sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
00604 metricMan_.sendMetric("Average Fragment Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/fragment", 2, MetricMode::Average);
00605 metricMan_.sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
00606 }
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616 mqPtr = artdaq::StatisticsCollection::getInstance().
00617 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00618 if (mqPtr.get() != 0)
00619 {
00620 metricMan_.sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00621 }
00622
00623 mqPtr = artdaq::StatisticsCollection::getInstance().
00624 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00625 if (mqPtr.get() != 0)
00626 {
00627 metricMan_.sendMetric("Avg BoardReader Sync Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00628 }
00629
00630 mqPtr = artdaq::StatisticsCollection::getInstance().
00631 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00632 if (mqPtr.get() != 0)
00633 {
00634 metricMan_.sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00635 }
00636
00637 mqPtr = artdaq::StatisticsCollection::getInstance().
00638 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00639 if (mqPtr.get() != 0)
00640 {
00641 metricMan_.sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
00642 }
00643 }