00001 #define TRACE_NAME "BoardReaderCore"
00002 #include "tracemf.h"
00003 #include "artdaq/Application/TaskType.hh"
00004 #include "artdaq/Application/BoardReaderCore.hh"
00005 #include "artdaq-core/Data/Fragment.hh"
00006 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00007 #include "artdaq/Application/makeCommandableFragmentGenerator.hh"
00008 #include "canvas/Utilities/Exception.h"
00009 #include "cetlib/exception.h"
00010 #include <pthread.h>
00011 #include <sched.h>
00012 #include <algorithm>
00013
00014 const std::string artdaq::BoardReaderCore::
00015 FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
00016 const std::string artdaq::BoardReaderCore::
00017 INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
00018 const std::string artdaq::BoardReaderCore::
00019 BRSYNC_WAIT_STAT_KEY("BoardReaderCoreBRSyncWaitTime");
00020 const std::string artdaq::BoardReaderCore::
00021 OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
00022 const std::string artdaq::BoardReaderCore::
00023 FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
00024
00025 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
00026
00027 artdaq::BoardReaderCore::BoardReaderCore(Commandable& parent_application) :
00028 parent_application_(parent_application)
00029
00030 , generator_ptr_(nullptr)
00031 , stop_requested_(false)
00032 , pause_requested_(false)
00033 {
00034 TLOG_DEBUG(app_name) << "Constructor" << TLOG_ENDL;
00035 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PROCESSED_STAT_KEY);
00036 statsHelper_.addMonitoredQuantityName(INPUT_WAIT_STAT_KEY);
00037 statsHelper_.addMonitoredQuantityName(BRSYNC_WAIT_STAT_KEY);
00038 statsHelper_.addMonitoredQuantityName(OUTPUT_WAIT_STAT_KEY);
00039 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PER_READ_STAT_KEY);
00040 metricMan = &metricMan_;
00041 }
00042
00043 artdaq::BoardReaderCore::~BoardReaderCore()
00044 {
00045 TLOG_DEBUG(app_name) << "Destructor" << TLOG_ENDL;
00046 }
00047
00048 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00049 {
00050 TLOG_DEBUG(app_name) << "initialize method called with " << "ParameterSet = \"" << pset.to_string() << "\"." << TLOG_ENDL;
00051
00052
00053 fhicl::ParameterSet daq_pset;
00054 try
00055 {
00056 daq_pset = pset.get<fhicl::ParameterSet>("daq");
00057 }
00058 catch (...)
00059 {
00060 TLOG_ERROR(app_name)
00061 << "Unable to find the DAQ parameters in the initialization "
00062 << "ParameterSet: \"" + pset.to_string() + "\"." << TLOG_ENDL;
00063 return false;
00064 }
00065 fhicl::ParameterSet fr_pset;
00066 try
00067 {
00068 fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
00069 data_pset_ = fr_pset;
00070 }
00071 catch (...)
00072 {
00073 TLOG_ERROR(app_name)
00074 << "Unable to find the fragment_receiver parameters in the DAQ "
00075 << "initialization ParameterSet: \"" + daq_pset.to_string() + "\"." << TLOG_ENDL;
00076 return false;
00077 }
00078
00079
00080 fhicl::ParameterSet metric_pset;
00081 try
00082 {
00083 metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
00084 }
00085 catch (...) {}
00086
00087 if (metric_pset.is_empty())
00088 {
00089 TLOG_INFO(app_name) << "No metric plugins appear to be defined" << TLOG_ENDL;
00090 }
00091 try
00092 {
00093 metricMan_.initialize(metric_pset, app_name);
00094 }
00095 catch (...)
00096 {
00097 ExceptionHandler(ExceptionHandlerRethrow::no,
00098 "Error loading metrics in BoardReaderCore::initialize()");
00099 }
00100
00101
00102 std::string frag_gen_name = fr_pset.get<std::string>("generator", "");
00103 if (frag_gen_name.length() == 0)
00104 {
00105 TLOG_ERROR(app_name)
00106 << "No fragment generator (parameter name = \"generator\") was "
00107 << "specified in the fragment_receiver ParameterSet. The "
00108 << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\"." << TLOG_ENDL;
00109 return false;
00110 }
00111
00112 try
00113 {
00114 generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
00115 }
00116 catch (...)
00117 {
00118 std::stringstream exception_string;
00119 exception_string << "Exception thrown during initialization of fragment generator of type \""
00120 << frag_gen_name << "\"";
00121
00122 ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
00123
00124 TLOG_DEBUG(app_name) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string() << TLOG_ENDL;
00125
00126 return false;
00127 }
00128 metricMan_.setPrefix(generator_ptr_->metricsReportingInstanceName());
00129
00130 rt_priority_ = fr_pset.get<int>("rt_priority", 0);
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173 statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
00174
00175
00176 skip_seqId_test_ = (generator_ptr_->fragmentIDs().size() > 1);
00177
00178 return true;
00179 }
00180
00181 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
00182 {
00183 stop_requested_.store(false);
00184 pause_requested_.store(false);
00185
00186 fragment_count_ = 0;
00187 prev_seq_id_ = 0;
00188 statsHelper_.resetStatistics();
00189
00190 metricMan_.do_start();
00191 generator_ptr_->StartCmd(id.run(), timeout, timestamp);
00192 run_id_ = id;
00193
00194 TLOG_DEBUG(app_name) << "Started run " << run_id_.run() <<
00195 ", timeout = " << timeout << ", timestamp = " << timestamp << TLOG_ENDL;
00196 return true;
00197 }
00198
00199 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
00200 {
00201 TLOG_DEBUG(app_name) << "Stopping run " << run_id_.run()
00202 << " after " << fragment_count_
00203 << " fragments." << TLOG_ENDL;
00204 stop_requested_.store(true);
00205 generator_ptr_->StopCmd(timeout, timestamp);
00206 return true;
00207 }
00208
00209 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
00210 {
00211 TLOG_DEBUG(app_name) << "Pausing run " << run_id_.run()
00212 << " after " << fragment_count_
00213 << " fragments." << TLOG_ENDL;
00214 pause_requested_.store(true);
00215 generator_ptr_->PauseCmd(timeout, timestamp);
00216 return true;
00217 }
00218
00219 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
00220 {
00221 TLOG_DEBUG(app_name) << "Resuming run " << run_id_.run() << TLOG_ENDL;
00222 pause_requested_.store(false);
00223 metricMan_.do_start();
00224 generator_ptr_->ResumeCmd(timeout, timestamp);
00225 return true;
00226 }
00227
00228 bool artdaq::BoardReaderCore::shutdown(uint64_t)
00229 {
00230 generator_ptr_->joinThreads();
00231 generator_ptr_.reset(nullptr);
00232 metricMan_.shutdown();
00233 return true;
00234 }
00235
00236 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00237 {
00238 TLOG_DEBUG(app_name) << "soft_initialize method called with "
00239 << "ParameterSet = \"" << pset.to_string()
00240 << "\"." << TLOG_ENDL;
00241 return true;
00242 }
00243
00244 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00245 {
00246 TLOG_DEBUG(app_name) << "reinitialize method called with "
00247 << "ParameterSet = \"" << pset.to_string()
00248 << "\"." << TLOG_ENDL;
00249 return true;
00250 }
00251
00252 void artdaq::BoardReaderCore::process_fragments()
00253 {
00254 if (rt_priority_ > 0)
00255 {
00256 #pragma GCC diagnostic push
00257 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00258 sched_param s_param = {};
00259 s_param.sched_priority = rt_priority_;
00260 if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
00261 TLOG_WARNING(app_name) << "setting realtime priority failed" << TLOG_ENDL;
00262 #pragma GCC diagnostic pop
00263 }
00264
00265
00266
00267
00268 if (rt_priority_ > 0)
00269 {
00270 #pragma GCC diagnostic push
00271 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00272 sched_param s_param = {};
00273 s_param.sched_priority = rt_priority_;
00274 int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
00275 if (status != 0)
00276 {
00277 TLOG_ERROR(app_name)
00278 << "Failed to set realtime priority to " << rt_priority_
00279 << ", return code = " << status << TLOG_ENDL;
00280 }
00281 #pragma GCC diagnostic pop
00282 }
00283
00284 TLOG_DEBUG(app_name) << "Initializing DataSenderManager. my_rank=" << my_rank << TLOG_ENDL;
00285 sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
00286
00287
00288
00289 TLOG_DEBUG(app_name) << "Waiting for first fragment." << TLOG_ENDL;
00290 artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
00291 double delta_time;
00292 artdaq::FragmentPtrs frags;
00293 bool active = true;
00294
00295
00296 while (active)
00297 {
00298 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00299
00300 TRACE(18, app_name + "::process_fragments getNext start");
00301 active = generator_ptr_->getNext(frags);
00302 TRACE(18, app_name + "::process_fragments getNext done (active=%i)", active);
00303
00304
00305
00306
00307
00308
00309 if (!active && generator_ptr_->exception())
00310 {
00311 parent_application_.in_run_failure();
00312 }
00313
00314 delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
00315 statsHelper_.addSample(INPUT_WAIT_STAT_KEY, delta_time);
00316
00317 TLOG_ARB(16,app_name) << "process_fragments INPUT_WAIT="<<std::to_string( delta_time) << TLOG_ENDL;
00318
00319 if (!active) { break; }
00320 statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
00321
00322 for (auto& fragPtr : frags)
00323 {
00324 if (!fragPtr.get())
00325 {
00326 TLOG_WARNING(app_name) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
00327 << "This is most likely caused by a problem with the Fragment Generator!" << TLOG_ENDL;
00328 continue;
00329 }
00330 artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
00331 statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->size());
00332
00333 if ((fragment_count_ % 250) == 0)
00334 {
00335 TLOG_DEBUG(app_name)
00336 << "Sending fragment " << fragment_count_
00337 << " (%250) with sequence id " << sequence_id << "." << TLOG_ENDL;
00338 }
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437 if (!skip_seqId_test_ && abs(sequence_id - prev_seq_id_) > 1)
00438 {
00439 TLOG_WARNING(app_name)
00440 << "Missing sequence IDs: current sequence ID = "
00441 << sequence_id << ", previous sequence ID = "
00442 << prev_seq_id_ << "." << TLOG_ENDL;
00443 }
00444 prev_seq_id_ = sequence_id;
00445
00446 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00447 TLOG_ARB(17,app_name) << "process_fragments seq="<< std::to_string(sequence_id) << " sendFragment start" << TLOG_ENDL;
00448 auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
00449 TLOG_ARB(17, app_name) << "process_fragments seq=" << std::to_string(sequence_id) << " sendFragment done (res="<< res<<")"<<TLOG_ENDL;
00450 ++fragment_count_;
00451 statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
00452 artdaq::MonitoredQuantity::getCurrentTime() - startTime);
00453
00454 bool readyToReport = statsHelper_.readyToReport(fragment_count_);
00455 if (readyToReport)
00456 {
00457 std::string statString = buildStatisticsString_();
00458 TLOG_DEBUG(app_name) << statString << TLOG_ENDL;
00459 }
00460 if (fragment_count_ == 1 || readyToReport)
00461 {
00462 TLOG_DEBUG(app_name)
00463 << "Sending fragment " << fragment_count_
00464 << " with sequence id " << sequence_id << "." << TLOG_ENDL;
00465 }
00466 }
00467 if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
00468 frags.clear();
00469 }
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479 metricMan_.do_stop();
00480
00481 sender_ptr_.reset(nullptr);
00482 }
00483
00484 std::string artdaq::BoardReaderCore::report(std::string const& which) const
00485 {
00486 std::string resultString;
00487
00488
00489 if (generator_ptr_.get() != 0)
00490 {
00491 resultString = generator_ptr_->ReportCmd(which);
00492 if (resultString.length() > 0) { return resultString; }
00493 }
00494
00495
00496
00497
00498
00499 std::string tmpString = app_name + " run number = ";
00500 tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
00501 tmpString.append(". Command=\"" + which + "\" is not currently supported.");
00502 return tmpString;
00503 }
00504
00505 std::string artdaq::BoardReaderCore::buildStatisticsString_()
00506 {
00507 std::ostringstream oss;
00508 oss << app_name << " statistics:" << std::endl;
00509
00510 double fragmentCount = 1.0;
00511 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00512 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00513 if (mqPtr.get() != 0)
00514 {
00515 artdaq::MonitoredQuantityStats stats;
00516 mqPtr->getStats(stats);
00517 oss << " Fragment statistics: "
00518 << stats.recentSampleCount << " fragments received at "
00519 << stats.recentSampleRate << " fragments/sec, effective data rate = "
00520 << (stats.recentValueRate * sizeof(artdaq::RawDataType)
00521 / 1024.0 / 1024.0) << " MB/sec, monitor window = "
00522 << stats.recentDuration << " sec, min::max event size = "
00523 << (stats.recentValueMin * sizeof(artdaq::RawDataType)
00524 / 1024.0 / 1024.0)
00525 << "::"
00526 << (stats.recentValueMax * sizeof(artdaq::RawDataType)
00527 / 1024.0 / 1024.0)
00528 << " MB" << std::endl;
00529 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00530 oss << " Average times per fragment: ";
00531 if (stats.recentSampleRate > 0.0)
00532 {
00533 oss << " elapsed time = "
00534 << (1.0 / stats.recentSampleRate) << " sec";
00535 }
00536 }
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546 mqPtr = artdaq::StatisticsCollection::getInstance().
00547 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00548 if (mqPtr.get() != 0)
00549 {
00550 oss << ", input wait time = "
00551 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00552 }
00553
00554 mqPtr = artdaq::StatisticsCollection::getInstance().
00555 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00556 if (mqPtr.get() != 0)
00557 {
00558 oss << ", BRsync wait time = "
00559 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00560 }
00561
00562 mqPtr = artdaq::StatisticsCollection::getInstance().
00563 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00564 if (mqPtr.get() != 0)
00565 {
00566 oss << ", output wait time = "
00567 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00568 }
00569
00570 oss << std::endl << " Fragments per read: ";
00571 mqPtr = artdaq::StatisticsCollection::getInstance().
00572 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00573 if (mqPtr.get() != 0)
00574 {
00575 artdaq::MonitoredQuantityStats stats;
00576 mqPtr->getStats(stats);
00577 oss << "average = "
00578 << stats.recentValueAverage
00579 << ", min::max = "
00580 << stats.recentValueMin
00581 << "::"
00582 << stats.recentValueMax;
00583 }
00584
00585 return oss.str();
00586 }
00587
00588 void artdaq::BoardReaderCore::sendMetrics_()
00589 {
00590
00591 double fragmentCount = 1.0;
00592 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00593 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00594 if (mqPtr.get() != 0)
00595 {
00596 artdaq::MonitoredQuantityStats stats;
00597 mqPtr->getStats(stats);
00598 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00599 metricMan_.sendMetric("Fragment Count", static_cast<unsigned long>(stats.fullSampleCount), "fragments", 1, MetricMode::Accumulate);
00600 metricMan_.sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
00601 metricMan_.sendMetric("Average Fragment Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/fragment", 2, MetricMode::Average);
00602 metricMan_.sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
00603 }
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613 mqPtr = artdaq::StatisticsCollection::getInstance().
00614 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00615 if (mqPtr.get() != 0)
00616 {
00617 metricMan_.sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00618 }
00619
00620 mqPtr = artdaq::StatisticsCollection::getInstance().
00621 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00622 if (mqPtr.get() != 0)
00623 {
00624 metricMan_.sendMetric("Avg BoardReader Sync Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00625 }
00626
00627 mqPtr = artdaq::StatisticsCollection::getInstance().
00628 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00629 if (mqPtr.get() != 0)
00630 {
00631 metricMan_.sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00632 }
00633
00634 mqPtr = artdaq::StatisticsCollection::getInstance().
00635 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00636 if (mqPtr.get() != 0)
00637 {
00638 metricMan_.sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
00639 }
00640 }