00001 #define TRACE_NAME "BoardReaderCore"
00002 #include "tracemf.h"
00003 #include "artdaq/Application/TaskType.hh"
00004 #include "artdaq/Application/BoardReaderCore.hh"
00005 #include "artdaq-core/Data/Fragment.hh"
00006 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00007 #include "artdaq/Application/makeCommandableFragmentGenerator.hh"
00008 #include "canvas/Utilities/Exception.h"
00009 #include "cetlib/exception.h"
00010 #include <pthread.h>
00011 #include <sched.h>
00012 #include <algorithm>
00013
00014 const std::string artdaq::BoardReaderCore::
00015 FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
00016 const std::string artdaq::BoardReaderCore::
00017 INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
00018 const std::string artdaq::BoardReaderCore::
00019 BRSYNC_WAIT_STAT_KEY("BoardReaderCoreBRSyncWaitTime");
00020 const std::string artdaq::BoardReaderCore::
00021 OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
00022 const std::string artdaq::BoardReaderCore::
00023 FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
00024
00025 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
00026
00027 artdaq::BoardReaderCore::BoardReaderCore(Commandable& parent_application) :
00028 parent_application_(parent_application)
00029
00030 , generator_ptr_(nullptr)
00031 , stop_requested_(false)
00032 , pause_requested_(false)
00033 {
00034 TLOG_DEBUG(app_name) << "Constructor" << TLOG_ENDL;
00035 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PROCESSED_STAT_KEY);
00036 statsHelper_.addMonitoredQuantityName(INPUT_WAIT_STAT_KEY);
00037 statsHelper_.addMonitoredQuantityName(BRSYNC_WAIT_STAT_KEY);
00038 statsHelper_.addMonitoredQuantityName(OUTPUT_WAIT_STAT_KEY);
00039 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PER_READ_STAT_KEY);
00040 metricMan = &metricMan_;
00041 }
00042
00043 artdaq::BoardReaderCore::~BoardReaderCore()
00044 {
00045 TLOG_DEBUG(app_name) << "Destructor" << TLOG_ENDL;
00046 }
00047
00048 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00049 {
00050 TLOG_DEBUG(app_name) << "initialize method called with " << "ParameterSet = \"" << pset.to_string() << "\"." << TLOG_ENDL;
00051
00052
00053 fhicl::ParameterSet daq_pset;
00054 try
00055 {
00056 daq_pset = pset.get<fhicl::ParameterSet>("daq");
00057 }
00058 catch (...)
00059 {
00060 TLOG_ERROR(app_name)
00061 << "Unable to find the DAQ parameters in the initialization "
00062 << "ParameterSet: \"" + pset.to_string() + "\"." << TLOG_ENDL;
00063 return false;
00064 }
00065 fhicl::ParameterSet fr_pset;
00066 try
00067 {
00068 fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
00069 data_pset_ = fr_pset;
00070 }
00071 catch (...)
00072 {
00073 TLOG_ERROR(app_name)
00074 << "Unable to find the fragment_receiver parameters in the DAQ "
00075 << "initialization ParameterSet: \"" + daq_pset.to_string() + "\"." << TLOG_ENDL;
00076 return false;
00077 }
00078
00079
00080 fhicl::ParameterSet metric_pset;
00081 try
00082 {
00083 metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
00084 }
00085 catch (...) {}
00086
00087 if (metric_pset.is_empty())
00088 {
00089 TLOG_INFO(app_name) << "No metric plugins appear to be defined" << TLOG_ENDL;
00090 }
00091 try
00092 {
00093 metricMan_.initialize(metric_pset, app_name);
00094 }
00095 catch (...)
00096 {
00097 ExceptionHandler(ExceptionHandlerRethrow::no,
00098 "Error loading metrics in BoardReaderCore::initialize()");
00099 }
00100
00101
00102 std::string frag_gen_name = fr_pset.get<std::string>("generator", "");
00103 if (frag_gen_name.length() == 0)
00104 {
00105 TLOG_ERROR(app_name)
00106 << "No fragment generator (parameter name = \"generator\") was "
00107 << "specified in the fragment_receiver ParameterSet. The "
00108 << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\"." << TLOG_ENDL;
00109 return false;
00110 }
00111
00112 try
00113 {
00114 generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
00115 }
00116 catch (...)
00117 {
00118 std::stringstream exception_string;
00119 exception_string << "Exception thrown during initialization of fragment generator of type \""
00120 << frag_gen_name << "\"";
00121
00122 ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
00123
00124 TLOG_DEBUG(app_name) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string() << TLOG_ENDL;
00125
00126 return false;
00127 }
00128 metricMan_.setPrefix(generator_ptr_->metricsReportingInstanceName());
00129
00130 rt_priority_ = fr_pset.get<int>("rt_priority", 0);
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173 statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
00174
00175
00176 skip_seqId_test_ = (generator_ptr_->fragmentIDs().size() > 1);
00177
00178 return true;
00179 }
00180
00181 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
00182 {
00183 stop_requested_.store(false);
00184 pause_requested_.store(false);
00185
00186 fragment_count_ = 0;
00187 prev_seq_id_ = 0;
00188 statsHelper_.resetStatistics();
00189
00190 metricMan_.do_start();
00191 generator_ptr_->StartCmd(id.run(), timeout, timestamp);
00192 run_id_ = id;
00193
00194 TLOG_DEBUG(app_name) << "Started run " << run_id_.run() <<
00195 ", timeout = " << timeout << ", timestamp = " << timestamp << TLOG_ENDL;
00196 return true;
00197 }
00198
00199 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
00200 {
00201 TLOG_DEBUG(app_name) << "Stopping run " << run_id_.run()
00202 << " after " << fragment_count_
00203 << " fragments." << TLOG_ENDL;
00204 stop_requested_.store(true);
00205 generator_ptr_->StopCmd(timeout, timestamp);
00206 return true;
00207 }
00208
00209 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
00210 {
00211 TLOG_DEBUG(app_name) << "Pausing run " << run_id_.run()
00212 << " after " << fragment_count_
00213 << " fragments." << TLOG_ENDL;
00214 pause_requested_.store(true);
00215 generator_ptr_->PauseCmd(timeout, timestamp);
00216 return true;
00217 }
00218
00219 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
00220 {
00221 TLOG_DEBUG(app_name) << "Resuming run " << run_id_.run() << TLOG_ENDL;
00222 pause_requested_.store(false);
00223 metricMan_.do_start();
00224 generator_ptr_->ResumeCmd(timeout, timestamp);
00225 return true;
00226 }
00227
00228 bool artdaq::BoardReaderCore::shutdown(uint64_t)
00229 {
00230 generator_ptr_->joinThreads();
00231 generator_ptr_.reset(nullptr);
00232 metricMan_.shutdown();
00233 return true;
00234 }
00235
00236 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00237 {
00238 TLOG_DEBUG(app_name) << "soft_initialize method called with "
00239 << "ParameterSet = \"" << pset.to_string()
00240 << "\"." << TLOG_ENDL;
00241 return true;
00242 }
00243
00244 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00245 {
00246 TLOG_DEBUG(app_name) << "reinitialize method called with "
00247 << "ParameterSet = \"" << pset.to_string()
00248 << "\"." << TLOG_ENDL;
00249 return true;
00250 }
00251
00252 void artdaq::BoardReaderCore::process_fragments()
00253 {
00254 if (rt_priority_ > 0)
00255 {
00256 #pragma GCC diagnostic push
00257 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00258 sched_param s_param = {};
00259 s_param.sched_priority = rt_priority_;
00260 if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
00261 TLOG_WARNING(app_name) << "setting realtime priority failed" << TLOG_ENDL;
00262 #pragma GCC diagnostic pop
00263 }
00264
00265
00266
00267
00268 if (rt_priority_ > 0)
00269 {
00270 #pragma GCC diagnostic push
00271 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00272 sched_param s_param = {};
00273 s_param.sched_priority = rt_priority_;
00274 int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
00275 if (status != 0)
00276 {
00277 TLOG_ERROR(app_name)
00278 << "Failed to set realtime priority to " << rt_priority_
00279 << ", return code = " << status << TLOG_ENDL;
00280 }
00281 #pragma GCC diagnostic pop
00282 }
00283
00284 TLOG_DEBUG(app_name) << "Initializing DataSenderManager. my_rank=" << my_rank << TLOG_ENDL;
00285 sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
00286
00287 TLOG_DEBUG(app_name) << "Waiting for first fragment." << TLOG_ENDL;
00288 artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
00289 double delta_time;
00290 artdaq::FragmentPtrs frags;
00291 bool active = true;
00292
00293 while (active)
00294 {
00295 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00296
00297 TRACE(18, app_name + "::process_fragments getNext start");
00298 active = generator_ptr_->getNext(frags);
00299 TRACE(18, app_name + "::process_fragments getNext done (active=%i)", active);
00300
00301
00302
00303
00304
00305
00306 if (!active && generator_ptr_ && generator_ptr_->exception())
00307 {
00308 parent_application_.in_run_failure();
00309 }
00310
00311 delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
00312 statsHelper_.addSample(INPUT_WAIT_STAT_KEY, delta_time);
00313
00314 TLOG_ARB(16,app_name) << "process_fragments INPUT_WAIT="<<std::to_string( delta_time) << TLOG_ENDL;
00315
00316 if (!active) { break; }
00317 statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
00318
00319 for (auto& fragPtr : frags)
00320 {
00321 if (!fragPtr.get())
00322 {
00323 TLOG_WARNING(app_name) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
00324 << "This is most likely caused by a problem with the Fragment Generator!" << TLOG_ENDL;
00325 continue;
00326 }
00327 artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
00328 statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->size());
00329
00330 if ((fragment_count_ % 250) == 0)
00331 {
00332 TLOG_DEBUG(app_name)
00333 << "Sending fragment " << fragment_count_
00334 << " (%250) with sequence id " << sequence_id << "." << TLOG_ENDL;
00335 }
00336
00337
00338 if (!skip_seqId_test_ && abs(sequence_id - prev_seq_id_) > 1)
00339 {
00340 TLOG_WARNING(app_name)
00341 << "Missing sequence IDs: current sequence ID = "
00342 << sequence_id << ", previous sequence ID = "
00343 << prev_seq_id_ << "." << TLOG_ENDL;
00344 }
00345 prev_seq_id_ = sequence_id;
00346
00347 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00348 TLOG_ARB(17,app_name) << "process_fragments seq="<< std::to_string(sequence_id) << " sendFragment start" << TLOG_ENDL;
00349 auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
00350 TLOG_ARB(17, app_name) << "process_fragments seq=" << std::to_string(sequence_id) << " sendFragment done (res="<< res<<")"<<TLOG_ENDL;
00351 ++fragment_count_;
00352 statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
00353 artdaq::MonitoredQuantity::getCurrentTime() - startTime);
00354
00355 bool readyToReport = statsHelper_.readyToReport(fragment_count_);
00356 if (readyToReport)
00357 {
00358 std::string statString = buildStatisticsString_();
00359 TLOG_DEBUG(app_name) << statString << TLOG_ENDL;
00360 }
00361 if (fragment_count_ == 1 || readyToReport)
00362 {
00363 TLOG_DEBUG(app_name)
00364 << "Sending fragment " << fragment_count_
00365 << " with sequence id " << sequence_id << "." << TLOG_ENDL;
00366 }
00367 }
00368 if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
00369 frags.clear();
00370 }
00371
00372
00373
00374
00375 metricMan_.do_stop();
00376
00377 sender_ptr_.reset(nullptr);
00378 }
00379
00380 std::string artdaq::BoardReaderCore::report(std::string const& which) const
00381 {
00382 std::string resultString;
00383
00384
00385 if (generator_ptr_.get() != 0)
00386 {
00387 resultString = generator_ptr_->ReportCmd(which);
00388 if (resultString.length() > 0) { return resultString; }
00389 }
00390
00391
00392
00393
00394
00395 std::string tmpString = app_name + " run number = ";
00396 tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
00397 tmpString.append(". Command=\"" + which + "\" is not currently supported.");
00398 return tmpString;
00399 }
00400
00401 bool artdaq::BoardReaderCore::metaCommand(std::string const& command, std::string const& arg)
00402 {
00403 TLOG_DEBUG(app_name) << "metaCommand method called with "
00404 << "command = \"" << command << "\""
00405 << ", arg = \"" << arg << "\""
00406 << "." << TLOG_ENDL;
00407
00408 if (generator_ptr_) return generator_ptr_->metaCommand(command, arg);
00409
00410 return true;
00411 }
00412
00413 std::string artdaq::BoardReaderCore::buildStatisticsString_()
00414 {
00415 std::ostringstream oss;
00416 oss << app_name << " statistics:" << std::endl;
00417
00418 double fragmentCount = 1.0;
00419 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00420 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00421 if (mqPtr.get() != 0)
00422 {
00423 artdaq::MonitoredQuantityStats stats;
00424 mqPtr->getStats(stats);
00425 oss << " Fragment statistics: "
00426 << stats.recentSampleCount << " fragments received at "
00427 << stats.recentSampleRate << " fragments/sec, effective data rate = "
00428 << (stats.recentValueRate * sizeof(artdaq::RawDataType)
00429 / 1024.0 / 1024.0) << " MB/sec, monitor window = "
00430 << stats.recentDuration << " sec, min::max event size = "
00431 << (stats.recentValueMin * sizeof(artdaq::RawDataType)
00432 / 1024.0 / 1024.0)
00433 << "::"
00434 << (stats.recentValueMax * sizeof(artdaq::RawDataType)
00435 / 1024.0 / 1024.0)
00436 << " MB" << std::endl;
00437 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00438 oss << " Average times per fragment: ";
00439 if (stats.recentSampleRate > 0.0)
00440 {
00441 oss << " elapsed time = "
00442 << (1.0 / stats.recentSampleRate) << " sec";
00443 }
00444 }
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454 mqPtr = artdaq::StatisticsCollection::getInstance().
00455 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00456 if (mqPtr.get() != 0)
00457 {
00458 oss << ", input wait time = "
00459 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00460 }
00461
00462 mqPtr = artdaq::StatisticsCollection::getInstance().
00463 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00464 if (mqPtr.get() != 0)
00465 {
00466 oss << ", BRsync wait time = "
00467 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00468 }
00469
00470 mqPtr = artdaq::StatisticsCollection::getInstance().
00471 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00472 if (mqPtr.get() != 0)
00473 {
00474 oss << ", output wait time = "
00475 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00476 }
00477
00478 oss << std::endl << " Fragments per read: ";
00479 mqPtr = artdaq::StatisticsCollection::getInstance().
00480 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00481 if (mqPtr.get() != 0)
00482 {
00483 artdaq::MonitoredQuantityStats stats;
00484 mqPtr->getStats(stats);
00485 oss << "average = "
00486 << stats.recentValueAverage
00487 << ", min::max = "
00488 << stats.recentValueMin
00489 << "::"
00490 << stats.recentValueMax;
00491 }
00492
00493 return oss.str();
00494 }
00495
00496 void artdaq::BoardReaderCore::sendMetrics_()
00497 {
00498
00499 double fragmentCount = 1.0;
00500 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00501 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00502 if (mqPtr.get() != 0)
00503 {
00504 artdaq::MonitoredQuantityStats stats;
00505 mqPtr->getStats(stats);
00506 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00507 metricMan_.sendMetric("Fragment Count", static_cast<unsigned long>(stats.fullSampleCount), "fragments", 1, MetricMode::Accumulate);
00508 metricMan_.sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
00509 metricMan_.sendMetric("Average Fragment Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/fragment", 2, MetricMode::Average);
00510 metricMan_.sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
00511 }
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521 mqPtr = artdaq::StatisticsCollection::getInstance().
00522 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00523 if (mqPtr.get() != 0)
00524 {
00525 metricMan_.sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00526 }
00527
00528 mqPtr = artdaq::StatisticsCollection::getInstance().
00529 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00530 if (mqPtr.get() != 0)
00531 {
00532 metricMan_.sendMetric("Avg BoardReader Sync Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00533 }
00534
00535 mqPtr = artdaq::StatisticsCollection::getInstance().
00536 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00537 if (mqPtr.get() != 0)
00538 {
00539 metricMan_.sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00540 }
00541
00542 mqPtr = artdaq::StatisticsCollection::getInstance().
00543 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00544 if (mqPtr.get() != 0)
00545 {
00546 metricMan_.sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
00547 }
00548 }