00001
00002 #define TRACE_NAME (app_name + "_BoardReaderCore").c_str() // include these 2 first -
00003 #include "artdaq/DAQdata/Globals.hh"
00004 #include "artdaq/Application/TaskType.hh"
00005 #include "artdaq/Application/BoardReaderCore.hh"
00006 #include "artdaq-core/Data/Fragment.hh"
00007 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00008 #include "artdaq/Application/makeCommandableFragmentGenerator.hh"
00009 #include "canvas/Utilities/Exception.h"
00010 #include "cetlib_except/exception.h"
00011 #include <pthread.h>
00012 #include <sched.h>
00013 #include <algorithm>
00014
00015 const std::string artdaq::BoardReaderCore::
00016 FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
00017 const std::string artdaq::BoardReaderCore::
00018 INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
00019 const std::string artdaq::BoardReaderCore::
00020 BRSYNC_WAIT_STAT_KEY("BoardReaderCoreBRSyncWaitTime");
00021 const std::string artdaq::BoardReaderCore::
00022 OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
00023 const std::string artdaq::BoardReaderCore::
00024 FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
00025
00026 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
00027
00028 artdaq::BoardReaderCore::BoardReaderCore(Commandable& parent_application) :
00029 parent_application_(parent_application)
00030
00031 , generator_ptr_(nullptr)
00032 , stop_requested_(false)
00033 , pause_requested_(false)
00034 {
00035 TLOG(TLVL_DEBUG) << "Constructor";
00036 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PROCESSED_STAT_KEY);
00037 statsHelper_.addMonitoredQuantityName(INPUT_WAIT_STAT_KEY);
00038 statsHelper_.addMonitoredQuantityName(BRSYNC_WAIT_STAT_KEY);
00039 statsHelper_.addMonitoredQuantityName(OUTPUT_WAIT_STAT_KEY);
00040 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PER_READ_STAT_KEY);
00041 metricMan = &metricMan_;
00042 }
00043
00044 artdaq::BoardReaderCore::~BoardReaderCore()
00045 {
00046 TLOG(TLVL_DEBUG) << "Destructor";
00047 }
00048
00049 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00050 {
00051 TLOG(TLVL_DEBUG) << "initialize method called with " << "ParameterSet = \"" << pset.to_string() << "\".";
00052
00053
00054 fhicl::ParameterSet daq_pset;
00055 try
00056 {
00057 daq_pset = pset.get<fhicl::ParameterSet>("daq");
00058 }
00059 catch (...)
00060 {
00061 TLOG(TLVL_ERROR)
00062 << "Unable to find the DAQ parameters in the initialization "
00063 << "ParameterSet: \"" + pset.to_string() + "\".";
00064 return false;
00065 }
00066 fhicl::ParameterSet fr_pset;
00067 try
00068 {
00069 fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
00070 data_pset_ = fr_pset;
00071 }
00072 catch (...)
00073 {
00074 TLOG(TLVL_ERROR)
00075 << "Unable to find the fragment_receiver parameters in the DAQ "
00076 << "initialization ParameterSet: \"" + daq_pset.to_string() + "\".";
00077 return false;
00078 }
00079
00080
00081 fhicl::ParameterSet metric_pset;
00082 try
00083 {
00084 metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
00085 }
00086 catch (...) {}
00087
00088 if (metric_pset.is_empty())
00089 {
00090 TLOG(TLVL_INFO) << "No metric plugins appear to be defined";
00091 }
00092 try
00093 {
00094 metricMan_.initialize(metric_pset, app_name);
00095 }
00096 catch (...)
00097 {
00098 ExceptionHandler(ExceptionHandlerRethrow::no,
00099 "Error loading metrics in BoardReaderCore::initialize()");
00100 }
00101
00102 if (daq_pset.has_key("rank"))
00103 {
00104 if (my_rank >= 0 && daq_pset.get<int>("rank") != my_rank) {
00105 TLOG(TLVL_WARNING) << "BoardReader rank specified at startup is different than rank specified at configure! Using rank received at configure!";
00106 }
00107 my_rank = daq_pset.get<int>("rank");
00108 }
00109 if (my_rank == -1)
00110 {
00111 TLOG(TLVL_ERROR) << "BoardReader rank not specified at startup or in configuration! Aborting";
00112 exit(1);
00113 }
00114
00115
00116
00117 std::string frag_gen_name = fr_pset.get<std::string>("generator", "");
00118 if (frag_gen_name.length() == 0)
00119 {
00120 TLOG(TLVL_ERROR)
00121 << "No fragment generator (parameter name = \"generator\") was "
00122 << "specified in the fragment_receiver ParameterSet. The "
00123 << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\".";
00124 return false;
00125 }
00126
00127 try
00128 {
00129 generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
00130 }
00131 catch (...)
00132 {
00133 std::stringstream exception_string;
00134 exception_string << "Exception thrown during initialization of fragment generator of type \""
00135 << frag_gen_name << "\"";
00136
00137 ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
00138
00139 TLOG(TLVL_DEBUG) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string();
00140
00141 return false;
00142 }
00143 metricMan_.setPrefix(generator_ptr_->metricsReportingInstanceName());
00144
00145 rt_priority_ = fr_pset.get<int>("rt_priority", 0);
00146
00147
00148 statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
00149
00150
00151 skip_seqId_test_ = (generator_ptr_->fragmentIDs().size() > 1 || generator_ptr_->request_mode() != RequestMode::Ignored);
00152
00153 verbose_ = fr_pset.get<bool>("verbose", true);
00154
00155 return true;
00156 }
00157
00158 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
00159 {
00160 logMessage_("Starting run " + boost::lexical_cast<std::string>(id.run()));
00161 stop_requested_.store(false);
00162 pause_requested_.store(false);
00163
00164 fragment_count_ = 0;
00165 prev_seq_id_ = 0;
00166 statsHelper_.resetStatistics();
00167
00168 metricMan_.do_start();
00169 generator_ptr_->StartCmd(id.run(), timeout, timestamp);
00170 run_id_ = id;
00171
00172 logMessage_("Completed the Start transition (Started run) for run " +
00173 boost::lexical_cast<std::string>(run_id_.run()) +
00174 ", timeout = " + boost::lexical_cast<std::string>(timeout) +
00175 ", timestamp = " + boost::lexical_cast<std::string>(timestamp));
00176 return true;
00177 }
00178
00179 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
00180 {
00181 logMessage_("Stopping run " + boost::lexical_cast<std::string>(run_id_.run()) +
00182 " after " + boost::lexical_cast<std::string>(fragment_count_) + " fragments.");
00183 stop_requested_.store(true);
00184 generator_ptr_->StopCmd(timeout, timestamp);
00185 logMessage_("Completed the Stop transition for run " + boost::lexical_cast<std::string>(run_id_.run()));
00186 return true;
00187 }
00188
00189 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
00190 {
00191 logMessage_("Pausing run " + boost::lexical_cast<std::string>(run_id_.run()) +
00192 " after " + boost::lexical_cast<std::string>(fragment_count_) + " fragments.");
00193 pause_requested_.store(true);
00194 generator_ptr_->PauseCmd(timeout, timestamp);
00195 logMessage_("Completed the Pause transition for run " + boost::lexical_cast<std::string>(run_id_.run()));
00196 return true;
00197 }
00198
00199 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
00200 {
00201 logMessage_("Resuming run " + boost::lexical_cast<std::string>(run_id_.run()));
00202 pause_requested_.store(false);
00203 metricMan_.do_start();
00204 generator_ptr_->ResumeCmd(timeout, timestamp);
00205 logMessage_("Completed the Resume transition for run " + boost::lexical_cast<std::string>(run_id_.run()));
00206 return true;
00207 }
00208
00209 bool artdaq::BoardReaderCore::shutdown(uint64_t)
00210 {
00211 logMessage_("Starting Shutdown transition");
00212 generator_ptr_->joinThreads();
00213 generator_ptr_.reset(nullptr);
00214 metricMan_.shutdown();
00215 logMessage_("Completed Shutdown transition");
00216 return true;
00217 }
00218
00219 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00220 {
00221 TLOG(TLVL_DEBUG) << "soft_initialize method called with "
00222 << "ParameterSet = \"" << pset.to_string()
00223 << "\".";
00224 return true;
00225 }
00226
00227 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00228 {
00229 TLOG(TLVL_DEBUG) << "reinitialize method called with "
00230 << "ParameterSet = \"" << pset.to_string()
00231 << "\".";
00232 return true;
00233 }
00234
00235 void artdaq::BoardReaderCore::process_fragments()
00236 {
00237 if (rt_priority_ > 0)
00238 {
00239 #pragma GCC diagnostic push
00240 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00241 sched_param s_param = {};
00242 s_param.sched_priority = rt_priority_;
00243 if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
00244 TLOG(TLVL_WARNING) << "setting realtime priority failed";
00245 #pragma GCC diagnostic pop
00246 }
00247
00248
00249
00250
00251 if (rt_priority_ > 0)
00252 {
00253 #pragma GCC diagnostic push
00254 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00255 sched_param s_param = {};
00256 s_param.sched_priority = rt_priority_;
00257 int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
00258 if (status != 0)
00259 {
00260 TLOG(TLVL_ERROR)
00261 << "Failed to set realtime priority to " << rt_priority_
00262 << ", return code = " << status;
00263 }
00264 #pragma GCC diagnostic pop
00265 }
00266
00267 TLOG(TLVL_DEBUG) << "Initializing DataSenderManager. my_rank=" << my_rank;
00268 sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
00269
00270 TLOG(TLVL_DEBUG) << "Waiting for first fragment.";
00271 artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
00272 double delta_time;
00273 artdaq::FragmentPtrs frags;
00274 bool active = true;
00275
00276 while (active)
00277 {
00278 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00279
00280 TLOG(18) << "process_fragments getNext start";
00281 active = generator_ptr_->getNext(frags);
00282 TLOG(18) << "process_fragments getNext done (active=" << active << ")";
00283
00284
00285
00286
00287
00288
00289 if (!active && generator_ptr_ && generator_ptr_->exception())
00290 {
00291 parent_application_.in_run_failure();
00292 }
00293
00294 delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
00295 statsHelper_.addSample(INPUT_WAIT_STAT_KEY, delta_time);
00296
00297 TLOG(16) << "process_fragments INPUT_WAIT=" << delta_time;
00298
00299 if (!active) { break; }
00300 statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
00301
00302 for (auto& fragPtr : frags)
00303 {
00304 if (!fragPtr.get())
00305 {
00306 TLOG(TLVL_WARNING) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
00307 << "This is most likely caused by a problem with the Fragment Generator!";
00308 continue;
00309 }
00310 artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
00311 SetMFIteration("Sequence ID " + std::to_string(sequence_id));
00312 statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->size());
00313
00314 if ((fragment_count_ % 250) == 0)
00315 {
00316 TLOG(TLVL_DEBUG)
00317 << "Sending fragment " << fragment_count_
00318 << " with sequence id " << sequence_id << ".";
00319 }
00320
00321
00322 if (!skip_seqId_test_ && abs(static_cast<int64_t>(sequence_id) - static_cast<int64_t>(prev_seq_id_)) > 1)
00323 {
00324 TLOG(TLVL_WARNING)
00325 << "Missing sequence IDs: current sequence ID = "
00326 << sequence_id << ", previous sequence ID = "
00327 << prev_seq_id_ << ".";
00328 }
00329 prev_seq_id_ = sequence_id;
00330
00331 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00332 TLOG(17) << "process_fragments seq=" << sequence_id << " sendFragment start";
00333 auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
00334 TLOG(17) << "process_fragments seq=" << sequence_id << " sendFragment done (dest=" << res.first << ", sts=" << TransferInterface::CopyStatusToString(res.second) << ")";
00335 ++fragment_count_;
00336 statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
00337 artdaq::MonitoredQuantity::getCurrentTime() - startTime);
00338
00339 bool readyToReport = statsHelper_.readyToReport(fragment_count_);
00340 if (readyToReport)
00341 {
00342 std::string statString = buildStatisticsString_();
00343 TLOG(TLVL_DEBUG) << statString;
00344 }
00345 if (fragment_count_ == 1 || readyToReport)
00346 {
00347 TLOG(TLVL_DEBUG)
00348 << "Sending fragment " << fragment_count_
00349 << " with sequence id " << sequence_id << ".";
00350 }
00351 }
00352 if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
00353 frags.clear();
00354 }
00355
00356
00357
00358
00359 metricMan_.do_stop();
00360
00361 sender_ptr_.reset(nullptr);
00362 }
00363
00364 std::string artdaq::BoardReaderCore::report(std::string const& which) const
00365 {
00366 std::string resultString;
00367
00368
00369 if (generator_ptr_.get() != 0)
00370 {
00371 resultString = generator_ptr_->ReportCmd(which);
00372 if (resultString.length() > 0) { return resultString; }
00373 }
00374
00375
00376
00377
00378
00379 std::string tmpString = app_name + " run number = ";
00380 tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
00381 tmpString.append(". Command=\"" + which + "\" is not currently supported.");
00382 return tmpString;
00383 }
00384
00385 bool artdaq::BoardReaderCore::metaCommand(std::string const& command, std::string const& arg)
00386 {
00387 TLOG(TLVL_DEBUG) << "metaCommand method called with "
00388 << "command = \"" << command << "\""
00389 << ", arg = \"" << arg << "\""
00390 << ".";
00391
00392 if (generator_ptr_) return generator_ptr_->metaCommand(command, arg);
00393
00394 return true;
00395 }
00396
00397 std::string artdaq::BoardReaderCore::buildStatisticsString_()
00398 {
00399 std::ostringstream oss;
00400 oss << app_name << " statistics:" << std::endl;
00401
00402 double fragmentCount = 1.0;
00403 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00404 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00405 if (mqPtr.get() != 0)
00406 {
00407 artdaq::MonitoredQuantityStats stats;
00408 mqPtr->getStats(stats);
00409 oss << " Fragment statistics: "
00410 << stats.recentSampleCount << " fragments received at "
00411 << stats.recentSampleRate << " fragments/sec, effective data rate = "
00412 << (stats.recentValueRate * sizeof(artdaq::RawDataType)
00413 / 1024.0 / 1024.0) << " MB/sec, monitor window = "
00414 << stats.recentDuration << " sec, min::max event size = "
00415 << (stats.recentValueMin * sizeof(artdaq::RawDataType)
00416 / 1024.0 / 1024.0)
00417 << "::"
00418 << (stats.recentValueMax * sizeof(artdaq::RawDataType)
00419 / 1024.0 / 1024.0)
00420 << " MB" << std::endl;
00421 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00422 oss << " Average times per fragment: ";
00423 if (stats.recentSampleRate > 0.0)
00424 {
00425 oss << " elapsed time = "
00426 << (1.0 / stats.recentSampleRate) << " sec";
00427 }
00428 }
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438 mqPtr = artdaq::StatisticsCollection::getInstance().
00439 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00440 if (mqPtr.get() != 0)
00441 {
00442 oss << ", input wait time = "
00443 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00444 }
00445
00446 mqPtr = artdaq::StatisticsCollection::getInstance().
00447 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00448 if (mqPtr.get() != 0)
00449 {
00450 oss << ", BRsync wait time = "
00451 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00452 }
00453
00454 mqPtr = artdaq::StatisticsCollection::getInstance().
00455 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00456 if (mqPtr.get() != 0)
00457 {
00458 oss << ", output wait time = "
00459 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00460 }
00461
00462 oss << std::endl << " Fragments per read: ";
00463 mqPtr = artdaq::StatisticsCollection::getInstance().
00464 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00465 if (mqPtr.get() != 0)
00466 {
00467 artdaq::MonitoredQuantityStats stats;
00468 mqPtr->getStats(stats);
00469 oss << "average = "
00470 << stats.recentValueAverage
00471 << ", min::max = "
00472 << stats.recentValueMin
00473 << "::"
00474 << stats.recentValueMax;
00475 }
00476
00477 return oss.str();
00478 }
00479
00480 void artdaq::BoardReaderCore::sendMetrics_()
00481 {
00482
00483 double fragmentCount = 1.0;
00484 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00485 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00486 if (mqPtr.get() != 0)
00487 {
00488 artdaq::MonitoredQuantityStats stats;
00489 mqPtr->getStats(stats);
00490 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00491 metricMan_.sendMetric("Fragment Count", static_cast<unsigned long>(stats.fullSampleCount), "fragments", 1, MetricMode::LastPoint);
00492 metricMan_.sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
00493 metricMan_.sendMetric("Average Fragment Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/fragment", 2, MetricMode::Average);
00494 metricMan_.sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
00495 }
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505 mqPtr = artdaq::StatisticsCollection::getInstance().
00506 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00507 if (mqPtr.get() != 0)
00508 {
00509 metricMan_.sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00510 }
00511
00512 mqPtr = artdaq::StatisticsCollection::getInstance().
00513 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00514 if (mqPtr.get() != 0)
00515 {
00516 metricMan_.sendMetric("Avg BoardReader Sync Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00517 }
00518
00519 mqPtr = artdaq::StatisticsCollection::getInstance().
00520 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00521 if (mqPtr.get() != 0)
00522 {
00523 metricMan_.sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00524 }
00525
00526 mqPtr = artdaq::StatisticsCollection::getInstance().
00527 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00528 if (mqPtr.get() != 0)
00529 {
00530 metricMan_.sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
00531 }
00532 }
00533
00534 void artdaq::BoardReaderCore::logMessage_(std::string const& text)
00535 {
00536 if (verbose_)
00537 {
00538 TLOG(TLVL_INFO) << text;
00539 }
00540 else
00541 {
00542 TLOG(TLVL_DEBUG) << text;
00543 }
00544 }