00001
00002 #define TRACE_NAME (app_name + "_BoardReaderCore").c_str() // include these 2 first -
00003 #include "artdaq/DAQdata/Globals.hh"
00004 #include "artdaq/Application/TaskType.hh"
00005 #include "artdaq/Application/BoardReaderCore.hh"
00006 #include "artdaq-core/Data/Fragment.hh"
00007 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00008 #include "artdaq/Application/makeCommandableFragmentGenerator.hh"
00009 #include "canvas/Utilities/Exception.h"
00010 #include "cetlib_except/exception.h"
00011 #include <pthread.h>
00012 #include <sched.h>
00013 #include <algorithm>
00014
00015 const std::string artdaq::BoardReaderCore::
00016 FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
00017 const std::string artdaq::BoardReaderCore::
00018 INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
00019 const std::string artdaq::BoardReaderCore::
00020 BRSYNC_WAIT_STAT_KEY("BoardReaderCoreBRSyncWaitTime");
00021 const std::string artdaq::BoardReaderCore::
00022 OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
00023 const std::string artdaq::BoardReaderCore::
00024 FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
00025
00026 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
00027
00028 artdaq::BoardReaderCore::BoardReaderCore(Commandable& parent_application) :
00029 parent_application_(parent_application)
00030
00031 , generator_ptr_(nullptr)
00032 , stop_requested_(false)
00033 , pause_requested_(false)
00034 {
00035 TLOG(TLVL_DEBUG) << "Constructor";
00036 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PROCESSED_STAT_KEY);
00037 statsHelper_.addMonitoredQuantityName(INPUT_WAIT_STAT_KEY);
00038 statsHelper_.addMonitoredQuantityName(BRSYNC_WAIT_STAT_KEY);
00039 statsHelper_.addMonitoredQuantityName(OUTPUT_WAIT_STAT_KEY);
00040 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PER_READ_STAT_KEY);
00041 metricMan = &metricMan_;
00042 }
00043
00044 artdaq::BoardReaderCore::~BoardReaderCore()
00045 {
00046 TLOG(TLVL_DEBUG) << "Destructor";
00047 }
00048
00049 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00050 {
00051 TLOG(TLVL_DEBUG) << "initialize method called with " << "ParameterSet = \"" << pset.to_string() << "\".";
00052
00053
00054 fhicl::ParameterSet daq_pset;
00055 try
00056 {
00057 daq_pset = pset.get<fhicl::ParameterSet>("daq");
00058 }
00059 catch (...)
00060 {
00061 TLOG(TLVL_ERROR)
00062 << "Unable to find the DAQ parameters in the initialization "
00063 << "ParameterSet: \"" + pset.to_string() + "\".";
00064 return false;
00065 }
00066 fhicl::ParameterSet fr_pset;
00067 try
00068 {
00069 fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
00070 data_pset_ = fr_pset;
00071 }
00072 catch (...)
00073 {
00074 TLOG(TLVL_ERROR)
00075 << "Unable to find the fragment_receiver parameters in the DAQ "
00076 << "initialization ParameterSet: \"" + daq_pset.to_string() + "\".";
00077 return false;
00078 }
00079
00080
00081 fhicl::ParameterSet metric_pset;
00082 try
00083 {
00084 metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
00085 }
00086 catch (...) {}
00087
00088 if (metric_pset.is_empty())
00089 {
00090 TLOG(TLVL_INFO) << "No metric plugins appear to be defined";
00091 }
00092 try
00093 {
00094 metricMan_.initialize(metric_pset, app_name);
00095 }
00096 catch (...)
00097 {
00098 ExceptionHandler(ExceptionHandlerRethrow::no,
00099 "Error loading metrics in BoardReaderCore::initialize()");
00100 }
00101
00102 if (daq_pset.has_key("rank"))
00103 {
00104 if (my_rank >= 0 && daq_pset.get<int>("rank") != my_rank) {
00105 TLOG(TLVL_WARNING) << "BoardReader rank specified at startup is different than rank specified at configure! Using rank received at configure!";
00106 }
00107 my_rank = daq_pset.get<int>("rank");
00108 }
00109 if (my_rank == -1)
00110 {
00111 TLOG(TLVL_ERROR) << "BoardReader rank not specified at startup or in configuration! Aborting";
00112 exit(1);
00113 }
00114
00115
00116
00117 std::string frag_gen_name = fr_pset.get<std::string>("generator", "");
00118 if (frag_gen_name.length() == 0)
00119 {
00120 TLOG(TLVL_ERROR)
00121 << "No fragment generator (parameter name = \"generator\") was "
00122 << "specified in the fragment_receiver ParameterSet. The "
00123 << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\".";
00124 return false;
00125 }
00126
00127 try
00128 {
00129 generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
00130 }
00131 catch (...)
00132 {
00133 std::stringstream exception_string;
00134 exception_string << "Exception thrown during initialization of fragment generator of type \""
00135 << frag_gen_name << "\"";
00136
00137 ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
00138
00139 TLOG(TLVL_DEBUG) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string();
00140
00141 return false;
00142 }
00143 metricMan_.setPrefix(generator_ptr_->metricsReportingInstanceName());
00144
00145 rt_priority_ = fr_pset.get<int>("rt_priority", 0);
00146
00147
00148 statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
00149
00150
00151 skip_seqId_test_ = (generator_ptr_->fragmentIDs().size() > 1);
00152
00153 verbose_ = fr_pset.get<bool>("verbose", true);
00154
00155 return true;
00156 }
00157
00158 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
00159 {
00160 logMessage_("Starting run " + boost::lexical_cast<std::string>(id.run()));
00161 stop_requested_.store(false);
00162 pause_requested_.store(false);
00163
00164 fragment_count_ = 0;
00165 prev_seq_id_ = 0;
00166 statsHelper_.resetStatistics();
00167
00168 metricMan_.do_start();
00169 generator_ptr_->StartCmd(id.run(), timeout, timestamp);
00170 run_id_ = id;
00171
00172 logMessage_("Completed the Start transition (Started run) for run " +
00173 boost::lexical_cast<std::string>(run_id_.run()) +
00174 ", timeout = " + boost::lexical_cast<std::string>(timeout) +
00175 ", timestamp = " + boost::lexical_cast<std::string>(timestamp));
00176 return true;
00177 }
00178
00179 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
00180 {
00181 logMessage_("Stopping run " + boost::lexical_cast<std::string>(run_id_.run()) +
00182 " after " + boost::lexical_cast<std::string>(fragment_count_) + " fragments.");
00183 stop_requested_.store(true);
00184 generator_ptr_->StopCmd(timeout, timestamp);
00185 logMessage_("Completed the Stop transition for run " + boost::lexical_cast<std::string>(run_id_.run()));
00186 return true;
00187 }
00188
00189 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
00190 {
00191 logMessage_("Pausing run " + boost::lexical_cast<std::string>(run_id_.run()) +
00192 " after " + boost::lexical_cast<std::string>(fragment_count_) + " fragments.");
00193 pause_requested_.store(true);
00194 generator_ptr_->PauseCmd(timeout, timestamp);
00195 logMessage_("Completed the Pause transition for run " + boost::lexical_cast<std::string>(run_id_.run()));
00196 return true;
00197 }
00198
00199 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
00200 {
00201 logMessage_("Resuming run " + boost::lexical_cast<std::string>(run_id_.run()));
00202 pause_requested_.store(false);
00203 metricMan_.do_start();
00204 generator_ptr_->ResumeCmd(timeout, timestamp);
00205 logMessage_("Completed the Resume transition for run " + boost::lexical_cast<std::string>(run_id_.run()));
00206 return true;
00207 }
00208
00209 bool artdaq::BoardReaderCore::shutdown(uint64_t)
00210 {
00211 logMessage_("Starting Shutdown transition");
00212 generator_ptr_->joinThreads();
00213 generator_ptr_.reset(nullptr);
00214 metricMan_.shutdown();
00215 logMessage_("Completed Shutdown transition");
00216 return true;
00217 }
00218
00219 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00220 {
00221 TLOG(TLVL_DEBUG) << "soft_initialize method called with "
00222 << "ParameterSet = \"" << pset.to_string()
00223 << "\".";
00224 return true;
00225 }
00226
00227 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00228 {
00229 TLOG(TLVL_DEBUG) << "reinitialize method called with "
00230 << "ParameterSet = \"" << pset.to_string()
00231 << "\".";
00232 return true;
00233 }
00234
00235 void artdaq::BoardReaderCore::process_fragments()
00236 {
00237 if (rt_priority_ > 0)
00238 {
00239 #pragma GCC diagnostic push
00240 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00241 sched_param s_param = {};
00242 s_param.sched_priority = rt_priority_;
00243 if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
00244 TLOG(TLVL_WARNING) << "setting realtime priority failed";
00245 #pragma GCC diagnostic pop
00246 }
00247
00248
00249
00250
00251 if (rt_priority_ > 0)
00252 {
00253 #pragma GCC diagnostic push
00254 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00255 sched_param s_param = {};
00256 s_param.sched_priority = rt_priority_;
00257 int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
00258 if (status != 0)
00259 {
00260 TLOG(TLVL_ERROR)
00261 << "Failed to set realtime priority to " << rt_priority_
00262 << ", return code = " << status;
00263 }
00264 #pragma GCC diagnostic pop
00265 }
00266
00267 TLOG(TLVL_DEBUG) << "Initializing DataSenderManager. my_rank=" << my_rank;
00268 sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
00269
00270 TLOG(TLVL_DEBUG) << "Waiting for first fragment.";
00271 artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
00272 double delta_time;
00273 artdaq::FragmentPtrs frags;
00274 bool active = true;
00275
00276 while (active)
00277 {
00278 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00279
00280 TLOG(18) << "process_fragments getNext start";
00281 active = generator_ptr_->getNext(frags);
00282 TLOG(18) << "process_fragments getNext done (active=" << active << ")";
00283
00284
00285
00286
00287
00288
00289 if (!active && generator_ptr_ && generator_ptr_->exception())
00290 {
00291 parent_application_.in_run_failure();
00292 }
00293
00294 delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
00295 statsHelper_.addSample(INPUT_WAIT_STAT_KEY, delta_time);
00296
00297 TLOG(16) << "process_fragments INPUT_WAIT=" << std::to_string(delta_time);
00298
00299 if (!active) { break; }
00300 statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
00301
00302 for (auto& fragPtr : frags)
00303 {
00304 if (!fragPtr.get())
00305 {
00306 TLOG(TLVL_WARNING) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
00307 << "This is most likely caused by a problem with the Fragment Generator!";
00308 continue;
00309 }
00310 artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
00311 #if ART_HEX_VERSION >=0x21100
00312 SetMFIteration("Sequence ID " + std::to_string(sequence_id));
00313 #endif
00314 statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->size());
00315
00316 if ((fragment_count_ % 250) == 0)
00317 {
00318 TLOG(TLVL_DEBUG)
00319 << "Sending fragment " << fragment_count_
00320 << " with sequence id " << sequence_id << ".";
00321 }
00322
00323
00324 if (!skip_seqId_test_ && abs(static_cast<int64_t>(sequence_id) - static_cast<int64_t>(prev_seq_id_)) > 1)
00325 {
00326 TLOG(TLVL_WARNING)
00327 << "Missing sequence IDs: current sequence ID = "
00328 << sequence_id << ", previous sequence ID = "
00329 << prev_seq_id_ << ".";
00330 }
00331 prev_seq_id_ = sequence_id;
00332
00333 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00334 TLOG(17) << "process_fragments seq=" << std::to_string(sequence_id) << " sendFragment start";
00335 auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
00336 TLOG(17) << "process_fragments seq=" << std::to_string(sequence_id) << " sendFragment done (dest=" << res.first << ", sts=" << TransferInterface::CopyStatusToString(res.second) << ")";
00337 ++fragment_count_;
00338 statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
00339 artdaq::MonitoredQuantity::getCurrentTime() - startTime);
00340
00341 bool readyToReport = statsHelper_.readyToReport(fragment_count_);
00342 if (readyToReport)
00343 {
00344 std::string statString = buildStatisticsString_();
00345 TLOG(TLVL_DEBUG) << statString;
00346 }
00347 if (fragment_count_ == 1 || readyToReport)
00348 {
00349 TLOG(TLVL_DEBUG)
00350 << "Sending fragment " << fragment_count_
00351 << " with sequence id " << sequence_id << ".";
00352 }
00353 }
00354 if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
00355 frags.clear();
00356 }
00357
00358
00359
00360
00361 metricMan_.do_stop();
00362
00363 sender_ptr_.reset(nullptr);
00364 }
00365
00366 std::string artdaq::BoardReaderCore::report(std::string const& which) const
00367 {
00368 std::string resultString;
00369
00370
00371 if (generator_ptr_.get() != 0)
00372 {
00373 resultString = generator_ptr_->ReportCmd(which);
00374 if (resultString.length() > 0) { return resultString; }
00375 }
00376
00377
00378
00379
00380
00381 std::string tmpString = app_name + " run number = ";
00382 tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
00383 tmpString.append(". Command=\"" + which + "\" is not currently supported.");
00384 return tmpString;
00385 }
00386
00387 bool artdaq::BoardReaderCore::metaCommand(std::string const& command, std::string const& arg)
00388 {
00389 TLOG(TLVL_DEBUG) << "metaCommand method called with "
00390 << "command = \"" << command << "\""
00391 << ", arg = \"" << arg << "\""
00392 << ".";
00393
00394 if (generator_ptr_) return generator_ptr_->metaCommand(command, arg);
00395
00396 return true;
00397 }
00398
00399 std::string artdaq::BoardReaderCore::buildStatisticsString_()
00400 {
00401 std::ostringstream oss;
00402 oss << app_name << " statistics:" << std::endl;
00403
00404 double fragmentCount = 1.0;
00405 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00406 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00407 if (mqPtr.get() != 0)
00408 {
00409 artdaq::MonitoredQuantityStats stats;
00410 mqPtr->getStats(stats);
00411 oss << " Fragment statistics: "
00412 << stats.recentSampleCount << " fragments received at "
00413 << stats.recentSampleRate << " fragments/sec, effective data rate = "
00414 << (stats.recentValueRate * sizeof(artdaq::RawDataType)
00415 / 1024.0 / 1024.0) << " MB/sec, monitor window = "
00416 << stats.recentDuration << " sec, min::max event size = "
00417 << (stats.recentValueMin * sizeof(artdaq::RawDataType)
00418 / 1024.0 / 1024.0)
00419 << "::"
00420 << (stats.recentValueMax * sizeof(artdaq::RawDataType)
00421 / 1024.0 / 1024.0)
00422 << " MB" << std::endl;
00423 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00424 oss << " Average times per fragment: ";
00425 if (stats.recentSampleRate > 0.0)
00426 {
00427 oss << " elapsed time = "
00428 << (1.0 / stats.recentSampleRate) << " sec";
00429 }
00430 }
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440 mqPtr = artdaq::StatisticsCollection::getInstance().
00441 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00442 if (mqPtr.get() != 0)
00443 {
00444 oss << ", input wait time = "
00445 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00446 }
00447
00448 mqPtr = artdaq::StatisticsCollection::getInstance().
00449 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00450 if (mqPtr.get() != 0)
00451 {
00452 oss << ", BRsync wait time = "
00453 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00454 }
00455
00456 mqPtr = artdaq::StatisticsCollection::getInstance().
00457 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00458 if (mqPtr.get() != 0)
00459 {
00460 oss << ", output wait time = "
00461 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00462 }
00463
00464 oss << std::endl << " Fragments per read: ";
00465 mqPtr = artdaq::StatisticsCollection::getInstance().
00466 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00467 if (mqPtr.get() != 0)
00468 {
00469 artdaq::MonitoredQuantityStats stats;
00470 mqPtr->getStats(stats);
00471 oss << "average = "
00472 << stats.recentValueAverage
00473 << ", min::max = "
00474 << stats.recentValueMin
00475 << "::"
00476 << stats.recentValueMax;
00477 }
00478
00479 return oss.str();
00480 }
00481
00482 void artdaq::BoardReaderCore::sendMetrics_()
00483 {
00484
00485 double fragmentCount = 1.0;
00486 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00487 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00488 if (mqPtr.get() != 0)
00489 {
00490 artdaq::MonitoredQuantityStats stats;
00491 mqPtr->getStats(stats);
00492 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00493 metricMan_.sendMetric("Fragment Count", static_cast<unsigned long>(stats.fullSampleCount), "fragments", 1, MetricMode::LastPoint);
00494 metricMan_.sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
00495 metricMan_.sendMetric("Average Fragment Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/fragment", 2, MetricMode::Average);
00496 metricMan_.sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
00497 }
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507 mqPtr = artdaq::StatisticsCollection::getInstance().
00508 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00509 if (mqPtr.get() != 0)
00510 {
00511 metricMan_.sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00512 }
00513
00514 mqPtr = artdaq::StatisticsCollection::getInstance().
00515 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00516 if (mqPtr.get() != 0)
00517 {
00518 metricMan_.sendMetric("Avg BoardReader Sync Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00519 }
00520
00521 mqPtr = artdaq::StatisticsCollection::getInstance().
00522 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00523 if (mqPtr.get() != 0)
00524 {
00525 metricMan_.sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00526 }
00527
00528 mqPtr = artdaq::StatisticsCollection::getInstance().
00529 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00530 if (mqPtr.get() != 0)
00531 {
00532 metricMan_.sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
00533 }
00534 }
00535
00536 void artdaq::BoardReaderCore::logMessage_(std::string const& text)
00537 {
00538 if (verbose_)
00539 {
00540 TLOG(TLVL_INFO) << text;
00541 }
00542 else
00543 {
00544 TLOG(TLVL_DEBUG) << text;
00545 }
00546 }