00001
00002 #define TRACE_NAME (app_name + "_BoardReaderCore").c_str() // include these 2 first -
00003 #include "artdaq/DAQdata/Globals.hh"
00004 #include "artdaq/Application/TaskType.hh"
00005 #include "artdaq/Application/BoardReaderCore.hh"
00006 #include "artdaq-core/Data/Fragment.hh"
00007 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00008 #include "artdaq/Application/makeCommandableFragmentGenerator.hh"
00009 #include "canvas/Utilities/Exception.h"
00010 #include "cetlib_except/exception.h"
00011 #include <pthread.h>
00012 #include <sched.h>
00013 #include <algorithm>
00014
00015 const std::string artdaq::BoardReaderCore::
00016 FRAGMENTS_PROCESSED_STAT_KEY("BoardReaderCoreFragmentsProcessed");
00017 const std::string artdaq::BoardReaderCore::
00018 INPUT_WAIT_STAT_KEY("BoardReaderCoreInputWaitTime");
00019 const std::string artdaq::BoardReaderCore::
00020 BRSYNC_WAIT_STAT_KEY("BoardReaderCoreBRSyncWaitTime");
00021 const std::string artdaq::BoardReaderCore::
00022 OUTPUT_WAIT_STAT_KEY("BoardReaderCoreOutputWaitTime");
00023 const std::string artdaq::BoardReaderCore::
00024 FRAGMENTS_PER_READ_STAT_KEY("BoardReaderCoreFragmentsPerRead");
00025
00026 std::unique_ptr<artdaq::DataSenderManager> artdaq::BoardReaderCore::sender_ptr_ = nullptr;
00027
00028 artdaq::BoardReaderCore::BoardReaderCore(Commandable& parent_application) :
00029 parent_application_(parent_application)
00030
00031 , generator_ptr_(nullptr)
00032 , stop_requested_(false)
00033 , pause_requested_(false)
00034 {
00035 TLOG(TLVL_DEBUG) << "Constructor";
00036 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PROCESSED_STAT_KEY);
00037 statsHelper_.addMonitoredQuantityName(INPUT_WAIT_STAT_KEY);
00038 statsHelper_.addMonitoredQuantityName(BRSYNC_WAIT_STAT_KEY);
00039 statsHelper_.addMonitoredQuantityName(OUTPUT_WAIT_STAT_KEY);
00040 statsHelper_.addMonitoredQuantityName(FRAGMENTS_PER_READ_STAT_KEY);
00041 }
00042
00043 artdaq::BoardReaderCore::~BoardReaderCore()
00044 {
00045 TLOG(TLVL_DEBUG) << "Destructor";
00046 }
00047
00048 bool artdaq::BoardReaderCore::initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00049 {
00050 TLOG(TLVL_DEBUG) << "initialize method called with " << "ParameterSet = \"" << pset.to_string() << "\".";
00051
00052
00053 fhicl::ParameterSet daq_pset;
00054 try
00055 {
00056 daq_pset = pset.get<fhicl::ParameterSet>("daq");
00057 }
00058 catch (...)
00059 {
00060 TLOG(TLVL_ERROR)
00061 << "Unable to find the DAQ parameters in the initialization "
00062 << "ParameterSet: \"" + pset.to_string() + "\".";
00063 return false;
00064 }
00065 fhicl::ParameterSet fr_pset;
00066 try
00067 {
00068 fr_pset = daq_pset.get<fhicl::ParameterSet>("fragment_receiver");
00069 data_pset_ = fr_pset;
00070 }
00071 catch (...)
00072 {
00073 TLOG(TLVL_ERROR)
00074 << "Unable to find the fragment_receiver parameters in the DAQ "
00075 << "initialization ParameterSet: \"" + daq_pset.to_string() + "\".";
00076 return false;
00077 }
00078
00079
00080 fhicl::ParameterSet metric_pset;
00081 try
00082 {
00083 metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
00084 }
00085 catch (...) {}
00086
00087 if (metric_pset.is_empty())
00088 {
00089 TLOG(TLVL_INFO) << "No metric plugins appear to be defined";
00090 }
00091 try
00092 {
00093 metricMan->initialize(metric_pset, app_name);
00094 }
00095 catch (...)
00096 {
00097 ExceptionHandler(ExceptionHandlerRethrow::no,
00098 "Error loading metrics in BoardReaderCore::initialize()");
00099 }
00100
00101 if (daq_pset.has_key("rank"))
00102 {
00103 if (my_rank >= 0 && daq_pset.get<int>("rank") != my_rank) {
00104 TLOG(TLVL_WARNING) << "BoardReader rank specified at startup is different than rank specified at configure! Using rank received at configure!";
00105 }
00106 my_rank = daq_pset.get<int>("rank");
00107 }
00108 if (my_rank == -1)
00109 {
00110 TLOG(TLVL_ERROR) << "BoardReader rank not specified at startup or in configuration! Aborting";
00111 exit(1);
00112 }
00113
00114
00115
00116 std::string frag_gen_name = fr_pset.get<std::string>("generator", "");
00117 if (frag_gen_name.length() == 0)
00118 {
00119 TLOG(TLVL_ERROR)
00120 << "No fragment generator (parameter name = \"generator\") was "
00121 << "specified in the fragment_receiver ParameterSet. The "
00122 << "DAQ initialization PSet was \"" << daq_pset.to_string() << "\".";
00123 return false;
00124 }
00125
00126 try
00127 {
00128 generator_ptr_ = artdaq::makeCommandableFragmentGenerator(frag_gen_name, fr_pset);
00129 }
00130 catch (...)
00131 {
00132 std::stringstream exception_string;
00133 exception_string << "Exception thrown during initialization of fragment generator of type \""
00134 << frag_gen_name << "\"";
00135
00136 ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
00137
00138 TLOG(TLVL_DEBUG) << "FHiCL parameter set used to initialize the fragment generator which threw an exception: " << fr_pset.to_string();
00139
00140 return false;
00141 }
00142 metricMan->setPrefix(generator_ptr_->metricsReportingInstanceName());
00143
00144 rt_priority_ = fr_pset.get<int>("rt_priority", 0);
00145
00146
00147 statsHelper_.createCollectors(fr_pset, 100, 30.0, 60.0, FRAGMENTS_PROCESSED_STAT_KEY);
00148
00149
00150 skip_seqId_test_ = (generator_ptr_->fragmentIDs().size() > 1 || generator_ptr_->request_mode() != RequestMode::Ignored);
00151
00152 verbose_ = fr_pset.get<bool>("verbose", true);
00153
00154 return true;
00155 }
00156
00157 bool artdaq::BoardReaderCore::start(art::RunID id, uint64_t timeout, uint64_t timestamp)
00158 {
00159 logMessage_("Starting run " + boost::lexical_cast<std::string>(id.run()));
00160 stop_requested_.store(false);
00161 pause_requested_.store(false);
00162
00163 fragment_count_ = 0;
00164 prev_seq_id_ = 0;
00165 statsHelper_.resetStatistics();
00166
00167 metricMan->do_start();
00168 generator_ptr_->StartCmd(id.run(), timeout, timestamp);
00169 run_id_ = id;
00170
00171 logMessage_("Completed the Start transition (Started run) for run " +
00172 boost::lexical_cast<std::string>(run_id_.run()) +
00173 ", timeout = " + boost::lexical_cast<std::string>(timeout) +
00174 ", timestamp = " + boost::lexical_cast<std::string>(timestamp));
00175 return true;
00176 }
00177
00178 bool artdaq::BoardReaderCore::stop(uint64_t timeout, uint64_t timestamp)
00179 {
00180 logMessage_("Stopping run " + boost::lexical_cast<std::string>(run_id_.run()) +
00181 " after " + boost::lexical_cast<std::string>(fragment_count_) + " fragments.");
00182 stop_requested_.store(true);
00183
00184 TLOG(TLVL_DEBUG) << "Stopping CommandableFragmentGenerator BEGIN";
00185 generator_ptr_->StopCmd(timeout, timestamp);
00186 TLOG(TLVL_DEBUG) << "Stopping CommandableFragmentGenerator END";
00187
00188 TLOG(TLVL_DEBUG) << "Stopping DataSenderManager";
00189 if(sender_ptr_) sender_ptr_->StopSender();
00190
00191 logMessage_("Completed the Stop transition for run " + boost::lexical_cast<std::string>(run_id_.run()));
00192 return true;
00193 }
00194
00195 bool artdaq::BoardReaderCore::pause(uint64_t timeout, uint64_t timestamp)
00196 {
00197 logMessage_("Pausing run " + boost::lexical_cast<std::string>(run_id_.run()) +
00198 " after " + boost::lexical_cast<std::string>(fragment_count_) + " fragments.");
00199 pause_requested_.store(true);
00200 generator_ptr_->PauseCmd(timeout, timestamp);
00201 logMessage_("Completed the Pause transition for run " + boost::lexical_cast<std::string>(run_id_.run()));
00202 return true;
00203 }
00204
00205 bool artdaq::BoardReaderCore::resume(uint64_t timeout, uint64_t timestamp)
00206 {
00207 logMessage_("Resuming run " + boost::lexical_cast<std::string>(run_id_.run()));
00208 pause_requested_.store(false);
00209 metricMan->do_start();
00210 generator_ptr_->ResumeCmd(timeout, timestamp);
00211 logMessage_("Completed the Resume transition for run " + boost::lexical_cast<std::string>(run_id_.run()));
00212 return true;
00213 }
00214
00215 bool artdaq::BoardReaderCore::shutdown(uint64_t)
00216 {
00217 logMessage_("Starting Shutdown transition");
00218 generator_ptr_->joinThreads();
00219 generator_ptr_.reset(nullptr);
00220 metricMan->shutdown();
00221 logMessage_("Completed Shutdown transition");
00222 return true;
00223 }
00224
00225 bool artdaq::BoardReaderCore::soft_initialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00226 {
00227 TLOG(TLVL_DEBUG) << "soft_initialize method called with "
00228 << "ParameterSet = \"" << pset.to_string()
00229 << "\".";
00230 return true;
00231 }
00232
00233 bool artdaq::BoardReaderCore::reinitialize(fhicl::ParameterSet const& pset, uint64_t, uint64_t)
00234 {
00235 TLOG(TLVL_DEBUG) << "reinitialize method called with "
00236 << "ParameterSet = \"" << pset.to_string()
00237 << "\".";
00238 return true;
00239 }
00240
00241 void artdaq::BoardReaderCore::process_fragments()
00242 {
00243 if (rt_priority_ > 0)
00244 {
00245 #pragma GCC diagnostic push
00246 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00247 sched_param s_param = {};
00248 s_param.sched_priority = rt_priority_;
00249 if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
00250 TLOG(TLVL_WARNING) << "setting realtime priority failed";
00251 #pragma GCC diagnostic pop
00252 }
00253
00254
00255
00256
00257 if (rt_priority_ > 0)
00258 {
00259 #pragma GCC diagnostic push
00260 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
00261 sched_param s_param = {};
00262 s_param.sched_priority = rt_priority_;
00263 int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
00264 if (status != 0)
00265 {
00266 TLOG(TLVL_ERROR)
00267 << "Failed to set realtime priority to " << rt_priority_
00268 << ", return code = " << status;
00269 }
00270 #pragma GCC diagnostic pop
00271 }
00272
00273 TLOG(TLVL_DEBUG) << "Initializing DataSenderManager. my_rank=" << my_rank;
00274 sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
00275
00276 TLOG(TLVL_DEBUG) << "Waiting for first fragment.";
00277 artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
00278 double delta_time;
00279 artdaq::FragmentPtrs frags;
00280 bool active = true;
00281
00282 while (active)
00283 {
00284 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00285
00286 TLOG(18) << "process_fragments getNext start";
00287 active = generator_ptr_->getNext(frags);
00288 TLOG(18) << "process_fragments getNext done (active=" << active << ")";
00289
00290
00291
00292
00293
00294
00295 if (!active && generator_ptr_ && generator_ptr_->exception())
00296 {
00297 parent_application_.in_run_failure();
00298 }
00299
00300 delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
00301 statsHelper_.addSample(INPUT_WAIT_STAT_KEY, delta_time);
00302
00303 TLOG(16) << "process_fragments INPUT_WAIT=" << delta_time;
00304
00305 if (!active) { break; }
00306 statsHelper_.addSample(FRAGMENTS_PER_READ_STAT_KEY, frags.size());
00307
00308 for (auto& fragPtr : frags)
00309 {
00310 if (!fragPtr.get())
00311 {
00312 TLOG(TLVL_WARNING) << "Encountered a bad fragment pointer in fragment " << fragment_count_ << ". "
00313 << "This is most likely caused by a problem with the Fragment Generator!";
00314 continue;
00315 }
00316 artdaq::Fragment::sequence_id_t sequence_id = fragPtr->sequenceID();
00317 SetMFIteration("Sequence ID " + std::to_string(sequence_id));
00318 statsHelper_.addSample(FRAGMENTS_PROCESSED_STAT_KEY, fragPtr->size());
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328 if (!skip_seqId_test_ && abs(static_cast<int64_t>(sequence_id) - static_cast<int64_t>(prev_seq_id_)) > 1)
00329 {
00330 TLOG(TLVL_WARNING)
00331 << "Missing sequence IDs: current sequence ID = "
00332 << sequence_id << ", previous sequence ID = "
00333 << prev_seq_id_ << ".";
00334 }
00335 prev_seq_id_ = sequence_id;
00336
00337 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00338 TLOG(17) << "process_fragments seq=" << sequence_id << " sendFragment start";
00339 auto res = sender_ptr_->sendFragment(std::move(*fragPtr));
00340 TLOG(17) << "process_fragments seq=" << sequence_id << " sendFragment done (dest=" << res.first << ", sts=" << TransferInterface::CopyStatusToString(res.second) << ")";
00341 ++fragment_count_;
00342 statsHelper_.addSample(OUTPUT_WAIT_STAT_KEY,
00343 artdaq::MonitoredQuantity::getCurrentTime() - startTime);
00344
00345 bool readyToReport = statsHelper_.readyToReport(fragment_count_);
00346 if (readyToReport)
00347 {
00348 std::string statString = buildStatisticsString_();
00349 TLOG(TLVL_INFO) << statString;
00350 }
00351 if (fragment_count_ % 250 == 1 || readyToReport)
00352 {
00353 TLOG(TLVL_DEBUG)
00354 << "Sending fragment " << fragment_count_
00355 << " with SeqID " << sequence_id << ".";
00356 }
00357 }
00358 if (statsHelper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
00359 frags.clear();
00360 }
00361
00362 sender_ptr_.reset(nullptr);
00363
00364
00365
00366
00367 metricMan->do_stop();
00368
00369 TLOG(TLVL_DEBUG) << "process_fragments loop end";
00370 }
00371
00372 std::string artdaq::BoardReaderCore::report(std::string const& which) const
00373 {
00374 std::string resultString;
00375
00376
00377 if (generator_ptr_.get() != 0)
00378 {
00379 resultString = generator_ptr_->ReportCmd(which);
00380 if (resultString.length() > 0) { return resultString; }
00381 }
00382
00383
00384
00385
00386
00387 std::string tmpString = app_name + " run number = ";
00388 tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
00389 tmpString.append(". Command=\"" + which + "\" is not currently supported.");
00390 return tmpString;
00391 }
00392
00393 bool artdaq::BoardReaderCore::metaCommand(std::string const& command, std::string const& arg)
00394 {
00395 TLOG(TLVL_DEBUG) << "metaCommand method called with "
00396 << "command = \"" << command << "\""
00397 << ", arg = \"" << arg << "\""
00398 << ".";
00399
00400 if (generator_ptr_) return generator_ptr_->metaCommand(command, arg);
00401
00402 return true;
00403 }
00404
00405 std::string artdaq::BoardReaderCore::buildStatisticsString_()
00406 {
00407 std::ostringstream oss;
00408 oss << app_name << " statistics:" << std::endl;
00409
00410 double fragmentCount = 1.0;
00411 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00412 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00413 if (mqPtr.get() != 0)
00414 {
00415 artdaq::MonitoredQuantityStats stats;
00416 mqPtr->getStats(stats);
00417 oss << " Fragment statistics: "
00418 << stats.recentSampleCount << " fragments received at "
00419 << stats.recentSampleRate << " fragments/sec, effective data rate = "
00420 << (stats.recentValueRate * sizeof(artdaq::RawDataType)
00421 / 1024.0 / 1024.0) << " MB/sec, monitor window = "
00422 << stats.recentDuration << " sec, min::max event size = "
00423 << (stats.recentValueMin * sizeof(artdaq::RawDataType)
00424 / 1024.0 / 1024.0)
00425 << "::"
00426 << (stats.recentValueMax * sizeof(artdaq::RawDataType)
00427 / 1024.0 / 1024.0)
00428 << " MB" << std::endl;
00429 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00430 oss << " Average times per fragment: ";
00431 if (stats.recentSampleRate > 0.0)
00432 {
00433 oss << " elapsed time = "
00434 << (1.0 / stats.recentSampleRate) << " sec";
00435 }
00436 }
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446 mqPtr = artdaq::StatisticsCollection::getInstance().
00447 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00448 if (mqPtr.get() != 0)
00449 {
00450 oss << ", input wait time = "
00451 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00452 }
00453
00454 mqPtr = artdaq::StatisticsCollection::getInstance().
00455 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00456 if (mqPtr.get() != 0)
00457 {
00458 oss << ", BRsync wait time = "
00459 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00460 }
00461
00462 mqPtr = artdaq::StatisticsCollection::getInstance().
00463 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00464 if (mqPtr.get() != 0)
00465 {
00466 oss << ", output wait time = "
00467 << (mqPtr->getRecentValueSum() / fragmentCount) << " sec";
00468 }
00469
00470 oss << std::endl << " Fragments per read: ";
00471 mqPtr = artdaq::StatisticsCollection::getInstance().
00472 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00473 if (mqPtr.get() != 0)
00474 {
00475 artdaq::MonitoredQuantityStats stats;
00476 mqPtr->getStats(stats);
00477 oss << "average = "
00478 << stats.recentValueAverage
00479 << ", min::max = "
00480 << stats.recentValueMin
00481 << "::"
00482 << stats.recentValueMax;
00483 }
00484
00485 return oss.str();
00486 }
00487
00488 void artdaq::BoardReaderCore::sendMetrics_()
00489 {
00490
00491 double fragmentCount = 1.0;
00492 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00493 getMonitoredQuantity(FRAGMENTS_PROCESSED_STAT_KEY);
00494 if (mqPtr.get() != 0)
00495 {
00496 artdaq::MonitoredQuantityStats stats;
00497 mqPtr->getStats(stats);
00498 fragmentCount = std::max(double(stats.recentSampleCount), 1.0);
00499 metricMan->sendMetric("Fragment Count", static_cast<unsigned long>(stats.fullSampleCount), "fragments", 1, MetricMode::LastPoint);
00500 metricMan->sendMetric("Fragment Rate", stats.recentSampleRate, "fragments/sec", 1, MetricMode::Average);
00501 metricMan->sendMetric("Average Fragment Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/fragment", 2, MetricMode::Average);
00502 metricMan->sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
00503 }
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513 mqPtr = artdaq::StatisticsCollection::getInstance().
00514 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
00515 if (mqPtr.get() != 0)
00516 {
00517 metricMan->sendMetric("Avg Input Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00518 }
00519
00520 mqPtr = artdaq::StatisticsCollection::getInstance().
00521 getMonitoredQuantity(BRSYNC_WAIT_STAT_KEY);
00522 if (mqPtr.get() != 0)
00523 {
00524 metricMan->sendMetric("Avg BoardReader Sync Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00525 }
00526
00527 mqPtr = artdaq::StatisticsCollection::getInstance().
00528 getMonitoredQuantity(OUTPUT_WAIT_STAT_KEY);
00529 if (mqPtr.get() != 0)
00530 {
00531 metricMan->sendMetric("Avg Output Wait Time", (mqPtr->getRecentValueSum() / fragmentCount), "seconds/fragment", 3, MetricMode::Average);
00532 }
00533
00534 mqPtr = artdaq::StatisticsCollection::getInstance().
00535 getMonitoredQuantity(FRAGMENTS_PER_READ_STAT_KEY);
00536 if (mqPtr.get() != 0)
00537 {
00538 metricMan->sendMetric("Avg Frags Per Read", mqPtr->getRecentValueAverage(), "fragments/read", 4, MetricMode::Average);
00539 }
00540 }
00541
00542 void artdaq::BoardReaderCore::logMessage_(std::string const& text)
00543 {
00544 if (verbose_)
00545 {
00546 TLOG(TLVL_INFO) << text;
00547 }
00548 else
00549 {
00550 TLOG(TLVL_DEBUG) << text;
00551 }
00552 }